master
   1const std = @import("std");
   2const assert = std.debug.assert;
   3const log = std.log.scoped(.x86_64_encoder);
   4const math = std.math;
   5const testing = std.testing;
   6const Writer = std.Io.Writer;
   7
   8const bits = @import("bits.zig");
   9const Encoding = @import("Encoding.zig");
  10const FrameIndex = bits.FrameIndex;
  11const Register = bits.Register;
  12const Symbol = bits.Symbol;
  13
  14pub const Instruction = struct {
  15    prefix: Prefix = .none,
  16    encoding: Encoding,
  17    ops: [4]Operand = .{.none} ** 4,
  18
  19    pub const Mnemonic = Encoding.Mnemonic;
  20
    /// Prefix requested for an instruction.
    pub const Prefix = enum(u3) {
        /// No prefix is requested.
        none,
        /// Sets `LegacyPrefixes.prefix_f0` when legacy prefixes are encoded.
        lock,
        /// Sets `LegacyPrefixes.prefix_f3` when legacy prefixes are encoded.
        rep,
        /// Same legacy prefix as `rep`.
        repe,
        /// Same legacy prefix as `rep`.
        repz,
        /// Sets `LegacyPrefixes.prefix_f2` when legacy prefixes are encoded.
        repne,
        /// Same legacy prefix as `repne`.
        repnz,
        /// Not an encodable prefix: `new` builds a placeholder encoding for it
        /// instead of looking one up, and `encode` asserts it is never given one.
        directive,
    };
  31
  32    pub const Immediate = union(enum) {
  33        signed: i32,
  34        unsigned: u64,
  35
  36        pub fn u(x: u64) Immediate {
  37            return .{ .unsigned = x };
  38        }
  39
  40        pub fn s(x: i32) Immediate {
  41            return .{ .signed = x };
  42        }
  43
  44        pub fn asSigned(imm: Immediate, bit_size: u64) i64 {
  45            return switch (imm) {
  46                .signed => |x| switch (bit_size) {
  47                    1, 8 => @as(i8, @intCast(x)),
  48                    16 => @as(i16, @intCast(x)),
  49                    32, 64 => x,
  50                    else => unreachable,
  51                },
  52                .unsigned => |x| switch (bit_size) {
  53                    1, 8 => @as(i8, @bitCast(@as(u8, @intCast(x)))),
  54                    16 => @as(i16, @bitCast(@as(u16, @intCast(x)))),
  55                    32 => @as(i32, @bitCast(@as(u32, @intCast(x)))),
  56                    64 => @bitCast(x),
  57                    else => unreachable,
  58                },
  59            };
  60        }
  61
  62        pub fn asUnsigned(imm: Immediate, bit_size: u64) u64 {
  63            return switch (imm) {
  64                .signed => |x| switch (bit_size) {
  65                    1, 8 => @as(u8, @bitCast(@as(i8, @intCast(x)))),
  66                    16 => @as(u16, @bitCast(@as(i16, @intCast(x)))),
  67                    32, 64 => @as(u32, @bitCast(x)),
  68                    else => unreachable,
  69                },
  70                .unsigned => |x| switch (bit_size) {
  71                    1, 8 => @as(u8, @intCast(x)),
  72                    16 => @as(u16, @intCast(x)),
  73                    32 => @as(u32, @intCast(x)),
  74                    64 => x,
  75                    else => unreachable,
  76                },
  77            };
  78        }
  79    };
  80
  81    pub const Memory = union(enum) {
  82        sib: Sib,
  83        rip: Rip,
  84        moffs: Moffs,
  85
  86        pub const Base = bits.Memory.Base;
  87
  88        pub const ScaleIndex = struct {
  89            scale: u4,
  90            index: Register,
  91
  92            const none = ScaleIndex{ .scale = 0, .index = undefined };
  93        };
  94
  95        pub const PtrSize = bits.Memory.Size;
  96
  97        pub const Sib = struct {
  98            ptr_size: PtrSize,
  99            base: Base,
 100            scale_index: ScaleIndex,
 101            disp: i32,
 102        };
 103
 104        pub const Rip = struct {
 105            ptr_size: PtrSize,
 106            disp: i32,
 107        };
 108
 109        pub const Moffs = struct {
 110            seg: Register,
 111            offset: u64,
 112        };
 113
 114        pub fn initMoffs(reg: Register, offset: u64) Memory {
 115            assert(reg.isClass(.segment));
 116            return .{ .moffs = .{ .seg = reg, .offset = offset } };
 117        }
 118
 119        pub fn initSib(ptr_size: PtrSize, args: struct {
 120            disp: i32 = 0,
 121            base: Base = .none,
 122            scale_index: ?ScaleIndex = null,
 123        }) Memory {
 124            if (args.scale_index) |si| assert(std.math.isPowerOfTwo(si.scale));
 125            return .{ .sib = .{
 126                .base = args.base,
 127                .disp = args.disp,
 128                .ptr_size = ptr_size,
 129                .scale_index = if (args.scale_index) |si| si else ScaleIndex.none,
 130            } };
 131        }
 132
 133        pub fn initRip(ptr_size: PtrSize, displacement: i32) Memory {
 134            return .{ .rip = .{ .ptr_size = ptr_size, .disp = displacement } };
 135        }
 136
 137        pub fn isSegmentRegister(mem: Memory) bool {
 138            return switch (mem) {
 139                .moffs => true,
 140                .rip => false,
 141                .sib => |s| switch (s.base) {
 142                    .none, .frame, .table, .rip_inst, .nav, .uav, .lazy_sym, .extern_func => false,
 143                    .reg => |reg| reg.isClass(.segment),
 144                },
 145            };
 146        }
 147
 148        pub fn base(mem: Memory) Base {
 149            return switch (mem) {
 150                .moffs => |m| .{ .reg = m.seg },
 151                .sib => |s| s.base,
 152                .rip => .none,
 153            };
 154        }
 155
 156        pub fn scaleIndex(mem: Memory) ?ScaleIndex {
 157            return switch (mem) {
 158                .moffs, .rip => null,
 159                .sib => |s| if (s.scale_index.scale > 0) s.scale_index else null,
 160            };
 161        }
 162
 163        pub fn disp(mem: Memory) Immediate {
 164            return switch (mem) {
 165                .sib => |s| .s(s.disp),
 166                .rip => |r| .s(r.disp),
 167                .moffs => |m| .u(m.offset),
 168            };
 169        }
 170
 171        pub fn bitSize(mem: Memory, target: *const std.Target) u64 {
 172            return switch (mem) {
 173                .rip => |r| r.ptr_size.bitSize(target),
 174                .sib => |s| s.ptr_size.bitSize(target),
 175                .moffs => target.ptrBitWidth(),
 176            };
 177        }
 178    };
 179
 180    pub const Operand = union(enum) {
 181        none,
 182        reg: Register,
 183        mem: Memory,
 184        imm: Immediate,
 185        bytes: []const u8,
 186
 187        /// Returns the bitsize of the operand.
 188        pub fn bitSize(op: Operand) u64 {
 189            return switch (op) {
 190                .none => unreachable,
 191                .reg => |reg| reg.bitSize(),
 192                .mem => |mem| mem.bitSize(),
 193                .imm => unreachable,
 194                .bytes => unreachable,
 195            };
 196        }
 197
 198        /// Returns true if the operand is a segment register.
 199        /// Asserts the operand is either register or memory.
 200        pub fn isSegmentRegister(op: Operand) bool {
 201            return switch (op) {
 202                .none => unreachable,
 203                .reg => |reg| reg.isClass(.segment),
 204                .mem => |mem| mem.isSegmentRegister(),
 205                .imm => unreachable,
 206                .bytes => unreachable,
 207            };
 208        }
 209
 210        pub fn baseExtEnc(op: Operand) u2 {
 211            return switch (op) {
 212                .none, .imm => 0b00,
 213                .reg => |reg| @truncate(reg.enc() >> 3),
 214                .mem => |mem| switch (mem.base()) {
 215                    .none, .frame, .table, .rip_inst, .nav, .uav, .lazy_sym, .extern_func => 0b00, // rsp, rbp, and rip are not extended
 216                    .reg => |reg| @truncate(reg.enc() >> 3),
 217                },
 218                .bytes => unreachable,
 219            };
 220        }
 221
 222        pub fn indexExtEnc(op: Operand) u2 {
 223            return switch (op) {
 224                .none, .reg, .imm => 0b00,
 225                .mem => |mem| if (mem.scaleIndex()) |si| @truncate(si.index.enc() >> 3) else 0b00,
 226                .bytes => unreachable,
 227            };
 228        }
 229
 230        const Format = struct {
 231            op: Operand,
 232            enc_op: Encoding.Op,
 233
 234            fn default(f: Format, w: *Writer) Writer.Error!void {
 235                const op = f.op;
 236                const enc_op = f.enc_op;
 237                switch (op) {
 238                    .none => {},
 239                    .reg => |reg| try w.writeAll(@tagName(reg)),
 240                    .mem => |mem| switch (mem) {
 241                        .rip => |rip| {
 242                            try w.print("{f} [rip", .{rip.ptr_size});
 243                            if (rip.disp != 0) try w.print(" {c} 0x{x}", .{
 244                                @as(u8, if (rip.disp < 0) '-' else '+'),
 245                                @abs(rip.disp),
 246                            });
 247                            try w.writeByte(']');
 248                        },
 249                        .sib => |sib| {
 250                            try w.print("{f} ", .{sib.ptr_size});
 251
 252                            if (mem.isSegmentRegister()) {
 253                                return w.print("{s}:0x{x}", .{ @tagName(sib.base.reg), sib.disp });
 254                            }
 255
 256                            try w.writeByte('[');
 257
 258                            var any = true;
 259                            switch (sib.base) {
 260                                .none => any = false,
 261                                .reg => |reg| try w.print("{s}", .{@tagName(reg)}),
 262                                .frame => |frame_index| try w.print("{f}", .{frame_index}),
 263                                .table => try w.print("Table", .{}),
 264                                .rip_inst => |inst_index| try w.print("RipInst({d})", .{inst_index}),
 265                                .nav => |nav| try w.print("Nav({d})", .{@intFromEnum(nav)}),
 266                                .uav => |uav| try w.print("Uav({d})", .{@intFromEnum(uav.val)}),
 267                                .lazy_sym => |lazy_sym| try w.print("LazySym({s}, {d})", .{
 268                                    @tagName(lazy_sym.kind),
 269                                    @intFromEnum(lazy_sym.ty),
 270                                }),
 271                                .extern_func => |extern_func| try w.print("ExternFunc({d})", .{@intFromEnum(extern_func)}),
 272                            }
 273                            if (mem.scaleIndex()) |si| {
 274                                if (any) try w.writeAll(" + ");
 275                                try w.print("{s} * {d}", .{ @tagName(si.index), si.scale });
 276                                any = true;
 277                            }
 278                            if (sib.disp != 0 or !any) {
 279                                if (any)
 280                                    try w.print(" {c} ", .{@as(u8, if (sib.disp < 0) '-' else '+')})
 281                                else if (sib.disp < 0)
 282                                    try w.writeByte('-');
 283                                try w.print("0x{x}", .{@abs(sib.disp)});
 284                                any = true;
 285                            }
 286
 287                            try w.writeByte(']');
 288                        },
 289                        .moffs => |moffs| try w.print("{s}:0x{x}", .{
 290                            @tagName(moffs.seg),
 291                            moffs.offset,
 292                        }),
 293                    },
 294                    .imm => |imm| if (enc_op.isSigned()) {
 295                        const imms = imm.asSigned(enc_op.immBitSize());
 296                        if (imms < 0) try w.writeByte('-');
 297                        try w.print("0x{x}", .{@abs(imms)});
 298                    } else try w.print("0x{x}", .{imm.asUnsigned(enc_op.immBitSize())}),
 299                    .bytes => unreachable,
 300                }
 301            }
 302        };
 303
 304        pub fn fmt(op: Operand, enc_op: Encoding.Op) std.fmt.Alt(Format, Format.default) {
 305            return .{ .data = .{ .op = op, .enc_op = enc_op } };
 306        }
 307    };
 308
 309    pub fn new(
 310        prefix: Prefix,
 311        mnemonic: Mnemonic,
 312        ops: []const Operand,
 313        target: *const std.Target,
 314    ) !Instruction {
 315        const encoding: Encoding = switch (prefix) {
 316            else => (try Encoding.findByMnemonic(prefix, mnemonic, ops, target)) orelse {
 317                log.err("no encoding found for: {s} {s} {s} {s} {s} {s}", .{
 318                    @tagName(prefix),
 319                    @tagName(mnemonic),
 320                    @tagName(if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none),
 321                    @tagName(if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none),
 322                    @tagName(if (ops.len > 2) Encoding.Op.fromOperand(ops[2], target) else .none),
 323                    @tagName(if (ops.len > 3) Encoding.Op.fromOperand(ops[3], target) else .none),
 324                });
 325                return error.InvalidInstruction;
 326            },
 327            .directive => .{
 328                .mnemonic = mnemonic,
 329                .data = .{
 330                    .op_en = .z,
 331                    .ops = .{
 332                        if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none,
 333                        if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none,
 334                        if (ops.len > 2) Encoding.Op.fromOperand(ops[2], target) else .none,
 335                        if (ops.len > 3) Encoding.Op.fromOperand(ops[3], target) else .none,
 336                    },
 337                    .opc_len = 0,
 338                    .opc = undefined,
 339                    .modrm_ext = 0,
 340                    .mode = .none,
 341                    .feature = .none,
 342                },
 343            },
 344        };
 345        log.debug("selected encoding: {f}", .{encoding});
 346
 347        var inst: Instruction = .{
 348            .prefix = prefix,
 349            .encoding = encoding,
 350            .ops = [1]Operand{.none} ** 4,
 351        };
 352        @memcpy(inst.ops[0..ops.len], ops);
 353        return inst;
 354    }
 355
 356    pub fn format(inst: Instruction, w: *Writer) Writer.Error!void {
 357        switch (inst.prefix) {
 358            .none, .directive => {},
 359            else => try w.print("{s} ", .{@tagName(inst.prefix)}),
 360        }
 361        try w.print("{s}", .{@tagName(inst.encoding.mnemonic)});
 362        for (inst.ops, inst.encoding.data.ops, 0..) |op, enc, i| {
 363            if (op == .none) break;
 364            if (i > 0) try w.writeByte(',');
 365            try w.print(" {f}", .{op.fmt(enc)});
 366        }
 367    }
 368
 369    pub fn encode(inst: Instruction, w: *Writer, comptime opts: Options) !void {
 370        assert(inst.prefix != .directive);
 371        const encoder: Encoder(opts) = .{ .w = w };
 372        const enc = inst.encoding;
 373        const data = enc.data;
 374
 375        try inst.encodeWait(encoder);
 376        if (data.mode.isVex()) {
 377            try inst.encodeVexPrefix(encoder);
 378            const opc = inst.encoding.opcode();
 379            try encoder.opcode_1byte(opc[opc.len - 1]);
 380        } else {
 381            try inst.encodeLegacyPrefixes(encoder);
 382            try inst.encodeMandatoryPrefix(encoder);
 383            try inst.encodeRexPrefix(encoder);
 384            try inst.encodeOpcode(encoder);
 385        }
 386
 387        switch (data.op_en) {
 388            .z, .o, .zo, .oz => {},
 389            .i, .d => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
 390            .zi, .oi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
 391            .ii => {
 392                try encodeImm(inst.ops[0].imm, data.ops[0], encoder);
 393                try encodeImm(inst.ops[1].imm, data.ops[1], encoder);
 394            },
 395            .fd => try encoder.imm64(inst.ops[1].mem.moffs.offset),
 396            .td => try encoder.imm64(inst.ops[0].mem.moffs.offset),
 397            else => {
 398                const mem_op: Operand = switch (data.op_en) {
 399                    .ia => .{ .reg = .eax },
 400                    .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
 401                    .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1],
 402                    .rvm, .rvmr, .rvmi => inst.ops[2],
 403                    else => unreachable,
 404                };
 405                switch (mem_op) {
 406                    .reg => |reg| {
 407                        const rm: u3 = switch (data.op_en) {
 408                            .ia, .m, .mi, .m1, .mc, .vm, .vmi => enc.modRmExt(),
 409                            .mr, .mri, .mrc => @truncate(inst.ops[1].reg.enc()),
 410                            .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => @truncate(inst.ops[0].reg.enc()),
 411                            .mvr => @truncate(inst.ops[2].reg.enc()),
 412                            else => unreachable,
 413                        };
 414                        try encoder.modRm_direct(rm, @truncate(reg.enc()));
 415                    },
 416                    .mem => |mem| {
 417                        const op = switch (data.op_en) {
 418                            .m, .mi, .m1, .mc, .vm, .vmi => .none,
 419                            .mr, .mri, .mrc => inst.ops[1],
 420                            .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
 421                            .mvr => inst.ops[2],
 422                            else => unreachable,
 423                        };
 424                        try encodeMemory(enc, mem, op, encoder);
 425                    },
 426                    else => unreachable,
 427                }
 428
 429                switch (data.op_en) {
 430                    .ia => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
 431                    .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
 432                    .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder),
 433                    .rvmr => try encoder.imm8(@as(u8, @as(u4, @intCast(inst.ops[3].reg.enc()))) << 4),
 434                    .rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder),
 435                    else => {},
 436                }
 437            },
 438        }
 439    }
 440
 441    fn encodeOpcode(inst: Instruction, encoder: anytype) !void {
 442        const opcode = inst.encoding.opcode();
 443        const first = @intFromBool(inst.encoding.mandatoryPrefix() != null);
 444        const final = opcode.len - 1;
 445        for (opcode[first..final]) |byte| try encoder.opcode_1byte(byte);
 446        switch (inst.encoding.data.op_en) {
 447            .o, .oz, .oi => try encoder.opcode_withReg(opcode[final], @truncate(inst.ops[0].reg.enc())),
 448            .zo => try encoder.opcode_withReg(opcode[final], @truncate(inst.ops[1].reg.enc())),
 449            else => try encoder.opcode_1byte(opcode[final]),
 450        }
 451    }
 452
 453    fn encodeWait(inst: Instruction, encoder: anytype) !void {
 454        switch (inst.encoding.data.mode) {
 455            .wait => try encoder.opcode_1byte(0x9b),
 456            else => {},
 457        }
 458    }
 459
 460    fn encodeLegacyPrefixes(inst: Instruction, encoder: anytype) !void {
 461        const enc = inst.encoding;
 462        const data = enc.data;
 463        const op_en = data.op_en;
 464
 465        var legacy = LegacyPrefixes{};
 466
 467        switch (inst.prefix) {
 468            .none => {},
 469            .lock => legacy.prefix_f0 = true,
 470            .repne, .repnz => legacy.prefix_f2 = true,
 471            .rep, .repe, .repz => legacy.prefix_f3 = true,
 472            .directive => unreachable,
 473        }
 474
 475        switch (data.mode) {
 476            .short, .rex_short => legacy.set16BitOverride(),
 477            else => {},
 478        }
 479
 480        const segment_override: ?Register = switch (op_en) {
 481            .z, .i, .zi, .ii, .ia, .o, .zo, .oz, .oi, .d => null,
 482            .fd => inst.ops[1].mem.base().reg,
 483            .td => inst.ops[0].mem.base().reg,
 484            .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister())
 485                switch (inst.ops[1]) {
 486                    .reg => |reg| reg,
 487                    .mem => |mem| mem.base().reg,
 488                    else => unreachable,
 489                }
 490            else
 491                null,
 492            .m, .mi, .m1, .mc, .mr, .mri, .mrc => if (inst.ops[0].isSegmentRegister())
 493                switch (inst.ops[0]) {
 494                    .reg => |reg| reg,
 495                    .mem => |mem| mem.base().reg,
 496                    else => unreachable,
 497                }
 498            else
 499                null,
 500            .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable,
 501        };
 502        if (segment_override) |seg| {
 503            legacy.setSegmentOverride(seg);
 504        }
 505
 506        try encoder.legacyPrefixes(legacy);
 507    }
 508
 509    fn encodeRexPrefix(inst: Instruction, encoder: anytype) !void {
 510        const op_en = inst.encoding.data.op_en;
 511
 512        var rex = Rex{};
 513        rex.present = inst.encoding.data.mode == .rex;
 514        rex.w = inst.encoding.data.mode == .long;
 515
 516        switch (op_en) {
 517            .z, .i, .zi, .ii, .ia, .fd, .td, .d => {},
 518            .o, .oz, .oi => rex.b = inst.ops[0].reg.enc() & 0b01000 != 0,
 519            .zo => rex.b = inst.ops[1].reg.enc() & 0b01000 != 0,
 520            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => {
 521                const r_op = switch (op_en) {
 522                    .rm, .rmi, .rm0, .rmv => inst.ops[0],
 523                    .mr, .mri, .mrc => inst.ops[1],
 524                    else => .none,
 525                };
 526                const r_op_base_ext_enc = r_op.baseExtEnc();
 527                rex.r = r_op_base_ext_enc & 0b01 != 0;
 528                assert(r_op_base_ext_enc & 0b10 == 0);
 529
 530                const b_x_op = switch (op_en) {
 531                    .rm, .rmi, .rm0 => inst.ops[1],
 532                    .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
 533                    else => unreachable,
 534                };
 535                const b_x_op_base_ext_enc = b_x_op.baseExtEnc();
 536                rex.b = b_x_op_base_ext_enc & 0b01 != 0;
 537                assert(b_x_op_base_ext_enc & 0b10 == 0);
 538                const b_x_op_index_ext_enc = b_x_op.indexExtEnc();
 539                rex.x = b_x_op_index_ext_enc & 0b01 != 0;
 540                assert(b_x_op_index_ext_enc & 0b10 == 0);
 541            },
 542            .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
 543        }
 544
 545        try encoder.rex(rex);
 546    }
 547
 548    fn encodeVexPrefix(inst: Instruction, encoder: anytype) !void {
 549        const op_en = inst.encoding.data.op_en;
 550        const opc = inst.encoding.opcode();
 551        const mand_pre = inst.encoding.mandatoryPrefix();
 552
 553        var vex = Vex{};
 554
 555        vex.w = inst.encoding.data.mode.isLong();
 556
 557        switch (op_en) {
 558            .z, .i, .zi, .ii, .ia, .fd, .td, .d, .o, .oz, .oi, .zo => unreachable,
 559            .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => {
 560                const r_op = switch (op_en) {
 561                    .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
 562                    .mr, .mri, .mrc => inst.ops[1],
 563                    .mvr => inst.ops[2],
 564                    .m, .mi, .m1, .mc, .vm, .vmi => .none,
 565                    else => unreachable,
 566                };
 567                const r_op_base_ext_enc = r_op.baseExtEnc();
 568                vex.r = r_op_base_ext_enc & 0b01 != 0;
 569                assert(r_op_base_ext_enc & 0b10 == 0);
 570
 571                const b_x_op = switch (op_en) {
 572                    .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1],
 573                    .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
 574                    .rvm, .rvmr, .rvmi => inst.ops[2],
 575                    else => unreachable,
 576                };
 577                const b_x_op_base_ext_enc = b_x_op.baseExtEnc();
 578                vex.b = b_x_op_base_ext_enc & 0b01 != 0;
 579                assert(b_x_op_base_ext_enc & 0b10 == 0);
 580                const b_x_op_index_ext_enc = b_x_op.indexExtEnc();
 581                vex.x = b_x_op_index_ext_enc & 0b01 != 0;
 582                assert(b_x_op_index_ext_enc & 0b10 == 0);
 583            },
 584        }
 585
 586        vex.l = inst.encoding.data.mode.isVecLong();
 587
 588        vex.p = if (mand_pre) |mand| switch (mand) {
 589            0x66 => .@"66",
 590            0xf2 => .f2,
 591            0xf3 => .f3,
 592            else => unreachable,
 593        } else .none;
 594
 595        const leading: usize = if (mand_pre) |_| 1 else 0;
 596        assert(opc[leading] == 0x0f);
 597        vex.m = switch (opc[leading + 1]) {
 598            else => .@"0f",
 599            0x38 => .@"0f38",
 600            0x3a => .@"0f3a",
 601        };
 602
 603        switch (op_en) {
 604            else => {},
 605            .vm, .vmi => vex.v = inst.ops[0].reg,
 606            .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
 607            .rmv => vex.v = inst.ops[2].reg,
 608        }
 609
 610        try encoder.vex(vex);
 611    }
 612
 613    fn encodeMandatoryPrefix(inst: Instruction, encoder: anytype) !void {
 614        const prefix = inst.encoding.mandatoryPrefix() orelse return;
 615        try encoder.opcode_1byte(prefix);
 616    }
 617
 618    fn encodeMemory(encoding: Encoding, mem: Memory, operand: Operand, encoder: anytype) !void {
 619        const operand_enc: u3 = switch (operand) {
 620            .reg => |reg| @truncate(reg.enc()),
 621            .none => encoding.modRmExt(),
 622            else => unreachable,
 623        };
 624
 625        switch (mem) {
 626            .moffs => unreachable,
 627            .sib => |sib| switch (sib.base) {
 628                .none, .table => {
 629                    try encoder.modRm_SIBDisp0(operand_enc);
 630                    if (mem.scaleIndex()) |si| {
 631                        const scale = math.log2_int(u4, si.scale);
 632                        try encoder.sib_scaleIndexDisp32(scale, @truncate(si.index.enc()));
 633                    } else {
 634                        try encoder.sib_disp32();
 635                    }
 636                    try encoder.disp32(sib.disp);
 637                },
 638                .reg => |base| switch (base.class()) {
 639                    .segment => {
 640                        // TODO audit this wrt SIB
 641                        try encoder.modRm_SIBDisp0(operand_enc);
 642                        if (mem.scaleIndex()) |si| {
 643                            const scale = math.log2_int(u4, si.scale);
 644                            try encoder.sib_scaleIndexDisp32(scale, @truncate(si.index.enc()));
 645                        } else {
 646                            try encoder.sib_disp32();
 647                        }
 648                        try encoder.disp32(sib.disp);
 649                    },
 650                    .general_purpose => {
 651                        const dst: u3 = @truncate(base.enc());
 652                        const src = operand_enc;
 653                        if (dst == 4 or mem.scaleIndex() != null) {
 654                            if (sib.disp == 0 and dst != 5) {
 655                                try encoder.modRm_SIBDisp0(src);
 656                                if (mem.scaleIndex()) |si| {
 657                                    const scale = math.log2_int(u4, si.scale);
 658                                    try encoder.sib_scaleIndexBase(scale, @truncate(si.index.enc()), dst);
 659                                } else {
 660                                    try encoder.sib_base(dst);
 661                                }
 662                            } else if (math.cast(i8, sib.disp)) |_| {
 663                                try encoder.modRm_SIBDisp8(src);
 664                                if (mem.scaleIndex()) |si| {
 665                                    const scale = math.log2_int(u4, si.scale);
 666                                    try encoder.sib_scaleIndexBaseDisp8(scale, @truncate(si.index.enc()), dst);
 667                                } else {
 668                                    try encoder.sib_baseDisp8(dst);
 669                                }
 670                                try encoder.disp8(@as(i8, @truncate(sib.disp)));
 671                            } else {
 672                                try encoder.modRm_SIBDisp32(src);
 673                                if (mem.scaleIndex()) |si| {
 674                                    const scale = math.log2_int(u4, si.scale);
 675                                    try encoder.sib_scaleIndexBaseDisp32(scale, @truncate(si.index.enc()), dst);
 676                                } else {
 677                                    try encoder.sib_baseDisp32(dst);
 678                                }
 679                                try encoder.disp32(sib.disp);
 680                            }
 681                        } else {
 682                            if (sib.disp == 0 and dst != 5) {
 683                                try encoder.modRm_indirectDisp0(src, dst);
 684                            } else if (math.cast(i8, sib.disp)) |_| {
 685                                try encoder.modRm_indirectDisp8(src, dst);
 686                                try encoder.disp8(@as(i8, @truncate(sib.disp)));
 687                            } else {
 688                                try encoder.modRm_indirectDisp32(src, dst);
 689                                try encoder.disp32(sib.disp);
 690                            }
 691                        }
 692                    },
 693                    else => unreachable,
 694                },
 695                .frame => if (@TypeOf(encoder).options.allow_frame_locs) {
 696                    try encoder.modRm_indirectDisp32(operand_enc, 0);
 697                    try encoder.disp32(undefined);
 698                } else return error.CannotEncode,
 699                .nav, .uav, .lazy_sym, .extern_func => if (@TypeOf(encoder).options.allow_symbols) {
 700                    try encoder.modRm_indirectDisp32(operand_enc, 0);
 701                    try encoder.disp32(undefined);
 702                } else return error.CannotEncode,
 703                .rip_inst => {
 704                    try encoder.modRm_RIPDisp32(operand_enc);
 705                    try encoder.disp32(sib.disp);
 706                },
 707            },
 708            .rip => |rip| {
 709                try encoder.modRm_RIPDisp32(operand_enc);
 710                try encoder.disp32(rip.disp);
 711            },
 712        }
 713    }
 714
 715    fn encodeImm(imm: Immediate, kind: Encoding.Op, encoder: anytype) !void {
 716        const raw = imm.asUnsigned(kind.immBitSize());
 717        switch (kind.immBitSize()) {
 718            8 => try encoder.imm8(@as(u8, @intCast(raw))),
 719            16 => try encoder.imm16(@as(u16, @intCast(raw))),
 720            32 => try encoder.imm32(@as(u32, @intCast(raw))),
 721            64 => try encoder.imm64(raw),
 722            else => unreachable,
 723        }
 724    }
 725};
 726
 727pub const LegacyPrefixes = packed struct {
 728    /// LOCK
 729    prefix_f0: bool = false,
 730    /// REPNZ, REPNE, REP, Scalar Double-precision
 731    prefix_f2: bool = false,
 732    /// REPZ, REPE, REP, Scalar Single-precision
 733    prefix_f3: bool = false,
 734
 735    /// CS segment override or Branch not taken
 736    prefix_2e: bool = false,
 737    /// SS segment override
 738    prefix_36: bool = false,
 739    /// ES segment override
 740    prefix_26: bool = false,
 741    /// FS segment override
 742    prefix_64: bool = false,
 743    /// GS segment override
 744    prefix_65: bool = false,
 745
 746    /// Branch taken
 747    prefix_3e: bool = false,
 748
 749    /// Address size override (enables 16 bit address size)
 750    prefix_67: bool = false,
 751
 752    /// Operand size override (enables 16 bit operation)
 753    prefix_66: bool = false,
 754
 755    padding: u5 = 0,
 756
 757    pub fn setSegmentOverride(self: *LegacyPrefixes, reg: Register) void {
 758        assert(reg.isClass(.segment));
 759        switch (reg) {
 760            .cs => self.prefix_2e = true,
 761            .ss => self.prefix_36 = true,
 762            .es => self.prefix_26 = true,
 763            .fs => self.prefix_64 = true,
 764            .gs => self.prefix_65 = true,
 765            .ds => {},
 766            else => unreachable,
 767        }
 768    }
 769
 770    pub fn set16BitOverride(self: *LegacyPrefixes) void {
 771        self.prefix_66 = true;
 772    }
 773};
 774
 775pub const Options = struct { allow_frame_locs: bool = false, allow_symbols: bool = false };
 776
 777fn Encoder(comptime opts: Options) type {
 778    return struct {
 779        w: *Writer,
 780
 781        const Self = @This();
 782        pub const options = opts;
 783
 784        // --------
 785        // Prefixes
 786        // --------
 787
 788        /// Encodes legacy prefixes
 789        pub fn legacyPrefixes(self: Self, prefixes: LegacyPrefixes) !void {
 790            if (@as(u16, @bitCast(prefixes)) != 0) {
 791                // Hopefully this path isn't taken very often, so we'll do it the slow way for now
 792
 793                // LOCK
 794                if (prefixes.prefix_f0) try self.w.writeByte(0xf0);
 795                // REPNZ, REPNE, REP, Scalar Double-precision
 796                if (prefixes.prefix_f2) try self.w.writeByte(0xf2);
 797                // REPZ, REPE, REP, Scalar Single-precision
 798                if (prefixes.prefix_f3) try self.w.writeByte(0xf3);
 799
 800                // CS segment override or Branch not taken
 801                if (prefixes.prefix_2e) try self.w.writeByte(0x2e);
 802                // DS segment override
 803                if (prefixes.prefix_36) try self.w.writeByte(0x36);
 804                // ES segment override
 805                if (prefixes.prefix_26) try self.w.writeByte(0x26);
 806                // FS segment override
 807                if (prefixes.prefix_64) try self.w.writeByte(0x64);
 808                // GS segment override
 809                if (prefixes.prefix_65) try self.w.writeByte(0x65);
 810
 811                // Branch taken
 812                if (prefixes.prefix_3e) try self.w.writeByte(0x3e);
 813
 814                // Operand size override
 815                if (prefixes.prefix_66) try self.w.writeByte(0x66);
 816
 817                // Address size override
 818                if (prefixes.prefix_67) try self.w.writeByte(0x67);
 819            }
 820        }
 821
 822        /// Use 16 bit operand size
 823        ///
 824        /// Note that this flag is overridden by REX.W, if both are present.
 825        pub fn prefix16BitMode(self: Self) !void {
 826            try self.w.writeByte(0x66);
 827        }
 828
 829        /// Encodes a REX prefix byte given all the fields
 830        ///
 831        /// Use this byte whenever you need 64 bit operation,
 832        /// or one of reg, index, r/m, base, or opcode-reg might be extended.
 833        ///
 834        /// See struct `Rex` for a description of each field.
 835        pub fn rex(self: Self, fields: Rex) !void {
 836            if (!fields.present and !fields.isSet()) return;
 837
 838            var byte: u8 = 0b0100_0000;
 839
 840            if (fields.w) byte |= 0b1000;
 841            if (fields.r) byte |= 0b0100;
 842            if (fields.x) byte |= 0b0010;
 843            if (fields.b) byte |= 0b0001;
 844
 845            try self.w.writeByte(byte);
 846        }
 847
 848        /// Encodes a VEX prefix given all the fields
 849        ///
 850        /// See struct `Vex` for a description of each field.
 851        pub fn vex(self: Self, fields: Vex) !void {
 852            if (fields.is3Byte()) {
 853                try self.w.writeByte(0b1100_0100);
 854
 855                try self.w.writeByte(
 856                    @as(u8, ~@intFromBool(fields.r)) << 7 |
 857                        @as(u8, ~@intFromBool(fields.x)) << 6 |
 858                        @as(u8, ~@intFromBool(fields.b)) << 5 |
 859                        @as(u8, @intFromEnum(fields.m)) << 0,
 860                );
 861
 862                try self.w.writeByte(
 863                    @as(u8, @intFromBool(fields.w)) << 7 |
 864                        @as(u8, ~@as(u4, @intCast(fields.v.enc()))) << 3 |
 865                        @as(u8, @intFromBool(fields.l)) << 2 |
 866                        @as(u8, @intFromEnum(fields.p)) << 0,
 867                );
 868            } else {
 869                try self.w.writeByte(0b1100_0101);
 870                try self.w.writeByte(
 871                    @as(u8, ~@intFromBool(fields.r)) << 7 |
 872                        @as(u8, ~@as(u4, @intCast(fields.v.enc()))) << 3 |
 873                        @as(u8, @intFromBool(fields.l)) << 2 |
 874                        @as(u8, @intFromEnum(fields.p)) << 0,
 875                );
 876            }
 877        }
 878
 879        // ------
 880        // Opcode
 881        // ------
 882
 883        /// Encodes a 1 byte opcode
 884        pub fn opcode_1byte(self: Self, opcode: u8) !void {
 885            try self.w.writeByte(opcode);
 886        }
 887
 888        /// Encodes a 2 byte opcode
 889        ///
 890        /// e.g. IMUL has the opcode 0x0f 0xaf, so you use
 891        ///
 892        /// encoder.opcode_2byte(0x0f, 0xaf);
 893        pub fn opcode_2byte(self: Self, prefix: u8, opcode: u8) !void {
 894            try self.w.writeAll(&.{ prefix, opcode });
 895        }
 896
 897        /// Encodes a 3 byte opcode
 898        ///
 899        /// e.g. MOVSD has the opcode 0xf2 0x0f 0x10
 900        ///
 901        /// encoder.opcode_3byte(0xf2, 0x0f, 0x10);
 902        pub fn opcode_3byte(self: Self, prefix_1: u8, prefix_2: u8, opcode: u8) !void {
 903            try self.w.writeAll(&.{ prefix_1, prefix_2, opcode });
 904        }
 905
 906        /// Encodes a 1 byte opcode with a reg field
 907        ///
 908        /// Remember to add a REX prefix byte if reg is extended!
 909        pub fn opcode_withReg(self: Self, opcode: u8, reg: u3) !void {
 910            assert(opcode & 0b111 == 0);
 911            try self.w.writeByte(opcode | reg);
 912        }
 913
 914        // ------
 915        // ModR/M
 916        // ------
 917
 918        /// Construct a ModR/M byte given all the fields
 919        ///
 920        /// Remember to add a REX prefix byte if reg or rm are extended!
 921        pub fn modRm(self: Self, mod: u2, reg_or_opx: u3, rm: u3) !void {
 922            try self.w.writeByte(@as(u8, mod) << 6 | @as(u8, reg_or_opx) << 3 | rm);
 923        }
 924
 925        /// Construct a ModR/M byte using direct r/m addressing
 926        /// r/m effective address: r/m
 927        ///
 928        /// Note reg's effective address is always just reg for the ModR/M byte.
 929        /// Remember to add a REX prefix byte if reg or rm are extended!
 930        pub fn modRm_direct(self: Self, reg_or_opx: u3, rm: u3) !void {
 931            try self.modRm(0b11, reg_or_opx, rm);
 932        }
 933
 934        /// Construct a ModR/M byte using indirect r/m addressing
 935        /// r/m effective address: [r/m]
 936        ///
 937        /// Note reg's effective address is always just reg for the ModR/M byte.
 938        /// Remember to add a REX prefix byte if reg or rm are extended!
 939        pub fn modRm_indirectDisp0(self: Self, reg_or_opx: u3, rm: u3) !void {
 940            assert(rm != 4 and rm != 5);
 941            try self.modRm(0b00, reg_or_opx, rm);
 942        }
 943
 944        /// Construct a ModR/M byte using indirect SIB addressing
 945        /// r/m effective address: [SIB]
 946        ///
 947        /// Note reg's effective address is always just reg for the ModR/M byte.
 948        /// Remember to add a REX prefix byte if reg or rm are extended!
 949        pub fn modRm_SIBDisp0(self: Self, reg_or_opx: u3) !void {
 950            try self.modRm(0b00, reg_or_opx, 0b100);
 951        }
 952
 953        /// Construct a ModR/M byte using RIP-relative addressing
 954        /// r/m effective address: [RIP + disp32]
 955        ///
 956        /// Note reg's effective address is always just reg for the ModR/M byte.
 957        /// Remember to add a REX prefix byte if reg or rm are extended!
 958        pub fn modRm_RIPDisp32(self: Self, reg_or_opx: u3) !void {
 959            try self.modRm(0b00, reg_or_opx, 0b101);
 960        }
 961
 962        /// Construct a ModR/M byte using indirect r/m with a 8bit displacement
 963        /// r/m effective address: [r/m + disp8]
 964        ///
 965        /// Note reg's effective address is always just reg for the ModR/M byte.
 966        /// Remember to add a REX prefix byte if reg or rm are extended!
 967        pub fn modRm_indirectDisp8(self: Self, reg_or_opx: u3, rm: u3) !void {
 968            assert(rm != 4);
 969            try self.modRm(0b01, reg_or_opx, rm);
 970        }
 971
 972        /// Construct a ModR/M byte using indirect SIB with a 8bit displacement
 973        /// r/m effective address: [SIB + disp8]
 974        ///
 975        /// Note reg's effective address is always just reg for the ModR/M byte.
 976        /// Remember to add a REX prefix byte if reg or rm are extended!
 977        pub fn modRm_SIBDisp8(self: Self, reg_or_opx: u3) !void {
 978            try self.modRm(0b01, reg_or_opx, 0b100);
 979        }
 980
 981        /// Construct a ModR/M byte using indirect r/m with a 32bit displacement
 982        /// r/m effective address: [r/m + disp32]
 983        ///
 984        /// Note reg's effective address is always just reg for the ModR/M byte.
 985        /// Remember to add a REX prefix byte if reg or rm are extended!
 986        pub fn modRm_indirectDisp32(self: Self, reg_or_opx: u3, rm: u3) !void {
 987            assert(rm != 4);
 988            try self.modRm(0b10, reg_or_opx, rm);
 989        }
 990
 991        /// Construct a ModR/M byte using indirect SIB with a 32bit displacement
 992        /// r/m effective address: [SIB + disp32]
 993        ///
 994        /// Note reg's effective address is always just reg for the ModR/M byte.
 995        /// Remember to add a REX prefix byte if reg or rm are extended!
 996        pub fn modRm_SIBDisp32(self: Self, reg_or_opx: u3) !void {
 997            try self.modRm(0b10, reg_or_opx, 0b100);
 998        }
 999
1000        // ---
1001        // SIB
1002        // ---
1003
1004        /// Construct a SIB byte given all the fields
1005        ///
1006        /// Remember to add a REX prefix byte if index or base are extended!
1007        pub fn sib(self: Self, scale: u2, index: u3, base: u3) !void {
1008            try self.w.writeByte(@as(u8, scale) << 6 | @as(u8, index) << 3 | base);
1009        }
1010
1011        /// Construct a SIB byte with scale * index + base, no frills.
1012        /// r/m effective address: [base + scale * index]
1013        ///
1014        /// Remember to add a REX prefix byte if index or base are extended!
1015        pub fn sib_scaleIndexBase(self: Self, scale: u2, index: u3, base: u3) !void {
1016            assert(base != 5);
1017
1018            try self.sib(scale, index, base);
1019        }
1020
1021        /// Construct a SIB byte with scale * index + disp32
1022        /// r/m effective address: [scale * index + disp32]
1023        ///
1024        /// Remember to add a REX prefix byte if index or base are extended!
1025        pub fn sib_scaleIndexDisp32(self: Self, scale: u2, index: u3) !void {
1026            // scale is actually ignored
1027            // index = 4 means no index if and only if we haven't extended the register
1028            // TODO enforce this
1029            // base = 5 means no base, if mod == 0.
1030            try self.sib(scale, index, 5);
1031        }
1032
1033        /// Construct a SIB byte with just base
1034        /// r/m effective address: [base]
1035        ///
1036        /// Remember to add a REX prefix byte if index or base are extended!
1037        pub fn sib_base(self: Self, base: u3) !void {
1038            assert(base != 5);
1039
1040            // scale is actually ignored
1041            // index = 4 means no index
1042            try self.sib(0, 4, base);
1043        }
1044
1045        /// Construct a SIB byte with just disp32
1046        /// r/m effective address: [disp32]
1047        ///
1048        /// Remember to add a REX prefix byte if index or base are extended!
1049        pub fn sib_disp32(self: Self) !void {
1050            // scale is actually ignored
1051            // index = 4 means no index
1052            // base = 5 means no base, if mod == 0.
1053            try self.sib(0, 4, 5);
1054        }
1055
1056        /// Construct a SIB byte with scale * index + base + disp8
1057        /// r/m effective address: [base + scale * index + disp8]
1058        ///
1059        /// Remember to add a REX prefix byte if index or base are extended!
1060        pub fn sib_scaleIndexBaseDisp8(self: Self, scale: u2, index: u3, base: u3) !void {
1061            try self.sib(scale, index, base);
1062        }
1063
1064        /// Construct a SIB byte with base + disp8, no index
1065        /// r/m effective address: [base + disp8]
1066        ///
1067        /// Remember to add a REX prefix byte if index or base are extended!
1068        pub fn sib_baseDisp8(self: Self, base: u3) !void {
1069            // scale is ignored
1070            // index = 4 means no index
1071            try self.sib(0, 4, base);
1072        }
1073
1074        /// Construct a SIB byte with scale * index + base + disp32
1075        /// r/m effective address: [base + scale * index + disp32]
1076        ///
1077        /// Remember to add a REX prefix byte if index or base are extended!
1078        pub fn sib_scaleIndexBaseDisp32(self: Self, scale: u2, index: u3, base: u3) !void {
1079            try self.sib(scale, index, base);
1080        }
1081
1082        /// Construct a SIB byte with base + disp32, no index
1083        /// r/m effective address: [base + disp32]
1084        ///
1085        /// Remember to add a REX prefix byte if index or base are extended!
1086        pub fn sib_baseDisp32(self: Self, base: u3) !void {
1087            // scale is ignored
1088            // index = 4 means no index
1089            try self.sib(0, 4, base);
1090        }
1091
1092        // -------------------------
1093        // Trivial (no bit fiddling)
1094        // -------------------------
1095
1096        /// Encode an 8 bit displacement
1097        ///
1098        /// It is sign-extended to 64 bits by the cpu.
1099        pub fn disp8(self: Self, disp: i8) !void {
1100            try self.w.writeByte(@as(u8, @bitCast(disp)));
1101        }
1102
1103        /// Encode an 32 bit displacement
1104        ///
1105        /// It is sign-extended to 64 bits by the cpu.
1106        pub fn disp32(self: Self, disp: i32) !void {
1107            try self.w.writeInt(i32, disp, .little);
1108        }
1109
1110        /// Encode an 8 bit immediate
1111        ///
1112        /// It is sign-extended to 64 bits by the cpu.
1113        pub fn imm8(self: Self, imm: u8) !void {
1114            try self.w.writeByte(imm);
1115        }
1116
1117        /// Encode an 16 bit immediate
1118        ///
1119        /// It is sign-extended to 64 bits by the cpu.
1120        pub fn imm16(self: Self, imm: u16) !void {
1121            try self.w.writeInt(u16, imm, .little);
1122        }
1123
1124        /// Encode an 32 bit immediate
1125        ///
1126        /// It is sign-extended to 64 bits by the cpu.
1127        pub fn imm32(self: Self, imm: u32) !void {
1128            try self.w.writeInt(u32, imm, .little);
1129        }
1130
1131        /// Encode an 64 bit immediate
1132        ///
1133        /// It is sign-extended to 64 bits by the cpu.
1134        pub fn imm64(self: Self, imm: u64) !void {
1135            try self.w.writeInt(u64, imm, .little);
1136        }
1137    };
1138}
1139
/// Fields of a REX prefix byte, as consumed by `Encoder.rex`.
pub const Rex = struct {
    /// Sets bit 3 (0b1000) of the prefix byte.
    w: bool = false,
    /// Sets bit 2 (0b0100) of the prefix byte.
    r: bool = false,
    /// Sets bit 1 (0b0010) of the prefix byte.
    x: bool = false,
    /// Sets bit 0 (0b0001) of the prefix byte.
    b: bool = false,
    /// Forces the prefix to be emitted even when none of the bits above is set.
    present: bool = false,

    /// Reports whether at least one of W, R, X, B is set. `present` is not considered.
    pub fn isSet(rex: Rex) bool {
        for ([_]bool{ rex.w, rex.r, rex.x, rex.b }) |bit| {
            if (bit) return true;
        }
        return false;
    }
};
1151
/// Fields of a VEX prefix, as consumed by `Encoder.vex`.
pub const Vex = struct {
    w: bool = false,
    r: bool = false,
    x: bool = false,
    b: bool = false,
    l: bool = false,
    /// Two bit `pp` field written into the low bits of the last prefix byte.
    p: enum(u2) {
        none = 0b00,
        @"66" = 0b01,
        f3 = 0b10,
        f2 = 0b11,
    } = .none,
    /// Five bit opcode map selector; only present in the three byte form.
    m: enum(u5) {
        @"0f" = 0b0_0001,
        @"0f38" = 0b0_0010,
        @"0f3a" = 0b0_0011,
        _,
    } = .@"0f",
    /// Register whose encoding is written (inverted) into the `vvvv` field.
    v: Register = .ymm0,

    /// Reports whether the three byte form is required: the two byte form cannot
    /// carry W, X, B, or any map other than `0f`.
    pub fn is3Byte(vex: Vex) bool {
        if (vex.m != .@"0f") return true;
        return vex.w or vex.x or vex.b;
    }
};
1176
1177// Tests
/// Test helper that compares two byte strings and, on mismatch, prints both as hex
/// together with `assembly` and a caret under the first differing hex digit.
///
/// Asserts that `expected` is not empty. Returns `error.TestFailed` when the
/// strings differ; allocation failures from `testing.allocator` are propagated.
fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void {
    assert(expected.len > 0);
    if (!std.mem.eql(u8, expected, given)) {
        const gpa = testing.allocator;

        const want_hex = try std.fmt.allocPrint(gpa, "{x}", .{expected});
        defer gpa.free(want_hex);
        const got_hex = try std.fmt.allocPrint(gpa, "{x}", .{given});
        defer gpa.free(got_hex);

        // The caret line is indented by the width of the "EXP: " label (5 characters)
        // plus the offset of the first differing hex digit.
        const first_diff = std.mem.indexOfDiff(u8, want_hex, got_hex).?;
        const caret_indent = try gpa.alloc(u8, first_diff + 5);
        defer gpa.free(caret_indent);
        @memset(caret_indent, ' ');

        std.debug.print(
            "\nASM: {s}\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n",
            .{ assembly, want_hex, got_hex, caret_indent },
        );
        return error.TestFailed;
    }
}
1197
/// Scratch state for the encoding tests: a fixed buffer plus the number of bytes
/// the most recent `encode` call produced.
const TestEncode = struct {
    buffer: [32]u8 = undefined,
    index: usize = 0,

    /// Builds the instruction `mnemonic ops` without a prefix and encodes it at the
    /// start of `buffer`, overwriting whatever a previous call left there.
    fn encode(
        enc: *TestEncode,
        mnemonic: Instruction.Mnemonic,
        ops: []const Instruction.Operand,
    ) !void {
        var writer: Writer = .fixed(&enc.buffer);
        const inst: Instruction = try .new(.none, mnemonic, ops);
        try inst.encode(&writer, .{});
        enc.index = writer.buffered().len;
    }

    /// Returns the bytes produced by the most recent `encode` call.
    ///
    /// Takes `enc` by pointer: the returned slice aliases `enc.buffer`, so it must
    /// point into the caller's object rather than into a by-value parameter.
    fn code(enc: *const TestEncode) []const u8 {
        return enc.buffer[0..enc.index];
    }
};
1217
// Encodes `mov rbx, 4` through a fixed-buffer writer, the same writer type the
// other tests in this file hand to `Instruction.encode`.
test "encode" {
    var buffer: [32]u8 = undefined;
    var writer: Writer = .fixed(&buffer);

    const inst: Instruction = try .new(.none, .mov, &.{
        .{ .reg = .rbx },
        .{ .imm = .u(4) },
    });
    try inst.encode(&writer, .{});
    try testing.expectEqualSlices(u8, &.{ 0x48, 0xc7, 0xc3, 0x4, 0x0, 0x0, 0x0 }, writer.buffered());
}
1229
// Immediate-only and accumulator/immediate forms.
test "lower I encoding" {
    // Encodes one instruction and compares the produced bytes with `expected`.
    const check = struct {
        fn run(
            enc: *TestEncode,
            mnemonic: Instruction.Mnemonic,
            ops: []const Instruction.Operand,
            expected: []const u8,
            assembly: []const u8,
        ) !void {
            try enc.encode(mnemonic, ops);
            try expectEqualHexStrings(expected, enc.code(), assembly);
        }
    }.run;

    var enc: TestEncode = .{};

    try check(&enc, .push, &.{
        .{ .imm = .u(0x10) },
    }, "\x6A\x10", "push 0x10");

    try check(&enc, .push, &.{
        .{ .imm = .u(0x1000) },
    }, "\x66\x68\x00\x10", "push 0x1000");

    try check(&enc, .push, &.{
        .{ .imm = .u(0x10000000) },
    }, "\x68\x00\x00\x00\x10", "push 0x10000000");

    try check(&enc, .adc, &.{
        .{ .reg = .rax },
        .{ .imm = .u(0x10000000) },
    }, "\x48\x15\x00\x00\x00\x10", "adc rax, 0x10000000");

    try check(&enc, .add, &.{
        .{ .reg = .al },
        .{ .imm = .u(0x10) },
    }, "\x04\x10", "add al, 0x10");

    try check(&enc, .add, &.{
        .{ .reg = .rax },
        .{ .imm = .u(0x10) },
    }, "\x48\x83\xC0\x10", "add rax, 0x10");

    try check(&enc, .sbb, &.{
        .{ .reg = .ax },
        .{ .imm = .u(0x10) },
    }, "\x66\x1D\x10\x00", "sbb ax, 0x10");

    try check(&enc, .xor, &.{
        .{ .reg = .al },
        .{ .imm = .u(0x10) },
    }, "\x34\x10", "xor al, 0x10");
}
1278
// Register-or-memory destination with an immediate source.
test "lower MI encoding" {
    const Memory = Instruction.Memory;

    // Encodes one instruction and compares the produced bytes with `expected`.
    const check = struct {
        fn run(
            enc: *TestEncode,
            mnemonic: Instruction.Mnemonic,
            ops: []const Instruction.Operand,
            expected: []const u8,
            assembly: []const u8,
        ) !void {
            try enc.encode(mnemonic, ops);
            try expectEqualHexStrings(expected, enc.code(), assembly);
        }
    }.run;

    var enc: TestEncode = .{};

    try check(&enc, .mov, &.{
        .{ .reg = .r12 },
        .{ .imm = .u(0x1000) },
    }, "\x49\xC7\xC4\x00\x10\x00\x00", "mov r12, 0x1000");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .r12 } }) },
        .{ .imm = .u(0x10) },
    }, "\x41\xC6\x04\x24\x10", "mov BYTE PTR [r12], 0x10");

    try check(&enc, .mov, &.{
        .{ .reg = .r12 },
        .{ .imm = .u(0x1000) },
    }, "\x49\xC7\xC4\x00\x10\x00\x00", "mov r12, 0x1000");

    try check(&enc, .mov, &.{
        .{ .reg = .r12 },
        .{ .imm = .u(0x1000) },
    }, "\x49\xC7\xC4\x00\x10\x00\x00", "mov r12, 0x1000");

    try check(&enc, .mov, &.{
        .{ .reg = .rax },
        .{ .imm = .u(0x10) },
    }, "\x48\xc7\xc0\x10\x00\x00\x00", "mov rax, 0x10");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .r11 } }) },
        .{ .imm = .u(0x10) },
    }, "\x41\xc7\x03\x10\x00\x00\x00", "mov DWORD PTR [r11], 0x10");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initRip(.qword, 0x10) },
        .{ .imm = .u(0x10) },
    }, "\x48\xC7\x05\x10\x00\x00\x00\x10\x00\x00\x00", "mov QWORD PTR [rip + 0x10], 0x10");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -8 }) },
        .{ .imm = .u(0x10) },
    }, "\x48\xc7\x45\xf8\x10\x00\x00\x00", "mov QWORD PTR [rbp - 8], 0x10");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -2 }) },
        .{ .imm = .s(-16) },
    }, "\x66\xC7\x45\xFE\xF0\xFF", "mov WORD PTR [rbp - 2], -16");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .rbp }, .disp = -1 }) },
        .{ .imm = .u(0x10) },
    }, "\xC6\x45\xFF\x10", "mov BYTE PTR [rbp - 1], 0x10");

    try check(&enc, .mov, &.{
        .{ .mem = Memory.initSib(.qword, .{
            .base = .{ .reg = .ds },
            .disp = 0x10000000,
            .scale_index = .{ .scale = 2, .index = .rcx },
        }) },
        .{ .imm = .u(0x10) },
    }, "\x48\xC7\x04\x4D\x00\x00\x00\x10\x10\x00\x00\x00", "mov QWORD PTR [rcx*2 + 0x10000000], 0x10");

    try check(&enc, .adc, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .rbp }, .disp = -0x10 }) },
        .{ .imm = .u(0x10) },
    }, "\x80\x55\xF0\x10", "adc BYTE PTR [rbp - 0x10], 0x10");

    try check(&enc, .adc, &.{
        .{ .mem = Memory.initRip(.qword, 0) },
        .{ .imm = .u(0x10) },
    }, "\x48\x83\x15\x00\x00\x00\x00\x10", "adc QWORD PTR [rip], 0x10");

    try check(&enc, .adc, &.{
        .{ .reg = .rax },
        .{ .imm = .u(0x10) },
    }, "\x48\x83\xD0\x10", "adc rax, 0x10");

    try check(&enc, .add, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .rdx }, .disp = -8 }) },
        .{ .imm = .u(0x10) },
    }, "\x83\x42\xF8\x10", "add DWORD PTR [rdx - 8], 0x10");

    try check(&enc, .add, &.{
        .{ .reg = .rax },
        .{ .imm = .u(0x10) },
    }, "\x48\x83\xC0\x10", "add rax, 0x10");

    try check(&enc, .add, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -0x10 }) },
        .{ .imm = .s(-0x10) },
    }, "\x48\x83\x45\xF0\xF0", "add QWORD PTR [rbp - 0x10], -0x10");

    try check(&enc, .@"and", &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    }, "\x83\x24\x25\x00\x00\x00\x10\x10", "and DWORD PTR ds:0x10000000, 0x10");

    try check(&enc, .@"and", &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .es }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    }, "\x26\x83\x24\x25\x00\x00\x00\x10\x10", "and DWORD PTR es:0x10000000, 0x10");

    try check(&enc, .@"and", &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .r12 }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    }, "\x41\x83\xA4\x24\x00\x00\x00\x10\x10", "and DWORD PTR [r12 + 0x10000000], 0x10");

    try check(&enc, .sub, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .r11 }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    }, "\x41\x83\xAB\x00\x00\x00\x10\x10", "sub DWORD PTR [r11 + 0x10000000], 0x10");
}
1436
1437test "lower RM encoding" {
1438    var enc = TestEncode{};
1439
1440    try enc.encode(.mov, &.{
1441        .{ .reg = .rax },
1442        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .r11 } }) },
1443    });
1444    try expectEqualHexStrings("\x49\x8b\x03", enc.code(), "mov rax, QWORD PTR [r11]");
1445
1446    try enc.encode(.mov, &.{
1447        .{ .reg = .rbx },
1448        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .ds }, .disp = 0x10 }) },
1449    });
1450    try expectEqualHexStrings("\x48\x8B\x1C\x25\x10\x00\x00\x00", enc.code(), "mov rbx, QWORD PTR ds:0x10");
1451
1452    try enc.encode(.mov, &.{
1453        .{ .reg = .rax },
1454        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -4 }) },
1455    });
1456    try expectEqualHexStrings("\x48\x8B\x45\xFC", enc.code(), "mov rax, QWORD PTR [rbp - 4]");
1457
1458    try enc.encode(.mov, &.{
1459        .{ .reg = .rax },
1460        .{ .mem = Instruction.Memory.initSib(.qword, .{
1461            .base = .{ .reg = .rbp },
1462            .scale_index = .{ .scale = 1, .index = .rcx },
1463            .disp = -8,
1464        }) },
1465    });
1466    try expectEqualHexStrings("\x48\x8B\x44\x0D\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*1 - 8]");
1467
1468    try enc.encode(.mov, &.{
1469        .{ .reg = .eax },
1470        .{ .mem = Instruction.Memory.initSib(.dword, .{
1471            .base = .{ .reg = .rbp },
1472            .scale_index = .{ .scale = 4, .index = .rdx },
1473            .disp = -4,
1474        }) },
1475    });
1476    try expectEqualHexStrings("\x8B\x44\x95\xFC", enc.code(), "mov eax, dword ptr [rbp + rdx*4 - 4]");
1477
1478    try enc.encode(.mov, &.{
1479        .{ .reg = .rax },
1480        .{ .mem = Instruction.Memory.initSib(.qword, .{
1481            .base = .{ .reg = .rbp },
1482            .scale_index = .{ .scale = 8, .index = .rcx },
1483            .disp = -8,
1484        }) },
1485    });
1486    try expectEqualHexStrings("\x48\x8B\x44\xCD\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*8 - 8]");
1487
1488    try enc.encode(.mov, &.{
1489        .{ .reg = .r8b },
1490        .{ .mem = Instruction.Memory.initSib(.byte, .{
1491            .base = .{ .reg = .rsi },
1492            .scale_index = .{ .scale = 1, .index = .rcx },
1493            .disp = -24,
1494        }) },
1495    });
1496    try expectEqualHexStrings("\x44\x8A\x44\x0E\xE8", enc.code(), "mov r8b, BYTE PTR [rsi + rcx*1 - 24]");
1497
1498    // TODO this mnemonic needs cleanup as some prefixes are obsolete.
1499    try enc.encode(.mov, &.{
1500        .{ .reg = .rax },
1501        .{ .reg = .cs },
1502    });
1503    try expectEqualHexStrings("\x48\x8C\xC8", enc.code(), "mov rax, cs");
1504
1505    try enc.encode(.mov, &.{
1506        .{ .mem = Instruction.Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) },
1507        .{ .reg = .fs },
1508    });
1509    try expectEqualHexStrings("\x8C\x65\xF0", enc.code(), "mov WORD PTR [rbp - 16], fs");
1510
1511    try enc.encode(.mov, &.{
1512        .{ .reg = .r12w },
1513        .{ .reg = .cs },
1514    });
1515    try expectEqualHexStrings("\x66\x41\x8C\xCC", enc.code(), "mov r12w, cs");
1516
1517    try enc.encode(.movsx, &.{
1518        .{ .reg = .eax },
1519        .{ .reg = .bx },
1520    });
1521    try expectEqualHexStrings("\x0F\xBF\xC3", enc.code(), "movsx eax, bx");
1522
1523    try enc.encode(.movsx, &.{
1524        .{ .reg = .eax },
1525        .{ .reg = .bl },
1526    });
1527    try expectEqualHexStrings("\x0F\xBE\xC3", enc.code(), "movsx eax, bl");
1528
1529    try enc.encode(.movsx, &.{
1530        .{ .reg = .ax },
1531        .{ .reg = .bl },
1532    });
1533    try expectEqualHexStrings("\x66\x0F\xBE\xC3", enc.code(), "movsx ax, bl");
1534
1535    try enc.encode(.movsx, &.{
1536        .{ .reg = .eax },
1537        .{ .mem = Instruction.Memory.initSib(.word, .{ .base = .{ .reg = .rbp } }) },
1538    });
1539    try expectEqualHexStrings("\x0F\xBF\x45\x00", enc.code(), "movsx eax, BYTE PTR [rbp]");
1540
1541    try enc.encode(.movsx, &.{
1542        .{ .reg = .eax },
1543        .{ .mem = Instruction.Memory.initSib(.byte, .{ .scale_index = .{ .index = .rax, .scale = 2 } }) },
1544    });
1545    try expectEqualHexStrings("\x0F\xBE\x04\x45\x00\x00\x00\x00", enc.code(), "movsx eax, BYTE PTR [rax * 2]");
1546
1547    try enc.encode(.movsx, &.{
1548        .{ .reg = .ax },
1549        .{ .mem = Instruction.Memory.initRip(.byte, 0x10) },
1550    });
1551    try expectEqualHexStrings("\x66\x0F\xBE\x05\x10\x00\x00\x00", enc.code(), "movsx ax, BYTE PTR [rip + 0x10]");
1552
1553    try enc.encode(.movsx, &.{
1554        .{ .reg = .rax },
1555        .{ .reg = .bx },
1556    });
1557    try expectEqualHexStrings("\x48\x0F\xBF\xC3", enc.code(), "movsx rax, bx");
1558
1559    try enc.encode(.movsxd, &.{
1560        .{ .reg = .rax },
1561        .{ .reg = .ebx },
1562    });
1563    try expectEqualHexStrings("\x48\x63\xC3", enc.code(), "movsxd rax, ebx");
1564
1565    try enc.encode(.lea, &.{
1566        .{ .reg = .rax },
1567        .{ .mem = Instruction.Memory.initRip(.qword, 0x10) },
1568    });
1569    try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, QWORD PTR [rip + 0x10]");
1570
1571    try enc.encode(.lea, &.{
1572        .{ .reg = .rax },
1573        .{ .mem = Instruction.Memory.initRip(.dword, 0x10) },
1574    });
1575    try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, DWORD PTR [rip + 0x10]");
1576
1577    try enc.encode(.lea, &.{
1578        .{ .reg = .eax },
1579        .{ .mem = Instruction.Memory.initRip(.dword, 0x10) },
1580    });
1581    try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, DWORD PTR [rip + 0x10]");
1582
1583    try enc.encode(.lea, &.{
1584        .{ .reg = .eax },
1585        .{ .mem = Instruction.Memory.initRip(.word, 0x10) },
1586    });
1587    try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, WORD PTR [rip + 0x10]");
1588
1589    try enc.encode(.lea, &.{
1590        .{ .reg = .ax },
1591        .{ .mem = Instruction.Memory.initRip(.byte, 0x10) },
1592    });
1593    try expectEqualHexStrings("\x66\x8D\x05\x10\x00\x00\x00", enc.code(), "lea ax, BYTE PTR [rip + 0x10]");
1594
1595    try enc.encode(.lea, &.{
1596        .{ .reg = .rsi },
1597        .{ .mem = Instruction.Memory.initSib(.qword, .{
1598            .base = .{ .reg = .rbp },
1599            .scale_index = .{ .scale = 1, .index = .rcx },
1600        }) },
1601    });
1602    try expectEqualHexStrings("\x48\x8D\x74\x0D\x00", enc.code(), "lea rsi, QWORD PTR [rbp + rcx*1 + 0]");
1603
1604    try enc.encode(.add, &.{
1605        .{ .reg = .r11 },
1606        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
1607    });
1608    try expectEqualHexStrings("\x4C\x03\x1C\x25\x00\x00\x00\x10", enc.code(), "add r11, QWORD PTR ds:0x10000000");
1609
1610    try enc.encode(.add, &.{
1611        .{ .reg = .r12b },
1612        .{ .mem = Instruction.Memory.initSib(.byte, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
1613    });
1614    try expectEqualHexStrings("\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r11b, BYTE PTR ds:0x10000000");
1615
1616    try enc.encode(.add, &.{
1617        .{ .reg = .r12b },
1618        .{ .mem = Instruction.Memory.initSib(.byte, .{ .base = .{ .reg = .fs }, .disp = 0x10000000 }) },
1619    });
1620    try expectEqualHexStrings("\x64\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r11b, BYTE PTR fs:0x10000000");
1621
1622    try enc.encode(.sub, &.{
1623        .{ .reg = .r11 },
1624        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .r13 }, .disp = 0x10000000 }) },
1625    });
1626    try expectEqualHexStrings("\x4D\x2B\x9D\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r13 + 0x10000000]");
1627
1628    try enc.encode(.sub, &.{
1629        .{ .reg = .r11 },
1630        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .r12 }, .disp = 0x10000000 }) },
1631    });
1632    try expectEqualHexStrings("\x4D\x2B\x9C\x24\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r12 + 0x10000000]");
1633
1634    try enc.encode(.imul, &.{
1635        .{ .reg = .r11 },
1636        .{ .reg = .r12 },
1637    });
1638    try expectEqualHexStrings("\x4D\x0F\xAF\xDC", enc.code(), "mov r11, r12");
1639}
1640
// Three-operand `imul` (register, register-or-memory, immediate) with signed
// and unsigned immediates, checked against fixed byte sequences.
test "lower RMI encoding" {
    var enc: TestEncode = .{};

    // Register source; the expected bytes carry a single immediate byte.
    try enc.encode(.imul, &.{ .{ .reg = .r11 }, .{ .reg = .r12 }, .{ .imm = .s(-2) } });
    try expectEqualHexStrings("\x4D\x6B\xDC\xFE", enc.code(), "imul r11, r12, -2");

    // RIP-relative source; the expected bytes carry a four-byte immediate.
    try enc.encode(.imul, &.{
        .{ .reg = .r11 },
        .{ .mem = .initRip(.qword, -16) },
        .{ .imm = .s(-1024) },
    });
    try expectEqualHexStrings("\x4C\x69\x1D\xF0\xFF\xFF\xFF\x00\xFC\xFF\xFF", enc.code(), "imul r11, QWORD PTR [rip - 16], -1024");

    // 16-bit destination with a signed immediate.
    try enc.encode(.imul, &.{
        .{ .reg = .bx },
        .{ .mem = .initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) },
        .{ .imm = .s(-1024) },
    });
    try expectEqualHexStrings("\x66\x69\x5D\xF0\x00\xFC", enc.code(), "imul bx, WORD PTR [rbp - 16], -1024");

    // Same operands, but the immediate is supplied as an unsigned value.
    try enc.encode(.imul, &.{
        .{ .reg = .bx },
        .{ .mem = .initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) },
        .{ .imm = .u(1024) },
    });
    try expectEqualHexStrings("\x66\x69\x5D\xF0\x00\x04", enc.code(), "imul bx, WORD PTR [rbp - 16], 1024");
}
1684
// Instructions whose destination is a register-or-memory operand and whose
// source is a register, checked against fixed byte sequences.
test "lower MR encoding" {
    var enc: TestEncode = .{};

    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .reg = .rbx } });
    try expectEqualHexStrings("\x48\x89\xD8", enc.code(), "mov rax, rbx");

    try enc.encode(.mov, &.{
        .{ .mem = .initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -4 }) },
        .{ .reg = .r11 },
    });
    try expectEqualHexStrings("\x4c\x89\x5d\xfc", enc.code(), "mov QWORD PTR [rbp - 4], r11");

    try enc.encode(.mov, &.{ .{ .mem = .initRip(.qword, 0x10) }, .{ .reg = .r12 } });
    try expectEqualHexStrings("\x4C\x89\x25\x10\x00\x00\x00", enc.code(), "mov QWORD PTR [rip + 0x10], r12");

    // Base + scaled index + displacement, 64-bit store.
    try enc.encode(.mov, &.{
        .{ .mem = .initSib(.qword, .{
            .base = .{ .reg = .r11 },
            .scale_index = .{ .scale = 2, .index = .r12 },
            .disp = 0x10,
        }) },
        .{ .reg = .r13 },
    });
    try expectEqualHexStrings("\x4F\x89\x6C\x63\x10", enc.code(), "mov QWORD PTR [r11 + 2 * r12 + 0x10], r13");

    try enc.encode(.mov, &.{ .{ .mem = .initRip(.word, -0x10) }, .{ .reg = .r12w } });
    try expectEqualHexStrings("\x66\x44\x89\x25\xF0\xFF\xFF\xFF", enc.code(), "mov WORD PTR [rip - 0x10], r12w");

    // Same addressing form as above, 8-bit store.
    try enc.encode(.mov, &.{
        .{ .mem = .initSib(.byte, .{
            .base = .{ .reg = .r11 },
            .scale_index = .{ .scale = 2, .index = .r12 },
            .disp = 0x10,
        }) },
        .{ .reg = .r13b },
    });
    try expectEqualHexStrings("\x47\x88\x6C\x63\x10", enc.code(), "mov BYTE PTR [r11 + 2 * r12 + 0x10], r13b");

    // Segment register used as the base of the memory operand.
    try enc.encode(.add, &.{
        .{ .mem = .initSib(.byte, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
        .{ .reg = .r12b },
    });
    try expectEqualHexStrings("\x44\x00\x24\x25\x00\x00\x00\x10", enc.code(), "add BYTE PTR ds:0x10000000, r12b");

    try enc.encode(.add, &.{
        .{ .mem = .initSib(.dword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
        .{ .reg = .r12d },
    });
    try expectEqualHexStrings("\x44\x01\x24\x25\x00\x00\x00\x10", enc.code(), "add DWORD PTR [ds:0x10000000], r12d");

    // With `gs` the expected bytes start with an extra 0x65 byte.
    try enc.encode(.add, &.{
        .{ .mem = .initSib(.dword, .{ .base = .{ .reg = .gs }, .disp = 0x10000000 }) },
        .{ .reg = .r12d },
    });
    try expectEqualHexStrings("\x65\x44\x01\x24\x25\x00\x00\x00\x10", enc.code(), "add DWORD PTR [gs:0x10000000], r12d");

    try enc.encode(.sub, &.{
        .{ .mem = .initSib(.qword, .{ .base = .{ .reg = .r11 }, .disp = 0x10000000 }) },
        .{ .reg = .r12 },
    });
    try expectEqualHexStrings("\x4D\x29\xA3\x00\x00\x00\x10", enc.code(), "sub QWORD PTR [r11 + 0x10000000], r12");
}
1756
// Single-operand instructions (`call`, `push`, `pop`, one-operand `imul`) whose
// operand is a register, a memory reference or an immediate. Each case encodes
// one instruction and compares the bytes with a fixed expectation; the third
// argument of `expectEqualHexStrings` is the assembly text for that case and
// is kept in sync with the operands actually encoded.
test "lower M encoding" {
    var enc = TestEncode{};

    try enc.encode(.call, &.{
        .{ .reg = .r12 },
    });
    try expectEqualHexStrings("\x41\xFF\xD4", enc.code(), "call r12");

    try enc.encode(.call, &.{
        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .r12 } }) },
    });
    try expectEqualHexStrings("\x41\xFF\x14\x24", enc.code(), "call QWORD PTR [r12]");

    // Scaled index without a base register.
    try enc.encode(.call, &.{
        .{ .mem = Instruction.Memory.initSib(.qword, .{
            .base = .none,
            .scale_index = .{ .index = .r11, .scale = 2 },
        }) },
    });
    try expectEqualHexStrings("\x42\xFF\x14\x5D\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r11 * 2]");

    try enc.encode(.call, &.{
        .{ .mem = Instruction.Memory.initSib(.qword, .{
            .base = .none,
            .scale_index = .{ .index = .r12, .scale = 2 },
        }) },
    });
    try expectEqualHexStrings("\x42\xFF\x14\x65\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r12 * 2]");

    // Segment register used as the base of the memory operand.
    try enc.encode(.call, &.{
        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .gs } }) },
    });
    try expectEqualHexStrings("\x65\xFF\x14\x25\x00\x00\x00\x00", enc.code(), "call gs:0x0");

    try enc.encode(.call, &.{
        .{ .imm = .s(0) },
    });
    try expectEqualHexStrings("\xE8\x00\x00\x00\x00", enc.code(), "call 0x0");

    try enc.encode(.push, &.{
        .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .rbp } }) },
    });
    try expectEqualHexStrings("\xFF\x75\x00", enc.code(), "push QWORD PTR [rbp]");

    // 16-bit push: the operand is `.word`, so the description says WORD PTR.
    try enc.encode(.push, &.{
        .{ .mem = Instruction.Memory.initSib(.word, .{ .base = .{ .reg = .rbp } }) },
    });
    try expectEqualHexStrings("\x66\xFF\x75\x00", enc.code(), "push WORD PTR [rbp]");

    try enc.encode(.pop, &.{
        .{ .mem = Instruction.Memory.initRip(.qword, 0) },
    });
    try expectEqualHexStrings("\x8F\x05\x00\x00\x00\x00", enc.code(), "pop QWORD PTR [rip]");

    // 16-bit pop through a RIP-relative operand, so the description names rip.
    try enc.encode(.pop, &.{
        .{ .mem = Instruction.Memory.initRip(.word, 0) },
    });
    try expectEqualHexStrings("\x66\x8F\x05\x00\x00\x00\x00", enc.code(), "pop WORD PTR [rip]");

    try enc.encode(.imul, &.{
        .{ .reg = .rax },
    });
    try expectEqualHexStrings("\x48\xF7\xE8", enc.code(), "imul rax");

    try enc.encode(.imul, &.{
        .{ .reg = .r12 },
    });
    try expectEqualHexStrings("\x49\xF7\xEC", enc.code(), "imul r12");
}
1826
// `push`/`pop` with a single register operand, checked against fixed byte
// sequences.
test "lower O encoding" {
    var enc: TestEncode = .{};

    try enc.encode(.push, &.{.{ .reg = .rax }});
    try expectEqualHexStrings("\x50", enc.code(), "push rax");

    try enc.encode(.push, &.{.{ .reg = .r12w }});
    try expectEqualHexStrings("\x66\x41\x54", enc.code(), "push r12w");

    try enc.encode(.pop, &.{.{ .reg = .r12 }});
    try expectEqualHexStrings("\x41\x5c", enc.code(), "pop r12");
}
1845
// `mov` of an immediate into a register at 64, 32, 16 and 8 bit widths,
// checked against fixed byte sequences.
test "lower OI encoding" {
    var enc: TestEncode = .{};

    // 64-bit destinations: the expected bytes carry the full eight-byte immediate.
    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .imm = .u(0x1000000000000000) } });
    try expectEqualHexStrings("\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x10", enc.code(), "movabs rax, 0x1000000000000000");

    try enc.encode(.mov, &.{ .{ .reg = .r11 }, .{ .imm = .u(0x1000000000000000) } });
    try expectEqualHexStrings("\x49\xBB\x00\x00\x00\x00\x00\x00\x00\x10", enc.code(), "movabs r11, 0x1000000000000000");

    // Narrower destinations.
    try enc.encode(.mov, &.{ .{ .reg = .r11d }, .{ .imm = .u(0x10000000) } });
    try expectEqualHexStrings("\x41\xBB\x00\x00\x00\x10", enc.code(), "mov r11d, 0x10000000");

    try enc.encode(.mov, &.{ .{ .reg = .r11w }, .{ .imm = .u(0x1000) } });
    try expectEqualHexStrings("\x66\x41\xBB\x00\x10", enc.code(), "mov r11w, 0x1000");

    try enc.encode(.mov, &.{ .{ .reg = .r11b }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x41\xB3\x10", enc.code(), "mov r11b, 0x10");
}
1887
// `mov` between the accumulator register (rax/eax/ax/al) and a
// segment:offset memory operand, in both directions.
test "lower FD/TD encoding" {
    var enc: TestEncode = .{};

    // Loads: accumulator <- segment:offset.
    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .mem = .initMoffs(.cs, 0x10) } });
    try expectEqualHexStrings("\x2E\x48\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs rax, cs:0x10");

    try enc.encode(.mov, &.{ .{ .reg = .eax }, .{ .mem = .initMoffs(.fs, 0x10) } });
    try expectEqualHexStrings("\x64\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs eax, fs:0x10");

    try enc.encode(.mov, &.{ .{ .reg = .ax }, .{ .mem = .initMoffs(.gs, 0x10) } });
    try expectEqualHexStrings("\x65\x66\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs ax, gs:0x10");

    try enc.encode(.mov, &.{ .{ .reg = .al }, .{ .mem = .initMoffs(.ds, 0x10) } });
    try expectEqualHexStrings("\xA0\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs al, ds:0x10");

    // Stores: segment:offset <- accumulator.
    try enc.encode(.mov, &.{ .{ .mem = .initMoffs(.cs, 0x10) }, .{ .reg = .rax } });
    try expectEqualHexStrings("\x2E\x48\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs cs:0x10, rax");

    try enc.encode(.mov, &.{ .{ .mem = .initMoffs(.fs, 0x10) }, .{ .reg = .eax } });
    try expectEqualHexStrings("\x64\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs fs:0x10, eax");

    try enc.encode(.mov, &.{ .{ .mem = .initMoffs(.gs, 0x10) }, .{ .reg = .ax } });
    try expectEqualHexStrings("\x65\x66\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs gs:0x10, ax");

    try enc.encode(.mov, &.{ .{ .mem = .initMoffs(.ds, 0x10) }, .{ .reg = .al } });
    try expectEqualHexStrings("\xA2\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs ds:0x10, al");
}
1939
// Instructions that take no operands, checked against fixed byte sequences.
test "lower NP encoding" {
    var enc: TestEncode = .{};

    try enc.encode(.int3, &.{});
    try expectEqualHexStrings("\xCC", enc.code(), "int3");

    try enc.encode(.nop, &.{});
    try expectEqualHexStrings("\x90", enc.code(), "nop");

    try enc.encode(.ret, &.{});
    try expectEqualHexStrings("\xC3", enc.code(), "ret");

    try enc.encode(.syscall, &.{});
    try expectEqualHexStrings("\x0f\x05", enc.code(), "syscall");
}
1955
/// Asserts that building an instruction from `mnemonic` and `ops` (with no
/// prefix) fails with `error.InvalidInstruction`.
fn invalidInstruction(mnemonic: Instruction.Mnemonic, ops: []const Instruction.Operand) !void {
    // `Instruction.new` produces an error union, so the result must not be
    // typed as a bare `Instruction`; keep the whole union and let
    // `expectError` inspect it.
    const result = Instruction.new(.none, mnemonic, ops);
    try testing.expectError(error.InvalidInstruction, result);
}
1960
// Operand combinations that `invalidInstruction` expects to be rejected with
// `error.InvalidInstruction`.
test "invalid instruction" {
    // `call` with 32/16/8-bit register operands.
    try invalidInstruction(.call, &.{.{ .reg = .eax }});
    try invalidInstruction(.call, &.{.{ .reg = .ax }});
    try invalidInstruction(.call, &.{.{ .reg = .al }});

    // `call` with 32/16/8-bit memory operands.
    try invalidInstruction(.call, &.{.{ .mem = .initRip(.dword, 0) }});
    try invalidInstruction(.call, &.{.{ .mem = .initRip(.word, 0) }});
    try invalidInstruction(.call, &.{.{ .mem = .initRip(.byte, 0) }});

    // 16-bit memory destination with a 64-bit register source.
    try invalidInstruction(.mov, &.{ .{ .mem = .initRip(.word, 0x10) }, .{ .reg = .r12 } });

    // `lea` with a register source, and with an 8-bit destination.
    try invalidInstruction(.lea, &.{ .{ .reg = .rax }, .{ .reg = .rbx } });
    try invalidInstruction(.lea, &.{ .{ .reg = .al }, .{ .mem = .initRip(.byte, 0) } });

    // `pop`/`push` with 8-bit and 32-bit registers.
    try invalidInstruction(.pop, &.{.{ .reg = .r12b }});
    try invalidInstruction(.pop, &.{.{ .reg = .r12d }});
    try invalidInstruction(.push, &.{.{ .reg = .r12b }});
    try invalidInstruction(.push, &.{.{ .reg = .r12d }});

    // `push` with a 64-bit immediate.
    try invalidInstruction(.push, &.{.{ .imm = .u(0x1000000000000000) }});
}
2008
/// Asserts that building an instruction from `mnemonic` and `ops` (with no
/// prefix) fails with `error.CannotEncode`.
fn cannotEncode(mnemonic: Instruction.Mnemonic, ops: []const Instruction.Operand) !void {
    // `expectError` takes its second argument as `anytype`, which gives a decl
    // literal no result type to resolve against; name `Instruction.new` in full.
    try testing.expectError(error.CannotEncode, Instruction.new(.none, mnemonic, ops));
}
2012
// Operand combinations that `cannotEncode` expects to fail with
// `error.CannotEncode`; every case pairs `ah` or `bh` with another operand.
test "cannot encode" {
    try cannotEncode(.@"test", &.{
        .{ .mem = .initSib(.byte, .{ .base = .{ .reg = .r12 } }) },
        .{ .reg = .ah },
    });
    try cannotEncode(.@"test", &.{ .{ .reg = .r11b }, .{ .reg = .bh } });
    try cannotEncode(.mov, &.{ .{ .reg = .sil }, .{ .reg = .ah } });
}
2027
2028const Assembler = struct {
2029    it: Tokenizer,
2030
2031    const Tokenizer = struct {
2032        input: []const u8,
2033        pos: usize = 0,
2034
2035        const Error = error{InvalidToken};
2036
2037        const Token = struct {
2038            id: Id,
2039            start: usize,
2040            end: usize,
2041
2042            const Id = enum {
2043                eof,
2044
2045                space,
2046                new_line,
2047
2048                colon,
2049                comma,
2050                open_br,
2051                close_br,
2052                plus,
2053                minus,
2054                star,
2055
2056                string,
2057                numeral,
2058            };
2059        };
2060
2061        const Iterator = struct {};
2062
2063        fn next(it: *Tokenizer) !Token {
2064            var result = Token{
2065                .id = .eof,
2066                .start = it.pos,
2067                .end = it.pos,
2068            };
2069
2070            var state: enum {
2071                start,
2072                space,
2073                new_line,
2074                string,
2075                numeral,
2076                numeral_hex,
2077            } = .start;
2078
2079            while (it.pos < it.input.len) : (it.pos += 1) {
2080                const ch = it.input[it.pos];
2081                switch (state) {
2082                    .start => switch (ch) {
2083                        ',' => {
2084                            result.id = .comma;
2085                            it.pos += 1;
2086                            break;
2087                        },
2088                        ':' => {
2089                            result.id = .colon;
2090                            it.pos += 1;
2091                            break;
2092                        },
2093                        '[' => {
2094                            result.id = .open_br;
2095                            it.pos += 1;
2096                            break;
2097                        },
2098                        ']' => {
2099                            result.id = .close_br;
2100                            it.pos += 1;
2101                            break;
2102                        },
2103                        '+' => {
2104                            result.id = .plus;
2105                            it.pos += 1;
2106                            break;
2107                        },
2108                        '-' => {
2109                            result.id = .minus;
2110                            it.pos += 1;
2111                            break;
2112                        },
2113                        '*' => {
2114                            result.id = .star;
2115                            it.pos += 1;
2116                            break;
2117                        },
2118                        ' ', '\t' => state = .space,
2119                        '\n', '\r' => state = .new_line,
2120                        'a'...'z', 'A'...'Z' => state = .string,
2121                        '0'...'9' => state = .numeral,
2122                        else => return error.InvalidToken,
2123                    },
2124
2125                    .space => switch (ch) {
2126                        ' ', '\t' => {},
2127                        else => {
2128                            result.id = .space;
2129                            break;
2130                        },
2131                    },
2132
2133                    .new_line => switch (ch) {
2134                        '\n', '\r', ' ', '\t' => {},
2135                        else => {
2136                            result.id = .new_line;
2137                            break;
2138                        },
2139                    },
2140
2141                    .string => switch (ch) {
2142                        'a'...'z', 'A'...'Z', '0'...'9' => {},
2143                        else => {
2144                            result.id = .string;
2145                            break;
2146                        },
2147                    },
2148
2149                    .numeral => switch (ch) {
2150                        'x' => state = .numeral_hex,
2151                        '0'...'9' => {},
2152                        else => {
2153                            result.id = .numeral;
2154                            break;
2155                        },
2156                    },
2157
2158                    .numeral_hex => switch (ch) {
2159                        'a'...'f' => {},
2160                        '0'...'9' => {},
2161                        else => {
2162                            result.id = .numeral;
2163                            break;
2164                        },
2165                    },
2166                }
2167            }
2168
2169            if (it.pos >= it.input.len) {
2170                switch (state) {
2171                    .string => result.id = .string,
2172                    .numeral, .numeral_hex => result.id = .numeral,
2173                    else => {},
2174                }
2175            }
2176
2177            result.end = it.pos;
2178            return result;
2179        }
2180
2181        fn seekTo(it: *Tokenizer, pos: usize) void {
2182            it.pos = pos;
2183        }
2184    };
2185
2186    pub fn init(input: []const u8) Assembler {
2187        return .{
2188            .it = Tokenizer{ .input = input },
2189        };
2190    }
2191
2192    pub fn assemble(as: *Assembler, w: *Writer) !void {
2193        while (try as.next()) |parsed_inst| {
2194            const inst: Instruction = try .new(.none, parsed_inst.mnemonic, &parsed_inst.ops);
2195            try inst.encode(w, .{});
2196        }
2197    }
2198
    /// One parsed instruction as returned by `next`: its mnemonic and four
    /// operand slots. `next` starts every slot at `.none`, and the operand
    /// rule that matches fills in the leading ones.
    const ParseResult = struct {
        mnemonic: Instruction.Mnemonic,
        ops: [4]Instruction.Operand,
    };
2203
    /// Error set shared by the parsing functions of `Assembler`: the parser's
    /// own failures merged with the tokenizer's error set and the errors of
    /// `std.fmt.parseInt`, which is used for immediates.
    const ParseError = error{
        UnexpectedToken,
        InvalidMnemonic,
        InvalidOperand,
        InvalidRegister,
        InvalidPtrSize,
        InvalidMemoryOperand,
        InvalidScaleIndex,
    } || Tokenizer.Error || std.fmt.ParseIntError;
2213
2214    fn next(as: *Assembler) ParseError!?ParseResult {
2215        try as.skip(2, .{ .space, .new_line });
2216        const mnemonic_tok = as.expect(.string) catch |err| switch (err) {
2217            error.UnexpectedToken => return if (try as.peek() == .eof) null else err,
2218            else => return err,
2219        };
2220        const mnemonic = mnemonicFromString(as.source(mnemonic_tok)) orelse
2221            return error.InvalidMnemonic;
2222        try as.skip(1, .{.space});
2223
2224        const rules = .{
2225            .{},
2226            .{.register},
2227            .{.memory},
2228            .{.immediate},
2229            .{ .register, .register },
2230            .{ .register, .memory },
2231            .{ .memory, .register },
2232            .{ .register, .immediate },
2233            .{ .memory, .immediate },
2234            .{ .register, .register, .immediate },
2235            .{ .register, .memory, .immediate },
2236        };
2237
2238        const pos = as.it.pos;
2239        inline for (rules) |rule| {
2240            var ops = [4]Instruction.Operand{ .none, .none, .none, .none };
2241            if (as.parseOperandRule(rule, &ops)) {
2242                return .{
2243                    .mnemonic = mnemonic,
2244                    .ops = ops,
2245                };
2246            } else |_| {
2247                as.it.seekTo(pos);
2248            }
2249        }
2250
2251        return error.InvalidOperand;
2252    }
2253
2254    fn source(as: *Assembler, token: Tokenizer.Token) []const u8 {
2255        return as.it.input[token.start..token.end];
2256    }
2257
2258    fn peek(as: *Assembler) Tokenizer.Error!Tokenizer.Token.Id {
2259        const pos = as.it.pos;
2260        const next_tok = try as.it.next();
2261        const id = next_tok.id;
2262        as.it.seekTo(pos);
2263        return id;
2264    }
2265
2266    fn expect(as: *Assembler, id: Tokenizer.Token.Id) ParseError!Tokenizer.Token {
2267        const next_tok_id = try as.peek();
2268        if (next_tok_id == id) return as.it.next();
2269        return error.UnexpectedToken;
2270    }
2271
2272    fn skip(as: *Assembler, comptime num: comptime_int, tok_ids: [num]Tokenizer.Token.Id) Tokenizer.Error!void {
2273        outer: while (true) {
2274            const pos = as.it.pos;
2275            const next_tok = try as.it.next();
2276            inline for (tok_ids) |tok_id| {
2277                if (next_tok.id == tok_id) continue :outer;
2278            }
2279            as.it.seekTo(pos);
2280            break;
2281        }
2282    }
2283
2284    fn mnemonicFromString(bytes: []const u8) ?Instruction.Mnemonic {
2285        const ti = @typeInfo(Instruction.Mnemonic).@"enum";
2286        inline for (ti.fields) |field| {
2287            if (std.mem.eql(u8, bytes, field.name)) {
2288                return @field(Instruction.Mnemonic, field.name);
2289            }
2290        }
2291        return null;
2292    }
2293
2294    fn parseOperandRule(as: *Assembler, rule: anytype, ops: *[4]Instruction.Operand) ParseError!void {
2295        inline for (rule, 0..) |cond, i| {
2296            comptime assert(i < 4);
2297            if (i > 0) {
2298                _ = try as.expect(.comma);
2299                try as.skip(1, .{.space});
2300            }
2301            if (@typeInfo(@TypeOf(cond)) != .enum_literal) {
2302                @compileError("invalid condition in the rule: " ++ @typeName(@TypeOf(cond)));
2303            }
2304            switch (cond) {
2305                .register => {
2306                    const reg_tok = try as.expect(.string);
2307                    const reg = registerFromString(as.source(reg_tok)) orelse
2308                        return error.InvalidOperand;
2309                    ops[i] = .{ .reg = reg };
2310                },
2311                .memory => {
2312                    const mem = try as.parseMemory();
2313                    ops[i] = .{ .mem = mem };
2314                },
2315                .immediate => {
2316                    const is_neg = if (as.expect(.minus)) |_| true else |_| false;
2317                    const imm_tok = try as.expect(.numeral);
2318                    const imm: Instruction.Immediate = if (is_neg) blk: {
2319                        const imm = try std.fmt.parseInt(i32, as.source(imm_tok), 0);
2320                        break :blk .{ .signed = imm * -1 };
2321                    } else .{ .unsigned = try std.fmt.parseInt(u64, as.source(imm_tok), 0) };
2322                    ops[i] = .{ .imm = imm };
2323                },
2324                else => @compileError("unhandled enum literal " ++ @tagName(cond)),
2325            }
2326            try as.skip(1, .{.space});
2327        }
2328
2329        try as.skip(1, .{.space});
2330        const tok = try as.it.next();
2331        switch (tok.id) {
2332            .new_line, .eof => {},
2333            else => return error.InvalidOperand,
2334        }
2335    }
2336
2337    fn registerFromString(bytes: []const u8) ?Register {
2338        const ti = @typeInfo(Register).@"enum";
2339        inline for (ti.fields) |field| {
2340            if (std.mem.eql(u8, bytes, field.name)) {
2341                return @field(Register, field.name);
2342            }
2343        }
2344        return null;
2345    }
2346
2347    fn parseMemory(as: *Assembler) ParseError!Instruction.Memory {
2348        const ptr_size: ?Instruction.Memory.PtrSize = blk: {
2349            const pos = as.it.pos;
2350            const ptr_size = as.parsePtrSize() catch |err| switch (err) {
2351                error.UnexpectedToken => {
2352                    as.it.seekTo(pos);
2353                    break :blk null;
2354                },
2355                else => return err,
2356            };
2357            break :blk ptr_size;
2358        };
2359
2360        try as.skip(1, .{.space});
2361
2362        // Supported rules and orderings.
2363        const rules = .{
2364            .{ .open_br, .general_purpose, .close_br }, // [ general_purpose ]
2365            .{ .open_br, .general_purpose, .plus, .disp, .close_br }, // [ general_purpose + disp ]
2366            .{ .open_br, .general_purpose, .minus, .disp, .close_br }, // [ general_purpose - disp ]
2367            .{ .open_br, .disp, .plus, .general_purpose, .close_br }, // [ disp + general_purpose ]
2368            .{ .open_br, .general_purpose, .plus, .index, .close_br }, // [ general_purpose + index ]
2369            .{ .open_br, .general_purpose, .plus, .index, .star, .scale, .close_br }, // [ general_purpose + index * scale ]
2370            .{ .open_br, .index, .star, .scale, .plus, .general_purpose, .close_br }, // [ index * scale + general_purpose ]
2371            .{ .open_br, .general_purpose, .plus, .index, .star, .scale, .plus, .disp, .close_br }, // [ general_purpose + index * scale + disp ]
2372            .{ .open_br, .general_purpose, .plus, .index, .star, .scale, .minus, .disp, .close_br }, // [ general_purpose + index * scale - disp ]
2373            .{ .open_br, .index, .star, .scale, .plus, .general_purpose, .plus, .disp, .close_br }, // [ index * scale + general_purpose + disp ]
2374            .{ .open_br, .index, .star, .scale, .plus, .general_purpose, .minus, .disp, .close_br }, // [ index * scale + general_purpose - disp ]
2375            .{ .open_br, .disp, .plus, .index, .star, .scale, .plus, .general_purpose, .close_br }, // [ disp + index * scale + general_purpose ]
2376            .{ .open_br, .disp, .plus, .general_purpose, .plus, .index, .star, .scale, .close_br }, // [ disp + general_purpose + index * scale ]
2377            .{ .open_br, .general_purpose, .plus, .disp, .plus, .index, .star, .scale, .close_br }, // [ general_purpose + disp + index * scale ]
2378            .{ .open_br, .general_purpose, .minus, .disp, .plus, .index, .star, .scale, .close_br }, // [ general_purpose - disp + index * scale ]
2379            .{ .open_br, .general_purpose, .plus, .disp, .plus, .scale, .star, .index, .close_br }, // [ general_purpose + disp + scale * index ]
2380            .{ .open_br, .general_purpose, .minus, .disp, .plus, .scale, .star, .index, .close_br }, // [ general_purpose - disp + scale * index ]
2381            .{ .open_br, .rip, .plus, .disp, .close_br }, // [ rip + disp ]
            .{ .open_br, .rip, .minus, .disp, .close_br }, // [ rip - disp ]
2383            .{ .segment, .colon, .disp }, // seg:disp
2384        };
2385
2386        const pos = as.it.pos;
2387        inline for (rules) |rule| {
2388            if (as.parseMemoryRule(rule)) |res| {
2389                if (res.rip) {
2390                    if (res.base != null or res.scale_index != null or res.offset != null)
2391                        return error.InvalidMemoryOperand;
2392                    return Instruction.Memory.initRip(ptr_size orelse .qword, res.disp orelse 0);
2393                }
2394                if (res.base) |base| {
2395                    if (res.rip)
2396                        return error.InvalidMemoryOperand;
2397                    if (res.offset) |offset| {
2398                        if (res.scale_index != null or res.disp != null)
2399                            return error.InvalidMemoryOperand;
2400                        return Instruction.Memory.initMoffs(base, offset);
2401                    }
2402                    return Instruction.Memory.initSib(ptr_size orelse .qword, .{
2403                        .base = .{ .reg = base },
2404                        .scale_index = res.scale_index,
2405                        .disp = res.disp orelse 0,
2406                    });
2407                }
2408                return error.InvalidMemoryOperand;
2409            } else |_| {
2410                as.it.seekTo(pos);
2411            }
2412        }
2413
2414        return error.InvalidOperand;
2415    }
2416
2417    const MemoryParseResult = struct {
2418        rip: bool = false,
2419        base: ?Register = null,
2420        scale_index: ?Instruction.Memory.ScaleIndex = null,
2421        disp: ?i32 = null,
2422        offset: ?u64 = null,
2423    };
2424
2425    fn parseMemoryRule(as: *Assembler, rule: anytype) ParseError!MemoryParseResult {
2426        var res: MemoryParseResult = .{};
2427        inline for (rule, 0..) |cond, i| {
2428            if (@typeInfo(@TypeOf(cond)) != .enum_literal) {
2429                @compileError("unsupported condition type in the rule: " ++ @typeName(@TypeOf(cond)));
2430            }
2431            switch (cond) {
2432                .open_br, .close_br, .plus, .minus, .star, .colon => {
2433                    _ = try as.expect(cond);
2434                },
2435                .general_purpose, .segment => {
2436                    const tok = try as.expect(.string);
2437                    const base = registerFromString(as.source(tok)) orelse return error.InvalidMemoryOperand;
2438                    if (!base.isClass(cond)) return error.InvalidMemoryOperand;
2439                    res.base = base;
2440                },
2441                .rip => {
2442                    const tok = try as.expect(.string);
2443                    if (!std.mem.eql(u8, as.source(tok), "rip")) return error.InvalidMemoryOperand;
2444                    res.rip = true;
2445                },
2446                .index => {
2447                    const tok = try as.expect(.string);
2448                    const index = registerFromString(as.source(tok)) orelse
2449                        return error.InvalidMemoryOperand;
2450                    if (res.scale_index) |*si| {
2451                        si.index = index;
2452                    } else {
2453                        res.scale_index = .{ .scale = 1, .index = index };
2454                    }
2455                },
2456                .scale => {
2457                    const tok = try as.expect(.numeral);
2458                    const scale = try std.fmt.parseInt(u2, as.source(tok), 0);
2459                    if (res.scale_index) |*si| {
2460                        si.scale = scale;
2461                    } else {
2462                        res.scale_index = .{ .scale = scale, .index = undefined };
2463                    }
2464                },
2465                .disp => {
2466                    const tok = try as.expect(.numeral);
2467                    const is_neg = blk: {
2468                        if (i > 0) {
2469                            if (rule[i - 1] == .minus) break :blk true;
2470                        }
2471                        break :blk false;
2472                    };
2473                    if (std.fmt.parseInt(i32, as.source(tok), 0)) |disp| {
2474                        res.disp = if (is_neg) -1 * disp else disp;
2475                    } else |err| switch (err) {
2476                        error.Overflow => {
2477                            if (is_neg) return err;
2478                            if (res.base) |base| {
2479                                if (!base.isClass(.segment)) return err;
2480                            }
2481                            const offset = try std.fmt.parseInt(u64, as.source(tok), 0);
2482                            res.offset = offset;
2483                        },
2484                        else => return err,
2485                    }
2486                },
2487                else => @compileError("unhandled operand output type: " ++ @tagName(cond)),
2488            }
2489            try as.skip(1, .{.space});
2490        }
2491        return res;
2492    }
2493
2494    fn parsePtrSize(as: *Assembler) ParseError!Instruction.Memory.PtrSize {
2495        const size = try as.expect(.string);
2496        try as.skip(1, .{.space});
2497        const ptr = try as.expect(.string);
2498
2499        const size_raw = as.source(size);
2500        const ptr_raw = as.source(ptr);
2501        const len = size_raw.len + ptr_raw.len + 1;
2502        var buf: ["qword ptr".len]u8 = undefined;
2503        if (len > buf.len) return error.InvalidPtrSize;
2504
2505        for (size_raw, 0..) |c, i| {
2506            buf[i] = std.ascii.toLower(c);
2507        }
2508        buf[size_raw.len] = ' ';
2509        for (ptr_raw, 0..) |c, i| {
2510            buf[size_raw.len + i + 1] = std.ascii.toLower(c);
2511        }
2512
2513        const slice = buf[0..len];
2514        if (std.mem.eql(u8, slice, "qword ptr")) return .qword;
2515        if (std.mem.eql(u8, slice, "dword ptr")) return .dword;
2516        if (std.mem.eql(u8, slice, "word ptr")) return .word;
2517        if (std.mem.eql(u8, slice, "byte ptr")) return .byte;
2518        if (std.mem.eql(u8, slice, "tbyte ptr")) return .tbyte;
2519        return error.InvalidPtrSize;
2520    }
2521};
2522
test "assemble" {
    // Assembles a multi-line listing in one go and compares the emitted machine
    // code with the reference bytes below.
    const source_text =
        \\int3
        \\mov rax, rbx
        \\mov qword ptr [rbp], rax
        \\mov qword ptr [rbp - 16], rax
        \\mov qword ptr [16 + rbp], rax
        \\mov rax, 0x10
        \\mov byte ptr [rbp - 0x10], 0x10
        \\mov word ptr [rbp + r12], r11w
        \\mov word ptr [rbp + r12 * 2], r11w
        \\mov word ptr [rbp + r12 * 2 - 16], r11w
        \\mov dword ptr [rip - 16], r12d
        \\mov rax, fs:0x0
        \\mov rax, gs:0x1000000000000000
        \\movzx r12, al
        \\imul r12, qword ptr [rbp - 16], 6
        \\jmp 0x0
        \\jc 0x0
        \\jb 0x0
        \\sal rax, 1
        \\sal rax, 63
        \\shl rax, 63
        \\sar rax, 63
        \\shr rax, 63
        \\test byte ptr [rbp - 16], r12b
        \\sal r12, cl
        \\mul qword ptr [rip - 16]
        \\div r12
        \\idiv byte ptr [rbp - 16]
        \\cwde
        \\cbw
        \\cdqe
        \\test byte ptr [rbp], ah
        \\test byte ptr [r12], spl
        \\cdq
        \\cwd
        \\cqo
        \\test bl, 0x1
        \\mov rbx,0x8000000000000000
        \\movss xmm0, dword ptr [rbp]
        \\movss xmm0, xmm1
        \\movss dword ptr [rbp - 16 + rax * 2], xmm7
        \\movss dword ptr [rbp - 16 + rax * 2], xmm8
        \\movss xmm15, xmm9
        \\movsd xmm8, qword ptr [rbp - 16]
        \\movsd qword ptr [rbp - 8], xmm0
        \\movq xmm8, qword ptr [rbp - 16]
        \\movq qword ptr [rbp - 16], xmm8
        \\ucomisd xmm0, qword ptr [rbp - 16]
        \\fisttp qword ptr [rbp - 16]
        \\fisttp word ptr [rip + 32]
        \\fisttp dword ptr [rax]
        \\fld tbyte ptr [rbp]
        \\fld dword ptr [rbp]
        \\xor bl, 0xff
        \\ud2
        \\add rsp, -1
        \\add rsp, 0xff
        \\mov sil, byte ptr [rax + rcx * 1]
        \\
    ;

    // One row per listing line above, in the same order.
    // zig fmt: off
    const expected_bytes = [_]u8{
        0xCC,
        0x48, 0x89, 0xD8,
        0x48, 0x89, 0x45, 0x00,
        0x48, 0x89, 0x45, 0xF0,
        0x48, 0x89, 0x45, 0x10,
        0x48, 0xC7, 0xC0, 0x10, 0x00, 0x00, 0x00,
        0xC6, 0x45, 0xF0, 0x10,
        0x66, 0x46, 0x89, 0x5C, 0x25, 0x00,
        0x66, 0x46, 0x89, 0x5C, 0x65, 0x00,
        0x66, 0x46, 0x89, 0x5C, 0x65, 0xF0,
        0x44, 0x89, 0x25, 0xF0, 0xFF, 0xFF, 0xFF,
        0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00,
        0x65, 0x48, 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
        0x4C, 0x0F, 0xB6, 0xE0,
        0x4C, 0x6B, 0x65, 0xF0, 0x06,
        0xE9, 0x00, 0x00, 0x00, 0x00,
        0x0F, 0x82, 0x00, 0x00, 0x00, 0x00,
        0x0F, 0x82, 0x00, 0x00, 0x00, 0x00,
        0x48, 0xD1, 0xE0,
        0x48, 0xC1, 0xE0, 0x3F,
        0x48, 0xC1, 0xE0, 0x3F,
        0x48, 0xC1, 0xF8, 0x3F,
        0x48, 0xC1, 0xE8, 0x3F,
        0x44, 0x84, 0x65, 0xF0,
        0x49, 0xD3, 0xE4,
        0x48, 0xF7, 0x25, 0xF0, 0xFF, 0xFF, 0xFF,
        0x49, 0xF7, 0xF4,
        0xF6, 0x7D, 0xF0,
        0x98,
        0x66, 0x98,
        0x48, 0x98,
        0x84, 0x65, 0x00,
        0x41, 0x84, 0x24, 0x24,
        0x99,
        0x66, 0x99,
        0x48, 0x99,
        0xF6, 0xC3, 0x01,
        0x48, 0xBB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
        0xF3, 0x0F, 0x10, 0x45, 0x00,
        0xF3, 0x0F, 0x10, 0xC1,
        0xF3, 0x0F, 0x11, 0x7C, 0x45, 0xF0,
        0xF3, 0x44, 0x0F, 0x11, 0x44, 0x45, 0xF0,
        0xF3, 0x45, 0x0F, 0x10, 0xF9,
        0xF2, 0x44, 0x0F, 0x10, 0x45, 0xF0,
        0xF2, 0x0F, 0x11, 0x45, 0xF8,
        0x66, 0x4C, 0x0F, 0x6E, 0x45, 0xF0,
        0x66, 0x4C, 0x0F, 0x7E, 0x45, 0xF0,
        0x66, 0x0F, 0x2E, 0x45, 0xF0,
        0xDD, 0x4D, 0xF0,
        0xDF, 0x0D, 0x20, 0x00, 0x00, 0x00,
        0xDB, 0x08,
        0xDB, 0x6D, 0x00,
        0xD9, 0x45, 0x00,
        0x80, 0xF3, 0xFF,
        0x0F, 0x0B,
        0x48, 0x83, 0xC4, 0xFF,
        0x48, 0x81, 0xC4, 0xFF, 0x00, 0x00, 0x00,
        0x40, 0x8A, 0x34, 0x08,
    };
    // zig fmt: on

    var assembler = Assembler.init(source_text);
    var emitted = std.array_list.Managed(u8).init(testing.allocator);
    defer emitted.deinit();
    try assembler.assemble(emitted.writer());
    try expectEqualHexStrings(&expected_bytes, emitted.items, source_text);
}
2655
test "assemble - Jcc" {
    // Each case pairs a conditional-jump mnemonic with the opcode byte that is
    // expected right after the 0x0F escape byte. Synonymous mnemonics (for
    // example `jb`, `jc` and `jnae`) share the same byte.
    const Case = struct { Instruction.Mnemonic, u8 };
    const cases = [_]Case{
        .{ .ja, 0x87 },
        .{ .jae, 0x83 },
        .{ .jb, 0x82 },
        .{ .jbe, 0x86 },
        .{ .jc, 0x82 },
        .{ .je, 0x84 },
        .{ .jg, 0x8f },
        .{ .jge, 0x8d },
        .{ .jl, 0x8c },
        .{ .jle, 0x8e },
        .{ .jna, 0x86 },
        .{ .jnae, 0x82 },
        .{ .jnb, 0x83 },
        .{ .jnbe, 0x87 },
        .{ .jnc, 0x83 },
        .{ .jne, 0x85 },
        .{ .jng, 0x8e },
        .{ .jnge, 0x8c },
        .{ .jnl, 0x8d },
        .{ .jnle, 0x8f },
        .{ .jno, 0x81 },
        .{ .jnp, 0x8b },
        .{ .jns, 0x89 },
        .{ .jnz, 0x85 },
        .{ .jo, 0x80 },
        .{ .jp, 0x8a },
        .{ .jpe, 0x8a },
        .{ .jpo, 0x8b },
        .{ .js, 0x88 },
        .{ .jz, 0x84 },
    };

    inline for (cases) |case| {
        // Source line of the form "<mnemonic> 0x0"; the target is expected to
        // be emitted as four zero bytes.
        const source_text = @tagName(case[0]) ++ " 0x0";
        const expected_bytes: [6]u8 = .{ 0x0f, case[1], 0x0, 0x0, 0x0, 0x0 };

        var assembler = Assembler.init(source_text);
        var emitted = std.array_list.Managed(u8).init(testing.allocator);
        defer emitted.deinit();
        try assembler.assemble(emitted.writer());
        try expectEqualHexStrings(&expected_bytes, emitted.items, source_text);
    }
}
2700
test "assemble - SETcc" {
    // Each case pairs a SETcc mnemonic with the opcode byte that is expected
    // right after the 0x0F escape byte. Synonymous mnemonics (for example
    // `setb`, `setc` and `setnae`) share the same byte.
    const Case = struct { Instruction.Mnemonic, u8 };
    const cases = [_]Case{
        .{ .seta, 0x97 },
        .{ .setae, 0x93 },
        .{ .setb, 0x92 },
        .{ .setbe, 0x96 },
        .{ .setc, 0x92 },
        .{ .sete, 0x94 },
        .{ .setg, 0x9f },
        .{ .setge, 0x9d },
        .{ .setl, 0x9c },
        .{ .setle, 0x9e },
        .{ .setna, 0x96 },
        .{ .setnae, 0x92 },
        .{ .setnb, 0x93 },
        .{ .setnbe, 0x97 },
        .{ .setnc, 0x93 },
        .{ .setne, 0x95 },
        .{ .setng, 0x9e },
        .{ .setnge, 0x9c },
        .{ .setnl, 0x9d },
        .{ .setnle, 0x9f },
        .{ .setno, 0x91 },
        .{ .setnp, 0x9b },
        .{ .setns, 0x99 },
        .{ .setnz, 0x95 },
        .{ .seto, 0x90 },
        .{ .setp, 0x9a },
        .{ .setpe, 0x9a },
        .{ .setpo, 0x9b },
        .{ .sets, 0x98 },
        .{ .setz, 0x94 },
    };

    inline for (cases) |case| {
        // Source line of the form "<mnemonic> al"; 0xC0 is the expected
        // trailing byte for the `al` operand.
        const source_text = @tagName(case[0]) ++ " al";
        const expected_bytes: [3]u8 = .{ 0x0f, case[1], 0xC0 };

        var assembler = Assembler.init(source_text);
        var emitted = std.array_list.Managed(u8).init(testing.allocator);
        defer emitted.deinit();
        try assembler.assemble(emitted.writer());
        try expectEqualHexStrings(&expected_bytes, emitted.items, source_text);
    }
}
2745
test "assemble - CMOVcc" {
    // Each case pairs a CMOVcc mnemonic with the opcode byte that is expected
    // right after the 0x0F escape byte. Synonymous mnemonics (for example
    // `cmovb`, `cmovc` and `cmovnae`) share the same byte.
    const Case = struct { Instruction.Mnemonic, u8 };
    const cases = [_]Case{
        .{ .cmova, 0x47 },
        .{ .cmovae, 0x43 },
        .{ .cmovb, 0x42 },
        .{ .cmovbe, 0x46 },
        .{ .cmovc, 0x42 },
        .{ .cmove, 0x44 },
        .{ .cmovg, 0x4f },
        .{ .cmovge, 0x4d },
        .{ .cmovl, 0x4c },
        .{ .cmovle, 0x4e },
        .{ .cmovna, 0x46 },
        .{ .cmovnae, 0x42 },
        .{ .cmovnb, 0x43 },
        .{ .cmovnbe, 0x47 },
        .{ .cmovnc, 0x43 },
        .{ .cmovne, 0x45 },
        .{ .cmovng, 0x4e },
        .{ .cmovnge, 0x4c },
        .{ .cmovnl, 0x4d },
        .{ .cmovnle, 0x4f },
        .{ .cmovno, 0x41 },
        .{ .cmovnp, 0x4b },
        .{ .cmovns, 0x49 },
        .{ .cmovnz, 0x45 },
        .{ .cmovo, 0x40 },
        .{ .cmovp, 0x4a },
        .{ .cmovpe, 0x4a },
        .{ .cmovpo, 0x4b },
        .{ .cmovs, 0x48 },
        .{ .cmovz, 0x44 },
    };

    inline for (cases) |case| {
        // Source line of the form "<mnemonic> rax, rbx"; the expected
        // encoding starts with 0x48 and ends with 0xC3 for this operand pair.
        const source_text = @tagName(case[0]) ++ " rax, rbx";
        const expected_bytes: [4]u8 = .{ 0x48, 0x0f, case[1], 0xC3 };

        var assembler = Assembler.init(source_text);
        var emitted = std.array_list.Managed(u8).init(testing.allocator);
        defer emitted.deinit();
        try assembler.assemble(emitted.writer());
        try expectEqualHexStrings(&expected_bytes, emitted.items, source_text);
    }
}