master
  1const Disassembler = @This();
  2
  3const std = @import("std");
  4const assert = std.debug.assert;
  5const math = std.math;
  6
  7const bits = @import("bits.zig");
  8const encoder = @import("encoder.zig");
  9
 10const Encoding = @import("Encoding.zig");
 11const Immediate = Instruction.Immediate;
 12const Instruction = encoder.Instruction;
 13const LegacyPrefixes = encoder.LegacyPrefixes;
 14const Memory = Instruction.Memory;
 15const Register = bits.Register;
 16const Rex = encoder.Rex;
 17
 18pub const Error = error{
 19    EndOfStream,
 20    /// After the TODO below is solved this will make sense.
 21    ReadFailed,
 22    LegacyPrefixAfterRex,
 23    UnknownOpcode,
 24    Overflow,
 25    Todo,
 26};
 27
 28// TODO these fields should be replaced by std.Io.Reader
 29
 30code: []const u8,
 31pos: usize = 0,
 32
 33pub fn init(code: []const u8) Disassembler {
 34    return .{ .code = code };
 35}
 36
 37pub fn next(dis: *Disassembler) Error!?Instruction {
 38    const prefixes = dis.parsePrefixes() catch |err| switch (err) {
 39        error.EndOfStream => return null,
 40        else => |e| return e,
 41    };
 42
 43    const enc = try dis.parseEncoding(prefixes) orelse return error.UnknownOpcode;
 44    switch (enc.data.op_en) {
 45        .z => return inst(enc, .{}),
 46        .o => {
 47            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
 48            return inst(enc, .{
 49                .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
 50            });
 51        },
 52        .zo => {
 53            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
 54            return inst(enc, .{
 55                .op1 = .{ .reg = enc.data.ops[0].toReg() },
 56                .op2 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[1].regBitSize()) },
 57            });
 58        },
 59        .oz => {
 60            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
 61            return inst(enc, .{
 62                .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
 63                .op2 = .{ .reg = enc.data.ops[1].toReg() },
 64            });
 65        },
 66        .oi => {
 67            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
 68            const imm = try dis.parseImm(enc.data.ops[1]);
 69            return inst(enc, .{
 70                .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
 71                .op2 = .{ .imm = imm },
 72            });
 73        },
 74        .i, .d => {
 75            const imm = try dis.parseImm(enc.data.ops[0]);
 76            return inst(enc, .{
 77                .op1 = .{ .imm = imm },
 78            });
 79        },
 80        .zi => {
 81            const imm = try dis.parseImm(enc.data.ops[1]);
 82            return inst(enc, .{
 83                .op1 = .{ .reg = enc.data.ops[0].toReg() },
 84                .op2 = .{ .imm = imm },
 85            });
 86        },
 87        .ii => {
 88            const imm1 = try dis.parseImm(enc.data.ops[0]);
 89            const imm2 = try dis.parseImm(enc.data.ops[1]);
 90            return inst(enc, .{
 91                .op1 = .{ .imm = imm1 },
 92                .op2 = .{ .imm = imm2 },
 93            });
 94        },
 95        .ia => {
 96            const imm = try dis.parseImm(enc.data.ops[0]);
 97            return inst(enc, .{
 98                .op1 = .{ .imm = imm },
 99                .op2 = .{ .reg = .eax },
100            });
101        },
102        .m, .mi, .m1, .mc => {
103            const modrm = try dis.parseModRmByte();
104            const act_enc = Encoding.findByOpcode(enc.opcode(), .{
105                .legacy = prefixes.legacy,
106                .rex = prefixes.rex,
107            }, modrm.op1) orelse return error.UnknownOpcode;
108            const sib = if (modrm.sib()) try dis.parseSibByte() else null;
109
110            if (modrm.direct()) {
111                const op2: Instruction.Operand = switch (act_enc.data.op_en) {
112                    .mi => .{ .imm = try dis.parseImm(act_enc.data.ops[1]) },
113                    .m1 => .{ .imm = Immediate.u(1) },
114                    .mc => .{ .reg = .cl },
115                    .m => .none,
116                    else => unreachable,
117                };
118                return inst(act_enc, .{
119                    .op1 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, act_enc.data.ops[0].regBitSize()) },
120                    .op2 = op2,
121                });
122            }
123
124            const disp = try dis.parseDisplacement(modrm, sib);
125            const op2: Instruction.Operand = switch (act_enc.data.op_en) {
126                .mi => .{ .imm = try dis.parseImm(act_enc.data.ops[1]) },
127                .m1 => .{ .imm = Immediate.u(1) },
128                .mc => .{ .reg = .cl },
129                .m => .none,
130                else => unreachable,
131            };
132
133            if (modrm.rip()) {
134                return inst(act_enc, .{
135                    .op1 = .{ .mem = Memory.initRip(Memory.PtrSize.fromBitSize(act_enc.data.ops[0].memBitSize()), disp) },
136                    .op2 = op2,
137                });
138            }
139
140            const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
141            const base = if (sib) |info|
142                info.baseReg(modrm, prefixes)
143            else
144                parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
145            return inst(act_enc, .{
146                .op1 = .{ .mem = Memory.initSib(Memory.PtrSize.fromBitSize(act_enc.data.ops[0].memBitSize()), .{
147                    .base = if (base) |base_reg| .{ .reg = base_reg } else .none,
148                    .scale_index = scale_index,
149                    .disp = disp,
150                }) },
151                .op2 = op2,
152            });
153        },
154        .fd => {
155            const seg = segmentRegister(prefixes.legacy);
156            const offset = try dis.parseOffset();
157            return inst(enc, .{
158                .op1 = .{ .reg = enc.data.ops[0].toReg() },
159                .op2 = .{ .mem = Memory.initMoffs(seg, offset) },
160            });
161        },
162        .td => {
163            const seg = segmentRegister(prefixes.legacy);
164            const offset = try dis.parseOffset();
165            return inst(enc, .{
166                .op1 = .{ .mem = Memory.initMoffs(seg, offset) },
167                .op2 = .{ .reg = enc.data.ops[1].toReg() },
168            });
169        },
170        .mr, .mri, .mrc => {
171            const modrm = try dis.parseModRmByte();
172            const sib = if (modrm.sib()) try dis.parseSibByte() else null;
173            const src_bit_size = enc.data.ops[1].regBitSize();
174
175            if (modrm.direct()) {
176                return inst(enc, .{
177                    .op1 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
178                    .op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.x, prefixes.rex, src_bit_size) },
179                });
180            }
181
182            const dst_bit_size = enc.data.ops[0].memBitSize();
183            const disp = try dis.parseDisplacement(modrm, sib);
184            const op3: Instruction.Operand = switch (enc.data.op_en) {
185                .mri => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
186                .mrc => .{ .reg = .cl },
187                .mr => .none,
188                else => unreachable,
189            };
190
191            if (modrm.rip()) {
192                return inst(enc, .{
193                    .op1 = .{ .mem = Memory.initRip(Memory.PtrSize.fromBitSize(dst_bit_size), disp) },
194                    .op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
195                    .op3 = op3,
196                });
197            }
198
199            const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
200            const base = if (sib) |info|
201                info.baseReg(modrm, prefixes)
202            else
203                parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
204            return inst(enc, .{
205                .op1 = .{ .mem = Memory.initSib(Memory.PtrSize.fromBitSize(dst_bit_size), .{
206                    .base = if (base) |base_reg| .{ .reg = base_reg } else .none,
207                    .scale_index = scale_index,
208                    .disp = disp,
209                }) },
210                .op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
211                .op3 = op3,
212            });
213        },
214        .rm, .rmi => {
215            const modrm = try dis.parseModRmByte();
216            const sib = if (modrm.sib()) try dis.parseSibByte() else null;
217            const dst_bit_size = enc.data.ops[0].regBitSize();
218
219            if (modrm.direct()) {
220                const op3: Instruction.Operand = switch (enc.data.op_en) {
221                    .rm => .none,
222                    .rmi => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
223                    else => unreachable,
224                };
225                return inst(enc, .{
226                    .op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.x, prefixes.rex, dst_bit_size) },
227                    .op2 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, enc.data.ops[1].regBitSize()) },
228                    .op3 = op3,
229                });
230            }
231
232            const src_bit_size = if (enc.data.ops[1] == .m) dst_bit_size else enc.data.ops[1].memBitSize();
233            const disp = try dis.parseDisplacement(modrm, sib);
234            const op3: Instruction.Operand = switch (enc.data.op_en) {
235                .rmi => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
236                .rm => .none,
237                else => unreachable,
238            };
239
240            if (modrm.rip()) {
241                return inst(enc, .{
242                    .op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
243                    .op2 = .{ .mem = Memory.initRip(Memory.PtrSize.fromBitSize(src_bit_size), disp) },
244                    .op3 = op3,
245                });
246            }
247
248            const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
249            const base = if (sib) |info|
250                info.baseReg(modrm, prefixes)
251            else
252                parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
253            return inst(enc, .{
254                .op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
255                .op2 = .{ .mem = Memory.initSib(Memory.PtrSize.fromBitSize(src_bit_size), .{
256                    .base = if (base) |base_reg| .{ .reg = base_reg } else .none,
257                    .scale_index = scale_index,
258                    .disp = disp,
259                }) },
260                .op3 = op3,
261            });
262        },
263        .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO
264    }
265}
266
/// Assembles an `Instruction` from an encoding and up to four operands.
/// Operands that are not supplied are `.none`; the prefix defaults to `.none`.
fn inst(encoding: Encoding, args: struct {
    prefix: Instruction.Prefix = .none,
    op1: Instruction.Operand = .none,
    op2: Instruction.Operand = .none,
    op3: Instruction.Operand = .none,
    op4: Instruction.Operand = .none,
}) Instruction {
    return .{
        .prefix = args.prefix,
        .encoding = encoding,
        // Operands keep their positional order: op1 first, op4 last.
        .ops = .{ args.op1, args.op2, args.op3, args.op4 },
    };
}
281
/// Prefix state gathered in front of an opcode by `parsePrefixes`.
const Prefixes = struct {
    /// One flag per legacy prefix byte that was seen.
    legacy: LegacyPrefixes = .{},
    /// REX bits; `present` is set by `parsePrefixes` when a REX byte was seen.
    rex: Rex = .{},
    // TODO add support for VEX prefix
};
287
/// Consumes legacy and REX prefix bytes starting at `dis.pos`.
///
/// `dis.pos` is left on the first byte that is not a prefix. Fails with
/// `error.LegacyPrefixAfterRex` when a legacy prefix follows a REX prefix, and
/// propagates the reader's error when the input ends before a non-prefix byte
/// is found.
fn parsePrefixes(dis: *Disassembler) !Prefixes {
    var reader: std.Io.Reader = .fixed(dis.code[dis.pos..]);
    var found: Prefixes = .{};

    while (true) {
        const byte = try reader.takeByte();

        switch (byte) {
            // Legacy prefix
            0xf0, 0xf2, 0xf3, 0x2e, 0x36, 0x26, 0x64, 0x65, 0x3e, 0x66, 0x67 => {
                dis.pos += 1;
                if (found.rex.present) return error.LegacyPrefixAfterRex;
                switch (byte) {
                    0xf0 => found.legacy.prefix_f0 = true,
                    0xf2 => found.legacy.prefix_f2 = true,
                    0xf3 => found.legacy.prefix_f3 = true,
                    0x2e => found.legacy.prefix_2e = true,
                    0x36 => found.legacy.prefix_36 = true,
                    0x26 => found.legacy.prefix_26 = true,
                    0x64 => found.legacy.prefix_64 = true,
                    0x65 => found.legacy.prefix_65 = true,
                    0x3e => found.legacy.prefix_3e = true,
                    0x66 => found.legacy.prefix_66 = true,
                    0x67 => found.legacy.prefix_67 = true,
                    else => unreachable,
                }
            },
            // REX prefix: the high nibble is 0b0100, the low nibble holds W, R, X, B.
            0x40...0x4f => {
                dis.pos += 1;
                found.rex.w = byte & 0b1000 != 0;
                found.rex.r = byte & 0b100 != 0;
                found.rex.x = byte & 0b10 != 0;
                found.rex.b = byte & 0b1 != 0;
                found.rex.present = true;
            },
            // TODO VEX prefix
            //
            // Anything else starts the opcode; it is left unconsumed.
            else => return found,
        }
    }
}
338
/// Reads up to three opcode bytes at `dis.pos` and looks up a matching encoding.
///
/// A `0x0f` byte never triggers a lookup; it only extends the opcode. For a
/// one-byte opcode with no exact match, the lookup is repeated with the low
/// three bits cleared ("O*" encodings). Returns `null` when nothing matched;
/// `dis.pos` is advanced past every opcode byte that was read either way.
fn parseEncoding(dis: *Disassembler, prefixes: Prefixes) !?Encoding {
    const o_mask: u8 = 0b1111_1000;

    var reader: std.Io.Reader = .fixed(dis.code[dis.pos..]);
    var opcode: [3]u8 = .{ 0, 0, 0 };

    inline for (0..3) |opc_index| {
        const byte = try reader.takeByte();
        dis.pos += 1;
        opcode[opc_index] = byte;

        // 0x0f announces a multi-byte opcode: keep collecting bytes.
        if (byte != 0x0f) {
            const exact = Encoding.findByOpcode(opcode[0 .. opc_index + 1], .{
                .legacy = prefixes.legacy,
                .rex = prefixes.rex,
            }, null);

            if (opc_index == 0) {
                // Single-byte opcode: fall back to the O* encoding.
                return exact orelse Encoding.findByOpcode(&.{byte & o_mask}, .{
                    .legacy = prefixes.legacy,
                    .rex = prefixes.rex,
                }, null);
            } else if (exact) |encoding| {
                // Multi-byte opcode
                return encoding;
            }
        }
    }
    return null;
}
379
/// Builds a register from its 3-bit encoding plus an extension bit, resized to
/// `bit_size`.
///
/// When the result is `spl`, `bpl`, `sil` or `dil`, it is replaced by `ah`,
/// `ch`, `dh` or `bh` respectively unless a REX prefix is present or any REX
/// bit is set.
fn parseGpRegister(low_enc: u3, is_extended: bool, rex: Rex, bit_size: u64) Register {
    const high_bit: u4 = if (is_extended) 0b1000 else 0;
    const reg_id: u4 = high_bit | low_enc;
    const reg = @as(Register, @enumFromInt(reg_id)).toBitSize(bit_size);

    const without_rex: Register = switch (reg) {
        .spl => .ah,
        .bpl => .ch,
        .sil => .dh,
        .dil => .bh,
        else => return reg,
    };
    return if (rex.present or rex.isSet()) reg else without_rex;
}
391
/// Reads a little-endian immediate of the width implied by `kind` at `dis.pos`
/// and advances `dis.pos` past it.
///
/// Signed and relative kinds are read as signed integers; the remaining
/// immediate kinds are read as unsigned. Any other `kind` is a caller bug.
fn parseImm(dis: *Disassembler, kind: Encoding.Op) !Immediate {
    var stream: std.Io.Reader = .fixed(dis.code);
    stream.seek = dis.pos;
    // Publish the reader's final position on every exit path.
    defer dis.pos = stream.seek;

    return switch (kind) {
        // Unsigned immediates.
        .imm8 => Immediate.u(try stream.takeInt(u8, .little)),
        .imm16 => Immediate.u(try stream.takeInt(u16, .little)),
        .imm32 => Immediate.u(try stream.takeInt(u32, .little)),
        .imm64 => Immediate.u(try stream.takeInt(u64, .little)),
        // Signed immediates and relative offsets.
        .imm8s, .rel8 => Immediate.s(try stream.takeInt(i8, .little)),
        .imm16s, .rel16 => Immediate.s(try stream.takeInt(i16, .little)),
        .imm32s, .rel32 => Immediate.s(try stream.takeInt(i32, .little)),
        else => unreachable,
    };
}
409
/// Reads a little-endian `u64` at `dis.pos` and advances `dis.pos` past it.
fn parseOffset(dis: *Disassembler) !u64 {
    var stream: std.Io.Reader = .fixed(dis.code);
    stream.seek = dis.pos;
    // Publish the reader's final position on every exit path.
    defer dis.pos = stream.seek;

    const offset = try stream.takeInt(u64, .little);
    return offset;
}
417
/// The three fields of a ModR/M byte.
///
/// `parseModRmByte` fills the fields one by one with shifts, so the packed
/// field order here does not have to match the bit order of the encoded byte.
const ModRm = packed struct {
    mod: u2,
    op1: u3,
    op2: u3,

    /// True when the operand is a register rather than memory (`mod == 0b11`).
    inline fn direct(self: ModRm) bool {
        return switch (self.mod) {
            0b11 => true,
            else => false,
        };
    }

    /// True for the RIP-relative form (`mod == 0`, `op2 == 0b101`).
    inline fn rip(self: ModRm) bool {
        return self.op2 == 0b101 and self.mod == 0;
    }

    /// True when a SIB byte follows (`op2 == 0b100` in a non-direct form).
    inline fn sib(self: ModRm) bool {
        if (self.direct()) return false;
        return self.op2 == 0b100;
    }
};
435
/// Consumes one byte at `dis.pos` and splits it into ModR/M fields:
/// bits 7:6 become `mod`, bits 5:3 `op1`, bits 2:0 `op2`.
/// Returns `error.EndOfStream` when no byte is left.
fn parseModRmByte(dis: *Disassembler) !ModRm {
    if (dis.pos == dis.code.len) return error.EndOfStream;
    const byte = dis.code[dis.pos];
    dis.pos += 1;
    return .{
        .mod = @truncate(byte >> 6),
        .op1 = @truncate(byte >> 3),
        .op2 = @truncate(byte),
    };
}
445
/// Picks the segment register selected by the legacy prefixes.
/// The flags are checked in the order 2e, 36, 26, 64, 65 and the first one set
/// wins; with none set the result is `.ds`.
fn segmentRegister(prefixes: LegacyPrefixes) Register {
    return if (prefixes.prefix_2e)
        .cs
    else if (prefixes.prefix_36)
        .ss
    else if (prefixes.prefix_26)
        .es
    else if (prefixes.prefix_64)
        .fs
    else if (prefixes.prefix_65)
        .gs
    else
        .ds;
}
454
/// The three fields of a SIB byte, filled one by one by `parseSibByte`.
const Sib = packed struct {
    scale: u2,
    index: u3,
    base: u3,

    /// Returns the scaled index, or `null` when `index` is `0b100` and REX.X
    /// is clear. The scale is `1 << scale`; the index is a 64-bit register.
    fn scaleIndex(self: Sib, rex: Rex) ?Memory.ScaleIndex {
        const has_index = rex.x or self.index != 0b100;
        if (!has_index) return null;
        return .{
            .scale = @as(u4, 1) << self.scale,
            .index = parseGpRegister(self.index, rex.x, rex, 64),
        };
    }

    /// Returns the base register of the memory operand, if there is one.
    ///
    /// For `base == 0b101` with `mod == 0` the result is `null` when a scaled
    /// index exists, and otherwise the segment register chosen by the legacy
    /// prefixes. In every other case it is the 64-bit register named by `base`
    /// and REX.B.
    fn baseReg(self: Sib, modrm: ModRm, prefixes: Prefixes) ?Register {
        const special_base = self.base == 0b101 and modrm.mod == 0;
        if (!special_base) {
            return parseGpRegister(self.base, prefixes.rex.b, prefixes.rex, 64);
        }
        if (self.scaleIndex(prefixes.rex) != null) return null;
        return segmentRegister(prefixes.legacy);
    }
};
476
/// Consumes one byte at `dis.pos` and splits it into SIB fields:
/// bits 7:6 become `scale`, bits 5:3 `index`, bits 2:0 `base`.
/// Returns `error.EndOfStream` when no byte is left.
fn parseSibByte(dis: *Disassembler) !Sib {
    if (dis.pos == dis.code.len) return error.EndOfStream;
    const byte = dis.code[dis.pos];
    dis.pos += 1;
    return .{
        .scale = @truncate(byte >> 6),
        .index = @truncate(byte >> 3),
        .base = @truncate(byte),
    };
}
486
/// Reads the displacement that belongs to `modrm`/`sib` at `dis.pos` and
/// advances `dis.pos` past it.
///
/// A 32-bit displacement is read when the SIB base is `0b101` with `mod == 0`,
/// or when the operand is RIP-relative. Otherwise `mod` decides: `0b00` has no
/// displacement (result 0), `0b01` reads 8 bits, `0b10` reads 32 bits. Must
/// not be called for register-direct operands (`mod == 0b11`).
fn parseDisplacement(dis: *Disassembler, modrm: ModRm, sib: ?Sib) !i32 {
    var stream: std.Io.Reader = .fixed(dis.code);
    stream.seek = dis.pos;
    // Publish the reader's final position on every exit path.
    defer dis.pos = stream.seek;

    const sib_forces_disp32 = if (sib) |info|
        info.base == 0b101 and modrm.mod == 0
    else
        false;
    if (sib_forces_disp32 or modrm.rip()) {
        return try stream.takeInt(i32, .little);
    }

    return switch (modrm.mod) {
        0b00 => 0,
        0b01 => try stream.takeInt(i8, .little),
        0b10 => try stream.takeInt(i32, .little),
        0b11 => unreachable,
    };
}