master
1const Disassembler = @This();
2
3const std = @import("std");
4const assert = std.debug.assert;
5const math = std.math;
6
7const bits = @import("bits.zig");
8const encoder = @import("encoder.zig");
9
10const Encoding = @import("Encoding.zig");
11const Immediate = Instruction.Immediate;
12const Instruction = encoder.Instruction;
13const LegacyPrefixes = encoder.LegacyPrefixes;
14const Memory = Instruction.Memory;
15const Register = bits.Register;
16const Rex = encoder.Rex;
17
/// Errors that decoding an instruction can report.
pub const Error = error{
    /// No known encoding matches the opcode bytes that were read.
    UnknownOpcode,
    /// A legacy prefix byte was found after a REX prefix had already been seen.
    LegacyPrefixAfterRex,
    /// The input ended before the current read could be satisfied.
    EndOfStream,
    /// Comes from the `std.Io.Reader` error set. It will only become
    /// meaningful once the `code`/`pos` fields are replaced by a real reader
    /// (see the TODO next to those fields).
    ReadFailed,
    /// NOTE(review): no code in this file returns this directly; presumably
    /// kept for integer conversions in callers or helpers. Confirm before
    /// removing.
    Overflow,
    /// Placeholder for functionality that has not been implemented yet.
    Todo,
};
27
// TODO: replace the `code` and `pos` fields below with a `std.Io.Reader`.
29
30code: []const u8,
31pos: usize = 0,
32
/// Creates a disassembler that decodes `code` starting at its first byte.
/// The slice is only borrowed; nothing is copied.
pub fn init(code: []const u8) Disassembler {
    return Disassembler{
        .code = code,
        .pos = 0,
    };
}
36
/// Decodes the instruction that starts at `dis.pos` and advances `dis.pos`
/// past the bytes that were consumed.
///
/// Returns `null` when the input runs out while reading prefixes (i.e. there
/// is nothing left to decode).
///
/// Errors:
/// * `error.UnknownOpcode` - no encoding matches the opcode bytes.
/// * `error.LegacyPrefixAfterRex` - see `parsePrefixes`.
/// * `error.EndOfStream` / `error.ReadFailed` - the input ended in the middle
///   of an instruction.
/// * `error.Todo` - the opcode maps to an operand encoding that is not
///   decoded yet.
///
/// On error `dis.pos` is left wherever decoding stopped; it is not rewound.
pub fn next(dis: *Disassembler) Error!?Instruction {
    const prefixes = dis.parsePrefixes() catch |err| switch (err) {
        error.EndOfStream => return null,
        else => |e| return e,
    };

    const enc = try dis.parseEncoding(prefixes) orelse return error.UnknownOpcode;
    switch (enc.data.op_en) {
        .z => return inst(enc, .{}),
        .o => {
            // The register is encoded in the low three bits of the last opcode byte.
            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
            return inst(enc, .{
                .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
            });
        },
        .zo => {
            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
            return inst(enc, .{
                .op1 = .{ .reg = enc.data.ops[0].toReg() },
                .op2 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[1].regBitSize()) },
            });
        },
        .oz => {
            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
            return inst(enc, .{
                .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
                .op2 = .{ .reg = enc.data.ops[1].toReg() },
            });
        },
        .oi => {
            const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]);
            const imm = try dis.parseImm(enc.data.ops[1]);
            return inst(enc, .{
                .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
                .op2 = .{ .imm = imm },
            });
        },
        .i, .d => {
            const imm = try dis.parseImm(enc.data.ops[0]);
            return inst(enc, .{
                .op1 = .{ .imm = imm },
            });
        },
        .zi => {
            const imm = try dis.parseImm(enc.data.ops[1]);
            return inst(enc, .{
                .op1 = .{ .reg = enc.data.ops[0].toReg() },
                .op2 = .{ .imm = imm },
            });
        },
        .ii => {
            const imm1 = try dis.parseImm(enc.data.ops[0]);
            const imm2 = try dis.parseImm(enc.data.ops[1]);
            return inst(enc, .{
                .op1 = .{ .imm = imm1 },
                .op2 = .{ .imm = imm2 },
            });
        },
        .ia => {
            const imm = try dis.parseImm(enc.data.ops[0]);
            return inst(enc, .{
                .op1 = .{ .imm = imm },
                .op2 = .{ .reg = .eax },
            });
        },
        .m, .mi, .m1, .mc => {
            const modrm = try dis.parseModRmByte();
            // The ModRM.reg field acts as an opcode extension here, so the
            // encoding is looked up again with that field supplied.
            const act_enc = Encoding.findByOpcode(enc.opcode(), .{
                .legacy = prefixes.legacy,
                .rex = prefixes.rex,
            }, modrm.op1) orelse return error.UnknownOpcode;
            const sib = if (modrm.sib()) try dis.parseSibByte() else null;

            if (modrm.direct()) {
                const op2: Instruction.Operand = switch (act_enc.data.op_en) {
                    .mi => .{ .imm = try dis.parseImm(act_enc.data.ops[1]) },
                    .m1 => .{ .imm = Immediate.u(1) },
                    .mc => .{ .reg = .cl },
                    .m => .none,
                    else => unreachable,
                };
                return inst(act_enc, .{
                    .op1 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, act_enc.data.ops[0].regBitSize()) },
                    .op2 = op2,
                });
            }

            // The displacement precedes the immediate in the byte stream, so
            // it has to be read first.
            const disp = try dis.parseDisplacement(modrm, sib);
            const op2: Instruction.Operand = switch (act_enc.data.op_en) {
                .mi => .{ .imm = try dis.parseImm(act_enc.data.ops[1]) },
                .m1 => .{ .imm = Immediate.u(1) },
                .mc => .{ .reg = .cl },
                .m => .none,
                else => unreachable,
            };

            if (modrm.rip()) {
                return inst(act_enc, .{
                    .op1 = .{ .mem = Memory.initRip(Memory.PtrSize.fromBitSize(act_enc.data.ops[0].memBitSize()), disp) },
                    .op2 = op2,
                });
            }

            const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
            const base = if (sib) |info|
                info.baseReg(modrm, prefixes)
            else
                parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
            return inst(act_enc, .{
                .op1 = .{ .mem = Memory.initSib(Memory.PtrSize.fromBitSize(act_enc.data.ops[0].memBitSize()), .{
                    .base = if (base) |base_reg| .{ .reg = base_reg } else .none,
                    .scale_index = scale_index,
                    .disp = disp,
                }) },
                .op2 = op2,
            });
        },
        .fd => {
            const seg = segmentRegister(prefixes.legacy);
            const offset = try dis.parseOffset();
            return inst(enc, .{
                .op1 = .{ .reg = enc.data.ops[0].toReg() },
                .op2 = .{ .mem = Memory.initMoffs(seg, offset) },
            });
        },
        .td => {
            const seg = segmentRegister(prefixes.legacy);
            const offset = try dis.parseOffset();
            return inst(enc, .{
                .op1 = .{ .mem = Memory.initMoffs(seg, offset) },
                .op2 = .{ .reg = enc.data.ops[1].toReg() },
            });
        },
        .mr, .mri, .mrc => {
            const modrm = try dis.parseModRmByte();
            const sib = if (modrm.sib()) try dis.parseSibByte() else null;
            const src_bit_size = enc.data.ops[1].regBitSize();

            if (modrm.direct()) {
                // Register-direct forms still carry the trailing operand
                // (immediate or CL); it must be decoded so that the
                // immediate bytes are consumed and the operand is reported.
                const op3: Instruction.Operand = switch (enc.data.op_en) {
                    .mri => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
                    .mrc => .{ .reg = .cl },
                    .mr => .none,
                    else => unreachable,
                };
                return inst(enc, .{
                    .op1 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
                    // ModRM.reg is extended by REX.R (REX.X belongs to the SIB index).
                    .op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
                    .op3 = op3,
                });
            }

            const dst_bit_size = enc.data.ops[0].memBitSize();
            const disp = try dis.parseDisplacement(modrm, sib);
            const op3: Instruction.Operand = switch (enc.data.op_en) {
                .mri => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
                .mrc => .{ .reg = .cl },
                .mr => .none,
                else => unreachable,
            };

            if (modrm.rip()) {
                return inst(enc, .{
                    .op1 = .{ .mem = Memory.initRip(Memory.PtrSize.fromBitSize(dst_bit_size), disp) },
                    .op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
                    .op3 = op3,
                });
            }

            const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
            const base = if (sib) |info|
                info.baseReg(modrm, prefixes)
            else
                parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
            return inst(enc, .{
                .op1 = .{ .mem = Memory.initSib(Memory.PtrSize.fromBitSize(dst_bit_size), .{
                    .base = if (base) |base_reg| .{ .reg = base_reg } else .none,
                    .scale_index = scale_index,
                    .disp = disp,
                }) },
                .op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
                .op3 = op3,
            });
        },
        .rm, .rmi => {
            const modrm = try dis.parseModRmByte();
            const sib = if (modrm.sib()) try dis.parseSibByte() else null;
            const dst_bit_size = enc.data.ops[0].regBitSize();

            if (modrm.direct()) {
                const op3: Instruction.Operand = switch (enc.data.op_en) {
                    .rm => .none,
                    .rmi => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
                    else => unreachable,
                };
                return inst(enc, .{
                    // ModRM.reg is extended by REX.R (REX.X belongs to the SIB index).
                    .op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
                    .op2 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, enc.data.ops[1].regBitSize()) },
                    .op3 = op3,
                });
            }

            // An unsized memory operand (`.m`) takes the destination register's width.
            const src_bit_size = if (enc.data.ops[1] == .m) dst_bit_size else enc.data.ops[1].memBitSize();
            const disp = try dis.parseDisplacement(modrm, sib);
            const op3: Instruction.Operand = switch (enc.data.op_en) {
                .rmi => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
                .rm => .none,
                else => unreachable,
            };

            if (modrm.rip()) {
                return inst(enc, .{
                    .op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
                    .op2 = .{ .mem = Memory.initRip(Memory.PtrSize.fromBitSize(src_bit_size), disp) },
                    .op3 = op3,
                });
            }

            const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
            const base = if (sib) |info|
                info.baseReg(modrm, prefixes)
            else
                parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
            return inst(enc, .{
                .op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
                .op2 = .{ .mem = Memory.initSib(Memory.PtrSize.fromBitSize(src_bit_size), .{
                    .base = if (base) |base_reg| .{ .reg = base_reg } else .none,
                    .scale_index = scale_index,
                    .disp = disp,
                }) },
                .op3 = op3,
            });
        },
        // TODO: decode these operand encodings. Which one is selected depends
        // on the input bytes, so report an error instead of `unreachable`.
        .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => return error.Todo,
    }
}
266
/// Assembles an `Instruction` from an encoding and up to four operands.
/// Operands and the prefix that are not supplied default to `.none`.
fn inst(encoding: Encoding, args: struct {
    prefix: Instruction.Prefix = .none,
    op1: Instruction.Operand = .none,
    op2: Instruction.Operand = .none,
    op3: Instruction.Operand = .none,
    op4: Instruction.Operand = .none,
}) Instruction {
    return Instruction{
        .prefix = args.prefix,
        .encoding = encoding,
        .ops = .{ args.op1, args.op2, args.op3, args.op4 },
    };
}
281
/// The prefix bytes collected in front of an opcode by `parsePrefixes`.
const Prefixes = struct {
    /// Flags for each legacy prefix byte that was seen.
    legacy: LegacyPrefixes = LegacyPrefixes{},
    /// The decoded REX prefix; `rex.present` is set when one was seen.
    rex: Rex = Rex{},
    // TODO: add support for the VEX prefix.
};
287
/// Consumes every legacy and REX prefix byte starting at `dis.pos` and
/// returns what was found. `dis.pos` ends up on the first byte that is not a
/// prefix.
///
/// Fails with `error.LegacyPrefixAfterRex` when a legacy prefix follows a REX
/// prefix, and with `error.EndOfStream` when the input ends before a
/// non-prefix byte is seen.
fn parsePrefixes(dis: *Disassembler) !Prefixes {
    var stream: std.Io.Reader = .fixed(dis.code[dis.pos..]);
    var found: Prefixes = .{};

    while (true) {
        const byte = try stream.takeByte();

        // REX prefixes have 0b0100 in the high nibble; the low nibble is WRXB.
        if (byte >> 4 == 0b0100) {
            dis.pos += 1;
            found.rex.w = byte & 0b1000 != 0;
            found.rex.r = byte & 0b0100 != 0;
            found.rex.x = byte & 0b0010 != 0;
            found.rex.b = byte & 0b0001 != 0;
            found.rex.present = true;
            continue;
        }

        // TODO VEX prefix

        switch (byte) {
            0xf0 => found.legacy.prefix_f0 = true,
            0xf2 => found.legacy.prefix_f2 = true,
            0xf3 => found.legacy.prefix_f3 = true,
            0x2e => found.legacy.prefix_2e = true,
            0x36 => found.legacy.prefix_36 = true,
            0x26 => found.legacy.prefix_26 = true,
            0x64 => found.legacy.prefix_64 = true,
            0x65 => found.legacy.prefix_65 = true,
            0x3e => found.legacy.prefix_3e = true,
            0x66 => found.legacy.prefix_66 = true,
            0x67 => found.legacy.prefix_67 = true,
            // Not a prefix: leave the byte unconsumed for the opcode parser.
            else => break,
        }

        // Only legacy prefixes reach this point.
        dis.pos += 1;
        if (found.rex.present) return error.LegacyPrefixAfterRex;
    }

    return found;
}
338
/// Reads up to three opcode bytes starting at `dis.pos` and looks up the
/// matching encoding. Returns `null` when nothing matches.
///
/// `dis.pos` is advanced past every byte that was read, including when the
/// lookup fails.
fn parseEncoding(dis: *Disassembler, prefixes: Prefixes) !?Encoding {
    // Clears the low three bits of an opcode byte for the O* lookup.
    const low_bits_cleared: u8 = 0b1111_1000;

    var stream: std.Io.Reader = .fixed(dis.code[dis.pos..]);
    var opcode_bytes = [_]u8{0} ** 3;

    comptime var filled = 0;
    inline while (filled < 3) : (filled += 1) {
        const byte = try stream.takeByte();
        opcode_bytes[filled] = byte;
        dis.pos += 1;

        // A 0x0f byte means more opcode bytes follow; keep reading.
        if (byte != 0x0f) {
            if (filled == 0) {
                // Single-byte opcode: try an exact match first, then retry
                // with the low three bits masked off (O* encoding).
                return Encoding.findByOpcode(opcode_bytes[0..1], .{
                    .legacy = prefixes.legacy,
                    .rex = prefixes.rex,
                }, null) orelse Encoding.findByOpcode(&.{opcode_bytes[0] & low_bits_cleared}, .{
                    .legacy = prefixes.legacy,
                    .rex = prefixes.rex,
                }, null);
            } else {
                // Multi-byte opcode: look up everything read so far.
                const candidate = Encoding.findByOpcode(opcode_bytes[0 .. filled + 1], .{
                    .legacy = prefixes.legacy,
                    .rex = prefixes.rex,
                }, null);
                if (candidate) |found| return found;
            }
        }
    }
    return null;
}
379
/// Maps a register encoding to a `Register` of width `bit_size`.
///
/// `low_enc` supplies the low three bits and `is_extended` the fourth bit of
/// the register id. When the result is `spl`, `bpl`, `sil` or `dil` and no
/// REX prefix is in effect, the matching high-byte register (`ah`, `ch`,
/// `dh`, `bh`) is returned instead.
fn parseGpRegister(low_enc: u3, is_extended: bool, rex: Rex, bit_size: u64) Register {
    const high_bit: u4 = @intFromBool(is_extended);
    const id: u4 = (high_bit << 3) | low_enc;
    const sized = @as(Register, @enumFromInt(id)).toBitSize(bit_size);

    const high_byte_alias: Register = switch (sized) {
        .spl => .ah,
        .bpl => .ch,
        .sil => .dh,
        .dil => .bh,
        else => return sized,
    };
    return if (rex.present or rex.isSet()) sized else high_byte_alias;
}
391
/// Reads a little-endian immediate of the width and signedness given by
/// `kind` at `dis.pos`, and moves `dis.pos` to wherever the reader stopped.
///
/// `kind` must be one of the immediate or relative-offset operand kinds;
/// anything else is a programming error.
fn parseImm(dis: *Disassembler, kind: Encoding.Op) !Immediate {
    var stream: std.Io.Reader = .fixed(dis.code);
    stream.seek = dis.pos;
    // Runs on both the success and the error path.
    defer dis.pos = stream.seek;

    return switch (kind) {
        // Signed immediates and relative offsets.
        .imm8s, .rel8 => Immediate.s(try stream.takeInt(i8, .little)),
        .imm16s, .rel16 => Immediate.s(try stream.takeInt(i16, .little)),
        .imm32s, .rel32 => Immediate.s(try stream.takeInt(i32, .little)),
        // Unsigned immediates.
        .imm8 => Immediate.u(try stream.takeInt(u8, .little)),
        .imm16 => Immediate.u(try stream.takeInt(u16, .little)),
        .imm32 => Immediate.u(try stream.takeInt(u32, .little)),
        .imm64 => Immediate.u(try stream.takeInt(u64, .little)),
        else => unreachable,
    };
}
409
/// Reads a little-endian 64-bit value at `dis.pos` and moves `dis.pos` to
/// wherever the reader stopped, whether or not the read succeeded.
fn parseOffset(dis: *Disassembler) !u64 {
    var stream: std.Io.Reader = .fixed(dis.code);
    stream.seek = dis.pos;
    const offset = stream.takeInt(u64, .little);
    dis.pos = stream.seek;
    return offset;
}
417
/// The three fields of a ModRM byte, as split up by `parseModRmByte`:
/// `mod` is bits 7..6, `op1` is bits 5..3 and `op2` is bits 2..0.
const ModRm = packed struct {
    mod: u2,
    op1: u3,
    op2: u3,

    /// True when `op2` names a register directly (`mod == 0b11`).
    inline fn direct(self: ModRm) bool {
        return switch (self.mod) {
            0b11 => true,
            else => false,
        };
    }

    /// True for the `mod == 0b00`, `op2 == 0b101` combination.
    inline fn rip(self: ModRm) bool {
        if (self.mod != 0b00) return false;
        return self.op2 == 0b101;
    }

    /// True when a SIB byte follows: not register-direct and `op2 == 0b100`.
    inline fn sib(self: ModRm) bool {
        if (self.mod == 0b11) return false;
        return self.op2 == 0b100;
    }
};
435
/// Consumes one byte at `dis.pos` and splits it into the ModRM fields.
/// Returns `error.EndOfStream` when no input is left.
fn parseModRmByte(dis: *Disassembler) !ModRm {
    if (dis.pos == dis.code.len) return error.EndOfStream;
    const byte = dis.code[dis.pos];
    dis.pos += 1;
    return .{
        .mod = @truncate(byte >> 6),
        .op1 = @truncate(byte >> 3),
        .op2 = @truncate(byte),
    };
}
445
/// Picks the segment register selected by the legacy prefixes. The prefixes
/// are checked in the order 0x2e, 0x36, 0x26, 0x64, 0x65 and the first one
/// that is set wins; `.ds` is returned when none of them is set.
fn segmentRegister(prefixes: LegacyPrefixes) Register {
    return if (prefixes.prefix_2e)
        .cs
    else if (prefixes.prefix_36)
        .ss
    else if (prefixes.prefix_26)
        .es
    else if (prefixes.prefix_64)
        .fs
    else if (prefixes.prefix_65)
        .gs
    else
        .ds;
}
454
/// The three fields of a SIB byte, as split up by `parseSibByte`:
/// `scale` is bits 7..6, `index` is bits 5..3 and `base` is bits 2..0.
const Sib = packed struct {
    scale: u2,
    index: u3,
    base: u3,

    /// Returns the scaled index register, or `null` when `index` is 0b100
    /// and REX.X is clear. The scale factor is `1 << scale`.
    fn scaleIndex(self: Sib, rex: Rex) ?Memory.ScaleIndex {
        const has_index = rex.x or self.index != 0b100;
        if (!has_index) return null;
        return .{
            .scale = @as(u4, 1) << self.scale,
            .index = parseGpRegister(self.index, rex.x, rex, 64),
        };
    }

    /// Returns the base register of the memory operand.
    ///
    /// For `base == 0b101` with `mod == 0` there is no base register: the
    /// result is `null` when an index register is present, otherwise the
    /// segment register selected by the legacy prefixes. In every other case
    /// it is the 64-bit register encoded by `base` and REX.B.
    fn baseReg(self: Sib, modrm: ModRm, prefixes: Prefixes) ?Register {
        const no_base = modrm.mod == 0 and self.base == 0b101;
        if (!no_base) return parseGpRegister(self.base, prefixes.rex.b, prefixes.rex, 64);

        if (self.scaleIndex(prefixes.rex) != null) return null;
        return segmentRegister(prefixes.legacy);
    }
};
476
/// Consumes one byte at `dis.pos` and splits it into the SIB fields.
/// Returns `error.EndOfStream` when no input is left.
fn parseSibByte(dis: *Disassembler) !Sib {
    if (dis.pos == dis.code.len) return error.EndOfStream;
    const byte = dis.code[dis.pos];
    dis.pos += 1;
    return .{
        .scale = @truncate(byte >> 6),
        .index = @truncate(byte >> 3),
        .base = @truncate(byte),
    };
}
486
/// Reads the displacement that belongs to `modrm` (and `sib`, if present) at
/// `dis.pos`, and moves `dis.pos` to wherever the reader stopped.
///
/// A 32-bit displacement is read when the SIB base is 0b101 with `mod == 0`,
/// or when `modrm.rip()` holds. Otherwise `mod` decides: 0b00 reads nothing
/// and yields 0, 0b01 reads a sign-extended 8-bit value, 0b10 reads 32 bits.
/// Must not be called for register-direct operands (`mod == 0b11`).
fn parseDisplacement(dis: *Disassembler, modrm: ModRm, sib: ?Sib) !i32 {
    var stream: std.Io.Reader = .fixed(dis.code);
    stream.seek = dis.pos;
    // Runs on both the success and the error path.
    defer dis.pos = stream.seek;

    const sib_without_base = if (sib) |info|
        info.base == 0b101 and modrm.mod == 0
    else
        false;
    if (sib_without_base or modrm.rip()) {
        const wide: i32 = try stream.takeInt(i32, .little);
        return wide;
    }

    const disp: i32 = switch (modrm.mod) {
        0b00 => 0,
        0b01 => try stream.takeInt(i8, .little),
        0b10 => try stream.takeInt(i32, .little),
        0b11 => unreachable,
    };
    return disp;
}
509}