master
1const std = @import("std");
2const assert = std.debug.assert;
3const log = std.log.scoped(.x86_64_encoder);
4const math = std.math;
5const testing = std.testing;
6const Writer = std.Io.Writer;
7
8const bits = @import("bits.zig");
9const Encoding = @import("Encoding.zig");
10const FrameIndex = bits.FrameIndex;
11const Register = bits.Register;
12const Symbol = bits.Symbol;
13
14pub const Instruction = struct {
15 prefix: Prefix = .none,
16 encoding: Encoding,
17 ops: [4]Operand = .{.none} ** 4,
18
19 pub const Mnemonic = Encoding.Mnemonic;
20
/// Caller-selected instruction prefix.
///
/// `encodeLegacyPrefixes` maps `lock` to the 0xF0 flag, `repne`/`repnz` to the
/// 0xF2 flag and `rep`/`repe`/`repz` to the 0xF3 flag. `directive` marks a
/// pseudo-instruction: `new` builds its encoding without a table lookup and
/// `encode` asserts that it is never asked to encode one.
pub const Prefix = enum(u3) {
    none,
    lock,
    rep,
    repe,
    repz,
    repne,
    repnz,
    directive,
};
31
/// An immediate operand, stored either as a signed 32-bit value or as an
/// unsigned 64-bit value.
pub const Immediate = union(enum) {
    signed: i32,
    unsigned: u64,

    /// Wraps `x` as an unsigned immediate.
    pub fn u(x: u64) Immediate {
        return .{ .unsigned = x };
    }

    /// Wraps `x` as a signed immediate.
    pub fn s(x: i32) Immediate {
        return .{ .signed = x };
    }

    /// Interprets the immediate as a signed integer of `bit_size` bits and
    /// widens it to `i64`.
    ///
    /// `bit_size` must be 1, 8, 16, 32 or 64 (1 is treated like 8). The stored
    /// value must fit in the requested width: the narrowing steps use checked
    /// `@intCast`. Unsigned payloads are bit-reinterpreted at the target width.
    pub fn asSigned(imm: Immediate, bit_size: u64) i64 {
        switch (imm) {
            .signed => |value| switch (bit_size) {
                1, 8 => return @as(i8, @intCast(value)),
                16 => return @as(i16, @intCast(value)),
                32, 64 => return value,
                else => unreachable,
            },
            .unsigned => |value| switch (bit_size) {
                1, 8 => {
                    const narrow: u8 = @intCast(value);
                    return @as(i8, @bitCast(narrow));
                },
                16 => {
                    const narrow: u16 = @intCast(value);
                    return @as(i16, @bitCast(narrow));
                },
                32 => {
                    const narrow: u32 = @intCast(value);
                    return @as(i32, @bitCast(narrow));
                },
                64 => return @bitCast(value),
                else => unreachable,
            },
        }
    }

    /// Interprets the immediate as an unsigned integer of `bit_size` bits and
    /// widens it to `u64`.
    ///
    /// `bit_size` must be 1, 8, 16, 32 or 64 (1 is treated like 8). Signed
    /// payloads are bit-reinterpreted at the target width; for a 64-bit request
    /// a signed payload is reinterpreted as `u32` and then zero-extended.
    pub fn asUnsigned(imm: Immediate, bit_size: u64) u64 {
        switch (imm) {
            .signed => |value| switch (bit_size) {
                1, 8 => {
                    const narrow: i8 = @intCast(value);
                    return @as(u8, @bitCast(narrow));
                },
                16 => {
                    const narrow: i16 = @intCast(value);
                    return @as(u16, @bitCast(narrow));
                },
                32, 64 => return @as(u32, @bitCast(value)),
                else => unreachable,
            },
            .unsigned => |value| switch (bit_size) {
                1, 8 => return @as(u8, @intCast(value)),
                16 => return @as(u16, @intCast(value)),
                32 => return @as(u32, @intCast(value)),
                64 => return value,
                else => unreachable,
            },
        }
    }
};
80
/// A memory operand in one of three addressing forms: base/index/displacement
/// (`sib`), RIP-relative (`rip`), or segment register plus absolute offset
/// (`moffs`).
pub const Memory = union(enum) {
    sib: Sib,
    rip: Rip,
    moffs: Moffs,

    pub const Base = bits.Memory.Base;

    /// Index register together with its scale factor.
    /// A `scale` of 0 means "no index" (see `scaleIndex`); `index` is then
    /// undefined and must not be read.
    pub const ScaleIndex = struct {
        scale: u4,
        index: Register,

        const none: ScaleIndex = .{ .scale = 0, .index = undefined };
    };

    pub const PtrSize = bits.Memory.Size;

    pub const Sib = struct {
        ptr_size: PtrSize,
        base: Base,
        scale_index: ScaleIndex,
        disp: i32,
    };

    pub const Rip = struct {
        ptr_size: PtrSize,
        disp: i32,
    };

    pub const Moffs = struct {
        seg: Register,
        offset: u64,
    };

    /// Builds a `moffs` operand. Asserts that `reg` is a segment register.
    pub fn initMoffs(reg: Register, offset: u64) Memory {
        assert(reg.isClass(.segment));
        const moffs: Moffs = .{ .seg = reg, .offset = offset };
        return .{ .moffs = moffs };
    }

    /// Builds a `sib` operand. Every addressing component is optional and
    /// defaults to "absent" (no base, no index, zero displacement).
    /// Asserts that a supplied scale is a power of two.
    pub fn initSib(ptr_size: PtrSize, args: struct {
        disp: i32 = 0,
        base: Base = .none,
        scale_index: ?ScaleIndex = null,
    }) Memory {
        const scale_index = args.scale_index orelse ScaleIndex.none;
        // Only a caller-provided scale is validated; the `none` sentinel uses scale 0.
        if (args.scale_index != null) assert(std.math.isPowerOfTwo(scale_index.scale));
        const sib: Sib = .{
            .ptr_size = ptr_size,
            .base = args.base,
            .scale_index = scale_index,
            .disp = args.disp,
        };
        return .{ .sib = sib };
    }

    /// Builds a `rip` operand with the given displacement.
    pub fn initRip(ptr_size: PtrSize, displacement: i32) Memory {
        const rip: Rip = .{ .ptr_size = ptr_size, .disp = displacement };
        return .{ .rip = rip };
    }

    /// Returns true when the operand is addressed through a segment register:
    /// always for `moffs`, never for `rip`, and for `sib` only when its base is
    /// a register of the segment class.
    pub fn isSegmentRegister(mem: Memory) bool {
        switch (mem) {
            .moffs => return true,
            .rip => return false,
            .sib => |sib| return switch (sib.base) {
                .reg => |reg| reg.isClass(.segment),
                .none, .frame, .table, .rip_inst, .nav, .uav, .lazy_sym, .extern_func => false,
            },
        }
    }

    /// Returns the base of the operand: the segment register for `moffs`, the
    /// stored base for `sib`, and `.none` for `rip`.
    pub fn base(mem: Memory) Base {
        switch (mem) {
            .sib => |sib| return sib.base,
            .moffs => |moffs| return .{ .reg = moffs.seg },
            .rip => return .none,
        }
    }

    /// Returns the scaled index of a `sib` operand, or null when the operand
    /// has no index (scale 0) or is not a `sib` operand.
    pub fn scaleIndex(mem: Memory) ?ScaleIndex {
        const sib = switch (mem) {
            .sib => |payload| payload,
            .moffs, .rip => return null,
        };
        return if (sib.scale_index.scale > 0) sib.scale_index else null;
    }

    /// Returns the displacement as an immediate: signed for `sib` and `rip`,
    /// unsigned (the absolute offset) for `moffs`.
    pub fn disp(mem: Memory) Immediate {
        switch (mem) {
            .sib => |sib| return Immediate.s(sib.disp),
            .rip => |rip| return Immediate.s(rip.disp),
            .moffs => |moffs| return Immediate.u(moffs.offset),
        }
    }

    /// Returns the access width in bits. `sib` and `rip` report their pointer
    /// size; `moffs` reports the pointer width of `target`.
    pub fn bitSize(mem: Memory, target: *const std.Target) u64 {
        const ptr_size: PtrSize = switch (mem) {
            .moffs => return target.ptrBitWidth(),
            .rip => |rip| rip.ptr_size,
            .sib => |sib| sib.ptr_size,
        };
        return ptr_size.bitSize(target);
    }
};
179
180 pub const Operand = union(enum) {
181 none,
182 reg: Register,
183 mem: Memory,
184 imm: Immediate,
185 bytes: []const u8,
186
/// Returns the size of the operand in bits.
///
/// Only register and memory operands have a size; any other operand kind is
/// a programming error (`unreachable`).
///
/// `target` is forwarded to `Memory.bitSize`, which needs it to resolve the
/// pointer size. The previous signature had no `target` and called
/// `mem.bitSize()` without the required argument, so this function could not
/// compile once referenced; the parameter mirrors `Memory.bitSize`.
pub fn bitSize(op: Operand, target: *const std.Target) u64 {
    return switch (op) {
        .reg => |reg| reg.bitSize(),
        .mem => |mem| mem.bitSize(target),
        .none, .imm, .bytes => unreachable,
    };
}
197
/// Reports whether the operand is, or is addressed through, a segment register.
/// Asserts the operand is either a register or a memory operand.
pub fn isSegmentRegister(op: Operand) bool {
    switch (op) {
        .reg => |reg| return reg.isClass(.segment),
        .mem => |mem| return mem.isSegmentRegister(),
        .none, .imm, .bytes => unreachable,
    }
}
209
/// Returns the extension bits (encoding bits 3 and up, truncated to two bits)
/// of the operand's register, or of its base register for a memory operand.
/// Operands without a register base yield 0. Must not be called on `.bytes`.
pub fn baseExtEnc(op: Operand) u2 {
    const reg: Register = switch (op) {
        .reg => |direct| direct,
        .mem => |mem| switch (mem.base()) {
            .reg => |base_reg| base_reg,
            // rsp, rbp, and rip are not extended
            .none, .frame, .table, .rip_inst, .nav, .uav, .lazy_sym, .extern_func => return 0b00,
        },
        .none, .imm => return 0b00,
        .bytes => unreachable,
    };
    return @truncate(reg.enc() >> 3);
}
221
/// Returns the extension bits (encoding bits 3 and up, truncated to two bits)
/// of a memory operand's index register. Operands without an index yield 0.
/// Must not be called on `.bytes`.
pub fn indexExtEnc(op: Operand) u2 {
    const mem_operand = switch (op) {
        .mem => |m| m,
        .none, .reg, .imm => return 0b00,
        .bytes => unreachable,
    };
    const si = mem_operand.scaleIndex() orelse return 0b00;
    return @truncate(si.index.enc() >> 3);
}
229
/// Formatting context for one operand: the operand itself plus the encoding
/// operand kind, which decides how an immediate is rendered.
const Format = struct {
    op: Operand,
    enc_op: Encoding.Op,

    /// Writes a textual rendering of the operand to `w`.
    /// `.none` prints nothing; `.bytes` operands must not be formatted.
    fn default(f: Format, w: *Writer) Writer.Error!void {
        switch (f.op) {
            .none => {},
            .bytes => unreachable,
            .reg => |reg| try w.writeAll(@tagName(reg)),
            .imm => |imm| try writeImm(imm, f.enc_op, w),
            .mem => |mem| switch (mem) {
                .rip => |rip| try writeRip(rip, w),
                .sib => |sib| try writeSib(mem, sib, w),
                .moffs => |moffs| try w.print("{s}:0x{x}", .{
                    @tagName(moffs.seg),
                    moffs.offset,
                }),
            },
        }
    }

    /// Writes `<size> [rip]` with an optional ` +/- 0x<disp>` part.
    fn writeRip(rip: Memory.Rip, w: *Writer) Writer.Error!void {
        try w.print("{f} [rip", .{rip.ptr_size});
        if (rip.disp != 0) {
            const sign: u8 = if (rip.disp < 0) '-' else '+';
            try w.print(" {c} 0x{x}", .{ sign, @abs(rip.disp) });
        }
        try w.writeByte(']');
    }

    /// Writes a `sib` operand: either `seg:0x<disp>` for a segment base, or a
    /// bracketed `base + index * scale +/- disp` expression.
    fn writeSib(mem: Memory, sib: Memory.Sib, w: *Writer) Writer.Error!void {
        try w.print("{f} ", .{sib.ptr_size});

        if (mem.isSegmentRegister()) {
            return w.print("{s}:0x{x}", .{ @tagName(sib.base.reg), sib.disp });
        }

        try w.writeByte('[');

        // Tracks whether a term has already been written inside the brackets.
        var wrote_term = true;
        switch (sib.base) {
            .none => wrote_term = false,
            .reg => |reg| try w.writeAll(@tagName(reg)),
            .frame => |frame_index| try w.print("{f}", .{frame_index}),
            .table => try w.writeAll("Table"),
            .rip_inst => |inst_index| try w.print("RipInst({d})", .{inst_index}),
            .nav => |nav| try w.print("Nav({d})", .{@intFromEnum(nav)}),
            .uav => |uav| try w.print("Uav({d})", .{@intFromEnum(uav.val)}),
            .lazy_sym => |lazy_sym| try w.print("LazySym({s}, {d})", .{
                @tagName(lazy_sym.kind),
                @intFromEnum(lazy_sym.ty),
            }),
            .extern_func => |extern_func| try w.print("ExternFunc({d})", .{@intFromEnum(extern_func)}),
        }

        if (mem.scaleIndex()) |si| {
            if (wrote_term) try w.writeAll(" + ");
            try w.print("{s} * {d}", .{ @tagName(si.index), si.scale });
            wrote_term = true;
        }

        // The displacement is shown when non-zero, or when nothing else was
        // written (so an empty `[]` is never produced).
        if (sib.disp != 0 or !wrote_term) {
            const negative = sib.disp < 0;
            if (wrote_term) {
                try w.print(" {c} ", .{@as(u8, if (negative) '-' else '+')});
            } else if (negative) {
                try w.writeByte('-');
            }
            try w.print("0x{x}", .{@abs(sib.disp)});
        }

        try w.writeByte(']');
    }

    /// Writes an immediate in hexadecimal, with a leading `-` for negative
    /// values when the encoding operand kind is signed.
    fn writeImm(imm: Immediate, enc_op: Encoding.Op, w: *Writer) Writer.Error!void {
        if (!enc_op.isSigned()) {
            return w.print("0x{x}", .{imm.asUnsigned(enc_op.immBitSize())});
        }
        const value = imm.asSigned(enc_op.immBitSize());
        if (value < 0) try w.writeByte('-');
        try w.print("0x{x}", .{@abs(value)});
    }
};
303
/// Returns a formatter that renders `op` via `Format.default`, using `enc_op`
/// to decide how immediates are printed.
pub fn fmt(op: Operand, enc_op: Encoding.Op) std.fmt.Alt(Format, Format.default) {
    const context: Format = .{ .op = op, .enc_op = enc_op };
    return .{ .data = context };
}
307 };
308
/// Builds an `Instruction` from a prefix, mnemonic and up to four operands.
///
/// For `.directive` the encoding is synthesized directly from the operand
/// kinds. For every other prefix the encoding is looked up with
/// `Encoding.findByMnemonic`; when no encoding matches, the operand kinds are
/// logged and `error.InvalidInstruction` is returned. Errors from the lookup
/// itself are propagated.
pub fn new(
    prefix: Prefix,
    mnemonic: Mnemonic,
    ops: []const Operand,
    target: *const std.Target,
) !Instruction {
    // Encoding operand kind of the operand at `idx`, or `.none` past the end.
    const opKind = struct {
        fn at(all: []const Operand, idx: usize, tgt: *const std.Target) Encoding.Op {
            return if (idx < all.len) Encoding.Op.fromOperand(all[idx], tgt) else .none;
        }
    }.at;

    const encoding: Encoding = enc: {
        if (prefix != .directive) {
            if (try Encoding.findByMnemonic(prefix, mnemonic, ops, target)) |found| break :enc found;
            log.err("no encoding found for: {s} {s} {s} {s} {s} {s}", .{
                @tagName(prefix),
                @tagName(mnemonic),
                @tagName(opKind(ops, 0, target)),
                @tagName(opKind(ops, 1, target)),
                @tagName(opKind(ops, 2, target)),
                @tagName(opKind(ops, 3, target)),
            });
            return error.InvalidInstruction;
        }
        break :enc .{
            .mnemonic = mnemonic,
            .data = .{
                .op_en = .z,
                .ops = .{
                    opKind(ops, 0, target),
                    opKind(ops, 1, target),
                    opKind(ops, 2, target),
                    opKind(ops, 3, target),
                },
                .opc_len = 0,
                .opc = undefined,
                .modrm_ext = 0,
                .mode = .none,
                .feature = .none,
            },
        };
    };
    log.debug("selected encoding: {f}", .{encoding});

    // `ops` defaults to four `.none` operands; copy the supplied ones over the front.
    var inst: Instruction = .{ .prefix = prefix, .encoding = encoding };
    @memcpy(inst.ops[0..ops.len], ops);
    return inst;
}
355
/// Writes the instruction as text: an optional prefix name, the mnemonic, and
/// the operands separated by `, `. Operand output stops at the first `.none`.
/// The `.none` and `.directive` prefixes are not printed.
pub fn format(inst: Instruction, w: *Writer) Writer.Error!void {
    const show_prefix = switch (inst.prefix) {
        .none, .directive => false,
        else => true,
    };
    if (show_prefix) try w.print("{s} ", .{@tagName(inst.prefix)});
    try w.writeAll(@tagName(inst.encoding.mnemonic));

    for (inst.ops, inst.encoding.data.ops, 0..) |op, enc_op, idx| {
        if (op == .none) break;
        try w.writeAll(if (idx == 0) " " else ", ");
        try w.print("{f}", .{op.fmt(enc_op)});
    }
}
368
/// Encodes the instruction as machine code into `w`.
///
/// Emission order: optional WAIT byte, then either the VEX prefix followed by
/// the final opcode byte, or legacy prefixes, mandatory prefix, REX prefix and
/// the opcode; finally the operand bytes (ModR/M, SIB, displacement,
/// immediates) selected by the encoding's operand layout (`op_en`).
///
/// `opts` is forwarded to the `Encoder` and controls whether frame and symbol
/// based memory operands may be encoded (see `encodeMemory`).
/// Asserts that the instruction is not a directive.
pub fn encode(inst: Instruction, w: *Writer, comptime opts: Options) !void {
    assert(inst.prefix != .directive);
    const encoder: Encoder(opts) = .{ .w = w };
    const enc = inst.encoding;
    const data = enc.data;

    try inst.encodeWait(encoder);
    if (data.mode.isVex()) {
        // The VEX prefix carries the opcode map and mandatory prefix, so only
        // the last opcode byte is written explicitly.
        try inst.encodeVexPrefix(encoder);
        const opc = inst.encoding.opcode();
        try encoder.opcode_1byte(opc[opc.len - 1]);
    } else {
        try inst.encodeLegacyPrefixes(encoder);
        try inst.encodeMandatoryPrefix(encoder);
        try inst.encodeRexPrefix(encoder);
        try inst.encodeOpcode(encoder);
    }

    switch (data.op_en) {
        // No operand bytes follow the opcode (any register is part of the opcode byte).
        .z, .o, .zo, .oz => {},
        .i, .d => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
        .zi, .oi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
        .ii => {
            try encodeImm(inst.ops[0].imm, data.ops[0], encoder);
            try encodeImm(inst.ops[1].imm, data.ops[1], encoder);
        },
        // moffs forms: the absolute offset is emitted as a 64-bit immediate.
        .fd => try encoder.imm64(inst.ops[1].mem.moffs.offset),
        .td => try encoder.imm64(inst.ops[0].mem.moffs.offset),
        else => {
            // Operand that occupies the ModR/M r/m slot for this layout.
            const mem_op: Operand = switch (data.op_en) {
                .ia => .{ .reg = .eax },
                .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
                .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1],
                .rvm, .rvmr, .rvmi => inst.ops[2],
                else => unreachable,
            };
            switch (mem_op) {
                .reg => |reg| {
                    // Despite its name, `rm` holds the ModR/M reg field: either
                    // the opcode extension or the other register operand.
                    const rm: u3 = switch (data.op_en) {
                        .ia, .m, .mi, .m1, .mc, .vm, .vmi => enc.modRmExt(),
                        .mr, .mri, .mrc => @truncate(inst.ops[1].reg.enc()),
                        .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => @truncate(inst.ops[0].reg.enc()),
                        .mvr => @truncate(inst.ops[2].reg.enc()),
                        else => unreachable,
                    };
                    try encoder.modRm_direct(rm, @truncate(reg.enc()));
                },
                .mem => |mem| {
                    // Operand for the ModR/M reg field; `.none` makes
                    // `encodeMemory` use the opcode extension instead.
                    const op = switch (data.op_en) {
                        .m, .mi, .m1, .mc, .vm, .vmi => .none,
                        .mr, .mri, .mrc => inst.ops[1],
                        .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
                        .mvr => inst.ops[2],
                        else => unreachable,
                    };
                    try encodeMemory(enc, mem, op, encoder);
                },
                else => unreachable,
            }

            // Trailing immediate, if the layout has one.
            switch (data.op_en) {
                .ia => try encodeImm(inst.ops[0].imm, data.ops[0], encoder),
                .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder),
                .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder),
                // Fourth register operand travels in the high nibble of an imm8.
                .rvmr => try encoder.imm8(@as(u8, @as(u4, @intCast(inst.ops[3].reg.enc()))) << 4),
                .rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder),
                else => {},
            }
        },
    }
}
440
/// Emits the opcode bytes of a non-VEX encoding.
///
/// A leading mandatory prefix byte (emitted separately) is skipped. For the
/// register-in-opcode layouts the low three bits of the register encoding are
/// merged into the final opcode byte.
fn encodeOpcode(inst: Instruction, encoder: anytype) !void {
    const bytes = inst.encoding.opcode();
    const skip: usize = if (inst.encoding.mandatoryPrefix() == null) 0 else 1;
    const last_index = bytes.len - 1;

    for (bytes[skip..last_index]) |byte| try encoder.opcode_1byte(byte);

    // Operand whose register is folded into the last opcode byte, if any.
    const reg_operand: ?Operand = switch (inst.encoding.data.op_en) {
        .o, .oz, .oi => inst.ops[0],
        .zo => inst.ops[1],
        else => null,
    };
    if (reg_operand) |operand| {
        try encoder.opcode_withReg(bytes[last_index], @truncate(operand.reg.enc()));
    } else {
        try encoder.opcode_1byte(bytes[last_index]);
    }
}
452
/// Emits the 0x9B byte ahead of encodings whose mode is `.wait`; does nothing
/// for every other mode.
fn encodeWait(inst: Instruction, encoder: anytype) !void {
    if (inst.encoding.data.mode == .wait) try encoder.opcode_1byte(0x9b);
}
459
/// Collects and emits the legacy prefixes of a non-VEX instruction: the
/// lock/rep prefix chosen by the caller, the operand-size override for the
/// `.short` and `.rex_short` modes, and a segment override taken from the
/// memory-side operand. Must not be called for directives or VEX-only layouts.
fn encodeLegacyPrefixes(inst: Instruction, encoder: anytype) !void {
    const data = inst.encoding.data;

    var legacy: LegacyPrefixes = .{};

    switch (inst.prefix) {
        .directive => unreachable,
        .none => {},
        .lock => legacy.prefix_f0 = true,
        .repne, .repnz => legacy.prefix_f2 = true,
        .rep, .repe, .repz => legacy.prefix_f3 = true,
    }

    if (data.mode == .short or data.mode == .rex_short) legacy.set16BitOverride();

    // Segment register named by a register or memory operand, or null when the
    // operand does not involve a segment register.
    const segmentOf = struct {
        fn get(op: Operand) ?Register {
            if (!op.isSegmentRegister()) return null;
            return switch (op) {
                .reg => |reg| reg,
                .mem => |mem| mem.base().reg,
                else => unreachable,
            };
        }
    }.get;

    const segment_override: ?Register = switch (data.op_en) {
        .z, .i, .zi, .ii, .ia, .o, .zo, .oz, .oi, .d => null,
        // moffs operands always carry their segment register as the base.
        .fd => inst.ops[1].mem.base().reg,
        .td => inst.ops[0].mem.base().reg,
        .rm, .rmi, .rm0 => segmentOf(inst.ops[1]),
        .m, .mi, .m1, .mc, .mr, .mri, .mrc => segmentOf(inst.ops[0]),
        .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable,
    };
    if (segment_override) |seg| legacy.setSegmentOverride(seg);

    try encoder.legacyPrefixes(legacy);
}
508
/// Computes the REX prefix fields for a non-VEX instruction and hands them to
/// the encoder, which decides whether a byte is actually emitted.
///
/// `present` is set for `.rex` mode encodings and `w` for `.long` mode. The
/// `r`, `x` and `b` bits come from the extension bit of the register operand,
/// the index register and the base/r-m operand respectively. Only the lowest
/// extension bit can be expressed in REX; a higher bit trips an assertion.
///
/// The `.rmv` layout is accepted here and takes its base/index operand from
/// `ops[1]`, the same operand `encode` and `encodeVexPrefix` use for it.
/// Previously `.rmv` entered this arm but hit `unreachable` when selecting the
/// base/index operand.
fn encodeRexPrefix(inst: Instruction, encoder: anytype) !void {
    const op_en = inst.encoding.data.op_en;

    var rex = Rex{};
    rex.present = inst.encoding.data.mode == .rex;
    rex.w = inst.encoding.data.mode == .long;

    switch (op_en) {
        // No register or memory operand that could need an extension bit.
        .z, .i, .zi, .ii, .ia, .fd, .td, .d => {},
        // Register encoded in the opcode byte: its extension goes in REX.B.
        .o, .oz, .oi => rex.b = inst.ops[0].reg.enc() & 0b01000 != 0,
        .zo => rex.b = inst.ops[1].reg.enc() & 0b01000 != 0,
        .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => {
            // Operand in the ModR/M reg field (none for opcode-extension layouts).
            const r_op = switch (op_en) {
                .rm, .rmi, .rm0, .rmv => inst.ops[0],
                .mr, .mri, .mrc => inst.ops[1],
                else => .none,
            };
            const r_op_base_ext_enc = r_op.baseExtEnc();
            rex.r = r_op_base_ext_enc & 0b01 != 0;
            assert(r_op_base_ext_enc & 0b10 == 0);

            // Operand in the ModR/M r/m slot; supplies both base and index.
            const b_x_op = switch (op_en) {
                .rm, .rmi, .rm0, .rmv => inst.ops[1],
                .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
                else => unreachable,
            };
            const b_x_op_base_ext_enc = b_x_op.baseExtEnc();
            rex.b = b_x_op_base_ext_enc & 0b01 != 0;
            assert(b_x_op_base_ext_enc & 0b10 == 0);
            const b_x_op_index_ext_enc = b_x_op.indexExtEnc();
            rex.x = b_x_op_index_ext_enc & 0b01 != 0;
            assert(b_x_op_index_ext_enc & 0b10 == 0);
        },
        // Layouts that this function does not support.
        .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable,
    }

    try encoder.rex(rex);
}
547
/// Computes the VEX prefix fields for the instruction and emits the prefix.
///
/// Fills in `w` (long mode), the `r`/`x`/`b` extension bits, `l` (vector
/// length), `p` (mandatory prefix selector), `m` (opcode map) and, for layouts
/// that have one, the extra register operand `v`. Layouts without a ModR/M
/// operand are not valid here.
fn encodeVexPrefix(inst: Instruction, encoder: anytype) !void {
    const op_en = inst.encoding.data.op_en;
    const opc = inst.encoding.opcode();
    const mand_pre = inst.encoding.mandatoryPrefix();

    var vex = Vex{};

    vex.w = inst.encoding.data.mode.isLong();

    switch (op_en) {
        .z, .i, .zi, .ii, .ia, .fd, .td, .d, .o, .oz, .oi, .zo => unreachable,
        .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vm, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => {
            // Operand in the ModR/M reg field (none for opcode-extension layouts).
            const r_op = switch (op_en) {
                .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0],
                .mr, .mri, .mrc => inst.ops[1],
                .mvr => inst.ops[2],
                .m, .mi, .m1, .mc, .vm, .vmi => .none,
                else => unreachable,
            };
            const r_op_base_ext_enc = r_op.baseExtEnc();
            vex.r = r_op_base_ext_enc & 0b01 != 0;
            // Only one extension bit can be represented.
            assert(r_op_base_ext_enc & 0b10 == 0);

            // Operand in the ModR/M r/m slot; supplies both base and index.
            const b_x_op = switch (op_en) {
                .rm, .rmi, .rm0, .vm, .vmi, .rmv => inst.ops[1],
                .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0],
                .rvm, .rvmr, .rvmi => inst.ops[2],
                else => unreachable,
            };
            const b_x_op_base_ext_enc = b_x_op.baseExtEnc();
            vex.b = b_x_op_base_ext_enc & 0b01 != 0;
            assert(b_x_op_base_ext_enc & 0b10 == 0);
            const b_x_op_index_ext_enc = b_x_op.indexExtEnc();
            vex.x = b_x_op_index_ext_enc & 0b01 != 0;
            assert(b_x_op_index_ext_enc & 0b10 == 0);
        },
    }

    vex.l = inst.encoding.data.mode.isVecLong();

    // The mandatory prefix is folded into the VEX `p` field.
    vex.p = if (mand_pre) |mand| switch (mand) {
        0x66 => .@"66",
        0xf2 => .f2,
        0xf3 => .f3,
        else => unreachable,
    } else .none;

    // Skip the mandatory prefix byte (if any); the next byte must be the 0x0F
    // escape, and the byte after it selects the opcode map.
    const leading: usize = if (mand_pre) |_| 1 else 0;
    assert(opc[leading] == 0x0f);
    vex.m = switch (opc[leading + 1]) {
        else => .@"0f",
        0x38 => .@"0f38",
        0x3a => .@"0f3a",
    };

    // Extra register operand carried in the VEX prefix.
    switch (op_en) {
        else => {},
        .vm, .vmi => vex.v = inst.ops[0].reg,
        .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg,
        .rmv => vex.v = inst.ops[2].reg,
    }

    try encoder.vex(vex);
}
612
/// Emits the encoding's mandatory prefix byte, if it has one.
fn encodeMandatoryPrefix(inst: Instruction, encoder: anytype) !void {
    if (inst.encoding.mandatoryPrefix()) |prefix| {
        try encoder.opcode_1byte(prefix);
    }
}
617
/// Emits the ModR/M byte, optional SIB byte and displacement for a memory
/// operand.
///
/// `operand` supplies the ModR/M reg field: a register operand contributes the
/// low three bits of its encoding, `.none` selects the encoding's opcode
/// extension. `moffs` operands are never encoded through this path.
///
/// Returns `error.CannotEncode` for frame-based operands unless the encoder
/// was created with `allow_frame_locs`, and for symbol-based operands unless
/// it was created with `allow_symbols`.
fn encodeMemory(encoding: Encoding, mem: Memory, operand: Operand, encoder: anytype) !void {
    const operand_enc: u3 = switch (operand) {
        .reg => |reg| @truncate(reg.enc()),
        .none => encoding.modRmExt(),
        else => unreachable,
    };

    switch (mem) {
        .moffs => unreachable,
        .sib => |sib| switch (sib.base) {
            // No base register: SIB byte with optional index and a disp32.
            .none, .table => {
                try encoder.modRm_SIBDisp0(operand_enc);
                if (mem.scaleIndex()) |si| {
                    const scale = math.log2_int(u4, si.scale);
                    try encoder.sib_scaleIndexDisp32(scale, @truncate(si.index.enc()));
                } else {
                    try encoder.sib_disp32();
                }
                try encoder.disp32(sib.disp);
            },
            .reg => |base| switch (base.class()) {
                // Segment base: encoded like the no-base form (the segment
                // itself is expressed as a prefix by `encodeLegacyPrefixes`).
                .segment => {
                    // TODO audit this wrt SIB
                    try encoder.modRm_SIBDisp0(operand_enc);
                    if (mem.scaleIndex()) |si| {
                        const scale = math.log2_int(u4, si.scale);
                        try encoder.sib_scaleIndexDisp32(scale, @truncate(si.index.enc()));
                    } else {
                        try encoder.sib_disp32();
                    }
                    try encoder.disp32(sib.disp);
                },
                .general_purpose => {
                    const dst: u3 = @truncate(base.enc());
                    const src = operand_enc;
                    // A SIB byte is required when the base's low bits are 4
                    // (that r/m value selects SIB) or when an index is present.
                    if (dst == 4 or mem.scaleIndex() != null) {
                        // Base low bits 5 cannot use the displacement-free
                        // form, so it falls through to disp8 with a zero.
                        if (sib.disp == 0 and dst != 5) {
                            try encoder.modRm_SIBDisp0(src);
                            if (mem.scaleIndex()) |si| {
                                const scale = math.log2_int(u4, si.scale);
                                try encoder.sib_scaleIndexBase(scale, @truncate(si.index.enc()), dst);
                            } else {
                                try encoder.sib_base(dst);
                            }
                        } else if (math.cast(i8, sib.disp)) |_| {
                            try encoder.modRm_SIBDisp8(src);
                            if (mem.scaleIndex()) |si| {
                                const scale = math.log2_int(u4, si.scale);
                                try encoder.sib_scaleIndexBaseDisp8(scale, @truncate(si.index.enc()), dst);
                            } else {
                                try encoder.sib_baseDisp8(dst);
                            }
                            try encoder.disp8(@as(i8, @truncate(sib.disp)));
                        } else {
                            try encoder.modRm_SIBDisp32(src);
                            if (mem.scaleIndex()) |si| {
                                const scale = math.log2_int(u4, si.scale);
                                try encoder.sib_scaleIndexBaseDisp32(scale, @truncate(si.index.enc()), dst);
                            } else {
                                try encoder.sib_baseDisp32(dst);
                            }
                            try encoder.disp32(sib.disp);
                        }
                    } else {
                        // Plain [base + disp]: pick the shortest displacement form.
                        if (sib.disp == 0 and dst != 5) {
                            try encoder.modRm_indirectDisp0(src, dst);
                        } else if (math.cast(i8, sib.disp)) |_| {
                            try encoder.modRm_indirectDisp8(src, dst);
                            try encoder.disp8(@as(i8, @truncate(sib.disp)));
                        } else {
                            try encoder.modRm_indirectDisp32(src, dst);
                            try encoder.disp32(sib.disp);
                        }
                    }
                },
                else => unreachable,
            },
            // Frame and symbol bases emit a disp32 form with an undefined
            // displacement.
            // NOTE(review): presumably the displacement is patched in later by
            // the caller — confirm.
            .frame => if (@TypeOf(encoder).options.allow_frame_locs) {
                try encoder.modRm_indirectDisp32(operand_enc, 0);
                try encoder.disp32(undefined);
            } else return error.CannotEncode,
            .nav, .uav, .lazy_sym, .extern_func => if (@TypeOf(encoder).options.allow_symbols) {
                try encoder.modRm_indirectDisp32(operand_enc, 0);
                try encoder.disp32(undefined);
            } else return error.CannotEncode,
            .rip_inst => {
                try encoder.modRm_RIPDisp32(operand_enc);
                try encoder.disp32(sib.disp);
            },
        },
        .rip => |rip| {
            try encoder.modRm_RIPDisp32(operand_enc);
            try encoder.disp32(rip.disp);
        },
    }
}
714
/// Emits `imm` using the immediate width of the encoding operand kind `kind`
/// (8, 16, 32 or 64 bits). The value is first converted with
/// `Immediate.asUnsigned` at that width.
fn encodeImm(imm: Immediate, kind: Encoding.Op, encoder: anytype) !void {
    const bit_size = kind.immBitSize();
    const raw = imm.asUnsigned(bit_size);
    switch (bit_size) {
        8 => {
            const value: u8 = @intCast(raw);
            try encoder.imm8(value);
        },
        16 => {
            const value: u16 = @intCast(raw);
            try encoder.imm16(value);
        },
        32 => {
            const value: u32 = @intCast(raw);
            try encoder.imm32(value);
        },
        64 => try encoder.imm64(raw),
        else => unreachable,
    }
}
725};
726
/// Set of legacy prefix bytes to emit ahead of an instruction. Each flag is
/// named after the prefix byte it stands for; the encoder writes one byte per
/// set flag.
pub const LegacyPrefixes = packed struct {
    /// 0xF0: LOCK.
    prefix_f0: bool = false,
    /// 0xF2: REPNE / REPNZ.
    prefix_f2: bool = false,
    /// 0xF3: REP / REPE / REPZ.
    prefix_f3: bool = false,

    /// 0x2E: CS segment override (or "branch not taken" hint).
    prefix_2e: bool = false,
    /// 0x36: SS segment override.
    prefix_36: bool = false,
    /// 0x26: ES segment override.
    prefix_26: bool = false,
    /// 0x64: FS segment override.
    prefix_64: bool = false,
    /// 0x65: GS segment override.
    prefix_65: bool = false,

    /// 0x3E: "branch taken" hint.
    prefix_3e: bool = false,

    /// 0x67: address-size override.
    prefix_67: bool = false,

    /// 0x66: operand-size override (selects 16-bit operation).
    prefix_66: bool = false,

    /// Pads the struct to 16 bits.
    padding: u5 = 0,

    /// Sets the flag for the segment-override prefix matching `reg`.
    /// Asserts that `reg` is a segment register. `ds` sets no flag.
    pub fn setSegmentOverride(self: *LegacyPrefixes, reg: Register) void {
        assert(reg.isClass(.segment));
        switch (reg) {
            .ds => {},
            .es => self.prefix_26 = true,
            .cs => self.prefix_2e = true,
            .ss => self.prefix_36 = true,
            .fs => self.prefix_64 = true,
            .gs => self.prefix_65 = true,
            else => unreachable,
        }
    }

    /// Requests the operand-size override prefix (0x66).
    pub fn set16BitOverride(self: *LegacyPrefixes) void {
        self.prefix_66 = true;
    }
};
774
/// Compile-time switches for `Encoder`.
/// `allow_frame_locs` permits encoding memory operands with a `.frame` base and
/// `allow_symbols` permits symbol bases (`.nav`, `.uav`, `.lazy_sym`,
/// `.extern_func`); without them `encodeMemory` returns `error.CannotEncode`.
pub const Options = struct { allow_frame_locs: bool = false, allow_symbols: bool = false };
776
777fn Encoder(comptime opts: Options) type {
778 return struct {
779 w: *Writer,
780
781 const Self = @This();
782 pub const options = opts;
783
784 // --------
785 // Prefixes
786 // --------
787
/// Encodes legacy prefixes.
///
/// Writes one byte for every flag set in `prefixes`, in a fixed order:
/// LOCK/REP group, segment overrides, branch hint, operand-size override,
/// address-size override. Writes nothing when the struct is all zero.
pub fn legacyPrefixes(self: Self, prefixes: LegacyPrefixes) !void {
    // Fast path: the whole packed struct is zero, so no prefix is requested.
    if (@as(u16, @bitCast(prefixes)) == 0) return;

    // Hopefully this path isn't taken very often, so we'll do it the slow way for now
    const emit_order = [_]struct { enabled: bool, byte: u8 }{
        // LOCK
        .{ .enabled = prefixes.prefix_f0, .byte = 0xf0 },
        // REPNE, REPNZ
        .{ .enabled = prefixes.prefix_f2, .byte = 0xf2 },
        // REP, REPE, REPZ
        .{ .enabled = prefixes.prefix_f3, .byte = 0xf3 },
        // CS segment override or Branch not taken
        .{ .enabled = prefixes.prefix_2e, .byte = 0x2e },
        // SS segment override
        .{ .enabled = prefixes.prefix_36, .byte = 0x36 },
        // ES segment override
        .{ .enabled = prefixes.prefix_26, .byte = 0x26 },
        // FS segment override
        .{ .enabled = prefixes.prefix_64, .byte = 0x64 },
        // GS segment override
        .{ .enabled = prefixes.prefix_65, .byte = 0x65 },
        // Branch taken
        .{ .enabled = prefixes.prefix_3e, .byte = 0x3e },
        // Operand size override
        .{ .enabled = prefixes.prefix_66, .byte = 0x66 },
        // Address size override
        .{ .enabled = prefixes.prefix_67, .byte = 0x67 },
    };
    for (emit_order) |entry| {
        if (entry.enabled) try self.w.writeByte(entry.byte);
    }
}
821
/// Emits the operand-size override prefix (0x66), selecting 16-bit operands.
///
/// Note that this flag is overridden by REX.W, if both are present.
pub fn prefix16BitMode(self: Self) !void {
    const operand_size_override: u8 = 0x66;
    return self.w.writeByte(operand_size_override);
}
828
/// Encodes a REX prefix byte from its fields.
///
/// Nothing is written unless the prefix is explicitly requested
/// (`fields.present`) or at least one field is set. The byte is `0100WRXB`.
///
/// See struct `Rex` for a description of each field.
pub fn rex(self: Self, fields: Rex) !void {
    const needed = fields.present or fields.isSet();
    if (!needed) return;

    const byte: u8 = 0b0100_0000 |
        @as(u8, @intFromBool(fields.w)) << 3 |
        @as(u8, @intFromBool(fields.r)) << 2 |
        @as(u8, @intFromBool(fields.x)) << 1 |
        @as(u8, @intFromBool(fields.b));

    return self.w.writeByte(byte);
}
847
/// Encodes a VEX prefix from its fields, choosing the three-byte form (0xC4)
/// when `fields.is3Byte()` says so and the two-byte form (0xC5) otherwise.
/// The `r`, `x`, `b` and `v` fields are stored inverted.
///
/// See struct `Vex` for a description of each field.
pub fn vex(self: Self, fields: Vex) !void {
    if (fields.is3Byte()) {
        try self.w.writeByte(0b1100_0100);

        // Byte 1: ~R ~X ~B followed by the opcode map selector.
        const inv_r: u8 = ~@intFromBool(fields.r);
        const inv_x: u8 = ~@intFromBool(fields.x);
        const inv_b: u8 = ~@intFromBool(fields.b);
        const map: u8 = @intFromEnum(fields.m);
        try self.w.writeByte(inv_r << 7 | inv_x << 6 | inv_b << 5 | map << 0);

        // Byte 2: W, ~vvvv, L, pp.
        const w_bit: u8 = @intFromBool(fields.w);
        const inv_v: u8 = ~@as(u4, @intCast(fields.v.enc()));
        const l_bit: u8 = @intFromBool(fields.l);
        const pp: u8 = @intFromEnum(fields.p);
        try self.w.writeByte(w_bit << 7 | inv_v << 3 | l_bit << 2 | pp << 0);
    } else {
        try self.w.writeByte(0b1100_0101);

        // Single payload byte: ~R, ~vvvv, L, pp.
        const inv_r: u8 = ~@intFromBool(fields.r);
        const inv_v: u8 = ~@as(u4, @intCast(fields.v.enc()));
        const l_bit: u8 = @intFromBool(fields.l);
        const pp: u8 = @intFromEnum(fields.p);
        try self.w.writeByte(inv_r << 7 | inv_v << 3 | l_bit << 2 | pp << 0);
    }
}
878
879 // ------
880 // Opcode
881 // ------
882
/// Writes a single opcode byte.
pub fn opcode_1byte(self: Self, opcode: u8) !void {
    return self.w.writeByte(opcode);
}
887
/// Writes a two-byte opcode: `prefix` followed by `opcode`.
///
/// e.g. IMUL has the opcode 0x0f 0xaf, so you use
///
/// encoder.opcode_2byte(0x0f, 0xaf);
pub fn opcode_2byte(self: Self, prefix: u8, opcode: u8) !void {
    const bytes = [2]u8{ prefix, opcode };
    return self.w.writeAll(&bytes);
}
896
/// Writes a three-byte opcode: `prefix_1`, `prefix_2`, then `opcode`.
///
/// e.g. MOVSD has the opcode 0xf2 0x0f 0x10
///
/// encoder.opcode_3byte(0xf2, 0x0f, 0x10);
pub fn opcode_3byte(self: Self, prefix_1: u8, prefix_2: u8, opcode: u8) !void {
    const bytes = [3]u8{ prefix_1, prefix_2, opcode };
    return self.w.writeAll(&bytes);
}
905
/// Writes a one-byte opcode with a register number merged into its low three
/// bits. Asserts that those bits are clear in `opcode`.
///
/// Remember to add a REX prefix byte if reg is extended!
pub fn opcode_withReg(self: Self, opcode: u8, reg: u3) !void {
    const low_bits_clear = opcode & 0b111 == 0;
    assert(low_bits_clear);
    return self.w.writeByte(opcode | reg);
}
913
914 // ------
915 // ModR/M
916 // ------
917
/// Writes a ModR/M byte laid out as `mod` (bits 7-6), `reg_or_opx` (bits 5-3)
/// and `rm` (bits 2-0).
///
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm(self: Self, mod: u2, reg_or_opx: u3, rm: u3) !void {
    const mod_bits = @as(u8, mod) << 6;
    const reg_bits = @as(u8, reg_or_opx) << 3;
    return self.w.writeByte(mod_bits | reg_bits | rm);
}
924
/// Writes a ModR/M byte for direct register addressing (mod = 11).
/// r/m effective address: r/m
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_direct(self: Self, reg_or_opx: u3, rm: u3) !void {
    const mod_direct: u2 = 0b11;
    return self.modRm(mod_direct, reg_or_opx, rm);
}
933
/// Writes a ModR/M byte for register-indirect addressing without displacement
/// (mod = 00).
/// r/m effective address: [r/m]
///
/// Asserts `rm` is neither 4 nor 5: with mod = 00 those values select the SIB
/// and RIP-relative forms (see `modRm_SIBDisp0` and `modRm_RIPDisp32`).
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_indirectDisp0(self: Self, reg_or_opx: u3, rm: u3) !void {
    assert(rm != 4 and rm != 5);
    const mod_no_disp: u2 = 0b00;
    return self.modRm(mod_no_disp, reg_or_opx, rm);
}
943
/// Writes a ModR/M byte announcing a SIB byte with no displacement
/// (mod = 00, r/m = 100).
/// r/m effective address: [SIB]
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_SIBDisp0(self: Self, reg_or_opx: u3) !void {
    const rm_sib: u3 = 0b100;
    return self.modRm(0b00, reg_or_opx, rm_sib);
}
952
/// Writes a ModR/M byte for RIP-relative addressing (mod = 00, r/m = 101).
/// r/m effective address: [RIP + disp32]
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_RIPDisp32(self: Self, reg_or_opx: u3) !void {
    const rm_rip: u3 = 0b101;
    return self.modRm(0b00, reg_or_opx, rm_rip);
}
961
/// Writes a ModR/M byte for register-indirect addressing with an 8-bit
/// displacement (mod = 01). Asserts `rm` is not 4, which would select SIB.
/// r/m effective address: [r/m + disp8]
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_indirectDisp8(self: Self, reg_or_opx: u3, rm: u3) !void {
    assert(rm != 4);
    const mod_disp8: u2 = 0b01;
    return self.modRm(mod_disp8, reg_or_opx, rm);
}
971
/// Writes a ModR/M byte announcing a SIB byte with an 8-bit displacement
/// (mod = 01, r/m = 100).
/// r/m effective address: [SIB + disp8]
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_SIBDisp8(self: Self, reg_or_opx: u3) !void {
    const rm_sib: u3 = 0b100;
    return self.modRm(0b01, reg_or_opx, rm_sib);
}
980
/// Writes a ModR/M byte for register-indirect addressing with a 32-bit
/// displacement (mod = 10). Asserts `rm` is not 4, which would select SIB.
/// r/m effective address: [r/m + disp32]
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_indirectDisp32(self: Self, reg_or_opx: u3, rm: u3) !void {
    assert(rm != 4);
    const mod_disp32: u2 = 0b10;
    return self.modRm(mod_disp32, reg_or_opx, rm);
}
990
/// Writes a ModR/M byte announcing a SIB byte with a 32-bit displacement
/// (mod = 10, r/m = 100).
/// r/m effective address: [SIB + disp32]
///
/// Note reg's effective address is always just reg for the ModR/M byte.
/// Remember to add a REX prefix byte if reg or rm are extended!
pub fn modRm_SIBDisp32(self: Self, reg_or_opx: u3) !void {
    const rm_sib: u3 = 0b100;
    return self.modRm(0b10, reg_or_opx, rm_sib);
}
999
1000 // ---
1001 // SIB
1002 // ---
1003
/// Writes a SIB byte laid out as `scale` (bits 7-6), `index` (bits 5-3) and
/// `base` (bits 2-0).
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib(self: Self, scale: u2, index: u3, base: u3) !void {
    const scale_bits = @as(u8, scale) << 6;
    const index_bits = @as(u8, index) << 3;
    return self.w.writeByte(scale_bits | index_bits | base);
}
1010
/// Writes a SIB byte for base plus scaled index, without displacement.
/// Asserts `base` is not 5.
/// r/m effective address: [base + scale * index]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_scaleIndexBase(self: Self, scale: u2, index: u3, base: u3) !void {
    assert(base != 5);
    return self.sib(scale, index, base);
}
1020
/// Writes a SIB byte for a scaled index plus disp32, with no base register.
/// r/m effective address: [scale * index + disp32]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_scaleIndexDisp32(self: Self, scale: u2, index: u3) !void {
    // index = 4 means no index if and only if we haven't extended the register
    // TODO enforce this
    // base = 5 means no base, if mod == 0.
    const no_base: u3 = 5;
    return self.sib(scale, index, no_base);
}
1032
/// Writes a SIB byte that addresses through `base` only.
/// Asserts `base` is not 5.
/// r/m effective address: [base]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_base(self: Self, base: u3) !void {
    assert(base != 5);

    // With no index the scale is irrelevant; index = 4 means no index.
    const no_index: u3 = 4;
    return self.sib(0, no_index, base);
}
1044
/// Emit a SIB byte with neither base nor index, leaving only a disp32.
/// r/m effective address: [disp32]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_disp32(self: Self) !void {
    // index = 4 means no index; the scale field is then ignored, so 0 is used.
    const no_index: u3 = 4;
    // base = 5 means no base, if mod == 0.
    const no_base: u3 = 5;
    return self.sib(0, no_index, no_base);
}
1055
/// Emit a SIB byte for the scale * index + base + disp8 form.
/// r/m effective address: [base + scale * index + disp8]
///
/// The SIB byte itself carries no displacement information; this helper
/// only writes the three fields it is given.
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_scaleIndexBaseDisp8(self: Self, scale: u2, index: u3, base: u3) !void {
    return self.sib(scale, index, base);
}
1063
/// Emit a SIB byte for the base + disp8 form, without an index register.
/// r/m effective address: [base + disp8]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_baseDisp8(self: Self, base: u3) !void {
    // index = 4 means no index; the scale field is then ignored, so 0 is used.
    const no_index: u3 = 4;
    return self.sib(0, no_index, base);
}
1073
/// Emit a SIB byte for the scale * index + base + disp32 form.
/// r/m effective address: [base + scale * index + disp32]
///
/// The SIB byte itself carries no displacement information; this helper
/// only writes the three fields it is given.
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_scaleIndexBaseDisp32(self: Self, scale: u2, index: u3, base: u3) !void {
    return self.sib(scale, index, base);
}
1081
/// Emit a SIB byte for the base + disp32 form, without an index register.
/// r/m effective address: [base + disp32]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_baseDisp32(self: Self, base: u3) !void {
    // index = 4 means no index; the scale field is then ignored, so 0 is used.
    const no_index: u3 = 4;
    return self.sib(0, no_index, base);
}
1091
1092 // -------------------------
1093 // Trivial (no bit fiddling)
1094 // -------------------------
1095
/// Emit an 8 bit displacement as a single byte.
///
/// The signed value is written bit-for-bit (two's complement).
/// It is sign-extended to 64 bits by the cpu.
pub fn disp8(self: Self, disp: i8) !void {
    const raw: u8 = @bitCast(disp);
    return self.w.writeByte(raw);
}
1102
/// Emit a 32 bit displacement as four little-endian bytes.
///
/// It is sign-extended to 64 bits by the cpu.
pub fn disp32(self: Self, disp: i32) !void {
    return self.w.writeInt(i32, disp, .little);
}
1109
/// Emit an 8 bit immediate as a single byte.
///
/// NOTE(review): the previous comment said the cpu sign-extends this to
/// 64 bits; presumably that depends on the instruction -- confirm.
pub fn imm8(self: Self, imm: u8) !void {
    return self.w.writeByte(imm);
}
1116
/// Emit a 16 bit immediate as two little-endian bytes.
///
/// NOTE(review): the previous comment said the cpu sign-extends this to
/// 64 bits; presumably that depends on the instruction -- confirm.
pub fn imm16(self: Self, imm: u16) !void {
    return self.w.writeInt(u16, imm, .little);
}
1123
/// Emit a 32 bit immediate as four little-endian bytes.
///
/// NOTE(review): the previous comment said the cpu sign-extends this to
/// 64 bits; presumably that depends on the instruction -- confirm.
pub fn imm32(self: Self, imm: u32) !void {
    return self.w.writeInt(u32, imm, .little);
}
1130
/// Emit a 64 bit immediate as eight little-endian bytes.
///
/// The value is already 64 bits wide, so no extension is involved.
pub fn imm64(self: Self, imm: u64) !void {
    return self.w.writeInt(u64, imm, .little);
}
1137 };
1138}
1139
/// The individual flags of a REX prefix, all cleared by default.
pub const Rex = struct {
    w: bool = false,
    r: bool = false,
    x: bool = false,
    b: bool = false,
    // Not consulted by `isSet`.
    // NOTE(review): presumably requests a REX byte even when no flag is
    // set -- confirm at the use site.
    present: bool = false,

    /// Reports whether at least one of `w`, `r`, `x` or `b` is set.
    /// `present` does not influence the result.
    pub fn isSet(rex: Rex) bool {
        if (rex.w or rex.r) return true;
        return rex.x or rex.b;
    }
};
1151
/// The fields of a VEX prefix, with defaults for every field.
pub const Vex = struct {
    w: bool = false,
    r: bool = false,
    x: bool = false,
    b: bool = false,
    l: bool = false,
    // Two-bit field whose tags are named after the bytes 66, F3 and F2.
    p: enum(u2) {
        none = 0b00,
        @"66" = 0b01,
        f3 = 0b10,
        f2 = 0b11,
    } = .none,
    // Five-bit field whose named tags correspond to 0F, 0F 38 and 0F 3A;
    // other values are permitted because the enum is non-exhaustive.
    m: enum(u5) {
        @"0f" = 0b0_0001,
        @"0f38" = 0b0_0010,
        @"0f3a" = 0b0_0011,
        _,
    } = .@"0f",
    v: Register = .ymm0,

    /// Reports whether the three-byte form is needed: true when `m` is
    /// anything other than `0f`, or when any of `w`, `x`, `b` is set.
    pub fn is3Byte(vex: Vex) bool {
        if (vex.m != .@"0f") return true;
        return vex.w or vex.x or vex.b;
    }
};
1176
1177// Tests
/// Test helper: succeeds when `given` equals `expected` byte for byte.
///
/// On mismatch it prints `assembly`, both byte strings rendered with `{x}`,
/// and a caret under the first position where the renderings differ, then
/// returns `error.TestFailed`. `expected` must not be empty.
fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void {
    assert(expected.len > 0);
    if (std.mem.eql(u8, expected, given)) return;

    const gpa = testing.allocator;

    const expected_hex = try std.fmt.allocPrint(gpa, "{x}", .{expected});
    defer gpa.free(expected_hex);
    const given_hex = try std.fmt.allocPrint(gpa, "{x}", .{given});
    defer gpa.free(given_hex);

    // The caret line is shifted by the width of the "EXP: " / "GIV: " labels
    // so that it lines up with the rendered text above it.
    const label_width = "EXP: ".len;
    const first_diff = std.mem.indexOfDiff(u8, expected_hex, given_hex).?;
    const caret_indent = try gpa.alloc(u8, first_diff + label_width);
    defer gpa.free(caret_indent);
    @memset(caret_indent, ' ');

    std.debug.print(
        "\nASM: {s}\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n",
        .{ assembly, expected_hex, given_hex, caret_indent },
    );
    return error.TestFailed;
}
1197
/// Test helper that encodes one instruction at a time into a small inline
/// buffer and exposes the bytes produced by the most recent `encode` call.
const TestEncode = struct {
    buffer: [32]u8 = undefined,
    // Number of valid bytes at the start of `buffer`.
    index: usize = 0,

    /// Builds an instruction from `mnemonic` and `ops` (no prefix) and encodes
    /// it at the start of `buffer`, replacing whatever was there before.
    /// Errors from `Instruction.new` and `Instruction.encode` are propagated;
    /// in that case `index` keeps its previous value.
    fn encode(
        enc: *TestEncode,
        mnemonic: Instruction.Mnemonic,
        ops: []const Instruction.Operand,
    ) !void {
        var writer: std.Io.Writer = .fixed(&enc.buffer);
        const inst: Instruction = try .new(.none, mnemonic, ops);
        try inst.encode(&writer, .{});
        enc.index = writer.bufferedLen();
    }

    /// Returns the bytes written by the last successful `encode`.
    ///
    /// Takes a pointer rather than a by-value `TestEncode`: the returned
    /// slice points into `buffer`, so it must refer to the caller's instance
    /// and not to a parameter copy that dies when this function returns.
    fn code(enc: *const TestEncode) []const u8 {
        return enc.buffer[0..enc.index];
    }
};
1217
test "encode" {
    // Encode `mov rbx, 4` through the same fixed-buffer `Writer` interface
    // that `TestEncode` uses, so both call `Instruction.encode` with a
    // `*Writer`.
    var buffer: [32]u8 = undefined;
    var writer: Writer = .fixed(&buffer);

    const inst: Instruction = try .new(.none, .mov, &.{
        .{ .reg = .rbx },
        .{ .imm = .u(4) },
    });
    try inst.encode(&writer, .{});
    try testing.expectEqualSlices(u8, &.{ 0x48, 0xc7, 0xc3, 0x4, 0x0, 0x0, 0x0 }, writer.buffered());
}
1229
test "lower I encoding" {
    var enc: TestEncode = .{};

    // push with 8, 16 and 32 bit immediates.
    try enc.encode(.push, &.{.{ .imm = .u(0x10) }});
    try expectEqualHexStrings("\x6A\x10", enc.code(), "push 0x10");

    try enc.encode(.push, &.{.{ .imm = .u(0x1000) }});
    try expectEqualHexStrings("\x66\x68\x00\x10", enc.code(), "push 0x1000");

    try enc.encode(.push, &.{.{ .imm = .u(0x10000000) }});
    try expectEqualHexStrings("\x68\x00\x00\x00\x10", enc.code(), "push 0x10000000");

    // Arithmetic on the accumulator with an immediate.
    try enc.encode(.adc, &.{ .{ .reg = .rax }, .{ .imm = .u(0x10000000) } });
    try expectEqualHexStrings("\x48\x15\x00\x00\x00\x10", enc.code(), "adc rax, 0x10000000");

    try enc.encode(.add, &.{ .{ .reg = .al }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x04\x10", enc.code(), "add al, 0x10");

    try enc.encode(.add, &.{ .{ .reg = .rax }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x48\x83\xC0\x10", enc.code(), "add rax, 0x10");

    try enc.encode(.sbb, &.{ .{ .reg = .ax }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x66\x1D\x10\x00", enc.code(), "sbb ax, 0x10");

    try enc.encode(.xor, &.{ .{ .reg = .al }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x34\x10", enc.code(), "xor al, 0x10");
}
1278
test "lower MI encoding" {
    const Memory = Instruction.Memory;
    var enc: TestEncode = .{};

    // mov with an immediate source. (The `mov r12, 0x1000` case used to be
    // repeated three times verbatim; it is checked once here.)
    try enc.encode(.mov, &.{ .{ .reg = .r12 }, .{ .imm = .u(0x1000) } });
    try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .r12 } }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\x41\xC6\x04\x24\x10", enc.code(), "mov BYTE PTR [r12], 0x10");

    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x48\xc7\xc0\x10\x00\x00\x00", enc.code(), "mov rax, 0x10");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .r11 } }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\x41\xc7\x03\x10\x00\x00\x00", enc.code(), "mov DWORD PTR [r11], 0x10");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initRip(.qword, 0x10) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings(
        "\x48\xC7\x05\x10\x00\x00\x00\x10\x00\x00\x00",
        enc.code(),
        "mov QWORD PTR [rip + 0x10], 0x10",
    );

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -8 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\x48\xc7\x45\xf8\x10\x00\x00\x00", enc.code(), "mov QWORD PTR [rbp - 8], 0x10");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -2 }) },
        .{ .imm = .s(-16) },
    });
    try expectEqualHexStrings("\x66\xC7\x45\xFE\xF0\xFF", enc.code(), "mov WORD PTR [rbp - 2], -16");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .rbp }, .disp = -1 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\xC6\x45\xFF\x10", enc.code(), "mov BYTE PTR [rbp - 1], 0x10");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.qword, .{
            .base = .{ .reg = .ds },
            .disp = 0x10000000,
            .scale_index = .{ .scale = 2, .index = .rcx },
        }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings(
        "\x48\xC7\x04\x4D\x00\x00\x00\x10\x10\x00\x00\x00",
        enc.code(),
        "mov QWORD PTR [rcx*2 + 0x10000000], 0x10",
    );

    // adc
    try enc.encode(.adc, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .rbp }, .disp = -0x10 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\x80\x55\xF0\x10", enc.code(), "adc BYTE PTR [rbp - 0x10], 0x10");

    try enc.encode(.adc, &.{
        .{ .mem = Memory.initRip(.qword, 0) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\x48\x83\x15\x00\x00\x00\x00\x10", enc.code(), "adc QWORD PTR [rip], 0x10");

    try enc.encode(.adc, &.{ .{ .reg = .rax }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x48\x83\xD0\x10", enc.code(), "adc rax, 0x10");

    // add
    try enc.encode(.add, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .rdx }, .disp = -8 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings("\x83\x42\xF8\x10", enc.code(), "add DWORD PTR [rdx - 8], 0x10");

    try enc.encode(.add, &.{ .{ .reg = .rax }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x48\x83\xC0\x10", enc.code(), "add rax, 0x10");

    try enc.encode(.add, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -0x10 }) },
        .{ .imm = .s(-0x10) },
    });
    try expectEqualHexStrings("\x48\x83\x45\xF0\xF0", enc.code(), "add QWORD PTR [rbp - 0x10], -0x10");

    // and / sub with a 32 bit displacement.
    try enc.encode(.@"and", &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings(
        "\x83\x24\x25\x00\x00\x00\x10\x10",
        enc.code(),
        "and DWORD PTR ds:0x10000000, 0x10",
    );

    try enc.encode(.@"and", &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .es }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings(
        "\x26\x83\x24\x25\x00\x00\x00\x10\x10",
        enc.code(),
        "and DWORD PTR es:0x10000000, 0x10",
    );

    try enc.encode(.@"and", &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .r12 }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings(
        "\x41\x83\xA4\x24\x00\x00\x00\x10\x10",
        enc.code(),
        "and DWORD PTR [r12 + 0x10000000], 0x10",
    );

    try enc.encode(.sub, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .r11 }, .disp = 0x10000000 }) },
        .{ .imm = .u(0x10) },
    });
    try expectEqualHexStrings(
        "\x41\x83\xAB\x00\x00\x00\x10\x10",
        enc.code(),
        "sub DWORD PTR [r11 + 0x10000000], 0x10",
    );
}
1436
test "lower RM encoding" {
    // The third argument of `expectEqualHexStrings` is only printed on
    // failure; several of these descriptions named the wrong operand size,
    // register or mnemonic and now match the operands actually encoded.
    const Memory = Instruction.Memory;
    var enc: TestEncode = .{};

    // mov reg, mem
    try enc.encode(.mov, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .r11 } }) },
    });
    try expectEqualHexStrings("\x49\x8b\x03", enc.code(), "mov rax, QWORD PTR [r11]");

    try enc.encode(.mov, &.{
        .{ .reg = .rbx },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .ds }, .disp = 0x10 }) },
    });
    try expectEqualHexStrings("\x48\x8B\x1C\x25\x10\x00\x00\x00", enc.code(), "mov rbx, QWORD PTR ds:0x10");

    try enc.encode(.mov, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -4 }) },
    });
    try expectEqualHexStrings("\x48\x8B\x45\xFC", enc.code(), "mov rax, QWORD PTR [rbp - 4]");

    try enc.encode(.mov, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initSib(.qword, .{
            .base = .{ .reg = .rbp },
            .scale_index = .{ .scale = 1, .index = .rcx },
            .disp = -8,
        }) },
    });
    try expectEqualHexStrings("\x48\x8B\x44\x0D\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*1 - 8]");

    try enc.encode(.mov, &.{
        .{ .reg = .eax },
        .{ .mem = Memory.initSib(.dword, .{
            .base = .{ .reg = .rbp },
            .scale_index = .{ .scale = 4, .index = .rdx },
            .disp = -4,
        }) },
    });
    try expectEqualHexStrings("\x8B\x44\x95\xFC", enc.code(), "mov eax, dword ptr [rbp + rdx*4 - 4]");

    try enc.encode(.mov, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initSib(.qword, .{
            .base = .{ .reg = .rbp },
            .scale_index = .{ .scale = 8, .index = .rcx },
            .disp = -8,
        }) },
    });
    try expectEqualHexStrings("\x48\x8B\x44\xCD\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*8 - 8]");

    try enc.encode(.mov, &.{
        .{ .reg = .r8b },
        .{ .mem = Memory.initSib(.byte, .{
            .base = .{ .reg = .rsi },
            .scale_index = .{ .scale = 1, .index = .rcx },
            .disp = -24,
        }) },
    });
    try expectEqualHexStrings("\x44\x8A\x44\x0E\xE8", enc.code(), "mov r8b, BYTE PTR [rsi + rcx*1 - 24]");

    // mov with segment registers.
    // TODO this mnemonic needs cleanup as some prefixes are obsolete.
    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .reg = .cs } });
    try expectEqualHexStrings("\x48\x8C\xC8", enc.code(), "mov rax, cs");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) },
        .{ .reg = .fs },
    });
    try expectEqualHexStrings("\x8C\x65\xF0", enc.code(), "mov WORD PTR [rbp - 16], fs");

    try enc.encode(.mov, &.{ .{ .reg = .r12w }, .{ .reg = .cs } });
    try expectEqualHexStrings("\x66\x41\x8C\xCC", enc.code(), "mov r12w, cs");

    // movsx / movsxd
    try enc.encode(.movsx, &.{ .{ .reg = .eax }, .{ .reg = .bx } });
    try expectEqualHexStrings("\x0F\xBF\xC3", enc.code(), "movsx eax, bx");

    try enc.encode(.movsx, &.{ .{ .reg = .eax }, .{ .reg = .bl } });
    try expectEqualHexStrings("\x0F\xBE\xC3", enc.code(), "movsx eax, bl");

    try enc.encode(.movsx, &.{ .{ .reg = .ax }, .{ .reg = .bl } });
    try expectEqualHexStrings("\x66\x0F\xBE\xC3", enc.code(), "movsx ax, bl");

    // The operand is `.word`, so the description says WORD PTR (was BYTE PTR).
    try enc.encode(.movsx, &.{
        .{ .reg = .eax },
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp } }) },
    });
    try expectEqualHexStrings("\x0F\xBF\x45\x00", enc.code(), "movsx eax, WORD PTR [rbp]");

    try enc.encode(.movsx, &.{
        .{ .reg = .eax },
        .{ .mem = Memory.initSib(.byte, .{ .scale_index = .{ .index = .rax, .scale = 2 } }) },
    });
    try expectEqualHexStrings("\x0F\xBE\x04\x45\x00\x00\x00\x00", enc.code(), "movsx eax, BYTE PTR [rax * 2]");

    try enc.encode(.movsx, &.{
        .{ .reg = .ax },
        .{ .mem = Memory.initRip(.byte, 0x10) },
    });
    try expectEqualHexStrings("\x66\x0F\xBE\x05\x10\x00\x00\x00", enc.code(), "movsx ax, BYTE PTR [rip + 0x10]");

    try enc.encode(.movsx, &.{ .{ .reg = .rax }, .{ .reg = .bx } });
    try expectEqualHexStrings("\x48\x0F\xBF\xC3", enc.code(), "movsx rax, bx");

    try enc.encode(.movsxd, &.{ .{ .reg = .rax }, .{ .reg = .ebx } });
    try expectEqualHexStrings("\x48\x63\xC3", enc.code(), "movsxd rax, ebx");

    // lea: the expected bytes do not vary with the memory operand's size.
    try enc.encode(.lea, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initRip(.qword, 0x10) },
    });
    try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, QWORD PTR [rip + 0x10]");

    try enc.encode(.lea, &.{
        .{ .reg = .rax },
        .{ .mem = Memory.initRip(.dword, 0x10) },
    });
    try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, DWORD PTR [rip + 0x10]");

    try enc.encode(.lea, &.{
        .{ .reg = .eax },
        .{ .mem = Memory.initRip(.dword, 0x10) },
    });
    try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, DWORD PTR [rip + 0x10]");

    try enc.encode(.lea, &.{
        .{ .reg = .eax },
        .{ .mem = Memory.initRip(.word, 0x10) },
    });
    try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, WORD PTR [rip + 0x10]");

    try enc.encode(.lea, &.{
        .{ .reg = .ax },
        .{ .mem = Memory.initRip(.byte, 0x10) },
    });
    try expectEqualHexStrings("\x66\x8D\x05\x10\x00\x00\x00", enc.code(), "lea ax, BYTE PTR [rip + 0x10]");

    try enc.encode(.lea, &.{
        .{ .reg = .rsi },
        .{ .mem = Memory.initSib(.qword, .{
            .base = .{ .reg = .rbp },
            .scale_index = .{ .scale = 1, .index = .rcx },
        }) },
    });
    try expectEqualHexStrings("\x48\x8D\x74\x0D\x00", enc.code(), "lea rsi, QWORD PTR [rbp + rcx*1 + 0]");

    // add / sub / imul
    try enc.encode(.add, &.{
        .{ .reg = .r11 },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
    });
    try expectEqualHexStrings("\x4C\x03\x1C\x25\x00\x00\x00\x10", enc.code(), "add r11, QWORD PTR ds:0x10000000");

    // The destination is r12b (the descriptions used to say r11b).
    try enc.encode(.add, &.{
        .{ .reg = .r12b },
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
    });
    try expectEqualHexStrings("\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r12b, BYTE PTR ds:0x10000000");

    try enc.encode(.add, &.{
        .{ .reg = .r12b },
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .fs }, .disp = 0x10000000 }) },
    });
    try expectEqualHexStrings("\x64\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r12b, BYTE PTR fs:0x10000000");

    try enc.encode(.sub, &.{
        .{ .reg = .r11 },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .r13 }, .disp = 0x10000000 }) },
    });
    try expectEqualHexStrings("\x4D\x2B\x9D\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r13 + 0x10000000]");

    try enc.encode(.sub, &.{
        .{ .reg = .r11 },
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .r12 }, .disp = 0x10000000 }) },
    });
    try expectEqualHexStrings("\x4D\x2B\x9C\x24\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r12 + 0x10000000]");

    // The mnemonic under test is imul (the description used to say mov).
    try enc.encode(.imul, &.{ .{ .reg = .r11 }, .{ .reg = .r12 } });
    try expectEqualHexStrings("\x4D\x0F\xAF\xDC", enc.code(), "imul r11, r12");
}
1640
test "lower RMI encoding" {
    const Memory = Instruction.Memory;
    var enc: TestEncode = .{};

    // Three-operand imul: destination, source, immediate.
    try enc.encode(.imul, &.{ .{ .reg = .r11 }, .{ .reg = .r12 }, .{ .imm = .s(-2) } });
    try expectEqualHexStrings("\x4D\x6B\xDC\xFE", enc.code(), "imul r11, r12, -2");

    try enc.encode(.imul, &.{
        .{ .reg = .r11 },
        .{ .mem = Memory.initRip(.qword, -16) },
        .{ .imm = .s(-1024) },
    });
    try expectEqualHexStrings(
        "\x4C\x69\x1D\xF0\xFF\xFF\xFF\x00\xFC\xFF\xFF",
        enc.code(),
        "imul r11, QWORD PTR [rip - 16], -1024",
    );

    // Same 16 bit form with a signed and then an unsigned immediate.
    try enc.encode(.imul, &.{
        .{ .reg = .bx },
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) },
        .{ .imm = .s(-1024) },
    });
    try expectEqualHexStrings(
        "\x66\x69\x5D\xF0\x00\xFC",
        enc.code(),
        "imul bx, WORD PTR [rbp - 16], -1024",
    );

    try enc.encode(.imul, &.{
        .{ .reg = .bx },
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) },
        .{ .imm = .u(1024) },
    });
    try expectEqualHexStrings(
        "\x66\x69\x5D\xF0\x00\x04",
        enc.code(),
        "imul bx, WORD PTR [rbp - 16], 1024",
    );
}
1684
test "lower MR encoding" {
    const Memory = Instruction.Memory;
    var enc: TestEncode = .{};

    // mov with a register source.
    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .reg = .rbx } });
    try expectEqualHexStrings("\x48\x89\xD8", enc.code(), "mov rax, rbx");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -4 }) },
        .{ .reg = .r11 },
    });
    try expectEqualHexStrings("\x4c\x89\x5d\xfc", enc.code(), "mov QWORD PTR [rbp - 4], r11");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initRip(.qword, 0x10) },
        .{ .reg = .r12 },
    });
    try expectEqualHexStrings("\x4C\x89\x25\x10\x00\x00\x00", enc.code(), "mov QWORD PTR [rip + 0x10], r12");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.qword, .{
            .base = .{ .reg = .r11 },
            .scale_index = .{ .scale = 2, .index = .r12 },
            .disp = 0x10,
        }) },
        .{ .reg = .r13 },
    });
    try expectEqualHexStrings("\x4F\x89\x6C\x63\x10", enc.code(), "mov QWORD PTR [r11 + 2 * r12 + 0x10], r13");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initRip(.word, -0x10) },
        .{ .reg = .r12w },
    });
    try expectEqualHexStrings("\x66\x44\x89\x25\xF0\xFF\xFF\xFF", enc.code(), "mov WORD PTR [rip - 0x10], r12w");

    try enc.encode(.mov, &.{
        .{ .mem = Memory.initSib(.byte, .{
            .base = .{ .reg = .r11 },
            .scale_index = .{ .scale = 2, .index = .r12 },
            .disp = 0x10,
        }) },
        .{ .reg = .r13b },
    });
    try expectEqualHexStrings("\x47\x88\x6C\x63\x10", enc.code(), "mov BYTE PTR [r11 + 2 * r12 + 0x10], r13b");

    // add / sub with a register source.
    try enc.encode(.add, &.{
        .{ .mem = Memory.initSib(.byte, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
        .{ .reg = .r12b },
    });
    try expectEqualHexStrings("\x44\x00\x24\x25\x00\x00\x00\x10", enc.code(), "add BYTE PTR ds:0x10000000, r12b");

    try enc.encode(.add, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) },
        .{ .reg = .r12d },
    });
    try expectEqualHexStrings("\x44\x01\x24\x25\x00\x00\x00\x10", enc.code(), "add DWORD PTR [ds:0x10000000], r12d");

    try enc.encode(.add, &.{
        .{ .mem = Memory.initSib(.dword, .{ .base = .{ .reg = .gs }, .disp = 0x10000000 }) },
        .{ .reg = .r12d },
    });
    try expectEqualHexStrings("\x65\x44\x01\x24\x25\x00\x00\x00\x10", enc.code(), "add DWORD PTR [gs:0x10000000], r12d");

    try enc.encode(.sub, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .r11 }, .disp = 0x10000000 }) },
        .{ .reg = .r12 },
    });
    try expectEqualHexStrings("\x4D\x29\xA3\x00\x00\x00\x10", enc.code(), "sub QWORD PTR [r11 + 0x10000000], r12");
}
1756
test "lower M encoding" {
    // The third argument of `expectEqualHexStrings` is only printed on
    // failure; two descriptions named the wrong operand and now match the
    // operands actually encoded.
    const Memory = Instruction.Memory;
    var enc: TestEncode = .{};

    // call
    try enc.encode(.call, &.{.{ .reg = .r12 }});
    try expectEqualHexStrings("\x41\xFF\xD4", enc.code(), "call r12");

    try enc.encode(.call, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .r12 } }) },
    });
    try expectEqualHexStrings("\x41\xFF\x14\x24", enc.code(), "call QWORD PTR [r12]");

    try enc.encode(.call, &.{
        .{ .mem = Memory.initSib(.qword, .{
            .base = .none,
            .scale_index = .{ .index = .r11, .scale = 2 },
        }) },
    });
    try expectEqualHexStrings("\x42\xFF\x14\x5D\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r11 * 2]");

    try enc.encode(.call, &.{
        .{ .mem = Memory.initSib(.qword, .{
            .base = .none,
            .scale_index = .{ .index = .r12, .scale = 2 },
        }) },
    });
    try expectEqualHexStrings("\x42\xFF\x14\x65\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r12 * 2]");

    try enc.encode(.call, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .gs } }) },
    });
    try expectEqualHexStrings("\x65\xFF\x14\x25\x00\x00\x00\x00", enc.code(), "call gs:0x0");

    try enc.encode(.call, &.{.{ .imm = .s(0) }});
    try expectEqualHexStrings("\xE8\x00\x00\x00\x00", enc.code(), "call 0x0");

    // push / pop with a memory operand.
    try enc.encode(.push, &.{
        .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rbp } }) },
    });
    try expectEqualHexStrings("\xFF\x75\x00", enc.code(), "push QWORD PTR [rbp]");

    // The operand is `.word`, so the description says WORD PTR (was QWORD PTR).
    try enc.encode(.push, &.{
        .{ .mem = Memory.initSib(.word, .{ .base = .{ .reg = .rbp } }) },
    });
    try expectEqualHexStrings("\x66\xFF\x75\x00", enc.code(), "push WORD PTR [rbp]");

    try enc.encode(.pop, &.{
        .{ .mem = Memory.initRip(.qword, 0) },
    });
    try expectEqualHexStrings("\x8F\x05\x00\x00\x00\x00", enc.code(), "pop QWORD PTR [rip]");

    // The operand is rip-relative, so the description says [rip] (was [rbp]).
    try enc.encode(.pop, &.{
        .{ .mem = Memory.initRip(.word, 0) },
    });
    try expectEqualHexStrings("\x66\x8F\x05\x00\x00\x00\x00", enc.code(), "pop WORD PTR [rip]");

    // One-operand imul.
    try enc.encode(.imul, &.{.{ .reg = .rax }});
    try expectEqualHexStrings("\x48\xF7\xE8", enc.code(), "imul rax");

    try enc.encode(.imul, &.{.{ .reg = .r12 }});
    try expectEqualHexStrings("\x49\xF7\xEC", enc.code(), "imul r12");
}
1826
test "lower O encoding" {
    // Each case encodes a single-register instruction and compares the bytes.
    const Case = struct {
        mnemonic: Instruction.Mnemonic,
        reg: Register,
        bytes: []const u8,
        assembly: []const u8,
    };
    const cases = [_]Case{
        .{ .mnemonic = .push, .reg = .rax, .bytes = "\x50", .assembly = "push rax" },
        .{ .mnemonic = .push, .reg = .r12w, .bytes = "\x66\x41\x54", .assembly = "push r12w" },
        .{ .mnemonic = .pop, .reg = .r12, .bytes = "\x41\x5c", .assembly = "pop r12" },
    };

    var enc: TestEncode = .{};
    for (cases) |case| {
        try enc.encode(case.mnemonic, &.{.{ .reg = case.reg }});
        try expectEqualHexStrings(case.bytes, enc.code(), case.assembly);
    }
}
1845
test "lower OI encoding" {
    var enc: TestEncode = .{};

    // 64 bit immediates.
    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .imm = .u(0x1000000000000000) } });
    try expectEqualHexStrings(
        "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x10",
        enc.code(),
        "movabs rax, 0x1000000000000000",
    );

    try enc.encode(.mov, &.{ .{ .reg = .r11 }, .{ .imm = .u(0x1000000000000000) } });
    try expectEqualHexStrings(
        "\x49\xBB\x00\x00\x00\x00\x00\x00\x00\x10",
        enc.code(),
        "movabs r11, 0x1000000000000000",
    );

    // 32, 16 and 8 bit destinations of the same extended register.
    try enc.encode(.mov, &.{ .{ .reg = .r11d }, .{ .imm = .u(0x10000000) } });
    try expectEqualHexStrings("\x41\xBB\x00\x00\x00\x10", enc.code(), "mov r11d, 0x10000000");

    try enc.encode(.mov, &.{ .{ .reg = .r11w }, .{ .imm = .u(0x1000) } });
    try expectEqualHexStrings("\x66\x41\xBB\x00\x10", enc.code(), "mov r11w, 0x1000");

    try enc.encode(.mov, &.{ .{ .reg = .r11b }, .{ .imm = .u(0x10) } });
    try expectEqualHexStrings("\x41\xB3\x10", enc.code(), "mov r11b, 0x10");
}
1887
test "lower FD/TD encoding" {
    const Memory = Instruction.Memory;
    var enc: TestEncode = .{};

    // Loads from a segment:offset operand into the accumulator.
    try enc.encode(.mov, &.{ .{ .reg = .rax }, .{ .mem = Memory.initMoffs(.cs, 0x10) } });
    try expectEqualHexStrings("\x2E\x48\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs rax, cs:0x10");

    try enc.encode(.mov, &.{ .{ .reg = .eax }, .{ .mem = Memory.initMoffs(.fs, 0x10) } });
    try expectEqualHexStrings("\x64\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs eax, fs:0x10");

    try enc.encode(.mov, &.{ .{ .reg = .ax }, .{ .mem = Memory.initMoffs(.gs, 0x10) } });
    try expectEqualHexStrings("\x65\x66\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs ax, gs:0x10");

    try enc.encode(.mov, &.{ .{ .reg = .al }, .{ .mem = Memory.initMoffs(.ds, 0x10) } });
    try expectEqualHexStrings("\xA0\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs al, ds:0x10");

    // Stores from the accumulator to a segment:offset operand.
    try enc.encode(.mov, &.{ .{ .mem = Memory.initMoffs(.cs, 0x10) }, .{ .reg = .rax } });
    try expectEqualHexStrings("\x2E\x48\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs cs:0x10, rax");

    try enc.encode(.mov, &.{ .{ .mem = Memory.initMoffs(.fs, 0x10) }, .{ .reg = .eax } });
    try expectEqualHexStrings("\x64\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs fs:0x10, eax");

    try enc.encode(.mov, &.{ .{ .mem = Memory.initMoffs(.gs, 0x10) }, .{ .reg = .ax } });
    try expectEqualHexStrings("\x65\x66\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs gs:0x10, ax");

    try enc.encode(.mov, &.{ .{ .mem = Memory.initMoffs(.ds, 0x10) }, .{ .reg = .al } });
    try expectEqualHexStrings("\xA2\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs ds:0x10, al");
}
1939
test "lower NP encoding" {
    // Operand-less instructions: mnemonic, expected bytes, description.
    const Case = struct {
        mnemonic: Instruction.Mnemonic,
        bytes: []const u8,
        assembly: []const u8,
    };
    const cases = [_]Case{
        .{ .mnemonic = .int3, .bytes = "\xCC", .assembly = "int3" },
        .{ .mnemonic = .nop, .bytes = "\x90", .assembly = "nop" },
        .{ .mnemonic = .ret, .bytes = "\xC3", .assembly = "ret" },
        .{ .mnemonic = .syscall, .bytes = "\x0f\x05", .assembly = "syscall" },
    };

    var enc: TestEncode = .{};
    for (cases) |case| {
        try enc.encode(case.mnemonic, &.{});
        try expectEqualHexStrings(case.bytes, enc.code(), case.assembly);
    }
}
1955
/// Test helper: asserts that building an instruction from `mnemonic` and
/// `ops` (no prefix) fails with `error.InvalidInstruction`.
fn invalidInstruction(mnemonic: Instruction.Mnemonic, ops: []const Instruction.Operand) !void {
    // Keep the result as an error union: `expectError` needs one, and an
    // `Instruction`-typed constant cannot hold the error being checked for.
    const result = Instruction.new(.none, mnemonic, ops);
    try testing.expectError(error.InvalidInstruction, result);
}
1960
test "invalid instruction" {
    const Memory = Instruction.Memory;

    // call rejects 32, 16 and 8 bit operands, register or memory.
    for ([_]Register{ .eax, .ax, .al }) |reg| {
        try invalidInstruction(.call, &.{.{ .reg = reg }});
    }
    try invalidInstruction(.call, &.{.{ .mem = Memory.initRip(.dword, 0) }});
    try invalidInstruction(.call, &.{.{ .mem = Memory.initRip(.word, 0) }});
    try invalidInstruction(.call, &.{.{ .mem = Memory.initRip(.byte, 0) }});

    // Operand size mismatch between memory and register.
    try invalidInstruction(.mov, &.{
        .{ .mem = Memory.initRip(.word, 0x10) },
        .{ .reg = .r12 },
    });

    // lea with a register source, and lea with an 8 bit destination.
    try invalidInstruction(.lea, &.{ .{ .reg = .rax }, .{ .reg = .rbx } });
    try invalidInstruction(.lea, &.{
        .{ .reg = .al },
        .{ .mem = Memory.initRip(.byte, 0) },
    });

    // pop and push reject 8 and 32 bit registers.
    for ([_]Instruction.Mnemonic{ .pop, .push }) |mnemonic| {
        for ([_]Register{ .r12b, .r12d }) |reg| {
            try invalidInstruction(mnemonic, &.{.{ .reg = reg }});
        }
    }

    // push with a 64 bit immediate.
    try invalidInstruction(.push, &.{.{ .imm = .u(0x1000000000000000) }});
}
2008
/// Asserts that building an instruction from `mnemonic` and `ops` (with no prefix) fails with
/// `error.CannotEncode`.
fn cannotEncode(mnemonic: Instruction.Mnemonic, ops: []const Instruction.Operand) !void {
    // `expectError` takes its second argument as `anytype`, which provides no result type for a
    // decl literal, so the constructor has to be named explicitly.
    try testing.expectError(error.CannotEncode, Instruction.new(.none, mnemonic, ops));
}
2012
test "cannot encode" {
    // Operand pairs that are expected to be rejected with `error.CannotEncode`.
    const Case = struct {
        mnemonic: Instruction.Mnemonic,
        ops: [2]Instruction.Operand,
    };
    const cases = [_]Case{
        .{
            .mnemonic = .@"test",
            .ops = .{
                .{ .mem = Instruction.Memory.initSib(.byte, .{ .base = .{ .reg = .r12 } }) },
                .{ .reg = .ah },
            },
        },
        .{
            .mnemonic = .@"test",
            .ops = .{ .{ .reg = .r11b }, .{ .reg = .bh } },
        },
        .{
            .mnemonic = .mov,
            .ops = .{ .{ .reg = .sil }, .{ .reg = .ah } },
        },
    };

    for (&cases) |*case| {
        try cannotEncode(case.mnemonic, &case.ops);
    }
}
2027
2028const Assembler = struct {
2029 it: Tokenizer,
2030
/// Splits assembly text into tokens, one per call to `next`.
const Tokenizer = struct {
    /// Text being tokenized.
    input: []const u8,
    /// Index into `input` of the next unread byte.
    pos: usize = 0,

    const Error = error{InvalidToken};

    /// A token of kind `id` covering `input[start..end]`.
    const Token = struct {
        id: Id,
        start: usize,
        end: usize,

        const Id = enum {
            eof,

            space,
            new_line,

            colon,
            comma,
            open_br,
            close_br,
            plus,
            minus,
            star,

            string,
            numeral,
        };
    };

    const Iterator = struct {};

    /// Returns the token starting at `pos` and advances `pos` past it.
    ///
    /// Token shapes:
    /// * punctuation (`, : [ ] + - *`) is a single byte;
    /// * `.space` is a run of spaces/tabs, `.new_line` a run of line breaks, spaces and tabs;
    ///   a whitespace run that reaches the end of input is reported as `.eof`;
    /// * `.string` is a letter followed by letters and digits;
    /// * `.numeral` is a run of decimal digits optionally followed by `x` and hexadecimal
    ///   digits in either case (`0xff` and `0xFF` are both one token).
    ///
    /// Returns `error.InvalidToken`, leaving `pos` untouched, when the byte at `pos` cannot start
    /// any token.
    fn next(it: *Tokenizer) Error!Token {
        const start = it.pos;
        var id: Token.Id = .eof;

        if (start < it.input.len) {
            const first = it.input[start];
            switch (first) {
                ',', ':', '[', ']', '+', '-', '*' => {
                    id = switch (first) {
                        ',' => .comma,
                        ':' => .colon,
                        '[' => .open_br,
                        ']' => .close_br,
                        '+' => .plus,
                        '-' => .minus,
                        '*' => .star,
                        else => unreachable,
                    };
                    it.pos += 1;
                },
                ' ', '\t' => {
                    it.pos += 1;
                    it.advanceWhile(isBlank);
                    // Trailing whitespace is folded into the end-of-input token.
                    if (it.pos < it.input.len) id = .space;
                },
                '\n', '\r' => {
                    it.pos += 1;
                    it.advanceWhile(isBlankOrLineBreak);
                    // Trailing line breaks are folded into the end-of-input token.
                    if (it.pos < it.input.len) id = .new_line;
                },
                'a'...'z', 'A'...'Z' => {
                    it.pos += 1;
                    it.advanceWhile(std.ascii.isAlphanumeric);
                    id = .string;
                },
                '0'...'9' => {
                    it.pos += 1;
                    it.advanceWhile(std.ascii.isDigit);
                    if (it.pos < it.input.len and it.input[it.pos] == 'x') {
                        it.pos += 1;
                        // Accept upper-case hex digits as well; `std.fmt.parseInt` understands
                        // both, so there is no reason to split `0xFF` into two tokens.
                        it.advanceWhile(std.ascii.isHex);
                    }
                    id = .numeral;
                },
                else => return error.InvalidToken,
            }
        }

        return .{ .id = id, .start = start, .end = it.pos };
    }

    /// Advances `pos` while it is in bounds and `accepts` holds for the byte at `pos`.
    fn advanceWhile(it: *Tokenizer, comptime accepts: fn (u8) bool) void {
        while (it.pos < it.input.len and accepts(it.input[it.pos])) it.pos += 1;
    }

    fn isBlank(ch: u8) bool {
        return ch == ' ' or ch == '\t';
    }

    fn isBlankOrLineBreak(ch: u8) bool {
        return ch == '\n' or ch == '\r' or isBlank(ch);
    }

    /// Moves the read position to `pos`; used by the parser to rewind after a failed attempt.
    fn seekTo(it: *Tokenizer, pos: usize) void {
        it.pos = pos;
    }
};
2185
/// Creates an assembler that reads instructions from `input`, starting at its first byte.
pub fn init(input: []const u8) Assembler {
    const tokenizer: Tokenizer = .{ .input = input };
    return .{ .it = tokenizer };
}
2191
/// Parses instructions one at a time until the input is exhausted, encoding each into `w`.
/// Stops at, and returns, the first parse, construction or encode error.
pub fn assemble(as: *Assembler, w: *Writer) !void {
    while (true) {
        const parsed = (try as.next()) orelse return;
        const inst = try Instruction.new(.none, parsed.mnemonic, &parsed.ops);
        try inst.encode(w, .{});
    }
}
2198
/// One parsed source instruction, before it is turned into an `Instruction`.
const ParseResult = struct {
    /// Mnemonic named by the first token of the instruction.
    mnemonic: Instruction.Mnemonic,
    /// Operand slots in source order.
    ops: [4]Instruction.Operand,
};
2203
/// Everything the parser can fail with: tokenizer errors, integer-literal parse errors, and the
/// parser's own rejection reasons.
const ParseError = Tokenizer.Error || std.fmt.ParseIntError || error{
    InvalidMnemonic,
    InvalidOperand,
    InvalidRegister,
    InvalidPtrSize,
    InvalidMemoryOperand,
    InvalidScaleIndex,
    UnexpectedToken,
};
2213
/// Parses the next instruction from the input.
///
/// Returns `null` once only end of input remains. Fails with `error.InvalidMnemonic` when the
/// first word is not a known mnemonic and with `error.InvalidOperand` when the rest of the line
/// matches none of the supported operand shapes.
fn next(as: *Assembler) ParseError!?ParseResult {
    try as.skip(2, .{ .space, .new_line });

    const mnemonic_tok = as.expect(.string) catch |err| {
        // Running out of input is the normal way for parsing to finish.
        if (err == error.UnexpectedToken and try as.peek() == .eof) return null;
        return err;
    };
    const mnemonic_text = as.source(mnemonic_tok);
    const mnemonic = mnemonicFromString(mnemonic_text) orelse return error.InvalidMnemonic;
    try as.skip(1, .{.space});

    // Supported operand shapes, tried in order; the first that matches the whole line wins.
    const operand_rules = .{
        .{},
        .{.register},
        .{.memory},
        .{.immediate},
        .{ .register, .register },
        .{ .register, .memory },
        .{ .memory, .register },
        .{ .register, .immediate },
        .{ .memory, .immediate },
        .{ .register, .register, .immediate },
        .{ .register, .memory, .immediate },
    };

    const operands_start = as.it.pos;
    inline for (operand_rules) |rule| {
        var ops: [4]Instruction.Operand = .{.none} ** 4;
        if (as.parseOperandRule(rule, &ops)) {
            return .{ .mnemonic = mnemonic, .ops = ops };
        } else |_| {
            // Rewind so the next shape starts from the first operand again.
            as.it.seekTo(operands_start);
        }
    }

    return error.InvalidOperand;
}
2253
/// Returns the input text covered by `token`.
fn source(as: *Assembler, token: Tokenizer.Token) []const u8 {
    const text = as.it.input;
    return text[token.start..token.end];
}
2257
/// Returns the kind of the upcoming token without consuming it.
fn peek(as: *Assembler) Tokenizer.Error!Tokenizer.Token.Id {
    const saved_pos = as.it.pos;
    const upcoming = try as.it.next();
    as.it.seekTo(saved_pos);
    return upcoming.id;
}
2265
/// Consumes and returns the upcoming token if it is of kind `id`; otherwise consumes nothing and
/// fails with `error.UnexpectedToken`.
fn expect(as: *Assembler, id: Tokenizer.Token.Id) ParseError!Tokenizer.Token {
    if (try as.peek() != id) return error.UnexpectedToken;
    return as.it.next();
}
2271
/// Consumes tokens for as long as their kind is one of `tok_ids`, stopping in front of the first
/// token of any other kind.
fn skip(as: *Assembler, comptime num: comptime_int, tok_ids: [num]Tokenizer.Token.Id) Tokenizer.Error!void {
    while (true) {
        const token_start = as.it.pos;
        const tok = try as.it.next();
        const skippable = for (tok_ids) |candidate| {
            if (tok.id == candidate) break true;
        } else false;
        if (!skippable) {
            // Un-read the token that ended the run.
            as.it.seekTo(token_start);
            return;
        }
    }
}
2283
/// Looks up the mnemonic whose enum field name is exactly `bytes` (case-sensitive), or returns
/// `null` when there is none.
fn mnemonicFromString(bytes: []const u8) ?Instruction.Mnemonic {
    return std.meta.stringToEnum(Instruction.Mnemonic, bytes);
}
2293
/// Tries to parse the rest of the current line as the operand shape described by `rule`.
///
/// `rule` is a comptime tuple of `.register`, `.memory` and `.immediate` literals. Operands are
/// separated by commas and written into `ops` in order; slots beyond the rule's length are left
/// untouched. After the last operand only a new line or end of input may follow, otherwise
/// `error.InvalidOperand` is returned. On failure the tokenizer position is left wherever
/// parsing stopped, so the caller is responsible for rewinding.
fn parseOperandRule(as: *Assembler, rule: anytype, ops: *[4]Instruction.Operand) ParseError!void {
    inline for (rule, 0..) |cond, i| {
        comptime assert(i < 4);
        if (i > 0) {
            _ = try as.expect(.comma);
            try as.skip(1, .{.space});
        }
        if (@typeInfo(@TypeOf(cond)) != .enum_literal) {
            @compileError("invalid condition in the rule: " ++ @typeName(@TypeOf(cond)));
        }
        switch (cond) {
            .register => {
                const reg_tok = try as.expect(.string);
                const reg = registerFromString(as.source(reg_tok)) orelse
                    return error.InvalidOperand;
                ops[i] = .{ .reg = reg };
            },
            .memory => {
                const mem = try as.parseMemory();
                ops[i] = .{ .mem = mem };
            },
            .immediate => {
                // A failed `expect` here only means "no minus sign"; a tokenizer error at this
                // position is reported again by the `expect(.numeral)` that follows.
                const is_neg = if (as.expect(.minus)) |_| true else |_| false;
                const imm_tok = try as.expect(.numeral);
                const imm_text = as.source(imm_tok);
                const imm: Instruction.Immediate = if (is_neg) blk: {
                    // Parse the magnitude in a wider type and range-check after negating, so
                    // that `-2147483648` (the most negative i32) is accepted.
                    const magnitude = try std.fmt.parseInt(i64, imm_text, 0);
                    const value = std.math.cast(i32, -magnitude) orelse return error.Overflow;
                    break :blk .{ .signed = value };
                } else .{ .unsigned = try std.fmt.parseInt(u64, imm_text, 0) };
                ops[i] = .{ .imm = imm };
            },
            else => @compileError("unhandled enum literal " ++ @tagName(cond)),
        }
        try as.skip(1, .{.space});
    }

    // The operands must be the last thing on the line.
    try as.skip(1, .{.space});
    const tok = try as.it.next();
    switch (tok.id) {
        .new_line, .eof => {},
        else => return error.InvalidOperand,
    }
}
2336
/// Looks up the register whose enum field name is exactly `bytes` (case-sensitive), or returns
/// `null` when there is none.
fn registerFromString(bytes: []const u8) ?Register {
    return std.meta.stringToEnum(Register, bytes);
}
2346
/// Parses a memory operand: an optional `<size> ptr` prefix followed by one of the bracketed or
/// `seg:disp` forms listed in `rules`. The operand size defaults to `.qword` without a prefix.
///
/// Returns `error.InvalidOperand` when no form matches and `error.InvalidMemoryOperand` when a
/// form matches but its parts cannot be combined into a memory operand.
fn parseMemory(as: *Assembler) ParseError!Instruction.Memory {
    const prefix_start = as.it.pos;
    const ptr_size: ?Instruction.Memory.PtrSize = as.parsePtrSize() catch |err| switch (err) {
        // No size prefix here: rewind and let the address forms below parse from the start.
        error.UnexpectedToken => absent: {
            as.it.seekTo(prefix_start);
            break :absent null;
        },
        else => return err,
    };
    const size = ptr_size orelse .qword;

    try as.skip(1, .{.space});

    // Supported rules and orderings.
    const rules = .{
        .{ .open_br, .general_purpose, .close_br }, // [ general_purpose ]
        .{ .open_br, .general_purpose, .plus, .disp, .close_br }, // [ general_purpose + disp ]
        .{ .open_br, .general_purpose, .minus, .disp, .close_br }, // [ general_purpose - disp ]
        .{ .open_br, .disp, .plus, .general_purpose, .close_br }, // [ disp + general_purpose ]
        .{ .open_br, .general_purpose, .plus, .index, .close_br }, // [ general_purpose + index ]
        .{ .open_br, .general_purpose, .plus, .index, .star, .scale, .close_br }, // [ general_purpose + index * scale ]
        .{ .open_br, .index, .star, .scale, .plus, .general_purpose, .close_br }, // [ index * scale + general_purpose ]
        .{ .open_br, .general_purpose, .plus, .index, .star, .scale, .plus, .disp, .close_br }, // [ general_purpose + index * scale + disp ]
        .{ .open_br, .general_purpose, .plus, .index, .star, .scale, .minus, .disp, .close_br }, // [ general_purpose + index * scale - disp ]
        .{ .open_br, .index, .star, .scale, .plus, .general_purpose, .plus, .disp, .close_br }, // [ index * scale + general_purpose + disp ]
        .{ .open_br, .index, .star, .scale, .plus, .general_purpose, .minus, .disp, .close_br }, // [ index * scale + general_purpose - disp ]
        .{ .open_br, .disp, .plus, .index, .star, .scale, .plus, .general_purpose, .close_br }, // [ disp + index * scale + general_purpose ]
        .{ .open_br, .disp, .plus, .general_purpose, .plus, .index, .star, .scale, .close_br }, // [ disp + general_purpose + index * scale ]
        .{ .open_br, .general_purpose, .plus, .disp, .plus, .index, .star, .scale, .close_br }, // [ general_purpose + disp + index * scale ]
        .{ .open_br, .general_purpose, .minus, .disp, .plus, .index, .star, .scale, .close_br }, // [ general_purpose - disp + index * scale ]
        .{ .open_br, .general_purpose, .plus, .disp, .plus, .scale, .star, .index, .close_br }, // [ general_purpose + disp + scale * index ]
        .{ .open_br, .general_purpose, .minus, .disp, .plus, .scale, .star, .index, .close_br }, // [ general_purpose - disp + scale * index ]
        .{ .open_br, .rip, .plus, .disp, .close_br }, // [ rip + disp ]
        .{ .open_br, .rip, .minus, .disp, .close_br }, // [ rip - disp ]
        .{ .segment, .colon, .disp }, // seg:disp
    };

    const address_start = as.it.pos;
    inline for (rules) |rule| {
        if (as.parseMemoryRule(rule)) |res| {
            if (res.rip) {
                // rip-relative addressing carries a displacement and nothing else.
                const has_extra_parts = res.base != null or res.scale_index != null or res.offset != null;
                if (has_extra_parts) return error.InvalidMemoryOperand;
                return Instruction.Memory.initRip(size, res.disp orelse 0);
            }

            const base = res.base orelse return error.InvalidMemoryOperand;

            if (res.offset) |offset| {
                // An absolute offset cannot be combined with an index or a displacement.
                if (res.scale_index != null or res.disp != null) return error.InvalidMemoryOperand;
                return Instruction.Memory.initMoffs(base, offset);
            }

            return Instruction.Memory.initSib(size, .{
                .base = .{ .reg = base },
                .scale_index = res.scale_index,
                .disp = res.disp orelse 0,
            });
        } else |_| {
            // This form did not match; rewind and try the next one.
            as.it.seekTo(address_start);
        }
    }

    return error.InvalidOperand;
}
2416
/// Parts of a memory operand collected while matching one addressing form; anything the form
/// does not mention keeps its default.
const MemoryParseResult = struct {
    /// Set when the form names `rip`.
    rip: bool = false,
    /// Base register: general purpose for bracketed forms, a segment register for `seg:disp`.
    base: ?Register = null,
    /// Index register and its scale factor.
    scale_index: ?Instruction.Memory.ScaleIndex = null,
    /// Signed 32-bit displacement.
    disp: ?i32 = null,
    /// Unsigned 64-bit absolute offset, used when the written value does not fit in `disp`.
    offset: ?u64 = null,
};
2424
/// Tries to match the upcoming tokens against one addressing form.
///
/// `rule` is a comptime tuple of enum literals. Punctuation literals (`.open_br`, `.close_br`,
/// `.plus`, `.minus`, `.star`, `.colon`) must match the token of the same kind; the remaining
/// literals consume one token each and record it in the result:
/// * `.general_purpose` / `.segment`: a register of that class, stored as `base`;
/// * `.rip`: the word `rip`;
/// * `.index`: any register, stored as the index (scale defaults to 1);
/// * `.scale`: one of 1, 2, 4 or 8, otherwise `error.InvalidScaleIndex`;
/// * `.disp`: a numeral, negated when the preceding rule element is `.minus`. It is stored as
///   `disp` when it fits in an `i32`; a larger non-negative value is stored as `offset`, but
///   only when a segment register base has already been parsed.
///
/// Blanks after each element are skipped. On error the tokenizer position is left wherever
/// matching stopped, so the caller is responsible for rewinding.
fn parseMemoryRule(as: *Assembler, rule: anytype) ParseError!MemoryParseResult {
    var res: MemoryParseResult = .{};
    inline for (rule, 0..) |cond, i| {
        if (@typeInfo(@TypeOf(cond)) != .enum_literal) {
            @compileError("unsupported condition type in the rule: " ++ @typeName(@TypeOf(cond)));
        }
        switch (cond) {
            .open_br, .close_br, .plus, .minus, .star, .colon => {
                _ = try as.expect(cond);
            },
            .general_purpose, .segment => {
                const tok = try as.expect(.string);
                const base = registerFromString(as.source(tok)) orelse return error.InvalidMemoryOperand;
                if (!base.isClass(cond)) return error.InvalidMemoryOperand;
                res.base = base;
            },
            .rip => {
                const tok = try as.expect(.string);
                if (!std.mem.eql(u8, as.source(tok), "rip")) return error.InvalidMemoryOperand;
                res.rip = true;
            },
            .index => {
                const tok = try as.expect(.string);
                const index = registerFromString(as.source(tok)) orelse
                    return error.InvalidMemoryOperand;
                if (res.scale_index) |*si| {
                    si.index = index;
                } else {
                    res.scale_index = .{ .scale = 1, .index = index };
                }
            },
            .scale => {
                const tok = try as.expect(.numeral);
                // Parse wide enough to see 4 and 8, then accept only the scale factors that an
                // x86 SIB byte can express.
                const factor = try std.fmt.parseInt(u8, as.source(tok), 0);
                switch (factor) {
                    1, 2, 4, 8 => {},
                    else => return error.InvalidScaleIndex,
                }
                const Scale = @FieldType(Instruction.Memory.ScaleIndex, "scale");
                const scale = std.math.cast(Scale, factor) orelse return error.InvalidScaleIndex;
                if (res.scale_index) |*si| {
                    si.scale = scale;
                } else {
                    // The index register is filled in by a later `.index` element.
                    res.scale_index = .{ .scale = scale, .index = undefined };
                }
            },
            .disp => {
                const tok = try as.expect(.numeral);
                const is_neg = blk: {
                    if (i > 0) {
                        if (rule[i - 1] == .minus) break :blk true;
                    }
                    break :blk false;
                };
                // The token itself is unsigned. Parse the magnitude at full width and apply the
                // sign afterwards, so that `- 2147483648` (the most negative i32) is accepted.
                const magnitude = try std.fmt.parseInt(u64, as.source(tok), 0);
                if (is_neg) {
                    const disp = std.math.cast(i32, -@as(i65, magnitude)) orelse return error.Overflow;
                    res.disp = disp;
                } else if (std.math.cast(i32, magnitude)) |disp| {
                    res.disp = disp;
                } else {
                    // Too large for a displacement: only meaningful as an absolute offset behind
                    // a segment register. Reject it when no base has been seen yet, so it can
                    // never be paired with a general purpose base parsed later in the rule.
                    const base = res.base orelse return error.Overflow;
                    if (!base.isClass(.segment)) return error.Overflow;
                    res.offset = magnitude;
                }
            },
            else => @compileError("unhandled operand output type: " ++ @tagName(cond)),
        }
        try as.skip(1, .{.space});
    }
    return res;
}
2493
/// Parses a `<size> ptr` prefix, matched case-insensitively, where `<size>` is one of `qword`,
/// `dword`, `word`, `byte` or `tbyte`.
///
/// Fails with `error.UnexpectedToken` when the upcoming tokens are not two words separated by
/// optional blanks, and with `error.InvalidPtrSize` when they are two words but not a known
/// size followed by `ptr`.
fn parsePtrSize(as: *Assembler) ParseError!Instruction.Memory.PtrSize {
    const size_tok = try as.expect(.string);
    try as.skip(1, .{.space});
    const ptr_tok = try as.expect(.string);

    if (!std.ascii.eqlIgnoreCase(as.source(ptr_tok), "ptr")) return error.InvalidPtrSize;

    const Entry = struct { []const u8, Instruction.Memory.PtrSize };
    const known_sizes = [_]Entry{
        .{ "qword", .qword },
        .{ "dword", .dword },
        .{ "word", .word },
        .{ "byte", .byte },
        .{ "tbyte", .tbyte },
    };

    const size_text = as.source(size_tok);
    for (known_sizes) |entry| {
        if (std.ascii.eqlIgnoreCase(size_text, entry[0])) return entry[1];
    }
    return error.InvalidPtrSize;
}
2521};
2522
// Assembles a listing covering the supported operand forms and compares the emitted bytes with
// the expected encoding; each row of `expected` corresponds to one line of `input`.
test "assemble" {
    const input =
        \\int3
        \\mov rax, rbx
        \\mov qword ptr [rbp], rax
        \\mov qword ptr [rbp - 16], rax
        \\mov qword ptr [16 + rbp], rax
        \\mov rax, 0x10
        \\mov byte ptr [rbp - 0x10], 0x10
        \\mov word ptr [rbp + r12], r11w
        \\mov word ptr [rbp + r12 * 2], r11w
        \\mov word ptr [rbp + r12 * 2 - 16], r11w
        \\mov dword ptr [rip - 16], r12d
        \\mov rax, fs:0x0
        \\mov rax, gs:0x1000000000000000
        \\movzx r12, al
        \\imul r12, qword ptr [rbp - 16], 6
        \\jmp 0x0
        \\jc 0x0
        \\jb 0x0
        \\sal rax, 1
        \\sal rax, 63
        \\shl rax, 63
        \\sar rax, 63
        \\shr rax, 63
        \\test byte ptr [rbp - 16], r12b
        \\sal r12, cl
        \\mul qword ptr [rip - 16]
        \\div r12
        \\idiv byte ptr [rbp - 16]
        \\cwde
        \\cbw
        \\cdqe
        \\test byte ptr [rbp], ah
        \\test byte ptr [r12], spl
        \\cdq
        \\cwd
        \\cqo
        \\test bl, 0x1
        \\mov rbx,0x8000000000000000
        \\movss xmm0, dword ptr [rbp]
        \\movss xmm0, xmm1
        \\movss dword ptr [rbp - 16 + rax * 2], xmm7
        \\movss dword ptr [rbp - 16 + rax * 2], xmm8
        \\movss xmm15, xmm9
        \\movsd xmm8, qword ptr [rbp - 16]
        \\movsd qword ptr [rbp - 8], xmm0
        \\movq xmm8, qword ptr [rbp - 16]
        \\movq qword ptr [rbp - 16], xmm8
        \\ucomisd xmm0, qword ptr [rbp - 16]
        \\fisttp qword ptr [rbp - 16]
        \\fisttp word ptr [rip + 32]
        \\fisttp dword ptr [rax]
        \\fld tbyte ptr [rbp]
        \\fld dword ptr [rbp]
        \\xor bl, 0xff
        \\ud2
        \\add rsp, -1
        \\add rsp, 0xff
        \\mov sil, byte ptr [rax + rcx * 1]
        \\
    ;

    // zig fmt: off
    const expected = &[_]u8{
        0xCC,
        0x48, 0x89, 0xD8,
        0x48, 0x89, 0x45, 0x00,
        0x48, 0x89, 0x45, 0xF0,
        0x48, 0x89, 0x45, 0x10,
        0x48, 0xC7, 0xC0, 0x10, 0x00, 0x00, 0x00,
        0xC6, 0x45, 0xF0, 0x10,
        0x66, 0x46, 0x89, 0x5C, 0x25, 0x00,
        0x66, 0x46, 0x89, 0x5C, 0x65, 0x00,
        0x66, 0x46, 0x89, 0x5C, 0x65, 0xF0,
        0x44, 0x89, 0x25, 0xF0, 0xFF, 0xFF, 0xFF,
        0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00,
        0x65, 0x48, 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
        0x4C, 0x0F, 0xB6, 0xE0,
        0x4C, 0x6B, 0x65, 0xF0, 0x06,
        0xE9, 0x00, 0x00, 0x00, 0x00,
        0x0F, 0x82, 0x00, 0x00, 0x00, 0x00,
        0x0F, 0x82, 0x00, 0x00, 0x00, 0x00,
        0x48, 0xD1, 0xE0,
        0x48, 0xC1, 0xE0, 0x3F,
        0x48, 0xC1, 0xE0, 0x3F,
        0x48, 0xC1, 0xF8, 0x3F,
        0x48, 0xC1, 0xE8, 0x3F,
        0x44, 0x84, 0x65, 0xF0,
        0x49, 0xD3, 0xE4,
        0x48, 0xF7, 0x25, 0xF0, 0xFF, 0xFF, 0xFF,
        0x49, 0xF7, 0xF4,
        0xF6, 0x7D, 0xF0,
        0x98,
        0x66, 0x98,
        0x48, 0x98,
        0x84, 0x65, 0x00,
        0x41, 0x84, 0x24, 0x24,
        0x99,
        0x66, 0x99,
        0x48, 0x99,
        0xF6, 0xC3, 0x01,
        0x48, 0xBB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
        0xF3, 0x0F, 0x10, 0x45, 0x00,
        0xF3, 0x0F, 0x10, 0xC1,
        0xF3, 0x0F, 0x11, 0x7C, 0x45, 0xF0,
        0xF3, 0x44, 0x0F, 0x11, 0x44, 0x45, 0xF0,
        0xF3, 0x45, 0x0F, 0x10, 0xF9,
        0xF2, 0x44, 0x0F, 0x10, 0x45, 0xF0,
        0xF2, 0x0F, 0x11, 0x45, 0xF8,
        0x66, 0x4C, 0x0F, 0x6E, 0x45, 0xF0,
        0x66, 0x4C, 0x0F, 0x7E, 0x45, 0xF0,
        0x66, 0x0F, 0x2E, 0x45, 0xF0,
        0xDD, 0x4D, 0xF0,
        0xDF, 0x0D, 0x20, 0x00, 0x00, 0x00,
        0xDB, 0x08,
        0xDB, 0x6D, 0x00,
        0xD9, 0x45, 0x00,
        0x80, 0xF3, 0xFF,
        0x0F, 0x0B,
        0x48, 0x83, 0xC4, 0xFF,
        0x48, 0x81, 0xC4, 0xFF, 0x00, 0x00, 0x00,
        0x40, 0x8A, 0x34, 0x08,
    };
    // zig fmt: on

    var as = Assembler.init(input);
    // `assemble` takes a `*std.Io.Writer`, so collect the output through an allocating writer
    // and hand over a pointer to its interface.
    var output: Writer.Allocating = .init(testing.allocator);
    defer output.deinit();
    try as.assemble(&output.writer);
    try expectEqualHexStrings(expected, output.writer.buffered(), input);
}
2655
// Assembles `<jcc> 0x0` for every conditional-jump mnemonic and checks for the two-byte
// `0x0f <opcode>` form followed by a zero 32-bit displacement.
test "assemble - Jcc" {
    // Mnemonic and the second opcode byte expected for it.
    const mnemonics = [_]struct { Instruction.Mnemonic, u8 }{
        .{ .ja, 0x87 },
        .{ .jae, 0x83 },
        .{ .jb, 0x82 },
        .{ .jbe, 0x86 },
        .{ .jc, 0x82 },
        .{ .je, 0x84 },
        .{ .jg, 0x8f },
        .{ .jge, 0x8d },
        .{ .jl, 0x8c },
        .{ .jle, 0x8e },
        .{ .jna, 0x86 },
        .{ .jnae, 0x82 },
        .{ .jnb, 0x83 },
        .{ .jnbe, 0x87 },
        .{ .jnc, 0x83 },
        .{ .jne, 0x85 },
        .{ .jng, 0x8e },
        .{ .jnge, 0x8c },
        .{ .jnl, 0x8d },
        .{ .jnle, 0x8f },
        .{ .jno, 0x81 },
        .{ .jnp, 0x8b },
        .{ .jns, 0x89 },
        .{ .jnz, 0x85 },
        .{ .jo, 0x80 },
        .{ .jp, 0x8a },
        .{ .jpe, 0x8a },
        .{ .jpo, 0x8b },
        .{ .js, 0x88 },
        .{ .jz, 0x84 },
    };

    inline for (&mnemonics) |mnemonic| {
        const input = @tagName(mnemonic[0]) ++ " 0x0";
        const expected = [_]u8{ 0x0f, mnemonic[1], 0x0, 0x0, 0x0, 0x0 };
        var as = Assembler.init(input);
        // `assemble` takes a `*std.Io.Writer`; collect the output through an allocating writer.
        var output: Writer.Allocating = .init(testing.allocator);
        defer output.deinit();
        try as.assemble(&output.writer);
        try expectEqualHexStrings(&expected, output.writer.buffered(), input);
    }
}
2700
// Assembles `<setcc> al` for every SETcc mnemonic and checks for `0x0f <opcode> 0xC0`.
test "assemble - SETcc" {
    // Mnemonic and the second opcode byte expected for it.
    const mnemonics = [_]struct { Instruction.Mnemonic, u8 }{
        .{ .seta, 0x97 },
        .{ .setae, 0x93 },
        .{ .setb, 0x92 },
        .{ .setbe, 0x96 },
        .{ .setc, 0x92 },
        .{ .sete, 0x94 },
        .{ .setg, 0x9f },
        .{ .setge, 0x9d },
        .{ .setl, 0x9c },
        .{ .setle, 0x9e },
        .{ .setna, 0x96 },
        .{ .setnae, 0x92 },
        .{ .setnb, 0x93 },
        .{ .setnbe, 0x97 },
        .{ .setnc, 0x93 },
        .{ .setne, 0x95 },
        .{ .setng, 0x9e },
        .{ .setnge, 0x9c },
        .{ .setnl, 0x9d },
        .{ .setnle, 0x9f },
        .{ .setno, 0x91 },
        .{ .setnp, 0x9b },
        .{ .setns, 0x99 },
        .{ .setnz, 0x95 },
        .{ .seto, 0x90 },
        .{ .setp, 0x9a },
        .{ .setpe, 0x9a },
        .{ .setpo, 0x9b },
        .{ .sets, 0x98 },
        .{ .setz, 0x94 },
    };

    inline for (&mnemonics) |mnemonic| {
        const input = @tagName(mnemonic[0]) ++ " al";
        const expected = [_]u8{ 0x0f, mnemonic[1], 0xC0 };
        var as = Assembler.init(input);
        // `assemble` takes a `*std.Io.Writer`; collect the output through an allocating writer.
        var output: Writer.Allocating = .init(testing.allocator);
        defer output.deinit();
        try as.assemble(&output.writer);
        try expectEqualHexStrings(&expected, output.writer.buffered(), input);
    }
}
2745
// Assembles `<cmovcc> rax, rbx` for every CMOVcc mnemonic and checks for
// `0x48 0x0f <opcode> 0xC3`.
test "assemble - CMOVcc" {
    // Mnemonic and the second opcode byte expected for it.
    const mnemonics = [_]struct { Instruction.Mnemonic, u8 }{
        .{ .cmova, 0x47 },
        .{ .cmovae, 0x43 },
        .{ .cmovb, 0x42 },
        .{ .cmovbe, 0x46 },
        .{ .cmovc, 0x42 },
        .{ .cmove, 0x44 },
        .{ .cmovg, 0x4f },
        .{ .cmovge, 0x4d },
        .{ .cmovl, 0x4c },
        .{ .cmovle, 0x4e },
        .{ .cmovna, 0x46 },
        .{ .cmovnae, 0x42 },
        .{ .cmovnb, 0x43 },
        .{ .cmovnbe, 0x47 },
        .{ .cmovnc, 0x43 },
        .{ .cmovne, 0x45 },
        .{ .cmovng, 0x4e },
        .{ .cmovnge, 0x4c },
        .{ .cmovnl, 0x4d },
        .{ .cmovnle, 0x4f },
        .{ .cmovno, 0x41 },
        .{ .cmovnp, 0x4b },
        .{ .cmovns, 0x49 },
        .{ .cmovnz, 0x45 },
        .{ .cmovo, 0x40 },
        .{ .cmovp, 0x4a },
        .{ .cmovpe, 0x4a },
        .{ .cmovpo, 0x4b },
        .{ .cmovs, 0x48 },
        .{ .cmovz, 0x44 },
    };

    inline for (&mnemonics) |mnemonic| {
        const input = @tagName(mnemonic[0]) ++ " rax, rbx";
        const expected = [_]u8{ 0x48, 0x0f, mnemonic[1], 0xC3 };
        var as = Assembler.init(input);
        // `assemble` takes a `*std.Io.Writer`; collect the output through an allocating writer.
        var output: Writer.Allocating = .init(testing.allocator);
        defer output.deinit();
        try as.assemble(&output.writer);
        try expectEqualHexStrings(&expected, output.writer.buffered(), input);
    }
}