master
1const std = @import("../../std.zig");
2const errno = linux.errno;
3const unexpectedErrno = std.posix.unexpectedErrno;
4const expectEqual = std.testing.expectEqual;
5const expectError = std.testing.expectError;
6const expect = std.testing.expect;
7
8const linux = std.os.linux;
9const fd_t = linux.fd_t;
10const pid_t = linux.pid_t;
11
12pub const btf = @import("bpf/btf.zig");
13pub const kern = @import("bpf/kern.zig");
14
// instruction classes (encoded in the lowest three bits of the opcode)
pub const LD = 0x00;
pub const LDX = 0x01;
pub const ST = 0x02;
pub const STX = 0x03;
pub const ALU = 0x04;
pub const JMP = 0x05;
pub const RET = 0x06;
pub const MISC = 0x07;

// operand width bits for ld/ldx and st/stx instructions (see Insn.Size)

/// 32-bit
pub const W = 0x00;
/// 16-bit
pub const H = 0x08;
/// 8-bit
pub const B = 0x10;
/// 64-bit
pub const DW = 0x18;

// addressing modes for ld/ldx instructions (see Insn.Mode)
pub const IMM = 0x00;
pub const ABS = 0x20;
pub const IND = 0x40;
pub const MEM = 0x60;
pub const LEN = 0x80;
pub const MSH = 0xa0;
40
// alu fields (operation bits, ORed with the ALU/ALU64 class)
pub const ADD = 0x00;
pub const SUB = 0x10;
pub const MUL = 0x20;
pub const DIV = 0x30;
pub const OR = 0x40;
pub const AND = 0x50;
pub const LSH = 0x60;
pub const RSH = 0x70;
pub const NEG = 0x80;
pub const MOD = 0x90;
pub const XOR = 0xa0;

// jmp fields (condition bits, ORed with the JMP/JMP32 class)
pub const JA = 0x00;
pub const JEQ = 0x10;
pub const JGT = 0x20;
pub const JGE = 0x30;
pub const JSET = 0x40;

// source operand flag: BPF_SRC(code) == ((code) & 0x08)
/// source operand is the 32-bit immediate
pub const K = 0x00;
/// source operand is a register
pub const X = 0x08;

/// maximum number of instructions in a BPF program
pub const MAXINSNS = 4096;
66
// instruction classes (eBPF extensions)
/// jmp mode in word width
pub const JMP32 = 0x06;

/// alu mode in double word width
pub const ALU64 = 0x07;

// ld/ldx fields
/// exclusive add
pub const XADD = 0xc0;

// alu/jmp fields
/// mov reg to reg
pub const MOV = 0xb0;

/// sign extending arithmetic shift right
pub const ARSH = 0xc0;

/// change endianness of a register; combined with the TO_LE/TO_BE flags below
pub const END = 0xd0;

/// convert to little-endian
pub const TO_LE = 0x00;

/// convert to big-endian
pub const TO_BE = 0x08;
pub const FROM_LE = TO_LE;
pub const FROM_BE = TO_BE;

// jmp encodings
/// jump !=
pub const JNE = 0x50;

/// LT is unsigned, '<'
pub const JLT = 0xa0;

/// LE is unsigned, '<='
pub const JLE = 0xb0;

/// SGT is signed '>', GT in x86
pub const JSGT = 0x60;

/// SGE is signed '>=', GE in x86
pub const JSGE = 0x70;

/// SLT is signed, '<'
pub const JSLT = 0xc0;

/// SLE is signed, '<='
pub const JSLE = 0xd0;

/// function call
pub const CALL = 0x80;

/// function return
pub const EXIT = 0x90;
124
/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the
/// program in this cgroup yields to sub-cgroup program.
pub const F_ALLOW_OVERRIDE = 0x1;

/// Flag for prog_attach command. If a sub-cgroup installs some bpf program,
/// that cgroup program gets run in addition to the program in this cgroup.
pub const F_ALLOW_MULTI = 0x2;

/// Flag for prog_attach command.
pub const F_REPLACE = 0x4;

/// If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier
/// will perform strict alignment checking as if the kernel has been built with
/// CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2.
pub const F_STRICT_ALIGNMENT = 0x1;

/// If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier will
/// allow any alignment whatsoever. On platforms with strict alignment
/// requirements for loads and stores (such as sparc and mips) the verifier
/// validates that all loads and stores provably follow this requirement. This
/// flag turns that checking and enforcement off.
///
/// It is mostly used for testing when we want to validate the context and
/// memory access aspects of the verifier, but because of an unaligned access
/// the alignment check would trigger before the one we are interested in.
pub const F_ANY_ALIGNMENT = 0x2;

/// BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
/// Verifier does sub-register def/use analysis and identifies instructions
/// whose def only matters for low 32-bit, high 32-bit is never referenced later
/// through implicit zero extension. Therefore verifier notifies JIT back-ends
/// that it is safe to ignore clearing high 32-bit for these instructions. This
/// saves some back-ends a lot of code-gen. However such optimization is not
/// necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
/// hence hasn't used verifier's analysis result. But, we really want to have a
/// way to be able to verify the correctness of the described optimization on
/// x86_64 on which testsuites are frequently exercised.
///
/// So, this flag is introduced. Once it is set, verifier will randomize high
/// 32-bit for those instructions who has been identified as safe to ignore
/// them. Then, if verifier is not doing correct analysis, such randomization
/// will regress tests to expose bugs.
pub const F_TEST_RND_HI32 = 0x4;

/// If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
/// restrict map and helper usage for such programs. Sleepable BPF programs can
/// only be attached to hooks where kernel execution context allows sleeping.
/// Such programs are allowed to use helpers that may sleep like
/// bpf_copy_from_user().
pub const F_SLEEPABLE = 0x10;

/// When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions:
/// insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
/// insn[0].imm: map fd map fd
/// insn[1].imm: 0 offset into value
/// insn[0].off: 0 0
/// insn[1].off: 0 0
/// ldimm64 rewrite: address of map address of map[0]+offset
/// verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
pub const PSEUDO_MAP_FD = 1;
pub const PSEUDO_MAP_VALUE = 2;

/// when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
/// offset to another bpf function
pub const PSEUDO_CALL = 1;
190
/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing
pub const ANY = 0;

/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist
pub const NOEXIST = 1;

/// flag for BPF_MAP_UPDATE_ELEM command. update existing element
pub const EXIST = 2;

/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update
pub const F_LOCK = 4;

/// flag for BPF_MAP_CREATE command
pub const BPF_F_NO_PREALLOC = 0x1;

/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in
/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can
/// scale and perform better. Note, the LRU nodes (including free nodes) cannot
/// be moved across different LRU lists.
pub const BPF_F_NO_COMMON_LRU = 0x2;

/// flag for BPF_MAP_CREATE command. Specify numa node during map creation
pub const BPF_F_NUMA_NODE = 0x4;

/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from
/// syscall side
pub const BPF_F_RDONLY = 0x8;

/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from
/// syscall side
pub const BPF_F_WRONLY = 0x10;

/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset
/// instead of pointer
pub const BPF_F_STACK_BUILD_ID = 0x20;

/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This
/// should only be used for testing.
pub const BPF_F_ZERO_SEED = 0x40;

/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from
/// program side.
pub const BPF_F_RDONLY_PROG = 0x80;

/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from
/// program side.
pub const BPF_F_WRONLY_PROG = 0x100;

/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted
/// socket
pub const BPF_F_CLONE = 0x200;

/// flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map
pub const BPF_F_MMAPABLE = 0x400;
245
/// These values correspond to "syscalls" within the BPF program's environment,
/// each one is documented in std.os.linux.BPF.kern.
///
/// The enum ordinal is the helper's numeric id; it is emitted as the imm field
/// of a call instruction (see Insn.call).
pub const Helper = enum(i32) {
    unspec,
    map_lookup_elem,
    map_update_elem,
    map_delete_elem,
    probe_read,
    ktime_get_ns,
    trace_printk,
    get_prandom_u32,
    get_smp_processor_id,
    skb_store_bytes,
    l3_csum_replace,
    l4_csum_replace,
    tail_call,
    clone_redirect,
    get_current_pid_tgid,
    get_current_uid_gid,
    get_current_comm,
    get_cgroup_classid,
    skb_vlan_push,
    skb_vlan_pop,
    skb_get_tunnel_key,
    skb_set_tunnel_key,
    perf_event_read,
    redirect,
    get_route_realm,
    perf_event_output,
    skb_load_bytes,
    get_stackid,
    csum_diff,
    skb_get_tunnel_opt,
    skb_set_tunnel_opt,
    skb_change_proto,
    skb_change_type,
    skb_under_cgroup,
    get_hash_recalc,
    get_current_task,
    probe_write_user,
    current_task_under_cgroup,
    skb_change_tail,
    skb_pull_data,
    csum_update,
    set_hash_invalid,
    get_numa_node_id,
    skb_change_head,
    xdp_adjust_head,
    probe_read_str,
    get_socket_cookie,
    get_socket_uid,
    set_hash,
    setsockopt,
    skb_adjust_room,
    redirect_map,
    sk_redirect_map,
    sock_map_update,
    xdp_adjust_meta,
    perf_event_read_value,
    perf_prog_read_value,
    getsockopt,
    override_return,
    sock_ops_cb_flags_set,
    msg_redirect_map,
    msg_apply_bytes,
    msg_cork_bytes,
    msg_pull_data,
    bind,
    xdp_adjust_tail,
    skb_get_xfrm_state,
    get_stack,
    skb_load_bytes_relative,
    fib_lookup,
    sock_hash_update,
    msg_redirect_hash,
    sk_redirect_hash,
    lwt_push_encap,
    lwt_seg6_store_bytes,
    lwt_seg6_adjust_srh,
    lwt_seg6_action,
    rc_repeat,
    rc_keydown,
    skb_cgroup_id,
    get_current_cgroup_id,
    get_local_storage,
    sk_select_reuseport,
    skb_ancestor_cgroup_id,
    sk_lookup_tcp,
    sk_lookup_udp,
    sk_release,
    map_push_elem,
    map_pop_elem,
    map_peek_elem,
    msg_push_data,
    msg_pop_data,
    rc_pointer_rel,
    spin_lock,
    spin_unlock,
    sk_fullsock,
    tcp_sock,
    skb_ecn_set_ce,
    get_listener_sock,
    skc_lookup_tcp,
    tcp_check_syncookie,
    sysctl_get_name,
    sysctl_get_current_value,
    sysctl_get_new_value,
    sysctl_set_new_value,
    strtol,
    strtoul,
    sk_storage_get,
    sk_storage_delete,
    send_signal,
    tcp_gen_syncookie,
    skb_output,
    probe_read_user,
    probe_read_kernel,
    probe_read_user_str,
    probe_read_kernel_str,
    tcp_send_ack,
    send_signal_thread,
    jiffies64,
    read_branch_records,
    get_ns_current_pid_tgid,
    xdp_output,
    get_netns_cookie,
    get_current_ancestor_cgroup_id,
    sk_assign,
    ktime_get_boot_ns,
    seq_printf,
    seq_write,
    sk_cgroup_id,
    sk_ancestor_cgroup_id,
    ringbuf_output,
    ringbuf_reserve,
    ringbuf_submit,
    ringbuf_discard,
    ringbuf_query,
    csum_level,
    skc_to_tcp6_sock,
    skc_to_tcp_sock,
    skc_to_tcp_timewait_sock,
    skc_to_tcp_request_sock,
    skc_to_udp6_sock,
    get_task_stack,
    // non-exhaustive: newer kernels define additional helper ids
    _,
};
393
// TODO: determine that this is the expected bit layout for both little and big
// endian systems
/// a single BPF instruction
pub const Insn = packed struct {
    /// opcode: instruction class plus class-specific operation/size/mode bits
    code: u8,
    /// destination register number (see Reg)
    dst: u4,
    /// source register number (see Reg)
    src: u4,
    /// signed offset, used by jumps and memory instructions
    off: i16,
    /// signed 32-bit immediate operand
    imm: i32,

    /// r0 - r9 are general purpose 64-bit registers, r10 points to the stack
    /// frame
    pub const Reg = enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 };

    /// Discriminates whether an instruction's source operand is a register or
    /// an immediate.
    const Source = enum(u1) { reg, imm };

    /// ld/ldx addressing modes
    const Mode = enum(u8) {
        imm = IMM,
        abs = ABS,
        ind = IND,
        mem = MEM,
        len = LEN,
        msh = MSH,
    };

    /// ALU operations; ORed into the opcode by `alu`
    pub const AluOp = enum(u8) {
        add = ADD,
        sub = SUB,
        mul = MUL,
        div = DIV,
        alu_or = OR,
        alu_and = AND,
        lsh = LSH,
        rsh = RSH,
        neg = NEG,
        mod = MOD,
        xor = XOR,
        mov = MOV,
        arsh = ARSH,
    };

    /// operand widths for memory instructions
    pub const Size = enum(u8) {
        byte = B,
        half_word = H,
        word = W,
        double_word = DW,
    };

    /// jump conditions; ORed into the opcode by `jmp`
    pub const JmpOp = enum(u8) {
        ja = JA,
        jeq = JEQ,
        jgt = JGT,
        jge = JGE,
        jset = JSET,
        jlt = JLT,
        jle = JLE,
        jne = JNE,
        jsgt = JSGT,
        jsge = JSGE,
        jslt = JSLT,
        jsle = JSLE,
    };

    const ImmOrReg = union(Source) {
        reg: Reg,
        imm: i32,
    };

    /// Builds an instruction whose source operand is either a register or a
    /// 32-bit immediate; which one is decided at compile time from the type
    /// of `src`, and the K/X source flag is set accordingly.
    fn imm_reg(code: u8, dst: Reg, src: anytype, off: i16) Insn {
        // enum literals such as `.r2` coerce to Reg, so accept them too
        const imm_or_reg = if (@TypeOf(src) == Reg or @typeInfo(@TypeOf(src)) == .enum_literal)
            ImmOrReg{ .reg = @as(Reg, src) }
        else
            ImmOrReg{ .imm = src };

        const src_type: u8 = switch (imm_or_reg) {
            .imm => K,
            .reg => X,
        };

        return Insn{
            .code = code | src_type,
            .dst = @intFromEnum(dst),
            .src = switch (imm_or_reg) {
                .imm => 0,
                .reg => |r| @intFromEnum(r),
            },
            .off = off,
            .imm = switch (imm_or_reg) {
                .imm => |i| i,
                .reg => 0,
            },
        };
    }

    /// ALU instruction of the given width (32 or 64; anything else is a
    /// compile error). `src` may be a Reg or an immediate.
    pub fn alu(comptime width: comptime_int, op: AluOp, dst: Reg, src: anytype) Insn {
        const width_bitfield = switch (width) {
            32 => ALU,
            64 => ALU64,
            else => @compileError("width must be 32 or 64"),
        };

        return imm_reg(width_bitfield | @intFromEnum(op), dst, src, 0);
    }

    // 64-bit ALU convenience wrappers; `src` may be a Reg or an immediate

    pub fn mov(dst: Reg, src: anytype) Insn {
        return alu(64, .mov, dst, src);
    }

    pub fn add(dst: Reg, src: anytype) Insn {
        return alu(64, .add, dst, src);
    }

    pub fn sub(dst: Reg, src: anytype) Insn {
        return alu(64, .sub, dst, src);
    }

    pub fn mul(dst: Reg, src: anytype) Insn {
        return alu(64, .mul, dst, src);
    }

    pub fn div(dst: Reg, src: anytype) Insn {
        return alu(64, .div, dst, src);
    }

    pub fn alu_or(dst: Reg, src: anytype) Insn {
        return alu(64, .alu_or, dst, src);
    }

    pub fn alu_and(dst: Reg, src: anytype) Insn {
        return alu(64, .alu_and, dst, src);
    }

    pub fn lsh(dst: Reg, src: anytype) Insn {
        return alu(64, .lsh, dst, src);
    }

    pub fn rsh(dst: Reg, src: anytype) Insn {
        return alu(64, .rsh, dst, src);
    }

    pub fn neg(dst: Reg) Insn {
        return alu(64, .neg, dst, 0);
    }

    pub fn mod(dst: Reg, src: anytype) Insn {
        return alu(64, .mod, dst, src);
    }

    pub fn xor(dst: Reg, src: anytype) Insn {
        return alu(64, .xor, dst, src);
    }

    pub fn arsh(dst: Reg, src: anytype) Insn {
        return alu(64, .arsh, dst, src);
    }

    /// conditional/unconditional jump; `src` may be a Reg or an immediate
    pub fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn {
        return imm_reg(JMP | @intFromEnum(op), dst, src, off);
    }

    /// unconditional jump by `off` instructions
    pub fn ja(off: i16) Insn {
        return jmp(.ja, .r0, 0, off);
    }

    pub fn jeq(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jeq, dst, src, off);
    }

    pub fn jgt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jgt, dst, src, off);
    }

    pub fn jge(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jge, dst, src, off);
    }

    pub fn jlt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jlt, dst, src, off);
    }

    pub fn jle(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jle, dst, src, off);
    }

    pub fn jset(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jset, dst, src, off);
    }

    pub fn jne(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jne, dst, src, off);
    }

    pub fn jsgt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jsgt, dst, src, off);
    }

    pub fn jsge(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jsge, dst, src, off);
    }

    pub fn jslt(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jslt, dst, src, off);
    }

    pub fn jsle(dst: Reg, src: anytype, off: i16) Insn {
        return jmp(.jsle, dst, src, off);
    }

    /// exclusive (atomic) 64-bit add of `src` into the memory pointed at by
    /// `dst` (offset 0)
    pub fn xadd(dst: Reg, src: Reg) Insn {
        return Insn{
            .code = STX | XADD | DW,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = 0,
            .imm = 0,
        };
    }

    /// generic LD-class instruction; off is always 0 for these encodings
    fn ld(mode: Mode, size: Size, dst: Reg, src: Reg, imm: i32) Insn {
        return Insn{
            .code = @intFromEnum(mode) | @intFromEnum(size) | LD,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = 0,
            .imm = imm,
        };
    }

    /// absolute-mode packet load
    pub fn ld_abs(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
        return ld(.abs, size, dst, src, imm);
    }

    /// indirect-mode packet load
    pub fn ld_ind(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
        return ld(.ind, size, dst, src, imm);
    }

    /// memory load: dst = *(size *)(src + off)
    pub fn ldx(size: Size, dst: Reg, src: Reg, off: i16) Insn {
        return Insn{
            .code = MEM | @intFromEnum(size) | LDX,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = off,
            .imm = 0,
        };
    }

    /// first half of the two-instruction lddw encoding; carries the low 32
    /// bits of the 64-bit immediate
    fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn {
        return Insn{
            .code = LD | DW | IMM,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = 0,
            .imm = @as(i32, @bitCast(@as(u32, @truncate(imm)))),
        };
    }

    /// second half of the two-instruction lddw encoding; carries the high 32
    /// bits of the 64-bit immediate
    fn ld_imm_impl2(imm: u64) Insn {
        return Insn{
            .code = 0,
            .dst = 0,
            .src = 0,
            .off = 0,
            .imm = @as(i32, @bitCast(@as(u32, @truncate(imm >> 32)))),
        };
    }

    /// load a 64-bit immediate: must be immediately followed by ld_dw2
    pub fn ld_dw1(dst: Reg, imm: u64) Insn {
        return ld_imm_impl1(dst, .r0, imm);
    }

    pub fn ld_dw2(imm: u64) Insn {
        return ld_imm_impl2(imm);
    }

    /// load a map fd: the src register field is set to PSEUDO_MAP_FD so the
    /// verifier rewrites the pair into the map's address; must be immediately
    /// followed by ld_map_fd2
    pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn {
        return ld_imm_impl1(dst, @as(Reg, @enumFromInt(PSEUDO_MAP_FD)), @as(u64, @intCast(map_fd)));
    }

    pub fn ld_map_fd2(map_fd: fd_t) Insn {
        return ld_imm_impl2(@as(u64, @intCast(map_fd)));
    }

    /// store immediate: *(size *)(dst + off) = imm
    pub fn st(size: Size, dst: Reg, off: i16, imm: i32) Insn {
        return Insn{
            .code = MEM | @intFromEnum(size) | ST,
            .dst = @intFromEnum(dst),
            .src = 0,
            .off = off,
            .imm = imm,
        };
    }

    /// store register: *(size *)(dst + off) = src
    pub fn stx(size: Size, dst: Reg, off: i16, src: Reg) Insn {
        return Insn{
            .code = MEM | @intFromEnum(size) | STX,
            .dst = @intFromEnum(dst),
            .src = @intFromEnum(src),
            .off = off,
            .imm = 0,
        };
    }

    /// byte-swap `dst` to the given endianness; the operand width (16/32/64)
    /// is carried in the imm field
    fn endian_swap(endian: std.builtin.Endian, comptime size: Size, dst: Reg) Insn {
        return Insn{
            .code = switch (endian) {
                // 0xdc == ALU | END | TO_BE, 0xd4 == ALU | END | TO_LE
                .big => 0xdc,
                .little => 0xd4,
            },
            .dst = @intFromEnum(dst),
            .src = 0,
            .off = 0,
            .imm = switch (size) {
                .byte => @compileError("can't swap a single byte"),
                .half_word => 16,
                .word => 32,
                .double_word => 64,
            },
        };
    }

    /// convert `dst` to little-endian
    pub fn le(comptime size: Size, dst: Reg) Insn {
        return endian_swap(.little, size, dst);
    }

    /// convert `dst` to big-endian
    pub fn be(comptime size: Size, dst: Reg) Insn {
        return endian_swap(.big, size, dst);
    }

    /// call a BPF helper; the helper id goes in the imm field
    pub fn call(helper: Helper) Insn {
        return Insn{
            .code = JMP | CALL,
            .dst = 0,
            .src = 0,
            .off = 0,
            .imm = @intFromEnum(helper),
        };
    }

    /// exit BPF program
    pub fn exit() Insn {
        return Insn{
            .code = JMP | EXIT,
            .dst = 0,
            .src = 0,
            .off = 0,
            .imm = 0,
        };
    }
};
742
test "insn bitsize" {
    // An Insn must pack to exactly 64 bits (one eBPF instruction word).
    // expectEqual takes (expected, actual) in that order.
    try expectEqual(64, @bitSizeOf(Insn));
}
746
/// Assert that `insn` was encoded with the expected opcode byte.
fn expect_opcode(code: u8, insn: Insn) !void {
    return expectEqual(code, insn.code);
}
750
// The opcodes were grabbed from https://github.com/iovisor/bpf-docs/blob/master/eBPF.md
test "opcodes" {
    // instructions that have a name that end with 1 or 2 are consecutive for
    // loading 64-bit immediates (imm is only 32 bits wide)

    // alu instructions (odd opcode = K/immediate source, +8 = X/register source)
    try expect_opcode(0x07, Insn.add(.r1, 0));
    try expect_opcode(0x0f, Insn.add(.r1, .r2));
    try expect_opcode(0x17, Insn.sub(.r1, 0));
    try expect_opcode(0x1f, Insn.sub(.r1, .r2));
    try expect_opcode(0x27, Insn.mul(.r1, 0));
    try expect_opcode(0x2f, Insn.mul(.r1, .r2));
    try expect_opcode(0x37, Insn.div(.r1, 0));
    try expect_opcode(0x3f, Insn.div(.r1, .r2));
    try expect_opcode(0x47, Insn.alu_or(.r1, 0));
    try expect_opcode(0x4f, Insn.alu_or(.r1, .r2));
    try expect_opcode(0x57, Insn.alu_and(.r1, 0));
    try expect_opcode(0x5f, Insn.alu_and(.r1, .r2));
    try expect_opcode(0x67, Insn.lsh(.r1, 0));
    try expect_opcode(0x6f, Insn.lsh(.r1, .r2));
    try expect_opcode(0x77, Insn.rsh(.r1, 0));
    try expect_opcode(0x7f, Insn.rsh(.r1, .r2));
    try expect_opcode(0x87, Insn.neg(.r1));
    try expect_opcode(0x97, Insn.mod(.r1, 0));
    try expect_opcode(0x9f, Insn.mod(.r1, .r2));
    try expect_opcode(0xa7, Insn.xor(.r1, 0));
    try expect_opcode(0xaf, Insn.xor(.r1, .r2));
    try expect_opcode(0xb7, Insn.mov(.r1, 0));
    try expect_opcode(0xbf, Insn.mov(.r1, .r2));
    try expect_opcode(0xc7, Insn.arsh(.r1, 0));
    try expect_opcode(0xcf, Insn.arsh(.r1, .r2));

    // atomic instructions: might be more of these not documented in the wild
    try expect_opcode(0xdb, Insn.xadd(.r1, .r2));

    // byteswap instructions: the swap width (16/32/64) travels in imm
    try expect_opcode(0xd4, Insn.le(.half_word, .r1));
    try expectEqual(@as(i32, @intCast(16)), Insn.le(.half_word, .r1).imm);
    try expect_opcode(0xd4, Insn.le(.word, .r1));
    try expectEqual(@as(i32, @intCast(32)), Insn.le(.word, .r1).imm);
    try expect_opcode(0xd4, Insn.le(.double_word, .r1));
    try expectEqual(@as(i32, @intCast(64)), Insn.le(.double_word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.half_word, .r1));
    try expectEqual(@as(i32, @intCast(16)), Insn.be(.half_word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.word, .r1));
    try expectEqual(@as(i32, @intCast(32)), Insn.be(.word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.double_word, .r1));
    try expectEqual(@as(i32, @intCast(64)), Insn.be(.double_word, .r1).imm);

    // memory instructions
    try expect_opcode(0x18, Insn.ld_dw1(.r1, 0));
    try expect_opcode(0x00, Insn.ld_dw2(0));

    // loading a map fd: the src register field carries PSEUDO_MAP_FD
    try expect_opcode(0x18, Insn.ld_map_fd1(.r1, 0));
    try expectEqual(@as(u4, @intCast(PSEUDO_MAP_FD)), Insn.ld_map_fd1(.r1, 0).src);
    try expect_opcode(0x00, Insn.ld_map_fd2(0));

    try expect_opcode(0x38, Insn.ld_abs(.double_word, .r1, .r2, 0));
    try expect_opcode(0x20, Insn.ld_abs(.word, .r1, .r2, 0));
    try expect_opcode(0x28, Insn.ld_abs(.half_word, .r1, .r2, 0));
    try expect_opcode(0x30, Insn.ld_abs(.byte, .r1, .r2, 0));

    try expect_opcode(0x58, Insn.ld_ind(.double_word, .r1, .r2, 0));
    try expect_opcode(0x40, Insn.ld_ind(.word, .r1, .r2, 0));
    try expect_opcode(0x48, Insn.ld_ind(.half_word, .r1, .r2, 0));
    try expect_opcode(0x50, Insn.ld_ind(.byte, .r1, .r2, 0));

    try expect_opcode(0x79, Insn.ldx(.double_word, .r1, .r2, 0));
    try expect_opcode(0x61, Insn.ldx(.word, .r1, .r2, 0));
    try expect_opcode(0x69, Insn.ldx(.half_word, .r1, .r2, 0));
    try expect_opcode(0x71, Insn.ldx(.byte, .r1, .r2, 0));

    try expect_opcode(0x62, Insn.st(.word, .r1, 0, 0));
    try expect_opcode(0x6a, Insn.st(.half_word, .r1, 0, 0));
    try expect_opcode(0x72, Insn.st(.byte, .r1, 0, 0));

    try expect_opcode(0x63, Insn.stx(.word, .r1, 0, .r2));
    try expect_opcode(0x6b, Insn.stx(.half_word, .r1, 0, .r2));
    try expect_opcode(0x73, Insn.stx(.byte, .r1, 0, .r2));
    try expect_opcode(0x7b, Insn.stx(.double_word, .r1, 0, .r2));

    // branch instructions
    try expect_opcode(0x05, Insn.ja(0));
    try expect_opcode(0x15, Insn.jeq(.r1, 0, 0));
    try expect_opcode(0x1d, Insn.jeq(.r1, .r2, 0));
    try expect_opcode(0x25, Insn.jgt(.r1, 0, 0));
    try expect_opcode(0x2d, Insn.jgt(.r1, .r2, 0));
    try expect_opcode(0x35, Insn.jge(.r1, 0, 0));
    try expect_opcode(0x3d, Insn.jge(.r1, .r2, 0));
    try expect_opcode(0xa5, Insn.jlt(.r1, 0, 0));
    try expect_opcode(0xad, Insn.jlt(.r1, .r2, 0));
    try expect_opcode(0xb5, Insn.jle(.r1, 0, 0));
    try expect_opcode(0xbd, Insn.jle(.r1, .r2, 0));
    try expect_opcode(0x45, Insn.jset(.r1, 0, 0));
    try expect_opcode(0x4d, Insn.jset(.r1, .r2, 0));
    try expect_opcode(0x55, Insn.jne(.r1, 0, 0));
    try expect_opcode(0x5d, Insn.jne(.r1, .r2, 0));
    try expect_opcode(0x65, Insn.jsgt(.r1, 0, 0));
    try expect_opcode(0x6d, Insn.jsgt(.r1, .r2, 0));
    try expect_opcode(0x75, Insn.jsge(.r1, 0, 0));
    try expect_opcode(0x7d, Insn.jsge(.r1, .r2, 0));
    try expect_opcode(0xc5, Insn.jslt(.r1, 0, 0));
    try expect_opcode(0xcd, Insn.jslt(.r1, .r2, 0));
    try expect_opcode(0xd5, Insn.jsle(.r1, 0, 0));
    try expect_opcode(0xdd, Insn.jsle(.r1, .r2, 0));
    try expect_opcode(0x85, Insn.call(.unspec));
    try expect_opcode(0x95, Insn.exit());
}
860
/// Commands accepted by the bpf() syscall; each doc comment names the
/// bpf_attr variant the command reads.
pub const Cmd = enum(usize) {
    /// Create a map and return a file descriptor that refers to the map. The
    /// close-on-exec file descriptor flag is automatically enabled for the new
    /// file descriptor.
    ///
    /// uses MapCreateAttr
    map_create,

    /// Look up an element by key in a specified map and return its value.
    ///
    /// uses MapElemAttr
    map_lookup_elem,

    /// Create or update an element (key/value pair) in a specified map.
    ///
    /// uses MapElemAttr
    map_update_elem,

    /// Look up and delete an element by key in a specified map.
    ///
    /// uses MapElemAttr
    map_delete_elem,

    /// Look up an element by key in a specified map and return the key of the
    /// next element.
    map_get_next_key,

    /// Verify and load an eBPF program, returning a new file descriptor
    /// associated with the program. The close-on-exec file descriptor flag
    /// is automatically enabled for the new file descriptor.
    ///
    /// uses ProgLoadAttr
    prog_load,

    /// Pin a map or eBPF program to a path within the minimal BPF filesystem
    ///
    /// uses ObjAttr
    obj_pin,

    /// Get the file descriptor of a BPF object pinned to a certain path
    ///
    /// uses ObjAttr
    obj_get,

    /// uses ProgAttachAttr
    prog_attach,

    /// uses ProgAttachAttr
    prog_detach,

    /// uses TestRunAttr
    prog_test_run,

    /// uses GetIdAttr
    prog_get_next_id,

    /// uses GetIdAttr
    map_get_next_id,

    /// uses GetIdAttr
    prog_get_fd_by_id,

    /// uses GetIdAttr
    map_get_fd_by_id,

    /// uses InfoAttr
    obj_get_info_by_fd,

    /// uses QueryAttr
    prog_query,

    /// uses RawTracepointAttr
    raw_tracepoint_open,

    /// uses BtfLoadAttr
    btf_load,

    /// uses GetIdAttr
    btf_get_fd_by_id,

    /// uses TaskFdQueryAttr
    task_fd_query,

    /// uses MapElemAttr
    map_lookup_and_delete_elem,
    map_freeze,

    /// uses GetIdAttr
    btf_get_next_id,

    /// uses MapBatchAttr
    map_lookup_batch,

    /// uses MapBatchAttr
    map_lookup_and_delete_batch,

    /// uses MapBatchAttr
    map_update_batch,

    /// uses MapBatchAttr
    map_delete_batch,

    /// uses LinkCreateAttr
    link_create,

    /// uses LinkUpdateAttr
    link_update,

    /// uses GetIdAttr
    link_get_fd_by_id,

    /// uses GetIdAttr
    link_get_next_id,

    /// uses EnableStatsAttr
    enable_stats,

    /// uses IterCreateAttr
    iter_create,
    link_detach,
    // non-exhaustive: newer kernels define additional commands
    _,
};
983
/// Type of a BPF map; MapCreateAttr.map_type is one of these values.
pub const MapType = enum(u32) {
    unspec,
    hash,
    array,
    prog_array,
    perf_event_array,
    percpu_hash,
    percpu_array,
    stack_trace,
    cgroup_array,
    lru_hash,
    lru_percpu_hash,
    lpm_trie,
    array_of_maps,
    hash_of_maps,
    devmap,
    sockmap,
    cpumap,
    xskmap,
    sockhash,
    cgroup_storage_deprecated,
    reuseport_sockarray,
    percpu_cgroup_storage,
    queue,
    stack,
    sk_storage,
    devmap_hash,
    struct_ops,

    /// An ordered and shared CPU version of perf_event_array. They have
    /// similar semantics:
    /// - variable length records
    /// - no blocking: when full, reservation fails
    /// - memory mappable for ease and speed
    /// - epoll notifications for new data, but can busy poll
    ///
    /// Ringbufs give BPF programs two sets of APIs:
    /// - ringbuf_output() allows copy data from one place to a ring
    ///   buffer, similar to bpf_perf_event_output()
    /// - ringbuf_reserve()/ringbuf_commit()/ringbuf_discard() split the
    ///   process into two steps. First a fixed amount of space is reserved,
    ///   if that is successful then the program gets a pointer to a chunk of
    ///   memory and can be submitted with commit() or discarded with
    ///   discard()
    ///
    /// ringbuf_output() will incur an extra memory copy, but allows to submit
    /// records of the length that's not known beforehand, and is an easy
    /// replacement for perf_event_output().
    ///
    /// ringbuf_reserve() avoids the extra memory copy but requires a known size
    /// of memory beforehand.
    ///
    /// ringbuf_query() allows to query properties of the map, 4 are currently
    /// supported:
    /// - BPF_RB_AVAIL_DATA: amount of unconsumed data in ringbuf
    /// - BPF_RB_RING_SIZE: returns size of ringbuf
    /// - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical position
    ///   of consumer and producer respectively
    ///
    /// key size: 0
    /// value size: 0
    /// max entries: size of ringbuf, must be power of 2
    ringbuf,
    inode_storage,
    task_storage,
    bloom_filter,
    user_ringbuf,
    cgroup_storage,
    arena,

    // non-exhaustive: newer kernels define additional map types
    _,
};
1056
/// Type of a BPF program; each variant's doc comment names the context type
/// passed to programs of that type.
pub const ProgType = enum(u32) {
    unspec,

    /// context type: __sk_buff
    socket_filter,

    /// context type: bpf_user_pt_regs_t
    kprobe,

    /// context type: __sk_buff
    sched_cls,

    /// context type: __sk_buff
    sched_act,

    /// context type: u64
    tracepoint,

    /// context type: xdp_md
    xdp,

    /// context type: bpf_perf_event_data
    perf_event,

    /// context type: __sk_buff
    cgroup_skb,

    /// context type: bpf_sock
    cgroup_sock,

    /// context type: __sk_buff
    lwt_in,

    /// context type: __sk_buff
    lwt_out,

    /// context type: __sk_buff
    lwt_xmit,

    /// context type: bpf_sock_ops
    sock_ops,

    /// context type: __sk_buff
    sk_skb,

    /// context type: bpf_cgroup_dev_ctx
    cgroup_device,

    /// context type: sk_msg_md
    sk_msg,

    /// context type: bpf_raw_tracepoint_args
    raw_tracepoint,

    /// context type: bpf_sock_addr
    cgroup_sock_addr,

    /// context type: __sk_buff
    lwt_seg6local,

    /// context type: u32
    lirc_mode2,

    /// context type: sk_reuseport_md
    sk_reuseport,

    /// context type: __sk_buff
    flow_dissector,

    /// context type: bpf_sysctl
    cgroup_sysctl,

    /// context type: bpf_raw_tracepoint_args
    raw_tracepoint_writable,

    /// context type: bpf_sockopt
    cgroup_sockopt,

    /// context type: void *
    tracing,

    /// context type: void *
    struct_ops,

    /// context type: void *
    ext,

    /// context type: void *
    lsm,

    /// context type: bpf_sk_lookup
    sk_lookup,

    /// context type: void *
    syscall,

    /// context type: bpf_nf_ctx
    netfilter,

    // non-exhaustive: newer kernels define additional program types
    _,
};
1158
/// Attach point for a BPF program (e.g. for the prog_attach and link_create
/// commands).
pub const AttachType = enum(u32) {
    cgroup_inet_ingress,
    cgroup_inet_egress,
    cgroup_inet_sock_create,
    cgroup_sock_ops,
    sk_skb_stream_parser,
    sk_skb_stream_verdict,
    cgroup_device,
    sk_msg_verdict,
    cgroup_inet4_bind,
    cgroup_inet6_bind,
    cgroup_inet4_connect,
    cgroup_inet6_connect,
    cgroup_inet4_post_bind,
    cgroup_inet6_post_bind,
    cgroup_udp4_sendmsg,
    cgroup_udp6_sendmsg,
    lirc_mode2,
    flow_dissector,
    cgroup_sysctl,
    cgroup_udp4_recvmsg,
    cgroup_udp6_recvmsg,
    cgroup_getsockopt,
    cgroup_setsockopt,
    trace_raw_tp,
    trace_fentry,
    trace_fexit,
    modify_return,
    lsm_mac,
    trace_iter,
    cgroup_inet4_getpeername,
    cgroup_inet6_getpeername,
    cgroup_inet4_getsockname,
    cgroup_inet6_getsockname,
    xdp_devmap,
    cgroup_inet_sock_release,
    xdp_cpumap,
    sk_lookup,
    xdp,
    sk_skb_verdict,
    sk_reuseport_select,
    sk_reuseport_select_or_migrate,
    perf_event,
    trace_kprobe_multi,
    lsm_cgroup,
    struct_ops,
    netfilter,
    tcx_ingress,
    tcx_egress,
    trace_uprobe_multi,
    cgroup_unix_connect,
    cgroup_unix_sendmsg,
    cgroup_unix_recvmsg,
    cgroup_unix_getpeername,
    cgroup_unix_getsockname,
    netkit_primary,
    netkit_peer,
    trace_kprobe_session,
    // non-exhaustive: newer kernels define additional attach types
    _,
};
1219
// Fixed size of the map/prog name buffers below (kernel's BPF_OBJ_NAME_LEN).
const obj_name_len = 16;
/// struct used by Cmd.map_create command
/// Mirrors the map-creation section of the kernel's `union bpf_attr`.
/// Fields not relevant to a given call must be zeroed before the syscall
/// (see `map_create`, which starts from a zeroed struct).
pub const MapCreateAttr = extern struct {
    /// one of MapType
    map_type: u32,

    /// size of key in bytes
    key_size: u32,

    /// size of value in bytes
    value_size: u32,

    /// max number of entries in a map
    max_entries: u32,

    /// .map_create related flags
    map_flags: u32,

    /// fd pointing to the inner map
    inner_map_fd: fd_t,

    /// numa node (effective only if MapCreateFlags.numa_node is set)
    numa_node: u32,

    /// name of the map, zero-padded to obj_name_len bytes
    map_name: [obj_name_len]u8,

    /// ifindex of netdev to create on
    map_ifindex: u32,

    /// fd pointing to a BTF type data
    btf_fd: fd_t,

    /// BTF type_id of the key
    btf_key_type_id: u32,

    /// BTF type_id of the value
    /// NOTE(review): the kernel names this field `btf_value_type_id`; the
    /// `bpf_` prefix here looks like a typo, but renaming it would break
    /// callers — confirm before changing.
    bpf_value_type_id: u32,

    /// BTF type_id of a kernel struct stored as the map value
    btf_vmlinux_value_type_id: u32,
};
1260
/// struct used by Cmd.map_*_elem commands
/// `key`, `result.value` and `result.next_key` carry userspace pointers
/// encoded as u64 (see the `@intFromPtr` calls in the wrappers below).
pub const MapElemAttr = extern struct {
    /// map to operate on
    map_fd: fd_t,

    /// pointer to the key, as u64
    key: u64,
    result: extern union {
        /// pointer to the value buffer (lookup/update)
        value: u64,
        /// pointer to the buffer receiving the next key (get_next_key)
        next_key: u64,
    },
    flags: u64,
};
1271
/// struct used by Cmd.map_*_batch commands
pub const MapBatchAttr = extern struct {
    /// start batch, NULL to start from beginning
    in_batch: u64,

    /// output: next start batch
    out_batch: u64,

    /// pointer to an array of keys, as u64
    keys: u64,

    /// pointer to an array of values, as u64
    values: u64,

    /// input/output:
    /// input: # of key/value elements
    /// output: # of filled elements
    count: u32,

    /// map to operate on
    map_fd: fd_t,

    /// per-element flags (same meaning as MapElemAttr.flags)
    elem_flags: u64,
    flags: u64,
};
1290
/// struct used by Cmd.prog_load command
/// Mirrors the program-load section of the kernel's `union bpf_attr`;
/// unused fields must be zeroed (see `prog_load`).
pub const ProgLoadAttr = extern struct {
    /// one of ProgType
    prog_type: u32,

    /// number of instructions pointed to by `insns`
    insn_cnt: u32,

    /// pointer to the instruction array, as u64
    insns: u64,

    /// pointer to the NUL-terminated license string, as u64
    license: u64,

    /// verbosity level of verifier
    log_level: u32,

    /// size of user buffer
    log_size: u32,

    /// user supplied buffer
    log_buf: u64,

    /// not used
    kern_version: u32,
    prog_flags: u32,

    /// name of the program, zero-padded to obj_name_len bytes
    prog_name: [obj_name_len]u8,

    /// ifindex of netdev to prep for.
    prog_ifindex: u32,

    /// For some prog types expected attach type must be known at load time to
    /// verify attach type specific parts of prog (context accesses, allowed
    /// helpers, etc).
    expected_attach_type: u32,

    /// fd pointing to BTF type data
    prog_btf_fd: fd_t,

    /// userspace bpf_func_info size
    func_info_rec_size: u32,
    func_info: u64,

    /// number of bpf_func_info records
    func_info_cnt: u32,

    /// userspace bpf_line_info size
    line_info_rec_size: u32,
    line_info: u64,

    /// number of bpf_line_info records
    line_info_cnt: u32,

    /// in-kernel BTF type id to attach to
    /// NOTE(review): the kernel names this field `attach_btf_id`; "attact"
    /// looks like a typo, but renaming it would break callers — confirm.
    attact_btf_id: u32,

    /// 0 to attach to vmlinux
    /// NOTE(review): the kernel's corresponding field is `attach_prog_fd`
    /// (a program fd, not an id) — verify against the targeted kernel ABI.
    attach_prog_id: u32,
};
1344
/// struct used by Cmd.obj_* commands
pub const ObjAttr = extern struct {
    /// pointer to the NUL-terminated pin path, as u64
    pathname: u64,

    /// fd of the object to pin / that was retrieved
    bpf_fd: fd_t,
    file_flags: u32,
};
1351
/// struct used by Cmd.prog_attach/detach commands
pub const ProgAttachAttr = extern struct {
    /// container object to attach to
    target_fd: fd_t,

    /// eBPF program to attach
    attach_bpf_fd: fd_t,

    /// one of AttachType
    attach_type: u32,
    attach_flags: u32,

    // TODO: BPF_F_REPLACE flags
    /// previously attached eBPF program to replace if .replace is used
    replace_bpf_fd: fd_t,
};
1367
/// struct used by Cmd.prog_test_run command
pub const TestRunAttr = extern struct {
    /// program to run
    prog_fd: fd_t,

    /// output: return value of the program
    retval: u32,

    /// input: len of data_in
    data_size_in: u32,

    /// input/output: len of data_out. returns ENOSPC if data_out is too small.
    data_size_out: u32,

    /// pointer to the input packet data, as u64
    data_in: u64,

    /// pointer to the buffer receiving output data, as u64
    data_out: u64,

    /// number of times to run the program
    repeat: u32,
    duration: u32,

    /// input: len of ctx_in
    ctx_size_in: u32,

    /// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small.
    ctx_size_out: u32,
    ctx_in: u64,
    ctx_out: u64,
};
1391
/// struct used by Cmd.*_get_*_id commands
/// Which union member is meaningful depends on the command; they all occupy
/// the same u32 slot in the kernel's `union bpf_attr`.
pub const GetIdAttr = extern struct {
    id: extern union {
        start_id: u32,
        prog_id: u32,
        map_id: u32,
        btf_id: u32,
        link_id: u32,
    },
    /// output: id following the one passed in
    next_id: u32,
    open_flags: u32,
};
1404
/// struct used by Cmd.obj_get_info_by_fd command
pub const InfoAttr = extern struct {
    /// fd of the object to query
    bpf_fd: fd_t,

    /// input/output: size of the buffer pointed to by `info`
    info_len: u32,

    /// pointer to the info buffer the kernel fills in, as u64
    info: u64,
};
1411
/// struct used by Cmd.prog_query command
pub const QueryAttr = extern struct {
    /// container object to query
    target_fd: fd_t,

    /// one of AttachType
    attach_type: u32,
    query_flags: u32,
    attach_flags: u32,

    /// pointer to an array receiving program ids, as u64
    prog_ids: u64,

    /// input/output: capacity of / number of entries in `prog_ids`
    prog_cnt: u32,
};
1422
/// struct used by Cmd.raw_tracepoint_open command
pub const RawTracepointAttr = extern struct {
    /// pointer to the NUL-terminated tracepoint name, as u64
    name: u64,

    /// program to attach to the tracepoint
    prog_fd: fd_t,
};
1428
/// struct used by Cmd.btf_load command
pub const BtfLoadAttr = extern struct {
    /// pointer to the BTF blob to load, as u64
    btf: u64,

    /// pointer to the user buffer receiving verifier log output, as u64
    btf_log_buf: u64,

    /// size in bytes of the blob pointed to by `btf`
    btf_size: u32,

    /// size in bytes of `btf_log_buf`
    btf_log_size: u32,

    /// verbosity level of the BTF verifier log
    btf_log_level: u32,
};
1437
/// struct used by Cmd.task_fd_query
pub const TaskFdQueryAttr = extern struct {
    /// input: pid
    pid: pid_t,

    /// input: fd
    fd: fd_t,

    /// input: flags
    flags: u32,

    /// input/output: buf len
    buf_len: u32,

    /// input/output:
    /// tp_name for tracepoint
    /// symbol for kprobe
    /// filename for uprobe
    buf: u64,

    /// output: prog_id
    prog_id: u32,

    /// output: BPF_FD_TYPE
    fd_type: u32,

    /// output: probe_offset
    probe_offset: u64,

    /// output: probe_addr
    probe_addr: u64,
};
1470
/// struct used by Cmd.link_create command
pub const LinkCreateAttr = extern struct {
    /// eBPF program to attach
    prog_fd: fd_t,

    /// object to attach to
    target_fd: fd_t,

    /// one of AttachType
    attach_type: u32,

    /// extra flags
    flags: u32,
};
1483
/// struct used by Cmd.link_update command
pub const LinkUpdateAttr = extern struct {
    /// link to update
    link_fd: fd_t,

    /// new program to update link with
    new_prog_fd: fd_t,

    /// extra flags
    flags: u32,

    /// expected link's program fd, it is specified only if BPF_F_REPLACE is
    /// set in flags
    old_prog_fd: fd_t,
};
1498
/// struct used by Cmd.enable_stats command
pub const EnableStatsAttr = extern struct {
    /// kind of statistics to enable
    type: u32,
};
1503
/// struct used by Cmd.iter_create command
pub const IterCreateAttr = extern struct {
    /// link of the iterator to instantiate
    link_fd: fd_t,
    flags: u32,
};
1509
/// Mega struct that is passed to the bpf() syscall
/// Only the member matching the command is meaningful; the wrappers below
/// invoke linux.bpf() with the size of that member (e.g. map_create passes
/// @sizeOf(MapCreateAttr)).
pub const Attr = extern union {
    map_create: MapCreateAttr,
    map_elem: MapElemAttr,
    map_batch: MapBatchAttr,
    prog_load: ProgLoadAttr,
    obj: ObjAttr,
    prog_attach: ProgAttachAttr,
    test_run: TestRunAttr,
    get_id: GetIdAttr,
    info: InfoAttr,
    query: QueryAttr,
    raw_tracepoint: RawTracepointAttr,
    btf_load: BtfLoadAttr,
    task_fd_query: TaskFdQueryAttr,
    link_create: LinkCreateAttr,
    link_update: LinkUpdateAttr,
    enable_stats: EnableStatsAttr,
    iter_create: IterCreateAttr,
};
1530
/// Verifier log configuration for `prog_load`.
pub const Log = struct {
    // verbosity level passed through to ProgLoadAttr.log_level
    level: u32,
    // user buffer the kernel writes the verifier log into
    buf: []u8,
};
1535
/// Creates a BPF map of `map_type` with the given key/value sizes and
/// capacity. Returns the fd of the new map on success; the caller is
/// responsible for closing it.
pub fn map_create(map_type: MapType, key_size: u32, value_size: u32, max_entries: u32) !fd_t {
    // The kernel requires every attr field this wrapper does not expose to
    // be zero, so build the struct from a zeroed base in one expression.
    var attr = Attr{
        .map_create = std.mem.zeroInit(MapCreateAttr, .{
            .map_type = @intFromEnum(map_type),
            .key_size = key_size,
            .value_size = value_size,
            .max_entries = max_entries,
        }),
    };

    const rc = linux.bpf(.map_create, &attr, @sizeOf(MapCreateAttr));
    return switch (errno(rc)) {
        .SUCCESS => @as(fd_t, @intCast(rc)),
        .INVAL => error.MapTypeOrAttrInvalid,
        .NOMEM => error.SystemResources,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1555
1556test "map_create" {
1557 const map = try map_create(.hash, 4, 4, 32);
1558 defer std.os.close(map);
1559}
1560
/// Looks up the element stored under `key` in the map referred to by `fd`
/// and copies its bytes into `value`. Buffer lengths must match the map's
/// key/value sizes. Returns error.NotFound when no such key exists.
pub fn map_lookup_elem(fd: fd_t, key: []const u8, value: []u8) !void {
    // Fully initialize the attr member so every unused field is zero.
    var attr = Attr{ .map_elem = .{
        .map_fd = fd,
        .key = @intFromPtr(key.ptr),
        .result = .{ .value = @intFromPtr(value.ptr) },
        .flags = 0,
    } };

    const rc = linux.bpf(.map_lookup_elem, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => {},
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOENT => error.NotFound,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1581
/// Inserts or updates the element under `key` in the map referred to by
/// `fd`. `flags` carries the kernel's BPF_ANY/BPF_NOEXIST/BPF_EXIST policy.
/// Returns error.ReachedMaxEntries when the map is full.
pub fn map_update_elem(fd: fd_t, key: []const u8, value: []const u8, flags: u64) !void {
    // Fully initialize the attr member so every unused field is zero.
    var attr = Attr{ .map_elem = .{
        .map_fd = fd,
        .key = @intFromPtr(key.ptr),
        .result = .{ .value = @intFromPtr(value.ptr) },
        .flags = flags,
    } };

    const rc = linux.bpf(.map_update_elem, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => {},
        .@"2BIG" => error.ReachedMaxEntries,
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOMEM => error.SystemResources,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1604
/// Deletes the element stored under `key` from the map referred to by `fd`.
/// Returns error.NotFound when no such key exists.
pub fn map_delete_elem(fd: fd_t, key: []const u8) !void {
    // Fully initialize the attr member so every unused field is zero.
    var attr = Attr{ .map_elem = .{
        .map_fd = fd,
        .key = @intFromPtr(key.ptr),
        .result = .{ .value = 0 },
        .flags = 0,
    } };

    const rc = linux.bpf(.map_delete_elem, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => {},
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOENT => error.NotFound,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1624
/// Writes the key following `key` into `next_key` for the map referred to
/// by `fd`. Returns true when a next key was written, false when iteration
/// reached the end of the map (kernel reports ENOENT).
pub fn map_get_next_key(fd: fd_t, key: []const u8, next_key: []u8) !bool {
    // Fully initialize the attr member so every unused field is zero.
    var attr = Attr{ .map_elem = .{
        .map_fd = fd,
        .key = @intFromPtr(key.ptr),
        .result = .{ .next_key = @intFromPtr(next_key.ptr) },
        .flags = 0,
    } };

    const rc = linux.bpf(.map_get_next_key, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => true,
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOENT => false,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1645
1646test "map lookup, update, and delete" {
1647 const key_size = 4;
1648 const value_size = 4;
1649 const map = try map_create(.hash, key_size, value_size, 1);
1650 defer std.os.close(map);
1651
1652 const key = std.mem.zeroes([key_size]u8);
1653 var value = std.mem.zeroes([value_size]u8);
1654
1655 // fails looking up value that doesn't exist
1656 try expectError(error.NotFound, map_lookup_elem(map, &key, &value));
1657
1658 // succeed at updating and looking up element
1659 try map_update_elem(map, &key, &value, 0);
1660 try map_lookup_elem(map, &key, &value);
1661
1662 // fails inserting more than max entries
1663 const second_key = [key_size]u8{ 0, 0, 0, 1 };
1664 try expectError(error.ReachedMaxEntries, map_update_elem(map, &second_key, &value, 0));
1665
1666 // succeed at iterating all keys of map
1667 var lookup_key = [_]u8{ 1, 0, 0, 0 };
1668 var next_key = [_]u8{ 2, 3, 4, 5 }; // garbage value
1669 const status = try map_get_next_key(map, &lookup_key, &next_key);
1670 try expectEqual(status, true);
1671 try expectEqual(next_key, key);
1672 lookup_key = next_key;
1673 const status2 = try map_get_next_key(map, &lookup_key, &next_key);
1674 try expectEqual(status2, false);
1675
1676 // succeed at deleting an existing elem
1677 try map_delete_elem(map, &key);
1678 try expectError(error.NotFound, map_lookup_elem(map, &key, &value));
1679
1680 // fail at deleting a non-existing elem
1681 try expectError(error.NotFound, map_delete_elem(map, &key));
1682}
1683
/// Loads an eBPF program of `prog_type` consisting of `insns`, licensed
/// under `license`. When `log` is non-null the verifier writes its output
/// into `log.buf` at verbosity `log.level`. Returns the program fd on
/// success; the caller is responsible for closing it.
pub fn prog_load(
    prog_type: ProgType,
    insns: []const Insn,
    log: ?*Log,
    license: []const u8,
    kern_version: u32,
    flags: u32,
) !fd_t {
    var attr = Attr{
        .prog_load = std.mem.zeroes(ProgLoadAttr),
    };

    // Alias the active union member to keep the field assignments short.
    const load = &attr.prog_load;
    load.prog_type = @intFromEnum(prog_type);
    load.insns = @intFromPtr(insns.ptr);
    load.insn_cnt = @intCast(insns.len);
    load.license = @intFromPtr(license.ptr);
    load.kern_version = kern_version;
    load.prog_flags = flags;

    if (log) |l| {
        load.log_buf = @intFromPtr(l.buf.ptr);
        load.log_size = @intCast(l.buf.len);
        load.log_level = l.level;
    }

    const rc = linux.bpf(.prog_load, &attr, @sizeOf(ProgLoadAttr));
    return switch (errno(rc)) {
        .SUCCESS => @as(fd_t, @intCast(rc)),
        .ACCES => error.UnsafeProgram,
        .FAULT => unreachable,
        .INVAL => error.InvalidProgram,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1719
1720test "prog_load" {
1721 // this should fail because it does not set r0 before exiting
1722 const bad_prog = [_]Insn{
1723 Insn.exit(),
1724 };
1725
1726 const good_prog = [_]Insn{
1727 Insn.mov(.r0, 0),
1728 Insn.exit(),
1729 };
1730
1731 const prog = try prog_load(.socket_filter, &good_prog, null, "MIT", 0, 0);
1732 defer std.os.close(prog);
1733
1734 try expectError(error.UnsafeProgram, prog_load(.socket_filter, &bad_prog, null, "MIT", 0, 0));
1735}