master
   1const std = @import("../../std.zig");
   2const errno = linux.errno;
   3const unexpectedErrno = std.posix.unexpectedErrno;
   4const expectEqual = std.testing.expectEqual;
   5const expectError = std.testing.expectError;
   6const expect = std.testing.expect;
   7
   8const linux = std.os.linux;
   9const fd_t = linux.fd_t;
  10const pid_t = linux.pid_t;
  11
  12pub const btf = @import("bpf/btf.zig");
  13pub const kern = @import("bpf/kern.zig");
  14
// instruction classes (classic BPF): the low 3 bits of `code`
pub const LD = 0x00;
pub const LDX = 0x01;
pub const ST = 0x02;
pub const STX = 0x03;
pub const ALU = 0x04;
pub const JMP = 0x05;
pub const RET = 0x06;
pub const MISC = 0x07;

// operand size bits for load/store instructions
/// 32-bit
pub const W = 0x00;
/// 16-bit
pub const H = 0x08;
/// 8-bit
pub const B = 0x10;
/// 64-bit
pub const DW = 0x18;

// addressing-mode bits for load/store instructions
pub const IMM = 0x00;
pub const ABS = 0x20;
pub const IND = 0x40;
pub const MEM = 0x60;
pub const LEN = 0x80;
pub const MSH = 0xa0;

// alu fields: the operation bits (high nibble) of an ALU-class `code`
pub const ADD = 0x00;
pub const SUB = 0x10;
pub const MUL = 0x20;
pub const DIV = 0x30;
pub const OR = 0x40;
pub const AND = 0x50;
pub const LSH = 0x60;
pub const RSH = 0x70;
pub const NEG = 0x80;
pub const MOD = 0x90;
pub const XOR = 0xa0;

// jmp fields: the condition bits (high nibble) of a JMP-class `code`
pub const JA = 0x00;
pub const JEQ = 0x10;
pub const JGT = 0x20;
pub const JGE = 0x30;
pub const JSET = 0x40;

// source-operand selector: BPF_SRC(code) is `(code) & 0x08`
/// source operand is the 32-bit immediate
pub const K = 0x00;
/// source operand is the `src` register
pub const X = 0x08;
  64
/// Maximum number of instructions allowed in a BPF program.
pub const MAXINSNS = 4096;

// instruction classes (eBPF additions)
/// jmp mode in word width
pub const JMP32 = 0x06;

/// alu mode in double word width
pub const ALU64 = 0x07;

// ld/ldx fields
/// exclusive add
pub const XADD = 0xc0;

// alu/jmp fields
/// mov reg to reg
pub const MOV = 0xb0;

/// sign extending arithmetic shift right
pub const ARSH = 0xc0;

// change endianness of a register
/// flags for endianness conversion:
pub const END = 0xd0;

/// convert to little-endian
pub const TO_LE = 0x00;

/// convert to big-endian
pub const TO_BE = 0x08;
pub const FROM_LE = TO_LE;
pub const FROM_BE = TO_BE;

// jmp encodings (eBPF additions)
/// jump '!='
pub const JNE = 0x50;

/// LT is unsigned, '<'
pub const JLT = 0xa0;

/// LE is unsigned, '<='
pub const JLE = 0xb0;

/// SGT is signed '>', GT in x86
pub const JSGT = 0x60;

/// SGE is signed '>=', GE in x86
pub const JSGE = 0x70;

/// SLT is signed, '<'
pub const JSLT = 0xc0;

/// SLE is signed, '<='
pub const JSLE = 0xd0;

/// function call
pub const CALL = 0x80;

/// function return
pub const EXIT = 0x90;
 124
/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the
/// program in this cgroup yields to sub-cgroup program.
pub const F_ALLOW_OVERRIDE = 0x1;

/// Flag for prog_attach command. If a sub-cgroup installs some bpf program,
/// that cgroup program gets run in addition to the program in this cgroup.
pub const F_ALLOW_MULTI = 0x2;

/// Flag for prog_attach command.
pub const F_REPLACE = 0x4;

/// If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier
/// will perform strict alignment checking as if the kernel has been built with
/// CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2.
pub const F_STRICT_ALIGNMENT = 0x1;

/// If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier will
/// allow any alignment whatsoever. On platforms with strict alignment
/// requirements for loads and stores (such as sparc and mips) the verifier
/// validates that all loads and stores provably follow this requirement. This
/// flag turns that checking and enforcement off.
///
/// It is mostly used for testing when we want to validate the context and
/// memory access aspects of the verifier, but because of an unaligned access
/// the alignment check would trigger before the one we are interested in.
pub const F_ANY_ALIGNMENT = 0x2;

/// BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
/// Verifier does sub-register def/use analysis and identifies instructions
/// whose def only matters for low 32-bit, high 32-bit is never referenced later
/// through implicit zero extension. Therefore verifier notifies JIT back-ends
/// that it is safe to ignore clearing high 32-bit for these instructions. This
/// saves some back-ends a lot of code-gen. However such optimization is not
/// necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
/// hence hasn't used verifier's analysis result. But, we really want to have a
/// way to be able to verify the correctness of the described optimization on
/// x86_64 on which testsuites are frequently exercised.
///
/// So, this flag is introduced. Once it is set, verifier will randomize high
/// 32-bit for those instructions who has been identified as safe to ignore
/// them. Then, if verifier is not doing correct analysis, such randomization
/// will regress tests to expose bugs.
pub const F_TEST_RND_HI32 = 0x4;

/// If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
/// restrict map and helper usage for such programs. Sleepable BPF programs can
/// only be attached to hooks where kernel execution context allows sleeping.
/// Such programs are allowed to use helpers that may sleep like
/// bpf_copy_from_user().
pub const F_SLEEPABLE = 0x10;

/// When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions:
/// insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
/// insn[0].imm:      map fd              map fd
/// insn[1].imm:      0                   offset into value
/// insn[0].off:      0                   0
/// insn[1].off:      0                   0
/// ldimm64 rewrite:  address of map      address of map[0]+offset
/// verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
pub const PSEUDO_MAP_FD = 1;
pub const PSEUDO_MAP_VALUE = 2;

/// when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
/// offset to another bpf function
pub const PSEUDO_CALL = 1;

/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing
pub const ANY = 0;

/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist
pub const NOEXIST = 1;

/// flag for BPF_MAP_UPDATE_ELEM command. update existing element
pub const EXIST = 2;

/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update
pub const F_LOCK = 4;

/// flag for BPF_MAP_CREATE command
pub const BPF_F_NO_PREALLOC = 0x1;

/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in
/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can
/// scale and perform better. Note, the LRU nodes (including free nodes) cannot
/// be moved across different LRU lists.
pub const BPF_F_NO_COMMON_LRU = 0x2;

/// flag for BPF_MAP_CREATE command. Specify numa node during map creation
pub const BPF_F_NUMA_NODE = 0x4;

/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from
/// syscall side
pub const BPF_F_RDONLY = 0x8;

/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from
/// syscall side
pub const BPF_F_WRONLY = 0x10;

/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset
/// instead of pointer
pub const BPF_F_STACK_BUILD_ID = 0x20;

/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This
/// should only be used for testing.
pub const BPF_F_ZERO_SEED = 0x40;

/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from
/// program side.
pub const BPF_F_RDONLY_PROG = 0x80;

/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from
/// program side.
pub const BPF_F_WRONLY_PROG = 0x100;

/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted
/// socket
pub const BPF_F_CLONE = 0x200;

/// flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map
pub const BPF_F_MMAPABLE = 0x400;
 245
/// These values correspond to "syscalls" within the BPF program's environment,
/// each one is documented in std.os.linux.BPF.kern
///
/// The ordinal value of each variant is the kernel's helper-function ID, so
/// the order of this list must not be changed.
pub const Helper = enum(i32) {
    unspec,
    map_lookup_elem,
    map_update_elem,
    map_delete_elem,
    probe_read,
    ktime_get_ns,
    trace_printk,
    get_prandom_u32,
    get_smp_processor_id,
    skb_store_bytes,
    l3_csum_replace,
    l4_csum_replace,
    tail_call,
    clone_redirect,
    get_current_pid_tgid,
    get_current_uid_gid,
    get_current_comm,
    get_cgroup_classid,
    skb_vlan_push,
    skb_vlan_pop,
    skb_get_tunnel_key,
    skb_set_tunnel_key,
    perf_event_read,
    redirect,
    get_route_realm,
    perf_event_output,
    skb_load_bytes,
    get_stackid,
    csum_diff,
    skb_get_tunnel_opt,
    skb_set_tunnel_opt,
    skb_change_proto,
    skb_change_type,
    skb_under_cgroup,
    get_hash_recalc,
    get_current_task,
    probe_write_user,
    current_task_under_cgroup,
    skb_change_tail,
    skb_pull_data,
    csum_update,
    set_hash_invalid,
    get_numa_node_id,
    skb_change_head,
    xdp_adjust_head,
    probe_read_str,
    get_socket_cookie,
    get_socket_uid,
    set_hash,
    setsockopt,
    skb_adjust_room,
    redirect_map,
    sk_redirect_map,
    sock_map_update,
    xdp_adjust_meta,
    perf_event_read_value,
    perf_prog_read_value,
    getsockopt,
    override_return,
    sock_ops_cb_flags_set,
    msg_redirect_map,
    msg_apply_bytes,
    msg_cork_bytes,
    msg_pull_data,
    bind,
    xdp_adjust_tail,
    skb_get_xfrm_state,
    get_stack,
    skb_load_bytes_relative,
    fib_lookup,
    sock_hash_update,
    msg_redirect_hash,
    sk_redirect_hash,
    lwt_push_encap,
    lwt_seg6_store_bytes,
    lwt_seg6_adjust_srh,
    lwt_seg6_action,
    rc_repeat,
    rc_keydown,
    skb_cgroup_id,
    get_current_cgroup_id,
    get_local_storage,
    sk_select_reuseport,
    skb_ancestor_cgroup_id,
    sk_lookup_tcp,
    sk_lookup_udp,
    sk_release,
    map_push_elem,
    map_pop_elem,
    map_peek_elem,
    msg_push_data,
    msg_pop_data,
    rc_pointer_rel,
    spin_lock,
    spin_unlock,
    sk_fullsock,
    tcp_sock,
    skb_ecn_set_ce,
    get_listener_sock,
    skc_lookup_tcp,
    tcp_check_syncookie,
    sysctl_get_name,
    sysctl_get_current_value,
    sysctl_get_new_value,
    sysctl_set_new_value,
    strtol,
    strtoul,
    sk_storage_get,
    sk_storage_delete,
    send_signal,
    tcp_gen_syncookie,
    skb_output,
    probe_read_user,
    probe_read_kernel,
    probe_read_user_str,
    probe_read_kernel_str,
    tcp_send_ack,
    send_signal_thread,
    jiffies64,
    read_branch_records,
    get_ns_current_pid_tgid,
    xdp_output,
    get_netns_cookie,
    get_current_ancestor_cgroup_id,
    sk_assign,
    ktime_get_boot_ns,
    seq_printf,
    seq_write,
    sk_cgroup_id,
    sk_ancestor_cgroup_id,
    ringbuf_output,
    ringbuf_reserve,
    ringbuf_submit,
    ringbuf_discard,
    ringbuf_query,
    csum_level,
    skc_to_tcp6_sock,
    skc_to_tcp_sock,
    skc_to_tcp_timewait_sock,
    skc_to_tcp_request_sock,
    skc_to_udp6_sock,
    get_task_stack,
    // non-exhaustive: newer kernels define additional helper IDs
    _,
};
 393
 394// TODO: determine that this is the expected bit layout for both little and big
 395// endian systems
 396/// a single BPF instruction
 397pub const Insn = packed struct {
 398    code: u8,
 399    dst: u4,
 400    src: u4,
 401    off: i16,
 402    imm: i32,
 403
 404    /// r0 - r9 are general purpose 64-bit registers, r10 points to the stack
 405    /// frame
 406    pub const Reg = enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 };
 407    const Source = enum(u1) { reg, imm };
 408
 409    const Mode = enum(u8) {
 410        imm = IMM,
 411        abs = ABS,
 412        ind = IND,
 413        mem = MEM,
 414        len = LEN,
 415        msh = MSH,
 416    };
 417
 418    pub const AluOp = enum(u8) {
 419        add = ADD,
 420        sub = SUB,
 421        mul = MUL,
 422        div = DIV,
 423        alu_or = OR,
 424        alu_and = AND,
 425        lsh = LSH,
 426        rsh = RSH,
 427        neg = NEG,
 428        mod = MOD,
 429        xor = XOR,
 430        mov = MOV,
 431        arsh = ARSH,
 432    };
 433
 434    pub const Size = enum(u8) {
 435        byte = B,
 436        half_word = H,
 437        word = W,
 438        double_word = DW,
 439    };
 440
 441    pub const JmpOp = enum(u8) {
 442        ja = JA,
 443        jeq = JEQ,
 444        jgt = JGT,
 445        jge = JGE,
 446        jset = JSET,
 447        jlt = JLT,
 448        jle = JLE,
 449        jne = JNE,
 450        jsgt = JSGT,
 451        jsge = JSGE,
 452        jslt = JSLT,
 453        jsle = JSLE,
 454    };
 455
 456    const ImmOrReg = union(Source) {
 457        reg: Reg,
 458        imm: i32,
 459    };
 460
 461    fn imm_reg(code: u8, dst: Reg, src: anytype, off: i16) Insn {
 462        const imm_or_reg = if (@TypeOf(src) == Reg or @typeInfo(@TypeOf(src)) == .enum_literal)
 463            ImmOrReg{ .reg = @as(Reg, src) }
 464        else
 465            ImmOrReg{ .imm = src };
 466
 467        const src_type: u8 = switch (imm_or_reg) {
 468            .imm => K,
 469            .reg => X,
 470        };
 471
 472        return Insn{
 473            .code = code | src_type,
 474            .dst = @intFromEnum(dst),
 475            .src = switch (imm_or_reg) {
 476                .imm => 0,
 477                .reg => |r| @intFromEnum(r),
 478            },
 479            .off = off,
 480            .imm = switch (imm_or_reg) {
 481                .imm => |i| i,
 482                .reg => 0,
 483            },
 484        };
 485    }
 486
 487    pub fn alu(comptime width: comptime_int, op: AluOp, dst: Reg, src: anytype) Insn {
 488        const width_bitfield = switch (width) {
 489            32 => ALU,
 490            64 => ALU64,
 491            else => @compileError("width must be 32 or 64"),
 492        };
 493
 494        return imm_reg(width_bitfield | @intFromEnum(op), dst, src, 0);
 495    }
 496
 497    pub fn mov(dst: Reg, src: anytype) Insn {
 498        return alu(64, .mov, dst, src);
 499    }
 500
 501    pub fn add(dst: Reg, src: anytype) Insn {
 502        return alu(64, .add, dst, src);
 503    }
 504
 505    pub fn sub(dst: Reg, src: anytype) Insn {
 506        return alu(64, .sub, dst, src);
 507    }
 508
 509    pub fn mul(dst: Reg, src: anytype) Insn {
 510        return alu(64, .mul, dst, src);
 511    }
 512
 513    pub fn div(dst: Reg, src: anytype) Insn {
 514        return alu(64, .div, dst, src);
 515    }
 516
 517    pub fn alu_or(dst: Reg, src: anytype) Insn {
 518        return alu(64, .alu_or, dst, src);
 519    }
 520
 521    pub fn alu_and(dst: Reg, src: anytype) Insn {
 522        return alu(64, .alu_and, dst, src);
 523    }
 524
 525    pub fn lsh(dst: Reg, src: anytype) Insn {
 526        return alu(64, .lsh, dst, src);
 527    }
 528
 529    pub fn rsh(dst: Reg, src: anytype) Insn {
 530        return alu(64, .rsh, dst, src);
 531    }
 532
 533    pub fn neg(dst: Reg) Insn {
 534        return alu(64, .neg, dst, 0);
 535    }
 536
 537    pub fn mod(dst: Reg, src: anytype) Insn {
 538        return alu(64, .mod, dst, src);
 539    }
 540
 541    pub fn xor(dst: Reg, src: anytype) Insn {
 542        return alu(64, .xor, dst, src);
 543    }
 544
 545    pub fn arsh(dst: Reg, src: anytype) Insn {
 546        return alu(64, .arsh, dst, src);
 547    }
 548
 549    pub fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn {
 550        return imm_reg(JMP | @intFromEnum(op), dst, src, off);
 551    }
 552
 553    pub fn ja(off: i16) Insn {
 554        return jmp(.ja, .r0, 0, off);
 555    }
 556
 557    pub fn jeq(dst: Reg, src: anytype, off: i16) Insn {
 558        return jmp(.jeq, dst, src, off);
 559    }
 560
 561    pub fn jgt(dst: Reg, src: anytype, off: i16) Insn {
 562        return jmp(.jgt, dst, src, off);
 563    }
 564
 565    pub fn jge(dst: Reg, src: anytype, off: i16) Insn {
 566        return jmp(.jge, dst, src, off);
 567    }
 568
 569    pub fn jlt(dst: Reg, src: anytype, off: i16) Insn {
 570        return jmp(.jlt, dst, src, off);
 571    }
 572
 573    pub fn jle(dst: Reg, src: anytype, off: i16) Insn {
 574        return jmp(.jle, dst, src, off);
 575    }
 576
 577    pub fn jset(dst: Reg, src: anytype, off: i16) Insn {
 578        return jmp(.jset, dst, src, off);
 579    }
 580
 581    pub fn jne(dst: Reg, src: anytype, off: i16) Insn {
 582        return jmp(.jne, dst, src, off);
 583    }
 584
 585    pub fn jsgt(dst: Reg, src: anytype, off: i16) Insn {
 586        return jmp(.jsgt, dst, src, off);
 587    }
 588
 589    pub fn jsge(dst: Reg, src: anytype, off: i16) Insn {
 590        return jmp(.jsge, dst, src, off);
 591    }
 592
 593    pub fn jslt(dst: Reg, src: anytype, off: i16) Insn {
 594        return jmp(.jslt, dst, src, off);
 595    }
 596
 597    pub fn jsle(dst: Reg, src: anytype, off: i16) Insn {
 598        return jmp(.jsle, dst, src, off);
 599    }
 600
 601    pub fn xadd(dst: Reg, src: Reg) Insn {
 602        return Insn{
 603            .code = STX | XADD | DW,
 604            .dst = @intFromEnum(dst),
 605            .src = @intFromEnum(src),
 606            .off = 0,
 607            .imm = 0,
 608        };
 609    }
 610
 611    fn ld(mode: Mode, size: Size, dst: Reg, src: Reg, imm: i32) Insn {
 612        return Insn{
 613            .code = @intFromEnum(mode) | @intFromEnum(size) | LD,
 614            .dst = @intFromEnum(dst),
 615            .src = @intFromEnum(src),
 616            .off = 0,
 617            .imm = imm,
 618        };
 619    }
 620
 621    pub fn ld_abs(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
 622        return ld(.abs, size, dst, src, imm);
 623    }
 624
 625    pub fn ld_ind(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
 626        return ld(.ind, size, dst, src, imm);
 627    }
 628
 629    pub fn ldx(size: Size, dst: Reg, src: Reg, off: i16) Insn {
 630        return Insn{
 631            .code = MEM | @intFromEnum(size) | LDX,
 632            .dst = @intFromEnum(dst),
 633            .src = @intFromEnum(src),
 634            .off = off,
 635            .imm = 0,
 636        };
 637    }
 638
 639    fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn {
 640        return Insn{
 641            .code = LD | DW | IMM,
 642            .dst = @intFromEnum(dst),
 643            .src = @intFromEnum(src),
 644            .off = 0,
 645            .imm = @as(i32, @bitCast(@as(u32, @truncate(imm)))),
 646        };
 647    }
 648
 649    fn ld_imm_impl2(imm: u64) Insn {
 650        return Insn{
 651            .code = 0,
 652            .dst = 0,
 653            .src = 0,
 654            .off = 0,
 655            .imm = @as(i32, @bitCast(@as(u32, @truncate(imm >> 32)))),
 656        };
 657    }
 658
 659    pub fn ld_dw1(dst: Reg, imm: u64) Insn {
 660        return ld_imm_impl1(dst, .r0, imm);
 661    }
 662
 663    pub fn ld_dw2(imm: u64) Insn {
 664        return ld_imm_impl2(imm);
 665    }
 666
 667    pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn {
 668        return ld_imm_impl1(dst, @as(Reg, @enumFromInt(PSEUDO_MAP_FD)), @as(u64, @intCast(map_fd)));
 669    }
 670
 671    pub fn ld_map_fd2(map_fd: fd_t) Insn {
 672        return ld_imm_impl2(@as(u64, @intCast(map_fd)));
 673    }
 674
 675    pub fn st(size: Size, dst: Reg, off: i16, imm: i32) Insn {
 676        return Insn{
 677            .code = MEM | @intFromEnum(size) | ST,
 678            .dst = @intFromEnum(dst),
 679            .src = 0,
 680            .off = off,
 681            .imm = imm,
 682        };
 683    }
 684
 685    pub fn stx(size: Size, dst: Reg, off: i16, src: Reg) Insn {
 686        return Insn{
 687            .code = MEM | @intFromEnum(size) | STX,
 688            .dst = @intFromEnum(dst),
 689            .src = @intFromEnum(src),
 690            .off = off,
 691            .imm = 0,
 692        };
 693    }
 694
 695    fn endian_swap(endian: std.builtin.Endian, comptime size: Size, dst: Reg) Insn {
 696        return Insn{
 697            .code = switch (endian) {
 698                .big => 0xdc,
 699                .little => 0xd4,
 700            },
 701            .dst = @intFromEnum(dst),
 702            .src = 0,
 703            .off = 0,
 704            .imm = switch (size) {
 705                .byte => @compileError("can't swap a single byte"),
 706                .half_word => 16,
 707                .word => 32,
 708                .double_word => 64,
 709            },
 710        };
 711    }
 712
 713    pub fn le(comptime size: Size, dst: Reg) Insn {
 714        return endian_swap(.little, size, dst);
 715    }
 716
 717    pub fn be(comptime size: Size, dst: Reg) Insn {
 718        return endian_swap(.big, size, dst);
 719    }
 720
 721    pub fn call(helper: Helper) Insn {
 722        return Insn{
 723            .code = JMP | CALL,
 724            .dst = 0,
 725            .src = 0,
 726            .off = 0,
 727            .imm = @intFromEnum(helper),
 728        };
 729    }
 730
 731    /// exit BPF program
 732    pub fn exit() Insn {
 733        return Insn{
 734            .code = JMP | EXIT,
 735            .dst = 0,
 736            .src = 0,
 737            .off = 0,
 738            .imm = 0,
 739        };
 740    }
 741};
 742
 743test "insn bitsize" {
 744    try expectEqual(@bitSizeOf(Insn), 64);
 745}
 746
 747fn expect_opcode(code: u8, insn: Insn) !void {
 748    try expectEqual(code, insn.code);
 749}
 750
// The opcodes were grabbed from https://github.com/iovisor/bpf-docs/blob/master/eBPF.md
test "opcodes" {
    // instructions that have a name that end with 1 or 2 are consecutive for
    // loading 64-bit immediates (imm is only 32 bits wide)

    // alu instructions: immediate forms end in 0x7 (K), register forms in 0xf (X)
    try expect_opcode(0x07, Insn.add(.r1, 0));
    try expect_opcode(0x0f, Insn.add(.r1, .r2));
    try expect_opcode(0x17, Insn.sub(.r1, 0));
    try expect_opcode(0x1f, Insn.sub(.r1, .r2));
    try expect_opcode(0x27, Insn.mul(.r1, 0));
    try expect_opcode(0x2f, Insn.mul(.r1, .r2));
    try expect_opcode(0x37, Insn.div(.r1, 0));
    try expect_opcode(0x3f, Insn.div(.r1, .r2));
    try expect_opcode(0x47, Insn.alu_or(.r1, 0));
    try expect_opcode(0x4f, Insn.alu_or(.r1, .r2));
    try expect_opcode(0x57, Insn.alu_and(.r1, 0));
    try expect_opcode(0x5f, Insn.alu_and(.r1, .r2));
    try expect_opcode(0x67, Insn.lsh(.r1, 0));
    try expect_opcode(0x6f, Insn.lsh(.r1, .r2));
    try expect_opcode(0x77, Insn.rsh(.r1, 0));
    try expect_opcode(0x7f, Insn.rsh(.r1, .r2));
    try expect_opcode(0x87, Insn.neg(.r1));
    try expect_opcode(0x97, Insn.mod(.r1, 0));
    try expect_opcode(0x9f, Insn.mod(.r1, .r2));
    try expect_opcode(0xa7, Insn.xor(.r1, 0));
    try expect_opcode(0xaf, Insn.xor(.r1, .r2));
    try expect_opcode(0xb7, Insn.mov(.r1, 0));
    try expect_opcode(0xbf, Insn.mov(.r1, .r2));
    try expect_opcode(0xc7, Insn.arsh(.r1, 0));
    try expect_opcode(0xcf, Insn.arsh(.r1, .r2));

    // atomic instructions: might be more of these not documented in the wild
    try expect_opcode(0xdb, Insn.xadd(.r1, .r2));

    // byteswap instructions: the swap width (in bits) is carried in imm
    try expect_opcode(0xd4, Insn.le(.half_word, .r1));
    try expectEqual(@as(i32, @intCast(16)), Insn.le(.half_word, .r1).imm);
    try expect_opcode(0xd4, Insn.le(.word, .r1));
    try expectEqual(@as(i32, @intCast(32)), Insn.le(.word, .r1).imm);
    try expect_opcode(0xd4, Insn.le(.double_word, .r1));
    try expectEqual(@as(i32, @intCast(64)), Insn.le(.double_word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.half_word, .r1));
    try expectEqual(@as(i32, @intCast(16)), Insn.be(.half_word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.word, .r1));
    try expectEqual(@as(i32, @intCast(32)), Insn.be(.word, .r1).imm);
    try expect_opcode(0xdc, Insn.be(.double_word, .r1));
    try expectEqual(@as(i32, @intCast(64)), Insn.be(.double_word, .r1).imm);

    // memory instructions
    try expect_opcode(0x18, Insn.ld_dw1(.r1, 0));
    try expect_opcode(0x00, Insn.ld_dw2(0));

    // loading a map fd: the pseudo-register marks the pair for the verifier
    try expect_opcode(0x18, Insn.ld_map_fd1(.r1, 0));
    try expectEqual(@as(u4, @intCast(PSEUDO_MAP_FD)), Insn.ld_map_fd1(.r1, 0).src);
    try expect_opcode(0x00, Insn.ld_map_fd2(0));

    try expect_opcode(0x38, Insn.ld_abs(.double_word, .r1, .r2, 0));
    try expect_opcode(0x20, Insn.ld_abs(.word, .r1, .r2, 0));
    try expect_opcode(0x28, Insn.ld_abs(.half_word, .r1, .r2, 0));
    try expect_opcode(0x30, Insn.ld_abs(.byte, .r1, .r2, 0));

    try expect_opcode(0x58, Insn.ld_ind(.double_word, .r1, .r2, 0));
    try expect_opcode(0x40, Insn.ld_ind(.word, .r1, .r2, 0));
    try expect_opcode(0x48, Insn.ld_ind(.half_word, .r1, .r2, 0));
    try expect_opcode(0x50, Insn.ld_ind(.byte, .r1, .r2, 0));

    try expect_opcode(0x79, Insn.ldx(.double_word, .r1, .r2, 0));
    try expect_opcode(0x61, Insn.ldx(.word, .r1, .r2, 0));
    try expect_opcode(0x69, Insn.ldx(.half_word, .r1, .r2, 0));
    try expect_opcode(0x71, Insn.ldx(.byte, .r1, .r2, 0));

    try expect_opcode(0x62, Insn.st(.word, .r1, 0, 0));
    try expect_opcode(0x6a, Insn.st(.half_word, .r1, 0, 0));
    try expect_opcode(0x72, Insn.st(.byte, .r1, 0, 0));

    try expect_opcode(0x63, Insn.stx(.word, .r1, 0, .r2));
    try expect_opcode(0x6b, Insn.stx(.half_word, .r1, 0, .r2));
    try expect_opcode(0x73, Insn.stx(.byte, .r1, 0, .r2));
    try expect_opcode(0x7b, Insn.stx(.double_word, .r1, 0, .r2));

    // branch instructions: immediate forms end in 0x5 (K), register forms in 0xd (X)
    try expect_opcode(0x05, Insn.ja(0));
    try expect_opcode(0x15, Insn.jeq(.r1, 0, 0));
    try expect_opcode(0x1d, Insn.jeq(.r1, .r2, 0));
    try expect_opcode(0x25, Insn.jgt(.r1, 0, 0));
    try expect_opcode(0x2d, Insn.jgt(.r1, .r2, 0));
    try expect_opcode(0x35, Insn.jge(.r1, 0, 0));
    try expect_opcode(0x3d, Insn.jge(.r1, .r2, 0));
    try expect_opcode(0xa5, Insn.jlt(.r1, 0, 0));
    try expect_opcode(0xad, Insn.jlt(.r1, .r2, 0));
    try expect_opcode(0xb5, Insn.jle(.r1, 0, 0));
    try expect_opcode(0xbd, Insn.jle(.r1, .r2, 0));
    try expect_opcode(0x45, Insn.jset(.r1, 0, 0));
    try expect_opcode(0x4d, Insn.jset(.r1, .r2, 0));
    try expect_opcode(0x55, Insn.jne(.r1, 0, 0));
    try expect_opcode(0x5d, Insn.jne(.r1, .r2, 0));
    try expect_opcode(0x65, Insn.jsgt(.r1, 0, 0));
    try expect_opcode(0x6d, Insn.jsgt(.r1, .r2, 0));
    try expect_opcode(0x75, Insn.jsge(.r1, 0, 0));
    try expect_opcode(0x7d, Insn.jsge(.r1, .r2, 0));
    try expect_opcode(0xc5, Insn.jslt(.r1, 0, 0));
    try expect_opcode(0xcd, Insn.jslt(.r1, .r2, 0));
    try expect_opcode(0xd5, Insn.jsle(.r1, 0, 0));
    try expect_opcode(0xdd, Insn.jsle(.r1, .r2, 0));
    try expect_opcode(0x85, Insn.call(.unspec));
    try expect_opcode(0x95, Insn.exit());
}
 859}
 860
/// Commands accepted by the bpf(2) syscall. The ordinal value of each variant
/// is the kernel's command number, so the order of this list must not change.
pub const Cmd = enum(usize) {
    /// Create a map and return a file descriptor that refers to the map. The
    /// close-on-exec file descriptor flag is automatically enabled for the new
    /// file descriptor.
    ///
    /// uses MapCreateAttr
    map_create,

    /// Look up an element by key in a specified map and return its value.
    ///
    /// uses MapElemAttr
    map_lookup_elem,

    /// Create or update an element (key/value pair) in a specified map.
    ///
    /// uses MapElemAttr
    map_update_elem,

    /// Look up and delete an element by key in a specified map.
    ///
    /// uses MapElemAttr
    map_delete_elem,

    /// Look up an element by key in a specified map and return the key of the
    /// next element.
    map_get_next_key,

    /// Verify and load an eBPF program, returning a new file descriptor
    /// associated with the program. The close-on-exec file descriptor flag
    /// is automatically enabled for the new file descriptor.
    ///
    /// uses ProgLoadAttr
    prog_load,

    /// Pin a map or eBPF program to a path within the minimal BPF filesystem
    ///
    /// uses ObjAttr
    obj_pin,

    /// Get the file descriptor of a BPF object pinned to a certain path
    ///
    /// uses ObjAttr
    obj_get,

    /// uses ProgAttachAttr
    prog_attach,

    /// uses ProgAttachAttr
    prog_detach,

    /// uses TestRunAttr
    prog_test_run,

    /// uses GetIdAttr
    prog_get_next_id,

    /// uses GetIdAttr
    map_get_next_id,

    /// uses GetIdAttr
    prog_get_fd_by_id,

    /// uses GetIdAttr
    map_get_fd_by_id,

    /// uses InfoAttr
    obj_get_info_by_fd,

    /// uses QueryAttr
    prog_query,

    /// uses RawTracepointAttr
    raw_tracepoint_open,

    /// uses BtfLoadAttr
    btf_load,

    /// uses GetIdAttr
    btf_get_fd_by_id,

    /// uses TaskFdQueryAttr
    task_fd_query,

    /// uses MapElemAttr
    map_lookup_and_delete_elem,
    map_freeze,

    /// uses GetIdAttr
    btf_get_next_id,

    /// uses MapBatchAttr
    map_lookup_batch,

    /// uses MapBatchAttr
    map_lookup_and_delete_batch,

    /// uses MapBatchAttr
    map_update_batch,

    /// uses MapBatchAttr
    map_delete_batch,

    /// uses LinkCreateAttr
    link_create,

    /// uses LinkUpdateAttr
    link_update,

    /// uses GetIdAttr
    link_get_fd_by_id,

    /// uses GetIdAttr
    link_get_next_id,

    /// uses EnableStatsAttr
    enable_stats,

    /// uses IterCreateAttr
    iter_create,
    link_detach,
    // non-exhaustive: newer kernels define additional commands
    _,
};
 983
/// BPF map types. The ordinal value of each variant is the kernel's
/// BPF_MAP_TYPE_* number, so the order of this list must not change.
pub const MapType = enum(u32) {
    unspec,
    hash,
    array,
    prog_array,
    perf_event_array,
    percpu_hash,
    percpu_array,
    stack_trace,
    cgroup_array,
    lru_hash,
    lru_percpu_hash,
    lpm_trie,
    array_of_maps,
    hash_of_maps,
    devmap,
    sockmap,
    cpumap,
    xskmap,
    sockhash,
    cgroup_storage_deprecated,
    reuseport_sockarray,
    percpu_cgroup_storage,
    queue,
    stack,
    sk_storage,
    devmap_hash,
    struct_ops,

    /// An ordered and shared CPU version of perf_event_array. They have
    /// similar semantics:
    ///     - variable length records
    ///     - no blocking: when full, reservation fails
    ///     - memory mappable for ease and speed
    ///     - epoll notifications for new data, but can busy poll
    ///
    /// Ringbufs give BPF programs two sets of APIs:
    ///     - ringbuf_output() allows copy data from one place to a ring
    ///     buffer, similar to bpf_perf_event_output()
    ///     - ringbuf_reserve()/ringbuf_commit()/ringbuf_discard() split the
    ///     process into two steps. First a fixed amount of space is reserved,
    ///     if that is successful then the program gets a pointer to a chunk of
    ///     memory and can be submitted with commit() or discarded with
    ///     discard()
    ///
    /// ringbuf_output() will incur an extra memory copy, but allows to submit
    /// records of the length that's not known beforehand, and is an easy
    /// replacement for perf_event_output().
    ///
    /// ringbuf_reserve() avoids the extra memory copy but requires a known size
    /// of memory beforehand.
    ///
    /// ringbuf_query() allows to query properties of the map, 4 are currently
    /// supported:
    ///     - BPF_RB_AVAIL_DATA: amount of unconsumed data in ringbuf
    ///     - BPF_RB_RING_SIZE: returns size of ringbuf
    ///     - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical position
    ///     of consumer and producer respectively
    ///
    /// key size: 0
    /// value size: 0
    /// max entries: size of ringbuf, must be power of 2
    ringbuf,
    inode_storage,
    task_storage,
    bloom_filter,
    user_ringbuf,
    cgroup_storage,
    arena,

    // non-exhaustive: newer kernels define additional map types
    _,
};
1056
/// Type of a BPF program, mirroring the kernel's BPF_PROG_TYPE_* constants.
/// Sent to the kernel as a u32 in ProgLoadAttr.prog_type, so the variant
/// order is kernel ABI. Each variant's comment names the context struct the
/// program receives as its argument.
pub const ProgType = enum(u32) {
    unspec,

    /// context type: __sk_buff
    socket_filter,

    /// context type: bpf_user_pt_regs_t
    kprobe,

    /// context type: __sk_buff
    sched_cls,

    /// context type: __sk_buff
    sched_act,

    /// context type: u64
    tracepoint,

    /// context type: xdp_md
    xdp,

    /// context type: bpf_perf_event_data
    perf_event,

    /// context type: __sk_buff
    cgroup_skb,

    /// context type: bpf_sock
    cgroup_sock,

    /// context type: __sk_buff
    lwt_in,

    /// context type: __sk_buff
    lwt_out,

    /// context type: __sk_buff
    lwt_xmit,

    /// context type: bpf_sock_ops
    sock_ops,

    /// context type: __sk_buff
    sk_skb,

    /// context type: bpf_cgroup_dev_ctx
    cgroup_device,

    /// context type: sk_msg_md
    sk_msg,

    /// context type: bpf_raw_tracepoint_args
    raw_tracepoint,

    /// context type: bpf_sock_addr
    cgroup_sock_addr,

    /// context type: __sk_buff
    lwt_seg6local,

    /// context type: u32
    lirc_mode2,

    /// context type: sk_reuseport_md
    sk_reuseport,

    /// context type: __sk_buff
    flow_dissector,

    /// context type: bpf_sysctl
    cgroup_sysctl,

    /// context type: bpf_raw_tracepoint_args
    raw_tracepoint_writable,

    /// context type: bpf_sockopt
    cgroup_sockopt,

    /// context type: void *
    tracing,

    /// context type: void *
    struct_ops,

    /// context type: void *
    ext,

    /// context type: void *
    lsm,

    /// context type: bpf_sk_lookup
    sk_lookup,

    /// context type: void *
    syscall,

    /// context type: bpf_nf_ctx
    netfilter,

    _,
};
1158
/// Attach point for a BPF program, mirroring the kernel's
/// `enum bpf_attach_type` (BPF_CGROUP_INET_INGRESS, ...). Sent to the kernel
/// as a u32, so the variant order is kernel ABI.
pub const AttachType = enum(u32) {
    cgroup_inet_ingress,
    cgroup_inet_egress,
    cgroup_inet_sock_create,
    cgroup_sock_ops,
    sk_skb_stream_parser,
    sk_skb_stream_verdict,
    cgroup_device,
    sk_msg_verdict,
    cgroup_inet4_bind,
    cgroup_inet6_bind,
    cgroup_inet4_connect,
    cgroup_inet6_connect,
    cgroup_inet4_post_bind,
    cgroup_inet6_post_bind,
    cgroup_udp4_sendmsg,
    cgroup_udp6_sendmsg,
    lirc_mode2,
    flow_dissector,
    cgroup_sysctl,
    cgroup_udp4_recvmsg,
    cgroup_udp6_recvmsg,
    cgroup_getsockopt,
    cgroup_setsockopt,
    trace_raw_tp,
    trace_fentry,
    trace_fexit,
    modify_return,
    lsm_mac,
    trace_iter,
    cgroup_inet4_getpeername,
    cgroup_inet6_getpeername,
    cgroup_inet4_getsockname,
    cgroup_inet6_getsockname,
    xdp_devmap,
    cgroup_inet_sock_release,
    xdp_cpumap,
    sk_lookup,
    xdp,
    sk_skb_verdict,
    sk_reuseport_select,
    sk_reuseport_select_or_migrate,
    perf_event,
    trace_kprobe_multi,
    lsm_cgroup,
    struct_ops,
    netfilter,
    tcx_ingress,
    tcx_egress,
    trace_uprobe_multi,
    cgroup_unix_connect,
    cgroup_unix_sendmsg,
    cgroup_unix_recvmsg,
    cgroup_unix_getpeername,
    cgroup_unix_getsockname,
    netkit_primary,
    netkit_peer,
    trace_kprobe_session,
    _,
};
1219
/// BPF_OBJ_NAME_LEN from the kernel UAPI: fixed size of the NUL-padded
/// map_name/prog_name fields below.
const obj_name_len = 16;
/// struct used by Cmd.map_create command
///
/// Field order and sizes match the corresponding anonymous struct inside the
/// kernel's `union bpf_attr`; this is ABI and must not be reordered.
pub const MapCreateAttr = extern struct {
    /// one of MapType
    map_type: u32,

    /// size of key in bytes
    key_size: u32,

    /// size of value in bytes
    value_size: u32,

    /// max number of entries in a map
    max_entries: u32,

    /// .map_create related flags
    map_flags: u32,

    /// fd pointing to the inner map
    inner_map_fd: fd_t,

    /// numa node (effective only if MapCreateFlags.numa_node is set)
    numa_node: u32,

    /// NUL-padded name of the map, at most obj_name_len bytes
    map_name: [obj_name_len]u8,

    /// ifindex of netdev to create on
    map_ifindex: u32,

    /// fd pointing to a BTF type data
    btf_fd: fd_t,

    /// BTF type_id of the key
    btf_key_type_id: u32,

    /// BTF type_id of the value
    /// NOTE(review): the kernel UAPI names this field btf_value_type_id; the
    /// "bpf_" prefix here looks like a typo, but renaming the public field
    /// would break callers, so it is only flagged.
    bpf_value_type_id: u32,

    /// BTF type_id of a kernel struct stored as the map value
    btf_vmlinux_value_type_id: u32,
};
1260
/// struct used by Cmd.map_*_elem commands
///
/// Userspace pointers are carried as u64 integers (set with @intFromPtr),
/// matching the kernel's bpf_attr layout; see map_lookup_elem et al. below.
pub const MapElemAttr = extern struct {
    /// map to operate on
    map_fd: fd_t,

    /// userspace address of the key
    key: u64,
    result: extern union {
        /// userspace address of the value buffer (lookup/update)
        value: u64,
        /// userspace address of the next-key output buffer (get_next_key)
        next_key: u64,
    },

    /// command-specific flags, passed through to the kernel unchanged
    flags: u64,
};
1271
/// struct used by Cmd.map_*_batch commands
///
/// keys/values are userspace addresses of arrays holding `count` elements,
/// carried as u64 integers.
pub const MapBatchAttr = extern struct {
    /// start batch, NULL to start from beginning
    in_batch: u64,

    /// output: next start batch
    out_batch: u64,

    /// userspace address of the key array
    keys: u64,

    /// userspace address of the value array
    values: u64,

    /// input/output:
    /// input: # of key/value elements
    /// output: # of filled elements
    count: u32,
    map_fd: fd_t,
    elem_flags: u64,
    flags: u64,
};
1290
/// struct used by Cmd.prog_load command
///
/// Pointer-valued fields (insns, license, log_buf, func_info, line_info) are
/// userspace addresses carried as u64 integers; see prog_load below.
pub const ProgLoadAttr = extern struct {
    /// one of ProgType
    prog_type: u32,

    /// number of instructions pointed to by `insns`
    insn_cnt: u32,

    /// userspace address of the instruction array
    insns: u64,

    /// userspace address of the license string
    license: u64,

    /// verbosity level of verifier
    log_level: u32,

    /// size of user buffer
    log_size: u32,

    /// user supplied buffer
    log_buf: u64,

    /// not used
    kern_version: u32,
    prog_flags: u32,

    /// NUL-padded name of the program, at most obj_name_len bytes
    prog_name: [obj_name_len]u8,

    /// ifindex of netdev to prep for.
    prog_ifindex: u32,

    /// For some prog types expected attach type must be known at load time to
    /// verify attach type specific parts of prog (context accesses, allowed
    /// helpers, etc).
    expected_attach_type: u32,

    /// fd pointing to BTF type data
    prog_btf_fd: fd_t,

    /// userspace bpf_func_info size
    func_info_rec_size: u32,
    func_info: u64,

    /// number of bpf_func_info records
    func_info_cnt: u32,

    /// userspace bpf_line_info size
    line_info_rec_size: u32,
    line_info: u64,

    /// number of bpf_line_info records
    line_info_cnt: u32,

    /// in-kernel BTF type id to attach to
    /// NOTE(review): the kernel UAPI spells this attach_btf_id; "attact" looks
    /// like a typo, but renaming the public field would break callers, so it
    /// is only flagged.
    attact_btf_id: u32,

    /// 0 to attach to vmlinux
    attach_prog_id: u32,
};
1344
/// struct used by Cmd.obj_pin/obj_get commands
pub const ObjAttr = extern struct {
    /// userspace address of the pathname string, carried as a u64 integer
    pathname: u64,

    /// fd of the BPF object to pin
    bpf_fd: fd_t,
    file_flags: u32,
};
1351
/// struct used by Cmd.prog_attach/detach commands
pub const ProgAttachAttr = extern struct {
    /// container object to attach to
    target_fd: fd_t,

    /// eBPF program to attach
    attach_bpf_fd: fd_t,

    /// one of AttachType
    attach_type: u32,
    attach_flags: u32,

    // TODO: BPF_F_REPLACE flags
    /// previously attached eBPF program to replace if .replace is used
    replace_bpf_fd: fd_t,
};
1367
/// struct used by Cmd.prog_test_run command
///
/// data_in/data_out and ctx_in/ctx_out are userspace addresses carried as
/// u64 integers.
pub const TestRunAttr = extern struct {
    /// program to run
    prog_fd: fd_t,

    /// output: return value of the program
    retval: u32,

    /// input: len of data_in
    data_size_in: u32,

    /// input/output: len of data_out. returns ENOSPC if data_out is too small.
    data_size_out: u32,
    data_in: u64,
    data_out: u64,

    /// number of times to run the program
    repeat: u32,

    /// output: duration of the run
    duration: u32,

    /// input: len of ctx_in
    ctx_size_in: u32,

    /// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small.
    ctx_size_out: u32,
    ctx_in: u64,
    ctx_out: u64,
};
1391
/// struct used by Cmd.*_get_*_id commands
///
/// The union member used depends on the command: start_id for the
/// *_get_next_id commands, the object-specific id for *_get_fd_by_id.
pub const GetIdAttr = extern struct {
    id: extern union {
        start_id: u32,
        prog_id: u32,
        map_id: u32,
        btf_id: u32,
        link_id: u32,
    },

    /// output: id following `start_id`
    next_id: u32,
    open_flags: u32,
};
1404
/// struct used by Cmd.obj_get_info_by_fd command
pub const InfoAttr = extern struct {
    /// fd of the object to query
    bpf_fd: fd_t,

    /// length of the buffer pointed to by `info`
    info_len: u32,

    /// userspace address of the info output buffer, carried as a u64 integer
    info: u64,
};
1411
/// struct used by Cmd.prog_query command
pub const QueryAttr = extern struct {
    /// container object to query
    target_fd: fd_t,
    attach_type: u32,
    query_flags: u32,
    attach_flags: u32,

    /// userspace address of a prog-id output array, carried as a u64 integer
    prog_ids: u64,

    /// number of entries in the array pointed to by `prog_ids`
    prog_cnt: u32,
};
1422
/// struct used by Cmd.raw_tracepoint_open command
pub const RawTracepointAttr = extern struct {
    /// userspace address of the tracepoint name, carried as a u64 integer
    name: u64,

    /// program to attach
    prog_fd: fd_t,
};
1428
/// struct used by Cmd.btf_load command
pub const BtfLoadAttr = extern struct {
    /// userspace address of the BTF data blob, carried as a u64 integer
    btf: u64,

    /// userspace address of the verifier log buffer, carried as a u64 integer
    btf_log_buf: u64,

    /// size of the data pointed to by `btf`
    btf_size: u32,

    /// size of the buffer pointed to by `btf_log_buf`
    btf_log_size: u32,
    btf_log_level: u32,
};
1437
/// struct used by Cmd.task_fd_query
pub const TaskFdQueryAttr = extern struct {
    /// input: pid
    pid: pid_t,

    /// input: fd
    fd: fd_t,

    /// input: flags
    flags: u32,

    /// input/output: buf len
    buf_len: u32,

    /// input/output:
    ///     tp_name for tracepoint
    ///     symbol for kprobe
    ///     filename for uprobe
    buf: u64,

    /// output: prog_id
    prog_id: u32,

    /// output: BPF_FD_TYPE
    fd_type: u32,

    /// output: probe_offset
    probe_offset: u64,

    /// output: probe_addr
    probe_addr: u64,
};
1470
/// struct used by Cmd.link_create command
pub const LinkCreateAttr = extern struct {
    /// eBPF program to attach
    prog_fd: fd_t,

    /// object to attach to
    target_fd: fd_t,

    /// one of AttachType
    attach_type: u32,

    /// extra flags
    flags: u32,
};
1483
/// struct used by Cmd.link_update command
pub const LinkUpdateAttr = extern struct {
    /// link to update
    link_fd: fd_t,

    /// new program to update link with
    new_prog_fd: fd_t,

    /// extra flags
    flags: u32,

    /// expected link's program fd, it is specified only if BPF_F_REPLACE is
    /// set in flags
    old_prog_fd: fd_t,
};
1498
/// struct used by Cmd.enable_stats command
pub const EnableStatsAttr = extern struct {
    /// which statistics to enable (kernel BPF_STATS_* value)
    type: u32,
};
1503
/// struct used by Cmd.iter_create command
pub const IterCreateAttr = extern struct {
    /// link of the iterator to instantiate
    link_fd: fd_t,
    flags: u32,
};
1509
/// Mega struct that is passed to the bpf() syscall
///
/// Mirrors the kernel's `union bpf_attr`: only the member matching the Cmd
/// being issued is read. The helpers below zero the relevant member and pass
/// @sizeOf of that member's type as the syscall's size argument.
pub const Attr = extern union {
    map_create: MapCreateAttr,
    map_elem: MapElemAttr,
    map_batch: MapBatchAttr,
    prog_load: ProgLoadAttr,
    obj: ObjAttr,
    prog_attach: ProgAttachAttr,
    test_run: TestRunAttr,
    get_id: GetIdAttr,
    info: InfoAttr,
    query: QueryAttr,
    raw_tracepoint: RawTracepointAttr,
    btf_load: BtfLoadAttr,
    task_fd_query: TaskFdQueryAttr,
    link_create: LinkCreateAttr,
    link_update: LinkUpdateAttr,
    enable_stats: EnableStatsAttr,
    iter_create: IterCreateAttr,
};
1530
/// Verifier log configuration consumed by prog_load: `level` selects the
/// verifier's verbosity, and its text output is written into `buf`.
pub const Log = struct {
    level: u32,
    buf: []u8,
};
1535
/// Creates a new BPF map of the given type and geometry, returning its file
/// descriptor. All other attribute fields are left zeroed. The caller owns
/// the returned fd and is responsible for closing it.
pub fn map_create(map_type: MapType, key_size: u32, value_size: u32, max_entries: u32) !fd_t {
    var attr = Attr{
        .map_create = std.mem.zeroes(MapCreateAttr),
    };

    const req = &attr.map_create;
    req.map_type = @intFromEnum(map_type);
    req.key_size = key_size;
    req.value_size = value_size;
    req.max_entries = max_entries;

    const rc = linux.bpf(.map_create, &attr, @sizeOf(MapCreateAttr));
    return switch (errno(rc)) {
        .SUCCESS => @as(fd_t, @intCast(rc)),
        .INVAL => error.MapTypeOrAttrInvalid,
        .NOMEM => error.SystemResources,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1555
test "map_create" {
    const map = try map_create(.hash, 4, 4, 32);
    // `close` lives in std.posix in this Zig version (the file already uses
    // std.posix.unexpectedErrno); std.os no longer exposes it.
    defer std.posix.close(map);
}
1560
/// Copies the value stored under `key` in the map referred to by `fd` into
/// `value`. Returns error.NotFound when no element with that key exists.
/// The buffers must match the map's key_size/value_size.
pub fn map_lookup_elem(fd: fd_t, key: []const u8, value: []u8) !void {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    const req = &attr.map_elem;
    req.map_fd = fd;
    req.key = @intFromPtr(key.ptr);
    req.result = .{ .value = @intFromPtr(value.ptr) };

    const rc = linux.bpf(.map_lookup_elem, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => {},
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOENT => error.NotFound,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1581
/// Inserts or updates the element stored under `key` in the map referred to
/// by `fd`. `flags` is forwarded to the kernel unchanged. Returns
/// error.ReachedMaxEntries when inserting would exceed the map's capacity.
pub fn map_update_elem(fd: fd_t, key: []const u8, value: []const u8, flags: u64) !void {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    const req = &attr.map_elem;
    req.map_fd = fd;
    req.key = @intFromPtr(key.ptr);
    req.result = .{ .value = @intFromPtr(value.ptr) };
    req.flags = flags;

    const rc = linux.bpf(.map_update_elem, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => {},
        .@"2BIG" => error.ReachedMaxEntries,
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOMEM => error.SystemResources,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1604
/// Removes the element stored under `key` from the map referred to by `fd`.
/// Returns error.NotFound when no element with that key exists.
pub fn map_delete_elem(fd: fd_t, key: []const u8) !void {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    const req = &attr.map_elem;
    req.map_fd = fd;
    req.key = @intFromPtr(key.ptr);

    const rc = linux.bpf(.map_delete_elem, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => {},
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOENT => error.NotFound,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1624
/// Writes the key that follows `key` in the map's iteration order into
/// `next_key`. Returns true on success, or false when the kernel reports
/// ENOENT (no further key).
pub fn map_get_next_key(fd: fd_t, key: []const u8, next_key: []u8) !bool {
    var attr = Attr{
        .map_elem = std.mem.zeroes(MapElemAttr),
    };

    const req = &attr.map_elem;
    req.map_fd = fd;
    req.key = @intFromPtr(key.ptr);
    req.result = .{ .next_key = @intFromPtr(next_key.ptr) };

    const rc = linux.bpf(.map_get_next_key, &attr, @sizeOf(MapElemAttr));
    return switch (errno(rc)) {
        .SUCCESS => true,
        .BADF => error.BadFd,
        .FAULT => unreachable,
        .INVAL => error.FieldInAttrNeedsZeroing,
        .NOENT => false,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1645
test "map lookup, update, and delete" {
    const key_size = 4;
    const value_size = 4;
    const map = try map_create(.hash, key_size, value_size, 1);
    // `close` lives in std.posix in this Zig version (the file already uses
    // std.posix.unexpectedErrno); std.os no longer exposes it.
    defer std.posix.close(map);

    const key = std.mem.zeroes([key_size]u8);
    var value = std.mem.zeroes([value_size]u8);

    // fails looking up value that doesn't exist
    try expectError(error.NotFound, map_lookup_elem(map, &key, &value));

    // succeed at updating and looking up element
    try map_update_elem(map, &key, &value, 0);
    try map_lookup_elem(map, &key, &value);

    // fails inserting more than max entries
    const second_key = [key_size]u8{ 0, 0, 0, 1 };
    try expectError(error.ReachedMaxEntries, map_update_elem(map, &second_key, &value, 0));

    // succeed at iterating all keys of map
    var lookup_key = [_]u8{ 1, 0, 0, 0 };
    var next_key = [_]u8{ 2, 3, 4, 5 }; // garbage value
    const status = try map_get_next_key(map, &lookup_key, &next_key);
    try expectEqual(status, true);
    try expectEqual(next_key, key);
    lookup_key = next_key;
    const status2 = try map_get_next_key(map, &lookup_key, &next_key);
    try expectEqual(status2, false);

    // succeed at deleting an existing elem
    try map_delete_elem(map, &key);
    try expectError(error.NotFound, map_lookup_elem(map, &key, &value));

    // fail at deleting a non-existing elem
    try expectError(error.NotFound, map_delete_elem(map, &key));
}
1683
/// Loads a BPF program into the kernel, returning its file descriptor.
/// When `log` is non-null, the verifier writes its output into log.buf at
/// verbosity log.level. The caller owns the returned fd and must close it.
pub fn prog_load(
    prog_type: ProgType,
    insns: []const Insn,
    log: ?*Log,
    license: []const u8,
    kern_version: u32,
    flags: u32,
) !fd_t {
    var attr = Attr{
        .prog_load = std.mem.zeroes(ProgLoadAttr),
    };

    const req = &attr.prog_load;
    req.prog_type = @intFromEnum(prog_type);
    req.insns = @intFromPtr(insns.ptr);
    req.insn_cnt = @as(u32, @intCast(insns.len));
    req.license = @intFromPtr(license.ptr);
    req.kern_version = kern_version;
    req.prog_flags = flags;

    // Only wire up the verifier log when the caller asked for one.
    if (log) |log_info| {
        req.log_buf = @intFromPtr(log_info.buf.ptr);
        req.log_size = @as(u32, @intCast(log_info.buf.len));
        req.log_level = log_info.level;
    }

    const rc = linux.bpf(.prog_load, &attr, @sizeOf(ProgLoadAttr));
    return switch (errno(rc)) {
        .SUCCESS => @as(fd_t, @intCast(rc)),
        .ACCES => error.UnsafeProgram,
        .FAULT => unreachable,
        .INVAL => error.InvalidProgram,
        .PERM => error.PermissionDenied,
        else => |err| unexpectedErrno(err),
    };
}
1719
test "prog_load" {
    // this should fail because it does not set r0 before exiting
    const bad_prog = [_]Insn{
        Insn.exit(),
    };

    const good_prog = [_]Insn{
        Insn.mov(.r0, 0),
        Insn.exit(),
    };

    const prog = try prog_load(.socket_filter, &good_prog, null, "MIT", 0, 0);
    // `close` lives in std.posix in this Zig version (the file already uses
    // std.posix.unexpectedErrno); std.os no longer exposes it.
    defer std.posix.close(prog);

    try expectError(error.UnsafeProgram, prog_load(.socket_filter, &bad_prog, null, "MIT", 0, 0));
}