Commit d605af511a

Matthew Knight <mattnite@protonmail.com>
2020-08-18 04:17:04
added bpf() syscall and some supporting structs (#6061)
* added bpf syscall and some supporting structs * moved bpf to bits and added flags
1 parent e26dda5
Changed files (2)
lib
std
os
bits
lib/std/os/bits/linux/bpf.zig
@@ -0,0 +1,606 @@
+usingnamespace std.os;
+const std = @import("../../../std.zig");
+
+// instruction classes
+/// instruction class: jmp mode in word (32-bit) width
+pub const JMP32 = 0x06;
+/// instruction class: alu mode in double word (64-bit) width
+pub const ALU64 = 0x07;
+
+// ld/ldx fields
+/// size field: double word (64-bit)
+pub const DW = 0x18;
+/// mode field: exclusive add
+pub const XADD = 0xc0;
+
+// alu/jmp fields
+/// mov reg to reg
+pub const MOV = 0xb0;
+/// sign extending arithmetic shift right
+pub const ARSH = 0xc0;
+
+// change endianness of a register
+/// operation code for an endianness conversion; the direction is selected by
+/// one of the TO_* / FROM_* flags below
+pub const END = 0xd0;
+/// flag for endianness conversion: convert to little-endian
+pub const TO_LE = 0x00;
+/// flag for endianness conversion: convert to big-endian
+pub const TO_BE = 0x08;
+/// flag for endianness conversion: same value as TO_LE
+pub const FROM_LE = TO_LE;
+/// flag for endianness conversion: same value as TO_BE
+pub const FROM_BE = TO_BE;
+
+// jmp encodings
+/// jump if not equal, '!='
+pub const JNE = 0x50;
+/// LT is unsigned, '<'
+pub const JLT = 0xa0;
+/// LE is unsigned, '<='
+pub const JLE = 0xb0;
+/// SGT is signed '>', GT in x86
+pub const JSGT = 0x60;
+/// SGE is signed '>=', GE in x86
+pub const JSGE = 0x70;
+/// SLT is signed, '<'
+pub const JSLT = 0xc0;
+/// SLE is signed, '<='
+pub const JSLE = 0xd0;
+/// function call
+pub const CALL = 0x80;
+/// function return
+pub const EXIT = 0x90;
+
+// Flags for the prog_attach command (see ProgAttachAttr.attach_flags).
+
+/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the
+/// program in this cgroup yields to sub-cgroup program.
+pub const F_ALLOW_OVERRIDE = 0x1;
+/// Flag for prog_attach command. If a sub-cgroup installs some bpf program,
+/// that cgroup program gets run in addition to the program in this cgroup.
+pub const F_ALLOW_MULTI = 0x2;
+/// Flag for prog_attach command.
+/// NOTE(review): presumably requests replacing the program named by
+/// ProgAttachAttr.replace_bpf_fd -- confirm against the kernel documentation.
+pub const F_REPLACE = 0x4;
+
+/// If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier
+/// will perform strict alignment checking as if the kernel has been built with
+/// CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, and NET_IP_ALIGN defined to 2.
+pub const F_STRICT_ALIGNMENT = 0x1;
+
+/// If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the verifier will
+/// allow any alignment whatsoever. On platforms with strict alignment
+/// requirements for loads and stores (such as sparc and mips) the verifier
+/// validates that all loads and stores provably follow this requirement. This
+/// flag turns that checking and enforcement off.
+///
+/// It is mostly used for testing when we want to validate the context and
+/// memory access aspects of the verifier, but because of an unaligned access
+/// the alignment check would trigger before the one we are interested in.
+pub const F_ANY_ALIGNMENT = 0x2;
+
+/// BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purposes.
+/// The verifier does sub-register def/use analysis and identifies instructions
+/// whose def only matters for the low 32 bits, i.e. the high 32 bits are never
+/// referenced later through implicit zero extension. Therefore the verifier
+/// notifies JIT back-ends that it is safe to skip clearing the high 32 bits
+/// for these instructions. This saves some back-ends a lot of code-gen.
+/// However such an optimization is not necessary on some arches, for example
+/// x86_64, arm64 etc, whose JIT back-ends hence do not use the verifier's
+/// analysis result. But we really want to have a way to verify the
+/// correctness of the described optimization on x86_64, on which testsuites
+/// are frequently exercised.
+///
+/// So, this flag is introduced. Once it is set, the verifier will randomize
+/// the high 32 bits for those instructions which have been identified as safe
+/// to ignore them. Then, if the verifier is not doing correct analysis, such
+/// randomization will regress tests to expose bugs.
+pub const F_TEST_RND_HI32 = 0x4;
+
+/// When BPF ldimm64's insn[0].src_reg != 0 then this can have two extensions:
+/// insn[0].src_reg:  BPF_PSEUDO_MAP_FD   BPF_PSEUDO_MAP_VALUE
+/// insn[0].imm:      map fd              map fd
+/// insn[1].imm:      0                   offset into value
+/// insn[0].off:      0                   0
+/// insn[1].off:      0                   0
+/// ldimm64 rewrite:  address of map      address of map[0]+offset
+/// verifier type:    CONST_PTR_TO_MAP    PTR_TO_MAP_VALUE
+pub const PSEUDO_MAP_FD = 1;
+/// second ldimm64 src_reg extension; see the table on PSEUDO_MAP_FD
+pub const PSEUDO_MAP_VALUE = 2;
+
+/// when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+/// offset to another bpf function
+pub const PSEUDO_CALL = 1;
+
+// Flags for the BPF_MAP_UPDATE_ELEM command (Cmd.map_update_elem).
+
+/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing
+pub const ANY = 0;
+/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist
+pub const NOEXIST = 1;
+/// flag for BPF_MAP_UPDATE_ELEM command. update existing element
+pub const EXIST = 2;
+/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update
+pub const F_LOCK = 4;
+
+// Flags for the BPF_MAP_CREATE command (see MapCreateAttr.map_flags).
+// NOTE(review): these names keep a BPF_ prefix, unlike the other F_* flags
+// declared in this file.
+
+/// flag for BPF_MAP_CREATE command
+pub const BPF_F_NO_PREALLOC = 0x1;
+/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in
+/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can
+/// scale and perform better. Note, the LRU nodes (including free nodes) cannot
+/// be moved across different LRU lists.
+pub const BPF_F_NO_COMMON_LRU = 0x2;
+/// flag for BPF_MAP_CREATE command. Specify numa node during map creation
+pub const BPF_F_NUMA_NODE = 0x4;
+/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from
+/// syscall side
+pub const BPF_F_RDONLY = 0x8;
+/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from
+/// syscall side
+pub const BPF_F_WRONLY = 0x10;
+/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset
+/// instead of pointer
+pub const BPF_F_STACK_BUILD_ID = 0x20;
+/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This
+/// should only be used for testing.
+pub const BPF_F_ZERO_SEED = 0x40;
+/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program
+/// side.
+pub const BPF_F_RDONLY_PROG = 0x80;
+/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program
+/// side.
+pub const BPF_F_WRONLY_PROG = 0x100;
+/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted
+/// socket
+pub const BPF_F_CLONE = 0x200;
+/// flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map
+pub const BPF_F_MMAPABLE = 0x400;
+
+/// One BPF instruction: an 8-bit opcode, two 4-bit register numbers, a 16-bit
+/// signed offset and a 32-bit signed immediate, packed into 64 bits.
+pub const Insn = packed struct {
+    code: u8,
+    dst: u4,
+    src: u4,
+    off: i16,
+    imm: i32,
+
+    /// r0 - r9 are general purpose 64-bit registers, r10 points to the stack
+    /// frame
+    pub const Reg = enum(u4) {
+        r0,
+        r1,
+        r2,
+        r3,
+        r4,
+        r5,
+        r6,
+        r7,
+        r8,
+        r9,
+        r10,
+    };
+
+    // Opcode building blocks used by the factory functions below.
+    const alu = 0x04;
+    const jmp = 0x05;
+    const mov = 0xb0;
+    const k = 0;
+    const exit_code = 0x90;
+
+    // TODO: implement more factory functions for the other instructions
+    /// Build the instruction that moves the immediate `imm` into register
+    /// `dst` (opcode alu | mov | k).
+    pub fn load_imm(dst: Reg, imm: i32) Insn {
+        const opcode: u8 = alu | mov | k;
+        const dst_reg: u4 = @enumToInt(dst);
+        return Insn{
+            .code = opcode,
+            .dst = dst_reg,
+            .src = 0,
+            .off = 0,
+            .imm = imm,
+        };
+    }
+
+    /// Build the instruction that exits the BPF program (opcode
+    /// jmp | exit_code); every other field is zero.
+    pub fn exit() Insn {
+        const opcode: u8 = jmp | exit_code;
+        return Insn{
+            .code = opcode,
+            .dst = 0,
+            .src = 0,
+            .off = 0,
+            .imm = 0,
+        };
+    }
+};
+
+/// Command selector passed as the first argument of bpf(). Members are
+/// numbered implicitly from 0 in declaration order, so their order must not
+/// change. The trailing `_` makes the enum non-exhaustive.
+pub const Cmd = extern enum(usize) {
+    map_create,
+    map_lookup_elem,
+    map_update_elem,
+    map_delete_elem,
+    map_get_next_key,
+    prog_load,
+    obj_pin,
+    obj_get,
+    prog_attach,
+    prog_detach,
+    prog_test_run,
+    prog_get_next_id,
+    map_get_next_id,
+    prog_get_fd_by_id,
+    map_get_fd_by_id,
+    obj_get_info_by_fd,
+    prog_query,
+    raw_tracepoint_open,
+    btf_load,
+    btf_get_fd_by_id,
+    task_fd_query,
+    map_lookup_and_delete_elem,
+    map_freeze,
+    btf_get_next_id,
+    map_lookup_batch,
+    map_lookup_and_delete_batch,
+    map_update_batch,
+    map_delete_batch,
+    link_create,
+    link_update,
+    link_get_fd_by_id,
+    link_get_next_id,
+    enable_stats,
+    iter_create,
+    link_detach,
+    _,
+};
+
+/// Kinds of BPF map (MapCreateAttr.map_type is documented as "one of
+/// MapType"). Members are numbered implicitly from 0 in declaration order, so
+/// their order must not change. The trailing `_` makes the enum
+/// non-exhaustive.
+pub const MapType = extern enum(u32) {
+    unspec,
+    hash,
+    array,
+    prog_array,
+    perf_event_array,
+    percpu_hash,
+    percpu_array,
+    stack_trace,
+    cgroup_array,
+    lru_hash,
+    lru_percpu_hash,
+    lpm_trie,
+    array_of_maps,
+    hash_of_maps,
+    devmap,
+    sockmap,
+    cpumap,
+    xskmap,
+    sockhash,
+    cgroup_storage,
+    reuseport_sockarray,
+    percpu_cgroup_storage,
+    queue,
+    stack,
+    sk_storage,
+    devmap_hash,
+    struct_ops,
+    ringbuf,
+    _,
+};
+
+/// Kinds of BPF program (ProgLoadAttr.prog_type is documented as "one of
+/// ProgType"). Members are numbered implicitly from 0 in declaration order, so
+/// their order must not change.
+/// NOTE(review): unlike Cmd, MapType and AttachType this enum has no trailing
+/// `_` member, so it is exhaustive -- confirm whether that is intentional.
+pub const ProgType = extern enum(u32) {
+    unspec,
+    socket_filter,
+    kprobe,
+    sched_cls,
+    sched_act,
+    tracepoint,
+    xdp,
+    perf_event,
+    cgroup_skb,
+    cgroup_sock,
+    lwt_in,
+    lwt_out,
+    lwt_xmit,
+    sock_ops,
+    sk_skb,
+    cgroup_device,
+    sk_msg,
+    raw_tracepoint,
+    cgroup_sock_addr,
+    lwt_seg6local,
+    lirc_mode2,
+    sk_reuseport,
+    flow_dissector,
+    cgroup_sysctl,
+    raw_tracepoint_writable,
+    cgroup_sockopt,
+    tracing,
+    struct_ops,
+    ext,
+    lsm,
+    sk_lookup,
+};
+
+/// Attach points for BPF programs. Members are numbered implicitly from 0 in
+/// declaration order, so their order must not change. The trailing `_` makes
+/// the enum non-exhaustive.
+/// NOTE(review): presumably the value space of the `attach_type` /
+/// `expected_attach_type` fields in the *Attr structs -- confirm.
+pub const AttachType = extern enum(u32) {
+    cgroup_inet_ingress,
+    cgroup_inet_egress,
+    cgroup_inet_sock_create,
+    cgroup_sock_ops,
+    sk_skb_stream_parser,
+    sk_skb_stream_verdict,
+    cgroup_device,
+    sk_msg_verdict,
+    cgroup_inet4_bind,
+    cgroup_inet6_bind,
+    cgroup_inet4_connect,
+    cgroup_inet6_connect,
+    cgroup_inet4_post_bind,
+    cgroup_inet6_post_bind,
+    cgroup_udp4_sendmsg,
+    cgroup_udp6_sendmsg,
+    lirc_mode2,
+    flow_dissector,
+    cgroup_sysctl,
+    cgroup_udp4_recvmsg,
+    cgroup_udp6_recvmsg,
+    cgroup_getsockopt,
+    cgroup_setsockopt,
+    trace_raw_tp,
+    trace_fentry,
+    trace_fexit,
+    modify_return,
+    lsm_mac,
+    trace_iter,
+    cgroup_inet4_getpeername,
+    cgroup_inet6_getpeername,
+    cgroup_inet4_getsockname,
+    cgroup_inet6_getsockname,
+    xdp_devmap,
+    cgroup_inet_sock_release,
+    xdp_cpumap,
+    sk_lookup,
+    xdp,
+    _,
+};
+
+/// length in bytes of the fixed-size object name arrays (map_name, prog_name)
+const obj_name_len = 16;
+/// struct used by Cmd.map_create command
+pub const MapCreateAttr = extern struct {
+    /// one of MapType
+    map_type: u32,
+    /// size of key in bytes
+    key_size: u32,
+    /// size of value in bytes
+    value_size: u32,
+    /// max number of entries in a map
+    max_entries: u32,
+    /// .map_create related flags (the BPF_F_* map creation flags)
+    map_flags: u32,
+    /// fd pointing to the inner map
+    inner_map_fd: fd_t,
+    /// numa node (effective only if BPF_F_NUMA_NODE is set)
+    numa_node: u32,
+    /// name of the map, at most obj_name_len bytes
+    map_name: [obj_name_len]u8,
+    /// ifindex of netdev to create on
+    map_ifindex: u32,
+    /// fd pointing to a BTF type data
+    btf_fd: fd_t,
+    /// BTF type_id of the key
+    btf_key_type_id: u32,
+    /// BTF type_id of the value
+    /// NOTE(review): the `bpf_` prefix looks like a typo for `btf_` (compare
+    /// the neighbouring btf_* fields) -- confirm before renaming.
+    bpf_value_type_id: u32,
+    /// BTF type_id of a kernel struct stored as the map value
+    btf_vmlinux_value_type_id: u32,
+};
+
+/// struct used by Cmd.map_*_elem commands
+pub const MapElemAttr = extern struct {
+    /// fd of the map to operate on
+    map_fd: fd_t,
+    /// NOTE(review): presumably a user-space pointer to the key, carried as a
+    /// 64-bit integer -- confirm
+    key: u64,
+    /// `value` and `next_key` share the same storage; which one applies
+    /// presumably depends on the command -- confirm
+    result: extern union {
+        value: u64,
+        next_key: u64,
+    },
+    /// NOTE(review): presumably one of ANY / NOEXIST / EXIST / F_LOCK -- confirm
+    flags: u64,
+};
+
+/// struct used by Cmd.map_*_batch commands
+pub const MapBatchAttr = extern struct {
+    /// start batch, NULL to start from beginning
+    in_batch: u64,
+    /// output: next start batch
+    out_batch: u64,
+    /// NOTE(review): presumably a user-space pointer to the key buffer -- confirm
+    keys: u64,
+    /// NOTE(review): presumably a user-space pointer to the value buffer -- confirm
+    values: u64,
+    /// input/output:
+    /// input: # of key/value elements
+    /// output: # of filled elements
+    count: u32,
+    /// fd of the map to operate on
+    map_fd: fd_t,
+    elem_flags: u64,
+    flags: u64,
+};
+
+/// struct used by Cmd.prog_load command
+pub const ProgLoadAttr = extern struct {
+    /// one of ProgType
+    prog_type: u32,
+    /// NOTE(review): presumably the number of instructions behind `insns` -- confirm
+    insn_cnt: u32,
+    /// NOTE(review): presumably a user-space pointer to the Insn array -- confirm
+    insns: u64,
+    /// NOTE(review): presumably a user-space pointer to the license string -- confirm
+    license: u64,
+    /// verbosity level of verifier
+    log_level: u32,
+    /// size of user buffer
+    log_size: u32,
+    /// user supplied buffer
+    log_buf: u64,
+    /// not used
+    kern_version: u32,
+    prog_flags: u32,
+    /// name of the program, at most obj_name_len bytes
+    prog_name: [obj_name_len]u8,
+    /// ifindex of netdev to prep for
+    prog_ifindex: u32,
+    /// For some prog types expected attach type must be known at load time to
+    /// verify attach type specific parts of prog (context accesses, allowed
+    /// helpers, etc).
+    expected_attach_type: u32,
+    /// fd pointing to BTF type data
+    prog_btf_fd: fd_t,
+    /// userspace bpf_func_info size
+    func_info_rec_size: u32,
+    func_info: u64,
+    /// number of bpf_func_info records
+    func_info_cnt: u32,
+    /// userspace bpf_line_info size
+    line_info_rec_size: u32,
+    line_info: u64,
+    /// number of bpf_line_info records
+    line_info_cnt: u32,
+    /// in-kernel BTF type id to attach to
+    /// NOTE(review): `attact` looks like a typo for `attach` -- confirm before
+    /// renaming.
+    attact_btf_id: u32,
+    /// 0 to attach to vmlinux
+    /// NOTE(review): the kernel's linux/bpf.h appears to name this field
+    /// attach_prog_fd -- confirm.
+    attach_prog_id: u32,
+};
+
+/// struct used by Cmd.obj_* commands
+pub const ObjAttr = extern struct {
+    /// NOTE(review): presumably a user-space pointer to the path string -- confirm
+    pathname: u64,
+    bpf_fd: fd_t,
+    file_flags: u32,
+};
+
+/// struct used by Cmd.prog_attach/detach commands
+pub const ProgAttachAttr = extern struct {
+    /// container object to attach to
+    target_fd: fd_t,
+    /// eBPF program to attach
+    attach_bpf_fd: fd_t,
+    /// NOTE(review): presumably one of AttachType -- confirm
+    attach_type: u32,
+    /// prog_attach flags (F_ALLOW_OVERRIDE, F_ALLOW_MULTI, F_REPLACE)
+    attach_flags: u32,
+    // TODO: BPF_F_REPLACE flags
+    /// previously attached eBPF program to replace if F_REPLACE is used
+    replace_bpf_fd: fd_t,
+};
+
+/// struct used by Cmd.prog_test_run command
+pub const TestAttr = extern struct {
+    /// fd of the program to run
+    prog_fd: fd_t,
+    /// NOTE(review): presumably output, the program's return value -- confirm
+    retval: u32,
+    /// input: len of data_in
+    data_size_in: u32,
+    /// input/output: len of data_out. returns ENOSPC if data_out is too small.
+    data_size_out: u32,
+    /// buffer of data_size_in bytes (see data_size_in)
+    data_in: u64,
+    /// buffer of data_size_out bytes (see data_size_out)
+    data_out: u64,
+    repeat: u32,
+    duration: u32,
+    /// input: len of ctx_in
+    ctx_size_in: u32,
+    /// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small.
+    ctx_size_out: u32,
+    /// buffer of ctx_size_in bytes (see ctx_size_in)
+    ctx_in: u64,
+    /// buffer of ctx_size_out bytes (see ctx_size_out)
+    ctx_out: u64,
+};
+
+/// struct used by Cmd.*_get_*_id commands
+pub const GetIdAttr = extern struct {
+    /// All members share the same 32-bit storage; which name applies
+    /// presumably depends on the command being issued -- confirm.
+    id: extern union {
+        start_id: u32,
+        prog_id: u32,
+        map_id: u32,
+        btf_id: u32,
+        link_id: u32,
+    },
+    next_id: u32,
+    open_flags: u32,
+};
+
+/// struct used by Cmd.obj_get_info_by_fd command
+pub const InfoAttr = extern struct {
+    /// fd of the object to query
+    bpf_fd: fd_t,
+    /// NOTE(review): presumably the size in bytes of the buffer behind `info` -- confirm
+    info_len: u32,
+    /// NOTE(review): presumably a user-space pointer to the info buffer -- confirm
+    info: u64,
+};
+
+/// struct used by Cmd.prog_query command
+pub const QueryAttr = extern struct {
+    /// container object to query
+    target_fd: fd_t,
+    /// NOTE(review): presumably one of AttachType -- confirm
+    attach_type: u32,
+    query_flags: u32,
+    attach_flags: u32,
+    /// NOTE(review): presumably a user-space pointer to an array of program ids -- confirm
+    prog_ids: u64,
+    /// NOTE(review): presumably the number of entries behind `prog_ids` -- confirm
+    prog_cnt: u32,
+};
+
+/// struct used by Cmd.raw_tracepoint_open command
+pub const RawTracepointAttr = extern struct {
+    /// NOTE(review): presumably a user-space pointer to the tracepoint name -- confirm
+    name: u64,
+    /// NOTE(review): presumably the fd of the program to attach -- confirm
+    prog_fd: fd_t,
+};
+
+/// struct used by Cmd.btf_load command
+pub const BtfLoadAttr = extern struct {
+    /// NOTE(review): presumably a user-space pointer to the BTF data -- confirm
+    btf: u64,
+    /// NOTE(review): presumably a user-space pointer to the log buffer -- confirm
+    btf_log_buf: u64,
+    /// NOTE(review): presumably the size in bytes of the data behind `btf` -- confirm
+    btf_size: u32,
+    /// NOTE(review): presumably the size in bytes of `btf_log_buf` -- confirm
+    btf_log_size: u32,
+    btf_log_level: u32,
+};
+
+/// struct used by Cmd.task_fd_query command
+pub const TaskFdQueryAttr = extern struct {
+    /// input: pid
+    pid: pid_t,
+    /// input: fd
+    fd: fd_t,
+    /// input: flags
+    flags: u32,
+    /// input/output: buf len
+    buf_len: u32,
+    /// input/output:
+    ///     tp_name for tracepoint
+    ///     symbol for kprobe
+    ///     filename for uprobe
+    buf: u64,
+    /// output: prog_id
+    prog_id: u32,
+    /// output: BPF_FD_TYPE
+    fd_type: u32,
+    /// output: probe_offset
+    probe_offset: u64,
+    /// output: probe_addr
+    probe_addr: u64,
+};
+
+/// struct used by Cmd.link_create command
+pub const LinkCreateAttr = extern struct {
+    /// eBPF program to attach
+    prog_fd: fd_t,
+    /// object to attach to
+    target_fd: fd_t,
+    /// NOTE(review): presumably one of AttachType -- confirm
+    attach_type: u32,
+    /// extra flags
+    flags: u32,
+};
+
+/// struct used by Cmd.link_update command
+pub const LinkUpdateAttr = extern struct {
+    /// fd of the link to update
+    link_fd: fd_t,
+    /// new program to update link with
+    new_prog_fd: fd_t,
+    /// extra flags
+    flags: u32,
+    /// expected link's program fd, it is specified only if BPF_F_REPLACE
+    /// (F_REPLACE in this file) is set in flags
+    old_prog_fd: fd_t,
+};
+
+/// struct used by Cmd.enable_stats command
+pub const EnableStatsAttr = extern struct {
+    /// NOTE(review): presumably selects which kind of statistics to enable -- confirm
+    type: u32,
+};
+
+/// struct used by Cmd.iter_create command
+pub const IterCreateAttr = extern struct {
+    /// NOTE(review): presumably the fd of the link to create the iterator from -- confirm
+    link_fd: fd_t,
+    flags: u32,
+};
+
+/// Argument block handed to bpf(). It is a union of the per-command attribute
+/// structs declared above; the caller fills in the member that matches the
+/// Cmd being issued.
+pub const Attr = extern union {
+    map_create: MapCreateAttr,
+    map_elem: MapElemAttr,
+    map_batch: MapBatchAttr,
+    prog_load: ProgLoadAttr,
+    obj: ObjAttr,
+    prog_attach: ProgAttachAttr,
+    // The prog_test_run struct is declared as TestAttr in this file; the
+    // previously used name TestRunAttr does not exist and failed to compile
+    // as soon as Attr was analyzed.
+    test_run: TestAttr,
+    get_id: GetIdAttr,
+    info: InfoAttr,
+    query: QueryAttr,
+    raw_tracepoint: RawTracepointAttr,
+    btf_load: BtfLoadAttr,
+    task_fd_query: TaskFdQueryAttr,
+    link_create: LinkCreateAttr,
+    link_update: LinkUpdateAttr,
+    enable_stats: EnableStatsAttr,
+    iter_create: IterCreateAttr,
+};
+
+/// Issue the raw bpf system call.
+///
+/// `cmd` selects the operation, `attr` points to the Attr whose member
+/// matching `cmd` has been filled in, and `size` is passed through as the
+/// third syscall argument (NOTE(review): presumably the number of bytes of
+/// `attr` the kernel should read -- confirm against bpf(2)).
+///
+/// Returns the unprocessed syscall result; no errno decoding is done here.
+pub fn bpf(cmd: Cmd, attr: *Attr, size: u32) usize {
+    // syscall3 is declared in std.os.linux, which `usingnamespace std.os`
+    // does not bring into scope, so it has to be referenced by its full path.
+    return std.os.linux.syscall3(.bpf, @enumToInt(cmd), @ptrToInt(attr), size);
+}
lib/std/os/bits/linux.zig
@@ -19,6 +19,7 @@ pub usingnamespace switch (builtin.arch) {
 };
 
 pub usingnamespace @import("linux/netlink.zig");
+pub const bpf = @import("linux/bpf.zig");
 
 const is_mips = builtin.arch.isMIPS();