Commit 63f4778827

Andrew Kelley <andrew@ziglang.org>
2021-11-01 22:45:05
std: add Linux perf syscall bits
Example usage:

```zig
const std = @import("std");
const PERF = std.os.linux.PERF;
const assert = std.debug.assert;

test "perf" {
    var attr: std.os.linux.perf_event_attr = .{
        .type = PERF.TYPE.HARDWARE,
        .config = @enumToInt(PERF.COUNT.HW.INSTRUCTIONS),
        .flags = .{
            .disabled = true,
            .exclude_kernel = true,
            .exclude_hv = true,
        },
    };
    const fd = try std.os.perf_event_open(&attr, 0, -1, -1, PERF.FLAG.FD_CLOEXEC);
    defer std.os.close(fd);

    _ = std.os.linux.ioctl(fd, PERF.EVENT_IOC.RESET, 0);
    _ = std.os.linux.ioctl(fd, PERF.EVENT_IOC.ENABLE, 0);

    long();

    _ = std.os.linux.ioctl(fd, PERF.EVENT_IOC.DISABLE, 0);

    var result: usize = 0;
    assert((try std.os.read(fd, std.mem.asBytes(&result))) == @sizeOf(usize));

    std.debug.print("instruction count: {d}\n", .{result});
}

fn long() void {
    var i: usize = 0;
    while (i < 100000) : (i += 1) {}
}
```
1 parent 70ef9bc
Changed files (2)
lib
lib/std/os/linux.zig
@@ -1638,6 +1638,23 @@ pub fn fadvise(fd: fd_t, offset: i64, len: i64, advice: usize) usize {
     }
 }
 
+/// Raw `perf_event_open` system call.
+///
+/// `pid`, `cpu` and `group_fd` are signed; each is first widened to `isize`
+/// and then reinterpreted bit-for-bit as `usize`, so a negative value such as
+/// -1 keeps its sign when it is handed to `syscall5`. The value produced by
+/// `syscall5` is returned unchanged.
+pub fn perf_event_open(
+    attr: *perf_event_attr,
+    pid: pid_t,
+    cpu: i32,
+    group_fd: fd_t,
+    flags: usize,
+) usize {
+    const attr_arg = @ptrToInt(attr);
+    const pid_arg = @bitCast(usize, @as(isize, pid));
+    const cpu_arg = @bitCast(usize, @as(isize, cpu));
+    const group_fd_arg = @bitCast(usize, @as(isize, group_fd));
+    return syscall5(
+        .perf_event_open,
+        attr_arg,
+        pid_arg,
+        cpu_arg,
+        group_fd_arg,
+        flags,
+    );
+}
+
 pub const E = switch (native_arch) {
     .mips, .mipsel => @import("linux/errno/mips.zig").E,
     .sparc, .sparcel, .sparcv9 => @import("linux/errno/sparc.zig").E,
@@ -4925,3 +4942,261 @@ pub const rtnl_link_stats64 = extern struct {
     /// dropped, no handler found
     rx_nohandler: u64,
 };
+
+/// Attribute block handed by pointer to `perf_event_open`.
+///
+/// Declared `extern` so the field order and widths below are the layout that
+/// is passed to the kernel. Every field has a default; note that `type`
+/// defaults to `undefined`, so callers are expected to set it explicitly.
+pub const perf_event_attr = extern struct {
+    /// Major type: hardware/software/tracepoint/etc.
+    type: PERF.TYPE = undefined,
+    /// Size of the attr structure, for fwd/bwd compat.
+    /// Defaults to the size of this declaration.
+    size: u32 = @sizeOf(perf_event_attr),
+    /// Type specific configuration information.
+    config: u64 = 0,
+
+    /// Sampling period or sampling frequency.
+    /// NOTE(review): presumably read as a frequency when `flags.freq` is
+    /// set and as a period otherwise - confirm against the kernel header.
+    sample_period_or_freq: u64 = 0,
+    /// NOTE(review): presumably a bitmask built from `PERF.SAMPLE` values -
+    /// confirm.
+    sample_type: u64 = 0,
+    /// NOTE(review): presumably selects the layout of the data returned when
+    /// reading the event file descriptor - confirm.
+    read_format: u64 = 0,
+
+    /// Bit flags. The declared widths (27 one-bit flags, the 2-bit
+    /// `precise_ip` and the 35-bit reserved tail) add up to 64 bits.
+    flags: packed struct {
+        /// off by default
+        disabled: bool = false,
+        /// children inherit it
+        inherit: bool = false,
+        /// must always be on PMU
+        pinned: bool = false,
+        /// only group on PMU
+        exclusive: bool = false,
+        /// don't count user
+        exclude_user: bool = false,
+        /// ditto kernel
+        exclude_kernel: bool = false,
+        /// ditto hypervisor
+        exclude_hv: bool = false,
+        /// don't count when idle
+        exclude_idle: bool = false,
+        /// include mmap data
+        mmap: bool = false,
+        /// include comm data
+        comm: bool = false,
+        /// use freq, not period
+        freq: bool = false,
+        /// per task counts
+        inherit_stat: bool = false,
+        /// next exec enables
+        enable_on_exec: bool = false,
+        /// trace fork/exit
+        task: bool = false,
+        /// wakeup_watermark
+        watermark: bool = false,
+        /// Skid constraint for the sampled instruction pointer:
+        ///
+        ///  0 - SAMPLE_IP can have arbitrary skid
+        ///  1 - SAMPLE_IP must have constant skid
+        ///  2 - SAMPLE_IP requested to have 0 skid
+        ///  3 - SAMPLE_IP must have 0 skid
+        ///
+        /// See also PERF_RECORD_MISC_EXACT_IP.
+        precise_ip: u2 = 0,
+        /// non-exec mmap data
+        mmap_data: bool = false,
+        /// sample_type all events
+        sample_id_all: bool = false,
+
+        /// don't count in host
+        exclude_host: bool = false,
+        /// don't count in guest
+        exclude_guest: bool = false,
+
+        /// exclude kernel callchains
+        exclude_callchain_kernel: bool = false,
+        /// exclude user callchains
+        exclude_callchain_user: bool = false,
+        /// include mmap with inode data
+        mmap2: bool = false,
+        /// flag comm events that are due to an exec
+        comm_exec: bool = false,
+        /// use `clockid` for time fields
+        use_clockid: bool = false,
+        /// context switch data
+        context_switch: bool = false,
+        /// Write ring buffer from end to beginning
+        write_backward: bool = false,
+        /// include namespaces data
+        namespaces: bool = false,
+
+        /// Unused high bits; left at zero by default.
+        __reserved_1: u35 = 0,
+    } = .{},
+    /// Wake up every n events, or the number of bytes before wakeup.
+    /// NOTE(review): presumably the byte interpretation applies when
+    /// `flags.watermark` is set - confirm.
+    wakeup_events_or_watermark: u32 = 0,
+
+    /// NOTE(review): presumably the breakpoint kind, relevant for
+    /// `PERF.TYPE.BREAKPOINT` events - confirm.
+    bp_type: u32 = 0,
+
+    /// This field is also used for:
+    /// bp_addr
+    /// kprobe_func for perf_kprobe
+    /// uprobe_path for perf_uprobe
+    config1: u64 = 0,
+    /// This field is also used for:
+    /// bp_len
+    /// kprobe_addr when kprobe_func == null
+    /// probe_offset for perf_[k,u]probe
+    config2: u64 = 0,
+
+    /// enum perf_branch_sample_type
+    /// NOTE(review): presumably built from `PERF.SAMPLE.BRANCH` values -
+    /// confirm.
+    branch_sample_type: u64 = 0,
+
+    /// Defines set of user regs to dump on samples.
+    /// See asm/perf_regs.h for details.
+    sample_regs_user: u64 = 0,
+
+    /// Defines size of the user stack to dump on samples.
+    sample_stack_user: u32 = 0,
+
+    /// Clock used for time fields when `flags.use_clockid` is set.
+    clockid: i32 = 0,
+    /// Defines set of regs to dump for each sample
+    /// state captured on:
+    ///  - precise = 0: PMU interrupt
+    ///  - precise > 0: sampled instruction
+    ///
+    /// See asm/perf_regs.h for details.
+    sample_regs_intr: u64 = 0,
+
+    /// Wakeup watermark for AUX area
+    aux_watermark: u32 = 0,
+    /// NOTE(review): presumably the maximum number of callchain frames to
+    /// record per sample - confirm.
+    sample_max_stack: u16 = 0,
+    /// Align to u64
+    __reserved_2: u16 = 0,
+};
+
+/// Namespace for the constants used together with `perf_event_attr` and
+/// `perf_event_open`.
+pub const PERF = struct {
+    /// Values for `perf_event_attr.type`.
+    pub const TYPE = enum(u32) {
+        HARDWARE = 0,
+        SOFTWARE = 1,
+        TRACEPOINT = 2,
+        HW_CACHE = 3,
+        RAW = 4,
+        BREAKPOINT = 5,
+        MAX = 6,
+    };
+
+    /// Event identifiers, grouped by event family.
+    pub const COUNT = struct {
+        /// Hardware event identifiers.
+        pub const HW = enum(u32) {
+            CPU_CYCLES = 0,
+            INSTRUCTIONS = 1,
+            CACHE_REFERENCES = 2,
+            CACHE_MISSES = 3,
+            BRANCH_INSTRUCTIONS = 4,
+            BRANCH_MISSES = 5,
+            BUS_CYCLES = 6,
+            STALLED_CYCLES_FRONTEND = 7,
+            STALLED_CYCLES_BACKEND = 8,
+            REF_CPU_CYCLES = 9,
+            MAX = 10,
+
+            /// Hardware cache identifiers.
+            pub const CACHE = enum(u32) {
+                L1D = 0,
+                L1I = 1,
+                LL = 2,
+                DTLB = 3,
+                ITLB = 4,
+                BPU = 5,
+                NODE = 6,
+                MAX = 7,
+
+                /// Cache operation identifiers.
+                pub const OP = enum(u32) {
+                    READ = 0,
+                    WRITE = 1,
+                    PREFETCH = 2,
+                    MAX = 3,
+                };
+
+                /// Cache operation result identifiers.
+                pub const RESULT = enum(u32) {
+                    ACCESS = 0,
+                    MISS = 1,
+                    MAX = 2,
+                };
+            };
+        };
+
+        /// Software event identifiers.
+        pub const SW = enum(u32) {
+            CPU_CLOCK = 0,
+            TASK_CLOCK = 1,
+            PAGE_FAULTS = 2,
+            CONTEXT_SWITCHES = 3,
+            CPU_MIGRATIONS = 4,
+            PAGE_FAULTS_MIN = 5,
+            PAGE_FAULTS_MAJ = 6,
+            ALIGNMENT_FAULTS = 7,
+            EMULATION_FAULTS = 8,
+            DUMMY = 9,
+            BPF_OUTPUT = 10,
+            MAX = 11,
+        };
+    };
+
+    /// Single-bit sample selectors; each constant occupies its own bit.
+    pub const SAMPLE = struct {
+        pub const IP = 1 << 0;
+        pub const TID = 1 << 1;
+        pub const TIME = 1 << 2;
+        pub const ADDR = 1 << 3;
+        pub const READ = 1 << 4;
+        pub const CALLCHAIN = 1 << 5;
+        pub const ID = 1 << 6;
+        pub const CPU = 1 << 7;
+        pub const PERIOD = 1 << 8;
+        pub const STREAM_ID = 1 << 9;
+        pub const RAW = 1 << 10;
+        pub const BRANCH_STACK = 1 << 11;
+        pub const REGS_USER = 1 << 12;
+        pub const STACK_USER = 1 << 13;
+        pub const WEIGHT = 1 << 14;
+        pub const DATA_SRC = 1 << 15;
+        pub const IDENTIFIER = 1 << 16;
+        pub const TRANSACTION = 1 << 17;
+        pub const REGS_INTR = 1 << 18;
+        pub const PHYS_ADDR = 1 << 19;
+        pub const MAX = 1 << 20;
+
+        /// Single-bit branch sample selectors.
+        pub const BRANCH = struct {
+            pub const USER = 0x00001;
+            pub const KERNEL = 0x00002;
+            pub const HV = 0x00004;
+            pub const ANY = 0x00008;
+            pub const ANY_CALL = 0x00010;
+            pub const ANY_RETURN = 0x00020;
+            pub const IND_CALL = 0x00040;
+            pub const ABORT_TX = 0x00080;
+            pub const IN_TX = 0x00100;
+            pub const NO_TX = 0x00200;
+            pub const COND = 0x00400;
+            pub const CALL_STACK = 0x00800;
+            pub const IND_JUMP = 0x01000;
+            pub const CALL = 0x02000;
+            pub const NO_FLAGS = 0x04000;
+            pub const NO_CYCLES = 0x08000;
+            pub const TYPE_SAVE = 0x10000;
+            pub const MAX = 0x20000;
+        };
+    };
+
+    /// Bits for the `flags` argument of `perf_event_open`.
+    pub const FLAG = struct {
+        pub const FD_NO_GROUP = 0x1;
+        pub const FD_OUTPUT = 0x2;
+        pub const PID_CGROUP = 0x4;
+        pub const FD_CLOEXEC = 0x8;
+    };
+
+    /// Request numbers for `ioctl` on a perf event file descriptor.
+    ///
+    /// NOTE(review): the requests with high bits set appear to embed an
+    /// argument size and direction, which would make these literals specific
+    /// to targets sharing that ioctl encoding and pointer width - confirm
+    /// before relying on them on other architectures.
+    pub const EVENT_IOC = struct {
+        pub const ENABLE = 0x2400;
+        pub const DISABLE = 0x2401;
+        pub const REFRESH = 0x2402;
+        pub const RESET = 0x2403;
+        pub const PERIOD = 0x40082404;
+        pub const SET_OUTPUT = 0x2405;
+        pub const SET_FILTER = 0x40082406;
+        pub const SET_BPF = 0x40042408;
+        pub const PAUSE_OUTPUT = 0x40042409;
+        pub const QUERY_BPF = 0xC008240A;
+        pub const MODIFY_ATTRIBUTES = 0x4008240B;
+    };
+};
lib/std/os.zig
@@ -6349,3 +6349,87 @@ pub fn madvise(ptr: [*]align(mem.page_size) u8, length: usize, advice: u32) Madv
         else => |err| return unexpectedErrno(err),
     }
 }
+
+/// Errors that `perf_event_open` can return.
+pub const PerfEventOpenError = error{
+    /// Returned if the perf_event_attr size value is too small (smaller
+    /// than PERF_ATTR_SIZE_VER0), too big (larger than the page size),
+    /// or larger than the kernel supports and the extra bytes are not
+    /// zero. When E2BIG is returned, the perf_event_attr size field is
+    /// overwritten by the kernel to be the size of the structure it was
+    /// expecting.
+    TooBig,
+    /// Returned when the requested event requires CAP_SYS_ADMIN
+    /// permissions (or a more permissive perf_event paranoid setting).
+    /// Some common cases where an unprivileged process may encounter this
+    /// error: attaching to a process owned by a different user;
+    /// monitoring all processes on a given CPU (i.e., specifying the pid
+    /// argument as -1); and not setting exclude_kernel when the paranoid
+    /// setting requires it.
+    ///
+    /// Also returned on many (but not all) architectures when an
+    /// unsupported exclude_hv, exclude_idle, exclude_user, or
+    /// exclude_kernel setting is specified.
+    ///
+    /// It can also happen, as with EACCES, when the requested event
+    /// requires CAP_SYS_ADMIN permissions (or a more permissive
+    /// perf_event paranoid setting). This includes setting a breakpoint
+    /// on a kernel address, and (since Linux 3.13) setting a kernel
+    /// function-trace tracepoint.
+    PermissionDenied,
+    /// Returned if another event already has exclusive access to the
+    /// PMU.
+    DeviceBusy,
+    /// Each opened event uses one file descriptor. If a large number
+    /// of events are opened, the per-process limit on the number of
+    /// open file descriptors will be reached, and no more events can be
+    /// created.
+    ProcessResources,
+    /// Returned when the kernel reports ENODEV (see the errno mapping in
+    /// `perf_event_open`).
+    /// NOTE(review): going by the name, the event needs a feature the
+    /// current CPU does not provide - confirm against the man page.
+    EventRequiresUnsupportedCpuFeature,
+    /// Returned if you try to add more breakpoint events than supported
+    /// by the hardware.
+    TooManyBreakpoints,
+    /// Returned if PERF_SAMPLE_STACK_USER is set in sample_type and it
+    /// is not supported by hardware.
+    SampleStackNotSupported,
+    /// Returned if an event requiring a specific hardware feature is
+    /// requested but there is no hardware support. This includes
+    /// requesting low-skid events if not supported, branch tracing if it
+    /// is not available, sampling if no PMU interrupt is available, and
+    /// branch stacks for software events.
+    EventNotSupported,
+    /// Returned if PERF_SAMPLE_CALLCHAIN is requested and
+    /// sample_max_stack is larger than the maximum specified in
+    /// /proc/sys/kernel/perf_event_max_stack.
+    SampleMaxStackOverflow,
+    /// Returned if attempting to attach to a process that does not exist.
+    ProcessNotFound,
+} || UnexpectedError;
+
+/// Opens a performance-monitoring event and returns its file descriptor.
+///
+/// Forwards all arguments unchanged to `system.perf_event_open` and turns
+/// the resulting errno into a `PerfEventOpenError`. On success the caller
+/// owns the returned descriptor.
+///
+/// Errnos that indicate a mistake in the arguments themselves (EBADF,
+/// EFAULT, EINVAL, EINTR) are treated as programmer errors and are
+/// `unreachable`.
+pub fn perf_event_open(
+    attr: *linux.perf_event_attr,
+    pid: pid_t,
+    cpu: i32,
+    group_fd: fd_t,
+    flags: usize,
+) PerfEventOpenError!fd_t {
+    const rc = system.perf_event_open(attr, pid, cpu, group_fd, flags);
+    switch (errno(rc)) {
+        .SUCCESS => return @intCast(fd_t, rc),
+        .@"2BIG" => return error.TooBig,
+        .ACCES, .PERM => return error.PermissionDenied,
+        .BADF => unreachable, // group_fd file descriptor is not valid.
+        .BUSY => return error.DeviceBusy,
+        .FAULT => unreachable, // Segmentation fault.
+        .INVAL => unreachable, // Bad attr settings.
+        .INTR => unreachable, // Mixed perf and ftrace handling for a uprobe.
+        .MFILE => return error.ProcessResources,
+        .NODEV => return error.EventRequiresUnsupportedCpuFeature,
+        // ENOENT is not only "invalid type setting": the kernel also uses it
+        // for generic events that the running system does not support (for
+        // example hardware counters on a machine without a usable PMU). The
+        // caller cannot rule that out in advance, so it must be reported as
+        // an error rather than asserted away.
+        .NOENT => return error.EventNotSupported,
+        .NOSPC => return error.TooManyBreakpoints,
+        .NOSYS => return error.SampleStackNotSupported,
+        .OPNOTSUPP => return error.EventNotSupported,
+        .OVERFLOW => return error.SampleMaxStackOverflow,
+        .SRCH => return error.ProcessNotFound,
+        else => |err| return unexpectedErrno(err),
+    }
+}