/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

/*
 * This file has definitions for some important file table structures
 * and constants and structures used by various generic file system
 * ioctls.  Please do not make any changes in this file before
 * sending patches for review to linux-fsdevel@vger.kernel.org and
 * linux-api@vger.kernel.org.
 */

#include <linux/limits.h>
#include <linux/ioctl.h>
#include <linux/types.h>
#include <linux/fscrypt.h>

/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
#include <linux/mount.h>
 20
 21/*
 22 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
 23 * the file limit at runtime and only root can increase the per-process
 24 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
 25 * upper limit on files-per-process.
 26 *
 27 * Some programs (notably those using select()) may have to be 
 28 * recompiled to take full advantage of the new limits..  
 29 */
 30
 31/* Fixed constants first: */
 32#undef NR_OPEN
 33#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
 34#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
 35
 36#define BLOCK_SIZE_BITS 10
 37#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
 38
 39/* flags for integrity meta */
 40#define IO_INTEGRITY_CHK_GUARD		(1U << 0) /* enforce guard check */
 41#define IO_INTEGRITY_CHK_REFTAG		(1U << 1) /* enforce ref check */
 42#define IO_INTEGRITY_CHK_APPTAG		(1U << 2) /* enforce app check */
 43
 44#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
 45				  IO_INTEGRITY_CHK_REFTAG | \
 46				  IO_INTEGRITY_CHK_APPTAG)
 47
 48#define SEEK_SET	0	/* seek relative to beginning of file */
 49#define SEEK_CUR	1	/* seek relative to current file position */
 50#define SEEK_END	2	/* seek relative to end of file */
 51#define SEEK_DATA	3	/* seek to the next data */
 52#define SEEK_HOLE	4	/* seek to the next hole */
 53#define SEEK_MAX	SEEK_HOLE
 54
 55#define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
 56#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
 57#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
 58
 59/*
 60 * The root inode of procfs is guaranteed to always have the same inode number.
 61 * For programs that make heavy use of procfs, verifying that the root is a
 62 * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH})
 63 * will allow you to make sure you are never tricked into operating on the
 64 * wrong procfs file.
 65 */
 66enum procfs_ino {
 67	PROCFS_ROOT_INO = 1,
 68};
 69
 70struct file_clone_range {
 71	__s64 src_fd;
 72	__u64 src_offset;
 73	__u64 src_length;
 74	__u64 dest_offset;
 75};
 76
 77struct fstrim_range {
 78	__u64 start;
 79	__u64 len;
 80	__u64 minlen;
 81};
 82
 83/*
 84 * We include a length field because some filesystems (vfat) have an identifier
 85 * that we do want to expose as a UUID, but doesn't have the standard length.
 86 *
 87 * We use a fixed size buffer beacuse this interface will, by fiat, never
 88 * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
 89 * users to have to deal with that.
 90 */
 91struct fsuuid2 {
 92	__u8	len;
 93	__u8	uuid[16];
 94};
 95
 96struct fs_sysfs_path {
 97	__u8			len;
 98	__u8			name[128];
 99};
100
/* Protection info capability flags */
#define	LBMD_PI_CAP_INTEGRITY		(1 << 0)
#define	LBMD_PI_CAP_REFTAG		(1 << 1)

/* Checksum types for Protection Information (note: the value 3 is not used) */
#define LBMD_PI_CSUM_NONE		0
#define LBMD_PI_CSUM_IP			1
#define LBMD_PI_CSUM_CRC16_T10DIF	2
#define LBMD_PI_CSUM_CRC64_NVME		4

/* sizeof first published struct */
#define LBMD_SIZE_VER0			16
113
/*
 * Logical block metadata capability descriptor
 * If the device does not support metadata, all the fields will be zero.
 * Applications must check lbmd_flags to determine whether metadata is
 * supported or not.
 *
 * The members add up to 16 bytes (4 + 2 + 10 * 1), the value of
 * LBMD_SIZE_VER0.
 */
struct logical_block_metadata_cap {
	/* Bitmask of logical block metadata capability flags */
	__u32	lbmd_flags;
	/*
	 * The amount of data described by each unit of logical block
	 * metadata
	 */
	__u16	lbmd_interval;
	/*
	 * Size in bytes of the logical block metadata associated with each
	 * interval
	 */
	__u8	lbmd_size;
	/*
	 * Size in bytes of the opaque block tag associated with each
	 * interval
	 */
	__u8	lbmd_opaque_size;
	/*
	 * Offset in bytes of the opaque block tag within the logical block
	 * metadata
	 */
	__u8	lbmd_opaque_offset;
	/* Size in bytes of the T10 PI tuple associated with each interval */
	__u8	lbmd_pi_size;
	/* Offset in bytes of T10 PI tuple within the logical block metadata */
	__u8	lbmd_pi_offset;
	/* T10 PI guard tag type */
	__u8	lbmd_guard_tag_type;
	/* Size in bytes of the T10 PI application tag */
	__u8	lbmd_app_tag_size;
	/* Size in bytes of the T10 PI reference tag */
	__u8	lbmd_ref_tag_size;
	/* Size in bytes of the T10 PI storage tag */
	__u8	lbmd_storage_tag_size;
	/* Trailing byte that rounds the structure up to 16 bytes */
	__u8	pad;
};
157
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME		0
#define FILE_DEDUPE_RANGE_DIFFERS	1

/* from struct btrfs_ioctl_file_extent_same_info */
struct file_dedupe_range_info {
	__s64 dest_fd;		/* in - destination file */
	__u64 dest_offset;	/* in - start of extent in destination */
	__u64 bytes_deduped;	/* out - total # of bytes we were able
				 * to dedupe from this file. */
	/* status of this dedupe operation:
	 * < 0 for error
	 * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
	 * == FILE_DEDUPE_RANGE_DIFFERS if data differs
	 */
	__s32 status;		/* out - see above description */
	__u32 reserved;		/* must be zero */
};

/*
 * from struct btrfs_ioctl_file_extent_same_args
 *
 * Argument of the FIDEDUPERANGE ioctl: a 24-byte fixed header followed by
 * a flexible array of per-destination records.
 */
struct file_dedupe_range {
	__u64 src_offset;	/* in - start of extent in source */
	__u64 src_length;	/* in - length of extent */
	__u16 dest_count;	/* in - total elements in info array */
	__u16 reserved1;	/* must be zero */
	__u32 reserved2;	/* must be zero */
	struct file_dedupe_range_info info[];
};
186
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
	unsigned long nr_files;		/* read only */
	unsigned long nr_free_files;	/* read only */
	unsigned long max_files;		/* tunable */
};
193
/* Inode counters; seven longs in total including the padding. */
struct inodes_stat_t {
	long nr_inodes;
	long nr_unused;
	long dummy[5];		/* padding for sysctl ABI compatibility */
};
199
200
#define NR_FILE  8192	/* this can well be larger on a larger system */
202
/*
 * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
 *
 * Five 32-bit fields followed by 8 bytes of padding: 28 bytes in total.
 */
struct fsxattr {
	__u32		fsx_xflags;	/* xflags field value (get/set) */
	__u32		fsx_extsize;	/* extsize field value (get/set)*/
	__u32		fsx_nextents;	/* nextents field value (get)	*/
	__u32		fsx_projid;	/* project identifier (get/set) */
	__u32		fsx_cowextsize;	/* CoW extsize field value (get/set)*/
	unsigned char	fsx_pad[8];
};
214
/*
 * Variable size structure for file_[sg]et_attr().
 *
 * Note: this is an alternative to 'struct file_kattr'/'struct fsxattr'.
 * As this structure is passed to/from userspace with its size, it can
 * be versioned based on the size.
 */
struct file_attr {
	__u64 fa_xflags;	/* xflags field value (get/set) */
	__u32 fa_extsize;	/* extsize field value (get/set)*/
	__u32 fa_nextents;	/* nextents field value (get)   */
	__u32 fa_projid;	/* project identifier (get/set) */
	__u32 fa_cowextsize;	/* CoW extsize field value (get/set) */
};

/* Size of the first version of struct file_attr: 8 + 4 * 4 bytes. */
#define FILE_ATTR_SIZE_VER0 24
#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0
232
/*
 * Flags for the fsx_xflags field.
 * Bit 0x00000004 has no definition in this list.
 */
#define FS_XFLAG_REALTIME	0x00000001	/* data in realtime volume */
#define FS_XFLAG_PREALLOC	0x00000002	/* preallocated file extents */
#define FS_XFLAG_IMMUTABLE	0x00000008	/* file cannot be modified */
#define FS_XFLAG_APPEND		0x00000010	/* all writes append */
#define FS_XFLAG_SYNC		0x00000020	/* all writes synchronous */
#define FS_XFLAG_NOATIME	0x00000040	/* do not update access time */
#define FS_XFLAG_NODUMP		0x00000080	/* do not include in backups */
#define FS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
#define FS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
#define FS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
#define FS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
#define FS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
#define FS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
#define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
#define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
#define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
#define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
253
/* the read-only stuff doesn't really belong here, but any other place is
   probably as bad and I don't want to create yet another include file. */

#define BLKROSET   _IO(0x12,93)	/* set device read-only (0 = read-write) */
#define BLKROGET   _IO(0x12,94)	/* get read-only status (0 = read-write) */
#define BLKRRPART  _IO(0x12,95)	/* re-read partition table */
#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
#define BLKFLSBUF  _IO(0x12,97)	/* flush buffer cache */
#define BLKRASET   _IO(0x12,98)	/* set read ahead for block device */
#define BLKRAGET   _IO(0x12,99)	/* get current read ahead setting */
#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
/*
 * The region below is compiled out; it only records that numbers 105-107
 * are taken.
 */
#if 0
#define BLKPG      _IO(0x12,105)/* See blkpg.h */

/* Some people are morons.  Do not use sizeof! */

#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
/* This was here just to show that the number is taken -
   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
#endif
/* A jump here: 108-111 have been used for various private purposes. */
#define BLKBSZGET  _IOR(0x12,112,size_t)
#define BLKBSZSET  _IOW(0x12,113,size_t)
/*
 * Note: the size encoded in BLKGETSIZE64 is sizeof(size_t) although the
 * argument is documented as a u64 pointer.  The number is ABI; do not
 * "fix" the type.
 */
#define BLKGETSIZE64 _IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
#define BLKTRACESTART _IO(0x12,116)
#define BLKTRACESTOP _IO(0x12,117)
#define BLKTRACETEARDOWN _IO(0x12,118)
#define BLKDISCARD _IO(0x12,119)
#define BLKIOMIN _IO(0x12,120)
#define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123)
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
299
/* Generic file ioctls. */
#define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
#define FIBMAP	   _IO(0x00,1)	/* bmap access */
#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
#define FICLONE		_IOW(0x94, 9, int)
#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
#define FIDEDUPERANGE	_IOWR(0x94, 54, struct file_dedupe_range)
309
#define FSLABEL_MAX 256	/* Max chars for the interface; each fs may differ */

/*
 * The FS_IOC32_* variants use the same type/number as their FS_IOC_*
 * counterparts but encode sizeof(int) instead of sizeof(long).
 */
#define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
#define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
#define	FS_IOC_GETVERSION		_IOR('v', 1, long)
#define	FS_IOC_SETVERSION		_IOW('v', 2, long)
#define FS_IOC_FIEMAP			_IOWR('f', 11, struct fiemap)
#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
#define FS_IOC32_SETVERSION		_IOW('v', 2, int)
#define FS_IOC_FSGETXATTR		_IOR('X', 31, struct fsxattr)
#define FS_IOC_FSSETXATTR		_IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL		_IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL		_IOW(0x94, 50, char[FSLABEL_MAX])
/* Returns the external filesystem UUID, the same one blkid returns */
#define FS_IOC_GETFSUUID		_IOR(0x15, 0, struct fsuuid2)
/*
 * Returns the path component under /sys/fs/ that refers to this filesystem;
 * also /sys/kernel/debug/ for filesystems with debugfs exports
 */
#define FS_IOC_GETFSSYSFSPATH		_IOR(0x15, 1, struct fs_sysfs_path)
/* Get logical block metadata capability details */
#define FS_IOC_GETLBMD_CAP		_IOWR(0x15, 2, struct logical_block_metadata_cap)
334
/*
 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
 *
 * Note: for historical reasons, these flags were originally used and
 * defined for use by ext2/ext3, and then other file systems started
 * using these flags so they wouldn't need to write their own version
 * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
 * should think twice before trying to use these flags in new
 * contexts, or trying to assign these flags, since they are used both
 * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
 * almost out of 32-bit flags.  :-)
 *
 * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
 * XFS to the generic FS level interface.  This uses a structure that
 * has padding and hence has more room to grow, so it may be more
 * appropriate for many new use cases.
 *
 * Please do not change these flags or interfaces before checking with
 * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
 */
#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
#define	FS_UNRM_FL			0x00000002 /* Undelete */
#define	FS_COMPR_FL			0x00000004 /* Compress file */
#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
#define FS_NODUMP_FL			0x00000040 /* do not dump file */
#define FS_NOATIME_FL			0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define FS_DIRTY_FL			0x00000100
#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
/* End compression flags --- maybe not all used */
#define FS_ENCRYPT_FL			0x00000800 /* Encrypted file */
/* FS_BTREE_FL and FS_INDEX_FL deliberately name the same bit. */
#define FS_BTREE_FL			0x00001000 /* btree format dir */
#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies */
#define FS_HUGE_FILE_FL			0x00040000 /* Reserved for ext4 */
#define FS_EXTENT_FL			0x00080000 /* Extents */
#define FS_VERITY_FL			0x00100000 /* Verity protected inode */
#define FS_EA_INODE_FL			0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL			0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL			0x00800000 /* Do not cow file */
#define FS_DAX_FL			0x02000000 /* Inode is DAX */
#define FS_INLINE_DATA_FL		0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
#define FS_CASEFOLD_FL			0x40000000 /* Folder is case insensitive */
#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */

#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
390
391
/* sync_file_range flag bits; the last macro is the union of the other three. */
#define SYNC_FILE_RANGE_WAIT_BEFORE	1
#define SYNC_FILE_RANGE_WRITE		2
#define SYNC_FILE_RANGE_WAIT_AFTER	4
#define SYNC_FILE_RANGE_WRITE_AND_WAIT	(SYNC_FILE_RANGE_WRITE | \
					 SYNC_FILE_RANGE_WAIT_BEFORE | \
					 SYNC_FILE_RANGE_WAIT_AFTER)
398
/*
 * Flags for preadv2/pwritev2:
 */

typedef int __bitwise __kernel_rwf_t;

/* high priority request, poll if possible */
#define RWF_HIPRI	((__kernel_rwf_t)0x00000001)

/* per-IO O_DSYNC */
#define RWF_DSYNC	((__kernel_rwf_t)0x00000002)

/* per-IO O_SYNC */
#define RWF_SYNC	((__kernel_rwf_t)0x00000004)

/* per-IO, return -EAGAIN if operation would block */
#define RWF_NOWAIT	((__kernel_rwf_t)0x00000008)

/* per-IO O_APPEND */
#define RWF_APPEND	((__kernel_rwf_t)0x00000010)

/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND	((__kernel_rwf_t)0x00000020)

/* Atomic Write */
#define RWF_ATOMIC	((__kernel_rwf_t)0x00000040)

/* buffered IO that drops the cache after reading or writing data */
#define RWF_DONTCACHE	((__kernel_rwf_t)0x00000080)

/* mask of flags supported by the kernel (all eight bits above: 0xff) */
#define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
			 RWF_DONTCACHE)
433
/* ioctl type byte shared by the procfs ioctls below */
#define PROCFS_IOCTL_MAGIC 'f'

/* Pagemap ioctl */
#define PAGEMAP_SCAN	_IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
438
/* Bitmasks provided in pm_scan_arg masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED	(1 << 0)
#define PAGE_IS_WRITTEN		(1 << 1)
#define PAGE_IS_FILE		(1 << 2)
#define PAGE_IS_PRESENT		(1 << 3)
#define PAGE_IS_SWAPPED		(1 << 4)
#define PAGE_IS_PFNZERO		(1 << 5)
#define PAGE_IS_HUGE		(1 << 6)
#define PAGE_IS_SOFT_DIRTY	(1 << 7)
#define PAGE_IS_GUARD		(1 << 8)
449
/*
 * struct page_region - Page region with flags
 * @start:	Start of the region
 * @end:	End of the region (exclusive)
 * @categories:	PAGE_IS_* category bitmask for the region
 *
 * Three 64-bit members, 24 bytes in total.
 */
struct page_region {
	__u64 start;
	__u64 end;
	__u64 categories;
};
461
/* Flags for PAGEMAP_SCAN ioctl */
#define PM_SCAN_WP_MATCHING	(1 << 0)	/* Write protect the pages matched. */
#define PM_SCAN_CHECK_WPASYNC	(1 << 1)	/* Abort the scan when a non-WP-enabled page is found. */
465
/*
 * struct pm_scan_arg - Pagemap ioctl argument
 * @size:		Size of the structure
 * @flags:		Flags for the IOCTL
 * @start:		Starting address of the region
 * @end:		Ending address of the region
 * @walk_end:		Address where the scan stopped (written by kernel).
 *			walk_end == end (address tags cleared) informs that
 *			the scan completed on entire range.
 * @vec:		Address of page_region struct array for output
 * @vec_len:		Length of the page_region struct array
 * @max_pages:		Optional limit for number of returned pages (0 = disabled)
 * @category_inverted:	PAGE_IS_* categories which values match if 0 instead of 1
 * @category_mask:	Skip pages for which any category doesn't match
 * @category_anyof_mask: Skip pages for which no category matches
 * @return_mask:	PAGE_IS_* categories that are to be reported in `page_region`s returned
 *
 * Twelve 64-bit members, 96 bytes in total.
 */
struct pm_scan_arg {
	__u64 size;
	__u64 flags;
	__u64 start;
	__u64 end;
	__u64 walk_end;
	__u64 vec;
	__u64 vec_len;
	__u64 max_pages;
	__u64 category_inverted;
	__u64 category_mask;
	__u64 category_anyof_mask;
	__u64 return_mask;
};
496
/* /proc/<pid>/maps ioctl */
#define PROCMAP_QUERY	_IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)

enum procmap_query_flags {
	/*
	 * VMA permission flags.
	 *
	 * Can be used as part of procmap_query.query_flags field to look up
	 * only VMAs satisfying specified subset of permissions. E.g., specifying
	 * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
	 * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
	 * return read/write VMAs, though both executable/non-executable and
	 * private/shared will be ignored.
	 *
	 * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
	 * field to specify actual VMA permissions.
	 */
	PROCMAP_QUERY_VMA_READABLE		= 0x01,
	PROCMAP_QUERY_VMA_WRITABLE		= 0x02,
	PROCMAP_QUERY_VMA_EXECUTABLE		= 0x04,
	PROCMAP_QUERY_VMA_SHARED		= 0x08,
	/*
	 * Query modifier flags.
	 *
	 * By default VMA that covers provided address is returned, or -ENOENT
	 * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
	 * VMA with vma_start > addr will be returned if no covering VMA is
	 * found.
	 *
	 * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
	 * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
	 * to iterate all VMAs with file backing.
	 */
	PROCMAP_QUERY_COVERING_OR_NEXT_VMA	= 0x10,
	PROCMAP_QUERY_FILE_BACKED_VMA		= 0x20,
};
533
/*
 * Input/output argument structure passed into ioctl() call. It can be used
 * to query a set of VMAs (Virtual Memory Areas) of a process.
 *
 * Each field can be one of three kinds, marked in a short comment to the
 * right of the field:
 *   - "in", input argument, user has to provide this value, kernel doesn't modify it;
 *   - "out", output argument, kernel sets this field with VMA data;
 *   - "in/out", input and output argument; user provides initial value (used
 *     to specify maximum allowable buffer size), and kernel sets it to actual
 *     amount of data written (or zero, if there is no data).
 *
 * If matching VMA is found (according to criteria specified by
 * query_addr/query_flags), all the out fields are filled out, and ioctl()
 * returns 0. If there is no matching VMA, -ENOENT will be returned.
 * In case of any other error, negative error code other than -ENOENT is
 * returned.
 *
 * Most of the data is similar to the one returned as text in /proc/<pid>/maps
 * file, but procmap_query provides more querying flexibility. There are no
 * consistency guarantees between subsequent ioctl() calls, but data returned
 * for matched VMA is self-consistent.
 */
struct procmap_query {
	/* Query struct size, for backwards/forward compatibility */
	__u64 size;
	/*
	 * Query flags, a combination of enum procmap_query_flags values.
	 * Defines query filtering and behavior, see enum procmap_query_flags.
	 *
	 * Input argument, provided by user. Kernel doesn't modify it.
	 */
	__u64 query_flags;		/* in */
	/*
	 * Query address. By default, VMA that covers this address will
	 * be looked up. PROCMAP_QUERY_* flags above modify this default
	 * behavior further.
	 *
	 * Input argument, provided by user. Kernel doesn't modify it.
	 */
	__u64 query_addr;		/* in */
	/* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
	__u64 vma_start;		/* out */
	__u64 vma_end;			/* out */
	/* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
	__u64 vma_flags;		/* out */
	/* VMA backing page size granularity. */
	__u64 vma_page_size;		/* out */
	/*
	 * VMA file offset. If VMA has file backing, this specifies offset
	 * within the file that VMA's start address corresponds to.
	 * Is set to zero if VMA has no backing file.
	 */
	__u64 vma_offset;		/* out */
	/* Backing file's inode number, or zero, if VMA has no backing file. */
	__u64 inode;			/* out */
	/* Backing file's device major/minor number, or zero, if VMA has no backing file. */
	__u32 dev_major;		/* out */
	__u32 dev_minor;		/* out */
	/*
	 * If set to non-zero value, signals the request to return VMA name
	 * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
	 * appended, if file was unlinked from FS) for matched VMA. VMA name
	 * can also be some special name (e.g., "[heap]", "[stack]") or could
	 * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
	 *
	 * Kernel will set this field to zero, if VMA has no associated name.
	 * Otherwise kernel will return actual amount of bytes filled in
	 * user-supplied buffer (see vma_name_addr field below), including the
	 * terminating zero.
	 *
	 * If VMA name is longer than user-supplied maximum buffer size,
	 * -E2BIG error is returned.
	 *
	 * If this field is set to non-zero value, vma_name_addr should point
	 * to valid user space memory buffer of at least vma_name_size bytes.
	 * If set to zero, vma_name_addr should be set to zero as well.
	 */
	__u32 vma_name_size;		/* in/out */
	/*
	 * If set to non-zero value, signals the request to extract and return
	 * VMA's backing file's build ID, if the backing file is an ELF file
	 * and it contains embedded build ID.
	 *
	 * Kernel will set this field to zero, if VMA has no backing file,
	 * backing file is not an ELF file, or ELF file has no build ID
	 * embedded.
	 *
	 * Build ID is a binary value (not a string). Kernel will set
	 * build_id_size field to exact number of bytes used for build ID.
	 * If build ID is requested and present, but needs more bytes than
	 * user-supplied maximum buffer size (see build_id_addr field below),
	 * -E2BIG error will be returned.
	 *
	 * If this field is set to non-zero value, build_id_addr should point
	 * to valid user space memory buffer of at least build_id_size bytes.
	 * If set to zero, build_id_addr should be set to zero as well.
	 */
	__u32 build_id_size;		/* in/out */
	/*
	 * User-supplied address of a buffer of at least vma_name_size bytes
	 * for kernel to fill with matched VMA's name (see vma_name_size field
	 * description above for details).
	 *
	 * Should be set to zero if VMA name should not be returned.
	 */
	__u64 vma_name_addr;		/* in */
	/*
	 * User-supplied address of a buffer of at least build_id_size bytes
	 * for kernel to fill with matched VMA's ELF build ID, if available
	 * (see build_id_size field description above for details).
	 *
	 * Should be set to zero if build ID should not be returned.
	 */
	__u64 build_id_addr;		/* in */
};
650
#endif /* _LINUX_FS_H */