master
  1/*	$NetBSD: filedesc.h,v 1.70.2.1 2024/11/17 16:16:10 martin Exp $	*/
  2
  3/*-
  4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
  5 * All rights reserved.
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions
  9 * are met:
 10 * 1. Redistributions of source code must retain the above copyright
 11 *    notice, this list of conditions and the following disclaimer.
 12 * 2. Redistributions in binary form must reproduce the above copyright
 13 *    notice, this list of conditions and the following disclaimer in the
 14 *    documentation and/or other materials provided with the distribution.
 15 *
 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 * POSSIBILITY OF SUCH DAMAGE.
 27 */
 28
 29/*
 30 * Copyright (c) 1990, 1993
 31 *	The Regents of the University of California.  All rights reserved.
 32 *
 33 * Redistribution and use in source and binary forms, with or without
 34 * modification, are permitted provided that the following conditions
 35 * are met:
 36 * 1. Redistributions of source code must retain the above copyright
 37 *    notice, this list of conditions and the following disclaimer.
 38 * 2. Redistributions in binary form must reproduce the above copyright
 39 *    notice, this list of conditions and the following disclaimer in the
 40 *    documentation and/or other materials provided with the distribution.
 41 * 3. Neither the name of the University nor the names of its contributors
 42 *    may be used to endorse or promote products derived from this software
 43 *    without specific prior written permission.
 44 *
 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 48 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 55 * SUCH DAMAGE.
 56 *
 57 *	@(#)filedesc.h	8.1 (Berkeley) 6/2/93
 58 */
 59
 60#ifndef _SYS_FILEDESC_H_
 61#define	_SYS_FILEDESC_H_
 62
 63#include <sys/param.h>
 64#include <sys/queue.h>
 65#include <sys/mutex.h>
 66#include <sys/rwlock.h>
 67#include <sys/condvar.h>
 68
 69/*
 70 * This structure is used for the management of descriptors.  It may be
 71 * shared by multiple processes.
 72 *
 73 * A process is initially started out with NDFILE descriptors stored within
 74 * this structure, selected to be enough for typical applications based on
 75 * the historical limit of 20 open files (and the usage of descriptors by
 76 * shells).  If these descriptors are exhausted, a larger descriptor table
 77 * may be allocated, up to a process' resource limit; the internal arrays
 78 * are then unused.  The initial expansion is set to NDEXTENT; each time
  79 * it runs out, it is doubled until the resource limit is reached.  NDEXTENT
  80 * should be the biggest multiple of OFILESIZE (not defined in this header;
  81 * apparently historical) that fits in a power-of-two sized piece of memory.
 82 */
  83#define	NDFILE		20		/* fds held by the built-in table (dt_ff[]) */
  84#define	NDEXTENT	50		/* initial expansion; 250 bytes in 256-byte alloc */
  85#define	NDENTRIES	32		/* 32 fds per entry (one uint32_t map word) */
  86#define	NDENTRYMASK	(NDENTRIES - 1)	/* 0x1f: the low NDENTRYSHIFT bits */
  87#define	NDENTRYSHIFT	5		/* index bits per entry: log2(NDENTRIES) */
    /* x / NDENTRIES rounded up, for x >= 0. */
  88#define	NDLOSLOTS(x)	(((x) + NDENTRIES - 1) >> NDENTRYSHIFT)
    /* NDLOSLOTS(x) / NDENTRIES rounded up: one bit per NDLOSLOTS slot. */
  89#define	NDHISLOTS(x)	((NDLOSLOTS(x) + NDENTRIES - 1) >> NDENTRYSHIFT)
  90#define	NDFDFILE	6		/* built-in fdfile_t slots (fd_dfdfile[]): first 6 descriptors are free */
 91
 92/*
 93 * Process-private descriptor reference, one for each descriptor slot
 94 * in use.  Locks:
 95 *
 96 * :	unlocked
 97 * a	atomic operations + filedesc_t::fd_lock in some cases
 98 * d	filedesc_t::fd_lock
 99 *
100 * Note that ff_exclose and ff_allocated are likely to be byte sized
101 * (bool).  In general adjacent sub-word sized fields must be locked
102 * the same way, but in this case it's ok: ff_exclose can only be
103 * modified while the descriptor slot is live, and ff_allocated when
104 * it's invalid.
105 *
106 * NOTE: ff_exclose should generally be set with fd_set_exclose(), not
107 * written to directly, when implementing flags like O_CLOEXEC or
108 * SOCK_CLOEXEC, so that struct filedesc::fd_exclose is updated as
109 * needed.  See PR kern/58855: close-on-exec is broken for dup3 and
110 * opening cloning devices.
111 */
 112typedef struct fdfile {
 113	bool		ff_exclose;	/* :: close on exec; set via fd_set_exclose() */
 114	bool		ff_allocated;	/* d: descriptor slot is allocated */
 115	u_int		ff_refcnt;	/* a: reference count on structure */
	/* NOTE(review): presumably uses the FR_MASK / FR_CLOSING encoding -- confirm */
 116	struct file	*ff_file;	/* d: pointer to file if open */
 117	SLIST_HEAD(,knote) ff_knlist;	/* d: knotes attached to this fd */
 118	kcondvar_t	ff_closing;	/* d: notifier for close */
 119} fdfile_t;
120
 /*
  * sizeof(fdfile_t) rounded up to the next whole multiple of
  * CACHE_LINE_SIZE.  This is the per-slot size of the built-in records in
  * filedesc_t::fd_dfdfile.
  */
 121#define FDFILE_SIZE ((sizeof(fdfile_t)+CACHE_LINE_SIZE-1)/CACHE_LINE_SIZE*CACHE_LINE_SIZE)
122
123/* Reference count */
 124#define	FR_CLOSING	(0x80000000)	/* bit 31 - closing: must interlock */
 125#define	FR_MASK		(~FR_CLOSING)	/* every bit except FR_CLOSING: reference count */
126
127/*
128 * Open file table, potentially many 'active' tables per filedesc_t
129 * in a multi-threaded process, or with a shared filedesc_t (clone()).
 130 * dt_nfiles is first to avoid pointer arithmetic.
131 */
 132typedef struct fdtab {
 133	u_int		dt_nfiles;	/* number of open files allocated */
 134	struct fdtab	*dt_link;	/* for lists of dtab */
 135	fdfile_t	*dt_ff[NDFILE];	/* file structures for open fds */
	/*
	 * Only NDFILE pointers are declared here, which is what the built-in
	 * table (filedesc_t::fd_dtbuiltin) provides.
	 * NOTE(review): larger tables presumably over-allocate past dt_ff[]
	 * with dt_nfiles as the real bound -- confirm against the allocator.
	 */
 136} fdtab_t;
137
 137typedef struct filedesc {
 138	/*
 139	 * Built-in fdfile_t records first, since they have strict
 140	 * alignment requirements.  Stored as raw bytes: NDFDFILE slots of
 140	 * FDFILE_SIZE bytes each.
 141	 */
 142	uint8_t		fd_dfdfile[NDFDFILE][FDFILE_SIZE];
 143	/*
 144	 * All of the remaining fields are locked by fd_lock.
 145	 */
 146	kmutex_t	fd_lock;	/* lock on structure */
 147	fdtab_t * volatile fd_dt;	/* active descriptor table (the pointer itself is volatile) */
 148	uint32_t	*fd_himap;	/* each bit points to 32 fds (one fd_lomap word) */
 149	uint32_t	*fd_lomap;	/* bitmap of free fds */
 150	struct klist	*fd_knhash;	/* hash of attached non-fd knotes */
 151	int		fd_lastkqfile;	/* max descriptor for kqueue */
 152	int		fd_lastfile;	/* high-water mark of fd_ofiles */
	/* NOTE(review): no member named fd_ofiles exists in this struct; presumably the table reached through fd_dt -- confirm */
 153	int		fd_refcnt;	/* reference count */
 154	u_long		fd_knhashmask;	/* size of fd_knhash */
 155	int		fd_freefile;	/* approx. next free file */
 156	int		fd_unused;	/* unused */
 157	bool		fd_exclose;	/* true if at least one fd has close-on-exec set (see fd_set_exclose()) */
 158	/*
 159	 * This table is used when the number of open files is
 160	 * <= NDFILE, and is then pointed to by fd_dt above.
 161	 */
 162	fdtab_t		fd_dtbuiltin;
 163	/*
 164	 * These arrays are used when the number of open files is
 165	 * <= 1024 (NDENTRIES words of NDENTRIES bits in fd_dlomap), and are
 165	 * then pointed to by fd_himap and fd_lomap above.
 166	 */
 167#define fd_startzero	fd_dhimap	/* area to zero on return to cache */
 168	uint32_t	fd_dhimap[NDENTRIES >> NDENTRYSHIFT];
 169	uint32_t	fd_dlomap[NDENTRIES];
 170} filedesc_t;
172
173/*
174 * Working directory, root and umask information.  Serialization:
175 *
176 * a	atomic operations
177 * l	cwdi_lock
178 */
 179typedef struct cwdinfo {
 180	struct vnode	*cwdi_cdir;	/* l: current directory */
 181	struct vnode	*cwdi_rdir;	/* l: root directory */
 182	struct vnode	*cwdi_edir;	/* l: emulation root (if known) */
 183	u_int		cwdi_cmask;	/* a: mask for file creation (umask) */
 184	u_int		cwdi_refcnt;	/* a: reference count */
 185
	/*
	 * The lock behind the 'l' fields above.
	 * NOTE(review): the ':' key is not listed in this struct's legend;
	 * presumably "unlocked", as in the fdfile_t legend -- confirm.
	 */
 186	krwlock_t	cwdi_lock	/* :: lock on struct */
 187	    __aligned(COHERENCY_UNIT);	/* aligned to COHERENCY_UNIT -> gets own cache line */
 188} cwdinfo_t;
189
190#ifdef _KERNEL
191
 /*
  * Forward declarations: the prototypes below only use these types behind
  * pointers, so their full definitions are not needed here.
  */
 192struct fileops;
 193struct socket;
 194struct proc;
 195
 /* Defined elsewhere; NOTE(review): presumably the initial cwdinfo -- confirm. */
 196extern struct cwdinfo cwdi0;
197
198/*
199 * Kernel global variables and routines.
200 */
 /*
  * Descriptor-table routines.  file_t, proc_t, lwp_t and register_t are
  * used below but are not declared in this header.
  * NOTE(review): presumably supplied by headers included before this one --
  * confirm the required include order.
  */
 201void	fd_sys_init(void);
 202int	fd_open(const char*, int, int, int*);
 203int	fd_dupopen(int, bool, int, int *);
 204int	fd_alloc(struct proc *, int, int *);
 205void	fd_tryexpand(struct proc *);
 206int	fd_allocfile(file_t **, int *);
 207void	fd_affix(struct proc *, file_t *, unsigned);
 208void	fd_abort(struct proc *, file_t *, unsigned);
 209filedesc_t *fd_copy(void);
 210filedesc_t *fd_init(filedesc_t *);
 211void	fd_share(proc_t *);
 212void	fd_hold(lwp_t *);
 213void	fd_free(void);
 214void	fd_closeexec(void);
 215void	fd_ktrexecfd(void);
 216int	fd_checkstd(void);
 /* NOTE(review): fd_get*() and fd_put*() presumably pair as acquire/release -- confirm. */
 217file_t	*fd_getfile(unsigned);
 218file_t	*fd_getfile2(proc_t *, unsigned);
 219void	fd_putfile(unsigned);
 220int	fd_getvnode(unsigned, file_t **);
 221int	fd_getsock(unsigned, struct socket **);
 222int	fd_getsock1(unsigned, struct socket **, file_t **);
 223void	fd_putvnode(unsigned);
 224void	fd_putsock(unsigned);
 225int	fd_close(unsigned);
 226int	fd_dup(file_t *, int, int *, bool);
 227int	fd_dup2(file_t *, unsigned, int);
 228int	fd_clone(file_t *, unsigned, int, const struct fileops *, void *);
 /*
  * Use this rather than writing fdfile_t::ff_exclose directly, so that
  * filedesc_t::fd_exclose is updated as needed (PR kern/58855).
  */
 229void	fd_set_exclose(struct lwp *, int, bool);
 230int	pipe1(struct lwp *, int *, int);
 231int	dodup(struct lwp *, int, int, int, register_t *);
232
 /*
  * cwdinfo routines.  cwdshare(), cwdunshare() and cwdexec() take the
  * process; cwdfree() takes the cwdinfo itself; cwdinit() returns one.
  */
 233void	cwd_sys_init(void);
 234struct cwdinfo *cwdinit(void);
 235void	cwdshare(proc_t *);
 236void	cwdunshare(proc_t *);
 237void	cwdfree(struct cwdinfo *);
 238void	cwdexec(struct proc *);
239
 /* NOTE(review): single flag bit, presumably for one of getcwd_common()'s int arguments -- confirm. */
 240#define GETCWD_CHECK_ACCESS 0x0001
 241int	getcwd_common(struct vnode *, struct vnode *, char **, char *, int,
 242    int, struct lwp *);
 243int	vnode_to_path(char *, size_t, struct vnode *, struct lwp *,
 244    struct proc *);
245
 246int	closef(file_t *);
 247file_t *fgetdummy(void);
 248void	fputdummy(file_t *);
 249
 250struct stat;	/* forward declaration for the prototype on the next line */
 251int	do_sys_fstat(int, struct stat *);
 252struct flock;	/* forward declaration for the prototype on the next line */
 253int	do_fcntl_lock(int, int, struct flock *);
 254int	do_posix_fadvise(int, off_t, off_t, int);
 255
 /* Objects defined elsewhere in the kernel; only declared here. */
 256extern kmutex_t filelist_lock;
 257extern filedesc_t filedesc0;
258
259#endif /* _KERNEL */
260
261#endif /* !_SYS_FILEDESC_H_ */