/*	$NetBSD: lwp.h,v 1.217 2022/07/23 19:15:29 mrg Exp $	*/

/*
 * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020
 *    The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _SYS_LWP_H_
#define _SYS_LWP_H_

#if defined(_KERNEL) || defined(_KMEMUSER)

#include <sys/param.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <sys/callout.h>
#include <sys/kcpuset.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/sched.h>
#include <sys/specificdata.h>
#include <sys/syncobj.h>
#include <sys/resource.h>

#if defined(_KERNEL)
struct lwp;
/* forward declare this for <machine/cpu.h> so it can get l_cpu. */
static __inline struct cpu_info *lwp_getcpu(struct lwp *);
#include <machine/cpu.h>		/* curcpu() and cpu_info */
#include <sys/atomic.h>
#ifdef _KERNEL_OPT
#include "opt_kcov.h"
#include "opt_kmsan.h"
#include "opt_maxlwp.h"
#endif
#endif

#include <machine/proc.h>		/* Machine-dependent proc substruct. */

/*
 * Lightweight process.  Field markings and the corresponding locks:
 *
 * a:	proc_lock
 * c:	condition variable interlock, passed to cv_wait()
 * l:	*l_mutex
 * p:	l_proc->p_lock
 * s:	spc_mutex, which may or may not be referenced by l_mutex
 * S:	l_selcluster->sc_lock
 * (:	unlocked, stable
 * !:	unlocked, may only be reliably accessed by the LWP itself
 *
 * Fields are clustered together by usage (to increase the likelihood
 * of cache hits) and by size (to reduce dead space in the structure).
 */
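
/*
 * For example, l_stat below carries the "l" marking, so a read from a
 * different LWP must hold *l_mutex.  An illustrative sketch (not part
 * of the original header; see lwp_lock()/lwp_unlock() further down):
 *
 *	lwp_lock(l);
 *	stat = l->l_stat;
 *	lwp_unlock(l);
 */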

#include <sys/pcu.h>

struct lockdebug;
struct sysent;

struct lwp {
	/* Must not be zeroed on free. */
	struct cpu_info *volatile l_cpu;/* s: CPU we're on if LSONPROC */
	kmutex_t * volatile l_mutex;	/* l: ptr to mutex on sched state */
	struct turnstile *l_ts;		/* l: current turnstile */
	int		l_stat;		/* l: overall LWP status */
	int		l__reserved;	/*  : padding - reuse as needed */

	/* Scheduling and overall state. */
#define	l_startzero l_runq
	TAILQ_ENTRY(lwp) l_runq;	/* s: run queue */
	union {
		void *	info;		/* s: scheduler-specific structure */
		u_int	timeslice;	/* l: time-quantum for SCHED_M2 */
	} l_sched;
	void		*l_addr;	/* l: PCB address; use lwp_getpcb() */
	struct mdlwp	l_md;		/* l: machine-dependent fields. */
	struct bintime	l_rtime;	/* l: real time */
	struct bintime	l_stime;	/* l: start time (while ONPROC) */
	int		l_flag;		/* l: misc flag values */
	u_int		l_swtime;	/* l: time swapped in or out */
	u_int		l_rticks;	/* l: Saved start time of run */
	u_int		l_rticksum;	/* l: Sum of ticks spent running */
	u_int		l_slpticks;	/* l: Saved start time of sleep */
	u_int		l_slpticksum;	/* l: Sum of ticks spent sleeping */
	int		l_biglocks;	/* l: biglock count before sleep */
	int		l_class;	/* l: scheduling class */
	int		l_kpriority;	/* !: has kernel priority boost */
	pri_t		l_kpribase;	/* !: kernel priority base level */
	pri_t		l_priority;	/* l: scheduler priority */
	pri_t		l_inheritedprio;/* l: inherited priority */
	pri_t		l_protectprio;	/* l: for PTHREAD_PRIO_PROTECT */
	pri_t		l_auxprio;	/* l: max(inherit,protect) priority */
	int		l_protectdepth;	/* l: for PTHREAD_PRIO_PROTECT */
	u_int		l_cpticks;	/* (: Ticks of CPU time */
	psetid_t	l_psid;		/* l: assigned processor-set ID */
	fixpt_t		l_pctcpu;	/* p: %cpu during l_swtime */
	fixpt_t		l_estcpu;	/* l: cpu time for SCHED_4BSD */
	volatile uint64_t l_ncsw;	/* l: total context switches */
	volatile uint64_t l_nivcsw;	/* l: involuntary context switches */
	SLIST_HEAD(, turnstile) l_pi_lenders; /* l: ts lending us priority */
	struct cpu_info *l_target_cpu;	/* l: target CPU to migrate */
	struct lwpctl	*l_lwpctl;	/* p: lwpctl block kernel address */
	struct lcpage	*l_lcpage;	/* p: lwpctl containing page */
	kcpuset_t	*l_affinity;	/* l: CPU set for affinity */

	/* Synchronisation. */
	struct syncobj	*l_syncobj;	/* l: sync object operations set */
	LIST_ENTRY(lwp) l_sleepchain;	/* l: sleep queue */
	wchan_t		l_wchan;	/* l: sleep address */
	const char	*l_wmesg;	/* l: reason for sleep */
	struct sleepq	*l_sleepq;	/* l: current sleep queue */
	callout_t	l_timeout_ch;	/* !: callout for tsleep */
	kcondvar_t	l_waitcv;	/* a: vfork() wait */
	u_int		l_slptime;	/* l: time since last blocked */
	bool		l_vforkwaiting;	/* a: vfork() waiting */

	/* User-space synchronization. */
	uintptr_t	l_robust_head;	/* !: list of robust futexes */
	uint32_t	l___rsvd1;	/* reserved for future use */

#if PCU_UNIT_COUNT > 0
	struct cpu_info	* volatile l_pcu_cpu[PCU_UNIT_COUNT];
	uint32_t	l_pcu_valid;
#endif

	/* Process level and global state, misc. */
	lwpid_t		l_lid;		/* (: LWP identifier; local to proc */
	LIST_ENTRY(lwp)	l_list;		/* a: entry on list of all LWPs */
	void		*l_ctxlink;	/* p: uc_link {get,set}context */
	struct proc	*l_proc;	/* p: parent process */
	LIST_ENTRY(lwp)	l_sibling;	/* p: entry on proc's list of LWPs */
	char		*l_name;	/* (: name, optional */
	lwpid_t		l_waiter;	/* p: first LWP waiting on us */
	lwpid_t		l_waitingfor;	/* p: specific LWP we are waiting on */
	int		l_prflag;	/* p: process level flags */
	u_int		l_refcnt;	/* p: reference count on this LWP */

	/* State of select() or poll(). */
	int		l_selflag;	/* S: polling state flags */
	int		l_selret;	/* S: return value of select/poll */
	SLIST_HEAD(,selinfo) l_selwait;	/* S: descriptors waited on */
	uintptr_t	l_selrec;	/* !: argument for selrecord() */
	struct selcluster *l_selcluster;/* !: associated cluster data */
	void *		l_selbits;	/* (: select() bit-field */
	size_t		l_selni;	/* (: size of a single bit-field */

	/* Signals. */
	int		l_sigrestore;	/* p: need to restore old sig mask */
	sigset_t	l_sigwaitset;	/* p: signals being waited for */
	kcondvar_t	l_sigcv;	/* p: for sigsuspend() */
	struct ksiginfo	*l_sigwaited;	/* p: delivered signals from set */
	sigpend_t	*l_sigpendset;	/* p: XXX issignal()/postsig() baton */
	LIST_ENTRY(lwp)	l_sigwaiter;	/* p: chain on list of waiting LWPs */
	stack_t		l_sigstk;	/* p: sp & on stack state variable */
	sigset_t	l_sigmask;	/* p: signal mask */
	sigpend_t	l_sigpend;	/* p: signals to this LWP */
	sigset_t	l_sigoldmask;	/* p: mask for sigpause */

	/* Private data. */
	specificdata_reference
		l_specdataref;		/* !: subsystem lwp-specific data */
	struct timespec l_ktrcsw;	/* !: for ktrace CSW trace XXX */
	void		*l_private;	/* !: svr4-style lwp-private data */
	struct lwp	*l_switchto;	/* !: mi_switch: switch to this LWP */
	struct kauth_cred *l_cred;	/* !: cached credentials */
	struct filedesc	*l_fd;		/* !: cached copy of proc::p_fd */
	void		*l_emuldata;	/* !: kernel lwp-private data */
	struct fstrans_lwp_info *l_fstrans; /* (: fstrans private data */
	u_short		l_shlocks;	/* !: lockdebug: shared locks held */
	u_short		l_exlocks;	/* !: lockdebug: excl. locks held */
	u_short		l_psrefs;	/* !: count of psref held */
	u_short		l_blcnt;	/* !: count of kernel_lock held */
	volatile int	l_nopreempt;	/* !: don't preempt me! */
	volatile u_int	l_dopreempt;	/* s: kernel preemption pending */
	int		l_pflag;	/* !: LWP private flags */
	int		l_dupfd;	/* !: side return from cloning devs XXX */
	const struct sysent * volatile l_sysent;/* !: currently active syscall */
	struct rusage	l_ru;		/* !: accounting information */
	uint64_t	l_pfailtime;	/* !: for kernel preemption */
	uintptr_t	l_pfailaddr;	/* !: for kernel preemption */
	uintptr_t	l_pfaillock;	/* !: for kernel preemption */
	_TAILQ_HEAD(,struct lockdebug,volatile) l_ld_locks;/* !: locks held by LWP */
	volatile void	*l_ld_wanted;	/* !: lock currently wanted by LWP */
	uintptr_t	l_rwcallsite;	/* !: rwlock actual callsite */
	int		l_tcgen;	/* !: for timecounter removal */

	/* These are only used by 'options SYSCALL_TIMES'. */
	uint32_t	l_syscall_time;	/* !: time epoch for current syscall */
	uint64_t	*l_syscall_counter; /* !: counter for current process */

	struct kdtrace_thread *l_dtrace; /* (: DTrace-specific data. */

#ifdef KMSAN
	void		*l_kmsan; /* !: KMSAN private data. */
#endif
#ifdef KCOV
	void		*l_kcov; /* !: KCOV private data. */
#endif
};

/*
 * UAREA_PCB_OFFSET: the offset of the PCB structure within the uarea.
 * MD code may define it in <machine/proc.h> to indicate a different
 * uarea layout.
 */
#ifndef UAREA_PCB_OFFSET
#define	UAREA_PCB_OFFSET	0
#endif

LIST_HEAD(lwplist, lwp);		/* A list of LWPs. */

#ifdef _KERNEL
extern struct lwplist	alllwp;		/* List of all LWPs. */
extern lwp_t		lwp0;		/* LWP for proc0. */
extern int		maxlwp __read_mostly;	/* max number of lwps */
#ifndef MAXLWP
#define	MAXLWP		4096		/* default max */
#endif
#ifndef MAXMAXLWP
#define MAXMAXLWP	65535		/* absolute max */
#endif
#endif

#endif /* _KERNEL || _KMEMUSER */

/*
 * These flags are kept in l_flag, and they are modified only with the LWP
 * locked.
 */
#define	LW_IDLE		0x00000001 /* Idle lwp. */
#define	LW_LWPCTL	0x00000002 /* Adjust lwpctl in userret */
#define	LW_STIMO	0x00000040 /* Sleep timed out */
#define	LW_SINTR	0x00000080 /* Sleep is interruptible. */
#define	LW_CATCHINTR	0x00000100 /* LW_SINTR intent; see sleepq_block(). */
#define	LW_SYSTEM	0x00000200 /* Kernel thread */
#define	LW_SYSTEM_FPU	0x00000400 /* Kernel thread with vector/FP enabled */
#define	LW_DBGSUSPEND	0x00010000 /* Suspend by debugger */
#define	LW_WSUSPEND	0x00020000 /* Suspend before return to user */
#define	LW_BATCH	0x00040000 /* LWP tends to hog CPU */
#define	LW_WCORE	0x00080000 /* Stop for core dump on return to user */
#define	LW_WEXIT	0x00100000 /* Exit before return to user */
#define	LW_PENDSIG	0x01000000 /* Pending signal for us */
#define	LW_CANCELLED	0x02000000 /* tsleep should not sleep */
#define	LW_WREBOOT	0x08000000 /* System is rebooting, please suspend */
#define	LW_UNPARKED	0x10000000 /* Unpark op pending */
#define	LW_RUMP_CLEAR	0x40000000 /* Clear curlwp in RUMP scheduler */
#define	LW_RUMP_QEXIT	0x80000000 /* LWP should exit ASAP */
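
/*
 * An illustrative sketch of the locking rule above, for some LWP "l"
 * other than curlwp (simplified: the real suspend path, lwp_suspend(),
 * does considerably more than this):
 *
 *	lwp_lock(l);
 *	l->l_flag |= LW_WSUSPEND;
 *	lwp_unlock(l);
 */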

/*
 * The second set of flags is kept in l_pflag.  They are modified only by
 * the LWP itself, or when it is known that the LWP cannot be running.
 * LP_RUNNING is typically updated with the LWP locked, but not always in
 * the case of soft interrupt handlers.
 */
#define	LP_KTRACTIVE	0x00000001 /* Executing ktrace operation */
#define	LP_KTRCSW	0x00000002 /* ktrace context switch marker */
#define	LP_KTRCSWUSER	0x00000004 /* ktrace context switch marker */
	/*		0x00000008    was LP_PIDLID */
#define	LP_OWEUPC	0x00000010 /* Owe user profiling tick */
#define	LP_MPSAFE	0x00000020 /* Starts life without kernel_lock */
#define	LP_INTR		0x00000040 /* Soft interrupt handler */
#define	LP_SYSCTLWRITE	0x00000080 /* sysctl write lock held */
#define	LP_MUSTJOIN	0x00000100 /* Must join kthread on exit */
#define	LP_SINGLESTEP	0x00000400 /* Single step thread in ptrace(2) */
#define	LP_TIMEINTR	0x00010000 /* Time this soft interrupt */
#define	LP_PREEMPTING	0x00020000 /* mi_switch called involuntarily */
#define	LP_RUNNING	0x20000000 /* Active on a CPU */
#define	LP_TELEPORT	0x40000000 /* Teleport to new CPU on preempt() */
#define	LP_BOUND	0x80000000 /* Bound to a CPU */

/*
 * The third set of flags is kept in l_prflag and they are modified only
 * with p_lock held.
 */
#define	LPR_DETACHED	0x00800000 /* Won't be waited for. */
#define	LPR_CRMOD	0x00000100 /* Credentials modified */
#define	LPR_DRAINING	0x80000000 /* Draining references before exiting */
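
/*
 * An illustrative sketch for l_prflag, which is covered by p_lock
 * rather than by the LWP lock:
 *
 *	mutex_enter(l->l_proc->p_lock);
 *	l->l_prflag |= LPR_DETACHED;
 *	mutex_exit(l->l_proc->p_lock);
 */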

/*
 * Mask indicating that there is "exceptional" work to be done on return to
 * user.
 */
#define	LW_USERRET	\
    (LW_WEXIT | LW_PENDSIG | LW_WREBOOT | LW_WSUSPEND | LW_WCORE | LW_LWPCTL)
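
/*
 * An illustrative sketch of the test made on the way back out to user
 * mode (the exact MI/MD return path may differ):
 *
 *	if (__predict_false((l->l_flag & LW_USERRET) != 0))
 *		lwp_userret(l);
 */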

/*
 * Status values.
 *
 * A note about LSRUN and LSONPROC: LSRUN indicates that an LWP is
 * runnable but *not* yet running, i.e. it is on a run queue.  LSONPROC
 * indicates that the LWP is actually executing on a CPU, i.e.
 * it is no longer on a run queue.
 *
 * These values are set in stone and must not be reused with future changes.
 */
#define	LSIDL		1	/* Process being created by fork. */
#define	LSRUN		2	/* Currently runnable. */
#define	LSSLEEP		3	/* Sleeping on an address. */
#define	LSSTOP		4	/* Process debugging or suspension. */
#define	LSZOMB		5	/* Awaiting collection by parent. */
/* define	LSDEAD	6	Process is almost a zombie. (removed in 5.0) */
#define	LSONPROC	7	/* Process is currently on a CPU. */
#define	LSSUSPENDED	8	/* Not running, not signalable. */

#if defined(_KERNEL) || defined(_KMEMUSER)
static __inline void *
lwp_getpcb(struct lwp *l)
{

	return l->l_addr;
}
#endif /* _KERNEL || _KMEMUSER */

#ifdef _KERNEL
#define	LWP_CACHE_CREDS(l, p)						\
do {									\
	(void)p;							\
	if (__predict_false((l)->l_prflag & LPR_CRMOD))			\
		lwp_update_creds(l);					\
} while (/* CONSTCOND */ 0)
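
/*
 * An illustrative call site sketch for LWP_CACHE_CREDS(); the fast path
 * is a single predicted-false test of LPR_CRMOD:
 *
 *	struct lwp *l = curlwp;
 *
 *	LWP_CACHE_CREDS(l, l->l_proc);
 */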

void	lwpinit(void);
void	lwp0_init(void);

void	lwp_startup(lwp_t *, lwp_t *);
void	startlwp(void *);

int	lwp_locked(lwp_t *, kmutex_t *);
kmutex_t *lwp_setlock(lwp_t *, kmutex_t *);
void	lwp_unlock_to(lwp_t *, kmutex_t *);
int	lwp_trylock(lwp_t *);
void	lwp_addref(lwp_t *);
void	lwp_delref(lwp_t *);
void	lwp_delref2(lwp_t *);
bool	lwp_drainrefs(lwp_t *);
bool	lwp_alive(lwp_t *);
lwp_t	*lwp_find_first(proc_t *);

int	lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool);
void	lwp_continue(lwp_t *);
void	lwp_unsleep(lwp_t *, bool);
void	lwp_unstop(lwp_t *);
void	lwp_exit(lwp_t *);
int	lwp_suspend(lwp_t *, lwp_t *);
int	lwp_create1(lwp_t *, const void *, size_t, u_long, lwpid_t *);
void	lwp_start(lwp_t *, int);
void	lwp_update_creds(lwp_t *);
void	lwp_migrate(lwp_t *, struct cpu_info *);
lwp_t *	lwp_find2(pid_t, lwpid_t);
lwp_t *	lwp_find(proc_t *, int);
void	lwp_userret(lwp_t *);
void	lwp_need_userret(lwp_t *);
void	lwp_free(lwp_t *, bool, bool);
uint64_t lwp_pctr(void);
int	lwp_setprivate(lwp_t *, void *);
int	do_lwp_create(lwp_t *, void *, u_long, lwp_t **, const sigset_t *,
    const stack_t *);

void	lwp_thread_cleanup(lwp_t *);

void	lwpinit_specificdata(void);
int	lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t);
void	lwp_specific_key_delete(specificdata_key_t);
void	lwp_initspecific(lwp_t *);
void	lwp_finispecific(lwp_t *);
void	*lwp_getspecific(specificdata_key_t);
#if defined(_LWP_API_PRIVATE)
void	*_lwp_getspecific_by_lwp(lwp_t *, specificdata_key_t);
#endif
void	lwp_setspecific(specificdata_key_t, void *);
void	lwp_setspecific_by_lwp(lwp_t *, specificdata_key_t, void *);

/* Syscalls. */
int	lwp_park(clockid_t, int, struct timespec *);
int	lwp_unpark(const lwpid_t *, const u_int);

/* DDB. */
void	lwp_whatis(uintptr_t, void (*)(const char *, ...) __printflike(1, 2));

/*
 * Lock an LWP. XXX _MODULE
 */
static __inline void
lwp_lock(lwp_t *l)
{
	kmutex_t *old = atomic_load_consume(&l->l_mutex);

	/*
	 * Note: mutex_spin_enter() will have posted a read barrier.
	 * Re-test l->l_mutex.  If it has changed, we need to try again:
	 * the LWP's lock can be re-pointed (see lwp_setlock()) while we
	 * were spinning, e.g. when the LWP changes scheduling state.
	 */
	mutex_spin_enter(old);
	while (__predict_false(atomic_load_relaxed(&l->l_mutex) != old)) {
		mutex_spin_exit(old);
		old = atomic_load_consume(&l->l_mutex);
		mutex_spin_enter(old);
	}
}

/*
 * Unlock an LWP. XXX _MODULE
 */
static __inline void
lwp_unlock(lwp_t *l)
{
	mutex_spin_exit(l->l_mutex);
}

static __inline void
lwp_changepri(lwp_t *l, pri_t pri)
{
	KASSERT(mutex_owned(l->l_mutex));

	if (l->l_priority == pri)
		return;

	(*l->l_syncobj->sobj_changepri)(l, pri);
	KASSERT(l->l_priority == pri);
}

static __inline void
lwp_lendpri(lwp_t *l, pri_t pri)
{
	KASSERT(mutex_owned(l->l_mutex));

	(*l->l_syncobj->sobj_lendpri)(l, pri);
	KASSERT(l->l_inheritedprio == pri);
}

static __inline pri_t
lwp_eprio(lwp_t *l)
{
	pri_t pri;

	pri = l->l_priority;
	if ((l->l_flag & LW_SYSTEM) == 0 && l->l_kpriority && pri < PRI_KERNEL)
		pri = (pri >> 1) + l->l_kpribase;
	return MAX(l->l_auxprio, pri);
}

int lwp_create(lwp_t *, struct proc *, vaddr_t, int, void *, size_t,
    void (*)(void *), void *, lwp_t **, int, const sigset_t *, const stack_t *);

/*
 * XXX _MODULE
 * We should provide real stubs for the below that modules can use.
 */

static __inline void
spc_lock(struct cpu_info *ci)
{
	mutex_spin_enter(ci->ci_schedstate.spc_mutex);
}

static __inline void
spc_unlock(struct cpu_info *ci)
{
	mutex_spin_exit(ci->ci_schedstate.spc_mutex);
}

static __inline void
spc_dlock(struct cpu_info *ci1, struct cpu_info *ci2)
{
	struct schedstate_percpu *spc1 = &ci1->ci_schedstate;
	struct schedstate_percpu *spc2 = &ci2->ci_schedstate;

	KASSERT(ci1 != ci2);
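	/* Take the two locks in ascending address order to avoid deadlock. */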
	if (ci1 < ci2) {
		mutex_spin_enter(spc1->spc_mutex);
		mutex_spin_enter(spc2->spc_mutex);
	} else {
		mutex_spin_enter(spc2->spc_mutex);
		mutex_spin_enter(spc1->spc_mutex);
	}
}

/*
 * Allow machine-dependent code to override curlwp in <machine/cpu.h> for
 * its own convenience.  Otherwise, we declare it as appropriate.
 */
#if !defined(curlwp)
#if defined(MULTIPROCESSOR)
#define	curlwp		curcpu()->ci_curlwp	/* Current running LWP */
#else
extern struct lwp	*curlwp;		/* Current running LWP */
#endif /* MULTIPROCESSOR */
#endif /* ! curlwp */
#define	curproc		(curlwp->l_proc)

/*
 * This provides a way for <machine/cpu.h> to get l_cpu for curlwp before
 * struct lwp is defined.
 */
static __inline struct cpu_info *
lwp_getcpu(struct lwp *l)
{
	return l->l_cpu;
}

static __inline bool
CURCPU_IDLE_P(void)
{
	struct cpu_info *ci = curcpu();
	return ci->ci_onproc == ci->ci_data.cpu_idlelwp;
}

/*
 * Disable and re-enable preemption.  Only for low-level kernel
 * use.  Device drivers and anything that could potentially be
 * compiled as a module should use kpreempt_disable() and
 * kpreempt_enable().
 */
static __inline void
KPREEMPT_DISABLE(lwp_t *l)
{

	KASSERT(l == curlwp);
	l->l_nopreempt++;
	__insn_barrier();
}

static __inline void
KPREEMPT_ENABLE(lwp_t *l)
{

	KASSERT(l == curlwp);
	KASSERT(l->l_nopreempt > 0);
	__insn_barrier();
	if (--l->l_nopreempt != 0)
		return;
	__insn_barrier();
	if (__predict_false(l->l_dopreempt))
		kpreempt(0);
	__insn_barrier();
}
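
/*
 * An illustrative usage sketch for the pair above:
 *
 *	lwp_t *l = curlwp;
 *
 *	KPREEMPT_DISABLE(l);
 *	... touch per-CPU state; we cannot be preempted here ...
 *	KPREEMPT_ENABLE(l);
 */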

/* For lwp::l_dopreempt */
#define	DOPREEMPT_ACTIVE	0x01
#define	DOPREEMPT_COUNTED	0x02

/*
 * Prevent curlwp from migrating between CPUs between curlwp_bind and
 * curlwp_bindx.  One use case is psref(9), whose contract forbids
 * migration.
 */
static __inline int
curlwp_bind(void)
{
	int bound;

	bound = curlwp->l_pflag & LP_BOUND;
	curlwp->l_pflag |= LP_BOUND;
	__insn_barrier();

	return bound;
}

static __inline void
curlwp_bindx(int bound)
{

	KASSERT(curlwp->l_pflag & LP_BOUND);
	__insn_barrier();
	curlwp->l_pflag ^= bound ^ LP_BOUND;
}
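
/*
 * An illustrative usage sketch, e.g. around a psref(9) reference (the
 * psref calls are stand-ins for the caller's real work):
 *
 *	int bound = curlwp_bind();
 *
 *	psref_acquire(&psref, &target->psref, class);
 *	... use the target ...
 *	psref_release(&psref, &target->psref, class);
 *	curlwp_bindx(bound);
 */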

#endif /* _KERNEL */

/* Flags for _lwp_create(), as per Solaris. */
#define	LWP_DETACHED	0x00000040
#define	LWP_SUSPENDED	0x00000080

/* Kernel-internal flags for LWP creation. */
	/*		0x40000000	was LWP_PIDLID */
#define	LWP_VFORK	0x80000000

#endif	/* !_SYS_LWP_H_ */