/*	$NetBSD: pool.h,v 1.96 2021/12/22 16:57:28 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 1999, 2000, 2007, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _SYS_POOL_H_
#define _SYS_POOL_H_

#include <sys/stdbool.h>
#include <sys/stdint.h>

struct pool_sysctl {
	char pr_wchan[16];
	uint64_t pr_flags;
	uint64_t pr_size;
	uint64_t pr_pagesize;
	uint64_t pr_itemsperpage;
	uint64_t pr_nitems;
	uint64_t pr_nout;
	uint64_t pr_hardlimit;
	uint64_t pr_npages;
	uint64_t pr_minpages;
	uint64_t pr_maxpages;

	uint64_t pr_nget;
	uint64_t pr_nfail;
	uint64_t pr_nput;
	uint64_t pr_npagealloc;
	uint64_t pr_npagefree;
	uint64_t pr_hiwat;
	uint64_t pr_nidle;

	uint64_t pr_cache_meta_size;
	uint64_t pr_cache_nfull;
	uint64_t pr_cache_npartial;
	uint64_t pr_cache_nempty;
	uint64_t pr_cache_ncontended;
	uint64_t pr_cache_nmiss_global;
	uint64_t pr_cache_nhit_global;
	uint64_t pr_cache_nmiss_pcpu;
	uint64_t pr_cache_nhit_pcpu;
};
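
/*
 * Example: reading these records from userland.  A minimal sketch; it
 * assumes the kernel exports them as an array of struct pool_sysctl
 * via sysctl(3) under a node named "kern.pool" (the node name is an
 * assumption here, not something fixed by this header):
 *
 *	#include <sys/sysctl.h>
 *	#include <err.h>
 *	#include <inttypes.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	size_t i, len;
 *	struct pool_sysctl *pp;
 *
 *	if (sysctlbyname("kern.pool", NULL, &len, NULL, 0) == -1)
 *		err(1, "sysctlbyname");
 *	if ((pp = malloc(len)) == NULL)
 *		err(1, "malloc");
 *	if (sysctlbyname("kern.pool", pp, &len, NULL, 0) == -1)
 *		err(1, "sysctlbyname");
 *	for (i = 0; i < len / sizeof(*pp); i++)
 *		printf("%.16s: %" PRIu64 " gets, %" PRIu64 " puts\n",
 *		    pp[i].pr_wchan, pp[i].pr_nget, pp[i].pr_nput);
 */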

#ifdef _KERNEL
#define __POOL_EXPOSE
#endif

#ifdef __POOL_EXPOSE
#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <sys/time.h>
#include <sys/tree.h>
#include <sys/callback.h>

#ifdef _KERNEL_OPT
#include "opt_pool.h"
#endif

#define POOL_PADDR_INVALID	((paddr_t) -1)

struct pool;

struct pool_allocator {
	void		*(*pa_alloc)(struct pool *, int);
	void		(*pa_free)(struct pool *, void *);
	unsigned int	pa_pagesz;

	/* The following fields are for internal use only. */
	kmutex_t	pa_lock;
	TAILQ_HEAD(, pool) pa_list;	/* list of pools using this allocator */
	uint32_t	pa_refcnt;	/* number of pools using this allocator */
	int		pa_pagemask;
	int		pa_pageshift;
};
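
/*
 * Example: supplying a custom back-end allocator.  A minimal sketch
 * (the example_* names are hypothetical, and <uvm/uvm_extern.h> is
 * assumed for the uvm_km_* calls); the caller fills in only pa_alloc,
 * pa_free and pa_pagesz (0 selects the default page size), and the
 * pool code initializes the internal fields:
 *
 *	static void *
 *	example_page_alloc(struct pool *pp, int flags)
 *	{
 *		return ((void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
 *		    UVM_KMF_WIRED |
 *		    ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT)));
 *	}
 *
 *	static void
 *	example_page_free(struct pool *pp, void *v)
 *	{
 *		uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE,
 *		    UVM_KMF_WIRED);
 *	}
 *
 *	static struct pool_allocator example_allocator = {
 *		.pa_alloc = example_page_alloc,
 *		.pa_free = example_page_free,
 *		.pa_pagesz = 0,
 *	};
 */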

LIST_HEAD(pool_pagelist, pool_item_header);
SPLAY_HEAD(phtree, pool_item_header);

#define POOL_QUARANTINE_DEPTH	128
typedef struct {
	size_t rotor;
	intptr_t list[POOL_QUARANTINE_DEPTH];
} pool_quar_t;

struct pool {
	TAILQ_ENTRY(pool)
			pr_poollist;
	struct pool_pagelist
			pr_emptypages;	/* Empty pages */
	struct pool_pagelist
			pr_fullpages;	/* Full pages */
	struct pool_pagelist
			pr_partpages;	/* Partially-allocated pages */
	struct pool_item_header	*pr_curpage;
	struct pool	*pr_phpool;	/* Pool item header pool */
	struct pool_cache *pr_cache;	/* Cache for this pool */
	unsigned int	pr_size;	/* Size of item */
	unsigned int	pr_align;	/* Requested alignment, must be 2^n */
	unsigned int	pr_itemoffset;	/* offset of the item space */
	unsigned int	pr_minitems;	/* minimum # of free items to keep */
	unsigned int	pr_maxitems;	/* maximum # of free items to keep */
	unsigned int	pr_minpages;	/* minimum # of pages to keep */
	unsigned int	pr_maxpages;	/* maximum # of pages to keep */
	unsigned int	pr_npages;	/* # of pages allocated */
	unsigned int	pr_itemsperpage;/* # items that fit in a page */
	unsigned int	pr_poolid;	/* id of the pool */
	unsigned int	pr_nitems;	/* number of free items in pool */
	unsigned int	pr_nout;	/* # items currently allocated */
	unsigned int	pr_hardlimit;	/* hard limit on # of allocated
					   items */
	unsigned int	pr_refcnt;	/* ref count for pagedaemon, etc */
	struct pool_allocator *pr_alloc;/* back-end allocator */
	TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */

	/* Drain hook. */
	void		(*pr_drain_hook)(void *, int);
	void		*pr_drain_hook_arg;

	const char	*pr_wchan;	/* tsleep(9) identifier */
	unsigned int	pr_flags;	/* r/w flags */
	unsigned int	pr_roflags;	/* r/o flags */
#define PR_WAITOK	0x01	/* Note: matches KM_SLEEP */
#define PR_NOWAIT	0x02	/* Note: matches KM_NOSLEEP */
#define PR_WANTED	0x04	/* waiting for free objects */
#define PR_PHINPAGE	0x40	/* page header in page */
#define PR_LIMITFAIL	0x100	/* even if waiting, fail if we hit limit */
#define PR_RECURSIVE	0x200	/* pool contains pools, for vmstat(8) */
#define PR_NOTOUCH	0x400	/* don't use free items to keep internal state */
#define PR_NOALIGN	0x800	/* don't assume backend alignment */
#define PR_LARGECACHE	0x1000	/* use large cache groups */
#define PR_GROWING	0x2000	/* pool_grow in progress */
#define PR_GROWINGNOWAIT 0x4000	/* pool_grow in progress by PR_NOWAIT alloc */
#define PR_ZERO		0x8000	/* zero data before returning */
#define PR_USEBMAP	0x10000	/* use a bitmap to manage freed items */
#define PR_PSERIALIZE	0x20000	/* needs pserialize sync point before free */

	/*
	 * `pr_lock' protects the pool's data structures when removing
	 * items from or returning items to the pool, or when reading
	 * or updating read/write fields in the pool descriptor.
	 *
	 * We assume back-end page allocators provide their own locking
	 * scheme.  They will be called with the pool descriptor _unlocked_,
	 * since the page allocators may block.
	 */
	kmutex_t	pr_lock;
	kcondvar_t	pr_cv;
	int		pr_ipl;

	struct phtree	pr_phtree;

	int		pr_maxcolor;	/* Cache colouring */
	int		pr_curcolor;
	int		pr_phoffset;	/* unused */

	/*
	 * Warning message to be issued, and a per-time-delta rate cap,
	 * if the hard limit is reached.
	 */
	const char	*pr_hardlimit_warning;
	struct timeval	pr_hardlimit_ratecap;
	struct timeval	pr_hardlimit_warning_last;

	/*
	 * Instrumentation
	 */
	unsigned long	pr_nget;	/* # of successful requests */
	unsigned long	pr_nfail;	/* # of unsuccessful requests */
	unsigned long	pr_nput;	/* # of releases */
	unsigned long	pr_npagealloc;	/* # of pages allocated */
	unsigned long	pr_npagefree;	/* # of pages released */
	unsigned int	pr_hiwat;	/* max # of pages in pool */
	unsigned long	pr_nidle;	/* # of idle pages */

	/*
	 * Diagnostic aids.
	 */
	void		*pr_freecheck;
	void		*pr_qcache;
	bool		pr_redzone;
	size_t		pr_reqsize;
	size_t		pr_reqsize_with_redzone;
#ifdef POOL_QUARANTINE
	pool_quar_t	pr_quar;
#endif
};

/*
 * Cache group sizes, assuming 4-byte paddr_t on !_LP64.
 * All groups will be aligned to COHERENCY_UNIT.
 */
#ifdef _LP64
#define PCG_NOBJECTS_NORMAL	15	/* 256 byte group */
#define PCG_NOBJECTS_LARGE	63	/* 1024 byte group */
#else
#define PCG_NOBJECTS_NORMAL	14	/* 124 byte group */
#define PCG_NOBJECTS_LARGE	62	/* 508 byte group */
#endif

typedef struct pcgpair {
	void	*pcgo_va;		/* object virtual address */
	paddr_t	pcgo_pa;		/* object physical address */
} pcgpair_t;

/* The pool cache group. */
typedef struct pool_cache_group {
	struct pool_cache_group	*pcg_next;	/* link to next group */
	u_int			pcg_avail;	/* # available objects */
	u_int			pcg_size;	/* max number of objects */
	pcgpair_t		pcg_objects[1];	/* the objects */
} pcg_t;
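
/*
 * A worked size check for the normal _LP64 group: the header above is
 * 8 (pcg_next) + 4 (pcg_avail) + 4 (pcg_size) = 16 bytes, and each
 * pcgpair_t is 8 + 8 = 16 bytes, so 16 + 15 * 16 = 256 bytes.  On
 * !_LP64 with 4-byte paddr_t: 12 + 14 * 8 = 124 bytes.
 */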

/* Pool cache CPU.  Sized to 64 bytes on _LP64. */
typedef struct pool_cache_cpu {
	struct pool_cache_group	*cc_current;
	struct pool_cache_group	*cc_previous;
	pcg_t *volatile		*cc_pcgcache;
	uint64_t		cc_misses;
	uint64_t		cc_hits;
	uint64_t		cc_pcmisses;
	uint64_t		cc_contended;
	uint32_t		cc_nfull;
	uint32_t		cc_npart;
} pool_cache_cpu_t;

struct pool_cache {
	/* Pool layer. */
	struct pool	pc_pool;

	/* Cache layer. */
	TAILQ_ENTRY(pool_cache)
			pc_cachelist;	/* entry on global cache list */
	struct pool	*pc_pcgpool;	/* Pool of cache groups */
	pcg_t *volatile *pc_pcgcache;	/* list of empty cache groups */
	int		pc_pcgsize;	/* Use large cache groups? */
	int		pc_ncpu;	/* number of CPUs set up */
	int		(*pc_ctor)(void *, void *, int);
	void		(*pc_dtor)(void *, void *);
	void		*pc_arg;	/* for ctor/dtor */
	unsigned int	pc_refcnt;	/* ref count for pagedaemon, etc */
	unsigned int	pc_roflags;	/* r/o cache flags */
	void		*pc_cpus[MAXCPUS];

	/* Diagnostic aids. */
	void		*pc_freecheck;
	bool		pc_redzone;
	size_t		pc_reqsize;

	/* Hot items. */
	pcg_t *volatile pc_fullgroups	/* list of full cache groups */
	    __aligned(CACHE_LINE_SIZE);
	pcg_t *volatile pc_partgroups;	/* groups for reclamation */

	/* Boot cpu. */
	pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);
};

#endif /* __POOL_EXPOSE */

typedef struct pool_cache *pool_cache_t;

#ifdef _KERNEL
/*
 * pool_allocator_kmem is the default that all pools get unless
 * otherwise specified.  pool_allocator_nointr is provided for
 * pools that know they will never be accessed in interrupt
 * context.
 */
extern struct pool_allocator pool_allocator_kmem;
extern struct pool_allocator pool_allocator_nointr;
extern struct pool_allocator pool_allocator_meta;

void		pool_subsystem_init(void);

void		pool_init(struct pool *, size_t, u_int, u_int,
		    int, const char *, struct pool_allocator *, int);
void		pool_destroy(struct pool *);

void		pool_set_drain_hook(struct pool *,
		    void (*)(void *, int), void *);
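
/*
 * Example: registering a drain hook.  A minimal sketch (foo_pool,
 * foo_softc and foo_release_cached_objects are hypothetical); the
 * hook is invoked when the system is short of memory, and its int
 * argument carries the PR_WAITOK/PR_NOWAIT allocation flags:
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		foo_release_cached_objects(arg);
 *	}
 *
 *	pool_set_drain_hook(&foo_pool, foo_drain, &foo_softc);
 */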

void		*pool_get(struct pool *, int);
void		pool_put(struct pool *, void *);
int		pool_reclaim(struct pool *);
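
/*
 * Example: typical pool setup and use.  A minimal sketch (struct foo
 * and foo_pool are hypothetical names):
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &pool_allocator_nointr, IPL_NONE);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 *
 *	pool_destroy(&foo_pool);
 */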

void		pool_prime(struct pool *, int);
void		pool_setlowat(struct pool *, int);
void		pool_sethiwat(struct pool *, int);
void		pool_sethardlimit(struct pool *, int, const char *, int);
bool		pool_drain(struct pool **);
int		pool_totalpages(void);
int		pool_totalpages_locked(void);

unsigned int	pool_nget(struct pool *);
unsigned int	pool_nput(struct pool *);

/*
 * Debugging and diagnostic aids.
 */
void		pool_printit(struct pool *, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void		pool_printall(const char *, void (*)(const char *, ...)
    __printflike(1, 2));
int		pool_chk(struct pool *, const char *);

/*
 * Pool cache routines.
 */
pool_cache_t	pool_cache_init(size_t, u_int, u_int, u_int, const char *,
		    struct pool_allocator *, int, int (*)(void *, void *, int),
		    void (*)(void *, void *), void *);
void		pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
		    const char *, struct pool_allocator *, int,
		    int (*)(void *, void *, int), void (*)(void *, void *),
		    void *);
void		pool_cache_destroy(pool_cache_t);
void		pool_cache_bootstrap_destroy(pool_cache_t);
void		*pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
void		pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
void		pool_cache_destruct_object(pool_cache_t, void *);
void		pool_cache_invalidate(pool_cache_t);
bool		pool_cache_reclaim(pool_cache_t);
void		pool_cache_set_drain_hook(pool_cache_t,
		    void (*)(void *, int), void *);
void		pool_cache_setlowat(pool_cache_t, int);
void		pool_cache_sethiwat(pool_cache_t, int);
void		pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
void		pool_cache_prime(pool_cache_t, int);
void		pool_cache_cpu_init(struct cpu_info *);

unsigned int	pool_cache_nget(pool_cache_t);
unsigned int	pool_cache_nput(pool_cache_t);

#define		pool_cache_get(pc, f) pool_cache_get_paddr((pc), (f), NULL)
#define		pool_cache_put(pc, o) pool_cache_put_paddr((pc), (o), \
				          POOL_PADDR_INVALID)
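
/*
 * Example: typical pool cache use.  A minimal sketch (the foo names
 * are hypothetical); passing a NULL allocator selects the default,
 * and the ctor/dtor and their argument may be NULL when objects need
 * no construction:
 *
 *	static pool_cache_t foo_cache;
 *
 *	foo_cache = pool_cache_init(sizeof(struct foo), 0, 0, 0,
 *	    "foocache", NULL, IPL_NONE, NULL, NULL, NULL);
 *
 *	struct foo *f = pool_cache_get(foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(foo_cache, f);
 *
 *	pool_cache_destroy(foo_cache);
 */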

void		pool_whatis(uintptr_t, void (*)(const char *, ...)
    __printflike(1, 2));
#endif /* _KERNEL */

#endif /* _SYS_POOL_H_ */