master
  1/*-
  2 * SPDX-License-Identifier: BSD-2-Clause
  3 *
  4 * Copyright (c) 2004 Poul-Henning Kamp
  5 * All rights reserved.
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions
  9 * are met:
 10 * 1. Redistributions of source code must retain the above copyright
 11 *    notice, this list of conditions and the following disclaimer.
 12 * 2. Redistributions in binary form must reproduce the above copyright
 13 *    notice, this list of conditions and the following disclaimer in the
 14 *    documentation and/or other materials provided with the distribution.
 15 *
 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 26 * SUCH DAMAGE.
 27 */
 28
/*
 * Architectural notes:
 *
 * bufobj is a new object which is what buffers hang from in the buffer
 * cache.
 *
 * This used to be vnodes, but we need non-vnode code to be able
 * to use the buffer cache as well, specifically geom classes like gbde,
 * raid3 and raid5.
 *
 * All vnodes will contain a bufobj initially, but down the road we may
 * want to only allocate bufobjs when they are needed.  There could be a
 * large number of vnodes in the system which wouldn't need a bufobj during
 * their lifetime.
 *
 * The exact relationship to the vmobject is not determined at this point,
 * it may in fact be that we find them to be two sides of the same object
 * once things start to crystallize.
 */
 48
 49#ifndef _SYS_BUFOBJ_H_
 50#define _SYS_BUFOBJ_H_
 51
 52#if defined(_KERNEL) || defined(_KVM_VNODE)
 53
 54#include <sys/queue.h>
 55#include <sys/_lock.h>
 56#include <sys/_rwlock.h>
 57#include <sys/_pctrie.h>
 58
 59struct bufobj;
 60struct buf_ops;
 61
 62extern struct buf_ops buf_ops_bio;
 63
 64TAILQ_HEAD(buflists, buf);
 65
 66/* A Buffer list & trie */
 67struct bufv {
 68	struct buflists	bv_hd;		/* Sorted blocklist */
 69	struct pctrie	bv_root;	/* Buf trie */
 70	int		bv_cnt;		/* Number of buffers */
 71};
 72
 73typedef void b_strategy_t(struct bufobj *, struct buf *);
 74typedef int b_write_t(struct buf *);
 75typedef int b_sync_t(struct bufobj *, int waitfor);
 76typedef void b_bdflush_t(struct bufobj *, struct buf *);
 77
 78struct buf_ops {
 79	const char	*bop_name;
 80	b_write_t	*bop_write;
 81	b_strategy_t	*bop_strategy;
 82	b_sync_t	*bop_sync;
 83	b_bdflush_t	*bop_bdflush;
 84};
 85
 86#define BO_STRATEGY(bo, bp)	((bo)->bo_ops->bop_strategy((bo), (bp)))
 87#define BO_SYNC(bo, w)		((bo)->bo_ops->bop_sync((bo), (w)))
 88#define BO_WRITE(bo, bp)	((bo)->bo_ops->bop_write((bp)))
 89#define BO_BDFLUSH(bo, bp)	((bo)->bo_ops->bop_bdflush((bo), (bp)))
 90
 91/*
 92 * Locking notes:
 93 * 'S' is sync_mtx
 94 * 'v' is the vnode lock which embeds the bufobj.
 95 * '-' Constant and unchanging after initialization.
 96 */
 97struct bufobj {
 98	struct rwlock	bo_lock;	/* Lock which protects "i" things */
 99	struct buf_ops	*bo_ops;	/* - Buffer operations */
100	struct vm_object *bo_object;	/* v Place to store VM object */
101	LIST_ENTRY(bufobj) bo_synclist;	/* S dirty vnode list */
102	void		*bo_private;	/* private pointer */
103	struct bufv	bo_clean;	/* i Clean buffers */
104	struct bufv	bo_dirty;	/* i Dirty buffers */
105	int		bo_numoutput;	/* i Writes in progress */
106	u_int		bo_flag;	/* i Flags */
107	int		bo_domain;	/* - Clean queue affinity */
108	int		bo_bsize;	/* - Block size for i/o */
109};
110
/*
 * XXX BO_ONWORKLST could be replaced with a check for NULL list elements
 * in bo_synclist.
 */
#define	BO_ONWORKLST	(1 << 0)	/* On syncer work-list */
#define	BO_WWAIT	(1 << 1)	/* Wait for output to complete */
#define	BO_DEAD		(1 << 2)	/* Dead; only with INVARIANTS */
#define	BO_NOBUFS	(1 << 3)	/* No bufs allowed */
119
/*
 * Accessors for bo_lock.  BO_LOCKPTR yields the address of the rwlock;
 * BO_LOCK/BO_UNLOCK take and release it through rw_wlock/rw_wunlock,
 * BO_RLOCK/BO_RUNLOCK through rw_rlock/rw_runlock.  The ASSERT_BO_*
 * macros pass the matching RA_* condition to rw_assert.
 */
#define	BO_LOCKPTR(bo)		(&(bo)->bo_lock)
#define	BO_LOCK(bo)		rw_wlock(BO_LOCKPTR((bo)))
#define	BO_UNLOCK(bo)		rw_wunlock(BO_LOCKPTR((bo)))
#define	BO_RLOCK(bo)		rw_rlock(BO_LOCKPTR((bo)))
#define	BO_RUNLOCK(bo)		rw_runlock(BO_LOCKPTR((bo)))
#define	ASSERT_BO_WLOCKED(bo)	rw_assert(BO_LOCKPTR((bo)), RA_WLOCKED)
#define	ASSERT_BO_LOCKED(bo)	rw_assert(BO_LOCKPTR((bo)), RA_LOCKED)
#define	ASSERT_BO_UNLOCKED(bo)	rw_assert(BO_LOCKPTR((bo)), RA_UNLOCKED)
128
/* Functions operating on a bufobj; implemented outside this header. */
void bufobj_init(struct bufobj *bo, void *priv);
void bufobj_wdrop(struct bufobj *bo);
void bufobj_wref(struct bufobj *bo);
void bufobj_wrefl(struct bufobj *bo);
int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo);
int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo);
int bufsync(struct bufobj *bo, int waitfor);
void bufbdflush(struct bufobj *bo, struct buf *bp);
137
138#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
139#endif /* _SYS_BUFOBJ_H_ */