/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2019, 2020 Jeffrey Roberson <jeff@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef _SYS_SMR_H_
#define	_SYS_SMR_H_

#include <sys/_smr.h>

/*
 * Safe memory reclamation.  See subr_smr.c for a description of the
 * algorithm, and smr_types.h for macros to define and access SMR-protected
 * data structures.
 *
 * Readers synchronize with smr_enter()/smr_exit() and writers may either
 * free directly to a SMR UMA zone or use smr_synchronize() or smr_wait().
 */

/*
 * Modular arithmetic for comparing sequence numbers that have
 * potentially wrapped.  Copied from tcp_seq.h.
 */
#define	SMR_SEQ_LT(a, b)	((smr_delta_t)((a)-(b)) < 0)
#define	SMR_SEQ_LEQ(a, b)	((smr_delta_t)((a)-(b)) <= 0)
#define	SMR_SEQ_GT(a, b)	((smr_delta_t)((a)-(b)) > 0)
#define	SMR_SEQ_GEQ(a, b)	((smr_delta_t)((a)-(b)) >= 0)
#define	SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a)-(b)))
#define	SMR_SEQ_MIN(a, b)	(SMR_SEQ_LT((a), (b)) ? (a) : (b))
#define	SMR_SEQ_MAX(a, b)	(SMR_SEQ_GT((a), (b)) ? (a) : (b))

#define	SMR_SEQ_INVALID		0

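/*
 * Illustrative sketch (not part of the API): the signed-difference casts
 * above keep the comparisons correct when the unsigned sequence counter
 * wraps.  The values below are hypothetical:
 *
 *	smr_seq_t old = 0xfffffff0u;	(about to wrap)
 *	smr_seq_t new = 0x00000010u;	(already wrapped)
 *
 *	SMR_SEQ_GT(new, old)		is true; new is logically later
 *	SMR_SEQ_DELTA(new, old)		is 0x20, a small positive delta
 */
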
/* Shared SMR state. */
union s_wr {
	struct {
		smr_seq_t	seq;	/* Current write sequence #. */
		int		ticks;	/* tick of last update (LAZY) */
	};
	uint64_t	_pair;
};
struct smr_shared {
	const char	*s_name;	/* Name for debugging/reporting. */
	union s_wr	s_wr;		/* Write sequence */
	smr_seq_t	s_rd_seq;	/* Minimum observed read sequence. */
};
typedef struct smr_shared *smr_shared_t;

/* Per-cpu SMR state. */
struct smr {
	smr_seq_t	c_seq;		/* Current observed sequence. */
	smr_shared_t	c_shared;	/* Shared SMR state. */
	int		c_deferred;	/* Deferred advance counter. */
	int		c_limit;	/* Deferred advance limit. */
	int		c_flags;	/* SMR Configuration */
};

#define	SMR_LAZY	0x0001		/* Higher latency write, fast read. */
#define	SMR_DEFERRED	0x0002		/* Aggregate updates to wr_seq. */

/*
 * Return the current write sequence number.  This is not the same as the
 * current goal, which may be in the future.
 */
static inline smr_seq_t
smr_shared_current(smr_shared_t s)
{

	return (atomic_load_int(&s->s_wr.seq));
}

static inline smr_seq_t
smr_current(smr_t smr)
{

	return (smr_shared_current(zpcpu_get(smr)->c_shared));
}

/*
 * Enter a read section.
 */
static inline void
smr_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * Store the current observed write sequence number in our
	 * per-cpu state so that it can be queried via smr_poll().
	 * Frees that are newer than this stored value will be
	 * deferred until we call smr_exit().
	 *
	 * Subsequent loads must not be re-ordered with the store.  On
	 * x86 platforms, any locked instruction will provide this
	 * guarantee, so as an optimization we use a single operation to
	 * both store the cached write sequence number and provide the
	 * requisite barrier, taking advantage of the fact that
	 * SMR_SEQ_INVALID is zero.
	 *
	 * It is possible that a long delay between loading the wr_seq
	 * and storing the c_seq could create a situation where the
	 * rd_seq advances beyond our stored c_seq.  In this situation
	 * only the observed wr_seq is stale; the fence still orders
	 * the load.  See smr_poll() for details on how this condition
	 * is detected and handled there.
	 */
#if defined(__amd64__) || defined(__i386__)
	atomic_add_acq_int(&smr->c_seq, smr_shared_current(smr->c_shared));
#else
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
	atomic_thread_fence_seq_cst();
#endif
}

/*
 * Exit a read section.
 */
static inline void
smr_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * Clear the recorded sequence number.  This allows poll() to
	 * detect CPUs not in read sections.
	 *
	 * Use release semantics to retire any stores before the sequence
	 * number is cleared.
	 */
	atomic_store_rel_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}
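
/*
 * Reader usage sketch (illustrative only; the 'foo' structure, the
 * 'foo_smr' context, and the SMR_POINTER-declared 'foo_ptr' are
 * hypothetical).  smr_entered_load() is provided by smr_types.h:
 *
 *	struct foo *fp;
 *
 *	smr_enter(foo_smr);
 *	fp = smr_entered_load(&foo_ptr, foo_smr);
 *	if (fp != NULL)
 *		(read-only access to *fp)
 *	smr_exit(foo_smr);
 *
 * The pointer is only guaranteed stable until smr_exit(); holding it
 * longer requires a reference count or other synchronization.
 */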

/*
 * Enter a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_lazy_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * This needs no serialization.  If an interrupt occurs before we
	 * assign wr_seq to c_seq, any speculative loads will be discarded.
	 * If we assign a stale wr_seq value due to an interrupt, we use the
	 * same algorithm that renders smr_enter() safe.
	 */
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
}

/*
 * Exit a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * All loads/stores must be retired before the sequence becomes
	 * visible.  The fence compiles away on amd64.  An alternative
	 * would be to omit the fence but store the exit time and wait
	 * 1 tick longer.
	 */
	atomic_thread_fence_rel();
	atomic_store_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}
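
/*
 * Lazy reader sketch (illustrative; 'foo_smr' must have been created
 * with SMR_LAZY).  The section looks the same as a normal read section,
 * but writers may observe a free latency of several ticks:
 *
 *	smr_lazy_enter(foo_smr);
 *	(read SMR-protected state)
 *	smr_lazy_exit(foo_smr);
 */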

/*
 * Advances the write sequence number.  Returns the sequence number
 * required to ensure that all modifications are visible to readers.
 */
smr_seq_t smr_advance(smr_t smr);

/*
 * Returns true if a goal sequence has been reached.  If
 * wait is true this will busy loop until success.
 */
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);

/* Create a new SMR context. */
smr_t smr_create(const char *name, int limit, int flags);

/* Destroy the context. */
void smr_destroy(smr_t smr);
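
/*
 * Creation sketch (illustrative; 'foo_smr' and the init hook are
 * hypothetical).  Pass 0 flags for a default context, or SMR_LAZY /
 * SMR_DEFERRED for the variants described above:
 *
 *	static smr_t foo_smr;
 *
 *	static void
 *	foo_smr_init(void *arg __unused)
 *	{
 *
 *		foo_smr = smr_create("foo", 0, 0);
 *	}
 */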

/*
 * Blocking wait for all readers to observe 'goal'.
 */
static inline void
smr_wait(smr_t smr, smr_seq_t goal)
{

	(void)smr_poll(smr, goal, true);
}

/*
 * Synchronize advances the write sequence and returns when all
 * readers have observed it.
 *
 * If your application can cache the sequence number returned by
 * smr_advance() and poll or wait at a later time, there is less
 * chance of busy looping while waiting for readers.
 */
static inline void
smr_synchronize(smr_t smr)
{

	smr_wait(smr, smr_advance(smr));
}
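
/*
 * Writer usage sketch (illustrative; 'foo', 'foo_smr', and M_FOO are
 * hypothetical).  Caching the goal returned by smr_advance() and
 * polling later, as described above, avoids spinning while readers
 * drain:
 *
 *	void
 *	foo_retire(struct foo *fp)
 *	{
 *
 *		(unlink fp so that new readers cannot find it)
 *		fp->goal = smr_advance(foo_smr);
 *	}
 *
 *	bool
 *	foo_reclaim_try(struct foo *fp)
 *	{
 *
 *		(called later; returns false if readers are still active)
 *		if (!smr_poll(foo_smr, fp->goal, false))
 *			return (false);
 *		free(fp, M_FOO);
 *		return (true);
 *	}
 *
 * Alternatively, smr_synchronize() performs the advance and the wait in
 * one step, and SMR UMA zones defer the free internally.
 */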

/* Only at startup. */
void smr_init(void);

#endif	/* _SYS_SMR_H_ */