master
  1/*-
  2 * SPDX-License-Identifier: BSD-3-Clause
  3 *
  4 * Copyright (c) 1982, 1986, 1990, 1993
  5 *	The Regents of the University of California.  All rights reserved.
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions
  9 * are met:
 10 * 1. Redistributions of source code must retain the above copyright
 11 *    notice, this list of conditions and the following disclaimer.
 12 * 2. Redistributions in binary form must reproduce the above copyright
 13 *    notice, this list of conditions and the following disclaimer in the
 14 *    documentation and/or other materials provided with the distribution.
 15 * 3. Neither the name of the University nor the names of its contributors
 16 *    may be used to endorse or promote products derived from this software
 17 *    without specific prior written permission.
 18 *
 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 29 * SUCH DAMAGE.
 30 *
 31 *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
 32 */
 33#ifndef _SYS_SOCKBUF_H_
 34#define _SYS_SOCKBUF_H_
 35
 36/*
 37 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 38 */
/*
 * Every SB_* value below is a distinct single bit, so the flags can be
 * combined with bitwise OR and tested with bitwise AND.
 */
#define	SB_TLS_RX	0x01		/* using KTLS on RX */
#define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
#define	SB_WAIT		0x04		/* someone is waiting for data/space */
#define	SB_SEL		0x08		/* someone is selecting */
#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
#define	SB_UPCALL	0x20		/* someone wants an upcall */
#define	SB_NOINTR	0x40		/* operations not interruptible */
#define	SB_AIO		0x80		/* AIO operations queued */
#define	SB_KNOTE	0x100		/* kernel note attached */
#define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
#define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
#define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
#define	SB_STOP		0x1000		/* backpressure indicator */
#define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
#define	SB_SPLICED	0x4000		/* socket buffer is spliced;
					   previously used for SB_TLS_IFNET */
/*
 * NOTE(review): sb_flags in struct sockbuf is declared as a (signed) short.
 * 0x8000 does not fit in the positive range of a 16-bit short, so this bit
 * lands in the sign bit -- confirm that every user only tests it with '&'.
 */
#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */

/*
 * SBS_* state bits.  Presumably these are kept in sb_state rather than in
 * sb_flags (their values overlap SB_ASYNC/SB_UPCALL/SB_NOINTR) -- confirm
 * against the users of these constants.
 */
#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */
 60
 61#if defined(_KERNEL) || defined(_WANT_SOCKET)
 62#include <sys/_lock.h>
 63#include <sys/_mutex.h>
 64#include <sys/_sx.h>
 65#include <sys/_task.h>
 66
/* 2 * 1024 * 1024 = 2097152 characters. */
#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */

/*
 * Forward declarations.  The declarations in this header refer to these
 * types through pointers, so the complete definitions are not pulled in.
 */
struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;
 76
 77/*
 78 * Socket buffer
 79 *
 80 * A buffer starts with the fields that are accessed by I/O multiplexing
 81 * APIs like select(2), kevent(2) or AIO and thus are shared between different
 82 * buffer implementations.  They are protected by the SOCK_RECVBUF_LOCK()
 83 * or SOCK_SENDBUF_LOCK() of the owning socket.
 84 *
 85 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
 86 * methods.
 87 *
 88 * Protocol specific implementations follow in a union.
 89 */
struct sockbuf {
	/* Fields common to every buffer implementation. */
	struct	selinfo *sb_sel;	/* process selecting read/write */
	short	sb_state;		/* socket state on sockbuf */
	short	sb_flags;		/* flags, see above */
	u_int	sb_acc;			/* available chars in buffer */
	u_int	sb_ccc;			/* claimed chars in buffer */
	u_int	sb_mbcnt;		/* chars of mbufs used */
	u_int	sb_ctl;			/* non-data chars in buffer */
	u_int	sb_hiwat;		/* max actual char count */
	u_int	sb_lowat;		/* low water mark */
	u_int	sb_mbmax;		/* max chars of mbufs to use */
	sbintime_t sb_timeo;		/* timeout for read/write */
	/*
	 * Upcall callback and the opaque argument stored alongside it.
	 * NOTE(review): presumably used when SB_UPCALL is set in sb_flags --
	 * confirm against the callers.
	 */
	int	(*sb_upcall)(struct socket *, void *, int);
	void	*sb_upcallarg;
	TAILQ_HEAD(, kaiocb) sb_aiojobq;	/* pending AIO ops */
	struct	task sb_aiotask;		/* AIO task */
	/*
	 * Implementation specific part.  The anonymous structs below are
	 * members of one anonymous union and therefore share storage; their
	 * fields are accessed directly as members of struct sockbuf.
	 */
	union {
		/*
		 * Classic BSD one-size-fits-all socket buffer, capable of
		 * doing streams and datagrams. The stream part is able
		 * to perform special features:
		 * - not ready data (sendfile)
		 * - TLS
		 */
		struct {
			/* compat: sockbuf lock pointer */
			struct	mtx *sb_mtx;
			/* first and last mbufs in the chain */
			struct	mbuf *sb_mb;
			struct	mbuf *sb_mbtail;
			/* first mbuf of last record in socket buffer */
			struct	mbuf *sb_lastrecord;
			/* pointer to data to send next (TCP) */
			struct	mbuf *sb_sndptr;
			/* pointer to first not ready buffer */
			struct	mbuf *sb_fnrdy;
			/* byte offset of ptr into chain, used with sb_sndptr */
			u_int	sb_sndptroff;
			/* TLS */
			u_int	sb_tlscc;	/* TLS chain characters */
			u_int	sb_tlsdcc;	/* characters being decrypted */
			struct	mbuf *sb_mtls;	/* TLS mbuf chain */
			struct	mbuf *sb_mtlstail; /* last mbuf in TLS chain */
			uint64_t sb_tls_seqno;	/* TLS seqno */
			/* TLS state, locked by sockbuf and sock I/O mutexes. */
			struct	ktls_session *sb_tls_info;
		};
		/*
		 * PF_UNIX/SOCK_DGRAM
		 *
		 * Local protocol, thus we should buffer on the receive side
		 * only.  However, in one to many configuration we don't want
		 * a single receive buffer to be shared.  So we would link
		 * send buffers onto receive buffer.  All the fields are locked
		 * by the receive buffer lock.
		 */
		struct {
			/*
			 * For receive buffer: own queue of this buffer for
			 * unconnected sends.  For send buffer: queue lent
			 * to the peer receive buffer, to isolate ourselves
			 * from other senders.
			 */
			STAILQ_HEAD(, mbuf)	uxdg_mb;
			/* For receive buffer: datagram seen via MSG_PEEK. */
			struct mbuf		*uxdg_peeked;
			/*
			 * For receive buffer: queue of send buffers of
			 * connected peers.  For send buffer: linkage on
			 * connected peer receive buffer queue.
			 */
			union {
				TAILQ_HEAD(, sockbuf)	uxdg_conns;
				TAILQ_ENTRY(sockbuf)	uxdg_clist;
			};
			/* Counters for this buffer uxdg_mb chain + peeked. */
			u_int uxdg_cc;
			u_int uxdg_ctl;
			u_int uxdg_mbcnt;
		};
	};
};
172
173#endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
174#ifdef _KERNEL
175
/*
 * Selector handed to KPIs that operate on exactly one buffer of a socket.
 * The enumerators carry the implicit values 0 (SO_RCV) and 1 (SO_SND).
 */
typedef enum {
	SO_RCV,
	SO_SND
} sb_which;
178
179/*
180 * Per-socket buffer mutex used to protect most fields in the socket buffer.
181 * These make use of the mutex pointer embedded in struct sockbuf, which
182 * currently just references mutexes in the containing socket.  The
183 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
184 * these locking macros.
185 */
/*
 * SOCKBUF_MTX() yields the sb_mtx pointer of the buffer; sb_mtx lives in the
 * classic-sockbuf arm of the union inside struct sockbuf.  The remaining
 * macros are thin wrappers that hand that pointer to mtx_lock(),
 * mtx_owned(), mtx_unlock() and mtx_assert().  The argument is
 * parenthesized, so any pointer-valued expression may be passed.
 */
#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
/* Assert with MA_OWNED / MA_NOTOWNED respectively. */
#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
192
193/*
194 * Socket buffer private mbuf(9) flags.
195 */
/* These reuse the protocol-private mbuf flag bits M_PROTO1 and M_PROTO2. */
#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
/* Mask covering both flags above. */
#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
199
/*
 * Socket buffer KPI prototypes.
 *
 * Many entry points exist as a pair: foo() and foo_locked().
 * NOTE(review): going by the naming convention, the _locked variants
 * presumably require the caller to already hold the socket buffer lock,
 * while the plain variants take it themselves -- confirm against the
 * implementation, which is not part of this header.
 *
 * Functions that take a (struct socket *, sb_which) pair select the buffer
 * through the sb_which value instead of a direct struct sockbuf pointer.
 */
void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
	    struct mbuf *m0, struct mbuf *control);
int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
	    struct mbuf *m0, struct mbuf *control);
int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
void	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
	    struct mbuf *control, int flags);
void	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
	    struct mbuf *control, int flags);
void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
	sbcreatecontrol(const void *p, u_int size, int type, int level,
	    int wait);
void	sbdestroy(struct socket *, sb_which);
void	sbdrop(struct sockbuf *sb, int len);
void	sbdrop_locked(struct sockbuf *sb, int len);
struct mbuf *
	sbcut_locked(struct sockbuf *sb, int len);
void	sbdroprecord(struct sockbuf *sb);
void	sbdroprecord_locked(struct sockbuf *sb);
void	sbflush(struct sockbuf *sb);
void	sbflush_locked(struct sockbuf *sb);
void	sbrelease(struct socket *, sb_which);
void	sbrelease_locked(struct socket *, sb_which);
int	sbsetopt(struct socket *so, struct sockopt *);
bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
	    struct thread *td);
bool	sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
	    u_long buf_max, struct thread *td);
void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
struct mbuf *
	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
struct mbuf *
	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
int	sbwait(struct socket *, sb_which);
void	sballoc(struct sockbuf *, struct mbuf *);
void	sbfree(struct sockbuf *, struct mbuf *);
void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
void	sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
int	sbready(struct sockbuf *, struct mbuf *, int);
247
248/*
249 * Return how much data is available to be taken out of socket
250 * buffer right now.
251 */
252static inline u_int
253sbavail(struct sockbuf *sb)
254{
255
256#if 0
257	SOCKBUF_LOCK_ASSERT(sb);
258#endif
259	return (sb->sb_acc);
260}
261
262/*
263 * Return how much data sits there in the socket buffer
264 * It might be that some data is not yet ready to be read.
265 */
266static inline u_int
267sbused(struct sockbuf *sb)
268{
269
270#if 0
271	SOCKBUF_LOCK_ASSERT(sb);
272#endif
273	return (sb->sb_ccc);
274}
275
276/*
277 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
278 * This is problematical if the fields are unsigned, as the space might
279 * still be negative (ccc > hiwat or mbcnt > mbmax).
280 */
281static inline long
282sbspace(struct sockbuf *sb)
283{
284	int bleft, mleft;		/* size should match sockbuf fields */
285
286#if 0
287	SOCKBUF_LOCK_ASSERT(sb);
288#endif
289
290	if (sb->sb_flags & SB_STOP)
291		return(0);
292
293	bleft = sb->sb_hiwat - sb->sb_ccc;
294	mleft = sb->sb_mbmax - sb->sb_mbcnt;
295
296	return ((bleft < mleft) ? bleft : mleft);
297}
298
/*
 * When the buffer's mbuf chain is empty (sb_mb == NULL), also reset the
 * sb_mbtail and sb_lastrecord pointers to NULL; a non-empty buffer is left
 * untouched.  The argument is evaluated more than once.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb != NULL)					\
		break;		/* leaves only this do/while */		\
	(sb)->sb_mbtail = NULL;						\
	(sb)->sb_lastrecord = NULL;					\
} while (/*CONSTCOND*/0)
305
/*
 * Socket buffer consistency check hooks.  Without SOCKBUF_DEBUG the three
 * macros expand to empty statements; with it they call the matching checker
 * function, passing the file name and line number of the call site.
 */
#ifndef SOCKBUF_DEBUG
#define	SBLASTRECORDCHK(sb)	do {} while (0)
#define	SBLASTMBUFCHK(sb)	do {} while (0)
#define	SBCHECK(sb)		do {} while (0)
#else /* SOCKBUF_DEBUG */
void	sblastrecordchk(struct sockbuf *, const char *, int);
void	sblastmbufchk(struct sockbuf *, const char *, int);
void	sbcheck(struct sockbuf *, const char *, int);
#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
#endif /* SOCKBUF_DEBUG */
318
319#endif /* _KERNEL */
320
321#endif /* _SYS_SOCKBUF_H_ */