master
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)socketvar.h 8.3 (Berkeley) 2/19/95
32 */
33#ifndef _SYS_SOCKBUF_H_
34#define _SYS_SOCKBUF_H_
35
/*
 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 *
 * Every value is a distinct single bit, so flags can be OR-ed together and
 * tested with a mask.  All sixteen bits from 0x0001 through 0x8000 are
 * assigned; there is no unused bit left in that range.
 */
#define SB_TLS_RX 0x01 /* using KTLS on RX */
#define SB_TLS_RX_RUNNING 0x02 /* KTLS RX operation running */
#define SB_WAIT 0x04 /* someone is waiting for data/space */
#define SB_SEL 0x08 /* someone is selecting */
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
#define SB_UPCALL 0x20 /* someone wants an upcall */
#define SB_NOINTR 0x40 /* operations not interruptible */
#define SB_AIO 0x80 /* AIO operations queued */
#define SB_KNOTE 0x100 /* kernel note attached */
#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */
#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */
#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
#define SB_STOP 0x1000 /* backpressure indicator */
#define SB_AIO_RUNNING 0x2000 /* AIO operation running */
#define SB_SPLICED 0x4000 /* socket buffer is spliced;
 previously used for SB_TLS_IFNET */
#define SB_TLS_RX_RESYNC 0x8000 /* KTLS RX lost HW sync */
56
/*
 * Socket buffer state bits.
 * NOTE(review): presumably these are the values stored in the sb_state field
 * of struct sockbuf -- confirm against the users of sb_state.
 */
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
#define SBS_RCVATMARK 0x0040 /* at mark on input */
60
61#if defined(_KERNEL) || defined(_WANT_SOCKET)
62#include <sys/_lock.h>
63#include <sys/_mutex.h>
64#include <sys/_sx.h>
65#include <sys/_task.h>
66
#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf (2 MiB) */
68
/*
 * Forward declarations.  This header only uses pointers to these types, so
 * their full definitions are not needed here.
 */
struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;
76
/*
 * Socket buffer
 *
 * A buffer starts with the fields that are accessed by I/O multiplexing
 * APIs like select(2), kevent(2) or AIO and thus are shared between different
 * buffer implementations. They are protected by the SOCK_RECVBUF_LOCK()
 * or SOCK_SENDBUF_LOCK() of the owning socket.
 *
 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
 * methods.
 *
 * Protocol specific implementations follow in a union.  The union and the
 * structs inside it are anonymous, so their members are accessed directly
 * (e.g. sb->sb_mb or sb->uxdg_mb).  Because the variants overlay the same
 * storage, only the member set matching the buffer's implementation is
 * meaningful at any time.
 */
struct sockbuf {
	struct selinfo *sb_sel; /* process selecting read/write */
	short sb_state; /* socket state on sockbuf */
	/*
	 * NOTE(review): sb_flags is a signed short while SB_TLS_RX_RESYNC is
	 * 0x8000, i.e. the sign bit if short is 16 bits wide -- confirm that
	 * all users test flags with a mask and never rely on the sign.
	 */
	short sb_flags; /* flags, see above */
	u_int sb_acc; /* available chars in buffer */
	u_int sb_ccc; /* claimed chars in buffer */
	u_int sb_mbcnt; /* chars of mbufs used */
	u_int sb_ctl; /* non-data chars in buffer */
	u_int sb_hiwat; /* max actual char count */
	u_int sb_lowat; /* low water mark */
	u_int sb_mbmax; /* max chars of mbufs to use */
	sbintime_t sb_timeo; /* timeout for read/write */
	/* Upcall function pointer and the opaque argument stored with it. */
	int (*sb_upcall)(struct socket *, void *, int);
	void *sb_upcallarg;
	TAILQ_HEAD(, kaiocb) sb_aiojobq; /* pending AIO ops */
	struct task sb_aiotask; /* AIO task */
	union {
		/*
		 * Classic BSD one-size-fits-all socket buffer, capable of
		 * doing streams and datagrams. The stream part is able
		 * to perform special features:
		 * - not ready data (sendfile)
		 * - TLS
		 */
		struct {
			/* compat: sockbuf lock pointer */
			struct mtx *sb_mtx;
			/* first and last mbufs in the chain */
			struct mbuf *sb_mb;
			struct mbuf *sb_mbtail;
			/* first mbuf of last record in socket buffer */
			struct mbuf *sb_lastrecord;
			/* pointer to data to send next (TCP) */
			struct mbuf *sb_sndptr;
			/* pointer to first not ready buffer */
			struct mbuf *sb_fnrdy;
			/* byte offset of ptr into chain, used with sb_sndptr */
			u_int sb_sndptroff;
			/* TLS */
			u_int sb_tlscc; /* TLS chain characters */
			u_int sb_tlsdcc; /* characters being decrypted */
			struct mbuf *sb_mtls; /* TLS mbuf chain */
			struct mbuf *sb_mtlstail; /* last mbuf in TLS chain */
			uint64_t sb_tls_seqno; /* TLS seqno */
			/* TLS state, locked by sockbuf and sock I/O mutexes. */
			struct ktls_session *sb_tls_info;
		};
		/*
		 * PF_UNIX/SOCK_DGRAM
		 *
		 * Local protocol, thus we should buffer on the receive side
		 * only. However, in a one-to-many configuration we don't want
		 * a single receive buffer to be shared. So we link
		 * send buffers onto the receive buffer. All the fields are
		 * locked by the receive buffer lock.
		 */
		struct {
			/*
			 * For receive buffer: own queue of this buffer for
			 * unconnected sends. For send buffer: queue lent
			 * to the peer receive buffer, to isolate ourselves
			 * from other senders.
			 */
			STAILQ_HEAD(, mbuf) uxdg_mb;
			/* For receive buffer: datagram seen via MSG_PEEK. */
			struct mbuf *uxdg_peeked;
			/*
			 * For receive buffer: queue of send buffers of
			 * connected peers. For send buffer: linkage on
			 * connected peer receive buffer queue.
			 */
			union {
				TAILQ_HEAD(, sockbuf) uxdg_conns;
				TAILQ_ENTRY(sockbuf) uxdg_clist;
			};
			/* Counters for this buffer uxdg_mb chain + peeked. */
			u_int uxdg_cc;
			u_int uxdg_ctl;
			u_int uxdg_mbcnt;
		};
	};
};
172
173#endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */
174#ifdef _KERNEL
175
/*
 * Selector passed to KPIs that operate on just one of a socket's buffers.
 */
typedef enum {
	SO_RCV = 0,	/* the receive buffer */
	SO_SND = 1	/* the send buffer */
} sb_which;
178
/*
 * Per-socket buffer mutex used to protect most fields in the socket buffer.
 * These make use of the mutex pointer embedded in struct sockbuf, which
 * currently just references mutexes in the containing socket. The
 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
 * these locking macros.
 *
 * Note that sb_mtx is a member of the classic-BSD variant inside the sockbuf
 * union, so these macros read that variant's storage.
 */
/* The mutex pointer stored in the buffer. */
#define SOCKBUF_MTX(_sb) ((_sb)->sb_mtx)
/* Acquire the buffer mutex. */
#define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb))
/* Result of mtx_owned() on the buffer mutex. */
#define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb))
/* Release the buffer mutex. */
#define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb))
/* Assert that the buffer mutex is owned (MA_OWNED). */
#define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
/* Assert that the buffer mutex is not owned (MA_NOTOWNED). */
#define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
192
/*
 * Socket buffer private mbuf(9) flags.
 *
 * M_NOTREADY and M_BLOCKED are aliases for M_PROTO1 and M_PROTO2, which are
 * defined outside this header.  M_NOTAVAIL is the mask covering both.
 */
#define M_NOTREADY M_PROTO1 /* m_data not populated yet */
#define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */
#define M_NOTAVAIL (M_NOTREADY | M_BLOCKED)
199
/*
 * Socket buffer KPI.  Only the declarations are visible in this header; the
 * functions are implemented elsewhere.
 *
 * NOTE(review): by naming convention the *_locked variants presumably expect
 * the caller to already hold the relevant sockbuf lock, while the unsuffixed
 * counterparts acquire it themselves -- confirm against the implementation.
 */
void sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 struct mbuf *m0, struct mbuf *control);
int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
 struct mbuf *m0, struct mbuf *control);
int sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
 const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
 struct mbuf *control, int flags);
void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
 struct mbuf *control, int flags);
void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
 sbcreatecontrol(const void *p, u_int size, int type, int level,
 int wait);
void sbdestroy(struct socket *, sb_which);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
struct mbuf *
 sbcut_locked(struct sockbuf *sb, int len);
void sbdroprecord(struct sockbuf *sb);
void sbdroprecord_locked(struct sockbuf *sb);
void sbflush(struct sockbuf *sb);
void sbflush_locked(struct sockbuf *sb);
void sbrelease(struct socket *, sb_which);
void sbrelease_locked(struct socket *, sb_which);
int sbsetopt(struct socket *so, struct sockopt *);
bool sbreserve_locked(struct socket *so, sb_which which, u_long cc,
 struct thread *td);
bool sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
 u_long buf_max, struct thread *td);
void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
struct mbuf *
 sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
struct mbuf *
 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
int sbwait(struct socket *, sb_which);
void sballoc(struct sockbuf *, struct mbuf *);
void sbfree(struct sockbuf *, struct mbuf *);
void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
int sbready(struct sockbuf *, struct mbuf *, int);
247
248/*
249 * Return how much data is available to be taken out of socket
250 * buffer right now.
251 */
252static inline u_int
253sbavail(struct sockbuf *sb)
254{
255
256#if 0
257 SOCKBUF_LOCK_ASSERT(sb);
258#endif
259 return (sb->sb_acc);
260}
261
262/*
263 * Return how much data sits there in the socket buffer
264 * It might be that some data is not yet ready to be read.
265 */
266static inline u_int
267sbused(struct sockbuf *sb)
268{
269
270#if 0
271 SOCKBUF_LOCK_ASSERT(sb);
272#endif
273 return (sb->sb_ccc);
274}
275
276/*
277 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
278 * This is problematical if the fields are unsigned, as the space might
279 * still be negative (ccc > hiwat or mbcnt > mbmax).
280 */
281static inline long
282sbspace(struct sockbuf *sb)
283{
284 int bleft, mleft; /* size should match sockbuf fields */
285
286#if 0
287 SOCKBUF_LOCK_ASSERT(sb);
288#endif
289
290 if (sb->sb_flags & SB_STOP)
291 return(0);
292
293 bleft = sb->sb_hiwat - sb->sb_ccc;
294 mleft = sb->sb_mbmax - sb->sb_mbcnt;
295
296 return ((bleft < mleft) ? bleft : mleft);
297}
298
/*
 * When the buffer's mbuf chain is empty (sb_mb == NULL), also reset the
 * sb_mbtail and sb_lastrecord pointers to NULL; otherwise do nothing.
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define SB_EMPTY_FIXUP(sb) do { \
	if ((sb)->sb_mb == NULL) \
		(sb)->sb_mbtail = (sb)->sb_lastrecord = NULL; \
} while (/*CONSTCOND*/0)
305
/*
 * Socket buffer consistency-check hooks.
 *
 * With SOCKBUF_DEBUG defined, each macro calls the corresponding checker
 * function declared below, passing the call site's __FILE__ and __LINE__.
 * Without it, each macro expands to an empty statement, so call sites need
 * no conditional compilation of their own.
 */
#ifdef SOCKBUF_DEBUG
void sblastrecordchk(struct sockbuf *, const char *, int);
void sblastmbufchk(struct sockbuf *, const char *, int);
void sbcheck(struct sockbuf *, const char *, int);
#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__)
#define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__)
#define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__)
#else
#define SBLASTRECORDCHK(sb) do {} while (0)
#define SBLASTMBUFCHK(sb) do {} while (0)
#define SBCHECK(sb) do {} while (0)
#endif /* SOCKBUF_DEBUG */
318
319#endif /* _KERNEL */
320
321#endif /* _SYS_SOCKBUF_H_ */