master
1/* $NetBSD: mbuf.h,v 1.237 2022/12/16 08:42:55 msaitoh Exp $ */
2
3/*
4 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)mbuf.h 8.5 (Berkeley) 2/19/95
62 */
63
64#ifndef _SYS_MBUF_H_
65#define _SYS_MBUF_H_
66
67#ifdef _KERNEL_OPT
68#include "opt_mbuftrace.h"
69#endif
70
71#ifndef M_WAITOK
72#include <sys/malloc.h>
73#endif
74#include <sys/pool.h>
75#include <sys/queue.h>
76#if defined(_KERNEL)
77#include <sys/percpu_types.h>
78#include <sys/socket.h> /* for AF_UNSPEC */
79#include <sys/psref.h>
80#endif /* defined(_KERNEL) */
81
82/* For offsetof() */
83#if defined(_KERNEL) || defined(_STANDALONE)
84#include <sys/systm.h>
85#else
86#include <stddef.h>
87#endif
88
89#include <uvm/uvm_param.h> /* for MIN_PAGE_SIZE */
90
91#include <net/if.h>
92
93/*
94 * Mbufs are of a single size, MSIZE (machine/param.h), which
95 * includes overhead. An mbuf may add a single "mbuf cluster" of size
96 * MCLBYTES (also in machine/param.h), which has no additional overhead
97 * and is used instead of the internal data area; this is done when
98 * at least MINCLSIZE of data must be stored.
99 */
100
101/* Packet tags structure */
102struct m_tag {
103 SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */
104 uint16_t m_tag_id; /* Tag ID */
105 uint16_t m_tag_len; /* Length of data */
106};
107
108/* mbuf ownership structure */
109struct mowner {
110 char mo_name[16]; /* owner name (fxp0) */
111 char mo_descr[16]; /* owner description (input) */
112 LIST_ENTRY(mowner) mo_link; /* */
113 struct percpu *mo_counters;
114};
115
116#define MOWNER_INIT(x, y) { .mo_name = x, .mo_descr = y }
117
118enum mowner_counter_index {
119 MOWNER_COUNTER_CLAIMS, /* # of small mbuf claimed */
120 MOWNER_COUNTER_RELEASES, /* # of small mbuf released */
121 MOWNER_COUNTER_CLUSTER_CLAIMS, /* # of cluster mbuf claimed */
122 MOWNER_COUNTER_CLUSTER_RELEASES,/* # of cluster mbuf released */
123 MOWNER_COUNTER_EXT_CLAIMS, /* # of M_EXT mbuf claimed */
124 MOWNER_COUNTER_EXT_RELEASES, /* # of M_EXT mbuf released */
125
126 MOWNER_COUNTER_NCOUNTERS,
127};
128
129#if defined(_KERNEL)
130struct mowner_counter {
131 u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
132};
133#endif
134
135/* userland-exported version of struct mowner */
136struct mowner_user {
137 char mo_name[16]; /* owner name (fxp0) */
138 char mo_descr[16]; /* owner description (input) */
139 LIST_ENTRY(mowner) mo_link; /* unused padding; for compatibility */
140 u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
141};
142
143/*
144 * Macros for type conversion
145 * mtod(m,t) - convert mbuf pointer to data pointer of correct type
146 */
147#define mtod(m, t) ((t)((m)->m_data))
148
149/* header at beginning of each mbuf */
150struct m_hdr {
151 struct mbuf *mh_next; /* next buffer in chain */
152 struct mbuf *mh_nextpkt; /* next chain in queue/record */
153 char *mh_data; /* location of data */
154 struct mowner *mh_owner; /* mbuf owner */
155 int mh_len; /* amount of data in this mbuf */
156 int mh_flags; /* flags; see below */
157 paddr_t mh_paddr; /* physical address of mbuf */
158 short mh_type; /* type of data in this mbuf */
159};
160
161/*
162 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
163 *
164 * A note about csum_data:
165 *
166 * o For the out-bound direction, the low 16 bits indicates the offset after
167 * the L4 header where the final L4 checksum value is to be stored and the
168 * high 16 bits is the length of the L3 header (the start of the data to
169 * be checksummed).
170 *
171 * o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
172 * set. In this case, an L4 checksum has been calculated by hardware and
173 * is stored in csum_data, but it is up to software to perform final
174 * verification.
175 *
176 * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
177 * be bit-wise inverted (the final step in the calculation of an IP
178 * checksum) -- this is so we can accumulate the checksum for fragmented
179 * packets during reassembly.
180 *
181 * Size ILP32: 40
182 * LP64: 56
183 */
184struct pkthdr {
185 union {
186 void *ctx; /* for M_GETCTX/M_SETCTX */
187 if_index_t index; /* rcv interface index */
188 } _rcvif;
189#define rcvif_index _rcvif.index
190 SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
191 int len; /* total packet length */
192 int csum_flags; /* checksum flags */
193 uint32_t csum_data; /* checksum data */
194 u_int segsz; /* segment size */
195 uint16_t ether_vtag; /* ethernet 802.1p+q vlan tag */
196 uint16_t pkthdr_flags; /* flags for pkthdr, see blow */
197#define PKTHDR_FLAG_IPSEC_SKIP_PFIL 0x0001 /* skip pfil_run_hooks() after ipsec decrypt */
198
199 /*
200 * Following three fields are open-coded struct altq_pktattr
201 * to rearrange struct pkthdr fields flexibly.
202 */
203 int pattr_af; /* ALTQ: address family */
204 void *pattr_class; /* ALTQ: sched class set by classifier */
205 void *pattr_hdr; /* ALTQ: saved header position in mbuf */
206};
207
208/* Checksumming flags (csum_flags). */
209#define M_CSUM_TCPv4 0x00000001 /* TCP header/payload */
210#define M_CSUM_UDPv4 0x00000002 /* UDP header/payload */
211#define M_CSUM_TCP_UDP_BAD 0x00000004 /* TCP/UDP checksum bad */
212#define M_CSUM_DATA 0x00000008 /* consult csum_data */
213#define M_CSUM_TCPv6 0x00000010 /* IPv6 TCP header/payload */
214#define M_CSUM_UDPv6 0x00000020 /* IPv6 UDP header/payload */
215#define M_CSUM_IPv4 0x00000040 /* IPv4 header */
216#define M_CSUM_IPv4_BAD 0x00000080 /* IPv4 header checksum bad */
217#define M_CSUM_TSOv4 0x00000100 /* TCPv4 segmentation offload */
218#define M_CSUM_TSOv6 0x00000200 /* TCPv6 segmentation offload */
219
220/* Checksum-assist quirks: keep separate from jump-table bits. */
221#define M_CSUM_BLANK 0x40000000 /* csum is missing */
222#define M_CSUM_NO_PSEUDOHDR 0x80000000 /* Rx csum_data does not include
223 * the UDP/TCP pseudo-hdr, and
224 * is not yet 1s-complemented.
225 */
226
227#define M_CSUM_BITS \
228 "\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
229 "\11TSOv4\12TSOv6\39BLANK\40NO_PSEUDOHDR"
230
231/*
232 * Macros for manipulating csum_data on outgoing packets. These are
233 * used to pass information down from the L4/L3 to the L2.
234 *
235 * _IPHL: Length of the IPv{4/6} header, plus the options; in other
236 * words the offset of the UDP/TCP header in the packet.
237 * _OFFSET: Offset of the checksum field in the UDP/TCP header.
238 */
239#define M_CSUM_DATA_IPv4_IPHL(x) ((x) >> 16)
240#define M_CSUM_DATA_IPv4_OFFSET(x) ((x) & 0xffff)
241#define M_CSUM_DATA_IPv6_IPHL(x) ((x) >> 16)
242#define M_CSUM_DATA_IPv6_OFFSET(x) ((x) & 0xffff)
243#define M_CSUM_DATA_IPv6_SET(x, v) (x) = ((x) & 0xffff) | ((v) << 16)
244
245/*
246 * Max # of pages we can attach to m_ext. This is carefully chosen
247 * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
248 */
249#ifdef MIN_PAGE_SIZE
250#define M_EXT_MAXPAGES ((65536 / MIN_PAGE_SIZE) + 1)
251#endif
252
253/*
254 * Description of external storage mapped into mbuf, valid if M_EXT set.
255 */
256struct _m_ext_storage {
257 unsigned int ext_refcnt;
258 char *ext_buf; /* start of buffer */
259 void (*ext_free) /* free routine if not the usual */
260 (struct mbuf *, void *, size_t, void *);
261 void *ext_arg; /* argument for ext_free */
262 size_t ext_size; /* size of buffer, for ext_free */
263
264 union {
265 /* M_EXT_CLUSTER: physical address */
266 paddr_t extun_paddr;
267#ifdef M_EXT_MAXPAGES
268 /* M_EXT_PAGES: pages */
269 struct vm_page *extun_pgs[M_EXT_MAXPAGES];
270#endif
271 } ext_un;
272#define ext_paddr ext_un.extun_paddr
273#define ext_pgs ext_un.extun_pgs
274};
275
276struct _m_ext {
277 struct mbuf *ext_ref;
278 struct _m_ext_storage ext_storage;
279};
280
281#define M_PADDR_INVALID POOL_PADDR_INVALID
282
283/*
284 * Definition of "struct mbuf".
285 * Don't change this without understanding how MHLEN/MLEN are defined.
286 */
287#define MBUF_DEFINE(name, mhlen, mlen) \
288 struct name { \
289 struct m_hdr m_hdr; \
290 union { \
291 struct { \
292 struct pkthdr MH_pkthdr; \
293 union { \
294 struct _m_ext MH_ext; \
295 char MH_databuf[(mhlen)]; \
296 } MH_dat; \
297 } MH; \
298 char M_databuf[(mlen)]; \
299 } M_dat; \
300 }
301#define m_next m_hdr.mh_next
302#define m_len m_hdr.mh_len
303#define m_data m_hdr.mh_data
304#define m_owner m_hdr.mh_owner
305#define m_type m_hdr.mh_type
306#define m_flags m_hdr.mh_flags
307#define m_nextpkt m_hdr.mh_nextpkt
308#define m_paddr m_hdr.mh_paddr
309#define m_pkthdr M_dat.MH.MH_pkthdr
310#define m_ext_storage M_dat.MH.MH_dat.MH_ext.ext_storage
311#define m_ext_ref M_dat.MH.MH_dat.MH_ext.ext_ref
312#define m_ext m_ext_ref->m_ext_storage
313#define m_pktdat M_dat.MH.MH_dat.MH_databuf
314#define m_dat M_dat.M_databuf
315
316/*
317 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
318 * into account inter-structure padding.
319 */
320MBUF_DEFINE(_mbuf_dummy, 1, 1);
321
322/* normal data len */
323#define MLEN ((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
324/* data len w/pkthdr */
325#define MHLEN ((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))
326
327#define MINCLSIZE (MHLEN+MLEN+1) /* smallest amount to put in cluster */
328
329/*
330 * The *real* struct mbuf
331 */
332MBUF_DEFINE(mbuf, MHLEN, MLEN);
333
334/* mbuf flags */
335#define M_EXT 0x00000001 /* has associated external storage */
336#define M_PKTHDR 0x00000002 /* start of record */
337#define M_EOR 0x00000004 /* end of record */
338#define M_PROTO1 0x00000008 /* protocol-specific */
339
340/* mbuf pkthdr flags, also in m_flags */
341#define M_AUTHIPHDR 0x00000010 /* authenticated (IPsec) */
342#define M_DECRYPTED 0x00000020 /* decrypted (IPsec) */
343#define M_LOOP 0x00000040 /* received on loopback */
344#define M_BCAST 0x00000100 /* send/received as L2 broadcast */
345#define M_MCAST 0x00000200 /* send/received as L2 multicast */
346#define M_CANFASTFWD 0x00000400 /* packet can be fast-forwarded */
347#define M_ANYCAST6 0x00000800 /* received as IPv6 anycast */
348
349#define M_LINK0 0x00001000 /* link layer specific flag */
350#define M_LINK1 0x00002000 /* link layer specific flag */
351#define M_LINK2 0x00004000 /* link layer specific flag */
352#define M_LINK3 0x00008000 /* link layer specific flag */
353#define M_LINK4 0x00010000 /* link layer specific flag */
354#define M_LINK5 0x00020000 /* link layer specific flag */
355#define M_LINK6 0x00040000 /* link layer specific flag */
356#define M_LINK7 0x00080000 /* link layer specific flag */
357
358#define M_VLANTAG 0x00100000 /* ether_vtag is valid */
359
360/* additional flags for M_EXT mbufs */
361#define M_EXT_FLAGS 0xff000000
362#define M_EXT_CLUSTER 0x01000000 /* ext is a cluster */
363#define M_EXT_PAGES 0x02000000 /* ext_pgs is valid */
364#define M_EXT_ROMAP 0x04000000 /* ext mapping is r-o at MMU */
365#define M_EXT_RW 0x08000000 /* ext storage is writable */
366
367/* for source-level compatibility */
368#define M_NOTIFICATION M_PROTO1
369
370#define M_FLAGS_BITS \
371 "\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10NONE" \
372 "\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
373 "\21LINK4\22LINK5\23LINK6\24LINK7" \
374 "\25VLANTAG" \
375 "\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"
376
377/* flags copied when copying m_pkthdr */
378#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
379 M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP| \
380 M_VLANTAG)
381
382/* flag copied when shallow-copying external storage */
383#define M_EXTCOPYFLAGS (M_EXT|M_EXT_FLAGS)
384
385/* mbuf types */
386#define MT_FREE 0 /* should be on free list */
387#define MT_DATA 1 /* dynamic (data) allocation */
388#define MT_HEADER 2 /* packet header */
389#define MT_SONAME 3 /* socket name */
390#define MT_SOOPTS 4 /* socket options */
391#define MT_FTABLE 5 /* fragment reassembly header */
392#define MT_CONTROL 6 /* extra-data protocol message */
393#define MT_OOBDATA 7 /* expedited data */
394
395#ifdef MBUFTYPES
396const char * const mbuftypes[] = {
397 "mbfree",
398 "mbdata",
399 "mbheader",
400 "mbsoname",
401 "mbsopts",
402 "mbftable",
403 "mbcontrol",
404 "mboobdata",
405};
406#else
407extern const char * const mbuftypes[];
408#endif
409
410/* flags to m_get/MGET */
411#define M_DONTWAIT M_NOWAIT
412#define M_WAIT M_WAITOK
413
414#ifdef MBUFTRACE
415/* Mbuf allocation tracing. */
416void mowner_init_owner(struct mowner *, const char *, const char *);
417void mowner_init(struct mbuf *, int);
418void mowner_ref(struct mbuf *, int);
419void m_claim(struct mbuf *, struct mowner *);
420void mowner_revoke(struct mbuf *, bool, int);
421void mowner_attach(struct mowner *);
422void mowner_detach(struct mowner *);
423void m_claimm(struct mbuf *, struct mowner *);
424#else
425#define mowner_init_owner(mo, n, d) __nothing
426#define mowner_init(m, type) __nothing
427#define mowner_ref(m, flags) __nothing
428#define mowner_revoke(m, all, flags) __nothing
429#define m_claim(m, mowner) __nothing
430#define mowner_attach(mo) __nothing
431#define mowner_detach(mo) __nothing
432#define m_claimm(m, mo) __nothing
433#endif
434
435#define MCLAIM(m, mo) m_claim((m), (mo))
436#define MOWNER_ATTACH(mo) mowner_attach(mo)
437#define MOWNER_DETACH(mo) mowner_detach(mo)
438
439/*
440 * mbuf allocation/deallocation macros:
441 *
442 * MGET(struct mbuf *m, int how, int type)
443 * allocates an mbuf and initializes it to contain internal data.
444 *
445 * MGETHDR(struct mbuf *m, int how, int type)
446 * allocates an mbuf and initializes it to contain a packet header
447 * and internal data.
448 *
449 * If 'how' is M_WAIT, these macros (and the corresponding functions)
450 * are guaranteed to return successfully.
451 */
452#define MGET(m, how, type) m = m_get((how), (type))
453#define MGETHDR(m, how, type) m = m_gethdr((how), (type))
454
455#if defined(_KERNEL)
456
457#define MCLINITREFERENCE(m) \
458do { \
459 KASSERT(((m)->m_flags & M_EXT) == 0); \
460 (m)->m_ext_ref = (m); \
461 (m)->m_ext.ext_refcnt = 1; \
462} while (/* CONSTCOND */ 0)
463
464/*
465 * Macros for mbuf external storage.
466 *
467 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
468 * the flag M_EXT is set upon success.
469 *
470 * MEXTMALLOC allocates external storage and adds it to
471 * a normal mbuf; the flag M_EXT is set upon success.
472 *
473 * MEXTADD adds pre-allocated external storage to
474 * a normal mbuf; the flag M_EXT is set upon success.
475 */
476
477#define MCLGET(m, how) m_clget((m), (how))
478
479#define MEXTMALLOC(m, size, how) \
480do { \
481 (m)->m_ext_storage.ext_buf = malloc((size), 0, (how)); \
482 if ((m)->m_ext_storage.ext_buf != NULL) { \
483 MCLINITREFERENCE(m); \
484 (m)->m_data = (m)->m_ext.ext_buf; \
485 (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
486 M_EXT|M_EXT_RW; \
487 (m)->m_ext.ext_size = (size); \
488 (m)->m_ext.ext_free = NULL; \
489 (m)->m_ext.ext_arg = NULL; \
490 mowner_ref((m), M_EXT); \
491 } \
492} while (/* CONSTCOND */ 0)
493
494#define MEXTADD(m, buf, size, type, free, arg) \
495do { \
496 MCLINITREFERENCE(m); \
497 (m)->m_data = (m)->m_ext.ext_buf = (char *)(buf); \
498 (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT; \
499 (m)->m_ext.ext_size = (size); \
500 (m)->m_ext.ext_free = (free); \
501 (m)->m_ext.ext_arg = (arg); \
502 mowner_ref((m), M_EXT); \
503} while (/* CONSTCOND */ 0)
504
505#define M_BUFADDR(m) \
506 (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
507 ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)
508
509#define M_BUFSIZE(m) \
510 (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \
511 ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)
512
513#define MRESETDATA(m) (m)->m_data = M_BUFADDR(m)
514
515/*
516 * Compute the offset of the beginning of the data buffer of a non-ext
517 * mbuf.
518 */
519#define M_BUFOFFSET(m) \
520 (((m)->m_flags & M_PKTHDR) ? \
521 offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))
522
523/*
524 * Determine if an mbuf's data area is read-only. This is true
525 * if external storage is read-only mapped, or not marked as R/W,
526 * or referenced by more than one mbuf.
527 */
528#define M_READONLY(m) \
529 (((m)->m_flags & M_EXT) != 0 && \
530 (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW || \
531 (m)->m_ext.ext_refcnt > 1))
532
533#define M_UNWRITABLE(__m, __len) \
534 ((__m)->m_len < (__len) || M_READONLY((__m)))
535
536/*
537 * Determine if an mbuf's data area is read-only at the MMU.
538 */
539#define M_ROMAP(m) \
540 (((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
541
542/*
543 * Compute the amount of space available before the current start of
544 * data in an mbuf.
545 */
546#define M_LEADINGSPACE(m) \
547 (M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))
548
549/*
550 * Compute the amount of space available
551 * after the end of data in an mbuf.
552 */
553#define _M_TRAILINGSPACE(m) \
554 ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
555 ((m)->m_data + (m)->m_len) : \
556 &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
557
558#define M_TRAILINGSPACE(m) \
559 (M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
560
561/*
562 * Arrange to prepend space of size plen to mbuf m.
563 * If a new mbuf must be allocated, how specifies whether to wait.
564 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
565 * is freed and m is set to NULL.
566 */
567#define M_PREPEND(m, plen, how) \
568do { \
569 if (M_LEADINGSPACE(m) >= (plen)) { \
570 (m)->m_data -= (plen); \
571 (m)->m_len += (plen); \
572 } else \
573 (m) = m_prepend((m), (plen), (how)); \
574 if ((m) && (m)->m_flags & M_PKTHDR) \
575 (m)->m_pkthdr.len += (plen); \
576} while (/* CONSTCOND */ 0)
577
578/* change mbuf to new type */
579#define MCHTYPE(m, t) \
580do { \
581 KASSERT((t) != MT_FREE); \
582 mbstat_type_add((m)->m_type, -1); \
583 mbstat_type_add(t, 1); \
584 (m)->m_type = t; \
585} while (/* CONSTCOND */ 0)
586
587#ifdef DIAGNOSTIC
588#define M_VERIFY_PACKET(m) m_verify_packet(m)
589#else
590#define M_VERIFY_PACKET(m) __nothing
591#endif
592
593/* The "copy all" special length. */
594#define M_COPYALL -1
595
596/*
597 * Allow drivers and/or protocols to store private context information.
598 */
599#define M_GETCTX(m, t) ((t)(m)->m_pkthdr._rcvif.ctx)
600#define M_SETCTX(m, c) ((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
601#define M_CLEARCTX(m) M_SETCTX((m), NULL)
602
603/*
604 * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
605 * from "off" within "m" is located in a single mbuf, contiguously.
606 *
607 * The pointer to the region will be returned to pointer variable "val".
608 */
609#define M_REGION_GET(val, typ, m, off, len) \
610do { \
611 struct mbuf *_t; \
612 int _tmp; \
613 if ((m)->m_len >= (off) + (len)) \
614 (val) = (typ)(mtod((m), char *) + (off)); \
615 else { \
616 _t = m_pulldown((m), (off), (len), &_tmp); \
617 if (_t) { \
618 if (_t->m_len < _tmp + (len)) \
619 panic("m_pulldown malfunction"); \
620 (val) = (typ)(mtod(_t, char *) + _tmp); \
621 } else { \
622 (val) = (typ)NULL; \
623 (m) = NULL; \
624 } \
625 } \
626} while (/*CONSTCOND*/ 0)
627
628#endif /* defined(_KERNEL) */
629
630/*
631 * Simple mbuf queueing system
632 *
633 * this is basically a SIMPLEQ adapted to mbuf use (ie using
634 * m_nextpkt instead of field.sqe_next).
635 *
636 * m_next is ignored, so queueing chains of mbufs is possible
637 */
638#define MBUFQ_HEAD(name) \
639struct name { \
640 struct mbuf *mq_first; \
641 struct mbuf **mq_last; \
642}
643
644#define MBUFQ_INIT(q) do { \
645 (q)->mq_first = NULL; \
646 (q)->mq_last = &(q)->mq_first; \
647} while (/*CONSTCOND*/0)
648
649#define MBUFQ_ENQUEUE(q, m) do { \
650 (m)->m_nextpkt = NULL; \
651 *(q)->mq_last = (m); \
652 (q)->mq_last = &(m)->m_nextpkt; \
653} while (/*CONSTCOND*/0)
654
655#define MBUFQ_PREPEND(q, m) do { \
656 if (((m)->m_nextpkt = (q)->mq_first) == NULL) \
657 (q)->mq_last = &(m)->m_nextpkt; \
658 (q)->mq_first = (m); \
659} while (/*CONSTCOND*/0)
660
661#define MBUFQ_DEQUEUE(q, m) do { \
662 if (((m) = (q)->mq_first) != NULL) { \
663 if (((q)->mq_first = (m)->m_nextpkt) == NULL) \
664 (q)->mq_last = &(q)->mq_first; \
665 else \
666 (m)->m_nextpkt = NULL; \
667 } \
668} while (/*CONSTCOND*/0)
669
670#define MBUFQ_DRAIN(q) do { \
671 struct mbuf *__m0; \
672 while ((__m0 = (q)->mq_first) != NULL) { \
673 (q)->mq_first = __m0->m_nextpkt; \
674 m_freem(__m0); \
675 } \
676 (q)->mq_last = &(q)->mq_first; \
677} while (/*CONSTCOND*/0)
678
679#define MBUFQ_FIRST(q) ((q)->mq_first)
680#define MBUFQ_NEXT(m) ((m)->m_nextpkt)
681#define MBUFQ_LAST(q) (*(q)->mq_last)
682
683/*
684 * Mbuf statistics.
685 * For statistics related to mbuf and cluster allocations, see also the
686 * pool headers (mb_cache and mcl_cache).
687 */
688struct mbstat {
689 u_long _m_spare; /* formerly m_mbufs */
690 u_long _m_spare1; /* formerly m_clusters */
691 u_long _m_spare2; /* spare field */
692 u_long _m_spare3; /* formely m_clfree - free clusters */
693 u_long m_drops; /* times failed to find space */
694 u_long m_wait; /* times waited for space */
695 u_long m_drain; /* times drained protocols for space */
696 u_short m_mtypes[256]; /* type specific mbuf allocations */
697};
698
699struct mbstat_cpu {
700 u_int m_mtypes[256]; /* type specific mbuf allocations */
701};
702
703/*
704 * Mbuf sysctl variables.
705 */
706#define MBUF_MSIZE 1 /* int: mbuf base size */
707#define MBUF_MCLBYTES 2 /* int: mbuf cluster size */
708#define MBUF_NMBCLUSTERS 3 /* int: limit on the # of clusters */
709#define MBUF_MBLOWAT 4 /* int: mbuf low water mark */
710#define MBUF_MCLLOWAT 5 /* int: mbuf cluster low water mark */
711#define MBUF_STATS 6 /* struct: mbstat */
712#define MBUF_MOWNERS 7 /* struct: m_owner[] */
713#define MBUF_NMBCLUSTERS_LIMIT 8 /* int: limit of nmbclusters */
714
715#ifdef _KERNEL
716extern struct mbstat mbstat;
717extern int nmbclusters; /* limit on the # of clusters */
718extern int mblowat; /* mbuf low water mark */
719extern int mcllowat; /* mbuf cluster low water mark */
720extern int max_linkhdr; /* largest link-level header */
721extern int max_protohdr; /* largest protocol header */
722extern int max_hdr; /* largest link+protocol header */
723extern int max_datalen; /* MHLEN - max_hdr */
724extern const int msize; /* mbuf base size */
725extern const int mclbytes; /* mbuf cluster size */
726extern pool_cache_t mb_cache;
727#ifdef MBUFTRACE
728LIST_HEAD(mownerhead, mowner);
729extern struct mownerhead mowners;
730extern struct mowner unknown_mowners[];
731extern struct mowner revoked_mowner;
732#endif
733
734MALLOC_DECLARE(M_MBUF);
735MALLOC_DECLARE(M_SONAME);
736
737struct mbuf *m_copym(struct mbuf *, int, int, int);
738struct mbuf *m_copypacket(struct mbuf *, int);
739struct mbuf *m_devget(char *, int, int, struct ifnet *);
740struct mbuf *m_dup(struct mbuf *, int, int, int);
741struct mbuf *m_get(int, int);
742struct mbuf *m_gethdr(int, int);
743struct mbuf *m_prepend(struct mbuf *,int, int);
744struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
745struct mbuf *m_pullup(struct mbuf *, int);
746struct mbuf *m_copyup(struct mbuf *, int, int);
747struct mbuf *m_split(struct mbuf *,int, int);
748struct mbuf *m_getptr(struct mbuf *, int, int *);
749void m_adj(struct mbuf *, int);
750struct mbuf *m_defrag(struct mbuf *, int);
751int m_apply(struct mbuf *, int, int,
752 int (*)(void *, void *, unsigned int), void *);
753void m_cat(struct mbuf *,struct mbuf *);
754void m_clget(struct mbuf *, int);
755void m_copyback(struct mbuf *, int, int, const void *);
756struct mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
757int m_makewritable(struct mbuf **, int, int, int);
758struct mbuf *m_getcl(int, int, int);
759void m_copydata(struct mbuf *, int, int, void *);
760void m_verify_packet(struct mbuf *);
761struct mbuf *m_free(struct mbuf *);
762void m_freem(struct mbuf *);
763void mbinit(void);
764void m_remove_pkthdr(struct mbuf *);
765void m_copy_pkthdr(struct mbuf *, struct mbuf *);
766void m_move_pkthdr(struct mbuf *, struct mbuf *);
767void m_align(struct mbuf *, int);
768
769bool m_ensure_contig(struct mbuf **, int);
770struct mbuf *m_add(struct mbuf *, struct mbuf *);
771
772/* Inline routines. */
773static __inline u_int m_length(const struct mbuf *) __unused;
774
775/* Statistics */
776void mbstat_type_add(int, int);
777
778/* Packet tag routines */
779struct m_tag *m_tag_get(int, int, int);
780void m_tag_free(struct m_tag *);
781void m_tag_prepend(struct mbuf *, struct m_tag *);
782void m_tag_unlink(struct mbuf *, struct m_tag *);
783void m_tag_delete(struct mbuf *, struct m_tag *);
784void m_tag_delete_chain(struct mbuf *);
785struct m_tag *m_tag_find(const struct mbuf *, int);
786struct m_tag *m_tag_copy(struct m_tag *);
787int m_tag_copy_chain(struct mbuf *, struct mbuf *);
788
789/* Packet tag types */
790#define PACKET_TAG_NONE 0 /* Nothing */
791#define PACKET_TAG_SO 4 /* sending socket pointer */
792#define PACKET_TAG_NPF 10 /* packet filter */
793#define PACKET_TAG_PF 11 /* packet filter */
794#define PACKET_TAG_ALTQ_QID 12 /* ALTQ queue id */
795#define PACKET_TAG_IPSEC_OUT_DONE 18
796#define PACKET_TAG_IPSEC_NAT_T_PORTS 25 /* two uint16_t */
797#define PACKET_TAG_INET6 26 /* IPv6 info */
798#define PACKET_TAG_TUNNEL_INFO 28 /* tunnel identification and
799 * protocol callback, for loop
800 * detection/recovery
801 */
802#define PACKET_TAG_MPLS 29 /* Indicate it's for MPLS */
803#define PACKET_TAG_SRCROUTE 30 /* IPv4 source routing */
804#define PACKET_TAG_ETHERNET_SRC 31 /* Ethernet source address */
805
806/*
807 * Return the number of bytes in the mbuf chain, m.
808 */
809static __inline u_int
810m_length(const struct mbuf *m)
811{
812 const struct mbuf *m0;
813 u_int pktlen;
814
815 if ((m->m_flags & M_PKTHDR) != 0)
816 return m->m_pkthdr.len;
817
818 pktlen = 0;
819 for (m0 = m; m0 != NULL; m0 = m0->m_next)
820 pktlen += m0->m_len;
821 return pktlen;
822}
823
824static __inline void
825m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
826{
827 KASSERT(m->m_flags & M_PKTHDR);
828 m->m_pkthdr.rcvif_index = ifp->if_index;
829}
830
831static __inline void
832m_reset_rcvif(struct mbuf *m)
833{
834 KASSERT(m->m_flags & M_PKTHDR);
835 /* A caller may expect whole _rcvif union is zeroed */
836 /* m->m_pkthdr.rcvif_index = 0; */
837 m->m_pkthdr._rcvif.ctx = NULL;
838}
839
840static __inline void
841m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
842{
843 KASSERT(m->m_flags & M_PKTHDR);
844 KASSERT(n->m_flags & M_PKTHDR);
845 m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
846}
847
848#define M_GET_ALIGNED_HDR(m, type, linkhdr) \
849 m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))
850
851static __inline int
852m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
853{
854#ifndef __NO_STRICT_ALIGNMENT
855 if (((uintptr_t)mtod(*m, void *) & mask) != 0)
856 *m = m_copyup(*m, hlen,
857 linkhdr ? (max_linkhdr + mask) & ~mask : 0);
858 else
859#endif
860 if (__predict_false((size_t)(*m)->m_len < hlen))
861 *m = m_pullup(*m, hlen);
862
863 return *m == NULL;
864}
865
866void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
867 __printflike(1, 2));
868
869/* from uipc_mbufdebug.c */
870void m_examine(const struct mbuf *, int, const char *,
871 void (*)(const char *, ...) __printflike(1, 2));
872
873/* parsers for m_examine() */
874void m_examine_ether(const struct mbuf *, int, const char *,
875 void (*)(const char *, ...) __printflike(1, 2));
876void m_examine_pppoe(const struct mbuf *, int, const char *,
877 void (*)(const char *, ...) __printflike(1, 2));
878void m_examine_ppp(const struct mbuf *, int, const char *,
879 void (*)(const char *, ...) __printflike(1, 2));
880void m_examine_arp(const struct mbuf *, int, const char *,
881 void (*)(const char *, ...) __printflike(1, 2));
882void m_examine_ip(const struct mbuf *, int, const char *,
883 void (*)(const char *, ...) __printflike(1, 2));
884void m_examine_icmp(const struct mbuf *, int, const char *,
885 void (*)(const char *, ...) __printflike(1, 2));
886void m_examine_ip6(const struct mbuf *, int, const char *,
887 void (*)(const char *, ...) __printflike(1, 2));
888void m_examine_icmp6(const struct mbuf *, int, const char *,
889 void (*)(const char *, ...) __printflike(1, 2));
890void m_examine_tcp(const struct mbuf *, int, const char *,
891 void (*)(const char *, ...) __printflike(1, 2));
892void m_examine_udp(const struct mbuf *, int, const char *,
893 void (*)(const char *, ...) __printflike(1, 2));
894void m_examine_hex(const struct mbuf *, int, const char *,
895 void (*)(const char *, ...) __printflike(1, 2));
896
897/*
898 * Get rcvif of a mbuf.
899 *
900 * The caller must call m_put_rcvif after using rcvif if the returned rcvif
901 * isn't NULL. If the returned rcvif is NULL, the caller doesn't need to call
902 * m_put_rcvif (although calling it is safe).
903 *
904 * The caller must not block or sleep while using rcvif. The API ensures a
905 * returned rcvif isn't freed until m_put_rcvif is called.
906 */
907static __inline struct ifnet *
908m_get_rcvif(const struct mbuf *m, int *s)
909{
910 struct ifnet *ifp;
911
912 KASSERT(m->m_flags & M_PKTHDR);
913 *s = pserialize_read_enter();
914 ifp = if_byindex(m->m_pkthdr.rcvif_index);
915 if (__predict_false(ifp == NULL))
916 pserialize_read_exit(*s);
917
918 return ifp;
919}
920
921static __inline void
922m_put_rcvif(struct ifnet *ifp, int *s)
923{
924
925 if (ifp == NULL)
926 return;
927 pserialize_read_exit(*s);
928}
929
930/*
931 * Get rcvif of a mbuf.
932 *
933 * The caller must call m_put_rcvif_psref after using rcvif. The API ensures
934 * a got rcvif isn't be freed until m_put_rcvif_psref is called.
935 */
936static __inline struct ifnet *
937m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
938{
939 KASSERT(m->m_flags & M_PKTHDR);
940 return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
941}
942
943static __inline void
944m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
945{
946
947 if (ifp == NULL)
948 return;
949 if_put(ifp, psref);
950}
951
952/*
953 * Get rcvif of a mbuf.
954 *
955 * This is NOT an MP-safe API and shouldn't be used at where we want MP-safe.
956 */
957static __inline struct ifnet *
958m_get_rcvif_NOMPSAFE(const struct mbuf *m)
959{
960 KASSERT(m->m_flags & M_PKTHDR);
961 return if_byindex(m->m_pkthdr.rcvif_index);
962}
963
964#endif /* _KERNEL */
965#endif /* !_SYS_MBUF_H_ */