master
  1/*-
  2 * SPDX-License-Identifier: BSD-2-Clause
  3 *
  4 * Copyright (c) 2016-2020 Netflix, Inc.
  5 *
  6 * Redistribution and use in source and binary forms, with or without
  7 * modification, are permitted provided that the following conditions
  8 * are met:
  9 * 1. Redistributions of source code must retain the above copyright
 10 *    notice, this list of conditions and the following disclaimer.
 11 * 2. Redistributions in binary form must reproduce the above copyright
 12 *    notice, this list of conditions and the following disclaimer in the
 13 *    documentation and/or other materials provided with the distribution.
 14 *
 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 25 * SUCH DAMAGE.
 26 */
 27
 28#ifndef __tcp_log_buf_h__
 29#define __tcp_log_buf_h__
 30
/* Size of the tlh_reason / tldl_reason character arrays. */
#define	TCP_LOG_REASON_LEN	32
/* Size of the tlh_tag / tldl_tag character arrays. */
#define	TCP_LOG_TAG_LEN		32
/*
 * Version number of the log buffer format.
 * NOTE(review): presumably reported through tlh_version -- confirm.
 */
#define	TCP_LOG_BUF_VER		(9)
 34
 35/*
 36 * Because the (struct tcp_log_buffer) includes 8-byte uint64_t's, it requires
 37 * 8-byte alignment to work properly on all platforms. Therefore, we will
 38 * enforce 8-byte alignment for all the structures that may appear by
 39 * themselves (instead of being embedded in another structure) in a data
 40 * stream.
 41 */
 42#define	ALIGN_TCP_LOG		__aligned(8)
 43
/*
 * Information about the socket buffer state.
 *
 * Three 32-bit counters. struct tcp_log_buffer embeds this structure twice,
 * once for the receive buffer (tlb_rxbuf) and once for the send buffer
 * (tlb_txbuf).
 */
struct tcp_log_sockbuf
{
	uint32_t	tls_sb_acc;	/* available chars (sb->sb_acc) */
	uint32_t	tls_sb_ccc;	/* claimed chars (sb->sb_ccc) */
	uint32_t	tls_sb_spare;	/* spare */
};
 51
/*
 * Optional, verbose information that may be appended to an event log.
 *
 * Holds two function names and a source line number. The explicit 4-byte
 * pad after the 32-bit line number brings the member sizes to
 * 2 * TCP_FUNC_LEN + 4 + 4 = 72 bytes, a multiple of the 8-byte alignment
 * requested through ALIGN_TCP_LOG.
 */
struct tcp_log_verbose
{
/* Size of each of the two function-name arrays below. */
#define	TCP_FUNC_LEN	32
	char		tlv_snd_frm[TCP_FUNC_LEN]; /* tcp_output() caller */
	char		tlv_trace_func[TCP_FUNC_LEN]; /* Function that
							 generated trace */
	uint32_t	tlv_trace_line;	/* Line number that generated trace */
	uint8_t		_pad[4];	/* Explicit padding to 8-byte multiple */
} ALIGN_TCP_LOG;
 62
/*
 * Internal RACK state variables.
 *
 * Member u_rack of union tcp_log_stackspecific; reachable from a log record
 * through the tlb_rack shorthand defined in struct tcp_log_buffer.
 */
struct tcp_log_rack
{
	uint32_t	tlr_rack_rtt;		/* rc_rack_rtt */
	uint8_t		tlr_state;		/* Internal RACK state */
	uint8_t		_pad[3];		/* Padding */
};
 70
/*
 * BBR-oriented event payload (member u_bbr of union tcp_log_stackspecific).
 *
 * According to the comment on that union, this structure is also used for
 * generic event logging by all stacks. The flexN and __spare members are
 * given no fixed meaning in this header.
 * NOTE(review): presumably each event defines what it puts in the flex
 * fields -- confirm against the callers.
 *
 * Member order is: 4 x 64-bit, 14 x 32-bit, 3 x 16-bit, 6 x 8-bit, then one
 * trailing 32-bit value (104 bytes of members). struct tcp_log_raw mirrors
 * exactly this sequence of sizes, so any change made here must be repeated
 * there.
 */
struct tcp_log_bbr {
	/* 64-bit members */
	uint64_t cur_del_rate;
	uint64_t delRate;
	uint64_t rttProp;
	uint64_t bw_inuse;
	/* 32-bit members */
	uint32_t inflight;
	uint32_t applimited;
	uint32_t delivered;
	uint32_t timeStamp;
	uint32_t epoch;
	uint32_t lt_epoch;
	uint32_t pkts_out;
	uint32_t flex1;
	uint32_t flex2;
	uint32_t flex3;
	uint32_t flex4;
	uint32_t flex5;
	uint32_t flex6;
	uint32_t lost;
	/* 16-bit members */
	uint16_t pacing_gain;
	uint16_t cwnd_gain;
	uint16_t flex7;
	/* 8-bit members */
	uint8_t bbr_state;
	uint8_t bbr_substate;
	uint8_t inhpts;
	uint8_t __spare;
	uint8_t use_lt_bw;
	uint8_t flex8;
	/* Trailing 32-bit member */
	uint32_t pkt_epoch;
};
101
/*
 * "Raw" view of the event payload: shadows the element sizes of
 * struct tcp_log_bbr, one array per run of equally sized members
 * (4 x 64-bit, 14 x 32-bit, 3 x 16-bit, 6 x 8-bit, 1 x 32-bit).
 * Keep the array lengths in step with struct tcp_log_bbr.
 */
struct tcp_log_raw {
	uint64_t u64_flex[4];	/* overlays cur_del_rate .. bw_inuse */
	uint32_t u32_flex[14];	/* overlays inflight .. lost */
	uint16_t u16_flex[3];	/* overlays pacing_gain .. flex7 */
	uint8_t u8_flex[6];	/* overlays bbr_state .. flex8 */
	uint32_t u32_flex2[1];	/* overlays pkt_epoch */
};
110
/*
 * Payload viewed purely as 64-bit values (member u64_raw of union
 * tcp_log_stackspecific, described there as used by process info).
 * 13 * 8 = 104 bytes, the same as the summed member sizes of
 * struct tcp_log_bbr.
 */
struct tcp_log_uint64 {
	uint64_t u64_flex[13];
};
114
/*
 * Payload for sendfile logging (member u_sf of union tcp_log_stackspecific).
 * NOTE(review): the fields look like they record the offset, nbytes and
 * flags arguments of tcp_log_sendfile() -- confirm in the implementation.
 */
struct tcp_log_sendfile {
	uint64_t offset;
	uint64_t length;
	uint32_t flags;
};
120
121/*
122 * tcp_log_stackspecific is currently being used as "event specific" log
123 * info by all stacks (i.e. struct tcp_log_bbr is used for generic event
124 * logging). Until this is cleaned up more generically and throughout,
125 * allow events to use the same space in the union.
126 */
/*
 * Overlay of all payload layouts a log record can carry in its
 * tlb_stackinfo field. All members share the same storage; which view is
 * meaningful depends on the event that was logged.
 */
union tcp_log_stackspecific
{
	struct tcp_log_rack u_rack;
	struct tcp_log_bbr u_bbr;
	struct tcp_log_sendfile u_sf;
	struct tcp_log_raw u_raw;	/* "raw" log access */
	struct tcp_log_uint64 u64_raw;	/* just u64's - used by process info */
};

/* Alias reflecting the "event specific" use of the union. */
typedef union tcp_log_stackspecific tcp_log_eventspecific_t;
137
/*
 * One logged event.
 *
 * The record holds, in order: the event identification, a snapshot of
 * socket-buffer and TCPCB fields, the per-stack (event-specific) payload,
 * and the packet's TCP header and options. The TLB_FLAG_* bits in
 * tlb_eventflags say which optional parts the record includes.
 *
 * The structure is aligned to 8 bytes through ALIGN_TCP_LOG.
 */
struct tcp_log_buffer
{
	/* Event basics */
	struct timeval	tlb_tv;		/* Timestamp of trace */
	uint32_t	tlb_ticks;	/* Timestamp of trace */
	uint32_t	tlb_sn;		/* Serial number */
	uint8_t		tlb_stackid;	/* Stack ID */
	uint8_t		tlb_eventid;	/* Event ID */
	uint16_t	tlb_eventflags;	/* Flags for the record */
#define	TLB_FLAG_RXBUF		0x0001	/* Includes receive buffer info */
#define	TLB_FLAG_TXBUF		0x0002	/* Includes send buffer info */
#define	TLB_FLAG_HDR		0x0004	/* Includes a TCP header */
#define	TLB_FLAG_VERBOSE	0x0008	/* Includes function/line numbers */
#define	TLB_FLAG_STACKINFO	0x0010	/* Includes stack-specific info */
	int		tlb_errno;	/* Event error (if any) */

	/* Internal session state */
	struct tcp_log_sockbuf tlb_rxbuf; /* Receive buffer */
	struct tcp_log_sockbuf tlb_txbuf; /* Send buffer */

	int		tlb_state;	/* TCPCB t_state */
	uint32_t	tlb_starttime;	/* TCPCB t_starttime */
	uint32_t	tlb_iss;	/* TCPCB iss */
	uint32_t	tlb_flags;	/* TCPCB flags */
	uint32_t	tlb_snd_una;	/* TCPCB snd_una */
	uint32_t	tlb_snd_max;	/* TCPCB snd_max */
	uint32_t	tlb_snd_cwnd;	/* TCPCB snd_cwnd */
	uint32_t	tlb_snd_nxt;	/* TCPCB snd_nxt */
	uint32_t	tlb_snd_recover;/* TCPCB snd_recover */
	uint32_t	tlb_snd_wnd;	/* TCPCB snd_wnd */
	uint32_t	tlb_snd_ssthresh; /* TCPCB snd_ssthresh */
	uint32_t	tlb_srtt;	/* TCPCB t_srtt */
	uint32_t	tlb_rttvar;	/* TCPCB t_rttvar */
	uint32_t	tlb_rcv_up;	/* TCPCB rcv_up */
	uint32_t	tlb_rcv_adv;	/* TCPCB rcv_adv */
	uint32_t	tlb_flags2;	/* TCPCB t_flags2 */
	uint32_t	tlb_rcv_nxt;	/* TCPCB rcv_nxt */
	uint32_t	tlb_rcv_wnd;	/* TCPCB rcv_wnd */
	uint32_t	tlb_dupacks;	/* TCPCB t_dupacks */
	int		tlb_segqlen;	/* TCPCB segqlen */
	int		tlb_snd_numholes; /* TCPCB snd_numholes */
	uint32_t	tlb_flex1;	/* Event specific information */
	uint32_t	tlb_flex2;	/* Event specific information */
	uint32_t	tlb_fbyte_in;	/* TCPCB first byte in time */
	uint32_t	tlb_fbyte_out;	/* TCPCB first byte out time */
	/* Both 4-bit scale values share a single byte. */
	uint8_t		tlb_snd_scale:4, /* TCPCB snd_scale */
			tlb_rcv_scale:4; /* TCPCB rcv_scale */
	uint8_t		_pad[3];	/* Padding */
	/* Per-stack info */
	union tcp_log_stackspecific tlb_stackinfo;
/* Shorthand for the RACK view of the per-stack payload. */
#define	tlb_rack	tlb_stackinfo.u_rack

	/* The packet */
	uint32_t	tlb_len;	/* The packet's data length */
	struct tcphdr	tlb_th;		/* The TCP header */
	uint8_t		tlb_opts[TCP_MAXOLEN]; /* The TCP options */

	/*
	 * Verbose information (optional). Declared as a zero-length array, so
	 * it adds no storage to the record itself.
	 * NOTE(review): presumably a struct tcp_log_verbose follows the record
	 * in memory when TLB_FLAG_VERBOSE is set -- confirm.
	 */
	struct tcp_log_verbose tlb_verbose[0];
} ALIGN_TCP_LOG;
198
/*
 * Event identifiers. Numbering starts at 1 and increases by one per entry;
 * the number at the end of each comment is the enumerator's value.
 * New events go immediately before TCP_LOG_END, which must stay last.
 * NOTE(review): event ids are carried in uint8_t fields/arguments
 * (tlb_eventid, the eventid parameter of tcp_log_event()), so values
 * presumably have to stay below 256 -- confirm.
 */
enum tcp_log_events {
	TCP_LOG_IN = 1,		/* Incoming packet                   1 */
	TCP_LOG_OUT,		/* Transmit (without other event)    2 */
	TCP_LOG_RTO,		/* Retransmit timeout                3 */
	TCP_LOG_SB_WAKE,	/* Awaken socket buffer              4 */
	TCP_LOG_BAD_RETRAN,	/* Detected bad retransmission       5 */
	TCP_LOG_PRR,		/* Doing PRR                         6 */
	TCP_LOG_REORDER,	/* Detected reorder                  7 */
	TCP_LOG_HPTS,		/* Hpts sending a packet             8 */
	BBR_LOG_BBRUPD,		/* We updated BBR info               9 */
	BBR_LOG_BBRSND,		/* We did a slot calculation and sending is done 10 */
	BBR_LOG_ACKCLEAR,	/* A ack clears all outstanding     11 */
	BBR_LOG_INQUEUE,	/* The tcb had a packet input to it 12 */
	BBR_LOG_TIMERSTAR,	/* Start a timer                    13 */
	BBR_LOG_TIMERCANC,	/* Cancel a timer                   14 */
	BBR_LOG_ENTREC,		/* Entered recovery                 15 */
	BBR_LOG_EXITREC,	/* Exited recovery                  16 */
	BBR_LOG_CWND,		/* Cwnd change                      17 */
	BBR_LOG_BWSAMP,		/* LT B/W sample has been made      18 */
	BBR_LOG_MSGSIZE,	/* We received a EMSGSIZE error     19 */
	BBR_LOG_BBRRTT,		/* BBR RTT is updated               20 */
	BBR_LOG_JUSTRET,	/* We just returned out of output   21 */
	BBR_LOG_STATE,		/* A BBR state change occurred      22 */
	BBR_LOG_PKT_EPOCH,	/* A BBR packet epoch occurred      23 */
	BBR_LOG_PERSIST,	/* BBR changed to/from a persists   24 */
	TCP_LOG_FLOWEND,	/* End of a flow                    25 */
	BBR_LOG_RTO,		/* BBR's timeout includes BBR info  26 */
	BBR_LOG_DOSEG_DONE,	/* hpts do_segment completes        27 */
	BBR_LOG_EXIT_GAIN,	/* Exit of a gain state (presumed from the name; TODO confirm) 28 */
	BBR_LOG_THRESH_CALC,	/* Doing threshold calculation      29 */
	TCP_LOG_MAPCHG,		/* Map Changes to the sendmap       30 */
	TCP_LOG_USERSEND,	/* User level sends data            31 */
	BBR_RSM_CLEARED,	/* RSM cleared of ACK flags         32 */
	BBR_LOG_STATE_TARGET,	/* Log of target at state           33 */
	BBR_LOG_TIME_EPOCH,	/* A timed based Epoch occurred     34 */
	BBR_LOG_TO_PROCESS,	/* A to was processed               35 */
	BBR_LOG_BBRTSO,		/* TSO update                       36 */
	BBR_LOG_HPTSDIAG,	/* Hpts diag insert                 37 */
	BBR_LOG_LOWGAIN,	/* Low gain accounting              38 */
	BBR_LOG_PROGRESS,	/* Progress timer event             39 */
	TCP_LOG_SOCKET_OPT,	/* A socket option is set           40 */
	BBR_LOG_TIMERPREP,	/* A BBR var to debug out TLP issues  41 */
	BBR_LOG_ENOBUF_JMP,	/* We had a enobuf jump             42 */
	BBR_LOG_HPTSI_CALC,	/* calc the hptsi time              43 */
	BBR_LOG_RTT_SHRINKS,	/* We had a log reduction of rttProp 44 */
	BBR_LOG_BW_RED_EV,	/* B/W reduction events             45 */
	BBR_LOG_REDUCE,		/* old bbr log reduce for 4.1 and earlier 46*/
	TCP_LOG_RTT,		/* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
	BBR_LOG_SETTINGS_CHG,	/* Settings changed for loss response 48 */
	BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used    49 */
	TCP_LOG_REASS,		/* Reassembly buffer logging        50 */
	TCP_HDWR_PACE_SIZE,	/*  TCP pacing size set (rl and rack uses this)  51 */
	BBR_LOG_HDWR_PACE,	/* TCP Hardware pacing log          52 */
	BBR_LOG_TSTMP_VAL,	/* Temp debug timestamp validation  53 */
	TCP_LOG_CONNEND,	/* End of connection                54 */
	TCP_LOG_LRO,		/* LRO entry                        55 */
	TCP_SACK_FILTER_RES,	/* Results of SACK Filter           56 */
	TCP_SAD_DETECT,		/* Sack Attack Detection            57 */
	TCP_TIMELY_WORK,	/* Logs regarding Timely CC tweaks  58 */
	TCP_LOG_USER_EVENT,	/* User space event data            59 */
	TCP_LOG_SENDFILE,	/* sendfile() logging for TCP connections 60 */
	TCP_LOG_REQ_T,		/* logging of request tracking      61 */
	TCP_LOG_ACCOUNTING,	/* Log of TCP Accounting data       62 */
	TCP_LOG_FSB,		/* FSB information                  63 */
	RACK_DSACK_HANDLING,	/* Handling of DSACK in rack for reordering window 64 */
	TCP_HYSTART,		/* TCP Hystart logging              65 */
	TCP_CHG_QUERY,		/* Change query during fnc_init()   66 */
	TCP_RACK_LOG_COLLAPSE,	/* Window collapse by peer          67 */
	TCP_RACK_TP_TRIGGERED,	/* A rack tracepoint is triggered   68 */
	TCP_HYBRID_PACING_LOG,	/* Hybrid pacing log                69 */
	TCP_LOG_PRU,		/* TCP protocol user request        70 */
	TCP_LOG_END		/* End (keep at end)                71 */
};
272
/*
 * Logging states of a connection.
 *
 * Values at or below TCP_LOG_STATE_OFF mean "not logging": the
 * tcp_bblogging_on() and tcp_bblogging_point_on() inlines test
 * "state <= TCP_LOG_STATE_OFF" to reject them. The numeric ordering is
 * therefore significant; tcp_set_bblog_state() additionally relies on
 * TCP_LOG_VIA_BBPOINTS being the largest value.
 */
enum tcp_log_states {
	TCP_LOG_STATE_RATIO_OFF = -2,	/* Log ratio evaluation yielded an OFF
					   result. Only used for tlb_logstate */
	TCP_LOG_STATE_CLEAR = -1,	/* Deactivate and clear tracing. Passed
					   to tcp_log_state_change() but never
					   stored in any logstate variable */
	TCP_LOG_STATE_OFF = 0,		/* Pause */

	/* Positively numbered states represent active logging modes */
	TCP_LOG_STATE_TAIL=1,		/* Keep the trailing events */
	TCP_LOG_STATE_HEAD=2,		/* Keep the leading events */
	TCP_LOG_STATE_HEAD_AUTO=3,	/* Keep the leading events, and
					   automatically dump them to the
					   device  */
	TCP_LOG_STATE_CONTINUAL=4,	/* Continually dump the data when full */
	TCP_LOG_STATE_TAIL_AUTO=5,	/* Keep the trailing events, and
					   automatically dump them when the
					   session ends */
	TCP_LOG_VIA_BBPOINTS=6		/* Log only if the BB point has been configured */
};
293
294/* Use this if we don't know whether the operation succeeded. */
295#define	ERRNO_UNK	(-1)
296
297/*
298 * If the user included dev/tcp_log/tcp_log_dev.h, then include our private
299 * headers. Otherwise, there is no reason to pollute all the files with an
300 * additional include.
301 *
302 * This structure is aligned to an 8-byte boundary to match the alignment
303 * requirements of (struct tcp_log_buffer).
304 */
305#ifdef __tcp_log_dev_h__
/*
 * Header describing a set of log records: the common device header, the
 * connection endpoints, a time offset, and the id/reason/tag strings.
 * Only defined when dev/tcp_log/tcp_log_dev.h has been included, since it
 * embeds struct tcp_log_common_header from that header.
 */
struct tcp_log_header {
	struct tcp_log_common_header tlh_common;
/* Shorthands for the fields of the embedded common header. */
#define	tlh_version	tlh_common.tlch_version
#define	tlh_type	tlh_common.tlch_type
#define	tlh_length	tlh_common.tlch_length
	struct in_endpoints	tlh_ie;
	struct timeval		tlh_offset;	/* Uptime -> UTC offset */
	char			tlh_id[TCP_LOG_ID_LEN];
	char			tlh_reason[TCP_LOG_REASON_LEN];
	char			tlh_tag[TCP_LOG_TAG_LEN];
	uint8_t		tlh_af;		/* presumably the address family -- confirm */
	uint8_t		_pad[7];	/* Pads tlh_af out to 8 bytes */
} ALIGN_TCP_LOG;
319
320#ifdef _KERNEL
/*
 * Kernel-only queue entry: the common device queue header followed by the
 * same identifying data that struct tcp_log_header carries (id, reason,
 * tag, endpoints, address family) plus a list of log entries and a count.
 * NOTE(review): tldl_count presumably is the number of elements on
 * tldl_entries -- confirm in the implementation.
 */
struct tcp_log_dev_log_queue {
	struct tcp_log_dev_queue tldl_common;
	char			tldl_id[TCP_LOG_ID_LEN];
	char			tldl_reason[TCP_LOG_REASON_LEN];
	char			tldl_tag[TCP_LOG_TAG_LEN];
	struct in_endpoints	tldl_ie;
	struct tcp_log_stailq	tldl_entries;
	int			tldl_count;
	uint8_t			tldl_af;
};
331#endif /* _KERNEL */
332#endif /* __tcp_log_dev_h__ */
333
334/*
335 * Defined BBPOINTS that can be used
336 * with TCP_LOG_VIA_BBPOINTS.
337 */
338#define TCP_BBPOINT_NONE		0
339#define TCP_BBPOINT_REQ_LEVEL_LOGGING	1
340
341/*********************/
342/* TCP Trace points */
343/*********************/
344/*
345 * TCP trace points are interesting points within
346 * the TCP code that the author/debugger may want
347 * to have BB logging enabled if we hit that point.
348 * In order to enable a trace point you set the
349 * sysctl var net.inet.tcp.bb.tp.number to
350 * one of the numbers listed below. You also
351 * must make sure net.inet.tcp.bb.tp.bbmode is
352 * non-zero, the default is 4 for continuous tracing.
 * You also set the number of connections that you want
 * to get BB logs in net.inet.tcp.bb.tp.count.
355 *
356 * Count will decrement every time BB logging is assigned
357 * to a connection that hit your tracepoint.
358 *
359 * You can enable all trace points by setting the number
360 * to 0xffffffff. You can disable all trace points by
361 * setting number to zero (or count to 0).
362 *
363 * Below are the enumerated list of tracepoints that
364 * have currently been defined in the code. Add more
365 * as you add a call to rack_trace_point(rack, <name>);
366 * where <name> is defined below.
367 */
/*
 * Trace point numbers. These are sequential identifiers, not bit flags:
 * tcp_trace_point() compares the configured number for equality with the
 * number passed in (0xffffffff matches every point). The value 0x00000009
 * is currently unassigned.
 */
#define TCP_TP_HWENOBUF		0x00000001	/* When we are doing hardware pacing and hit enobufs */
#define TCP_TP_ENOBUF		0x00000002	/* When we hit enobufs with software pacing */
#define TCP_TP_COLLAPSED_WND	0x00000003	/* When a peer collapses its rwnd on us */
#define TCP_TP_COLLAPSED_RXT	0x00000004	/* When we actually retransmit a collapsed window rsm */
#define TCP_TP_REQ_LOG_FAIL	0x00000005	/* We tried to allocate a Request log but had no space */
#define TCP_TP_RESET_RCV	0x00000006	/* Triggers when we receive a RST */
#define TCP_TP_EXCESS_RXT	0x00000007	/* When we get excess RXT's clamping the cwnd */
#define TCP_TP_SAD_TRIGGERED	0x00000008	/* Sack Attack Detection triggers */

#define TCP_TP_SAD_SUSPECT	0x0000000a	/* A sack has suspicious information in it */
378
379#ifdef _KERNEL
380
/*
 * Trace point selector: tcp_trace_point() fires when this equals the number
 * it was given, or when this is 0xffffffff.
 */
extern uint32_t tcp_trace_point_config;
/*
 * Logging state that tcp_trace_point() hands to tcp_set_bblog_state() when
 * a trace point fires; a value of 0 keeps trace points from firing.
 */
extern uint32_t tcp_trace_point_bb_mode;
/*
 * Remaining budget: decremented each time tcp_trace_point() fires, and no
 * trace point fires once it is no longer positive.
 */
extern int32_t tcp_trace_point_count;
384
385/*
386 * Returns true if any sort of BB logging is enabled,
387 * commonly used throughout the codebase. 
388 */
389static inline int
390tcp_bblogging_on(struct tcpcb *tp)
391{
392	if (tp->_t_logstate <= TCP_LOG_STATE_OFF) 
393		return (0);
394	if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS)
395		return (0);
396	return (1);
397}
398
399/*
400 * Returns true if we match a specific bbpoint when
401 * in TCP_LOG_VIA_BBPOINTS, but also returns true
402 * for all the other logging states.
403 */
404static inline int
405tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint)
406{
407	if (tp->_t_logstate <= TCP_LOG_STATE_OFF)
408		return (0);
409	if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) &&
410	    (tp->_t_logpoint == bbpoint))
411		return (1);
412	else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS)
413		return (0);
414	return (1);
415}
416
417static inline void
418tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint)
419{
420	if ((ls == TCP_LOG_VIA_BBPOINTS) &&
421	    (tp->_t_logstate == TCP_LOG_STATE_OFF)){
422		/*
423		 * We don't allow a BBPOINTS set to override
424		 * other types of BB logging set by other means such
425		 * as the bb_ratio/bb_state URL parameters. In other
426		 * words BBlogging must be *off* in order to turn on
427		 * a BBpoint.
428		 */
429		tp->_t_logpoint = bbpoint;
430		tp->_t_logstate = ls;
431	} else if (ls < TCP_LOG_VIA_BBPOINTS) {
432		tp->_t_logpoint = TCP_BBPOINT_NONE;
433		tp->_t_logstate = ls;
434	}
435}
436
437static inline uint32_t 
438tcp_get_bblog_state(struct tcpcb *tp)
439{
440	return (tp->_t_logstate);
441}
442
/*
 * Report that trace point num was reached on connection tp.
 *
 * When built with TCP_BLACKBOX, and num is the configured trace point (or
 * the configuration is 0xffffffff), the configured mode is non-zero, the
 * remaining count is positive and tcp_bblogging_on(tp) reports 0, one unit
 * is taken from the count and the connection is switched to the configured
 * logging mode. Without TCP_BLACKBOX this function does nothing.
 */
static inline void
tcp_trace_point(struct tcpcb *tp, int num)
{
#ifdef TCP_BLACKBOX
	int prev;

	/* Is this trace point (or "all trace points") selected? */
	if ((tcp_trace_point_config != num) &&
	    (tcp_trace_point_config != 0xffffffff))
		return;
	if (tcp_trace_point_bb_mode == 0)
		return;
	if (tcp_trace_point_count <= 0)
		return;
	if (tcp_bblogging_on(tp) != 0)
		return;

	/* Claim one unit of the budget; prev is the value before the claim. */
	prev = atomic_fetchadd_int(&tcp_trace_point_count, -1);
	if (prev <= 0) {
		/* Lost a race; make sure the count is zero now. */
		tcp_trace_point_count = 0;
		return;
	}
	tcp_set_bblog_state(tp, tcp_trace_point_bb_mode, TCP_BBPOINT_NONE);
#endif
}
463
/*
 * Default limits for the log buffer.
 * NOTE(review): presumably the maximum number of log entries kept per
 * session and system-wide, respectively -- confirm in the implementation.
 */
#define	TCP_LOG_BUF_DEFAULT_SESSION_LIMIT	5000
#define	TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT	5000000
466
/*
 * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always
 * tries to record verbose information: the connection's t_output_caller
 * plus the function name and line number of the place the macro is used.
 * It also takes an extra tv argument that is handed to tcp_log_event().
 *
 * Nothing is logged unless tcp_bblogging_on(tp) is true.
 *
 * Note that tp is evaluated more than once, so it must not have side
 * effects. It is parenthesized where it is dereferenced so that any
 * pointer expression (e.g. "&cb" or "base + i") can be passed safely.
 */
#define	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
	do {								\
		if (tcp_bblogging_on(tp))				\
			tcp_log_event(tp, th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    (tp)->t_output_caller, __func__, __LINE__, tv);\
	} while (0)
478
479/*
480 * TCP_LOG_EVENT: This is a macro so we can capture function/line
481 * information when needed. You can use the macro when you are not
482 * doing a lot of prep in the stack specific information i.e. you
483 * don't add extras (stackinfo). If you are adding extras which
484 * means filling out a stack variable instead use the tcp_log_event()
485 * function but enclose the call to the log (and all the setup) in a
486 * if (tcp_bblogging_on(tp)) {
487 *   ... setup and logging call ...
488 * }
489 *
 * Always use the tcp_bblogging_on() inline since sometimes the definitions
 * do change.
492 *
493 * BBlogging also supports the concept of a BBpoint. The idea behind this
494 * is that when you set a specific BBpoint on and turn the logging into
495 * the BBpoint mode (TCP_LOG_VIA_BBPOINTS) you will be defining very very
496 * few of these points to come out. The point is specific to a code you
497 * want tied to that one BB logging. This allows you to turn on a much broader
498 * scale set of limited logging on more connections without overwhelming the
499 * I/O system with too much BBlogs. This of course means you need to be quite
500 * careful on how many BBlogs go with each point, but you can have multiple points
501 * only one of which is active at a time.
502 *
 * To define a point you add it above under the define for TCP_BBPOINT_NONE (which
 * is the default, i.e. no point is defined). You then, for your point, use the
505 * tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint) inline to enclose
506 * your call to tcp_log_event.  Do not use one of the TCP_LOGGING macros else
507 * your point will never come out. You specify your defined point in the bbpoint
508 * side of the inline. An example of this you can find in rack where the
509 * TCP_BBPOINT_REQ_LEVEL_LOGGING is used. There a specific set of logs are generated
510 * for each request that tcp is tracking.
511 *
512 * When turning on BB logging use the inline:
513 * tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint)
514 * the ls field is the logging state TCP_LOG_STATE_CONTINUAL etc. The
515 * bbpoint field is ignored unless the ls field is set to TCP_LOG_VIA_BBPOINTS.
516 * Currently there is only a socket option that turns on the non-BBPOINT
517 * logging.
518 *
519 * Prototype:
520 * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
521 *     struct sockbuf *txbuf, uint8_t eventid, int errornum,
522 *     union tcp_log_stackspecific *stackinfo)
523 *
524 * tp is mandatory and must be write locked.
525 * th is optional; if present, it will appear in the record.
526 * rxbuf and txbuf are optional; if present, they will appear in the record.
527 * eventid is mandatory.
528 * errornum is mandatory (it indicates the success or failure of the
529 *     operation associated with the event).
530 * len indicates the length of the packet. If no packet, use 0.
531 * stackinfo is optional; if present, it will appear in the record.
532 */
struct tcpcb;
/*
 * TCP_LOG_EVENT takes nine arguments (no tv). With TCP_LOG_FORCEVERBOSE it
 * always goes through TCP_LOG_EVENT_VERBOSE; otherwise it does so only when
 * tcp_log_verbose is set, and else logs without verbose information if
 * tcp_bblogging_on(tp) is true. In every case a NULL tv is passed on.
 *
 * The forced-verbose variant is a function-like wrapper rather than a plain
 * alias: TCP_LOG_EVENT_VERBOSE expects a tenth (tv) argument, so an alias
 * would make every nine-argument TCP_LOG_EVENT() use fail to preprocess.
 */
#ifdef TCP_LOG_FORCEVERBOSE
#define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum,	\
	    len, stackinfo, th_hostorder, NULL)
#else
#define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
	do {								\
		if (tcp_log_verbose)					\
			TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf,	\
			    eventid, errornum, len, stackinfo,		\
			    th_hostorder, NULL);			\
		else if (tcp_bblogging_on(tp))				\
			tcp_log_event(tp, th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    NULL, NULL, 0, NULL);			\
	} while (0)
#endif /* TCP_LOG_FORCEVERBOSE */
/*
 * TCP_LOG_EVENTP: like the non-verbose path of TCP_LOG_EVENT, but with an
 * extra tv argument that is handed to tcp_log_event(). It never records
 * verbose information (caller, function and line are passed as NULL, NULL
 * and 0) and logs only when tcp_bblogging_on(tp) is true.
 */
#define	TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
	do {								\
		if (tcp_bblogging_on(tp))				\
			tcp_log_event(tp, th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    NULL, NULL, 0, tv);				\
	} while (0)
556
557#ifdef TCP_BLACKBOX
/*
 * Black box logging interface; these declarations are only visible when
 * the kernel is built with TCP_BLACKBOX.
 */
/* When true, TCP_LOG_EVENT takes the verbose path. */
extern bool tcp_log_verbose;
void tcp_log_drain(struct tcpcb *tp);
int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force);
void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason);
/*
 * Record one event. The TCP_LOG_EVENT* macros pass output_caller, func and
 * line as the verbose information (or NULL, NULL, 0 for none); tv may be
 * NULL.
 * NOTE(review): the meaning of the returned pointer (and of a NULL return)
 * is not visible in this header -- see the implementation.
 */
struct tcp_log_buffer *tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
    struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
    union tcp_log_stackspecific *stackinfo, int th_hostorder,
    const char *output_caller, const char *func, int line, const struct timeval *tv);
size_t tcp_log_get_id(struct tcpcb *tp, char *buf);
size_t tcp_log_get_tag(struct tcpcb *tp, char *buf);
u_int tcp_log_get_id_cnt(struct tcpcb *tp);
int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp);
void tcp_log_init(void);
int tcp_log_set_id(struct tcpcb *tp, char *id);
int tcp_log_set_tag(struct tcpcb *tp, char *tag);
int tcp_log_state_change(struct tcpcb *tp, int state);
void tcp_log_tcpcbinit(struct tcpcb *tp);
void tcp_log_tcpcbfini(struct tcpcb *tp);
void tcp_log_flowend(struct tcpcb *tp);
void tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes,
    int flags);
int tcp_log_apply_ratio(struct tcpcb *tp, int ratio);
580#else /* !TCP_BLACKBOX */
/* Without TCP_BLACKBOX verbose logging is a compile-time constant "off". */
#define tcp_log_verbose	(false)

/*
 * Stand-in for tcp_log_event() in kernels built without TCP_BLACKBOX:
 * ignores every argument, records nothing and always returns NULL, so the
 * logging macros still compile.
 */
static inline struct tcp_log_buffer *
tcp_log_event(struct tcpcb *tp, struct tcphdr *th,
    struct sockbuf *rxbuf, struct sockbuf *txbuf,
    uint8_t eventid, int errornum, uint32_t len,
    union tcp_log_stackspecific *stackinfo, int th_hostorder,
    const char *output_caller, const char *func, int line,
    const struct timeval *tv)
{
	struct tcp_log_buffer *no_record = NULL;

	return (no_record);
}
593#endif /* TCP_BLACKBOX */
594
595#endif	/* _KERNEL */
596#endif	/* __tcp_log_buf_h__ */