/*-
 * Copyright (c) 2007-2008
 * 	Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

 49#ifndef _NETINET_CC_CC_H_
 50#define _NETINET_CC_CC_H_
 51
 52#ifdef _KERNEL
 53
 54MALLOC_DECLARE(M_CC_MEM);
 55
 56/* Global CC vars. */
 57extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
 58extern const int tcprexmtthresh;
 59
 60/* Per-netstack bits. */
 61VNET_DECLARE(struct cc_algo *, default_cc_ptr);
 62#define	V_default_cc_ptr VNET(default_cc_ptr)
 63
 64VNET_DECLARE(int, cc_do_abe);
 65#define	V_cc_do_abe			VNET(cc_do_abe)
 66
 67VNET_DECLARE(int, cc_abe_frlossreduce);
 68#define	V_cc_abe_frlossreduce		VNET(cc_abe_frlossreduce)
 69
 70/* Define the new net.inet.tcp.cc sysctl tree. */
 71#ifdef _SYS_SYSCTL_H_
 72SYSCTL_DECL(_net_inet_tcp_cc);
 73#endif
 74
 75/* For CC modules that use hystart++ */
 76extern uint32_t hystart_lowcwnd;
 77extern uint32_t hystart_minrtt_thresh;
 78extern uint32_t hystart_maxrtt_thresh;
 79extern uint32_t hystart_n_rttsamples;
 80extern uint32_t hystart_css_growth_div;
 81extern uint32_t hystart_css_rounds;
 82extern uint32_t hystart_bblogs;
 83
 84/* CC housekeeping functions. */
 85int	cc_register_algo(struct cc_algo *add_cc);
 86int	cc_deregister_algo(struct cc_algo *remove_cc);
 87#endif /* _KERNEL */
 88
 89#if defined(_KERNEL) || defined(_WANT_TCPCB)
 90/*
 91 * Wrapper around transport structs that contain same-named congestion
 92 * control variables. Allows algos to be shared amongst multiple CC aware
 93 * transprots.
 94 */
 95struct cc_var {
 96	void		*cc_data; /* Per-connection private CC algorithm data. */
 97	int		bytes_this_ack; /* # bytes acked by the current ACK. */
 98	tcp_seq		curack; /* Most recent ACK. */
 99	uint32_t	flags; /* Flags for cc_var (see below) */
100	int		type; /* Indicates which ptr is valid in ccvc. */
101	union ccv_container {
102		struct tcpcb		*tcp;
103		struct sctp_nets	*sctp;
104	} ccvc;
105	uint16_t	nsegs; /* # segments coalesced into current chain. */
106	uint8_t		labc;  /* Dont use system abc use passed in */
107};
108
109/* cc_var flags. */
110#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
111#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
112#define	CCF_USE_LOCAL_ABC       0x0004  /* Dont use the system l_abc val */
113#define	CCF_ACKNOW		0x0008	/* Will this ack be sent now? */
114#define	CCF_IPHDR_CE		0x0010	/* Does this packet set CE bit? */
115#define	CCF_TCPHDR_CWR		0x0020	/* Does this packet set CWR bit? */
116#define	CCF_MAX_CWND		0x0040	/* Have we reached maximum cwnd? */
117#define	CCF_CHG_MAX_CWND	0x0080	/* CUBIC max_cwnd changed, for K */
118#define	CCF_USR_IWND		0x0100	/* User specified initial window */
119#define	CCF_USR_IWND_INIT_NSEG	0x0200	/* Convert segs to bytes on conn init */
120#define CCF_HYSTART_ALLOWED	0x0400	/* If the CC supports it Hystart is allowed */
121#define CCF_HYSTART_CAN_SH_CWND	0x0800  /* Can hystart when going CSS -> CA slam the cwnd */
122#define CCF_HYSTART_CONS_SSTH	0x1000	/* Should hystart use the more conservative ssthresh */
123
124/* ACK types passed to the ack_received() hook. */
125#define	CC_ACK		0x0001	/* Regular in sequence ACK. */
126#define	CC_DUPACK	0x0002	/* Duplicate ACK. */
127#define	CC_PARTIALACK	0x0004	/* Not yet. */
128#define	CC_SACK		0x0008	/* Not yet. */
129#endif /* defined(_KERNEL) || defined(_WANT_TCPCB) */
130
/*
 * Congestion signal types passed to the cong_signal() hook. The highest order 8
 * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
 * congestion signal types.
 */
#define	CC_ECN		0x00000001	/* ECN marked packet received. */
#define	CC_RTO		0x00000002	/* RTO fired. */
#define	CC_RTO_ERR	0x00000004	/* RTO fired in error. */
#define	CC_NDUPACK	0x00000008	/* Threshold of dupack's reached. */

/* Covers exactly the reserved high-order 8 bits described above. */
#define	CC_SIGPRIVMASK	0xFF000000	/* Mask to check if sig is private. */

#ifdef _KERNEL
/*
 * Forward declaration so that the struct sockopt pointer in the ctl_output
 * hook below names a file-scope type even if no earlier header declared it;
 * otherwise the tag would only have function-prototype scope.
 */
struct sockopt;

/*
 * Structure to hold data and function pointers that together represent a
 * congestion control algorithm.
 */
struct cc_algo {
	char	name[TCP_CA_NAME_MAX];

	/* Init global module state on kldload. */
	int	(*mod_init)(void);

	/* Cleanup global module state on kldunload. */
	int	(*mod_destroy)(void);

	/* Return the size of the void pointer the CC needs for state */
	size_t  (*cc_data_sz)(void);

	/*
	 * Init CC state for a new control block. The CC
	 * module may be passed a NULL ptr indicating that
	 * it must allocate the memory. If it is passed a
	 * non-null pointer it is pre-allocated memory by
	 * the caller and the cb_init is expected to use that memory.
	 * It is not expected to fail if memory is passed in and
	 * all currently defined modules do not.
	 */
	int	(*cb_init)(struct cc_var *ccv, void *ptr);

	/* Cleanup CC state for a terminating control block. */
	void	(*cb_destroy)(struct cc_var *ccv);

	/* Init variables for a newly established connection. */
	void	(*conn_init)(struct cc_var *ccv);

	/* Called on receipt of an ack. */
	void	(*ack_received)(struct cc_var *ccv, uint16_t type);

	/* Called on detection of a congestion signal. */
	void	(*cong_signal)(struct cc_var *ccv, uint32_t type);

	/* Called after exiting congestion recovery. */
	void	(*post_recovery)(struct cc_var *ccv);

	/* Called when data transfer resumes after an idle period. */
	void	(*after_idle)(struct cc_var *ccv);

	/* Called for an additional ECN processing apart from RFC3168. */
	void	(*ecnpkt_handler)(struct cc_var *ccv);

	/* Called when a new "round" begins, if the transport is tracking rounds.  */
	void	(*newround)(struct cc_var *ccv, uint32_t round_cnt);

	/*
	 * Called when a RTT sample is made (fas = flight at send; if you
	 * don't have it, send the cwnd in).
	 */
	void	(*rttsample)(struct cc_var *ccv, uint32_t usec_rtt,
		    uint32_t rxtcnt, uint32_t fas);

	/* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
	int     (*ctl_output)(struct cc_var *, struct sockopt *, void *);

	STAILQ_ENTRY (cc_algo) entries;	/* Linkage for an STAILQ of cc_algo. */
	u_int	cc_refcount;	/* NOTE(review): presumably managed via cc_refer()/cc_release() - confirm. */
	uint8_t flags;		/* NOTE(review): presumably CC_MODULE_* values below - confirm. */
};

#define	CC_MODULE_BEING_REMOVED		0x01	/* The module is being removed */

/* Macro to obtain the CC algo's struct ptr. */
#define	CC_ALGO(tp)	((tp)->t_cc)

/* Macro to obtain the CC algo's data ptr. */
#define	CC_DATA(tp)	((tp)->t_ccv.cc_data)

/* Macro to obtain the system default CC algo's struct ptr. */
#define	CC_DEFAULT_ALGO()	V_default_cc_ptr

/* Read/write lock and its wrapper macros; all operate on cc_list_lock. */
extern struct rwlock cc_list_lock;
#define	CC_LIST_LOCK_INIT()	rw_init(&cc_list_lock, "cc_list")
#define	CC_LIST_LOCK_DESTROY()	rw_destroy(&cc_list_lock)
#define	CC_LIST_RLOCK()		rw_rlock(&cc_list_lock)
#define	CC_LIST_RUNLOCK()	rw_runlock(&cc_list_lock)
#define	CC_LIST_WLOCK()		rw_wlock(&cc_list_lock)
#define	CC_LIST_WUNLOCK()	rw_wunlock(&cc_list_lock)
#define	CC_LIST_LOCK_ASSERT()	rw_assert(&cc_list_lock, RA_LOCKED)

/* NOTE(review): presumably a size limit for TCP_CCALGOOPT data - confirm. */
#define	CC_ALGOOPT_LIMIT	2048

/*
 * These routines give NewReno behavior to the caller.
 * They require no state and can be used by any other CC
 * module that wishes to use NewReno type behavior (along
 * with anything else they may add on, pre or post call).
 * Their parameters mirror the same-named hooks in struct cc_algo.
 */
void newreno_cc_post_recovery(struct cc_var *ccv);
void newreno_cc_after_idle(struct cc_var *ccv);
void newreno_cc_cong_signal(struct cc_var *ccv, uint32_t type);
void newreno_cc_ack_received(struct cc_var *ccv, uint16_t type);

/* Called to temporarily keep an algo from going away during change */
void cc_refer(struct cc_algo *algo);
/* Called to release the temporary hold */
void cc_release(struct cc_algo *algo);

/* Called to attach a CC algorithm to a tcpcb */
void cc_attach(struct tcpcb *tp, struct cc_algo *algo);
/* Called to detach a CC algorithm from a tcpcb */
void cc_detach(struct tcpcb *tp);

#endif /* _KERNEL */
253#endif /* _NETINET_CC_CC_H_ */