xref: /openbsd-src/sys/netinet/tcp_subr.c (revision 33b792a3c1c87b47219fdf9a73548c4003214de3)
1 /*	$OpenBSD: tcp_subr.c,v 1.57 2002/01/24 22:42:49 provos Exp $	*/
2 /*	$NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  * 	This product includes software developed by the University of
50  * 	California, Berkeley and its contributors.
51  * 	This product includes software developed at the Information
52  * 	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/mbuf.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 
84 #include <net/route.h>
85 #include <net/if.h>
86 
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/ip_icmp.h>
93 #include <netinet/tcp.h>
94 #include <netinet/tcp_fsm.h>
95 #include <netinet/tcp_seq.h>
96 #include <netinet/tcp_timer.h>
97 #include <netinet/tcp_var.h>
98 #include <netinet/tcpip.h>
99 #include <dev/rndvar.h>
100 
101 #ifdef INET6
102 #include <netinet6/in6_var.h>
103 #include <netinet6/ip6protosw.h>
104 #endif /* INET6 */
105 
106 #ifdef TCP_SIGNATURE
107 #include <sys/md5k.h>
108 #endif /* TCP_SIGNATURE */
109 
110 /* patchable/settable parameters for tcp */
111 int	tcp_mssdflt = TCP_MSS;
112 int	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
113 
114 /*
115  * Configure kernel with options "TCP_DO_RFC1323=0" to disable RFC1323 stuff.
116  * This is a good idea over slow SLIP/PPP links, because the timestamp
117  * pretty well destroys the VJ compression (any packet with a timestamp
118  * different from the previous one can't be compressed), as well as adding
119  * more overhead.
120  * XXX And it should be a settable per route characteristic (with this just
121  * used as the default).
122  */
123 #ifndef TCP_DO_RFC1323
124 #define TCP_DO_RFC1323	1
125 #endif
126 int    tcp_do_rfc1323 = TCP_DO_RFC1323;
127 
128 #ifndef TCP_DO_SACK
129 #ifdef TCP_SACK
130 #define TCP_DO_SACK	1
131 #else
132 #define TCP_DO_SACK	0
133 #endif
134 #endif
135 int    tcp_do_sack = TCP_DO_SACK;		/* RFC 2018 selective ACKs */
136 
137 #ifndef TCBHASHSIZE
138 #define	TCBHASHSIZE	128
139 #endif
140 int	tcbhashsize = TCBHASHSIZE;
141 
142 #ifdef INET6
143 extern int ip6_defhlim;
144 #endif /* INET6 */
145 
146 struct pool tcpcb_pool;
147 #ifdef TCP_SACK
148 struct pool sackhl_pool;
149 #endif
150 
151 int	tcp_freeq __P((struct tcpcb *));
152 
153 struct tcpstat tcpstat;		/* tcp statistics */
154 
155 /*
156  * Tcp initialization
157  */
158 void
159 tcp_init()
160 {
161 #ifdef TCP_COMPAT_42
162 	tcp_iss = 1;		/* wrong */
163 #endif /* TCP_COMPAT_42 */
164 	pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
165 	    NULL);
166 #ifdef TCP_SACK
167 	pool_init(&sackhl_pool, sizeof(struct sackhole), 0, 0, 0, "sackhlpl",
168 	    NULL);
169 #endif /* TCP_SACK */
170 	in_pcbinit(&tcbtable, tcbhashsize);
171 	tcp_now = arc4random() / 2;
172 
173 #ifdef INET6
174 	/*
175 	 * Since sizeof(struct ip6_hdr) > sizeof(struct ip), we
176 	 * do max length checks/computations only on the former.
177 	 */
178 	if (max_protohdr < (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)))
179 		max_protohdr = (sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
180 	if ((max_linkhdr + sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) >
181 	    MHLEN)
182 		panic("tcp_init");
183 
184 	icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
185 #endif /* INET6 */
186 }
187 
188 /*
189  * Create template to be used to send tcp packets on a connection.
190  * Call after host entry created, allocates an mbuf and fills
191  * in a skeletal tcp/ip header, minimizing the amount of work
192  * necessary when the connection is used.
193  *
194  * To support IPv6 in addition to IPv4 and considering that the sizes of
195  * the IPv4 and IPv6 headers are not the same, we now use a separate pointer
196  * for the TCP header.  Also, we made the former tcpiphdr header pointer
197  * into just an IP overlay pointer, with casting as appropriate for v6. rja
198  */
199 struct mbuf *
200 tcp_template(tp)
201 	struct tcpcb *tp;
202 {
203 	register struct inpcb *inp = tp->t_inpcb;
204 	register struct mbuf *m;
205 	register struct tcphdr *th;
206 
207 	if ((m = tp->t_template) == 0) {
208 		m = m_get(M_DONTWAIT, MT_HEADER);
209 		if (m == NULL)
210 			return (0);
211 
212 		switch (tp->pf) {
213 		case 0:	/*default to PF_INET*/
214 #ifdef INET
215 		case AF_INET:
216 			m->m_len = sizeof(struct ip);
217 			break;
218 #endif /* INET */
219 #ifdef INET6
220 		case AF_INET6:
221 			m->m_len = sizeof(struct ip6_hdr);
222 			break;
223 #endif /* INET6 */
224 		}
225 		m->m_len += sizeof (struct tcphdr);
226 
227 		/*
228 		 * The link header, network header, TCP header, and TCP options
229 		 * all must fit in this mbuf. For now, assume the worst case of
230 		 * TCP options size. Eventually, compute this from tp flags.
231 		 */
232 		if (m->m_len + MAX_TCPOPTLEN + max_linkhdr >= MHLEN) {
233 			MCLGET(m, M_DONTWAIT);
234 			if ((m->m_flags & M_EXT) == 0) {
235 				m_free(m);
236 				return (0);
237 			}
238 		}
239 	}
240 
241 	switch(tp->pf) {
242 #ifdef INET
243 	case AF_INET:
244 		{
245 			struct ipovly *ipovly;
246 
247 			ipovly = mtod(m, struct ipovly *);
248 
249 			bzero(ipovly->ih_x1, sizeof ipovly->ih_x1);
250 			ipovly->ih_pr = IPPROTO_TCP;
251 			ipovly->ih_len = htons(sizeof (struct tcphdr));
252 			ipovly->ih_src = inp->inp_laddr;
253 			ipovly->ih_dst = inp->inp_faddr;
254 
255 			th = (struct tcphdr *)(mtod(m, caddr_t) +
256 				sizeof(struct ip));
257 			th->th_sum = in_cksum_phdr(ipovly->ih_src.s_addr,
258 			    ipovly->ih_dst.s_addr,
259 			    htons(sizeof (struct tcphdr) + IPPROTO_TCP));
260 		}
261 		break;
262 #endif /* INET */
263 #ifdef INET6
264 	case AF_INET6:
265 		{
266 			struct ip6_hdr *ipv6;
267 
268 			ipv6 = mtod(m, struct ip6_hdr *);
269 
270 			ipv6->ip6_src = inp->inp_laddr6;
271 			ipv6->ip6_dst = inp->inp_faddr6;
272 			ipv6->ip6_flow = htonl(0x60000000) |
273 			    (inp->inp_ipv6.ip6_flow & htonl(0x0fffffff));
274 
275 			ipv6->ip6_nxt = IPPROTO_TCP;
276 			ipv6->ip6_plen = htons(sizeof(struct tcphdr)); /*XXX*/
277 			ipv6->ip6_hlim = in6_selecthlim(inp, NULL);	/*XXX*/
278 
279 			th = (struct tcphdr *)(mtod(m, caddr_t) +
280 				sizeof(struct ip6_hdr));
281 			th->th_sum = 0;
282 		}
283 		break;
284 #endif /* INET6 */
285 	}
286 
287 	th->th_sport = inp->inp_lport;
288 	th->th_dport = inp->inp_fport;
289 	th->th_seq = 0;
290 	th->th_ack = 0;
291 	th->th_x2  = 0;
292 	th->th_off = 5;
293 	th->th_flags = 0;
294 	th->th_win = 0;
295 	th->th_urp = 0;
296 	return (m);
297 }
298 
299 /*
300  * Send a single message to the TCP at address specified by
301  * the given TCP/IP header.  If m == 0, then we make a copy
302  * of the tcpiphdr at ti and send directly to the addressed host.
303  * This is used to force keep alive messages out using the TCP
304  * template for a connection tp->t_template.  If flags are given
305  * then we send a message back to the TCP which originated the
306  * segment ti, and discard the mbuf containing it and any other
307  * attached mbufs.
308  *
309  * In any case the ack and sequence number of the transmitted
310  * segment are as specified by the parameters.
311  */
312 #ifdef INET6
313 /* This function looks hairy, because it was so IPv4-dependent. */
314 #endif /* INET6 */
315 void
316 tcp_respond(tp, template, m, ack, seq, flags)
317 	struct tcpcb *tp;
318 	caddr_t template;
319 	register struct mbuf *m;
320 	tcp_seq ack, seq;
321 	int flags;
322 {
323 	register int tlen;
324 	int win = 0;
325 	struct route *ro = 0;
326 	register struct tcphdr *th;
327 	register struct tcpiphdr *ti = (struct tcpiphdr *)template;
328 #ifdef INET6
329 	int is_ipv6 = 0;   /* true iff IPv6 */
330 #endif /* INET6 */
331 
332 	if (tp) {
333 		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
334 #ifdef INET6
335 		/*
336 		 * If this is called with an unconnected
337 		 * socket/tp/pcb (tp->pf is 0), we lose.
338 		 */
339 		is_ipv6 = (tp->pf == PF_INET6);
340 
341 		/*
342 		 * The route/route6 distinction is meaningless
343 		 * unless you're allocating space or passing parameters.
344 		 */
345 #endif /* INET6 */
346 		ro = &tp->t_inpcb->inp_route;
347 	}
348 #ifdef INET6
349 	else
350 		is_ipv6 = (((struct ip *)ti)->ip_v == 6);
351 #endif /* INET6 */
352 	if (m == 0) {
353 		m = m_gethdr(M_DONTWAIT, MT_HEADER);
354 		if (m == NULL)
355 			return;
356 #ifdef TCP_COMPAT_42
357 		tlen = 1;
358 #else
359 		tlen = 0;
360 #endif
361 		m->m_data += max_linkhdr;
362 #ifdef INET6
363 		if (is_ipv6)
364 			bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) +
365 			    sizeof(struct ip6_hdr));
366 		else
367 #endif /* INET6 */
368 			bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) +
369 			    sizeof(struct ip));
370 
371 		ti = mtod(m, struct tcpiphdr *);
372 		flags = TH_ACK;
373 	} else {
374 		m_freem(m->m_next);
375 		m->m_next = 0;
376 		m->m_data = (caddr_t)ti;
377 		tlen = 0;
378 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
379 #ifdef INET6
380 		if (is_ipv6) {
381 			m->m_len = sizeof(struct tcphdr) + sizeof(struct ip6_hdr);
382 			xchg(((struct ip6_hdr *)ti)->ip6_dst,\
383 			    ((struct ip6_hdr *)ti)->ip6_src,\
384 			    struct in6_addr);
385 			th = (void *)((caddr_t)ti + sizeof(struct ip6_hdr));
386 		} else
387 #endif /* INET6 */
388 		{
389 			m->m_len = sizeof (struct tcpiphdr);
390 			xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t);
391 			th = (void *)((caddr_t)ti + sizeof(struct ip));
392 		}
393 		xchg(th->th_dport, th->th_sport, u_int16_t);
394 #undef xchg
395 	}
396 #ifdef INET6
397 	if (is_ipv6) {
398 		tlen += sizeof(struct tcphdr) + sizeof(struct ip6_hdr);
399 		th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip6_hdr));
400 	} else
401 #endif /* INET6 */
402 	{
403 		ti->ti_len = htons((u_int16_t)(sizeof (struct tcphdr) + tlen));
404 		tlen += sizeof (struct tcpiphdr);
405 		th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip));
406 	}
407 
408 	m->m_len = tlen;
409 	m->m_pkthdr.len = tlen;
410 	m->m_pkthdr.rcvif = (struct ifnet *) 0;
411 	th->th_seq = htonl(seq);
412 	th->th_ack = htonl(ack);
413 	th->th_x2 = 0;
414 	th->th_off = sizeof (struct tcphdr) >> 2;
415 	th->th_flags = flags;
416 	if (tp)
417 		win >>= tp->rcv_scale;
418 	if (win > TCP_MAXWIN)
419 		win = TCP_MAXWIN;
420 	th->th_win = htons((u_int16_t)win);
421 	th->th_urp = 0;
422 
423 #ifdef INET6
424 	if (is_ipv6) {
425 		((struct ip6_hdr *)ti)->ip6_flow   = htonl(0x60000000);
426 		((struct ip6_hdr *)ti)->ip6_nxt  = IPPROTO_TCP;
427 		((struct ip6_hdr *)ti)->ip6_hlim =
428 			in6_selecthlim(tp ? tp->t_inpcb : NULL, NULL);	/*XXX*/
429 		((struct ip6_hdr *)ti)->ip6_plen = tlen - sizeof(struct ip6_hdr);
430 		th->th_sum = 0;
431 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
432 		   sizeof(struct ip6_hdr), ((struct ip6_hdr *)ti)->ip6_plen);
433 		HTONS(((struct ip6_hdr *)ti)->ip6_plen);
434 		ip6_output(m, tp ? tp->t_inpcb->inp_outputopts6 : NULL,
435 			(struct route_in6 *)ro, 0, NULL, NULL);
436 	} else
437 #endif /* INET6 */
438 	{
439 		bzero(ti->ti_x1, sizeof ti->ti_x1);
440 		ti->ti_len = htons((u_short)tlen - sizeof(struct ip));
441 
442 		/*
443 		 * There's no point deferring to hardware checksum processing
444 		 * here, as we only send a minimal TCP packet whose checksum
445 		 * we need to compute in any case.
446 		 */
447 		th->th_sum = 0;
448 		th->th_sum = in_cksum(m, tlen);
449 		((struct ip *)ti)->ip_len = tlen;
450 		((struct ip *)ti)->ip_ttl = ip_defttl;
451 		ip_output(m, NULL, ro, ip_mtudisc ? IP_MTUDISC : 0, NULL,
452 			  tp ? tp->t_inpcb : NULL);
453 	}
454 }
455 
456 /*
457  * Create a new TCP control block, making an
458  * empty reassembly queue and hooking it to the argument
459  * protocol control block.
460  */
461 struct tcpcb *
462 tcp_newtcpcb(struct inpcb *inp)
463 {
464 	struct tcpcb *tp;
465 	int i;
466 
467 	tp = pool_get(&tcpcb_pool, PR_NOWAIT);
468 	if (tp == NULL)
469 		return ((struct tcpcb *)0);
470 	bzero((char *) tp, sizeof(struct tcpcb));
471 	LIST_INIT(&tp->segq);
472 	tp->t_maxseg = tcp_mssdflt;
473 	tp->t_maxopd = 0;
474 
475 	for (i = 0; i < TCPT_NTIMERS; i++)
476 		TCP_TIMER_INIT(tp, i);
477 
478 #ifdef TCP_SACK
479 	tp->sack_disable = tcp_do_sack ? 0 : 1;
480 #endif
481 	tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
482 	tp->t_inpcb = inp;
483 	/*
484 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
485 	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
486 	 * reasonable initial retransmit time.
487 	 */
488 	tp->t_srtt = TCPTV_SRTTBASE;
489 	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
490 	tp->t_rttmin = TCPTV_MIN;
491 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
492 	    TCPTV_MIN, TCPTV_REXMTMAX);
493 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
494 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
495 #ifdef INET6
496 	/* we disallow IPv4 mapped address completely. */
497 	if ((inp->inp_flags & INP_IPV6) == 0)
498 		tp->pf = PF_INET;
499 	else
500 		tp->pf = PF_INET6;
501 #else
502 	tp->pf = PF_INET;
503 #endif
504 
505 #ifdef INET6
506 	if (inp->inp_flags & INP_IPV6)
507 		inp->inp_ipv6.ip6_hlim = ip6_defhlim;
508 	else
509 #endif /* INET6 */
510 		inp->inp_ip.ip_ttl = ip_defttl;
511 
512 	inp->inp_ppcb = (caddr_t)tp;
513 	return (tp);
514 }
515 
516 /*
517  * Drop a TCP connection, reporting
518  * the specified error.  If connection is synchronized,
519  * then send a RST to peer.
520  */
521 struct tcpcb *
522 tcp_drop(tp, errno)
523 	register struct tcpcb *tp;
524 	int errno;
525 {
526 	struct socket *so = tp->t_inpcb->inp_socket;
527 
528 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
529 		tp->t_state = TCPS_CLOSED;
530 		(void) tcp_output(tp);
531 		tcpstat.tcps_drops++;
532 	} else
533 		tcpstat.tcps_conndrops++;
534 	if (errno == ETIMEDOUT && tp->t_softerror)
535 		errno = tp->t_softerror;
536 	so->so_error = errno;
537 	return (tcp_close(tp));
538 }
539 
540 /*
541  * Close a TCP control block:
542  *	discard all space held by the tcp
543  *	discard internet protocol block
544  *	wake up any sleepers
545  */
546 struct tcpcb *
547 tcp_close(struct tcpcb *tp)
548 {
549 	struct inpcb *inp = tp->t_inpcb;
550 	struct socket *so = inp->inp_socket;
551 #ifdef TCP_SACK
552 	struct sackhole *p, *q;
553 #endif
554 #ifdef RTV_RTT
555 	register struct rtentry *rt;
556 #ifdef INET6
557 	register int bound_to_specific = 0;  /* I.e. non-default */
558 
559 	/*
560 	 * This code checks the nature of the route for this connection.
561 	 * Normally this is done by two simple checks in the next
562 	 * INET/INET6 ifdef block, but because of two possible lower layers,
563 	 * that check is done here.
564 	 *
565 	 * Perhaps should be doing this only for a RTF_HOST route.
566 	 */
567 	rt = inp->inp_route.ro_rt;  /* Same for route or route6. */
568 	if (tp->pf == PF_INET6) {
569 		if (rt)
570 			bound_to_specific =
571 			    !(IN6_IS_ADDR_UNSPECIFIED(&
572 			    ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr));
573 	} else {
574 		if (rt)
575 			bound_to_specific =
576 			    (((struct sockaddr_in *)rt_key(rt))->
577 			    sin_addr.s_addr != INADDR_ANY);
578 	}
579 #endif /* INET6 */
580 
581 	/*
582 	 * If we sent enough data to get some meaningful characteristics,
583 	 * save them in the routing entry.  'Enough' is arbitrarily
584 	 * defined as the sendpipesize (default 4K) * 16.  This would
585 	 * give us 16 rtt samples assuming we only get one sample per
586 	 * window (the usual case on a long haul net).  16 samples is
587 	 * enough for the srtt filter to converge to within 5% of the correct
588 	 * value; fewer samples and we could save a very bogus rtt.
589 	 *
590 	 * Don't update the default route's characteristics and don't
591 	 * update anything that the user "locked".
592 	 */
593 #ifdef INET6
594 	/*
595 	 * Note that rt and bound_to_specific are set above.
596 	 */
597 	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
598 	    rt && bound_to_specific) {
599 #else /* INET6 */
600 	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
601 	    (rt = inp->inp_route.ro_rt) &&
602 	    satosin(rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
603 #endif /* INET6 */
604 		register u_long i = 0;
605 
606 		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
607 			i = tp->t_srtt *
608 			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
609 			if (rt->rt_rmx.rmx_rtt && i)
610 				/*
611 				 * filter this update to half the old & half
612 				 * the new values, converting scale.
613 				 * See route.h and tcp_var.h for a
614 				 * description of the scaling constants.
615 				 */
616 				rt->rt_rmx.rmx_rtt =
617 				    (rt->rt_rmx.rmx_rtt + i) / 2;
618 			else
619 				rt->rt_rmx.rmx_rtt = i;
620 		}
621 		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
622 			i = tp->t_rttvar *
623 			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
624 			if (rt->rt_rmx.rmx_rttvar && i)
625 				rt->rt_rmx.rmx_rttvar =
626 				    (rt->rt_rmx.rmx_rttvar + i) / 2;
627 			else
628 				rt->rt_rmx.rmx_rttvar = i;
629 		}
630 		/*
631 		 * update the pipelimit (ssthresh) if it has been updated
632 		 * already or if a pipesize was specified & the threshhold
633 		 * got below half the pipesize.  I.e., wait for bad news
634 		 * before we start updating, then update on both good
635 		 * and bad news.
636 		 */
637 		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
638 		    (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
639 		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
640 			/*
641 			 * convert the limit from user data bytes to
642 			 * packets then to packet data bytes.
643 			 */
644 			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
645 			if (i < 2)
646 				i = 2;
647 #ifdef INET6
648 			if (tp->pf == PF_INET6)
649 				i *= (u_long)(tp->t_maxseg + sizeof (struct tcphdr)
650 				    + sizeof(struct ip6_hdr));
651 			else
652 #endif /* INET6 */
653 				i *= (u_long)(tp->t_maxseg +
654 				    sizeof (struct tcpiphdr));
655 
656 			if (rt->rt_rmx.rmx_ssthresh)
657 				rt->rt_rmx.rmx_ssthresh =
658 				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
659 			else
660 				rt->rt_rmx.rmx_ssthresh = i;
661 		}
662 	}
663 #endif /* RTV_RTT */
664 
665 	/* free the reassembly queue, if any */
666 	tcp_freeq(tp);
667 
668 #ifdef TCP_SACK
669 	/* Free SACK holes. */
670 	q = p = tp->snd_holes;
671 	while (p != 0) {
672 		q = p->next;
673 		pool_put(&sackhl_pool, p);
674 		p = q;
675 	}
676 #endif
677 	if (tp->t_template)
678 		(void) m_free(tp->t_template);
679 	pool_put(&tcpcb_pool, tp);
680 	inp->inp_ppcb = 0;
681 	soisdisconnected(so);
682 	in_pcbdetach(inp);
683 	tcpstat.tcps_closed++;
684 	return ((struct tcpcb *)0);
685 }
686 
687 int
688 tcp_freeq(struct tcpcb *tp)
689 {
690 	struct ipqent *qe;
691 	int rv = 0;
692 
693 	while ((qe = LIST_FIRST(&tp->segq)) != NULL) {
694 		LIST_REMOVE(qe, ipqe_q);
695 		m_freem(qe->ipqe_m);
696 		pool_put(&ipqent_pool, qe);
697 		rv = 1;
698 	}
699 	return (rv);
700 }
701 
/*
 * Drain hook for TCP; intentionally does nothing.
 */
void
tcp_drain(void)
{
	/* nothing to release */
}
707 
708 /*
709  * Compute proper scaling value for receiver window from buffer space
710  */
711 
712 void
713 tcp_rscale(struct tcpcb *tp, u_long hiwat)
714 {
715 	tp->request_r_scale = 0;
716 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
717 	       TCP_MAXWIN << tp->request_r_scale < hiwat)
718 		tp->request_r_scale++;
719 }
720 
721 /*
722  * Notify a tcp user of an asynchronous error;
723  * store error as soft error, but wake up user
724  * (for now, won't do anything until can select for soft error).
725  */
726 void
727 tcp_notify(inp, error)
728 	struct inpcb *inp;
729 	int error;
730 {
731 	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
732 	register struct socket *so = inp->inp_socket;
733 
734 	/*
735 	 * Ignore some errors if we are hooked up.
736 	 * If connection hasn't completed, has retransmitted several times,
737 	 * and receives a second error, give up now.  This is better
738 	 * than waiting a long time to establish a connection that
739 	 * can never complete.
740 	 */
741 	if (tp->t_state == TCPS_ESTABLISHED &&
742 	     (error == EHOSTUNREACH || error == ENETUNREACH ||
743 	      error == EHOSTDOWN)) {
744 		return;
745 	} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
746 	    tp->t_rxtshift > 3 && tp->t_softerror)
747 		so->so_error = error;
748 	else
749 		tp->t_softerror = error;
750 	wakeup((caddr_t) &so->so_timeo);
751 	sorwakeup(so);
752 	sowwakeup(so);
753 }
754 
#ifdef INET6
/*
 * Control input for TCP over IPv6.
 *
 * cmd is a PRC_* request, sa the IPv6 address it concerns, and d (when
 * not NULL) a struct ip6ctlparam.  The request is mapped onto a notify
 * routine and delivered via in6_pcbnotify().  PRC_MSGSIZE carrying a
 * packet is handed to icmp6_mtudisc_update() instead, after checking
 * whether the quoted ports belong to a known connection.
 */
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
	struct tcphdr th;
	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
	struct ip6_hdr *ip6 = NULL;
	const struct sockaddr_in6 *sa6_src = NULL;
	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
	struct mbuf *m = NULL;
	int off = 0;
	/* Only the two port fields of the TCP header are examined. */
	struct {
		u_int16_t th_sport;
		u_int16_t th_dport;
	} *thp;

	if (sa->sa_family != AF_INET6 ||
	    sa->sa_len != sizeof(struct sockaddr_in6))
		return;
	if ((unsigned)cmd >= PRC_NCMDS)
		return;

	if (cmd == PRC_QUENCH) {
		/* XXX there's no PRC_QUENCH in IPv6 */
		notify = tcp_quench;
	} else if (PRC_IS_REDIRECT(cmd)) {
		notify = in_rtchange;
		d = NULL;
	} else if (cmd == PRC_MSGSIZE) {
		/* special code is present, see below */
	} else if (cmd == PRC_HOSTDEAD) {
		d = NULL;
	} else if (inet6ctlerrmap[cmd] == 0) {
		return;
	}

	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;

		m = ip6cp->ip6c_m;
		ip6 = ip6cp->ip6c_ip6;
		off = ip6cp->ip6c_off;
		sa6_src = ip6cp->ip6c_src;
	} else {
		sa6_src = &sa6_any;
	}

	if (ip6 == NULL) {
		/* No packet to inspect: notify without port numbers. */
		(void) in6_pcbnotify(&tcbtable, sa, 0,
		    (struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
		return;
	}

	/*
	 * XXX: We assume that when ip6 is non NULL,
	 * M and OFF are valid.
	 */

	/* check if we can safely examine src and dst ports */
	if (m->m_pkthdr.len < off + sizeof(*thp))
		return;

	bzero(&th, sizeof(th));
#ifdef DIAGNOSTIC
	if (sizeof(*thp) > sizeof(th))
		panic("assumption failed in tcp6_ctlinput");
#endif
	m_copydata(m, off, sizeof(*thp), (caddr_t)&th);

	if (cmd == PRC_MSGSIZE) {
		int valid = 0;

		/*
		 * Check to see if we have a valid TCP connection
		 * corresponding to the address in the ICMPv6 message
		 * payload.
		 */
		if (in6_pcbhashlookup(&tcbtable, &sa6->sin6_addr,
		    th.th_dport, (struct in6_addr *)&sa6_src->sin6_addr,
		    th.th_sport))
			valid++;
		else if (in_pcblookup(&tcbtable, &sa6->sin6_addr,
		    th.th_dport, (struct in6_addr *)&sa6_src->sin6_addr,
		    th.th_sport, INPLOOKUP_IPV6))
			valid++;

		/*
		 * Depending on the value of "valid" and routing table
		 * size (mtudisc_{hi,lo}wat), we will:
		 * - recalculate the new MTU and create the
		 *   corresponding routing entry, or
		 * - ignore the MTU change notification.
		 */
		icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
		return;
	}

	(void) in6_pcbnotify(&tcbtable, sa, th.th_dport,
	    (struct sockaddr *)sa6_src, th.th_sport, cmd, NULL, notify);
}
#endif
858 
859 void *
860 tcp_ctlinput(cmd, sa, v)
861 	int cmd;
862 	struct sockaddr *sa;
863 	register void *v;
864 {
865 	register struct ip *ip = v;
866 	register struct tcphdr *th;
867 	extern int inetctlerrmap[];
868 	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
869 	int errno;
870 
871 	if (sa->sa_family != AF_INET)
872 		return NULL;
873 
874 	if ((unsigned)cmd >= PRC_NCMDS)
875 		return NULL;
876 	errno = inetctlerrmap[cmd];
877 	if (cmd == PRC_QUENCH)
878 		notify = tcp_quench;
879 	else if (PRC_IS_REDIRECT(cmd))
880 		notify = in_rtchange, ip = 0;
881 	else if (cmd == PRC_MSGSIZE && ip_mtudisc) {
882 		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
883 		/*
884 		 * Verify that the packet in the icmp payload refers
885 		 * to an existing TCP connection.
886 		 */
887 		if (in_pcblookup(&tcbtable,
888 				 &ip->ip_dst, th->th_dport,
889 				 &ip->ip_src, th->th_sport,
890 				 INPLOOKUP_WILDCARD)) {
891 			struct icmp *icp;
892 			icp = (struct icmp *)((caddr_t)ip -
893 					      offsetof(struct icmp, icmp_ip));
894 
895 			/* Calculate new mtu and create corresponding route */
896 			icmp_mtudisc(icp);
897 		}
898 		notify = tcp_mtudisc, ip = 0;
899 	} else if (cmd == PRC_MTUINC)
900 		notify = tcp_mtudisc_increase, ip = 0;
901 	else if (cmd == PRC_HOSTDEAD)
902 		ip = 0;
903 	else if (errno == 0)
904 		return NULL;
905 
906 	if (ip) {
907 		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
908 		in_pcbnotify(&tcbtable, sa, th->th_dport, ip->ip_src,
909 			     th->th_sport, errno, notify);
910 	} else
911 		in_pcbnotifyall(&tcbtable, sa, errno, notify);
912 
913 	return NULL;
914 }
915 
916 /*
917  * When a source quench is received, close congestion window
918  * to one segment.  We will gradually open it again as we proceed.
919  */
920 void
921 tcp_quench(inp, errno)
922 	struct inpcb *inp;
923 	int errno;
924 {
925 	struct tcpcb *tp = intotcpcb(inp);
926 
927 	if (tp)
928 		tp->snd_cwnd = tp->t_maxseg;
929 }
930 
#ifdef INET6
/*
 * Path MTU Discovery handler for IPv6: run tcp_mtudisc() via
 * in6_pcbnotify() for the foreign address faddr, with wildcard
 * ports and source.
 */
void
tcp6_mtudisc_callback(struct in6_addr *faddr)
{
	struct sockaddr_in6 dst6;

	bzero(&dst6, sizeof(dst6));
	dst6.sin6_len = sizeof(struct sockaddr_in6);
	dst6.sin6_family = AF_INET6;
	dst6.sin6_addr = *faddr;

	(void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&dst6, 0,
	    (struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp_mtudisc);
}
#endif /* INET6 */
949 
950 /*
951  * On receipt of path MTU corrections, flush old route and replace it
952  * with the new one.  Retransmit all unacknowledged packets, to ensure
953  * that all packets will be received.
954  */
955 void
956 tcp_mtudisc(inp, errno)
957 	struct inpcb *inp;
958 	int errno;
959 {
960 	struct tcpcb *tp = intotcpcb(inp);
961 	struct rtentry *rt = in_pcbrtentry(inp);
962 
963 	if (tp != 0) {
964 		if (rt != 0) {
965 			/*
966 			 * If this was not a host route, remove and realloc.
967 			 */
968 			if ((rt->rt_flags & RTF_HOST) == 0) {
969 				in_rtchange(inp, errno);
970 				if ((rt = in_pcbrtentry(inp)) == 0)
971 					return;
972 			}
973 
974 			if (rt->rt_rmx.rmx_mtu != 0) {
975 				/* also takes care of congestion window */
976 				tcp_mss(tp, -1);
977 			}
978 		}
979 
980 		/*
981 		 * Resend unacknowledged packets.
982 		 */
983 		tp->snd_nxt = tp->snd_una;
984 		tcp_output(tp);
985 	}
986 }
987 
988 void
989 tcp_mtudisc_increase(inp, errno)
990 	struct inpcb *inp;
991 	int errno;
992 {
993 	struct tcpcb *tp = intotcpcb(inp);
994 	struct rtentry *rt = in_pcbrtentry(inp);
995 
996 	if (tp != 0 && rt != 0) {
997 		/*
998 		 * If this was a host route, remove and realloc.
999 		 */
1000 		if (rt->rt_flags & RTF_HOST)
1001 			in_rtchange(inp, errno);
1002 
1003 		/* also takes care of congestion window */
1004 		tcp_mss(tp, -1);
1005 	}
1006 }
1007 
1008 #ifdef TCP_SIGNATURE
/*
 * Attach hook for the TCP signature transform; nothing to set up.
 * Always returns 0.
 */
int
tcp_signature_tdb_attach(void)
{
	return (0);
}
1014 
1015 int
1016 tcp_signature_tdb_init(tdbp, xsp, ii)
1017 	struct tdb *tdbp;
1018 	struct xformsw *xsp;
1019 	struct ipsecinit *ii;
1020 {
1021 	char *c;
1022 #define isdigit(c)	  (((c) >= '0') && ((c) <= '9'))
1023 #define isalpha(c)	( (((c) >= 'A') && ((c) <= 'Z')) || \
1024 			  (((c) >= 'a') && ((c) <= 'z')) )
1025 
1026 	if ((ii->ii_authkeylen < 1) || (ii->ii_authkeylen > 80))
1027 		return (EINVAL);
1028 
1029 	c = (char *)ii->ii_authkey;
1030 
1031 	while (c < (char *)ii->ii_authkey + ii->ii_authkeylen - 1) {
1032 		if (isdigit(*c)) {
1033 			if (*(c + 1) == ' ')
1034 				return (EINVAL);
1035 		} else {
1036 			if (!isalpha(*c))
1037 				return (EINVAL);
1038 		}
1039 
1040 		c++;
1041 	}
1042 
1043 	if (!isdigit(*c) && !isalpha(*c))
1044 		return (EINVAL);
1045 
1046 	tdbp->tdb_amxkey = malloc(ii->ii_authkeylen, M_XDATA, M_DONTWAIT);
1047 	if (tdbp->tdb_amxkey == NULL)
1048 		return (ENOMEM);
1049 	bcopy(ii->ii_authkey, tdbp->tdb_amxkey, ii->ii_authkeylen);
1050 	tdbp->tdb_amxkeylen = ii->ii_authkeylen;
1051 
1052 	return (0);
1053 }
1054 
1055 int
1056 tcp_signature_tdb_zeroize(tdbp)
1057 	struct tdb *tdbp;
1058 {
1059 	if (tdbp->tdb_amxkey) {
1060 		bzero(tdbp->tdb_amxkey, tdbp->tdb_amxkeylen);
1061 		free(tdbp->tdb_amxkey, M_XDATA);
1062 		tdbp->tdb_amxkey = NULL;
1063 	}
1064 
1065 	return (0);
1066 }
1067 
/*
 * Input hook for the TCP signature transform.  It ignores all of its
 * arguments and always returns 0.
 */
int
tcp_signature_tdb_input(struct mbuf *m, struct tdb *tdbp, int skip,
    int protoff)
{
	return (0);
}
1076 
/*
 * Output hook for the TCP signature transform.  It ignores all of its
 * arguments and always fails with EINVAL.
 */
int
tcp_signature_tdb_output(struct mbuf *m, struct tdb *tdbp, struct mbuf **mp,
    int skip, int protoff)
{
	return (EINVAL);
}
1086 
1087 int
1088 tcp_signature_apply(fstate, data, len)
1089 	caddr_t fstate;
1090 	caddr_t data;
1091 	unsigned int len;
1092 {
1093 	MD5Update((MD5_CTX *)fstate, (char *)data, len);
1094 	return 0;
1095 }
1096 #endif /* TCP_SIGNATURE */
1097 
/* Parameters of the randomised initial send sequence generator. */
#define TCP_RNDISS_ROUNDS	16	/* mixing rounds per value */
#define TCP_RNDISS_OUT	7200	/* added to time.tv_sec for the reseed time */
#define TCP_RNDISS_MAX	30000	/* values handed out before a reseed */

/* Generator state; (re)filled by tcp_rndiss_init(). */
u_int8_t tcp_rndiss_sbox[128];
u_int16_t tcp_rndiss_msb;
u_int16_t tcp_rndiss_cnt;
long tcp_rndiss_reseed;

/*
 * Scramble val through TCP_RNDISS_ROUNDS rounds of a substitution from
 * tcp_rndiss_sbox followed by a bit rearrangement.  The result always
 * fits in the low 15 bits.
 */
u_int16_t
tcp_rndiss_encrypt(u_int16_t val)
{
	u_int16_t sum = 0;
	int round;

	for (round = TCP_RNDISS_ROUNDS; round > 0; round--) {
		sum += 0x79b9;
		val ^= ((u_int16_t)tcp_rndiss_sbox[(val ^ sum) & 0x7f]) << 7;
		val = ((val & 0xff) << 7) | (val >> 8);
	}

	return (val);
}
1121 
1122 void
1123 tcp_rndiss_init()
1124 {
1125 	get_random_bytes(tcp_rndiss_sbox, sizeof(tcp_rndiss_sbox));
1126 
1127 	tcp_rndiss_reseed = time.tv_sec + TCP_RNDISS_OUT;
1128 	tcp_rndiss_msb = tcp_rndiss_msb == 0x8000 ? 0 : 0x8000;
1129 	tcp_rndiss_cnt = 0;
1130 }
1131 
1132 tcp_seq
1133 tcp_rndiss_next()
1134 {
1135         if (tcp_rndiss_cnt >= TCP_RNDISS_MAX ||
1136 	    time.tv_sec > tcp_rndiss_reseed)
1137                 tcp_rndiss_init();
1138 
1139 	/* (arc4random() & 0x7fff) ensures a 32768 byte gap between ISS */
1140 	return ((tcp_rndiss_encrypt(tcp_rndiss_cnt++) | tcp_rndiss_msb) <<16) |
1141 		(arc4random() & 0x7fff);
1142 }
1143 
1144