xref: /openbsd-src/sys/netinet/tcp_subr.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: tcp_subr.c,v 1.52 2001/07/21 09:26:06 itojun Exp $	*/
2 /*	$NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  * 	This product includes software developed by the University of
50  * 	California, Berkeley and its contributors.
51  * 	This product includes software developed at the Information
52  * 	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78 #include <sys/mbuf.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 
84 #include <net/route.h>
85 #include <net/if.h>
86 
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/ip_icmp.h>
93 #include <netinet/tcp.h>
94 #include <netinet/tcp_fsm.h>
95 #include <netinet/tcp_seq.h>
96 #include <netinet/tcp_timer.h>
97 #include <netinet/tcp_var.h>
98 #include <netinet/tcpip.h>
99 #include <dev/rndvar.h>
100 
101 #ifdef INET6
102 #include <netinet6/in6_var.h>
103 #include <netinet6/ip6protosw.h>
104 #endif /* INET6 */
105 
106 #ifdef TCP_SIGNATURE
107 #include <sys/md5k.h>
108 #endif /* TCP_SIGNATURE */
109 
110 /* patchable/settable parameters for tcp */
111 int	tcp_mssdflt = TCP_MSS;
112 int	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
113 
114 /*
115  * Configure kernel with options "TCP_DO_RFC1323=0" to disable RFC1323 stuff.
116  * This is a good idea over slow SLIP/PPP links, because the timestamp
117  * pretty well destroys the VJ compression (any packet with a timestamp
118  * different from the previous one can't be compressed), as well as adding
119  * more overhead.
120  * XXX And it should be a settable per route characteristic (with this just
121  * used as the default).
122  */
123 #ifndef TCP_DO_RFC1323
124 #define TCP_DO_RFC1323	1
125 #endif
126 int    tcp_do_rfc1323 = TCP_DO_RFC1323;
127 
128 #ifndef TCP_DO_SACK
129 #ifdef TCP_SACK
130 #define TCP_DO_SACK	1
131 #else
132 #define TCP_DO_SACK	0
133 #endif
134 #endif
135 int    tcp_do_sack = TCP_DO_SACK;		/* RFC 2018 selective ACKs */
136 
137 #ifndef TCBHASHSIZE
138 #define	TCBHASHSIZE	128
139 #endif
140 int	tcbhashsize = TCBHASHSIZE;
141 
142 #ifdef INET6
143 extern int ip6_defhlim;
144 #endif /* INET6 */
145 
146 struct tcpstat tcpstat;		/* tcp statistics */
147 
148 /*
149  * Tcp initialization
150  */
151 void
152 tcp_init()
153 {
154 #ifdef TCP_COMPAT_42
155 	tcp_iss = 1;		/* wrong */
156 #endif /* TCP_COMPAT_42 */
157 	in_pcbinit(&tcbtable, tcbhashsize);
158 	tcp_now = arc4random() / 2;
159 
160 #ifdef INET6
161 	/*
162 	 * Since sizeof(struct ip6_hdr) > sizeof(struct ip), we
163 	 * do max length checks/computations only on the former.
164 	 */
165 	if (max_protohdr < (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)))
166 		max_protohdr = (sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
167 	if ((max_linkhdr + sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) >
168 	    MHLEN)
169 		panic("tcp_init");
170 
171 	icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
172 #endif /* INET6 */
173 }
174 
175 /*
176  * Create template to be used to send tcp packets on a connection.
177  * Call after host entry created, allocates an mbuf and fills
178  * in a skeletal tcp/ip header, minimizing the amount of work
179  * necessary when the connection is used.
180  *
181  * To support IPv6 in addition to IPv4 and considering that the sizes of
182  * the IPv4 and IPv6 headers are not the same, we now use a separate pointer
183  * for the TCP header.  Also, we made the former tcpiphdr header pointer
184  * into just an IP overlay pointer, with casting as appropriate for v6. rja
185  */
186 struct mbuf *
187 tcp_template(tp)
188 	struct tcpcb *tp;
189 {
190 	register struct inpcb *inp = tp->t_inpcb;
191 	register struct mbuf *m;
192 	register struct tcphdr *th;
193 
194 	if ((m = tp->t_template) == 0) {
195 		m = m_get(M_DONTWAIT, MT_HEADER);
196 		if (m == NULL)
197 			return (0);
198 
199 		switch (tp->pf) {
200 		case 0:	/*default to PF_INET*/
201 #ifdef INET
202 		case AF_INET:
203 			m->m_len = sizeof(struct ip);
204 			break;
205 #endif /* INET */
206 #ifdef INET6
207 		case AF_INET6:
208 			m->m_len = sizeof(struct ip6_hdr);
209 			break;
210 #endif /* INET6 */
211 		}
212 		m->m_len += sizeof (struct tcphdr);
213 
214 		/*
215 		 * The link header, network header, TCP header, and TCP options
216 		 * all must fit in this mbuf. For now, assume the worst case of
217 		 * TCP options size. Eventually, compute this from tp flags.
218 		 */
219 		if (m->m_len + MAX_TCPOPTLEN + max_linkhdr >= MHLEN) {
220 			MCLGET(m, M_DONTWAIT);
221 			if ((m->m_flags & M_EXT) == 0) {
222 				m_free(m);
223 				return (0);
224 			}
225 		}
226 	}
227 
228 	switch(tp->pf) {
229 #ifdef INET
230 	case AF_INET:
231 		{
232 			struct ipovly *ipovly;
233 
234 			ipovly = mtod(m, struct ipovly *);
235 
236 			bzero(ipovly->ih_x1, sizeof ipovly->ih_x1);
237 			ipovly->ih_pr = IPPROTO_TCP;
238 			ipovly->ih_len = htons(sizeof (struct tcphdr));
239 			ipovly->ih_src = inp->inp_laddr;
240 			ipovly->ih_dst = inp->inp_faddr;
241 
242 			th = (struct tcphdr *)(mtod(m, caddr_t) +
243 				sizeof(struct ip));
244 			th->th_sum = in_cksum_phdr(ipovly->ih_src.s_addr,
245 			    ipovly->ih_dst.s_addr,
246 			    htons(sizeof (struct tcphdr) + IPPROTO_TCP));
247 		}
248 		break;
249 #endif /* INET */
250 #ifdef INET6
251 	case AF_INET6:
252 		{
253 			struct ip6_hdr *ipv6;
254 
255 			ipv6 = mtod(m, struct ip6_hdr *);
256 
257 			ipv6->ip6_src = inp->inp_laddr6;
258 			ipv6->ip6_dst = inp->inp_faddr6;
259 			ipv6->ip6_flow = htonl(0x60000000) |
260 			    (inp->inp_ipv6.ip6_flow & htonl(0x0fffffff));
261 
262 			ipv6->ip6_nxt = IPPROTO_TCP;
263 			ipv6->ip6_plen = htons(sizeof(struct tcphdr)); /*XXX*/
264 			ipv6->ip6_hlim = in6_selecthlim(inp, NULL);	/*XXX*/
265 
266 			th = (struct tcphdr *)(mtod(m, caddr_t) +
267 				sizeof(struct ip6_hdr));
268 			th->th_sum = 0;
269 		}
270 		break;
271 #endif /* INET6 */
272 	}
273 
274 	th->th_sport = inp->inp_lport;
275 	th->th_dport = inp->inp_fport;
276 	th->th_seq = 0;
277 	th->th_ack = 0;
278 	th->th_x2  = 0;
279 	th->th_off = 5;
280 	th->th_flags = 0;
281 	th->th_win = 0;
282 	th->th_urp = 0;
283 	return (m);
284 }
285 
286 /*
287  * Send a single message to the TCP at address specified by
288  * the given TCP/IP header.  If m == 0, then we make a copy
289  * of the tcpiphdr at ti and send directly to the addressed host.
290  * This is used to force keep alive messages out using the TCP
291  * template for a connection tp->t_template.  If flags are given
292  * then we send a message back to the TCP which originated the
293  * segment ti, and discard the mbuf containing it and any other
294  * attached mbufs.
295  *
296  * In any case the ack and sequence number of the transmitted
297  * segment are as specified by the parameters.
298  */
299 #ifdef INET6
300 /* This function looks hairy, because it was so IPv4-dependent. */
301 #endif /* INET6 */
302 void
303 tcp_respond(tp, template, m, ack, seq, flags)
304 	struct tcpcb *tp;
305 	caddr_t template;
306 	register struct mbuf *m;
307 	tcp_seq ack, seq;
308 	int flags;
309 {
310 	register int tlen;
311 	int win = 0;
312 	struct route *ro = 0;
313 	register struct tcphdr *th;
314 	register struct tcpiphdr *ti = (struct tcpiphdr *)template;
315 #ifdef INET6
316 	int is_ipv6 = 0;   /* true iff IPv6 */
317 #endif /* INET6 */
318 
319 	if (tp) {
320 		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
321 #ifdef INET6
322 		/*
323 		 * If this is called with an unconnected
324 		 * socket/tp/pcb (tp->pf is 0), we lose.
325 		 */
326 		is_ipv6 = (tp->pf == PF_INET6);
327 
328 		/*
329 		 * The route/route6 distinction is meaningless
330 		 * unless you're allocating space or passing parameters.
331 		 */
332 #endif /* INET6 */
333 		ro = &tp->t_inpcb->inp_route;
334 	}
335 #ifdef INET6
336 	else
337 		is_ipv6 = (((struct ip *)ti)->ip_v == 6);
338 #endif /* INET6 */
339 	if (m == 0) {
340 		m = m_gethdr(M_DONTWAIT, MT_HEADER);
341 		if (m == NULL)
342 			return;
343 #ifdef TCP_COMPAT_42
344 		tlen = 1;
345 #else
346 		tlen = 0;
347 #endif
348 		m->m_data += max_linkhdr;
349 #ifdef INET6
350 		if (is_ipv6)
351 			bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) +
352 			    sizeof(struct ip6_hdr));
353 		else
354 #endif /* INET6 */
355 			bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) +
356 			    sizeof(struct ip));
357 
358 		ti = mtod(m, struct tcpiphdr *);
359 		flags = TH_ACK;
360 	} else {
361 		m_freem(m->m_next);
362 		m->m_next = 0;
363 		m->m_data = (caddr_t)ti;
364 		tlen = 0;
365 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
366 #ifdef INET6
367 		if (is_ipv6) {
368 			m->m_len = sizeof(struct tcphdr) + sizeof(struct ip6_hdr);
369 			xchg(((struct ip6_hdr *)ti)->ip6_dst,\
370 			    ((struct ip6_hdr *)ti)->ip6_src,\
371 			    struct in6_addr);
372 			th = (void *)((caddr_t)ti + sizeof(struct ip6_hdr));
373 		} else
374 #endif /* INET6 */
375 		{
376 			m->m_len = sizeof (struct tcpiphdr);
377 			xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t);
378 			th = (void *)((caddr_t)ti + sizeof(struct ip));
379 		}
380 		xchg(th->th_dport, th->th_sport, u_int16_t);
381 #undef xchg
382 	}
383 #ifdef INET6
384 	if (is_ipv6) {
385 		tlen += sizeof(struct tcphdr) + sizeof(struct ip6_hdr);
386 		th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip6_hdr));
387 	} else
388 #endif /* INET6 */
389 	{
390 		ti->ti_len = htons((u_int16_t)(sizeof (struct tcphdr) + tlen));
391 		tlen += sizeof (struct tcpiphdr);
392 		th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip));
393 	}
394 
395 	m->m_len = tlen;
396 	m->m_pkthdr.len = tlen;
397 	m->m_pkthdr.rcvif = (struct ifnet *) 0;
398 	th->th_seq = htonl(seq);
399 	th->th_ack = htonl(ack);
400 	th->th_x2 = 0;
401 	th->th_off = sizeof (struct tcphdr) >> 2;
402 	th->th_flags = flags;
403 	if (tp)
404 		win >>= tp->rcv_scale;
405 	if (win > TCP_MAXWIN)
406 		win = TCP_MAXWIN;
407 	th->th_win = htons((u_int16_t)win);
408 	th->th_urp = 0;
409 
410 #ifdef INET6
411 	if (is_ipv6) {
412 		((struct ip6_hdr *)ti)->ip6_flow   = htonl(0x60000000);
413 		((struct ip6_hdr *)ti)->ip6_nxt  = IPPROTO_TCP;
414 		((struct ip6_hdr *)ti)->ip6_hlim =
415 			in6_selecthlim(tp ? tp->t_inpcb : NULL, NULL);	/*XXX*/
416 		((struct ip6_hdr *)ti)->ip6_plen = tlen - sizeof(struct ip6_hdr);
417 		th->th_sum = 0;
418 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
419 		   sizeof(struct ip6_hdr), ((struct ip6_hdr *)ti)->ip6_plen);
420 		HTONS(((struct ip6_hdr *)ti)->ip6_plen);
421 		ip6_output(m, tp ? tp->t_inpcb->inp_outputopts6 : NULL,
422 			(struct route_in6 *)ro, 0, NULL, NULL);
423 	} else
424 #endif /* INET6 */
425 	{
426 		bzero(ti->ti_x1, sizeof ti->ti_x1);
427 		ti->ti_len = htons((u_short)tlen - sizeof(struct ip));
428 
429 		/*
430 		 * There's no point deferring to hardware checksum processing
431 		 * here, as we only send a minimal TCP packet whose checksum
432 		 * we need to compute in any case.
433 		 */
434 		th->th_sum = 0;
435 		th->th_sum = in_cksum(m, tlen);
436 		((struct ip *)ti)->ip_len = tlen;
437 		((struct ip *)ti)->ip_ttl = ip_defttl;
438 		ip_output(m, NULL, ro, ip_mtudisc ? IP_MTUDISC : 0, NULL,
439 			  tp ? tp->t_inpcb : NULL);
440 	}
441 }
442 
443 /*
444  * Create a new TCP control block, making an
445  * empty reassembly queue and hooking it to the argument
446  * protocol control block.
447  */
448 struct tcpcb *
449 tcp_newtcpcb(inp)
450 	struct inpcb *inp;
451 {
452 	register struct tcpcb *tp;
453 
454 	tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT);
455 	if (tp == NULL)
456 		return ((struct tcpcb *)0);
457 	bzero((char *) tp, sizeof(struct tcpcb));
458 	LIST_INIT(&tp->segq);
459 	tp->t_maxseg = tcp_mssdflt;
460 	tp->t_maxopd = 0;
461 
462 #ifdef TCP_SACK
463 	tp->sack_disable = tcp_do_sack ? 0 : 1;
464 #endif
465 	tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
466 	tp->t_inpcb = inp;
467 	/*
468 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
469 	 * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
470 	 * reasonable initial retransmit time.
471 	 */
472 	tp->t_srtt = TCPTV_SRTTBASE;
473 	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
474 	tp->t_rttmin = TCPTV_MIN;
475 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
476 	    TCPTV_MIN, TCPTV_REXMTMAX);
477 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
478 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
479 #ifdef INET6
480 	/* we disallow IPv4 mapped address completely. */
481 	if ((inp->inp_flags & INP_IPV6) == 0)
482 		tp->pf = PF_INET;
483 	else
484 		tp->pf = PF_INET6;
485 #else
486 	tp->pf = PF_INET;
487 #endif
488 
489 #ifdef INET6
490 	if (inp->inp_flags & INP_IPV6)
491 		inp->inp_ipv6.ip6_hlim = ip6_defhlim;
492 	else
493 #endif /* INET6 */
494 		inp->inp_ip.ip_ttl = ip_defttl;
495 
496 	inp->inp_ppcb = (caddr_t)tp;
497 	return (tp);
498 }
499 
500 /*
501  * Drop a TCP connection, reporting
502  * the specified error.  If connection is synchronized,
503  * then send a RST to peer.
504  */
505 struct tcpcb *
506 tcp_drop(tp, errno)
507 	register struct tcpcb *tp;
508 	int errno;
509 {
510 	struct socket *so = tp->t_inpcb->inp_socket;
511 
512 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
513 		tp->t_state = TCPS_CLOSED;
514 		(void) tcp_output(tp);
515 		tcpstat.tcps_drops++;
516 	} else
517 		tcpstat.tcps_conndrops++;
518 	if (errno == ETIMEDOUT && tp->t_softerror)
519 		errno = tp->t_softerror;
520 	so->so_error = errno;
521 	return (tcp_close(tp));
522 }
523 
524 /*
525  * Close a TCP control block:
526  *	discard all space held by the tcp
527  *	discard internet protocol block
528  *	wake up any sleepers
529  */
530 struct tcpcb *
531 tcp_close(tp)
532 	register struct tcpcb *tp;
533 {
534 	register struct ipqent *qe;
535 	struct inpcb *inp = tp->t_inpcb;
536 	struct socket *so = inp->inp_socket;
537 #ifdef TCP_SACK
538 	struct sackhole *p, *q;
539 #endif
540 #ifdef RTV_RTT
541 	register struct rtentry *rt;
542 #ifdef INET6
543 	register int bound_to_specific = 0;  /* I.e. non-default */
544 
545 	/*
546 	 * This code checks the nature of the route for this connection.
547 	 * Normally this is done by two simple checks in the next
548 	 * INET/INET6 ifdef block, but because of two possible lower layers,
549 	 * that check is done here.
550 	 *
551 	 * Perhaps should be doing this only for a RTF_HOST route.
552 	 */
553 	rt = inp->inp_route.ro_rt;  /* Same for route or route6. */
554 	if (tp->pf == PF_INET6) {
555 		if (rt)
556 			bound_to_specific =
557 			    !(IN6_IS_ADDR_UNSPECIFIED(&
558 			    ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr));
559 	} else {
560 		if (rt)
561 			bound_to_specific =
562 			    (((struct sockaddr_in *)rt_key(rt))->
563 			    sin_addr.s_addr != INADDR_ANY);
564 	}
565 #endif /* INET6 */
566 
567 	/*
568 	 * If we sent enough data to get some meaningful characteristics,
569 	 * save them in the routing entry.  'Enough' is arbitrarily
570 	 * defined as the sendpipesize (default 4K) * 16.  This would
571 	 * give us 16 rtt samples assuming we only get one sample per
572 	 * window (the usual case on a long haul net).  16 samples is
573 	 * enough for the srtt filter to converge to within 5% of the correct
574 	 * value; fewer samples and we could save a very bogus rtt.
575 	 *
576 	 * Don't update the default route's characteristics and don't
577 	 * update anything that the user "locked".
578 	 */
579 #ifdef INET6
580 	/*
581 	 * Note that rt and bound_to_specific are set above.
582 	 */
583 	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
584 	    rt && bound_to_specific) {
585 #else /* INET6 */
586 	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
587 	    (rt = inp->inp_route.ro_rt) &&
588 	    satosin(rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
589 #endif /* INET6 */
590 		register u_long i = 0;
591 
592 		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
593 			i = tp->t_srtt *
594 			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
595 			if (rt->rt_rmx.rmx_rtt && i)
596 				/*
597 				 * filter this update to half the old & half
598 				 * the new values, converting scale.
599 				 * See route.h and tcp_var.h for a
600 				 * description of the scaling constants.
601 				 */
602 				rt->rt_rmx.rmx_rtt =
603 				    (rt->rt_rmx.rmx_rtt + i) / 2;
604 			else
605 				rt->rt_rmx.rmx_rtt = i;
606 		}
607 		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
608 			i = tp->t_rttvar *
609 			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
610 			if (rt->rt_rmx.rmx_rttvar && i)
611 				rt->rt_rmx.rmx_rttvar =
612 				    (rt->rt_rmx.rmx_rttvar + i) / 2;
613 			else
614 				rt->rt_rmx.rmx_rttvar = i;
615 		}
616 		/*
617 		 * update the pipelimit (ssthresh) if it has been updated
618 		 * already or if a pipesize was specified & the threshhold
619 		 * got below half the pipesize.  I.e., wait for bad news
620 		 * before we start updating, then update on both good
621 		 * and bad news.
622 		 */
623 		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
624 		    (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
625 		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
626 			/*
627 			 * convert the limit from user data bytes to
628 			 * packets then to packet data bytes.
629 			 */
630 			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
631 			if (i < 2)
632 				i = 2;
633 #ifdef INET6
634 			if (tp->pf == PF_INET6)
635 				i *= (u_long)(tp->t_maxseg + sizeof (struct tcphdr)
636 				    + sizeof(struct ip6_hdr));
637 			else
638 #endif /* INET6 */
639 				i *= (u_long)(tp->t_maxseg +
640 				    sizeof (struct tcpiphdr));
641 
642 			if (rt->rt_rmx.rmx_ssthresh)
643 				rt->rt_rmx.rmx_ssthresh =
644 				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
645 			else
646 				rt->rt_rmx.rmx_ssthresh = i;
647 		}
648 	}
649 #endif /* RTV_RTT */
650 
651 	/* free the reassembly queue, if any */
652 #ifdef INET6
653 	/* Reassembling TCP segments in v6 might be sufficiently different
654 	 * to merit two codepaths to free the reasssembly queue.
655 	 * If an undecided TCP socket, then the IPv4 codepath will be used
656 	 * because it won't matter much anyway.
657 	 */
658 	if (tp->pf == AF_INET6) {
659 		while ((qe = tp->segq.lh_first) != NULL) {
660 			LIST_REMOVE(qe, ipqe_q);
661 			m_freem(qe->ipqe_m);
662 			FREE(qe, M_IPQ);
663 		}
664 	} else
665 #endif /* INET6 */
666 		while ((qe = tp->segq.lh_first) != NULL) {
667 			LIST_REMOVE(qe, ipqe_q);
668 			m_freem(qe->ipqe_m);
669 			FREE(qe, M_IPQ);
670 		}
671 #ifdef TCP_SACK
672 	/* Free SACK holes. */
673 	q = p = tp->snd_holes;
674 	while (p != 0) {
675 		q = p->next;
676 		free(p, M_PCB);
677 		p = q;
678 	}
679 #endif
680 	if (tp->t_template)
681 		(void) m_free(tp->t_template);
682 	free(tp, M_PCB);
683 	inp->inp_ppcb = 0;
684 	soisdisconnected(so);
685 	in_pcbdetach(inp);
686 	tcpstat.tcps_closed++;
687 	return ((struct tcpcb *)0);
688 }
689 
/*
 * Drain hook for TCP.  Intentionally empty: this implementation does
 * not release anything here.
 */
void
tcp_drain(void)
{
}
695 
696 /*
697  * Compute proper scaling value for receiver window from buffer space
698  */
699 
700 void
701 tcp_rscale(struct tcpcb *tp, u_long hiwat)
702 {
703 	tp->request_r_scale = 0;
704 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
705 	       TCP_MAXWIN << tp->request_r_scale < hiwat)
706 		tp->request_r_scale++;
707 }
708 
709 /*
710  * Notify a tcp user of an asynchronous error;
711  * store error as soft error, but wake up user
712  * (for now, won't do anything until can select for soft error).
713  */
714 void
715 tcp_notify(inp, error)
716 	struct inpcb *inp;
717 	int error;
718 {
719 	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
720 	register struct socket *so = inp->inp_socket;
721 
722 	/*
723 	 * Ignore some errors if we are hooked up.
724 	 * If connection hasn't completed, has retransmitted several times,
725 	 * and receives a second error, give up now.  This is better
726 	 * than waiting a long time to establish a connection that
727 	 * can never complete.
728 	 */
729 	if (tp->t_state == TCPS_ESTABLISHED &&
730 	     (error == EHOSTUNREACH || error == ENETUNREACH ||
731 	      error == EHOSTDOWN)) {
732 		return;
733 	} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
734 	    tp->t_rxtshift > 3 && tp->t_softerror)
735 		so->so_error = error;
736 	else
737 		tp->t_softerror = error;
738 	wakeup((caddr_t) &so->so_timeo);
739 	sorwakeup(so);
740 	sowwakeup(so);
741 }
742 
743 #ifdef INET6
744 void
745 tcp6_ctlinput(cmd, sa, d)
746 	int cmd;
747 	struct sockaddr *sa;
748 	void *d;
749 {
750 	struct tcphdr th;
751 	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
752 	struct ip6_hdr *ip6;
753 	const struct sockaddr_in6 *sa6_src = NULL;
754 	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
755 	struct mbuf *m;
756 	int off;
757 	struct {
758 		u_int16_t th_sport;
759 		u_int16_t th_dport;
760 	} *thp;
761 
762 	if (sa->sa_family != AF_INET6 ||
763 	    sa->sa_len != sizeof(struct sockaddr_in6))
764 		return;
765 	if ((unsigned)cmd >= PRC_NCMDS)
766 		return;
767 	else if (cmd == PRC_QUENCH) {
768 		/* XXX there's no PRC_QUENCH in IPv6 */
769 		notify = tcp_quench;
770 	} else if (PRC_IS_REDIRECT(cmd))
771 		notify = in_rtchange, d = NULL;
772 	else if (cmd == PRC_MSGSIZE)
773 		; /* special code is present, see below */
774 	else if (cmd == PRC_HOSTDEAD)
775 		d = NULL;
776 	else if (inet6ctlerrmap[cmd] == 0)
777 		return;
778 
779 	/* if the parameter is from icmp6, decode it. */
780 	if (d != NULL) {
781 		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
782 		m = ip6cp->ip6c_m;
783 		ip6 = ip6cp->ip6c_ip6;
784 		off = ip6cp->ip6c_off;
785 		sa6_src = ip6cp->ip6c_src;
786 	} else {
787 		m = NULL;
788 		ip6 = NULL;
789 		sa6_src = &sa6_any;
790 	}
791 
792 	if (ip6) {
793 		/*
794 		 * XXX: We assume that when ip6 is non NULL,
795 		 * M and OFF are valid.
796 		 */
797 
798 		/* check if we can safely examine src and dst ports */
799 		if (m->m_pkthdr.len < off + sizeof(*thp))
800 			return;
801 
802 		bzero(&th, sizeof(th));
803 #ifdef DIAGNOSTIC
804 		if (sizeof(*thp) > sizeof(th))
805 			panic("assumption failed in tcp6_ctlinput");
806 #endif
807 		m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
808 
809 		if (cmd == PRC_MSGSIZE) {
810 			int valid = 0;
811 
812 			/*
813 			 * Check to see if we have a valid TCP connection
814 			 * corresponding to the address in the ICMPv6 message
815 			 * payload.
816 			 */
817 			if (in6_pcbhashlookup(&tcbtable, &sa6->sin6_addr,
818 			    th.th_dport, (struct in6_addr *)&sa6_src->sin6_addr,
819 			    th.th_sport))
820 				valid++;
821 			else if (in_pcblookup(&tcbtable, &sa6->sin6_addr,
822 			    th.th_dport, (struct in6_addr *)&sa6_src->sin6_addr,
823 			    th.th_sport, INPLOOKUP_IPV6))
824 				valid++;
825 
826 			/*
827 			 * Depending on the value of "valid" and routing table
828 			 * size (mtudisc_{hi,lo}wat), we will:
829 			 * - recalcurate the new MTU and create the
830 			 *   corresponding routing entry, or
831 			 * - ignore the MTU change notification.
832 			 */
833 			icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
834 
835 			return;
836 		}
837 
838 		(void) in6_pcbnotify(&tcbtable, sa, th.th_dport,
839 		    (struct sockaddr *)sa6_src, th.th_sport, cmd, NULL, notify);
840 	} else {
841 		(void) in6_pcbnotify(&tcbtable, sa, 0,
842 		    (struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
843 	}
844 }
845 #endif
846 
847 void *
848 tcp_ctlinput(cmd, sa, v)
849 	int cmd;
850 	struct sockaddr *sa;
851 	register void *v;
852 {
853 	register struct ip *ip = v;
854 	register struct tcphdr *th;
855 	extern int inetctlerrmap[];
856 	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
857 	int errno;
858 
859 	if (sa->sa_family != AF_INET)
860 		return NULL;
861 
862 	if ((unsigned)cmd >= PRC_NCMDS)
863 		return NULL;
864 	errno = inetctlerrmap[cmd];
865 	if (cmd == PRC_QUENCH)
866 		notify = tcp_quench;
867 	else if (PRC_IS_REDIRECT(cmd))
868 		notify = in_rtchange, ip = 0;
869 	else if (cmd == PRC_MSGSIZE && ip_mtudisc) {
870 		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
871 		/*
872 		 * Verify that the packet in the icmp payload refers
873 		 * to an existing TCP connection.
874 		 */
875 		if (in_pcblookup(&tcbtable,
876 				 &ip->ip_dst, th->th_dport,
877 				 &ip->ip_src, th->th_sport,
878 				 INPLOOKUP_WILDCARD)) {
879 			struct icmp *icp;
880 			icp = (struct icmp *)((caddr_t)ip -
881 					      offsetof(struct icmp, icmp_ip));
882 
883 			/* Calculate new mtu and create corresponding route */
884 			icmp_mtudisc(icp);
885 		}
886 		notify = tcp_mtudisc, ip = 0;
887 	} else if (cmd == PRC_MTUINC)
888 		notify = tcp_mtudisc_increase, ip = 0;
889 	else if (cmd == PRC_HOSTDEAD)
890 		ip = 0;
891 	else if (errno == 0)
892 		return NULL;
893 
894 	if (ip) {
895 		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
896 		in_pcbnotify(&tcbtable, sa, th->th_dport, ip->ip_src,
897 			     th->th_sport, errno, notify);
898 	} else
899 		in_pcbnotifyall(&tcbtable, sa, errno, notify);
900 
901 	return NULL;
902 }
903 
904 /*
905  * When a source quench is received, close congestion window
906  * to one segment.  We will gradually open it again as we proceed.
907  */
908 void
909 tcp_quench(inp, errno)
910 	struct inpcb *inp;
911 	int errno;
912 {
913 	struct tcpcb *tp = intotcpcb(inp);
914 
915 	if (tp)
916 		tp->snd_cwnd = tp->t_maxseg;
917 }
918 
919 #ifdef INET6
920 /*
921  * Path MTU Discovery handlers.
922  */
923 void
924 tcp6_mtudisc_callback(faddr)
925 	struct in6_addr *faddr;
926 {
927 	struct sockaddr_in6 sin6;
928 
929 	bzero(&sin6, sizeof(sin6));
930 	sin6.sin6_family = AF_INET6;
931 	sin6.sin6_len = sizeof(struct sockaddr_in6);
932 	sin6.sin6_addr = *faddr;
933 	(void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&sin6, 0,
934 	    (struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp_mtudisc);
935 }
936 #endif /* INET6 */
937 
938 /*
939  * On receipt of path MTU corrections, flush old route and replace it
940  * with the new one.  Retransmit all unacknowledged packets, to ensure
941  * that all packets will be received.
942  */
943 void
944 tcp_mtudisc(inp, errno)
945 	struct inpcb *inp;
946 	int errno;
947 {
948 	struct tcpcb *tp = intotcpcb(inp);
949 	struct rtentry *rt = in_pcbrtentry(inp);
950 
951 	if (tp != 0) {
952 		if (rt != 0) {
953 			/*
954 			 * If this was not a host route, remove and realloc.
955 			 */
956 			if ((rt->rt_flags & RTF_HOST) == 0) {
957 				in_rtchange(inp, errno);
958 				if ((rt = in_pcbrtentry(inp)) == 0)
959 					return;
960 			}
961 
962 			if (rt->rt_rmx.rmx_mtu != 0) {
963 				/* also takes care of congestion window */
964 				tcp_mss(tp, -1);
965 			}
966 		}
967 
968 		/*
969 		 * Resend unacknowledged packets.
970 		 */
971 		tp->snd_nxt = tp->snd_una;
972 		tcp_output(tp);
973 	}
974 }
975 
976 void
977 tcp_mtudisc_increase(inp, errno)
978 	struct inpcb *inp;
979 	int errno;
980 {
981 	struct tcpcb *tp = intotcpcb(inp);
982 	struct rtentry *rt = in_pcbrtentry(inp);
983 
984 	if (tp != 0 && rt != 0) {
985 		/*
986 		 * If this was a host route, remove and realloc.
987 		 */
988 		if (rt->rt_flags & RTF_HOST)
989 			in_rtchange(inp, errno);
990 
991 		/* also takes care of congestion window */
992 		tcp_mss(tp, -1);
993 	}
994 }
995 
996 #ifdef TCP_SIGNATURE
/*
 * Attach hook of the TCP signature transform: nothing to set up,
 * always reports success (0).
 */
int
tcp_signature_tdb_attach(void)
{
	return (0);
}
1002 
1003 int
1004 tcp_signature_tdb_init(tdbp, xsp, ii)
1005 	struct tdb *tdbp;
1006 	struct xformsw *xsp;
1007 	struct ipsecinit *ii;
1008 {
1009 	char *c;
1010 #define isdigit(c)	  (((c) >= '0') && ((c) <= '9'))
1011 #define isalpha(c)	( (((c) >= 'A') && ((c) <= 'Z')) || \
1012 			  (((c) >= 'a') && ((c) <= 'z')) )
1013 
1014 	if ((ii->ii_authkeylen < 1) || (ii->ii_authkeylen > 80))
1015 		return (EINVAL);
1016 
1017 	c = (char *)ii->ii_authkey;
1018 
1019 	while (c < (char *)ii->ii_authkey + ii->ii_authkeylen - 1) {
1020 		if (isdigit(*c)) {
1021 			if (*(c + 1) == ' ')
1022 				return (EINVAL);
1023 		} else {
1024 			if (!isalpha(*c))
1025 				return (EINVAL);
1026 		}
1027 
1028 		c++;
1029 	}
1030 
1031 	if (!isdigit(*c) && !isalpha(*c))
1032 		return (EINVAL);
1033 
1034 	tdbp->tdb_amxkey = malloc(ii->ii_authkeylen, M_XDATA, M_DONTWAIT);
1035 	if (tdbp->tdb_amxkey == NULL)
1036 		return (ENOMEM);
1037 	bcopy(ii->ii_authkey, tdbp->tdb_amxkey, ii->ii_authkeylen);
1038 	tdbp->tdb_amxkeylen = ii->ii_authkeylen;
1039 
1040 	return (0);
1041 }
1042 
1043 int
1044 tcp_signature_tdb_zeroize(tdbp)
1045 	struct tdb *tdbp;
1046 {
1047 	if (tdbp->tdb_amxkey) {
1048 		bzero(tdbp->tdb_amxkey, tdbp->tdb_amxkeylen);
1049 		free(tdbp->tdb_amxkey, M_XDATA);
1050 		tdbp->tdb_amxkey = NULL;
1051 	}
1052 
1053 	return (0);
1054 }
1055 
/*
 * Input hook of the TCP signature transform.  Does nothing with its
 * arguments and always returns 0.
 */
int
tcp_signature_tdb_input(struct mbuf *m, struct tdb *tdbp, int skip,
    int protoff)
{
	return (0);
}
1064 
/*
 * Output hook of the TCP signature transform.  Does nothing with its
 * arguments and always fails with EINVAL.
 */
int
tcp_signature_tdb_output(struct mbuf *m, struct tdb *tdbp,
    struct mbuf **mp, int skip, int protoff)
{
	return (EINVAL);
}
1074 
1075 int
1076 tcp_signature_apply(fstate, data, len)
1077 	caddr_t fstate;
1078 	caddr_t data;
1079 	unsigned int len;
1080 {
1081 	MD5Update((MD5_CTX *)fstate, (char *)data, len);
1082 	return 0;
1083 }
1084 #endif /* TCP_SIGNATURE */
1085 
/*
 * Randomised initial send sequence (ISS) generator: constants.
 */
#define TCP_RNDISS_ROUNDS	16	/* rounds in tcp_rndiss_encrypt() */
#define TCP_RNDISS_OUT	7200	/* seconds until the next reseed */
#define TCP_RNDISS_MAX	30000	/* values handed out before a reseed */

/*
 * Randomised ISS generator: state, (re)initialised by tcp_rndiss_init().
 */
u_int8_t tcp_rndiss_sbox[128];	/* random substitution table */
u_int16_t tcp_rndiss_msb;	/* 0 or 0x8000, flipped on every reseed */
u_int16_t tcp_rndiss_cnt;	/* values handed out since last reseed */
long tcp_rndiss_reseed;		/* time.tv_sec after which to reseed */
1094 
1095 u_int16_t
1096 tcp_rndiss_encrypt(val)
1097 	u_int16_t val;
1098 {
1099 	u_int16_t sum = 0, i;
1100 
1101 	for (i = 0; i < TCP_RNDISS_ROUNDS; i++) {
1102 		sum += 0x79b9;
1103 		val ^= ((u_int16_t)tcp_rndiss_sbox[(val^sum) & 0x7f]) << 7;
1104 		val = ((val & 0xff) << 7) | (val >> 8);
1105 	}
1106 
1107 	return val;
1108 }
1109 
1110 void
1111 tcp_rndiss_init()
1112 {
1113 	get_random_bytes(tcp_rndiss_sbox, sizeof(tcp_rndiss_sbox));
1114 
1115 	tcp_rndiss_reseed = time.tv_sec + TCP_RNDISS_OUT;
1116 	tcp_rndiss_msb = tcp_rndiss_msb == 0x8000 ? 0 : 0x8000;
1117 	tcp_rndiss_cnt = 0;
1118 }
1119 
1120 tcp_seq
1121 tcp_rndiss_next()
1122 {
1123         if (tcp_rndiss_cnt >= TCP_RNDISS_MAX ||
1124 	    time.tv_sec > tcp_rndiss_reseed)
1125                 tcp_rndiss_init();
1126 
1127 	/* (arc4random() & 0x7fff) ensures a 32768 byte gap between ISS */
1128 	return ((tcp_rndiss_encrypt(tcp_rndiss_cnt++) | tcp_rndiss_msb) <<16) |
1129 		(arc4random() & 0x7fff);
1130 }
1131 
1132