xref: /netbsd-src/sys/netinet/tcp_usrreq.c (revision 81e0d2b0af8485d94ed5da487d4253841a2e6e45)
1 /*	$NetBSD: tcp_usrreq.c,v 1.94 2005/02/03 23:54:17 perry Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
38  * Facility, NASA Ames Research Center.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed by the NetBSD
51  *	Foundation, Inc. and its contributors.
52  * 4. Neither the name of The NetBSD Foundation nor the names of its
53  *    contributors may be used to endorse or promote products derived
54  *    from this software without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 /*
70  * Copyright (c) 1982, 1986, 1988, 1993, 1995
71  *	The Regents of the University of California.  All rights reserved.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. Neither the name of the University nor the names of its contributors
82  *    may be used to endorse or promote products derived from this software
83  *    without specific prior written permission.
84  *
85  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
86  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
87  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
88  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
89  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
90  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
91  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
92  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
93  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
94  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
95  * SUCH DAMAGE.
96  *
97  *	@(#)tcp_usrreq.c	8.5 (Berkeley) 6/21/95
98  */
99 
100 #include <sys/cdefs.h>
101 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.94 2005/02/03 23:54:17 perry Exp $");
102 
103 #include "opt_inet.h"
104 #include "opt_ipsec.h"
105 #include "opt_tcp_debug.h"
106 #include "opt_mbuftrace.h"
107 
108 #include <sys/param.h>
109 #include <sys/systm.h>
110 #include <sys/kernel.h>
111 #include <sys/malloc.h>
112 #include <sys/mbuf.h>
113 #include <sys/socket.h>
114 #include <sys/socketvar.h>
115 #include <sys/protosw.h>
116 #include <sys/errno.h>
117 #include <sys/stat.h>
118 #include <sys/proc.h>
119 #include <sys/domain.h>
120 #include <sys/sysctl.h>
121 
122 #include <net/if.h>
123 #include <net/route.h>
124 
125 #include <netinet/in.h>
126 #include <netinet/in_systm.h>
127 #include <netinet/in_var.h>
128 #include <netinet/ip.h>
129 #include <netinet/in_pcb.h>
130 #include <netinet/ip_var.h>
131 
132 #ifdef INET6
133 #ifndef INET
134 #include <netinet/in.h>
135 #endif
136 #include <netinet/ip6.h>
137 #include <netinet6/in6_pcb.h>
138 #include <netinet6/ip6_var.h>
139 #endif
140 
141 #include <netinet/tcp.h>
142 #include <netinet/tcp_fsm.h>
143 #include <netinet/tcp_seq.h>
144 #include <netinet/tcp_timer.h>
145 #include <netinet/tcp_var.h>
146 #include <netinet/tcpip.h>
147 #include <netinet/tcp_debug.h>
148 
149 #include "opt_tcp_space.h"
150 
151 #ifdef IPSEC
152 #include <netinet6/ipsec.h>
153 #endif /*IPSEC*/
154 
155 /*
156  * TCP protocol interface to socket abstraction.
157  */
158 
159 /*
160  * Process a TCP user request for TCP tb.  If this is a send request
161  * then m is the mbuf chain of send data.  If this is a timer expiration
162  * (called from the software clock routine), then timertype tells which timer.
163  */
164 /*ARGSUSED*/
165 int
166 tcp_usrreq(struct socket *so, int req,
167     struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p)
168 {
169 	struct inpcb *inp;
170 #ifdef INET6
171 	struct in6pcb *in6p;
172 #endif
173 	struct tcpcb *tp = NULL;
174 	int s;
175 	int error = 0;
176 #ifdef TCP_DEBUG
177 	int ostate = 0;
178 #endif
179 	int family;	/* family of the socket */
180 
181 	family = so->so_proto->pr_domain->dom_family;
182 
183 	if (req == PRU_CONTROL) {
184 		switch (family) {
185 #ifdef INET
186 		case PF_INET:
187 			return (in_control(so, (long)m, (caddr_t)nam,
188 			    (struct ifnet *)control, p));
189 #endif
190 #ifdef INET6
191 		case PF_INET6:
192 			return (in6_control(so, (long)m, (caddr_t)nam,
193 			    (struct ifnet *)control, p));
194 #endif
195 		default:
196 			return EAFNOSUPPORT;
197 		}
198 	}
199 
200 	if (req == PRU_PURGEIF) {
201 		switch (family) {
202 #ifdef INET
203 		case PF_INET:
204 			in_pcbpurgeif0(&tcbtable, (struct ifnet *)control);
205 			in_purgeif((struct ifnet *)control);
206 			in_pcbpurgeif(&tcbtable, (struct ifnet *)control);
207 			break;
208 #endif
209 #ifdef INET6
210 		case PF_INET6:
211 			in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control);
212 			in6_purgeif((struct ifnet *)control);
213 			in6_pcbpurgeif(&tcbtable, (struct ifnet *)control);
214 			break;
215 #endif
216 		default:
217 			return (EAFNOSUPPORT);
218 		}
219 		return (0);
220 	}
221 
222 	s = splsoftnet();
223 	switch (family) {
224 #ifdef INET
225 	case PF_INET:
226 		inp = sotoinpcb(so);
227 #ifdef INET6
228 		in6p = NULL;
229 #endif
230 		break;
231 #endif
232 #ifdef INET6
233 	case PF_INET6:
234 		inp = NULL;
235 		in6p = sotoin6pcb(so);
236 		break;
237 #endif
238 	default:
239 		splx(s);
240 		return EAFNOSUPPORT;
241 	}
242 
243 #ifdef DIAGNOSTIC
244 #ifdef INET6
245 	if (inp && in6p)
246 		panic("tcp_usrreq: both inp and in6p set to non-NULL");
247 #endif
248 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
249 		panic("tcp_usrreq: unexpected control mbuf");
250 #endif
251 	/*
252 	 * When a TCP is attached to a socket, then there will be
253 	 * a (struct inpcb) pointed at by the socket, and this
254 	 * structure will point at a subsidary (struct tcpcb).
255 	 */
256 #ifndef INET6
257 	if (inp == 0 && req != PRU_ATTACH)
258 #else
259 	if ((inp == 0 && in6p == 0) && req != PRU_ATTACH)
260 #endif
261 	{
262 		error = EINVAL;
263 		goto release;
264 	}
265 #ifdef INET
266 	if (inp) {
267 		tp = intotcpcb(inp);
268 		/* WHAT IF TP IS 0? */
269 #ifdef KPROF
270 		tcp_acounts[tp->t_state][req]++;
271 #endif
272 #ifdef TCP_DEBUG
273 		ostate = tp->t_state;
274 #endif
275 	}
276 #endif
277 #ifdef INET6
278 	if (in6p) {
279 		tp = in6totcpcb(in6p);
280 		/* WHAT IF TP IS 0? */
281 #ifdef KPROF
282 		tcp_acounts[tp->t_state][req]++;
283 #endif
284 #ifdef TCP_DEBUG
285 		ostate = tp->t_state;
286 #endif
287 	}
288 #endif
289 
290 	switch (req) {
291 
292 	/*
293 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
294 	 * and an internet control block.
295 	 */
296 	case PRU_ATTACH:
297 #ifndef INET6
298 		if (inp != 0)
299 #else
300 		if (inp != 0 || in6p != 0)
301 #endif
302 		{
303 			error = EISCONN;
304 			break;
305 		}
306 		error = tcp_attach(so);
307 		if (error)
308 			break;
309 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
310 			so->so_linger = TCP_LINGERTIME;
311 		tp = sototcpcb(so);
312 		break;
313 
314 	/*
315 	 * PRU_DETACH detaches the TCP protocol from the socket.
316 	 */
317 	case PRU_DETACH:
318 		tp = tcp_disconnect(tp);
319 		break;
320 
321 	/*
322 	 * Give the socket an address.
323 	 */
324 	case PRU_BIND:
325 		switch (family) {
326 #ifdef INET
327 		case PF_INET:
328 			error = in_pcbbind(inp, nam, p);
329 			break;
330 #endif
331 #ifdef INET6
332 		case PF_INET6:
333 			error = in6_pcbbind(in6p, nam, p);
334 			if (!error) {
335 				/* mapped addr case */
336 				if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
337 					tp->t_family = AF_INET;
338 				else
339 					tp->t_family = AF_INET6;
340 			}
341 			break;
342 #endif
343 		}
344 		break;
345 
346 	/*
347 	 * Prepare to accept connections.
348 	 */
349 	case PRU_LISTEN:
350 #ifdef INET
351 		if (inp && inp->inp_lport == 0) {
352 			error = in_pcbbind(inp, (struct mbuf *)0,
353 			    (struct proc *)0);
354 			if (error)
355 				break;
356 		}
357 #endif
358 #ifdef INET6
359 		if (in6p && in6p->in6p_lport == 0) {
360 			error = in6_pcbbind(in6p, (struct mbuf *)0,
361 			    (struct proc *)0);
362 			if (error)
363 				break;
364 		}
365 #endif
366 		tp->t_state = TCPS_LISTEN;
367 		break;
368 
369 	/*
370 	 * Initiate connection to peer.
371 	 * Create a template for use in transmissions on this connection.
372 	 * Enter SYN_SENT state, and mark socket as connecting.
373 	 * Start keep-alive timer, and seed output sequence space.
374 	 * Send initial segment on connection.
375 	 */
376 	case PRU_CONNECT:
377 #ifdef INET
378 		if (inp) {
379 			if (inp->inp_lport == 0) {
380 				error = in_pcbbind(inp, (struct mbuf *)0,
381 				    (struct proc *)0);
382 				if (error)
383 					break;
384 			}
385 			error = in_pcbconnect(inp, nam);
386 		}
387 #endif
388 #ifdef INET6
389 		if (in6p) {
390 			if (in6p->in6p_lport == 0) {
391 				error = in6_pcbbind(in6p, (struct mbuf *)0,
392 				    (struct proc *)0);
393 				if (error)
394 					break;
395 			}
396 			error = in6_pcbconnect(in6p, nam);
397 			if (!error) {
398 				/* mapped addr case */
399 				if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
400 					tp->t_family = AF_INET;
401 				else
402 					tp->t_family = AF_INET6;
403 			}
404 		}
405 #endif
406 		if (error)
407 			break;
408 		tp->t_template = tcp_template(tp);
409 		if (tp->t_template == 0) {
410 #ifdef INET
411 			if (inp)
412 				in_pcbdisconnect(inp);
413 #endif
414 #ifdef INET6
415 			if (in6p)
416 				in6_pcbdisconnect(in6p);
417 #endif
418 			error = ENOBUFS;
419 			break;
420 		}
421 		/* Compute window scaling to request.  */
422 		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
423 		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
424 			tp->request_r_scale++;
425 		soisconnecting(so);
426 		tcpstat.tcps_connattempt++;
427 		tp->t_state = TCPS_SYN_SENT;
428 		TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT);
429 		tp->iss = tcp_new_iss(tp, 0);
430 		tcp_sendseqinit(tp);
431 		error = tcp_output(tp);
432 		break;
433 
434 	/*
435 	 * Create a TCP connection between two sockets.
436 	 */
437 	case PRU_CONNECT2:
438 		error = EOPNOTSUPP;
439 		break;
440 
441 	/*
442 	 * Initiate disconnect from peer.
443 	 * If connection never passed embryonic stage, just drop;
444 	 * else if don't need to let data drain, then can just drop anyways,
445 	 * else have to begin TCP shutdown process: mark socket disconnecting,
446 	 * drain unread data, state switch to reflect user close, and
447 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
448 	 * when peer sends FIN and acks ours.
449 	 *
450 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
451 	 */
452 	case PRU_DISCONNECT:
453 		tp = tcp_disconnect(tp);
454 		break;
455 
456 	/*
457 	 * Accept a connection.  Essentially all the work is
458 	 * done at higher levels; just return the address
459 	 * of the peer, storing through addr.
460 	 */
461 	case PRU_ACCEPT:
462 #ifdef INET
463 		if (inp)
464 			in_setpeeraddr(inp, nam);
465 #endif
466 #ifdef INET6
467 		if (in6p)
468 			in6_setpeeraddr(in6p, nam);
469 #endif
470 		break;
471 
472 	/*
473 	 * Mark the connection as being incapable of further output.
474 	 */
475 	case PRU_SHUTDOWN:
476 		socantsendmore(so);
477 		tp = tcp_usrclosed(tp);
478 		if (tp)
479 			error = tcp_output(tp);
480 		break;
481 
482 	/*
483 	 * After a receive, possibly send window update to peer.
484 	 */
485 	case PRU_RCVD:
486 		/*
487 		 * soreceive() calls this function when a user receives
488 		 * ancillary data on a listening socket. We don't call
489 		 * tcp_output in such a case, since there is no header
490 		 * template for a listening socket and hence the kernel
491 		 * will panic.
492 		 */
493 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
494 			(void) tcp_output(tp);
495 		break;
496 
497 	/*
498 	 * Do a send by putting data in output queue and updating urgent
499 	 * marker if URG set.  Possibly send more data.
500 	 */
501 	case PRU_SEND:
502 		if (control && control->m_len) {
503 			m_freem(control);
504 			m_freem(m);
505 			error = EINVAL;
506 			break;
507 		}
508 		sbappendstream(&so->so_snd, m);
509 		error = tcp_output(tp);
510 		break;
511 
512 	/*
513 	 * Abort the TCP.
514 	 */
515 	case PRU_ABORT:
516 		tp = tcp_drop(tp, ECONNABORTED);
517 		break;
518 
519 	case PRU_SENSE:
520 		/*
521 		 * stat: don't bother with a blocksize.
522 		 */
523 		splx(s);
524 		return (0);
525 
526 	case PRU_RCVOOB:
527 		if (control && control->m_len) {
528 			m_freem(control);
529 			m_freem(m);
530 			error = EINVAL;
531 			break;
532 		}
533 		if ((so->so_oobmark == 0 &&
534 		    (so->so_state & SS_RCVATMARK) == 0) ||
535 		    so->so_options & SO_OOBINLINE ||
536 		    tp->t_oobflags & TCPOOB_HADDATA) {
537 			error = EINVAL;
538 			break;
539 		}
540 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
541 			error = EWOULDBLOCK;
542 			break;
543 		}
544 		m->m_len = 1;
545 		*mtod(m, caddr_t) = tp->t_iobc;
546 		if (((long)nam & MSG_PEEK) == 0)
547 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
548 		break;
549 
550 	case PRU_SENDOOB:
551 		if (sbspace(&so->so_snd) < -512) {
552 			m_freem(m);
553 			error = ENOBUFS;
554 			break;
555 		}
556 		/*
557 		 * According to RFC961 (Assigned Protocols),
558 		 * the urgent pointer points to the last octet
559 		 * of urgent data.  We continue, however,
560 		 * to consider it to indicate the first octet
561 		 * of data past the urgent section.
562 		 * Otherwise, snd_up should be one lower.
563 		 */
564 		sbappendstream(&so->so_snd, m);
565 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
566 		tp->t_force = 1;
567 		error = tcp_output(tp);
568 		tp->t_force = 0;
569 		break;
570 
571 	case PRU_SOCKADDR:
572 #ifdef INET
573 		if (inp)
574 			in_setsockaddr(inp, nam);
575 #endif
576 #ifdef INET6
577 		if (in6p)
578 			in6_setsockaddr(in6p, nam);
579 #endif
580 		break;
581 
582 	case PRU_PEERADDR:
583 #ifdef INET
584 		if (inp)
585 			in_setpeeraddr(inp, nam);
586 #endif
587 #ifdef INET6
588 		if (in6p)
589 			in6_setpeeraddr(in6p, nam);
590 #endif
591 		break;
592 
593 	default:
594 		panic("tcp_usrreq");
595 	}
596 #ifdef TCP_DEBUG
597 	if (tp && (so->so_options & SO_DEBUG))
598 		tcp_trace(TA_USER, ostate, tp, NULL, req);
599 #endif
600 
601 release:
602 	splx(s);
603 	return (error);
604 }
605 
606 int
607 tcp_ctloutput(int op, struct socket *so, int level, int optname,
608     struct mbuf **mp)
609 {
610 	int error = 0, s;
611 	struct inpcb *inp;
612 #ifdef INET6
613 	struct in6pcb *in6p;
614 #endif
615 	struct tcpcb *tp;
616 	struct mbuf *m;
617 	int i;
618 	int family;	/* family of the socket */
619 
620 	family = so->so_proto->pr_domain->dom_family;
621 
622 	s = splsoftnet();
623 	switch (family) {
624 #ifdef INET
625 	case PF_INET:
626 		inp = sotoinpcb(so);
627 #ifdef INET6
628 		in6p = NULL;
629 #endif
630 		break;
631 #endif
632 #ifdef INET6
633 	case PF_INET6:
634 		inp = NULL;
635 		in6p = sotoin6pcb(so);
636 		break;
637 #endif
638 	default:
639 		splx(s);
640 		return EAFNOSUPPORT;
641 	}
642 #ifndef INET6
643 	if (inp == NULL)
644 #else
645 	if (inp == NULL && in6p == NULL)
646 #endif
647 	{
648 		splx(s);
649 		if (op == PRCO_SETOPT && *mp)
650 			(void) m_free(*mp);
651 		return (ECONNRESET);
652 	}
653 	if (level != IPPROTO_TCP) {
654 		switch (family) {
655 #ifdef INET
656 		case PF_INET:
657 			error = ip_ctloutput(op, so, level, optname, mp);
658 			break;
659 #endif
660 #ifdef INET6
661 		case PF_INET6:
662 			error = ip6_ctloutput(op, so, level, optname, mp);
663 			break;
664 #endif
665 		}
666 		splx(s);
667 		return (error);
668 	}
669 	if (inp)
670 		tp = intotcpcb(inp);
671 #ifdef INET6
672 	else if (in6p)
673 		tp = in6totcpcb(in6p);
674 #endif
675 	else
676 		tp = NULL;
677 
678 	switch (op) {
679 
680 	case PRCO_SETOPT:
681 		m = *mp;
682 		switch (optname) {
683 
684 #ifdef TCP_SIGNATURE
685 		case TCP_MD5SIG:
686 			if (m == NULL || m->m_len < sizeof (int))
687 				error = EINVAL;
688 			if (error)
689 				break;
690 			if (*mtod(m, int *) > 0)
691 				tp->t_flags |= TF_SIGNATURE;
692 			else
693 				tp->t_flags &= ~TF_SIGNATURE;
694 			break;
695 #endif /* TCP_SIGNATURE */
696 
697 		case TCP_NODELAY:
698 			if (m == NULL || m->m_len < sizeof (int))
699 				error = EINVAL;
700 			else if (*mtod(m, int *))
701 				tp->t_flags |= TF_NODELAY;
702 			else
703 				tp->t_flags &= ~TF_NODELAY;
704 			break;
705 
706 		case TCP_MAXSEG:
707 			if (m && (i = *mtod(m, int *)) > 0 &&
708 			    i <= tp->t_peermss)
709 				tp->t_peermss = i;  /* limit on send size */
710 			else
711 				error = EINVAL;
712 			break;
713 
714 		default:
715 			error = ENOPROTOOPT;
716 			break;
717 		}
718 		if (m)
719 			(void) m_free(m);
720 		break;
721 
722 	case PRCO_GETOPT:
723 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
724 		m->m_len = sizeof(int);
725 		MCLAIM(m, so->so_mowner);
726 
727 		switch (optname) {
728 #ifdef TCP_SIGNATURE
729 		case TCP_MD5SIG:
730 			*mtod(m, int *) = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
731 			break;
732 #endif
733 		case TCP_NODELAY:
734 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
735 			break;
736 		case TCP_MAXSEG:
737 			*mtod(m, int *) = tp->t_peermss;
738 			break;
739 		default:
740 			error = ENOPROTOOPT;
741 			break;
742 		}
743 		break;
744 	}
745 	splx(s);
746 	return (error);
747 }
748 
/*
 * Default send and receive buffer reservations, in bytes, passed to
 * soreserve() by tcp_attach() when the socket has no buffer space set.
 * The defaults are parenthesized so the macros expand safely inside
 * larger expressions; either may be predefined to override it.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024*32)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024*32)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
757 
758 /*
759  * Attach TCP protocol to socket, allocating
760  * internet protocol control block, tcp control block,
761  * bufer space, and entering LISTEN state if to accept connections.
762  */
763 int
764 tcp_attach(struct socket *so)
765 {
766 	struct tcpcb *tp;
767 	struct inpcb *inp;
768 #ifdef INET6
769 	struct in6pcb *in6p;
770 #endif
771 	int error;
772 	int family;	/* family of the socket */
773 
774 	family = so->so_proto->pr_domain->dom_family;
775 
776 #ifdef MBUFTRACE
777 	so->so_mowner = &tcp_mowner;
778 	so->so_rcv.sb_mowner = &tcp_rx_mowner;
779 	so->so_snd.sb_mowner = &tcp_tx_mowner;
780 #endif
781 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
782 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
783 		if (error)
784 			return (error);
785 	}
786 	switch (family) {
787 #ifdef INET
788 	case PF_INET:
789 		error = in_pcballoc(so, &tcbtable);
790 		if (error)
791 			return (error);
792 		inp = sotoinpcb(so);
793 #ifdef INET6
794 		in6p = NULL;
795 #endif
796 		break;
797 #endif
798 #ifdef INET6
799 	case PF_INET6:
800 		error = in6_pcballoc(so, &tcbtable);
801 		if (error)
802 			return (error);
803 		inp = NULL;
804 		in6p = sotoin6pcb(so);
805 		break;
806 #endif
807 	default:
808 		return EAFNOSUPPORT;
809 	}
810 	if (inp)
811 		tp = tcp_newtcpcb(family, (void *)inp);
812 #ifdef INET6
813 	else if (in6p)
814 		tp = tcp_newtcpcb(family, (void *)in6p);
815 #endif
816 	else
817 		tp = NULL;
818 
819 	if (tp == 0) {
820 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
821 
822 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
823 #ifdef INET
824 		if (inp)
825 			in_pcbdetach(inp);
826 #endif
827 #ifdef INET6
828 		if (in6p)
829 			in6_pcbdetach(in6p);
830 #endif
831 		so->so_state |= nofd;
832 		return (ENOBUFS);
833 	}
834 	tp->t_state = TCPS_CLOSED;
835 	return (0);
836 }
837 
838 /*
839  * Initiate (or continue) disconnect.
840  * If embryonic state, just send reset (once).
841  * If in ``let data drain'' option and linger null, just drop.
842  * Otherwise (hard), mark socket disconnecting and drop
843  * current input data; switch states based on user close, and
844  * send segment to peer (with FIN).
845  */
846 struct tcpcb *
847 tcp_disconnect(struct tcpcb *tp)
848 {
849 	struct socket *so;
850 
851 	if (tp->t_inpcb)
852 		so = tp->t_inpcb->inp_socket;
853 #ifdef INET6
854 	else if (tp->t_in6pcb)
855 		so = tp->t_in6pcb->in6p_socket;
856 #endif
857 	else
858 		so = NULL;
859 
860 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
861 		tp = tcp_close(tp);
862 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
863 		tp = tcp_drop(tp, 0);
864 	else {
865 		soisdisconnecting(so);
866 		sbflush(&so->so_rcv);
867 		tp = tcp_usrclosed(tp);
868 		if (tp)
869 			(void) tcp_output(tp);
870 	}
871 	return (tp);
872 }
873 
874 /*
875  * User issued close, and wish to trail through shutdown states:
876  * if never received SYN, just forget it.  If got a SYN from peer,
877  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
878  * If already got a FIN from peer, then almost done; go to LAST_ACK
879  * state.  In all other cases, have already sent FIN to peer (e.g.
880  * after PRU_SHUTDOWN), and just have to play tedious game waiting
881  * for peer to send FIN or not respond to keep-alives, etc.
882  * We can let the user exit from the close as soon as the FIN is acked.
883  */
884 struct tcpcb *
885 tcp_usrclosed(struct tcpcb *tp)
886 {
887 
888 	switch (tp->t_state) {
889 
890 	case TCPS_CLOSED:
891 	case TCPS_LISTEN:
892 	case TCPS_SYN_SENT:
893 		tp->t_state = TCPS_CLOSED;
894 		tp = tcp_close(tp);
895 		break;
896 
897 	case TCPS_SYN_RECEIVED:
898 	case TCPS_ESTABLISHED:
899 		tp->t_state = TCPS_FIN_WAIT_1;
900 		break;
901 
902 	case TCPS_CLOSE_WAIT:
903 		tp->t_state = TCPS_LAST_ACK;
904 		break;
905 	}
906 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
907 		struct socket *so;
908 		if (tp->t_inpcb)
909 			so = tp->t_inpcb->inp_socket;
910 #ifdef INET6
911 		else if (tp->t_in6pcb)
912 			so = tp->t_in6pcb->in6p_socket;
913 #endif
914 		else
915 			so = NULL;
916 		soisdisconnected(so);
917 		/*
918 		 * If we are in FIN_WAIT_2, we arrived here because the
919 		 * application did a shutdown of the send side.  Like the
920 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
921 		 * a full close, we start a timer to make sure sockets are
922 		 * not left in FIN_WAIT_2 forever.
923 		 */
924 		if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0))
925 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
926 	}
927 	return (tp);
928 }
929 
930 /*
931  * sysctl helper routine for net.inet.ip.mssdflt.  it can't be less
932  * than 32.
933  */
934 static int
935 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS)
936 {
937 	int error, mssdflt;
938 	struct sysctlnode node;
939 
940 	mssdflt = tcp_mssdflt;
941 	node = *rnode;
942 	node.sysctl_data = &mssdflt;
943 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
944 	if (error || newp == NULL)
945 		return (error);
946 
947 	if (mssdflt < 32)
948 		return (EINVAL);
949 	tcp_mssdflt = mssdflt;
950 
951 	return (0);
952 }
953 
954 /*
955  * sysctl helper routine for setting port related values under
956  * net.inet.ip and net.inet6.ip6.  does basic range checking and does
957  * additional checks for each type.  this code has placed in
958  * tcp_input.c since INET and INET6 both use the same tcp code.
959  *
960  * this helper is not static so that both inet and inet6 can use it.
961  */
962 int
963 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS)
964 {
965 	int error, tmp;
966 	int apmin, apmax;
967 #ifndef IPNOPRIVPORTS
968 	int lpmin, lpmax;
969 #endif /* IPNOPRIVPORTS */
970 	struct sysctlnode node;
971 
972 	if (namelen != 0)
973 		return (EINVAL);
974 
975 	switch (name[-3]) {
976 #ifdef INET
977 	    case PF_INET:
978 		apmin = anonportmin;
979 		apmax = anonportmax;
980 #ifndef IPNOPRIVPORTS
981 		lpmin = lowportmin;
982 		lpmax = lowportmax;
983 #endif /* IPNOPRIVPORTS */
984 		break;
985 #endif /* INET */
986 #ifdef INET6
987 	    case PF_INET6:
988 		apmin = ip6_anonportmin;
989 		apmax = ip6_anonportmax;
990 #ifndef IPNOPRIVPORTS
991 		lpmin = ip6_lowportmin;
992 		lpmax = ip6_lowportmax;
993 #endif /* IPNOPRIVPORTS */
994 		break;
995 #endif /* INET6 */
996 	    default:
997 		return (EINVAL);
998 	}
999 
1000 	/*
1001 	 * insert temporary copy into node, perform lookup on
1002 	 * temporary, then restore pointer
1003 	 */
1004 	node = *rnode;
1005 	tmp = *(int*)rnode->sysctl_data;
1006 	node.sysctl_data = &tmp;
1007 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
1008 	if (error || newp == NULL)
1009 		return (error);
1010 
1011 	/*
1012 	 * simple port range check
1013 	 */
1014 	if (tmp < 0 || tmp > 65535)
1015 		return (EINVAL);
1016 
1017 	/*
1018 	 * per-node range checks
1019 	 */
1020 	switch (rnode->sysctl_num) {
1021 	case IPCTL_ANONPORTMIN:
1022 		if (tmp >= apmax)
1023 			return (EINVAL);
1024 #ifndef IPNOPRIVPORTS
1025 		if (tmp < IPPORT_RESERVED)
1026                         return (EINVAL);
1027 #endif /* IPNOPRIVPORTS */
1028 		break;
1029 
1030 	case IPCTL_ANONPORTMAX:
1031                 if (apmin >= tmp)
1032 			return (EINVAL);
1033 #ifndef IPNOPRIVPORTS
1034 		if (tmp < IPPORT_RESERVED)
1035                         return (EINVAL);
1036 #endif /* IPNOPRIVPORTS */
1037 		break;
1038 
1039 #ifndef IPNOPRIVPORTS
1040 	case IPCTL_LOWPORTMIN:
1041 		if (tmp >= lpmax ||
1042 		    tmp > IPPORT_RESERVEDMAX ||
1043 		    tmp < IPPORT_RESERVEDMIN)
1044 			return (EINVAL);
1045 		break;
1046 
1047 	case IPCTL_LOWPORTMAX:
1048 		if (lpmin >= tmp ||
1049 		    tmp > IPPORT_RESERVEDMAX ||
1050 		    tmp < IPPORT_RESERVEDMIN)
1051 			return (EINVAL);
1052 		break;
1053 #endif /* IPNOPRIVPORTS */
1054 
1055 	default:
1056 		return (EINVAL);
1057 	}
1058 
1059 	*(int*)rnode->sysctl_data = tmp;
1060 
1061 	return (0);
1062 }
1063 
1064 /*
1065  * sysctl helper routine for the net.inet.tcp.ident and
1066  * net.inet6.tcp6.ident nodes.  contains backwards compat code for the
1067  * old way of looking up the ident information for ipv4 which involves
1068  * stuffing the port/addr pairs into the mib lookup.
1069  */
1070 static int
1071 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS)
1072 {
1073 #ifdef INET
1074 	struct inpcb *inb;
1075 	struct sockaddr_in *si4[2];
1076 #endif /* INET */
1077 #ifdef INET6
1078 	struct in6pcb *in6b;
1079 	struct sockaddr_in6 *si6[2];
1080 #endif /* INET6 */
1081 	struct sockaddr_storage sa[2];
1082 	struct socket *sockp;
1083 	size_t sz;
1084 	uid_t uid;
1085 	int error, pf;
1086 
1087 	if (namelen != 4 && namelen != 0)
1088 		return (EINVAL);
1089 	if (name[-2] != IPPROTO_TCP)
1090 		return (EINVAL);
1091 	pf = name[-3];
1092 
1093 	/* old style lookup, ipv4 only */
1094 	if (namelen == 4) {
1095 #ifdef INET
1096 		struct in_addr laddr, raddr;
1097 		u_int lport, rport;
1098 
1099 		if (pf != PF_INET)
1100 			return (EPROTONOSUPPORT);
1101 		raddr.s_addr = (uint32_t)name[0];
1102 		rport = (u_int)name[1];
1103 		laddr.s_addr = (uint32_t)name[2];
1104 		lport = (u_int)name[3];
1105 		inb = in_pcblookup_connect(&tcbtable, raddr, rport,
1106 					   laddr, lport);
1107 		if (inb == NULL || (sockp = inb->inp_socket) == NULL)
1108 			return (ESRCH);
1109 		uid = sockp->so_uid;
1110 		if (oldp) {
1111 			sz = MIN(sizeof(uid), *oldlenp);
1112 			error = copyout(&uid, oldp, sz);
1113 			if (error)
1114 				return (error);
1115 		}
1116 		*oldlenp = sizeof(uid);
1117 		return (0);
1118 #else /* INET */
1119 		return (EINVAL);
1120 #endif /* INET */
1121 	}
1122 
1123 	if (newp == NULL || newlen != sizeof(sa))
1124 		return (EINVAL);
1125 	error = copyin(newp, &sa, newlen);
1126 	if (error)
1127 		return (error);
1128 
1129 	/*
1130 	 * requested families must match
1131 	 */
1132 	if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family)
1133 		return (EINVAL);
1134 
1135 	switch (pf) {
1136 #ifdef INET
1137 	    case PF_INET:
1138 		si4[0] = (struct sockaddr_in*)&sa[0];
1139 		si4[1] = (struct sockaddr_in*)&sa[1];
1140 		if (si4[0]->sin_len != sizeof(*si4[0]) ||
1141 		    si4[0]->sin_len != si4[1]->sin_len)
1142 			return (EINVAL);
1143 		inb = in_pcblookup_connect(&tcbtable,
1144 		    si4[0]->sin_addr, si4[0]->sin_port,
1145 		    si4[1]->sin_addr, si4[1]->sin_port);
1146 		if (inb == NULL || (sockp = inb->inp_socket) == NULL)
1147 			return (ESRCH);
1148 		break;
1149 #endif /* INET */
1150 #ifdef INET6
1151 	    case PF_INET6:
1152 		si6[0] = (struct sockaddr_in6*)&sa[0];
1153 		si6[1] = (struct sockaddr_in6*)&sa[1];
1154 		if (si6[0]->sin6_len != sizeof(*si6[0]) ||
1155 		    si6[0]->sin6_len != si6[1]->sin6_len)
1156 			return (EINVAL);
1157 		in6b = in6_pcblookup_connect(&tcbtable,
1158 		    &si6[0]->sin6_addr, si6[0]->sin6_port,
1159 		    &si6[1]->sin6_addr, si6[1]->sin6_port, 0);
1160 		if (in6b == NULL || (sockp = in6b->in6p_socket) == NULL)
1161 			return (ESRCH);
1162 		break;
1163 #endif /* INET6 */
1164 	    default:
1165 		return (EPROTONOSUPPORT);
1166 	}
1167 
1168 	uid = sockp->so_uid;
1169 	if (oldp) {
1170 		sz = MIN(sizeof(uid), *oldlenp);
1171 		error = copyout(&uid, oldp, sz);
1172 		if (error)
1173 			return (error);
1174 	}
1175 	*oldlenp = sizeof(uid);
1176 
1177 	return (0);
1178 }
1179 
1180 /*
1181  * this (second stage) setup routine is a replacement for tcp_sysctl()
1182  * (which is currently used for ipv4 and ipv6)
1183  */
1184 static void
1185 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname,
1186 			   const char *tcpname)
1187 {
1188 
1189 	sysctl_createv(clog, 0, NULL, NULL,
1190 		       CTLFLAG_PERMANENT,
1191 		       CTLTYPE_NODE, "net", NULL,
1192 		       NULL, 0, NULL, 0,
1193 		       CTL_NET, CTL_EOL);
1194 	sysctl_createv(clog, 0, NULL, NULL,
1195 		       CTLFLAG_PERMANENT,
1196 		       CTLTYPE_NODE, pfname, NULL,
1197 		       NULL, 0, NULL, 0,
1198 		       CTL_NET, pf, CTL_EOL);
1199 	sysctl_createv(clog, 0, NULL, NULL,
1200 		       CTLFLAG_PERMANENT,
1201 		       CTLTYPE_NODE, tcpname,
1202 		       SYSCTL_DESCR("TCP related settings"),
1203 		       NULL, 0, NULL, 0,
1204 		       CTL_NET, pf, IPPROTO_TCP, CTL_EOL);
1205 
1206 	sysctl_createv(clog, 0, NULL, NULL,
1207 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1208 		       CTLTYPE_INT, "rfc1323",
1209 		       SYSCTL_DESCR("Enable RFC1323 TCP extensions"),
1210 		       NULL, 0, &tcp_do_rfc1323, 0,
1211 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL);
1212 	sysctl_createv(clog, 0, NULL, NULL,
1213 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1214 		       CTLTYPE_INT, "sendspace",
1215 		       SYSCTL_DESCR("Default TCP send buffer size"),
1216 		       NULL, 0, &tcp_sendspace, 0,
1217 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL);
1218 	sysctl_createv(clog, 0, NULL, NULL,
1219 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1220 		       CTLTYPE_INT, "recvspace",
1221 		       SYSCTL_DESCR("Default TCP receive buffer size"),
1222 		       NULL, 0, &tcp_recvspace, 0,
1223 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL);
1224 	sysctl_createv(clog, 0, NULL, NULL,
1225 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1226 		       CTLTYPE_INT, "mssdflt",
1227 		       SYSCTL_DESCR("Default maximum segment size"),
1228 		       sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0,
1229 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL);
1230 	sysctl_createv(clog, 0, NULL, NULL,
1231 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1232 		       CTLTYPE_INT, "syn_cache_limit",
1233 		       SYSCTL_DESCR("Maximum number of entries in the TCP "
1234 				    "compressed state engine"),
1235 		       NULL, 0, &tcp_syn_cache_limit, 0,
1236 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT,
1237 		       CTL_EOL);
1238 	sysctl_createv(clog, 0, NULL, NULL,
1239 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1240 		       CTLTYPE_INT, "syn_bucket_limit",
1241 		       SYSCTL_DESCR("Maximum number of entries per hash "
1242 				    "bucket in the TCP compressed state "
1243 				    "engine"),
1244 		       NULL, 0, &tcp_syn_bucket_limit, 0,
1245 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT,
1246 		       CTL_EOL);
1247 #if 0 /* obsoleted */
1248 	sysctl_createv(clog, 0, NULL, NULL,
1249 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1250 		       CTLTYPE_INT, "syn_cache_interval",
1251 		       SYSCTL_DESCR("TCP compressed state engine's timer interval"),
1252 		       NULL, 0, &tcp_syn_cache_interval, 0,
1253 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER,
1254 		       CTL_EOL);
1255 #endif
1256 	sysctl_createv(clog, 0, NULL, NULL,
1257 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1258 		       CTLTYPE_INT, "init_win",
1259 		       SYSCTL_DESCR("Initial TCP congestion window"),
1260 		       NULL, 0, &tcp_init_win, 0,
1261 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL);
1262 	sysctl_createv(clog, 0, NULL, NULL,
1263 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1264 		       CTLTYPE_INT, "mss_ifmtu",
1265 		       SYSCTL_DESCR("Use interface MTU for calculating MSS"),
1266 		       NULL, 0, &tcp_mss_ifmtu, 0,
1267 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL);
1268 	sysctl_createv(clog, 0, NULL, NULL,
1269 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1270 		       CTLTYPE_INT, "sack",
1271 		       SYSCTL_DESCR("Enable RFC2018 Selection ACKnowledgement "
1272 				    "(not implemented)"),
1273 		       NULL, 0, &tcp_do_sack, 0,
1274 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL);
1275 	sysctl_createv(clog, 0, NULL, NULL,
1276 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1277 		       CTLTYPE_INT, "win_scale",
1278 		       SYSCTL_DESCR("Use RFC1323 window scale options"),
1279 		       NULL, 0, &tcp_do_win_scale, 0,
1280 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL);
1281 	sysctl_createv(clog, 0, NULL, NULL,
1282 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1283 		       CTLTYPE_INT, "timestamps",
1284 		       SYSCTL_DESCR("Use RFC1323 time stamp options"),
1285 		       NULL, 0, &tcp_do_timestamps, 0,
1286 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL);
1287 	sysctl_createv(clog, 0, NULL, NULL,
1288 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1289 		       CTLTYPE_INT, "compat_42",
1290 		       SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"),
1291 		       NULL, 0, &tcp_compat_42, 0,
1292 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL);
1293 	sysctl_createv(clog, 0, NULL, NULL,
1294 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1295 		       CTLTYPE_INT, "cwm",
1296 		       SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window "
1297 				    "Monitoring"),
1298 		       NULL, 0, &tcp_cwm, 0,
1299 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL);
1300 	sysctl_createv(clog, 0, NULL, NULL,
1301 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1302 		       CTLTYPE_INT, "cwm_burstsize",
1303 		       SYSCTL_DESCR("Congestion Window Monitoring allowed "
1304 				    "burst count in packets"),
1305 		       NULL, 0, &tcp_cwm_burstsize, 0,
1306 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE,
1307 		       CTL_EOL);
1308 	sysctl_createv(clog, 0, NULL, NULL,
1309 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1310 		       CTLTYPE_INT, "ack_on_push",
1311 		       SYSCTL_DESCR("Immediately return ACK when PSH is "
1312 				    "received"),
1313 		       NULL, 0, &tcp_ack_on_push, 0,
1314 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL);
1315 	sysctl_createv(clog, 0, NULL, NULL,
1316 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1317 		       CTLTYPE_INT, "keepidle",
1318 		       SYSCTL_DESCR("Allowed connection idle ticks before a "
1319 				    "keepalive probe is sent"),
1320 		       NULL, 0, &tcp_keepidle, 0,
1321 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL);
1322 	sysctl_createv(clog, 0, NULL, NULL,
1323 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1324 		       CTLTYPE_INT, "keepintvl",
1325 		       SYSCTL_DESCR("Ticks before next keepalive probe is sent"),
1326 		       NULL, 0, &tcp_keepintvl, 0,
1327 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL);
1328 	sysctl_createv(clog, 0, NULL, NULL,
1329 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1330 		       CTLTYPE_INT, "keepcnt",
1331 		       SYSCTL_DESCR("Number of keepalive probes to send"),
1332 		       NULL, 0, &tcp_keepcnt, 0,
1333 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
1334 	sysctl_createv(clog, 0, NULL, NULL,
1335 		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
1336 		       CTLTYPE_INT, "slowhz",
1337 		       SYSCTL_DESCR("Keepalive ticks per second"),
1338 		       NULL, PR_SLOWHZ, NULL, 0,
1339 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL);
1340 	sysctl_createv(clog, 0, NULL, NULL,
1341 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1342 		       CTLTYPE_INT, "newreno",
1343 		       SYSCTL_DESCR("NewReno congestion control algorithm"),
1344 		       NULL, 0, &tcp_do_newreno, 0,
1345 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_NEWRENO, CTL_EOL);
1346 	sysctl_createv(clog, 0, NULL, NULL,
1347 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1348 		       CTLTYPE_INT, "log_refused",
1349 		       SYSCTL_DESCR("Log refused TCP connections"),
1350 		       NULL, 0, &tcp_log_refused, 0,
1351 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL);
1352 #if 0 /* obsoleted */
1353 	sysctl_createv(clog, 0, NULL, NULL,
1354 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1355 		       CTLTYPE_INT, "rstratelimit", NULL,
1356 		       NULL, 0, &tcp_rst_ratelim, 0,
1357 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL);
1358 #endif
1359 	sysctl_createv(clog, 0, NULL, NULL,
1360 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1361 		       CTLTYPE_INT, "rstppslimit",
1362 		       SYSCTL_DESCR("Maximum number of RST packets to send "
1363 				    "per second"),
1364 		       NULL, 0, &tcp_rst_ppslim, 0,
1365 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL);
1366 	sysctl_createv(clog, 0, NULL, NULL,
1367 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1368 		       CTLTYPE_INT, "delack_ticks",
1369 		       SYSCTL_DESCR("Number of ticks to delay sending an ACK"),
1370 		       NULL, 0, &tcp_delack_ticks, 0,
1371 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL);
1372 	sysctl_createv(clog, 0, NULL, NULL,
1373 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1374 		       CTLTYPE_INT, "init_win_local",
1375 		       SYSCTL_DESCR("Initial TCP window size (in segments)"),
1376 		       NULL, 0, &tcp_init_win_local, 0,
1377 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL,
1378 		       CTL_EOL);
1379 	sysctl_createv(clog, 0, NULL, NULL,
1380 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1381 		       CTLTYPE_STRUCT, "ident",
1382 		       SYSCTL_DESCR("RFC1413 Identification Protocol lookups"),
1383 		       sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t),
1384 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL);
1385 	sysctl_createv(clog, 0, NULL, NULL,
1386 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1387 		       CTLTYPE_INT, "do_loopback_cksum",
1388 		       SYSCTL_DESCR("Perform TCP checksum on loopback"),
1389 		       NULL, 0, &tcp_do_loopback_cksum, 0,
1390 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM,
1391 		       CTL_EOL);
1392 }
1393 
/*
 * Sysctl setup hook for the IPv4 TCP variables: registers the
 * net.inet.tcp subtree.  Only built when the kernel has INET.
 */
#ifdef INET
SYSCTL_SETUP(sysctl_net_inet_tcp_setup, "sysctl net.inet.tcp subtree setup")
{
	/* All node creation lives in the shared second-stage routine. */
	sysctl_net_inet_tcp_setup2(clog, PF_INET, "inet", "tcp");
}
#endif /* INET */
1404 
#ifdef INET6
/*
 * Sysctl setup hook for the IPv6 TCP variables: registers the
 * net.inet6.tcp6 subtree.  Only built when the kernel has INET6.
 */
SYSCTL_SETUP(sysctl_net_inet6_tcp6_setup, "sysctl net.inet6.tcp6 subtree setup")
{
	/* Same second-stage routine as IPv4, with the inet6/tcp6 names. */
	sysctl_net_inet_tcp_setup2(clog, PF_INET6, "inet6", "tcp6");
}
#endif /* INET6 */
1412 
1413