xref: /netbsd-src/sys/netinet/tcp_usrreq.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*	$NetBSD: tcp_usrreq.c,v 1.52 2000/06/28 03:01:17 mrg Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
38  * Facility, NASA Ames Research Center.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed by the NetBSD
51  *	Foundation, Inc. and its contributors.
52  * 4. Neither the name of The NetBSD Foundation nor the names of its
53  *    contributors may be used to endorse or promote products derived
54  *    from this software without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 /*
70  * Copyright (c) 1982, 1986, 1988, 1993, 1995
71  *	The Regents of the University of California.  All rights reserved.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)tcp_usrreq.c	8.5 (Berkeley) 6/21/95
102  */
103 
104 #include "opt_inet.h"
105 #include "opt_ipsec.h"
106 
107 #include <sys/param.h>
108 #include <sys/systm.h>
109 #include <sys/kernel.h>
110 #include <sys/malloc.h>
111 #include <sys/mbuf.h>
112 #include <sys/socket.h>
113 #include <sys/socketvar.h>
114 #include <sys/protosw.h>
115 #include <sys/errno.h>
116 #include <sys/stat.h>
117 #include <sys/proc.h>
118 #include <sys/ucred.h>
119 #include <sys/domain.h>
120 
121 #include <uvm/uvm_extern.h>
122 #include <sys/sysctl.h>
123 
124 #include <net/if.h>
125 #include <net/route.h>
126 
127 #include <netinet/in.h>
128 #include <netinet/in_systm.h>
129 #include <netinet/in_var.h>
130 #include <netinet/ip.h>
131 #include <netinet/in_pcb.h>
132 #include <netinet/ip_var.h>
133 
134 #ifdef INET6
135 #ifndef INET
136 #include <netinet/in.h>
137 #endif
138 #include <netinet/ip6.h>
139 #include <netinet6/in6_pcb.h>
140 #include <netinet6/ip6_var.h>
141 #endif
142 
143 #include <netinet/tcp.h>
144 #include <netinet/tcp_fsm.h>
145 #include <netinet/tcp_seq.h>
146 #include <netinet/tcp_timer.h>
147 #include <netinet/tcp_var.h>
148 #include <netinet/tcpip.h>
149 #include <netinet/tcp_debug.h>
150 
151 #include "opt_tcp_recvspace.h"
152 #include "opt_tcp_sendspace.h"
153 
154 #ifdef IPSEC
155 #include <netinet6/ipsec.h>
156 #endif /*IPSEC*/
157 
158 /*
159  * TCP protocol interface to socket abstraction.
160  */
161 extern	char *tcpstates[];
162 
163 /*
164  * Process a TCP user request for TCP tb.  If this is a send request
165  * then m is the mbuf chain of send data.  If this is a timer expiration
166  * (called from the software clock routine), then timertype tells which timer.
167  */
168 /*ARGSUSED*/
169 int
170 tcp_usrreq(so, req, m, nam, control, p)
171 	struct socket *so;
172 	int req;
173 	struct mbuf *m, *nam, *control;
174 	struct proc *p;
175 {
176 	struct inpcb *inp;
177 #ifdef INET6
178 	struct in6pcb *in6p;
179 #endif
180 	struct tcpcb *tp = NULL;
181 	int s;
182 	int error = 0;
183 	int ostate;
184 	int family;	/* family of the socket */
185 
186 	family = so->so_proto->pr_domain->dom_family;
187 
188 	if (req == PRU_CONTROL) {
189 		switch (family) {
190 		case PF_INET:
191 			return (in_control(so, (long)m, (caddr_t)nam,
192 			    (struct ifnet *)control, p));
193 #ifdef INET6
194 		case PF_INET6:
195 			return (in6_control(so, (long)m, (caddr_t)nam,
196 			    (struct ifnet *)control, p));
197 #endif
198 		default:
199 			return EAFNOSUPPORT;
200 		}
201 	}
202 
203 	if (req == PRU_PURGEIF) {
204 		in_purgeif((struct ifnet *)control);
205 		in_pcbpurgeif(&tcbtable, (struct ifnet *)control);
206 #ifdef INET6
207 		in6_purgeif((struct ifnet *)control);
208 		in6_pcbpurgeif(&tcb6, (struct ifnet *)control);
209 #endif
210 		return (0);
211 	}
212 
213 	s = splsoftnet();
214 	switch (family) {
215 	case PF_INET:
216 		inp = sotoinpcb(so);
217 #ifdef INET6
218 		in6p = NULL;
219 #endif
220 		break;
221 #ifdef INET6
222 	case PF_INET6:
223 		inp = NULL;
224 		in6p = sotoin6pcb(so);
225 		break;
226 #endif
227 	default:
228 		splx(s);
229 		return EAFNOSUPPORT;
230 	}
231 
232 #ifdef DIAGNOSTIC
233 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
234 		panic("tcp_usrreq: unexpected control mbuf");
235 #endif
236 	/*
237 	 * When a TCP is attached to a socket, then there will be
238 	 * a (struct inpcb) pointed at by the socket, and this
239 	 * structure will point at a subsidary (struct tcpcb).
240 	 */
241 #ifndef INET6
242 	if (inp == 0 && req != PRU_ATTACH)
243 #else
244 	if ((inp == 0 && in6p == 0) && req != PRU_ATTACH)
245 #endif
246 	{
247 		error = EINVAL;
248 		goto release;
249 	}
250 	if (inp) {
251 		tp = intotcpcb(inp);
252 		/* WHAT IF TP IS 0? */
253 #ifdef KPROF
254 		tcp_acounts[tp->t_state][req]++;
255 #endif
256 		ostate = tp->t_state;
257 	}
258 #ifdef INET6
259 	else if (in6p) {
260 		tp = in6totcpcb(in6p);
261 		/* WHAT IF TP IS 0? */
262 #ifdef KPROF
263 		tcp_acounts[tp->t_state][req]++;
264 #endif
265 		ostate = tp->t_state;
266 	}
267 #endif
268 	else
269 		ostate = 0;
270 
271 	switch (req) {
272 
273 	/*
274 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
275 	 * and an internet control block.
276 	 */
277 	case PRU_ATTACH:
278 #ifndef INET6
279 		if (inp != 0)
280 #else
281 		if (inp != 0 || in6p != 0)
282 #endif
283 		{
284 			error = EISCONN;
285 			break;
286 		}
287 		error = tcp_attach(so);
288 		if (error)
289 			break;
290 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
291 			so->so_linger = TCP_LINGERTIME;
292 		tp = sototcpcb(so);
293 		break;
294 
295 	/*
296 	 * PRU_DETACH detaches the TCP protocol from the socket.
297 	 */
298 	case PRU_DETACH:
299 		tp = tcp_disconnect(tp);
300 		break;
301 
302 	/*
303 	 * Give the socket an address.
304 	 */
305 	case PRU_BIND:
306 		switch (family) {
307 		case PF_INET:
308 			error = in_pcbbind(inp, nam, p);
309 			break;
310 #ifdef INET6
311 		case PF_INET6:
312 			error = in6_pcbbind(in6p, nam, p);
313 			/* mapped addr case */
314 			if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
315 				tp->t_family = AF_INET;
316 			break;
317 #endif
318 		}
319 		break;
320 
321 	/*
322 	 * Prepare to accept connections.
323 	 */
324 	case PRU_LISTEN:
325 		if (inp && inp->inp_lport == 0) {
326 			error = in_pcbbind(inp, (struct mbuf *)0,
327 			    (struct proc *)0);
328 			if (error)
329 				break;
330 		}
331 #ifdef INET6
332 		else if (in6p && in6p->in6p_lport == 0) {
333 			error = in6_pcbbind(in6p, (struct mbuf *)0,
334 			    (struct proc *)0);
335 			if (error)
336 				break;
337 		}
338 #endif
339 		tp->t_state = TCPS_LISTEN;
340 		break;
341 
342 	/*
343 	 * Initiate connection to peer.
344 	 * Create a template for use in transmissions on this connection.
345 	 * Enter SYN_SENT state, and mark socket as connecting.
346 	 * Start keep-alive timer, and seed output sequence space.
347 	 * Send initial segment on connection.
348 	 */
349 	case PRU_CONNECT:
350 		if (inp) {
351 			if (inp->inp_lport == 0) {
352 				error = in_pcbbind(inp, (struct mbuf *)0,
353 				    (struct proc *)0);
354 				if (error)
355 					break;
356 			}
357 			error = in_pcbconnect(inp, nam);
358 		}
359 #ifdef INET6
360 		else if (in6p) {
361 			if (in6p->in6p_lport == 0) {
362 				error = in6_pcbbind(in6p, (struct mbuf *)0,
363 				    (struct proc *)0);
364 				if (error)
365 					break;
366 			}
367 			error = in6_pcbconnect(in6p, nam);
368 			/* mapped addr case */
369 			if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
370 				tp->t_family = AF_INET;
371 		}
372 #endif
373 		if (error)
374 			break;
375 		tp->t_template = tcp_template(tp);
376 		if (tp->t_template == 0) {
377 			if (inp)
378 				in_pcbdisconnect(inp);
379 #ifdef INET6
380 			else if (in6p)
381 				in6_pcbdisconnect(in6p);
382 #endif
383 			error = ENOBUFS;
384 			break;
385 		}
386 		/* Compute window scaling to request.  */
387 		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
388 		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
389 			tp->request_r_scale++;
390 		soisconnecting(so);
391 		tcpstat.tcps_connattempt++;
392 		tp->t_state = TCPS_SYN_SENT;
393 		TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT);
394 		tp->iss = tcp_new_iss(tp, sizeof(struct tcpcb), 0);
395 		tcp_sendseqinit(tp);
396 		error = tcp_output(tp);
397 		break;
398 
399 	/*
400 	 * Create a TCP connection between two sockets.
401 	 */
402 	case PRU_CONNECT2:
403 		error = EOPNOTSUPP;
404 		break;
405 
406 	/*
407 	 * Initiate disconnect from peer.
408 	 * If connection never passed embryonic stage, just drop;
409 	 * else if don't need to let data drain, then can just drop anyways,
410 	 * else have to begin TCP shutdown process: mark socket disconnecting,
411 	 * drain unread data, state switch to reflect user close, and
412 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
413 	 * when peer sends FIN and acks ours.
414 	 *
415 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
416 	 */
417 	case PRU_DISCONNECT:
418 		tp = tcp_disconnect(tp);
419 		break;
420 
421 	/*
422 	 * Accept a connection.  Essentially all the work is
423 	 * done at higher levels; just return the address
424 	 * of the peer, storing through addr.
425 	 */
426 	case PRU_ACCEPT:
427 		if (inp)
428 			in_setpeeraddr(inp, nam);
429 #ifdef INET6
430 		else if (in6p)
431 			in6_setpeeraddr(in6p, nam);
432 #endif
433 		break;
434 
435 	/*
436 	 * Mark the connection as being incapable of further output.
437 	 */
438 	case PRU_SHUTDOWN:
439 		socantsendmore(so);
440 		tp = tcp_usrclosed(tp);
441 		if (tp)
442 			error = tcp_output(tp);
443 		break;
444 
445 	/*
446 	 * After a receive, possibly send window update to peer.
447 	 */
448 	case PRU_RCVD:
449 		(void) tcp_output(tp);
450 		break;
451 
452 	/*
453 	 * Do a send by putting data in output queue and updating urgent
454 	 * marker if URG set.  Possibly send more data.
455 	 */
456 	case PRU_SEND:
457 		if (control && control->m_len) {
458 			m_freem(control);
459 			m_freem(m);
460 			error = EINVAL;
461 			break;
462 		}
463 		sbappend(&so->so_snd, m);
464 		error = tcp_output(tp);
465 		break;
466 
467 	/*
468 	 * Abort the TCP.
469 	 */
470 	case PRU_ABORT:
471 		tp = tcp_drop(tp, ECONNABORTED);
472 		break;
473 
474 	case PRU_SENSE:
475 		/*
476 		 * stat: don't bother with a blocksize.
477 		 */
478 		splx(s);
479 		return (0);
480 
481 	case PRU_RCVOOB:
482 		if (control && control->m_len) {
483 			m_freem(control);
484 			m_freem(m);
485 			error = EINVAL;
486 			break;
487 		}
488 		if ((so->so_oobmark == 0 &&
489 		    (so->so_state & SS_RCVATMARK) == 0) ||
490 		    so->so_options & SO_OOBINLINE ||
491 		    tp->t_oobflags & TCPOOB_HADDATA) {
492 			error = EINVAL;
493 			break;
494 		}
495 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
496 			error = EWOULDBLOCK;
497 			break;
498 		}
499 		m->m_len = 1;
500 		*mtod(m, caddr_t) = tp->t_iobc;
501 		if (((long)nam & MSG_PEEK) == 0)
502 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
503 		break;
504 
505 	case PRU_SENDOOB:
506 		if (sbspace(&so->so_snd) < -512) {
507 			m_freem(m);
508 			error = ENOBUFS;
509 			break;
510 		}
511 		/*
512 		 * According to RFC961 (Assigned Protocols),
513 		 * the urgent pointer points to the last octet
514 		 * of urgent data.  We continue, however,
515 		 * to consider it to indicate the first octet
516 		 * of data past the urgent section.
517 		 * Otherwise, snd_up should be one lower.
518 		 */
519 		sbappend(&so->so_snd, m);
520 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
521 		tp->t_force = 1;
522 		error = tcp_output(tp);
523 		tp->t_force = 0;
524 		break;
525 
526 	case PRU_SOCKADDR:
527 		if (inp)
528 			in_setsockaddr(inp, nam);
529 #ifdef INET6
530 		else if (in6p)
531 			in6_setsockaddr(in6p, nam);
532 #endif
533 		break;
534 
535 	case PRU_PEERADDR:
536 		if (inp)
537 			in_setpeeraddr(inp, nam);
538 #ifdef INET6
539 		else if (in6p)
540 			in6_setpeeraddr(in6p, nam);
541 #endif
542 		break;
543 
544 	/*
545 	 * TCP slow timer went off; going through this
546 	 * routine for tracing's sake.
547 	 */
548 	case PRU_SLOWTIMO:
549 		tp = tcp_timers(tp, (long)nam);
550 		req |= (long)nam << 8;		/* for debug's sake */
551 		break;
552 
553 	default:
554 		panic("tcp_usrreq");
555 	}
556 	if (tp && (so->so_options & SO_DEBUG))
557 		tcp_trace(TA_USER, ostate, tp, NULL, req);
558 
559 release:
560 	splx(s);
561 	return (error);
562 }
563 
564 int
565 tcp_ctloutput(op, so, level, optname, mp)
566 	int op;
567 	struct socket *so;
568 	int level, optname;
569 	struct mbuf **mp;
570 {
571 	int error = 0, s;
572 	struct inpcb *inp;
573 #ifdef INET6
574 	struct in6pcb *in6p;
575 #endif
576 	struct tcpcb *tp;
577 	struct mbuf *m;
578 	int i;
579 	int family;	/* family of the socket */
580 
581 	family = so->so_proto->pr_domain->dom_family;
582 
583 	s = splsoftnet();
584 	switch (family) {
585 	case PF_INET:
586 		inp = sotoinpcb(so);
587 #ifdef INET6
588 		in6p = NULL;
589 #endif
590 		break;
591 #ifdef INET6
592 	case PF_INET6:
593 		inp = NULL;
594 		in6p = sotoin6pcb(so);
595 		break;
596 #endif
597 	default:
598 		splx(s);
599 		return EAFNOSUPPORT;
600 	}
601 #ifndef INET6
602 	if (inp == NULL)
603 #else
604 	if (inp == NULL && in6p == NULL)
605 #endif
606 	{
607 		splx(s);
608 		if (op == PRCO_SETOPT && *mp)
609 			(void) m_free(*mp);
610 		return (ECONNRESET);
611 	}
612 	if (level != IPPROTO_TCP) {
613 		switch (family) {
614 		case PF_INET:
615 			error = ip_ctloutput(op, so, level, optname, mp);
616 			break;
617 #ifdef INET6
618 		case PF_INET6:
619 			error = ip6_ctloutput(op, so, level, optname, mp);
620 			break;
621 #endif
622 		}
623 		splx(s);
624 		return (error);
625 	}
626 	if (inp)
627 		tp = intotcpcb(inp);
628 #ifdef INET6
629 	else if (in6p)
630 		tp = in6totcpcb(in6p);
631 #endif
632 	else
633 		tp = NULL;
634 
635 	switch (op) {
636 
637 	case PRCO_SETOPT:
638 		m = *mp;
639 		switch (optname) {
640 
641 		case TCP_NODELAY:
642 			if (m == NULL || m->m_len < sizeof (int))
643 				error = EINVAL;
644 			else if (*mtod(m, int *))
645 				tp->t_flags |= TF_NODELAY;
646 			else
647 				tp->t_flags &= ~TF_NODELAY;
648 			break;
649 
650 		case TCP_MAXSEG:
651 			if (m && (i = *mtod(m, int *)) > 0 &&
652 			    i <= tp->t_peermss)
653 				tp->t_peermss = i;  /* limit on send size */
654 			else
655 				error = EINVAL;
656 			break;
657 
658 		default:
659 			error = ENOPROTOOPT;
660 			break;
661 		}
662 		if (m)
663 			(void) m_free(m);
664 		break;
665 
666 	case PRCO_GETOPT:
667 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
668 		m->m_len = sizeof(int);
669 
670 		switch (optname) {
671 		case TCP_NODELAY:
672 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
673 			break;
674 		case TCP_MAXSEG:
675 			*mtod(m, int *) = tp->t_peermss;
676 			break;
677 		default:
678 			error = ENOPROTOOPT;
679 			break;
680 		}
681 		break;
682 	}
683 	splx(s);
684 	return (error);
685 }
686 
/*
 * Default send and receive buffer reservations, in bytes, passed to
 * soreserve() by tcp_attach().  Each default applies only when the
 * corresponding macro has not already been defined.  The fallback
 * values are parenthesized and carry no trailing semicolon so that the
 * macros expand to a plain constant expression.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024*16)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024*16)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
695 
696 /*
697  * Attach TCP protocol to socket, allocating
698  * internet protocol control block, tcp control block,
699  * bufer space, and entering LISTEN state if to accept connections.
700  */
701 int
702 tcp_attach(so)
703 	struct socket *so;
704 {
705 	struct tcpcb *tp;
706 	struct inpcb *inp;
707 #ifdef INET6
708 	struct in6pcb *in6p;
709 #endif
710 	int error;
711 	int family;	/* family of the socket */
712 
713 	family = so->so_proto->pr_domain->dom_family;
714 
715 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
716 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
717 		if (error)
718 			return (error);
719 	}
720 	switch (family) {
721 	case PF_INET:
722 		error = in_pcballoc(so, &tcbtable);
723 		if (error)
724 			return (error);
725 		inp = sotoinpcb(so);
726 #ifdef INET6
727 		in6p = NULL;
728 #endif
729 		break;
730 #ifdef INET6
731 	case PF_INET6:
732 		error = in6_pcballoc(so, &tcb6);
733 		if (error)
734 			return (error);
735 		inp = NULL;
736 		in6p = sotoin6pcb(so);
737 		break;
738 #endif
739 	default:
740 		return EAFNOSUPPORT;
741 	}
742 #ifdef IPSEC
743 	if (inp) {
744 		error = ipsec_init_policy(so, &inp->inp_sp);
745 		if (error != 0) {
746 			in_pcbdetach(inp);
747 			return (error);
748 		}
749 	}
750 #ifdef INET6
751 	else if (in6p) {
752 		error = ipsec_init_policy(so, &in6p->in6p_sp);
753 		if (error != 0) {
754 			in6_pcbdetach(in6p);
755 			return (error);
756 		}
757 	}
758 #endif
759 #endif /*IPSEC*/
760 	if (inp)
761 		tp = tcp_newtcpcb(family, (void *)inp);
762 #ifdef INET6
763 	else if (in6p)
764 		tp = tcp_newtcpcb(family, (void *)in6p);
765 #endif
766 	else
767 		tp = NULL;
768 
769 	if (tp == 0) {
770 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
771 
772 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
773 		if (inp)
774 			in_pcbdetach(inp);
775 #ifdef INET6
776 		else if (in6p)
777 			in6_pcbdetach(in6p);
778 #endif
779 		so->so_state |= nofd;
780 		return (ENOBUFS);
781 	}
782 	tp->t_state = TCPS_CLOSED;
783 	return (0);
784 }
785 
786 /*
787  * Initiate (or continue) disconnect.
788  * If embryonic state, just send reset (once).
789  * If in ``let data drain'' option and linger null, just drop.
790  * Otherwise (hard), mark socket disconnecting and drop
791  * current input data; switch states based on user close, and
792  * send segment to peer (with FIN).
793  */
794 struct tcpcb *
795 tcp_disconnect(tp)
796 	struct tcpcb *tp;
797 {
798 	struct socket *so;
799 
800 	if (tp->t_inpcb)
801 		so = tp->t_inpcb->inp_socket;
802 #ifdef INET6
803 	else if (tp->t_in6pcb)
804 		so = tp->t_in6pcb->in6p_socket;
805 #endif
806 	else
807 		so = NULL;
808 
809 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
810 		tp = tcp_close(tp);
811 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
812 		tp = tcp_drop(tp, 0);
813 	else {
814 		soisdisconnecting(so);
815 		sbflush(&so->so_rcv);
816 		tp = tcp_usrclosed(tp);
817 		if (tp)
818 			(void) tcp_output(tp);
819 	}
820 	return (tp);
821 }
822 
823 /*
824  * User issued close, and wish to trail through shutdown states:
825  * if never received SYN, just forget it.  If got a SYN from peer,
826  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
827  * If already got a FIN from peer, then almost done; go to LAST_ACK
828  * state.  In all other cases, have already sent FIN to peer (e.g.
829  * after PRU_SHUTDOWN), and just have to play tedious game waiting
830  * for peer to send FIN or not respond to keep-alives, etc.
831  * We can let the user exit from the close as soon as the FIN is acked.
832  */
833 struct tcpcb *
834 tcp_usrclosed(tp)
835 	struct tcpcb *tp;
836 {
837 
838 	switch (tp->t_state) {
839 
840 	case TCPS_CLOSED:
841 	case TCPS_LISTEN:
842 	case TCPS_SYN_SENT:
843 		tp->t_state = TCPS_CLOSED;
844 		tp = tcp_close(tp);
845 		break;
846 
847 	case TCPS_SYN_RECEIVED:
848 	case TCPS_ESTABLISHED:
849 		tp->t_state = TCPS_FIN_WAIT_1;
850 		break;
851 
852 	case TCPS_CLOSE_WAIT:
853 		tp->t_state = TCPS_LAST_ACK;
854 		break;
855 	}
856 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
857 		struct socket *so;
858 		if (tp->t_inpcb)
859 			so = tp->t_inpcb->inp_socket;
860 #ifdef INET6
861 		else if (tp->t_in6pcb)
862 			so = tp->t_in6pcb->in6p_socket;
863 #endif
864 		else
865 			so = NULL;
866 		soisdisconnected(so);
867 		/*
868 		 * If we are in FIN_WAIT_2, we arrived here because the
869 		 * application did a shutdown of the send side.  Like the
870 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
871 		 * a full close, we start a timer to make sure sockets are
872 		 * not left in FIN_WAIT_2 forever.
873 		 */
874 		if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0))
875 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
876 	}
877 	return (tp);
878 }
879 
880 static struct {
881 	 unsigned int valid : 1;
882 	 unsigned int rdonly : 1;
883 	 int *var;
884 	 int val;
885 	 } tcp_ctlvars[] = TCPCTL_VARIABLES;
886 
887 /*
888  * Sysctl for tcp variables.
889  */
890 int
891 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
892 	int *name;
893 	u_int namelen;
894 	void *oldp;
895 	size_t *oldlenp;
896 	void *newp;
897 	size_t newlen;
898 {
899 
900 	/* All sysctl names at this level are terminal. */
901 	if (namelen != 1)
902 		return (ENOTDIR);
903 
904 	/*
905 	 * The sysctl specifies usec-between-RST, so we must
906 	 * convert from/to a timeval.
907 	 */
908 	if (name[0] == TCPCTL_RSTRATELIMIT) {
909 		int rate_usec, error, s;
910 
911 		rate_usec = (tcp_rst_ratelim.tv_sec * 1000000) +
912 		    tcp_rst_ratelim.tv_usec;
913 		error = sysctl_int(oldp, oldlenp, newp, newlen, &rate_usec);
914 		if (error)
915 			return (error);
916 		if (rate_usec < 0)
917 			return (EINVAL);
918 		s = splsoftnet();
919 		tcp_rst_ratelim.tv_sec = rate_usec / 1000000;
920 		tcp_rst_ratelim.tv_usec = rate_usec % 1000000;
921 		splx(s);
922 
923 		return (0);
924 	}
925 
926 	if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0])
927 	    && tcp_ctlvars[name[0]].valid) {
928 		if (tcp_ctlvars[name[0]].rdonly)
929 			return (sysctl_rdint(oldp, oldlenp, newp,
930 			    tcp_ctlvars[name[0]].val));
931 		else
932 			return (sysctl_int(oldp, oldlenp, newp, newlen,
933 			    tcp_ctlvars[name[0]].var));
934 	}
935 
936 	return (ENOPROTOOPT);
937 }
938