xref: /openbsd-src/sys/netinet/tcp_usrreq.c (revision 47911bd667ac77dc523b8a13ef40b012dbffa741)
1 /*	$OpenBSD: tcp_usrreq.c,v 1.67 2002/09/11 03:15:36 itojun Exp $	*/
2 /*	$NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  * 	This product includes software developed by the University of
50  * 	California, Berkeley and its contributors.
51  * 	This product includes software developed at the Information
52  * 	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/protosw.h>
81 #include <sys/stat.h>
82 #include <sys/sysctl.h>
83 #include <sys/domain.h>
84 #include <sys/kernel.h>
85 
86 #include <net/if.h>
87 #include <net/route.h>
88 
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_fsm.h>
97 #include <netinet/tcp_seq.h>
98 #include <netinet/tcp_timer.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet/tcpip.h>
101 #include <netinet/tcp_debug.h>
102 
/*
 * TCP protocol interface to socket abstraction.
 *
 * The declarations below name objects that are defined outside this
 * file, plus a forward declaration for tcp_ident(), which is defined
 * further down and reached through tcp_sysctl().
 */
extern	char *tcpstates[];
/* Value used to arm the TCPT_KEEP timer at connect; TCPCTL_KEEPINITTIME. */
extern	int tcptv_keep_init;

/* Exported through the TCPCTL_RSTPPSLIMIT sysctl. */
extern int tcp_rst_ppslim;

/* from in_pcb.c */
/* Its tcp member is exported through the TCPCTL_BADDYNAMIC sysctl. */
extern	struct baddynamicports baddynamicports;

/* Handler for the TCPCTL_IDENT sysctl. */
int tcp_ident(void *, size_t *, void *, size_t);
115 
#ifdef INET6
/*
 * IPv6 entry point for TCP user requests.  Every argument except the
 * calling process is forwarded unchanged to tcp_usrreq(), whose result
 * is returned; p is not used here.
 */
int
tcp6_usrreq(so, req, m, nam, control, p)
	struct socket *so;
	int req;
	struct mbuf *m;
	struct mbuf *nam;
	struct mbuf *control;
	struct proc *p;
{
	int rv;

	rv = tcp_usrreq(so, req, m, nam, control);
	return (rv);
}
#endif /* INET6 */
128 
129 /*
130  * Process a TCP user request for TCP tb.  If this is a send request
131  * then m is the mbuf chain of send data.  If this is a timer expiration
132  * (called from the software clock routine), then timertype tells which timer.
133  */
134 /*ARGSUSED*/
135 int
136 tcp_usrreq(so, req, m, nam, control)
137 	struct socket *so;
138 	int req;
139 	struct mbuf *m, *nam, *control;
140 {
141 	struct sockaddr_in *sin;
142 	register struct inpcb *inp;
143 	register struct tcpcb *tp = NULL;
144 	int s;
145 	int error = 0;
146 	int ostate;
147 
148 	if (req == PRU_CONTROL) {
149 #ifdef INET6
150 		if (sotopf(so) == PF_INET6)
151 			return in6_control(so, (u_long)m, (caddr_t)nam,
152 			    (struct ifnet *)control, 0);
153 		else
154 #endif /* INET6 */
155 			return (in_control(so, (u_long)m, (caddr_t)nam,
156 			    (struct ifnet *)control));
157 	}
158 	if (control && control->m_len) {
159 		m_freem(control);
160 		if (m)
161 			m_freem(m);
162 		return (EINVAL);
163 	}
164 
165 	s = splsoftnet();
166 	inp = sotoinpcb(so);
167 	/*
168 	 * When a TCP is attached to a socket, then there will be
169 	 * a (struct inpcb) pointed at by the socket, and this
170 	 * structure will point at a subsidary (struct tcpcb).
171 	 */
172 	if (inp == 0 && req != PRU_ATTACH) {
173 		splx(s);
174 		/*
175 		 * The following corrects an mbuf leak under rare
176 		 * circumstances
177 		 */
178 		if (m && (req == PRU_SEND || req == PRU_SENDOOB))
179 			m_freem(m);
180 		return (EINVAL);		/* XXX */
181 	}
182 	if (inp) {
183 		tp = intotcpcb(inp);
184 		/* WHAT IF TP IS 0? */
185 #ifdef KPROF
186 		tcp_acounts[tp->t_state][req]++;
187 #endif
188 		ostate = tp->t_state;
189 	} else
190 		ostate = 0;
191 	switch (req) {
192 
193 	/*
194 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
195 	 * and an internet control block.
196 	 */
197 	case PRU_ATTACH:
198 		if (inp) {
199 			error = EISCONN;
200 			break;
201 		}
202 		error = tcp_attach(so);
203 		if (error)
204 			break;
205 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
206 			so->so_linger = TCP_LINGERTIME;
207 		tp = sototcpcb(so);
208 		break;
209 
210 	/*
211 	 * PRU_DETACH detaches the TCP protocol from the socket.
212 	 * If the protocol state is non-embryonic, then can't
213 	 * do this directly: have to initiate a PRU_DISCONNECT,
214 	 * which may finish later; embryonic TCB's can just
215 	 * be discarded here.
216 	 */
217 	case PRU_DETACH:
218 		tp = tcp_disconnect(tp);
219 		break;
220 
221 	/*
222 	 * Give the socket an address.
223 	 */
224 	case PRU_BIND:
225 #ifdef INET6
226 		if (inp->inp_flags & INP_IPV6)
227 			error = in6_pcbbind(inp, nam);
228 		else
229 #endif
230 			error = in_pcbbind(inp, nam);
231 		if (error)
232 			break;
233 		break;
234 
235 	/*
236 	 * Prepare to accept connections.
237 	 */
238 	case PRU_LISTEN:
239 		if (inp->inp_lport == 0) {
240 #ifdef INET6
241 			if (inp->inp_flags & INP_IPV6)
242 				error = in6_pcbbind(inp, NULL);
243 			else
244 #endif
245 				error = in_pcbbind(inp, NULL);
246 		}
247 		/* If the in_pcbbind() above is called, the tp->pf
248 		   should still be whatever it was before. */
249 		if (error == 0)
250 			tp->t_state = TCPS_LISTEN;
251 		break;
252 
253 	/*
254 	 * Initiate connection to peer.
255 	 * Create a template for use in transmissions on this connection.
256 	 * Enter SYN_SENT state, and mark socket as connecting.
257 	 * Start keep-alive timer, and seed output sequence space.
258 	 * Send initial segment on connection.
259 	 */
260 	case PRU_CONNECT:
261 		sin = mtod(nam, struct sockaddr_in *);
262 
263 #ifdef INET6
264 		if (sin->sin_family == AF_INET6) {
265 			struct in6_addr *in6_addr = &mtod(nam,
266 			    struct sockaddr_in6 *)->sin6_addr;
267 
268 			if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
269 			    IN6_IS_ADDR_MULTICAST(in6_addr) ||
270 			    (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
271 			    ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
272 			    IN_MULTICAST(in6_addr->s6_addr32[3]) ||
273 			    in_broadcast(sin->sin_addr, NULL)))) {
274 				error = EINVAL;
275 				break;
276 			}
277 
278 			if (inp->inp_lport == 0) {
279 				error = in6_pcbbind(inp, NULL);
280 				if (error)
281 					break;
282 			}
283 			error = in6_pcbconnect(inp, nam);
284 		} else if (sin->sin_family == AF_INET)
285 #endif /* INET6 */
286 		{
287 			if ((sin->sin_addr.s_addr == INADDR_ANY) ||
288 			    IN_MULTICAST(sin->sin_addr.s_addr) ||
289 			    in_broadcast(sin->sin_addr, NULL)) {
290 				error = EINVAL;
291 				break;
292 			}
293 
294 			/* Trying to connect to some broadcast address */
295 			if (in_broadcast(sin->sin_addr, NULL)) {
296 				error = EINVAL;
297 				break;
298 			}
299 
300 			if (inp->inp_lport == 0) {
301 				error = in_pcbbind(inp, NULL);
302 				if (error)
303 					break;
304 			}
305 			error = in_pcbconnect(inp, nam);
306 		}
307 
308 		if (error)
309 			break;
310 
311 		tp->t_template = tcp_template(tp);
312 		if (tp->t_template == 0) {
313 			in_pcbdisconnect(inp);
314 			error = ENOBUFS;
315 			break;
316 		}
317 
318 		so->so_state |= SS_CONNECTOUT;
319 		/* Compute window scaling to request.  */
320 		tcp_rscale(tp, so->so_rcv.sb_hiwat);
321 
322 		soisconnecting(so);
323 		tcpstat.tcps_connattempt++;
324 		tp->t_state = TCPS_SYN_SENT;
325 		TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
326 #ifdef TCP_COMPAT_42
327 		tp->iss = tcp_iss;
328 		tcp_iss += TCP_ISSINCR/2;
329 #else  /* TCP_COMPAT_42 */
330 		tp->iss = tcp_rndiss_next();
331 #endif /* !TCP_COMPAT_42 */
332 		tcp_sendseqinit(tp);
333 #if defined(TCP_SACK)
334 		tp->snd_last = tp->snd_una;
335 #endif
336 #if defined(TCP_SACK) && defined(TCP_FACK)
337 		tp->snd_fack = tp->snd_una;
338 		tp->retran_data = 0;
339 		tp->snd_awnd = 0;
340 #endif
341 		error = tcp_output(tp);
342 		break;
343 
344 	/*
345 	 * Create a TCP connection between two sockets.
346 	 */
347 	case PRU_CONNECT2:
348 		error = EOPNOTSUPP;
349 		break;
350 
351 	/*
352 	 * Initiate disconnect from peer.
353 	 * If connection never passed embryonic stage, just drop;
354 	 * else if don't need to let data drain, then can just drop anyways,
355 	 * else have to begin TCP shutdown process: mark socket disconnecting,
356 	 * drain unread data, state switch to reflect user close, and
357 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
358 	 * when peer sends FIN and acks ours.
359 	 *
360 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
361 	 */
362 	case PRU_DISCONNECT:
363 		tp = tcp_disconnect(tp);
364 		break;
365 
366 	/*
367 	 * Accept a connection.  Essentially all the work is
368 	 * done at higher levels; just return the address
369 	 * of the peer, storing through addr.
370 	 */
371 	case PRU_ACCEPT:
372 #ifdef INET6
373 		if (inp->inp_flags & INP_IPV6)
374 			in6_setpeeraddr(inp, nam);
375 		else
376 #endif
377 			in_setpeeraddr(inp, nam);
378 		break;
379 
380 	/*
381 	 * Mark the connection as being incapable of further output.
382 	 */
383 	case PRU_SHUTDOWN:
384 		if (so->so_state & SS_CANTSENDMORE)
385 			break;
386 		socantsendmore(so);
387 		tp = tcp_usrclosed(tp);
388 		if (tp)
389 			error = tcp_output(tp);
390 		break;
391 
392 	/*
393 	 * After a receive, possibly send window update to peer.
394 	 */
395 	case PRU_RCVD:
396 		/*
397 		 * soreceive() calls this function when a user receives
398 		 * ancillary data on a listening socket. We don't call
399 		 * tcp_output in such a case, since there is no header
400 		 * template for a listening socket and hence the kernel
401 		 * will panic.
402 		 */
403 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
404 			(void) tcp_output(tp);
405 		break;
406 
407 	/*
408 	 * Do a send by putting data in output queue and updating urgent
409 	 * marker if URG set.  Possibly send more data.
410 	 */
411 	case PRU_SEND:
412 		sbappendstream(&so->so_snd, m);
413 		error = tcp_output(tp);
414 		break;
415 
416 	/*
417 	 * Abort the TCP.
418 	 */
419 	case PRU_ABORT:
420 		tp = tcp_drop(tp, ECONNABORTED);
421 		break;
422 
423 	case PRU_SENSE:
424 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
425 		splx(s);
426 		return (0);
427 
428 	case PRU_RCVOOB:
429 		if ((so->so_oobmark == 0 &&
430 		    (so->so_state & SS_RCVATMARK) == 0) ||
431 		    so->so_options & SO_OOBINLINE ||
432 		    tp->t_oobflags & TCPOOB_HADDATA) {
433 			error = EINVAL;
434 			break;
435 		}
436 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
437 			error = EWOULDBLOCK;
438 			break;
439 		}
440 		m->m_len = 1;
441 		*mtod(m, caddr_t) = tp->t_iobc;
442 		if (((long)nam & MSG_PEEK) == 0)
443 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
444 		break;
445 
446 	case PRU_SENDOOB:
447 		if (sbspace(&so->so_snd) < -512) {
448 			m_freem(m);
449 			error = ENOBUFS;
450 			break;
451 		}
452 		/*
453 		 * According to RFC961 (Assigned Protocols),
454 		 * the urgent pointer points to the last octet
455 		 * of urgent data.  We continue, however,
456 		 * to consider it to indicate the first octet
457 		 * of data past the urgent section.
458 		 * Otherwise, snd_up should be one lower.
459 		 */
460 		sbappendstream(&so->so_snd, m);
461 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
462 		tp->t_force = 1;
463 		error = tcp_output(tp);
464 		tp->t_force = 0;
465 		break;
466 
467 	case PRU_SOCKADDR:
468 #ifdef INET6
469 		if (inp->inp_flags & INP_IPV6)
470 			in6_setsockaddr(inp, nam);
471 		else
472 #endif
473 			in_setsockaddr(inp, nam);
474 		break;
475 
476 	case PRU_PEERADDR:
477 #ifdef INET6
478 		if (inp->inp_flags & INP_IPV6)
479 			in6_setpeeraddr(inp, nam);
480 		else
481 #endif
482 			in_setpeeraddr(inp, nam);
483 		break;
484 
485 	default:
486 		panic("tcp_usrreq");
487 	}
488 	if (tp && (so->so_options & SO_DEBUG))
489 		tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
490 	splx(s);
491 	return (error);
492 }
493 
494 int
495 tcp_ctloutput(op, so, level, optname, mp)
496 	int op;
497 	struct socket *so;
498 	int level, optname;
499 	struct mbuf **mp;
500 {
501 	int error = 0, s;
502 	struct inpcb *inp;
503 	register struct tcpcb *tp;
504 	register struct mbuf *m;
505 	register int i;
506 
507 	s = splsoftnet();
508 	inp = sotoinpcb(so);
509 	if (inp == NULL) {
510 		splx(s);
511 		if (op == PRCO_SETOPT && *mp)
512 			(void) m_free(*mp);
513 		return (ECONNRESET);
514 	}
515 #ifdef INET6
516 	tp = intotcpcb(inp);
517 #endif /* INET6 */
518 	if (level != IPPROTO_TCP) {
519 		switch (so->so_proto->pr_domain->dom_family) {
520 #ifdef INET6
521 		case PF_INET6:
522 			error = ip6_ctloutput(op, so, level, optname, mp);
523 			break;
524 #endif /* INET6 */
525 		case PF_INET:
526 			error = ip_ctloutput(op, so, level, optname, mp);
527 			break;
528 		default:
529 			error = EAFNOSUPPORT;	/*?*/
530 			break;
531 		}
532 		splx(s);
533 		return (error);
534 	}
535 #ifndef INET6
536 	tp = intotcpcb(inp);
537 #endif /* !INET6 */
538 
539 	switch (op) {
540 
541 	case PRCO_SETOPT:
542 		m = *mp;
543 		switch (optname) {
544 
545 		case TCP_NODELAY:
546 			if (m == NULL || m->m_len < sizeof (int))
547 				error = EINVAL;
548 			else if (*mtod(m, int *))
549 				tp->t_flags |= TF_NODELAY;
550 			else
551 				tp->t_flags &= ~TF_NODELAY;
552 			break;
553 
554 		case TCP_MAXSEG:
555 			if (m == NULL || m->m_len < sizeof (int)) {
556 				error = EINVAL;
557 				break;
558 			}
559 
560 			i = *mtod(m, int *);
561 			if (i > 0 && i <= tp->t_maxseg)
562 				tp->t_maxseg = i;
563 			else
564 				error = EINVAL;
565 			break;
566 
567 #ifdef TCP_SACK
568 		case TCP_SACK_DISABLE:
569 			if (m == NULL || m->m_len < sizeof (int)) {
570 				error = EINVAL;
571 				break;
572 			}
573 
574 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
575 				error = EPERM;
576 				break;
577 			}
578 
579 			if (tp->t_flags & TF_SIGNATURE) {
580 				error = EPERM;
581 				break;
582 			}
583 
584 			if (*mtod(m, int *))
585 				tp->sack_disable = 1;
586 			else
587 				tp->sack_disable = 0;
588 			break;
589 #endif
590 #ifdef TCP_SIGNATURE
591 		case TCP_SIGNATURE_ENABLE:
592 			if (m == NULL || m->m_len < sizeof (int)) {
593 				error = EINVAL;
594 				break;
595 			}
596 
597 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
598 				error = EPERM;
599 				break;
600 			}
601 
602 			if (*mtod(m, int *)) {
603 				tp->t_flags |= TF_SIGNATURE;
604 #ifdef TCP_SACK
605 				tp->sack_disable = 1;
606 #endif /* TCP_SACK */
607 			} else
608 				tp->t_flags &= ~TF_SIGNATURE;
609 			break;
610 #endif /* TCP_SIGNATURE */
611 		default:
612 			error = ENOPROTOOPT;
613 			break;
614 		}
615 		if (m)
616 			(void) m_free(m);
617 		break;
618 
619 	case PRCO_GETOPT:
620 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
621 		m->m_len = sizeof(int);
622 
623 		switch (optname) {
624 		case TCP_NODELAY:
625 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
626 			break;
627 		case TCP_MAXSEG:
628 			*mtod(m, int *) = tp->t_maxseg;
629 			break;
630 #ifdef TCP_SACK
631 		case TCP_SACK_DISABLE:
632 			*mtod(m, int *) = tp->sack_disable;
633 			break;
634 #endif
635 		default:
636 			error = ENOPROTOOPT;
637 			break;
638 		}
639 		break;
640 	}
641 	splx(s);
642 	return (error);
643 }
644 
/*
 * Default send and receive buffer reservations.  tcp_attach() passes
 * them to soreserve() when a socket has no buffer space set, and
 * tcp_sysctl() exports them as TCPCTL_SENDSPACE and TCPCTL_RECVSPACE.
 * The macro bodies are parenthesized so that they expand safely inside
 * larger expressions.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024*16)
#endif
u_int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024*16)
#endif
u_int	tcp_recvspace = TCP_RECVSPACE;
653 
654 /*
655  * Attach TCP protocol to socket, allocating
656  * internet protocol control block, tcp control block,
657  * bufer space, and entering LISTEN state if to accept connections.
658  */
659 int
660 tcp_attach(so)
661 	struct socket *so;
662 {
663 	register struct tcpcb *tp;
664 	struct inpcb *inp;
665 	int error;
666 
667 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
668 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
669 		if (error)
670 			return (error);
671 	}
672 	error = in_pcballoc(so, &tcbtable);
673 	if (error)
674 		return (error);
675 	inp = sotoinpcb(so);
676 	tp = tcp_newtcpcb(inp);
677 	if (tp == NULL) {
678 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
679 
680 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
681 		in_pcbdetach(inp);
682 		so->so_state |= nofd;
683 		return (ENOBUFS);
684 	}
685 	tp->t_state = TCPS_CLOSED;
686 #ifdef INET6
687 	/* we disallow IPv4 mapped address completely. */
688 	if (inp->inp_flags & INP_IPV6)
689 		tp->pf = PF_INET6;
690 	else
691 		tp->pf = PF_INET;
692 #else
693 	tp->pf = PF_INET;
694 #endif
695 	return (0);
696 }
697 
698 /*
699  * Initiate (or continue) disconnect.
700  * If embryonic state, just send reset (once).
701  * If in ``let data drain'' option and linger null, just drop.
702  * Otherwise (hard), mark socket disconnecting and drop
703  * current input data; switch states based on user close, and
704  * send segment to peer (with FIN).
705  */
706 struct tcpcb *
707 tcp_disconnect(tp)
708 	register struct tcpcb *tp;
709 {
710 	struct socket *so = tp->t_inpcb->inp_socket;
711 
712 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
713 		tp = tcp_close(tp);
714 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
715 		tp = tcp_drop(tp, 0);
716 	else {
717 		soisdisconnecting(so);
718 		sbflush(&so->so_rcv);
719 		tp = tcp_usrclosed(tp);
720 		if (tp)
721 			(void) tcp_output(tp);
722 	}
723 	return (tp);
724 }
725 
726 /*
727  * User issued close, and wish to trail through shutdown states:
728  * if never received SYN, just forget it.  If got a SYN from peer,
729  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
730  * If already got a FIN from peer, then almost done; go to LAST_ACK
731  * state.  In all other cases, have already sent FIN to peer (e.g.
732  * after PRU_SHUTDOWN), and just have to play tedious game waiting
733  * for peer to send FIN or not respond to keep-alives, etc.
734  * We can let the user exit from the close as soon as the FIN is acked.
735  */
736 struct tcpcb *
737 tcp_usrclosed(tp)
738 	register struct tcpcb *tp;
739 {
740 
741 	switch (tp->t_state) {
742 
743 	case TCPS_CLOSED:
744 	case TCPS_LISTEN:
745 	case TCPS_SYN_SENT:
746 		tp->t_state = TCPS_CLOSED;
747 		tp = tcp_close(tp);
748 		break;
749 
750 	case TCPS_SYN_RECEIVED:
751 	case TCPS_ESTABLISHED:
752 		tp->t_state = TCPS_FIN_WAIT_1;
753 		break;
754 
755 	case TCPS_CLOSE_WAIT:
756 		tp->t_state = TCPS_LAST_ACK;
757 		break;
758 	}
759 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
760 		soisdisconnected(tp->t_inpcb->inp_socket);
761 		/*
762 		 * If we are in FIN_WAIT_2, we arrived here because the
763 		 * application did a shutdown of the send side.  Like the
764 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
765 		 * a full close, we start a timer to make sure sockets are
766 		 * not left in FIN_WAIT_2 forever.
767 		 */
768 		if (tp->t_state == TCPS_FIN_WAIT_2)
769 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
770 	}
771 	return (tp);
772 }
773 
774 /*
775  * Look up a socket for ident..
776  */
777 int
778 tcp_ident(oldp, oldlenp, newp, newlen)
779 	void *oldp;
780 	size_t *oldlenp;
781 	void *newp;
782 	size_t newlen;
783 {
784 	int error = 0, s;
785 	struct tcp_ident_mapping tir;
786 	struct inpcb *inp;
787 	struct sockaddr_in *fin, *lin;
788 #ifdef INET6
789 	struct sockaddr_in6 *fin6, *lin6;
790 	struct in6_addr f6, l6;
791 #endif
792 
793 	if (oldp == NULL || newp != NULL || newlen != 0)
794 		return (EINVAL);
795 	if  (*oldlenp < sizeof(tir))
796 		return (ENOMEM);
797 	if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
798 		return (error);
799 	switch (tir.faddr.ss_family) {
800 #ifdef INET6
801 	case AF_INET6:
802 		fin6 = (struct sockaddr_in6 *)&tir.faddr;
803 		error = in6_embedscope(&f6, fin6, NULL, NULL);
804 		if (error)
805 			return EINVAL;	/*?*/
806 		lin6 = (struct sockaddr_in6 *)&tir.laddr;
807 		error = in6_embedscope(&l6, lin6, NULL, NULL);
808 		if (error)
809 			return EINVAL;	/*?*/
810 		break;
811 #endif
812 	case AF_INET:
813 	  	fin = (struct sockaddr_in *)&tir.faddr;
814 		lin = (struct sockaddr_in *)&tir.laddr;
815 		break;
816 	default:
817 		return (EINVAL);
818 	}
819 
820 	s = splsoftnet();
821 	switch (tir.faddr.ss_family) {
822 	case AF_INET6:
823 #ifdef INET6
824 		inp = in6_pcbhashlookup(&tcbtable, &f6,
825 		    fin6->sin6_port, &l6, lin6->sin6_port);
826 		break;
827 #endif
828 	case AF_INET:
829 		inp = in_pcbhashlookup(&tcbtable,  fin->sin_addr,
830 		    fin->sin_port, lin->sin_addr, lin->sin_port);
831 		break;
832 	}
833 
834 	if (inp == NULL) {
835 		++tcpstat.tcps_pcbhashmiss;
836 		switch (tir.faddr.ss_family) {
837 #ifdef INET6
838 		case AF_INET6:
839 			inp = in_pcblookup(&tcbtable, &f6,
840 			    fin6->sin6_port, &l6, lin6->sin6_port,
841 			    INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
842 			break;
843 #endif
844 		case AF_INET:
845 			inp = in_pcblookup(&tcbtable, &fin->sin_addr,
846 			    fin->sin_port, &lin->sin_addr, lin->sin_port,
847 			    INPLOOKUP_WILDCARD);
848 			break;
849 		}
850 	}
851 
852 	if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
853 		tir.ruid = inp->inp_socket->so_ruid;
854 		tir.euid = inp->inp_socket->so_euid;
855 	} else {
856 		tir.ruid = -1;
857 		tir.euid = -1;
858 	}
859 	splx(s);
860 
861 	*oldlenp = sizeof (tir);
862 	error = copyout((void *)&tir, oldp, sizeof (tir));
863 	return (error);
864 }
865 
866 /*
867  * Sysctl for tcp variables.
868  */
869 int
870 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
871 	int *name;
872 	u_int namelen;
873 	void *oldp;
874 	size_t *oldlenp;
875 	void *newp;
876 	size_t newlen;
877 {
878 
879 	/* All sysctl names at this level are terminal. */
880 	if (namelen != 1)
881 		return (ENOTDIR);
882 
883 	switch (name[0]) {
884 	case TCPCTL_RFC1323:
885 		return (sysctl_int(oldp, oldlenp, newp, newlen,
886 		    &tcp_do_rfc1323));
887 #ifdef TCP_SACK
888 	case TCPCTL_SACK:
889 		return (sysctl_int(oldp, oldlenp, newp, newlen,
890 		    &tcp_do_sack));
891 #endif
892 	case TCPCTL_MSSDFLT:
893 		return (sysctl_int(oldp, oldlenp, newp, newlen,
894 		    &tcp_mssdflt));
895 	case TCPCTL_KEEPINITTIME:
896 		return (sysctl_int(oldp, oldlenp, newp, newlen,
897 		    &tcptv_keep_init));
898 
899 	case TCPCTL_KEEPIDLE:
900 		return (sysctl_int(oldp, oldlenp, newp, newlen,
901 		    &tcp_keepidle));
902 
903 	case TCPCTL_KEEPINTVL:
904 		return (sysctl_int(oldp, oldlenp, newp, newlen,
905 		    &tcp_keepintvl));
906 
907 	case TCPCTL_SLOWHZ:
908 		return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
909 
910 	case TCPCTL_BADDYNAMIC:
911 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
912 		    baddynamicports.tcp, sizeof(baddynamicports.tcp)));
913 
914 	case TCPCTL_RECVSPACE:
915 		return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_recvspace));
916 
917 	case TCPCTL_SENDSPACE:
918 		return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_sendspace));
919 	case TCPCTL_IDENT:
920 		return (tcp_ident(oldp, oldlenp, newp, newlen));
921 	case TCPCTL_RSTPPSLIMIT:
922 		return (sysctl_int(oldp, oldlenp, newp, newlen,
923 		    &tcp_rst_ppslim));
924 	case TCPCTL_ACK_ON_PUSH:
925 		return (sysctl_int(oldp, oldlenp, newp, newlen,
926 		    &tcp_ack_on_push));
927 #ifdef TCP_ECN
928 	case TCPCTL_ECN:
929 		return (sysctl_int(oldp, oldlenp, newp, newlen,
930 		   &tcp_do_ecn));
931 #endif
932 	default:
933 		return (ENOPROTOOPT);
934 	}
935 	/* NOTREACHED */
936 }
937