xref: /openbsd-src/sys/netinet/tcp_usrreq.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: tcp_usrreq.c,v 1.54 2001/06/26 06:55:32 aaron Exp $	*/
2 /*	$NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  * 	This product includes software developed by the University of
50  * 	California, Berkeley and its contributors.
51  * 	This product includes software developed at the Information
52  * 	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/protosw.h>
81 #include <sys/stat.h>
82 #include <sys/sysctl.h>
83 #include <sys/domain.h>
84 
85 #include <net/if.h>
86 #include <net/route.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/in_systm.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/ip_var.h>
94 #include <netinet/tcp.h>
95 #include <netinet/tcp_fsm.h>
96 #include <netinet/tcp_seq.h>
97 #include <netinet/tcp_timer.h>
98 #include <netinet/tcp_var.h>
99 #include <netinet/tcpip.h>
100 #include <netinet/tcp_debug.h>
101 
102 /*
103  * TCP protocol interface to socket abstraction.
104  */
105 extern	char *tcpstates[];
106 extern	int tcptv_keep_init;
107 
108 extern int tcp_rst_ppslim;
109 
110 /* from in_pcb.c */
111 extern	struct baddynamicports baddynamicports;
112 
113 int tcp_ident __P((void *, size_t *, void *, size_t));
114 
115 #ifdef INET6
/*
 * IPv6 entry point for TCP user requests.
 *
 * Forwards so, req, m, nam and control to tcp_usrreq() unchanged and
 * returns its result; the process pointer p is accepted but not used.
 */
int
tcp6_usrreq(so, req, m, nam, control, p)
	struct socket *so;
	int req;
	struct mbuf *m;
	struct mbuf *nam;
	struct mbuf *control;
	struct proc *p;
{
	return (tcp_usrreq(so, req, m, nam, control));
}
126 #endif
127 
128 /*
129  * Process a TCP user request for TCP tb.  If this is a send request
130  * then m is the mbuf chain of send data.  If this is a timer expiration
131  * (called from the software clock routine), then timertype tells which timer.
132  */
133 /*ARGSUSED*/
134 int
135 tcp_usrreq(so, req, m, nam, control)
136 	struct socket *so;
137 	int req;
138 	struct mbuf *m, *nam, *control;
139 {
140 	struct sockaddr_in *sin;
141 	register struct inpcb *inp;
142 	register struct tcpcb *tp = NULL;
143 	int s;
144 	int error = 0;
145 	int ostate;
146 
147 	if (req == PRU_CONTROL) {
148 #ifdef INET6
149 		if (sotopf(so) == PF_INET6)
150 			return in6_control(so, (u_long)m, (caddr_t)nam,
151 			    (struct ifnet *)control, 0);
152 		else
153 #endif /* INET6 */
154 			return (in_control(so, (u_long)m, (caddr_t)nam,
155 			    (struct ifnet *)control));
156 	}
157 	if (control && control->m_len) {
158 		m_freem(control);
159 		if (m)
160 			m_freem(m);
161 		return (EINVAL);
162 	}
163 
164 	s = splsoftnet();
165 	inp = sotoinpcb(so);
166 	/*
167 	 * When a TCP is attached to a socket, then there will be
168 	 * a (struct inpcb) pointed at by the socket, and this
169 	 * structure will point at a subsidary (struct tcpcb).
170 	 */
171 	if (inp == 0 && req != PRU_ATTACH) {
172 		splx(s);
173 		/*
174 		 * The following corrects an mbuf leak under rare
175 		 * circumstances
176 		 */
177 		if (m && (req == PRU_SEND || req == PRU_SENDOOB))
178 			m_freem(m);
179 		return (EINVAL);		/* XXX */
180 	}
181 	if (inp) {
182 		tp = intotcpcb(inp);
183 		/* WHAT IF TP IS 0? */
184 #ifdef KPROF
185 		tcp_acounts[tp->t_state][req]++;
186 #endif
187 		ostate = tp->t_state;
188 	} else
189 		ostate = 0;
190 	switch (req) {
191 
192 	/*
193 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
194 	 * and an internet control block.
195 	 */
196 	case PRU_ATTACH:
197 		if (inp) {
198 			error = EISCONN;
199 			break;
200 		}
201 		error = tcp_attach(so);
202 		if (error)
203 			break;
204 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
205 			so->so_linger = TCP_LINGERTIME;
206 		tp = sototcpcb(so);
207 		break;
208 
209 	/*
210 	 * PRU_DETACH detaches the TCP protocol from the socket.
211 	 * If the protocol state is non-embryonic, then can't
212 	 * do this directly: have to initiate a PRU_DISCONNECT,
213 	 * which may finish later; embryonic TCB's can just
214 	 * be discarded here.
215 	 */
216 	case PRU_DETACH:
217 		tp = tcp_disconnect(tp);
218 		break;
219 
220 	/*
221 	 * Give the socket an address.
222 	 */
223 	case PRU_BIND:
224 #ifdef INET6
225 		if (inp->inp_flags & INP_IPV6)
226 			error = in6_pcbbind(inp, nam);
227 		else
228 #endif
229 			error = in_pcbbind(inp, nam);
230 		if (error)
231 			break;
232 		break;
233 
234 	/*
235 	 * Prepare to accept connections.
236 	 */
237 	case PRU_LISTEN:
238 		if (inp->inp_lport == 0) {
239 #ifdef INET6
240 			if (inp->inp_flags & INP_IPV6)
241 				error = in6_pcbbind(inp, NULL);
242 			else
243 #endif
244 				error = in_pcbbind(inp, NULL);
245 		}
246 		/* If the in_pcbbind() above is called, the tp->pf
247 		   should still be whatever it was before. */
248 		if (error == 0)
249 			tp->t_state = TCPS_LISTEN;
250 		break;
251 
252 	/*
253 	 * Initiate connection to peer.
254 	 * Create a template for use in transmissions on this connection.
255 	 * Enter SYN_SENT state, and mark socket as connecting.
256 	 * Start keep-alive timer, and seed output sequence space.
257 	 * Send initial segment on connection.
258 	 */
259 	case PRU_CONNECT:
260 		sin = mtod(nam, struct sockaddr_in *);
261 
262 #ifdef INET6
263 		if (sin->sin_family == AF_INET6) {
264 			struct in6_addr *in6_addr = &mtod(nam,
265 			    struct sockaddr_in6 *)->sin6_addr;
266 
267 			if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
268 			    IN6_IS_ADDR_MULTICAST(in6_addr) ||
269 			    (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
270 			    ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
271 			    IN_MULTICAST(in6_addr->s6_addr32[3]) ||
272 			    in_broadcast(sin->sin_addr, NULL)))) {
273 				error = EINVAL;
274 				break;
275 			}
276 
277 			if (inp->inp_lport == 0) {
278 				error = in6_pcbbind(inp, NULL);
279 				if (error)
280 					break;
281 			}
282 			error = in6_pcbconnect(inp, nam);
283 		} else if (sin->sin_family == AF_INET)
284 #endif /* INET6 */
285 		{
286 			if ((sin->sin_addr.s_addr == INADDR_ANY) ||
287 			    IN_MULTICAST(sin->sin_addr.s_addr) ||
288 			    in_broadcast(sin->sin_addr, NULL)) {
289 				error = EINVAL;
290 				break;
291 			}
292 
293 			/* Trying to connect to some broadcast address */
294 			if (in_broadcast(sin->sin_addr, NULL)) {
295 				error = EINVAL;
296 				break;
297 			}
298 
299 			if (inp->inp_lport == 0) {
300 				error = in_pcbbind(inp, NULL);
301 				if (error)
302 					break;
303 			}
304 			error = in_pcbconnect(inp, nam);
305 		}
306 
307 		if (error)
308 			break;
309 
310 		tp->t_template = tcp_template(tp);
311 		if (tp->t_template == 0) {
312 			in_pcbdisconnect(inp);
313 			error = ENOBUFS;
314 			break;
315 		}
316 
317 		so->so_state |= SS_CONNECTOUT;
318 		/* Compute window scaling to request.  */
319 		tcp_rscale(tp, so->so_rcv.sb_hiwat);
320 
321 		soisconnecting(so);
322 		tcpstat.tcps_connattempt++;
323 		tp->t_state = TCPS_SYN_SENT;
324 		tp->t_timer[TCPT_KEEP] = tcptv_keep_init;
325 #ifdef TCP_COMPAT_42
326 		tp->iss = tcp_iss;
327 		tcp_iss += TCP_ISSINCR/2;
328 #else  /* TCP_COMPAT_42 */
329 		tp->iss = tcp_rndiss_next();
330 #endif /* !TCP_COMPAT_42 */
331 		tcp_sendseqinit(tp);
332 #if defined(TCP_SACK)
333 		tp->snd_last = tp->snd_una;
334 #endif
335 #if defined(TCP_SACK) && defined(TCP_FACK)
336 		tp->snd_fack = tp->snd_una;
337 		tp->retran_data = 0;
338 		tp->snd_awnd = 0;
339 #endif
340 		error = tcp_output(tp);
341 		break;
342 
343 	/*
344 	 * Create a TCP connection between two sockets.
345 	 */
346 	case PRU_CONNECT2:
347 		error = EOPNOTSUPP;
348 		break;
349 
350 	/*
351 	 * Initiate disconnect from peer.
352 	 * If connection never passed embryonic stage, just drop;
353 	 * else if don't need to let data drain, then can just drop anyways,
354 	 * else have to begin TCP shutdown process: mark socket disconnecting,
355 	 * drain unread data, state switch to reflect user close, and
356 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
357 	 * when peer sends FIN and acks ours.
358 	 *
359 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
360 	 */
361 	case PRU_DISCONNECT:
362 		tp = tcp_disconnect(tp);
363 		break;
364 
365 	/*
366 	 * Accept a connection.  Essentially all the work is
367 	 * done at higher levels; just return the address
368 	 * of the peer, storing through addr.
369 	 */
370 	case PRU_ACCEPT:
371 #ifdef INET6
372 		if (inp->inp_flags & INP_IPV6)
373 			in6_setpeeraddr(inp, nam);
374 		else
375 #endif
376 			in_setpeeraddr(inp, nam);
377 		break;
378 
379 	/*
380 	 * Mark the connection as being incapable of further output.
381 	 */
382 	case PRU_SHUTDOWN:
383 		if (so->so_state & SS_CANTSENDMORE)
384 			break;
385 		socantsendmore(so);
386 		tp = tcp_usrclosed(tp);
387 		if (tp)
388 			error = tcp_output(tp);
389 		break;
390 
391 	/*
392 	 * After a receive, possibly send window update to peer.
393 	 */
394 	case PRU_RCVD:
395 		/*
396 		 * soreceive() calls this function when a user receives
397 		 * ancillary data on a listening socket. We don't call
398 		 * tcp_output in such a case, since there is no header
399 		 * template for a listening socket and hence the kernel
400 		 * will panic.
401 		 */
402 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
403 			(void) tcp_output(tp);
404 		break;
405 
406 	/*
407 	 * Do a send by putting data in output queue and updating urgent
408 	 * marker if URG set.  Possibly send more data.
409 	 */
410 	case PRU_SEND:
411 		sbappend(&so->so_snd, m);
412 		error = tcp_output(tp);
413 		break;
414 
415 	/*
416 	 * Abort the TCP.
417 	 */
418 	case PRU_ABORT:
419 		tp = tcp_drop(tp, ECONNABORTED);
420 		break;
421 
422 	case PRU_SENSE:
423 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
424 		(void) splx(s);
425 		return (0);
426 
427 	case PRU_RCVOOB:
428 		if ((so->so_oobmark == 0 &&
429 		    (so->so_state & SS_RCVATMARK) == 0) ||
430 		    so->so_options & SO_OOBINLINE ||
431 		    tp->t_oobflags & TCPOOB_HADDATA) {
432 			error = EINVAL;
433 			break;
434 		}
435 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
436 			error = EWOULDBLOCK;
437 			break;
438 		}
439 		m->m_len = 1;
440 		*mtod(m, caddr_t) = tp->t_iobc;
441 		if (((long)nam & MSG_PEEK) == 0)
442 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
443 		break;
444 
445 	case PRU_SENDOOB:
446 		if (sbspace(&so->so_snd) < -512) {
447 			m_freem(m);
448 			error = ENOBUFS;
449 			break;
450 		}
451 		/*
452 		 * According to RFC961 (Assigned Protocols),
453 		 * the urgent pointer points to the last octet
454 		 * of urgent data.  We continue, however,
455 		 * to consider it to indicate the first octet
456 		 * of data past the urgent section.
457 		 * Otherwise, snd_up should be one lower.
458 		 */
459 		sbappend(&so->so_snd, m);
460 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
461 		tp->t_force = 1;
462 		error = tcp_output(tp);
463 		tp->t_force = 0;
464 		break;
465 
466 	case PRU_SOCKADDR:
467 #ifdef INET6
468 		if (inp->inp_flags & INP_IPV6)
469 			in6_setsockaddr(inp, nam);
470 		else
471 #endif
472 			in_setsockaddr(inp, nam);
473 		break;
474 
475 	case PRU_PEERADDR:
476 #ifdef INET6
477 		if (inp->inp_flags & INP_IPV6)
478 			in6_setpeeraddr(inp, nam);
479 		else
480 #endif
481 			in_setpeeraddr(inp, nam);
482 		break;
483 
484 	/*
485 	 * TCP slow timer went off; going through this
486 	 * routine for tracing's sake.
487 	 */
488 	case PRU_SLOWTIMO:
489 		tp = tcp_timers(tp, (long)nam);
490 		req |= (long)nam << 8;		/* for debug's sake */
491 		break;
492 
493 	default:
494 		panic("tcp_usrreq");
495 	}
496 	if (tp && (so->so_options & SO_DEBUG))
497 		tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
498 	splx(s);
499 	return (error);
500 }
501 
502 int
503 tcp_ctloutput(op, so, level, optname, mp)
504 	int op;
505 	struct socket *so;
506 	int level, optname;
507 	struct mbuf **mp;
508 {
509 	int error = 0, s;
510 	struct inpcb *inp;
511 	register struct tcpcb *tp;
512 	register struct mbuf *m;
513 	register int i;
514 
515 	s = splsoftnet();
516 	inp = sotoinpcb(so);
517 	if (inp == NULL) {
518 		splx(s);
519 		if (op == PRCO_SETOPT && *mp)
520 			(void) m_free(*mp);
521 		return (ECONNRESET);
522 	}
523 #ifdef INET6
524 	tp = intotcpcb(inp);
525 #endif /* INET6 */
526 	if (level != IPPROTO_TCP) {
527 		switch (so->so_proto->pr_domain->dom_family) {
528 #ifdef INET6
529 		case PF_INET6:
530 			error = ip6_ctloutput(op, so, level, optname, mp);
531 			break;
532 #endif /* INET6 */
533 		case PF_INET:
534 			error = ip_ctloutput(op, so, level, optname, mp);
535 			break;
536 		default:
537 			error = EAFNOSUPPORT;	/*?*/
538 			break;
539 		}
540 		splx(s);
541 		return (error);
542 	}
543 #ifndef INET6
544 	tp = intotcpcb(inp);
545 #endif /* !INET6 */
546 
547 	switch (op) {
548 
549 	case PRCO_SETOPT:
550 		m = *mp;
551 		switch (optname) {
552 
553 		case TCP_NODELAY:
554 			if (m == NULL || m->m_len < sizeof (int))
555 				error = EINVAL;
556 			else if (*mtod(m, int *))
557 				tp->t_flags |= TF_NODELAY;
558 			else
559 				tp->t_flags &= ~TF_NODELAY;
560 			break;
561 
562 		case TCP_MAXSEG:
563 			if (m == NULL || m->m_len < sizeof (int)) {
564 				error = EINVAL;
565 				break;
566 			}
567 
568 			i = *mtod(m, int *);
569 			if (i > 0 && i <= tp->t_maxseg)
570 				tp->t_maxseg = i;
571 			else
572 				error = EINVAL;
573 			break;
574 
575 #ifdef TCP_SACK
576 		case TCP_SACK_DISABLE:
577 			if (m == NULL || m->m_len < sizeof (int)) {
578 				error = EINVAL;
579 				break;
580 			}
581 
582 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
583 				error = EPERM;
584 				break;
585 			}
586 
587 			if (tp->t_flags & TF_SIGNATURE) {
588 				error = EPERM;
589 				break;
590 			}
591 
592 			if (*mtod(m, int *))
593 				tp->sack_disable = 1;
594 			else
595 				tp->sack_disable = 0;
596 			break;
597 #endif
598 #ifdef TCP_SIGNATURE
599 		case TCP_SIGNATURE_ENABLE:
600 			if (m == NULL || m->m_len < sizeof (int)) {
601 				error = EINVAL;
602 				break;
603 			}
604 
605 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
606 				error = EPERM;
607 				break;
608 			}
609 
610 			if (*mtod(m, int *)) {
611 				tp->t_flags |= TF_SIGNATURE;
612 #ifdef TCP_SACK
613 				tp->sack_disable = 1;
614 #endif /* TCP_SACK */
615 			} else
616 				tp->t_flags &= ~TF_SIGNATURE;
617 			break;
618 #endif /* TCP_SIGNATURE */
619  		default:
620 			error = ENOPROTOOPT;
621 			break;
622 		}
623 		if (m)
624 			(void) m_free(m);
625 		break;
626 
627 	case PRCO_GETOPT:
628 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
629 		m->m_len = sizeof(int);
630 
631 		switch (optname) {
632 		case TCP_NODELAY:
633 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
634 			break;
635 		case TCP_MAXSEG:
636 			*mtod(m, int *) = tp->t_maxseg;
637 			break;
638 #ifdef TCP_SACK
639 		case TCP_SACK_DISABLE:
640 			*mtod(m, int *) = tp->sack_disable;
641 			break;
642 #endif
643 		default:
644 			error = ENOPROTOOPT;
645 			break;
646 		}
647 		break;
648 	}
649 	splx(s);
650 	return (error);
651 }
652 
/*
 * Default send and receive buffer reservations, passed to soreserve()
 * by tcp_attach() when a socket has no reservation yet.  Either default
 * can be overridden at build time by predefining the macro.
 *
 * The macros are parenthesized and carry no trailing semicolon so that
 * they expand to a single expression wherever they are used.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024*16)
#endif
u_int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024*16)
#endif
u_int	tcp_recvspace = TCP_RECVSPACE;
661 
662 /*
663  * Attach TCP protocol to socket, allocating
664  * internet protocol control block, tcp control block,
665  * bufer space, and entering LISTEN state if to accept connections.
666  */
667 int
668 tcp_attach(so)
669 	struct socket *so;
670 {
671 	register struct tcpcb *tp;
672 	struct inpcb *inp;
673 	int error;
674 
675 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
676 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
677 		if (error)
678 			return (error);
679 	}
680 	error = in_pcballoc(so, &tcbtable);
681 	if (error)
682 		return (error);
683 	inp = sotoinpcb(so);
684 	tp = tcp_newtcpcb(inp);
685 	if (tp == NULL) {
686 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
687 
688 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
689 		in_pcbdetach(inp);
690 		so->so_state |= nofd;
691 		return (ENOBUFS);
692 	}
693 	tp->t_state = TCPS_CLOSED;
694 #ifdef INET6
695 	/* we disallow IPv4 mapped address completely. */
696 	if (inp->inp_flags & INP_IPV6)
697 		tp->pf = PF_INET6;
698 	else
699 		tp->pf = PF_INET;
700 #else
701 	tp->pf = PF_INET;
702 #endif
703 	return (0);
704 }
705 
706 /*
707  * Initiate (or continue) disconnect.
708  * If embryonic state, just send reset (once).
709  * If in ``let data drain'' option and linger null, just drop.
710  * Otherwise (hard), mark socket disconnecting and drop
711  * current input data; switch states based on user close, and
712  * send segment to peer (with FIN).
713  */
714 struct tcpcb *
715 tcp_disconnect(tp)
716 	register struct tcpcb *tp;
717 {
718 	struct socket *so = tp->t_inpcb->inp_socket;
719 
720 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
721 		tp = tcp_close(tp);
722 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
723 		tp = tcp_drop(tp, 0);
724 	else {
725 		soisdisconnecting(so);
726 		sbflush(&so->so_rcv);
727 		tp = tcp_usrclosed(tp);
728 		if (tp)
729 			(void) tcp_output(tp);
730 	}
731 	return (tp);
732 }
733 
734 /*
735  * User issued close, and wish to trail through shutdown states:
736  * if never received SYN, just forget it.  If got a SYN from peer,
737  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
738  * If already got a FIN from peer, then almost done; go to LAST_ACK
739  * state.  In all other cases, have already sent FIN to peer (e.g.
740  * after PRU_SHUTDOWN), and just have to play tedious game waiting
741  * for peer to send FIN or not respond to keep-alives, etc.
742  * We can let the user exit from the close as soon as the FIN is acked.
743  */
744 struct tcpcb *
745 tcp_usrclosed(tp)
746 	register struct tcpcb *tp;
747 {
748 
749 	switch (tp->t_state) {
750 
751 	case TCPS_CLOSED:
752 	case TCPS_LISTEN:
753 	case TCPS_SYN_SENT:
754 		tp->t_state = TCPS_CLOSED;
755 		tp = tcp_close(tp);
756 		break;
757 
758 	case TCPS_SYN_RECEIVED:
759 	case TCPS_ESTABLISHED:
760 		tp->t_state = TCPS_FIN_WAIT_1;
761 		break;
762 
763 	case TCPS_CLOSE_WAIT:
764 		tp->t_state = TCPS_LAST_ACK;
765 		break;
766 	}
767 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
768 		soisdisconnected(tp->t_inpcb->inp_socket);
769 		/*
770 		 * If we are in FIN_WAIT_2, we arrived here because the
771 		 * application did a shutdown of the send side.  Like the
772 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
773 		 * a full close, we start a timer to make sure sockets are
774 		 * not left in FIN_WAIT_2 forever.
775 		 */
776 		if (tp->t_state == TCPS_FIN_WAIT_2)
777 			tp->t_timer[TCPT_2MSL] = tcp_maxidle;
778 	}
779 	return (tp);
780 }
781 
782 /*
783  * Look up a socket for ident..
784  */
785 int
786 tcp_ident(oldp, oldlenp, newp, newlen)
787 	void *oldp;
788 	size_t *oldlenp;
789 	void *newp;
790 	size_t newlen;
791 {
792 	int error = 0, s;
793 	int is_ipv6 = 0;
794 	struct tcp_ident_mapping tir;
795 	struct inpcb *inp;
796 	struct sockaddr_in *fin, *lin;
797 #ifdef INET6
798 	struct sockaddr_in6 *fin6, *lin6;
799 	struct in6_addr f6, l6;
800 #endif
801 
802 	if (oldp == NULL || newp != NULL || newlen != 0)
803 		return (EINVAL);
804 	if  (*oldlenp < sizeof(tir))
805 		return (ENOMEM);
806 	if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
807 		return (error);
808 	switch (tir.faddr.ss_family) {
809 #ifdef INET6
810 	case AF_INET6:
811 		is_ipv6 = 1;
812 		fin6 = (struct sockaddr_in6 *)&tir.faddr;
813 		error = in6_embedscope(&f6, fin6, NULL, NULL);
814 		if (error)
815 			return EINVAL;	/*?*/
816 		lin6 = (struct sockaddr_in6 *)&tir.laddr;
817 		error = in6_embedscope(&l6, lin6, NULL, NULL);
818 		if (error)
819 			return EINVAL;	/*?*/
820 		break;
821 #endif
822 	case AF_INET:
823 	  	fin = (struct sockaddr_in *)&tir.faddr;
824 		lin = (struct sockaddr_in *)&tir.laddr;
825 		break;
826 	default:
827 		return(EINVAL);
828 	}
829 
830 	s = splsoftnet();
831 	if (is_ipv6) {
832 #ifdef INET6
833 		inp = in6_pcbhashlookup(&tcbtable, &f6,
834 		    fin6->sin6_port, &l6, lin6->sin6_port);
835 #else
836 		panic("tcp_ident: cannot happen");
837 #endif
838 	}
839 	else
840 		inp = in_pcbhashlookup(&tcbtable,  fin->sin_addr,
841 		    fin->sin_port, lin->sin_addr, lin->sin_port);
842 
843 	if (inp == NULL) {
844 		++tcpstat.tcps_pcbhashmiss;
845 		if (is_ipv6) {
846 #ifdef INET6
847 			inp = in_pcblookup(&tcbtable, &f6,
848 			    fin6->sin6_port, &l6, lin6->sin6_port,
849 			    INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
850 #else
851 			panic("tcp_ident: cannot happen");
852 #endif
853 		}
854 		else
855 			inp = in_pcblookup(&tcbtable, &fin->sin_addr,
856 			    fin->sin_port, &lin->sin_addr, lin->sin_port,
857 			    INPLOOKUP_WILDCARD);
858 	}
859 
860 	if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
861 		tir.ruid = inp->inp_socket->so_ruid;
862 		tir.euid = inp->inp_socket->so_euid;
863 	} else {
864 		tir.ruid = -1;
865 		tir.euid = -1;
866 	}
867 	splx(s);
868 
869 	*oldlenp = sizeof (tir);
870 	error = copyout((void *)&tir, oldp, sizeof (tir));
871 	return (error);
872 }
873 
874 /*
875  * Sysctl for tcp variables.
876  */
877 int
878 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
879 	int *name;
880 	u_int namelen;
881 	void *oldp;
882 	size_t *oldlenp;
883 	void *newp;
884 	size_t newlen;
885 {
886 
887 	/* All sysctl names at this level are terminal. */
888 	if (namelen != 1)
889 		return (ENOTDIR);
890 
891 	switch (name[0]) {
892 	case TCPCTL_RFC1323:
893 		return (sysctl_int(oldp, oldlenp, newp, newlen,
894 		    &tcp_do_rfc1323));
895 #ifdef TCP_SACK
896 	case TCPCTL_SACK:
897 		return (sysctl_int(oldp, oldlenp, newp, newlen,
898 		    &tcp_do_sack));
899 #endif
900 	case TCPCTL_MSSDFLT:
901 		return (sysctl_int(oldp, oldlenp, newp, newlen,
902 		    &tcp_mssdflt));
903 	case TCPCTL_KEEPINITTIME:
904 		return (sysctl_int(oldp, oldlenp, newp, newlen,
905 		    &tcptv_keep_init));
906 
907 	case TCPCTL_KEEPIDLE:
908 		return (sysctl_int(oldp, oldlenp, newp, newlen,
909 		    &tcp_keepidle));
910 
911 	case TCPCTL_KEEPINTVL:
912 		return (sysctl_int(oldp, oldlenp, newp, newlen,
913 		    &tcp_keepintvl));
914 
915 	case TCPCTL_SLOWHZ:
916 		return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
917 
918 	case TCPCTL_BADDYNAMIC:
919 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
920 		    baddynamicports.tcp, sizeof(baddynamicports.tcp)));
921 
922 	case TCPCTL_RECVSPACE:
923 		return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_recvspace));
924 
925 	case TCPCTL_SENDSPACE:
926 		return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_sendspace));
927 	case TCPCTL_IDENT:
928 		return (tcp_ident(oldp, oldlenp, newp, newlen));
929 	case TCPCTL_RSTPPSLIMIT:
930 		return (sysctl_int(oldp, oldlenp, newp, newlen,
931 		    &tcp_rst_ppslim));
932 	default:
933 		return (ENOPROTOOPT);
934 	}
935 	/* NOTREACHED */
936 }
937