xref: /openbsd-src/sys/netinet/tcp_usrreq.c (revision b725ae7711052a2233e31a66fefb8a752c388d7a)
1 /*	$OpenBSD: tcp_usrreq.c,v 1.85 2004/04/27 17:51:33 otto Exp $	*/
2 /*	$NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/protosw.h>
77 #include <sys/stat.h>
78 #include <sys/sysctl.h>
79 #include <sys/domain.h>
80 #include <sys/kernel.h>
81 
82 #include <net/if.h>
83 #include <net/route.h>
84 
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/tcp.h>
92 #include <netinet/tcp_fsm.h>
93 #include <netinet/tcp_seq.h>
94 #include <netinet/tcp_timer.h>
95 #include <netinet/tcp_var.h>
96 #include <netinet/tcpip.h>
97 #include <netinet/tcp_debug.h>
98 
99 /*
100  * TCP protocol interface to socket abstraction.
101  */
102 extern	char *tcpstates[];
103 extern	int tcptv_keep_init;
104 
105 extern int tcp_rst_ppslim;
106 
107 /* from in_pcb.c */
108 extern	struct baddynamicports baddynamicports;
109 
110 #ifndef TCP_SENDSPACE
111 #define	TCP_SENDSPACE	1024*16
112 #endif
113 u_int	tcp_sendspace = TCP_SENDSPACE;
114 #ifndef TCP_RECVSPACE
115 #define	TCP_RECVSPACE	1024*16
116 #endif
117 u_int	tcp_recvspace = TCP_RECVSPACE;
118 
119 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
120 
121 struct	inpcbtable tcbtable;
122 
123 int tcp_ident(void *, size_t *, void *, size_t, int);
124 
#ifdef INET6
/*
 * User request entry point for TCP over IPv6.  Every request is handed
 * to tcp_usrreq() unchanged; the process argument p is not used.
 */
/*ARGSUSED*/
int
tcp6_usrreq(so, req, m, nam, control, p)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
	struct proc *p;
{
	int error;

	error = tcp_usrreq(so, req, m, nam, control);
	return (error);
}
#endif /* INET6 */
137 
138 /*
139  * Process a TCP user request for TCP tb.  If this is a send request
140  * then m is the mbuf chain of send data.  If this is a timer expiration
141  * (called from the software clock routine), then timertype tells which timer.
142  */
143 /*ARGSUSED*/
144 int
145 tcp_usrreq(so, req, m, nam, control)
146 	struct socket *so;
147 	int req;
148 	struct mbuf *m, *nam, *control;
149 {
150 	struct sockaddr_in *sin;
151 	struct inpcb *inp;
152 	struct tcpcb *tp = NULL;
153 	int s;
154 	int error = 0;
155 	int ostate;
156 
157 	if (req == PRU_CONTROL) {
158 #ifdef INET6
159 		if (sotopf(so) == PF_INET6)
160 			return in6_control(so, (u_long)m, (caddr_t)nam,
161 			    (struct ifnet *)control, 0);
162 		else
163 #endif /* INET6 */
164 			return (in_control(so, (u_long)m, (caddr_t)nam,
165 			    (struct ifnet *)control));
166 	}
167 	if (control && control->m_len) {
168 		m_freem(control);
169 		if (m)
170 			m_freem(m);
171 		return (EINVAL);
172 	}
173 
174 	s = splsoftnet();
175 	inp = sotoinpcb(so);
176 	/*
177 	 * When a TCP is attached to a socket, then there will be
178 	 * a (struct inpcb) pointed at by the socket, and this
179 	 * structure will point at a subsidiary (struct tcpcb).
180 	 */
181 	if (inp == 0 && req != PRU_ATTACH) {
182 		splx(s);
183 		/*
184 		 * The following corrects an mbuf leak under rare
185 		 * circumstances
186 		 */
187 		if (m && (req == PRU_SEND || req == PRU_SENDOOB))
188 			m_freem(m);
189 		return (EINVAL);		/* XXX */
190 	}
191 	if (inp) {
192 		tp = intotcpcb(inp);
193 		/* WHAT IF TP IS 0? */
194 #ifdef KPROF
195 		tcp_acounts[tp->t_state][req]++;
196 #endif
197 		ostate = tp->t_state;
198 	} else
199 		ostate = 0;
200 	switch (req) {
201 
202 	/*
203 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
204 	 * and an internet control block.
205 	 */
206 	case PRU_ATTACH:
207 		if (inp) {
208 			error = EISCONN;
209 			break;
210 		}
211 		error = tcp_attach(so);
212 		if (error)
213 			break;
214 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
215 			so->so_linger = TCP_LINGERTIME;
216 		tp = sototcpcb(so);
217 		break;
218 
219 	/*
220 	 * PRU_DETACH detaches the TCP protocol from the socket.
221 	 * If the protocol state is non-embryonic, then can't
222 	 * do this directly: have to initiate a PRU_DISCONNECT,
223 	 * which may finish later; embryonic TCB's can just
224 	 * be discarded here.
225 	 */
226 	case PRU_DETACH:
227 		tp = tcp_disconnect(tp);
228 		break;
229 
230 	/*
231 	 * Give the socket an address.
232 	 */
233 	case PRU_BIND:
234 #ifdef INET6
235 		if (inp->inp_flags & INP_IPV6)
236 			error = in6_pcbbind(inp, nam);
237 		else
238 #endif
239 			error = in_pcbbind(inp, nam);
240 		if (error)
241 			break;
242 		break;
243 
244 	/*
245 	 * Prepare to accept connections.
246 	 */
247 	case PRU_LISTEN:
248 		if (inp->inp_lport == 0) {
249 #ifdef INET6
250 			if (inp->inp_flags & INP_IPV6)
251 				error = in6_pcbbind(inp, NULL);
252 			else
253 #endif
254 				error = in_pcbbind(inp, NULL);
255 		}
256 		/* If the in_pcbbind() above is called, the tp->pf
257 		   should still be whatever it was before. */
258 		if (error == 0)
259 			tp->t_state = TCPS_LISTEN;
260 		break;
261 
262 	/*
263 	 * Initiate connection to peer.
264 	 * Create a template for use in transmissions on this connection.
265 	 * Enter SYN_SENT state, and mark socket as connecting.
266 	 * Start keep-alive timer, and seed output sequence space.
267 	 * Send initial segment on connection.
268 	 */
269 	case PRU_CONNECT:
270 		sin = mtod(nam, struct sockaddr_in *);
271 
272 #ifdef INET6
273 		if (sin->sin_family == AF_INET6) {
274 			struct in6_addr *in6_addr = &mtod(nam,
275 			    struct sockaddr_in6 *)->sin6_addr;
276 
277 			if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
278 			    IN6_IS_ADDR_MULTICAST(in6_addr) ||
279 			    (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
280 			    ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
281 			    IN_MULTICAST(in6_addr->s6_addr32[3]) ||
282 			    in_broadcast(sin->sin_addr, NULL)))) {
283 				error = EINVAL;
284 				break;
285 			}
286 
287 			if (inp->inp_lport == 0) {
288 				error = in6_pcbbind(inp, NULL);
289 				if (error)
290 					break;
291 			}
292 			error = in6_pcbconnect(inp, nam);
293 		} else if (sin->sin_family == AF_INET)
294 #endif /* INET6 */
295 		{
296 			if ((sin->sin_addr.s_addr == INADDR_ANY) ||
297 			    IN_MULTICAST(sin->sin_addr.s_addr) ||
298 			    in_broadcast(sin->sin_addr, NULL)) {
299 				error = EINVAL;
300 				break;
301 			}
302 
303 			if (inp->inp_lport == 0) {
304 				error = in_pcbbind(inp, NULL);
305 				if (error)
306 					break;
307 			}
308 			error = in_pcbconnect(inp, nam);
309 		}
310 
311 		if (error)
312 			break;
313 
314 		tp->t_template = tcp_template(tp);
315 		if (tp->t_template == 0) {
316 			in_pcbdisconnect(inp);
317 			error = ENOBUFS;
318 			break;
319 		}
320 
321 		so->so_state |= SS_CONNECTOUT;
322 		/* Compute window scaling to request.  */
323 		tcp_rscale(tp, so->so_rcv.sb_hiwat);
324 
325 		soisconnecting(so);
326 		tcpstat.tcps_connattempt++;
327 		tp->t_state = TCPS_SYN_SENT;
328 		TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
329 #ifdef TCP_COMPAT_42
330 		tp->iss = tcp_iss;
331 		tcp_iss += TCP_ISSINCR/2;
332 #else  /* TCP_COMPAT_42 */
333 		tp->iss = tcp_rndiss_next();
334 #endif /* !TCP_COMPAT_42 */
335 		tcp_sendseqinit(tp);
336 #if defined(TCP_SACK)
337 		tp->snd_last = tp->snd_una;
338 #endif
339 #if defined(TCP_SACK) && defined(TCP_FACK)
340 		tp->snd_fack = tp->snd_una;
341 		tp->retran_data = 0;
342 		tp->snd_awnd = 0;
343 #endif
344 		error = tcp_output(tp);
345 		break;
346 
347 	/*
348 	 * Create a TCP connection between two sockets.
349 	 */
350 	case PRU_CONNECT2:
351 		error = EOPNOTSUPP;
352 		break;
353 
354 	/*
355 	 * Initiate disconnect from peer.
356 	 * If connection never passed embryonic stage, just drop;
357 	 * else if don't need to let data drain, then can just drop anyways,
358 	 * else have to begin TCP shutdown process: mark socket disconnecting,
359 	 * drain unread data, state switch to reflect user close, and
360 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
361 	 * when peer sends FIN and acks ours.
362 	 *
363 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
364 	 */
365 	case PRU_DISCONNECT:
366 		tp = tcp_disconnect(tp);
367 		break;
368 
369 	/*
370 	 * Accept a connection.  Essentially all the work is
371 	 * done at higher levels; just return the address
372 	 * of the peer, storing through addr.
373 	 */
374 	case PRU_ACCEPT:
375 #ifdef INET6
376 		if (inp->inp_flags & INP_IPV6)
377 			in6_setpeeraddr(inp, nam);
378 		else
379 #endif
380 			in_setpeeraddr(inp, nam);
381 		break;
382 
383 	/*
384 	 * Mark the connection as being incapable of further output.
385 	 */
386 	case PRU_SHUTDOWN:
387 		if (so->so_state & SS_CANTSENDMORE)
388 			break;
389 		socantsendmore(so);
390 		tp = tcp_usrclosed(tp);
391 		if (tp)
392 			error = tcp_output(tp);
393 		break;
394 
395 	/*
396 	 * After a receive, possibly send window update to peer.
397 	 */
398 	case PRU_RCVD:
399 		/*
400 		 * soreceive() calls this function when a user receives
401 		 * ancillary data on a listening socket. We don't call
402 		 * tcp_output in such a case, since there is no header
403 		 * template for a listening socket and hence the kernel
404 		 * will panic.
405 		 */
406 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
407 			(void) tcp_output(tp);
408 		break;
409 
410 	/*
411 	 * Do a send by putting data in output queue and updating urgent
412 	 * marker if URG set.  Possibly send more data.
413 	 */
414 	case PRU_SEND:
415 		sbappendstream(&so->so_snd, m);
416 		error = tcp_output(tp);
417 		break;
418 
419 	/*
420 	 * Abort the TCP.
421 	 */
422 	case PRU_ABORT:
423 		tp = tcp_drop(tp, ECONNABORTED);
424 		break;
425 
426 	case PRU_SENSE:
427 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
428 		splx(s);
429 		return (0);
430 
431 	case PRU_RCVOOB:
432 		if ((so->so_oobmark == 0 &&
433 		    (so->so_state & SS_RCVATMARK) == 0) ||
434 		    so->so_options & SO_OOBINLINE ||
435 		    tp->t_oobflags & TCPOOB_HADDATA) {
436 			error = EINVAL;
437 			break;
438 		}
439 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
440 			error = EWOULDBLOCK;
441 			break;
442 		}
443 		m->m_len = 1;
444 		*mtod(m, caddr_t) = tp->t_iobc;
445 		if (((long)nam & MSG_PEEK) == 0)
446 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
447 		break;
448 
449 	case PRU_SENDOOB:
450 		if (sbspace(&so->so_snd) < -512) {
451 			m_freem(m);
452 			error = ENOBUFS;
453 			break;
454 		}
455 		/*
456 		 * According to RFC961 (Assigned Protocols),
457 		 * the urgent pointer points to the last octet
458 		 * of urgent data.  We continue, however,
459 		 * to consider it to indicate the first octet
460 		 * of data past the urgent section.
461 		 * Otherwise, snd_up should be one lower.
462 		 */
463 		sbappendstream(&so->so_snd, m);
464 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
465 		tp->t_force = 1;
466 		error = tcp_output(tp);
467 		tp->t_force = 0;
468 		break;
469 
470 	case PRU_SOCKADDR:
471 #ifdef INET6
472 		if (inp->inp_flags & INP_IPV6)
473 			in6_setsockaddr(inp, nam);
474 		else
475 #endif
476 			in_setsockaddr(inp, nam);
477 		break;
478 
479 	case PRU_PEERADDR:
480 #ifdef INET6
481 		if (inp->inp_flags & INP_IPV6)
482 			in6_setpeeraddr(inp, nam);
483 		else
484 #endif
485 			in_setpeeraddr(inp, nam);
486 		break;
487 
488 	default:
489 		panic("tcp_usrreq");
490 	}
491 	if (tp && (so->so_options & SO_DEBUG))
492 		tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
493 	splx(s);
494 	return (error);
495 }
496 
497 int
498 tcp_ctloutput(op, so, level, optname, mp)
499 	int op;
500 	struct socket *so;
501 	int level, optname;
502 	struct mbuf **mp;
503 {
504 	int error = 0, s;
505 	struct inpcb *inp;
506 	struct tcpcb *tp;
507 	struct mbuf *m;
508 	int i;
509 
510 	s = splsoftnet();
511 	inp = sotoinpcb(so);
512 	if (inp == NULL) {
513 		splx(s);
514 		if (op == PRCO_SETOPT && *mp)
515 			(void) m_free(*mp);
516 		return (ECONNRESET);
517 	}
518 #ifdef INET6
519 	tp = intotcpcb(inp);
520 #endif /* INET6 */
521 	if (level != IPPROTO_TCP) {
522 		switch (so->so_proto->pr_domain->dom_family) {
523 #ifdef INET6
524 		case PF_INET6:
525 			error = ip6_ctloutput(op, so, level, optname, mp);
526 			break;
527 #endif /* INET6 */
528 		case PF_INET:
529 			error = ip_ctloutput(op, so, level, optname, mp);
530 			break;
531 		default:
532 			error = EAFNOSUPPORT;	/*?*/
533 			break;
534 		}
535 		splx(s);
536 		return (error);
537 	}
538 #ifndef INET6
539 	tp = intotcpcb(inp);
540 #endif /* !INET6 */
541 
542 	switch (op) {
543 
544 	case PRCO_SETOPT:
545 		m = *mp;
546 		switch (optname) {
547 
548 		case TCP_NODELAY:
549 			if (m == NULL || m->m_len < sizeof (int))
550 				error = EINVAL;
551 			else if (*mtod(m, int *))
552 				tp->t_flags |= TF_NODELAY;
553 			else
554 				tp->t_flags &= ~TF_NODELAY;
555 			break;
556 
557 		case TCP_MAXSEG:
558 			if (m == NULL || m->m_len < sizeof (int)) {
559 				error = EINVAL;
560 				break;
561 			}
562 
563 			i = *mtod(m, int *);
564 			if (i > 0 && i <= tp->t_maxseg)
565 				tp->t_maxseg = i;
566 			else
567 				error = EINVAL;
568 			break;
569 
570 #ifdef TCP_SACK
571 		case TCP_SACK_ENABLE:
572 			if (m == NULL || m->m_len < sizeof (int)) {
573 				error = EINVAL;
574 				break;
575 			}
576 
577 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
578 				error = EPERM;
579 				break;
580 			}
581 
582 			if (tp->t_flags & TF_SIGNATURE) {
583 				error = EPERM;
584 				break;
585 			}
586 
587 			if (*mtod(m, int *))
588 				tp->sack_enable = 1;
589 			else
590 				tp->sack_enable = 0;
591 			break;
592 #endif
593 #ifdef TCP_SIGNATURE
594 		case TCP_MD5SIG:
595 			if (m == NULL || m->m_len < sizeof (int)) {
596 				error = EINVAL;
597 				break;
598 			}
599 
600 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
601 				error = EPERM;
602 				break;
603 			}
604 
605 			if (*mtod(m, int *)) {
606 				tp->t_flags |= TF_SIGNATURE;
607 #ifdef TCP_SACK
608 				tp->sack_enable = 0;
609 #endif /* TCP_SACK */
610 			} else
611 				tp->t_flags &= ~TF_SIGNATURE;
612 			break;
613 #endif /* TCP_SIGNATURE */
614 		default:
615 			error = ENOPROTOOPT;
616 			break;
617 		}
618 		if (m)
619 			(void) m_free(m);
620 		break;
621 
622 	case PRCO_GETOPT:
623 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
624 		m->m_len = sizeof(int);
625 
626 		switch (optname) {
627 		case TCP_NODELAY:
628 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
629 			break;
630 		case TCP_MAXSEG:
631 			*mtod(m, int *) = tp->t_maxseg;
632 			break;
633 #ifdef TCP_SACK
634 		case TCP_SACK_ENABLE:
635 			*mtod(m, int *) = tp->sack_enable;
636 			break;
637 #endif
638 #ifdef TCP_SIGNATURE
639 		case TCP_MD5SIG:
640 			*mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
641 			break;
642 #endif
643 		default:
644 			error = ENOPROTOOPT;
645 			break;
646 		}
647 		break;
648 	}
649 	splx(s);
650 	return (error);
651 }
652 
653 /*
654  * Attach TCP protocol to socket, allocating
655  * internet protocol control block, tcp control block,
656  * bufer space, and entering LISTEN state if to accept connections.
657  */
658 int
659 tcp_attach(so)
660 	struct socket *so;
661 {
662 	struct tcpcb *tp;
663 	struct inpcb *inp;
664 	int error;
665 
666 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
667 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
668 		if (error)
669 			return (error);
670 	}
671 	error = in_pcballoc(so, &tcbtable);
672 	if (error)
673 		return (error);
674 	inp = sotoinpcb(so);
675 	tp = tcp_newtcpcb(inp);
676 	if (tp == NULL) {
677 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
678 
679 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
680 		in_pcbdetach(inp);
681 		so->so_state |= nofd;
682 		return (ENOBUFS);
683 	}
684 	tp->t_state = TCPS_CLOSED;
685 #ifdef INET6
686 	/* we disallow IPv4 mapped address completely. */
687 	if (inp->inp_flags & INP_IPV6)
688 		tp->pf = PF_INET6;
689 	else
690 		tp->pf = PF_INET;
691 #else
692 	tp->pf = PF_INET;
693 #endif
694 	return (0);
695 }
696 
697 /*
698  * Initiate (or continue) disconnect.
699  * If embryonic state, just send reset (once).
700  * If in ``let data drain'' option and linger null, just drop.
701  * Otherwise (hard), mark socket disconnecting and drop
702  * current input data; switch states based on user close, and
703  * send segment to peer (with FIN).
704  */
705 struct tcpcb *
706 tcp_disconnect(tp)
707 	struct tcpcb *tp;
708 {
709 	struct socket *so = tp->t_inpcb->inp_socket;
710 
711 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
712 		tp = tcp_close(tp);
713 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
714 		tp = tcp_drop(tp, 0);
715 	else {
716 		soisdisconnecting(so);
717 		sbflush(&so->so_rcv);
718 		tp = tcp_usrclosed(tp);
719 		if (tp)
720 			(void) tcp_output(tp);
721 	}
722 	return (tp);
723 }
724 
725 /*
726  * User issued close, and wish to trail through shutdown states:
727  * if never received SYN, just forget it.  If got a SYN from peer,
728  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
729  * If already got a FIN from peer, then almost done; go to LAST_ACK
730  * state.  In all other cases, have already sent FIN to peer (e.g.
731  * after PRU_SHUTDOWN), and just have to play tedious game waiting
732  * for peer to send FIN or not respond to keep-alives, etc.
733  * We can let the user exit from the close as soon as the FIN is acked.
734  */
735 struct tcpcb *
736 tcp_usrclosed(tp)
737 	struct tcpcb *tp;
738 {
739 
740 	switch (tp->t_state) {
741 
742 	case TCPS_CLOSED:
743 	case TCPS_LISTEN:
744 	case TCPS_SYN_SENT:
745 		tp->t_state = TCPS_CLOSED;
746 		tp = tcp_close(tp);
747 		break;
748 
749 	case TCPS_SYN_RECEIVED:
750 	case TCPS_ESTABLISHED:
751 		tp->t_state = TCPS_FIN_WAIT_1;
752 		break;
753 
754 	case TCPS_CLOSE_WAIT:
755 		tp->t_state = TCPS_LAST_ACK;
756 		break;
757 	}
758 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
759 		soisdisconnected(tp->t_inpcb->inp_socket);
760 		/*
761 		 * If we are in FIN_WAIT_2, we arrived here because the
762 		 * application did a shutdown of the send side.  Like the
763 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
764 		 * a full close, we start a timer to make sure sockets are
765 		 * not left in FIN_WAIT_2 forever.
766 		 */
767 		if (tp->t_state == TCPS_FIN_WAIT_2)
768 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
769 	}
770 	return (tp);
771 }
772 
773 /*
774  * Look up a socket for ident or tcpdrop, ...
775  */
776 int
777 tcp_ident(oldp, oldlenp, newp, newlen, dodrop)
778 	void *oldp;
779 	size_t *oldlenp;
780 	void *newp;
781 	size_t newlen;
782 	int dodrop;
783 {
784 	int error = 0, s;
785 	struct tcp_ident_mapping tir;
786 	struct inpcb *inp;
787 	struct tcpcb *tp = NULL;
788 	struct sockaddr_in *fin, *lin;
789 #ifdef INET6
790 	struct sockaddr_in6 *fin6, *lin6;
791 	struct in6_addr f6, l6;
792 #endif
793 	if (dodrop) {
794 		if (oldp != NULL || *oldlenp != 0)
795 			return (EINVAL);
796 		if (newp == NULL)
797 			return (EPERM);
798 		if (newlen < sizeof(tir))
799 			return (ENOMEM);
800 		if ((error = copyin(newp, &tir, sizeof (tir))) != 0 )
801 			return (error);
802 	} else {
803 		if (oldp == NULL)
804 			return (EINVAL);
805 		if (*oldlenp < sizeof(tir))
806 			return (ENOMEM);
807 		if (newp != NULL || newlen != 0)
808 			return (EINVAL);
809 		if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
810 			return (error);
811 	}
812 	switch (tir.faddr.ss_family) {
813 #ifdef INET6
814 	case AF_INET6:
815 		fin6 = (struct sockaddr_in6 *)&tir.faddr;
816 		error = in6_embedscope(&f6, fin6, NULL, NULL);
817 		if (error)
818 			return EINVAL;	/*?*/
819 		lin6 = (struct sockaddr_in6 *)&tir.laddr;
820 		error = in6_embedscope(&l6, lin6, NULL, NULL);
821 		if (error)
822 			return EINVAL;	/*?*/
823 		break;
824 #endif
825 	case AF_INET:
826 	  	fin = (struct sockaddr_in *)&tir.faddr;
827 		lin = (struct sockaddr_in *)&tir.laddr;
828 		break;
829 	default:
830 		return (EINVAL);
831 	}
832 
833 	s = splsoftnet();
834 	switch (tir.faddr.ss_family) {
835 	case AF_INET6:
836 #ifdef INET6
837 		inp = in6_pcbhashlookup(&tcbtable, &f6,
838 		    fin6->sin6_port, &l6, lin6->sin6_port);
839 		break;
840 #endif
841 	case AF_INET:
842 		inp = in_pcbhashlookup(&tcbtable,  fin->sin_addr,
843 		    fin->sin_port, lin->sin_addr, lin->sin_port);
844 		break;
845 	}
846 
847 	if (dodrop) {
848 		if (inp && (tp = intotcpcb(inp)) &&
849 		    ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0))
850 			tp = tcp_drop(tp, ECONNABORTED);
851 		else
852 			error = ESRCH;
853 		splx(s);
854 		return (error);
855 	}
856 
857 	if (inp == NULL) {
858 		++tcpstat.tcps_pcbhashmiss;
859 		switch (tir.faddr.ss_family) {
860 #ifdef INET6
861 		case AF_INET6:
862 			inp = in6_pcblookup_listen(&tcbtable,
863 			    &l6, lin6->sin6_port, 0);
864 			break;
865 #endif
866 		case AF_INET:
867 			inp = in_pcblookup_listen(&tcbtable,
868 			    lin->sin_addr, lin->sin_port, 0);
869 			break;
870 		}
871 	}
872 
873 	if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
874 		tir.ruid = inp->inp_socket->so_ruid;
875 		tir.euid = inp->inp_socket->so_euid;
876 	} else {
877 		tir.ruid = -1;
878 		tir.euid = -1;
879 	}
880 	splx(s);
881 
882 	*oldlenp = sizeof (tir);
883 	error = copyout((void *)&tir, oldp, sizeof (tir));
884 	return (error);
885 }
886 
887 /*
888  * Sysctl for tcp variables.
889  */
890 int
891 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
892 	int *name;
893 	u_int namelen;
894 	void *oldp;
895 	size_t *oldlenp;
896 	void *newp;
897 	size_t newlen;
898 {
899 	int error, nval;
900 
901 	/* All sysctl names at this level are terminal. */
902 	if (namelen != 1)
903 		return (ENOTDIR);
904 
905 	switch (name[0]) {
906 #ifdef TCP_SACK
907 	case TCPCTL_SACK:
908 		return (sysctl_int(oldp, oldlenp, newp, newlen,
909 		    &tcp_do_sack));
910 #endif
911 	case TCPCTL_SLOWHZ:
912 		return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
913 
914 	case TCPCTL_BADDYNAMIC:
915 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
916 		    baddynamicports.tcp, sizeof(baddynamicports.tcp)));
917 
918 	case TCPCTL_IDENT:
919 		return (tcp_ident(oldp, oldlenp, newp, newlen, 0));
920 
921 	case TCPCTL_DROP:
922 		return (tcp_ident(oldp, oldlenp, newp, newlen, 1));
923 
924 #ifdef TCP_ECN
925 	case TCPCTL_ECN:
926 		return (sysctl_int(oldp, oldlenp, newp, newlen,
927 		   &tcp_do_ecn));
928 #endif
929 	case TCPCTL_REASS_LIMIT:
930 		nval = tcp_reass_limit;
931 		error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
932 		if (error)
933 			return (error);
934 		if (nval != tcp_reass_limit) {
935 			error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
936 			if (error)
937 				return (error);
938 			tcp_reass_limit = nval;
939 		}
940 		return (0);
941 	default:
942 		if (name[0] < TCPCTL_MAXID)
943 			return (sysctl_int_arr(tcpctl_vars, name, namelen,
944 			    oldp, oldlenp, newp, newlen));
945 		return (ENOPROTOOPT);
946 	}
947 	/* NOTREACHED */
948 }
949