xref: /netbsd-src/sys/netinet/in_pcb.c (revision 9ee9e0d7de4c59c936a17df52be682915dc66f43)
1 /*	$NetBSD: in_pcb.c,v 1.135 2009/04/30 20:26:09 elad Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Public Access Networks Corporation ("Panix").  It was developed under
38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59  * POSSIBILITY OF SUCH DAMAGE.
60  */
61 
62 /*
63  * Copyright (c) 1982, 1986, 1991, 1993, 1995
64  *	The Regents of the University of California.  All rights reserved.
65  *
66  * Redistribution and use in source and binary forms, with or without
67  * modification, are permitted provided that the following conditions
68  * are met:
69  * 1. Redistributions of source code must retain the above copyright
70  *    notice, this list of conditions and the following disclaimer.
71  * 2. Redistributions in binary form must reproduce the above copyright
72  *    notice, this list of conditions and the following disclaimer in the
73  *    documentation and/or other materials provided with the distribution.
74  * 3. Neither the name of the University nor the names of its contributors
75  *    may be used to endorse or promote products derived from this software
76  *    without specific prior written permission.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88  * SUCH DAMAGE.
89  *
90  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
91  */
92 
93 #include <sys/cdefs.h>
94 __KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.135 2009/04/30 20:26:09 elad Exp $");
95 
96 #include "opt_inet.h"
97 #include "opt_ipsec.h"
98 
99 #include <sys/param.h>
100 #include <sys/systm.h>
101 #include <sys/malloc.h>
102 #include <sys/mbuf.h>
103 #include <sys/protosw.h>
104 #include <sys/socket.h>
105 #include <sys/socketvar.h>
106 #include <sys/ioctl.h>
107 #include <sys/errno.h>
108 #include <sys/time.h>
109 #include <sys/once.h>
110 #include <sys/pool.h>
111 #include <sys/proc.h>
112 #include <sys/kauth.h>
113 #include <sys/uidinfo.h>
114 #include <sys/domain.h>
115 
116 #include <net/if.h>
117 #include <net/route.h>
118 
119 #include <netinet/in.h>
120 #include <netinet/in_systm.h>
121 #include <netinet/ip.h>
122 #include <netinet/in_pcb.h>
123 #include <netinet/in_var.h>
124 #include <netinet/ip_var.h>
125 
126 #ifdef INET6
127 #include <netinet/ip6.h>
128 #include <netinet6/ip6_var.h>
129 #include <netinet6/in6_pcb.h>
130 #endif
131 
132 #ifdef IPSEC
133 #include <netinet6/ipsec.h>
134 #include <netkey/key.h>
135 #elif FAST_IPSEC
136 #include <netipsec/ipsec.h>
137 #include <netipsec/key.h>
138 #endif /* IPSEC */
139 
/*
 * IPv4 address with no explicit initializer, so it is all zeroes.
 * This file assigns it to inp_faddr on disconnect and uses it as the
 * wildcard local address in in_pcblookup_bind().
 */
struct	in_addr zeroin_addr;
141 
/*
 * Bucket selectors for the three hashes kept in a PCB table.  Each one
 * yields the address of the bucket head for the given key:
 *
 *   INPCBHASH_PORT    - keyed on the local port only
 *   INPCBHASH_BIND    - keyed on local address + local port
 *   INPCBHASH_CONNECT - keyed on the full foreign/local 4-tuple
 *
 * Addresses and ports are supplied in network byte order; they are
 * converted with ntohl()/ntohs() before being folded together and
 * masked with the table's hash mask.
 *
 * Every expansion is wrapped in parentheses so that the result is a
 * single primary expression; without them a postfix operator applied
 * to the macro result would bind to the array subscript inside the
 * expansion rather than to the resulting pointer.
 */
#define	INPCBHASH_PORT(table, lport) \
	(&(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash])
#define	INPCBHASH_BIND(table, laddr, lport) \
	(&(table)->inpt_bindhashtbl[ \
	    ((ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_bindhash])
#define	INPCBHASH_CONNECT(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_connecthashtbl[ \
	    ((ntohl((faddr).s_addr) + ntohs(fport)) + \
	     (ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_connecthash])
151 
/*
 * Port ranges searched by in_pcbsetport() when it picks a local port
 * automatically: the "anon" pair is used for ordinary sockets, the
 * "low" pair for sockets that have INP_LOWPORT set.
 */
int	anonportmin = IPPORT_ANONMIN;
int	anonportmax = IPPORT_ANONMAX;
int	lowportmin  = IPPORT_RESERVEDMIN;
int	lowportmax  = IPPORT_RESERVEDMAX;
156 
/* Pool from which in_pcballoc() takes and in_pcbdetach() returns PCBs. */
static struct pool inpcb_pool;
158 
159 static int
160 inpcb_poolinit(void)
161 {
162 
163 	pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0, "inpcbpl", NULL,
164 	    IPL_NET);
165 	return 0;
166 }
167 
168 void
169 in_pcbinit(struct inpcbtable *table, int bindhashsize, int connecthashsize)
170 {
171 	static ONCE_DECL(control);
172 
173 	CIRCLEQ_INIT(&table->inpt_queue);
174 	table->inpt_porthashtbl = hashinit(bindhashsize, HASH_LIST, true,
175 	    &table->inpt_porthash);
176 	table->inpt_bindhashtbl = hashinit(bindhashsize, HASH_LIST, true,
177 	    &table->inpt_bindhash);
178 	table->inpt_connecthashtbl = hashinit(connecthashsize, HASH_LIST, true,
179 	    &table->inpt_connecthash);
180 	table->inpt_lastlow = IPPORT_RESERVEDMAX;
181 	table->inpt_lastport = (u_int16_t)anonportmax;
182 
183 	RUN_ONCE(&control, inpcb_poolinit);
184 }
185 
186 int
187 in_pcballoc(struct socket *so, void *v)
188 {
189 	struct inpcbtable *table = v;
190 	struct inpcb *inp;
191 	int s;
192 #if defined(IPSEC) || defined(FAST_IPSEC)
193 	int error;
194 #endif
195 
196 	s = splnet();
197 	inp = pool_get(&inpcb_pool, PR_NOWAIT);
198 	splx(s);
199 	if (inp == NULL)
200 		return (ENOBUFS);
201 	memset((void *)inp, 0, sizeof(*inp));
202 	inp->inp_af = AF_INET;
203 	inp->inp_table = table;
204 	inp->inp_socket = so;
205 	inp->inp_errormtu = -1;
206 #if defined(IPSEC) || defined(FAST_IPSEC)
207 	error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
208 	if (error != 0) {
209 		s = splnet();
210 		pool_put(&inpcb_pool, inp);
211 		splx(s);
212 		return error;
213 	}
214 #endif
215 	so->so_pcb = inp;
216 	s = splnet();
217 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, &inp->inp_head,
218 	    inph_queue);
219 	LIST_INSERT_HEAD(INPCBHASH_PORT(table, inp->inp_lport), &inp->inp_head,
220 	    inph_lhash);
221 	in_pcbstate(inp, INP_ATTACHED);
222 	splx(s);
223 	return (0);
224 }
225 
226 static int
227 in_pcbsetport(struct sockaddr_in *sin, struct inpcb *inp, kauth_cred_t cred)
228 {
229 	struct inpcbtable *table = inp->inp_table;
230 	struct socket *so = inp->inp_socket;
231 	int	   cnt;
232 	u_int16_t  mymin, mymax;
233 	u_int16_t *lastport;
234 	u_int16_t lport = 0;
235 	enum kauth_network_req req;
236 	int error;
237 
238 	if (inp->inp_flags & INP_LOWPORT) {
239 #ifndef IPNOPRIVPORTS
240 		req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
241 #else
242 		req = KAUTH_REQ_NETWORK_BIND_PORT;
243 #endif
244 
245 		mymin = lowportmin;
246 		mymax = lowportmax;
247 		lastport = &table->inpt_lastlow;
248 	} else {
249 		req = KAUTH_REQ_NETWORK_BIND_PORT;
250 
251 		mymin = anonportmin;
252 		mymax = anonportmax;
253 		lastport = &table->inpt_lastport;
254 	}
255 
256 	/* XXX-kauth: KAUTH_REQ_NETWORK_BIND_AUTOASSIGN_{,PRIV}PORT */
257 	error = kauth_authorize_network(cred, KAUTH_NETWORK_BIND, req, so, sin,
258 	    NULL);
259 	if (error)
260 		return (error);
261 
262 	if (mymin > mymax) {	/* sanity check */
263 		u_int16_t swp;
264 
265 		swp = mymin;
266 		mymin = mymax;
267 		mymax = swp;
268 	}
269 
270 	lport = *lastport - 1;
271 	for (cnt = mymax - mymin + 1; cnt; cnt--, lport--) {
272 		if (lport < mymin || lport > mymax)
273 			lport = mymax;
274 		if (!in_pcblookup_port(table, sin->sin_addr, htons(lport), 1)) {
275 			/* We have a free port, check with the secmodel(s). */
276 			sin->sin_port = lport;
277 			error = kauth_authorize_network(cred,
278 			    KAUTH_NETWORK_BIND, req, so, sin, NULL);
279 			if (error) {
280 				/* Secmodel says no. Keep looking. */
281 				continue;
282 			}
283 
284 			goto found;
285 		}
286 	}
287 
288 	return (EAGAIN);
289 
290  found:
291 	inp->inp_flags |= INP_ANONPORT;
292 	*lastport = lport;
293 	lport = htons(lport);
294 	inp->inp_lport = lport;
295 	in_pcbstate(inp, INP_BOUND);
296 
297 	return (0);
298 }
299 
300 static int
301 in_pcbbind_addr(struct inpcb *inp, struct sockaddr_in *sin, kauth_cred_t cred)
302 {
303 	if (sin->sin_family != AF_INET)
304 		return (EAFNOSUPPORT);
305 
306 	if (!in_nullhost(sin->sin_addr)) {
307 		struct in_ifaddr *ia = NULL;
308 
309 		INADDR_TO_IA(sin->sin_addr, ia);
310 		/* check for broadcast addresses */
311 		if (ia == NULL)
312 			ia = ifatoia(ifa_ifwithaddr(sintosa(sin)));
313 		if (ia == NULL)
314 			return (EADDRNOTAVAIL);
315 	}
316 
317 	inp->inp_laddr = sin->sin_addr;
318 
319 	return (0);
320 }
321 
322 static int
323 in_pcbbind_port(struct inpcb *inp, struct sockaddr_in *sin, kauth_cred_t cred)
324 {
325 	struct inpcbtable *table = inp->inp_table;
326 	struct socket *so = inp->inp_socket;
327 	int reuseport = (so->so_options & SO_REUSEPORT);
328 	int wild = 0, error;
329 
330 	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
331 		/*
332 		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
333 		 * allow complete duplication of binding if
334 		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
335 		 * and a multicast address is bound on both
336 		 * new and duplicated sockets.
337 		 */
338 		if (so->so_options & SO_REUSEADDR)
339 			reuseport = SO_REUSEADDR|SO_REUSEPORT;
340 	}
341 
342 	if (sin->sin_port == 0) {
343 		error = in_pcbsetport(sin, inp, cred);
344 		if (error)
345 			return (error);
346 	} else {
347 		struct inpcb *t;
348 #ifdef INET6
349 		struct in6pcb *t6;
350 		struct in6_addr mapped;
351 #endif
352 		enum kauth_network_req req;
353 
354 		if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
355 			wild = 1;
356 
357 #ifndef IPNOPRIVPORTS
358 		if (ntohs(sin->sin_port) < IPPORT_RESERVED)
359 			req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
360 		else
361 #endif /* !IPNOPRIVPORTS */
362 			req = KAUTH_REQ_NETWORK_BIND_PORT;
363 
364 		error = kauth_authorize_network(cred, KAUTH_NETWORK_BIND, req,
365 		    so, sin, NULL);
366 		if (error)
367 			return (error);
368 
369 #ifdef INET6
370 		memset(&mapped, 0, sizeof(mapped));
371 		mapped.s6_addr16[5] = 0xffff;
372 		memcpy(&mapped.s6_addr32[3], &sin->sin_addr,
373 		    sizeof(mapped.s6_addr32[3]));
374 		t6 = in6_pcblookup_port(table, &mapped, sin->sin_port, wild);
375 		if (t6 && (reuseport & t6->in6p_socket->so_options) == 0)
376 			return (EADDRINUSE);
377 #endif
378 
379 		/* XXX-kauth */
380 		if (so->so_uidinfo->ui_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
381 			t = in_pcblookup_port(table, sin->sin_addr, sin->sin_port, 1);
382 			/*
383 			 * XXX:	investigate ramifications of loosening this
384 			 *	restriction so that as long as both ports have
385 			 *	SO_REUSEPORT allow the bind
386 			 */
387 			if (t &&
388 			    (!in_nullhost(sin->sin_addr) ||
389 			     !in_nullhost(t->inp_laddr) ||
390 			     (t->inp_socket->so_options & SO_REUSEPORT) == 0)
391 			    && (so->so_uidinfo->ui_uid != t->inp_socket->so_uidinfo->ui_uid)) {
392 				return (EADDRINUSE);
393 			}
394 		}
395 		t = in_pcblookup_port(table, sin->sin_addr, sin->sin_port, wild);
396 		if (t && (reuseport & t->inp_socket->so_options) == 0)
397 			return (EADDRINUSE);
398 
399 		inp->inp_lport = sin->sin_port;
400 		in_pcbstate(inp, INP_BOUND);
401 	}
402 
403 	LIST_REMOVE(&inp->inp_head, inph_lhash);
404 	LIST_INSERT_HEAD(INPCBHASH_PORT(table, inp->inp_lport), &inp->inp_head,
405 	    inph_lhash);
406 
407 	return (0);
408 }
409 
410 int
411 in_pcbbind(void *v, struct mbuf *nam, struct lwp *l)
412 {
413 	struct inpcb *inp = v;
414 	struct sockaddr_in *sin = NULL; /* XXXGCC */
415 	struct sockaddr_in lsin;
416 	int error;
417 
418 	if (inp->inp_af != AF_INET)
419 		return (EINVAL);
420 
421 	if (TAILQ_FIRST(&in_ifaddrhead) == 0)
422 		return (EADDRNOTAVAIL);
423 	if (inp->inp_lport || !in_nullhost(inp->inp_laddr))
424 		return (EINVAL);
425 
426 	if (nam != NULL) {
427 		sin = mtod(nam, struct sockaddr_in *);
428 		if (nam->m_len != sizeof (*sin))
429 			return (EINVAL);
430 	} else {
431 		lsin = *((const struct sockaddr_in *)
432 		    inp->inp_socket->so_proto->pr_domain->dom_sa_any);
433 		sin = &lsin;
434 	}
435 
436 	/* Bind address. */
437 	error = in_pcbbind_addr(inp, sin, l->l_cred);
438 	if (error)
439 		return (error);
440 
441 	/* Bind port. */
442 	error = in_pcbbind_port(inp, sin, l->l_cred);
443 	if (error) {
444 		inp->inp_laddr.s_addr = INADDR_ANY;
445 
446 		return (error);
447 	}
448 
449 	return (0);
450 }
451 
452 /*
453  * Connect from a socket to a specified address.
454  * Both address and port must be specified in argument sin.
455  * If don't have a local address for this socket yet,
456  * then pick one.
457  */
458 int
459 in_pcbconnect(void *v, struct mbuf *nam, struct lwp *l)
460 {
461 	struct inpcb *inp = v;
462 	struct in_ifaddr *ia = NULL;
463 	struct sockaddr_in *ifaddr = NULL;
464 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
465 	int error;
466 
467 	if (inp->inp_af != AF_INET)
468 		return (EINVAL);
469 
470 	if (nam->m_len != sizeof (*sin))
471 		return (EINVAL);
472 	if (sin->sin_family != AF_INET)
473 		return (EAFNOSUPPORT);
474 	if (sin->sin_port == 0)
475 		return (EADDRNOTAVAIL);
476 	if (TAILQ_FIRST(&in_ifaddrhead) != 0) {
477 		/*
478 		 * If the destination address is INADDR_ANY,
479 		 * use any local address (likely loopback).
480 		 * If the supplied address is INADDR_BROADCAST,
481 		 * use the broadcast address of an interface
482 		 * which supports broadcast. (loopback does not)
483 		 */
484 
485 		if (in_nullhost(sin->sin_addr)) {
486 			sin->sin_addr =
487 			    TAILQ_FIRST(&in_ifaddrhead)->ia_addr.sin_addr;
488 		} else if (sin->sin_addr.s_addr == INADDR_BROADCAST) {
489 			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_list) {
490 				if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
491 					sin->sin_addr =
492 					    ia->ia_broadaddr.sin_addr;
493 					break;
494 				}
495 			}
496 		}
497 	}
498 	/*
499 	 * If we haven't bound which network number to use as ours,
500 	 * we will use the number of the outgoing interface.
501 	 * This depends on having done a routing lookup, which
502 	 * we will probably have to do anyway, so we might
503 	 * as well do it now.  On the other hand if we are
504 	 * sending to multiple destinations we may have already
505 	 * done the lookup, so see if we can use the route
506 	 * from before.  In any case, we only
507 	 * chose a port number once, even if sending to multiple
508 	 * destinations.
509 	 */
510 	if (in_nullhost(inp->inp_laddr)) {
511 		int xerror;
512 		ifaddr = in_selectsrc(sin, &inp->inp_route,
513 		    inp->inp_socket->so_options, inp->inp_moptions, &xerror);
514 		if (ifaddr == NULL) {
515 			if (xerror == 0)
516 				xerror = EADDRNOTAVAIL;
517 			return xerror;
518 		}
519 		INADDR_TO_IA(ifaddr->sin_addr, ia);
520 		if (ia == NULL)
521 			return (EADDRNOTAVAIL);
522 	}
523 	if (in_pcblookup_connect(inp->inp_table, sin->sin_addr, sin->sin_port,
524 	    !in_nullhost(inp->inp_laddr) ? inp->inp_laddr : ifaddr->sin_addr,
525 	    inp->inp_lport) != 0)
526 		return (EADDRINUSE);
527 	if (in_nullhost(inp->inp_laddr)) {
528 		if (inp->inp_lport == 0) {
529 			error = in_pcbbind(inp, NULL, l);
530 			/*
531 			 * This used to ignore the return value
532 			 * completely, but we need to check for
533 			 * ephemeral port shortage.
534 			 * And attempts to request low ports if not root.
535 			 */
536 			if (error != 0)
537 				return (error);
538 		}
539 		inp->inp_laddr = ifaddr->sin_addr;
540 	}
541 	inp->inp_faddr = sin->sin_addr;
542 	inp->inp_fport = sin->sin_port;
543 	in_pcbstate(inp, INP_CONNECTED);
544 #if defined(IPSEC) || defined(FAST_IPSEC)
545 	if (inp->inp_socket->so_type == SOCK_STREAM)
546 		ipsec_pcbconn(inp->inp_sp);
547 #endif
548 	return (0);
549 }
550 
551 void
552 in_pcbdisconnect(void *v)
553 {
554 	struct inpcb *inp = v;
555 
556 	if (inp->inp_af != AF_INET)
557 		return;
558 
559 	inp->inp_faddr = zeroin_addr;
560 	inp->inp_fport = 0;
561 	in_pcbstate(inp, INP_BOUND);
562 #if defined(IPSEC) || defined(FAST_IPSEC)
563 	ipsec_pcbdisconn(inp->inp_sp);
564 #endif
565 	if (inp->inp_socket->so_state & SS_NOFDREF)
566 		in_pcbdetach(inp);
567 }
568 
569 void
570 in_pcbdetach(void *v)
571 {
572 	struct inpcb *inp = v;
573 	struct socket *so = inp->inp_socket;
574 	int s;
575 
576 	if (inp->inp_af != AF_INET)
577 		return;
578 
579 #if defined(IPSEC) || defined(FAST_IPSEC)
580 	ipsec4_delete_pcbpolicy(inp);
581 #endif /*IPSEC*/
582 	so->so_pcb = 0;
583 	if (inp->inp_options)
584 		(void)m_free(inp->inp_options);
585 	rtcache_free(&inp->inp_route);
586 	ip_freemoptions(inp->inp_moptions);
587 	s = splnet();
588 	in_pcbstate(inp, INP_ATTACHED);
589 	LIST_REMOVE(&inp->inp_head, inph_lhash);
590 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, &inp->inp_head,
591 	    inph_queue);
592 	pool_put(&inpcb_pool, inp);
593 	splx(s);
594 	sofree(so);			/* drops the socket's lock */
595 	mutex_enter(softnet_lock);	/* reacquire the softnet_lock */
596 }
597 
598 void
599 in_setsockaddr(struct inpcb *inp, struct mbuf *nam)
600 {
601 	struct sockaddr_in *sin;
602 
603 	if (inp->inp_af != AF_INET)
604 		return;
605 
606 	sin = mtod(nam, struct sockaddr_in *);
607 	sockaddr_in_init(sin, &inp->inp_laddr, inp->inp_lport);
608 	nam->m_len = sin->sin_len;
609 }
610 
611 void
612 in_setpeeraddr(struct inpcb *inp, struct mbuf *nam)
613 {
614 	struct sockaddr_in *sin;
615 
616 	if (inp->inp_af != AF_INET)
617 		return;
618 
619 	sin = mtod(nam, struct sockaddr_in *);
620 	sockaddr_in_init(sin, &inp->inp_faddr, inp->inp_fport);
621 	nam->m_len = sin->sin_len;
622 }
623 
624 /*
625  * Pass some notification to all connections of a protocol
626  * associated with address dst.  The local address and/or port numbers
627  * may be specified to limit the search.  The "usual action" will be
628  * taken, depending on the ctlinput cmd.  The caller must filter any
629  * cmds that are uninteresting (e.g., no error in the map).
630  * Call the protocol specific routine (if any) to report
631  * any errors for each matching socket.
632  *
633  * Must be called at splsoftnet.
634  */
635 int
636 in_pcbnotify(struct inpcbtable *table, struct in_addr faddr, u_int fport_arg,
637     struct in_addr laddr, u_int lport_arg, int errno,
638     void (*notify)(struct inpcb *, int))
639 {
640 	struct inpcbhead *head;
641 	struct inpcb *inp, *ninp;
642 	u_int16_t fport = fport_arg, lport = lport_arg;
643 	int nmatch;
644 
645 	if (in_nullhost(faddr) || notify == 0)
646 		return (0);
647 
648 	nmatch = 0;
649 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
650 	for (inp = (struct inpcb *)LIST_FIRST(head); inp != NULL; inp = ninp) {
651 		ninp = (struct inpcb *)LIST_NEXT(inp, inp_hash);
652 		if (inp->inp_af != AF_INET)
653 			continue;
654 		if (in_hosteq(inp->inp_faddr, faddr) &&
655 		    inp->inp_fport == fport &&
656 		    inp->inp_lport == lport &&
657 		    in_hosteq(inp->inp_laddr, laddr)) {
658 			(*notify)(inp, errno);
659 			nmatch++;
660 		}
661 	}
662 	return (nmatch);
663 }
664 
665 void
666 in_pcbnotifyall(struct inpcbtable *table, struct in_addr faddr, int errno,
667     void (*notify)(struct inpcb *, int))
668 {
669 	struct inpcb *inp, *ninp;
670 
671 	if (in_nullhost(faddr) || notify == 0)
672 		return;
673 
674 	for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue);
675 	    inp != (void *)&table->inpt_queue;
676 	    inp = ninp) {
677 		ninp = (struct inpcb *)CIRCLEQ_NEXT(inp, inp_queue);
678 		if (inp->inp_af != AF_INET)
679 			continue;
680 		if (in_hosteq(inp->inp_faddr, faddr))
681 			(*notify)(inp, errno);
682 	}
683 }
684 
685 void
686 in_pcbpurgeif0(struct inpcbtable *table, struct ifnet *ifp)
687 {
688 	struct inpcb *inp, *ninp;
689 	struct ip_moptions *imo;
690 	int i, gap;
691 
692 	for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue);
693 	    inp != (void *)&table->inpt_queue;
694 	    inp = ninp) {
695 		ninp = (struct inpcb *)CIRCLEQ_NEXT(inp, inp_queue);
696 		if (inp->inp_af != AF_INET)
697 			continue;
698 		imo = inp->inp_moptions;
699 		if (imo != NULL) {
700 			/*
701 			 * Unselect the outgoing interface if it is being
702 			 * detached.
703 			 */
704 			if (imo->imo_multicast_ifp == ifp)
705 				imo->imo_multicast_ifp = NULL;
706 
707 			/*
708 			 * Drop multicast group membership if we joined
709 			 * through the interface being detached.
710 			 */
711 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
712 			    i++) {
713 				if (imo->imo_membership[i]->inm_ifp == ifp) {
714 					in_delmulti(imo->imo_membership[i]);
715 					gap++;
716 				} else if (gap != 0)
717 					imo->imo_membership[i - gap] =
718 					    imo->imo_membership[i];
719 			}
720 			imo->imo_num_memberships -= gap;
721 		}
722 	}
723 }
724 
725 void
726 in_pcbpurgeif(struct inpcbtable *table, struct ifnet *ifp)
727 {
728 	struct rtentry *rt;
729 	struct inpcb *inp, *ninp;
730 
731 	for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue);
732 	    inp != (void *)&table->inpt_queue;
733 	    inp = ninp) {
734 		ninp = (struct inpcb *)CIRCLEQ_NEXT(inp, inp_queue);
735 		if (inp->inp_af != AF_INET)
736 			continue;
737 		if ((rt = rtcache_validate(&inp->inp_route)) != NULL &&
738 		    rt->rt_ifp == ifp)
739 			in_rtchange(inp, 0);
740 	}
741 }
742 
743 /*
744  * Check for alternatives when higher level complains
745  * about service problems.  For now, invalidate cached
746  * routing information.  If the route was created dynamically
747  * (by a redirect), time to try a default gateway again.
748  */
749 void
750 in_losing(struct inpcb *inp)
751 {
752 	struct rtentry *rt;
753 	struct rt_addrinfo info;
754 
755 	if (inp->inp_af != AF_INET)
756 		return;
757 
758 	if ((rt = rtcache_validate(&inp->inp_route)) == NULL)
759 		return;
760 
761 	memset(&info, 0, sizeof(info));
762 	info.rti_info[RTAX_DST] = rtcache_getdst(&inp->inp_route);
763 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
764 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
765 	rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
766 	if (rt->rt_flags & RTF_DYNAMIC)
767 		(void) rtrequest(RTM_DELETE, rt_getkey(rt),
768 			rt->rt_gateway, rt_mask(rt), rt->rt_flags,
769 			NULL);
770 	/*
771 	 * A new route can be allocated
772 	 * the next time output is attempted.
773 	 */
774 	rtcache_free(&inp->inp_route);
775 }
776 
777 /*
778  * After a routing change, flush old routing.  A new route can be
779  * allocated the next time output is attempted.
780  */
781 void
782 in_rtchange(struct inpcb *inp, int errno)
783 {
784 
785 	if (inp->inp_af != AF_INET)
786 		return;
787 
788 	rtcache_free(&inp->inp_route);
789 
790 	/* XXX SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
791 }
792 
793 struct inpcb *
794 in_pcblookup_port(struct inpcbtable *table, struct in_addr laddr,
795     u_int lport_arg, int lookup_wildcard)
796 {
797 	struct inpcbhead *head;
798 	struct inpcb_hdr *inph;
799 	struct inpcb *inp, *match = 0;
800 	int matchwild = 3, wildcard;
801 	u_int16_t lport = lport_arg;
802 
803 	head = INPCBHASH_PORT(table, lport);
804 	LIST_FOREACH(inph, head, inph_lhash) {
805 		inp = (struct inpcb *)inph;
806 		if (inp->inp_af != AF_INET)
807 			continue;
808 
809 		if (inp->inp_lport != lport)
810 			continue;
811 		wildcard = 0;
812 		if (!in_nullhost(inp->inp_faddr))
813 			wildcard++;
814 		if (in_nullhost(inp->inp_laddr)) {
815 			if (!in_nullhost(laddr))
816 				wildcard++;
817 		} else {
818 			if (in_nullhost(laddr))
819 				wildcard++;
820 			else {
821 				if (!in_hosteq(inp->inp_laddr, laddr))
822 					continue;
823 			}
824 		}
825 		if (wildcard && !lookup_wildcard)
826 			continue;
827 		if (wildcard < matchwild) {
828 			match = inp;
829 			matchwild = wildcard;
830 			if (matchwild == 0)
831 				break;
832 		}
833 	}
834 	return (match);
835 }
836 
#ifdef DIAGNOSTIC
/*
 * When non-zero, in_pcblookup_connect() and in_pcblookup_bind() print
 * the key they were asked for whenever a lookup finds nothing.
 */
int	in_pcbnotifymiss = 0;
#endif
840 
841 struct inpcb *
842 in_pcblookup_connect(struct inpcbtable *table,
843     struct in_addr faddr, u_int fport_arg,
844     struct in_addr laddr, u_int lport_arg)
845 {
846 	struct inpcbhead *head;
847 	struct inpcb_hdr *inph;
848 	struct inpcb *inp;
849 	u_int16_t fport = fport_arg, lport = lport_arg;
850 
851 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
852 	LIST_FOREACH(inph, head, inph_hash) {
853 		inp = (struct inpcb *)inph;
854 		if (inp->inp_af != AF_INET)
855 			continue;
856 
857 		if (in_hosteq(inp->inp_faddr, faddr) &&
858 		    inp->inp_fport == fport &&
859 		    inp->inp_lport == lport &&
860 		    in_hosteq(inp->inp_laddr, laddr))
861 			goto out;
862 	}
863 #ifdef DIAGNOSTIC
864 	if (in_pcbnotifymiss) {
865 		printf("in_pcblookup_connect: faddr=%08x fport=%d laddr=%08x lport=%d\n",
866 		    ntohl(faddr.s_addr), ntohs(fport),
867 		    ntohl(laddr.s_addr), ntohs(lport));
868 	}
869 #endif
870 	return (0);
871 
872 out:
873 	/* Move this PCB to the head of hash chain. */
874 	inph = &inp->inp_head;
875 	if (inph != LIST_FIRST(head)) {
876 		LIST_REMOVE(inph, inph_hash);
877 		LIST_INSERT_HEAD(head, inph, inph_hash);
878 	}
879 	return (inp);
880 }
881 
882 struct inpcb *
883 in_pcblookup_bind(struct inpcbtable *table,
884     struct in_addr laddr, u_int lport_arg)
885 {
886 	struct inpcbhead *head;
887 	struct inpcb_hdr *inph;
888 	struct inpcb *inp;
889 	u_int16_t lport = lport_arg;
890 
891 	head = INPCBHASH_BIND(table, laddr, lport);
892 	LIST_FOREACH(inph, head, inph_hash) {
893 		inp = (struct inpcb *)inph;
894 		if (inp->inp_af != AF_INET)
895 			continue;
896 
897 		if (inp->inp_lport == lport &&
898 		    in_hosteq(inp->inp_laddr, laddr))
899 			goto out;
900 	}
901 	head = INPCBHASH_BIND(table, zeroin_addr, lport);
902 	LIST_FOREACH(inph, head, inph_hash) {
903 		inp = (struct inpcb *)inph;
904 		if (inp->inp_af != AF_INET)
905 			continue;
906 
907 		if (inp->inp_lport == lport &&
908 		    in_hosteq(inp->inp_laddr, zeroin_addr))
909 			goto out;
910 	}
911 #ifdef DIAGNOSTIC
912 	if (in_pcbnotifymiss) {
913 		printf("in_pcblookup_bind: laddr=%08x lport=%d\n",
914 		    ntohl(laddr.s_addr), ntohs(lport));
915 	}
916 #endif
917 	return (0);
918 
919 out:
920 	/* Move this PCB to the head of hash chain. */
921 	inph = &inp->inp_head;
922 	if (inph != LIST_FIRST(head)) {
923 		LIST_REMOVE(inph, inph_hash);
924 		LIST_INSERT_HEAD(head, inph, inph_hash);
925 	}
926 	return (inp);
927 }
928 
929 void
930 in_pcbstate(struct inpcb *inp, int state)
931 {
932 
933 	if (inp->inp_af != AF_INET)
934 		return;
935 
936 	if (inp->inp_state > INP_ATTACHED)
937 		LIST_REMOVE(&inp->inp_head, inph_hash);
938 
939 	switch (state) {
940 	case INP_BOUND:
941 		LIST_INSERT_HEAD(INPCBHASH_BIND(inp->inp_table,
942 		    inp->inp_laddr, inp->inp_lport), &inp->inp_head,
943 		    inph_hash);
944 		break;
945 	case INP_CONNECTED:
946 		LIST_INSERT_HEAD(INPCBHASH_CONNECT(inp->inp_table,
947 		    inp->inp_faddr, inp->inp_fport,
948 		    inp->inp_laddr, inp->inp_lport), &inp->inp_head,
949 		    inph_hash);
950 		break;
951 	}
952 
953 	inp->inp_state = state;
954 }
955 
956 struct rtentry *
957 in_pcbrtentry(struct inpcb *inp)
958 {
959 	struct route *ro;
960 	union {
961 		struct sockaddr		dst;
962 		struct sockaddr_in	dst4;
963 	} u;
964 
965 	if (inp->inp_af != AF_INET)
966 		return (NULL);
967 
968 	ro = &inp->inp_route;
969 
970 	sockaddr_in_init(&u.dst4, &inp->inp_faddr, 0);
971 	return rtcache_lookup(ro, &u.dst);
972 }
973 
974 struct sockaddr_in *
975 in_selectsrc(struct sockaddr_in *sin, struct route *ro,
976     int soopts, struct ip_moptions *mopts, int *errorp)
977 {
978 	struct rtentry *rt = NULL;
979 	struct in_ifaddr *ia = NULL;
980 
981 	/*
982          * If route is known or can be allocated now, take the
983          * source address from the interface.  Otherwise, punt.
984 	 */
985 	if ((soopts & SO_DONTROUTE) != 0)
986 		rtcache_free(ro);
987 	else {
988 		union {
989 			struct sockaddr		dst;
990 			struct sockaddr_in	dst4;
991 		} u;
992 
993 		sockaddr_in_init(&u.dst4, &sin->sin_addr, 0);
994 		rt = rtcache_lookup(ro, &u.dst);
995 	}
996 	/*
997 	 * If we found a route, use the address
998 	 * corresponding to the outgoing interface
999 	 * unless it is the loopback (in case a route
1000 	 * to our address on another net goes to loopback).
1001 	 *
1002 	 * XXX Is this still true?  Do we care?
1003 	 */
1004 	if (rt != NULL && (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1005 		ia = ifatoia(rt->rt_ifa);
1006 	if (ia == NULL) {
1007 		u_int16_t fport = sin->sin_port;
1008 
1009 		sin->sin_port = 0;
1010 		ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
1011 		sin->sin_port = fport;
1012 		if (ia == NULL) {
1013 			/* Find 1st non-loopback AF_INET address */
1014 			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_list) {
1015 				if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK))
1016 					break;
1017 			}
1018 		}
1019 		if (ia == NULL) {
1020 			*errorp = EADDRNOTAVAIL;
1021 			return NULL;
1022 		}
1023 	}
1024 	/*
1025 	 * If the destination address is multicast and an outgoing
1026 	 * interface has been set as a multicast option, use the
1027 	 * address of that interface as our source address.
1028 	 */
1029 	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
1030 		struct ip_moptions *imo;
1031 		struct ifnet *ifp;
1032 
1033 		imo = mopts;
1034 		if (imo->imo_multicast_ifp != NULL) {
1035 			ifp = imo->imo_multicast_ifp;
1036 			IFP_TO_IA(ifp, ia);		/* XXX */
1037 			if (ia == 0) {
1038 				*errorp = EADDRNOTAVAIL;
1039 				return NULL;
1040 			}
1041 		}
1042 	}
1043 	if (ia->ia_ifa.ifa_getifa != NULL) {
1044 		ia = ifatoia((*ia->ia_ifa.ifa_getifa)(&ia->ia_ifa,
1045 		                                      sintosa(sin)));
1046 	}
1047 #ifdef GETIFA_DEBUG
1048 	else
1049 		printf("%s: missing ifa_getifa\n", __func__);
1050 #endif
1051 	return satosin(&ia->ia_addr);
1052 }
1053