xref: /openbsd-src/sys/netinet/in_pcb.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*	$OpenBSD: in_pcb.c,v 1.108 2009/11/13 20:54:05 claudio Exp $	*/
2 /*	$NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include "pf.h"
72 
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/proc.h>
80 #include <sys/domain.h>
81 #include <sys/pool.h>
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 #include <net/pfvar.h>
86 
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip_var.h>
93 #include <dev/rndvar.h>
94 
95 #include <sys/mount.h>
96 #include <nfs/nfsproto.h>
97 
98 #ifdef INET6
99 #include <netinet6/ip6_var.h>
100 #endif /* INET6 */
101 #ifdef IPSEC
102 #include <netinet/ip_esp.h>
103 #endif /* IPSEC */
104 
/*
 * All-zero IPv4 address.  The lookup callers in this file pass it where
 * no specific (foreign or local) address is wanted.
 */
struct	in_addr zeroin_addr;

/*
 * Default IPsec levels, defined elsewhere.  in_pcballoc() copies them
 * into the inp_seclevel[] array of every new PCB.
 */
extern int ipsec_auth_default_level;
extern int ipsec_esp_trans_default_level;
extern int ipsec_esp_network_default_level;
extern int ipsec_ipcomp_default_level;

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 * in_pcbbind() reads them when it has to choose a port itself: the
 * "hi" pair for PCBs flagged INP_HIGHPORT, the plain pair otherwise.
 */
int ipport_firstauto = IPPORT_RESERVED;
int ipport_lastauto = IPPORT_USERRESERVED;
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;
int ipport_hilastauto = IPPORT_HILASTAUTO;

/*
 * Pool that struct inpcb allocations come from.  It is initialized on
 * the first call to in_pcballoc(); inpcb_pool_initialized records that
 * this has happened.
 */
struct pool inpcb_pool;
int inpcb_pool_initialized = 0;
123 
/*
 * Bucket selectors for the PCB hash tables.  Each macro evaluates to the
 * address of one bucket head inside `table'.
 *
 * INPCBHASH	IPv4 connection hash: foreign address + foreign port +
 *		local port + routing domain, masked with inpt_hash.
 * IN6PCBHASH	IPv6 connection hash: first and last 32-bit words of the
 *		foreign address XORed together, + foreign port + local
 *		port, masked with inpt_hash.
 * INPCBLHASH	local-port hash: local port + routing domain, masked
 *		with inpt_lhash.
 *
 * The `laddr' argument is accepted but does not take part in either
 * connection hash.  Ports and addresses are expected in network byte
 * order (they are passed through ntohs()/ntohl()).
 *
 * Every argument, including each use of `table', is parenthesized and
 * the whole expansion is wrapped, so that arbitrary expressions may be
 * passed and the result may be used inside larger expressions.
 */
#define	INPCBHASH(table, faddr, fport, laddr, lport, rdom) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \
	ntohs((fport)) + ntohs((lport)) + (rdom)) & ((table)->inpt_hash)])

#define	IN6PCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \
	(faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \
	((table)->inpt_hash)])

#define	INPCBLHASH(table, lport, rdom) \
	(&(table)->inpt_lhashtbl[(ntohs((lport)) + (rdom)) & \
	((table)->inpt_lhash)])
135 
136 void
137 in_pcbinit(table, hashsize)
138 	struct inpcbtable *table;
139 	int hashsize;
140 {
141 
142 	CIRCLEQ_INIT(&table->inpt_queue);
143 	table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
144 	    &table->inpt_hash);
145 	if (table->inpt_hashtbl == NULL)
146 		panic("in_pcbinit: hashinit failed");
147 	table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
148 	    &table->inpt_lhash);
149 	if (table->inpt_lhashtbl == NULL)
150 		panic("in_pcbinit: hashinit failed for lport");
151 	table->inpt_lastport = 0;
152 }
153 
154 struct baddynamicports baddynamicports;
155 
156 /*
157  * Check if the specified port is invalid for dynamic allocation.
158  */
159 int
160 in_baddynamic(u_int16_t port, u_int16_t proto)
161 {
162 	switch (proto) {
163 	case IPPROTO_TCP:
164 		return (DP_ISSET(baddynamicports.tcp, port));
165 	case IPPROTO_UDP:
166 #ifdef IPSEC
167 		/* Cannot preset this as it is a sysctl */
168 		if (port == udpencap_port)
169 			return (1);
170 #endif
171 		return (DP_ISSET(baddynamicports.udp, port));
172 	default:
173 		return (0);
174 	}
175 }
176 
177 int
178 in_pcballoc(so, v)
179 	struct socket *so;
180 	void *v;
181 {
182 	struct inpcbtable *table = v;
183 	struct inpcb *inp;
184 	int s;
185 
186 	if (inpcb_pool_initialized == 0) {
187 		pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0,
188 		    "inpcbpl", NULL);
189 		inpcb_pool_initialized = 1;
190 	}
191 	inp = pool_get(&inpcb_pool, PR_NOWAIT);
192 	if (inp == NULL)
193 		return (ENOBUFS);
194 	bzero((caddr_t)inp, sizeof(*inp));
195 	inp->inp_table = table;
196 	inp->inp_socket = so;
197 	inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level;
198 	inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level;
199 	inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level;
200 	inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level;
201 	s = splnet();
202 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
203 	LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport,
204 	    inp->inp_rdomain), inp, inp_lhash);
205 	LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
206 	    &inp->inp_laddr, inp->inp_lport, inp->inp_rdomain),
207 	    inp, inp_hash);
208 	splx(s);
209 	so->so_pcb = inp;
210 	inp->inp_hops = -1;
211 
212 #ifdef INET6
213 	/*
214 	 * Small change in this function to set the INP_IPV6 flag so routines
215 	 * outside pcb-specific routines don't need to use sotopf(), and all
216 	 * of its pointer chasing, later.
217 	 */
218 	if (sotopf(so) == PF_INET6)
219 		inp->inp_flags = INP_IPV6;
220 	inp->in6p_cksum = -1;
221 #endif /* INET6 */
222 	return (0);
223 }
224 
225 int
226 in_pcbbind(v, nam, p)
227 	void *v;
228 	struct mbuf *nam;
229 	struct proc *p;
230 {
231 	struct inpcb *inp = v;
232 	struct socket *so = inp->inp_socket;
233 	struct inpcbtable *table = inp->inp_table;
234 	u_int16_t *lastport = &inp->inp_table->inpt_lastport;
235 	struct sockaddr_in *sin;
236 	u_int16_t lport = 0;
237 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
238 	int error;
239 
240 #ifdef INET6
241 	if (sotopf(so) == PF_INET6)
242 		return in6_pcbbind(inp, nam, p);
243 #endif /* INET6 */
244 
245 	if (TAILQ_EMPTY(&in_ifaddr))
246 		return (EADDRNOTAVAIL);
247 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
248 		return (EINVAL);
249 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
250 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
251 	     (so->so_options & SO_ACCEPTCONN) == 0))
252 		wild = INPLOOKUP_WILDCARD;
253 	if (nam) {
254 		sin = mtod(nam, struct sockaddr_in *);
255 		if (nam->m_len != sizeof (*sin))
256 			return (EINVAL);
257 #ifdef notdef
258 		/*
259 		 * We should check the family, but old programs
260 		 * incorrectly fail to initialize it.
261 		 */
262 		if (sin->sin_family != AF_INET)
263 			return (EAFNOSUPPORT);
264 #endif
265 		lport = sin->sin_port;
266 		if (IN_MULTICAST(sin->sin_addr.s_addr)) {
267 			/*
268 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
269 			 * allow complete duplication of binding if
270 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
271 			 * and a multicast address is bound on both
272 			 * new and duplicated sockets.
273 			 */
274 			if (so->so_options & SO_REUSEADDR)
275 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
276 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
277 			sin->sin_port = 0;		/* yech... */
278 			if (!(so->so_options & SO_BINDANY) &&
279 			    in_iawithaddr(sin->sin_addr, NULL,
280 			    inp->inp_rdomain) == 0)
281 				return (EADDRNOTAVAIL);
282 		}
283 		if (lport) {
284 			struct inpcb *t;
285 
286 			/* GROSS */
287 			if (ntohs(lport) < IPPORT_RESERVED &&
288 			    (error = suser(p, 0)))
289 				return (EACCES);
290 			if (so->so_euid) {
291 				t = in_pcblookup(table, &zeroin_addr, 0,
292 				    &sin->sin_addr, lport, INPLOOKUP_WILDCARD,
293 				    inp->inp_rdomain);
294 				if (t && (so->so_euid != t->inp_socket->so_euid))
295 					return (EADDRINUSE);
296 			}
297 			t = in_pcblookup(table, &zeroin_addr, 0,
298 			    &sin->sin_addr, lport, wild, inp->inp_rdomain);
299 			if (t && (reuseport & t->inp_socket->so_options) == 0)
300 				return (EADDRINUSE);
301 		}
302 		inp->inp_laddr = sin->sin_addr;
303 	}
304 	if (lport == 0) {
305 		u_int16_t first, last;
306 		int count;
307 
308 		if (inp->inp_flags & INP_HIGHPORT) {
309 			first = ipport_hifirstauto;	/* sysctl */
310 			last = ipport_hilastauto;
311 		} else if (inp->inp_flags & INP_LOWPORT) {
312 			if ((error = suser(p, 0)))
313 				return (EACCES);
314 			first = IPPORT_RESERVED-1; /* 1023 */
315 			last = 600;		   /* not IPPORT_RESERVED/2 */
316 		} else {
317 			first = ipport_firstauto;	/* sysctl */
318 			last  = ipport_lastauto;
319 		}
320 
321 		/*
322 		 * Simple check to ensure all ports are not used up causing
323 		 * a deadlock here.
324 		 *
325 		 * We split the two cases (up and down) so that the direction
326 		 * is not being tested on each round of the loop.
327 		 */
328 
329 		if (first > last) {
330 			/*
331 			 * counting down
332 			 */
333 			count = first - last;
334 			if (count)
335 				*lastport = first - arc4random_uniform(count);
336 
337 			do {
338 				if (count-- < 0)	/* completely used? */
339 					return (EADDRNOTAVAIL);
340 				--*lastport;
341 				if (*lastport > first || *lastport < last)
342 					*lastport = first;
343 				lport = htons(*lastport);
344 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
345 			    in_pcblookup(table, &zeroin_addr, 0,
346 			    &inp->inp_laddr, lport, wild, inp->inp_rdomain));
347 		} else {
348 			/*
349 			 * counting up
350 			 */
351 			count = last - first;
352 			if (count)
353 				*lastport = first + arc4random_uniform(count);
354 
355 			do {
356 				if (count-- < 0)	/* completely used? */
357 					return (EADDRNOTAVAIL);
358 				++*lastport;
359 				if (*lastport < first || *lastport > last)
360 					*lastport = first;
361 				lport = htons(*lastport);
362 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
363 			    in_pcblookup(table, &zeroin_addr, 0,
364 			    &inp->inp_laddr, lport, wild, inp->inp_rdomain));
365 		}
366 	}
367 	inp->inp_lport = lport;
368 	in_pcbrehash(inp);
369 	return (0);
370 }
371 
372 /*
373  * Connect from a socket to a specified address.
374  * Both address and port must be specified in argument sin.
375  * If don't have a local address for this socket yet,
376  * then pick one.
377  */
378 int
379 in_pcbconnect(v, nam)
380 	void *v;
381 	struct mbuf *nam;
382 {
383 	struct inpcb *inp = v;
384 	struct sockaddr_in *ifaddr = NULL;
385 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
386 
387 #ifdef INET6
388 	if (sotopf(inp->inp_socket) == PF_INET6)
389 		return (in6_pcbconnect(inp, nam));
390 	if ((inp->inp_flags & INP_IPV6) != 0)
391 		panic("IPv6 pcb passed into in_pcbconnect");
392 #endif /* INET6 */
393 
394 	if (nam->m_len != sizeof (*sin))
395 		return (EINVAL);
396 	if (sin->sin_family != AF_INET)
397 		return (EAFNOSUPPORT);
398 	if (sin->sin_port == 0)
399 		return (EADDRNOTAVAIL);
400 	if (!TAILQ_EMPTY(&in_ifaddr)) {
401 		/*
402 		 * If the destination address is INADDR_ANY,
403 		 * use the primary local address.
404 		 * If the supplied address is INADDR_BROADCAST,
405 		 * and the primary interface supports broadcast,
406 		 * choose the broadcast address for that interface.
407 		 */
408 		if (sin->sin_addr.s_addr == INADDR_ANY)
409 			sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr;
410 		else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
411 		  (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST))
412 			sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr;
413 	}
414 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
415 		int error;
416 		ifaddr = in_selectsrc(sin, &inp->inp_route,
417 			inp->inp_socket->so_options, inp->inp_moptions, &error,
418 			inp->inp_rdomain);
419 		if (ifaddr == NULL) {
420 			if (error == 0)
421 				error = EADDRNOTAVAIL;
422 			return error;
423 		}
424 	}
425 	if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
426 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
427 	    inp->inp_lport, inp->inp_rdomain) != 0)
428 		return (EADDRINUSE);
429 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
430 		if (inp->inp_lport == 0 &&
431 		    in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL)
432 			return (EADDRNOTAVAIL);
433 		inp->inp_laddr = ifaddr->sin_addr;
434 	}
435 	inp->inp_faddr = sin->sin_addr;
436 	inp->inp_fport = sin->sin_port;
437 	in_pcbrehash(inp);
438 #ifdef IPSEC
439 	{
440 		int error; /* This is just ignored */
441 
442 		/* Cause an IPsec SA to be established. */
443 		ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT,
444 		    NULL, inp, NULL);
445 	}
446 #endif
447 	return (0);
448 }
449 
450 void
451 in_pcbdisconnect(v)
452 	void *v;
453 {
454 	struct inpcb *inp = v;
455 
456 	switch (sotopf(inp->inp_socket)) {
457 #ifdef INET6
458 	case PF_INET6:
459 		inp->inp_faddr6 = in6addr_any;
460 		break;
461 #endif
462 	case PF_INET:
463 		inp->inp_faddr.s_addr = INADDR_ANY;
464 		break;
465 	}
466 
467 	inp->inp_fport = 0;
468 	in_pcbrehash(inp);
469 	if (inp->inp_socket->so_state & SS_NOFDREF)
470 		in_pcbdetach(inp);
471 }
472 
473 void
474 in_pcbdetach(v)
475 	void *v;
476 {
477 	struct inpcb *inp = v;
478 	struct socket *so = inp->inp_socket;
479 	int s;
480 
481 	so->so_pcb = 0;
482 	sofree(so);
483 	if (inp->inp_options)
484 		m_freem(inp->inp_options);
485 	if (inp->inp_route.ro_rt)
486 		rtfree(inp->inp_route.ro_rt);
487 #ifdef INET6
488 	if (inp->inp_flags & INP_IPV6) {
489 		ip6_freepcbopts(inp->inp_outputopts6);
490 		ip6_freemoptions(inp->inp_moptions6);
491 	} else
492 #endif
493 		ip_freemoptions(inp->inp_moptions);
494 #ifdef IPSEC
495 	/* IPsec cleanup here */
496 	s = spltdb();
497 	if (inp->inp_tdb_in)
498 		TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
499 			     inp, inp_tdb_in_next);
500 	if (inp->inp_tdb_out)
501 	        TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp,
502 			     inp_tdb_out_next);
503 	if (inp->inp_ipsec_remotecred)
504 		ipsp_reffree(inp->inp_ipsec_remotecred);
505 	if (inp->inp_ipsec_remoteauth)
506 		ipsp_reffree(inp->inp_ipsec_remoteauth);
507 	if (inp->inp_ipo)
508 		ipsec_delete_policy(inp->inp_ipo);
509 	splx(s);
510 #endif
511 #if NPF > 0
512 	if (inp->inp_pf_sk)
513 		((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL;
514 #endif
515 	s = splnet();
516 	LIST_REMOVE(inp, inp_lhash);
517 	LIST_REMOVE(inp, inp_hash);
518 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
519 	splx(s);
520 	pool_put(&inpcb_pool, inp);
521 }
522 
523 void
524 in_setsockaddr(inp, nam)
525 	struct inpcb *inp;
526 	struct mbuf *nam;
527 {
528 	struct sockaddr_in *sin;
529 
530 	nam->m_len = sizeof (*sin);
531 	sin = mtod(nam, struct sockaddr_in *);
532 	bzero((caddr_t)sin, sizeof (*sin));
533 	sin->sin_family = AF_INET;
534 	sin->sin_len = sizeof(*sin);
535 	sin->sin_port = inp->inp_lport;
536 	sin->sin_addr = inp->inp_laddr;
537 }
538 
539 void
540 in_setpeeraddr(inp, nam)
541 	struct inpcb *inp;
542 	struct mbuf *nam;
543 {
544 	struct sockaddr_in *sin;
545 
546 #ifdef INET6
547 	if (sotopf(inp->inp_socket) == PF_INET6) {
548 		in6_setpeeraddr(inp, nam);
549 		return;
550 	}
551 #endif /* INET6 */
552 
553 	nam->m_len = sizeof (*sin);
554 	sin = mtod(nam, struct sockaddr_in *);
555 	bzero((caddr_t)sin, sizeof (*sin));
556 	sin->sin_family = AF_INET;
557 	sin->sin_len = sizeof(*sin);
558 	sin->sin_port = inp->inp_fport;
559 	sin->sin_addr = inp->inp_faddr;
560 }
561 
562 /*
563  * Pass some notification to all connections of a protocol
564  * associated with address dst.  The "usual action" will be
565  * taken, depending on the ctlinput cmd.  The caller must filter any
566  * cmds that are uninteresting (e.g., no error in the map).
567  * Call the protocol specific routine (if any) to report
568  * any errors for each matching socket.
569  *
570  * Must be called at splsoftnet.
571  */
572 void
573 in_pcbnotifyall(table, dst, rdomain, errno, notify)
574 	struct inpcbtable *table;
575 	struct sockaddr *dst;
576 	u_int rdomain;
577 	int errno;
578 	void (*notify)(struct inpcb *, int);
579 {
580 	struct inpcb *inp, *oinp;
581 	struct in_addr faddr;
582 
583 	splsoftassert(IPL_SOFTNET);
584 
585 #ifdef INET6
586 	/*
587 	 * See in6_pcbnotify() for IPv6 codepath.  By the time this
588 	 * gets called, the addresses passed are either definitely IPv4 or
589 	 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
590 	 */
591 #endif /* INET6 */
592 
593 	if (dst->sa_family != AF_INET)
594 		return;
595 	faddr = satosin(dst)->sin_addr;
596 	if (faddr.s_addr == INADDR_ANY)
597 		return;
598 
599 	for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
600 	    inp != CIRCLEQ_END(&table->inpt_queue);) {
601 #ifdef INET6
602 		if (inp->inp_flags & INP_IPV6) {
603 			inp = CIRCLEQ_NEXT(inp, inp_queue);
604 			continue;
605 		}
606 #endif
607 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
608 		    inp->inp_rdomain != rdomain ||
609 		    inp->inp_socket == 0) {
610 			inp = CIRCLEQ_NEXT(inp, inp_queue);
611 			continue;
612 		}
613 		oinp = inp;
614 		inp = CIRCLEQ_NEXT(inp, inp_queue);
615 		if (notify)
616 			(*notify)(oinp, errno);
617 	}
618 }
619 
620 /*
621  * Check for alternatives when higher level complains
622  * about service problems.  For now, invalidate cached
623  * routing information.  If the route was created dynamically
624  * (by a redirect), time to try a default gateway again.
625  */
626 void
627 in_losing(inp)
628 	struct inpcb *inp;
629 {
630 	struct rtentry *rt;
631 	struct rt_addrinfo info;
632 
633 	if ((rt = inp->inp_route.ro_rt)) {
634 		inp->inp_route.ro_rt = 0;
635 		bzero((caddr_t)&info, sizeof(info));
636 		info.rti_flags = rt->rt_flags;
637 		info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
638 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
639 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
640 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0,
641 		    inp->inp_rdomain);
642 		if (rt->rt_flags & RTF_DYNAMIC)
643 			(void)rtrequest1(RTM_DELETE, &info, rt->rt_priority,
644 				(struct rtentry **)0, inp->inp_rdomain);
645 		/*
646 		 * A new route can be allocated
647 		 * the next time output is attempted.
648 		 * rtfree() needs to be called in anycase because the inp
649 		 * is still holding a reference to rt.
650 		 */
651 		rtfree(rt);
652 	}
653 }
654 
655 /*
656  * After a routing change, flush old routing
657  * and allocate a (hopefully) better one.
658  */
659 void
660 in_rtchange(inp, errno)
661 	struct inpcb *inp;
662 	int errno;
663 {
664 	if (inp->inp_route.ro_rt) {
665 		rtfree(inp->inp_route.ro_rt);
666 		inp->inp_route.ro_rt = 0;
667 		/*
668 		 * A new route can be allocated the next time
669 		 * output is attempted.
670 		 */
671 	}
672 }
673 
674 struct inpcb *
675 in_pcblookup(struct inpcbtable *table, void *faddrp, u_int fport_arg, void *laddrp, u_int lport_arg, int flags, u_int rdomain)
676 {
677 	struct inpcb *inp, *match = 0;
678 	int matchwild = 3, wildcard;
679 	u_int16_t fport = fport_arg, lport = lport_arg;
680 	struct in_addr faddr = *(struct in_addr *)faddrp;
681 	struct in_addr laddr = *(struct in_addr *)laddrp;
682 
683 	rdomain = rtable_l2(rdomain);
684 	for (inp = LIST_FIRST(INPCBLHASH(table, lport, rdomain)); inp;
685 	    inp = LIST_NEXT(inp, inp_lhash)) {
686 		if (inp->inp_rdomain != rdomain)
687 			continue;
688 		if (inp->inp_lport != lport)
689 			continue;
690 		wildcard = 0;
691 #ifdef INET6
692 		if (flags & INPLOOKUP_IPV6) {
693 			struct in6_addr *laddr6 = (struct in6_addr *)laddrp;
694 			struct in6_addr *faddr6 = (struct in6_addr *)faddrp;
695 
696 			if (!(inp->inp_flags & INP_IPV6))
697 				continue;
698 
699 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
700 				if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
701 					wildcard++;
702 				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6))
703 					continue;
704 			} else {
705 				if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
706 					wildcard++;
707 			}
708 
709 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
710 				if (IN6_IS_ADDR_UNSPECIFIED(faddr6))
711 					wildcard++;
712 				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
713 				    faddr6) || inp->inp_fport != fport)
714 					continue;
715 			} else {
716 				if (!IN6_IS_ADDR_UNSPECIFIED(faddr6))
717 					wildcard++;
718 			}
719 		} else
720 #endif /* INET6 */
721 		{
722 #ifdef INET6
723 		        if (inp->inp_flags & INP_IPV6)
724 			        continue;
725 #endif /* INET6 */
726 
727 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
728 				if (faddr.s_addr == INADDR_ANY)
729 					wildcard++;
730 				else if (inp->inp_faddr.s_addr != faddr.s_addr ||
731 				    inp->inp_fport != fport)
732 					continue;
733 			} else {
734 				if (faddr.s_addr != INADDR_ANY)
735 					wildcard++;
736 			}
737 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
738 				if (laddr.s_addr == INADDR_ANY)
739 					wildcard++;
740 				else if (inp->inp_laddr.s_addr != laddr.s_addr)
741 					continue;
742 			} else {
743 				if (laddr.s_addr != INADDR_ANY)
744 					wildcard++;
745 			}
746 		}
747 		if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) &&
748 		    wildcard < matchwild) {
749 			match = inp;
750 			if ((matchwild = wildcard) == 0)
751 				break;
752 		}
753 	}
754 	return (match);
755 }
756 
757 struct rtentry *
758 in_pcbrtentry(inp)
759 	struct inpcb *inp;
760 {
761 	struct route *ro;
762 
763 	ro = &inp->inp_route;
764 
765 	/*
766 	 * No route yet, so try to acquire one.
767 	 */
768 	if (ro->ro_rt == NULL) {
769 #ifdef INET6
770 		bzero(ro, sizeof(struct route_in6));
771 #else
772 		bzero(ro, sizeof(struct route));
773 #endif
774 
775 		switch(sotopf(inp->inp_socket)) {
776 #ifdef INET6
777 		case PF_INET6:
778 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
779 				break;
780 			ro->ro_dst.sa_family = AF_INET6;
781 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
782 			((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
783 			    inp->inp_faddr6;
784 			rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0], 0);
785 			break;
786 #endif /* INET6 */
787 		case PF_INET:
788 			if (inp->inp_faddr.s_addr == INADDR_ANY)
789 				break;
790 			ro->ro_dst.sa_family = AF_INET;
791 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
792 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
793 			rtalloc_mpath(ro, &inp->inp_laddr.s_addr,
794 			    inp->inp_rdomain);
795 			break;
796 		}
797 	}
798 	return (ro->ro_rt);
799 }
800 
801 struct sockaddr_in *
802 in_selectsrc(struct sockaddr_in *sin, struct route *ro, int soopts,
803     struct ip_moptions *mopts, int *errorp, u_int rdomain)
804 {
805 	struct sockaddr_in *sin2;
806 	struct in_ifaddr *ia;
807 
808 	ia = (struct in_ifaddr *)0;
809 	/*
810 	 * If route is known or can be allocated now,
811 	 * our src addr is taken from the i/f, else punt.
812 	 */
813 	if (ro->ro_rt &&
814 	    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
815 		sin->sin_addr.s_addr ||
816 	    soopts & SO_DONTROUTE)) {
817 		RTFREE(ro->ro_rt);
818 		ro->ro_rt = (struct rtentry *)0;
819 	}
820 	if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
821 	    (ro->ro_rt == (struct rtentry *)0 ||
822 	    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
823 		/* No route yet, so try to acquire one */
824 		ro->ro_dst.sa_family = AF_INET;
825 		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
826 		satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
827 		rtalloc_mpath(ro, NULL, rdomain);
828 
829 		/*
830 		 * It is important to bzero out the rest of the
831 		 * struct sockaddr_in when mixing v6 & v4!
832 		 */
833 		sin2 = (struct sockaddr_in *)&ro->ro_dst;
834 		bzero(sin2->sin_zero, sizeof(sin2->sin_zero));
835 	}
836 	/*
837 	 * If we found a route, use the address
838 	 * corresponding to the outgoing interface
839 	 * unless it is the loopback (in case a route
840 	 * to our address on another net goes to loopback).
841 	 */
842 	if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
843 		ia = ifatoia(ro->ro_rt->rt_ifa);
844 	if (ia == 0) {
845 		u_int16_t fport = sin->sin_port;
846 
847 		sin->sin_port = 0;
848 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin), rdomain));
849 		if (ia == 0)
850 			ia = ifatoia(ifa_ifwithnet(sintosa(sin), rdomain));
851 		sin->sin_port = fport;
852 		if (ia == 0)
853 			ia = TAILQ_FIRST(&in_ifaddr);
854 		if (ia == 0) {
855 			*errorp = EADDRNOTAVAIL;
856 			return NULL;
857 		}
858 	}
859 	/*
860 	 * If the destination address is multicast and an outgoing
861 	 * interface has been set as a multicast option, use the
862 	 * address of that interface as our source address.
863 	 */
864 	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
865 		struct ip_moptions *imo;
866 		struct ifnet *ifp;
867 
868 		imo = mopts;
869 		if (imo->imo_multicast_ifp != NULL) {
870 			ifp = imo->imo_multicast_ifp;
871 			TAILQ_FOREACH(ia, &in_ifaddr, ia_list)
872 				if (ia->ia_ifp == ifp)
873 					break;
874 			if (ia == 0) {
875 				*errorp = EADDRNOTAVAIL;
876 				return NULL;
877 			}
878 		}
879 	}
880 	return satosin(&ia->ia_addr);
881 }
882 
883 void
884 in_pcbrehash(inp)
885 	struct inpcb *inp;
886 {
887 	struct inpcbtable *table = inp->inp_table;
888 	int s;
889 
890 	s = splnet();
891 	LIST_REMOVE(inp, inp_lhash);
892 	LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport, inp->inp_rdomain),
893 	    inp, inp_lhash);
894 	LIST_REMOVE(inp, inp_hash);
895 #ifdef INET6
896 	if (inp->inp_flags & INP_IPV6) {
897 		LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6,
898 		    inp->inp_fport, &inp->inp_laddr6, inp->inp_lport),
899 		    inp, inp_hash);
900 	} else {
901 #endif /* INET6 */
902 		LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr,
903 		    inp->inp_fport, &inp->inp_laddr, inp->inp_lport,
904 		    inp->inp_rdomain), inp, inp_hash);
905 #ifdef INET6
906 	}
907 #endif /* INET6 */
908 	splx(s);
909 }
910 
911 #ifdef DIAGNOSTIC
912 int	in_pcbnotifymiss = 0;
913 #endif
914 
915 /*
916  * The in(6)_pcbhashlookup functions are used to locate connected sockets
917  * quickly:
918  * 		faddr.fport <-> laddr.lport
919  * No wildcard matching is done so that listening sockets are not found.
920  * If the functions return NULL in(6)_pcblookup_listen can be used to
921  * find a listening/bound socket that may accept the connection.
922  * After those two lookups no other are necessary.
923  */
924 struct inpcb *
925 in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr,
926     u_int fport_arg, struct in_addr laddr, u_int lport_arg, u_int rdomain)
927 {
928 	struct inpcbhead *head;
929 	struct inpcb *inp;
930 	u_int16_t fport = fport_arg, lport = lport_arg;
931 
932 	rdomain = rtable_l2(rdomain);
933 	head = INPCBHASH(table, &faddr, fport, &laddr, lport, rdomain);
934 	LIST_FOREACH(inp, head, inp_hash) {
935 #ifdef INET6
936 		if (inp->inp_flags & INP_IPV6)
937 			continue;	/*XXX*/
938 #endif
939 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
940 		    inp->inp_fport == fport &&
941 		    inp->inp_lport == lport &&
942 		    inp->inp_laddr.s_addr == laddr.s_addr &&
943 		    inp->inp_rdomain == rdomain) {
944 			/*
945 			 * Move this PCB to the head of hash chain so that
946 			 * repeated accesses are quicker.  This is analogous to
947 			 * the historic single-entry PCB cache.
948 			 */
949 			if (inp != LIST_FIRST(head)) {
950 				LIST_REMOVE(inp, inp_hash);
951 				LIST_INSERT_HEAD(head, inp, inp_hash);
952 			}
953 			break;
954 		}
955 	}
956 #ifdef DIAGNOSTIC
957 	if (inp == NULL && in_pcbnotifymiss) {
958 		printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%d\n",
959 		    ntohl(faddr.s_addr), ntohs(fport),
960 		    ntohl(laddr.s_addr), ntohs(lport), rdomain);
961 	}
962 #endif
963 	return (inp);
964 }
965 
#ifdef INET6
/*
 * IPv6 counterpart of in_pcbhashlookup(): return the IPv6 PCB in
 * `table' whose foreign address/port and local address/port all equal
 * the arguments, or NULL.  No wildcard matching is done.
 */
struct inpcb *
in6_pcbhashlookup(struct inpcbtable *table, struct in6_addr *faddr,
    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg)
{
	struct inpcbhead *bucket;
	struct inpcb *inp;
	u_int16_t fport = fport_arg, lport = lport_arg;

	bucket = IN6PCBHASH(table, faddr, fport, laddr, lport);

	LIST_FOREACH(inp, bucket, inp_hash) {
		if (!(inp->inp_flags & INP_IPV6))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) ||
		    inp->inp_fport != fport || inp->inp_lport != lport ||
		    !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr))
			continue;

		/*
		 * Found it.  Pull the PCB to the front of its chain so
		 * that repeated lookups are cheaper, much like the
		 * historic single-entry PCB cache.
		 */
		if (inp != LIST_FIRST(bucket)) {
			LIST_REMOVE(inp, inp_hash);
			LIST_INSERT_HEAD(bucket, inp, inp_hash);
		}
		break;
	}

#ifdef DIAGNOSTIC
	/* Note: only the ports are printed; the addresses are not. */
	if (inp == NULL && in_pcbnotifymiss) {
		printf("in6_pcbhashlookup: faddr=");
		printf(" fport=%d laddr=", ntohs(fport));
		printf(" lport=%d\n", ntohs(lport));
	}
#endif
	return (inp);
}
#endif /* INET6 */
1004 
1005 /*
1006  * The in(6)_pcblookup_listen functions are used to locate listening
 * sockets quickly.  These are sockets with unspecified foreign address
1008  * and port:
1009  *		*.*     <-> laddr.lport
1010  *		*.*     <->     *.lport
1011  */
1012 struct inpcb *
1013 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr,
1014     u_int lport_arg, int reverse, struct mbuf *m, u_int rdomain)
1015 {
1016 	struct inpcbhead *head;
1017 	struct in_addr *key1, *key2;
1018 	struct inpcb *inp;
1019 	u_int16_t lport = lport_arg;
1020 
1021 	rdomain = rtable_l2(rdomain);
1022 #if NPF > 0
1023 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1024 		struct pf_divert *divert;
1025 		/* XXX rdomain */
1026 		if ((divert = pf_find_divert(m)) == NULL)
1027 			return (NULL);
1028 		key1 = key2 = &divert->addr.ipv4;
1029 		lport = divert->port;
1030 	} else
1031 #endif
1032 	if (reverse) {
1033 		key1 = &zeroin_addr;
1034 		key2 = &laddr;
1035 	} else {
1036 		key1 = &laddr;
1037 		key2 = &zeroin_addr;
1038 	}
1039 
1040 	head = INPCBHASH(table, &zeroin_addr, 0, key1, lport, rdomain);
1041 	LIST_FOREACH(inp, head, inp_hash) {
1042 #ifdef INET6
1043 		if (inp->inp_flags & INP_IPV6)
1044 			continue;	/*XXX*/
1045 #endif
1046 		if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1047 		    inp->inp_laddr.s_addr == key1->s_addr &&
1048 		    inp->inp_faddr.s_addr == INADDR_ANY &&
1049 		    inp->inp_rdomain == rdomain)
1050 			break;
1051 	}
1052 	if (inp == NULL && key1->s_addr != key2->s_addr) {
1053 		head = INPCBHASH(table, &zeroin_addr, 0, key2, lport, rdomain);
1054 		LIST_FOREACH(inp, head, inp_hash) {
1055 #ifdef INET6
1056 			if (inp->inp_flags & INP_IPV6)
1057 				continue;	/*XXX*/
1058 #endif
1059 			if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1060 			    inp->inp_laddr.s_addr == key2->s_addr &&
1061 			    inp->inp_faddr.s_addr == INADDR_ANY &&
1062 			    inp->inp_rdomain == rdomain)
1063 				break;
1064 		}
1065 	}
1066 #ifdef DIAGNOSTIC
1067 	if (inp == NULL && in_pcbnotifymiss) {
1068 		printf("in_pcblookup_listen: laddr=%08x lport=%d\n",
1069 		    ntohl(laddr.s_addr), ntohs(lport));
1070 	}
1071 #endif
1072 	/*
1073 	 * Move this PCB to the head of hash chain so that
1074 	 * repeated accesses are quicker.  This is analogous to
1075 	 * the historic single-entry PCB cache.
1076 	 */
1077 	if (inp != NULL && inp != LIST_FIRST(head)) {
1078 		LIST_REMOVE(inp, inp_hash);
1079 		LIST_INSERT_HEAD(head, inp, inp_hash);
1080 	}
1081 	return (inp);
1082 }
1083 
1084 #ifdef INET6
1085 struct inpcb *
1086 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
1087     u_int lport_arg, int reverse, struct mbuf *m)
1088 {
1089 	struct inpcbhead *head;
1090 	struct in6_addr *key1, *key2;
1091 	struct inpcb *inp;
1092 	u_int16_t lport = lport_arg;
1093 
1094 #if NPF > 0
1095 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1096 		struct pf_divert *divert;
1097 
1098 		if ((divert = pf_find_divert(m)) == NULL)
1099 			return (NULL);
1100 		key1 = key2 = &divert->addr.ipv6;
1101 		lport = divert->port;
1102 	} else
1103 #endif
1104 	if (reverse) {
1105 		key1 = &zeroin6_addr;
1106 		key2 = laddr;
1107 	} else {
1108 		key1 = laddr;
1109 		key2 = &zeroin6_addr;
1110 	}
1111 
1112 	head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport);
1113 	LIST_FOREACH(inp, head, inp_hash) {
1114 		if (!(inp->inp_flags & INP_IPV6))
1115 			continue;
1116 		if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1117 		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) &&
1118 		    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1119 			break;
1120 	}
1121 	if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) {
1122 		head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport);
1123 		LIST_FOREACH(inp, head, inp_hash) {
1124 			if (!(inp->inp_flags & INP_IPV6))
1125 				continue;
1126 			if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1127 		    	    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) &&
1128 			    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1129 				break;
1130 		}
1131 	}
1132 #ifdef DIAGNOSTIC
1133 	if (inp == NULL && in_pcbnotifymiss) {
1134 		printf("in6_pcblookup_listen: laddr= lport=%d\n",
1135 		    ntohs(lport));
1136 	}
1137 #endif
1138 	/*
1139 	 * Move this PCB to the head of hash chain so that
1140 	 * repeated accesses are quicker.  This is analogous to
1141 	 * the historic single-entry PCB cache.
1142 	 */
1143 	if (inp != NULL && inp != LIST_FIRST(head)) {
1144 		LIST_REMOVE(inp, inp_hash);
1145 		LIST_INSERT_HEAD(head, inp, inp_hash);
1146 	}
1147 	return (inp);
1148 }
1149 #endif /* INET6 */
1150