xref: /openbsd-src/sys/netinet/in_pcb.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: in_pcb.c,v 1.104 2009/03/15 19:40:41 miod Exp $	*/
2 /*	$NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include "pf.h"
72 
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/proc.h>
80 #include <sys/domain.h>
81 #include <sys/pool.h>
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 #include <net/pfvar.h>
86 
87 #include <netinet/in.h>
88 #include <netinet/in_systm.h>
89 #include <netinet/ip.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/in_var.h>
92 #include <netinet/ip_var.h>
93 #include <dev/rndvar.h>
94 
95 #include <sys/mount.h>
96 #include <nfs/nfsproto.h>
97 
98 #ifdef INET6
99 #include <netinet6/ip6_var.h>
100 #endif /* INET6 */
101 #ifdef IPSEC
102 #include <netinet/ip_esp.h>
103 #endif /* IPSEC */
104 
105 struct	in_addr zeroin_addr;
106 
107 extern int ipsec_auth_default_level;
108 extern int ipsec_esp_trans_default_level;
109 extern int ipsec_esp_network_default_level;
110 extern int ipsec_ipcomp_default_level;
111 
112 /*
113  * These configure the range of local port addresses assigned to
114  * "unspecified" outgoing connections/packets/whatever.
115  */
116 int ipport_firstauto = IPPORT_RESERVED;
117 int ipport_lastauto = IPPORT_USERRESERVED;
118 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;
119 int ipport_hilastauto = IPPORT_HILASTAUTO;
120 
121 struct pool inpcb_pool;
122 int inpcb_pool_initialized = 0;
123 
/*
 * Bucket selectors for the PCB hash tables.  Each macro evaluates to a
 * pointer to the list head of the selected bucket.
 *
 * INPCBHASH / IN6PCBHASH index inpt_hashtbl by foreign address, foreign
 * port and local port (ports in network byte order), masked with
 * inpt_hash.  The laddr argument is accepted for symmetry but is not
 * mixed into the hash.  INPCBLHASH indexes inpt_lhashtbl by local port
 * only, masked with inpt_lhash.
 *
 * Every argument and each whole expansion is parenthesized so that
 * expressions such as "&tbl" or "a | b" may be passed safely.
 */
#define	INPCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \
	ntohs((fport)) + ntohs((lport))) & ((table)->inpt_hash)])

#define	IN6PCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \
	(faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \
	((table)->inpt_hash)])

#define	INPCBLHASH(table, lport) \
	(&(table)->inpt_lhashtbl[(lport) & ((table)->inpt_lhash)])
135 
136 void
137 in_pcbinit(table, hashsize)
138 	struct inpcbtable *table;
139 	int hashsize;
140 {
141 
142 	CIRCLEQ_INIT(&table->inpt_queue);
143 	table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
144 	    &table->inpt_hash);
145 	if (table->inpt_hashtbl == NULL)
146 		panic("in_pcbinit: hashinit failed");
147 	table->inpt_lhashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
148 	    &table->inpt_lhash);
149 	if (table->inpt_lhashtbl == NULL)
150 		panic("in_pcbinit: hashinit failed for lport");
151 	table->inpt_lastport = 0;
152 }
153 
154 struct baddynamicports baddynamicports;
155 
156 /*
157  * Check if the specified port is invalid for dynamic allocation.
158  */
159 int
160 in_baddynamic(u_int16_t port, u_int16_t proto)
161 {
162 	switch (proto) {
163 	case IPPROTO_TCP:
164 		return (DP_ISSET(baddynamicports.tcp, port));
165 	case IPPROTO_UDP:
166 #ifdef IPSEC
167 		/* Cannot preset this as it is a sysctl */
168 		if (port == udpencap_port)
169 			return (1);
170 #endif
171 		return (DP_ISSET(baddynamicports.udp, port));
172 	default:
173 		return (0);
174 	}
175 }
176 
177 int
178 in_pcballoc(so, v)
179 	struct socket *so;
180 	void *v;
181 {
182 	struct inpcbtable *table = v;
183 	struct inpcb *inp;
184 	int s;
185 
186 	if (inpcb_pool_initialized == 0) {
187 		pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0,
188 		    "inpcbpl", NULL);
189 		inpcb_pool_initialized = 1;
190 	}
191 	inp = pool_get(&inpcb_pool, PR_NOWAIT);
192 	if (inp == NULL)
193 		return (ENOBUFS);
194 	bzero((caddr_t)inp, sizeof(*inp));
195 	inp->inp_table = table;
196 	inp->inp_socket = so;
197 	inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level;
198 	inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level;
199 	inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level;
200 	inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level;
201 	s = splnet();
202 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
203 	LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport), inp, inp_lhash);
204 	LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
205 	    &inp->inp_laddr, inp->inp_lport), inp, inp_hash);
206 	splx(s);
207 	so->so_pcb = inp;
208 	inp->inp_hops = -1;
209 
210 #ifdef INET6
211 	/*
212 	 * Small change in this function to set the INP_IPV6 flag so routines
213 	 * outside pcb-specific routines don't need to use sotopf(), and all
214 	 * of its pointer chasing, later.
215 	 */
216 	if (sotopf(so) == PF_INET6)
217 		inp->inp_flags = INP_IPV6;
218 	inp->in6p_cksum = -1;
219 #endif /* INET6 */
220 	return (0);
221 }
222 
223 int
224 in_pcbbind(v, nam, p)
225 	void *v;
226 	struct mbuf *nam;
227 	struct proc *p;
228 {
229 	struct inpcb *inp = v;
230 	struct socket *so = inp->inp_socket;
231 	struct inpcbtable *table = inp->inp_table;
232 	u_int16_t *lastport = &inp->inp_table->inpt_lastport;
233 	struct sockaddr_in *sin;
234 	u_int16_t lport = 0;
235 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
236 	int error;
237 
238 #ifdef INET6
239 	if (sotopf(so) == PF_INET6)
240 		return in6_pcbbind(inp, nam, p);
241 #endif /* INET6 */
242 
243 	if (TAILQ_EMPTY(&in_ifaddr))
244 		return (EADDRNOTAVAIL);
245 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
246 		return (EINVAL);
247 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
248 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
249 	     (so->so_options & SO_ACCEPTCONN) == 0))
250 		wild = INPLOOKUP_WILDCARD;
251 	if (nam) {
252 		sin = mtod(nam, struct sockaddr_in *);
253 		if (nam->m_len != sizeof (*sin))
254 			return (EINVAL);
255 #ifdef notdef
256 		/*
257 		 * We should check the family, but old programs
258 		 * incorrectly fail to initialize it.
259 		 */
260 		if (sin->sin_family != AF_INET)
261 			return (EAFNOSUPPORT);
262 #endif
263 		lport = sin->sin_port;
264 		if (IN_MULTICAST(sin->sin_addr.s_addr)) {
265 			/*
266 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
267 			 * allow complete duplication of binding if
268 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
269 			 * and a multicast address is bound on both
270 			 * new and duplicated sockets.
271 			 */
272 			if (so->so_options & SO_REUSEADDR)
273 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
274 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
275 			sin->sin_port = 0;		/* yech... */
276 			if (!(so->so_options & SO_BINDANY) &&
277 			    in_iawithaddr(sin->sin_addr, NULL) == 0)
278 				return (EADDRNOTAVAIL);
279 		}
280 		if (lport) {
281 			struct inpcb *t;
282 
283 			/* GROSS */
284 			if (ntohs(lport) < IPPORT_RESERVED &&
285 			    (error = suser(p, 0)))
286 				return (EACCES);
287 			if (so->so_euid) {
288 				t = in_pcblookup(table, &zeroin_addr, 0,
289 				    &sin->sin_addr, lport, INPLOOKUP_WILDCARD);
290 				if (t && (so->so_euid != t->inp_socket->so_euid))
291 					return (EADDRINUSE);
292 			}
293 			t = in_pcblookup(table, &zeroin_addr, 0,
294 			    &sin->sin_addr, lport, wild);
295 			if (t && (reuseport & t->inp_socket->so_options) == 0)
296 				return (EADDRINUSE);
297 		}
298 		inp->inp_laddr = sin->sin_addr;
299 	}
300 	if (lport == 0) {
301 		u_int16_t first, last;
302 		int count;
303 
304 		if (inp->inp_flags & INP_HIGHPORT) {
305 			first = ipport_hifirstauto;	/* sysctl */
306 			last = ipport_hilastauto;
307 		} else if (inp->inp_flags & INP_LOWPORT) {
308 			if ((error = suser(p, 0)))
309 				return (EACCES);
310 			first = IPPORT_RESERVED-1; /* 1023 */
311 			last = 600;		   /* not IPPORT_RESERVED/2 */
312 		} else {
313 			first = ipport_firstauto;	/* sysctl */
314 			last  = ipport_lastauto;
315 		}
316 
317 		/*
318 		 * Simple check to ensure all ports are not used up causing
319 		 * a deadlock here.
320 		 *
321 		 * We split the two cases (up and down) so that the direction
322 		 * is not being tested on each round of the loop.
323 		 */
324 
325 		if (first > last) {
326 			/*
327 			 * counting down
328 			 */
329 			count = first - last;
330 			if (count)
331 				*lastport = first - arc4random_uniform(count);
332 
333 			do {
334 				if (count-- < 0)	/* completely used? */
335 					return (EADDRNOTAVAIL);
336 				--*lastport;
337 				if (*lastport > first || *lastport < last)
338 					*lastport = first;
339 				lport = htons(*lastport);
340 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
341 			    in_pcblookup(table, &zeroin_addr, 0,
342 			    &inp->inp_laddr, lport, wild));
343 		} else {
344 			/*
345 			 * counting up
346 			 */
347 			count = last - first;
348 			if (count)
349 				*lastport = first + arc4random_uniform(count);
350 
351 			do {
352 				if (count-- < 0)	/* completely used? */
353 					return (EADDRNOTAVAIL);
354 				++*lastport;
355 				if (*lastport < first || *lastport > last)
356 					*lastport = first;
357 				lport = htons(*lastport);
358 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
359 			    in_pcblookup(table, &zeroin_addr, 0,
360 			    &inp->inp_laddr, lport, wild));
361 		}
362 	}
363 	inp->inp_lport = lport;
364 	in_pcbrehash(inp);
365 	return (0);
366 }
367 
368 /*
369  * Connect from a socket to a specified address.
370  * Both address and port must be specified in argument sin.
371  * If don't have a local address for this socket yet,
372  * then pick one.
373  */
374 int
375 in_pcbconnect(v, nam)
376 	void *v;
377 	struct mbuf *nam;
378 {
379 	struct inpcb *inp = v;
380 	struct sockaddr_in *ifaddr = NULL;
381 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
382 
383 #ifdef INET6
384 	if (sotopf(inp->inp_socket) == PF_INET6)
385 		return (in6_pcbconnect(inp, nam));
386 	if ((inp->inp_flags & INP_IPV6) != 0)
387 		panic("IPv6 pcb passed into in_pcbconnect");
388 #endif /* INET6 */
389 
390 	if (nam->m_len != sizeof (*sin))
391 		return (EINVAL);
392 	if (sin->sin_family != AF_INET)
393 		return (EAFNOSUPPORT);
394 	if (sin->sin_port == 0)
395 		return (EADDRNOTAVAIL);
396 	if (!TAILQ_EMPTY(&in_ifaddr)) {
397 		/*
398 		 * If the destination address is INADDR_ANY,
399 		 * use the primary local address.
400 		 * If the supplied address is INADDR_BROADCAST,
401 		 * and the primary interface supports broadcast,
402 		 * choose the broadcast address for that interface.
403 		 */
404 		if (sin->sin_addr.s_addr == INADDR_ANY)
405 			sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_addr.sin_addr;
406 		else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
407 		  (TAILQ_FIRST(&in_ifaddr)->ia_ifp->if_flags & IFF_BROADCAST))
408 			sin->sin_addr = TAILQ_FIRST(&in_ifaddr)->ia_broadaddr.sin_addr;
409 	}
410 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
411 		int error;
412 		ifaddr = in_selectsrc(sin, &inp->inp_route,
413 			inp->inp_socket->so_options, inp->inp_moptions, &error);
414 		if (ifaddr == NULL) {
415 			if (error == 0)
416 				error = EADDRNOTAVAIL;
417 			return error;
418 		}
419 	}
420 	if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
421 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
422 	    inp->inp_lport) != 0)
423 		return (EADDRINUSE);
424 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
425 		if (inp->inp_lport == 0 &&
426 		    in_pcbbind(inp, NULL, curproc) == EADDRNOTAVAIL)
427 			return (EADDRNOTAVAIL);
428 		inp->inp_laddr = ifaddr->sin_addr;
429 	}
430 	inp->inp_faddr = sin->sin_addr;
431 	inp->inp_fport = sin->sin_port;
432 	in_pcbrehash(inp);
433 #ifdef IPSEC
434 	{
435 		int error; /* This is just ignored */
436 
437 		/* Cause an IPsec SA to be established. */
438 		ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT,
439 		    NULL, inp, NULL);
440 	}
441 #endif
442 	return (0);
443 }
444 
445 void
446 in_pcbdisconnect(v)
447 	void *v;
448 {
449 	struct inpcb *inp = v;
450 
451 	switch (sotopf(inp->inp_socket)) {
452 #ifdef INET6
453 	case PF_INET6:
454 		inp->inp_faddr6 = in6addr_any;
455 		break;
456 #endif
457 	case PF_INET:
458 		inp->inp_faddr.s_addr = INADDR_ANY;
459 		break;
460 	}
461 
462 	inp->inp_fport = 0;
463 	in_pcbrehash(inp);
464 	if (inp->inp_socket->so_state & SS_NOFDREF)
465 		in_pcbdetach(inp);
466 }
467 
468 void
469 in_pcbdetach(v)
470 	void *v;
471 {
472 	struct inpcb *inp = v;
473 	struct socket *so = inp->inp_socket;
474 	int s;
475 
476 	so->so_pcb = 0;
477 	sofree(so);
478 	if (inp->inp_options)
479 		(void)m_freem(inp->inp_options);
480 	if (inp->inp_route.ro_rt)
481 		rtfree(inp->inp_route.ro_rt);
482 #ifdef INET6
483 	if (inp->inp_flags & INP_IPV6) {
484 		ip6_freepcbopts(inp->inp_outputopts6);
485 		ip6_freemoptions(inp->inp_moptions6);
486 	} else
487 #endif
488 		ip_freemoptions(inp->inp_moptions);
489 #ifdef IPSEC
490 	/* IPsec cleanup here */
491 	s = spltdb();
492 	if (inp->inp_tdb_in)
493 		TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
494 			     inp, inp_tdb_in_next);
495 	if (inp->inp_tdb_out)
496 	        TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp,
497 			     inp_tdb_out_next);
498 	if (inp->inp_ipsec_remotecred)
499 		ipsp_reffree(inp->inp_ipsec_remotecred);
500 	if (inp->inp_ipsec_remoteauth)
501 		ipsp_reffree(inp->inp_ipsec_remoteauth);
502 	if (inp->inp_ipo)
503 		ipsec_delete_policy(inp->inp_ipo);
504 	splx(s);
505 #endif
506 #if NPF > 0
507 	if (inp->inp_pf_sk)
508 		((struct pf_state_key *)inp->inp_pf_sk)->inp = NULL;
509 #endif
510 	s = splnet();
511 	LIST_REMOVE(inp, inp_lhash);
512 	LIST_REMOVE(inp, inp_hash);
513 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
514 	splx(s);
515 	pool_put(&inpcb_pool, inp);
516 }
517 
518 void
519 in_setsockaddr(inp, nam)
520 	struct inpcb *inp;
521 	struct mbuf *nam;
522 {
523 	struct sockaddr_in *sin;
524 
525 	nam->m_len = sizeof (*sin);
526 	sin = mtod(nam, struct sockaddr_in *);
527 	bzero((caddr_t)sin, sizeof (*sin));
528 	sin->sin_family = AF_INET;
529 	sin->sin_len = sizeof(*sin);
530 	sin->sin_port = inp->inp_lport;
531 	sin->sin_addr = inp->inp_laddr;
532 }
533 
534 void
535 in_setpeeraddr(inp, nam)
536 	struct inpcb *inp;
537 	struct mbuf *nam;
538 {
539 	struct sockaddr_in *sin;
540 
541 #ifdef INET6
542 	if (sotopf(inp->inp_socket) == PF_INET6) {
543 		in6_setpeeraddr(inp, nam);
544 		return;
545 	}
546 #endif /* INET6 */
547 
548 	nam->m_len = sizeof (*sin);
549 	sin = mtod(nam, struct sockaddr_in *);
550 	bzero((caddr_t)sin, sizeof (*sin));
551 	sin->sin_family = AF_INET;
552 	sin->sin_len = sizeof(*sin);
553 	sin->sin_port = inp->inp_fport;
554 	sin->sin_addr = inp->inp_faddr;
555 }
556 
557 /*
558  * Pass some notification to all connections of a protocol
559  * associated with address dst.  The "usual action" will be
560  * taken, depending on the ctlinput cmd.  The caller must filter any
561  * cmds that are uninteresting (e.g., no error in the map).
562  * Call the protocol specific routine (if any) to report
563  * any errors for each matching socket.
564  *
565  * Must be called at splsoftnet.
566  */
567 void
568 in_pcbnotifyall(table, dst, errno, notify)
569 	struct inpcbtable *table;
570 	struct sockaddr *dst;
571 	int errno;
572 	void (*notify)(struct inpcb *, int);
573 {
574 	struct inpcb *inp, *oinp;
575 	struct in_addr faddr;
576 
577 	splsoftassert(IPL_SOFTNET);
578 
579 #ifdef INET6
580 	/*
581 	 * See in6_pcbnotify() for IPv6 codepath.  By the time this
582 	 * gets called, the addresses passed are either definitely IPv4 or
583 	 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
584 	 */
585 #endif /* INET6 */
586 
587 	if (dst->sa_family != AF_INET)
588 		return;
589 	faddr = satosin(dst)->sin_addr;
590 	if (faddr.s_addr == INADDR_ANY)
591 		return;
592 
593 	for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
594 	    inp != CIRCLEQ_END(&table->inpt_queue);) {
595 #ifdef INET6
596 		if (inp->inp_flags & INP_IPV6) {
597 			inp = CIRCLEQ_NEXT(inp, inp_queue);
598 			continue;
599 		}
600 #endif
601 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
602 		    inp->inp_socket == 0) {
603 			inp = CIRCLEQ_NEXT(inp, inp_queue);
604 			continue;
605 		}
606 		oinp = inp;
607 		inp = CIRCLEQ_NEXT(inp, inp_queue);
608 		if (notify)
609 			(*notify)(oinp, errno);
610 	}
611 }
612 
613 /*
614  * Check for alternatives when higher level complains
615  * about service problems.  For now, invalidate cached
616  * routing information.  If the route was created dynamically
617  * (by a redirect), time to try a default gateway again.
618  */
619 void
620 in_losing(inp)
621 	struct inpcb *inp;
622 {
623 	struct rtentry *rt;
624 	struct rt_addrinfo info;
625 
626 	if ((rt = inp->inp_route.ro_rt)) {
627 		inp->inp_route.ro_rt = 0;
628 		bzero((caddr_t)&info, sizeof(info));
629 		info.rti_flags = rt->rt_flags;
630 		info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
631 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
632 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
633 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, rt->rt_ifp, 0, 0);
634 		if (rt->rt_flags & RTF_DYNAMIC)
635 			(void)rtrequest1(RTM_DELETE, &info, rt->rt_priority,
636 				(struct rtentry **)0, 0);
637 		/*
638 		 * A new route can be allocated
639 		 * the next time output is attempted.
640 		 * rtfree() needs to be called in anycase because the inp
641 		 * is still holding a reference to rt.
642 		 */
643 		rtfree(rt);
644 	}
645 }
646 
647 /*
648  * After a routing change, flush old routing
649  * and allocate a (hopefully) better one.
650  */
651 void
652 in_rtchange(inp, errno)
653 	struct inpcb *inp;
654 	int errno;
655 {
656 	if (inp->inp_route.ro_rt) {
657 		rtfree(inp->inp_route.ro_rt);
658 		inp->inp_route.ro_rt = 0;
659 		/*
660 		 * A new route can be allocated the next time
661 		 * output is attempted.
662 		 */
663 	}
664 }
665 
666 struct inpcb *
667 in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags)
668 	struct inpcbtable *table;
669 	void *faddrp, *laddrp;
670 	u_int fport_arg, lport_arg;
671 	int flags;
672 {
673 	struct inpcb *inp, *match = 0;
674 	int matchwild = 3, wildcard;
675 	u_int16_t fport = fport_arg, lport = lport_arg;
676 	struct in_addr faddr = *(struct in_addr *)faddrp;
677 	struct in_addr laddr = *(struct in_addr *)laddrp;
678 
679 	for (inp = LIST_FIRST(INPCBLHASH(table, lport)); inp;
680 	    inp = LIST_NEXT(inp, inp_lhash)) {
681 		if (inp->inp_lport != lport)
682 			continue;
683 		wildcard = 0;
684 #ifdef INET6
685 		if (flags & INPLOOKUP_IPV6) {
686 			struct in6_addr *laddr6 = (struct in6_addr *)laddrp;
687 			struct in6_addr *faddr6 = (struct in6_addr *)faddrp;
688 
689 			if (!(inp->inp_flags & INP_IPV6))
690 				continue;
691 
692 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
693 				if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
694 					wildcard++;
695 				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6))
696 					continue;
697 			} else {
698 				if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
699 					wildcard++;
700 			}
701 
702 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
703 				if (IN6_IS_ADDR_UNSPECIFIED(faddr6))
704 					wildcard++;
705 				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
706 				    faddr6) || inp->inp_fport != fport)
707 					continue;
708 			} else {
709 				if (!IN6_IS_ADDR_UNSPECIFIED(faddr6))
710 					wildcard++;
711 			}
712 		} else
713 #endif /* INET6 */
714 		{
715 #ifdef INET6
716 		        if (inp->inp_flags & INP_IPV6)
717 			        continue;
718 #endif /* INET6 */
719 
720 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
721 				if (faddr.s_addr == INADDR_ANY)
722 					wildcard++;
723 				else if (inp->inp_faddr.s_addr != faddr.s_addr ||
724 				    inp->inp_fport != fport)
725 					continue;
726 			} else {
727 				if (faddr.s_addr != INADDR_ANY)
728 					wildcard++;
729 			}
730 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
731 				if (laddr.s_addr == INADDR_ANY)
732 					wildcard++;
733 				else if (inp->inp_laddr.s_addr != laddr.s_addr)
734 					continue;
735 			} else {
736 				if (laddr.s_addr != INADDR_ANY)
737 					wildcard++;
738 			}
739 		}
740 		if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) &&
741 		    wildcard < matchwild) {
742 			match = inp;
743 			if ((matchwild = wildcard) == 0)
744 				break;
745 		}
746 	}
747 	return (match);
748 }
749 
750 struct rtentry *
751 in_pcbrtentry(inp)
752 	struct inpcb *inp;
753 {
754 	struct route *ro;
755 
756 	ro = &inp->inp_route;
757 
758 	/*
759 	 * No route yet, so try to acquire one.
760 	 */
761 	if (ro->ro_rt == NULL) {
762 #ifdef INET6
763 		bzero(ro, sizeof(struct route_in6));
764 #else
765 		bzero(ro, sizeof(struct route));
766 #endif
767 
768 		switch(sotopf(inp->inp_socket)) {
769 #ifdef INET6
770 		case PF_INET6:
771 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
772 				break;
773 			ro->ro_dst.sa_family = AF_INET6;
774 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
775 			((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
776 			    inp->inp_faddr6;
777 			rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0], 0);
778 			break;
779 #endif /* INET6 */
780 		case PF_INET:
781 			if (inp->inp_faddr.s_addr == INADDR_ANY)
782 				break;
783 			ro->ro_dst.sa_family = AF_INET;
784 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
785 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
786 			rtalloc_mpath(ro, &inp->inp_laddr.s_addr, 0);
787 			break;
788 		}
789 	}
790 	return (ro->ro_rt);
791 }
792 
793 struct sockaddr_in *
794 in_selectsrc(sin, ro, soopts, mopts, errorp)
795 	struct sockaddr_in *sin;
796 	struct route *ro;
797 	int soopts;
798 	struct ip_moptions *mopts;
799 	int *errorp;
800 {
801 	struct sockaddr_in *sin2;
802 	struct in_ifaddr *ia;
803 
804 	ia = (struct in_ifaddr *)0;
805 	/*
806 	 * If route is known or can be allocated now,
807 	 * our src addr is taken from the i/f, else punt.
808 	 */
809 	if (ro->ro_rt &&
810 	    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
811 		sin->sin_addr.s_addr ||
812 	    soopts & SO_DONTROUTE)) {
813 		RTFREE(ro->ro_rt);
814 		ro->ro_rt = (struct rtentry *)0;
815 	}
816 	if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
817 	    (ro->ro_rt == (struct rtentry *)0 ||
818 	    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
819 		/* No route yet, so try to acquire one */
820 		ro->ro_dst.sa_family = AF_INET;
821 		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
822 		satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
823 		rtalloc_mpath(ro, NULL, 0);
824 
825 		/*
826 		 * It is important to bzero out the rest of the
827 		 * struct sockaddr_in when mixing v6 & v4!
828 		 */
829 		sin2 = (struct sockaddr_in *)&ro->ro_dst;
830 		bzero(sin2->sin_zero, sizeof(sin2->sin_zero));
831 	}
832 	/*
833 	 * If we found a route, use the address
834 	 * corresponding to the outgoing interface
835 	 * unless it is the loopback (in case a route
836 	 * to our address on another net goes to loopback).
837 	 */
838 	if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
839 		ia = ifatoia(ro->ro_rt->rt_ifa);
840 	if (ia == 0) {
841 		u_int16_t fport = sin->sin_port;
842 
843 		sin->sin_port = 0;
844 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
845 		if (ia == 0)
846 			ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
847 		sin->sin_port = fport;
848 		if (ia == 0)
849 			ia = TAILQ_FIRST(&in_ifaddr);
850 		if (ia == 0) {
851 			*errorp = EADDRNOTAVAIL;
852 			return NULL;
853 		}
854 	}
855 	/*
856 	 * If the destination address is multicast and an outgoing
857 	 * interface has been set as a multicast option, use the
858 	 * address of that interface as our source address.
859 	 */
860 	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
861 		struct ip_moptions *imo;
862 		struct ifnet *ifp;
863 
864 		imo = mopts;
865 		if (imo->imo_multicast_ifp != NULL) {
866 			ifp = imo->imo_multicast_ifp;
867 			TAILQ_FOREACH(ia, &in_ifaddr, ia_list)
868 				if (ia->ia_ifp == ifp)
869 					break;
870 			if (ia == 0) {
871 				*errorp = EADDRNOTAVAIL;
872 				return NULL;
873 			}
874 		}
875 	}
876 	return satosin(&ia->ia_addr);
877 }
878 
879 void
880 in_pcbrehash(inp)
881 	struct inpcb *inp;
882 {
883 	struct inpcbtable *table = inp->inp_table;
884 	int s;
885 
886 	s = splnet();
887 	LIST_REMOVE(inp, inp_lhash);
888 	LIST_INSERT_HEAD(INPCBLHASH(table, inp->inp_lport), inp, inp_lhash);
889 	LIST_REMOVE(inp, inp_hash);
890 #ifdef INET6
891 	if (inp->inp_flags & INP_IPV6) {
892 		LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6,
893 		    inp->inp_fport, &inp->inp_laddr6, inp->inp_lport),
894 		    inp, inp_hash);
895 	} else {
896 #endif /* INET6 */
897 		LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr,
898 		    inp->inp_fport, &inp->inp_laddr, inp->inp_lport),
899 		    inp, inp_hash);
900 #ifdef INET6
901 	}
902 #endif /* INET6 */
903 	splx(s);
904 }
905 
906 #ifdef DIAGNOSTIC
907 int	in_pcbnotifymiss = 0;
908 #endif
909 
910 /*
911  * The in(6)_pcbhashlookup functions are used to locate connected sockets
912  * quickly:
913  * 		faddr.fport <-> laddr.lport
914  * No wildcard matching is done so that listening sockets are not found.
915  * If the functions return NULL in(6)_pcblookup_listen can be used to
916  * find a listening/bound socket that may accept the connection.
917  * After those two lookups no other are necessary.
918  */
919 struct inpcb *
920 in_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg)
921 	struct inpcbtable *table;
922 	struct in_addr faddr, laddr;
923 	u_int fport_arg, lport_arg;
924 {
925 	struct inpcbhead *head;
926 	struct inpcb *inp;
927 	u_int16_t fport = fport_arg, lport = lport_arg;
928 
929 	head = INPCBHASH(table, &faddr, fport, &laddr, lport);
930 	LIST_FOREACH(inp, head, inp_hash) {
931 #ifdef INET6
932 		if (inp->inp_flags & INP_IPV6)
933 			continue;	/*XXX*/
934 #endif
935 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
936 		    inp->inp_fport == fport &&
937 		    inp->inp_lport == lport &&
938 		    inp->inp_laddr.s_addr == laddr.s_addr) {
939 			/*
940 			 * Move this PCB to the head of hash chain so that
941 			 * repeated accesses are quicker.  This is analogous to
942 			 * the historic single-entry PCB cache.
943 			 */
944 			if (inp != LIST_FIRST(head)) {
945 				LIST_REMOVE(inp, inp_hash);
946 				LIST_INSERT_HEAD(head, inp, inp_hash);
947 			}
948 			break;
949 		}
950 	}
951 #ifdef DIAGNOSTIC
952 	if (inp == NULL && in_pcbnotifymiss) {
953 		printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d\n",
954 		    ntohl(faddr.s_addr), ntohs(fport),
955 		    ntohl(laddr.s_addr), ntohs(lport));
956 	}
957 #endif
958 	return (inp);
959 }
960 
#ifdef INET6
/*
 * IPv6 counterpart of in_pcbhashlookup(): find the connected IPv6 PCB
 * whose foreign and local address/port all equal the arguments.  Ports
 * are in network byte order.  Returns the PCB or NULL.
 */
struct inpcb *
in6_pcbhashlookup(struct inpcbtable *table, struct in6_addr *faddr,
    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_int16_t fport = fport_arg, lport = lport_arg;

	head = IN6PCBHASH(table, faddr, fport, laddr, lport);
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_flags & INP_IPV6))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) ||
		    inp->inp_fport != fport)
			continue;
		if (inp->inp_lport != lport ||
		    !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr))
			continue;
		break;
	}

	/*
	 * Move the PCB found to the head of its hash chain so that
	 * repeated accesses are quicker.  This is analogous to the
	 * historic single-entry PCB cache.
	 */
	if (inp != NULL && inp != LIST_FIRST(head)) {
		LIST_REMOVE(inp, inp_hash);
		LIST_INSERT_HEAD(head, inp, inp_hash);
	}
#ifdef DIAGNOSTIC
	/* Note: only the ports are printed; the addresses are left blank. */
	if (inp == NULL && in_pcbnotifymiss) {
		printf("in6_pcbhashlookup: faddr=");
		printf(" fport=%d laddr=", ntohs(fport));
		printf(" lport=%d\n", ntohs(lport));
	}
#endif
	return (inp);
}
#endif /* INET6 */
1001 
1002 /*
1003  * The in(6)_pcblookup_listen functions are used to locate listening
1004  * sockets quickly.  This are sockets with unspecified foreign address
1005  * and port:
1006  *		*.*     <-> laddr.lport
1007  *		*.*     <->     *.lport
1008  */
1009 struct inpcb *
1010 in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr,
1011     u_int lport_arg, int reverse, struct mbuf *m)
1012 {
1013 	struct inpcbhead *head;
1014 	struct in_addr *key1, *key2;
1015 	struct inpcb *inp;
1016 	u_int16_t lport = lport_arg;
1017 
1018 #if NPF > 0
1019 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1020 		struct pf_divert *divert;
1021 
1022 		if ((divert = pf_find_divert(m)) == NULL)
1023 			return (NULL);
1024 		key1 = key2 = &divert->addr.ipv4;
1025 		lport = divert->port;
1026 	} else
1027 #endif
1028 	if (reverse) {
1029 		key1 = &zeroin_addr;
1030 		key2 = &laddr;
1031 	} else {
1032 		key1 = &laddr;
1033 		key2 = &zeroin_addr;
1034 	}
1035 
1036 	head = INPCBHASH(table, &zeroin_addr, 0, key1, lport);
1037 	LIST_FOREACH(inp, head, inp_hash) {
1038 #ifdef INET6
1039 		if (inp->inp_flags & INP_IPV6)
1040 			continue;	/*XXX*/
1041 #endif
1042 		if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1043 		    inp->inp_laddr.s_addr == key1->s_addr &&
1044 		    inp->inp_faddr.s_addr == INADDR_ANY)
1045 			break;
1046 	}
1047 	if (inp == NULL && key1->s_addr != key2->s_addr) {
1048 		head = INPCBHASH(table, &zeroin_addr, 0, key2, lport);
1049 		LIST_FOREACH(inp, head, inp_hash) {
1050 #ifdef INET6
1051 			if (inp->inp_flags & INP_IPV6)
1052 				continue;	/*XXX*/
1053 #endif
1054 			if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1055 			    inp->inp_laddr.s_addr == key2->s_addr &&
1056 			    inp->inp_faddr.s_addr == INADDR_ANY)
1057 				break;
1058 		}
1059 	}
1060 #ifdef DIAGNOSTIC
1061 	if (inp == NULL && in_pcbnotifymiss) {
1062 		printf("in_pcblookup_listen: laddr=%08x lport=%d\n",
1063 		    ntohl(laddr.s_addr), ntohs(lport));
1064 	}
1065 #endif
1066 	/*
1067 	 * Move this PCB to the head of hash chain so that
1068 	 * repeated accesses are quicker.  This is analogous to
1069 	 * the historic single-entry PCB cache.
1070 	 */
1071 	if (inp != NULL && inp != LIST_FIRST(head)) {
1072 		LIST_REMOVE(inp, inp_hash);
1073 		LIST_INSERT_HEAD(head, inp, inp_hash);
1074 	}
1075 	return (inp);
1076 }
1077 
1078 #ifdef INET6
1079 struct inpcb *
1080 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
1081     u_int lport_arg, int reverse, struct mbuf *m)
1082 {
1083 	struct inpcbhead *head;
1084 	struct in6_addr *key1, *key2;
1085 	struct inpcb *inp;
1086 	u_int16_t lport = lport_arg;
1087 
1088 #if NPF > 0
1089 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1090 		struct pf_divert *divert;
1091 
1092 		if ((divert = pf_find_divert(m)) == NULL)
1093 			return (NULL);
1094 		key1 = key2 = &divert->addr.ipv6;
1095 		lport = divert->port;
1096 	} else
1097 #endif
1098 	if (reverse) {
1099 		key1 = &zeroin6_addr;
1100 		key2 = laddr;
1101 	} else {
1102 		key1 = laddr;
1103 		key2 = &zeroin6_addr;
1104 	}
1105 
1106 	head = IN6PCBHASH(table, &zeroin6_addr, 0, key1, lport);
1107 	LIST_FOREACH(inp, head, inp_hash) {
1108 		if (!(inp->inp_flags & INP_IPV6))
1109 			continue;
1110 		if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1111 		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key1) &&
1112 		    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1113 			break;
1114 	}
1115 	if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) {
1116 		head = IN6PCBHASH(table, &zeroin6_addr, 0, key2, lport);
1117 		LIST_FOREACH(inp, head, inp_hash) {
1118 			if (!(inp->inp_flags & INP_IPV6))
1119 				continue;
1120 			if (inp->inp_lport == lport && inp->inp_fport == 0 &&
1121 		    	    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, key2) &&
1122 			    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
1123 				break;
1124 		}
1125 	}
1126 #ifdef DIAGNOSTIC
1127 	if (inp == NULL && in_pcbnotifymiss) {
1128 		printf("in6_pcblookup_listen: laddr= lport=%d\n",
1129 		    ntohs(lport));
1130 	}
1131 #endif
1132 	/*
1133 	 * Move this PCB to the head of hash chain so that
1134 	 * repeated accesses are quicker.  This is analogous to
1135 	 * the historic single-entry PCB cache.
1136 	 */
1137 	if (inp != NULL && inp != LIST_FIRST(head)) {
1138 		LIST_REMOVE(inp, inp_hash);
1139 		LIST_INSERT_HEAD(head, inp, inp_hash);
1140 	}
1141 	return (inp);
1142 }
1143 #endif /* INET6 */
1144