xref: /openbsd-src/sys/netinet/in_pcb.c (revision db3296cf5c1dd9058ceecc3a29fe4aaa0bd26000)
1 /*	$OpenBSD: in_pcb.c,v 1.66 2003/06/02 23:28:13 millert Exp $	*/
2 /*	$NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/protosw.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/proc.h>
78 #include <sys/domain.h>
79 
80 #include <net/if.h>
81 #include <net/route.h>
82 
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_var.h>
89 #include <dev/rndvar.h>
90 
91 #ifdef INET6
92 #include <netinet6/ip6_var.h>
93 #endif /* INET6 */
94 
/*
 * All-zero IPv4 address (file-scope object, so zero-initialized).
 * in_pcbbind() passes it to in_pcblookup() as the "unspecified"
 * foreign address.
 */
struct	in_addr zeroin_addr;

/*
 * Default IPsec security levels, defined elsewhere.  in_pcballoc()
 * copies them into inp_seclevel[] of every newly created PCB.
 */
extern int ipsec_auth_default_level;
extern int ipsec_esp_trans_default_level;
extern int ipsec_esp_network_default_level;
extern int ipsec_ipcomp_default_level;

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 *
 * in_pcbbind() reads the first/last pair for the default range and the
 * hifirst/hilast pair when the PCB has INP_HIGHPORT set.
 */
int ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
int ipport_lastauto = IPPORT_USERRESERVED;	/* 5000 */
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 40000 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 44999 */
110 
/*
 * Select the hash chain of a PCB table for a connection.
 *
 * Both macros yield a pointer to one element of (table)->inpt_hashtbl.
 * The index is the sum of the host-order foreign address word(s) and the
 * host-order foreign and local ports, masked with (table)->inpt_hash.
 * The laddr argument is accepted but does not take part in the hash.
 * fport and lport are expected in network byte order.
 *
 * Every argument and the whole expansion are parenthesized so that the
 * macros are safe with arbitrary expressions (e.g. "&tbl") and inside
 * larger expressions.
 */
#define	INPCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + \
	ntohs((fport)) + ntohs((lport))) & ((table)->inpt_hash)])

#define	IN6PCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s6_addr32[0] ^ \
	(faddr)->s6_addr32[3]) + ntohs((fport)) + ntohs((lport))) & \
	((table)->inpt_hash)])
119 
120 void
121 in_pcbinit(table, hashsize)
122 	struct inpcbtable *table;
123 	int hashsize;
124 {
125 
126 	CIRCLEQ_INIT(&table->inpt_queue);
127 	table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, &table->inpt_hash);
128 	if (table->inpt_hashtbl == NULL)
129 		panic("in_pcbinit: hashinit failed");
130 	table->inpt_lastport = 0;
131 }
132 
133 struct baddynamicports baddynamicports;
134 
135 /*
136  * Check if the specified port is invalid for dynamic allocation.
137  */
138 int
139 in_baddynamic(port, proto)
140 	u_int16_t port;
141 	u_int16_t proto;
142 {
143 
144 	if (port < IPPORT_RESERVED/2 || port >= IPPORT_RESERVED)
145 		return (0);
146 
147 	switch (proto) {
148 	case IPPROTO_TCP:
149 		return (DP_ISSET(baddynamicports.tcp, port));
150 	case IPPROTO_UDP:
151 		return (DP_ISSET(baddynamicports.udp, port));
152 	default:
153 		return (0);
154 	}
155 }
156 
157 int
158 in_pcballoc(so, v)
159 	struct socket *so;
160 	void *v;
161 {
162 	struct inpcbtable *table = v;
163 	register struct inpcb *inp;
164 	int s;
165 
166 	MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, M_NOWAIT);
167 	if (inp == NULL)
168 		return (ENOBUFS);
169 	bzero((caddr_t)inp, sizeof(*inp));
170 	inp->inp_table = table;
171 	inp->inp_socket = so;
172 	inp->inp_seclevel[SL_AUTH] = ipsec_auth_default_level;
173 	inp->inp_seclevel[SL_ESP_TRANS] = ipsec_esp_trans_default_level;
174 	inp->inp_seclevel[SL_ESP_NETWORK] = ipsec_esp_network_default_level;
175 	inp->inp_seclevel[SL_IPCOMP] = ipsec_ipcomp_default_level;
176 	s = splnet();
177 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
178 	LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
179 	    &inp->inp_laddr, inp->inp_lport), inp, inp_hash);
180 	splx(s);
181 	so->so_pcb = inp;
182 	inp->inp_hops = -1;
183 
184 #ifdef INET6
185 	/*
186 	 * Small change in this function to set the INP_IPV6 flag so routines
187 	 * outside pcb-specific routines don't need to use sotopf(), and all
188 	 * of it's pointer chasing, later.
189 	 */
190 	if (sotopf(so) == PF_INET6)
191 		inp->inp_flags = INP_IPV6;
192 	inp->in6p_cksum = -1;
193 #endif /* INET6 */
194 	return (0);
195 }
196 
197 int
198 in_pcbbind(v, nam)
199 	register void *v;
200 	struct mbuf *nam;
201 {
202 	register struct inpcb *inp = v;
203 	register struct socket *so = inp->inp_socket;
204 	register struct inpcbtable *table = inp->inp_table;
205 	u_int16_t *lastport = &inp->inp_table->inpt_lastport;
206 	register struct sockaddr_in *sin;
207 	struct proc *p = curproc;		/* XXX */
208 	u_int16_t lport = 0;
209 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
210 	int error;
211 
212 #ifdef INET6
213 	if (sotopf(so) == PF_INET6)
214 		return in6_pcbbind(inp, nam);
215 #endif /* INET6 */
216 
217 	if (in_ifaddr.tqh_first == 0)
218 		return (EADDRNOTAVAIL);
219 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
220 		return (EINVAL);
221 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
222 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
223 	     (so->so_options & SO_ACCEPTCONN) == 0))
224 		wild = INPLOOKUP_WILDCARD;
225 	if (nam) {
226 		sin = mtod(nam, struct sockaddr_in *);
227 		if (nam->m_len != sizeof (*sin))
228 			return (EINVAL);
229 #ifdef notdef
230 		/*
231 		 * We should check the family, but old programs
232 		 * incorrectly fail to initialize it.
233 		 */
234 		if (sin->sin_family != AF_INET)
235 			return (EAFNOSUPPORT);
236 #endif
237 		lport = sin->sin_port;
238 		if (IN_MULTICAST(sin->sin_addr.s_addr)) {
239 			/*
240 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
241 			 * allow complete duplication of binding if
242 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
243 			 * and a multicast address is bound on both
244 			 * new and duplicated sockets.
245 			 */
246 			if (so->so_options & SO_REUSEADDR)
247 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
248 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
249 			sin->sin_port = 0;		/* yech... */
250 			if (in_iawithaddr(sin->sin_addr, NULL) == 0)
251 				return (EADDRNOTAVAIL);
252 		}
253 		if (lport) {
254 			struct inpcb *t;
255 
256 			/* GROSS */
257 			if (ntohs(lport) < IPPORT_RESERVED &&
258 			    (error = suser(p->p_ucred, &p->p_acflag)))
259 				return (EACCES);
260 			if (so->so_euid) {
261 				t = in_pcblookup(table, &zeroin_addr, 0,
262 				    &sin->sin_addr, lport, INPLOOKUP_WILDCARD);
263 				if (t && (so->so_euid != t->inp_socket->so_euid))
264 					return (EADDRINUSE);
265 			}
266 			t = in_pcblookup(table, &zeroin_addr, 0,
267 			    &sin->sin_addr, lport, wild);
268 			if (t && (reuseport & t->inp_socket->so_options) == 0)
269 				return (EADDRINUSE);
270 		}
271 		inp->inp_laddr = sin->sin_addr;
272 	}
273 	if (lport == 0) {
274 		u_int16_t first, last, old = 0;
275 		int count;
276 		int loopcount = 0;
277 
278 		if (inp->inp_flags & INP_HIGHPORT) {
279 			first = ipport_hifirstauto;	/* sysctl */
280 			last = ipport_hilastauto;
281 		} else if (inp->inp_flags & INP_LOWPORT) {
282 			if ((error = suser(p->p_ucred, &p->p_acflag)))
283 				return (EACCES);
284 			first = IPPORT_RESERVED-1; /* 1023 */
285 			last = 600;		   /* not IPPORT_RESERVED/2 */
286 		} else {
287 			first = ipport_firstauto;	/* sysctl */
288 			last  = ipport_lastauto;
289 		}
290 
291 		/*
292 		 * Simple check to ensure all ports are not used up causing
293 		 * a deadlock here.
294 		 *
295 		 * We split the two cases (up and down) so that the direction
296 		 * is not being tested on each round of the loop.
297 		 */
298 
299 portloop:
300 		if (first > last) {
301 			/*
302 			 * counting down
303 			 */
304 			if (loopcount == 0) {	/* only do this once. */
305 				old = first;
306 				first -= (arc4random() % (first - last));
307 			}
308 			count = first - last;
309 			*lastport = first;		/* restart each time */
310 
311 			do {
312 				if (count-- <= 0) {	/* completely used? */
313 					if (loopcount == 0) {
314 						last = old;
315 						loopcount++;
316 						goto portloop;
317 					}
318 					return (EADDRNOTAVAIL);
319 				}
320 				--*lastport;
321 				if (*lastport > first || *lastport < last)
322 					*lastport = first;
323 				lport = htons(*lastport);
324 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
325 			    in_pcblookup(table, &zeroin_addr, 0,
326 			    &inp->inp_laddr, lport, wild));
327 		} else {
328 			/*
329 			 * counting up
330 			 */
331 			if (loopcount == 0) {	/* only do this once. */
332 				old = first;
333 				first += (arc4random() % (last - first));
334 			}
335 			count = last - first;
336 			*lastport = first;		/* restart each time */
337 
338 			do {
339 				if (count-- <= 0) {	/* completely used? */
340 					if (loopcount == 0) {
341 						first = old;
342 						loopcount++;
343 						goto portloop;
344 					}
345 					return (EADDRNOTAVAIL);
346 				}
347 				++*lastport;
348 				if (*lastport < first || *lastport > last)
349 					*lastport = first;
350 				lport = htons(*lastport);
351 			} while (in_baddynamic(*lastport, so->so_proto->pr_protocol) ||
352 			    in_pcblookup(table, &zeroin_addr, 0,
353 			    &inp->inp_laddr, lport, wild));
354 		}
355 	}
356 	inp->inp_lport = lport;
357 	in_pcbrehash(inp);
358 	return (0);
359 }
360 
361 /*
362  * Connect from a socket to a specified address.
363  * Both address and port must be specified in argument sin.
364  * If don't have a local address for this socket yet,
365  * then pick one.
366  */
367 int
368 in_pcbconnect(v, nam)
369 	register void *v;
370 	struct mbuf *nam;
371 {
372 	register struct inpcb *inp = v;
373 	struct sockaddr_in *ifaddr = NULL;
374 	register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
375 
376 #ifdef INET6
377 	if (sotopf(inp->inp_socket) == PF_INET6)
378 		return (in6_pcbconnect(inp, nam));
379 	if ((inp->inp_flags & INP_IPV6) != 0)
380 		panic("IPv6 pcb passed into in_pcbconnect");
381 #endif /* INET6 */
382 
383 	if (nam->m_len != sizeof (*sin))
384 		return (EINVAL);
385 	if (sin->sin_family != AF_INET)
386 		return (EAFNOSUPPORT);
387 	if (sin->sin_port == 0)
388 		return (EADDRNOTAVAIL);
389 	if (in_ifaddr.tqh_first != 0) {
390 		/*
391 		 * If the destination address is INADDR_ANY,
392 		 * use the primary local address.
393 		 * If the supplied address is INADDR_BROADCAST,
394 		 * and the primary interface supports broadcast,
395 		 * choose the broadcast address for that interface.
396 		 */
397 		if (sin->sin_addr.s_addr == INADDR_ANY)
398 			sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr;
399 		else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
400 		  (in_ifaddr.tqh_first->ia_ifp->if_flags & IFF_BROADCAST))
401 			sin->sin_addr = in_ifaddr.tqh_first->ia_broadaddr.sin_addr;
402 	}
403 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
404 		int error;
405 		ifaddr = in_selectsrc(sin, &inp->inp_route,
406 			inp->inp_socket->so_options, inp->inp_moptions, &error);
407 		if (ifaddr == NULL) {
408 			if (error == 0)
409 				error = EADDRNOTAVAIL;
410 			return error;
411 		}
412 	}
413 	if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
414 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
415 	    inp->inp_lport) != 0)
416 		return (EADDRINUSE);
417 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
418 		if (inp->inp_lport == 0 &&
419 		    in_pcbbind(inp, (struct mbuf *)0) == EADDRNOTAVAIL)
420 			return (EADDRNOTAVAIL);
421 		inp->inp_laddr = ifaddr->sin_addr;
422 	}
423 	inp->inp_faddr = sin->sin_addr;
424 	inp->inp_fport = sin->sin_port;
425 	in_pcbrehash(inp);
426 #ifdef IPSEC
427 	{
428 		int error; /* This is just ignored */
429 
430 		/* Cause an IPsec SA to be established. */
431 		ipsp_spd_inp(NULL, AF_INET, 0, &error, IPSP_DIRECTION_OUT,
432 		    NULL, inp, NULL);
433 	}
434 #endif
435 	return (0);
436 }
437 
438 void
439 in_pcbdisconnect(v)
440 	void *v;
441 {
442 	struct inpcb *inp = v;
443 
444 	switch (sotopf(inp->inp_socket)) {
445 #ifdef INET6
446 	case PF_INET6:
447 		inp->inp_faddr6 = in6addr_any;
448 		break;
449 #endif
450 	case PF_INET:
451 		inp->inp_faddr.s_addr = INADDR_ANY;
452 		break;
453 	}
454 
455 	inp->inp_fport = 0;
456 	in_pcbrehash(inp);
457 	if (inp->inp_socket->so_state & SS_NOFDREF)
458 		in_pcbdetach(inp);
459 }
460 
461 void
462 in_pcbdetach(v)
463 	void *v;
464 {
465 	struct inpcb *inp = v;
466 	struct socket *so = inp->inp_socket;
467 	int s;
468 
469 	so->so_pcb = 0;
470 	sofree(so);
471 	if (inp->inp_options)
472 		(void)m_freem(inp->inp_options);
473 	if (inp->inp_route.ro_rt)
474 		rtfree(inp->inp_route.ro_rt);
475 #ifdef INET6
476 	if (inp->inp_flags & INP_IPV6)
477 		ip6_freemoptions(inp->inp_moptions6);
478 	else
479 #endif
480 		ip_freemoptions(inp->inp_moptions);
481 #ifdef IPSEC
482 	/* IPsec cleanup here */
483 	s = spltdb();
484 	if (inp->inp_tdb_in)
485 		TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
486 			     inp, inp_tdb_in_next);
487 	if (inp->inp_tdb_out)
488 	        TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, inp,
489 			     inp_tdb_out_next);
490 	if (inp->inp_ipsec_remotecred)
491 		ipsp_reffree(inp->inp_ipsec_remotecred);
492 	if (inp->inp_ipsec_remoteauth)
493 		ipsp_reffree(inp->inp_ipsec_remoteauth);
494 	if (inp->inp_ipo)
495 		ipsec_delete_policy(inp->inp_ipo);
496 	splx(s);
497 #endif
498 	s = splnet();
499 	LIST_REMOVE(inp, inp_hash);
500 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
501 	splx(s);
502 	FREE(inp, M_PCB);
503 }
504 
505 void
506 in_setsockaddr(inp, nam)
507 	register struct inpcb *inp;
508 	struct mbuf *nam;
509 {
510 	register struct sockaddr_in *sin;
511 
512 	nam->m_len = sizeof (*sin);
513 	sin = mtod(nam, struct sockaddr_in *);
514 	bzero((caddr_t)sin, sizeof (*sin));
515 	sin->sin_family = AF_INET;
516 	sin->sin_len = sizeof(*sin);
517 	sin->sin_port = inp->inp_lport;
518 	sin->sin_addr = inp->inp_laddr;
519 }
520 
521 void
522 in_setpeeraddr(inp, nam)
523 	struct inpcb *inp;
524 	struct mbuf *nam;
525 {
526 	register struct sockaddr_in *sin;
527 
528 #ifdef INET6
529 	if (sotopf(inp->inp_socket) == PF_INET6) {
530 		in6_setpeeraddr(inp, nam);
531 		return;
532 	}
533 #endif /* INET6 */
534 
535 	nam->m_len = sizeof (*sin);
536 	sin = mtod(nam, struct sockaddr_in *);
537 	bzero((caddr_t)sin, sizeof (*sin));
538 	sin->sin_family = AF_INET;
539 	sin->sin_len = sizeof(*sin);
540 	sin->sin_port = inp->inp_fport;
541 	sin->sin_addr = inp->inp_faddr;
542 }
543 
544 /*
545  * Pass some notification to all connections of a protocol
546  * associated with address dst.  The local address and/or port numbers
547  * may be specified to limit the search.  The "usual action" will be
548  * taken, depending on the ctlinput cmd.  The caller must filter any
549  * cmds that are uninteresting (e.g., no error in the map).
550  * Call the protocol specific routine (if any) to report
551  * any errors for each matching socket.
552  *
553  * Must be called at splsoftnet.
554  */
555 void
556 in_pcbnotify(table, dst, fport_arg, laddr, lport_arg, errno, notify)
557 	struct inpcbtable *table;
558 	struct sockaddr *dst;
559 	u_int fport_arg, lport_arg;
560 	struct in_addr laddr;
561 	int errno;
562 	void (*notify)(struct inpcb *, int);
563 {
564 	struct inpcb *inp, *oinp;
565 	struct in_addr faddr;
566 	u_int16_t fport = fport_arg, lport = lport_arg;
567 
568 	splassert(IPL_SOFTNET);
569 
570 #ifdef INET6
571 	/*
572 	 * See in6_pcbnotify() for IPv6 codepath.  By the time this
573 	 * gets called, the addresses passed are either definitely IPv4 or
574 	 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
575 	 */
576 #endif /* INET6 */
577 
578 	if (dst->sa_family != AF_INET)
579 		return;
580 	faddr = satosin(dst)->sin_addr;
581 	if (faddr.s_addr == INADDR_ANY)
582 		return;
583 
584 	for (inp = table->inpt_queue.cqh_first;
585 	    inp != (struct inpcb *)&table->inpt_queue;) {
586 #ifdef INET6
587 		if (inp->inp_flags & INP_IPV6) {
588 			inp = inp->inp_queue.cqe_next;
589 			continue;
590 		}
591 #endif
592 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
593 		    inp->inp_socket == 0 ||
594 		    inp->inp_fport != fport ||
595 		    inp->inp_lport != lport ||
596 		    inp->inp_laddr.s_addr != laddr.s_addr) {
597 			inp = inp->inp_queue.cqe_next;
598 			continue;
599 		}
600 		oinp = inp;
601 		inp = inp->inp_queue.cqe_next;
602 		if (notify)
603 			(*notify)(oinp, errno);
604 	}
605 }
606 
607 void
608 in_pcbnotifyall(table, dst, errno, notify)
609 	struct inpcbtable *table;
610 	struct sockaddr *dst;
611 	int errno;
612 	void (*notify)(struct inpcb *, int);
613 {
614 	register struct inpcb *inp, *oinp;
615 	struct in_addr faddr;
616 
617 #ifdef INET6
618 	/*
619 	 * See in6_pcbnotify() for IPv6 codepath.  By the time this
620 	 * gets called, the addresses passed are either definitely IPv4 or
621 	 * IPv6; *_pcbnotify() never gets called with v4-mapped v6 addresses.
622 	 */
623 #endif /* INET6 */
624 
625 	if (dst->sa_family != AF_INET)
626 		return;
627 	faddr = satosin(dst)->sin_addr;
628 	if (faddr.s_addr == INADDR_ANY)
629 		return;
630 
631 	for (inp = table->inpt_queue.cqh_first;
632 	    inp != (struct inpcb *)&table->inpt_queue;) {
633 #ifdef INET6
634 		if (inp->inp_flags & INP_IPV6) {
635 			inp = inp->inp_queue.cqe_next;
636 			continue;
637 		}
638 #endif
639 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
640 		    inp->inp_socket == 0) {
641 			inp = inp->inp_queue.cqe_next;
642 			continue;
643 		}
644 		oinp = inp;
645 		inp = inp->inp_queue.cqe_next;
646 		if (notify)
647 			(*notify)(oinp, errno);
648 	}
649 }
650 
651 /*
652  * Check for alternatives when higher level complains
653  * about service problems.  For now, invalidate cached
654  * routing information.  If the route was created dynamically
655  * (by a redirect), time to try a default gateway again.
656  */
657 void
658 in_losing(inp)
659 	struct inpcb *inp;
660 {
661 	register struct rtentry *rt;
662 	struct rt_addrinfo info;
663 
664 	if ((rt = inp->inp_route.ro_rt)) {
665 		inp->inp_route.ro_rt = 0;
666 		bzero((caddr_t)&info, sizeof(info));
667 		info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
668 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
669 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
670 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
671 		if (rt->rt_flags & RTF_DYNAMIC)
672 			(void) rtrequest(RTM_DELETE, rt_key(rt),
673 				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
674 				(struct rtentry **)0);
675 		else
676 		/*
677 		 * A new route can be allocated
678 		 * the next time output is attempted.
679 		 */
680 			rtfree(rt);
681 	}
682 }
683 
684 /*
685  * After a routing change, flush old routing
686  * and allocate a (hopefully) better one.
687  */
688 void
689 in_rtchange(inp, errno)
690 	register struct inpcb *inp;
691 	int errno;
692 {
693 	if (inp->inp_route.ro_rt) {
694 		rtfree(inp->inp_route.ro_rt);
695 		inp->inp_route.ro_rt = 0;
696 		/*
697 		 * A new route can be allocated the next time
698 		 * output is attempted.
699 		 */
700 	}
701 }
702 
/*
 * Search a PCB table by linear scan of its queue for the PCB that best
 * matches the given foreign/local address and port.
 *
 * faddrp and laddrp point to struct in_addr values, or to struct
 * in6_addr values when flags contains INPLOOKUP_IPV6.  The ports are
 * compared against inp_fport/inp_lport as given (no byte swapping is
 * done here).
 *
 * The local port must match exactly.  For each of the foreign and the
 * local address, a candidate counts one "wildcard" when exactly one
 * side (PCB or argument) is unspecified; when both are specified they
 * must be equal (for the foreign side the port must be equal as well).
 * Candidates with a non-zero wildcard count are only accepted when
 * flags contains INPLOOKUP_WILDCARD.  The accepted candidate with the
 * lowest count is returned; the scan stops at the first exact match.
 *
 * Returns the matching PCB, or NULL (0) if there is none.
 */
struct inpcb *
in_pcblookup(table, faddrp, fport_arg, laddrp, lport_arg, flags)
	struct inpcbtable *table;
	void *faddrp, *laddrp;
	u_int fport_arg, lport_arg;
	int flags;
{
	register struct inpcb *inp, *match = 0;
	/* matchwild starts above the largest possible wildcard count (2). */
	int matchwild = 3, wildcard;
	u_int16_t fport = fport_arg, lport = lport_arg;
	/* IPv4 view of the arguments; only used on the non-IPv6 path. */
	struct in_addr faddr = *(struct in_addr *)faddrp;
	struct in_addr laddr = *(struct in_addr *)laddrp;

	for (inp = table->inpt_queue.cqh_first;
	    inp != (struct inpcb *)&table->inpt_queue;
	    inp = inp->inp_queue.cqe_next) {
		if (inp->inp_lport != lport)
			continue;
		wildcard = 0;
#ifdef INET6
		if (flags & INPLOOKUP_IPV6) {
			struct in6_addr *laddr6 = (struct in6_addr *)laddrp;
			struct in6_addr *faddr6 = (struct in6_addr *)faddrp;

			/* An IPv6 lookup never matches an IPv4 PCB. */
			if (!(inp->inp_flags & INP_IPV6))
				continue;

			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
				if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
					wildcard++;
				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr6))
					continue;
			} else {
				if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
					wildcard++;
			}

			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) {
				if (IN6_IS_ADDR_UNSPECIFIED(faddr6))
					wildcard++;
				else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
				    faddr6) || inp->inp_fport != fport)
					continue;
			} else {
				if (!IN6_IS_ADDR_UNSPECIFIED(faddr6))
					wildcard++;
			}
		} else
#endif /* INET6 */
		{
#ifdef INET6
			/* An IPv4 lookup never matches an IPv6 PCB. */
		        if (inp->inp_flags & INP_IPV6)
			        continue;
#endif /* INET6 */

			if (inp->inp_faddr.s_addr != INADDR_ANY) {
				if (faddr.s_addr == INADDR_ANY)
					wildcard++;
				else if (inp->inp_faddr.s_addr != faddr.s_addr ||
				    inp->inp_fport != fport)
					continue;
			} else {
				if (faddr.s_addr != INADDR_ANY)
					wildcard++;
			}
			if (inp->inp_laddr.s_addr != INADDR_ANY) {
				if (laddr.s_addr == INADDR_ANY)
					wildcard++;
				else if (inp->inp_laddr.s_addr != laddr.s_addr)
					continue;
			} else {
				if (laddr.s_addr != INADDR_ANY)
					wildcard++;
			}
		}
		/*
		 * Keep this candidate if it is allowed (exact, or wildcard
		 * matching was requested) and strictly better than the
		 * best one seen so far.  An exact match ends the search.
		 */
		if ((!wildcard || (flags & INPLOOKUP_WILDCARD)) &&
		    wildcard < matchwild) {
			match = inp;
			if ((matchwild = wildcard) == 0)
				break;
		}
	}
	return (match);
}
787 
788 struct rtentry *
789 in_pcbrtentry(inp)
790 	struct inpcb *inp;
791 {
792 	struct route *ro;
793 
794 	ro = &inp->inp_route;
795 
796 	/*
797 	 * No route yet, so try to acquire one.
798 	 */
799 	if (ro->ro_rt == NULL) {
800 #ifdef INET6
801 		bzero(ro, sizeof(struct route_in6));
802 #else
803 		bzero(ro, sizeof(struct route));
804 #endif
805 
806 		switch(sotopf(inp->inp_socket)) {
807 #ifdef INET6
808 		case PF_INET6:
809 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
810 				break;
811 			ro->ro_dst.sa_family = AF_INET6;
812 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
813 			((struct sockaddr_in6 *) &ro->ro_dst)->sin6_addr =
814 			    inp->inp_faddr6;
815 			rtalloc(ro);
816 			break;
817 #endif /* INET6 */
818 		case PF_INET:
819 			if (inp->inp_faddr.s_addr == INADDR_ANY)
820 				break;
821 			ro->ro_dst.sa_family = AF_INET;
822 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
823 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
824 			rtalloc(ro);
825 			break;
826 		}
827 	}
828 	return (ro->ro_rt);
829 }
830 
/*
 * Choose the local (source) address to use for traffic to sin.
 *
 * sin     destination address; its port is zeroed temporarily during
 *         the interface-address searches and then restored.
 * ro      route cache to consult and, if needed, (re)fill.
 * soopts  socket options; only SO_DONTROUTE is examined.
 * mopts   multicast options, may be NULL.
 * errorp  set to EADDRNOTAVAIL when NULL is returned; not written on
 *         success.
 *
 * Returns a pointer to the ia_addr of the chosen interface address, or
 * NULL when no address is available.
 */
struct sockaddr_in *
in_selectsrc(sin, ro, soopts, mopts, errorp)
	struct sockaddr_in *sin;
	struct route *ro;
	int soopts;
	struct ip_moptions *mopts;
	int *errorp;
{
	struct sockaddr_in *sin2;
	struct in_ifaddr *ia;

	ia = (struct in_ifaddr *)0;
	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 */
	/* Discard a cached route to another destination, or any cached
	 * route at all when SO_DONTROUTE is set. */
	if (ro->ro_rt &&
	    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
		sin->sin_addr.s_addr ||
	    soopts & SO_DONTROUTE)) {
		RTFREE(ro->ro_rt);
		ro->ro_rt = (struct rtentry *)0;
	}
	if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
	    (ro->ro_rt == (struct rtentry *)0 ||
	    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
		/* No route yet, so try to acquire one */
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
		rtalloc(ro);

		/*
		 * It is important to bzero out the rest of the
		 * struct sockaddr_in when mixing v6 & v4!
		 */
		sin2 = (struct sockaddr_in *)&ro->ro_dst;
		bzero(sin2->sin_zero, sizeof(sin2->sin_zero));
	}
	/*
	 * If we found a route, use the address
	 * corresponding to the outgoing interface
	 * unless it is the loopback (in case a route
	 * to our address on another net goes to loopback).
	 */
	if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
		ia = ifatoia(ro->ro_rt->rt_ifa);
	if (ia == 0) {
		/*
		 * No usable route.  Fall back, in order, to: an interface
		 * whose destination address is sin, an interface on sin's
		 * network, and finally the first configured address.
		 */
		u_int16_t fport = sin->sin_port;

		sin->sin_port = 0;
		ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
		if (ia == 0)
			ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
		sin->sin_port = fport;
		if (ia == 0)
			ia = in_ifaddr.tqh_first;
		if (ia == 0) {
			*errorp = EADDRNOTAVAIL;
			return NULL;
		}
	}
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = mopts;
		if (imo->imo_multicast_ifp != NULL) {
			ifp = imo->imo_multicast_ifp;
			/* First address in the list that is on ifp. */
			for (ia = in_ifaddr.tqh_first; ia != 0;
			    ia = ia->ia_list.tqe_next)
				if (ia->ia_ifp == ifp)
					break;
			if (ia == 0) {
				*errorp = EADDRNOTAVAIL;
				return NULL;
			}
		}
	}
	return satosin(&ia->ia_addr);
}
917 
918 void
919 in_pcbrehash(inp)
920 	struct inpcb *inp;
921 {
922 	struct inpcbtable *table = inp->inp_table;
923 	int s;
924 
925 	s = splnet();
926 	LIST_REMOVE(inp, inp_hash);
927 #ifdef INET6
928 	if (inp->inp_flags & INP_IPV6) {
929 		LIST_INSERT_HEAD(IN6PCBHASH(table, &inp->inp_faddr6,
930 		    inp->inp_fport, &inp->inp_laddr6, inp->inp_lport),
931 		    inp, inp_hash);
932 	} else {
933 #endif /* INET6 */
934 		LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr,
935 		    inp->inp_fport, &inp->inp_laddr, inp->inp_lport),
936 		    inp, inp_hash);
937 #ifdef INET6
938 	}
939 #endif /* INET6 */
940 	splx(s);
941 }
942 
943 #ifdef DIAGNOSTIC
944 int	in_pcbnotifymiss = 0;
945 #endif
946 
947 struct inpcb *
948 in_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg)
949 	struct inpcbtable *table;
950 	struct in_addr faddr, laddr;
951 	u_int fport_arg, lport_arg;
952 {
953 	struct inpcbhead *head;
954 	register struct inpcb *inp;
955 	u_int16_t fport = fport_arg, lport = lport_arg;
956 
957 	head = INPCBHASH(table, &faddr, fport, &laddr, lport);
958 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
959 #ifdef INET6
960 		if (inp->inp_flags & INP_IPV6)
961 			continue;	/*XXX*/
962 #endif
963 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
964 		    inp->inp_fport == fport &&
965 		    inp->inp_lport == lport &&
966 		    inp->inp_laddr.s_addr == laddr.s_addr) {
967 			/*
968 			 * Move this PCB to the head of hash chain so that
969 			 * repeated accesses are quicker.  This is analogous to
970 			 * the historic single-entry PCB cache.
971 			 */
972 			if (inp != head->lh_first) {
973 				LIST_REMOVE(inp, inp_hash);
974 				LIST_INSERT_HEAD(head, inp, inp_hash);
975 			}
976 			break;
977 		}
978 	}
979 #ifdef DIAGNOSTIC
980 	if (inp == NULL && in_pcbnotifymiss) {
981 		printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d\n",
982 		    ntohl(faddr.s_addr), ntohs(fport),
983 		    ntohl(laddr.s_addr), ntohs(lport));
984 	}
985 #endif
986 	return (inp);
987 }
988 
#ifdef INET6
/*
 * IPv6 counterpart of in_pcbhashlookup(): find the IPv6 PCB whose
 * foreign address/port and local address/port all equal the
 * arguments, using the table's hash.  A PCB that is found is moved to
 * the front of its hash chain.
 *
 * Returns the PCB, or NULL if there is no exact match.
 */
struct inpcb *
in6_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg)
	struct inpcbtable *table;
	struct in6_addr *faddr, *laddr;
	u_int fport_arg, lport_arg;
{
	struct inpcbhead *head;
	register struct inpcb *inp;
	u_int16_t fport = fport_arg, lport = lport_arg;

	head = IN6PCBHASH(table, faddr, fport, laddr, lport);
	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
		if ((inp->inp_flags & INP_IPV6) == 0)
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) ||
		    inp->inp_fport != fport || inp->inp_lport != lport ||
		    !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr))
			continue;

		/*
		 * Found it.  Put the PCB first on its chain so that
		 * repeated lookups are quicker, much like the historic
		 * single-entry PCB cache.
		 */
		if (head->lh_first != inp) {
			LIST_REMOVE(inp, inp_hash);
			LIST_INSERT_HEAD(head, inp, inp_hash);
		}
		break;
	}
#ifdef DIAGNOSTIC
	if (inp == NULL && in_pcbnotifymiss) {
		printf("in6_pcblookup_connect: faddr=");
		printf(" fport=%d laddr=", ntohs(fport));
		printf(" lport=%d\n", ntohs(lport));
	}
#endif
	return (inp);
}
#endif /* INET6 */
1029 
1030 
1031