xref: /netbsd-src/sys/netinet/in_pcb.c (revision 5aefcfdc06931dd97e76246d2fe0302f7b3fe094)
1 /*	$NetBSD: in_pcb.c,v 1.68 2000/11/08 14:28:14 ad Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Public Access Networks Corporation ("Panix").  It was developed under
38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed by the NetBSD
51  *	Foundation, Inc. and its contributors.
52  * 4. Neither the name of The NetBSD Foundation nor the names of its
53  *    contributors may be used to endorse or promote products derived
54  *    from this software without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 /*
70  * Copyright (c) 1982, 1986, 1991, 1993, 1995
71  *	The Regents of the University of California.  All rights reserved.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
102  */
103 
104 #include "opt_ipsec.h"
105 
106 #include <sys/param.h>
107 #include <sys/systm.h>
108 #include <sys/malloc.h>
109 #include <sys/mbuf.h>
110 #include <sys/protosw.h>
111 #include <sys/socket.h>
112 #include <sys/socketvar.h>
113 #include <sys/ioctl.h>
114 #include <sys/errno.h>
115 #include <sys/time.h>
116 #include <sys/pool.h>
117 #include <sys/proc.h>
118 
119 #include <net/if.h>
120 #include <net/route.h>
121 
122 #include <netinet/in.h>
123 #include <netinet/in_systm.h>
124 #include <netinet/ip.h>
125 #include <netinet/in_pcb.h>
126 #include <netinet/in_var.h>
127 #include <netinet/ip_var.h>
128 
129 #ifdef IPSEC
130 #include <netinet6/ipsec.h>
131 #include <netkey/key.h>
132 #endif /* IPSEC */
133 
/*
 * All-zero IPv4 address (zero-initialized by virtue of static storage).
 * Used in this file to clear a PCB's foreign address and as the
 * wildcard local address when probing the bind hash.
 */
struct	in_addr zeroin_addr;
135 
/*
 * Hash-bucket selectors for the two PCB hash tables.
 *
 * INPCBHASH_BIND yields a pointer to the bind-hash bucket for a local
 * address/port pair; INPCBHASH_CONNECT yields a pointer to the
 * connect-hash bucket for a full foreign/local 4-tuple.  Addresses and
 * ports are supplied in network byte order; they are converted to host
 * order, summed, and masked with the table's inpt_bindhash /
 * inpt_connecthash value.
 *
 * The whole expansion is parenthesized so the result can be used
 * directly as an operand (e.g. INPCBHASH_BIND(...)->lh_first) without
 * the postfix operator binding to the array element instead of to the
 * address-of expression.  Note that `table' is evaluated twice.
 */
#define	INPCBHASH_BIND(table, laddr, lport) \
	(&(table)->inpt_bindhashtbl[ \
	    ((ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_bindhash])
#define	INPCBHASH_CONNECT(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_connecthashtbl[ \
	    ((ntohl((faddr).s_addr) + ntohs(fport)) + \
	     (ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_connecthash])
143 
/*
 * Forward declaration: look up a PCB by local address/port with optional
 * wildcard matching (defined later in this file).
 */
struct inpcb *
	in_pcblookup_port __P((struct inpcbtable *,
	    struct in_addr, u_int, int));

/*
 * Port ranges used by in_pcbbind() when it has to pick a local port:
 * the anon* pair for ordinary sockets, the low* pair for sockets with
 * INP_LOWPORT set.  Kept as ints; in_pcbbind() narrows them to 16 bits
 * and swaps them if min > max.
 */
int	anonportmin = IPPORT_ANONMIN;
int	anonportmax = IPPORT_ANONMAX;
int	lowportmin  = IPPORT_RESERVEDMIN;
int	lowportmax  = IPPORT_RESERVEDMAX;

/* Pool from which every struct inpcb is allocated (set up in in_pcbinit). */
struct pool inpcb_pool;
154 
155 void
156 in_pcbinit(table, bindhashsize, connecthashsize)
157 	struct inpcbtable *table;
158 	int bindhashsize, connecthashsize;
159 {
160 	static int inpcb_pool_initialized;
161 
162 	if (inpcb_pool_initialized == 0) {
163 		pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0,
164 		    "inpcbpl", 0, NULL, NULL, M_PCB);
165 		inpcb_pool_initialized = 1;
166 	}
167 
168 	CIRCLEQ_INIT(&table->inpt_queue);
169 	table->inpt_bindhashtbl = hashinit(bindhashsize, HASH_LIST, M_PCB,
170 	    M_WAITOK, &table->inpt_bindhash);
171 	table->inpt_connecthashtbl = hashinit(connecthashsize, HASH_LIST,
172 	    M_PCB, M_WAITOK, &table->inpt_connecthash);
173 	table->inpt_lastlow = IPPORT_RESERVEDMAX;
174 	table->inpt_lastport = (u_int16_t)anonportmax;
175 }
176 
177 int
178 in_pcballoc(so, v)
179 	struct socket *so;
180 	void *v;
181 {
182 	struct inpcbtable *table = v;
183 	struct inpcb *inp;
184 	int s;
185 
186 	inp = pool_get(&inpcb_pool, PR_NOWAIT);
187 	if (inp == NULL)
188 		return (ENOBUFS);
189 	bzero((caddr_t)inp, sizeof(*inp));
190 	inp->inp_table = table;
191 	inp->inp_socket = so;
192 	inp->inp_errormtu = -1;
193 	so->so_pcb = inp;
194 	s = splnet();
195 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
196 	in_pcbstate(inp, INP_ATTACHED);
197 	splx(s);
198 	return (0);
199 }
200 
201 int
202 in_pcbbind(v, nam, p)
203 	void *v;
204 	struct mbuf *nam;
205 	struct proc *p;
206 {
207 	struct inpcb *inp = v;
208 	struct socket *so = inp->inp_socket;
209 	struct inpcbtable *table = inp->inp_table;
210 	struct sockaddr_in *sin;
211 	u_int16_t lport = 0;
212 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
213 #ifndef IPNOPRIVPORTS
214 	int error;
215 #endif
216 
217 	if (in_ifaddr.tqh_first == 0)
218 		return (EADDRNOTAVAIL);
219 	if (inp->inp_lport || !in_nullhost(inp->inp_laddr))
220 		return (EINVAL);
221 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
222 		wild = 1;
223 	if (nam == 0)
224 		goto noname;
225 	sin = mtod(nam, struct sockaddr_in *);
226 	if (nam->m_len != sizeof (*sin))
227 		return (EINVAL);
228 #ifdef notdef
229 	/*
230 	 * We should check the family, but old programs
231 	 * incorrectly fail to initialize it.
232 	 */
233 	if (sin->sin_family != AF_INET)
234 		return (EAFNOSUPPORT);
235 #endif
236 	lport = sin->sin_port;
237 	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
238 		/*
239 		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
240 		 * allow complete duplication of binding if
241 		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
242 		 * and a multicast address is bound on both
243 		 * new and duplicated sockets.
244 		 */
245 		if (so->so_options & SO_REUSEADDR)
246 			reuseport = SO_REUSEADDR|SO_REUSEPORT;
247 	} else if (!in_nullhost(sin->sin_addr)) {
248 		sin->sin_port = 0;		/* yech... */
249 		if (ifa_ifwithaddr(sintosa(sin)) == 0)
250 			return (EADDRNOTAVAIL);
251 	}
252 	if (lport) {
253 		struct inpcb *t;
254 #ifndef IPNOPRIVPORTS
255 		/* GROSS */
256 		if (ntohs(lport) < IPPORT_RESERVED &&
257 		    (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))))
258 			return (EACCES);
259 #endif
260 		if (so->so_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
261 			t = in_pcblookup_port(table, sin->sin_addr, lport, 1);
262 		/*
263 		 * XXX:	investigate ramifications of loosening this
264 		 *	restriction so that as long as both ports have
265 		 *	SO_REUSEPORT allow the bind
266 		 */
267 			if (t &&
268 			    (!in_nullhost(sin->sin_addr) ||
269 			     !in_nullhost(t->inp_laddr) ||
270 			     (t->inp_socket->so_options & SO_REUSEPORT) == 0)
271 			    && (so->so_uid != t->inp_socket->so_uid)) {
272 				return (EADDRINUSE);
273 			}
274 		}
275 		t = in_pcblookup_port(table, sin->sin_addr, lport, wild);
276 		if (t && (reuseport & t->inp_socket->so_options) == 0)
277 			return (EADDRINUSE);
278 	}
279 	inp->inp_laddr = sin->sin_addr;
280 
281 noname:
282 	if (lport == 0) {
283 		int	   cnt;
284 		u_int16_t  min, max;
285 		u_int16_t *lastport;
286 
287 		if (inp->inp_flags & INP_LOWPORT) {
288 #ifndef IPNOPRIVPORTS
289 			if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag)))
290 				return (EACCES);
291 #endif
292 			min = lowportmin;
293 			max = lowportmax;
294 			lastport = &table->inpt_lastlow;
295 		} else {
296 			min = anonportmin;
297 			max = anonportmax;
298 			lastport = &table->inpt_lastport;
299 		}
300 		if (min > max) {	/* sanity check */
301 			u_int16_t swp;
302 
303 			swp = min;
304 			min = max;
305 			max = swp;
306 		}
307 
308 		lport = *lastport - 1;
309 		for (cnt = max - min + 1; cnt; cnt--, lport--) {
310 			if (lport < min || lport > max)
311 				lport = max;
312 			if (!in_pcblookup_port(table, inp->inp_laddr,
313 			    htons(lport), 1))
314 				goto found;
315 		}
316 		if (!in_nullhost(inp->inp_laddr))
317 			inp->inp_laddr.s_addr = INADDR_ANY;
318 		return (EAGAIN);
319 	found:
320 		inp->inp_flags |= INP_ANONPORT;
321 		*lastport = lport;
322 		lport = htons(lport);
323 	}
324 	inp->inp_lport = lport;
325 	in_pcbstate(inp, INP_BOUND);
326 	return (0);
327 }
328 
329 /*
330  * Connect from a socket to a specified address.
331  * Both address and port must be specified in argument sin.
332  * If don't have a local address for this socket yet,
333  * then pick one.
334  */
335 int
336 in_pcbconnect(v, nam)
337 	void *v;
338 	struct mbuf *nam;
339 {
340 	struct inpcb *inp = v;
341 	struct in_ifaddr *ia;
342 	struct sockaddr_in *ifaddr = NULL;
343 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
344 	int error;
345 
346 	if (nam->m_len != sizeof (*sin))
347 		return (EINVAL);
348 	if (sin->sin_family != AF_INET)
349 		return (EAFNOSUPPORT);
350 	if (sin->sin_port == 0)
351 		return (EADDRNOTAVAIL);
352 	if (in_ifaddr.tqh_first != 0) {
353 		/*
354 		 * If the destination address is INADDR_ANY,
355 		 * use any local address (likely loopback).
356 		 * If the supplied address is INADDR_BROADCAST,
357 		 * use the broadcast address of an interface
358 		 * which supports broadcast. (loopback does not)
359 		 */
360 
361 		if (in_nullhost(sin->sin_addr))
362 			sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr;
363 		else if (sin->sin_addr.s_addr == INADDR_BROADCAST)
364 		    for (ia = in_ifaddr.tqh_first; ia != NULL;
365 		      ia = ia->ia_list.tqe_next)
366 			if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
367 			    sin->sin_addr = ia->ia_broadaddr.sin_addr;
368 			    break;
369 			}
370 	}
371 	/*
372 	 * If we haven't bound which network number to use as ours,
373 	 * we will use the number of the outgoing interface.
374 	 * This depends on having done a routing lookup, which
375 	 * we will probably have to do anyway, so we might
376 	 * as well do it now.  On the other hand if we are
377 	 * sending to multiple destinations we may have already
378 	 * done the lookup, so see if we can use the route
379 	 * from before.  In any case, we only
380 	 * chose a port number once, even if sending to multiple
381 	 * destinations.
382 	 */
383 	if (in_nullhost(inp->inp_laddr)) {
384 #if 0
385 		struct route *ro;
386 
387 		ia = (struct in_ifaddr *)0;
388 		/*
389 		 * If route is known or can be allocated now,
390 		 * our src addr is taken from the i/f, else punt.
391 		 */
392 		ro = &inp->inp_route;
393 		if (ro->ro_rt &&
394 		    (!in_hosteq(satosin(&ro->ro_dst)->sin_addr,
395 			sin->sin_addr) ||
396 		    inp->inp_socket->so_options & SO_DONTROUTE)) {
397 			RTFREE(ro->ro_rt);
398 			ro->ro_rt = (struct rtentry *)0;
399 		}
400 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
401 		    (ro->ro_rt == (struct rtentry *)0 ||
402 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
403 			/* No route yet, so try to acquire one */
404 			ro->ro_dst.sa_family = AF_INET;
405 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
406 			satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
407 			rtalloc(ro);
408 		}
409 		/*
410 		 * If we found a route, use the address
411 		 * corresponding to the outgoing interface
412 		 * unless it is the loopback (in case a route
413 		 * to our address on another net goes to loopback).
414 		 *
415 		 * XXX Is this still true?  Do we care?
416 		 */
417 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
418 			ia = ifatoia(ro->ro_rt->rt_ifa);
419 		if (ia == NULL) {
420 			u_int16_t fport = sin->sin_port;
421 
422 			sin->sin_port = 0;
423 			ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
424 			sin->sin_port = fport;
425 			if (ia == 0) {
426 				/* Find 1st non-loopback AF_INET address */
427 				for (ia = in_ifaddr.tqh_first ; ia != NULL;
428 				     ia = ia->ia_list.tqe_next) {
429 					if ((ia->ia_ifp->if_flags &
430 					     IFF_LOOPBACK) == 0)
431 						break;
432 				}
433 			}
434 			if (ia == NULL)
435 				return (EADDRNOTAVAIL);
436 		}
437 		/*
438 		 * If the destination address is multicast and an outgoing
439 		 * interface has been set as a multicast option, use the
440 		 * address of that interface as our source address.
441 		 */
442 		if (IN_MULTICAST(sin->sin_addr.s_addr) &&
443 		    inp->inp_moptions != NULL) {
444 			struct ip_moptions *imo;
445 			struct ifnet *ifp;
446 
447 			imo = inp->inp_moptions;
448 			if (imo->imo_multicast_ifp != NULL) {
449 				ifp = imo->imo_multicast_ifp;
450 				IFP_TO_IA(ifp, ia);		/* XXX */
451 				if (ia == 0)
452 					return (EADDRNOTAVAIL);
453 			}
454 		}
455 		ifaddr = satosin(&ia->ia_addr);
456 #else
457 		int error;
458 		ifaddr = in_selectsrc(sin, &inp->inp_route,
459 			inp->inp_socket->so_options, inp->inp_moptions, &error);
460 		if (ifaddr == NULL) {
461 			if (error == 0)
462 				error = EADDRNOTAVAIL;
463 			return error;
464 		}
465 #endif
466 	}
467 	if (in_pcblookup_connect(inp->inp_table, sin->sin_addr, sin->sin_port,
468 	    !in_nullhost(inp->inp_laddr) ? inp->inp_laddr : ifaddr->sin_addr,
469 	    inp->inp_lport) != 0)
470 		return (EADDRINUSE);
471 	if (in_nullhost(inp->inp_laddr)) {
472 		if (inp->inp_lport == 0) {
473 			error = in_pcbbind(inp, (struct mbuf *)0,
474 			    (struct proc *)0);
475 			/*
476 			 * This used to ignore the return value
477 			 * completely, but we need to check for
478 			 * ephemeral port shortage.
479 			 * XXX Should we check for other errors, too?
480 			 */
481 			if (error == EAGAIN)
482 				return (error);
483 		}
484 		inp->inp_laddr = ifaddr->sin_addr;
485 	}
486 	inp->inp_faddr = sin->sin_addr;
487 	inp->inp_fport = sin->sin_port;
488 	in_pcbstate(inp, INP_CONNECTED);
489 	return (0);
490 }
491 
492 void
493 in_pcbdisconnect(v)
494 	void *v;
495 {
496 	struct inpcb *inp = v;
497 
498 	inp->inp_faddr = zeroin_addr;
499 	inp->inp_fport = 0;
500 	in_pcbstate(inp, INP_BOUND);
501 	if (inp->inp_socket->so_state & SS_NOFDREF)
502 		in_pcbdetach(inp);
503 }
504 
505 void
506 in_pcbdetach(v)
507 	void *v;
508 {
509 	struct inpcb *inp = v;
510 	struct socket *so = inp->inp_socket;
511 	int s;
512 
513 #ifdef IPSEC
514 	ipsec4_delete_pcbpolicy(inp);
515 #endif /*IPSEC*/
516 	so->so_pcb = 0;
517 	sofree(so);
518 	if (inp->inp_options)
519 		(void)m_free(inp->inp_options);
520 	if (inp->inp_route.ro_rt)
521 		rtfree(inp->inp_route.ro_rt);
522 	ip_freemoptions(inp->inp_moptions);
523 	s = splnet();
524 	in_pcbstate(inp, INP_ATTACHED);
525 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
526 	splx(s);
527 	pool_put(&inpcb_pool, inp);
528 }
529 
530 void
531 in_setsockaddr(inp, nam)
532 	struct inpcb *inp;
533 	struct mbuf *nam;
534 {
535 	struct sockaddr_in *sin;
536 
537 	nam->m_len = sizeof (*sin);
538 	sin = mtod(nam, struct sockaddr_in *);
539 	bzero((caddr_t)sin, sizeof (*sin));
540 	sin->sin_family = AF_INET;
541 	sin->sin_len = sizeof(*sin);
542 	sin->sin_port = inp->inp_lport;
543 	sin->sin_addr = inp->inp_laddr;
544 }
545 
546 void
547 in_setpeeraddr(inp, nam)
548 	struct inpcb *inp;
549 	struct mbuf *nam;
550 {
551 	struct sockaddr_in *sin;
552 
553 	nam->m_len = sizeof (*sin);
554 	sin = mtod(nam, struct sockaddr_in *);
555 	bzero((caddr_t)sin, sizeof (*sin));
556 	sin->sin_family = AF_INET;
557 	sin->sin_len = sizeof(*sin);
558 	sin->sin_port = inp->inp_fport;
559 	sin->sin_addr = inp->inp_faddr;
560 }
561 
562 /*
563  * Pass some notification to all connections of a protocol
564  * associated with address dst.  The local address and/or port numbers
565  * may be specified to limit the search.  The "usual action" will be
566  * taken, depending on the ctlinput cmd.  The caller must filter any
567  * cmds that are uninteresting (e.g., no error in the map).
568  * Call the protocol specific routine (if any) to report
569  * any errors for each matching socket.
570  *
571  * Must be called at splsoftnet.
572  */
573 int
574 in_pcbnotify(table, faddr, fport_arg, laddr, lport_arg, errno, notify)
575 	struct inpcbtable *table;
576 	struct in_addr faddr, laddr;
577 	u_int fport_arg, lport_arg;
578 	int errno;
579 	void (*notify) __P((struct inpcb *, int));
580 {
581 	struct inpcbhead *head;
582 	struct inpcb *inp, *ninp;
583 	u_int16_t fport = fport_arg, lport = lport_arg;
584 	int nmatch;
585 
586 	if (in_nullhost(faddr) || notify == 0)
587 		return (0);
588 
589 	nmatch = 0;
590 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
591 	for (inp = head->lh_first; inp != NULL; inp = ninp) {
592 		ninp = inp->inp_hash.le_next;
593 		if (in_hosteq(inp->inp_faddr, faddr) &&
594 		    inp->inp_fport == fport &&
595 		    inp->inp_lport == lport &&
596 		    in_hosteq(inp->inp_laddr, laddr)) {
597 			(*notify)(inp, errno);
598 			nmatch++;
599 		}
600 	}
601 	return (nmatch);
602 }
603 
604 void
605 in_pcbnotifyall(table, faddr, errno, notify)
606 	struct inpcbtable *table;
607 	struct in_addr faddr;
608 	int errno;
609 	void (*notify) __P((struct inpcb *, int));
610 {
611 	struct inpcb *inp, *ninp;
612 
613 	if (in_nullhost(faddr) || notify == 0)
614 		return;
615 
616 	for (inp = table->inpt_queue.cqh_first;
617 	    inp != (struct inpcb *)&table->inpt_queue;
618 	    inp = ninp) {
619 		ninp = inp->inp_queue.cqe_next;
620 		if (in_hosteq(inp->inp_faddr, faddr))
621 			(*notify)(inp, errno);
622 	}
623 }
624 
625 void
626 in_pcbpurgeif(table, ifp)
627 	struct inpcbtable *table;
628 	struct ifnet *ifp;
629 {
630 	struct inpcb *inp, *ninp;
631 	struct ip_moptions *imo;
632 	int i, gap;
633 
634 	for (inp = table->inpt_queue.cqh_first;
635 	    inp != (struct inpcb *)&table->inpt_queue;
636 	    inp = ninp) {
637 		ninp = inp->inp_queue.cqe_next;
638 		if (inp->inp_route.ro_rt != NULL &&
639 		    inp->inp_route.ro_rt->rt_ifp == ifp)
640 			in_rtchange(inp, 0);
641 		imo = inp->inp_moptions;
642 		if (imo != NULL) {
643 			/*
644 			 * Unselect the outgoing interface if it is being
645 			 * detached.
646 			 */
647 			if (imo->imo_multicast_ifp == ifp)
648 				imo->imo_multicast_ifp = NULL;
649 
650 			/*
651 			 * Drop multicast group membership if we joined
652 			 * through the interface being detached.
653 			 */
654 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
655 			    i++) {
656 				if (imo->imo_membership[i]->inm_ifp == ifp) {
657 					in_delmulti(imo->imo_membership[i]);
658 					gap++;
659 				} else if (gap != 0)
660 					imo->imo_membership[i - gap] =
661 					    imo->imo_membership[i];
662 			}
663 			imo->imo_num_memberships -= gap;
664 		}
665 	}
666 }
667 
668 /*
669  * Check for alternatives when higher level complains
670  * about service problems.  For now, invalidate cached
671  * routing information.  If the route was created dynamically
672  * (by a redirect), time to try a default gateway again.
673  */
674 void
675 in_losing(inp)
676 	struct inpcb *inp;
677 {
678 	struct rtentry *rt;
679 	struct rt_addrinfo info;
680 
681 	if ((rt = inp->inp_route.ro_rt)) {
682 		inp->inp_route.ro_rt = 0;
683 		bzero((caddr_t)&info, sizeof(info));
684 		info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
685 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
686 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
687 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
688 		if (rt->rt_flags & RTF_DYNAMIC)
689 			(void) rtrequest(RTM_DELETE, rt_key(rt),
690 				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
691 				(struct rtentry **)0);
692 		else
693 		/*
694 		 * A new route can be allocated
695 		 * the next time output is attempted.
696 		 */
697 			rtfree(rt);
698 	}
699 }
700 
701 /*
702  * After a routing change, flush old routing
703  * and allocate a (hopefully) better one.
704  */
705 void
706 in_rtchange(inp, errno)
707 	struct inpcb *inp;
708 	int errno;
709 {
710 
711 	if (inp->inp_route.ro_rt) {
712 		rtfree(inp->inp_route.ro_rt);
713 		inp->inp_route.ro_rt = 0;
714 		/*
715 		 * A new route can be allocated the next time
716 		 * output is attempted.
717 		 */
718 	}
719 	/* XXX SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
720 }
721 
722 struct inpcb *
723 in_pcblookup_port(table, laddr, lport_arg, lookup_wildcard)
724 	struct inpcbtable *table;
725 	struct in_addr laddr;
726 	u_int lport_arg;
727 	int lookup_wildcard;
728 {
729 	struct inpcb *inp, *match = 0;
730 	int matchwild = 3, wildcard;
731 	u_int16_t lport = lport_arg;
732 
733 	for (inp = table->inpt_queue.cqh_first;
734 	    inp != (struct inpcb *)&table->inpt_queue;
735 	    inp = inp->inp_queue.cqe_next) {
736 		if (inp->inp_lport != lport)
737 			continue;
738 		wildcard = 0;
739 		if (!in_nullhost(inp->inp_faddr))
740 			wildcard++;
741 		if (in_nullhost(inp->inp_laddr)) {
742 			if (!in_nullhost(laddr))
743 				wildcard++;
744 		} else {
745 			if (in_nullhost(laddr))
746 				wildcard++;
747 			else {
748 				if (!in_hosteq(inp->inp_laddr, laddr))
749 					continue;
750 			}
751 		}
752 		if (wildcard && !lookup_wildcard)
753 			continue;
754 		if (wildcard < matchwild) {
755 			match = inp;
756 			matchwild = wildcard;
757 			if (matchwild == 0)
758 				break;
759 		}
760 	}
761 	return (match);
762 }
763 
#ifdef DIAGNOSTIC
/*
 * When non-zero, in_pcblookup_connect() and in_pcblookup_bind() print a
 * message for every lookup that finds no PCB.
 */
int	in_pcbnotifymiss = 0;
#endif
767 
768 struct inpcb *
769 in_pcblookup_connect(table, faddr, fport_arg, laddr, lport_arg)
770 	struct inpcbtable *table;
771 	struct in_addr faddr, laddr;
772 	u_int fport_arg, lport_arg;
773 {
774 	struct inpcbhead *head;
775 	struct inpcb *inp;
776 	u_int16_t fport = fport_arg, lport = lport_arg;
777 
778 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
779 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
780 		if (in_hosteq(inp->inp_faddr, faddr) &&
781 		    inp->inp_fport == fport &&
782 		    inp->inp_lport == lport &&
783 		    in_hosteq(inp->inp_laddr, laddr))
784 			goto out;
785 	}
786 #ifdef DIAGNOSTIC
787 	if (in_pcbnotifymiss) {
788 		printf("in_pcblookup_connect: faddr=%08x fport=%d laddr=%08x lport=%d\n",
789 		    ntohl(faddr.s_addr), ntohs(fport),
790 		    ntohl(laddr.s_addr), ntohs(lport));
791 	}
792 #endif
793 	return (0);
794 
795 out:
796 	/* Move this PCB to the head of hash chain. */
797 	if (inp != head->lh_first) {
798 		LIST_REMOVE(inp, inp_hash);
799 		LIST_INSERT_HEAD(head, inp, inp_hash);
800 	}
801 	return (inp);
802 }
803 
804 struct inpcb *
805 in_pcblookup_bind(table, laddr, lport_arg)
806 	struct inpcbtable *table;
807 	struct in_addr laddr;
808 	u_int lport_arg;
809 {
810 	struct inpcbhead *head;
811 	struct inpcb *inp;
812 	u_int16_t lport = lport_arg;
813 
814 	head = INPCBHASH_BIND(table, laddr, lport);
815 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
816 		if (inp->inp_lport == lport &&
817 		    in_hosteq(inp->inp_laddr, laddr))
818 			goto out;
819 	}
820 	head = INPCBHASH_BIND(table, zeroin_addr, lport);
821 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
822 		if (inp->inp_lport == lport &&
823 		    in_hosteq(inp->inp_laddr, zeroin_addr))
824 			goto out;
825 	}
826 #ifdef DIAGNOSTIC
827 	if (in_pcbnotifymiss) {
828 		printf("in_pcblookup_bind: laddr=%08x lport=%d\n",
829 		    ntohl(laddr.s_addr), ntohs(lport));
830 	}
831 #endif
832 	return (0);
833 
834 out:
835 	/* Move this PCB to the head of hash chain. */
836 	if (inp != head->lh_first) {
837 		LIST_REMOVE(inp, inp_hash);
838 		LIST_INSERT_HEAD(head, inp, inp_hash);
839 	}
840 	return (inp);
841 }
842 
843 void
844 in_pcbstate(inp, state)
845 	struct inpcb *inp;
846 	int state;
847 {
848 
849 	if (inp->inp_state > INP_ATTACHED)
850 		LIST_REMOVE(inp, inp_hash);
851 
852 	switch (state) {
853 	case INP_BOUND:
854 		LIST_INSERT_HEAD(INPCBHASH_BIND(inp->inp_table,
855 		    inp->inp_laddr, inp->inp_lport), inp, inp_hash);
856 		break;
857 	case INP_CONNECTED:
858 		LIST_INSERT_HEAD(INPCBHASH_CONNECT(inp->inp_table,
859 		    inp->inp_faddr, inp->inp_fport,
860 		    inp->inp_laddr, inp->inp_lport), inp, inp_hash);
861 		break;
862 	}
863 
864 	inp->inp_state = state;
865 }
866 
867 struct rtentry *
868 in_pcbrtentry(inp)
869 	struct inpcb *inp;
870 {
871 	struct route *ro;
872 
873 	ro = &inp->inp_route;
874 
875 	if (ro->ro_rt == NULL) {
876 		/*
877 		 * No route yet, so try to acquire one.
878 		 */
879 		if (!in_nullhost(inp->inp_faddr)) {
880 			ro->ro_dst.sa_family = AF_INET;
881 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
882 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
883 			rtalloc(ro);
884 		}
885 	}
886 	return (ro->ro_rt);
887 }
888 
889 struct sockaddr_in *
890 in_selectsrc(sin, ro, soopts, mopts, errorp)
891 	struct sockaddr_in *sin;
892 	struct route *ro;
893 	int soopts;
894 	struct ip_moptions *mopts;
895 	int *errorp;
896 {
897 	struct in_ifaddr *ia;
898 
899 	ia = (struct in_ifaddr *)0;
900 	/*
901 	 * If route is known or can be allocated now,
902 	 * our src addr is taken from the i/f, else punt.
903 	 */
904 	if (ro->ro_rt &&
905 	    (!in_hosteq(satosin(&ro->ro_dst)->sin_addr, sin->sin_addr) ||
906 	    soopts & SO_DONTROUTE)) {
907 		RTFREE(ro->ro_rt);
908 		ro->ro_rt = (struct rtentry *)0;
909 	}
910 	if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
911 	    (ro->ro_rt == (struct rtentry *)0 ||
912 	    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
913 		/* No route yet, so try to acquire one */
914 		ro->ro_dst.sa_family = AF_INET;
915 		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
916 		satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
917 		rtalloc(ro);
918 	}
919 	/*
920 	 * If we found a route, use the address
921 	 * corresponding to the outgoing interface
922 	 * unless it is the loopback (in case a route
923 	 * to our address on another net goes to loopback).
924 	 *
925 	 * XXX Is this still true?  Do we care?
926 	 */
927 	if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
928 		ia = ifatoia(ro->ro_rt->rt_ifa);
929 	if (ia == NULL) {
930 		u_int16_t fport = sin->sin_port;
931 
932 		sin->sin_port = 0;
933 		ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
934 		sin->sin_port = fport;
935 		if (ia == 0) {
936 			/* Find 1st non-loopback AF_INET address */
937 			for (ia = in_ifaddr.tqh_first;
938 			     ia != NULL;
939 			     ia = ia->ia_list.tqe_next) {
940 				if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK))
941 					break;
942 			}
943 		}
944 		if (ia == NULL) {
945 			*errorp = EADDRNOTAVAIL;
946 			return NULL;
947 		}
948 	}
949 	/*
950 	 * If the destination address is multicast and an outgoing
951 	 * interface has been set as a multicast option, use the
952 	 * address of that interface as our source address.
953 	 */
954 	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
955 		struct ip_moptions *imo;
956 		struct ifnet *ifp;
957 
958 		imo = mopts;
959 		if (imo->imo_multicast_ifp != NULL) {
960 			ifp = imo->imo_multicast_ifp;
961 			IFP_TO_IA(ifp, ia);		/* XXX */
962 			if (ia == 0) {
963 				*errorp = EADDRNOTAVAIL;
964 				return NULL;
965 			}
966 		}
967 	}
968 	return satosin(&ia->ia_addr);
969 }
970