xref: /netbsd-src/sys/netinet/in_pcb.c (revision 4472dbe5e3bd91ef2540bada7a7ca7384627ff9b)
1 /*	$NetBSD: in_pcb.c,v 1.65 2000/04/03 03:51:16 enami Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Public Access Networks Corporation ("Panix").  It was developed under
38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. All advertising materials mentioning features or use of this software
49  *    must display the following acknowledgement:
50  *	This product includes software developed by the NetBSD
51  *	Foundation, Inc. and its contributors.
52  * 4. Neither the name of The NetBSD Foundation nor the names of its
53  *    contributors may be used to endorse or promote products derived
54  *    from this software without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 /*
70  * Copyright (c) 1982, 1986, 1991, 1993, 1995
71  *	The Regents of the University of California.  All rights reserved.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
102  */
103 
104 #include "opt_ipsec.h"
105 
106 #include <sys/param.h>
107 #include <sys/systm.h>
108 #include <sys/malloc.h>
109 #include <sys/mbuf.h>
110 #include <sys/protosw.h>
111 #include <sys/socket.h>
112 #include <sys/socketvar.h>
113 #include <sys/ioctl.h>
114 #include <sys/errno.h>
115 #include <sys/time.h>
116 #include <sys/pool.h>
117 #include <sys/proc.h>
118 
119 #include <net/if.h>
120 #include <net/route.h>
121 
122 #include <netinet/in.h>
123 #include <netinet/in_systm.h>
124 #include <netinet/ip.h>
125 #include <netinet/in_pcb.h>
126 #include <netinet/in_var.h>
127 #include <netinet/ip_var.h>
128 
129 #ifdef IPSEC
130 #include <netinet6/ipsec.h>
131 #include <netkey/key.h>
132 #include <netkey/key_debug.h>
133 #endif /* IPSEC */
134 
/*
 * An all-zeroes IPv4 address.  It is never assigned to in this file; it is
 * used to clear a PCB's foreign address and as the wildcard local address
 * when looking up bound PCBs.
 */
struct	in_addr zeroin_addr;

/*
 * Hash bucket selectors.  Each macro evaluates to a pointer to one bucket
 * of the corresponding hash table of `table'.  The addresses and ports are
 * run through ntohl()/ntohs(), summed, and masked with the table's hash
 * mask to obtain the bucket index.
 *
 * The whole expansion is parenthesized so that the result can safely be
 * followed by a postfix operator (e.g. INPCBHASH_BIND(...)->lh_first)
 * without the unary `&' binding to the wrong operand.
 */
#define	INPCBHASH_BIND(table, laddr, lport) \
	(&(table)->inpt_bindhashtbl[ \
	    ((ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_bindhash])
#define	INPCBHASH_CONNECT(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_connecthashtbl[ \
	    ((ntohl((faddr).s_addr) + ntohs(fport)) + \
	     (ntohl((laddr).s_addr) + ntohs(lport))) & (table)->inpt_connecthash])
144 
145 struct inpcb *
146 	in_pcblookup_port __P((struct inpcbtable *,
147 	    struct in_addr, u_int, int));
148 
149 int	anonportmin = IPPORT_ANONMIN;
150 int	anonportmax = IPPORT_ANONMAX;
151 
152 struct pool inpcb_pool;
153 
154 void
155 in_pcbinit(table, bindhashsize, connecthashsize)
156 	struct inpcbtable *table;
157 	int bindhashsize, connecthashsize;
158 {
159 	static int inpcb_pool_initialized;
160 
161 	if (inpcb_pool_initialized == 0) {
162 		pool_init(&inpcb_pool, sizeof(struct inpcb), 0, 0, 0,
163 		    "inpcbpl", 0, NULL, NULL, M_PCB);
164 		inpcb_pool_initialized = 1;
165 	}
166 
167 	CIRCLEQ_INIT(&table->inpt_queue);
168 	table->inpt_bindhashtbl =
169 	    hashinit(bindhashsize, M_PCB, M_WAITOK, &table->inpt_bindhash);
170 	table->inpt_connecthashtbl =
171 	    hashinit(connecthashsize, M_PCB, M_WAITOK, &table->inpt_connecthash);
172 	table->inpt_lastlow = IPPORT_RESERVEDMAX;
173 	table->inpt_lastport = (u_int16_t)anonportmax;
174 }
175 
176 int
177 in_pcballoc(so, v)
178 	struct socket *so;
179 	void *v;
180 {
181 	struct inpcbtable *table = v;
182 	struct inpcb *inp;
183 	int s;
184 
185 	inp = pool_get(&inpcb_pool, PR_NOWAIT);
186 	if (inp == NULL)
187 		return (ENOBUFS);
188 	bzero((caddr_t)inp, sizeof(*inp));
189 	inp->inp_table = table;
190 	inp->inp_socket = so;
191 	inp->inp_errormtu = -1;
192 	so->so_pcb = inp;
193 	s = splnet();
194 	CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
195 	in_pcbstate(inp, INP_ATTACHED);
196 	splx(s);
197 	return (0);
198 }
199 
200 int
201 in_pcbbind(v, nam, p)
202 	void *v;
203 	struct mbuf *nam;
204 	struct proc *p;
205 {
206 	struct inpcb *inp = v;
207 	struct socket *so = inp->inp_socket;
208 	struct inpcbtable *table = inp->inp_table;
209 	struct sockaddr_in *sin;
210 	u_int16_t lport = 0;
211 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
212 #ifndef IPNOPRIVPORTS
213 	int error;
214 #endif
215 
216 	if (in_ifaddr.tqh_first == 0)
217 		return (EADDRNOTAVAIL);
218 	if (inp->inp_lport || !in_nullhost(inp->inp_laddr))
219 		return (EINVAL);
220 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
221 		wild = 1;
222 	if (nam == 0)
223 		goto noname;
224 	sin = mtod(nam, struct sockaddr_in *);
225 	if (nam->m_len != sizeof (*sin))
226 		return (EINVAL);
227 #ifdef notdef
228 	/*
229 	 * We should check the family, but old programs
230 	 * incorrectly fail to initialize it.
231 	 */
232 	if (sin->sin_family != AF_INET)
233 		return (EAFNOSUPPORT);
234 #endif
235 	lport = sin->sin_port;
236 	if (IN_MULTICAST(sin->sin_addr.s_addr)) {
237 		/*
238 		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
239 		 * allow complete duplication of binding if
240 		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
241 		 * and a multicast address is bound on both
242 		 * new and duplicated sockets.
243 		 */
244 		if (so->so_options & SO_REUSEADDR)
245 			reuseport = SO_REUSEADDR|SO_REUSEPORT;
246 	} else if (!in_nullhost(sin->sin_addr)) {
247 		sin->sin_port = 0;		/* yech... */
248 		if (ifa_ifwithaddr(sintosa(sin)) == 0)
249 			return (EADDRNOTAVAIL);
250 	}
251 	if (lport) {
252 		struct inpcb *t;
253 #ifndef IPNOPRIVPORTS
254 		/* GROSS */
255 		if (ntohs(lport) < IPPORT_RESERVED &&
256 		    (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))))
257 			return (EACCES);
258 #endif
259 		if (so->so_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
260 			t = in_pcblookup_port(table, sin->sin_addr, lport, 1);
261 		/*
262 		 * XXX:	investigate ramifications of loosening this
263 		 *	restriction so that as long as both ports have
264 		 *	SO_REUSEPORT allow the bind
265 		 */
266 			if (t &&
267 			    (!in_nullhost(sin->sin_addr) ||
268 			     !in_nullhost(t->inp_laddr) ||
269 			     (t->inp_socket->so_options & SO_REUSEPORT) == 0)
270 			    && (so->so_uid != t->inp_socket->so_uid)) {
271 				return (EADDRINUSE);
272 			}
273 		}
274 		t = in_pcblookup_port(table, sin->sin_addr, lport, wild);
275 		if (t && (reuseport & t->inp_socket->so_options) == 0)
276 			return (EADDRINUSE);
277 	}
278 	inp->inp_laddr = sin->sin_addr;
279 
280 noname:
281 	if (lport == 0) {
282 		int	   cnt;
283 		u_int16_t  min, max;
284 		u_int16_t *lastport;
285 
286 		if (inp->inp_flags & INP_LOWPORT) {
287 #ifndef IPNOPRIVPORTS
288 			if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag)))
289 				return (EACCES);
290 #endif
291 			min = IPPORT_RESERVEDMIN;
292 			max = IPPORT_RESERVEDMAX;
293 			lastport = &table->inpt_lastlow;
294 		} else {
295 			min = anonportmin;
296 			max = anonportmax;
297 			lastport = &table->inpt_lastport;
298 		}
299 		if (min > max) {	/* sanity check */
300 			u_int16_t swp;
301 
302 			swp = min;
303 			min = max;
304 			max = swp;
305 		}
306 
307 		lport = *lastport - 1;
308 		for (cnt = max - min + 1; cnt; cnt--, lport--) {
309 			if (lport < min || lport > max)
310 				lport = max;
311 			if (!in_pcblookup_port(table, inp->inp_laddr,
312 			    htons(lport), 1))
313 				goto found;
314 		}
315 		if (!in_nullhost(inp->inp_laddr))
316 			inp->inp_laddr.s_addr = INADDR_ANY;
317 		return (EAGAIN);
318 	found:
319 		inp->inp_flags |= INP_ANONPORT;
320 		*lastport = lport;
321 		lport = htons(lport);
322 	}
323 	inp->inp_lport = lport;
324 	in_pcbstate(inp, INP_BOUND);
325 	return (0);
326 }
327 
328 /*
329  * Connect from a socket to a specified address.
330  * Both address and port must be specified in argument sin.
331  * If don't have a local address for this socket yet,
332  * then pick one.
333  */
334 int
335 in_pcbconnect(v, nam)
336 	void *v;
337 	struct mbuf *nam;
338 {
339 	struct inpcb *inp = v;
340 	struct in_ifaddr *ia;
341 	struct sockaddr_in *ifaddr = NULL;
342 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
343 	int error;
344 
345 	if (nam->m_len != sizeof (*sin))
346 		return (EINVAL);
347 	if (sin->sin_family != AF_INET)
348 		return (EAFNOSUPPORT);
349 	if (sin->sin_port == 0)
350 		return (EADDRNOTAVAIL);
351 	if (in_ifaddr.tqh_first != 0) {
352 		/*
353 		 * If the destination address is INADDR_ANY,
354 		 * use any local address (likely loopback).
355 		 * If the supplied address is INADDR_BROADCAST,
356 		 * use the broadcast address of an interface
357 		 * which supports broadcast. (loopback does not)
358 		 */
359 
360 		if (in_nullhost(sin->sin_addr))
361 			sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr;
362 		else if (sin->sin_addr.s_addr == INADDR_BROADCAST)
363 		    for (ia = in_ifaddr.tqh_first; ia != NULL;
364 		      ia = ia->ia_list.tqe_next)
365 			if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
366 			    sin->sin_addr = ia->ia_broadaddr.sin_addr;
367 			    break;
368 			}
369 	}
370 	/*
371 	 * If we haven't bound which network number to use as ours,
372 	 * we will use the number of the outgoing interface.
373 	 * This depends on having done a routing lookup, which
374 	 * we will probably have to do anyway, so we might
375 	 * as well do it now.  On the other hand if we are
376 	 * sending to multiple destinations we may have already
377 	 * done the lookup, so see if we can use the route
378 	 * from before.  In any case, we only
379 	 * chose a port number once, even if sending to multiple
380 	 * destinations.
381 	 */
382 	if (in_nullhost(inp->inp_laddr)) {
383 #if 0
384 		struct route *ro;
385 
386 		ia = (struct in_ifaddr *)0;
387 		/*
388 		 * If route is known or can be allocated now,
389 		 * our src addr is taken from the i/f, else punt.
390 		 */
391 		ro = &inp->inp_route;
392 		if (ro->ro_rt &&
393 		    (!in_hosteq(satosin(&ro->ro_dst)->sin_addr,
394 			sin->sin_addr) ||
395 		    inp->inp_socket->so_options & SO_DONTROUTE)) {
396 			RTFREE(ro->ro_rt);
397 			ro->ro_rt = (struct rtentry *)0;
398 		}
399 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
400 		    (ro->ro_rt == (struct rtentry *)0 ||
401 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
402 			/* No route yet, so try to acquire one */
403 			ro->ro_dst.sa_family = AF_INET;
404 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
405 			satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
406 			rtalloc(ro);
407 		}
408 		/*
409 		 * If we found a route, use the address
410 		 * corresponding to the outgoing interface
411 		 * unless it is the loopback (in case a route
412 		 * to our address on another net goes to loopback).
413 		 *
414 		 * XXX Is this still true?  Do we care?
415 		 */
416 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
417 			ia = ifatoia(ro->ro_rt->rt_ifa);
418 		if (ia == NULL) {
419 			u_int16_t fport = sin->sin_port;
420 
421 			sin->sin_port = 0;
422 			ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
423 			sin->sin_port = fport;
424 			if (ia == 0) {
425 				/* Find 1st non-loopback AF_INET address */
426 				for (ia = in_ifaddr.tqh_first ; ia != NULL;
427 				     ia = ia->ia_list.tqe_next) {
428 					if ((ia->ia_ifp->if_flags &
429 					     IFF_LOOPBACK) == 0)
430 						break;
431 				}
432 			}
433 			if (ia == NULL)
434 				return (EADDRNOTAVAIL);
435 		}
436 		/*
437 		 * If the destination address is multicast and an outgoing
438 		 * interface has been set as a multicast option, use the
439 		 * address of that interface as our source address.
440 		 */
441 		if (IN_MULTICAST(sin->sin_addr.s_addr) &&
442 		    inp->inp_moptions != NULL) {
443 			struct ip_moptions *imo;
444 			struct ifnet *ifp;
445 
446 			imo = inp->inp_moptions;
447 			if (imo->imo_multicast_ifp != NULL) {
448 				ifp = imo->imo_multicast_ifp;
449 				IFP_TO_IA(ifp, ia);		/* XXX */
450 				if (ia == 0)
451 					return (EADDRNOTAVAIL);
452 			}
453 		}
454 		ifaddr = satosin(&ia->ia_addr);
455 #else
456 		int error;
457 		ifaddr = in_selectsrc(sin, &inp->inp_route,
458 			inp->inp_socket->so_options, inp->inp_moptions, &error);
459 		if (ifaddr == NULL) {
460 			if (error == 0)
461 				error = EADDRNOTAVAIL;
462 			return error;
463 		}
464 #endif
465 	}
466 	if (in_pcblookup_connect(inp->inp_table, sin->sin_addr, sin->sin_port,
467 	    !in_nullhost(inp->inp_laddr) ? inp->inp_laddr : ifaddr->sin_addr,
468 	    inp->inp_lport) != 0)
469 		return (EADDRINUSE);
470 	if (in_nullhost(inp->inp_laddr)) {
471 		if (inp->inp_lport == 0) {
472 			error = in_pcbbind(inp, (struct mbuf *)0,
473 			    (struct proc *)0);
474 			/*
475 			 * This used to ignore the return value
476 			 * completely, but we need to check for
477 			 * ephemeral port shortage.
478 			 * XXX Should we check for other errors, too?
479 			 */
480 			if (error == EAGAIN)
481 				return (error);
482 		}
483 		inp->inp_laddr = ifaddr->sin_addr;
484 	}
485 	inp->inp_faddr = sin->sin_addr;
486 	inp->inp_fport = sin->sin_port;
487 	in_pcbstate(inp, INP_CONNECTED);
488 	return (0);
489 }
490 
491 void
492 in_pcbdisconnect(v)
493 	void *v;
494 {
495 	struct inpcb *inp = v;
496 
497 	inp->inp_faddr = zeroin_addr;
498 	inp->inp_fport = 0;
499 	in_pcbstate(inp, INP_BOUND);
500 	if (inp->inp_socket->so_state & SS_NOFDREF)
501 		in_pcbdetach(inp);
502 }
503 
504 void
505 in_pcbdetach(v)
506 	void *v;
507 {
508 	struct inpcb *inp = v;
509 	struct socket *so = inp->inp_socket;
510 	int s;
511 
512 #ifdef IPSEC
513 	ipsec4_delete_pcbpolicy(inp);
514 #endif /*IPSEC*/
515 	so->so_pcb = 0;
516 	sofree(so);
517 	if (inp->inp_options)
518 		(void)m_free(inp->inp_options);
519 	if (inp->inp_route.ro_rt)
520 		rtfree(inp->inp_route.ro_rt);
521 	ip_freemoptions(inp->inp_moptions);
522 	s = splnet();
523 	in_pcbstate(inp, INP_ATTACHED);
524 	CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
525 	splx(s);
526 	pool_put(&inpcb_pool, inp);
527 }
528 
529 void
530 in_setsockaddr(inp, nam)
531 	struct inpcb *inp;
532 	struct mbuf *nam;
533 {
534 	struct sockaddr_in *sin;
535 
536 	nam->m_len = sizeof (*sin);
537 	sin = mtod(nam, struct sockaddr_in *);
538 	bzero((caddr_t)sin, sizeof (*sin));
539 	sin->sin_family = AF_INET;
540 	sin->sin_len = sizeof(*sin);
541 	sin->sin_port = inp->inp_lport;
542 	sin->sin_addr = inp->inp_laddr;
543 }
544 
545 void
546 in_setpeeraddr(inp, nam)
547 	struct inpcb *inp;
548 	struct mbuf *nam;
549 {
550 	struct sockaddr_in *sin;
551 
552 	nam->m_len = sizeof (*sin);
553 	sin = mtod(nam, struct sockaddr_in *);
554 	bzero((caddr_t)sin, sizeof (*sin));
555 	sin->sin_family = AF_INET;
556 	sin->sin_len = sizeof(*sin);
557 	sin->sin_port = inp->inp_fport;
558 	sin->sin_addr = inp->inp_faddr;
559 }
560 
561 /*
562  * Pass some notification to all connections of a protocol
563  * associated with address dst.  The local address and/or port numbers
564  * may be specified to limit the search.  The "usual action" will be
565  * taken, depending on the ctlinput cmd.  The caller must filter any
566  * cmds that are uninteresting (e.g., no error in the map).
567  * Call the protocol specific routine (if any) to report
568  * any errors for each matching socket.
569  *
570  * Must be called at splsoftnet.
571  */
572 int
573 in_pcbnotify(table, faddr, fport_arg, laddr, lport_arg, errno, notify)
574 	struct inpcbtable *table;
575 	struct in_addr faddr, laddr;
576 	u_int fport_arg, lport_arg;
577 	int errno;
578 	void (*notify) __P((struct inpcb *, int));
579 {
580 	struct inpcbhead *head;
581 	struct inpcb *inp, *ninp;
582 	u_int16_t fport = fport_arg, lport = lport_arg;
583 	int nmatch;
584 
585 	if (in_nullhost(faddr) || notify == 0)
586 		return (0);
587 
588 	nmatch = 0;
589 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
590 	for (inp = head->lh_first; inp != NULL; inp = ninp) {
591 		ninp = inp->inp_hash.le_next;
592 		if (in_hosteq(inp->inp_faddr, faddr) &&
593 		    inp->inp_fport == fport &&
594 		    inp->inp_lport == lport &&
595 		    in_hosteq(inp->inp_laddr, laddr)) {
596 			(*notify)(inp, errno);
597 			nmatch++;
598 		}
599 	}
600 	return (nmatch);
601 }
602 
603 void
604 in_pcbnotifyall(table, faddr, errno, notify)
605 	struct inpcbtable *table;
606 	struct in_addr faddr;
607 	int errno;
608 	void (*notify) __P((struct inpcb *, int));
609 {
610 	struct inpcb *inp, *ninp;
611 
612 	if (in_nullhost(faddr) || notify == 0)
613 		return;
614 
615 	for (inp = table->inpt_queue.cqh_first;
616 	    inp != (struct inpcb *)&table->inpt_queue;
617 	    inp = ninp) {
618 		ninp = inp->inp_queue.cqe_next;
619 		if (in_hosteq(inp->inp_faddr, faddr))
620 			(*notify)(inp, errno);
621 	}
622 }
623 
624 void
625 in_pcbpurgeif(table, ifp)
626 	struct inpcbtable *table;
627 	struct ifnet *ifp;
628 {
629 	struct inpcb *inp, *ninp;
630 	struct ip_moptions *imo;
631 	int i, gap;
632 
633 	for (inp = table->inpt_queue.cqh_first;
634 	    inp != (struct inpcb *)&table->inpt_queue;
635 	    inp = ninp) {
636 		ninp = inp->inp_queue.cqe_next;
637 		if (inp->inp_route.ro_rt != NULL &&
638 		    inp->inp_route.ro_rt->rt_ifp == ifp)
639 			in_rtchange(inp, 0);
640 		imo = inp->inp_moptions;
641 		if (imo != NULL) {
642 			/*
643 			 * Unselect the outgoing interface if it is being
644 			 * detached.
645 			 */
646 			if (imo->imo_multicast_ifp == ifp)
647 				imo->imo_multicast_ifp = NULL;
648 
649 			/*
650 			 * Drop multicast group membership if we joined
651 			 * through the interface being detached.
652 			 */
653 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
654 			    i++) {
655 				if (imo->imo_membership[i]->inm_ifp == ifp) {
656 					in_delmulti(imo->imo_membership[i]);
657 					gap++;
658 				} else if (gap != 0)
659 					imo->imo_membership[i - gap] =
660 					    imo->imo_membership[i];
661 			}
662 			imo->imo_num_memberships -= gap;
663 		}
664 	}
665 }
666 
667 /*
668  * Check for alternatives when higher level complains
669  * about service problems.  For now, invalidate cached
670  * routing information.  If the route was created dynamically
671  * (by a redirect), time to try a default gateway again.
672  */
673 void
674 in_losing(inp)
675 	struct inpcb *inp;
676 {
677 	struct rtentry *rt;
678 	struct rt_addrinfo info;
679 
680 	if ((rt = inp->inp_route.ro_rt)) {
681 		inp->inp_route.ro_rt = 0;
682 		bzero((caddr_t)&info, sizeof(info));
683 		info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
684 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
685 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
686 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
687 		if (rt->rt_flags & RTF_DYNAMIC)
688 			(void) rtrequest(RTM_DELETE, rt_key(rt),
689 				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
690 				(struct rtentry **)0);
691 		else
692 		/*
693 		 * A new route can be allocated
694 		 * the next time output is attempted.
695 		 */
696 			rtfree(rt);
697 	}
698 }
699 
700 /*
701  * After a routing change, flush old routing
702  * and allocate a (hopefully) better one.
703  */
704 void
705 in_rtchange(inp, errno)
706 	struct inpcb *inp;
707 	int errno;
708 {
709 
710 	if (inp->inp_route.ro_rt) {
711 		rtfree(inp->inp_route.ro_rt);
712 		inp->inp_route.ro_rt = 0;
713 		/*
714 		 * A new route can be allocated the next time
715 		 * output is attempted.
716 		 */
717 	}
718 	/* XXX SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
719 }
720 
721 struct inpcb *
722 in_pcblookup_port(table, laddr, lport_arg, lookup_wildcard)
723 	struct inpcbtable *table;
724 	struct in_addr laddr;
725 	u_int lport_arg;
726 	int lookup_wildcard;
727 {
728 	struct inpcb *inp, *match = 0;
729 	int matchwild = 3, wildcard;
730 	u_int16_t lport = lport_arg;
731 
732 	for (inp = table->inpt_queue.cqh_first;
733 	    inp != (struct inpcb *)&table->inpt_queue;
734 	    inp = inp->inp_queue.cqe_next) {
735 		if (inp->inp_lport != lport)
736 			continue;
737 		wildcard = 0;
738 		if (!in_nullhost(inp->inp_faddr))
739 			wildcard++;
740 		if (in_nullhost(inp->inp_laddr)) {
741 			if (!in_nullhost(laddr))
742 				wildcard++;
743 		} else {
744 			if (in_nullhost(laddr))
745 				wildcard++;
746 			else {
747 				if (!in_hosteq(inp->inp_laddr, laddr))
748 					continue;
749 			}
750 		}
751 		if (wildcard && !lookup_wildcard)
752 			continue;
753 		if (wildcard < matchwild) {
754 			match = inp;
755 			matchwild = wildcard;
756 			if (matchwild == 0)
757 				break;
758 		}
759 	}
760 	return (match);
761 }
762 
763 #ifdef DIAGNOSTIC
764 int	in_pcbnotifymiss = 0;
765 #endif
766 
767 struct inpcb *
768 in_pcblookup_connect(table, faddr, fport_arg, laddr, lport_arg)
769 	struct inpcbtable *table;
770 	struct in_addr faddr, laddr;
771 	u_int fport_arg, lport_arg;
772 {
773 	struct inpcbhead *head;
774 	struct inpcb *inp;
775 	u_int16_t fport = fport_arg, lport = lport_arg;
776 
777 	head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
778 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
779 		if (in_hosteq(inp->inp_faddr, faddr) &&
780 		    inp->inp_fport == fport &&
781 		    inp->inp_lport == lport &&
782 		    in_hosteq(inp->inp_laddr, laddr))
783 			goto out;
784 	}
785 #ifdef DIAGNOSTIC
786 	if (in_pcbnotifymiss) {
787 		printf("in_pcblookup_connect: faddr=%08x fport=%d laddr=%08x lport=%d\n",
788 		    ntohl(faddr.s_addr), ntohs(fport),
789 		    ntohl(laddr.s_addr), ntohs(lport));
790 	}
791 #endif
792 	return (0);
793 
794 out:
795 	/* Move this PCB to the head of hash chain. */
796 	if (inp != head->lh_first) {
797 		LIST_REMOVE(inp, inp_hash);
798 		LIST_INSERT_HEAD(head, inp, inp_hash);
799 	}
800 	return (inp);
801 }
802 
803 struct inpcb *
804 in_pcblookup_bind(table, laddr, lport_arg)
805 	struct inpcbtable *table;
806 	struct in_addr laddr;
807 	u_int lport_arg;
808 {
809 	struct inpcbhead *head;
810 	struct inpcb *inp;
811 	u_int16_t lport = lport_arg;
812 
813 	head = INPCBHASH_BIND(table, laddr, lport);
814 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
815 		if (inp->inp_lport == lport &&
816 		    in_hosteq(inp->inp_laddr, laddr))
817 			goto out;
818 	}
819 	head = INPCBHASH_BIND(table, zeroin_addr, lport);
820 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
821 		if (inp->inp_lport == lport &&
822 		    in_hosteq(inp->inp_laddr, zeroin_addr))
823 			goto out;
824 	}
825 #ifdef DIAGNOSTIC
826 	if (in_pcbnotifymiss) {
827 		printf("in_pcblookup_bind: laddr=%08x lport=%d\n",
828 		    ntohl(laddr.s_addr), ntohs(lport));
829 	}
830 #endif
831 	return (0);
832 
833 out:
834 	/* Move this PCB to the head of hash chain. */
835 	if (inp != head->lh_first) {
836 		LIST_REMOVE(inp, inp_hash);
837 		LIST_INSERT_HEAD(head, inp, inp_hash);
838 	}
839 	return (inp);
840 }
841 
842 void
843 in_pcbstate(inp, state)
844 	struct inpcb *inp;
845 	int state;
846 {
847 
848 	if (inp->inp_state > INP_ATTACHED)
849 		LIST_REMOVE(inp, inp_hash);
850 
851 	switch (state) {
852 	case INP_BOUND:
853 		LIST_INSERT_HEAD(INPCBHASH_BIND(inp->inp_table,
854 		    inp->inp_laddr, inp->inp_lport), inp, inp_hash);
855 		break;
856 	case INP_CONNECTED:
857 		LIST_INSERT_HEAD(INPCBHASH_CONNECT(inp->inp_table,
858 		    inp->inp_faddr, inp->inp_fport,
859 		    inp->inp_laddr, inp->inp_lport), inp, inp_hash);
860 		break;
861 	}
862 
863 	inp->inp_state = state;
864 }
865 
866 struct rtentry *
867 in_pcbrtentry(inp)
868 	struct inpcb *inp;
869 {
870 	struct route *ro;
871 
872 	ro = &inp->inp_route;
873 
874 	if (ro->ro_rt == NULL) {
875 		/*
876 		 * No route yet, so try to acquire one.
877 		 */
878 		if (!in_nullhost(inp->inp_faddr)) {
879 			ro->ro_dst.sa_family = AF_INET;
880 			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
881 			satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
882 			rtalloc(ro);
883 		}
884 	}
885 	return (ro->ro_rt);
886 }
887 
888 struct sockaddr_in *
889 in_selectsrc(sin, ro, soopts, mopts, errorp)
890 	struct sockaddr_in *sin;
891 	struct route *ro;
892 	int soopts;
893 	struct ip_moptions *mopts;
894 	int *errorp;
895 {
896 	struct in_ifaddr *ia;
897 
898 	ia = (struct in_ifaddr *)0;
899 	/*
900 	 * If route is known or can be allocated now,
901 	 * our src addr is taken from the i/f, else punt.
902 	 */
903 	if (ro->ro_rt &&
904 	    (!in_hosteq(satosin(&ro->ro_dst)->sin_addr, sin->sin_addr) ||
905 	    soopts & SO_DONTROUTE)) {
906 		RTFREE(ro->ro_rt);
907 		ro->ro_rt = (struct rtentry *)0;
908 	}
909 	if ((soopts & SO_DONTROUTE) == 0 && /*XXX*/
910 	    (ro->ro_rt == (struct rtentry *)0 ||
911 	    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
912 		/* No route yet, so try to acquire one */
913 		ro->ro_dst.sa_family = AF_INET;
914 		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
915 		satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
916 		rtalloc(ro);
917 	}
918 	/*
919 	 * If we found a route, use the address
920 	 * corresponding to the outgoing interface
921 	 * unless it is the loopback (in case a route
922 	 * to our address on another net goes to loopback).
923 	 *
924 	 * XXX Is this still true?  Do we care?
925 	 */
926 	if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
927 		ia = ifatoia(ro->ro_rt->rt_ifa);
928 	if (ia == NULL) {
929 		u_int16_t fport = sin->sin_port;
930 
931 		sin->sin_port = 0;
932 		ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
933 		sin->sin_port = fport;
934 		if (ia == 0) {
935 			/* Find 1st non-loopback AF_INET address */
936 			for (ia = in_ifaddr.tqh_first;
937 			     ia != NULL;
938 			     ia = ia->ia_list.tqe_next) {
939 				if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK))
940 					break;
941 			}
942 		}
943 		if (ia == NULL) {
944 			*errorp = EADDRNOTAVAIL;
945 			return NULL;
946 		}
947 	}
948 	/*
949 	 * If the destination address is multicast and an outgoing
950 	 * interface has been set as a multicast option, use the
951 	 * address of that interface as our source address.
952 	 */
953 	if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
954 		struct ip_moptions *imo;
955 		struct ifnet *ifp;
956 
957 		imo = mopts;
958 		if (imo->imo_multicast_ifp != NULL) {
959 			ifp = imo->imo_multicast_ifp;
960 			IFP_TO_IA(ifp, ia);		/* XXX */
961 			if (ia == 0) {
962 				*errorp = EADDRNOTAVAIL;
963 				return NULL;
964 			}
965 		}
966 	}
967 	return satosin(&ia->ia_addr);
968 }
969