xref: /netbsd-src/sys/netinet/if_arp.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: if_arp.c,v 1.307 2021/02/19 14:51:59 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1998, 2000, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Public Access Networks Corporation ("Panix").  It was developed under
9  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if_ether.c	8.2 (Berkeley) 9/26/94
62  */
63 
64 /*
65  * Ethernet address resolution protocol.
66  * TODO:
67  *	add "inuse/lock" bit (or ref. count) along with valid bit
68  */
69 
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.307 2021/02/19 14:51:59 christos Exp $");
72 
73 #ifdef _KERNEL_OPT
74 #include "opt_ddb.h"
75 #include "opt_inet.h"
76 #include "opt_net_mpsafe.h"
77 #endif
78 
79 #ifdef INET
80 
81 #include "arp.h"
82 #include "bridge.h"
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/callout.h>
87 #include <sys/kmem.h>
88 #include <sys/mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/time.h>
91 #include <sys/timetc.h>
92 #include <sys/kernel.h>
93 #include <sys/errno.h>
94 #include <sys/ioctl.h>
95 #include <sys/syslog.h>
96 #include <sys/proc.h>
97 #include <sys/protosw.h>
98 #include <sys/domain.h>
99 #include <sys/sysctl.h>
100 #include <sys/socketvar.h>
101 #include <sys/percpu.h>
102 #include <sys/cprng.h>
103 #include <sys/kmem.h>
104 
105 #include <net/ethertypes.h>
106 #include <net/if.h>
107 #include <net/if_dl.h>
108 #include <net/if_types.h>
109 #include <net/if_ether.h>
110 #include <net/if_llatbl.h>
111 #include <net/nd.h>
112 #include <net/route.h>
113 #include <net/net_stats.h>
114 
115 #include <netinet/in.h>
116 #include <netinet/in_systm.h>
117 #include <netinet/in_var.h>
118 #include <netinet/ip.h>
119 #include <netinet/if_inarp.h>
120 
121 #include "arcnet.h"
122 #if NARCNET > 0
123 #include <net/if_arc.h>
124 #endif
125 #include "carp.h"
126 #if NCARP > 0
127 #include <netinet/ip_carp.h>
128 #endif
129 
/*
 * ARP trailer negotiation.  Trailer protocol is not IP specific,
 * but ARP request/response use IP addresses.
 *
 * arpintr() accepts this protocol type in addition to ETHERTYPE_IP.
 */
#define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL

/*
 * timers
 *
 * NOTE(review): presumably consumed by the arp_nud_enabled() and
 * arp_llinfo_*() callbacks declared below; their bodies are not in
 * this part of the file -- confirm.
 */
static int arp_reachable = REACHABLE_TIME;
static int arp_retrans = RETRANS_TIMER;
static int arp_perform_nud = 1;

/* Callbacks installed in arp_nd_domain below. */
static bool arp_nud_enabled(struct ifnet *);
static unsigned int arp_llinfo_reachable(struct ifnet *);
static unsigned int arp_llinfo_retrans(struct ifnet *);
static union l3addr *arp_llinfo_holdsrc(struct llentry *, union l3addr *);
static void arp_llinfo_output(struct ifnet *, const union l3addr *,
    const union l3addr *, const uint8_t *, const union l3addr *);
static void arp_llinfo_missed(struct ifnet *, const union l3addr *,
    int16_t, struct mbuf *);
static void arp_free(struct llentry *, int);
150 
151 static struct nd_domain arp_nd_domain = {
152 	.nd_family = AF_INET,
153 	.nd_delay = 5,		/* delay first probe time 5 second */
154 	.nd_mmaxtries = 3,	/* maximum broadcast query */
155 	.nd_umaxtries = 3,	/* maximum unicast query */
156 	.nd_retransmultiple = BACKOFF_MULTIPLE,
157 	.nd_maxretrans = MAX_RETRANS_TIMER,
158 	.nd_maxnudhint = 0,	/* max # of subsequent upper layer hints */
159 	.nd_maxqueuelen = 1,	/* max # of packets in unresolved ND entries */
160 	.nd_nud_enabled = arp_nud_enabled,
161 	.nd_reachable = arp_llinfo_reachable,
162 	.nd_retrans = arp_llinfo_retrans,
163 	.nd_holdsrc = arp_llinfo_holdsrc,
164 	.nd_output = arp_llinfo_output,
165 	.nd_missed = arp_llinfo_missed,
166 	.nd_free = arp_free,
167 };
168 
/*
 * Not static, so visible outside this file.
 * NOTE(review): presumably the number of DAD probes to send -- confirm
 * against the DAD code, which is not in this part of the file.
 */
int ip_dad_count = PROBE_NUM;
/* Debug switch: defaults to 1 in kernels built with ARP_DEBUG, 0 otherwise. */
#ifdef ARP_DEBUG
int arp_debug = 1;
#else
int arp_debug = 0;
#endif
175 
/* Initialisation; arp_init() is installed as the pr_init hook of arpsw. */
static void arp_init(void);
static void arp_dad_init(void);

/* Request construction, route/entry helpers and input handlers. */
static void arprequest(struct ifnet *,
    const struct in_addr *, const struct in_addr *,
    const uint8_t *, const uint8_t *);
static void arpannounce1(struct ifaddr *);
static struct sockaddr *arp_setgate(struct rtentry *, struct sockaddr *,
    const struct sockaddr *);
static struct llentry *arpcreate(struct ifnet *,
    const struct in_addr *, const struct sockaddr *, int);
static void in_arpinput(struct mbuf *);
static void in_revarpinput(struct mbuf *);
static void revarprequest(struct ifnet *);

/* Installed as the pr_drain hook of arpsw; only records a drain request. */
static void arp_drainstub(void);

/*
 * Duplicate address detection.  struct dadq is only forward-declared
 * here so that the prototypes can name it.
 */
struct dadq;
static void arp_dad_timer(struct dadq *);
static void arp_dad_start(struct ifaddr *);
static void arp_dad_stop(struct ifaddr *);
static void arp_dad_duplicated(struct ifaddr *, const struct sockaddr_dl *);
198 
199 struct ifqueue arpintrq = {
200 	.ifq_head = NULL,
201 	.ifq_tail = NULL,
202 	.ifq_len = 0,
203 	.ifq_maxlen = 50,
204 	.ifq_drops = 0,
205 };
/*
 * use loopback interface for local traffic: when set, arp_rtrequest()
 * points routes for our own addresses at lo0ifp.
 */
static int useloopback = 1;

/* Per-CPU ARP statistics; arp_init() allocates ARP_NSTATS uint64_t slots. */
static percpu_t *arpstat_percpu;

/*
 * Get/put a reference to the counter array so that several counters
 * can be bumped in one go (see arprequest()).
 */
#define	ARP_STAT_GETREF()	_NET_STAT_GETREF(arpstat_percpu)
#define	ARP_STAT_PUTREF()	_NET_STAT_PUTREF(arpstat_percpu)

/* Increment counter x by one / add v to counter x. */
#define	ARP_STATINC(x)		_NET_STATINC(arpstat_percpu, x)
#define	ARP_STATADD(x, v)	_NET_STATADD(arpstat_percpu, x, v)

/*
 * revarp state
 * NOTE(review): not referenced in this part of the file; presumably
 * used by the revarp routines declared above -- confirm.
 */
static struct in_addr myip, srv_ip;
static int myip_initialized = 0;
static int revarp_in_progress = 0;
static struct ifnet *myip_ifp = NULL;

/* Set by arp_drainstub(); consumed and cleared by arp_fasttimo(). */
static int arp_drainwanted;

/*
 * Logging switches consulted by in_arpinput():
 *   log_movements        - log when an entry's link address is overwritten
 *   log_permanent_modify - log attempts to overwrite an LLE_STATIC entry
 *   log_wrong_iface      - log overwrite attempts seen on an interface
 *                          other than the one the entry belongs to
 */
static int log_movements = 0;
static int log_permanent_modify = 1;
static int log_wrong_iface = 1;

DOMAIN_DEFINE(arpdomain);	/* forward declare and add to link set */
229 
230 static void
231 arp_fasttimo(void)
232 {
233 	if (arp_drainwanted) {
234 		arp_drain();
235 		arp_drainwanted = 0;
236 	}
237 }
238 
239 static const struct protosw arpsw[] = {
240 	{
241 		.pr_type = 0,
242 		.pr_domain = &arpdomain,
243 		.pr_protocol = 0,
244 		.pr_flags = 0,
245 		.pr_input = 0,
246 		.pr_ctlinput = 0,
247 		.pr_ctloutput = 0,
248 		.pr_usrreqs = 0,
249 		.pr_init = arp_init,
250 		.pr_fasttimo = arp_fasttimo,
251 		.pr_slowtimo = 0,
252 		.pr_drain = arp_drainstub,
253 	}
254 };
255 
256 struct domain arpdomain = {
257 	.dom_family = PF_ARP,
258 	.dom_name = "arp",
259 	.dom_protosw = arpsw,
260 	.dom_protoswNPROTOSW = &arpsw[__arraycount(arpsw)],
261 #ifdef MBUFTRACE
262 	.dom_mowner = MOWNER_INIT("internet", "arp"),
263 #endif
264 };
265 
266 static void sysctl_net_inet_arp_setup(struct sysctllog **);
267 
268 void
269 arp_init(void)
270 {
271 
272 	sysctl_net_inet_arp_setup(NULL);
273 	arpstat_percpu = percpu_alloc(sizeof(uint64_t) * ARP_NSTATS);
274 	IFQ_LOCK_INIT(&arpintrq);
275 
276 #ifdef MBUFTRACE
277 	MOWNER_ATTACH(&arpdomain.dom_mowner);
278 #endif
279 
280 	nd_attach_domain(&arp_nd_domain);
281 	arp_dad_init();
282 }
283 
284 static void
285 arp_drainstub(void)
286 {
287 	arp_drainwanted = 1;
288 }
289 
290 /*
291  * ARP protocol drain routine.  Called when memory is in short supply.
292  * Called at splvm();  don't acquire softnet_lock as can be called from
293  * hardware interrupt handlers.
294  */
295 void
296 arp_drain(void)
297 {
298 
299 	lltable_drain(AF_INET);
300 }
301 
302 /*
303  * We set the gateway for RTF_CLONING routes to a "prototype"
304  * link-layer sockaddr whose interface type (if_type) and interface
305  * index (if_index) fields are prepared.
306  */
307 static struct sockaddr *
308 arp_setgate(struct rtentry *rt, struct sockaddr *gate,
309     const struct sockaddr *netmask)
310 {
311 	const struct ifnet *ifp = rt->rt_ifp;
312 	uint8_t namelen = strlen(ifp->if_xname);
313 	uint8_t addrlen = ifp->if_addrlen;
314 
315 	/*
316 	 * XXX: If this is a manually added route to interface
317 	 * such as older version of routed or gated might provide,
318 	 * restore cloning bit.
319 	 */
320 	if ((rt->rt_flags & RTF_HOST) == 0 && netmask != NULL &&
321 	    satocsin(netmask)->sin_addr.s_addr != 0xffffffff)
322 		rt->rt_flags |= RTF_CONNECTED;
323 
324 	if ((rt->rt_flags & (RTF_CONNECTED | RTF_LOCAL))) {
325 		union {
326 			struct sockaddr sa;
327 			struct sockaddr_storage ss;
328 			struct sockaddr_dl sdl;
329 		} u;
330 		/*
331 		 * Case 1: This route should come from a route to iface.
332 		 */
333 		sockaddr_dl_init(&u.sdl, sizeof(u.ss),
334 		    ifp->if_index, ifp->if_type, NULL, namelen, NULL, addrlen);
335 		rt_setgate(rt, &u.sa);
336 		gate = rt->rt_gateway;
337 	}
338 	return gate;
339 }
340 
341 /*
342  * Parallel to llc_rtrequest.
343  */
344 void
345 arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info)
346 {
347 	struct sockaddr *gate = rt->rt_gateway;
348 	struct in_ifaddr *ia;
349 	struct ifaddr *ifa;
350 	struct ifnet *ifp = rt->rt_ifp;
351 	int bound;
352 	int s;
353 
354 	if (req == RTM_LLINFO_UPD) {
355 		if ((ifa = info->rti_ifa) != NULL)
356 			arpannounce1(ifa);
357 		return;
358 	}
359 
360 	if ((rt->rt_flags & RTF_GATEWAY) != 0) {
361 		if (req != RTM_ADD)
362 			return;
363 
364 		/*
365 		 * linklayers with particular link MTU limitation.
366 		 */
367 		switch(ifp->if_type) {
368 #if NARCNET > 0
369 		case IFT_ARCNET:
370 		    {
371 			int arcipifmtu;
372 
373 			if (ifp->if_flags & IFF_LINK0)
374 				arcipifmtu = arc_ipmtu;
375 			else
376 				arcipifmtu = ARCMTU;
377 			if (ifp->if_mtu > arcipifmtu)
378 				rt->rt_rmx.rmx_mtu = arcipifmtu;
379 			break;
380 		    }
381 #endif
382 		}
383 		return;
384 	}
385 
386 	switch (req) {
387 	case RTM_SETGATE:
388 		gate = arp_setgate(rt, gate, info->rti_info[RTAX_NETMASK]);
389 		break;
390 	case RTM_ADD:
391 		gate = arp_setgate(rt, gate, info->rti_info[RTAX_NETMASK]);
392 		if (gate == NULL) {
393 			log(LOG_ERR, "%s: arp_setgate failed\n", __func__);
394 			break;
395 		}
396 		if ((rt->rt_flags & RTF_CONNECTED) ||
397 		    (rt->rt_flags & RTF_LOCAL)) {
398 			/*
399 			 * linklayers with particular link MTU limitation.
400 			 */
401 			switch (ifp->if_type) {
402 #if NARCNET > 0
403 			case IFT_ARCNET:
404 			    {
405 				int arcipifmtu;
406 				if (ifp->if_flags & IFF_LINK0)
407 					arcipifmtu = arc_ipmtu;
408 				else
409 					arcipifmtu = ARCMTU;
410 
411 				if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
412 				    (rt->rt_rmx.rmx_mtu > arcipifmtu ||
413 				     (rt->rt_rmx.rmx_mtu == 0 &&
414 				      ifp->if_mtu > arcipifmtu)))
415 					rt->rt_rmx.rmx_mtu = arcipifmtu;
416 				break;
417 			    }
418 #endif
419 			}
420 			if (rt->rt_flags & RTF_CONNECTED)
421 				break;
422 		}
423 
424 		bound = curlwp_bind();
425 		/* Announce a new entry if requested. */
426 		if (rt->rt_flags & RTF_ANNOUNCE) {
427 			struct psref psref;
428 			ia = in_get_ia_on_iface_psref(
429 			    satocsin(rt_getkey(rt))->sin_addr, ifp, &psref);
430 			if (ia != NULL) {
431 				arpannounce(ifp, &ia->ia_ifa,
432 				    CLLADDR(satocsdl(gate)));
433 				ia4_release(ia, &psref);
434 			}
435 		}
436 
437 		if (gate->sa_family != AF_LINK ||
438 		    gate->sa_len < sockaddr_dl_measure(0, ifp->if_addrlen)) {
439 			log(LOG_DEBUG, "%s: bad gateway value\n", __func__);
440 			goto out;
441 		}
442 
443 		satosdl(gate)->sdl_type = ifp->if_type;
444 		satosdl(gate)->sdl_index = ifp->if_index;
445 
446 		/*
447 		 * If the route is for a broadcast address mark it as such.
448 		 * This way we can avoid an expensive call to in_broadcast()
449 		 * in ip_output() most of the time (because the route passed
450 		 * to ip_output() is almost always a host route).
451 		 */
452 		if (rt->rt_flags & RTF_HOST &&
453 		    !(rt->rt_flags & RTF_BROADCAST) &&
454 		    in_broadcast(satocsin(rt_getkey(rt))->sin_addr, rt->rt_ifp))
455 			rt->rt_flags |= RTF_BROADCAST;
456 		/* There is little point in resolving the broadcast address */
457 		if (rt->rt_flags & RTF_BROADCAST)
458 			goto out;
459 
460 		/*
461 		 * When called from rt_ifa_addlocal, we cannot depend on that
462 		 * the address (rt_getkey(rt)) exits in the address list of the
463 		 * interface. So check RTF_LOCAL instead.
464 		 */
465 		if (rt->rt_flags & RTF_LOCAL) {
466 			if (useloopback) {
467 				rt->rt_ifp = lo0ifp;
468 				rt->rt_rmx.rmx_mtu = 0;
469 			}
470 			goto out;
471 		}
472 
473 		s = pserialize_read_enter();
474 		ia = in_get_ia_on_iface(satocsin(rt_getkey(rt))->sin_addr, ifp);
475 		if (ia == NULL) {
476 			pserialize_read_exit(s);
477 			goto out;
478 		}
479 
480 		if (useloopback) {
481 			rt->rt_ifp = lo0ifp;
482 			rt->rt_rmx.rmx_mtu = 0;
483 		}
484 		rt->rt_flags |= RTF_LOCAL;
485 
486 		if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA)) {
487 			pserialize_read_exit(s);
488 			goto out;
489 		}
490 		/*
491 		 * make sure to set rt->rt_ifa to the interface
492 		 * address we are using, otherwise we will have trouble
493 		 * with source address selection.
494 		 */
495 		ifa = &ia->ia_ifa;
496 		if (ifa != rt->rt_ifa)
497 			/* Assume it doesn't sleep */
498 			rt_replace_ifa(rt, ifa);
499 		pserialize_read_exit(s);
500 	out:
501 		curlwp_bindx(bound);
502 		break;
503 	}
504 }
505 
506 /*
507  * Broadcast an ARP request. Caller specifies:
508  *	- arp header source ip address
509  *	- arp header target ip address
510  *	- arp header source ethernet address
511  */
512 static void
513 arprequest(struct ifnet *ifp,
514     const struct in_addr *sip, const struct in_addr *tip,
515     const uint8_t *saddr, const uint8_t *taddr)
516 {
517 	struct mbuf *m;
518 	struct arphdr *ah;
519 	struct sockaddr sa;
520 	uint64_t *arps;
521 
522 	KASSERT(sip != NULL);
523 	KASSERT(tip != NULL);
524 	KASSERT(saddr != NULL);
525 
526 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
527 		return;
528 	MCLAIM(m, &arpdomain.dom_mowner);
529 	switch (ifp->if_type) {
530 	case IFT_IEEE1394:
531 		m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
532 		    ifp->if_addrlen;
533 		break;
534 	default:
535 		m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
536 		    2 * ifp->if_addrlen;
537 		break;
538 	}
539 	m->m_pkthdr.len = m->m_len;
540 	m_align(m, m->m_len);
541 	ah = mtod(m, struct arphdr *);
542 	memset(ah, 0, m->m_len);
543 	switch (ifp->if_type) {
544 	case IFT_IEEE1394:	/* RFC2734 */
545 		/* fill it now for ar_tpa computation */
546 		ah->ar_hrd = htons(ARPHRD_IEEE1394);
547 		break;
548 	default:
549 		/* ifp->if_output will fill ar_hrd */
550 		break;
551 	}
552 	ah->ar_pro = htons(ETHERTYPE_IP);
553 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
554 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
555 	ah->ar_op = htons(ARPOP_REQUEST);
556 	memcpy(ar_sha(ah), saddr, ah->ar_hln);
557 	if (taddr == NULL)
558 		m->m_flags |= M_BCAST;
559 	else
560 		memcpy(ar_tha(ah), taddr, ah->ar_hln);
561 	memcpy(ar_spa(ah), sip, ah->ar_pln);
562 	memcpy(ar_tpa(ah), tip, ah->ar_pln);
563 	sa.sa_family = AF_ARP;
564 	sa.sa_len = 2;
565 	arps = ARP_STAT_GETREF();
566 	arps[ARP_STAT_SNDTOTAL]++;
567 	arps[ARP_STAT_SENDREQUEST]++;
568 	ARP_STAT_PUTREF();
569 	if_output_lock(ifp, ifp, m, &sa, NULL);
570 }
571 
572 void
573 arpannounce(struct ifnet *ifp, struct ifaddr *ifa, const uint8_t *enaddr)
574 {
575 	struct in_ifaddr *ia = ifatoia(ifa);
576 	struct in_addr *ip = &IA_SIN(ifa)->sin_addr;
577 
578 	if (ia->ia4_flags & (IN_IFF_NOTREADY | IN_IFF_DETACHED)) {
579 		ARPLOG(LOG_DEBUG, "%s not ready\n", ARPLOGADDR(ip));
580 		return;
581 	}
582 	arprequest(ifp, ip, ip, enaddr, NULL);
583 }
584 
585 static void
586 arpannounce1(struct ifaddr *ifa)
587 {
588 
589 	arpannounce(ifa->ifa_ifp, ifa, CLLADDR(ifa->ifa_ifp->if_sadl));
590 }
591 
592 /*
593  * Resolve an IP address into an ethernet address.  If success, desten is
594  * filled in. If there is no entry in arptab, set one up and broadcast a
595  * request for the IP address. Hold onto this mbuf and resend it once the
596  * address is finally resolved.
597  *
598  * A return value of 0 indicates that desten has been filled in and the packet
599  * should be sent normally; a return value of EWOULDBLOCK indicates that the
600  * packet has been held pending resolution. Any other value indicates an
601  * error.
602  */
603 int
604 arpresolve(struct ifnet *ifp, const struct rtentry *rt, struct mbuf *m,
605     const struct sockaddr *dst, void *desten, size_t destlen)
606 {
607 	struct llentry *la;
608 	const char *create_lookup;
609 	int error;
610 
611 #if NCARP > 0
612 	if (rt != NULL && rt->rt_ifp->if_type == IFT_CARP)
613 		ifp = rt->rt_ifp;
614 #endif
615 
616 	KASSERT(m != NULL);
617 
618 	la = arplookup(ifp, NULL, dst, 0);
619 	if (la == NULL)
620 		goto notfound;
621 
622 	if (la->la_flags & LLE_VALID && la->ln_state == ND_LLINFO_REACHABLE) {
623 		KASSERT(destlen >= ifp->if_addrlen);
624 		memcpy(desten, &la->ll_addr, ifp->if_addrlen);
625 		LLE_RUNLOCK(la);
626 		return 0;
627 	}
628 
629 notfound:
630 	if (ifp->if_flags & IFF_NOARP) {
631 		if (la != NULL)
632 			LLE_RUNLOCK(la);
633 		error = ENOTSUP;
634 		goto bad;
635 	}
636 
637 	if (la == NULL) {
638 		struct rtentry *_rt;
639 
640 		create_lookup = "create";
641 		_rt = rtalloc1(dst, 0);
642 		IF_AFDATA_WLOCK(ifp);
643 		la = lla_create(LLTABLE(ifp), LLE_EXCLUSIVE, dst, _rt);
644 		IF_AFDATA_WUNLOCK(ifp);
645 		if (_rt != NULL)
646 			rt_unref(_rt);
647 		if (la == NULL)
648 			ARP_STATINC(ARP_STAT_ALLOCFAIL);
649 		else
650 			la->ln_state = ND_LLINFO_NOSTATE;
651 	} else if (LLE_TRY_UPGRADE(la) == 0) {
652 		create_lookup = "lookup";
653 		LLE_RUNLOCK(la);
654 		IF_AFDATA_RLOCK(ifp);
655 		la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
656 		IF_AFDATA_RUNLOCK(ifp);
657 	}
658 
659 	error = EINVAL;
660 	if (la == NULL) {
661 		log(LOG_DEBUG,
662 		    "%s: failed to %s llentry for %s on %s\n",
663 		    __func__, create_lookup, inet_ntoa(satocsin(dst)->sin_addr),
664 		    ifp->if_xname);
665 		goto bad;
666 	}
667 
668 	error = nd_resolve(la, rt, m, desten, destlen);
669 	return error;
670 
671 bad:
672 	m_freem(m);
673 	return error;
674 }
675 
676 /*
677  * Common length and type checks are done here,
678  * then the protocol-specific routine is called.
679  */
680 void
681 arpintr(void)
682 {
683 	struct mbuf *m;
684 	struct arphdr *ar;
685 	int s;
686 	int arplen;
687 	struct ifnet *rcvif;
688 	bool badhrd;
689 
690 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
691 	for (;;) {
692 
693 		IFQ_LOCK(&arpintrq);
694 		IF_DEQUEUE(&arpintrq, m);
695 		IFQ_UNLOCK(&arpintrq);
696 		if (m == NULL)
697 			goto out;
698 		if ((m->m_flags & M_PKTHDR) == 0)
699 			panic("arpintr");
700 
701 		MCLAIM(m, &arpdomain.dom_mowner);
702 		ARP_STATINC(ARP_STAT_RCVTOTAL);
703 
704 		if (__predict_false(m->m_len < sizeof(*ar))) {
705 			if ((m = m_pullup(m, sizeof(*ar))) == NULL)
706 				goto badlen;
707 		}
708 		ar = mtod(m, struct arphdr *);
709 		KASSERT(ACCESSIBLE_POINTER(ar, struct arphdr));
710 
711 		rcvif = m_get_rcvif(m, &s);
712 		if (__predict_false(rcvif == NULL)) {
713 			ARP_STATINC(ARP_STAT_RCVNOINT);
714 			goto free;
715 		}
716 
717 		/*
718 		 * We don't want non-IEEE1394 ARP packets on IEEE1394
719 		 * interfaces, and vice versa. Our life depends on that.
720 		 */
721 		if (ntohs(ar->ar_hrd) == ARPHRD_IEEE1394)
722 			badhrd = rcvif->if_type != IFT_IEEE1394;
723 		else
724 			badhrd = rcvif->if_type == IFT_IEEE1394;
725 
726 		m_put_rcvif(rcvif, &s);
727 
728 		if (badhrd) {
729 			ARP_STATINC(ARP_STAT_RCVBADPROTO);
730 			goto free;
731 		}
732 
733 		arplen = sizeof(*ar) + 2 * ar->ar_hln + 2 * ar->ar_pln;
734 		if (__predict_false(m->m_len < arplen)) {
735 			if ((m = m_pullup(m, arplen)) == NULL)
736 				goto badlen;
737 			ar = mtod(m, struct arphdr *);
738 			KASSERT(ACCESSIBLE_POINTER(ar, struct arphdr));
739 		}
740 
741 		switch (ntohs(ar->ar_pro)) {
742 		case ETHERTYPE_IP:
743 		case ETHERTYPE_IPTRAILERS:
744 			in_arpinput(m);
745 			continue;
746 		default:
747 			ARP_STATINC(ARP_STAT_RCVBADPROTO);
748 			goto free;
749 		}
750 
751 badlen:
752 		ARP_STATINC(ARP_STAT_RCVBADLEN);
753 free:
754 		m_freem(m);
755 	}
756 
757 out:
758 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
759 	return; /* XXX gcc */
760 }
761 
762 /*
763  * ARP for Internet protocols on 10 Mb/s Ethernet. Algorithm is that given in
764  * RFC 826. In addition, a sanity check is performed on the sender protocol
765  * address, to catch impersonators.
766  *
767  * We no longer handle negotiations for use of trailer protocol: formerly, ARP
768  * replied for protocol type ETHERTYPE_TRAIL sent along with IP replies if we
769  * wanted trailers sent to us, and also sent them in response to IP replies.
770  * This allowed either end to announce the desire to receive trailer packets.
771  *
772  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, but
773  * formerly didn't normally send requests.
774  */
775 static void
776 in_arpinput(struct mbuf *m)
777 {
778 	struct arphdr *ah;
779 	struct ifnet *ifp, *rcvif = NULL;
780 	struct llentry *la = NULL;
781 	struct in_ifaddr *ia = NULL;
782 #if NBRIDGE > 0
783 	struct in_ifaddr *bridge_ia = NULL;
784 #endif
785 #if NCARP > 0
786 	uint32_t count = 0, index = 0;
787 #endif
788 	struct sockaddr sa;
789 	struct in_addr isaddr, itaddr, myaddr;
790 	int op, rt_cmd, new_state = 0;
791 	void *tha;
792 	uint64_t *arps;
793 	struct psref psref, psref_ia;
794 	int s;
795 	char ipbuf[INET_ADDRSTRLEN];
796 	bool find_source, do_dad;
797 
798 	if (__predict_false(m_makewritable(&m, 0, m->m_pkthdr.len, M_DONTWAIT)))
799 		goto out;
800 	ah = mtod(m, struct arphdr *);
801 	op = ntohs(ah->ar_op);
802 
803 	if (ah->ar_pln != sizeof(struct in_addr))
804 		goto out;
805 
806 	ifp = if_get_bylla(ar_sha(ah), ah->ar_hln, &psref);
807 	if (ifp) {
808 		/* it's from me, ignore it. */
809 		if_put(ifp, &psref);
810 		ARP_STATINC(ARP_STAT_RCVLOCALSHA);
811 		goto out;
812 	}
813 
814 	rcvif = ifp = m_get_rcvif_psref(m, &psref);
815 	if (__predict_false(rcvif == NULL))
816 		goto out;
817 	if (rcvif->if_flags & IFF_NOARP)
818 		goto out;
819 
820 	memcpy(&isaddr, ar_spa(ah), sizeof(isaddr));
821 	memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr));
822 
823 	if (m->m_flags & (M_BCAST|M_MCAST))
824 		ARP_STATINC(ARP_STAT_RCVMCAST);
825 
826 	/*
827 	 * Search for a matching interface address
828 	 * or any address on the interface to use
829 	 * as a dummy address in the rest of this function.
830 	 *
831 	 * First try and find the source address for early
832 	 * duplicate address detection.
833 	 */
834 	if (in_nullhost(isaddr)) {
835 		if (in_nullhost(itaddr)) /* very bogus ARP */
836 			goto out;
837 		find_source = false;
838 		myaddr = itaddr;
839 	} else {
840 		find_source = true;
841 		myaddr = isaddr;
842 	}
843 	s = pserialize_read_enter();
844 again:
845 	IN_ADDRHASH_READER_FOREACH(ia, myaddr.s_addr) {
846 		if (!in_hosteq(ia->ia_addr.sin_addr, myaddr))
847 			continue;
848 #if NCARP > 0
849 		if (ia->ia_ifp->if_type == IFT_CARP &&
850 		    ((ia->ia_ifp->if_flags & (IFF_UP|IFF_RUNNING)) ==
851 		    (IFF_UP|IFF_RUNNING))) {
852 			index++;
853 			/* XXX: ar_hln? */
854 			if (ia->ia_ifp == rcvif && (ah->ar_hln >= 6) &&
855 			    carp_iamatch(ia, ar_sha(ah),
856 			    &count, index)) {
857 				break;
858 			}
859 		} else
860 #endif
861 		if (ia->ia_ifp == rcvif)
862 			break;
863 #if NBRIDGE > 0
864 		/*
865 		 * If the interface we received the packet on
866 		 * is part of a bridge, check to see if we need
867 		 * to "bridge" the packet to ourselves at this
868 		 * layer.  Note we still prefer a perfect match,
869 		 * but allow this weaker match if necessary.
870 		 */
871 		if (rcvif->if_bridge != NULL &&
872 		    rcvif->if_bridge == ia->ia_ifp->if_bridge)
873 			bridge_ia = ia;
874 #endif
875 	}
876 
877 #if NBRIDGE > 0
878 	if (ia == NULL && bridge_ia != NULL) {
879 		ia = bridge_ia;
880 		m_put_rcvif_psref(rcvif, &psref);
881 		rcvif = NULL;
882 		/* FIXME */
883 		ifp = bridge_ia->ia_ifp;
884 	}
885 #endif
886 
887 	/* If we failed to find the source address then find
888 	 * the target address. */
889 	if (ia == NULL && find_source && !in_nullhost(itaddr)) {
890 		find_source = false;
891 		myaddr = itaddr;
892 		goto again;
893 	}
894 
895 	if (ia != NULL)
896 		ia4_acquire(ia, &psref_ia);
897 	pserialize_read_exit(s);
898 
899 	if (ah->ar_hln != ifp->if_addrlen) {
900 		ARP_STATINC(ARP_STAT_RCVBADLEN);
901 		log(LOG_WARNING,
902 		    "arp from %s: addr len: new %d, i/f %d (ignored)\n",
903 		    IN_PRINT(ipbuf, &isaddr), ah->ar_hln, ifp->if_addrlen);
904 		goto out;
905 	}
906 
907 	/* Only do DaD if we have a matching address. */
908 	do_dad = (ia != NULL);
909 
910 	if (ia == NULL) {
911 		ia = in_get_ia_on_iface_psref(isaddr, rcvif, &psref_ia);
912 		if (ia == NULL) {
913 			ia = in_get_ia_from_ifp_psref(ifp, &psref_ia);
914 			if (ia == NULL) {
915 				ARP_STATINC(ARP_STAT_RCVNOINT);
916 				goto out;
917 			}
918 		}
919 	}
920 
921 	myaddr = ia->ia_addr.sin_addr;
922 
923 	/* XXX checks for bridge case? */
924 	if (!memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
925 		ARP_STATINC(ARP_STAT_RCVBCASTSHA);
926 		log(LOG_ERR,
927 		    "%s: arp: link address is broadcast for IP address %s!\n",
928 		    ifp->if_xname, IN_PRINT(ipbuf, &isaddr));
929 		goto out;
930 	}
931 
932 	/*
933 	 * If the source IP address is zero, this is an RFC 5227 ARP probe
934 	 */
935 	if (in_nullhost(isaddr))
936 		ARP_STATINC(ARP_STAT_RCVZEROSPA);
937 	else if (in_hosteq(isaddr, myaddr))
938 		ARP_STATINC(ARP_STAT_RCVLOCALSPA);
939 
940 	if (in_nullhost(itaddr))
941 		ARP_STATINC(ARP_STAT_RCVZEROTPA);
942 
943 	/*
944 	 * DAD check, RFC 5227.
945 	 * Collision on sender address is always a duplicate.
946 	 * Collision on target address is only a duplicate
947 	 * IF the sender address is the null host (ie a DAD probe)
948 	 * AND the message was broadcast
949 	 * AND our address is either tentative or duplicated
950 	 * If it was unicast then it's a valid Unicast Poll from RFC 1122.
951 	 */
952 	if (do_dad &&
953 	    (in_hosteq(isaddr, myaddr) ||
954 	    (in_nullhost(isaddr) && in_hosteq(itaddr, myaddr) &&
955 	     m->m_flags & M_BCAST &&
956 	     ia->ia4_flags & (IN_IFF_TENTATIVE | IN_IFF_DUPLICATED))))
957 	{
958 		struct sockaddr_dl sdl, *sdlp;
959 
960 		sdlp = sockaddr_dl_init(&sdl, sizeof(sdl),
961 		    ifp->if_index, ifp->if_type,
962 		    NULL, 0, ar_sha(ah), ah->ar_hln);
963 		arp_dad_duplicated((struct ifaddr *)ia, sdlp);
964 		goto out;
965 	}
966 
967 	/*
968 	 * If the target IP address is zero, ignore the packet.
969 	 * This prevents the code below from trying to answer
970 	 * when we are using IP address zero (booting).
971 	 */
972 	if (in_nullhost(itaddr))
973 		goto out;
974 
975 	if (in_nullhost(isaddr))
976 		goto reply;
977 
978 	if (in_hosteq(itaddr, myaddr))
979 		la = arpcreate(ifp, &isaddr, NULL, 1);
980 	else
981 		la = arplookup(ifp, &isaddr, NULL, 1);
982 	if (la == NULL)
983 		goto reply;
984 
985 	if ((la->la_flags & LLE_VALID) &&
986 	    memcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen))
987 	{
988 		char llabuf[LLA_ADDRSTRLEN], *llastr;
989 
990 		llastr = lla_snprintf(llabuf, sizeof(llabuf),
991 		    ar_sha(ah), ah->ar_hln);
992 
993 		if (la->la_flags & LLE_STATIC) {
994 			ARP_STATINC(ARP_STAT_RCVOVERPERM);
995 			if (!log_permanent_modify)
996 				goto out;
997 			log(LOG_INFO,
998 			    "%s tried to overwrite permanent arp info"
999 			    " for %s\n", llastr, IN_PRINT(ipbuf, &isaddr));
1000 			goto out;
1001 		} else if (la->lle_tbl->llt_ifp != ifp) {
1002 			/* XXX should not happen? */
1003 			ARP_STATINC(ARP_STAT_RCVOVERINT);
1004 			if (!log_wrong_iface)
1005 				goto out;
1006 			log(LOG_INFO,
1007 			    "%s on %s tried to overwrite "
1008 			    "arp info for %s on %s\n",
1009 			    llastr,
1010 			    ifp->if_xname, IN_PRINT(ipbuf, &isaddr),
1011 			    la->lle_tbl->llt_ifp->if_xname);
1012 				goto out;
1013 		} else {
1014 			ARP_STATINC(ARP_STAT_RCVOVER);
1015 			if (log_movements)
1016 				log(LOG_INFO, "arp info overwritten "
1017 				    "for %s by %s\n",
1018 				    IN_PRINT(ipbuf, &isaddr), llastr);
1019 		}
1020 		rt_cmd = RTM_CHANGE;
1021 		new_state = ND_LLINFO_STALE;
1022 	} else {
1023 		if (op == ARPOP_REPLY && in_hosteq(itaddr, myaddr)) {
1024 			/* This was a solicited ARP reply. */
1025 			la->ln_byhint = 0;
1026 			new_state = ND_LLINFO_REACHABLE;
1027 		}
1028 		rt_cmd = la->la_flags & LLE_VALID ? 0 : RTM_ADD;
1029 	}
1030 
1031 	KASSERT(ifp->if_sadl->sdl_alen == ifp->if_addrlen);
1032 
1033 	KASSERT(sizeof(la->ll_addr) >= ifp->if_addrlen);
1034 	memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
1035 	la->la_flags |= LLE_VALID;
1036 	la->ln_asked = 0;
1037 	if (new_state != 0) {
1038 		la->ln_state = new_state;
1039 
1040 		if (new_state != ND_LLINFO_REACHABLE ||
1041 		    !(la->la_flags & LLE_STATIC))
1042 		{
1043 			int timer = ND_TIMER_GC;
1044 
1045 			if (new_state == ND_LLINFO_REACHABLE)
1046 				timer = ND_TIMER_REACHABLE;
1047 			nd_set_timer(la, timer);
1048 		}
1049 	}
1050 
1051 	if (rt_cmd != 0) {
1052 		struct sockaddr_in sin;
1053 
1054 		sockaddr_in_init(&sin, &la->r_l3addr.addr4, 0);
1055 		rt_clonedmsg(rt_cmd, NULL, sintosa(&sin), ar_sha(ah), ifp);
1056 	}
1057 
1058 	if (la->la_hold != NULL) {
1059 		int n = la->la_numheld;
1060 		struct mbuf *m_hold, *m_hold_next;
1061 		struct sockaddr_in sin;
1062 
1063 		sockaddr_in_init(&sin, &la->r_l3addr.addr4, 0);
1064 
1065 		m_hold = la->la_hold;
1066 		la->la_hold = NULL;
1067 		la->la_numheld = 0;
1068 		/*
1069 		 * We have to unlock here because if_output would call
1070 		 * arpresolve
1071 		 */
1072 		LLE_WUNLOCK(la);
1073 		ARP_STATADD(ARP_STAT_DFRSENT, n);
1074 		ARP_STATADD(ARP_STAT_DFRTOTAL, n);
1075 		for (; m_hold != NULL; m_hold = m_hold_next) {
1076 			m_hold_next = m_hold->m_nextpkt;
1077 			m_hold->m_nextpkt = NULL;
1078 			if_output_lock(ifp, ifp, m_hold, sintosa(&sin), NULL);
1079 		}
1080 	} else
1081 		LLE_WUNLOCK(la);
1082 	la = NULL;
1083 
1084 reply:
1085 	if (la != NULL) {
1086 		LLE_WUNLOCK(la);
1087 		la = NULL;
1088 	}
1089 	if (op != ARPOP_REQUEST) {
1090 		if (op == ARPOP_REPLY)
1091 			ARP_STATINC(ARP_STAT_RCVREPLY);
1092 		goto out;
1093 	}
1094 	ARP_STATINC(ARP_STAT_RCVREQUEST);
1095 	if (in_hosteq(itaddr, myaddr)) {
1096 		/* If our address is unusable, don't reply */
1097 		if (ia->ia4_flags & (IN_IFF_NOTREADY | IN_IFF_DETACHED))
1098 			goto out;
1099 		/* I am the target */
1100 		tha = ar_tha(ah);
1101 		if (tha)
1102 			memcpy(tha, ar_sha(ah), ah->ar_hln);
1103 		memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln);
1104 	} else {
1105 		/* Proxy ARP */
1106 		struct llentry *lle = NULL;
1107 		struct sockaddr_in sin;
1108 
1109 #if NCARP > 0
1110 		if (ifp->if_type == IFT_CARP) {
1111 			struct ifnet *_rcvif = m_get_rcvif(m, &s);
1112 			int iftype = 0;
1113 			if (__predict_true(_rcvif != NULL))
1114 				iftype = _rcvif->if_type;
1115 			m_put_rcvif(_rcvif, &s);
1116 			if (iftype != IFT_CARP)
1117 				goto out;
1118 		}
1119 #endif
1120 
1121 		tha = ar_tha(ah);
1122 
1123 		sockaddr_in_init(&sin, &itaddr, 0);
1124 
1125 		IF_AFDATA_RLOCK(ifp);
1126 		lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
1127 		IF_AFDATA_RUNLOCK(ifp);
1128 
1129 		if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
1130 			if (tha)
1131 				memcpy(tha, ar_sha(ah), ah->ar_hln);
1132 			memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
1133 			LLE_RUNLOCK(lle);
1134 		} else {
1135 			if (lle != NULL)
1136 				LLE_RUNLOCK(lle);
1137 			goto out;
1138 		}
1139 	}
1140 	ia4_release(ia, &psref_ia);
1141 
1142 	/*
1143 	 * XXX XXX: Here we're recycling the mbuf. But the mbuf could have
1144 	 * other mbufs in its chain, and just overwriting m->m_pkthdr.len
1145 	 * would be wrong in this case (the length becomes smaller than the
1146 	 * real chain size).
1147 	 *
1148 	 * This can theoretically cause bugs in the lower layers (drivers,
1149 	 * and L2encap), in some corner cases.
1150 	 */
1151 	memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
1152 	memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
1153 	ah->ar_op = htons(ARPOP_REPLY);
1154 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
1155 	switch (ifp->if_type) {
1156 	case IFT_IEEE1394:
1157 		/* ieee1394 arp reply is broadcast */
1158 		m->m_flags &= ~M_MCAST;
1159 		m->m_flags |= M_BCAST;
1160 		m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + ah->ar_hln;
1161 		break;
1162 	default:
1163 		m->m_flags &= ~(M_BCAST|M_MCAST); /* never reply by broadcast */
1164 		m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
1165 		break;
1166 	}
1167 	m->m_pkthdr.len = m->m_len;
1168 	sa.sa_family = AF_ARP;
1169 	sa.sa_len = 2;
1170 	arps = ARP_STAT_GETREF();
1171 	arps[ARP_STAT_SNDTOTAL]++;
1172 	arps[ARP_STAT_SNDREPLY]++;
1173 	ARP_STAT_PUTREF();
1174 	if_output_lock(ifp, ifp, m, &sa, NULL);
1175 	if (rcvif != NULL)
1176 		m_put_rcvif_psref(rcvif, &psref);
1177 	return;
1178 
1179 out:
1180 	if (la != NULL)
1181 		LLE_WUNLOCK(la);
1182 	if (ia != NULL)
1183 		ia4_release(ia, &psref_ia);
1184 	if (rcvif != NULL)
1185 		m_put_rcvif_psref(rcvif, &psref);
1186 	m_freem(m);
1187 }
1188 
1189 /*
1190  * Lookup or a new address in arptab.
1191  */
1192 struct llentry *
1193 arplookup(struct ifnet *ifp, const struct in_addr *addr,
1194     const struct sockaddr *sa, int wlock)
1195 {
1196 	struct sockaddr_in sin;
1197 	struct llentry *la;
1198 	int flags = wlock ? LLE_EXCLUSIVE : 0;
1199 
1200 	if (sa == NULL) {
1201 		KASSERT(addr != NULL);
1202 		sockaddr_in_init(&sin, addr, 0);
1203 		sa = sintocsa(&sin);
1204 	}
1205 
1206 	IF_AFDATA_RLOCK(ifp);
1207 	la = lla_lookup(LLTABLE(ifp), flags, sa);
1208 	IF_AFDATA_RUNLOCK(ifp);
1209 
1210 	return la;
1211 }
1212 
1213 static struct llentry *
1214 arpcreate(struct ifnet *ifp, const struct in_addr *addr,
1215     const struct sockaddr *sa, int wlock)
1216 {
1217 	struct sockaddr_in sin;
1218 	struct llentry *la;
1219 	int flags = wlock ? LLE_EXCLUSIVE : 0;
1220 
1221 	if (sa == NULL) {
1222 		KASSERT(addr != NULL);
1223 		sockaddr_in_init(&sin, addr, 0);
1224 		sa = sintocsa(&sin);
1225 	}
1226 
1227 	la = arplookup(ifp, addr, sa, wlock);
1228 
1229 	if (la == NULL) {
1230 		struct rtentry *rt;
1231 
1232 		rt = rtalloc1(sa, 0);
1233 		IF_AFDATA_WLOCK(ifp);
1234 		la = lla_create(LLTABLE(ifp), flags, sa, rt);
1235 		IF_AFDATA_WUNLOCK(ifp);
1236 		if (rt != NULL)
1237 			rt_unref(rt);
1238 
1239 		if (la != NULL)
1240 			la->ln_state = ND_LLINFO_NOSTATE;
1241 	}
1242 
1243 	return la;
1244 }
1245 
1246 int
1247 arpioctl(u_long cmd, void *data)
1248 {
1249 
1250 	return EOPNOTSUPP;
1251 }
1252 
1253 void
1254 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
1255 {
1256 	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
1257 
1258 	ifa->ifa_rtrequest = arp_rtrequest;
1259 	ifa->ifa_flags |= RTF_CONNECTED;
1260 
1261 	/* ARP will handle DAD for this address. */
1262 	if (in_nullhost(IA_SIN(ifa)->sin_addr)) {
1263 		if (ia->ia_dad_stop != NULL)	/* safety */
1264 			ia->ia_dad_stop(ifa);
1265 		ia->ia_dad_start = NULL;
1266 		ia->ia_dad_stop = NULL;
1267 		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1268 	} else {
1269 		ia->ia_dad_start = arp_dad_start;
1270 		ia->ia_dad_stop = arp_dad_stop;
1271 		if (ia->ia4_flags & IN_IFF_TRYTENTATIVE && ip_dad_enabled())
1272 			ia->ia4_flags |= IN_IFF_TENTATIVE;
1273 		else
1274 			arpannounce1(ifa);
1275 	}
1276 }
1277 
1278 static bool
1279 arp_nud_enabled(__unused struct ifnet *ifp)
1280 {
1281 
1282 	return arp_perform_nud != 0;
1283 }
1284 
1285 static unsigned int
1286 arp_llinfo_reachable(__unused struct ifnet *ifp)
1287 {
1288 
1289 	return arp_reachable;
1290 }
1291 
1292 static unsigned int
1293 arp_llinfo_retrans(__unused struct ifnet *ifp)
1294 {
1295 
1296 	return arp_retrans;
1297 }
1298 
1299 /*
1300  * Gets source address of the first packet in hold queue
1301  * and stores it in @src.
1302  * Returns pointer to @src (if hold queue is not empty) or NULL.
1303  */
1304 static union l3addr *
1305 arp_llinfo_holdsrc(struct llentry *ln, union l3addr *src)
1306 {
1307 	struct ip *ip;
1308 
1309 	if (ln == NULL || ln->ln_hold == NULL)
1310 		return NULL;
1311 
1312 	/*
1313 	 * assuming every packet in ln_hold has the same IP header
1314 	 */
1315 	ip = mtod(ln->ln_hold, struct ip *);
1316 	/* XXX pullup? */
1317 	if (sizeof(*ip) < ln->ln_hold->m_len)
1318 		src->addr4 = ip->ip_src;
1319 	else
1320 		src = NULL;
1321 
1322 	return src;
1323 }
1324 
1325 static void
1326 arp_llinfo_output(struct ifnet *ifp, __unused const union l3addr *daddr,
1327     const union l3addr *taddr, const uint8_t *tlladdr,
1328     const union l3addr *hsrc)
1329 {
1330 	struct in_addr tip = taddr->addr4, sip = zeroin_addr;
1331 	const uint8_t *slladdr = CLLADDR(ifp->if_sadl);
1332 
1333 	if (hsrc != NULL) {
1334 		struct in_ifaddr *ia;
1335 		struct psref psref;
1336 
1337 		ia = in_get_ia_on_iface_psref(hsrc->addr4, ifp, &psref);
1338 		if (ia != NULL) {
1339 			sip = hsrc->addr4;
1340 			ia4_release(ia, &psref);
1341 		}
1342 	}
1343 
1344 	if (sip.s_addr == INADDR_ANY) {
1345 		struct sockaddr_in dst;
1346 		struct rtentry *rt;
1347 
1348 		sockaddr_in_init(&dst, &tip, 0);
1349 		rt = rtalloc1(sintosa(&dst), 0);
1350 		if (rt != NULL) {
1351 			if (rt->rt_ifp == ifp &&
1352 			    rt->rt_ifa != NULL &&
1353 			    rt->rt_ifa->ifa_addr->sa_family == AF_INET)
1354 				sip = satosin(rt->rt_ifa->ifa_addr)->sin_addr;
1355 			rt_unref(rt);
1356 		}
1357 		if (sip.s_addr == INADDR_ANY) {
1358 			char ipbuf[INET_ADDRSTRLEN];
1359 
1360 			log(LOG_DEBUG, "source can't be "
1361 			    "determined: dst=%s\n",
1362 			    IN_PRINT(ipbuf, &tip));
1363 			return;
1364 		}
1365 	}
1366 
1367 	arprequest(ifp, &sip, &tip, slladdr, tlladdr);
1368 }
1369 
1370 
1371 static void
1372 arp_llinfo_missed(struct ifnet *ifp, const union l3addr *taddr,
1373     __unused int16_t type, struct mbuf *m)
1374 {
1375 	struct in_addr mdaddr = zeroin_addr;
1376 	struct sockaddr_in dsin, tsin;
1377 	struct sockaddr *sa;
1378 
1379 	if (m != NULL) {
1380 		struct ip *ip = mtod(m, struct ip *);
1381 
1382 		if (sizeof(*ip) < m->m_len)
1383 			mdaddr = ip->ip_src;
1384 
1385 		/* ip_input() will send ICMP_UNREACH_HOST, not us. */
1386 		m_freem(m);
1387 	}
1388 
1389 	if (mdaddr.s_addr != INADDR_ANY) {
1390 		sockaddr_in_init(&dsin, &mdaddr, 0);
1391 		sa = sintosa(&dsin);
1392 	} else
1393 		sa = NULL;
1394 
1395 	sockaddr_in_init(&tsin, &taddr->addr4, 0);
1396 	rt_clonedmsg(RTM_MISS, sa, sintosa(&tsin), NULL, ifp);
1397 }
1398 
1399 static void
1400 arp_free(struct llentry *ln, int gc)
1401 {
1402 	struct ifnet *ifp;
1403 
1404 	KASSERT(ln != NULL);
1405 	LLE_WLOCK_ASSERT(ln);
1406 
1407 	ifp = ln->lle_tbl->llt_ifp;
1408 
1409 	if (ln->la_flags & LLE_VALID || gc) {
1410 		struct sockaddr_in sin;
1411 		const char *lladdr;
1412 
1413 		sockaddr_in_init(&sin, &ln->r_l3addr.addr4, 0);
1414 		lladdr = ln->la_flags & LLE_VALID ?
1415 		    (const char *)&ln->ll_addr : NULL;
1416 		rt_clonedmsg(RTM_DELETE, NULL, sintosa(&sin), lladdr, ifp);
1417 	}
1418 
1419 	/*
1420 	 * Save to unlock. We still hold an extra reference and will not
1421 	 * free(9) in llentry_free() if someone else holds one as well.
1422 	 */
1423 	LLE_WUNLOCK(ln);
1424 	IF_AFDATA_LOCK(ifp);
1425 	LLE_WLOCK(ln);
1426 
1427 	lltable_free_entry(LLTABLE(ifp), ln);
1428 
1429 	IF_AFDATA_UNLOCK(ifp);
1430 }
1431 
1432 /*
1433  * Upper-layer reachability hint for Neighbor Unreachability Detection.
1434  *
1435  * XXX cost-effective methods?
1436  */
1437 void
1438 arp_nud_hint(struct rtentry *rt)
1439 {
1440 	struct llentry *ln;
1441 	struct ifnet *ifp;
1442 
1443 	if (rt == NULL)
1444 		return;
1445 
1446 	ifp = rt->rt_ifp;
1447 	ln = arplookup(ifp, NULL, rt_getkey(rt), 1);
1448 	nd_nud_hint(ln);
1449 }
1450 
1451 TAILQ_HEAD(dadq_head, dadq);
1452 struct dadq {
1453 	TAILQ_ENTRY(dadq) dad_list;
1454 	struct ifaddr *dad_ifa;
1455 	int dad_count;		/* max ARP to send */
1456 	int dad_arp_tcount;	/* # of trials to send ARP */
1457 	int dad_arp_ocount;	/* ARP sent so far */
1458 	int dad_arp_announce;	/* max ARP announcements */
1459 	int dad_arp_acount;	/* # of announcements */
1460 	struct callout dad_timer_ch;
1461 };
1462 
1463 static struct dadq_head dadq;
1464 static int dad_maxtry = 15;     /* max # of *tries* to transmit DAD packet */
1465 static kmutex_t arp_dad_lock;
1466 
1467 static void
1468 arp_dad_init(void)
1469 {
1470 
1471 	TAILQ_INIT(&dadq);
1472 	mutex_init(&arp_dad_lock, MUTEX_DEFAULT, IPL_NONE);
1473 }
1474 
1475 static struct dadq *
1476 arp_dad_find(struct ifaddr *ifa)
1477 {
1478 	struct dadq *dp;
1479 
1480 	KASSERT(mutex_owned(&arp_dad_lock));
1481 
1482 	TAILQ_FOREACH(dp, &dadq, dad_list) {
1483 		if (dp->dad_ifa == ifa)
1484 			return dp;
1485 	}
1486 	return NULL;
1487 }
1488 
1489 static void
1490 arp_dad_starttimer(struct dadq *dp, int ticks)
1491 {
1492 
1493 	callout_reset(&dp->dad_timer_ch, ticks,
1494 	    (void (*)(void *))arp_dad_timer, dp);
1495 }
1496 
1497 static void
1498 arp_dad_stoptimer(struct dadq *dp)
1499 {
1500 
1501 	KASSERT(mutex_owned(&arp_dad_lock));
1502 
1503 	TAILQ_REMOVE(&dadq, dp, dad_list);
1504 	/* Tell the timer that dp is being destroyed. */
1505 	dp->dad_ifa = NULL;
1506 	callout_halt(&dp->dad_timer_ch, &arp_dad_lock);
1507 }
1508 
1509 static void
1510 arp_dad_destroytimer(struct dadq *dp)
1511 {
1512 
1513 	callout_destroy(&dp->dad_timer_ch);
1514 	KASSERT(dp->dad_ifa == NULL);
1515 	kmem_intr_free(dp, sizeof(*dp));
1516 }
1517 
1518 static void
1519 arp_dad_output(struct dadq *dp, struct ifaddr *ifa)
1520 {
1521 	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
1522 	struct ifnet *ifp = ifa->ifa_ifp;
1523 	struct in_addr sip;
1524 
1525 	dp->dad_arp_tcount++;
1526 	if ((ifp->if_flags & IFF_UP) == 0)
1527 		return;
1528 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1529 		return;
1530 
1531 	dp->dad_arp_tcount = 0;
1532 	dp->dad_arp_ocount++;
1533 
1534 	memset(&sip, 0, sizeof(sip));
1535 	arprequest(ifa->ifa_ifp, &sip, &ia->ia_addr.sin_addr,
1536 	    CLLADDR(ifa->ifa_ifp->if_sadl), NULL);
1537 }
1538 
1539 /*
1540  * Start Duplicate Address Detection (DAD) for specified interface address.
1541  */
1542 static void
1543 arp_dad_start(struct ifaddr *ifa)
1544 {
1545 	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
1546 	struct dadq *dp;
1547 	char ipbuf[INET_ADDRSTRLEN];
1548 
1549 	/*
1550 	 * If we don't need DAD, don't do it.
1551 	 * - DAD is disabled
1552 	 */
1553 	if (!(ia->ia4_flags & IN_IFF_TENTATIVE)) {
1554 		log(LOG_DEBUG,
1555 		    "%s: called with non-tentative address %s(%s)\n", __func__,
1556 		    IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1557 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1558 		return;
1559 	}
1560 	if (!ip_dad_enabled()) {
1561 		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1562 		rt_addrmsg(RTM_NEWADDR, ifa);
1563 		arpannounce1(ifa);
1564 		return;
1565 	}
1566 	KASSERT(ifa->ifa_ifp != NULL);
1567 	if (!(ifa->ifa_ifp->if_flags & IFF_UP))
1568 		return;
1569 
1570 	dp = kmem_intr_alloc(sizeof(*dp), KM_NOSLEEP);
1571 
1572 	mutex_enter(&arp_dad_lock);
1573 	if (arp_dad_find(ifa) != NULL) {
1574 		mutex_exit(&arp_dad_lock);
1575 		/* DAD already in progress */
1576 		if (dp != NULL)
1577 			kmem_intr_free(dp, sizeof(*dp));
1578 		return;
1579 	}
1580 
1581 	if (dp == NULL) {
1582 		mutex_exit(&arp_dad_lock);
1583 		log(LOG_ERR, "%s: memory allocation failed for %s(%s)\n",
1584 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1585 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1586 		return;
1587 	}
1588 
1589 	/*
1590 	 * Send ARP packet for DAD, ip_dad_count times.
1591 	 * Note that we must delay the first transmission.
1592 	 */
1593 	callout_init(&dp->dad_timer_ch, CALLOUT_MPSAFE);
1594 	dp->dad_ifa = ifa;
1595 	ifaref(ifa);	/* just for safety */
1596 	dp->dad_count = ip_dad_count;
1597 	dp->dad_arp_announce = 0; /* Will be set when starting to announce */
1598 	dp->dad_arp_acount = dp->dad_arp_ocount = dp->dad_arp_tcount = 0;
1599 	TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
1600 
1601 	ARPLOG(LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
1602 	    ARPLOGADDR(&ia->ia_addr.sin_addr));
1603 
1604 	arp_dad_starttimer(dp, cprng_fast32() % (PROBE_WAIT * hz));
1605 
1606 	mutex_exit(&arp_dad_lock);
1607 }
1608 
1609 /*
1610  * terminate DAD unconditionally.  used for address removals.
1611  */
1612 static void
1613 arp_dad_stop(struct ifaddr *ifa)
1614 {
1615 	struct dadq *dp;
1616 
1617 	mutex_enter(&arp_dad_lock);
1618 	dp = arp_dad_find(ifa);
1619 	if (dp == NULL) {
1620 		mutex_exit(&arp_dad_lock);
1621 		/* DAD wasn't started yet */
1622 		return;
1623 	}
1624 
1625 	arp_dad_stoptimer(dp);
1626 
1627 	mutex_exit(&arp_dad_lock);
1628 
1629 	arp_dad_destroytimer(dp);
1630 	ifafree(ifa);
1631 }
1632 
1633 static void
1634 arp_dad_timer(struct dadq *dp)
1635 {
1636 	struct ifaddr *ifa;
1637 	struct in_ifaddr *ia;
1638 	char ipbuf[INET_ADDRSTRLEN];
1639 	bool need_free = false;
1640 
1641 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
1642 	mutex_enter(&arp_dad_lock);
1643 
1644 	ifa = dp->dad_ifa;
1645 	if (ifa == NULL) {
1646 		/* dp is being destroyed by someone.  Do nothing. */
1647 		goto done;
1648 	}
1649 
1650 	ia = (struct in_ifaddr *)ifa;
1651 	if (ia->ia4_flags & IN_IFF_DUPLICATED) {
1652 		log(LOG_ERR, "%s: called with duplicate address %s(%s)\n",
1653 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1654 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1655 		goto done;
1656 	}
1657 	if ((ia->ia4_flags & IN_IFF_TENTATIVE) == 0 && dp->dad_arp_acount == 0)
1658 	{
1659 		log(LOG_ERR, "%s: called with non-tentative address %s(%s)\n",
1660 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1661 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1662 		goto done;
1663 	}
1664 
1665 	/* timeouted with IFF_{RUNNING,UP} check */
1666 	if (dp->dad_arp_tcount > dad_maxtry) {
1667 		ARPLOG(LOG_INFO, "%s: could not run DAD, driver problem?\n",
1668 		    if_name(ifa->ifa_ifp));
1669 
1670 		arp_dad_stoptimer(dp);
1671 		need_free = true;
1672 		goto done;
1673 	}
1674 
1675 	/* Need more checks? */
1676 	if (dp->dad_arp_ocount < dp->dad_count) {
1677 		int adelay;
1678 
1679 		/*
1680 		 * We have more ARP to go.  Send ARP packet for DAD.
1681 		 */
1682 		arp_dad_output(dp, ifa);
1683 		if (dp->dad_arp_ocount < dp->dad_count)
1684 			adelay = (PROBE_MIN * hz) +
1685 			    (cprng_fast32() %
1686 			    ((PROBE_MAX * hz) - (PROBE_MIN * hz)));
1687 		else
1688 			adelay = ANNOUNCE_WAIT * hz;
1689 		arp_dad_starttimer(dp, adelay);
1690 		goto done;
1691 	} else if (dp->dad_arp_acount == 0) {
1692 		/*
1693 		 * We are done with DAD.
1694 		 * No duplicate address found.
1695 		 */
1696 		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1697 		rt_addrmsg(RTM_NEWADDR, ifa);
1698 		ARPLOG(LOG_DEBUG,
1699 		    "%s: DAD complete for %s - no duplicates found\n",
1700 		    if_name(ifa->ifa_ifp), ARPLOGADDR(&ia->ia_addr.sin_addr));
1701 		dp->dad_arp_announce = ANNOUNCE_NUM;
1702 		goto announce;
1703 	} else if (dp->dad_arp_acount < dp->dad_arp_announce) {
1704 announce:
1705 		/*
1706 		 * Announce the address.
1707 		 */
1708 		arpannounce1(ifa);
1709 		dp->dad_arp_acount++;
1710 		if (dp->dad_arp_acount < dp->dad_arp_announce) {
1711 			arp_dad_starttimer(dp, ANNOUNCE_INTERVAL * hz);
1712 			goto done;
1713 		}
1714 		ARPLOG(LOG_DEBUG,
1715 		    "%s: ARP announcement complete for %s\n",
1716 		    if_name(ifa->ifa_ifp), ARPLOGADDR(&ia->ia_addr.sin_addr));
1717 	}
1718 
1719 	arp_dad_stoptimer(dp);
1720 	need_free = true;
1721 done:
1722 	mutex_exit(&arp_dad_lock);
1723 
1724 	if (need_free) {
1725 		arp_dad_destroytimer(dp);
1726 		KASSERT(ifa != NULL);
1727 		ifafree(ifa);
1728 	}
1729 
1730 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
1731 }
1732 
1733 static void
1734 arp_dad_duplicated(struct ifaddr *ifa, const struct sockaddr_dl *from)
1735 {
1736 	struct in_ifaddr *ia = ifatoia(ifa);
1737 	struct ifnet *ifp = ifa->ifa_ifp;
1738 	char ipbuf[INET_ADDRSTRLEN], llabuf[LLA_ADDRSTRLEN];
1739 	const char *iastr, *llastr;
1740 
1741 	iastr = IN_PRINT(ipbuf, &ia->ia_addr.sin_addr);
1742 	if (__predict_false(from == NULL))
1743 		llastr = NULL;
1744 	else
1745 		llastr = lla_snprintf(llabuf, sizeof(llabuf),
1746 		    CLLADDR(from), from->sdl_alen);
1747 
1748 	if (ia->ia4_flags & (IN_IFF_TENTATIVE|IN_IFF_DUPLICATED)) {
1749 		log(LOG_ERR,
1750 		    "%s: DAD duplicate address %s from %s\n",
1751 		    if_name(ifp), iastr, llastr);
1752 	} else if (ia->ia_dad_defended == 0 ||
1753 		   ia->ia_dad_defended < time_uptime - DEFEND_INTERVAL) {
1754 		ia->ia_dad_defended = time_uptime;
1755 		arpannounce1(ifa);
1756 		log(LOG_ERR,
1757 		    "%s: DAD defended address %s from %s\n",
1758 		    if_name(ifp), iastr, llastr);
1759 		return;
1760 	} else {
1761 		/* If DAD is disabled, just report the duplicate. */
1762 		if (!ip_dad_enabled()) {
1763 			log(LOG_ERR,
1764 			    "%s: DAD ignoring duplicate address %s from %s\n",
1765 			    if_name(ifp), iastr, llastr);
1766 			return;
1767 		}
1768 		log(LOG_ERR,
1769 		    "%s: DAD defence failed for %s from %s\n",
1770 		    if_name(ifp), iastr, llastr);
1771 	}
1772 
1773 	arp_dad_stop(ifa);
1774 
1775 	ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1776 	if ((ia->ia4_flags & IN_IFF_DUPLICATED) == 0) {
1777 		ia->ia4_flags |= IN_IFF_DUPLICATED;
1778 		/* Inform the routing socket of the duplicate address */
1779 		rt_addrmsg_src(RTM_NEWADDR, ifa, (const struct sockaddr *)from);
1780 	}
1781 }
1782 
1783 /*
1784  * Called from 10 Mb/s Ethernet interrupt handlers
1785  * when ether packet type ETHERTYPE_REVARP
1786  * is received.  Common length and type checks are done here,
1787  * then the protocol-specific routine is called.
1788  */
1789 void
1790 revarpinput(struct mbuf *m)
1791 {
1792 	struct arphdr *ar;
1793 	int arplen;
1794 
1795 	arplen = sizeof(struct arphdr);
1796 	if (m->m_len < arplen && (m = m_pullup(m, arplen)) == NULL)
1797 		return;
1798 	ar = mtod(m, struct arphdr *);
1799 
1800 	if (ntohs(ar->ar_hrd) == ARPHRD_IEEE1394) {
1801 		goto out;
1802 	}
1803 
1804 	arplen = sizeof(struct arphdr) + 2 * (ar->ar_hln + ar->ar_pln);
1805 	if (m->m_len < arplen && (m = m_pullup(m, arplen)) == NULL)
1806 		return;
1807 	ar = mtod(m, struct arphdr *);
1808 
1809 	switch (ntohs(ar->ar_pro)) {
1810 	case ETHERTYPE_IP:
1811 	case ETHERTYPE_IPTRAILERS:
1812 		in_revarpinput(m);
1813 		return;
1814 
1815 	default:
1816 		break;
1817 	}
1818 
1819 out:
1820 	m_freem(m);
1821 }
1822 
1823 /*
1824  * RARP for Internet protocols on 10 Mb/s Ethernet.
1825  * Algorithm is that given in RFC 903.
1826  * We are only using for bootstrap purposes to get an ip address for one of
1827  * our interfaces.  Thus we support no user-interface.
1828  *
1829  * Since the contents of the RARP reply are specific to the interface that
1830  * sent the request, this code must ensure that they are properly associated.
1831  *
1832  * Note: also supports ARP via RARP packets, per the RFC.
1833  */
1834 void
1835 in_revarpinput(struct mbuf *m)
1836 {
1837 	struct arphdr *ah;
1838 	void *tha;
1839 	int op;
1840 	struct ifnet *rcvif;
1841 	int s;
1842 
1843 	ah = mtod(m, struct arphdr *);
1844 	op = ntohs(ah->ar_op);
1845 
1846 	rcvif = m_get_rcvif(m, &s);
1847 	if (__predict_false(rcvif == NULL))
1848 		goto out;
1849 	if (rcvif->if_flags & IFF_NOARP)
1850 		goto out;
1851 
1852 	switch (rcvif->if_type) {
1853 	case IFT_IEEE1394:
1854 		/* ARP without target hardware address is not supported */
1855 		goto out;
1856 	default:
1857 		break;
1858 	}
1859 
1860 	switch (op) {
1861 	case ARPOP_REQUEST:
1862 	case ARPOP_REPLY:	/* per RFC */
1863 		m_put_rcvif(rcvif, &s);
1864 		in_arpinput(m);
1865 		return;
1866 	case ARPOP_REVREPLY:
1867 		break;
1868 	case ARPOP_REVREQUEST:	/* handled by rarpd(8) */
1869 	default:
1870 		goto out;
1871 	}
1872 	if (!revarp_in_progress)
1873 		goto out;
1874 	if (rcvif != myip_ifp) /* !same interface */
1875 		goto out;
1876 	if (myip_initialized)
1877 		goto wake;
1878 	tha = ar_tha(ah);
1879 	if (tha == NULL)
1880 		goto out;
1881 	if (ah->ar_pln != sizeof(struct in_addr))
1882 		goto out;
1883 	if (ah->ar_hln != rcvif->if_sadl->sdl_alen)
1884 		goto out;
1885 	if (memcmp(tha, CLLADDR(rcvif->if_sadl), rcvif->if_sadl->sdl_alen))
1886 		goto out;
1887 	memcpy(&srv_ip, ar_spa(ah), sizeof(srv_ip));
1888 	memcpy(&myip, ar_tpa(ah), sizeof(myip));
1889 	myip_initialized = 1;
1890 wake:	/* Do wakeup every time in case it was missed. */
1891 	wakeup((void *)&myip);
1892 
1893 out:
1894 	m_put_rcvif(rcvif, &s);
1895 	m_freem(m);
1896 }
1897 
1898 /*
1899  * Send a RARP request for the ip address of the specified interface.
1900  * The request should be RFC 903-compliant.
1901  */
1902 static void
1903 revarprequest(struct ifnet *ifp)
1904 {
1905 	struct sockaddr sa;
1906 	struct mbuf *m;
1907 	struct arphdr *ah;
1908 	void *tha;
1909 
1910 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
1911 		return;
1912 	MCLAIM(m, &arpdomain.dom_mowner);
1913 	m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
1914 	    2*ifp->if_addrlen;
1915 	m->m_pkthdr.len = m->m_len;
1916 	m_align(m, m->m_len);
1917 	ah = mtod(m, struct arphdr *);
1918 	memset(ah, 0, m->m_len);
1919 	ah->ar_pro = htons(ETHERTYPE_IP);
1920 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
1921 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
1922 	ah->ar_op = htons(ARPOP_REVREQUEST);
1923 
1924 	memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln);
1925 	tha = ar_tha(ah);
1926 	if (tha == NULL) {
1927 		m_free(m);
1928 		return;
1929 	}
1930 	memcpy(tha, CLLADDR(ifp->if_sadl), ah->ar_hln);
1931 
1932 	sa.sa_family = AF_ARP;
1933 	sa.sa_len = 2;
1934 	m->m_flags |= M_BCAST;
1935 
1936 	if_output_lock(ifp, ifp, m, &sa, NULL);
1937 }
1938 
1939 /*
1940  * RARP for the ip address of the specified interface, but also
1941  * save the ip address of the server that sent the answer.
1942  * Timeout if no response is received.
1943  */
1944 int
1945 revarpwhoarewe(struct ifnet *ifp, struct in_addr *serv_in,
1946     struct in_addr *clnt_in)
1947 {
1948 	int result, count = 20;
1949 
1950 	myip_initialized = 0;
1951 	myip_ifp = ifp;
1952 
1953 	revarp_in_progress = 1;
1954 	while (count--) {
1955 		revarprequest(ifp);
1956 		result = tsleep((void *)&myip, PSOCK, "revarp", hz/2);
1957 		if (result != EWOULDBLOCK)
1958 			break;
1959 	}
1960 	revarp_in_progress = 0;
1961 
1962 	if (!myip_initialized)
1963 		return ENETUNREACH;
1964 
1965 	memcpy(serv_in, &srv_ip, sizeof(*serv_in));
1966 	memcpy(clnt_in, &myip, sizeof(*clnt_in));
1967 	return 0;
1968 }
1969 
1970 void
1971 arp_stat_add(int type, uint64_t count)
1972 {
1973 	ARP_STATADD(type, count);
1974 }
1975 
1976 static int
1977 sysctl_net_inet_arp_stats(SYSCTLFN_ARGS)
1978 {
1979 
1980 	return NETSTAT_SYSCTL(arpstat_percpu, ARP_NSTATS);
1981 }
1982 
1983 static void
1984 sysctl_net_inet_arp_setup(struct sysctllog **clog)
1985 {
1986 	const struct sysctlnode *node;
1987 
1988 	sysctl_createv(clog, 0, NULL, NULL,
1989 			CTLFLAG_PERMANENT,
1990 			CTLTYPE_NODE, "inet", NULL,
1991 			NULL, 0, NULL, 0,
1992 			CTL_NET, PF_INET, CTL_EOL);
1993 	sysctl_createv(clog, 0, NULL, &node,
1994 			CTLFLAG_PERMANENT,
1995 			CTLTYPE_NODE, "arp",
1996 			SYSCTL_DESCR("Address Resolution Protocol"),
1997 			NULL, 0, NULL, 0,
1998 			CTL_NET, PF_INET, CTL_CREATE, CTL_EOL);
1999 
2000 	sysctl_createv(clog, 0, NULL, NULL,
2001 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2002 		       CTLTYPE_INT, "nd_delay",
2003 		       SYSCTL_DESCR("First probe delay time"),
2004 		       NULL, 0, &arp_nd_domain.nd_delay, 0,
2005 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2006 	sysctl_createv(clog, 0, NULL, NULL,
2007 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2008 		       CTLTYPE_INT, "nd_bmaxtries",
2009 		       SYSCTL_DESCR("Number of broadcast discovery attempts"),
2010 		       NULL, 0, &arp_nd_domain.nd_mmaxtries, 0,
2011 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2012 	sysctl_createv(clog, 0, NULL, NULL,
2013 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2014 		       CTLTYPE_INT, "nd_umaxtries",
2015 		       SYSCTL_DESCR("Number of unicast discovery attempts"),
2016 		       NULL, 0, &arp_nd_domain.nd_umaxtries, 0,
2017 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2018 	sysctl_createv(clog, 0, NULL, NULL,
2019 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2020 		       CTLTYPE_INT, "nd_reachable",
2021 		       SYSCTL_DESCR("Reachable time"),
2022 		       NULL, 0, &arp_reachable, 0,
2023 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2024 	sysctl_createv(clog, 0, NULL, NULL,
2025 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2026 		       CTLTYPE_INT, "nd_retrans",
2027 		       SYSCTL_DESCR("Retransmission time"),
2028 		       NULL, 0, &arp_retrans, 0,
2029 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2030 	sysctl_createv(clog, 0, NULL, NULL,
2031 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2032 		       CTLTYPE_INT, "nd_nud",
2033 		       SYSCTL_DESCR("Perform neighbour unreachability detection"),
2034 		       NULL, 0, &arp_perform_nud, 0,
2035 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2036 	sysctl_createv(clog, 0, NULL, NULL,
2037 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2038 		       CTLTYPE_INT, "nd_maxnudhint",
2039 		       SYSCTL_DESCR("Maximum neighbor unreachable hint count"),
2040 		       NULL, 0, &arp_nd_domain.nd_maxnudhint, 0,
2041 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2042 	sysctl_createv(clog, 0, NULL, NULL,
2043 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2044 		       CTLTYPE_INT, "maxqueuelen",
2045 		       SYSCTL_DESCR("max packet queue len for a unresolved ARP"),
2046 		       NULL, 1, &arp_nd_domain.nd_maxqueuelen, 0,
2047 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2048 
2049 	sysctl_createv(clog, 0, NULL, NULL,
2050 			CTLFLAG_PERMANENT,
2051 			CTLTYPE_STRUCT, "stats",
2052 			SYSCTL_DESCR("ARP statistics"),
2053 			sysctl_net_inet_arp_stats, 0, NULL, 0,
2054 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2055 
2056 	sysctl_createv(clog, 0, NULL, NULL,
2057 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2058 			CTLTYPE_INT, "log_movements",
2059 			SYSCTL_DESCR("log ARP replies from MACs different than"
2060 			    " the one in the cache"),
2061 			NULL, 0, &log_movements, 0,
2062 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2063 
2064 	sysctl_createv(clog, 0, NULL, NULL,
2065 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2066 			CTLTYPE_INT, "log_permanent_modify",
2067 			SYSCTL_DESCR("log ARP replies from MACs different than"
2068 			    " the one in the permanent arp entry"),
2069 			NULL, 0, &log_permanent_modify, 0,
2070 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2071 
2072 	sysctl_createv(clog, 0, NULL, NULL,
2073 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2074 			CTLTYPE_INT, "log_wrong_iface",
2075 			SYSCTL_DESCR("log ARP packets arriving on the wrong"
2076 			    " interface"),
2077 			NULL, 0, &log_wrong_iface, 0,
2078 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2079 
2080 	sysctl_createv(clog, 0, NULL, NULL,
2081 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2082 			CTLTYPE_INT, "debug",
2083 			SYSCTL_DESCR("Enable ARP DAD debug output"),
2084 			NULL, 0, &arp_debug, 0,
2085 			CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2086 }
2087 
2088 #endif /* INET */
2089