xref: /netbsd-src/sys/netinet/if_arp.c (revision 4d342c046e3288fb5a1edcd33cfec48c41c80664)
1 /*	$NetBSD: if_arp.c,v 1.297 2020/09/15 10:05:36 roy Exp $	*/
2 
3 /*
4  * Copyright (c) 1998, 2000, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Public Access Networks Corporation ("Panix").  It was developed under
9  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if_ether.c	8.2 (Berkeley) 9/26/94
62  */
63 
64 /*
65  * Ethernet address resolution protocol.
66  * TODO:
67  *	add "inuse/lock" bit (or ref. count) along with valid bit
68  */
69 
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.297 2020/09/15 10:05:36 roy Exp $");
72 
73 #ifdef _KERNEL_OPT
74 #include "opt_ddb.h"
75 #include "opt_inet.h"
76 #include "opt_net_mpsafe.h"
77 #endif
78 
79 #ifdef INET
80 
81 #include "arp.h"
82 #include "bridge.h"
83 
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/callout.h>
87 #include <sys/kmem.h>
88 #include <sys/mbuf.h>
89 #include <sys/socket.h>
90 #include <sys/time.h>
91 #include <sys/timetc.h>
92 #include <sys/kernel.h>
93 #include <sys/errno.h>
94 #include <sys/ioctl.h>
95 #include <sys/syslog.h>
96 #include <sys/proc.h>
97 #include <sys/protosw.h>
98 #include <sys/domain.h>
99 #include <sys/sysctl.h>
100 #include <sys/socketvar.h>
101 #include <sys/percpu.h>
102 #include <sys/cprng.h>
103 #include <sys/kmem.h>
104 
105 #include <net/ethertypes.h>
106 #include <net/if.h>
107 #include <net/if_dl.h>
108 #include <net/if_types.h>
109 #include <net/if_ether.h>
110 #include <net/if_llatbl.h>
111 #include <net/nd.h>
112 #include <net/route.h>
113 #include <net/net_stats.h>
114 
115 #include <netinet/in.h>
116 #include <netinet/in_systm.h>
117 #include <netinet/in_var.h>
118 #include <netinet/ip.h>
119 #include <netinet/if_inarp.h>
120 
121 #include "arcnet.h"
122 #if NARCNET > 0
123 #include <net/if_arc.h>
124 #endif
125 #include "carp.h"
126 #if NCARP > 0
127 #include <netinet/ip_carp.h>
128 #endif
129 
/*
 * ARP trailer negotiation.  Trailer protocol is not IP specific,
 * but ARP request/response use IP addresses.
 *
 * arpintr() accepts this protocol type alongside ETHERTYPE_IP.
 */
#define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL

/*
 * timers
 *
 * Initial values come from REACHABLE_TIME and RETRANS_TIMER.
 * NOTE(review): units and the consumers of these variables are not visible
 * in this part of the file -- presumably the arp_llinfo_*() callbacks
 * declared below; confirm.
 */
static int arp_reachable = REACHABLE_TIME;
static int arp_retrans = RETRANS_TIMER;
static int arp_perform_nud = 1;

/* Callbacks installed in arp_nd_domain below. */
static bool arp_nud_enabled(struct ifnet *);
static unsigned int arp_llinfo_reachable(struct ifnet *);
static unsigned int arp_llinfo_retrans(struct ifnet *);
static union l3addr *arp_llinfo_holdsrc(struct llentry *, union l3addr *);
static void arp_llinfo_output(struct ifnet *, const union l3addr *,
    const union l3addr *, const uint8_t *, const union l3addr *);
static void arp_llinfo_missed(struct ifnet *, const union l3addr *,
    int16_t, struct mbuf *);
static void arp_free(struct llentry *, int);
150 
151 static struct nd_domain arp_nd_domain = {
152 	.nd_family = AF_INET,
153 	.nd_delay = 5,		/* delay first probe time 5 second */
154 	.nd_mmaxtries = 3,	/* maximum broadcast query */
155 	.nd_umaxtries = 3,	/* maximum unicast query */
156 	.nd_retransmultiple = BACKOFF_MULTIPLE,
157 	.nd_maxretrans = MAX_RETRANS_TIMER,
158 	.nd_maxnudhint = 0,	/* max # of subsequent upper layer hints */
159 	.nd_maxqueuelen = 1,	/* max # of packets in unresolved ND entries */
160 	.nd_nud_enabled = arp_nud_enabled,
161 	.nd_reachable = arp_llinfo_reachable,
162 	.nd_retrans = arp_llinfo_retrans,
163 	.nd_holdsrc = arp_llinfo_holdsrc,
164 	.nd_output = arp_llinfo_output,
165 	.nd_missed = arp_llinfo_missed,
166 	.nd_free = arp_free,
167 };
168 
/*
 * Initialised from PROBE_NUM.
 * NOTE(review): presumably the number of duplicate-address-detection probes
 * sent per address; the consumer is not visible here -- confirm.
 */
int ip_dad_count = PROBE_NUM;
/* Debug switch: starts at 1 when the kernel is built with ARP_DEBUG. */
#ifdef ARP_DEBUG
int arp_debug = 1;
#else
int arp_debug = 0;
#endif

/* Initialisation; arp_init() is the pr_init hook of arpsw. */
static void arp_init(void);
static void arp_dad_init(void);

/* Packet construction, route glue and input processing. */
static void arprequest(struct ifnet *,
    const struct in_addr *, const struct in_addr *,
    const uint8_t *, const uint8_t *);
static void arpannounce1(struct ifaddr *);
static struct sockaddr *arp_setgate(struct rtentry *, struct sockaddr *,
    const struct sockaddr *);
static struct llentry *arpcreate(struct ifnet *,
    const struct in_addr *, const struct sockaddr *, int);
static void in_arpinput(struct mbuf *);
static void in_revarpinput(struct mbuf *);
static void revarprequest(struct ifnet *);

/* pr_drain hook of arpsw; only records that a drain is wanted. */
static void arp_drainstub(void);

/* Duplicate address detection; struct dadq is defined later in the file. */
struct dadq;
static void arp_dad_timer(struct dadq *);
static void arp_dad_start(struct ifaddr *);
static void arp_dad_stop(struct ifaddr *);
static void arp_dad_duplicated(struct ifaddr *, const struct sockaddr_dl *);
198 
199 struct ifqueue arpintrq = {
200 	.ifq_head = NULL,
201 	.ifq_tail = NULL,
202 	.ifq_len = 0,
203 	.ifq_maxlen = 50,
204 	.ifq_drops = 0,
205 };
/*
 * When non-zero, arp_rtrequest() points routes to our own addresses at
 * lo0ifp and clears their MTU.
 */
static int useloopback = 1;	/* use loopback interface for local traffic */

/* Per-CPU ARP counters: ARP_NSTATS uint64_t slots allocated in arp_init(). */
static percpu_t *arpstat_percpu;

/*
 * Bracket a batch of counter updates, as arprequest() does:
 * GETREF() yields the counter array, PUTREF() ends the access.
 */
#define	ARP_STAT_GETREF()	_NET_STAT_GETREF(arpstat_percpu)
#define	ARP_STAT_PUTREF()	_NET_STAT_PUTREF(arpstat_percpu)

/* Update a single counter (x is an ARP_STAT_* index). */
#define	ARP_STATINC(x)		_NET_STATINC(arpstat_percpu, x)
#define	ARP_STATADD(x, v)	_NET_STATADD(arpstat_percpu, x, v)

/*
 * revarp state
 * NOTE(review): consumers are outside this part of the file; presumably the
 * reverse-ARP request/input routines -- confirm.
 */
static struct in_addr myip, srv_ip;
static int myip_initialized = 0;
static int revarp_in_progress = 0;
static struct ifnet *myip_ifp = NULL;

/* Set by arp_drainstub(); consumed and cleared by arp_fasttimo(). */
static int arp_drainwanted;

/*
 * Logging switches consulted by in_arpinput() when a received packet would
 * change an existing entry: a changed link-layer address, an attempt to
 * overwrite a static entry, and an entry that belongs to another interface.
 */
static int log_movements = 0;
static int log_permanent_modify = 1;
static int log_wrong_iface = 1;

DOMAIN_DEFINE(arpdomain);	/* forward declare and add to link set */
229 
230 static void
231 arp_fasttimo(void)
232 {
233 	if (arp_drainwanted) {
234 		arp_drain();
235 		arp_drainwanted = 0;
236 	}
237 }
238 
239 static const struct protosw arpsw[] = {
240 	{
241 		.pr_type = 0,
242 		.pr_domain = &arpdomain,
243 		.pr_protocol = 0,
244 		.pr_flags = 0,
245 		.pr_input = 0,
246 		.pr_ctlinput = 0,
247 		.pr_ctloutput = 0,
248 		.pr_usrreqs = 0,
249 		.pr_init = arp_init,
250 		.pr_fasttimo = arp_fasttimo,
251 		.pr_slowtimo = 0,
252 		.pr_drain = arp_drainstub,
253 	}
254 };
255 
256 struct domain arpdomain = {
257 	.dom_family = PF_ARP,
258 	.dom_name = "arp",
259 	.dom_protosw = arpsw,
260 	.dom_protoswNPROTOSW = &arpsw[__arraycount(arpsw)],
261 #ifdef MBUFTRACE
262 	.dom_mowner = MOWNER_INIT("internet", "arp"),
263 #endif
264 };
265 
266 static void sysctl_net_inet_arp_setup(struct sysctllog **);
267 
268 void
269 arp_init(void)
270 {
271 
272 	sysctl_net_inet_arp_setup(NULL);
273 	arpstat_percpu = percpu_alloc(sizeof(uint64_t) * ARP_NSTATS);
274 	IFQ_LOCK_INIT(&arpintrq);
275 
276 #ifdef MBUFTRACE
277 	MOWNER_ATTACH(&arpdomain.dom_mowner);
278 #endif
279 
280 	nd_attach_domain(&arp_nd_domain);
281 	arp_dad_init();
282 }
283 
/*
 * pr_drain hook of arpsw.  Does no draining itself: it only raises
 * arp_drainwanted, which arp_fasttimo() acts on later.
 */
static void
arp_drainstub(void)
{
	arp_drainwanted = 1;
}
289 
/*
 * ARP protocol drain routine.  Called when memory is in short supply.
 * Called at splvm();  don't acquire softnet_lock as can be called from
 * hardware interrupt handlers.
 *
 * Within this file the call comes from arp_fasttimo(), after
 * arp_drainstub() has flagged that a drain is wanted.  The work itself is
 * delegated to lltable_drain() for AF_INET.
 */
void
arp_drain(void)
{

	lltable_drain(AF_INET);
}
301 
302 /*
303  * We set the gateway for RTF_CLONING routes to a "prototype"
304  * link-layer sockaddr whose interface type (if_type) and interface
305  * index (if_index) fields are prepared.
306  */
307 static struct sockaddr *
308 arp_setgate(struct rtentry *rt, struct sockaddr *gate,
309     const struct sockaddr *netmask)
310 {
311 	const struct ifnet *ifp = rt->rt_ifp;
312 	uint8_t namelen = strlen(ifp->if_xname);
313 	uint8_t addrlen = ifp->if_addrlen;
314 
315 	/*
316 	 * XXX: If this is a manually added route to interface
317 	 * such as older version of routed or gated might provide,
318 	 * restore cloning bit.
319 	 */
320 	if ((rt->rt_flags & RTF_HOST) == 0 && netmask != NULL &&
321 	    satocsin(netmask)->sin_addr.s_addr != 0xffffffff)
322 		rt->rt_flags |= RTF_CONNECTED;
323 
324 	if ((rt->rt_flags & (RTF_CONNECTED | RTF_LOCAL))) {
325 		union {
326 			struct sockaddr sa;
327 			struct sockaddr_storage ss;
328 			struct sockaddr_dl sdl;
329 		} u;
330 		/*
331 		 * Case 1: This route should come from a route to iface.
332 		 */
333 		sockaddr_dl_init(&u.sdl, sizeof(u.ss),
334 		    ifp->if_index, ifp->if_type, NULL, namelen, NULL, addrlen);
335 		rt_setgate(rt, &u.sa);
336 		gate = rt->rt_gateway;
337 	}
338 	return gate;
339 }
340 
341 /*
342  * Parallel to llc_rtrequest.
343  */
344 void
345 arp_rtrequest(int req, struct rtentry *rt, const struct rt_addrinfo *info)
346 {
347 	struct sockaddr *gate = rt->rt_gateway;
348 	struct in_ifaddr *ia;
349 	struct ifaddr *ifa;
350 	struct ifnet *ifp = rt->rt_ifp;
351 	int bound;
352 	int s;
353 
354 	if (req == RTM_LLINFO_UPD) {
355 		if ((ifa = info->rti_ifa) != NULL)
356 			arpannounce1(ifa);
357 		return;
358 	}
359 
360 	if ((rt->rt_flags & RTF_GATEWAY) != 0) {
361 		if (req != RTM_ADD)
362 			return;
363 
364 		/*
365 		 * linklayers with particular link MTU limitation.
366 		 */
367 		switch(ifp->if_type) {
368 #if NARCNET > 0
369 		case IFT_ARCNET:
370 		    {
371 			int arcipifmtu;
372 
373 			if (ifp->if_flags & IFF_LINK0)
374 				arcipifmtu = arc_ipmtu;
375 			else
376 				arcipifmtu = ARCMTU;
377 			if (ifp->if_mtu > arcipifmtu)
378 				rt->rt_rmx.rmx_mtu = arcipifmtu;
379 			break;
380 		    }
381 #endif
382 		}
383 		return;
384 	}
385 
386 	switch (req) {
387 	case RTM_SETGATE:
388 		gate = arp_setgate(rt, gate, info->rti_info[RTAX_NETMASK]);
389 		break;
390 	case RTM_ADD:
391 		gate = arp_setgate(rt, gate, info->rti_info[RTAX_NETMASK]);
392 		if (gate == NULL) {
393 			log(LOG_ERR, "%s: arp_setgate failed\n", __func__);
394 			break;
395 		}
396 		if ((rt->rt_flags & RTF_CONNECTED) ||
397 		    (rt->rt_flags & RTF_LOCAL)) {
398 			/*
399 			 * linklayers with particular link MTU limitation.
400 			 */
401 			switch (ifp->if_type) {
402 #if NARCNET > 0
403 			case IFT_ARCNET:
404 			    {
405 				int arcipifmtu;
406 				if (ifp->if_flags & IFF_LINK0)
407 					arcipifmtu = arc_ipmtu;
408 				else
409 					arcipifmtu = ARCMTU;
410 
411 				if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
412 				    (rt->rt_rmx.rmx_mtu > arcipifmtu ||
413 				     (rt->rt_rmx.rmx_mtu == 0 &&
414 				      ifp->if_mtu > arcipifmtu)))
415 					rt->rt_rmx.rmx_mtu = arcipifmtu;
416 				break;
417 			    }
418 #endif
419 			}
420 			if (rt->rt_flags & RTF_CONNECTED)
421 				break;
422 		}
423 
424 		bound = curlwp_bind();
425 		/* Announce a new entry if requested. */
426 		if (rt->rt_flags & RTF_ANNOUNCE) {
427 			struct psref psref;
428 			ia = in_get_ia_on_iface_psref(
429 			    satocsin(rt_getkey(rt))->sin_addr, ifp, &psref);
430 			if (ia != NULL) {
431 				arpannounce(ifp, &ia->ia_ifa,
432 				    CLLADDR(satocsdl(gate)));
433 				ia4_release(ia, &psref);
434 			}
435 		}
436 
437 		if (gate->sa_family != AF_LINK ||
438 		    gate->sa_len < sockaddr_dl_measure(0, ifp->if_addrlen)) {
439 			log(LOG_DEBUG, "%s: bad gateway value\n", __func__);
440 			goto out;
441 		}
442 
443 		satosdl(gate)->sdl_type = ifp->if_type;
444 		satosdl(gate)->sdl_index = ifp->if_index;
445 
446 		/*
447 		 * If the route is for a broadcast address mark it as such.
448 		 * This way we can avoid an expensive call to in_broadcast()
449 		 * in ip_output() most of the time (because the route passed
450 		 * to ip_output() is almost always a host route).
451 		 */
452 		if (rt->rt_flags & RTF_HOST &&
453 		    !(rt->rt_flags & RTF_BROADCAST) &&
454 		    in_broadcast(satocsin(rt_getkey(rt))->sin_addr, rt->rt_ifp))
455 			rt->rt_flags |= RTF_BROADCAST;
456 		/* There is little point in resolving the broadcast address */
457 		if (rt->rt_flags & RTF_BROADCAST)
458 			goto out;
459 
460 		/*
461 		 * When called from rt_ifa_addlocal, we cannot depend on that
462 		 * the address (rt_getkey(rt)) exits in the address list of the
463 		 * interface. So check RTF_LOCAL instead.
464 		 */
465 		if (rt->rt_flags & RTF_LOCAL) {
466 			if (useloopback) {
467 				rt->rt_ifp = lo0ifp;
468 				rt->rt_rmx.rmx_mtu = 0;
469 			}
470 			goto out;
471 		}
472 
473 		s = pserialize_read_enter();
474 		ia = in_get_ia_on_iface(satocsin(rt_getkey(rt))->sin_addr, ifp);
475 		if (ia == NULL) {
476 			pserialize_read_exit(s);
477 			goto out;
478 		}
479 
480 		if (useloopback) {
481 			rt->rt_ifp = lo0ifp;
482 			rt->rt_rmx.rmx_mtu = 0;
483 		}
484 		rt->rt_flags |= RTF_LOCAL;
485 
486 		if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA)) {
487 			pserialize_read_exit(s);
488 			goto out;
489 		}
490 		/*
491 		 * make sure to set rt->rt_ifa to the interface
492 		 * address we are using, otherwise we will have trouble
493 		 * with source address selection.
494 		 */
495 		ifa = &ia->ia_ifa;
496 		if (ifa != rt->rt_ifa)
497 			/* Assume it doesn't sleep */
498 			rt_replace_ifa(rt, ifa);
499 		pserialize_read_exit(s);
500 	out:
501 		curlwp_bindx(bound);
502 		break;
503 	}
504 }
505 
506 /*
507  * Broadcast an ARP request. Caller specifies:
508  *	- arp header source ip address
509  *	- arp header target ip address
510  *	- arp header source ethernet address
511  */
512 static void
513 arprequest(struct ifnet *ifp,
514     const struct in_addr *sip, const struct in_addr *tip,
515     const uint8_t *saddr, const uint8_t *taddr)
516 {
517 	struct mbuf *m;
518 	struct arphdr *ah;
519 	struct sockaddr sa;
520 	uint64_t *arps;
521 
522 	KASSERT(sip != NULL);
523 	KASSERT(tip != NULL);
524 	KASSERT(saddr != NULL);
525 
526 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
527 		return;
528 	MCLAIM(m, &arpdomain.dom_mowner);
529 	switch (ifp->if_type) {
530 	case IFT_IEEE1394:
531 		m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
532 		    ifp->if_addrlen;
533 		break;
534 	default:
535 		m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
536 		    2 * ifp->if_addrlen;
537 		break;
538 	}
539 	m->m_pkthdr.len = m->m_len;
540 	m_align(m, m->m_len);
541 	ah = mtod(m, struct arphdr *);
542 	memset(ah, 0, m->m_len);
543 	switch (ifp->if_type) {
544 	case IFT_IEEE1394:	/* RFC2734 */
545 		/* fill it now for ar_tpa computation */
546 		ah->ar_hrd = htons(ARPHRD_IEEE1394);
547 		break;
548 	default:
549 		/* ifp->if_output will fill ar_hrd */
550 		break;
551 	}
552 	ah->ar_pro = htons(ETHERTYPE_IP);
553 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
554 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
555 	ah->ar_op = htons(ARPOP_REQUEST);
556 	memcpy(ar_sha(ah), saddr, ah->ar_hln);
557 	if (taddr == NULL)
558 		m->m_flags |= M_BCAST;
559 	else
560 		memcpy(ar_tha(ah), taddr, ah->ar_hln);
561 	memcpy(ar_spa(ah), sip, ah->ar_pln);
562 	memcpy(ar_tpa(ah), tip, ah->ar_pln);
563 	sa.sa_family = AF_ARP;
564 	sa.sa_len = 2;
565 	arps = ARP_STAT_GETREF();
566 	arps[ARP_STAT_SNDTOTAL]++;
567 	arps[ARP_STAT_SENDREQUEST]++;
568 	ARP_STAT_PUTREF();
569 	if_output_lock(ifp, ifp, m, &sa, NULL);
570 }
571 
572 void
573 arpannounce(struct ifnet *ifp, struct ifaddr *ifa, const uint8_t *enaddr)
574 {
575 	struct in_ifaddr *ia = ifatoia(ifa);
576 	struct in_addr *ip = &IA_SIN(ifa)->sin_addr;
577 
578 	if (ia->ia4_flags & (IN_IFF_NOTREADY | IN_IFF_DETACHED)) {
579 		ARPLOG(LOG_DEBUG, "%s not ready\n", ARPLOGADDR(ip));
580 		return;
581 	}
582 	arprequest(ifp, ip, ip, enaddr, NULL);
583 }
584 
585 static void
586 arpannounce1(struct ifaddr *ifa)
587 {
588 
589 	arpannounce(ifa->ifa_ifp, ifa, CLLADDR(ifa->ifa_ifp->if_sadl));
590 }
591 
592 /*
593  * Resolve an IP address into an ethernet address.  If success, desten is
594  * filled in. If there is no entry in arptab, set one up and broadcast a
595  * request for the IP address. Hold onto this mbuf and resend it once the
596  * address is finally resolved.
597  *
598  * A return value of 0 indicates that desten has been filled in and the packet
599  * should be sent normally; a return value of EWOULDBLOCK indicates that the
600  * packet has been held pending resolution. Any other value indicates an
601  * error.
602  */
603 int
604 arpresolve(struct ifnet *ifp, const struct rtentry *rt, struct mbuf *m,
605     const struct sockaddr *dst, void *desten, size_t destlen)
606 {
607 	struct llentry *la;
608 	const char *create_lookup;
609 	int error;
610 
611 #if NCARP > 0
612 	if (rt != NULL && rt->rt_ifp->if_type == IFT_CARP)
613 		ifp = rt->rt_ifp;
614 #endif
615 
616 	KASSERT(m != NULL);
617 
618 	la = arplookup(ifp, NULL, dst, 0);
619 	if (la == NULL)
620 		goto notfound;
621 
622 	if (la->la_flags & LLE_VALID && la->ln_state == ND_LLINFO_REACHABLE) {
623 		KASSERT(destlen >= ifp->if_addrlen);
624 		memcpy(desten, &la->ll_addr, ifp->if_addrlen);
625 		LLE_RUNLOCK(la);
626 		return 0;
627 	}
628 
629 notfound:
630 	if (ifp->if_flags & IFF_NOARP) {
631 		if (la != NULL)
632 			LLE_RUNLOCK(la);
633 		error = ENOTSUP;
634 		goto bad;
635 	}
636 
637 	if (la == NULL) {
638 		struct rtentry *_rt;
639 
640 		create_lookup = "create";
641 		_rt = rtalloc1(dst, 0);
642 		IF_AFDATA_WLOCK(ifp);
643 		la = lla_create(LLTABLE(ifp), LLE_EXCLUSIVE, dst, _rt);
644 		IF_AFDATA_WUNLOCK(ifp);
645 		if (_rt != NULL)
646 			rt_unref(_rt);
647 		if (la == NULL)
648 			ARP_STATINC(ARP_STAT_ALLOCFAIL);
649 		else
650 			la->ln_state = ND_LLINFO_NOSTATE;
651 	} else if (LLE_TRY_UPGRADE(la) == 0) {
652 		create_lookup = "lookup";
653 		LLE_RUNLOCK(la);
654 		IF_AFDATA_RLOCK(ifp);
655 		la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
656 		IF_AFDATA_RUNLOCK(ifp);
657 	}
658 
659 	error = EINVAL;
660 	if (la == NULL) {
661 		log(LOG_DEBUG,
662 		    "%s: failed to %s llentry for %s on %s\n",
663 		    __func__, create_lookup, inet_ntoa(satocsin(dst)->sin_addr),
664 		    ifp->if_xname);
665 		goto bad;
666 	}
667 
668 	error = nd_resolve(la, rt, m, desten, destlen);
669 	return error;
670 
671 bad:
672 	m_freem(m);
673 	return error;
674 }
675 
676 /*
677  * Common length and type checks are done here,
678  * then the protocol-specific routine is called.
679  */
680 void
681 arpintr(void)
682 {
683 	struct mbuf *m;
684 	struct arphdr *ar;
685 	int s;
686 	int arplen;
687 
688 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
689 	for (;;) {
690 		struct ifnet *rcvif;
691 
692 		IFQ_LOCK(&arpintrq);
693 		IF_DEQUEUE(&arpintrq, m);
694 		IFQ_UNLOCK(&arpintrq);
695 		if (m == NULL)
696 			goto out;
697 		if ((m->m_flags & M_PKTHDR) == 0)
698 			panic("arpintr");
699 
700 		MCLAIM(m, &arpdomain.dom_mowner);
701 		ARP_STATINC(ARP_STAT_RCVTOTAL);
702 
703 		arplen = sizeof(struct arphdr);
704 		if (m->m_len < arplen && (m = m_pullup(m, arplen)) == NULL)
705 			goto badlen;
706 		ar = mtod(m, struct arphdr *);
707 
708 		rcvif = m_get_rcvif(m, &s);
709 		if (__predict_false(rcvif == NULL)) {
710 			ARP_STATINC(ARP_STAT_RCVNOINT);
711 			goto free;
712 		}
713 
714 		/*
715 		 * We don't want non-IEEE1394 ARP packets on IEEE1394
716 		 * interfaces, and vice versa. Our life depends on that.
717 		 */
718 		switch (rcvif->if_type) {
719 		case IFT_IEEE1394:
720 			if (ntohs(ar->ar_hrd) != ARPHRD_IEEE1394) {
721 				m_put_rcvif(rcvif, &s);
722 				ARP_STATINC(ARP_STAT_RCVBADPROTO);
723 				goto free;
724 			}
725 
726 			arplen = sizeof(struct arphdr) +
727 			    ar->ar_hln + 2 * ar->ar_pln;
728 			break;
729 		default:
730 			if (ntohs(ar->ar_hrd) == ARPHRD_IEEE1394) {
731 				m_put_rcvif(rcvif, &s);
732 				ARP_STATINC(ARP_STAT_RCVBADPROTO);
733 				goto free;
734 			}
735 
736 			arplen = sizeof(struct arphdr) +
737 			    2 * ar->ar_hln + 2 * ar->ar_pln;
738 			break;
739 		}
740 
741 		m_put_rcvif(rcvif, &s);
742 
743 		if (m->m_len < arplen && (m = m_pullup(m, arplen)) == NULL)
744 			goto badlen;
745 		ar = mtod(m, struct arphdr *);
746 
747 		switch (ntohs(ar->ar_pro)) {
748 		case ETHERTYPE_IP:
749 		case ETHERTYPE_IPTRAILERS:
750 			in_arpinput(m);
751 			continue;
752 		default:
753 			ARP_STATINC(ARP_STAT_RCVBADPROTO);
754 			goto free;
755 		}
756 
757 badlen:
758 		ARP_STATINC(ARP_STAT_RCVBADLEN);
759 free:
760 		m_freem(m);
761 	}
762 
763 out:
764 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
765 	return; /* XXX gcc */
766 }
767 
768 /*
769  * ARP for Internet protocols on 10 Mb/s Ethernet. Algorithm is that given in
770  * RFC 826. In addition, a sanity check is performed on the sender protocol
771  * address, to catch impersonators.
772  *
773  * We no longer handle negotiations for use of trailer protocol: formerly, ARP
774  * replied for protocol type ETHERTYPE_TRAIL sent along with IP replies if we
775  * wanted trailers sent to us, and also sent them in response to IP replies.
776  * This allowed either end to announce the desire to receive trailer packets.
777  *
778  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, but
779  * formerly didn't normally send requests.
780  */
781 static void
782 in_arpinput(struct mbuf *m)
783 {
784 	struct arphdr *ah;
785 	struct ifnet *ifp, *rcvif = NULL;
786 	struct llentry *la = NULL;
787 	struct in_ifaddr *ia = NULL;
788 #if NBRIDGE > 0
789 	struct in_ifaddr *bridge_ia = NULL;
790 #endif
791 #if NCARP > 0
792 	uint32_t count = 0, index = 0;
793 #endif
794 	struct sockaddr sa;
795 	struct in_addr isaddr, itaddr, myaddr;
796 	int op, rt_cmd, new_state = 0;
797 	void *tha;
798 	uint64_t *arps;
799 	struct psref psref, psref_ia;
800 	int s;
801 	char ipbuf[INET_ADDRSTRLEN];
802 	bool find_source, do_dad;
803 
804 	if (__predict_false(m_makewritable(&m, 0, m->m_pkthdr.len, M_DONTWAIT)))
805 		goto out;
806 	ah = mtod(m, struct arphdr *);
807 	op = ntohs(ah->ar_op);
808 
809 	if (ah->ar_pln != sizeof(struct in_addr))
810 		goto out;
811 
812 	ifp = if_get_bylla(ar_sha(ah), ah->ar_hln, &psref);
813 	if (ifp) {
814 		/* it's from me, ignore it. */
815 		if_put(ifp, &psref);
816 		ARP_STATINC(ARP_STAT_RCVLOCALSHA);
817 		goto out;
818 	}
819 
820 	rcvif = ifp = m_get_rcvif_psref(m, &psref);
821 	if (__predict_false(rcvif == NULL))
822 		goto out;
823 	if (rcvif->if_flags & IFF_NOARP)
824 		goto out;
825 
826 	memcpy(&isaddr, ar_spa(ah), sizeof(isaddr));
827 	memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr));
828 
829 	if (m->m_flags & (M_BCAST|M_MCAST))
830 		ARP_STATINC(ARP_STAT_RCVMCAST);
831 
832 	/*
833 	 * Search for a matching interface address
834 	 * or any address on the interface to use
835 	 * as a dummy address in the rest of this function.
836 	 *
837 	 * First try and find the source address for early
838 	 * duplicate address detection.
839 	 */
840 	if (in_nullhost(isaddr)) {
841 		if (in_nullhost(itaddr)) /* very bogus ARP */
842 			goto out;
843 		find_source = false;
844 		myaddr = itaddr;
845 	} else {
846 		find_source = true;
847 		myaddr = isaddr;
848 	}
849 	s = pserialize_read_enter();
850 again:
851 	IN_ADDRHASH_READER_FOREACH(ia, myaddr.s_addr) {
852 		if (!in_hosteq(ia->ia_addr.sin_addr, myaddr))
853 			continue;
854 #if NCARP > 0
855 		if (ia->ia_ifp->if_type == IFT_CARP &&
856 		    ((ia->ia_ifp->if_flags & (IFF_UP|IFF_RUNNING)) ==
857 		    (IFF_UP|IFF_RUNNING))) {
858 			index++;
859 			/* XXX: ar_hln? */
860 			if (ia->ia_ifp == rcvif && (ah->ar_hln >= 6) &&
861 			    carp_iamatch(ia, ar_sha(ah),
862 			    &count, index)) {
863 				break;
864 			}
865 		} else
866 #endif
867 		if (ia->ia_ifp == rcvif)
868 			break;
869 #if NBRIDGE > 0
870 		/*
871 		 * If the interface we received the packet on
872 		 * is part of a bridge, check to see if we need
873 		 * to "bridge" the packet to ourselves at this
874 		 * layer.  Note we still prefer a perfect match,
875 		 * but allow this weaker match if necessary.
876 		 */
877 		if (rcvif->if_bridge != NULL &&
878 		    rcvif->if_bridge == ia->ia_ifp->if_bridge)
879 			bridge_ia = ia;
880 #endif
881 	}
882 
883 #if NBRIDGE > 0
884 	if (ia == NULL && bridge_ia != NULL) {
885 		ia = bridge_ia;
886 		m_put_rcvif_psref(rcvif, &psref);
887 		rcvif = NULL;
888 		/* FIXME */
889 		ifp = bridge_ia->ia_ifp;
890 	}
891 #endif
892 
893 	/* If we failed to find the source address then find
894 	 * the target address. */
895 	if (ia == NULL && find_source && !in_nullhost(itaddr)) {
896 		find_source = false;
897 		myaddr = itaddr;
898 		goto again;
899 	}
900 
901 	if (ia != NULL)
902 		ia4_acquire(ia, &psref_ia);
903 	pserialize_read_exit(s);
904 
905 	if (ah->ar_hln != ifp->if_addrlen) {
906 		ARP_STATINC(ARP_STAT_RCVBADLEN);
907 		log(LOG_WARNING,
908 		    "arp from %s: addr len: new %d, i/f %d (ignored)\n",
909 		    IN_PRINT(ipbuf, &isaddr), ah->ar_hln, ifp->if_addrlen);
910 		goto out;
911 	}
912 
913 	/* Only do DaD if we have a matching address. */
914 	do_dad = (ia != NULL);
915 
916 	if (ia == NULL) {
917 		ia = in_get_ia_on_iface_psref(isaddr, rcvif, &psref_ia);
918 		if (ia == NULL) {
919 			ia = in_get_ia_from_ifp_psref(ifp, &psref_ia);
920 			if (ia == NULL) {
921 				ARP_STATINC(ARP_STAT_RCVNOINT);
922 				goto out;
923 			}
924 		}
925 	}
926 
927 	myaddr = ia->ia_addr.sin_addr;
928 
929 	/* XXX checks for bridge case? */
930 	if (!memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
931 		ARP_STATINC(ARP_STAT_RCVBCASTSHA);
932 		log(LOG_ERR,
933 		    "%s: arp: link address is broadcast for IP address %s!\n",
934 		    ifp->if_xname, IN_PRINT(ipbuf, &isaddr));
935 		goto out;
936 	}
937 
938 	/*
939 	 * If the source IP address is zero, this is an RFC 5227 ARP probe
940 	 */
941 	if (in_nullhost(isaddr))
942 		ARP_STATINC(ARP_STAT_RCVZEROSPA);
943 	else if (in_hosteq(isaddr, myaddr))
944 		ARP_STATINC(ARP_STAT_RCVLOCALSPA);
945 
946 	if (in_nullhost(itaddr))
947 		ARP_STATINC(ARP_STAT_RCVZEROTPA);
948 
949 	/*
950 	 * DAD check, RFC 5227.
951 	 * Collision on sender address is always a duplicate.
952 	 * Collision on target address is only a duplicate
953 	 * IF the sender address is the null host (ie a DAD probe)
954 	 * AND the message was broadcast
955 	 * AND our address is either tentative or duplicated
956 	 * If it was unicast then it's a valid Unicast Poll from RFC 1122.
957 	 */
958 	if (do_dad &&
959 	    (in_hosteq(isaddr, myaddr) ||
960 	    (in_nullhost(isaddr) && in_hosteq(itaddr, myaddr) &&
961 	     m->m_flags & M_BCAST &&
962 	     ia->ia4_flags & (IN_IFF_TENTATIVE | IN_IFF_DUPLICATED))))
963 	{
964 		struct sockaddr_dl sdl, *sdlp;
965 
966 		sdlp = sockaddr_dl_init(&sdl, sizeof(sdl),
967 		    ifp->if_index, ifp->if_type,
968 		    NULL, 0, ar_sha(ah), ah->ar_hln);
969 		arp_dad_duplicated((struct ifaddr *)ia, sdlp);
970 		goto out;
971 	}
972 
973 	/*
974 	 * If the target IP address is zero, ignore the packet.
975 	 * This prevents the code below from trying to answer
976 	 * when we are using IP address zero (booting).
977 	 */
978 	if (in_nullhost(itaddr))
979 		goto out;
980 
981 	if (in_nullhost(isaddr))
982 		goto reply;
983 
984 	if (in_hosteq(itaddr, myaddr))
985 		la = arpcreate(ifp, &isaddr, NULL, 1);
986 	else
987 		la = arplookup(ifp, &isaddr, NULL, 1);
988 	if (la == NULL)
989 		goto reply;
990 
991 	if ((la->la_flags & LLE_VALID) &&
992 	    memcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen))
993 	{
994 		char llabuf[LLA_ADDRSTRLEN], *llastr;
995 
996 		llastr = lla_snprintf(llabuf, sizeof(llabuf),
997 		    ar_sha(ah), ah->ar_hln);
998 
999 		if (la->la_flags & LLE_STATIC) {
1000 			ARP_STATINC(ARP_STAT_RCVOVERPERM);
1001 			if (!log_permanent_modify)
1002 				goto out;
1003 			log(LOG_INFO,
1004 			    "%s tried to overwrite permanent arp info"
1005 			    " for %s\n", llastr, IN_PRINT(ipbuf, &isaddr));
1006 			goto out;
1007 		} else if (la->lle_tbl->llt_ifp != ifp) {
1008 			/* XXX should not happen? */
1009 			ARP_STATINC(ARP_STAT_RCVOVERINT);
1010 			if (!log_wrong_iface)
1011 				goto out;
1012 			log(LOG_INFO,
1013 			    "%s on %s tried to overwrite "
1014 			    "arp info for %s on %s\n",
1015 			    llastr,
1016 			    ifp->if_xname, IN_PRINT(ipbuf, &isaddr),
1017 			    la->lle_tbl->llt_ifp->if_xname);
1018 				goto out;
1019 		} else {
1020 			ARP_STATINC(ARP_STAT_RCVOVER);
1021 			if (log_movements)
1022 				log(LOG_INFO, "arp info overwritten "
1023 				    "for %s by %s\n",
1024 				    IN_PRINT(ipbuf, &isaddr), llastr);
1025 		}
1026 		rt_cmd = RTM_CHANGE;
1027 		new_state = ND_LLINFO_STALE;
1028 	} else {
1029 		if (op == ARPOP_REPLY && in_hosteq(itaddr, myaddr)) {
1030 			/* This was a solicited ARP reply. */
1031 			la->ln_byhint = 0;
1032 			new_state = ND_LLINFO_REACHABLE;
1033 		}
1034 		rt_cmd = la->la_flags & LLE_VALID ? 0 : RTM_ADD;
1035 	}
1036 
1037 	KASSERT(ifp->if_sadl->sdl_alen == ifp->if_addrlen);
1038 
1039 	KASSERT(sizeof(la->ll_addr) >= ifp->if_addrlen);
1040 	memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
1041 	la->la_flags |= LLE_VALID;
1042 	la->ln_asked = 0;
1043 	if (new_state != 0) {
1044 		la->ln_state = new_state;
1045 
1046 		if (new_state != ND_LLINFO_REACHABLE ||
1047 		    !(la->la_flags & LLE_STATIC))
1048 		{
1049 			int timer = ND_TIMER_GC;
1050 
1051 			if (new_state == ND_LLINFO_REACHABLE)
1052 				timer = ND_TIMER_REACHABLE;
1053 			nd_set_timer(la, timer);
1054 		}
1055 	}
1056 
1057 	if (rt_cmd != 0) {
1058 		struct sockaddr_in sin;
1059 
1060 		sockaddr_in_init(&sin, &la->r_l3addr.addr4, 0);
1061 		rt_clonedmsg(rt_cmd, NULL, sintosa(&sin), ar_sha(ah), ifp);
1062 	}
1063 
1064 	if (la->la_hold != NULL) {
1065 		int n = la->la_numheld;
1066 		struct mbuf *m_hold, *m_hold_next;
1067 		struct sockaddr_in sin;
1068 
1069 		sockaddr_in_init(&sin, &la->r_l3addr.addr4, 0);
1070 
1071 		m_hold = la->la_hold;
1072 		la->la_hold = NULL;
1073 		la->la_numheld = 0;
1074 		/*
1075 		 * We have to unlock here because if_output would call
1076 		 * arpresolve
1077 		 */
1078 		LLE_WUNLOCK(la);
1079 		ARP_STATADD(ARP_STAT_DFRSENT, n);
1080 		ARP_STATADD(ARP_STAT_DFRTOTAL, n);
1081 		for (; m_hold != NULL; m_hold = m_hold_next) {
1082 			m_hold_next = m_hold->m_nextpkt;
1083 			m_hold->m_nextpkt = NULL;
1084 			if_output_lock(ifp, ifp, m_hold, sintosa(&sin), NULL);
1085 		}
1086 	} else
1087 		LLE_WUNLOCK(la);
1088 	la = NULL;
1089 
1090 reply:
1091 	if (la != NULL) {
1092 		LLE_WUNLOCK(la);
1093 		la = NULL;
1094 	}
1095 	if (op != ARPOP_REQUEST) {
1096 		if (op == ARPOP_REPLY)
1097 			ARP_STATINC(ARP_STAT_RCVREPLY);
1098 		goto out;
1099 	}
1100 	ARP_STATINC(ARP_STAT_RCVREQUEST);
1101 	if (in_hosteq(itaddr, myaddr)) {
1102 		/* If our address is unusable, don't reply */
1103 		if (ia->ia4_flags & (IN_IFF_NOTREADY | IN_IFF_DETACHED))
1104 			goto out;
1105 		/* I am the target */
1106 		tha = ar_tha(ah);
1107 		if (tha)
1108 			memcpy(tha, ar_sha(ah), ah->ar_hln);
1109 		memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln);
1110 	} else {
1111 		/* Proxy ARP */
1112 		struct llentry *lle = NULL;
1113 		struct sockaddr_in sin;
1114 
1115 #if NCARP > 0
1116 		if (ifp->if_type == IFT_CARP) {
1117 			struct ifnet *_rcvif = m_get_rcvif(m, &s);
1118 			int iftype = 0;
1119 			if (__predict_true(_rcvif != NULL))
1120 				iftype = _rcvif->if_type;
1121 			m_put_rcvif(_rcvif, &s);
1122 			if (iftype != IFT_CARP)
1123 				goto out;
1124 		}
1125 #endif
1126 
1127 		tha = ar_tha(ah);
1128 
1129 		sockaddr_in_init(&sin, &itaddr, 0);
1130 
1131 		IF_AFDATA_RLOCK(ifp);
1132 		lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
1133 		IF_AFDATA_RUNLOCK(ifp);
1134 
1135 		if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
1136 			if (tha)
1137 				memcpy(tha, ar_sha(ah), ah->ar_hln);
1138 			memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
1139 			LLE_RUNLOCK(lle);
1140 		} else {
1141 			if (lle != NULL)
1142 				LLE_RUNLOCK(lle);
1143 			goto out;
1144 		}
1145 	}
1146 	ia4_release(ia, &psref_ia);
1147 
1148 	/*
1149 	 * XXX XXX: Here we're recycling the mbuf. But the mbuf could have
1150 	 * other mbufs in its chain, and just overwriting m->m_pkthdr.len
1151 	 * would be wrong in this case (the length becomes smaller than the
1152 	 * real chain size).
1153 	 *
1154 	 * This can theoretically cause bugs in the lower layers (drivers,
1155 	 * and L2encap), in some corner cases.
1156 	 */
1157 	memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
1158 	memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
1159 	ah->ar_op = htons(ARPOP_REPLY);
1160 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
1161 	switch (ifp->if_type) {
1162 	case IFT_IEEE1394:
1163 		/* ieee1394 arp reply is broadcast */
1164 		m->m_flags &= ~M_MCAST;
1165 		m->m_flags |= M_BCAST;
1166 		m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + ah->ar_hln;
1167 		break;
1168 	default:
1169 		m->m_flags &= ~(M_BCAST|M_MCAST); /* never reply by broadcast */
1170 		m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
1171 		break;
1172 	}
1173 	m->m_pkthdr.len = m->m_len;
1174 	sa.sa_family = AF_ARP;
1175 	sa.sa_len = 2;
1176 	arps = ARP_STAT_GETREF();
1177 	arps[ARP_STAT_SNDTOTAL]++;
1178 	arps[ARP_STAT_SNDREPLY]++;
1179 	ARP_STAT_PUTREF();
1180 	if_output_lock(ifp, ifp, m, &sa, NULL);
1181 	if (rcvif != NULL)
1182 		m_put_rcvif_psref(rcvif, &psref);
1183 	return;
1184 
1185 out:
1186 	if (la != NULL)
1187 		LLE_WUNLOCK(la);
1188 	if (ia != NULL)
1189 		ia4_release(ia, &psref_ia);
1190 	if (rcvif != NULL)
1191 		m_put_rcvif_psref(rcvif, &psref);
1192 	m_freem(m);
1193 }
1194 
1195 /*
1196  * Lookup or a new address in arptab.
1197  */
1198 struct llentry *
1199 arplookup(struct ifnet *ifp, const struct in_addr *addr,
1200     const struct sockaddr *sa, int wlock)
1201 {
1202 	struct sockaddr_in sin;
1203 	struct llentry *la;
1204 	int flags = wlock ? LLE_EXCLUSIVE : 0;
1205 
1206 	if (sa == NULL) {
1207 		KASSERT(addr != NULL);
1208 		sockaddr_in_init(&sin, addr, 0);
1209 		sa = sintocsa(&sin);
1210 	}
1211 
1212 	IF_AFDATA_RLOCK(ifp);
1213 	la = lla_lookup(LLTABLE(ifp), flags, sa);
1214 	IF_AFDATA_RUNLOCK(ifp);
1215 
1216 	return la;
1217 }
1218 
1219 static struct llentry *
1220 arpcreate(struct ifnet *ifp, const struct in_addr *addr,
1221     const struct sockaddr *sa, int wlock)
1222 {
1223 	struct sockaddr_in sin;
1224 	struct llentry *la;
1225 	int flags = wlock ? LLE_EXCLUSIVE : 0;
1226 
1227 	if (sa == NULL) {
1228 		KASSERT(addr != NULL);
1229 		sockaddr_in_init(&sin, addr, 0);
1230 		sa = sintocsa(&sin);
1231 	}
1232 
1233 	la = arplookup(ifp, addr, sa, wlock);
1234 
1235 	if (la == NULL) {
1236 		struct rtentry *rt;
1237 
1238 		rt = rtalloc1(sa, 0);
1239 		IF_AFDATA_WLOCK(ifp);
1240 		la = lla_create(LLTABLE(ifp), flags, sa, rt);
1241 		IF_AFDATA_WUNLOCK(ifp);
1242 		if (rt != NULL)
1243 			rt_unref(rt);
1244 
1245 		if (la != NULL)
1246 			la->ln_state = ND_LLINFO_NOSTATE;
1247 	}
1248 
1249 	return la;
1250 }
1251 
1252 int
1253 arpioctl(u_long cmd, void *data)
1254 {
1255 
1256 	return EOPNOTSUPP;
1257 }
1258 
1259 void
1260 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
1261 {
1262 	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
1263 
1264 	ifa->ifa_rtrequest = arp_rtrequest;
1265 	ifa->ifa_flags |= RTF_CONNECTED;
1266 
1267 	/* ARP will handle DAD for this address. */
1268 	if (in_nullhost(IA_SIN(ifa)->sin_addr)) {
1269 		if (ia->ia_dad_stop != NULL)	/* safety */
1270 			ia->ia_dad_stop(ifa);
1271 		ia->ia_dad_start = NULL;
1272 		ia->ia_dad_stop = NULL;
1273 		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1274 	} else {
1275 		ia->ia_dad_start = arp_dad_start;
1276 		ia->ia_dad_stop = arp_dad_stop;
1277 		if (ia->ia4_flags & IN_IFF_TRYTENTATIVE && ip_dad_enabled())
1278 			ia->ia4_flags |= IN_IFF_TENTATIVE;
1279 		else
1280 			arpannounce1(ifa);
1281 	}
1282 }
1283 
1284 static bool
1285 arp_nud_enabled(__unused struct ifnet *ifp)
1286 {
1287 
1288 	return arp_perform_nud != 0;
1289 }
1290 
1291 static unsigned int
1292 arp_llinfo_reachable(__unused struct ifnet *ifp)
1293 {
1294 
1295 	return arp_reachable;
1296 }
1297 
1298 static unsigned int
1299 arp_llinfo_retrans(__unused struct ifnet *ifp)
1300 {
1301 
1302 	return arp_retrans;
1303 }
1304 
1305 /*
1306  * Gets source address of the first packet in hold queue
1307  * and stores it in @src.
1308  * Returns pointer to @src (if hold queue is not empty) or NULL.
1309  */
1310 static union l3addr *
1311 arp_llinfo_holdsrc(struct llentry *ln, union l3addr *src)
1312 {
1313 	struct ip *ip;
1314 
1315 	if (ln == NULL || ln->ln_hold == NULL)
1316 		return NULL;
1317 
1318 	/*
1319 	 * assuming every packet in ln_hold has the same IP header
1320 	 */
1321 	ip = mtod(ln->ln_hold, struct ip *);
1322 	/* XXX pullup? */
1323 	if (sizeof(*ip) < ln->ln_hold->m_len)
1324 		src->addr4 = ip->ip_src;
1325 	else
1326 		src = NULL;
1327 
1328 	return src;
1329 }
1330 
1331 static void
1332 arp_llinfo_output(struct ifnet *ifp, __unused const union l3addr *daddr,
1333     const union l3addr *taddr, const uint8_t *tlladdr,
1334     const union l3addr *hsrc)
1335 {
1336 	struct in_addr tip = taddr->addr4, sip = zeroin_addr;
1337 	const uint8_t *slladdr = CLLADDR(ifp->if_sadl);
1338 
1339 	if (hsrc != NULL) {
1340 		struct in_ifaddr *ia;
1341 		struct psref psref;
1342 
1343 		ia = in_get_ia_on_iface_psref(hsrc->addr4, ifp, &psref);
1344 		if (ia != NULL) {
1345 			sip = hsrc->addr4;
1346 			ia4_release(ia, &psref);
1347 		}
1348 	}
1349 
1350 	if (sip.s_addr == INADDR_ANY) {
1351 		struct sockaddr_in dst;
1352 		struct rtentry *rt;
1353 
1354 		sockaddr_in_init(&dst, &tip, 0);
1355 		rt = rtalloc1(sintosa(&dst), 0);
1356 		if (rt != NULL) {
1357 			if (rt->rt_ifp == ifp &&
1358 			    rt->rt_ifa != NULL &&
1359 			    rt->rt_ifa->ifa_addr->sa_family == AF_INET)
1360 				sip = satosin(rt->rt_ifa->ifa_addr)->sin_addr;
1361 			rt_unref(rt);
1362 		}
1363 		if (sip.s_addr == INADDR_ANY) {
1364 			char ipbuf[INET_ADDRSTRLEN];
1365 
1366 			log(LOG_DEBUG, "source can't be "
1367 			    "determined: dst=%s\n",
1368 			    IN_PRINT(ipbuf, &tip));
1369 			return;
1370 		}
1371 	}
1372 
1373 	arprequest(ifp, &sip, &tip, slladdr, tlladdr);
1374 }
1375 
1376 
1377 static void
1378 arp_llinfo_missed(struct ifnet *ifp, const union l3addr *taddr,
1379     __unused int16_t type, struct mbuf *m)
1380 {
1381 	struct in_addr mdaddr = zeroin_addr;
1382 	struct sockaddr_in dsin, tsin;
1383 	struct sockaddr *sa;
1384 
1385 	if (m != NULL) {
1386 		struct ip *ip = mtod(m, struct ip *);
1387 
1388 		if (sizeof(*ip) < m->m_len)
1389 			mdaddr = ip->ip_src;
1390 
1391 		/* ip_input() will send ICMP_UNREACH_HOST, not us. */
1392 		m_free(m);
1393 	}
1394 
1395 	if (mdaddr.s_addr != INADDR_ANY) {
1396 		sockaddr_in_init(&dsin, &mdaddr, 0);
1397 		sa = sintosa(&dsin);
1398 	} else
1399 		sa = NULL;
1400 
1401 	sockaddr_in_init(&tsin, &taddr->addr4, 0);
1402 	rt_clonedmsg(RTM_MISS, sa, sintosa(&tsin), NULL, ifp);
1403 }
1404 
1405 static void
1406 arp_free(struct llentry *ln, int gc)
1407 {
1408 	struct ifnet *ifp;
1409 
1410 	KASSERT(ln != NULL);
1411 	LLE_WLOCK_ASSERT(ln);
1412 
1413 	ifp = ln->lle_tbl->llt_ifp;
1414 
1415 	if (ln->la_flags & LLE_VALID || gc) {
1416 		struct sockaddr_in sin;
1417 		const char *lladdr;
1418 
1419 		sockaddr_in_init(&sin, &ln->r_l3addr.addr4, 0);
1420 		lladdr = ln->la_flags & LLE_VALID ?
1421 		    (const char *)&ln->ll_addr : NULL;
1422 		rt_clonedmsg(RTM_DELETE, NULL, sintosa(&sin), lladdr, ifp);
1423 	}
1424 
1425 	/*
1426 	 * Save to unlock. We still hold an extra reference and will not
1427 	 * free(9) in llentry_free() if someone else holds one as well.
1428 	 */
1429 	LLE_WUNLOCK(ln);
1430 	IF_AFDATA_LOCK(ifp);
1431 	LLE_WLOCK(ln);
1432 
1433 	lltable_free_entry(LLTABLE(ifp), ln);
1434 
1435 	IF_AFDATA_UNLOCK(ifp);
1436 }
1437 
1438 /*
1439  * Upper-layer reachability hint for Neighbor Unreachability Detection.
1440  *
1441  * XXX cost-effective methods?
1442  */
1443 void
1444 arp_nud_hint(struct rtentry *rt)
1445 {
1446 	struct llentry *ln;
1447 	struct ifnet *ifp;
1448 
1449 	if (rt == NULL)
1450 		return;
1451 
1452 	ifp = rt->rt_ifp;
1453 	ln = arplookup(ifp, NULL, rt_getkey(rt), 1);
1454 	nd_nud_hint(ln);
1455 }
1456 
1457 TAILQ_HEAD(dadq_head, dadq);
1458 struct dadq {
1459 	TAILQ_ENTRY(dadq) dad_list;
1460 	struct ifaddr *dad_ifa;
1461 	int dad_count;		/* max ARP to send */
1462 	int dad_arp_tcount;	/* # of trials to send ARP */
1463 	int dad_arp_ocount;	/* ARP sent so far */
1464 	int dad_arp_announce;	/* max ARP announcements */
1465 	int dad_arp_acount;	/* # of announcements */
1466 	struct callout dad_timer_ch;
1467 };
1468 
1469 static struct dadq_head dadq;
1470 static int dad_maxtry = 15;     /* max # of *tries* to transmit DAD packet */
1471 static kmutex_t arp_dad_lock;
1472 
1473 static void
1474 arp_dad_init(void)
1475 {
1476 
1477 	TAILQ_INIT(&dadq);
1478 	mutex_init(&arp_dad_lock, MUTEX_DEFAULT, IPL_NONE);
1479 }
1480 
1481 static struct dadq *
1482 arp_dad_find(struct ifaddr *ifa)
1483 {
1484 	struct dadq *dp;
1485 
1486 	KASSERT(mutex_owned(&arp_dad_lock));
1487 
1488 	TAILQ_FOREACH(dp, &dadq, dad_list) {
1489 		if (dp->dad_ifa == ifa)
1490 			return dp;
1491 	}
1492 	return NULL;
1493 }
1494 
1495 static void
1496 arp_dad_starttimer(struct dadq *dp, int ticks)
1497 {
1498 
1499 	callout_reset(&dp->dad_timer_ch, ticks,
1500 	    (void (*)(void *))arp_dad_timer, dp);
1501 }
1502 
1503 static void
1504 arp_dad_stoptimer(struct dadq *dp)
1505 {
1506 
1507 	KASSERT(mutex_owned(&arp_dad_lock));
1508 
1509 	TAILQ_REMOVE(&dadq, dp, dad_list);
1510 	/* Tell the timer that dp is being destroyed. */
1511 	dp->dad_ifa = NULL;
1512 	callout_halt(&dp->dad_timer_ch, &arp_dad_lock);
1513 }
1514 
1515 static void
1516 arp_dad_destroytimer(struct dadq *dp)
1517 {
1518 
1519 	callout_destroy(&dp->dad_timer_ch);
1520 	KASSERT(dp->dad_ifa == NULL);
1521 	kmem_intr_free(dp, sizeof(*dp));
1522 }
1523 
1524 static void
1525 arp_dad_output(struct dadq *dp, struct ifaddr *ifa)
1526 {
1527 	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
1528 	struct ifnet *ifp = ifa->ifa_ifp;
1529 	struct in_addr sip;
1530 
1531 	dp->dad_arp_tcount++;
1532 	if ((ifp->if_flags & IFF_UP) == 0)
1533 		return;
1534 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1535 		return;
1536 
1537 	dp->dad_arp_tcount = 0;
1538 	dp->dad_arp_ocount++;
1539 
1540 	memset(&sip, 0, sizeof(sip));
1541 	arprequest(ifa->ifa_ifp, &sip, &ia->ia_addr.sin_addr,
1542 	    CLLADDR(ifa->ifa_ifp->if_sadl), NULL);
1543 }
1544 
1545 /*
1546  * Start Duplicate Address Detection (DAD) for specified interface address.
1547  */
1548 static void
1549 arp_dad_start(struct ifaddr *ifa)
1550 {
1551 	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
1552 	struct dadq *dp;
1553 	char ipbuf[INET_ADDRSTRLEN];
1554 
1555 	/*
1556 	 * If we don't need DAD, don't do it.
1557 	 * - DAD is disabled
1558 	 */
1559 	if (!(ia->ia4_flags & IN_IFF_TENTATIVE)) {
1560 		log(LOG_DEBUG,
1561 		    "%s: called with non-tentative address %s(%s)\n", __func__,
1562 		    IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1563 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1564 		return;
1565 	}
1566 	if (!ip_dad_enabled()) {
1567 		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1568 		rt_addrmsg(RTM_NEWADDR, ifa);
1569 		arpannounce1(ifa);
1570 		return;
1571 	}
1572 	KASSERT(ifa->ifa_ifp != NULL);
1573 	if (!(ifa->ifa_ifp->if_flags & IFF_UP))
1574 		return;
1575 
1576 	dp = kmem_intr_alloc(sizeof(*dp), KM_NOSLEEP);
1577 
1578 	mutex_enter(&arp_dad_lock);
1579 	if (arp_dad_find(ifa) != NULL) {
1580 		mutex_exit(&arp_dad_lock);
1581 		/* DAD already in progress */
1582 		if (dp != NULL)
1583 			kmem_intr_free(dp, sizeof(*dp));
1584 		return;
1585 	}
1586 
1587 	if (dp == NULL) {
1588 		mutex_exit(&arp_dad_lock);
1589 		log(LOG_ERR, "%s: memory allocation failed for %s(%s)\n",
1590 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1591 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1592 		return;
1593 	}
1594 
1595 	/*
1596 	 * Send ARP packet for DAD, ip_dad_count times.
1597 	 * Note that we must delay the first transmission.
1598 	 */
1599 	callout_init(&dp->dad_timer_ch, CALLOUT_MPSAFE);
1600 	dp->dad_ifa = ifa;
1601 	ifaref(ifa);	/* just for safety */
1602 	dp->dad_count = ip_dad_count;
1603 	dp->dad_arp_announce = 0; /* Will be set when starting to announce */
1604 	dp->dad_arp_acount = dp->dad_arp_ocount = dp->dad_arp_tcount = 0;
1605 	TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
1606 
1607 	ARPLOG(LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
1608 	    ARPLOGADDR(&ia->ia_addr.sin_addr));
1609 
1610 	arp_dad_starttimer(dp, cprng_fast32() % (PROBE_WAIT * hz));
1611 
1612 	mutex_exit(&arp_dad_lock);
1613 }
1614 
1615 /*
1616  * terminate DAD unconditionally.  used for address removals.
1617  */
1618 static void
1619 arp_dad_stop(struct ifaddr *ifa)
1620 {
1621 	struct dadq *dp;
1622 
1623 	mutex_enter(&arp_dad_lock);
1624 	dp = arp_dad_find(ifa);
1625 	if (dp == NULL) {
1626 		mutex_exit(&arp_dad_lock);
1627 		/* DAD wasn't started yet */
1628 		return;
1629 	}
1630 
1631 	arp_dad_stoptimer(dp);
1632 
1633 	mutex_exit(&arp_dad_lock);
1634 
1635 	arp_dad_destroytimer(dp);
1636 	ifafree(ifa);
1637 }
1638 
1639 static void
1640 arp_dad_timer(struct dadq *dp)
1641 {
1642 	struct ifaddr *ifa;
1643 	struct in_ifaddr *ia;
1644 	char ipbuf[INET_ADDRSTRLEN];
1645 	bool need_free = false;
1646 
1647 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
1648 	mutex_enter(&arp_dad_lock);
1649 
1650 	ifa = dp->dad_ifa;
1651 	if (ifa == NULL) {
1652 		/* dp is being destroyed by someone.  Do nothing. */
1653 		goto done;
1654 	}
1655 
1656 	ia = (struct in_ifaddr *)ifa;
1657 	if (ia->ia4_flags & IN_IFF_DUPLICATED) {
1658 		log(LOG_ERR, "%s: called with duplicate address %s(%s)\n",
1659 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1660 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1661 		goto done;
1662 	}
1663 	if ((ia->ia4_flags & IN_IFF_TENTATIVE) == 0 && dp->dad_arp_acount == 0)
1664 	{
1665 		log(LOG_ERR, "%s: called with non-tentative address %s(%s)\n",
1666 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
1667 		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
1668 		goto done;
1669 	}
1670 
1671 	/* timeouted with IFF_{RUNNING,UP} check */
1672 	if (dp->dad_arp_tcount > dad_maxtry) {
1673 		ARPLOG(LOG_INFO, "%s: could not run DAD, driver problem?\n",
1674 		    if_name(ifa->ifa_ifp));
1675 
1676 		arp_dad_stoptimer(dp);
1677 		need_free = true;
1678 		goto done;
1679 	}
1680 
1681 	/* Need more checks? */
1682 	if (dp->dad_arp_ocount < dp->dad_count) {
1683 		int adelay;
1684 
1685 		/*
1686 		 * We have more ARP to go.  Send ARP packet for DAD.
1687 		 */
1688 		arp_dad_output(dp, ifa);
1689 		if (dp->dad_arp_ocount < dp->dad_count)
1690 			adelay = (PROBE_MIN * hz) +
1691 			    (cprng_fast32() %
1692 			    ((PROBE_MAX * hz) - (PROBE_MIN * hz)));
1693 		else
1694 			adelay = ANNOUNCE_WAIT * hz;
1695 		arp_dad_starttimer(dp, adelay);
1696 		goto done;
1697 	} else if (dp->dad_arp_acount == 0) {
1698 		/*
1699 		 * We are done with DAD.
1700 		 * No duplicate address found.
1701 		 */
1702 		ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1703 		rt_addrmsg(RTM_NEWADDR, ifa);
1704 		ARPLOG(LOG_DEBUG,
1705 		    "%s: DAD complete for %s - no duplicates found\n",
1706 		    if_name(ifa->ifa_ifp), ARPLOGADDR(&ia->ia_addr.sin_addr));
1707 		dp->dad_arp_announce = ANNOUNCE_NUM;
1708 		goto announce;
1709 	} else if (dp->dad_arp_acount < dp->dad_arp_announce) {
1710 announce:
1711 		/*
1712 		 * Announce the address.
1713 		 */
1714 		arpannounce1(ifa);
1715 		dp->dad_arp_acount++;
1716 		if (dp->dad_arp_acount < dp->dad_arp_announce) {
1717 			arp_dad_starttimer(dp, ANNOUNCE_INTERVAL * hz);
1718 			goto done;
1719 		}
1720 		ARPLOG(LOG_DEBUG,
1721 		    "%s: ARP announcement complete for %s\n",
1722 		    if_name(ifa->ifa_ifp), ARPLOGADDR(&ia->ia_addr.sin_addr));
1723 	}
1724 
1725 	arp_dad_stoptimer(dp);
1726 	need_free = true;
1727 done:
1728 	mutex_exit(&arp_dad_lock);
1729 
1730 	if (need_free) {
1731 		arp_dad_destroytimer(dp);
1732 		KASSERT(ifa != NULL);
1733 		ifafree(ifa);
1734 	}
1735 
1736 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
1737 }
1738 
1739 static void
1740 arp_dad_duplicated(struct ifaddr *ifa, const struct sockaddr_dl *from)
1741 {
1742 	struct in_ifaddr *ia = ifatoia(ifa);
1743 	struct ifnet *ifp = ifa->ifa_ifp;
1744 	char ipbuf[INET_ADDRSTRLEN], llabuf[LLA_ADDRSTRLEN];
1745 	const char *iastr, *llastr;
1746 
1747 	iastr = IN_PRINT(ipbuf, &ia->ia_addr.sin_addr);
1748 	if (__predict_false(from == NULL))
1749 		llastr = NULL;
1750 	else
1751 		llastr = lla_snprintf(llabuf, sizeof(llabuf),
1752 		    CLLADDR(from), from->sdl_alen);
1753 
1754 	if (ia->ia4_flags & (IN_IFF_TENTATIVE|IN_IFF_DUPLICATED)) {
1755 		log(LOG_ERR,
1756 		    "%s: DAD duplicate address %s from %s\n",
1757 		    if_name(ifp), iastr, llastr);
1758 	} else if (ia->ia_dad_defended == 0 ||
1759 		   ia->ia_dad_defended < time_uptime - DEFEND_INTERVAL) {
1760 		ia->ia_dad_defended = time_uptime;
1761 		arpannounce1(ifa);
1762 		log(LOG_ERR,
1763 		    "%s: DAD defended address %s from %s\n",
1764 		    if_name(ifp), iastr, llastr);
1765 		return;
1766 	} else {
1767 		/* If DAD is disabled, just report the duplicate. */
1768 		if (!ip_dad_enabled()) {
1769 			log(LOG_ERR,
1770 			    "%s: DAD ignoring duplicate address %s from %s\n",
1771 			    if_name(ifp), iastr, llastr);
1772 			return;
1773 		}
1774 		log(LOG_ERR,
1775 		    "%s: DAD defence failed for %s from %s\n",
1776 		    if_name(ifp), iastr, llastr);
1777 	}
1778 
1779 	arp_dad_stop(ifa);
1780 
1781 	ia->ia4_flags &= ~IN_IFF_TENTATIVE;
1782 	if ((ia->ia4_flags & IN_IFF_DUPLICATED) == 0) {
1783 		ia->ia4_flags |= IN_IFF_DUPLICATED;
1784 		/* Inform the routing socket of the duplicate address */
1785 		rt_addrmsg_src(RTM_NEWADDR, ifa, (const struct sockaddr *)from);
1786 	}
1787 }
1788 
1789 /*
1790  * Called from 10 Mb/s Ethernet interrupt handlers
1791  * when ether packet type ETHERTYPE_REVARP
1792  * is received.  Common length and type checks are done here,
1793  * then the protocol-specific routine is called.
1794  */
1795 void
1796 revarpinput(struct mbuf *m)
1797 {
1798 	struct arphdr *ar;
1799 	int arplen;
1800 
1801 	arplen = sizeof(struct arphdr);
1802 	if (m->m_len < arplen && (m = m_pullup(m, arplen)) == NULL)
1803 		return;
1804 	ar = mtod(m, struct arphdr *);
1805 
1806 	if (ntohs(ar->ar_hrd) == ARPHRD_IEEE1394) {
1807 		goto out;
1808 	}
1809 
1810 	arplen = sizeof(struct arphdr) + 2 * (ar->ar_hln + ar->ar_pln);
1811 	if (m->m_len < arplen && (m = m_pullup(m, arplen)) == NULL)
1812 		return;
1813 	ar = mtod(m, struct arphdr *);
1814 
1815 	switch (ntohs(ar->ar_pro)) {
1816 	case ETHERTYPE_IP:
1817 	case ETHERTYPE_IPTRAILERS:
1818 		in_revarpinput(m);
1819 		return;
1820 
1821 	default:
1822 		break;
1823 	}
1824 
1825 out:
1826 	m_freem(m);
1827 }
1828 
1829 /*
1830  * RARP for Internet protocols on 10 Mb/s Ethernet.
1831  * Algorithm is that given in RFC 903.
1832  * We are only using for bootstrap purposes to get an ip address for one of
1833  * our interfaces.  Thus we support no user-interface.
1834  *
1835  * Since the contents of the RARP reply are specific to the interface that
1836  * sent the request, this code must ensure that they are properly associated.
1837  *
1838  * Note: also supports ARP via RARP packets, per the RFC.
1839  */
1840 void
1841 in_revarpinput(struct mbuf *m)
1842 {
1843 	struct arphdr *ah;
1844 	void *tha;
1845 	int op;
1846 	struct ifnet *rcvif;
1847 	int s;
1848 
1849 	ah = mtod(m, struct arphdr *);
1850 	op = ntohs(ah->ar_op);
1851 
1852 	rcvif = m_get_rcvif(m, &s);
1853 	if (__predict_false(rcvif == NULL))
1854 		goto out;
1855 	if (rcvif->if_flags & IFF_NOARP)
1856 		goto out;
1857 
1858 	switch (rcvif->if_type) {
1859 	case IFT_IEEE1394:
1860 		/* ARP without target hardware address is not supported */
1861 		goto out;
1862 	default:
1863 		break;
1864 	}
1865 
1866 	switch (op) {
1867 	case ARPOP_REQUEST:
1868 	case ARPOP_REPLY:	/* per RFC */
1869 		m_put_rcvif(rcvif, &s);
1870 		in_arpinput(m);
1871 		return;
1872 	case ARPOP_REVREPLY:
1873 		break;
1874 	case ARPOP_REVREQUEST:	/* handled by rarpd(8) */
1875 	default:
1876 		goto out;
1877 	}
1878 	if (!revarp_in_progress)
1879 		goto out;
1880 	if (rcvif != myip_ifp) /* !same interface */
1881 		goto out;
1882 	if (myip_initialized)
1883 		goto wake;
1884 	tha = ar_tha(ah);
1885 	if (tha == NULL)
1886 		goto out;
1887 	if (ah->ar_pln != sizeof(struct in_addr))
1888 		goto out;
1889 	if (ah->ar_hln != rcvif->if_sadl->sdl_alen)
1890 		goto out;
1891 	if (memcmp(tha, CLLADDR(rcvif->if_sadl), rcvif->if_sadl->sdl_alen))
1892 		goto out;
1893 	memcpy(&srv_ip, ar_spa(ah), sizeof(srv_ip));
1894 	memcpy(&myip, ar_tpa(ah), sizeof(myip));
1895 	myip_initialized = 1;
1896 wake:	/* Do wakeup every time in case it was missed. */
1897 	wakeup((void *)&myip);
1898 
1899 out:
1900 	m_put_rcvif(rcvif, &s);
1901 	m_freem(m);
1902 }
1903 
1904 /*
1905  * Send a RARP request for the ip address of the specified interface.
1906  * The request should be RFC 903-compliant.
1907  */
1908 static void
1909 revarprequest(struct ifnet *ifp)
1910 {
1911 	struct sockaddr sa;
1912 	struct mbuf *m;
1913 	struct arphdr *ah;
1914 	void *tha;
1915 
1916 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
1917 		return;
1918 	MCLAIM(m, &arpdomain.dom_mowner);
1919 	m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) +
1920 	    2*ifp->if_addrlen;
1921 	m->m_pkthdr.len = m->m_len;
1922 	m_align(m, m->m_len);
1923 	ah = mtod(m, struct arphdr *);
1924 	memset(ah, 0, m->m_len);
1925 	ah->ar_pro = htons(ETHERTYPE_IP);
1926 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
1927 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
1928 	ah->ar_op = htons(ARPOP_REVREQUEST);
1929 
1930 	memcpy(ar_sha(ah), CLLADDR(ifp->if_sadl), ah->ar_hln);
1931 	tha = ar_tha(ah);
1932 	if (tha == NULL) {
1933 		m_free(m);
1934 		return;
1935 	}
1936 	memcpy(tha, CLLADDR(ifp->if_sadl), ah->ar_hln);
1937 
1938 	sa.sa_family = AF_ARP;
1939 	sa.sa_len = 2;
1940 	m->m_flags |= M_BCAST;
1941 
1942 	if_output_lock(ifp, ifp, m, &sa, NULL);
1943 }
1944 
1945 /*
1946  * RARP for the ip address of the specified interface, but also
1947  * save the ip address of the server that sent the answer.
1948  * Timeout if no response is received.
1949  */
1950 int
1951 revarpwhoarewe(struct ifnet *ifp, struct in_addr *serv_in,
1952     struct in_addr *clnt_in)
1953 {
1954 	int result, count = 20;
1955 
1956 	myip_initialized = 0;
1957 	myip_ifp = ifp;
1958 
1959 	revarp_in_progress = 1;
1960 	while (count--) {
1961 		revarprequest(ifp);
1962 		result = tsleep((void *)&myip, PSOCK, "revarp", hz/2);
1963 		if (result != EWOULDBLOCK)
1964 			break;
1965 	}
1966 	revarp_in_progress = 0;
1967 
1968 	if (!myip_initialized)
1969 		return ENETUNREACH;
1970 
1971 	memcpy(serv_in, &srv_ip, sizeof(*serv_in));
1972 	memcpy(clnt_in, &myip, sizeof(*clnt_in));
1973 	return 0;
1974 }
1975 
1976 void
1977 arp_stat_add(int type, uint64_t count)
1978 {
1979 	ARP_STATADD(type, count);
1980 }
1981 
1982 static int
1983 sysctl_net_inet_arp_stats(SYSCTLFN_ARGS)
1984 {
1985 
1986 	return NETSTAT_SYSCTL(arpstat_percpu, ARP_NSTATS);
1987 }
1988 
1989 static void
1990 sysctl_net_inet_arp_setup(struct sysctllog **clog)
1991 {
1992 	const struct sysctlnode *node;
1993 
1994 	sysctl_createv(clog, 0, NULL, NULL,
1995 			CTLFLAG_PERMANENT,
1996 			CTLTYPE_NODE, "inet", NULL,
1997 			NULL, 0, NULL, 0,
1998 			CTL_NET, PF_INET, CTL_EOL);
1999 	sysctl_createv(clog, 0, NULL, &node,
2000 			CTLFLAG_PERMANENT,
2001 			CTLTYPE_NODE, "arp",
2002 			SYSCTL_DESCR("Address Resolution Protocol"),
2003 			NULL, 0, NULL, 0,
2004 			CTL_NET, PF_INET, CTL_CREATE, CTL_EOL);
2005 
2006 	sysctl_createv(clog, 0, NULL, NULL,
2007 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2008 		       CTLTYPE_INT, "nd_delay",
2009 		       SYSCTL_DESCR("First probe delay time"),
2010 		       NULL, 0, &arp_nd_domain.nd_delay, 0,
2011 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2012 	sysctl_createv(clog, 0, NULL, NULL,
2013 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2014 		       CTLTYPE_INT, "nd_bmaxtries",
2015 		       SYSCTL_DESCR("Number of broadcast discovery attempts"),
2016 		       NULL, 0, &arp_nd_domain.nd_mmaxtries, 0,
2017 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2018 	sysctl_createv(clog, 0, NULL, NULL,
2019 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2020 		       CTLTYPE_INT, "nd_umaxtries",
2021 		       SYSCTL_DESCR("Number of unicast discovery attempts"),
2022 		       NULL, 0, &arp_nd_domain.nd_umaxtries, 0,
2023 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2024 	sysctl_createv(clog, 0, NULL, NULL,
2025 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2026 		       CTLTYPE_INT, "nd_reachable",
2027 		       SYSCTL_DESCR("Reachable time"),
2028 		       NULL, 0, &arp_reachable, 0,
2029 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2030 	sysctl_createv(clog, 0, NULL, NULL,
2031 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2032 		       CTLTYPE_INT, "nd_retrans",
2033 		       SYSCTL_DESCR("Retransmission time"),
2034 		       NULL, 0, &arp_retrans, 0,
2035 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2036 	sysctl_createv(clog, 0, NULL, NULL,
2037 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2038 		       CTLTYPE_INT, "nd_nud",
2039 		       SYSCTL_DESCR("Perform neighbour unreachability detection"),
2040 		       NULL, 0, &arp_perform_nud, 0,
2041 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2042 	sysctl_createv(clog, 0, NULL, NULL,
2043 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2044 		       CTLTYPE_INT, "nd_maxnudhint",
2045 		       SYSCTL_DESCR("Maximum neighbor unreachable hint count"),
2046 		       NULL, 0, &arp_nd_domain.nd_maxnudhint, 0,
2047 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2048 	sysctl_createv(clog, 0, NULL, NULL,
2049 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2050 		       CTLTYPE_INT, "maxqueuelen",
2051 		       SYSCTL_DESCR("max packet queue len for a unresolved ARP"),
2052 		       NULL, 1, &arp_nd_domain.nd_maxqueuelen, 0,
2053 		       CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2054 
2055 	sysctl_createv(clog, 0, NULL, NULL,
2056 			CTLFLAG_PERMANENT,
2057 			CTLTYPE_STRUCT, "stats",
2058 			SYSCTL_DESCR("ARP statistics"),
2059 			sysctl_net_inet_arp_stats, 0, NULL, 0,
2060 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2061 
2062 	sysctl_createv(clog, 0, NULL, NULL,
2063 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2064 			CTLTYPE_INT, "log_movements",
2065 			SYSCTL_DESCR("log ARP replies from MACs different than"
2066 			    " the one in the cache"),
2067 			NULL, 0, &log_movements, 0,
2068 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2069 
2070 	sysctl_createv(clog, 0, NULL, NULL,
2071 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2072 			CTLTYPE_INT, "log_permanent_modify",
2073 			SYSCTL_DESCR("log ARP replies from MACs different than"
2074 			    " the one in the permanent arp entry"),
2075 			NULL, 0, &log_permanent_modify, 0,
2076 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2077 
2078 	sysctl_createv(clog, 0, NULL, NULL,
2079 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2080 			CTLTYPE_INT, "log_wrong_iface",
2081 			SYSCTL_DESCR("log ARP packets arriving on the wrong"
2082 			    " interface"),
2083 			NULL, 0, &log_wrong_iface, 0,
2084 			CTL_NET,PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2085 
2086 	sysctl_createv(clog, 0, NULL, NULL,
2087 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2088 			CTLTYPE_INT, "debug",
2089 			SYSCTL_DESCR("Enable ARP DAD debug output"),
2090 			NULL, 0, &arp_debug, 0,
2091 			CTL_NET, PF_INET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2092 }
2093 
2094 #endif /* INET */
2095