xref: /openbsd-src/sys/netinet/if_ether.c (revision 097a140d792de8b2bbe59ad827d39eabf9b4280a)
1 /*	$OpenBSD: if_ether.c,v 1.247 2021/04/28 10:33:34 bluhm Exp $	*/
2 /*	$NetBSD: if_ether.c,v 1.31 1996/05/11 12:59:58 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
33  */
34 
35 /*
36  * Ethernet address resolution protocol.
37  * TODO:
38  *	add "inuse/lock" bit (or ref. count) along with valid bit
39  */
40 
41 #include "carp.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/timeout.h>
48 #include <sys/kernel.h>
49 #include <sys/syslog.h>
50 #include <sys/queue.h>
51 #include <sys/pool.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_dl.h>
56 #include <net/route.h>
57 #include <net/if_types.h>
58 #include <net/netisr.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/if_ether.h>
63 #include <netinet/ip_var.h>
64 #if NCARP > 0
65 #include <netinet/ip_carp.h>
66 #endif
67 
68 /*
69  *  Locks used to protect struct members in this file:
70  *	a	atomic operations
71  *	I	immutable after creation
72  *	K	kernel lock
73  *	m	arp mutex, needed when net lock is shared
74  *	N	net lock
75  */
76 
77 struct llinfo_arp {
78 	LIST_ENTRY(llinfo_arp)	 la_list;	/* [mN] global arp_list */
79 	struct rtentry		*la_rt;		/* [I] backpointer to rtentry */
80 	struct mbuf_queue	 la_mq;		/* packet hold queue */
81 	time_t			 la_refreshed;	/* when was refresh sent */
82 	int			 la_asked;	/* number of queries sent */
83 };
84 #define LA_HOLD_QUEUE 10
85 #define LA_HOLD_TOTAL 100
86 
/* Timer values, expressed in seconds. */
int	arpt_prune = 5 * 60;	/* [I] walk list every 5 minutes */
int	arpt_keep = 20 * 60;	/* [a] once resolved, cache for 20 minutes */
int	arpt_down = 20;		/* [a] once declared down, don't send for 20 secs */
91 
92 struct mbuf *arppullup(struct mbuf *m);
93 void arpinvalidate(struct rtentry *);
94 void arptfree(struct rtentry *);
95 void arptimer(void *);
96 struct rtentry *arplookup(struct in_addr *, int, int, unsigned int);
97 void in_arpinput(struct ifnet *, struct mbuf *);
98 void in_revarpinput(struct ifnet *, struct mbuf *);
99 int arpcache(struct ifnet *, struct ether_arp *, struct rtentry *);
100 void arpreply(struct ifnet *, struct mbuf *, struct in_addr *, uint8_t *,
101     unsigned int);
102 
103 struct niqueue arpinq = NIQUEUE_INITIALIZER(50, NETISR_ARP);
104 struct mutex arp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
105 
106 LIST_HEAD(, llinfo_arp) arp_list; /* [mN] list of all llinfo_arp structures */
107 struct	pool arp_pool;		/* [I] pool for llinfo_arp structures */
108 int	arp_maxtries = 5;	/* [I] arp requests before set to rejected */
109 int	la_hold_total;		/* [a] packets currently in the arp queue */
110 
#ifdef NFSCLIENT
/* revarp state */
/* Copied from arp_tpa of the accepted RARP reply. */
struct in_addr revarp_myip;
/* Copied from arp_spa of the accepted RARP reply. */
struct in_addr revarp_srvip;
/* Set to 1 by in_revarpinput() once a reply has been accepted. */
int revarp_finished;
/* Index of the interface being queried; 0 while no query is running. */
unsigned int revarp_ifidx;
#endif /* NFSCLIENT */
117 
118 /*
119  * Timeout routine.  Age arp_tab entries periodically.
120  */
121 /* ARGSUSED */
122 void
123 arptimer(void *arg)
124 {
125 	struct timeout *to = arg;
126 	struct llinfo_arp *la, *nla;
127 
128 	NET_LOCK();
129 	timeout_add_sec(to, arpt_prune);
130 	/* Net lock is exclusive, no arp mutex needed for arp_list here. */
131 	LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
132 		struct rtentry *rt = la->la_rt;
133 
134 		if (rt->rt_expire && rt->rt_expire < getuptime())
135 			arptfree(rt); /* timer has expired; clear */
136 	}
137 	NET_UNLOCK();
138 }
139 
140 void
141 arpinit(void)
142 {
143 	static struct timeout arptimer_to;
144 
145 	pool_init(&arp_pool, sizeof(struct llinfo_arp), 0,
146 	    IPL_SOFTNET, 0, "arp", NULL);
147 
148 	timeout_set_proc(&arptimer_to, arptimer, &arptimer_to);
149 	timeout_add_sec(&arptimer_to, arpt_prune);
150 }
151 
152 void
153 arp_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
154 {
155 	struct sockaddr *gate = rt->rt_gateway;
156 	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
157 
158 	NET_ASSERT_LOCKED();
159 
160 	if (ISSET(rt->rt_flags,
161 	    RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST|RTF_MPLS))
162 		return;
163 
164 	switch (req) {
165 
166 	case RTM_ADD:
167 		if (rt->rt_flags & RTF_CLONING) {
168 			rt->rt_expire = 0;
169 			break;
170 		}
171 		if ((rt->rt_flags & RTF_LOCAL) && !la)
172 			rt->rt_expire = 0;
173 		/*
174 		 * Announce a new entry if requested or warn the user
175 		 * if another station has this IP address.
176 		 */
177 		if (rt->rt_flags & (RTF_ANNOUNCE|RTF_LOCAL))
178 			arprequest(ifp,
179 			    &satosin(rt_key(rt))->sin_addr.s_addr,
180 			    &satosin(rt_key(rt))->sin_addr.s_addr,
181 			    (u_char *)LLADDR(satosdl(gate)));
182 		/*FALLTHROUGH*/
183 	case RTM_RESOLVE:
184 		if (gate->sa_family != AF_LINK ||
185 		    gate->sa_len < sizeof(struct sockaddr_dl)) {
186 			log(LOG_DEBUG, "%s: bad gateway value: %s\n", __func__,
187 			    ifp->if_xname);
188 			break;
189 		}
190 		satosdl(gate)->sdl_type = ifp->if_type;
191 		satosdl(gate)->sdl_index = ifp->if_index;
192 		if (la != NULL)
193 			break; /* This happens on a route change */
194 		/*
195 		 * Case 2:  This route may come from cloning, or a manual route
196 		 * add with a LL address.
197 		 */
198 		la = pool_get(&arp_pool, PR_NOWAIT | PR_ZERO);
199 		rt->rt_llinfo = (caddr_t)la;
200 		if (la == NULL) {
201 			log(LOG_DEBUG, "%s: pool get failed\n", __func__);
202 			break;
203 		}
204 
205 		mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
206 		la->la_rt = rt;
207 		rt->rt_flags |= RTF_LLINFO;
208 		if ((rt->rt_flags & RTF_LOCAL) == 0)
209 			rt->rt_expire = getuptime();
210 		mtx_enter(&arp_mtx);
211 		LIST_INSERT_HEAD(&arp_list, la, la_list);
212 		mtx_leave(&arp_mtx);
213 		break;
214 
215 	case RTM_DELETE:
216 		if (la == NULL)
217 			break;
218 		mtx_enter(&arp_mtx);
219 		LIST_REMOVE(la, la_list);
220 		mtx_leave(&arp_mtx);
221 		rt->rt_llinfo = NULL;
222 		rt->rt_flags &= ~RTF_LLINFO;
223 		atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
224 		pool_put(&arp_pool, la);
225 		break;
226 
227 	case RTM_INVALIDATE:
228 		if (la == NULL)
229 			break;
230 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
231 			arpinvalidate(rt);
232 		break;
233 	}
234 }
235 
236 /*
237  * Broadcast an ARP request. Caller specifies:
238  *	- arp header source ip address
239  *	- arp header target ip address
240  *	- arp header source ethernet address
241  */
242 void
243 arprequest(struct ifnet *ifp, u_int32_t *sip, u_int32_t *tip, u_int8_t *enaddr)
244 {
245 	struct mbuf *m;
246 	struct ether_header *eh;
247 	struct ether_arp *ea;
248 	struct sockaddr sa;
249 
250 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
251 		return;
252 	m->m_len = sizeof(*ea);
253 	m->m_pkthdr.len = sizeof(*ea);
254 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
255 	m->m_pkthdr.pf.prio = ifp->if_llprio;
256 	m_align(m, sizeof(*ea));
257 	ea = mtod(m, struct ether_arp *);
258 	eh = (struct ether_header *)sa.sa_data;
259 	memset(ea, 0, sizeof(*ea));
260 	memcpy(eh->ether_dhost, etherbroadcastaddr, sizeof(eh->ether_dhost));
261 	eh->ether_type = htons(ETHERTYPE_ARP);	/* if_output will not swap */
262 	ea->arp_hrd = htons(ARPHRD_ETHER);
263 	ea->arp_pro = htons(ETHERTYPE_IP);
264 	ea->arp_hln = sizeof(ea->arp_sha);	/* hardware address length */
265 	ea->arp_pln = sizeof(ea->arp_spa);	/* protocol address length */
266 	ea->arp_op = htons(ARPOP_REQUEST);
267 	memcpy(eh->ether_shost, enaddr, sizeof(eh->ether_shost));
268 	memcpy(ea->arp_sha, enaddr, sizeof(ea->arp_sha));
269 	memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa));
270 	memcpy(ea->arp_tpa, tip, sizeof(ea->arp_tpa));
271 	sa.sa_family = pseudo_AF_HDRCMPLT;
272 	sa.sa_len = sizeof(sa);
273 	m->m_flags |= M_BCAST;
274 	ifp->if_output(ifp, m, &sa, NULL);
275 }
276 
277 void
278 arpreply(struct ifnet *ifp, struct mbuf *m, struct in_addr *sip, uint8_t *eaddr,
279     unsigned int rdomain)
280 {
281 	struct ether_header *eh;
282 	struct ether_arp *ea;
283 	struct sockaddr sa;
284 
285 	m_resethdr(m);
286 	m->m_pkthdr.ph_rtableid = rdomain;
287 
288 	ea = mtod(m, struct ether_arp *);
289 	ea->arp_op = htons(ARPOP_REPLY);
290 	ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
291 
292 	/* We're replying to a request. */
293 	memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
294 	memcpy(ea->arp_tpa, ea->arp_spa, sizeof(ea->arp_spa));
295 
296 	memcpy(ea->arp_sha, eaddr, sizeof(ea->arp_sha));
297 	memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa));
298 
299 	eh = (struct ether_header *)sa.sa_data;
300 	memcpy(eh->ether_dhost, ea->arp_tha, sizeof(eh->ether_dhost));
301 	memcpy(eh->ether_shost, eaddr, sizeof(eh->ether_shost));
302 	eh->ether_type = htons(ETHERTYPE_ARP);
303 	sa.sa_family = pseudo_AF_HDRCMPLT;
304 	sa.sa_len = sizeof(sa);
305 	ifp->if_output(ifp, m, &sa, NULL);
306 }
307 
308 /*
309  * Resolve an IP address into an ethernet address.  If success,
310  * desten is filled in.  If there is no entry in arptab,
311  * set one up and broadcast a request for the IP address.
312  * Hold onto this mbuf and resend it once the address
313  * is finally resolved.  A return value of 0 indicates
314  * that desten has been filled in and the packet should be sent
315  * normally; A return value of EAGAIN indicates that the packet
316  * has been taken over here, either now or for later transmission.
317  * Any other return value indicates an error.
318  */
319 int
320 arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
321     struct sockaddr *dst, u_char *desten)
322 {
323 	struct arpcom *ac = (struct arpcom *)ifp;
324 	struct llinfo_arp *la;
325 	struct sockaddr_dl *sdl;
326 	struct rtentry *rt = NULL;
327 	char addr[INET_ADDRSTRLEN];
328 
329 	if (m->m_flags & M_BCAST) {	/* broadcast */
330 		memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
331 		return (0);
332 	}
333 	if (m->m_flags & M_MCAST) {	/* multicast */
334 		ETHER_MAP_IP_MULTICAST(&satosin(dst)->sin_addr, desten);
335 		return (0);
336 	}
337 
338 	rt = rt_getll(rt0);
339 
340 	if (ISSET(rt->rt_flags, RTF_REJECT) &&
341 	    (rt->rt_expire == 0 || rt->rt_expire > getuptime() )) {
342 		m_freem(m);
343 		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
344 	}
345 
346 	if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
347 		log(LOG_DEBUG, "%s: %s: route contains no arp information\n",
348 		    __func__, inet_ntop(AF_INET, &satosin(rt_key(rt))->sin_addr,
349 		    addr, sizeof(addr)));
350 		m_freem(m);
351 		return (EINVAL);
352 	}
353 
354 	sdl = satosdl(rt->rt_gateway);
355 	if (sdl->sdl_alen > 0 && sdl->sdl_alen != ETHER_ADDR_LEN) {
356 		log(LOG_DEBUG, "%s: %s: incorrect arp information\n", __func__,
357 		    inet_ntop(AF_INET, &satosin(dst)->sin_addr,
358 			addr, sizeof(addr)));
359 		goto bad;
360 	}
361 
362 	la = (struct llinfo_arp *)rt->rt_llinfo;
363 	KASSERT(la != NULL);
364 
365 	/*
366 	 * Check the address family and length is valid, the address
367 	 * is resolved; otherwise, try to resolve.
368 	 */
369 	if ((rt->rt_expire == 0 || rt->rt_expire > getuptime()) &&
370 	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
371 		memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
372 
373 		/* refresh ARP entry when timeout gets close */
374 		if (rt->rt_expire != 0 &&
375 		    rt->rt_expire - arpt_keep / 8 < getuptime() &&
376 		    la->la_refreshed + 30 < getuptime()) {
377 			la->la_refreshed = getuptime();
378 			arprequest(ifp,
379 			    &satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
380 			    &satosin(dst)->sin_addr.s_addr,
381 			    ac->ac_enaddr);
382 		}
383 		return (0);
384 	}
385 
386 	if (ifp->if_flags & (IFF_NOARP|IFF_STATICARP))
387 		goto bad;
388 
389 	/*
390 	 * There is an arptab entry, but no ethernet address
391 	 * response yet. Insert mbuf in hold queue if below limit
392 	 * if above the limit free the queue without queuing the new packet.
393 	 */
394 	if (atomic_inc_int_nv(&la_hold_total) <= LA_HOLD_TOTAL) {
395 		if (mq_push(&la->la_mq, m) != 0)
396 			atomic_dec_int(&la_hold_total);
397 	} else {
398 		atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq) + 1);
399 		m_freem(m);
400 	}
401 
402 	/*
403 	 * Re-send the ARP request when appropriate.
404 	 */
405 #ifdef	DIAGNOSTIC
406 	if (rt->rt_expire == 0) {
407 		/* This should never happen. (Should it? -gwr) */
408 		printf("%s: unresolved and rt_expire == 0\n", __func__);
409 		/* Set expiration time to now (expired). */
410 		rt->rt_expire = getuptime();
411 	}
412 #endif
413 	if (rt->rt_expire) {
414 		rt->rt_flags &= ~RTF_REJECT;
415 		if (la->la_asked == 0 || rt->rt_expire != getuptime()) {
416 			rt->rt_expire = getuptime();
417 			if (la->la_asked++ < arp_maxtries)
418 				arprequest(ifp,
419 				    &satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
420 				    &satosin(dst)->sin_addr.s_addr,
421 				    ac->ac_enaddr);
422 			else {
423 				rt->rt_flags |= RTF_REJECT;
424 				rt->rt_expire += arpt_down;
425 				la->la_asked = 0;
426 				la->la_refreshed = 0;
427 				atomic_sub_int(&la_hold_total,
428 				    mq_purge(&la->la_mq));
429 			}
430 		}
431 	}
432 
433 	return (EAGAIN);
434 
435 bad:
436 	m_freem(m);
437 	return (EINVAL);
438 }
439 
440 struct mbuf *
441 arppullup(struct mbuf *m)
442 {
443 	struct arphdr *ar;
444 	int len;
445 
446 #ifdef DIAGNOSTIC
447 	if ((m->m_flags & M_PKTHDR) == 0)
448 		panic("arp without packet header");
449 #endif
450 
451 	len = sizeof(struct arphdr);
452 	if (m->m_len < len && (m = m_pullup(m, len)) == NULL)
453 		return NULL;
454 
455 	ar = mtod(m, struct arphdr *);
456 	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER ||
457 	    ntohs(ar->ar_pro) != ETHERTYPE_IP ||
458 	    ar->ar_hln != ETHER_ADDR_LEN ||
459 	    ar->ar_pln != sizeof(struct in_addr)) {
460 		m_freem(m);
461 		return NULL;
462 	}
463 
464 	len += 2 * (ar->ar_hln + ar->ar_pln);
465 	if (m->m_len < len && (m = m_pullup(m, len)) == NULL)
466 		return NULL;
467 
468 	return m;
469 }
470 
471 /*
472  * Common length and type checks are done here,
473  * then the protocol-specific routine is called.
474  */
475 void
476 arpinput(struct ifnet *ifp, struct mbuf *m)
477 {
478 	if ((m = arppullup(m)) == NULL)
479 		return;
480 	niq_enqueue(&arpinq, m);
481 }
482 
483 void
484 arpintr(void)
485 {
486 	struct mbuf_list ml;
487 	struct mbuf *m;
488 	struct ifnet *ifp;
489 
490 	niq_delist(&arpinq, &ml);
491 
492 	while ((m = ml_dequeue(&ml)) != NULL) {
493 		ifp = if_get(m->m_pkthdr.ph_ifidx);
494 
495 		if (ifp != NULL)
496 			in_arpinput(ifp, m);
497 		else
498 			m_freem(m);
499 
500 		if_put(ifp);
501 	}
502 }
503 
504 /*
505  * ARP for Internet protocols on Ethernet, RFC 826.
506  * In addition, a sanity check is performed on the sender
507  * protocol address, to catch impersonators.
508  */
509 void
510 in_arpinput(struct ifnet *ifp, struct mbuf *m)
511 {
512 	struct ether_arp *ea;
513 	struct rtentry *rt = NULL;
514 	struct sockaddr_in sin;
515 	struct in_addr isaddr, itaddr;
516 	char addr[INET_ADDRSTRLEN];
517 	int op, target = 0;
518 	unsigned int rdomain;
519 
520 	rdomain = rtable_l2(m->m_pkthdr.ph_rtableid);
521 
522 	ea = mtod(m, struct ether_arp *);
523 	op = ntohs(ea->arp_op);
524 	if ((op != ARPOP_REQUEST) && (op != ARPOP_REPLY))
525 		goto out;
526 
527 	memcpy(&itaddr, ea->arp_tpa, sizeof(itaddr));
528 	memcpy(&isaddr, ea->arp_spa, sizeof(isaddr));
529 	memset(&sin, 0, sizeof(sin));
530 	sin.sin_len = sizeof(sin);
531 	sin.sin_family = AF_INET;
532 
533 	if (ETHER_IS_MULTICAST(ea->arp_sha) &&
534 	    ETHER_IS_BROADCAST(ea->arp_sha)) {
535 		inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
536 		log(LOG_ERR, "arp: ether address is broadcast for IP address "
537 		    "%s!\n", addr);
538 		goto out;
539 	}
540 
541 	if (!memcmp(ea->arp_sha, LLADDR(ifp->if_sadl), sizeof(ea->arp_sha)))
542 		goto out;	/* it's from me, ignore it. */
543 
544 	/* Check target against our interface addresses. */
545 	sin.sin_addr = itaddr;
546 	rt = rtalloc(sintosa(&sin), 0, rdomain);
547 	if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
548 	    rt->rt_ifidx == ifp->if_index)
549 		target = 1;
550 	rtfree(rt);
551 	rt = NULL;
552 
553 #if NCARP > 0
554 	if (target && op == ARPOP_REQUEST && ifp->if_type == IFT_CARP &&
555 	    !carp_iamatch(ifp))
556 		goto out;
557 #endif
558 
559 	/* Do we have an ARP cache for the sender? Create if we are target. */
560 	rt = arplookup(&isaddr, target, 0, rdomain);
561 
562 	/* Check sender against our interface addresses. */
563 	if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
564 	    rt->rt_ifidx == ifp->if_index && isaddr.s_addr != INADDR_ANY) {
565 		inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
566 		log(LOG_ERR, "duplicate IP address %s sent from ethernet "
567 		    "address %s\n", addr, ether_sprintf(ea->arp_sha));
568 		itaddr = isaddr;
569 	} else if (rt != NULL) {
570 		int error;
571 
572 		KERNEL_LOCK();
573 		error = arpcache(ifp, ea, rt);
574 		KERNEL_UNLOCK();
575 		if (error)
576 			goto out;
577 	}
578 
579 	if (op == ARPOP_REQUEST) {
580 		uint8_t *eaddr;
581 
582 		if (target) {
583 			/* We already have all info for the reply */
584 			eaddr = LLADDR(ifp->if_sadl);
585 		} else {
586 			rtfree(rt);
587 			rt = arplookup(&itaddr, 0, SIN_PROXY, rdomain);
588 			/*
589 			 * Protect from possible duplicates, only owner
590 			 * should respond
591 			 */
592 			if ((rt == NULL) || (rt->rt_ifidx != ifp->if_index))
593 				goto out;
594 			eaddr = LLADDR(satosdl(rt->rt_gateway));
595 		}
596 		arpreply(ifp, m, &itaddr, eaddr, rdomain);
597 		rtfree(rt);
598 		return;
599 	}
600 
601 out:
602 	rtfree(rt);
603 	m_freem(m);
604 }
605 
606 int
607 arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt)
608 {
609 	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
610 	struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
611 	struct in_addr *spa = (struct in_addr *)ea->arp_spa;
612 	char addr[INET_ADDRSTRLEN];
613 	struct ifnet *rifp;
614 	struct mbuf *m;
615 	int changed = 0;
616 
617 	KERNEL_ASSERT_LOCKED();
618 	KASSERT(sdl != NULL);
619 
620 	/*
621 	 * This can happen if the entry has been deleted by another CPU
622 	 * after we found it.
623 	 */
624 	if (la == NULL)
625 		return (0);
626 
627 	if (sdl->sdl_alen > 0) {
628 		if (memcmp(ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) {
629 			if (ISSET(rt->rt_flags, RTF_PERMANENT_ARP|RTF_LOCAL)) {
630 				inet_ntop(AF_INET, spa, addr, sizeof(addr));
631 				log(LOG_WARNING, "arp: attempt to overwrite "
632 				   "permanent entry for %s by %s on %s\n", addr,
633 				   ether_sprintf(ea->arp_sha), ifp->if_xname);
634 				return (-1);
635 			} else if (rt->rt_ifidx != ifp->if_index) {
636 #if NCARP > 0
637 				if (ifp->if_type != IFT_CARP)
638 #endif
639 				{
640 					rifp = if_get(rt->rt_ifidx);
641 					if (rifp == NULL)
642 						return (-1);
643 					inet_ntop(AF_INET, spa, addr,
644 					    sizeof(addr));
645 					log(LOG_WARNING, "arp: attempt to "
646 					    "overwrite entry for %s on %s by "
647 					    "%s on %s\n", addr, rifp->if_xname,
648 					    ether_sprintf(ea->arp_sha),
649 					    ifp->if_xname);
650 					if_put(rifp);
651 				}
652 				return (-1);
653 			} else {
654 				inet_ntop(AF_INET, spa, addr, sizeof(addr));
655 				log(LOG_INFO, "arp info overwritten for %s by "
656 				    "%s on %s\n", addr,
657 				    ether_sprintf(ea->arp_sha), ifp->if_xname);
658 				rt->rt_expire = 1;/* no longer static */
659 			}
660 			changed = 1;
661 		}
662 	} else if (!if_isconnected(ifp, rt->rt_ifidx)) {
663 		rifp = if_get(rt->rt_ifidx);
664 		if (rifp == NULL)
665 			return (-1);
666 		inet_ntop(AF_INET, spa, addr, sizeof(addr));
667 		log(LOG_WARNING, "arp: attempt to add entry for %s on %s by %s"
668 		    " on %s\n", addr, rifp->if_xname,
669 		    ether_sprintf(ea->arp_sha), ifp->if_xname);
670 		if_put(rifp);
671 		return (-1);
672 	}
673 	sdl->sdl_alen = sizeof(ea->arp_sha);
674 	memcpy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha));
675 	if (rt->rt_expire)
676 		rt->rt_expire = getuptime() + arpt_keep;
677 	rt->rt_flags &= ~RTF_REJECT;
678 
679 	/* Notify userland that an ARP resolution has been done. */
680 	if (la->la_asked || changed) {
681 		rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain);
682 	}
683 
684 	la->la_asked = 0;
685 	la->la_refreshed = 0;
686 	while ((m = mq_dequeue(&la->la_mq)) != NULL) {
687 		unsigned int len;
688 
689 		atomic_dec_int(&la_hold_total);
690 		len = mq_len(&la->la_mq);
691 
692 		ifp->if_output(ifp, m, rt_key(rt), rt);
693 
694 		/* XXXSMP we discard if other CPU enqueues */
695 		if (mq_len(&la->la_mq) > len) {
696 			/* mbuf is back in queue. Discard. */
697 			atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
698 			break;
699 		}
700 	}
701 
702 	return (0);
703 }
704 
705 void
706 arpinvalidate(struct rtentry *rt)
707 {
708 	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
709 	struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
710 
711 	atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
712 	sdl->sdl_alen = 0;
713 	la->la_asked = 0;
714 }
715 
716 /*
717  * Free an arp entry.
718  */
719 void
720 arptfree(struct rtentry *rt)
721 {
722 	struct ifnet *ifp;
723 
724 	KASSERT(!ISSET(rt->rt_flags, RTF_LOCAL));
725 	arpinvalidate(rt);
726 
727 	ifp = if_get(rt->rt_ifidx);
728 	KASSERT(ifp != NULL);
729 	if (!ISSET(rt->rt_flags, RTF_STATIC|RTF_CACHED))
730 		rtdeletemsg(rt, ifp, ifp->if_rdomain);
731 	if_put(ifp);
732 }
733 
734 /*
735  * Lookup or enter a new address in arptab.
736  */
737 struct rtentry *
738 arplookup(struct in_addr *inp, int create, int proxy, u_int tableid)
739 {
740 	struct rtentry *rt;
741 	struct sockaddr_inarp sin;
742 	int flags;
743 
744 	memset(&sin, 0, sizeof(sin));
745 	sin.sin_len = sizeof(sin);
746 	sin.sin_family = AF_INET;
747 	sin.sin_addr.s_addr = inp->s_addr;
748 	sin.sin_other = proxy ? SIN_PROXY : 0;
749 	flags = (create) ? RT_RESOLVE : 0;
750 
751 	rt = rtalloc((struct sockaddr *)&sin, flags, tableid);
752 	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY) ||
753 	    !ISSET(rt->rt_flags, RTF_LLINFO) ||
754 	    rt->rt_gateway->sa_family != AF_LINK) {
755 		rtfree(rt);
756 		return (NULL);
757 	}
758 
759 	if (proxy && !ISSET(rt->rt_flags, RTF_ANNOUNCE)) {
760 		while ((rt = rtable_iterate(rt)) != NULL) {
761 			if (ISSET(rt->rt_flags, RTF_ANNOUNCE)) {
762 				break;
763 			}
764 		}
765 	}
766 
767 	return (rt);
768 }
769 
770 /*
771  * Check whether we do proxy ARP for this address and we point to ourselves.
772  */
773 int
774 arpproxy(struct in_addr in, unsigned int rtableid)
775 {
776 	struct sockaddr_dl *sdl;
777 	struct rtentry *rt;
778 	struct ifnet *ifp;
779 	int found = 0;
780 
781 	rt = arplookup(&in, 0, SIN_PROXY, rtableid);
782 	if (!rtisvalid(rt)) {
783 		rtfree(rt);
784 		return (0);
785 	}
786 
787 	/* Check that arp information are correct. */
788 	sdl = satosdl(rt->rt_gateway);
789 	if (sdl->sdl_alen != ETHER_ADDR_LEN) {
790 		rtfree(rt);
791 		return (0);
792 	}
793 
794 	ifp = if_get(rt->rt_ifidx);
795 	if (ifp == NULL) {
796 		rtfree(rt);
797 		return (0);
798 	}
799 
800 	if (!memcmp(LLADDR(sdl), LLADDR(ifp->if_sadl), sdl->sdl_alen))
801 		found = 1;
802 
803 	if_put(ifp);
804 	rtfree(rt);
805 	return (found);
806 }
807 
808 /*
809  * Called from Ethernet interrupt handlers
810  * when ether packet type ETHERTYPE_REVARP
811  * is received.  Common length and type checks are done here,
812  * then the protocol-specific routine is called.
813  */
814 void
815 revarpinput(struct ifnet *ifp, struct mbuf *m)
816 {
817 	if ((m = arppullup(m)) == NULL)
818 		return;
819 	in_revarpinput(ifp, m);
820 }
821 
822 /*
823  * RARP for Internet protocols on Ethernet.
824  * Algorithm is that given in RFC 903.
825  * We are only using for bootstrap purposes to get an ip address for one of
826  * our interfaces.  Thus we support no user-interface.
827  *
828  * Since the contents of the RARP reply are specific to the interface that
829  * sent the request, this code must ensure that they are properly associated.
830  *
831  * Note: also supports ARP via RARP packets, per the RFC.
832  */
833 void
834 in_revarpinput(struct ifnet *ifp, struct mbuf *m)
835 {
836 	struct ether_arp *ar;
837 	int op;
838 
839 	ar = mtod(m, struct ether_arp *);
840 	op = ntohs(ar->arp_op);
841 	switch (op) {
842 	case ARPOP_REQUEST:
843 	case ARPOP_REPLY:	/* per RFC */
844 		niq_enqueue(&arpinq, m);
845 		return;
846 	case ARPOP_REVREPLY:
847 		break;
848 	case ARPOP_REVREQUEST:	/* handled by rarpd(8) */
849 	default:
850 		goto out;
851 	}
852 #ifdef NFSCLIENT
853 	if (revarp_ifidx == 0)
854 		goto out;
855 	if (revarp_ifidx != m->m_pkthdr.ph_ifidx) /* !same interface */
856 		goto out;
857 	if (revarp_finished)
858 		goto wake;
859 	if (memcmp(ar->arp_tha, LLADDR(ifp->if_sadl), sizeof(ar->arp_tha)))
860 		goto out;
861 	memcpy(&revarp_srvip, ar->arp_spa, sizeof(revarp_srvip));
862 	memcpy(&revarp_myip, ar->arp_tpa, sizeof(revarp_myip));
863 	revarp_finished = 1;
864 wake:	/* Do wakeup every time in case it was missed. */
865 	wakeup((caddr_t)&revarp_myip);
866 #endif /* NFSCLIENT */
867 
868 out:
869 	m_freem(m);
870 }
871 
872 /*
873  * Send a RARP request for the ip address of the specified interface.
874  * The request should be RFC 903-compliant.
875  */
876 void
877 revarprequest(struct ifnet *ifp)
878 {
879 	struct sockaddr sa;
880 	struct mbuf *m;
881 	struct ether_header *eh;
882 	struct ether_arp *ea;
883 	struct arpcom *ac = (struct arpcom *)ifp;
884 
885 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
886 		return;
887 	m->m_len = sizeof(*ea);
888 	m->m_pkthdr.len = sizeof(*ea);
889 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
890 	m->m_pkthdr.pf.prio = ifp->if_llprio;
891 	m_align(m, sizeof(*ea));
892 	ea = mtod(m, struct ether_arp *);
893 	eh = (struct ether_header *)sa.sa_data;
894 	memset(ea, 0, sizeof(*ea));
895 	memcpy(eh->ether_dhost, etherbroadcastaddr, sizeof(eh->ether_dhost));
896 	eh->ether_type = htons(ETHERTYPE_REVARP);
897 	ea->arp_hrd = htons(ARPHRD_ETHER);
898 	ea->arp_pro = htons(ETHERTYPE_IP);
899 	ea->arp_hln = sizeof(ea->arp_sha);	/* hardware address length */
900 	ea->arp_pln = sizeof(ea->arp_spa);	/* protocol address length */
901 	ea->arp_op = htons(ARPOP_REVREQUEST);
902 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(ea->arp_tha));
903 	memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha));
904 	memcpy(ea->arp_tha, ac->ac_enaddr, sizeof(ea->arp_tha));
905 	sa.sa_family = pseudo_AF_HDRCMPLT;
906 	sa.sa_len = sizeof(sa);
907 	m->m_flags |= M_BCAST;
908 	ifp->if_output(ifp, m, &sa, NULL);
909 }
910 
911 #ifdef NFSCLIENT
912 /*
913  * RARP for the ip address of the specified interface, but also
914  * save the ip address of the server that sent the answer.
915  * Timeout if no response is received.
916  */
917 int
918 revarpwhoarewe(struct ifnet *ifp, struct in_addr *serv_in,
919     struct in_addr *clnt_in)
920 {
921 	int result, count = 20;
922 
923 	if (revarp_finished)
924 		return EIO;
925 
926 	revarp_ifidx = ifp->if_index;
927 	while (count--) {
928 		revarprequest(ifp);
929 		result = tsleep_nsec(&revarp_myip, PSOCK, "revarp",
930 		    MSEC_TO_NSEC(500));
931 		if (result != EWOULDBLOCK)
932 			break;
933 	}
934 	revarp_ifidx = 0;
935 	if (!revarp_finished)
936 		return ENETUNREACH;
937 
938 	memcpy(serv_in, &revarp_srvip, sizeof(*serv_in));
939 	memcpy(clnt_in, &revarp_myip, sizeof(*clnt_in));
940 	return 0;
941 }
942 
943 /* For compatibility: only saves interface address. */
944 int
945 revarpwhoami(struct in_addr *in, struct ifnet *ifp)
946 {
947 	struct in_addr server;
948 	return (revarpwhoarewe(ifp, &server, in));
949 }
950 #endif /* NFSCLIENT */
951