xref: /openbsd-src/sys/netinet/if_ether.c (revision 24bb5fcea3ed904bc467217bdaadb5dfc618d5bf)
1 /*	$OpenBSD: if_ether.c,v 1.248 2021/04/28 21:21:44 bluhm Exp $	*/
2 /*	$NetBSD: if_ether.c,v 1.31 1996/05/11 12:59:58 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
33  */
34 
35 /*
36  * Ethernet address resolution protocol.
37  * TODO:
38  *	add "inuse/lock" bit (or ref. count) along with valid bit
39  */
40 
41 #include "carp.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/timeout.h>
48 #include <sys/kernel.h>
49 #include <sys/syslog.h>
50 #include <sys/queue.h>
51 #include <sys/pool.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_dl.h>
56 #include <net/route.h>
57 #include <net/if_types.h>
58 #include <net/netisr.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/if_ether.h>
63 #include <netinet/ip_var.h>
64 #if NCARP > 0
65 #include <netinet/ip_carp.h>
66 #endif
67 
68 /*
69  *  Locks used to protect struct members in this file:
70  *	a	atomic operations
71  *	I	immutable after creation
72  *	K	kernel lock
73  *	m	arp mutex, needed when net lock is shared
74  *	N	net lock
75  */
76 
77 struct llinfo_arp {
78 	LIST_ENTRY(llinfo_arp)	 la_list;	/* [mN] global arp_list */
79 	struct rtentry		*la_rt;		/* [I] backpointer to rtentry */
80 	struct mbuf_queue	 la_mq;		/* packet hold queue */
81 	time_t			 la_refreshed;	/* when was refresh sent */
82 	int			 la_asked;	/* number of queries sent */
83 };
84 #define LA_HOLD_QUEUE 10
85 #define LA_HOLD_TOTAL 100
86 
87 /* timer values */
88 int 	arpt_prune = (5 * 60);	/* [I] walk list every 5 minutes */
89 int 	arpt_keep = (20 * 60);	/* [a] once resolved, cache for 20 minutes */
90 int 	arpt_down = 20;	/* [a] once declared down, don't send for 20 secs */
91 
92 struct mbuf *arppullup(struct mbuf *m);
93 void arpinvalidate(struct rtentry *);
94 void arptfree(struct rtentry *);
95 void arptimer(void *);
96 struct rtentry *arplookup(struct in_addr *, int, int, unsigned int);
97 void in_arpinput(struct ifnet *, struct mbuf *);
98 void in_revarpinput(struct ifnet *, struct mbuf *);
99 int arpcache(struct ifnet *, struct ether_arp *, struct rtentry *);
100 void arpreply(struct ifnet *, struct mbuf *, struct in_addr *, uint8_t *,
101     unsigned int);
102 
103 struct niqueue arpinq = NIQUEUE_INITIALIZER(50, NETISR_ARP);
104 struct mutex arp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
105 
106 LIST_HEAD(, llinfo_arp) arp_list; /* [mN] list of all llinfo_arp structures */
107 struct	pool arp_pool;		/* [I] pool for llinfo_arp structures */
108 int	arp_maxtries = 5;	/* [I] arp requests before set to rejected */
109 int	la_hold_total;		/* [a] packets currently in the arp queue */
110 
111 #ifdef NFSCLIENT
112 /* revarp state */
113 struct in_addr revarp_myip, revarp_srvip;
114 int revarp_finished;
115 unsigned int revarp_ifidx;
116 #endif /* NFSCLIENT */
117 
118 /*
119  * Timeout routine.  Age arp_tab entries periodically.
120  */
121 /* ARGSUSED */
122 void
123 arptimer(void *arg)
124 {
125 	struct timeout *to = arg;
126 	struct llinfo_arp *la, *nla;
127 
128 	NET_LOCK();
129 	timeout_add_sec(to, arpt_prune);
130 	/* Net lock is exclusive, no arp mutex needed for arp_list here. */
131 	LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
132 		struct rtentry *rt = la->la_rt;
133 
134 		if (rt->rt_expire && rt->rt_expire < getuptime())
135 			arptfree(rt); /* timer has expired; clear */
136 	}
137 	NET_UNLOCK();
138 }
139 
140 void
141 arpinit(void)
142 {
143 	static struct timeout arptimer_to;
144 
145 	pool_init(&arp_pool, sizeof(struct llinfo_arp), 0,
146 	    IPL_SOFTNET, 0, "arp", NULL);
147 
148 	timeout_set_proc(&arptimer_to, arptimer, &arptimer_to);
149 	timeout_add_sec(&arptimer_to, arpt_prune);
150 }
151 
152 void
153 arp_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
154 {
155 	struct sockaddr *gate = rt->rt_gateway;
156 	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
157 
158 	NET_ASSERT_LOCKED();
159 
160 	if (ISSET(rt->rt_flags,
161 	    RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST|RTF_MPLS))
162 		return;
163 
164 	switch (req) {
165 
166 	case RTM_ADD:
167 		if (rt->rt_flags & RTF_CLONING) {
168 			rt->rt_expire = 0;
169 			break;
170 		}
171 		if ((rt->rt_flags & RTF_LOCAL) && !la)
172 			rt->rt_expire = 0;
173 		/*
174 		 * Announce a new entry if requested or warn the user
175 		 * if another station has this IP address.
176 		 */
177 		if (rt->rt_flags & (RTF_ANNOUNCE|RTF_LOCAL))
178 			arprequest(ifp,
179 			    &satosin(rt_key(rt))->sin_addr.s_addr,
180 			    &satosin(rt_key(rt))->sin_addr.s_addr,
181 			    (u_char *)LLADDR(satosdl(gate)));
182 		/*FALLTHROUGH*/
183 	case RTM_RESOLVE:
184 		if (gate->sa_family != AF_LINK ||
185 		    gate->sa_len < sizeof(struct sockaddr_dl)) {
186 			log(LOG_DEBUG, "%s: bad gateway value: %s\n", __func__,
187 			    ifp->if_xname);
188 			break;
189 		}
190 		satosdl(gate)->sdl_type = ifp->if_type;
191 		satosdl(gate)->sdl_index = ifp->if_index;
192 		if (la != NULL)
193 			break; /* This happens on a route change */
194 		/*
195 		 * Case 2:  This route may come from cloning, or a manual route
196 		 * add with a LL address.
197 		 */
198 		la = pool_get(&arp_pool, PR_NOWAIT | PR_ZERO);
199 		rt->rt_llinfo = (caddr_t)la;
200 		if (la == NULL) {
201 			log(LOG_DEBUG, "%s: pool get failed\n", __func__);
202 			break;
203 		}
204 
205 		mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
206 		la->la_rt = rt;
207 		rt->rt_flags |= RTF_LLINFO;
208 		if ((rt->rt_flags & RTF_LOCAL) == 0)
209 			rt->rt_expire = getuptime();
210 		mtx_enter(&arp_mtx);
211 		LIST_INSERT_HEAD(&arp_list, la, la_list);
212 		mtx_leave(&arp_mtx);
213 		break;
214 
215 	case RTM_DELETE:
216 		if (la == NULL)
217 			break;
218 		mtx_enter(&arp_mtx);
219 		LIST_REMOVE(la, la_list);
220 		mtx_leave(&arp_mtx);
221 		rt->rt_llinfo = NULL;
222 		rt->rt_flags &= ~RTF_LLINFO;
223 		atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
224 		pool_put(&arp_pool, la);
225 		break;
226 
227 	case RTM_INVALIDATE:
228 		if (la == NULL)
229 			break;
230 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
231 			arpinvalidate(rt);
232 		break;
233 	}
234 }
235 
236 /*
237  * Broadcast an ARP request. Caller specifies:
238  *	- arp header source ip address
239  *	- arp header target ip address
240  *	- arp header source ethernet address
241  */
242 void
243 arprequest(struct ifnet *ifp, u_int32_t *sip, u_int32_t *tip, u_int8_t *enaddr)
244 {
245 	struct mbuf *m;
246 	struct ether_header *eh;
247 	struct ether_arp *ea;
248 	struct sockaddr sa;
249 
250 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
251 		return;
252 	m->m_len = sizeof(*ea);
253 	m->m_pkthdr.len = sizeof(*ea);
254 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
255 	m->m_pkthdr.pf.prio = ifp->if_llprio;
256 	m_align(m, sizeof(*ea));
257 	ea = mtod(m, struct ether_arp *);
258 	eh = (struct ether_header *)sa.sa_data;
259 	memset(ea, 0, sizeof(*ea));
260 	memcpy(eh->ether_dhost, etherbroadcastaddr, sizeof(eh->ether_dhost));
261 	eh->ether_type = htons(ETHERTYPE_ARP);	/* if_output will not swap */
262 	ea->arp_hrd = htons(ARPHRD_ETHER);
263 	ea->arp_pro = htons(ETHERTYPE_IP);
264 	ea->arp_hln = sizeof(ea->arp_sha);	/* hardware address length */
265 	ea->arp_pln = sizeof(ea->arp_spa);	/* protocol address length */
266 	ea->arp_op = htons(ARPOP_REQUEST);
267 	memcpy(eh->ether_shost, enaddr, sizeof(eh->ether_shost));
268 	memcpy(ea->arp_sha, enaddr, sizeof(ea->arp_sha));
269 	memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa));
270 	memcpy(ea->arp_tpa, tip, sizeof(ea->arp_tpa));
271 	sa.sa_family = pseudo_AF_HDRCMPLT;
272 	sa.sa_len = sizeof(sa);
273 	m->m_flags |= M_BCAST;
274 	ifp->if_output(ifp, m, &sa, NULL);
275 }
276 
277 void
278 arpreply(struct ifnet *ifp, struct mbuf *m, struct in_addr *sip, uint8_t *eaddr,
279     unsigned int rdomain)
280 {
281 	struct ether_header *eh;
282 	struct ether_arp *ea;
283 	struct sockaddr sa;
284 
285 	m_resethdr(m);
286 	m->m_pkthdr.ph_rtableid = rdomain;
287 
288 	ea = mtod(m, struct ether_arp *);
289 	ea->arp_op = htons(ARPOP_REPLY);
290 	ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
291 
292 	/* We're replying to a request. */
293 	memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
294 	memcpy(ea->arp_tpa, ea->arp_spa, sizeof(ea->arp_spa));
295 
296 	memcpy(ea->arp_sha, eaddr, sizeof(ea->arp_sha));
297 	memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa));
298 
299 	eh = (struct ether_header *)sa.sa_data;
300 	memcpy(eh->ether_dhost, ea->arp_tha, sizeof(eh->ether_dhost));
301 	memcpy(eh->ether_shost, eaddr, sizeof(eh->ether_shost));
302 	eh->ether_type = htons(ETHERTYPE_ARP);
303 	sa.sa_family = pseudo_AF_HDRCMPLT;
304 	sa.sa_len = sizeof(sa);
305 	ifp->if_output(ifp, m, &sa, NULL);
306 }
307 
308 /*
309  * Resolve an IP address into an ethernet address.  If success,
310  * desten is filled in.  If there is no entry in arptab,
311  * set one up and broadcast a request for the IP address.
312  * Hold onto this mbuf and resend it once the address
313  * is finally resolved.  A return value of 0 indicates
314  * that desten has been filled in and the packet should be sent
315  * normally; A return value of EAGAIN indicates that the packet
316  * has been taken over here, either now or for later transmission.
317  * Any other return value indicates an error.
318  */
319 int
320 arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
321     struct sockaddr *dst, u_char *desten)
322 {
323 	struct arpcom *ac = (struct arpcom *)ifp;
324 	struct llinfo_arp *la;
325 	struct sockaddr_dl *sdl;
326 	struct rtentry *rt = NULL;
327 	char addr[INET_ADDRSTRLEN];
328 
329 	if (m->m_flags & M_BCAST) {	/* broadcast */
330 		memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
331 		return (0);
332 	}
333 	if (m->m_flags & M_MCAST) {	/* multicast */
334 		ETHER_MAP_IP_MULTICAST(&satosin(dst)->sin_addr, desten);
335 		return (0);
336 	}
337 
338 	rt = rt_getll(rt0);
339 
340 	if (ISSET(rt->rt_flags, RTF_REJECT) &&
341 	    (rt->rt_expire == 0 || rt->rt_expire > getuptime() )) {
342 		m_freem(m);
343 		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
344 	}
345 
346 	if (!ISSET(rt->rt_flags, RTF_LLINFO)) {
347 		log(LOG_DEBUG, "%s: %s: route contains no arp information\n",
348 		    __func__, inet_ntop(AF_INET, &satosin(rt_key(rt))->sin_addr,
349 		    addr, sizeof(addr)));
350 		m_freem(m);
351 		return (EINVAL);
352 	}
353 
354 	sdl = satosdl(rt->rt_gateway);
355 	if (sdl->sdl_alen > 0 && sdl->sdl_alen != ETHER_ADDR_LEN) {
356 		log(LOG_DEBUG, "%s: %s: incorrect arp information\n", __func__,
357 		    inet_ntop(AF_INET, &satosin(dst)->sin_addr,
358 			addr, sizeof(addr)));
359 		goto bad;
360 	}
361 
362 	la = (struct llinfo_arp *)rt->rt_llinfo;
363 	KASSERT(la != NULL);
364 
365 	/*
366 	 * Check the address family and length is valid, the address
367 	 * is resolved; otherwise, try to resolve.
368 	 */
369 	if ((rt->rt_expire == 0 || rt->rt_expire > getuptime()) &&
370 	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
371 		memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
372 
373 		/* refresh ARP entry when timeout gets close */
374 		if (rt->rt_expire != 0 &&
375 		    rt->rt_expire - arpt_keep / 8 < getuptime() &&
376 		    la->la_refreshed + 30 < getuptime()) {
377 			la->la_refreshed = getuptime();
378 			arprequest(ifp,
379 			    &satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
380 			    &satosin(dst)->sin_addr.s_addr,
381 			    ac->ac_enaddr);
382 		}
383 		return (0);
384 	}
385 
386 	if (ifp->if_flags & (IFF_NOARP|IFF_STATICARP))
387 		goto bad;
388 
389 	/*
390 	 * There is an arptab entry, but no ethernet address
391 	 * response yet. Insert mbuf in hold queue if below limit
392 	 * if above the limit free the queue without queuing the new packet.
393 	 */
394 	if (atomic_inc_int_nv(&la_hold_total) <= LA_HOLD_TOTAL) {
395 		if (mq_push(&la->la_mq, m) != 0)
396 			atomic_dec_int(&la_hold_total);
397 	} else {
398 		atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq) + 1);
399 		m_freem(m);
400 	}
401 
402 	/*
403 	 * Re-send the ARP request when appropriate.
404 	 */
405 #ifdef	DIAGNOSTIC
406 	if (rt->rt_expire == 0) {
407 		/* This should never happen. (Should it? -gwr) */
408 		printf("%s: unresolved and rt_expire == 0\n", __func__);
409 		/* Set expiration time to now (expired). */
410 		rt->rt_expire = getuptime();
411 	}
412 #endif
413 	if (rt->rt_expire) {
414 		rt->rt_flags &= ~RTF_REJECT;
415 		if (la->la_asked == 0 || rt->rt_expire != getuptime()) {
416 			rt->rt_expire = getuptime();
417 			if (la->la_asked++ < arp_maxtries)
418 				arprequest(ifp,
419 				    &satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
420 				    &satosin(dst)->sin_addr.s_addr,
421 				    ac->ac_enaddr);
422 			else {
423 				rt->rt_flags |= RTF_REJECT;
424 				rt->rt_expire += arpt_down;
425 				la->la_asked = 0;
426 				la->la_refreshed = 0;
427 				atomic_sub_int(&la_hold_total,
428 				    mq_purge(&la->la_mq));
429 			}
430 		}
431 	}
432 
433 	return (EAGAIN);
434 
435 bad:
436 	m_freem(m);
437 	return (EINVAL);
438 }
439 
440 struct mbuf *
441 arppullup(struct mbuf *m)
442 {
443 	struct arphdr *ar;
444 	int len;
445 
446 #ifdef DIAGNOSTIC
447 	if ((m->m_flags & M_PKTHDR) == 0)
448 		panic("arp without packet header");
449 #endif
450 
451 	len = sizeof(struct arphdr);
452 	if (m->m_len < len && (m = m_pullup(m, len)) == NULL)
453 		return NULL;
454 
455 	ar = mtod(m, struct arphdr *);
456 	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER ||
457 	    ntohs(ar->ar_pro) != ETHERTYPE_IP ||
458 	    ar->ar_hln != ETHER_ADDR_LEN ||
459 	    ar->ar_pln != sizeof(struct in_addr)) {
460 		m_freem(m);
461 		return NULL;
462 	}
463 
464 	len += 2 * (ar->ar_hln + ar->ar_pln);
465 	if (m->m_len < len && (m = m_pullup(m, len)) == NULL)
466 		return NULL;
467 
468 	return m;
469 }
470 
471 /*
472  * Common length and type checks are done here,
473  * then the protocol-specific routine is called.
474  */
475 void
476 arpinput(struct ifnet *ifp, struct mbuf *m)
477 {
478 	if ((m = arppullup(m)) == NULL)
479 		return;
480 	niq_enqueue(&arpinq, m);
481 }
482 
483 void
484 arpintr(void)
485 {
486 	struct mbuf_list ml;
487 	struct mbuf *m;
488 	struct ifnet *ifp;
489 
490 	niq_delist(&arpinq, &ml);
491 
492 	while ((m = ml_dequeue(&ml)) != NULL) {
493 		ifp = if_get(m->m_pkthdr.ph_ifidx);
494 
495 		if (ifp != NULL)
496 			in_arpinput(ifp, m);
497 		else
498 			m_freem(m);
499 
500 		if_put(ifp);
501 	}
502 }
503 
504 /*
505  * ARP for Internet protocols on Ethernet, RFC 826.
506  * In addition, a sanity check is performed on the sender
507  * protocol address, to catch impersonators.
508  */
509 void
510 in_arpinput(struct ifnet *ifp, struct mbuf *m)
511 {
512 	struct ether_arp *ea;
513 	struct rtentry *rt = NULL;
514 	struct sockaddr_in sin;
515 	struct in_addr isaddr, itaddr;
516 	char addr[INET_ADDRSTRLEN];
517 	int op, target = 0;
518 	unsigned int rdomain;
519 
520 	rdomain = rtable_l2(m->m_pkthdr.ph_rtableid);
521 
522 	ea = mtod(m, struct ether_arp *);
523 	op = ntohs(ea->arp_op);
524 	if ((op != ARPOP_REQUEST) && (op != ARPOP_REPLY))
525 		goto out;
526 
527 	memcpy(&itaddr, ea->arp_tpa, sizeof(itaddr));
528 	memcpy(&isaddr, ea->arp_spa, sizeof(isaddr));
529 	memset(&sin, 0, sizeof(sin));
530 	sin.sin_len = sizeof(sin);
531 	sin.sin_family = AF_INET;
532 
533 	if (ETHER_IS_MULTICAST(ea->arp_sha) &&
534 	    ETHER_IS_BROADCAST(ea->arp_sha)) {
535 		inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
536 		log(LOG_ERR, "arp: ether address is broadcast for IP address "
537 		    "%s!\n", addr);
538 		goto out;
539 	}
540 
541 	if (!memcmp(ea->arp_sha, LLADDR(ifp->if_sadl), sizeof(ea->arp_sha)))
542 		goto out;	/* it's from me, ignore it. */
543 
544 	/* Check target against our interface addresses. */
545 	sin.sin_addr = itaddr;
546 	rt = rtalloc(sintosa(&sin), 0, rdomain);
547 	if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
548 	    rt->rt_ifidx == ifp->if_index)
549 		target = 1;
550 	rtfree(rt);
551 	rt = NULL;
552 
553 #if NCARP > 0
554 	if (target && op == ARPOP_REQUEST && ifp->if_type == IFT_CARP &&
555 	    !carp_iamatch(ifp))
556 		goto out;
557 #endif
558 
559 	/* Do we have an ARP cache for the sender? Create if we are target. */
560 	rt = arplookup(&isaddr, target, 0, rdomain);
561 
562 	/* Check sender against our interface addresses. */
563 	if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
564 	    rt->rt_ifidx == ifp->if_index && isaddr.s_addr != INADDR_ANY) {
565 		inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
566 		log(LOG_ERR, "duplicate IP address %s sent from ethernet "
567 		    "address %s\n", addr, ether_sprintf(ea->arp_sha));
568 		itaddr = isaddr;
569 	} else if (rt != NULL) {
570 		int error;
571 
572 		KERNEL_LOCK();
573 		error = arpcache(ifp, ea, rt);
574 		KERNEL_UNLOCK();
575 		if (error)
576 			goto out;
577 	}
578 
579 	if (op == ARPOP_REQUEST) {
580 		uint8_t *eaddr;
581 
582 		if (target) {
583 			/* We already have all info for the reply */
584 			eaddr = LLADDR(ifp->if_sadl);
585 		} else {
586 			rtfree(rt);
587 			rt = arplookup(&itaddr, 0, SIN_PROXY, rdomain);
588 			/*
589 			 * Protect from possible duplicates, only owner
590 			 * should respond
591 			 */
592 			if ((rt == NULL) || (rt->rt_ifidx != ifp->if_index))
593 				goto out;
594 			eaddr = LLADDR(satosdl(rt->rt_gateway));
595 		}
596 		arpreply(ifp, m, &itaddr, eaddr, rdomain);
597 		rtfree(rt);
598 		return;
599 	}
600 
601 out:
602 	rtfree(rt);
603 	m_freem(m);
604 }
605 
606 int
607 arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt)
608 {
609 	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
610 	struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
611 	struct in_addr *spa = (struct in_addr *)ea->arp_spa;
612 	char addr[INET_ADDRSTRLEN];
613 	struct ifnet *rifp;
614 	struct mbuf_list ml;
615 	struct mbuf *m;
616 	unsigned int len;
617 	int changed = 0;
618 
619 	KERNEL_ASSERT_LOCKED();
620 	KASSERT(sdl != NULL);
621 
622 	/*
623 	 * This can happen if the entry has been deleted by another CPU
624 	 * after we found it.
625 	 */
626 	if (la == NULL)
627 		return (0);
628 
629 	if (sdl->sdl_alen > 0) {
630 		if (memcmp(ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) {
631 			if (ISSET(rt->rt_flags, RTF_PERMANENT_ARP|RTF_LOCAL)) {
632 				inet_ntop(AF_INET, spa, addr, sizeof(addr));
633 				log(LOG_WARNING, "arp: attempt to overwrite "
634 				   "permanent entry for %s by %s on %s\n", addr,
635 				   ether_sprintf(ea->arp_sha), ifp->if_xname);
636 				return (-1);
637 			} else if (rt->rt_ifidx != ifp->if_index) {
638 #if NCARP > 0
639 				if (ifp->if_type != IFT_CARP)
640 #endif
641 				{
642 					rifp = if_get(rt->rt_ifidx);
643 					if (rifp == NULL)
644 						return (-1);
645 					inet_ntop(AF_INET, spa, addr,
646 					    sizeof(addr));
647 					log(LOG_WARNING, "arp: attempt to "
648 					    "overwrite entry for %s on %s by "
649 					    "%s on %s\n", addr, rifp->if_xname,
650 					    ether_sprintf(ea->arp_sha),
651 					    ifp->if_xname);
652 					if_put(rifp);
653 				}
654 				return (-1);
655 			} else {
656 				inet_ntop(AF_INET, spa, addr, sizeof(addr));
657 				log(LOG_INFO, "arp info overwritten for %s by "
658 				    "%s on %s\n", addr,
659 				    ether_sprintf(ea->arp_sha), ifp->if_xname);
660 				rt->rt_expire = 1;/* no longer static */
661 			}
662 			changed = 1;
663 		}
664 	} else if (!if_isconnected(ifp, rt->rt_ifidx)) {
665 		rifp = if_get(rt->rt_ifidx);
666 		if (rifp == NULL)
667 			return (-1);
668 		inet_ntop(AF_INET, spa, addr, sizeof(addr));
669 		log(LOG_WARNING, "arp: attempt to add entry for %s on %s by %s"
670 		    " on %s\n", addr, rifp->if_xname,
671 		    ether_sprintf(ea->arp_sha), ifp->if_xname);
672 		if_put(rifp);
673 		return (-1);
674 	}
675 	sdl->sdl_alen = sizeof(ea->arp_sha);
676 	memcpy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha));
677 	if (rt->rt_expire)
678 		rt->rt_expire = getuptime() + arpt_keep;
679 	rt->rt_flags &= ~RTF_REJECT;
680 
681 	/* Notify userland that an ARP resolution has been done. */
682 	if (la->la_asked || changed) {
683 		rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain);
684 	}
685 
686 	la->la_asked = 0;
687 	la->la_refreshed = 0;
688 	mq_delist(&la->la_mq, &ml);
689 	len = ml_len(&ml);
690 	while ((m = ml_dequeue(&ml)) != NULL)
691 		ifp->if_output(ifp, m, rt_key(rt), rt);
692 	/* XXXSMP we discard if other CPU enqueues */
693 	if (mq_len(&la->la_mq) > 0) {
694 		/* mbuf is back in queue. Discard. */
695 		atomic_sub_int(&la_hold_total, len + mq_purge(&la->la_mq));
696 	} else
697 		atomic_sub_int(&la_hold_total, len);
698 
699 	return (0);
700 }
701 
702 void
703 arpinvalidate(struct rtentry *rt)
704 {
705 	struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
706 	struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
707 
708 	atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
709 	sdl->sdl_alen = 0;
710 	la->la_asked = 0;
711 }
712 
713 /*
714  * Free an arp entry.
715  */
716 void
717 arptfree(struct rtentry *rt)
718 {
719 	struct ifnet *ifp;
720 
721 	KASSERT(!ISSET(rt->rt_flags, RTF_LOCAL));
722 	arpinvalidate(rt);
723 
724 	ifp = if_get(rt->rt_ifidx);
725 	KASSERT(ifp != NULL);
726 	if (!ISSET(rt->rt_flags, RTF_STATIC|RTF_CACHED))
727 		rtdeletemsg(rt, ifp, ifp->if_rdomain);
728 	if_put(ifp);
729 }
730 
731 /*
732  * Lookup or enter a new address in arptab.
733  */
734 struct rtentry *
735 arplookup(struct in_addr *inp, int create, int proxy, u_int tableid)
736 {
737 	struct rtentry *rt;
738 	struct sockaddr_inarp sin;
739 	int flags;
740 
741 	memset(&sin, 0, sizeof(sin));
742 	sin.sin_len = sizeof(sin);
743 	sin.sin_family = AF_INET;
744 	sin.sin_addr.s_addr = inp->s_addr;
745 	sin.sin_other = proxy ? SIN_PROXY : 0;
746 	flags = (create) ? RT_RESOLVE : 0;
747 
748 	rt = rtalloc((struct sockaddr *)&sin, flags, tableid);
749 	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY) ||
750 	    !ISSET(rt->rt_flags, RTF_LLINFO) ||
751 	    rt->rt_gateway->sa_family != AF_LINK) {
752 		rtfree(rt);
753 		return (NULL);
754 	}
755 
756 	if (proxy && !ISSET(rt->rt_flags, RTF_ANNOUNCE)) {
757 		while ((rt = rtable_iterate(rt)) != NULL) {
758 			if (ISSET(rt->rt_flags, RTF_ANNOUNCE)) {
759 				break;
760 			}
761 		}
762 	}
763 
764 	return (rt);
765 }
766 
767 /*
768  * Check whether we do proxy ARP for this address and we point to ourselves.
769  */
770 int
771 arpproxy(struct in_addr in, unsigned int rtableid)
772 {
773 	struct sockaddr_dl *sdl;
774 	struct rtentry *rt;
775 	struct ifnet *ifp;
776 	int found = 0;
777 
778 	rt = arplookup(&in, 0, SIN_PROXY, rtableid);
779 	if (!rtisvalid(rt)) {
780 		rtfree(rt);
781 		return (0);
782 	}
783 
784 	/* Check that arp information are correct. */
785 	sdl = satosdl(rt->rt_gateway);
786 	if (sdl->sdl_alen != ETHER_ADDR_LEN) {
787 		rtfree(rt);
788 		return (0);
789 	}
790 
791 	ifp = if_get(rt->rt_ifidx);
792 	if (ifp == NULL) {
793 		rtfree(rt);
794 		return (0);
795 	}
796 
797 	if (!memcmp(LLADDR(sdl), LLADDR(ifp->if_sadl), sdl->sdl_alen))
798 		found = 1;
799 
800 	if_put(ifp);
801 	rtfree(rt);
802 	return (found);
803 }
804 
805 /*
806  * Called from Ethernet interrupt handlers
807  * when ether packet type ETHERTYPE_REVARP
808  * is received.  Common length and type checks are done here,
809  * then the protocol-specific routine is called.
810  */
811 void
812 revarpinput(struct ifnet *ifp, struct mbuf *m)
813 {
814 	if ((m = arppullup(m)) == NULL)
815 		return;
816 	in_revarpinput(ifp, m);
817 }
818 
819 /*
820  * RARP for Internet protocols on Ethernet.
821  * Algorithm is that given in RFC 903.
822  * We are only using for bootstrap purposes to get an ip address for one of
823  * our interfaces.  Thus we support no user-interface.
824  *
825  * Since the contents of the RARP reply are specific to the interface that
826  * sent the request, this code must ensure that they are properly associated.
827  *
828  * Note: also supports ARP via RARP packets, per the RFC.
829  */
830 void
831 in_revarpinput(struct ifnet *ifp, struct mbuf *m)
832 {
833 	struct ether_arp *ar;
834 	int op;
835 
836 	ar = mtod(m, struct ether_arp *);
837 	op = ntohs(ar->arp_op);
838 	switch (op) {
839 	case ARPOP_REQUEST:
840 	case ARPOP_REPLY:	/* per RFC */
841 		niq_enqueue(&arpinq, m);
842 		return;
843 	case ARPOP_REVREPLY:
844 		break;
845 	case ARPOP_REVREQUEST:	/* handled by rarpd(8) */
846 	default:
847 		goto out;
848 	}
849 #ifdef NFSCLIENT
850 	if (revarp_ifidx == 0)
851 		goto out;
852 	if (revarp_ifidx != m->m_pkthdr.ph_ifidx) /* !same interface */
853 		goto out;
854 	if (revarp_finished)
855 		goto wake;
856 	if (memcmp(ar->arp_tha, LLADDR(ifp->if_sadl), sizeof(ar->arp_tha)))
857 		goto out;
858 	memcpy(&revarp_srvip, ar->arp_spa, sizeof(revarp_srvip));
859 	memcpy(&revarp_myip, ar->arp_tpa, sizeof(revarp_myip));
860 	revarp_finished = 1;
861 wake:	/* Do wakeup every time in case it was missed. */
862 	wakeup((caddr_t)&revarp_myip);
863 #endif /* NFSCLIENT */
864 
865 out:
866 	m_freem(m);
867 }
868 
869 /*
870  * Send a RARP request for the ip address of the specified interface.
871  * The request should be RFC 903-compliant.
872  */
873 void
874 revarprequest(struct ifnet *ifp)
875 {
876 	struct sockaddr sa;
877 	struct mbuf *m;
878 	struct ether_header *eh;
879 	struct ether_arp *ea;
880 	struct arpcom *ac = (struct arpcom *)ifp;
881 
882 	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
883 		return;
884 	m->m_len = sizeof(*ea);
885 	m->m_pkthdr.len = sizeof(*ea);
886 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
887 	m->m_pkthdr.pf.prio = ifp->if_llprio;
888 	m_align(m, sizeof(*ea));
889 	ea = mtod(m, struct ether_arp *);
890 	eh = (struct ether_header *)sa.sa_data;
891 	memset(ea, 0, sizeof(*ea));
892 	memcpy(eh->ether_dhost, etherbroadcastaddr, sizeof(eh->ether_dhost));
893 	eh->ether_type = htons(ETHERTYPE_REVARP);
894 	ea->arp_hrd = htons(ARPHRD_ETHER);
895 	ea->arp_pro = htons(ETHERTYPE_IP);
896 	ea->arp_hln = sizeof(ea->arp_sha);	/* hardware address length */
897 	ea->arp_pln = sizeof(ea->arp_spa);	/* protocol address length */
898 	ea->arp_op = htons(ARPOP_REVREQUEST);
899 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(ea->arp_tha));
900 	memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha));
901 	memcpy(ea->arp_tha, ac->ac_enaddr, sizeof(ea->arp_tha));
902 	sa.sa_family = pseudo_AF_HDRCMPLT;
903 	sa.sa_len = sizeof(sa);
904 	m->m_flags |= M_BCAST;
905 	ifp->if_output(ifp, m, &sa, NULL);
906 }
907 
908 #ifdef NFSCLIENT
909 /*
910  * RARP for the ip address of the specified interface, but also
911  * save the ip address of the server that sent the answer.
912  * Timeout if no response is received.
913  */
914 int
915 revarpwhoarewe(struct ifnet *ifp, struct in_addr *serv_in,
916     struct in_addr *clnt_in)
917 {
918 	int result, count = 20;
919 
920 	if (revarp_finished)
921 		return EIO;
922 
923 	revarp_ifidx = ifp->if_index;
924 	while (count--) {
925 		revarprequest(ifp);
926 		result = tsleep_nsec(&revarp_myip, PSOCK, "revarp",
927 		    MSEC_TO_NSEC(500));
928 		if (result != EWOULDBLOCK)
929 			break;
930 	}
931 	revarp_ifidx = 0;
932 	if (!revarp_finished)
933 		return ENETUNREACH;
934 
935 	memcpy(serv_in, &revarp_srvip, sizeof(*serv_in));
936 	memcpy(clnt_in, &revarp_myip, sizeof(*clnt_in));
937 	return 0;
938 }
939 
940 /* For compatibility: only saves interface address. */
941 int
942 revarpwhoami(struct in_addr *in, struct ifnet *ifp)
943 {
944 	struct in_addr server;
945 	return (revarpwhoarewe(ifp, &server, in));
946 }
947 #endif /* NFSCLIENT */
948