xref: /netbsd-src/sys/net/nd.c (revision 9017c2c0e202f5ca190712d0a33835063905389a)
1 /*	$NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Roy Marples.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $");
32 
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/socketvar.h> /* for softnet_lock */
36 
37 #include <net/if_llatbl.h>
38 #include <net/nd.h>
39 #include <net/route.h>
40 
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 
44 static struct nd_domain *nd_domains[AF_MAX];
45 
46 static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */
47 
48 static void nd_set_timertick(struct llentry *, time_t);
49 static struct nd_domain *nd_find_domain(int);
50 
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;	/* entry whose timer fired */
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;		/* packet kept for nd_missed() */
	bool send_ns = false;		/* emit a solicitation on exit? */
	int16_t missed = ND_LLINFO_NOSTATE; /* != NOSTATE => call nd_missed() */
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	/* The entry may have been unlinked while the callout was pending. */
	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	/*
	 * ln_ntick holds the remainder of a timeout that exceeded INT_MAX
	 * ticks (see nd_set_timertick); re-arm for the remainder instead of
	 * running the state machine.
	 */
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	/* Hold the interface so it stays valid after we drop the lock. */
	if_acquire(ifp, &psref);

	/* Copy the target address out while we still hold the lock. */
	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		/* Grace period over: destroy the entry. */
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		/* Still resolving: retry until nd_mmaxtries is exhausted. */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.  Keep the first for
			 * nd_missed() (e.g. to source an ICMP error) and
			 * free the rest of the queue.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
			ln->la_numheld = 0;
		}

		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		break;

	case ND_LLINFO_REACHABLE:
		/* Reachability confirmation aged out; demote to STALE. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PURGE: /* FALLTHROUGH */
	case ND_LLINFO_STALE:
		/* Garbage collect unused entries. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			/* Start NUD: probe the neighbor unicast. */
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else {
			/* NUD disabled on this interface; fall back. */
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			/* Keep probing unicast. */
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * then backoff exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before. We don't want to be back to the old ARP model where
		 * no unreachability errors are returned because very
		 * few applications would look at unreachability hints provided
		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		/* WAITDELETE probes back off exponentially (RFC 7048). */
		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
		else
			nd_set_timer(ln, ND_TIMER_RETRANS);
		/*
		 * Include our cached link-layer address only once the entry
		 * is resolved and still valid.
		 */
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		/* Drop the lock (and our ref) before transmitting. */
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	/*
	 * Report the failure after all locks are dropped; nd was assigned
	 * on every path that sets missed != ND_LLINFO_NOSTATE.
	 */
	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}
215 
/*
 * Arm the entry's callout to fire nd_timer() after xtick ticks, taking an
 * llentry reference for the pending callout.  Timeouts larger than INT_MAX
 * ticks are split: the excess is parked in ln_ntick and nd_timer() re-arms
 * via ND_TIMER_TICK until it drains.  Called with the entry write-locked.
 */
static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	/* The INT_MAX splitting below requires time_t wider than int. */
	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule.  Unfortunately
	 * we don't have a mean to know the overwrite, so we need to know it
	 * using callout_stop.  We need to call callout_pending first to exclude
	 * the case that the callout has never been scheduled.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		/* Drop the ref held by the schedule we are replacing. */
		expired = callout_stop(&ln->la_timer);
		if (!expired)
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	/* Reference owned by the pending callout; released in nd_timer(). */
	LLE_ADDREF(ln);
	if (xtick > INT_MAX) {
		/* callout_reset takes an int; defer the excess to ln_ntick. */
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	/* NOTE(review): ln_timer_ch and la_timer presumably name the same
	 * callout (aliases in <net/if_llatbl.h>) — confirm. */
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}
248 
249 void
nd_set_timer(struct llentry * ln,int type)250 nd_set_timer(struct llentry *ln, int type)
251 {
252 	time_t xtick;
253 	struct ifnet *ifp;
254 	struct nd_domain *nd;
255 
256 	LLE_WLOCK_ASSERT(ln);
257 
258 	ifp = ln->lle_tbl->llt_ifp;
259 	nd = nd_find_domain(ln->lle_tbl->llt_af);
260 
261 	switch (type) {
262 	case ND_TIMER_IMMEDIATE:
263 		xtick = 0;
264 		break;
265 	case ND_TIMER_TICK:
266 		xtick = ln->ln_ntick;
267 		break;
268 	case ND_TIMER_RETRANS:
269 		xtick = nd->nd_retrans(ifp) * hz / 1000;
270 		break;
271 	case ND_TIMER_RETRANS_BACKOFF:
272 	{
273 		unsigned int retrans = nd->nd_retrans(ifp);
274 		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
275 
276 		xtick = retrans;
277 		while (attempts-- != 0) {
278 			xtick *= nd->nd_retransmultiple;
279 			if (xtick > nd->nd_maxretrans || xtick < retrans) {
280 				xtick = nd->nd_maxretrans;
281 				break;
282 			}
283 		}
284 		xtick = xtick * hz / 1000;
285 		break;
286 	}
287 	case ND_TIMER_REACHABLE:
288 		xtick = nd->nd_reachable(ifp) * hz / 1000;
289 		break;
290 	case ND_TIMER_EXPIRE:
291 		if (ln->ln_expire > time_uptime)
292 			xtick = (ln->ln_expire - time_uptime) * hz;
293 		else
294 			xtick = nd_gctimer * hz;
295 		break;
296 	case ND_TIMER_DELAY:
297 		xtick = nd->nd_delay * hz;
298 		break;
299 	case ND_TIMER_GC:
300 		xtick = nd_gctimer * hz;
301 		break;
302 	default:
303 		panic("%s: invalid timer type\n", __func__);
304 	}
305 
306 	nd_set_timertick(ln, xtick);
307 }
308 
/*
 * Resolve the link-layer address for an outgoing packet m.
 * On success (entry already resolved) copies the address into lldst and
 * returns 0 with the entry unlocked.  Otherwise queues m on the entry's
 * hold queue, kicks off a solicitation if needed, and returns EWOULDBLOCK
 * (resolution in progress) or EHOSTUNREACH/EHOSTDOWN (retries exhausted).
 * Called with the entry write-locked; always unlocks before returning.
 * Ownership of m passes to the hold queue on the non-zero-return paths.
 */
int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no link-layer address
	 * response yet.  Append this latest packet to the end of the
	 * hold queue.  If queueing it would exceed nd_maxqueuelen
	 * packets, drop the oldest packets from the front of the queue
	 * until it fits.
	 */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE)
		ln->ln_state = ND_LLINFO_INCOMPLETE;

#ifdef MBUFTRACE
	m_claimm(m, ln->lle_tbl->llt_mowner);
#endif
	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		/* Walk to the tail, counting, and append m there. */
		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
		    ln->la_numheld, i);
		/* Evict from the head until the queue has room for m. */
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
			ln->la_numheld--;
		}
	} else {
		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);
		ln->ln_hold = m;
	}

	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
	    "la_numheld=%d nd_maxqueuelen=%d",
	    ln->la_numheld, nd->nd_maxqueuelen);
	ln->la_numheld++;

	/*
	 * Pick the error the caller will see: hard failure once all
	 * multicast solicitations are spent, otherwise "try again later".
	 */
	if (ln->ln_asked >= nd->nd_mmaxtries)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		/* Snapshot what we need, then drop the lock to transmit. */
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}
429 
430 void
nd_nud_hint(struct llentry * ln)431 nd_nud_hint(struct llentry *ln)
432 {
433 	struct nd_domain *nd;
434 
435 	if (ln == NULL)
436 		return;
437 
438 	LLE_WLOCK_ASSERT(ln);
439 
440 	if (ln->ln_state < ND_LLINFO_REACHABLE)
441 		goto done;
442 
443 	nd = nd_find_domain(ln->lle_tbl->llt_af);
444 
445 	/*
446 	 * if we get upper-layer reachability confirmation many times,
447 	 * it is possible we have false information.
448 	 */
449 	ln->ln_byhint++;
450 	if (ln->ln_byhint > nd->nd_maxnudhint)
451 		goto done;
452 
453 	ln->ln_state = ND_LLINFO_REACHABLE;
454 	if (!ND_IS_LLINFO_PERMANENT(ln))
455 		nd_set_timer(ln, ND_TIMER_REACHABLE);
456 
457 done:
458 	LLE_WUNLOCK(ln);
459 
460 	return;
461 }
462 
463 static struct nd_domain *
nd_find_domain(int af)464 nd_find_domain(int af)
465 {
466 
467 	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
468 	return nd_domains[af];
469 }
470 
471 void
nd_attach_domain(struct nd_domain * nd)472 nd_attach_domain(struct nd_domain *nd)
473 {
474 
475 	KASSERT(nd->nd_family < __arraycount(nd_domains));
476 	nd_domains[nd->nd_family] = nd;
477 }
478