xref: /netbsd-src/sys/net/nd.c (revision 627f7eb200a4419d89b531d55fccd2ee3ffdcde0)
1 /*	$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Roy Marples.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $");
32 
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/socketvar.h> /* for softnet_lock */
36 
37 #include <net/if_llatbl.h>
38 #include <net/nd.h>
39 #include <net/route.h>
40 
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 
44 static struct nd_domain *nd_domains[AF_MAX];
45 
46 static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */
47 
48 static void nd_set_timertick(struct llentry *, time_t);
49 static struct nd_domain *nd_find_domain(int);
50 
/*
 * nd_timer --
 *	Callout handler for a neighbor cache entry: advances the
 *	Neighbor Discovery state machine for the llentry passed in
 *	"arg" when its timer fires.  Depending on the current state it
 *	retransmits a solicitation, demotes the entry, or frees it.
 *
 *	Runs with the softnet/kernel lock held (unless NET_MPSAFE) and
 *	takes the entry's write lock; the entry lock is dropped before
 *	any packet is transmitted.
 */
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;
	bool send_ns = false;
	int16_t missed = ND_LLINFO_NOSTATE;
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	/* The entry may have been unlinked while the callout was pending. */
	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	/*
	 * A long timeout was split by nd_set_timertick() because callouts
	 * take an int tick count; re-arm with the remaining ticks.
	 */
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	/* Hold a passive reference so ifp survives the unlocked output. */
	if_acquire(ifp, &psref);

	/* Copy the neighbor address out so it survives freeing ln. */
	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		/* Grace period over: drop the callout reference and free. */
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		/* Still resolving: solicit again until nd_mmaxtries. */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		/* Resolution failed; keep one held packet for nd_missed(). */
		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
		}

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		break;

	case ND_LLINFO_REACHABLE:
		/* Reachability confirmation aged out; demote to STALE. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PURGE: /* FALLTHROUGH */
	case ND_LLINFO_STALE:
		/* Garbage collect an unused, non-permanent entry. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			/* Begin unicast NUD probing of the neighbor. */
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else {
			/* NUD disabled on this interface; just go STALE. */
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			/* Probe the neighbor directly (unicast). */
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * then backoff exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before. We don't want to be back to the old ARP model where
		 * no unreachability errors are returned because very
		 * few applications would look at unreachability hints provided
		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		/* Exponential backoff while waiting to delete. */
		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
		else
			nd_set_timer(ln, ND_TIMER_RETRANS);
		/* Use the cached link-layer address when still valid. */
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		/* Drop the entry lock before transmitting. */
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	/* nd is always initialized before missed can change from NOSTATE. */
	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}
211 
/*
 * nd_set_timertick --
 *	Schedule nd_timer() on the entry in "xtick" callout ticks,
 *	taking a reference on the entry for the pending callout.
 *	Because callout_reset() takes an int, any ticks beyond INT_MAX
 *	are stored in ln_ntick and re-armed in instalments by
 *	nd_timer() via ND_TIMER_TICK.
 */
static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	/* The INT_MAX split below relies on time_t being wider than int. */
	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule.  Unfortunately
	 * we don't have a means of detecting the overwrite, so we need to
	 * detect it using callout_stop.  We need to call callout_pending
	 * first to exclude the case that the callout has never been
	 * scheduled.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		expired = callout_stop(&ln->la_timer);
		if (!expired)
			/* Drop the reference held by the cancelled schedule. */
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	LLE_ADDREF(ln);		/* reference held by the pending callout */
	if (xtick > INT_MAX) {
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}
244 
245 void
246 nd_set_timer(struct llentry *ln, int type)
247 {
248 	time_t xtick;
249 	struct ifnet *ifp;
250 	struct nd_domain *nd;
251 
252 	LLE_WLOCK_ASSERT(ln);
253 
254 	ifp = ln->lle_tbl->llt_ifp;
255 	nd = nd_find_domain(ln->lle_tbl->llt_af);
256 
257 	switch (type) {
258 	case ND_TIMER_IMMEDIATE:
259 		xtick = 0;
260 		break;
261 	case ND_TIMER_TICK:
262 		xtick = ln->ln_ntick;
263 		break;
264 	case ND_TIMER_RETRANS:
265 		xtick = nd->nd_retrans(ifp) * hz / 1000;
266 		break;
267 	case ND_TIMER_RETRANS_BACKOFF:
268 	{
269 		unsigned int retrans = nd->nd_retrans(ifp);
270 		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
271 
272 		xtick = retrans;
273 		while (attempts-- != 0) {
274 			xtick *= nd->nd_retransmultiple;
275 			if (xtick > nd->nd_maxretrans || xtick < retrans) {
276 				xtick = nd->nd_maxretrans;
277 				break;
278 			}
279 		}
280 		xtick = xtick * hz / 1000;
281 		break;
282 	}
283 	case ND_TIMER_REACHABLE:
284 		xtick = nd->nd_reachable(ifp) * hz / 1000;
285 		break;
286 	case ND_TIMER_EXPIRE:
287 		if (ln->ln_expire > time_uptime)
288 			xtick = (ln->ln_expire - time_uptime) * hz;
289 		else
290 			xtick = nd_gctimer * hz;
291 		break;
292 	case ND_TIMER_DELAY:
293 		xtick = nd->nd_delay * hz;
294 		break;
295 	case ND_TIMER_GC:
296 		xtick = nd_gctimer * hz;
297 		break;
298 	default:
299 		panic("%s: invalid timer type\n", __func__);
300 	}
301 
302 	nd_set_timertick(ln, xtick);
303 }
304 
/*
 * nd_resolve --
 *	Resolve the link-layer address for the neighbor entry "ln".
 *	When the address is known it is copied into "lldst" (at most
 *	"dstsize" bytes) and 0 is returned.  Otherwise "m" is queued on
 *	the entry's hold queue and solicitation is started/continued.
 *
 *	Called with ln write-locked; the lock is always released.
 *	Returns 0 on success, EWOULDBLOCK while resolution is pending,
 *	or EHOSTUNREACH/EHOSTDOWN once nd_mmaxtries probes have failed.
 */
int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no link-layer address
	 * response yet.  Append this latest packet to the end of the
	 * entry's hold queue; if the queue would then hold more than
	 * nd_maxqueuelen packets, the oldest packets are dropped.
	 */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE)
		ln->ln_state = ND_LLINFO_INCOMPLETE;

	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		/* Walk to the tail, counting, and append m there. */
		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		/* Trim the queue from the head down to nd_maxqueuelen. */
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
		}
	} else
		ln->ln_hold = m;

	/* Past the probe limit, report unreachability to the caller. */
	if (ln->ln_asked >= nd->nd_mmaxtries)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		/* Copy out what we need, then drop the entry lock to send. */
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}
411 
412 void
413 nd_nud_hint(struct llentry *ln)
414 {
415 	struct nd_domain *nd;
416 
417 	if (ln == NULL)
418 		return;
419 
420 	LLE_WLOCK_ASSERT(ln);
421 
422 	if (ln->ln_state < ND_LLINFO_REACHABLE)
423 		goto done;
424 
425 	nd = nd_find_domain(ln->lle_tbl->llt_af);
426 
427 	/*
428 	 * if we get upper-layer reachability confirmation many times,
429 	 * it is possible we have false information.
430 	 */
431 	ln->ln_byhint++;
432 	if (ln->ln_byhint > nd->nd_maxnudhint)
433 		goto done;
434 
435 	ln->ln_state = ND_LLINFO_REACHABLE;
436 	if (!ND_IS_LLINFO_PERMANENT(ln))
437 		nd_set_timer(ln, ND_TIMER_REACHABLE);
438 
439 done:
440 	LLE_WUNLOCK(ln);
441 
442 	return;
443 }
444 
445 static struct nd_domain *
446 nd_find_domain(int af)
447 {
448 
449 	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
450 	return nd_domains[af];
451 }
452 
453 void
454 nd_attach_domain(struct nd_domain *nd)
455 {
456 
457 	KASSERT(nd->nd_family < __arraycount(nd_domains));
458 	nd_domains[nd->nd_family] = nd;
459 }
460