1 /* $NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Roy Marples.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $");
32
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/socketvar.h> /* for softnet_lock */
36
37 #include <net/if_llatbl.h>
38 #include <net/nd.h>
39 #include <net/route.h>
40
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43
/* Per-address-family ND domains, indexed by AF_*; see nd_attach_domain(). */
static struct nd_domain *nd_domains[AF_MAX];

static int nd_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */

static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);
50
/*
 * Per-llentry callout handler: advances the neighbor cache entry's
 * state machine (RFC 4861 / RFC 7048) each time its timer fires.
 * Takes the softnet lock unless the stack is NET_MPSAFE, and may
 * free the entry, retransmit a solicitation, or report a miss.
 */
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;
	bool send_ns = false;
	int16_t missed = ND_LLINFO_NOSTATE;
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	/* Entry is being removed from the table; just drop our reference. */
	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	/*
	 * ln_ntick carries the portion of a long timeout that did not fit
	 * in the callout's int argument; re-arm for the remainder.
	 */
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	/* Hold the interface so it cannot detach while we run unlocked. */
	if_acquire(ifp, &psref);

	/* Copy the target address out now: ln may be freed below. */
	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		/* Grace period over: destroy the entry. */
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		/* Still resolving: retransmit the solicitation. */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		/*
		 * Resolution failed.  Keep the first held packet for
		 * nd_missed() (e.g. to generate an unreachable error)
		 * and free the rest of the hold queue.
		 */
		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
			ln->la_numheld = 0;
		}

		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		break;

	case ND_LLINFO_REACHABLE:
		/* Reachability confirmation aged out; demote to STALE. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PURGE: /* FALLTHROUGH */
	case ND_LLINFO_STALE:
		/* Unused for the GC period: garbage-collect the entry. */
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			/* Begin unicast probing (NUD). */
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;	/* probe unicast */
		} else {
			/* NUD disabled on this interface: fall back to GC. */
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			daddrp = &taddr;	/* keep probing unicast */
		} else {
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * then backoff exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before. We don't want to be back to the old ARP model where
		 * no unreachability errors are returned because very
		 * few applications would look at unreachability hints provided
		 * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		/* WAITDELETE probes back off exponentially (RFC 7048). */
		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
		else
			nd_set_timer(ln, ND_TIMER_RETRANS);
		/* Supply our cached link-layer address only once resolved. */
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		/* nd_output() must be called without the entry locked. */
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	/* missed is only set on paths where nd and ifp were initialized. */
	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}
215
/*
 * Arm ln's callout to invoke nd_timer() after xtick ticks, taking a
 * reference on ln for the pending callout.  Timeouts wider than
 * INT_MAX ticks are carried over in ln_ntick and re-armed in chunks
 * by nd_timer() via ND_TIMER_TICK.
 *
 * Called with ln write-locked.
 */
static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	/* The INT_MAX carry-over below requires time_t wider than int. */
	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule. Unfortunately
	 * we don't have a mean to know the overwrite, so we need to know it
	 * using callout_stop. We need to call callout_pending first to exclude
	 * the case that the callout has never been scheduled.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		/*
		 * If the callout had not fired yet, its reference on ln
		 * would be leaked when we re-arm below; drop it here.
		 */
		expired = callout_stop(&ln->la_timer);
		if (!expired)
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	LLE_ADDREF(ln);		/* reference held by the pending callout */
	if (xtick > INT_MAX) {
		/* callout_reset() takes an int; defer the excess. */
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}
248
249 void
nd_set_timer(struct llentry * ln,int type)250 nd_set_timer(struct llentry *ln, int type)
251 {
252 time_t xtick;
253 struct ifnet *ifp;
254 struct nd_domain *nd;
255
256 LLE_WLOCK_ASSERT(ln);
257
258 ifp = ln->lle_tbl->llt_ifp;
259 nd = nd_find_domain(ln->lle_tbl->llt_af);
260
261 switch (type) {
262 case ND_TIMER_IMMEDIATE:
263 xtick = 0;
264 break;
265 case ND_TIMER_TICK:
266 xtick = ln->ln_ntick;
267 break;
268 case ND_TIMER_RETRANS:
269 xtick = nd->nd_retrans(ifp) * hz / 1000;
270 break;
271 case ND_TIMER_RETRANS_BACKOFF:
272 {
273 unsigned int retrans = nd->nd_retrans(ifp);
274 unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
275
276 xtick = retrans;
277 while (attempts-- != 0) {
278 xtick *= nd->nd_retransmultiple;
279 if (xtick > nd->nd_maxretrans || xtick < retrans) {
280 xtick = nd->nd_maxretrans;
281 break;
282 }
283 }
284 xtick = xtick * hz / 1000;
285 break;
286 }
287 case ND_TIMER_REACHABLE:
288 xtick = nd->nd_reachable(ifp) * hz / 1000;
289 break;
290 case ND_TIMER_EXPIRE:
291 if (ln->ln_expire > time_uptime)
292 xtick = (ln->ln_expire - time_uptime) * hz;
293 else
294 xtick = nd_gctimer * hz;
295 break;
296 case ND_TIMER_DELAY:
297 xtick = nd->nd_delay * hz;
298 break;
299 case ND_TIMER_GC:
300 xtick = nd_gctimer * hz;
301 break;
302 default:
303 panic("%s: invalid timer type\n", __func__);
304 }
305
306 nd_set_timertick(ln, xtick);
307 }
308
/*
 * Resolve the link-layer address for the neighbor entry ln, copying
 * it into lldst (at most dstsize bytes) when already known.
 *
 * Called with ln write-locked; the lock is always released before
 * returning.  Returns:
 *   0            - address resolved; lldst filled in, caller sends m
 *   EWOULDBLOCK  - resolution in progress; m queued on the entry
 *   EHOSTDOWN / EHOSTUNREACH - max solicitations already sent
 * On a non-zero return, m is held on (owned by) the entry's queue.
 */
int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure we do
	 * neighbor unreachability detection on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no ethernet address
	 * response yet. Append this latest packet to the end of the
	 * packet queue in the mbuf, unless the number of the packet
	 * does not exceed maxqueuelen. When it exceeds maxqueuelen,
	 * the oldest packet in the queue will be removed.
	 */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE)
		ln->ln_state = ND_LLINFO_INCOMPLETE;

#ifdef MBUFTRACE
	m_claimm(m, ln->lle_tbl->llt_mowner);
#endif
	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		/* Walk to the tail, counting queued packets, and append m. */
		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
		    ln->la_numheld, i);
		/* Drop packets from the head until the queue fits. */
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
			ln->la_numheld--;
		}
	} else {
		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);
		ln->ln_hold = m;
	}

	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
	    "la_numheld=%d nd_maxqueuelen=%d",
	    ln->la_numheld, nd->nd_maxqueuelen);
	ln->la_numheld++;

	/*
	 * Once all solicitations have been sent, report the neighbor as
	 * unreachable (EHOSTUNREACH when it is a gateway) rather than
	 * pretending the packet will go out.
	 */
	if (ln->ln_asked >= nd->nd_mmaxtries)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		/* Copy the destination out: nd_output runs unlocked. */
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}
429
430 void
nd_nud_hint(struct llentry * ln)431 nd_nud_hint(struct llentry *ln)
432 {
433 struct nd_domain *nd;
434
435 if (ln == NULL)
436 return;
437
438 LLE_WLOCK_ASSERT(ln);
439
440 if (ln->ln_state < ND_LLINFO_REACHABLE)
441 goto done;
442
443 nd = nd_find_domain(ln->lle_tbl->llt_af);
444
445 /*
446 * if we get upper-layer reachability confirmation many times,
447 * it is possible we have false information.
448 */
449 ln->ln_byhint++;
450 if (ln->ln_byhint > nd->nd_maxnudhint)
451 goto done;
452
453 ln->ln_state = ND_LLINFO_REACHABLE;
454 if (!ND_IS_LLINFO_PERMANENT(ln))
455 nd_set_timer(ln, ND_TIMER_REACHABLE);
456
457 done:
458 LLE_WUNLOCK(ln);
459
460 return;
461 }
462
463 static struct nd_domain *
nd_find_domain(int af)464 nd_find_domain(int af)
465 {
466
467 KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
468 return nd_domains[af];
469 }
470
471 void
nd_attach_domain(struct nd_domain * nd)472 nd_attach_domain(struct nd_domain *nd)
473 {
474
475 KASSERT(nd->nd_family < __arraycount(nd_domains));
476 nd_domains[nd->nd_family] = nd;
477 }
478