/*	$NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Roy Marples.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $");

#include <sys/callout.h>
#include <sys/mbuf.h>
#include <sys/socketvar.h>	/* for softnet_lock */

#include <net/if_llatbl.h>
#include <net/nd.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip6.h>

static struct nd_domain *nd_domains[AF_MAX];

static int nd_gctimer = (60 * 60 * 24);	/* 1 day: garbage collection timer */

static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);

static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;
	bool send_ns = false;
	int16_t missed = ND_LLINFO_NOSTATE;
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	if_acquire(ifp, &psref);

	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
			ln->la_numheld = 0;
		}

		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		break;

	case ND_LLINFO_REACHABLE:
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PURGE:	/* FALLTHROUGH */
	case ND_LLINFO_STALE:
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says that in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address and
		 * retransmissions then back off exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before.  We don't want to go back to the old ARP model
		 * where no unreachability errors are returned, because very
		 * few applications would look at unreachability hints such as
		 * ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
		else
			nd_set_timer(ln, ND_TIMER_RETRANS);
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}

static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule.  Unfortunately
	 * we have no way of knowing about the overwrite directly, so we have
	 * to detect it with callout_stop.  We call callout_pending first to
	 * exclude the case that the callout has never been scheduled.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		expired = callout_stop(&ln->la_timer);
		if (!expired)
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	LLE_ADDREF(ln);
	if (xtick > INT_MAX) {
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}

void
nd_set_timer(struct llentry *ln, int type)
{
	time_t xtick;
	struct ifnet *ifp;
	struct nd_domain *nd;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	switch (type) {
	case ND_TIMER_IMMEDIATE:
		xtick = 0;
		break;
	case ND_TIMER_TICK:
		xtick = ln->ln_ntick;
		break;
	case ND_TIMER_RETRANS:
		xtick = nd->nd_retrans(ifp) * hz / 1000;
		break;
	case ND_TIMER_RETRANS_BACKOFF:
	{
		unsigned int retrans = nd->nd_retrans(ifp);
		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;

		xtick = retrans;
		while (attempts-- != 0) {
			xtick *= nd->nd_retransmultiple;
			if (xtick > nd->nd_maxretrans || xtick < retrans) {
				xtick = nd->nd_maxretrans;
				break;
			}
		}
		xtick = xtick * hz / 1000;
		break;
	}
	case ND_TIMER_REACHABLE:
		xtick = nd->nd_reachable(ifp) * hz / 1000;
		break;
	case ND_TIMER_EXPIRE:
		if (ln->ln_expire > time_uptime)
			xtick = (ln->ln_expire - time_uptime) * hz;
		else
			xtick = nd_gctimer * hz;
		break;
	case ND_TIMER_DELAY:
		xtick = nd->nd_delay * hz;
		break;
	case ND_TIMER_GC:
		xtick = nd_gctimer * hz;
		break;
	default:
		panic("%s: invalid timer type\n", __func__);
	}

	nd_set_timertick(ln, xtick);
}

int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds, so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no link-layer address
	 * response yet.  Append this latest packet to the end of the
	 * packet queue held in the entry, as long as the number of held
	 * packets does not exceed maxqueuelen.  When it would exceed
	 * maxqueuelen, the oldest packet in the queue is removed.
	 */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE)
		ln->ln_state = ND_LLINFO_INCOMPLETE;

#ifdef MBUFTRACE
	m_claimm(m, ln->lle_tbl->llt_mowner);
#endif
	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
		    ln->la_numheld, i);
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
			ln->la_numheld--;
		}
	} else {
		KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
		    ln->la_numheld);
		ln->ln_hold = m;
	}

	KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
	    "la_numheld=%d nd_maxqueuelen=%d",
	    ln->la_numheld, nd->nd_maxqueuelen);
	ln->la_numheld++;

	if (ln->ln_asked >= nd->nd_mmaxtries)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}

void
nd_nud_hint(struct llentry *ln)
{
	struct nd_domain *nd;

	if (ln == NULL)
		return;

	LLE_WLOCK_ASSERT(ln);

	if (ln->ln_state < ND_LLINFO_REACHABLE)
		goto done;

	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/*
	 * If we get upper-layer reachability confirmation many times,
	 * it is possible we have false information.
	 */
	ln->ln_byhint++;
	if (ln->ln_byhint > nd->nd_maxnudhint)
		goto done;

	ln->ln_state = ND_LLINFO_REACHABLE;
	if (!ND_IS_LLINFO_PERMANENT(ln))
		nd_set_timer(ln, ND_TIMER_REACHABLE);

done:
	LLE_WUNLOCK(ln);

	return;
}

static struct nd_domain *
nd_find_domain(int af)
{

	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
	return nd_domains[af];
}

void
nd_attach_domain(struct nd_domain *nd)
{

	KASSERT(nd->nd_family < __arraycount(nd_domains));
	nd_domains[nd->nd_family] = nd;
}
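
/*
 * Illustrative sketch (kept under #if 0, not compiled): how a per-address-
 * family protocol such as IPv6 neighbour discovery would plug into this
 * framework by registering a struct nd_domain with nd_attach_domain().
 * The member names below are the ones this file dereferences; consult
 * net/nd.h for the authoritative structure definition and callback
 * prototypes.  The example_* identifiers and the numeric values are
 * hypothetical.
 */
#if 0
static struct nd_domain example_nd_domain = {
	.nd_family = AF_INET6,
	.nd_delay = 5,			/* seconds spent in the DELAY state */
	.nd_mmaxtries = 3,		/* multicast solicitations (INCOMPLETE) */
	.nd_umaxtries = 3,		/* unicast probes (PROBE) */
	.nd_retransmultiple = 2,	/* ND_TIMER_RETRANS_BACKOFF multiplier */
	.nd_maxretrans = 60000,		/* backoff cap, in milliseconds */
	.nd_maxnudhint = 0,		/* upper-layer hints honoured in a row */
	.nd_maxqueuelen = 16,		/* packets held per unresolved entry */
	/*
	 * The callbacks used above (nd_nud_enabled, nd_reachable, nd_retrans,
	 * nd_holdsrc, nd_output, nd_missed and nd_free) would also be filled
	 * in here.
	 */
};

static void
example_nd_init(void)
{

	/* Make the domain visible to nd_find_domain(AF_INET6). */
	nd_attach_domain(&example_nd_domain);
}
#endif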