/*	$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $	*/

/*
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Roy Marples.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.4 2020/09/15 23:40:03 roy Exp $");

#include <sys/callout.h>
#include <sys/mbuf.h>
#include <sys/socketvar.h>	/* for softnet_lock */

#include <net/if_llatbl.h>
#include <net/nd.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip6.h>

static struct nd_domain *nd_domains[AF_MAX];

static int nd_gctimer = (60 * 60 * 24);	/* 1 day: garbage collection timer */

static void nd_set_timertick(struct llentry *, time_t);
static struct nd_domain *nd_find_domain(int);

/*
 * Per-entry callout for the neighbor cache: advances the entry through
 * the ND state machine and sends neighbor solicitations as needed.
 */
static void
nd_timer(void *arg)
{
	struct llentry *ln = arg;
	struct nd_domain *nd;
	struct ifnet *ifp = NULL;
	struct psref psref;
	struct mbuf *m = NULL;
	bool send_ns = false;
	int16_t missed = ND_LLINFO_NOSTATE;
	union l3addr taddr, *daddrp = NULL;

	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
	LLE_WLOCK(ln);

	if (!(ln->la_flags & LLE_LINKED))
		goto out;
	if (ln->ln_ntick > 0) {
		nd_set_timer(ln, ND_TIMER_TICK);
		goto out;
	}

	nd = nd_find_domain(ln->lle_tbl->llt_af);
	ifp = ln->lle_tbl->llt_ifp;
	KASSERT(ifp != NULL);
	if_acquire(ifp, &psref);

	memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));

	switch (ln->ln_state) {
	case ND_LLINFO_WAITDELETE:
		LLE_REMREF(ln);
		nd->nd_free(ln, 0);
		ln = NULL;
		break;

	case ND_LLINFO_INCOMPLETE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		if (ln->ln_hold) {
			struct mbuf *m0, *mnxt;

			/*
			 * Assuming every packet in ln_hold
			 * has the same IP header.
			 */
			m = ln->ln_hold;
			for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
				mnxt = m0->m_nextpkt;
				m0->m_nextpkt = NULL;
				m_freem(m0);
			}

			m->m_nextpkt = NULL;
			ln->ln_hold = NULL;
		}

		missed = ND_LLINFO_INCOMPLETE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		break;

	case ND_LLINFO_REACHABLE:
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PURGE:	/* FALLTHROUGH */
	case ND_LLINFO_STALE:
		if (!ND_IS_LLINFO_PERMANENT(ln)) {
			LLE_REMREF(ln);
			nd->nd_free(ln, 1);
			ln = NULL;
		}
		break;

	case ND_LLINFO_DELAY:
		if (nd->nd_nud_enabled(ifp)) {
			ln->ln_asked = 1;
			ln->ln_state = ND_LLINFO_PROBE;
			send_ns = true;
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_STALE;
			nd_set_timer(ln, ND_TIMER_GC);
		}
		break;

	case ND_LLINFO_PROBE:
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_umaxtries) {
			daddrp = &taddr;
		} else {
			ln->ln_state = ND_LLINFO_UNREACHABLE;
			ln->ln_asked = 1;
			missed = ND_LLINFO_PROBE;
			/* nd_missed() consumers can use missed to know if
			 * they need to send ICMP UNREACHABLE or not. */
		}
		break;
	case ND_LLINFO_UNREACHABLE:
		/*
		 * RFC 7048 Section 3 says that in the UNREACHABLE state
		 * packets continue to be sent to the link-layer address,
		 * backing off exponentially.
		 * We adjust this slightly and move to the INCOMPLETE state
		 * after nd_mmaxtries probes and then start backing off.
		 *
		 * This results in simpler code whilst providing a more robust
		 * model which doubles the time to failure over what we did
		 * before.  We don't want to go back to the old ARP model where
		 * no unreachability errors are returned, because very few
		 * applications would look at the unreachability hints
		 * provided, such as ND_LLINFO_UNREACHABLE or RTM_MISS.
		 */
		send_ns = true;
		if (ln->ln_asked++ < nd->nd_mmaxtries)
			break;

		missed = ND_LLINFO_UNREACHABLE;
		ln->ln_state = ND_LLINFO_WAITDELETE;
		ln->la_flags &= ~LLE_VALID;
		break;
	}

	if (send_ns) {
		uint8_t lladdr[255], *lladdrp;
		union l3addr src, *psrc;

		if (ln->ln_state == ND_LLINFO_WAITDELETE)
			nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
		else
			nd_set_timer(ln, ND_TIMER_RETRANS);
		if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
		    ln->la_flags & LLE_VALID)
		{
			KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
			memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
			lladdrp = lladdr;
		} else
			lladdrp = NULL;
		psrc = nd->nd_holdsrc(ln, &src);
		LLE_FREE_LOCKED(ln);
		ln = NULL;
		nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
	}

out:
	if (ln != NULL)
		LLE_FREE_LOCKED(ln);
	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

	if (missed != ND_LLINFO_NOSTATE)
		nd->nd_missed(ifp, &taddr, missed, m);
	if (ifp != NULL)
		if_release(ifp, &psref);
}

static void
nd_set_timertick(struct llentry *ln, time_t xtick)
{

	CTASSERT(sizeof(time_t) > sizeof(int));
	KASSERT(xtick >= 0);

	/*
	 * We have to take care of a reference leak which occurs if
	 * callout_reset overwrites a pending callout schedule.  Unfortunately
	 * we have no means of knowing whether an overwrite happened, so we
	 * have to detect it ourselves using callout_stop.  We call
	 * callout_pending first to exclude the case where the callout has
	 * never been scheduled.
	 */
	if (callout_pending(&ln->la_timer)) {
		bool expired;

		expired = callout_stop(&ln->la_timer);
		if (!expired)
			LLE_REMREF(ln);
	}

	ln->ln_expire = time_uptime + xtick / hz;
	LLE_ADDREF(ln);
	if (xtick > INT_MAX) {
		ln->ln_ntick = xtick - INT_MAX;
		xtick = INT_MAX;
	} else {
		ln->ln_ntick = 0;
	}
	callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
}

void
nd_set_timer(struct llentry *ln, int type)
{
	time_t xtick;
	struct ifnet *ifp;
	struct nd_domain *nd;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	switch (type) {
	case ND_TIMER_IMMEDIATE:
		xtick = 0;
		break;
	case ND_TIMER_TICK:
		xtick = ln->ln_ntick;
		break;
	case ND_TIMER_RETRANS:
		xtick = nd->nd_retrans(ifp) * hz / 1000;
		break;
	case ND_TIMER_RETRANS_BACKOFF:
	{
		unsigned int retrans = nd->nd_retrans(ifp);
		unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;

		xtick = retrans;
		while (attempts-- != 0) {
			xtick *= nd->nd_retransmultiple;
			if (xtick > nd->nd_maxretrans || xtick < retrans) {
				xtick = nd->nd_maxretrans;
				break;
			}
		}
		xtick = xtick * hz / 1000;
		break;
	}
	case ND_TIMER_REACHABLE:
		xtick = nd->nd_reachable(ifp) * hz / 1000;
		break;
	case ND_TIMER_EXPIRE:
		if (ln->ln_expire > time_uptime)
			xtick = (ln->ln_expire - time_uptime) * hz;
		else
			xtick = nd_gctimer * hz;
		break;
	case ND_TIMER_DELAY:
		xtick = nd->nd_delay * hz;
		break;
	case ND_TIMER_GC:
		xtick = nd_gctimer * hz;
		break;
	default:
		panic("%s: invalid timer type\n", __func__);
	}

	nd_set_timertick(ln, xtick);
}

/*
 * Resolve the link-layer address for a neighbor cache entry: copy it
 * into lldst if it is already known, otherwise queue the packet and
 * solicit the neighbor.
 */
int
nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
    uint8_t *lldst, size_t dstsize)
{
	struct ifnet *ifp;
	struct nd_domain *nd;
	int error;

	LLE_WLOCK_ASSERT(ln);

	ifp = ln->lle_tbl->llt_ifp;
	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if (ifp->if_flags & IFF_POINTOPOINT &&
	    ln->ln_state < ND_LLINFO_REACHABLE)
	{
		ln->ln_state = ND_LLINFO_STALE;
		nd_set_timer(ln, ND_TIMER_GC);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND_LLINFO_DELAY;
		nd_set_timer(ln, ND_TIMER_DELAY);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
		KASSERT((ln->la_flags & LLE_VALID) != 0);
		memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
		LLE_WUNLOCK(ln);
		return 0;
	}

	/*
	 * There is a neighbor cache entry, but no link-layer address
	 * response yet.  Append this latest packet to the end of the
	 * hold queue, as long as the number of queued packets does not
	 * exceed maxqueuelen.  When it exceeds maxqueuelen, the oldest
	 * packet in the queue will be removed.
	 */
	if (ln->ln_state == ND_LLINFO_NOSTATE ||
	    ln->ln_state == ND_LLINFO_WAITDELETE)
		ln->ln_state = ND_LLINFO_INCOMPLETE;

	if (ln->ln_hold != NULL) {
		struct mbuf *m_hold;
		int i;

		i = 0;
		for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
			i++;
			if (m_hold->m_nextpkt == NULL) {
				m_hold->m_nextpkt = m;
				break;
			}
		}
		while (i >= nd->nd_maxqueuelen) {
			m_hold = ln->ln_hold;
			ln->ln_hold = ln->ln_hold->m_nextpkt;
			m_freem(m_hold);
			i--;
		}
	} else
		ln->ln_hold = m;

	if (ln->ln_asked >= nd->nd_mmaxtries)
		error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
		    EHOSTUNREACH : EHOSTDOWN;
	else
		error = EWOULDBLOCK;

	/*
	 * If there has been no NS for the neighbor after entering the
	 * INCOMPLETE state, send the first solicitation.
	 */
	if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
		struct psref psref;
		union l3addr dst, src, *psrc;

		ln->ln_asked++;
		nd_set_timer(ln, ND_TIMER_RETRANS);
		memcpy(&dst, &ln->r_l3addr, sizeof(dst));
		psrc = nd->nd_holdsrc(ln, &src);
		if_acquire(ifp, &psref);
		LLE_WUNLOCK(ln);

		nd->nd_output(ifp, NULL, &dst, NULL, psrc);
		if_release(ifp, &psref);
	} else
		LLE_WUNLOCK(ln);

	return error;
}

void
nd_nud_hint(struct llentry *ln)
{
	struct nd_domain *nd;

	if (ln == NULL)
		return;

	LLE_WLOCK_ASSERT(ln);

	if (ln->ln_state < ND_LLINFO_REACHABLE)
		goto done;

	nd = nd_find_domain(ln->lle_tbl->llt_af);

	/*
	 * if we get upper-layer reachability confirmation many times,
	 * it is possible we have false information.
	 */
	ln->ln_byhint++;
	if (ln->ln_byhint > nd->nd_maxnudhint)
		goto done;

	ln->ln_state = ND_LLINFO_REACHABLE;
	if (!ND_IS_LLINFO_PERMANENT(ln))
		nd_set_timer(ln, ND_TIMER_REACHABLE);

done:
	LLE_WUNLOCK(ln);

	return;
}

static struct nd_domain *
nd_find_domain(int af)
{

	KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
	return nd_domains[af];
}

void
nd_attach_domain(struct nd_domain *nd)
{

	KASSERT(nd->nd_family < __arraycount(nd_domains));
	nd_domains[nd->nd_family] = nd;
}
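
/*
 * Illustrative sketch (comment only, not compiled as part of this file):
 * an address family plugs into this framework by filling in a struct
 * nd_domain with its timing parameters and callbacks and registering it
 * at initialisation time with nd_attach_domain().  The values below are
 * placeholders, not the real settings of any protocol; only the field
 * names already used elsewhere in this file are assumed to exist.
 *
 *	static struct nd_domain example_nd_domain = {
 *		.nd_family = AF_INET6,
 *		.nd_delay = 5,			delay before probing (seconds)
 *		.nd_mmaxtries = 3,		multicast solicitations
 *		.nd_umaxtries = 3,		unicast solicitations
 *		.nd_retransmultiple = 2,	RFC 7048 backoff multiplier
 *		.nd_maxqueuelen = 16,		packets held per entry
 *		.nd_nud_enabled = ...,		family-specific callbacks
 *		.nd_retrans = ...,
 *		.nd_reachable = ...,
 *		.nd_holdsrc = ...,
 *		.nd_output = ...,
 *		.nd_missed = ...,
 *		.nd_free = ...,
 *	};
 *
 *	nd_attach_domain(&example_nd_domain);
 */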