/*	$OpenBSD: radix.c,v 1.56 2017/01/24 10:08:30 krw Exp $	*/
/*	$NetBSD: radix.c,v 1.20 2003/08/07 16:32:56 agc Exp $	*/

/*
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.6 (Berkeley) 10/17/95
 */

/*
 * Routines to build and maintain radix trees for routing lookups.
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/pool.h>
#endif

#include <net/radix.h>

static unsigned int max_keylen;
struct radix_node_head *mask_rnhead;
static char *addmask_key;
static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
static char *rn_zeros, *rn_ones;

struct pool rtmask_pool;	/* pool for radix_mask structures */

#define rn_masktop (mask_rnhead->rnh_treetop)

static inline int rn_satisfies_leaf(char *, struct radix_node *, int);
static inline int rn_lexobetter(void *, void *);
static inline struct radix_mask *rn_new_radix_mask(struct radix_node *,
    struct radix_mask *);

struct radix_node *rn_insert(void *, struct radix_node_head *, int *,
    struct radix_node [2]);
struct radix_node *rn_newpair(void *, int, struct radix_node[2]);

static inline struct radix_node *rn_search(void *, struct radix_node *);
struct radix_node *rn_search_m(void *, struct radix_node *, void *);
int rn_add_dupedkey(struct radix_node *, struct radix_node_head *,
    struct radix_node [2], u_int8_t);
void rn_link_dupedkey(struct radix_node *, struct radix_node *, int);
void rn_fixup_nodes(struct radix_node *);
static inline struct radix_node *rn_lift_node(struct radix_node *);
void rn_add_radix_mask(struct radix_node *, int);
int rn_del_radix_mask(struct radix_node *);
static inline void rn_swap_nodes(struct radix_node *, struct radix_node *);

/*
 * The data structure for the keys is a radix tree with one way
 * branching removed.  The index rn_b at an internal node n represents a bit
 * position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_b - 1.
 * (We say the index of n is rn_b.)
 *
 * There is at least one descendant which has a one bit at position rn_b,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bitwise logical AND of the key and mask be the key.
 * We define the index of the route associated with the mask to be
 * the first bit number in the mask where a 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit past the index of the mask is 0.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_b, this implies that the last few bits of k
 * before bit rn_b are all 0 (and hence the same holds for every descendant
 * of n), so the route applies to all descendants of the node as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
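
/*
 * Worked example (added for illustration, not part of the original
 * source; the helper name and the RADIX_EXAMPLES guard are
 * hypothetical): the index of a contiguous mask is the position of its
 * first zero bit.  For an IPv4-style 255.255.255.0 mask the index is
 * 24 (plus the offset of the key bytes) and a leaf stores
 * rn_b == -1 - index.  The computation below mirrors the one in
 * rn_addmask().
 */
#ifdef RADIX_EXAMPLES
static int
rn_example_mask_index(u_char *m, int skip, int mlen)
{
	int b, i, j;

	/* skip the header bytes, then any leading all-ones bytes */
	for (i = skip; i < mlen && m[i] == 0xff; i++)
		continue;
	b = i << 3;			/* eight one-bits per full byte */
	if (i < mlen)
		for (j = 0x80; (j & m[i]) != 0; j >>= 1)
			b++;		/* leading one-bits of partial byte */
	return (b);			/* first bit position holding a 0 */
}
#endif /* RADIX_EXAMPLES */
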
static inline struct radix_node *
rn_search(void *v_arg, struct radix_node *head)
{
	struct radix_node *x = head;
	caddr_t v = v_arg;

	while (x->rn_b >= 0) {
		if (x->rn_bmask & v[x->rn_off])
			x = x->rn_r;
		else
			x = x->rn_l;
	}
	return (x);
}

struct radix_node *
rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
{
	struct radix_node *x = head;
	caddr_t v = v_arg;
	caddr_t m = m_arg;

	while (x->rn_b >= 0) {
		if ((x->rn_bmask & m[x->rn_off]) &&
		    (x->rn_bmask & v[x->rn_off]))
			x = x->rn_r;
		else
			x = x->rn_l;
	}
	return x;
}

int
rn_refines(void *m_arg, void *n_arg)
{
	caddr_t m = m_arg;
	caddr_t n = n_arg;
	caddr_t lim, lim2;
	int longer;
	int masks_are_equal = 1;

	lim2 = lim = n + *(u_char *)n;
	longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
	if (longer > 0)
		lim -= longer;
	while (n < lim) {
		if (*n & ~(*m))
			return 0;
		if (*n++ != *m++)
			masks_are_equal = 0;
	}
	while (n < lim2)
		if (*n++)
			return 0;
	if (masks_are_equal && (longer < 0))
		for (lim2 = m - longer; m < lim2; )
			if (*m++)
				return 1;
	return (!masks_are_equal);
}

/* return a perfect match if m_arg is set, else do a regular rn_match */
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	struct radix_node *x, *tm;
	caddr_t netmask = 0;

	if (m_arg) {
		tm = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off);
		if (tm == NULL)
			return (NULL);
		netmask = tm->rn_key;
	}
	x = rn_match(v_arg, head);
	if (x && netmask) {
		while (x && x->rn_mask != netmask)
			x = x->rn_dupedkey;
	}
	/* Never return internal nodes to the upper layer. */
	if (x && (x->rn_flags & RNF_ROOT))
		return (NULL);
	return x;
}
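
/*
 * Usage sketch (illustrative, not part of the original source; "dst",
 * "mask" and "rnh" are assumed to be a sockaddr-style destination, a
 * netmask and an initialized head): rn_lookup() with a mask returns
 * only the exact (key, mask) entry, while rn_match() performs a
 * longest-prefix style lookup:
 *
 *	rn = rn_lookup(&dst, &mask, rnh);	exact route or NULL
 *	rn = rn_match(&dst, rnh);		best matching route or NULL
 */
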
static inline int
rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip)
{
	char *cp = trial;
	char *cp2 = leaf->rn_key;
	char *cp3 = leaf->rn_mask;
	char *cplim;
	int length;

	length = min(*(u_char *)cp, *(u_char *)cp2);
	if (cp3 == NULL)
		cp3 = rn_ones;
	else
		length = min(length, *(u_char *)cp3);
	cplim = cp + length;
	cp += skip;
	cp2 += skip;
	cp3 += skip;
	while (cp < cplim) {
		if ((*cp ^ *cp2) & *cp3)
			return 0;
		cp++, cp2++, cp3++;
	}
	return 1;
}

struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	caddr_t v = v_arg;
	caddr_t cp, cp2, cplim;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *saved_t, *t;
	int off = top->rn_off;
	int vlen, matched_off;
	int test, b, rn_b;

	t = rn_search(v, top);
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask)
		vlen = *(u_char *)t->rn_mask;
	else
		vlen = *(u_char *)v;
	cp = v + off;
	cp2 = t->rn_key + off;
	cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto on1;
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;

	KASSERT(t == NULL || (t->rn_flags & RNF_ROOT) == 0);
	return t;
on1:
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;)
		b--;
	matched_off = cp - v;
	b += matched_off << 3;
	rn_b = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	saved_t = t;
	if (t->rn_mask == NULL)
		t = t->rn_dupedkey;
	for (; t; t = t->rn_dupedkey)
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if (rn_b <= t->rn_b) {
				KASSERT((t->rn_flags & RNF_ROOT) == 0);
				return t;
			}
		} else if (rn_satisfies_leaf(v, t, matched_off)) {
			KASSERT((t->rn_flags & RNF_ROOT) == 0);
			return t;
		}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;

		t = t->rn_p;
		m = t->rn_mklist;
		while (m) {
			/*
			 * If non-contiguous masks ever become important
			 * we can restore the masking and open coding of
			 * the search and satisfaction test and put the
			 * calculation of "off" back before the "do".
			 */
			if (m->rm_flags & RNF_NORMAL) {
				if (rn_b <= m->rm_b) {
					KASSERT((m->rm_leaf->rn_flags &
					    RNF_ROOT) == 0);
					return (m->rm_leaf);
				}
			} else {
				struct radix_node *x;

				off = min(t->rn_off, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x && rn_satisfies_leaf(v, x, off)) {
					KASSERT((x->rn_flags & RNF_ROOT) == 0);
					return x;
				}
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return NULL;
}
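
/*
 * Worked example (illustrative, not part of the original source): in
 * the "find first bit that differs" step of rn_match() above, if the
 * keys first differ in byte 2 with (*cp ^ *cp2) == 0x20 (binary
 * 00100000), the shift loop leaves b == 2 within the byte, so the
 * differing bit is 2 + (2 << 3) == 18 from the start of the key and
 * rn_b == -1 - 18 == -19.  rn_insert() computes the same quantity
 * with a slightly different loop.
 */
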
struct radix_node *
rn_newpair(void *v, int b, struct radix_node nodes[2])
{
	struct radix_node *tt = nodes, *t = nodes + 1;

	t->rn_b = b;
	t->rn_bmask = 0x80 >> (b & 7);
	t->rn_l = tt;
	t->rn_off = b >> 3;
	tt->rn_b = -1;
	tt->rn_key = v;
	tt->rn_p = t;
	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
	return t;
}

struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head,
    int *dupentry, struct radix_node nodes[2])
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *t, *tt;
	int off = top->rn_off;
	int b;

	t = rn_search(v_arg, top);
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	{
		caddr_t cp, cp2, cplim;
		int vlen, cmp_res;

		vlen = *(u_char *)v;
		cp = v + off;
		cp2 = t->rn_key + off;
		cplim = v + vlen;

		while (cp < cplim)
			if (*cp2++ != *cp++)
				goto on1;
		*dupentry = 1;
		return t;
on1:
		*dupentry = 0;
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		for (b = (cp - v) << 3; cmp_res; b--)
			cmp_res >>= 1;
	}
	{
		struct radix_node *p, *x = top;
		caddr_t cp = v;

		do {
			p = x;
			if (cp[x->rn_off] & x->rn_bmask)
				x = x->rn_r;
			else
				x = x->rn_l;
		} while (b > (unsigned int) x->rn_b);
		/* x->rn_b < b && x->rn_b >= 0 */
		t = rn_newpair(v_arg, b, nodes);
		tt = t->rn_l;
		if ((cp[p->rn_off] & p->rn_bmask) == 0)
			p->rn_l = t;
		else
			p->rn_r = t;
		x->rn_p = t;
		t->rn_p = p; /* frees x, p as temp vars below */
		if ((cp[t->rn_off] & t->rn_bmask) == 0) {
			t->rn_r = x;
		} else {
			t->rn_r = tt;
			t->rn_l = x;
		}
	}
	return (tt);
}

struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	caddr_t netmask = n_arg;
	struct radix_node *tm, *saved_tm;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	int maskduplicated, m0, isnormal;
	static int last_zeroed = 0;

	if ((mlen = *(u_char *)netmask) > max_keylen)
		mlen = max_keylen;
	if (skip == 0)
		skip = 1;
	if (mlen <= skip)
		return (mask_rnhead->rnh_nodes);	/* rn_zero root node */
	if (skip > 1)
		memcpy(addmask_key + 1, rn_ones + 1, skip - 1);
	if ((m0 = mlen) > skip)
		memcpy(addmask_key + skip, netmask + skip, mlen - skip);
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
		cp--;
	mlen = cp - addmask_key;
	if (mlen <= skip) {
		if (m0 >= last_zeroed)
			last_zeroed = mlen;
		return (mask_rnhead->rnh_nodes);
	}
	if (m0 < last_zeroed)
		memset(addmask_key + m0, 0, last_zeroed - m0);
	*addmask_key = last_zeroed = mlen;
	tm = rn_search(addmask_key, rn_masktop);
	if (memcmp(addmask_key, tm->rn_key, mlen) != 0)
		tm = NULL;
	if (tm || search)
		return (tm);
	tm = malloc(max_keylen + 2 * sizeof(*tm), M_RTABLE, M_NOWAIT | M_ZERO);
	if (tm == NULL)
		return (NULL);
	saved_tm = tm;
	netmask = cp = (caddr_t)(tm + 2);
	memcpy(cp, addmask_key, mlen);
	tm = rn_insert(cp, mask_rnhead, &maskduplicated, tm);
	if (maskduplicated) {
		log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n");
		free(saved_tm, M_RTABLE, 0);
		return (tm);
	}
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen;
	isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
		cp++;
	if (cp != cplim) {
		for (j = 0x80; (j & *cp) != 0; j >>= 1)
			b++;
		if (*cp != normal_chars[b] || cp != (cplim - 1))
			isnormal = 0;
	}
	b += (cp - netmask) << 3;
	tm->rn_b = -1 - b;
	if (isnormal)
		tm->rn_flags |= RNF_NORMAL;
	return (tm);
}
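
/*
 * Worked example (illustrative, not part of the original source): a
 * 255.255.255.0 IPv4 mask arrives as a length byte, the skipped
 * header bytes, then 0xff 0xff 0xff 0x00.  rn_addmask() trims the
 * trailing zero byte, stores the shortened key, computes the index b
 * of the first zero bit (24 plus the skipped bytes' bits) and sets
 * rn_b = -1 - b.  Because the one bits are contiguous, the node is
 * flagged RNF_NORMAL; a mask like 255.255.240.0 would instead end in
 * the partial byte 0xf0, which must match normal_chars[4].
 */
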
/* rn_lexobetter: return an arbitrary ordering for non-contiguous masks */
static inline int
rn_lexobetter(void *m_arg, void *n_arg)
{
	u_char *mp = m_arg, *np = n_arg;

	/*
	 * Longer masks might not really be lexicographically better,
	 * but longer masks always have precedence since they must be
	 * checked first.  The netmasks were normalized before calling
	 * this function and don't have unneeded trailing zeros.
	 */
	if (*mp > *np)
		return 1;
	if (*mp < *np)
		return 0;
	/*
	 * Must return the first difference between the masks
	 * to ensure deterministic sorting.
	 */
	return (memcmp(mp, np, *mp) > 0);
}

static inline struct radix_mask *
rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
{
	struct radix_mask *m;

	m = pool_get(&rtmask_pool, PR_NOWAIT | PR_ZERO);
	if (m == NULL) {
		log(LOG_ERR, "Mask for route not entered\n");
		return (NULL);
	}
	m->rm_b = tt->rn_b;
	m->rm_flags = tt->rn_flags;
	if (tt->rn_flags & RNF_NORMAL)
		m->rm_leaf = tt;
	else
		m->rm_mask = tt->rn_mask;
	m->rm_mklist = next;
	tt->rn_mklist = m;
	return m;
}
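
/*
 * Structure sketch (illustrative, not part of the original source;
 * addresses simplified by ignoring the sockaddr header offset): with
 * normal routes 10.0.0.0/8 and 10.1.0.0/16 in the tree, each mask is
 * attached, via a radix_mask, to the highest internal node covering
 * the route, roughly:
 *
 *	node(rn_b == 8)->rn_mklist  -> { rm_b == -9,  rm_leaf = 10/8 }
 *	node(rn_b == 16)->rn_mklist -> { rm_b == -17, rm_leaf = 10.1/16 }
 *
 * rn_match() walks these lists while backtracking from a failed host
 * match toward the root.
 */
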
/*
 * Find the point where the rn_mklist needs to be changed.
 */
static inline struct radix_node *
rn_lift_node(struct radix_node *t)
{
	struct radix_node *x = t;
	int b = -1 - t->rn_b;

	/* rewind possible dupedkey list to head */
	while (t->rn_b < 0)
		t = t->rn_p;

	/* can't lift node above head of dupedkey list, give up */
	if (b > t->rn_b)
		return (NULL);

	do {
		x = t;
		t = t->rn_p;
	} while (b <= t->rn_b && x != t);

	return (x);
}

void
rn_add_radix_mask(struct radix_node *tt, int keyduplicated)
{
	caddr_t netmask, mmask;
	struct radix_node *x;
	struct radix_mask *m, **mp;
	int b_leaf = tt->rn_b;

	/* Add new route to highest possible ancestor's list */
	if (tt->rn_mask == NULL)
		return;				/* can't lift at all */
	x = rn_lift_node(tt);
	if (x == NULL)
		return;				/* didn't lift either */

	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	netmask = tt->rn_mask;
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_b < b_leaf)
			continue;
		if (m->rm_b > b_leaf)
			break;
		if (m->rm_flags & RNF_NORMAL) {
			if (keyduplicated) {
				if (m->rm_leaf->rn_p == tt)
					/* new route is better */
					m->rm_leaf = tt;
#ifdef DIAGNOSTIC
				else {
					struct radix_node *t;

					for (t = m->rm_leaf;
					    t && t->rn_mklist == m;
					    t = t->rn_dupedkey)
						if (t == tt)
							break;
					if (t == NULL) {
						log(LOG_ERR, "Non-unique "
						    "normal route on dupedkey, "
						    "mask not entered\n");
						return;
					}
				}
#endif
				m->rm_refs++;
				tt->rn_mklist = m;
				return;
			} else if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR, "Non-unique normal route,"
				    " mask not entered\n");
				return;
			}
			mmask = m->rm_leaf->rn_mask;
		} else
			mmask = m->rm_mask;
		if (mmask == netmask) {
			m->rm_refs++;
			tt->rn_mklist = m;
			return;
		}
		if (rn_refines(netmask, mmask) ||
		    rn_lexobetter(netmask, mmask))
			break;
	}
	*mp = rn_new_radix_mask(tt, *mp);
}

int
rn_add_dupedkey(struct radix_node *saved_tt, struct radix_node_head *head,
    struct radix_node *tt, u_int8_t prio)
{
	caddr_t netmask = tt->rn_mask;
	struct radix_node *x = saved_tt, *xp;
	int before = -1;
	int b_leaf = 0;

	if (netmask)
		b_leaf = tt->rn_b;

	for (xp = x; x; xp = x, x = x->rn_dupedkey) {
		if (x->rn_mask == netmask)
			return (-1);
		if (netmask == NULL ||
		    (x->rn_mask &&
		    ((b_leaf < x->rn_b) || /* index(netmask) > node */
		    rn_refines(netmask, x->rn_mask) ||
		    rn_lexobetter(netmask, x->rn_mask))))
			break;
	}
	/*
	 * If the mask is not duplicated, we wouldn't
	 * find it among possible duplicate key entries
	 * anyway, so the above test doesn't hurt.
	 *
	 * We sort the masks for a duplicated key the same way as
	 * in a masklist -- most specific to least specific.
	 * This may require the unfortunate nuisance of relocating
	 * the head of the list.
	 *
	 * We also reverse, or doubly link the list through the
	 * parent pointer.
	 */

	if ((x == saved_tt && before) || before == 1)
		before = 1;
	else
		before = 0;
	rn_link_dupedkey(tt, xp, before);

	return (0);
}
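
/*
 * Ordering sketch (illustrative, not part of the original source):
 * routes sharing the key 192.168.0.0 all land on the same leaf and
 * are chained through rn_dupedkey from most specific to least
 * specific mask, conceptually:
 *
 *	192.168.0.0/24 -> 192.168.0.0/16 -> 192.168.0.0/8
 *
 * so the first satisfying entry rn_match() encounters is also the
 * most specific one.
 */
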
/*
 * Insert tt after x or in place of x if before is true.
 */
void
rn_link_dupedkey(struct radix_node *tt, struct radix_node *x, int before)
{
	if (before) {
		if (x->rn_p->rn_b > 0) {
			/* link in at head of list */
			tt->rn_dupedkey = x;
			tt->rn_flags = x->rn_flags;
			tt->rn_p = x->rn_p;
			x->rn_p = tt;
			if (tt->rn_p->rn_l == x)
				tt->rn_p->rn_l = tt;
			else
				tt->rn_p->rn_r = tt;
		} else {
			tt->rn_dupedkey = x;
			x->rn_p->rn_dupedkey = tt;
			tt->rn_p = x->rn_p;
			x->rn_p = tt;
		}
	} else {
		tt->rn_dupedkey = x->rn_dupedkey;
		x->rn_dupedkey = tt;
		tt->rn_p = x;
		if (tt->rn_dupedkey)
			tt->rn_dupedkey->rn_p = tt;
	}
}

/*
 * This function ensures that routes are properly promoted upwards.
 * It adjusts the rn_mklist of the parent node to make sure overlapping
 * routes can be found.
 *
 * There are two cases:
 * - leaf nodes with a possible rn_dupedkey list
 * - internal nodes, possibly with their own mklist
 * If the index of a route's mask reaches past the current branch bit,
 * an rn_mklist entry needs to be made.
 */
void
rn_fixup_nodes(struct radix_node *tt)
{
	struct radix_node *tp, *x;
	struct radix_mask *m, **mp;
	int b_leaf;

	tp = tt->rn_p;
	if (tp->rn_r == tt)
		x = tp->rn_l;
	else
		x = tp->rn_r;

	b_leaf = -1 - tp->rn_b;
	if (x->rn_b < 0) {	/* x is a leaf node */
		struct radix_node *xx = NULL;

		for (mp = &tp->rn_mklist; x; xx = x, x = x->rn_dupedkey) {
			if (xx && xx->rn_mklist && xx->rn_mask == x->rn_mask &&
			    x->rn_mklist == 0) {
				/* multipath route */
				x->rn_mklist = xx->rn_mklist;
				x->rn_mklist->rm_refs++;
			}
			if (x->rn_mask && (x->rn_b >= b_leaf) &&
			    x->rn_mklist == 0) {
				*mp = m = rn_new_radix_mask(x, 0);
				if (m)
					mp = &m->rm_mklist;
			}
		}
	} else if (x->rn_mklist) {	/* x is an internal node */
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
			if (m->rm_b >= b_leaf)
				break;
		tp->rn_mklist = m;
		*mp = 0;
	}
}

struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2], u_int8_t prio)
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *tt, *saved_tt, *tm = NULL;
	int keyduplicated;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (n_arg) {
		if ((tm = rn_addmask(n_arg, 0, top->rn_off)) == NULL)
			return (NULL);
	}

	tt = rn_insert(v, head, &keyduplicated, treenodes);

	if (keyduplicated) {
		saved_tt = tt;
		tt = treenodes;

		tt->rn_key = v_arg;
		tt->rn_b = -1;
		tt->rn_flags = RNF_ACTIVE;
	}

	/* Put mask into the node. */
	if (tm) {
		tt->rn_mask = tm->rn_key;
		tt->rn_b = tm->rn_b;
		tt->rn_flags |= tm->rn_flags & RNF_NORMAL;
	}

	/* Either insert into dupedkey list or as a leaf node. */
	if (keyduplicated) {
		if (rn_add_dupedkey(saved_tt, head, tt, prio))
			return (NULL);
	} else {
		rn_fixup_nodes(tt);
	}

	/* finally insert a radix_mask element if needed */
	rn_add_radix_mask(tt, keyduplicated);
	return (tt);
}
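
/*
 * Usage sketch (illustrative, not part of the original source; "dst",
 * "mask" and "rnh" are hypothetical and error handling is elided).
 * The caller supplies storage for the leaf and internal node pair:
 *
 *	struct radix_node nodes[2];
 *	struct radix_node *rn;
 *
 *	rn = rn_addroute(&dst, &mask, rnh, nodes, 0);
 *	if (rn == NULL)
 *		... duplicate (key, mask) entry or out of memory ...
 */
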
/*
 * Clean up the mask list; tt points to the route that needs to be
 * cleaned up.
 */
int
rn_del_radix_mask(struct radix_node *tt)
{
	struct radix_node *x;
	struct radix_mask *m, *saved_m, **mp;

	/*
	 * Cleanup mask list from possible references to this route.
	 */
	saved_m = m = tt->rn_mklist;
	if (tt->rn_mask == NULL || m == NULL)
		return (0);

	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt && m->rm_refs == 0) {
			log(LOG_ERR, "rn_delete: inconsistent normal "
			    "annotation\n");
			return (-1);
		}
		if (m->rm_leaf != tt) {
			if (--m->rm_refs >= 0)
				return (0);
			else
				log(LOG_ERR, "rn_delete: "
				    "inconsistent mklist refcount\n");
		}
		/*
		 * If we end up here tt should be m->rm_leaf and therefore
		 * tt should be the head of a multipath chain.
		 * If this is not the case the table is no longer consistent.
		 */
		if (m->rm_refs > 0) {
			if (tt->rn_dupedkey == NULL ||
			    tt->rn_dupedkey->rn_mklist != m) {
				log(LOG_ERR, "rn_delete: inconsistent "
				    "dupedkey list\n");
				return (-1);
			}
			m->rm_leaf = tt->rn_dupedkey;
			--m->rm_refs;
			return (0);
		}
		/* else tt is last and only route */
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return (0);
		}
		if (--m->rm_refs >= 0)
			return (0);
	}

	/*
	 * No other references hold to the radix_mask; remove it from
	 * the tree.
	 */
	x = rn_lift_node(tt);
	if (x == NULL)
		return (0);		/* Wasn't lifted at all */

	/* Finally eliminate the radix_mask from the tree */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
		if (m == saved_m) {
			*mp = m->rm_mklist;
			pool_put(&rtmask_pool, m);
			break;
		}

	if (m == NULL) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL)
			return (-1);	/* Dangling ref to us */
	}

	return (0);
}

/* swap two internal nodes and fixup the parent and child pointers */
static inline void
rn_swap_nodes(struct radix_node *from, struct radix_node *to)
{
	*to = *from;
	if (from->rn_p->rn_l == from)
		from->rn_p->rn_l = to;
	else
		from->rn_p->rn_r = to;

	to->rn_l->rn_p = to;
	to->rn_r->rn_p = to;
}
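
/*
 * Usage sketch for rn_delete() below (illustrative, not part of the
 * original source; "dst", "mask" and "rnh" are hypothetical).  On
 * success the caller gets back the node pair it handed to
 * rn_addroute() and may reclaim that storage:
 *
 *	rn = rn_delete(&dst, &mask, rnh, NULL);
 *	if (rn != NULL)
 *		... rn points at the nodes[2] array added earlier ...
 */
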
struct radix_node *
rn_delete(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node *rn)
{
	caddr_t v = v_arg;
	caddr_t netmask = n_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *tt, *tp, *pp, *x;
	struct radix_node *dupedkey_tt, *saved_tt;
	int off = top->rn_off;
	int vlen;

	vlen = *(u_char *)v;

	/*
	 * Implement a lookup similar to rn_lookup but we need to save
	 * the radix leaf node (where the rn_dupedkey list starts) so
	 * it is not possible to use rn_lookup.
	 */
	tt = rn_search(v, top);
	/* make sure the key is a perfect match */
	if (memcmp(v + off, tt->rn_key + off, vlen - off))
		return (NULL);

	/*
	 * Here, tt is the deletion target, and
	 * saved_tt is the head of the dupedkey chain.
	 * dupedkey_tt will point to the start of the multipath chain.
	 */
	saved_tt = tt;

	/*
	 * make tt point to the start of the rn_dupedkey list of multipath
	 * routes.
	 */
	if (netmask) {
		struct radix_node *tm;

		if ((tm = rn_addmask(netmask, 1, off)) == NULL)
			return (NULL);
		netmask = tm->rn_key;
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == NULL)
				return (NULL);
	}

	/* save start of multipath chain for later use */
	dupedkey_tt = tt;

	KASSERT((tt->rn_flags & RNF_ROOT) == 0);

	/* remove possible radix_mask */
	if (rn_del_radix_mask(tt))
		return (NULL);

	/*
	 * Finally eliminate us from tree
	 */
	tp = tt->rn_p;
	if (saved_tt->rn_dupedkey) {
		if (tt == saved_tt) {
			x = saved_tt->rn_dupedkey;
			x->rn_p = tp;
			if (tp->rn_l == tt)
				tp->rn_l = x;
			else
				tp->rn_r = x;
			/* head changed adjust dupedkey pointer */
			dupedkey_tt = x;
		} else {
			x = saved_tt;
			/* dupedkey will change so adjust pointer */
			if (dupedkey_tt == tt)
				dupedkey_tt = tt->rn_dupedkey;
			tp->rn_dupedkey = tt->rn_dupedkey;
			if (tt->rn_dupedkey)
				tt->rn_dupedkey->rn_p = tp;
		}

		/*
		 * We may be holding an active internal node in the tree.
		 */
		if (tt[1].rn_flags & RNF_ACTIVE)
			rn_swap_nodes(&tt[1], &x[1]);

		/* over and out */
		goto out;
	}

	/* non-rn_dupedkey case, remove tt and tp node from the tree */
	if (tp->rn_l == tt)
		x = tp->rn_r;
	else
		x = tp->rn_l;
	pp = tp->rn_p;
	if (pp->rn_r == tp)
		pp->rn_r = x;
	else
		pp->rn_l = x;
	x->rn_p = pp;

	/*
	 * Demote routes attached to us (actually on the internal parent node).
	 */
	if (tp->rn_mklist) {
		struct radix_mask *m, **mp;

		if (x->rn_b >= 0) {
			for (mp = &x->rn_mklist; (m = *mp);)
				mp = &m->rm_mklist;
			*mp = tp->rn_mklist;
		} else {
			/*
			 * If there are any key,mask pairs in a sibling
			 * duped-key chain, some subset will appear sorted
			 * in the same order attached to our mklist.
			 */
			for (m = tp->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;

					x->rn_mklist = 0;
					if (--(m->rm_refs) < 0)
						pool_put(&rtmask_pool, m);
					else if (m->rm_flags & RNF_NORMAL)
						/*
						 * don't progress because this
						 * is a multipath route.  Next
						 * route will use the same m.
						 */
						mm = m;
					m = mm;
				}
			if (m)
				log(LOG_ERR, "%s %p at %p\n",
				    "rn_delete: Orphaned Mask", m, x);
		}
	}

	/*
	 * We may be holding an active internal node in the tree.
	 * If so swap our internal node (tt[1]) with the parent node (tp)
	 * since that one was just removed from the tree.
	 */
	if (tp != &tt[1])
		rn_swap_nodes(&tt[1], tp);

	/* no rn_dupedkey list so no need to fixup multipath chains */
out:
	tt[0].rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}

int
rn_walktree(struct radix_node_head *h, int (*f)(struct radix_node *, void *,
    u_int), void *w)
{
	int error;
	struct radix_node *base, *next;
	struct radix_node *rn = h->rnh_treetop;

	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	/* First time through node, go left */
	while (rn->rn_b >= 0)
		rn = rn->rn_l;
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0)
			rn = rn->rn_p;
		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;)
			rn = rn->rn_l;
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT) &&
			    (error = (*f)(rn, w, h->rnh_rtableid)))
				return (error);
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			return (0);
	}
	/* NOTREACHED */
}
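
/*
 * Usage sketch for rn_walktree() above (illustrative, not part of the
 * original source; the callback name and the RADIX_EXAMPLES guard are
 * hypothetical).  The walk visits every leaf including rn_dupedkey
 * entries, and the callback may delete the node it is handed:
 */
#ifdef RADIX_EXAMPLES
static int
rn_example_print(struct radix_node *rn, void *arg, u_int rtableid)
{
	/* inspect rn->rn_key and rn->rn_mask here */
	return (0);	/* nonzero aborts the walk and is returned */
}

/* ... error = rn_walktree(rnh, rn_example_print, NULL); ... */
#endif /* RADIX_EXAMPLES */
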
int
rn_initmask(void)
{
	if (mask_rnhead != NULL)
		return (0);

	KASSERT(max_keylen > 0);

	mask_rnhead = malloc(sizeof(*mask_rnhead), M_RTABLE, M_NOWAIT);
	if (mask_rnhead == NULL)
		return (1);

	rn_inithead0(mask_rnhead, 0);
	return (0);
}

int
rn_inithead(void **head, int off)
{
	struct radix_node_head *rnh;

	if (*head != NULL)
		return (1);

	if (rn_initmask())
		panic("failed to initialize the mask tree");

	rnh = malloc(sizeof(*rnh), M_RTABLE, M_NOWAIT);
	if (rnh == NULL)
		return (0);
	*head = rnh;
	rn_inithead0(rnh, off);
	return (1);
}

int
rn_inithead0(struct radix_node_head *rnh, int offset)
{
	struct radix_node *t, *tt, *ttt;
	int off = offset * NBBY;

	memset(rnh, 0, sizeof(*rnh));
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;
	t->rn_r = ttt;
	t->rn_p = t;
	tt = t->rn_l;
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_b = -1 - off;
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	rnh->rnh_treetop = t;
	return (1);
}

/*
 * rn_init() can be called multiple times with a different key length
 * as long as no radix tree head has been allocated.
 */
void
rn_init(unsigned int keylen)
{
	char *cp, *cplim;

	if (max_keylen == 0) {
		pool_init(&rtmask_pool, sizeof(struct radix_mask), 0,
		    IPL_SOFTNET, 0, "rtmask", NULL);
	}

	if (keylen <= max_keylen)
		return;

	KASSERT(mask_rnhead == NULL);

	free(rn_zeros, M_RTABLE, 3 * max_keylen);
	rn_zeros = mallocarray(3, keylen, M_RTABLE, M_NOWAIT | M_ZERO);
	if (rn_zeros == NULL)
		panic("cannot initialize a radix tree without memory");
	max_keylen = keylen;

	rn_ones = cp = rn_zeros + max_keylen;
	addmask_key = cplim = rn_ones + max_keylen;
	while (cp < cplim)
		*cp++ = -1;
}
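
/*
 * Initialization sketch (illustrative, not part of the original
 * source; "head" and "off" are hypothetical): rn_init() must run,
 * with the largest key length that will ever be used, before any
 * head is created; afterwards each table gets its own head.  "off"
 * is the byte offset of the key within the stored sockaddr:
 *
 *	rn_init(sizeof(struct sockaddr_in6));
 *	if (rn_inithead(&head, off) == 0)
 *		... allocation failed ...
 */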