/*
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.4 (Berkeley) 11/2/94
 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.3 2002/04/28 05:40:25 suz Exp $
 */

/*
 * Routines to build and maintain radix trees for routing lookups.
 */

#include <sys/param.h>
#ifdef _KERNEL
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/globaldata.h>
#include <sys/thread.h>
#else
#include <stdlib.h>
#endif
#include <sys/syslog.h>

#include <net/radix.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

/*
 * The arguments to the radix functions are really counted byte arrays with
 * the length in the first byte.  struct sockaddr's fit this type
 * structurally.  Cast the result to int as this is the dominant usage.
 */
#define clen(c)	(int)(*(const u_char *)(c))


static struct radix_mask *rn_mkfreelist[MAXCPU];
static struct radix_node_head *mask_rnheads[MAXCPU];

static char rn_zeros[RN_MAXKEYLEN];
static char rn_ones[RN_MAXKEYLEN] = RN_MAXKEYONES;

#ifdef RN_DEBUG
static int rn_nodenum;
static struct radix_node *rn_clist;
static int rn_saveinfo;
static bool rn_debug = true;
#endif


static __inline struct radix_mask *
MKGet(struct radix_mask **l)
{
	struct radix_mask *m;

	if (*l != NULL) {
		m = *l;
		*l = m->rm_next;
	} else {
		R_Malloc(m, struct radix_mask *, sizeof *m);
	}
	return m;
}

static __inline void
MKFree(struct radix_mask **l, struct radix_mask *m)
{
	m->rm_next = *l;
	*l = m;
}

/*
 * The data structure for the keys is a radix tree with one-way
 * branching removed.  The index rn_bit at an internal node n represents a
 * bit position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bit-wise logical and of the key and mask be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0, past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_bit, this implies that the last few bits of k before
 * bit rn_bit are all 0 (and hence the same is true of every descendant
 * of n), so the route applies to all descendants of the node as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
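/*
 * Worked example (illustrative commentary, not from the original code):
 * assume an IPv4-style layout in which the address bytes begin at byte
 * offset 4 of the counted array.  A 10.1.2.0/24 route would then be
 *
 *	key  = { 8, 0, 0, 0, 10, 1, 2, 0 }
 *	mask = { 8, 0, 0, 0, 0xff, 0xff, 0xff, 0 }
 *
 * The first zero bit of the mask is bit 7*8 = 56, so index(mask) = 56 and
 * the mask's leaf gets rn_bit = -1 - 56 = -57.  No bit is set past the
 * index, so this mask is "normal" in the sense described above.
 */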
static struct radix_node *
rn_search(const char *v, struct radix_node *head)
{
	struct radix_node *x;

	x = head;
	while (x->rn_bit >= 0) {
		if (x->rn_bmask & v[x->rn_offset])
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return (x);
}

static struct radix_node *
rn_search_m(const char *v, struct radix_node *head, const char *m)
{
	struct radix_node *x;

	x = head;
	while (x->rn_bit >= 0) {
		if ((x->rn_bmask & m[x->rn_offset]) &&
		    (x->rn_bmask & v[x->rn_offset]))
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return x;
}
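/*
 * Illustrative note (added commentary): an interior node testing overall
 * bit b stores rn_offset = b >> 3 and rn_bmask = 0x80 >> (b & 7) (see
 * rn_newpair() below).  For b = 33 the walks above test (v[4] & 0x40),
 * taking the right child on a one bit and the left child on a zero bit.
 */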
/*
 * Compare the two netmasks and return true if netmask <m> is strictly more
 * specific than netmask <n>.
 *
 * NOTE: Non-contiguous netmasks are supported.
 */
bool
rn_refines(const char *m, const char *n)
{
	const char *lim, *lim2;
	int longer;
	bool equal;

	equal = true;
	lim2 = lim = n + clen(n);
	longer = clen(n++) - clen(m++);
	if (longer > 0)
		lim -= longer;

	while (n < lim) {
		if (*n & ~(*m))
			return false;
		if (*n++ != *m++)
			equal = false;
	}
	while (n < lim2) {
		if (*n++)	/* n is longer and more specific */
			return false;
	}
	if (equal && (longer < 0)) {
		lim2 = m - longer;
		while (m < lim2) {
			if (*m++)	/* m is longer and more specific */
				return true;
		}
	}

	return (!equal);
}

struct radix_node *
rn_lookup(const char *key, const char *mask, struct radix_node_head *head)
{
	struct radix_node *x;
	const char *netmask = NULL;

	if (mask != NULL) {
		x = rn_addmask(mask, true, head->rnh_treetop->rn_offset,
			       head->rnh_maskhead);
		if (x == NULL)
			return (NULL);
		netmask = x->rn_key;
	}

	x = rn_match(key, head);
	if (x != NULL && netmask != NULL) {
		while (x != NULL && x->rn_mask != netmask)
			x = x->rn_dupedkey;
	}

	return x;
}

static bool
rn_satisfies_leaf(const char *trial, struct radix_node *leaf, int skip)
{
	const char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
	const char *cplim;
	int length;

	length = min(clen(cp), clen(cp2));
	if (cp3 == NULL)
		cp3 = rn_ones;
	else
		length = min(length, clen(cp3));

	cplim = cp + length;
	cp2 += skip;
	cp3 += skip;
	for (cp += skip; cp < cplim; cp++, cp2++, cp3++) {
		if ((*cp ^ *cp2) & *cp3)
			return false;
	}

	return true;
}
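/*
 * Example (illustrative commentary, using the layout from the worked
 * example near the top of the file): with
 *
 *	m = { 7, 0, 0, 0, 0xff, 0xff, 0xff }	(a /24)
 *	n = { 6, 0, 0, 0, 0xff, 0xff }		(a /16)
 *
 * every bit set in n is also set in m and m sets at least one more, so
 * rn_refines(m, n) returns true while rn_refines(n, m) returns false.
 */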
struct radix_node *
rn_match(const char *key, struct radix_node_head *head)
{
	struct radix_node *t, *x;
	const char *cp = key, *cp2;
	const char *cplim;
	struct radix_node *saved_t, *top = head->rnh_treetop;
	int off = top->rn_offset, klen, matched_off;
	int test, b, rn_bit;

	t = rn_search(key, top);
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt to limit how many bytes we check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B and C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask != NULL)
		klen = clen(t->rn_mask);
	else
		klen = clen(key);
	cp += off; cp2 = t->rn_key + off; cplim = key + klen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto on1;

	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;
	return t;

on1:
	test = (*cp ^ *cp2) & 0xff;	/* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;)
		b--;
	matched_off = cp - key;
	b += matched_off << 3;
	rn_bit = -1 - b;

	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	saved_t = t;
	if (t->rn_mask == NULL)
		t = t->rn_dupedkey;
	for (; t != NULL; t = t->rn_dupedkey) {
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if (rn_bit <= t->rn_bit)
				return t;
		} else if (rn_satisfies_leaf(key, t, matched_off))
			return t;
	}
	t = saved_t;

	/* start searching up the tree */
	do {
		struct radix_mask *m;

		t = t->rn_parent;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		m = t->rn_mklist;
		while (m != NULL) {
			if (m->rm_flags & RNF_NORMAL) {
				if (rn_bit <= m->rm_bit)
					return (m->rm_leaf);
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(key, t, m->rm_mask);
				while (x != NULL && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x != NULL && rn_satisfies_leaf(key, x, off))
					return x;
			}
			m = m->rm_next;
		}
	} while (t != top);

	return NULL;
}
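/*
 * Illustrative note (added commentary): with 10.1.0.0/16 and 10.1.2.0/24
 * both installed, rn_match() on a key for 10.1.2.3 walks to the 10.1.2.0
 * leaf; the comparison above, limited to the /24 mask's length, succeeds,
 * so the more specific route wins.  Had only the /16 been present, the
 * duped-key and mask-list scans would recover it instead.
 */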
static struct radix_node *
rn_newpair(const char *key, int indexbit, struct radix_node nodes[2])
{
	struct radix_node *leaf = &nodes[0], *interior = &nodes[1];

	interior->rn_bit = indexbit;
	interior->rn_bmask = 0x80 >> (indexbit & 0x7);
	interior->rn_offset = indexbit >> 3;
	interior->rn_left = leaf;
	interior->rn_flags = RNF_ACTIVE;
	interior->rn_mklist = NULL;

	leaf->rn_bit = -1;
	leaf->rn_key = key;
	leaf->rn_parent = interior;
	leaf->rn_flags = interior->rn_flags;
	leaf->rn_mklist = NULL;

#ifdef RN_DEBUG
	leaf->rn_info = rn_nodenum++;
	interior->rn_info = rn_nodenum++;
	leaf->rn_twin = interior;
	leaf->rn_ybro = rn_clist;
	rn_clist = leaf;
#endif

	return interior;
}

static struct radix_node *
rn_insert(const char *key, struct radix_node_head *head, bool *dupentry,
	  struct radix_node nodes[2])
{
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset, klen = clen(key);
	struct radix_node *t = rn_search(key, top);
	const char *cp = key + head_off;
	int b;
	struct radix_node *tt;

	/*
	 * Find first bit at which the key and t->rn_key differ
	 */
	{
		const char *cp2 = t->rn_key + head_off;
		int cmp_res;
		const char *cplim = key + klen;

		while (cp < cplim) {
			if (*cp2++ != *cp++)
				goto on1;
		}

		*dupentry = true;
		return t;

on1:
		*dupentry = false;
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		for (b = (cp - key) << 3; cmp_res; b--)
			cmp_res >>= 1;
	}
	{
		struct radix_node *p, *x = top;

		cp = key;
		do {
			p = x;
			if (cp[x->rn_offset] & x->rn_bmask)
				x = x->rn_right;
			else
				x = x->rn_left;
		} while (b > (unsigned)x->rn_bit);
		/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "%s: Going In:\n", __func__);
			traverse(p);
		}
#endif
		t = rn_newpair(key, b, nodes);
		tt = t->rn_left;
		if ((cp[p->rn_offset] & p->rn_bmask) == 0)
			p->rn_left = t;
		else
			p->rn_right = t;
		x->rn_parent = t;
		t->rn_parent = p;	/* frees x, p as temp vars below */
		if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
			t->rn_right = x;
		} else {
			t->rn_right = tt;
			t->rn_left = x;
		}
#ifdef RN_DEBUG
		if (rn_debug) {
			log(LOG_DEBUG, "%s: Coming Out:\n", __func__);
			traverse(p);
		}
#endif
	}
	return (tt);
}
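/*
 * Sketch of the splice above (illustrative commentary): once rn_insert()
 * knows the first differing bit b, the new interior/leaf pair is wired
 * in on the descent path where b would be tested:
 *
 *	    p                      p
 *	    |                      |
 *	    x         ==>          t   (interior, tests bit b)
 *	   ...                    / \
 *	                        tt   x
 *	                     (leaf) ...
 *
 * The new leaf tt hangs off whichever side of t matches the new key's
 * value at bit b; the picture shows the case where that bit is zero.
 */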
struct radix_node *
rn_addmask(const char *netmask, bool search, int skip,
	   struct radix_node_head *mask_rnh)
{
	struct radix_node *x, *saved_x;
	const char *cp, *cplim;
	char *p;
	int b = 0, mlen, j;
	bool maskduplicated, isnormal;
	char addmask_key[RN_MAXKEYLEN];

	if ((mlen = clen(netmask)) > RN_MAXKEYLEN)
		mlen = RN_MAXKEYLEN;
	if (skip == 0)
		skip = 1;
	if (mlen <= skip)
		return (mask_rnh->rnh_nodes);	/* all-zero key */

	bzero(addmask_key, sizeof(addmask_key));
	if (skip > 1)
		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	/* Trim trailing zeroes. */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
		cp--;
	mlen = cp - addmask_key;
	if (mlen <= skip)
		return (mask_rnh->rnh_nodes);	/* all-zero key */

	*addmask_key = mlen;
	x = rn_search(addmask_key, mask_rnh->rnh_treetop);
	if (x->rn_key == NULL) {
		kprintf("WARNING: radix_node->rn_key is NULL rn=%p\n", x);
		print_backtrace(-1);
		x = NULL;
	} else if (bcmp(addmask_key, x->rn_key, mlen) != 0) {
		x = NULL;
	}
	if (x != NULL || search)
		return (x);

	R_Malloc(x, struct radix_node *, RN_MAXKEYLEN + 2 * (sizeof *x));
	if ((saved_x = x) == NULL)
		return (NULL);

	bzero(x, RN_MAXKEYLEN + 2 * (sizeof *x));
	netmask = p = (char *)(x + 2);
	bcopy(addmask_key, p, mlen);
	x = rn_insert(netmask, mask_rnh, &maskduplicated, x);
	if (maskduplicated) {
		log(LOG_ERR, "%s: mask impossibly already in tree", __func__);
		R_Free(saved_x);
		return (x);
	}

	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	isnormal = true;
	cplim = netmask + mlen;
	for (cp = netmask + skip; cp < cplim && clen(cp) == 0xff;)
		cp++;
	if (cp != cplim) {
		static const char normal_chars[] = {
			0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1
		};

		for (j = 0x80; (j & *cp) != 0; j >>= 1)
			b++;
		if (*cp != normal_chars[b] || cp != (cplim - 1))
			isnormal = false;
	}
	b += (cp - netmask) << 3;
	x->rn_bit = -1 - b;
	if (isnormal)
		x->rn_flags |= RNF_NORMAL;
	return (x);
}

/*
 * Compare the two netmasks and return true if netmask <m> is more
 * specific than netmask <n>.
 *
 * NOTE: arbitrary ordering for non-contiguous masks.
 */
static bool
rn_lexobetter(const char *mp, const char *np)
{
	const char *lim;

	if (clen(mp) > clen(np))
		return true;	/* not really, but need to check longer one first */

	if (clen(mp) == clen(np)) {
		for (lim = mp + clen(mp); mp < lim; mp++, np++) {
			if ((unsigned)*mp > (unsigned)*np)
				return true;
		}
	}

	return false;
}
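/*
 * Example of the normalcy check above (illustrative commentary): for a
 * contiguous mask whose first non-0xff byte is 0xe0, the inner loop counts
 * b = 3 leading one bits, normal_chars[3] == 0xe0 matches, and if that
 * byte is also the last one the mask is normal.  b is then widened to a
 * whole-key bit index by adding (cp - netmask) << 3.
 */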
static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt, struct radix_mask *nextmask)
{
	struct radix_mask *m;

	m = MKGet(&rn_mkfreelist[mycpuid]);
	if (m == NULL) {
		log(LOG_ERR, "Mask for route not entered\n");
		return (NULL);
	}

	bzero(m, sizeof *m);
	m->rm_bit = tt->rn_bit;
	m->rm_flags = tt->rn_flags;
	if (tt->rn_flags & RNF_NORMAL)
		m->rm_leaf = tt;
	else
		m->rm_mask = tt->rn_mask;
	m->rm_next = nextmask;
	tt->rn_mklist = m;

	return m;
}

struct radix_node *
rn_addroute(const char *key, const char *netmask,
	    struct radix_node_head *head, struct radix_node treenodes[2])
{
	struct radix_node *t, *x = NULL, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	bool keyduplicated;
	const char *mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask != NULL) {
		if ((x = rn_addmask(netmask, false, top->rn_offset,
				    head->rnh_maskhead)) == NULL)
			return (NULL);
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		netmask = x->rn_key;
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(key, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			if (tt->rn_mask == netmask)
				return (NULL);
			if (netmask == NULL ||
			    (tt->rn_mask &&
			     ((b_leaf < tt->rn_bit) /* index(netmask) > node */
			      || rn_refines(netmask, tt->rn_mask)
			      || rn_lexobetter(netmask, tt->rn_mask))))
				break;
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 */
		if (tt == saved_tt) {
			struct radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt;	/* parent */
			if (x->rn_left == t)
				x->rn_left = tt;
			else
				x->rn_right = tt;
			saved_tt = tt; x = xx;
		} else {
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			tt->rn_parent = t;	/* parent */
			if (tt->rn_dupedkey != NULL)	/* parent */
				tt->rn_dupedkey->rn_parent = tt; /* parent */
		}
#ifdef RN_DEBUG
		t = tt + 1;
		tt->rn_info = rn_nodenum++;
		t->rn_info = rn_nodenum++;
		tt->rn_twin = t;
		tt->rn_ybro = rn_clist;
		rn_clist = tt;
#endif
		tt->rn_key = key;
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	/*
	 * Put mask in tree.
	 */
	if (netmask != NULL) {
		tt->rn_mask = netmask;
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	t = saved_tt->rn_parent;
	if (keyduplicated)
		goto on2;
	b_leaf = -1 - t->rn_bit;
	if (t->rn_right == saved_tt)
		x = t->rn_left;
	else
		x = t->rn_right;
	/* Promote general routes from below */
	if (x->rn_bit < 0) {
		mp = &t->rn_mklist;
		while (x != NULL) {
			if (x->rn_mask != NULL &&
			    x->rn_bit >= b_leaf &&
			    x->rn_mklist == NULL) {
				*mp = m = rn_new_radix_mask(x, NULL);
				if (m != NULL)
					mp = &m->rm_next;
			}
			x = x->rn_dupedkey;
		}
	} else if (x->rn_mklist != NULL) {
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) {
			if (m->rm_bit >= b_leaf)
				break;
		}
		t->rn_mklist = m;
		*mp = NULL;
	}
on2:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == NULL) || (b > t->rn_bit))
		return tt;	/* can't lift at all */
	b_leaf = tt->rn_bit;
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) {
		if (m->rm_bit < b_leaf)
			continue;
		if (m->rm_bit > b_leaf)
			break;
		if (m->rm_flags & RNF_NORMAL) {
			mmask = m->rm_leaf->rn_mask;
			if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR,
				    "Non-unique normal route, mask not entered\n");
				return tt;
			}
		} else
			mmask = m->rm_mask;
		if (mmask == netmask) {
			m->rm_refs++;
			tt->rn_mklist = m;
			return tt;
		}
		if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask))
			break;
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return tt;
}
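/*
 * Illustrative note (added commentary): both the dupedkey chains and the
 * per-node mask lists end up ordered most specific to least specific,
 * e.g. a /24 mask sorts ahead of a /16 via rn_refines().  For masks where
 * neither refines the other, rn_lexobetter() supplies an arbitrary but
 * consistent tie-break, so insertion and deletion walk the lists with the
 * same criteria.
 */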
struct radix_node *
rn_delete(const char *key, const char *netmask, struct radix_node_head *head)
{
	struct radix_node *t, *p, *x, *tt;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	int b, head_off, klen;
	int cpu = mycpuid;

	x = head->rnh_treetop;
	tt = rn_search(key, x);
	head_off = x->rn_offset;
	klen = clen(key);
	saved_tt = tt;
	top = x;
	if (tt == NULL ||
	    bcmp(key + head_off, tt->rn_key + head_off, klen - head_off))
		return (NULL);

	/*
	 * Delete our route from mask lists.
	 */
	if (netmask != NULL) {
		if ((x = rn_addmask(netmask, true, head_off,
				    head->rnh_maskhead)) == NULL)
			return (NULL);
		netmask = x->rn_key;
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == NULL)
				return (NULL);
	}
	if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL)
		goto on1;
	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt || m->rm_refs > 0) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return (NULL);	/* dangling ref could cause disaster */
		}
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			goto on1;
		}
		if (--m->rm_refs >= 0)
			goto on1;
	}
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit)
		goto on1;	/* Wasn't lifted at all */

	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next)
		if (m == saved_m) {
			*mp = m->rm_next;
			MKFree(&rn_mkfreelist[cpu], m);
			break;
		}
	if (m == NULL) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL)
			return (NULL);	/* Dangling ref to us */
	}

on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT)
		return (NULL);

#ifdef RN_DEBUG
	/* Get us out of the creation list */
	for (t = rn_clist; t != NULL && t->rn_ybro != tt; t = t->rn_ybro)
		;
	if (t != NULL)
		t->rn_ybro = tt->rn_ybro;
#endif

	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey != NULL) {
		/*
		 * At this point, tt is the deletion target and saved_tt
		 * is the head of the dupedkey chain.
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey;
			x->rn_parent = t;
			if (t->rn_left == tt)
				t->rn_left = x;
			else
				t->rn_right = x;
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt;
			     p != NULL && p->rn_dupedkey != tt;)
				p = p->rn_dupedkey;
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey)		/* parent */
					tt->rn_dupedkey->rn_parent = p;
								/* parent */
			} else {
				log(LOG_ERR, "rn_delete: couldn't find us\n");
			}
		}
		t = tt + 1;
		if (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
			*++x = *t;
			p = t->rn_parent;
#else
			b = t->rn_info;
			*++x = *t;
			t->rn_info = b;
			p = t->rn_parent;
#endif
			if (p->rn_left == t)
				p->rn_left = x;
			else
				p->rn_right = x;
			x->rn_left->rn_parent = x;
			x->rn_right->rn_parent = x;
		}
		goto out;
	}
	if (t->rn_left == tt)
		x = t->rn_right;
	else
		x = t->rn_left;
	p = t->rn_parent;
	if (p->rn_right == t)
		p->rn_right = x;
	else
		p->rn_left = x;
	x->rn_parent = p;
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist != NULL) {
		if (x->rn_bit >= 0) {
			for (mp = &x->rn_mklist; (m = *mp) != NULL;)
				mp = &m->rm_next;
			*mp = t->rn_mklist;
		} else {
			/*
			 * If there are any (key, mask) pairs in a sibling
			 * duped-key chain, some subset will appear sorted
			 * in the same order attached to our mklist.
			 */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_next;

					x->rn_mklist = NULL;
					if (--(m->rm_refs) < 0)
						MKFree(&rn_mkfreelist[cpu], m);
					m = mm;
				}
			if (m) {
				log(LOG_ERR,
				    "rn_delete: Orphaned Mask %p at %p\n",
				    (void *)m, (void *)x);
			}
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = tt + 1;
	if (t != x) {
#ifndef RN_DEBUG
		*t = *x;
#else
		b = t->rn_info;
		*t = *x;
		t->rn_info = b;
#endif
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x)
			p->rn_left = t;
		else
			p->rn_right = t;
	}

out:
	tt->rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}
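/*
 * Illustrative note (added commentary): each route occupies a two-element
 * radix_node array (see rn_newpair(): nodes[0] is the leaf, nodes[1] the
 * interior node).  That is why the code above may have to move the
 * contents of the still-active interior node tt + 1 into the interior
 * node just spliced out of the tree (the t != x case), so the two-node
 * block containing tt can be released as a unit once both tt[0] and
 * tt[1] are marked inactive.
 */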
/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit.
 */
static int
rn_walktree_from(struct radix_node_head *h, const char *xa, const char *xm,
		 walktree_f_t *f, void *w)
{
	struct radix_node *base, *next;
	struct radix_node *rn, *last = NULL /* shut up gcc */;
	bool stopping = false;
	int lastb, error;

	/*
	 * rn_search_m is sort-of-open-coded here.
	 */
	/* kprintf("about to search\n"); */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
		last = rn;
		/* kprintf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
			   rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
		if (!(rn->rn_bmask & xm[rn->rn_offset])) {
			break;
		}
		if (rn->rn_bmask & xa[rn->rn_offset]) {
			rn = rn->rn_right;
		} else {
			rn = rn->rn_left;
		}
	}
	/* kprintf("done searching\n"); */

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
	 * Either way, last is the node we want to start from.
	 */
	rn = last;
	lastb = rn->rn_bit;

	/* kprintf("rn %p, lastb %d\n", rn, lastb); */

	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;

	while (!stopping) {
		/* kprintf("node %p (%d)\n", rn, rn->rn_bit); */
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn &&
		       !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit < lastb) {
				stopping = true;
				/* kprintf("up too far\n"); */
			}
		}

		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;
			/* kprintf("leaf %p\n", rn); */
			if (!(rn->rn_flags & RNF_ROOT) &&
			    (error = (*f)(rn, w)))
				return (error);
		}
		rn = next;

		if (rn->rn_flags & RNF_ROOT) {
			/* kprintf("root, stopping"); */
			stopping = true;
		}
	}

	return 0;
}
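/*
 * Illustrative note (added commentary): xa/xm act as a starting
 * address/mask pair.  The open-coded descent above remembers the last
 * node still covered by xm, and the walk terminates once it climbs back
 * above that node's bit index, so only leaves under the (xa, xm) subtree
 * are visited.
 */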
static int
rn_walktree_at(struct radix_node_head *h, const char *a, const char *m,
	       walktree_f_t *f, void *w)
{
	struct radix_node *base, *next;
	struct radix_node *rn = h->rnh_treetop;
	int error;

	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	if (a == NULL) {
		/* First time through node, go left */
		while (rn->rn_bit >= 0)
			rn = rn->rn_left;
	} else {
		if (m != NULL)
			rn = rn_search_m(a, rn, m);
		else
			rn = rn_search(a, rn);
	}
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn &&
		       !(rn->rn_flags & RNF_ROOT))
			rn = rn->rn_parent;
		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base)) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT) &&
			    (error = (*f)(rn, w)))
				return (error);
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			return (0);
	}
	/* NOTREACHED */
}

static int
rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
{
	return rn_walktree_at(h, NULL, NULL, f, w);
}
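/*
 * Usage sketch (illustrative commentary; "count_leaf" is a hypothetical
 * callback, not part of this file): walkers take a walktree_f_t plus an
 * opaque cookie, and a non-zero return from the callback aborts the walk
 * and is propagated to the caller:
 *
 *	static int
 *	count_leaf(struct radix_node *rn, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return (0);		   non-zero would stop the walk
 *	}
 *
 *	int n = 0;
 *	rnh->rnh_walktree(rnh, count_leaf, &n);
 */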
int
rn_inithead(void **head, struct radix_node_head *maskhead, int off)
{
	struct radix_node_head *rnh;
	struct radix_node *root, *left, *right;

	if (*head != NULL)	/* already initialized */
		return (1);

	R_Malloc(rnh, struct radix_node_head *, sizeof *rnh);
	if (rnh == NULL)
		return (0);

	bzero(rnh, sizeof *rnh);
	*head = rnh;

	root = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	right = &rnh->rnh_nodes[2];
	root->rn_parent = root;
	root->rn_flags = RNF_ROOT | RNF_ACTIVE;
	root->rn_right = right;

	left = root->rn_left;
	left->rn_bit = -1 - off;
	left->rn_flags = root->rn_flags;

	*right = *left;
	right->rn_key = rn_ones;

	rnh->rnh_treetop = root;
	rnh->rnh_maskhead = maskhead;

	rnh->rnh_addaddr = rn_addroute;
	rnh->rnh_deladdr = rn_delete;
	rnh->rnh_matchaddr = rn_match;
	rnh->rnh_lookup = rn_lookup;
	rnh->rnh_walktree = rn_walktree;
	rnh->rnh_walktree_from = rn_walktree_from;
	rnh->rnh_walktree_at = rn_walktree_at;

	return (1);
}

static void
rn_init_handler(netmsg_t msg)
{
	int cpu = mycpuid;

	ASSERT_NETISR_NCPUS(cpu);
	if (rn_inithead((void **)&mask_rnheads[cpu], NULL, 0) == 0)
		panic("rn_init 2");

	netisr_forwardmsg(&msg->base, cpu + 1);
}

void
rn_init(void)
{
	struct netmsg_base msg;
#ifdef _KERNEL
	struct domain *dom;

	SLIST_FOREACH(dom, &domains, dom_next) {
		if (dom->dom_maxrtkey > RN_MAXKEYLEN) {
			panic("domain %s maxkey too big %d/%d",
			      dom->dom_name, dom->dom_maxrtkey, RN_MAXKEYLEN);
		}
	}
#endif

	netmsg_init(&msg, NULL, &curthread->td_msgport, 0, rn_init_handler);
	netisr_domsg_global(&msg);
}

struct radix_node_head *
rn_cpumaskhead(int cpu)
{
	ASSERT_NETISR_NCPUS(cpu);
	KKASSERT(mask_rnheads[cpu] != NULL);
	return mask_rnheads[cpu];
}