1 /* $NetBSD: rbt-cachedb.c,v 1.2 2025/01/26 16:25:24 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 /*! \file */ 17 18 #include <inttypes.h> 19 #include <stdbool.h> 20 #include <sys/mman.h> 21 22 #include <isc/ascii.h> 23 #include <isc/async.h> 24 #include <isc/atomic.h> 25 #include <isc/crc64.h> 26 #include <isc/file.h> 27 #include <isc/hash.h> 28 #include <isc/hashmap.h> 29 #include <isc/heap.h> 30 #include <isc/hex.h> 31 #include <isc/loop.h> 32 #include <isc/mem.h> 33 #include <isc/mutex.h> 34 #include <isc/once.h> 35 #include <isc/random.h> 36 #include <isc/refcount.h> 37 #include <isc/result.h> 38 #include <isc/rwlock.h> 39 #include <isc/serial.h> 40 #include <isc/stdio.h> 41 #include <isc/string.h> 42 #include <isc/time.h> 43 #include <isc/urcu.h> 44 #include <isc/util.h> 45 46 #include <dns/callbacks.h> 47 #include <dns/db.h> 48 #include <dns/dbiterator.h> 49 #include <dns/fixedname.h> 50 #include <dns/log.h> 51 #include <dns/masterdump.h> 52 #include <dns/nsec.h> 53 #include <dns/nsec3.h> 54 #include <dns/rbt.h> 55 #include <dns/rdata.h> 56 #include <dns/rdataset.h> 57 #include <dns/rdatasetiter.h> 58 #include <dns/rdataslab.h> 59 #include <dns/rdatastruct.h> 60 #include <dns/stats.h> 61 #include <dns/time.h> 62 #include <dns/view.h> 63 #include <dns/zone.h> 64 #include <dns/zonekey.h> 65 66 #include "db_p.h" 67 #include "rbtdb_p.h" 68 69 #define CHECK(op) \ 70 do { \ 71 result = (op); \ 72 if (result != ISC_R_SUCCESS) \ 73 goto failure; \ 74 } while (0) 75 76 /*% 77 * Whether to rate-limit updating the LRU to avoid possible thread contention. 78 * Updating LRU requires write locking, so we don't do it every time the 79 * record is touched - only after some time passes. 80 */ 81 #ifndef DNS_RBTDB_LIMITLRUUPDATE 82 #define DNS_RBTDB_LIMITLRUUPDATE 1 83 #endif 84 85 /*% Time after which we update LRU for glue records, 5 minutes */ 86 #define DNS_RBTDB_LRUUPDATE_GLUE 300 87 /*% Time after which we update LRU for all other records, 10 minutes */ 88 #define DNS_RBTDB_LRUUPDATE_REGULAR 600 89 90 #define EXISTS(header) \ 91 ((atomic_load_acquire(&(header)->attributes) & \ 92 DNS_SLABHEADERATTR_NONEXISTENT) == 0) 93 #define NONEXISTENT(header) \ 94 ((atomic_load_acquire(&(header)->attributes) & \ 95 DNS_SLABHEADERATTR_NONEXISTENT) != 0) 96 #define NXDOMAIN(header) \ 97 ((atomic_load_acquire(&(header)->attributes) & \ 98 DNS_SLABHEADERATTR_NXDOMAIN) != 0) 99 #define STALE(header) \ 100 ((atomic_load_acquire(&(header)->attributes) & \ 101 DNS_SLABHEADERATTR_STALE) != 0) 102 #define NEGATIVE(header) \ 103 ((atomic_load_acquire(&(header)->attributes) & \ 104 DNS_SLABHEADERATTR_NEGATIVE) != 0) 105 #define ZEROTTL(header) \ 106 ((atomic_load_acquire(&(header)->attributes) & \ 107 DNS_SLABHEADERATTR_ZEROTTL) != 0) 108 #define ANCIENT(header) \ 109 ((atomic_load_acquire(&(header)->attributes) & \ 110 DNS_SLABHEADERATTR_ANCIENT) != 0) 111 #define STATCOUNT(header) \ 112 ((atomic_load_acquire(&(header)->attributes) & \ 113 DNS_SLABHEADERATTR_STATCOUNT) != 0) 114 115 #define STALE_TTL(header, rbtdb) \ 116 (NXDOMAIN(header) ? 0 : rbtdb->common.serve_stale_ttl) 117 118 #define ACTIVE(header, now) \ 119 (((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header))) 120 121 #define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0) 122 123 /*% 124 * Routines for LRU-based cache management. 125 */ 126 127 /*% 128 * See if a given cache entry that is being reused needs to be updated 129 * in the LRU-list. From the LRU management point of view, this function is 130 * expected to return true for almost all cases. When used with threads, 131 * however, this may cause a non-negligible performance penalty because a 132 * writer lock will have to be acquired before updating the list. 133 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this 134 * function returns true if the entry has not been updated for some period of 135 * time. We differentiate the NS or glue address case and the others since 136 * experiments have shown that the former tends to be accessed relatively 137 * infrequently and the cost of cache miss is higher (e.g., a missing NS records 138 * may cause external queries at a higher level zone, involving more 139 * transactions). 140 * 141 * Caller must hold the node (read or write) lock. 142 */ 143 static bool 144 need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) { 145 if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT | 146 DNS_SLABHEADERATTR_ANCIENT | 147 DNS_SLABHEADERATTR_ZEROTTL)) != 0) 148 { 149 return false; 150 } 151 152 #if DNS_RBTDB_LIMITLRUUPDATE 153 if (header->type == dns_rdatatype_ns || 154 (header->trust == dns_trust_glue && 155 (header->type == dns_rdatatype_a || 156 header->type == dns_rdatatype_aaaa))) 157 { 158 /* 159 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE 160 * seconds have passed since the previous update time. 161 */ 162 return header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now; 163 } 164 165 /* 166 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds 167 * have passed. 168 */ 169 return header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now; 170 #else 171 UNUSED(now); 172 173 return true; 174 #endif /* if DNS_RBTDB_LIMITLRUUPDATE */ 175 } 176 177 /*% 178 * Update the timestamp of a given cache entry and move it to the head 179 * of the corresponding LRU list. 180 * 181 * Caller must hold the node (write) lock. 182 * 183 * Note that the we do NOT touch the heap here, as the TTL has not changed. 184 */ 185 static void 186 update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now) { 187 INSIST(IS_CACHE(rbtdb)); 188 189 /* To be checked: can we really assume this? XXXMLG */ 190 INSIST(ISC_LINK_LINKED(header, link)); 191 192 ISC_LIST_UNLINK(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header, 193 link); 194 header->last_used = now; 195 ISC_LIST_PREPEND(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header, 196 link); 197 } 198 199 /* 200 * Locking 201 * 202 * If a routine is going to lock more than one lock in this module, then 203 * the locking must be done in the following order: 204 * 205 * Tree Lock 206 * 207 * Node Lock (Only one from the set may be locked at one time by 208 * any caller) 209 * 210 * Database Lock 211 * 212 * Failure to follow this hierarchy can result in deadlock. 213 */ 214 215 /* 216 * Deleting Nodes 217 * 218 * For zone databases the node for the origin of the zone MUST NOT be deleted. 219 */ 220 221 /* 222 * DB Routines 223 */ 224 225 static void 226 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { 227 INSIST(IS_CACHE(rbtdb)); 228 229 if (rbtdb->cachestats == NULL) { 230 return; 231 } 232 233 switch (result) { 234 case DNS_R_COVERINGNSEC: 235 isc_stats_increment(rbtdb->cachestats, 236 dns_cachestatscounter_coveringnsec); 237 FALLTHROUGH; 238 case ISC_R_SUCCESS: 239 case DNS_R_CNAME: 240 case DNS_R_DNAME: 241 case DNS_R_DELEGATION: 242 case DNS_R_NCACHENXDOMAIN: 243 case DNS_R_NCACHENXRRSET: 244 isc_stats_increment(rbtdb->cachestats, 245 dns_cachestatscounter_hits); 246 break; 247 default: 248 isc_stats_increment(rbtdb->cachestats, 249 dns_cachestatscounter_misses); 250 } 251 } 252 253 static void 254 clean_stale_headers(dns_slabheader_t *top) { 255 dns_slabheader_t *d = NULL, *down_next = NULL; 256 257 for (d = top->down; d != NULL; d = down_next) { 258 down_next = d->down; 259 dns_slabheader_destroy(&d); 260 } 261 top->down = NULL; 262 } 263 264 static isc_result_t 265 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, 266 dns_name_t *foundname, dns_rdataset_t *rdataset, 267 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 268 dns_name_t *zcname = NULL; 269 dns_typepair_t type; 270 dns_rbtnode_t *node = NULL; 271 272 REQUIRE(search != NULL); 273 REQUIRE(search->zonecut != NULL); 274 REQUIRE(search->zonecut_header != NULL); 275 276 /* 277 * The caller MUST NOT be holding any node locks. 278 */ 279 280 node = search->zonecut; 281 type = search->zonecut_header->type; 282 283 /* 284 * If we have to set foundname, we do it before anything else. 285 * If we were to set foundname after we had set nodep or bound the 286 * rdataset, then we'd have to undo that work if dns_name_copy() 287 * failed. By setting foundname first, there's nothing to undo if 288 * we have trouble. 289 */ 290 if (foundname != NULL && search->copy_name) { 291 zcname = dns_fixedname_name(&search->zonecut_name); 292 dns_name_copy(zcname, foundname); 293 } 294 if (nodep != NULL) { 295 /* 296 * Note that we don't have to increment the node's reference 297 * count here because we're going to use the reference we 298 * already have in the search block. 299 */ 300 *nodep = node; 301 search->need_cleanup = false; 302 } 303 if (rdataset != NULL) { 304 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 305 NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 306 &nlocktype); 307 dns__rbtdb_bindrdataset(search->rbtdb, node, 308 search->zonecut_header, search->now, 309 isc_rwlocktype_read, 310 rdataset DNS__DB_FLARG_PASS); 311 if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { 312 dns__rbtdb_bindrdataset( 313 search->rbtdb, node, search->zonecut_sigheader, 314 search->now, isc_rwlocktype_read, 315 sigrdataset DNS__DB_FLARG_PASS); 316 } 317 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 318 &nlocktype); 319 } 320 321 if (type == dns_rdatatype_dname) { 322 return DNS_R_DNAME; 323 } 324 return DNS_R_DELEGATION; 325 } 326 327 static bool 328 check_stale_header(dns_rbtnode_t *node, dns_slabheader_t *header, 329 isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, 330 rbtdb_search_t *search, dns_slabheader_t **header_prev) { 331 if (!ACTIVE(header, search->now)) { 332 dns_ttl_t stale = header->ttl + 333 STALE_TTL(header, search->rbtdb); 334 /* 335 * If this data is in the stale window keep it and if 336 * DNS_DBFIND_STALEOK is not set we tell the caller to 337 * skip this record. We skip the records with ZEROTTL 338 * (these records should not be cached anyway). 339 */ 340 341 DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); 342 if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && 343 stale > search->now) 344 { 345 dns__rbtdb_mark(header, DNS_SLABHEADERATTR_STALE); 346 *header_prev = header; 347 /* 348 * If DNS_DBFIND_STALESTART is set then it means we 349 * failed to resolve the name during recursion, in 350 * this case we mark the time in which the refresh 351 * failed. 352 */ 353 if ((search->options & DNS_DBFIND_STALESTART) != 0) { 354 atomic_store_release( 355 &header->last_refresh_fail_ts, 356 search->now); 357 } else if ((search->options & 358 DNS_DBFIND_STALEENABLED) != 0 && 359 search->now < 360 (atomic_load_acquire( 361 &header->last_refresh_fail_ts) + 362 search->rbtdb->serve_stale_refresh)) 363 { 364 /* 365 * If we are within interval between last 366 * refresh failure time + 'stale-refresh-time', 367 * then don't skip this stale entry but use it 368 * instead. 369 */ 370 DNS_SLABHEADER_SETATTR( 371 header, 372 DNS_SLABHEADERATTR_STALE_WINDOW); 373 return false; 374 } else if ((search->options & 375 DNS_DBFIND_STALETIMEOUT) != 0) 376 { 377 /* 378 * We want stale RRset due to timeout, so we 379 * don't skip it. 380 */ 381 return false; 382 } 383 return (search->options & DNS_DBFIND_STALEOK) == 0; 384 } 385 386 /* 387 * This rdataset is stale. If no one else is using the 388 * node, we can clean it up right now, otherwise we mark 389 * it as ancient, and the node as dirty, so it will get 390 * cleaned up later. 391 */ 392 if ((header->ttl < search->now - RBTDB_VIRTUAL) && 393 (*nlocktypep == isc_rwlocktype_write || 394 NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) 395 { 396 /* 397 * We update the node's status only when we can 398 * get write access; otherwise, we leave others 399 * to this work. Periodical cleaning will 400 * eventually take the job as the last resort. 401 * We won't downgrade the lock, since other 402 * rdatasets are probably stale, too. 403 */ 404 405 if (isc_refcount_current(&node->references) == 0) { 406 /* 407 * header->down can be non-NULL if the 408 * refcount has just decremented to 0 409 * but dns__rbtdb_decref() has not 410 * performed clean_cache_node(), in 411 * which case we need to purge the stale 412 * headers first. 413 */ 414 clean_stale_headers(header); 415 if (*header_prev != NULL) { 416 (*header_prev)->next = header->next; 417 } else { 418 node->data = header->next; 419 } 420 dns_slabheader_destroy(&header); 421 } else { 422 dns__rbtdb_mark(header, 423 DNS_SLABHEADERATTR_ANCIENT); 424 RBTDB_HEADERNODE(header)->dirty = 1; 425 *header_prev = header; 426 } 427 } else { 428 *header_prev = header; 429 } 430 return true; 431 } 432 return false; 433 } 434 435 static isc_result_t 436 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, 437 void *arg DNS__DB_FLARG) { 438 rbtdb_search_t *search = arg; 439 dns_slabheader_t *header = NULL; 440 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 441 dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; 442 isc_result_t result; 443 isc_rwlock_t *lock = NULL; 444 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 445 446 REQUIRE(search->zonecut == NULL); 447 448 /* 449 * Keep compiler silent. 450 */ 451 UNUSED(name); 452 453 lock = &(search->rbtdb->node_locks[node->locknum].lock); 454 NODE_RDLOCK(lock, &nlocktype); 455 456 /* 457 * Look for a DNAME or RRSIG DNAME rdataset. 458 */ 459 for (header = node->data; header != NULL; header = header_next) { 460 header_next = header->next; 461 if (check_stale_header(node, header, &nlocktype, lock, search, 462 &header_prev)) 463 { 464 /* Do nothing. */ 465 } else if (header->type == dns_rdatatype_dname && 466 EXISTS(header) && !ANCIENT(header)) 467 { 468 dname_header = header; 469 header_prev = header; 470 } else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) && 471 EXISTS(header) && !ANCIENT(header)) 472 { 473 sigdname_header = header; 474 header_prev = header; 475 } else { 476 header_prev = header; 477 } 478 } 479 480 if (dname_header != NULL && 481 (!DNS_TRUST_PENDING(dname_header->trust) || 482 (search->options & DNS_DBFIND_PENDINGOK) != 0)) 483 { 484 /* 485 * We increment the reference count on node to ensure that 486 * search->zonecut_header will still be valid later. 487 */ 488 dns__rbtdb_newref(search->rbtdb, node, 489 nlocktype DNS__DB_FLARG_PASS); 490 search->zonecut = node; 491 search->zonecut_header = dname_header; 492 search->zonecut_sigheader = sigdname_header; 493 search->need_cleanup = true; 494 result = DNS_R_PARTIALMATCH; 495 } else { 496 result = DNS_R_CONTINUE; 497 } 498 499 NODE_UNLOCK(lock, &nlocktype); 500 501 return result; 502 } 503 504 static isc_result_t 505 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, 506 dns_dbnode_t **nodep, dns_name_t *foundname, 507 dns_rdataset_t *rdataset, 508 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 509 unsigned int i; 510 isc_result_t result = ISC_R_NOTFOUND; 511 dns_name_t name; 512 dns_rbtdb_t *rbtdb = NULL; 513 bool done; 514 515 /* 516 * Caller must be holding the tree lock. 517 */ 518 519 rbtdb = search->rbtdb; 520 i = search->chain.level_matches; 521 done = false; 522 do { 523 dns_slabheader_t *header = NULL; 524 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 525 dns_slabheader_t *found = NULL, *foundsig = NULL; 526 isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock; 527 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 528 529 NODE_RDLOCK(lock, &nlocktype); 530 531 /* 532 * Look for NS and RRSIG NS rdatasets. 533 */ 534 for (header = node->data; header != NULL; header = header_next) 535 { 536 header_next = header->next; 537 if (check_stale_header(node, header, &nlocktype, lock, 538 search, &header_prev)) 539 { 540 /* Do nothing. */ 541 } else if (EXISTS(header) && !ANCIENT(header)) { 542 /* 543 * We've found an extant rdataset. See if 544 * we're interested in it. 545 */ 546 if (header->type == dns_rdatatype_ns) { 547 found = header; 548 if (foundsig != NULL) { 549 break; 550 } 551 } else if (header->type == 552 DNS_SIGTYPE(dns_rdatatype_ns)) 553 { 554 foundsig = header; 555 if (found != NULL) { 556 break; 557 } 558 } 559 header_prev = header; 560 } else { 561 header_prev = header; 562 } 563 } 564 565 if (found != NULL) { 566 /* 567 * If we have to set foundname, we do it before 568 * anything else. If we were to set foundname after 569 * we had set nodep or bound the rdataset, then we'd 570 * have to undo that work if dns_name_concatenate() 571 * failed. By setting foundname first, there's 572 * nothing to undo if we have trouble. 573 */ 574 if (foundname != NULL) { 575 dns_name_init(&name, NULL); 576 dns_rbt_namefromnode(node, &name); 577 dns_name_copy(&name, foundname); 578 while (i > 0) { 579 dns_rbtnode_t *level_node = 580 search->chain.levels[--i]; 581 dns_name_init(&name, NULL); 582 dns_rbt_namefromnode(level_node, &name); 583 result = dns_name_concatenate( 584 foundname, &name, foundname, 585 NULL); 586 if (result != ISC_R_SUCCESS) { 587 if (nodep != NULL) { 588 *nodep = NULL; 589 } 590 goto node_exit; 591 } 592 } 593 } 594 result = DNS_R_DELEGATION; 595 if (nodep != NULL) { 596 dns__rbtdb_newref(search->rbtdb, node, 597 nlocktype DNS__DB_FLARG_PASS); 598 *nodep = node; 599 } 600 dns__rbtdb_bindrdataset(search->rbtdb, node, found, 601 search->now, nlocktype, 602 rdataset DNS__DB_FLARG_PASS); 603 if (foundsig != NULL) { 604 dns__rbtdb_bindrdataset( 605 search->rbtdb, node, foundsig, 606 search->now, nlocktype, 607 sigrdataset DNS__DB_FLARG_PASS); 608 } 609 if (need_headerupdate(found, search->now) || 610 (foundsig != NULL && 611 need_headerupdate(foundsig, search->now))) 612 { 613 if (nlocktype != isc_rwlocktype_write) { 614 NODE_FORCEUPGRADE(lock, &nlocktype); 615 POST(nlocktype); 616 } 617 if (need_headerupdate(found, search->now)) { 618 update_header(search->rbtdb, found, 619 search->now); 620 } 621 if (foundsig != NULL && 622 need_headerupdate(foundsig, search->now)) 623 { 624 update_header(search->rbtdb, foundsig, 625 search->now); 626 } 627 } 628 } 629 630 node_exit: 631 NODE_UNLOCK(lock, &nlocktype); 632 633 if (found == NULL && i > 0) { 634 i--; 635 node = search->chain.levels[i]; 636 } else { 637 done = true; 638 } 639 } while (!done); 640 641 return result; 642 } 643 644 /* 645 * Look for a potentially covering NSEC in the cache where `name` 646 * is known not to exist. This uses the auxiliary NSEC tree to find 647 * the potential NSEC owner. If found, we update 'foundname', 'nodep', 648 * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC. 649 * Otherwise, return ISC_R_NOTFOUND. 650 */ 651 static isc_result_t 652 find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name, 653 dns_dbnode_t **nodep, isc_stdtime_t now, 654 dns_name_t *foundname, dns_rdataset_t *rdataset, 655 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 656 dns_fixedname_t fprefix, forigin, ftarget, fixed; 657 dns_name_t *prefix = NULL, *origin = NULL; 658 dns_name_t *target = NULL, *fname = NULL; 659 dns_rbtnode_t *node = NULL; 660 dns_rbtnodechain_t chain; 661 isc_result_t result; 662 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 663 isc_rwlock_t *lock = NULL; 664 dns_typepair_t matchtype, sigmatchtype; 665 dns_slabheader_t *found = NULL, *foundsig = NULL; 666 dns_slabheader_t *header = NULL; 667 dns_slabheader_t *header_next = NULL, *header_prev = NULL; 668 669 /* 670 * Look for the node in the auxilary tree. 671 */ 672 dns_rbtnodechain_init(&chain); 673 target = dns_fixedname_initname(&ftarget); 674 result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node, 675 &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 676 if (result != DNS_R_PARTIALMATCH) { 677 dns_rbtnodechain_reset(&chain); 678 return ISC_R_NOTFOUND; 679 } 680 681 prefix = dns_fixedname_initname(&fprefix); 682 origin = dns_fixedname_initname(&forigin); 683 target = dns_fixedname_initname(&ftarget); 684 fname = dns_fixedname_initname(&fixed); 685 686 matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0); 687 sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec); 688 689 /* 690 * Extract predecessor from chain. 691 */ 692 result = dns_rbtnodechain_current(&chain, prefix, origin, NULL); 693 dns_rbtnodechain_reset(&chain); 694 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { 695 return ISC_R_NOTFOUND; 696 } 697 698 result = dns_name_concatenate(prefix, origin, target, NULL); 699 if (result != ISC_R_SUCCESS) { 700 return ISC_R_NOTFOUND; 701 } 702 703 /* 704 * Lookup the predecessor in the main tree. 705 */ 706 node = NULL; 707 result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node, 708 NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 709 if (result != ISC_R_SUCCESS) { 710 return ISC_R_NOTFOUND; 711 } 712 713 lock = &(search->rbtdb->node_locks[node->locknum].lock); 714 NODE_RDLOCK(lock, &nlocktype); 715 for (header = node->data; header != NULL; header = header_next) { 716 header_next = header->next; 717 if (check_stale_header(node, header, &nlocktype, lock, search, 718 &header_prev)) 719 { 720 continue; 721 } 722 if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0) 723 { 724 header_prev = header; 725 continue; 726 } 727 if (header->type == matchtype) { 728 found = header; 729 if (foundsig != NULL) { 730 break; 731 } 732 } else if (header->type == sigmatchtype) { 733 foundsig = header; 734 if (found != NULL) { 735 break; 736 } 737 } 738 header_prev = header; 739 } 740 if (found != NULL) { 741 dns__rbtdb_bindrdataset(search->rbtdb, node, found, now, 742 nlocktype, rdataset DNS__DB_FLARG_PASS); 743 if (foundsig != NULL) { 744 dns__rbtdb_bindrdataset(search->rbtdb, node, foundsig, 745 now, nlocktype, 746 sigrdataset DNS__DB_FLARG_PASS); 747 } 748 dns__rbtdb_newref(search->rbtdb, node, 749 nlocktype DNS__DB_FLARG_PASS); 750 751 dns_name_copy(fname, foundname); 752 753 *nodep = node; 754 result = DNS_R_COVERINGNSEC; 755 } else { 756 result = ISC_R_NOTFOUND; 757 } 758 NODE_UNLOCK(lock, &nlocktype); 759 return result; 760 } 761 762 static isc_result_t 763 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 764 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 765 dns_dbnode_t **nodep, dns_name_t *foundname, 766 dns_rdataset_t *rdataset, 767 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 768 dns_rbtnode_t *node = NULL; 769 isc_result_t result; 770 rbtdb_search_t search; 771 bool cname_ok = true; 772 bool found_noqname = false; 773 bool all_negative = true; 774 bool empty_node; 775 isc_rwlock_t *lock = NULL; 776 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 777 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 778 dns_slabheader_t *header = NULL; 779 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 780 dns_slabheader_t *found = NULL, *nsheader = NULL; 781 dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; 782 dns_slabheader_t *update = NULL, *updatesig = NULL; 783 dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; 784 dns_typepair_t sigtype, negtype; 785 786 UNUSED(version); 787 788 REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); 789 REQUIRE(version == NULL); 790 791 if (now == 0) { 792 now = isc_stdtime_now(); 793 } 794 795 search = (rbtdb_search_t){ 796 .rbtdb = (dns_rbtdb_t *)db, 797 .serial = 1, 798 .options = options, 799 .now = now, 800 }; 801 dns_fixedname_init(&search.zonecut_name); 802 dns_rbtnodechain_init(&search.chain); 803 804 TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); 805 806 /* 807 * Search down from the root of the tree. If, while going down, we 808 * encounter a callback node, cache_zonecut_callback() will search the 809 * rdatasets at the zone cut for a DNAME rdataset. 810 */ 811 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, 812 &search.chain, DNS_RBTFIND_EMPTYDATA, 813 cache_zonecut_callback, &search); 814 815 if (result == DNS_R_PARTIALMATCH) { 816 /* 817 * If dns_rbt_findnode discovered a covering DNAME skip 818 * looking for a covering NSEC. 819 */ 820 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 821 (search.zonecut_header == NULL || 822 search.zonecut_header->type != dns_rdatatype_dname)) 823 { 824 result = find_coveringnsec( 825 &search, name, nodep, now, foundname, rdataset, 826 sigrdataset DNS__DB_FLARG_PASS); 827 if (result == DNS_R_COVERINGNSEC) { 828 goto tree_exit; 829 } 830 } 831 if (search.zonecut != NULL) { 832 result = setup_delegation( 833 &search, nodep, foundname, rdataset, 834 sigrdataset DNS__DB_FLARG_PASS); 835 goto tree_exit; 836 } else { 837 find_ns: 838 result = find_deepest_zonecut( 839 &search, node, nodep, foundname, rdataset, 840 sigrdataset DNS__DB_FLARG_PASS); 841 goto tree_exit; 842 } 843 } else if (result != ISC_R_SUCCESS) { 844 goto tree_exit; 845 } 846 847 /* 848 * Certain DNSSEC types are not subject to CNAME matching 849 * (RFC4035, section 2.5 and RFC3007). 850 * 851 * We don't check for RRSIG, because we don't store RRSIG records 852 * directly. 853 */ 854 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 855 cname_ok = false; 856 } 857 858 /* 859 * We now go looking for rdata... 860 */ 861 862 lock = &(search.rbtdb->node_locks[node->locknum].lock); 863 NODE_RDLOCK(lock, &nlocktype); 864 865 /* 866 * These pointers need to be reset here in case we did 867 * 'goto find_ns' from somewhere below. 868 */ 869 found = NULL; 870 foundsig = NULL; 871 sigtype = DNS_SIGTYPE(type); 872 negtype = DNS_TYPEPAIR_VALUE(0, type); 873 nsheader = NULL; 874 nsecheader = NULL; 875 nssig = NULL; 876 nsecsig = NULL; 877 cnamesig = NULL; 878 empty_node = true; 879 header_prev = NULL; 880 for (header = node->data; header != NULL; header = header_next) { 881 header_next = header->next; 882 if (check_stale_header(node, header, &nlocktype, lock, &search, 883 &header_prev)) 884 { 885 /* Do nothing. */ 886 } else if (EXISTS(header) && !ANCIENT(header)) { 887 /* 888 * We now know that there is at least one active 889 * non-stale rdataset at this node. 890 */ 891 empty_node = false; 892 if (header->noqname != NULL && 893 header->trust == dns_trust_secure) 894 { 895 found_noqname = true; 896 } 897 if (!NEGATIVE(header)) { 898 all_negative = false; 899 } 900 901 /* 902 * If we found a type we were looking for, remember 903 * it. 904 */ 905 if (header->type == type || 906 (type == dns_rdatatype_any && 907 DNS_TYPEPAIR_TYPE(header->type) != 0) || 908 (cname_ok && header->type == dns_rdatatype_cname)) 909 { 910 /* 911 * We've found the answer. 912 */ 913 found = header; 914 if (header->type == dns_rdatatype_cname && 915 cname_ok) 916 { 917 /* 918 * If we've already got the 919 * CNAME RRSIG, use it. 920 */ 921 if (cnamesig != NULL) { 922 foundsig = cnamesig; 923 } else { 924 sigtype = DNS_SIGTYPE( 925 dns_rdatatype_cname); 926 } 927 } 928 } else if (header->type == sigtype) { 929 /* 930 * We've found the RRSIG rdataset for our 931 * target type. Remember it. 932 */ 933 foundsig = header; 934 } else if (header->type == RDATATYPE_NCACHEANY || 935 header->type == negtype) 936 { 937 /* 938 * We've found a negative cache entry. 939 */ 940 found = header; 941 } else if (header->type == dns_rdatatype_ns) { 942 /* 943 * Remember a NS rdataset even if we're 944 * not specifically looking for it, because 945 * we might need it later. 946 */ 947 nsheader = header; 948 } else if (header->type == 949 DNS_SIGTYPE(dns_rdatatype_ns)) 950 { 951 /* 952 * If we need the NS rdataset, we'll also 953 * need its signature. 954 */ 955 nssig = header; 956 } else if (header->type == dns_rdatatype_nsec) { 957 nsecheader = header; 958 } else if (header->type == 959 DNS_SIGTYPE(dns_rdatatype_nsec)) 960 { 961 nsecsig = header; 962 } else if (cname_ok && 963 header->type == 964 DNS_SIGTYPE(dns_rdatatype_cname)) 965 { 966 /* 967 * If we get a CNAME match, we'll also need 968 * its signature. 969 */ 970 cnamesig = header; 971 } 972 header_prev = header; 973 } else { 974 header_prev = header; 975 } 976 } 977 978 if (empty_node) { 979 /* 980 * We have an exact match for the name, but there are no 981 * extant rdatasets. That means that this node doesn't 982 * meaningfully exist, and that we really have a partial match. 983 */ 984 NODE_UNLOCK(lock, &nlocktype); 985 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { 986 result = find_coveringnsec( 987 &search, name, nodep, now, foundname, rdataset, 988 sigrdataset DNS__DB_FLARG_PASS); 989 if (result == DNS_R_COVERINGNSEC) { 990 goto tree_exit; 991 } 992 } 993 goto find_ns; 994 } 995 996 /* 997 * If we didn't find what we were looking for... 998 */ 999 if (found == NULL || 1000 (DNS_TRUST_ADDITIONAL(found->trust) && 1001 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || 1002 (found->trust == dns_trust_glue && 1003 ((options & DNS_DBFIND_GLUEOK) == 0)) || 1004 (DNS_TRUST_PENDING(found->trust) && 1005 ((options & DNS_DBFIND_PENDINGOK) == 0))) 1006 { 1007 /* 1008 * Return covering NODATA NSEC record. 1009 */ 1010 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 1011 nsecheader != NULL) 1012 { 1013 if (nodep != NULL) { 1014 dns__rbtdb_newref(search.rbtdb, node, 1015 nlocktype DNS__DB_FLARG_PASS); 1016 *nodep = node; 1017 } 1018 dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader, 1019 search.now, nlocktype, 1020 rdataset DNS__DB_FLARG_PASS); 1021 if (need_headerupdate(nsecheader, search.now)) { 1022 update = nsecheader; 1023 } 1024 if (nsecsig != NULL) { 1025 dns__rbtdb_bindrdataset( 1026 search.rbtdb, node, nsecsig, search.now, 1027 nlocktype, 1028 sigrdataset DNS__DB_FLARG_PASS); 1029 if (need_headerupdate(nsecsig, search.now)) { 1030 updatesig = nsecsig; 1031 } 1032 } 1033 result = DNS_R_COVERINGNSEC; 1034 goto node_exit; 1035 } 1036 1037 /* 1038 * This name was from a wild card. Look for a covering NSEC. 1039 */ 1040 if (found == NULL && (found_noqname || all_negative) && 1041 (search.options & DNS_DBFIND_COVERINGNSEC) != 0) 1042 { 1043 NODE_UNLOCK(lock, &nlocktype); 1044 result = find_coveringnsec( 1045 &search, name, nodep, now, foundname, rdataset, 1046 sigrdataset DNS__DB_FLARG_PASS); 1047 if (result == DNS_R_COVERINGNSEC) { 1048 goto tree_exit; 1049 } 1050 goto find_ns; 1051 } 1052 1053 /* 1054 * If there is an NS rdataset at this node, then this is the 1055 * deepest zone cut. 1056 */ 1057 if (nsheader != NULL) { 1058 if (nodep != NULL) { 1059 dns__rbtdb_newref(search.rbtdb, node, 1060 nlocktype DNS__DB_FLARG_PASS); 1061 *nodep = node; 1062 } 1063 dns__rbtdb_bindrdataset(search.rbtdb, node, nsheader, 1064 search.now, nlocktype, 1065 rdataset DNS__DB_FLARG_PASS); 1066 if (need_headerupdate(nsheader, search.now)) { 1067 update = nsheader; 1068 } 1069 if (nssig != NULL) { 1070 dns__rbtdb_bindrdataset( 1071 search.rbtdb, node, nssig, search.now, 1072 nlocktype, 1073 sigrdataset DNS__DB_FLARG_PASS); 1074 if (need_headerupdate(nssig, search.now)) { 1075 updatesig = nssig; 1076 } 1077 } 1078 result = DNS_R_DELEGATION; 1079 goto node_exit; 1080 } 1081 1082 /* 1083 * Go find the deepest zone cut. 1084 */ 1085 NODE_UNLOCK(lock, &nlocktype); 1086 goto find_ns; 1087 } 1088 1089 /* 1090 * We found what we were looking for, or we found a CNAME. 1091 */ 1092 1093 if (nodep != NULL) { 1094 dns__rbtdb_newref(search.rbtdb, node, 1095 nlocktype DNS__DB_FLARG_PASS); 1096 *nodep = node; 1097 } 1098 1099 if (NEGATIVE(found)) { 1100 /* 1101 * We found a negative cache entry. 1102 */ 1103 if (NXDOMAIN(found)) { 1104 result = DNS_R_NCACHENXDOMAIN; 1105 } else { 1106 result = DNS_R_NCACHENXRRSET; 1107 } 1108 } else if (type != found->type && type != dns_rdatatype_any && 1109 found->type == dns_rdatatype_cname) 1110 { 1111 /* 1112 * We weren't doing an ANY query and we found a CNAME instead 1113 * of the type we were looking for, so we need to indicate 1114 * that result to the caller. 1115 */ 1116 result = DNS_R_CNAME; 1117 } else { 1118 /* 1119 * An ordinary successful query! 1120 */ 1121 result = ISC_R_SUCCESS; 1122 } 1123 1124 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || 1125 result == DNS_R_NCACHENXRRSET) 1126 { 1127 dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, 1128 nlocktype, rdataset DNS__DB_FLARG_PASS); 1129 if (need_headerupdate(found, search.now)) { 1130 update = found; 1131 } 1132 if (!NEGATIVE(found) && foundsig != NULL) { 1133 dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, 1134 search.now, nlocktype, 1135 sigrdataset DNS__DB_FLARG_PASS); 1136 if (need_headerupdate(foundsig, search.now)) { 1137 updatesig = foundsig; 1138 } 1139 } 1140 } 1141 1142 node_exit: 1143 if ((update != NULL || updatesig != NULL) && 1144 nlocktype != isc_rwlocktype_write) 1145 { 1146 NODE_FORCEUPGRADE(lock, &nlocktype); 1147 POST(nlocktype); 1148 } 1149 if (update != NULL && need_headerupdate(update, search.now)) { 1150 update_header(search.rbtdb, update, search.now); 1151 } 1152 if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { 1153 update_header(search.rbtdb, updatesig, search.now); 1154 } 1155 1156 NODE_UNLOCK(lock, &nlocktype); 1157 1158 tree_exit: 1159 TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); 1160 1161 /* 1162 * If we found a zonecut but aren't going to use it, we have to 1163 * let go of it. 1164 */ 1165 if (search.need_cleanup) { 1166 node = search.zonecut; 1167 INSIST(node != NULL); 1168 lock = &(search.rbtdb->node_locks[node->locknum].lock); 1169 1170 NODE_RDLOCK(lock, &nlocktype); 1171 dns__rbtdb_decref(search.rbtdb, node, 0, &nlocktype, &tlocktype, 1172 true, false DNS__DB_FLARG_PASS); 1173 NODE_UNLOCK(lock, &nlocktype); 1174 INSIST(tlocktype == isc_rwlocktype_none); 1175 } 1176 1177 dns_rbtnodechain_reset(&search.chain); 1178 1179 update_cachestats(search.rbtdb, result); 1180 return result; 1181 } 1182 1183 static isc_result_t 1184 cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 1185 isc_stdtime_t now, dns_dbnode_t **nodep, 1186 dns_name_t *foundname, dns_name_t *dcname, 1187 dns_rdataset_t *rdataset, 1188 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1189 dns_rbtnode_t *node = NULL; 1190 isc_rwlock_t *lock = NULL; 1191 isc_result_t result; 1192 rbtdb_search_t search; 1193 dns_slabheader_t *header = NULL; 1194 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 1195 dns_slabheader_t *found = NULL, *foundsig = NULL; 1196 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; 1197 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1198 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1199 bool dcnull = (dcname == NULL); 1200 1201 REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); 1202 1203 if (now == 0) { 1204 now = isc_stdtime_now(); 1205 } 1206 1207 search = (rbtdb_search_t){ 1208 .rbtdb = (dns_rbtdb_t *)db, 1209 .serial = 1, 1210 .options = options, 1211 .now = now, 1212 }; 1213 dns_fixedname_init(&search.zonecut_name); 1214 dns_rbtnodechain_init(&search.chain); 1215 1216 if (dcnull) { 1217 dcname = foundname; 1218 } 1219 1220 if ((options & DNS_DBFIND_NOEXACT) != 0) { 1221 rbtoptions |= DNS_RBTFIND_NOEXACT; 1222 } 1223 1224 TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); 1225 1226 /* 1227 * Search down from the root of the tree. 1228 */ 1229 result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, 1230 &search.chain, rbtoptions, NULL, &search); 1231 1232 if (result == DNS_R_PARTIALMATCH) { 1233 result = find_deepest_zonecut(&search, node, nodep, foundname, 1234 rdataset, 1235 sigrdataset DNS__DB_FLARG_PASS); 1236 goto tree_exit; 1237 } else if (result != ISC_R_SUCCESS) { 1238 goto tree_exit; 1239 } else if (!dcnull) { 1240 dns_name_copy(dcname, foundname); 1241 } 1242 1243 /* 1244 * We now go looking for an NS rdataset at the node. 1245 */ 1246 1247 lock = &(search.rbtdb->node_locks[node->locknum].lock); 1248 NODE_RDLOCK(lock, &nlocktype); 1249 1250 for (header = node->data; header != NULL; header = header_next) { 1251 header_next = header->next; 1252 if (check_stale_header(node, header, &nlocktype, lock, &search, 1253 &header_prev)) 1254 { 1255 /* 1256 * The function dns_rbt_findnode found us the a matching 1257 * node for 'name' and stored the result in 'dcname'. 1258 * This is the deepest known zonecut in our database. 1259 * However, this node may be stale and if serve-stale 1260 * is not enabled (in other words 'stale-answer-enable' 1261 * is set to no), this node may not be used as a 1262 * zonecut we know about. If so, find the deepest 1263 * zonecut from this node up and return that instead. 1264 */ 1265 NODE_UNLOCK(lock, &nlocktype); 1266 result = find_deepest_zonecut( 1267 &search, node, nodep, foundname, rdataset, 1268 sigrdataset DNS__DB_FLARG_PASS); 1269 dns_name_copy(foundname, dcname); 1270 goto tree_exit; 1271 } else if (EXISTS(header) && !ANCIENT(header)) { 1272 /* 1273 * If we found a type we were looking for, remember 1274 * it. 1275 */ 1276 if (header->type == dns_rdatatype_ns) { 1277 /* 1278 * Remember a NS rdataset even if we're 1279 * not specifically looking for it, because 1280 * we might need it later. 1281 */ 1282 found = header; 1283 } else if (header->type == 1284 DNS_SIGTYPE(dns_rdatatype_ns)) 1285 { 1286 /* 1287 * If we need the NS rdataset, we'll also 1288 * need its signature. 1289 */ 1290 foundsig = header; 1291 } 1292 header_prev = header; 1293 } else { 1294 header_prev = header; 1295 } 1296 } 1297 1298 if (found == NULL) { 1299 /* 1300 * No NS records here. 1301 */ 1302 NODE_UNLOCK(lock, &nlocktype); 1303 result = find_deepest_zonecut(&search, node, nodep, foundname, 1304 rdataset, 1305 sigrdataset DNS__DB_FLARG_PASS); 1306 goto tree_exit; 1307 } 1308 1309 if (nodep != NULL) { 1310 dns__rbtdb_newref(search.rbtdb, node, 1311 nlocktype DNS__DB_FLARG_PASS); 1312 *nodep = node; 1313 } 1314 1315 dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, 1316 nlocktype, rdataset DNS__DB_FLARG_PASS); 1317 if (foundsig != NULL) { 1318 dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, 1319 search.now, nlocktype, 1320 sigrdataset DNS__DB_FLARG_PASS); 1321 } 1322 1323 if (need_headerupdate(found, search.now) || 1324 (foundsig != NULL && need_headerupdate(foundsig, search.now))) 1325 { 1326 if (nlocktype != isc_rwlocktype_write) { 1327 NODE_FORCEUPGRADE(lock, &nlocktype); 1328 POST(nlocktype); 1329 } 1330 if (need_headerupdate(found, search.now)) { 1331 update_header(search.rbtdb, found, search.now); 1332 } 1333 if (foundsig != NULL && need_headerupdate(foundsig, search.now)) 1334 { 1335 update_header(search.rbtdb, foundsig, search.now); 1336 } 1337 } 1338 1339 NODE_UNLOCK(lock, &nlocktype); 1340 1341 tree_exit: 1342 TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); 1343 1344 INSIST(!search.need_cleanup); 1345 1346 dns_rbtnodechain_reset(&search.chain); 1347 1348 if (result == DNS_R_DELEGATION) { 1349 result = ISC_R_SUCCESS; 1350 } 1351 1352 return result; 1353 } 1354 1355 static isc_result_t 1356 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 1357 dns_rdatatype_t type, dns_rdatatype_t covers, 1358 isc_stdtime_t now, dns_rdataset_t *rdataset, 1359 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1360 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1361 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 1362 dns_slabheader_t *header = NULL, *header_next = NULL; 1363 dns_slabheader_t *found = NULL, *foundsig = NULL; 1364 dns_typepair_t matchtype, sigmatchtype, negtype; 1365 isc_result_t result; 1366 isc_rwlock_t *lock = NULL; 1367 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1368 1369 REQUIRE(VALID_RBTDB(rbtdb)); 1370 REQUIRE(type != dns_rdatatype_any); 1371 1372 UNUSED(version); 1373 1374 result = ISC_R_SUCCESS; 1375 1376 if (now == 0) { 1377 now = isc_stdtime_now(); 1378 } 1379 1380 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 1381 NODE_RDLOCK(lock, &nlocktype); 1382 1383 matchtype = DNS_TYPEPAIR_VALUE(type, covers); 1384 negtype = DNS_TYPEPAIR_VALUE(0, type); 1385 if (covers == 0) { 1386 sigmatchtype = DNS_SIGTYPE(type); 1387 } else { 1388 sigmatchtype = 0; 1389 } 1390 1391 for (header = rbtnode->data; header != NULL; header = header_next) { 1392 header_next = header->next; 1393 if (!ACTIVE(header, now)) { 1394 if ((header->ttl + STALE_TTL(header, rbtdb) < 1395 now - RBTDB_VIRTUAL) && 1396 (nlocktype == isc_rwlocktype_write || 1397 NODE_TRYUPGRADE(lock, &nlocktype) == 1398 ISC_R_SUCCESS)) 1399 { 1400 /* 1401 * We update the node's status only when we 1402 * can get write access. 1403 * 1404 * We don't check if refcurrent(rbtnode) == 0 1405 * and try to free like we do in cache_find(), 1406 * because refcurrent(rbtnode) must be 1407 * non-zero. This is so because 'node' is an 1408 * argument to the function. 1409 */ 1410 dns__rbtdb_mark(header, 1411 DNS_SLABHEADERATTR_ANCIENT); 1412 RBTDB_HEADERNODE(header)->dirty = 1; 1413 } 1414 } else if (EXISTS(header) && !ANCIENT(header)) { 1415 if (header->type == matchtype) { 1416 found = header; 1417 } else if (header->type == RDATATYPE_NCACHEANY || 1418 header->type == negtype) 1419 { 1420 found = header; 1421 } else if (header->type == sigmatchtype) { 1422 foundsig = header; 1423 } 1424 } 1425 } 1426 if (found != NULL) { 1427 dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, nlocktype, 1428 rdataset DNS__DB_FLARG_PASS); 1429 if (!NEGATIVE(found) && foundsig != NULL) { 1430 dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now, 1431 nlocktype, 1432 sigrdataset DNS__DB_FLARG_PASS); 1433 } 1434 } 1435 1436 NODE_UNLOCK(lock, &nlocktype); 1437 1438 if (found == NULL) { 1439 return ISC_R_NOTFOUND; 1440 } 1441 1442 if (NEGATIVE(found)) { 1443 /* 1444 * We found a negative cache entry. 1445 */ 1446 if (NXDOMAIN(found)) { 1447 result = DNS_R_NCACHENXDOMAIN; 1448 } else { 1449 result = DNS_R_NCACHENXRRSET; 1450 } 1451 } 1452 1453 update_cachestats(rbtdb, result); 1454 1455 return result; 1456 } 1457 1458 static size_t 1459 hashsize(dns_db_t *db) { 1460 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1461 size_t size; 1462 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1463 1464 REQUIRE(VALID_RBTDB(rbtdb)); 1465 1466 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 1467 size = dns_rbt_hashsize(rbtdb->tree); 1468 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 1469 1470 return size; 1471 } 1472 1473 static isc_result_t 1474 setcachestats(dns_db_t *db, isc_stats_t *stats) { 1475 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1476 1477 REQUIRE(VALID_RBTDB(rbtdb)); 1478 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 1479 REQUIRE(stats != NULL); 1480 1481 isc_stats_attach(stats, &rbtdb->cachestats); 1482 return ISC_R_SUCCESS; 1483 } 1484 1485 static dns_stats_t * 1486 getrrsetstats(dns_db_t *db) { 1487 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1488 1489 REQUIRE(VALID_RBTDB(rbtdb)); 1490 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 1491 1492 return rbtdb->rrsetstats; 1493 } 1494 1495 static isc_result_t 1496 setservestalettl(dns_db_t *db, dns_ttl_t ttl) { 1497 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1498 1499 REQUIRE(VALID_RBTDB(rbtdb)); 1500 REQUIRE(IS_CACHE(rbtdb)); 1501 1502 /* currently no bounds checking. 0 means disable. */ 1503 rbtdb->common.serve_stale_ttl = ttl; 1504 return ISC_R_SUCCESS; 1505 } 1506 1507 static isc_result_t 1508 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { 1509 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1510 1511 REQUIRE(VALID_RBTDB(rbtdb)); 1512 REQUIRE(IS_CACHE(rbtdb)); 1513 1514 *ttl = rbtdb->common.serve_stale_ttl; 1515 return ISC_R_SUCCESS; 1516 } 1517 1518 static isc_result_t 1519 setservestalerefresh(dns_db_t *db, uint32_t interval) { 1520 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1521 1522 REQUIRE(VALID_RBTDB(rbtdb)); 1523 REQUIRE(IS_CACHE(rbtdb)); 1524 1525 /* currently no bounds checking. 0 means disable. */ 1526 rbtdb->serve_stale_refresh = interval; 1527 return ISC_R_SUCCESS; 1528 } 1529 1530 static isc_result_t 1531 getservestalerefresh(dns_db_t *db, uint32_t *interval) { 1532 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1533 1534 REQUIRE(VALID_RBTDB(rbtdb)); 1535 REQUIRE(IS_CACHE(rbtdb)); 1536 1537 *interval = rbtdb->serve_stale_refresh; 1538 return ISC_R_SUCCESS; 1539 } 1540 1541 static void 1542 expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { 1543 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1544 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 1545 dns_slabheader_t *header = data; 1546 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1547 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1548 1549 NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 1550 dns__cacherbt_expireheader(header, &tlocktype, 1551 dns_expire_flush DNS__DB_FILELINE); 1552 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 1553 INSIST(tlocktype == isc_rwlocktype_none); 1554 } 1555 1556 dns_dbmethods_t dns__rbtdb_cachemethods = { 1557 .destroy = dns__rbtdb_destroy, 1558 .currentversion = dns__rbtdb_currentversion, 1559 .newversion = dns__rbtdb_newversion, 1560 .attachversion = dns__rbtdb_attachversion, 1561 .closeversion = dns__rbtdb_closeversion, 1562 .findnode = dns__rbtdb_findnode, 1563 .find = cache_find, 1564 .findzonecut = cache_findzonecut, 1565 .attachnode = dns__rbtdb_attachnode, 1566 .detachnode = dns__rbtdb_detachnode, 1567 .createiterator = dns__rbtdb_createiterator, 1568 .findrdataset = cache_findrdataset, 1569 .allrdatasets = dns__rbtdb_allrdatasets, 1570 .addrdataset = dns__rbtdb_addrdataset, 1571 .subtractrdataset = dns__rbtdb_subtractrdataset, 1572 .deleterdataset = dns__rbtdb_deleterdataset, 1573 .nodecount = dns__rbtdb_nodecount, 1574 .setloop = dns__rbtdb_setloop, 1575 .getoriginnode = dns__rbtdb_getoriginnode, 1576 .getrrsetstats = getrrsetstats, 1577 .setcachestats = setcachestats, 1578 .hashsize = hashsize, 1579 .setservestalettl = setservestalettl, 1580 .getservestalettl = getservestalettl, 1581 .setservestalerefresh = setservestalerefresh, 1582 .getservestalerefresh = getservestalerefresh, 1583 .locknode = dns__rbtdb_locknode, 1584 .unlocknode = dns__rbtdb_unlocknode, 1585 .expiredata = expiredata, 1586 .deletedata = dns__rbtdb_deletedata, 1587 .setmaxrrperset = dns__rbtdb_setmaxrrperset, 1588 .setmaxtypepername = dns__rbtdb_setmaxtypepername, 1589 }; 1590 1591 /* 1592 * Caller must hold the node (write) lock. 1593 */ 1594 void 1595 dns__cacherbt_expireheader(dns_slabheader_t *header, 1596 isc_rwlocktype_t *tlocktypep, 1597 dns_expire_t reason DNS__DB_FLARG) { 1598 dns__rbtdb_setttl(header, 0); 1599 dns__rbtdb_mark(header, DNS_SLABHEADERATTR_ANCIENT); 1600 RBTDB_HEADERNODE(header)->dirty = 1; 1601 1602 if (isc_refcount_current(&RBTDB_HEADERNODE(header)->references) == 0) { 1603 isc_rwlocktype_t nlocktype = isc_rwlocktype_write; 1604 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; 1605 1606 /* 1607 * If no one else is using the node, we can clean it up now. 1608 * We first need to gain a new reference to the node to meet a 1609 * requirement of dns__rbtdb_decref(). 1610 */ 1611 dns__rbtdb_newref(rbtdb, RBTDB_HEADERNODE(header), 1612 nlocktype DNS__DB_FLARG_PASS); 1613 dns__rbtdb_decref(rbtdb, RBTDB_HEADERNODE(header), 0, 1614 &nlocktype, tlocktypep, true, 1615 false DNS__DB_FLARG_PASS); 1616 1617 if (rbtdb->cachestats == NULL) { 1618 return; 1619 } 1620 1621 switch (reason) { 1622 case dns_expire_ttl: 1623 isc_stats_increment(rbtdb->cachestats, 1624 dns_cachestatscounter_deletettl); 1625 break; 1626 case dns_expire_lru: 1627 isc_stats_increment(rbtdb->cachestats, 1628 dns_cachestatscounter_deletelru); 1629 break; 1630 default: 1631 break; 1632 } 1633 } 1634 } 1635 1636 static size_t 1637 rdataset_size(dns_slabheader_t *header) { 1638 if (!NONEXISTENT(header)) { 1639 return dns_rdataslab_size((unsigned char *)header, 1640 sizeof(*header)); 1641 } 1642 1643 return sizeof(*header); 1644 } 1645 1646 static size_t 1647 expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, 1648 isc_rwlocktype_t *tlocktypep, 1649 size_t purgesize DNS__DB_FLARG) { 1650 dns_slabheader_t *header = NULL; 1651 size_t purged = 0; 1652 1653 for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]); 1654 header != NULL && header->last_used <= rbtdb->last_used && 1655 purged <= purgesize; 1656 header = ISC_LIST_TAIL(rbtdb->lru[locknum])) 1657 { 1658 size_t header_size = rdataset_size(header); 1659 1660 /* 1661 * Unlink the entry at this point to avoid checking it 1662 * again even if it's currently used someone else and 1663 * cannot be purged at this moment. This entry won't be 1664 * referenced any more (so unlinking is safe) since the 1665 * TTL will be reset to 0. 1666 */ 1667 ISC_LIST_UNLINK(rbtdb->lru[locknum], header, link); 1668 dns__cacherbt_expireheader(header, tlocktypep, 1669 dns_expire_lru DNS__DB_FLARG_PASS); 1670 purged += header_size; 1671 } 1672 1673 return purged; 1674 } 1675 1676 /*% 1677 * Purge some expired and/or stale (i.e. unused for some period) cache entries 1678 * due to an overmem condition. To recover from this condition quickly, 1679 * we clean up entries up to the size of newly added rdata that triggered 1680 * the overmem; this is accessible via newheader. 1681 * 1682 * The LRU lists tails are processed in LRU order to the nearest second. 1683 * 1684 * A write lock on the tree must be held. 1685 */ 1686 void 1687 dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader, 1688 isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { 1689 uint32_t locknum_start = rbtdb->lru_sweep++ % rbtdb->node_lock_count; 1690 uint32_t locknum = locknum_start; 1691 /* Size of added data, possible node and possible ENT node. */ 1692 size_t purgesize = 1693 rdataset_size(newheader) + 1694 2 * dns__rbtnode_getsize(RBTDB_HEADERNODE(newheader)); 1695 size_t purged = 0; 1696 isc_stdtime_t min_last_used = 0; 1697 size_t max_passes = 8; 1698 1699 again: 1700 do { 1701 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1702 NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1703 1704 purged += expire_lru_headers(rbtdb, locknum, tlocktypep, 1705 purgesize - 1706 purged DNS__DB_FLARG_PASS); 1707 1708 /* 1709 * Work out the oldest remaining last_used values of the list 1710 * tails as we walk across the array of lru lists. 1711 */ 1712 dns_slabheader_t *header = ISC_LIST_TAIL(rbtdb->lru[locknum]); 1713 if (header != NULL && 1714 (min_last_used == 0 || header->last_used < min_last_used)) 1715 { 1716 min_last_used = header->last_used; 1717 } 1718 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1719 locknum = (locknum + 1) % rbtdb->node_lock_count; 1720 } while (locknum != locknum_start && purged <= purgesize); 1721 1722 /* 1723 * Update rbtdb->last_used if we have walked all the list tails and have 1724 * not freed the required amount of memory. 1725 */ 1726 if (purged < purgesize) { 1727 if (min_last_used != 0) { 1728 rbtdb->last_used = min_last_used; 1729 if (max_passes-- > 0) { 1730 goto again; 1731 } 1732 } 1733 } 1734 } 1735