1 /* $NetBSD: qpcache.c,v 1.2 2025/01/26 16:25:24 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 /*! \file */ 17 18 #include <inttypes.h> 19 #include <stdbool.h> 20 #include <sys/mman.h> 21 22 #include <isc/ascii.h> 23 #include <isc/async.h> 24 #include <isc/atomic.h> 25 #include <isc/crc64.h> 26 #include <isc/file.h> 27 #include <isc/hash.h> 28 #include <isc/hashmap.h> 29 #include <isc/heap.h> 30 #include <isc/hex.h> 31 #include <isc/loop.h> 32 #include <isc/mem.h> 33 #include <isc/mutex.h> 34 #include <isc/once.h> 35 #include <isc/queue.h> 36 #include <isc/random.h> 37 #include <isc/refcount.h> 38 #include <isc/result.h> 39 #include <isc/rwlock.h> 40 #include <isc/stdio.h> 41 #include <isc/string.h> 42 #include <isc/time.h> 43 #include <isc/urcu.h> 44 #include <isc/util.h> 45 46 #include <dns/callbacks.h> 47 #include <dns/db.h> 48 #include <dns/dbiterator.h> 49 #include <dns/fixedname.h> 50 #include <dns/log.h> 51 #include <dns/masterdump.h> 52 #include <dns/nsec.h> 53 #include <dns/qp.h> 54 #include <dns/rdata.h> 55 #include <dns/rdataset.h> 56 #include <dns/rdatasetiter.h> 57 #include <dns/rdataslab.h> 58 #include <dns/rdatastruct.h> 59 #include <dns/stats.h> 60 #include <dns/time.h> 61 #include <dns/view.h> 62 #include <dns/zonekey.h> 63 64 #include "db_p.h" 65 #include "qpcache_p.h" 66 67 #define CHECK(op) \ 68 do { \ 69 result = (op); \ 70 if (result != ISC_R_SUCCESS) \ 71 goto failure; \ 72 } while (0) 73 74 #define EXISTS(header) \ 75 ((atomic_load_acquire(&(header)->attributes) & \ 76 DNS_SLABHEADERATTR_NONEXISTENT) == 0) 77 #define NONEXISTENT(header) \ 78 ((atomic_load_acquire(&(header)->attributes) & \ 79 DNS_SLABHEADERATTR_NONEXISTENT) != 0) 80 #define IGNORE(header) \ 81 ((atomic_load_acquire(&(header)->attributes) & \ 82 DNS_SLABHEADERATTR_IGNORE) != 0) 83 #define NXDOMAIN(header) \ 84 ((atomic_load_acquire(&(header)->attributes) & \ 85 DNS_SLABHEADERATTR_NXDOMAIN) != 0) 86 #define STALE(header) \ 87 ((atomic_load_acquire(&(header)->attributes) & \ 88 DNS_SLABHEADERATTR_STALE) != 0) 89 #define STALE_WINDOW(header) \ 90 ((atomic_load_acquire(&(header)->attributes) & \ 91 DNS_SLABHEADERATTR_STALE_WINDOW) != 0) 92 #define OPTOUT(header) \ 93 ((atomic_load_acquire(&(header)->attributes) & \ 94 DNS_SLABHEADERATTR_OPTOUT) != 0) 95 #define NEGATIVE(header) \ 96 ((atomic_load_acquire(&(header)->attributes) & \ 97 DNS_SLABHEADERATTR_NEGATIVE) != 0) 98 #define PREFETCH(header) \ 99 ((atomic_load_acquire(&(header)->attributes) & \ 100 DNS_SLABHEADERATTR_PREFETCH) != 0) 101 #define ZEROTTL(header) \ 102 ((atomic_load_acquire(&(header)->attributes) & \ 103 DNS_SLABHEADERATTR_ZEROTTL) != 0) 104 #define ANCIENT(header) \ 105 ((atomic_load_acquire(&(header)->attributes) & \ 106 DNS_SLABHEADERATTR_ANCIENT) != 0) 107 #define STATCOUNT(header) \ 108 ((atomic_load_acquire(&(header)->attributes) & \ 109 DNS_SLABHEADERATTR_STATCOUNT) != 0) 110 111 #define STALE_TTL(header, qpdb) \ 112 (NXDOMAIN(header) ? 0 : qpdb->common.serve_stale_ttl) 113 114 #define ACTIVE(header, now) \ 115 (((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header))) 116 117 #define EXPIREDOK(iterator) \ 118 (((iterator)->common.options & DNS_DB_EXPIREDOK) != 0) 119 120 #define STALEOK(iterator) (((iterator)->common.options & DNS_DB_STALEOK) != 0) 121 122 #define KEEPSTALE(qpdb) ((qpdb)->common.serve_stale_ttl > 0) 123 124 /*% 125 * Note that "impmagic" is not the first four bytes of the struct, so 126 * ISC_MAGIC_VALID cannot be used. 127 */ 128 #define QPDB_MAGIC ISC_MAGIC('Q', 'P', 'D', '4') 129 #define VALID_QPDB(qpdb) \ 130 ((qpdb) != NULL && (qpdb)->common.impmagic == QPDB_MAGIC) 131 132 #define HEADERNODE(h) ((qpcnode_t *)((h)->node)) 133 134 /* 135 * Allow clients with a virtual time of up to 5 minutes in the past to see 136 * records that would have otherwise have expired. 137 */ 138 #define QPDB_VIRTUAL 300 139 140 /*% 141 * Whether to rate-limit updating the LRU to avoid possible thread contention. 142 * Updating LRU requires write locking, so we don't do it every time the 143 * record is touched - only after some time passes. 144 */ 145 #ifndef DNS_QPDB_LIMITLRUUPDATE 146 #define DNS_QPDB_LIMITLRUUPDATE 1 147 #endif 148 149 /*% Time after which we update LRU for glue records, 5 minutes */ 150 #define DNS_QPDB_LRUUPDATE_GLUE 300 151 /*% Time after which we update LRU for all other records, 10 minutes */ 152 #define DNS_QPDB_LRUUPDATE_REGULAR 600 153 154 /* 155 * This defines the number of headers that we try to expire each time the 156 * expire_ttl_headers() is run. The number should be small enough, so the 157 * TTL-based header expiration doesn't take too long, but it should be large 158 * enough, so we expire enough headers if their TTL is clustered. 159 */ 160 #define DNS_QPDB_EXPIRE_TTL_COUNT 10 161 162 /*% 163 * This is the structure that is used for each node in the qp trie of trees. 164 */ 165 typedef struct qpcnode qpcnode_t; 166 struct qpcnode { 167 dns_name_t name; 168 isc_mem_t *mctx; 169 170 uint8_t : 0; 171 unsigned int delegating : 1; 172 unsigned int nsec : 2; /*%< range is 0..3 */ 173 uint8_t : 0; 174 175 isc_refcount_t references; 176 isc_refcount_t erefs; 177 uint16_t locknum; 178 void *data; 179 180 /*% 181 * NOTE: The 'dirty' flag is protected by the node lock, so 182 * this bitfield has to be separated from the one above. 183 * We don't want it to share the same qword with bits 184 * that can be accessed without the node lock. 185 */ 186 uint8_t : 0; 187 uint8_t dirty : 1; 188 uint8_t : 0; 189 190 /*% 191 * Used for dead nodes cleaning. This linked list is used to mark nodes 192 * which have no data any longer, but we cannot unlink at that exact 193 * moment because we did not or could not obtain a write lock on the 194 * tree. 195 */ 196 isc_queue_node_t deadlink; 197 }; 198 199 typedef struct qpcache qpcache_t; 200 struct qpcache { 201 /* Unlocked. */ 202 dns_db_t common; 203 /* Loopmgr */ 204 isc_loopmgr_t *loopmgr; 205 /* Locks the data in this struct */ 206 isc_rwlock_t lock; 207 /* Locks the tree structure (prevents nodes appearing/disappearing) */ 208 isc_rwlock_t tree_lock; 209 /* Locks for individual tree nodes */ 210 unsigned int node_lock_count; 211 db_nodelock_t *node_locks; 212 qpcnode_t *origin_node; 213 dns_stats_t *rrsetstats; /* cache DB only */ 214 isc_stats_t *cachestats; /* cache DB only */ 215 isc_stats_t *gluecachestats; /* zone DB only */ 216 /* Locked by lock. */ 217 unsigned int active; 218 219 uint32_t maxrrperset; /* Maximum RRs per RRset */ 220 uint32_t maxtypepername; /* Maximum number of RR types per owner */ 221 222 /* 223 * The time after a failed lookup, where stale answers from cache 224 * may be used directly in a DNS response without attempting a 225 * new iterative lookup. 226 */ 227 uint32_t serve_stale_refresh; 228 229 /* 230 * This is an array of linked lists used to implement the LRU cache. 231 * There will be node_lock_count linked lists here. Nodes in bucket 1 232 * will be placed on the linked list lru[1]. 233 */ 234 dns_slabheaderlist_t *lru; 235 236 /* 237 * Start point % node_lock_count for next LRU cleanup. 238 */ 239 atomic_uint lru_sweep; 240 241 /* 242 * When performing LRU cleaning limit cleaning to headers that were 243 * last used at or before this. 244 */ 245 _Atomic(isc_stdtime_t) last_used; 246 247 /*% 248 * Temporary storage for stale cache nodes and dynamically deleted 249 * nodes that await being cleaned up. 250 */ 251 isc_queue_t *deadnodes; 252 253 /* 254 * Heaps. These are used for TTL based expiry in a cache, 255 * or for zone resigning in a zone DB. hmctx is the memory 256 * context to use for the heap (which differs from the main 257 * database memory context in the case of a cache). 258 */ 259 isc_mem_t *hmctx; 260 isc_heap_t **heaps; 261 262 /* Locked by tree_lock. */ 263 dns_qp_t *tree; 264 dns_qp_t *nsec; 265 }; 266 267 /*% 268 * Search Context 269 */ 270 typedef struct { 271 qpcache_t *qpdb; 272 unsigned int options; 273 dns_qpchain_t chain; 274 dns_qpiter_t iter; 275 bool need_cleanup; 276 qpcnode_t *zonecut; 277 dns_slabheader_t *zonecut_header; 278 dns_slabheader_t *zonecut_sigheader; 279 isc_stdtime_t now; 280 } qpc_search_t; 281 282 #ifdef DNS_DB_NODETRACE 283 #define qpcnode_ref(ptr) qpcnode__ref(ptr, __func__, __FILE__, __LINE__) 284 #define qpcnode_unref(ptr) qpcnode__unref(ptr, __func__, __FILE__, __LINE__) 285 #define qpcnode_attach(ptr, ptrp) \ 286 qpcnode__attach(ptr, ptrp, __func__, __FILE__, __LINE__) 287 #define qpcnode_detach(ptrp) qpcnode__detach(ptrp, __func__, __FILE__, __LINE__) 288 ISC_REFCOUNT_STATIC_TRACE_DECL(qpcnode); 289 #else 290 ISC_REFCOUNT_STATIC_DECL(qpcnode); 291 #endif 292 293 /* QP methods */ 294 static void 295 qp_attach(void *uctx, void *pval, uint32_t ival); 296 static void 297 qp_detach(void *uctx, void *pval, uint32_t ival); 298 static size_t 299 qp_makekey(dns_qpkey_t key, void *uctx, void *pval, uint32_t ival); 300 static void 301 qp_triename(void *uctx, char *buf, size_t size); 302 303 static dns_qpmethods_t qpmethods = { 304 qp_attach, 305 qp_detach, 306 qp_makekey, 307 qp_triename, 308 }; 309 310 static void 311 qp_attach(void *uctx ISC_ATTR_UNUSED, void *pval, 312 uint32_t ival ISC_ATTR_UNUSED) { 313 qpcnode_t *data = pval; 314 qpcnode_ref(data); 315 } 316 317 static void 318 qp_detach(void *uctx ISC_ATTR_UNUSED, void *pval, 319 uint32_t ival ISC_ATTR_UNUSED) { 320 qpcnode_t *data = pval; 321 qpcnode_detach(&data); 322 } 323 324 static size_t 325 qp_makekey(dns_qpkey_t key, void *uctx ISC_ATTR_UNUSED, void *pval, 326 uint32_t ival ISC_ATTR_UNUSED) { 327 qpcnode_t *data = pval; 328 return dns_qpkey_fromname(key, &data->name); 329 } 330 331 static void 332 qp_triename(void *uctx, char *buf, size_t size) { 333 UNUSED(uctx); 334 snprintf(buf, size, "qpdb-lite"); 335 } 336 337 static void 338 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG); 339 static isc_result_t 340 rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG); 341 static isc_result_t 342 rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG); 343 static void 344 rdatasetiter_current(dns_rdatasetiter_t *iterator, 345 dns_rdataset_t *rdataset DNS__DB_FLARG); 346 347 static dns_rdatasetitermethods_t rdatasetiter_methods = { 348 rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next, 349 rdatasetiter_current 350 }; 351 352 typedef struct qpc_rditer { 353 dns_rdatasetiter_t common; 354 dns_slabheader_t *current; 355 } qpc_rditer_t; 356 357 static void 358 dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG); 359 static isc_result_t 360 dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG); 361 static isc_result_t 362 dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG); 363 static isc_result_t 364 dbiterator_seek(dns_dbiterator_t *iterator, 365 const dns_name_t *name DNS__DB_FLARG); 366 static isc_result_t 367 dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG); 368 static isc_result_t 369 dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG); 370 static isc_result_t 371 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 372 dns_name_t *name DNS__DB_FLARG); 373 static isc_result_t 374 dbiterator_pause(dns_dbiterator_t *iterator); 375 static isc_result_t 376 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name); 377 378 static dns_dbiteratormethods_t dbiterator_methods = { 379 dbiterator_destroy, dbiterator_first, dbiterator_last, 380 dbiterator_seek, dbiterator_prev, dbiterator_next, 381 dbiterator_current, dbiterator_pause, dbiterator_origin 382 }; 383 384 /* 385 * Note that the QP cache database only needs a single QP iterator, because 386 * unlike the QP zone database, NSEC3 records are cached in the main tree. 387 * 388 * If we ever implement synth-from-dnssec using NSEC3 records, we'll need 389 * to have a separate tree for NSEC3 records, and to copy in the more complex 390 * iterator implementation from qpzone.c. 391 */ 392 typedef struct qpc_dbit { 393 dns_dbiterator_t common; 394 bool paused; 395 isc_rwlocktype_t tree_locked; 396 isc_result_t result; 397 dns_fixedname_t fixed; 398 dns_name_t *name; 399 dns_qpiter_t iter; 400 qpcnode_t *node; 401 } qpc_dbit_t; 402 403 static void 404 free_qpdb(qpcache_t *qpdb, bool log); 405 406 static dns_dbmethods_t qpdb_cachemethods; 407 408 /*% 409 * 'init_count' is used to initialize 'newheader->count' which in turn 410 * is used to determine where in the cycle rrset-order cyclic starts. 411 * We don't lock this as we don't care about simultaneous updates. 412 */ 413 static atomic_uint_fast16_t init_count = 0; 414 415 /* 416 * Locking 417 * 418 * If a routine is going to lock more than one lock in this module, then 419 * the locking must be done in the following order: 420 * 421 * Tree Lock 422 * 423 * Node Lock (Only one from the set may be locked at one time by 424 * any caller) 425 * 426 * Database Lock 427 * 428 * Failure to follow this hierarchy can result in deadlock. 429 */ 430 431 /*% 432 * Routines for LRU-based cache management. 433 */ 434 435 /*% 436 * See if a given cache entry that is being reused needs to be updated 437 * in the LRU-list. From the LRU management point of view, this function is 438 * expected to return true for almost all cases. When used with threads, 439 * however, this may cause a non-negligible performance penalty because a 440 * writer lock will have to be acquired before updating the list. 441 * If DNS_QPDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this 442 * function returns true if the entry has not been updated for some period of 443 * time. We differentiate the NS or glue address case and the others since 444 * experiments have shown that the former tends to be accessed relatively 445 * infrequently and the cost of cache miss is higher (e.g., a missing NS records 446 * may cause external queries at a higher level zone, involving more 447 * transactions). 448 * 449 * Caller must hold the node (read or write) lock. 450 */ 451 static bool 452 need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) { 453 if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT | 454 DNS_SLABHEADERATTR_ANCIENT | 455 DNS_SLABHEADERATTR_ZEROTTL)) != 0) 456 { 457 return false; 458 } 459 460 #if DNS_QPDB_LIMITLRUUPDATE 461 if (header->type == dns_rdatatype_ns || 462 (header->trust == dns_trust_glue && 463 (header->type == dns_rdatatype_a || 464 header->type == dns_rdatatype_aaaa))) 465 { 466 /* 467 * Glue records are updated if at least DNS_QPDB_LRUUPDATE_GLUE 468 * seconds have passed since the previous update time. 469 */ 470 return header->last_used + DNS_QPDB_LRUUPDATE_GLUE <= now; 471 } 472 473 /* 474 * Other records are updated if DNS_QPDB_LRUUPDATE_REGULAR seconds 475 * have passed. 476 */ 477 return header->last_used + DNS_QPDB_LRUUPDATE_REGULAR <= now; 478 #else 479 UNUSED(now); 480 481 return true; 482 #endif /* if DNS_QPDB_LIMITLRUUPDATE */ 483 } 484 485 /*% 486 * Update the timestamp of a given cache entry and move it to the head 487 * of the corresponding LRU list. 488 * 489 * Caller must hold the node (write) lock. 490 * 491 * Note that the we do NOT touch the heap here, as the TTL has not changed. 492 */ 493 static void 494 update_header(qpcache_t *qpdb, dns_slabheader_t *header, isc_stdtime_t now) { 495 /* To be checked: can we really assume this? XXXMLG */ 496 INSIST(ISC_LINK_LINKED(header, link)); 497 498 ISC_LIST_UNLINK(qpdb->lru[HEADERNODE(header)->locknum], header, link); 499 header->last_used = now; 500 ISC_LIST_PREPEND(qpdb->lru[HEADERNODE(header)->locknum], header, link); 501 } 502 503 /* 504 * Locking: 505 * If a routine is going to lock more than one lock in this module, then 506 * the locking must be done in the following order: 507 * 508 * Tree Lock 509 * 510 * Node Lock (Only one from the set may be locked at one time by 511 * any caller) 512 * 513 * Database Lock 514 * 515 * Failure to follow this hierarchy can result in deadlock. 516 * 517 * Deleting Nodes: 518 * For zone databases the node for the origin of the zone MUST NOT be deleted. 519 */ 520 521 /* 522 * DB Routines 523 */ 524 525 static void 526 clean_stale_headers(dns_slabheader_t *top) { 527 dns_slabheader_t *d = NULL, *down_next = NULL; 528 529 for (d = top->down; d != NULL; d = down_next) { 530 down_next = d->down; 531 dns_slabheader_destroy(&d); 532 } 533 top->down = NULL; 534 } 535 536 static void 537 clean_cache_node(qpcache_t *qpdb, qpcnode_t *node) { 538 dns_slabheader_t *current = NULL, *top_prev = NULL, *top_next = NULL; 539 540 /* 541 * Caller must be holding the node lock. 542 */ 543 544 for (current = node->data; current != NULL; current = top_next) { 545 top_next = current->next; 546 clean_stale_headers(current); 547 /* 548 * If current is nonexistent, ancient, or stale and 549 * we are not keeping stale, we can clean it up. 550 */ 551 if (NONEXISTENT(current) || ANCIENT(current) || 552 (STALE(current) && !KEEPSTALE(qpdb))) 553 { 554 if (top_prev != NULL) { 555 top_prev->next = current->next; 556 } else { 557 node->data = current->next; 558 } 559 dns_slabheader_destroy(¤t); 560 } else { 561 top_prev = current; 562 } 563 } 564 node->dirty = 0; 565 } 566 567 /* 568 * tree_lock(write) must be held. 569 */ 570 static void 571 delete_node(qpcache_t *qpdb, qpcnode_t *node) { 572 isc_result_t result = ISC_R_UNEXPECTED; 573 574 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) { 575 char printname[DNS_NAME_FORMATSIZE]; 576 dns_name_format(&node->name, printname, sizeof(printname)); 577 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 578 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 579 "delete_node(): %p %s (bucket %d)", node, 580 printname, node->locknum); 581 } 582 583 switch (node->nsec) { 584 case DNS_DB_NSEC_HAS_NSEC: 585 /* 586 * Delete the corresponding node from the auxiliary NSEC 587 * tree before deleting from the main tree. 588 */ 589 result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL); 590 if (result != ISC_R_SUCCESS) { 591 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 592 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 593 "delete_node(): " 594 "dns_qp_deletename: %s", 595 isc_result_totext(result)); 596 } 597 /* FALLTHROUGH */ 598 case DNS_DB_NSEC_NORMAL: 599 result = dns_qp_deletename(qpdb->tree, &node->name, NULL, NULL); 600 break; 601 case DNS_DB_NSEC_NSEC: 602 result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL); 603 break; 604 } 605 if (result != ISC_R_SUCCESS) { 606 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 607 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 608 "delete_node(): " 609 "dns_qp_deletename: %s", 610 isc_result_totext(result)); 611 } 612 } 613 614 /* 615 * The caller must specify its currect node and tree lock status. 616 * It's okay for neither lock to be held if there are existing external 617 * references to the node, but if this is the first external reference, 618 * then the caller must be holding at least one lock. 619 */ 620 static void 621 newref(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t nlocktype, 622 isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 623 uint_fast32_t refs; 624 625 qpcnode_ref(node); 626 refs = isc_refcount_increment0(&node->erefs); 627 628 #if DNS_DB_NODETRACE 629 fprintf(stderr, "incr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n", 630 func, file, line, node, refs + 1); 631 #endif 632 633 if (refs == 0) { 634 /* 635 * this is the first external reference to the node. 636 * 637 * we need to hold the node or tree lock to avoid 638 * incrementing the reference count while also deleting 639 * the node. delete_node() is always protected by both 640 * tree and node locks being write-locked. 641 */ 642 INSIST(nlocktype != isc_rwlocktype_none || 643 tlocktype != isc_rwlocktype_none); 644 645 refs = isc_refcount_increment0( 646 &qpdb->node_locks[node->locknum].references); 647 #if DNS_DB_NODETRACE 648 fprintf(stderr, 649 "incr:nodelock:%s:%s:%u:%p:%p->references = " 650 "%" PRIuFAST32 "\n", 651 func, file, line, node, 652 &qpdb->node_locks[node->locknum], refs + 1); 653 #else 654 UNUSED(refs); 655 #endif 656 } 657 } 658 659 static void 660 cleanup_deadnodes(void *arg); 661 662 /* 663 * Caller must be holding the node lock; either the read or write lock. 664 * Note that the lock must be held even when node references are 665 * atomically modified; in that case the decrement operation itself does not 666 * have to be protected, but we must avoid a race condition where multiple 667 * threads are decreasing the reference to zero simultaneously and at least 668 * one of them is going to free the node. 669 * 670 * This decrements both the internal and external node reference counters. 671 * If the external reference count drops to zero, then the node lock 672 * reference count is also decremented. 673 * 674 * This function returns true if and only if the node reference decreases 675 * to zero. (NOTE: Decrementing the reference count of a node to zero does 676 * not mean it will be immediately freed.) 677 */ 678 static bool 679 decref(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep, 680 isc_rwlocktype_t *tlocktypep, bool tryupgrade DNS__DB_FLARG) { 681 isc_result_t result; 682 bool locked = *tlocktypep != isc_rwlocktype_none; 683 bool write_locked = false; 684 db_nodelock_t *nodelock = NULL; 685 int bucket = node->locknum; 686 uint_fast32_t refs; 687 688 REQUIRE(*nlocktypep != isc_rwlocktype_none); 689 690 nodelock = &qpdb->node_locks[bucket]; 691 692 #define KEEP_NODE(n, r) ((n)->data != NULL || (n) == (r)->origin_node) 693 694 /* Handle easy and typical case first. */ 695 if (!node->dirty && KEEP_NODE(node, qpdb)) { 696 bool no_reference = false; 697 698 refs = isc_refcount_decrement(&node->erefs); 699 #if DNS_DB_NODETRACE 700 fprintf(stderr, 701 "decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n", 702 func, file, line, node, refs - 1); 703 #else 704 UNUSED(refs); 705 #endif 706 if (refs == 1) { 707 refs = isc_refcount_decrement(&nodelock->references); 708 #if DNS_DB_NODETRACE 709 fprintf(stderr, 710 "decr:nodelock:%s:%s:%u:%p:%p->references = " 711 "%" PRIuFAST32 "\n", 712 func, file, line, node, nodelock, refs - 1); 713 #else 714 UNUSED(refs); 715 #endif 716 no_reference = true; 717 } 718 719 qpcnode_unref(node); 720 return no_reference; 721 } 722 723 /* Upgrade the lock? */ 724 if (*nlocktypep == isc_rwlocktype_read) { 725 NODE_FORCEUPGRADE(&nodelock->lock, nlocktypep); 726 } 727 728 refs = isc_refcount_decrement(&node->erefs); 729 #if DNS_DB_NODETRACE 730 fprintf(stderr, "decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n", 731 func, file, line, node, refs - 1); 732 #endif 733 734 if (refs > 1) { 735 qpcnode_unref(node); 736 return false; 737 } 738 739 INSIST(refs == 1); 740 741 if (node->dirty) { 742 clean_cache_node(qpdb, node); 743 } 744 745 /* 746 * Attempt to switch to a write lock on the tree. If this fails, 747 * we will add this node to a linked list of nodes in this locking 748 * bucket which we will free later. 749 * 750 * Locking hierarchy notwithstanding, we don't need to free 751 * the node lock before acquiring the tree write lock because 752 * we only do a trylock. 753 */ 754 /* We are allowed to upgrade the tree lock */ 755 756 switch (*tlocktypep) { 757 case isc_rwlocktype_write: 758 result = ISC_R_SUCCESS; 759 break; 760 case isc_rwlocktype_read: 761 if (tryupgrade) { 762 result = TREE_TRYUPGRADE(&qpdb->tree_lock, tlocktypep); 763 } else { 764 result = ISC_R_LOCKBUSY; 765 } 766 break; 767 case isc_rwlocktype_none: 768 result = TREE_TRYWRLOCK(&qpdb->tree_lock, tlocktypep); 769 break; 770 default: 771 UNREACHABLE(); 772 } 773 RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_LOCKBUSY); 774 if (result == ISC_R_SUCCESS) { 775 write_locked = true; 776 } 777 778 refs = isc_refcount_decrement(&nodelock->references); 779 #if DNS_DB_NODETRACE 780 fprintf(stderr, 781 "decr:nodelock:%s:%s:%u:%p:%p->references = %" PRIuFAST32 "\n", 782 func, file, line, node, nodelock, refs - 1); 783 #else 784 UNUSED(refs); 785 #endif 786 787 if (KEEP_NODE(node, qpdb)) { 788 goto restore_locks; 789 } 790 791 #undef KEEP_NODE 792 793 if (write_locked) { 794 /* 795 * We can now delete the node. 796 */ 797 delete_node(qpdb, node); 798 } else { 799 newref(qpdb, node, *nlocktypep, *tlocktypep DNS__DB_FLARG_PASS); 800 801 isc_queue_node_init(&node->deadlink); 802 if (!isc_queue_enqueue_entry(&qpdb->deadnodes[bucket], node, 803 deadlink)) 804 { 805 /* Queue was empty, trigger new cleaning */ 806 isc_loop_t *loop = isc_loop_get(qpdb->loopmgr, bucket); 807 808 isc_async_run(loop, cleanup_deadnodes, qpdb); 809 } 810 } 811 812 restore_locks: 813 /* 814 * Relock a read lock, or unlock the write lock if no lock was held. 815 */ 816 if (!locked && write_locked) { 817 TREE_UNLOCK(&qpdb->tree_lock, tlocktypep); 818 } 819 820 qpcnode_unref(node); 821 return true; 822 } 823 824 static void 825 update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype, 826 const uint_least16_t hattributes, const bool increment) { 827 dns_rdatastatstype_t statattributes = 0; 828 dns_rdatastatstype_t base = 0; 829 dns_rdatastatstype_t type; 830 dns_slabheader_t *header = &(dns_slabheader_t){ 831 .type = htype, 832 .attributes = hattributes, 833 }; 834 835 if (!EXISTS(header) || !STATCOUNT(header)) { 836 return; 837 } 838 839 if (NEGATIVE(header)) { 840 if (NXDOMAIN(header)) { 841 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN; 842 } else { 843 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET; 844 base = DNS_TYPEPAIR_COVERS(header->type); 845 } 846 } else { 847 base = DNS_TYPEPAIR_TYPE(header->type); 848 } 849 850 if (STALE(header)) { 851 statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE; 852 } 853 if (ANCIENT(header)) { 854 statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT; 855 } 856 857 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes); 858 if (increment) { 859 dns_rdatasetstats_increment(stats, type); 860 } else { 861 dns_rdatasetstats_decrement(stats, type); 862 } 863 } 864 865 static void 866 mark(dns_slabheader_t *header, uint_least16_t flag) { 867 uint_least16_t attributes = atomic_load_acquire(&header->attributes); 868 uint_least16_t newattributes = 0; 869 dns_stats_t *stats = NULL; 870 871 /* 872 * If we are already ancient there is nothing to do. 873 */ 874 do { 875 if ((attributes & flag) != 0) { 876 return; 877 } 878 newattributes = attributes | flag; 879 } while (!atomic_compare_exchange_weak_acq_rel( 880 &header->attributes, &attributes, newattributes)); 881 882 /* 883 * Decrement and increment the stats counter for the appropriate 884 * RRtype. 885 */ 886 stats = dns_db_getrrsetstats(header->db); 887 if (stats != NULL) { 888 update_rrsetstats(stats, header->type, attributes, false); 889 update_rrsetstats(stats, header->type, newattributes, true); 890 } 891 } 892 893 static void 894 setttl(dns_slabheader_t *header, dns_ttl_t newttl) { 895 dns_ttl_t oldttl = header->ttl; 896 897 header->ttl = newttl; 898 899 if (header->db == NULL || !dns_db_iscache(header->db)) { 900 return; 901 } 902 903 /* 904 * This is a cache. Adjust the heaps if necessary. 905 */ 906 if (header->heap == NULL || header->heap_index == 0 || newttl == oldttl) 907 { 908 return; 909 } 910 911 if (newttl < oldttl) { 912 isc_heap_increased(header->heap, header->heap_index); 913 } else { 914 isc_heap_decreased(header->heap, header->heap_index); 915 } 916 917 if (newttl == 0) { 918 isc_heap_delete(header->heap, header->heap_index); 919 } 920 } 921 922 /* 923 * Caller must hold the node (write) lock. 924 */ 925 static void 926 expireheader(dns_slabheader_t *header, isc_rwlocktype_t *nlocktypep, 927 isc_rwlocktype_t *tlocktypep, dns_expire_t reason DNS__DB_FLARG) { 928 setttl(header, 0); 929 mark(header, DNS_SLABHEADERATTR_ANCIENT); 930 HEADERNODE(header)->dirty = 1; 931 932 if (isc_refcount_current(&HEADERNODE(header)->erefs) == 0) { 933 qpcache_t *qpdb = (qpcache_t *)header->db; 934 935 /* 936 * If no one else is using the node, we can clean it up now. 937 * We first need to gain a new reference to the node to meet a 938 * requirement of decref(). 939 */ 940 newref(qpdb, HEADERNODE(header), *nlocktypep, 941 *tlocktypep DNS__DB_FLARG_PASS); 942 decref(qpdb, HEADERNODE(header), nlocktypep, tlocktypep, 943 true DNS__DB_FLARG_PASS); 944 945 if (qpdb->cachestats == NULL) { 946 return; 947 } 948 949 switch (reason) { 950 case dns_expire_ttl: 951 isc_stats_increment(qpdb->cachestats, 952 dns_cachestatscounter_deletettl); 953 break; 954 case dns_expire_lru: 955 isc_stats_increment(qpdb->cachestats, 956 dns_cachestatscounter_deletelru); 957 break; 958 default: 959 break; 960 } 961 } 962 } 963 964 static void 965 update_cachestats(qpcache_t *qpdb, isc_result_t result) { 966 if (qpdb->cachestats == NULL) { 967 return; 968 } 969 970 switch (result) { 971 case DNS_R_COVERINGNSEC: 972 isc_stats_increment(qpdb->cachestats, 973 dns_cachestatscounter_coveringnsec); 974 FALLTHROUGH; 975 case ISC_R_SUCCESS: 976 case DNS_R_CNAME: 977 case DNS_R_DNAME: 978 case DNS_R_DELEGATION: 979 case DNS_R_NCACHENXDOMAIN: 980 case DNS_R_NCACHENXRRSET: 981 isc_stats_increment(qpdb->cachestats, 982 dns_cachestatscounter_hits); 983 break; 984 default: 985 isc_stats_increment(qpdb->cachestats, 986 dns_cachestatscounter_misses); 987 } 988 } 989 990 static void 991 bindrdataset(qpcache_t *qpdb, qpcnode_t *node, dns_slabheader_t *header, 992 isc_stdtime_t now, isc_rwlocktype_t nlocktype, 993 isc_rwlocktype_t tlocktype, 994 dns_rdataset_t *rdataset DNS__DB_FLARG) { 995 bool stale = STALE(header); 996 bool ancient = ANCIENT(header); 997 998 /* 999 * Caller must be holding the node reader lock. 1000 * XXXJT: technically, we need a writer lock, since we'll increment 1001 * the header count below. However, since the actual counter value 1002 * doesn't matter, we prioritize performance here. (We may want to 1003 * use atomic increment when available). 1004 */ 1005 1006 if (rdataset == NULL) { 1007 return; 1008 } 1009 1010 newref(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS); 1011 1012 INSIST(rdataset->methods == NULL); /* We must be disassociated. */ 1013 1014 /* 1015 * Mark header stale or ancient if the RRset is no longer active. 1016 */ 1017 if (!ACTIVE(header, now)) { 1018 dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb); 1019 /* 1020 * If this data is in the stale window keep it and if 1021 * DNS_DBFIND_STALEOK is not set we tell the caller to 1022 * skip this record. We skip the records with ZEROTTL 1023 * (these records should not be cached anyway). 1024 */ 1025 1026 if (KEEPSTALE(qpdb) && stale_ttl > now) { 1027 stale = true; 1028 } else { 1029 /* 1030 * We are not keeping stale, or it is outside the 1031 * stale window. Mark ancient, i.e. ready for cleanup. 1032 */ 1033 ancient = true; 1034 } 1035 } 1036 1037 rdataset->methods = &dns_rdataslab_rdatasetmethods; 1038 rdataset->rdclass = qpdb->common.rdclass; 1039 rdataset->type = DNS_TYPEPAIR_TYPE(header->type); 1040 rdataset->covers = DNS_TYPEPAIR_COVERS(header->type); 1041 rdataset->ttl = header->ttl - now; 1042 rdataset->trust = header->trust; 1043 rdataset->resign = 0; 1044 1045 if (NEGATIVE(header)) { 1046 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE; 1047 } 1048 if (NXDOMAIN(header)) { 1049 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN; 1050 } 1051 if (OPTOUT(header)) { 1052 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT; 1053 } 1054 if (PREFETCH(header)) { 1055 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; 1056 } 1057 1058 if (stale && !ancient) { 1059 dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb); 1060 if (stale_ttl > now) { 1061 rdataset->ttl = stale_ttl - now; 1062 } else { 1063 rdataset->ttl = 0; 1064 } 1065 if (STALE_WINDOW(header)) { 1066 rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; 1067 } 1068 rdataset->attributes |= DNS_RDATASETATTR_STALE; 1069 } else if (!ACTIVE(header, now)) { 1070 rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; 1071 rdataset->ttl = header->ttl; 1072 } 1073 1074 rdataset->count = atomic_fetch_add_relaxed(&header->count, 1); 1075 1076 rdataset->slab.db = (dns_db_t *)qpdb; 1077 rdataset->slab.node = (dns_dbnode_t *)node; 1078 rdataset->slab.raw = dns_slabheader_raw(header); 1079 rdataset->slab.iter_pos = NULL; 1080 rdataset->slab.iter_count = 0; 1081 1082 /* 1083 * Add noqname proof. 1084 */ 1085 rdataset->slab.noqname = header->noqname; 1086 if (header->noqname != NULL) { 1087 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME; 1088 } 1089 rdataset->slab.closest = header->closest; 1090 if (header->closest != NULL) { 1091 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST; 1092 } 1093 } 1094 1095 static isc_result_t 1096 setup_delegation(qpc_search_t *search, dns_dbnode_t **nodep, 1097 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset, 1098 isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 1099 dns_typepair_t type; 1100 qpcnode_t *node = NULL; 1101 1102 REQUIRE(search != NULL); 1103 REQUIRE(search->zonecut != NULL); 1104 REQUIRE(search->zonecut_header != NULL); 1105 1106 /* 1107 * The caller MUST NOT be holding any node locks. 1108 */ 1109 1110 node = search->zonecut; 1111 type = search->zonecut_header->type; 1112 1113 if (nodep != NULL) { 1114 /* 1115 * Note that we don't have to increment the node's reference 1116 * count here because we're going to use the reference we 1117 * already have in the search block. 1118 */ 1119 *nodep = node; 1120 search->need_cleanup = false; 1121 } 1122 if (rdataset != NULL) { 1123 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1124 NODE_RDLOCK(&(search->qpdb->node_locks[node->locknum].lock), 1125 &nlocktype); 1126 bindrdataset(search->qpdb, node, search->zonecut_header, 1127 search->now, nlocktype, tlocktype, 1128 rdataset DNS__DB_FLARG_PASS); 1129 if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { 1130 bindrdataset(search->qpdb, node, 1131 search->zonecut_sigheader, search->now, 1132 nlocktype, tlocktype, 1133 sigrdataset DNS__DB_FLARG_PASS); 1134 } 1135 NODE_UNLOCK(&(search->qpdb->node_locks[node->locknum].lock), 1136 &nlocktype); 1137 } 1138 1139 if (type == dns_rdatatype_dname) { 1140 return DNS_R_DNAME; 1141 } 1142 return DNS_R_DELEGATION; 1143 } 1144 1145 static bool 1146 check_stale_header(qpcnode_t *node, dns_slabheader_t *header, 1147 isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, 1148 qpc_search_t *search, dns_slabheader_t **header_prev) { 1149 if (!ACTIVE(header, search->now)) { 1150 dns_ttl_t stale = header->ttl + STALE_TTL(header, search->qpdb); 1151 /* 1152 * If this data is in the stale window keep it and if 1153 * DNS_DBFIND_STALEOK is not set we tell the caller to 1154 * skip this record. We skip the records with ZEROTTL 1155 * (these records should not be cached anyway). 1156 */ 1157 1158 DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); 1159 if (!ZEROTTL(header) && KEEPSTALE(search->qpdb) && 1160 stale > search->now) 1161 { 1162 mark(header, DNS_SLABHEADERATTR_STALE); 1163 *header_prev = header; 1164 /* 1165 * If DNS_DBFIND_STALESTART is set then it means we 1166 * failed to resolve the name during recursion, in 1167 * this case we mark the time in which the refresh 1168 * failed. 1169 */ 1170 if ((search->options & DNS_DBFIND_STALESTART) != 0) { 1171 atomic_store_release( 1172 &header->last_refresh_fail_ts, 1173 search->now); 1174 } else if ((search->options & 1175 DNS_DBFIND_STALEENABLED) != 0 && 1176 search->now < 1177 (atomic_load_acquire( 1178 &header->last_refresh_fail_ts) + 1179 search->qpdb->serve_stale_refresh)) 1180 { 1181 /* 1182 * If we are within interval between last 1183 * refresh failure time + 'stale-refresh-time', 1184 * then don't skip this stale entry but use it 1185 * instead. 1186 */ 1187 DNS_SLABHEADER_SETATTR( 1188 header, 1189 DNS_SLABHEADERATTR_STALE_WINDOW); 1190 return false; 1191 } else if ((search->options & 1192 DNS_DBFIND_STALETIMEOUT) != 0) 1193 { 1194 /* 1195 * We want stale RRset due to timeout, so we 1196 * don't skip it. 1197 */ 1198 return false; 1199 } 1200 return (search->options & DNS_DBFIND_STALEOK) == 0; 1201 } 1202 1203 /* 1204 * This rdataset is stale. If no one else is using the 1205 * node, we can clean it up right now, otherwise we mark 1206 * it as ancient, and the node as dirty, so it will get 1207 * cleaned up later. 1208 */ 1209 if ((header->ttl < search->now - QPDB_VIRTUAL) && 1210 (*nlocktypep == isc_rwlocktype_write || 1211 NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) 1212 { 1213 /* 1214 * We update the node's status only when we can 1215 * get write access; otherwise, we leave others 1216 * to this work. Periodical cleaning will 1217 * eventually take the job as the last resort. 1218 * We won't downgrade the lock, since other 1219 * rdatasets are probably stale, too. 1220 */ 1221 1222 if (isc_refcount_current(&node->references) == 0) { 1223 /* 1224 * header->down can be non-NULL if the 1225 * refcount has just decremented to 0 1226 * but decref() has not 1227 * performed clean_cache_node(), in 1228 * which case we need to purge the stale 1229 * headers first. 1230 */ 1231 clean_stale_headers(header); 1232 if (*header_prev != NULL) { 1233 (*header_prev)->next = header->next; 1234 } else { 1235 node->data = header->next; 1236 } 1237 dns_slabheader_destroy(&header); 1238 } else { 1239 mark(header, DNS_SLABHEADERATTR_ANCIENT); 1240 HEADERNODE(header)->dirty = 1; 1241 *header_prev = header; 1242 } 1243 } else { 1244 *header_prev = header; 1245 } 1246 return true; 1247 } 1248 return false; 1249 } 1250 1251 static isc_result_t 1252 check_zonecut(qpcnode_t *node, void *arg DNS__DB_FLARG) { 1253 qpc_search_t *search = arg; 1254 dns_slabheader_t *header = NULL; 1255 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 1256 dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; 1257 isc_result_t result; 1258 isc_rwlock_t *lock = NULL; 1259 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1260 1261 REQUIRE(search->zonecut == NULL); 1262 1263 lock = &(search->qpdb->node_locks[node->locknum].lock); 1264 NODE_RDLOCK(lock, &nlocktype); 1265 1266 /* 1267 * Look for a DNAME or RRSIG DNAME rdataset. 1268 */ 1269 for (header = node->data; header != NULL; header = header_next) { 1270 header_next = header->next; 1271 if (check_stale_header(node, header, &nlocktype, lock, search, 1272 &header_prev)) 1273 { 1274 /* Do nothing. */ 1275 } else if (header->type == dns_rdatatype_dname && 1276 EXISTS(header) && !ANCIENT(header)) 1277 { 1278 dname_header = header; 1279 header_prev = header; 1280 } else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) && 1281 EXISTS(header) && !ANCIENT(header)) 1282 { 1283 sigdname_header = header; 1284 header_prev = header; 1285 } else { 1286 header_prev = header; 1287 } 1288 } 1289 1290 if (dname_header != NULL && 1291 (!DNS_TRUST_PENDING(dname_header->trust) || 1292 (search->options & DNS_DBFIND_PENDINGOK) != 0)) 1293 { 1294 /* 1295 * We increment the reference count on node to ensure that 1296 * search->zonecut_header will still be valid later. 1297 */ 1298 newref(search->qpdb, node, nlocktype, 1299 isc_rwlocktype_none DNS__DB_FLARG_PASS); 1300 search->zonecut = node; 1301 search->zonecut_header = dname_header; 1302 search->zonecut_sigheader = sigdname_header; 1303 search->need_cleanup = true; 1304 result = DNS_R_PARTIALMATCH; 1305 } else { 1306 result = DNS_R_CONTINUE; 1307 } 1308 1309 NODE_UNLOCK(lock, &nlocktype); 1310 1311 return result; 1312 } 1313 1314 static isc_result_t 1315 find_deepest_zonecut(qpc_search_t *search, qpcnode_t *node, 1316 dns_dbnode_t **nodep, dns_name_t *foundname, 1317 dns_rdataset_t *rdataset, 1318 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1319 isc_result_t result = ISC_R_NOTFOUND; 1320 qpcache_t *qpdb = NULL; 1321 1322 /* 1323 * Caller must be holding the tree lock. 1324 */ 1325 1326 qpdb = search->qpdb; 1327 1328 for (int i = dns_qpchain_length(&search->chain) - 1; i >= 0; i--) { 1329 dns_slabheader_t *header = NULL; 1330 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 1331 dns_slabheader_t *found = NULL, *foundsig = NULL; 1332 isc_rwlock_t *lock = NULL; 1333 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1334 1335 dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL); 1336 lock = &qpdb->node_locks[node->locknum].lock; 1337 1338 NODE_RDLOCK(lock, &nlocktype); 1339 1340 /* 1341 * Look for NS and RRSIG NS rdatasets. 1342 */ 1343 for (header = node->data; header != NULL; header = header_next) 1344 { 1345 header_next = header->next; 1346 if (check_stale_header(node, header, &nlocktype, lock, 1347 search, &header_prev)) 1348 { 1349 /* Do nothing. */ 1350 } else if (EXISTS(header) && !ANCIENT(header)) { 1351 /* 1352 * We've found an extant rdataset. See if 1353 * we're interested in it. 1354 */ 1355 if (header->type == dns_rdatatype_ns) { 1356 found = header; 1357 if (foundsig != NULL) { 1358 break; 1359 } 1360 } else if (header->type == 1361 DNS_SIGTYPE(dns_rdatatype_ns)) 1362 { 1363 foundsig = header; 1364 if (found != NULL) { 1365 break; 1366 } 1367 } 1368 header_prev = header; 1369 } else { 1370 header_prev = header; 1371 } 1372 } 1373 1374 if (found != NULL) { 1375 /* 1376 * If we have to set foundname, we do it before 1377 * anything else. 1378 */ 1379 if (foundname != NULL) { 1380 dns_name_copy(&node->name, foundname); 1381 } 1382 result = DNS_R_DELEGATION; 1383 if (nodep != NULL) { 1384 newref(search->qpdb, node, nlocktype, 1385 isc_rwlocktype_none DNS__DB_FLARG_PASS); 1386 *nodep = node; 1387 } 1388 bindrdataset(search->qpdb, node, found, search->now, 1389 nlocktype, isc_rwlocktype_none, 1390 rdataset DNS__DB_FLARG_PASS); 1391 if (foundsig != NULL) { 1392 bindrdataset(search->qpdb, node, foundsig, 1393 search->now, nlocktype, 1394 isc_rwlocktype_none, 1395 sigrdataset DNS__DB_FLARG_PASS); 1396 } 1397 if (need_headerupdate(found, search->now) || 1398 (foundsig != NULL && 1399 need_headerupdate(foundsig, search->now))) 1400 { 1401 if (nlocktype != isc_rwlocktype_write) { 1402 NODE_FORCEUPGRADE(lock, &nlocktype); 1403 POST(nlocktype); 1404 } 1405 if (need_headerupdate(found, search->now)) { 1406 update_header(search->qpdb, found, 1407 search->now); 1408 } 1409 if (foundsig != NULL && 1410 need_headerupdate(foundsig, search->now)) 1411 { 1412 update_header(search->qpdb, foundsig, 1413 search->now); 1414 } 1415 } 1416 } 1417 1418 NODE_UNLOCK(lock, &nlocktype); 1419 1420 if (found != NULL) { 1421 break; 1422 } 1423 } 1424 1425 return result; 1426 } 1427 1428 /* 1429 * Look for a potentially covering NSEC in the cache where `name` 1430 * is known not to exist. This uses the auxiliary NSEC tree to find 1431 * the potential NSEC owner. If found, we update 'foundname', 'nodep', 1432 * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC. 1433 * Otherwise, return ISC_R_NOTFOUND. 1434 */ 1435 static isc_result_t 1436 find_coveringnsec(qpc_search_t *search, const dns_name_t *name, 1437 dns_dbnode_t **nodep, isc_stdtime_t now, 1438 dns_name_t *foundname, dns_rdataset_t *rdataset, 1439 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1440 dns_fixedname_t fpredecessor, fixed; 1441 dns_name_t *predecessor = NULL, *fname = NULL; 1442 qpcnode_t *node = NULL; 1443 dns_qpiter_t iter; 1444 isc_result_t result; 1445 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1446 isc_rwlock_t *lock = NULL; 1447 dns_typepair_t matchtype, sigmatchtype; 1448 dns_slabheader_t *found = NULL, *foundsig = NULL; 1449 dns_slabheader_t *header = NULL; 1450 dns_slabheader_t *header_next = NULL, *header_prev = NULL; 1451 1452 /* 1453 * Look for the node in the auxilary tree. 1454 */ 1455 result = dns_qp_lookup(search->qpdb->nsec, name, NULL, &iter, NULL, 1456 (void **)&node, NULL); 1457 if (result != DNS_R_PARTIALMATCH) { 1458 return ISC_R_NOTFOUND; 1459 } 1460 1461 fname = dns_fixedname_initname(&fixed); 1462 predecessor = dns_fixedname_initname(&fpredecessor); 1463 matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0); 1464 sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec); 1465 1466 /* 1467 * Extract predecessor from iterator. 1468 */ 1469 result = dns_qpiter_current(&iter, predecessor, NULL, NULL); 1470 if (result != ISC_R_SUCCESS) { 1471 return ISC_R_NOTFOUND; 1472 } 1473 1474 /* 1475 * Lookup the predecessor in the main tree. 1476 */ 1477 node = NULL; 1478 result = dns_qp_getname(search->qpdb->tree, predecessor, (void **)&node, 1479 NULL); 1480 if (result != ISC_R_SUCCESS) { 1481 return result; 1482 } 1483 dns_name_copy(&node->name, fname); 1484 1485 lock = &(search->qpdb->node_locks[node->locknum].lock); 1486 NODE_RDLOCK(lock, &nlocktype); 1487 for (header = node->data; header != NULL; header = header_next) { 1488 header_next = header->next; 1489 if (check_stale_header(node, header, &nlocktype, lock, search, 1490 &header_prev)) 1491 { 1492 continue; 1493 } 1494 if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0) 1495 { 1496 header_prev = header; 1497 continue; 1498 } 1499 if (header->type == matchtype) { 1500 found = header; 1501 if (foundsig != NULL) { 1502 break; 1503 } 1504 } else if (header->type == sigmatchtype) { 1505 foundsig = header; 1506 if (found != NULL) { 1507 break; 1508 } 1509 } 1510 header_prev = header; 1511 } 1512 if (found != NULL) { 1513 bindrdataset(search->qpdb, node, found, now, nlocktype, 1514 isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS); 1515 if (foundsig != NULL) { 1516 bindrdataset(search->qpdb, node, foundsig, now, 1517 nlocktype, isc_rwlocktype_none, 1518 sigrdataset DNS__DB_FLARG_PASS); 1519 } 1520 newref(search->qpdb, node, nlocktype, 1521 isc_rwlocktype_none DNS__DB_FLARG_PASS); 1522 1523 dns_name_copy(fname, foundname); 1524 1525 *nodep = node; 1526 result = DNS_R_COVERINGNSEC; 1527 } else { 1528 result = ISC_R_NOTFOUND; 1529 } 1530 NODE_UNLOCK(lock, &nlocktype); 1531 return result; 1532 } 1533 1534 static isc_result_t 1535 find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 1536 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 1537 dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset, 1538 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1539 qpcnode_t *node = NULL; 1540 isc_result_t result; 1541 qpc_search_t search; 1542 bool cname_ok = true; 1543 bool found_noqname = false; 1544 bool all_negative = true; 1545 bool empty_node; 1546 isc_rwlock_t *lock = NULL; 1547 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1548 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1549 dns_slabheader_t *header = NULL; 1550 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 1551 dns_slabheader_t *found = NULL, *nsheader = NULL; 1552 dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; 1553 dns_slabheader_t *update = NULL, *updatesig = NULL; 1554 dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; 1555 dns_typepair_t sigtype, negtype; 1556 1557 UNUSED(version); 1558 1559 REQUIRE(VALID_QPDB((qpcache_t *)db)); 1560 REQUIRE(version == NULL); 1561 1562 if (now == 0) { 1563 now = isc_stdtime_now(); 1564 } 1565 1566 search = (qpc_search_t){ 1567 .qpdb = (qpcache_t *)db, 1568 .options = options, 1569 .now = now, 1570 }; 1571 1572 TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype); 1573 1574 /* 1575 * Search down from the root of the tree. 1576 */ 1577 result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL, 1578 &search.chain, (void **)&node, NULL); 1579 if (result != ISC_R_NOTFOUND && foundname != NULL) { 1580 dns_name_copy(&node->name, foundname); 1581 } 1582 1583 /* 1584 * Check the QP chain to see if there's a node above us with a 1585 * active DNAME or NS rdatasets. 1586 * 1587 * We're only interested in nodes above QNAME, so if the result 1588 * was success, then we skip the last item in the chain. 1589 */ 1590 unsigned int len = dns_qpchain_length(&search.chain); 1591 if (result == ISC_R_SUCCESS) { 1592 len--; 1593 } 1594 1595 for (unsigned int i = 0; i < len; i++) { 1596 isc_result_t zcresult; 1597 qpcnode_t *encloser = NULL; 1598 1599 dns_qpchain_node(&search.chain, i, NULL, (void **)&encloser, 1600 NULL); 1601 1602 zcresult = check_zonecut(encloser, 1603 (void *)&search DNS__DB_FLARG_PASS); 1604 if (zcresult != DNS_R_CONTINUE) { 1605 result = DNS_R_PARTIALMATCH; 1606 search.chain.len = i - 1; 1607 node = encloser; 1608 if (foundname != NULL) { 1609 dns_name_copy(&node->name, foundname); 1610 } 1611 break; 1612 } 1613 } 1614 1615 if (result == DNS_R_PARTIALMATCH) { 1616 /* 1617 * If we discovered a covering DNAME skip looking for a covering 1618 * NSEC. 1619 */ 1620 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 1621 (search.zonecut_header == NULL || 1622 search.zonecut_header->type != dns_rdatatype_dname)) 1623 { 1624 result = find_coveringnsec( 1625 &search, name, nodep, now, foundname, rdataset, 1626 sigrdataset DNS__DB_FLARG_PASS); 1627 if (result == DNS_R_COVERINGNSEC) { 1628 goto tree_exit; 1629 } 1630 } 1631 if (search.zonecut != NULL) { 1632 result = setup_delegation(&search, nodep, rdataset, 1633 sigrdataset, 1634 tlocktype DNS__DB_FLARG_PASS); 1635 goto tree_exit; 1636 } else { 1637 find_ns: 1638 result = find_deepest_zonecut( 1639 &search, node, nodep, foundname, rdataset, 1640 sigrdataset DNS__DB_FLARG_PASS); 1641 goto tree_exit; 1642 } 1643 } else if (result != ISC_R_SUCCESS) { 1644 goto tree_exit; 1645 } 1646 1647 /* 1648 * Certain DNSSEC types are not subject to CNAME matching 1649 * (RFC4035, section 2.5 and RFC3007). 1650 * 1651 * We don't check for RRSIG, because we don't store RRSIG records 1652 * directly. 1653 */ 1654 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 1655 cname_ok = false; 1656 } 1657 1658 /* 1659 * We now go looking for rdata... 1660 */ 1661 1662 lock = &(search.qpdb->node_locks[node->locknum].lock); 1663 NODE_RDLOCK(lock, &nlocktype); 1664 1665 /* 1666 * These pointers need to be reset here in case we did 1667 * 'goto find_ns' from somewhere below. 1668 */ 1669 found = NULL; 1670 foundsig = NULL; 1671 sigtype = DNS_SIGTYPE(type); 1672 negtype = DNS_TYPEPAIR_VALUE(0, type); 1673 nsheader = NULL; 1674 nsecheader = NULL; 1675 nssig = NULL; 1676 nsecsig = NULL; 1677 cnamesig = NULL; 1678 empty_node = true; 1679 header_prev = NULL; 1680 for (header = node->data; header != NULL; header = header_next) { 1681 header_next = header->next; 1682 if (check_stale_header(node, header, &nlocktype, lock, &search, 1683 &header_prev)) 1684 { 1685 /* Do nothing. */ 1686 } else if (EXISTS(header) && !ANCIENT(header)) { 1687 /* 1688 * We now know that there is at least one active 1689 * non-stale rdataset at this node. 1690 */ 1691 empty_node = false; 1692 if (header->noqname != NULL && 1693 header->trust == dns_trust_secure) 1694 { 1695 found_noqname = true; 1696 } 1697 if (!NEGATIVE(header)) { 1698 all_negative = false; 1699 } 1700 1701 /* 1702 * If we found a type we were looking for, remember 1703 * it. 1704 */ 1705 if (header->type == type || 1706 (type == dns_rdatatype_any && 1707 DNS_TYPEPAIR_TYPE(header->type) != 0) || 1708 (cname_ok && header->type == dns_rdatatype_cname)) 1709 { 1710 /* 1711 * We've found the answer. 1712 */ 1713 found = header; 1714 if (header->type == dns_rdatatype_cname && 1715 cname_ok) 1716 { 1717 /* 1718 * If we've already got the 1719 * CNAME RRSIG, use it. 1720 */ 1721 if (cnamesig != NULL) { 1722 foundsig = cnamesig; 1723 } else { 1724 sigtype = DNS_SIGTYPE( 1725 dns_rdatatype_cname); 1726 } 1727 } 1728 } else if (header->type == sigtype) { 1729 /* 1730 * We've found the RRSIG rdataset for our 1731 * target type. Remember it. 1732 */ 1733 foundsig = header; 1734 } else if (header->type == RDATATYPE_NCACHEANY || 1735 header->type == negtype) 1736 { 1737 /* 1738 * We've found a negative cache entry. 1739 */ 1740 found = header; 1741 } else if (header->type == dns_rdatatype_ns) { 1742 /* 1743 * Remember a NS rdataset even if we're 1744 * not specifically looking for it, because 1745 * we might need it later. 1746 */ 1747 nsheader = header; 1748 } else if (header->type == 1749 DNS_SIGTYPE(dns_rdatatype_ns)) 1750 { 1751 /* 1752 * If we need the NS rdataset, we'll also 1753 * need its signature. 1754 */ 1755 nssig = header; 1756 } else if (header->type == dns_rdatatype_nsec) { 1757 nsecheader = header; 1758 } else if (header->type == 1759 DNS_SIGTYPE(dns_rdatatype_nsec)) 1760 { 1761 nsecsig = header; 1762 } else if (cname_ok && 1763 header->type == 1764 DNS_SIGTYPE(dns_rdatatype_cname)) 1765 { 1766 /* 1767 * If we get a CNAME match, we'll also need 1768 * its signature. 1769 */ 1770 cnamesig = header; 1771 } 1772 header_prev = header; 1773 } else { 1774 header_prev = header; 1775 } 1776 } 1777 1778 if (empty_node) { 1779 /* 1780 * We have an exact match for the name, but there are no 1781 * extant rdatasets. That means that this node doesn't 1782 * meaningfully exist, and that we really have a partial match. 1783 */ 1784 NODE_UNLOCK(lock, &nlocktype); 1785 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { 1786 result = find_coveringnsec( 1787 &search, name, nodep, now, foundname, rdataset, 1788 sigrdataset DNS__DB_FLARG_PASS); 1789 if (result == DNS_R_COVERINGNSEC) { 1790 goto tree_exit; 1791 } 1792 } 1793 goto find_ns; 1794 } 1795 1796 /* 1797 * If we didn't find what we were looking for... 1798 */ 1799 if (found == NULL || 1800 (DNS_TRUST_ADDITIONAL(found->trust) && 1801 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || 1802 (found->trust == dns_trust_glue && 1803 ((options & DNS_DBFIND_GLUEOK) == 0)) || 1804 (DNS_TRUST_PENDING(found->trust) && 1805 ((options & DNS_DBFIND_PENDINGOK) == 0))) 1806 { 1807 /* 1808 * Return covering NODATA NSEC record. 1809 */ 1810 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 1811 nsecheader != NULL) 1812 { 1813 if (nodep != NULL) { 1814 newref(search.qpdb, node, nlocktype, 1815 tlocktype DNS__DB_FLARG_PASS); 1816 *nodep = node; 1817 } 1818 bindrdataset(search.qpdb, node, nsecheader, search.now, 1819 nlocktype, tlocktype, 1820 rdataset DNS__DB_FLARG_PASS); 1821 if (need_headerupdate(nsecheader, search.now)) { 1822 update = nsecheader; 1823 } 1824 if (nsecsig != NULL) { 1825 bindrdataset(search.qpdb, node, nsecsig, 1826 search.now, nlocktype, tlocktype, 1827 sigrdataset DNS__DB_FLARG_PASS); 1828 if (need_headerupdate(nsecsig, search.now)) { 1829 updatesig = nsecsig; 1830 } 1831 } 1832 result = DNS_R_COVERINGNSEC; 1833 goto node_exit; 1834 } 1835 1836 /* 1837 * This name was from a wild card. Look for a covering NSEC. 1838 */ 1839 if (found == NULL && (found_noqname || all_negative) && 1840 (search.options & DNS_DBFIND_COVERINGNSEC) != 0) 1841 { 1842 NODE_UNLOCK(lock, &nlocktype); 1843 result = find_coveringnsec( 1844 &search, name, nodep, now, foundname, rdataset, 1845 sigrdataset DNS__DB_FLARG_PASS); 1846 if (result == DNS_R_COVERINGNSEC) { 1847 goto tree_exit; 1848 } 1849 goto find_ns; 1850 } 1851 1852 /* 1853 * If there is an NS rdataset at this node, then this is the 1854 * deepest zone cut. 1855 */ 1856 if (nsheader != NULL) { 1857 if (nodep != NULL) { 1858 newref(search.qpdb, node, nlocktype, 1859 tlocktype DNS__DB_FLARG_PASS); 1860 *nodep = node; 1861 } 1862 bindrdataset(search.qpdb, node, nsheader, search.now, 1863 nlocktype, tlocktype, 1864 rdataset DNS__DB_FLARG_PASS); 1865 if (need_headerupdate(nsheader, search.now)) { 1866 update = nsheader; 1867 } 1868 if (nssig != NULL) { 1869 bindrdataset(search.qpdb, node, nssig, 1870 search.now, nlocktype, tlocktype, 1871 sigrdataset DNS__DB_FLARG_PASS); 1872 if (need_headerupdate(nssig, search.now)) { 1873 updatesig = nssig; 1874 } 1875 } 1876 result = DNS_R_DELEGATION; 1877 goto node_exit; 1878 } 1879 1880 /* 1881 * Go find the deepest zone cut. 1882 */ 1883 NODE_UNLOCK(lock, &nlocktype); 1884 goto find_ns; 1885 } 1886 1887 /* 1888 * We found what we were looking for, or we found a CNAME. 1889 */ 1890 1891 if (nodep != NULL) { 1892 newref(search.qpdb, node, nlocktype, 1893 tlocktype DNS__DB_FLARG_PASS); 1894 *nodep = node; 1895 } 1896 1897 if (NEGATIVE(found)) { 1898 /* 1899 * We found a negative cache entry. 1900 */ 1901 if (NXDOMAIN(found)) { 1902 result = DNS_R_NCACHENXDOMAIN; 1903 } else { 1904 result = DNS_R_NCACHENXRRSET; 1905 } 1906 } else if (type != found->type && type != dns_rdatatype_any && 1907 found->type == dns_rdatatype_cname) 1908 { 1909 /* 1910 * We weren't doing an ANY query and we found a CNAME instead 1911 * of the type we were looking for, so we need to indicate 1912 * that result to the caller. 1913 */ 1914 result = DNS_R_CNAME; 1915 } else { 1916 /* 1917 * An ordinary successful query! 1918 */ 1919 result = ISC_R_SUCCESS; 1920 } 1921 1922 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || 1923 result == DNS_R_NCACHENXRRSET) 1924 { 1925 bindrdataset(search.qpdb, node, found, search.now, nlocktype, 1926 tlocktype, rdataset DNS__DB_FLARG_PASS); 1927 if (need_headerupdate(found, search.now)) { 1928 update = found; 1929 } 1930 if (!NEGATIVE(found) && foundsig != NULL) { 1931 bindrdataset(search.qpdb, node, foundsig, search.now, 1932 nlocktype, tlocktype, 1933 sigrdataset DNS__DB_FLARG_PASS); 1934 if (need_headerupdate(foundsig, search.now)) { 1935 updatesig = foundsig; 1936 } 1937 } 1938 } 1939 1940 node_exit: 1941 if ((update != NULL || updatesig != NULL) && 1942 nlocktype != isc_rwlocktype_write) 1943 { 1944 NODE_FORCEUPGRADE(lock, &nlocktype); 1945 POST(nlocktype); 1946 } 1947 if (update != NULL && need_headerupdate(update, search.now)) { 1948 update_header(search.qpdb, update, search.now); 1949 } 1950 if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { 1951 update_header(search.qpdb, updatesig, search.now); 1952 } 1953 1954 NODE_UNLOCK(lock, &nlocktype); 1955 1956 tree_exit: 1957 TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype); 1958 1959 /* 1960 * If we found a zonecut but aren't going to use it, we have to 1961 * let go of it. 1962 */ 1963 if (search.need_cleanup) { 1964 node = search.zonecut; 1965 INSIST(node != NULL); 1966 lock = &(search.qpdb->node_locks[node->locknum].lock); 1967 1968 NODE_RDLOCK(lock, &nlocktype); 1969 decref(search.qpdb, node, &nlocktype, &tlocktype, 1970 true DNS__DB_FLARG_PASS); 1971 NODE_UNLOCK(lock, &nlocktype); 1972 INSIST(tlocktype == isc_rwlocktype_none); 1973 } 1974 1975 update_cachestats(search.qpdb, result); 1976 return result; 1977 } 1978 1979 static isc_result_t 1980 findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 1981 isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname, 1982 dns_name_t *dcname, dns_rdataset_t *rdataset, 1983 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1984 qpcnode_t *node = NULL; 1985 isc_rwlock_t *lock = NULL; 1986 isc_result_t result; 1987 qpc_search_t search; 1988 dns_slabheader_t *header = NULL; 1989 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 1990 dns_slabheader_t *found = NULL, *foundsig = NULL; 1991 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1992 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1993 bool dcnull = (dcname == NULL); 1994 1995 REQUIRE(VALID_QPDB((qpcache_t *)db)); 1996 1997 if (now == 0) { 1998 now = isc_stdtime_now(); 1999 } 2000 2001 search = (qpc_search_t){ 2002 .qpdb = (qpcache_t *)db, 2003 .options = options, 2004 .now = now, 2005 }; 2006 2007 if (dcnull) { 2008 dcname = foundname; 2009 } 2010 2011 TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype); 2012 2013 /* 2014 * Search down from the root of the tree. 2015 */ 2016 result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL, 2017 &search.chain, (void **)&node, NULL); 2018 if (result != ISC_R_NOTFOUND) { 2019 dns_name_copy(&node->name, dcname); 2020 } 2021 if ((options & DNS_DBFIND_NOEXACT) != 0 && result == ISC_R_SUCCESS) { 2022 int len = dns_qpchain_length(&search.chain); 2023 if (len >= 2) { 2024 node = NULL; 2025 dns_qpchain_node(&search.chain, len - 2, NULL, 2026 (void **)&node, NULL); 2027 search.chain.len = len - 1; 2028 result = DNS_R_PARTIALMATCH; 2029 } else { 2030 result = ISC_R_NOTFOUND; 2031 } 2032 } 2033 2034 if (result == DNS_R_PARTIALMATCH) { 2035 result = find_deepest_zonecut(&search, node, nodep, foundname, 2036 rdataset, 2037 sigrdataset DNS__DB_FLARG_PASS); 2038 goto tree_exit; 2039 } else if (result != ISC_R_SUCCESS) { 2040 goto tree_exit; 2041 } else if (!dcnull) { 2042 dns_name_copy(dcname, foundname); 2043 } 2044 2045 /* 2046 * We now go looking for an NS rdataset at the node. 2047 */ 2048 2049 lock = &(search.qpdb->node_locks[node->locknum].lock); 2050 NODE_RDLOCK(lock, &nlocktype); 2051 2052 for (header = node->data; header != NULL; header = header_next) { 2053 header_next = header->next; 2054 if (check_stale_header(node, header, &nlocktype, lock, &search, 2055 &header_prev)) 2056 { 2057 /* 2058 * The function dns_qp_lookup found us a matching 2059 * node for 'name' and stored the result in 'dcname'. 2060 * This is the deepest known zonecut in our database. 2061 * However, this node may be stale and if serve-stale 2062 * is not enabled (in other words 'stale-answer-enable' 2063 * is set to no), this node may not be used as a 2064 * zonecut we know about. If so, find the deepest 2065 * zonecut from this node up and return that instead. 2066 */ 2067 NODE_UNLOCK(lock, &nlocktype); 2068 result = find_deepest_zonecut( 2069 &search, node, nodep, foundname, rdataset, 2070 sigrdataset DNS__DB_FLARG_PASS); 2071 dns_name_copy(foundname, dcname); 2072 goto tree_exit; 2073 } else if (EXISTS(header) && !ANCIENT(header)) { 2074 /* 2075 * If we found a type we were looking for, remember 2076 * it. 2077 */ 2078 if (header->type == dns_rdatatype_ns) { 2079 /* 2080 * Remember a NS rdataset even if we're 2081 * not specifically looking for it, because 2082 * we might need it later. 2083 */ 2084 found = header; 2085 } else if (header->type == 2086 DNS_SIGTYPE(dns_rdatatype_ns)) 2087 { 2088 /* 2089 * If we need the NS rdataset, we'll also 2090 * need its signature. 2091 */ 2092 foundsig = header; 2093 } 2094 header_prev = header; 2095 } else { 2096 header_prev = header; 2097 } 2098 } 2099 2100 if (found == NULL) { 2101 /* 2102 * No NS records here. 2103 */ 2104 NODE_UNLOCK(lock, &nlocktype); 2105 result = find_deepest_zonecut(&search, node, nodep, foundname, 2106 rdataset, 2107 sigrdataset DNS__DB_FLARG_PASS); 2108 goto tree_exit; 2109 } 2110 2111 if (nodep != NULL) { 2112 newref(search.qpdb, node, nlocktype, 2113 tlocktype DNS__DB_FLARG_PASS); 2114 *nodep = node; 2115 } 2116 2117 bindrdataset(search.qpdb, node, found, search.now, nlocktype, tlocktype, 2118 rdataset DNS__DB_FLARG_PASS); 2119 if (foundsig != NULL) { 2120 bindrdataset(search.qpdb, node, foundsig, search.now, nlocktype, 2121 tlocktype, sigrdataset DNS__DB_FLARG_PASS); 2122 } 2123 2124 if (need_headerupdate(found, search.now) || 2125 (foundsig != NULL && need_headerupdate(foundsig, search.now))) 2126 { 2127 if (nlocktype != isc_rwlocktype_write) { 2128 NODE_FORCEUPGRADE(lock, &nlocktype); 2129 POST(nlocktype); 2130 } 2131 if (need_headerupdate(found, search.now)) { 2132 update_header(search.qpdb, found, search.now); 2133 } 2134 if (foundsig != NULL && need_headerupdate(foundsig, search.now)) 2135 { 2136 update_header(search.qpdb, foundsig, search.now); 2137 } 2138 } 2139 2140 NODE_UNLOCK(lock, &nlocktype); 2141 2142 tree_exit: 2143 TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype); 2144 2145 INSIST(!search.need_cleanup); 2146 2147 if (result == DNS_R_DELEGATION) { 2148 result = ISC_R_SUCCESS; 2149 } 2150 2151 return result; 2152 } 2153 2154 static isc_result_t 2155 findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 2156 dns_rdatatype_t type, dns_rdatatype_t covers, isc_stdtime_t now, 2157 dns_rdataset_t *rdataset, 2158 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 2159 qpcache_t *qpdb = (qpcache_t *)db; 2160 qpcnode_t *qpnode = (qpcnode_t *)node; 2161 dns_slabheader_t *header = NULL, *header_next = NULL; 2162 dns_slabheader_t *found = NULL, *foundsig = NULL; 2163 dns_typepair_t matchtype, sigmatchtype, negtype; 2164 isc_result_t result; 2165 isc_rwlock_t *lock = NULL; 2166 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2167 2168 REQUIRE(VALID_QPDB(qpdb)); 2169 REQUIRE(type != dns_rdatatype_any); 2170 2171 UNUSED(version); 2172 2173 result = ISC_R_SUCCESS; 2174 2175 if (now == 0) { 2176 now = isc_stdtime_now(); 2177 } 2178 2179 lock = &qpdb->node_locks[qpnode->locknum].lock; 2180 NODE_RDLOCK(lock, &nlocktype); 2181 2182 matchtype = DNS_TYPEPAIR_VALUE(type, covers); 2183 negtype = DNS_TYPEPAIR_VALUE(0, type); 2184 if (covers == 0) { 2185 sigmatchtype = DNS_SIGTYPE(type); 2186 } else { 2187 sigmatchtype = 0; 2188 } 2189 2190 for (header = qpnode->data; header != NULL; header = header_next) { 2191 header_next = header->next; 2192 if (!ACTIVE(header, now)) { 2193 if ((header->ttl + STALE_TTL(header, qpdb) < 2194 now - QPDB_VIRTUAL) && 2195 (nlocktype == isc_rwlocktype_write || 2196 NODE_TRYUPGRADE(lock, &nlocktype) == 2197 ISC_R_SUCCESS)) 2198 { 2199 /* 2200 * We update the node's status only when we 2201 * can get write access. 2202 * 2203 * We don't check if refcurrent(qpnode) == 0 2204 * and try to free like we do in find(), 2205 * because refcurrent(qpnode) must be 2206 * non-zero. This is so because 'node' is an 2207 * argument to the function. 2208 */ 2209 mark(header, DNS_SLABHEADERATTR_ANCIENT); 2210 HEADERNODE(header)->dirty = 1; 2211 } 2212 } else if (EXISTS(header) && !ANCIENT(header)) { 2213 if (header->type == matchtype) { 2214 found = header; 2215 } else if (header->type == RDATATYPE_NCACHEANY || 2216 header->type == negtype) 2217 { 2218 found = header; 2219 } else if (header->type == sigmatchtype) { 2220 foundsig = header; 2221 } 2222 } 2223 } 2224 if (found != NULL) { 2225 bindrdataset(qpdb, qpnode, found, now, nlocktype, 2226 isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS); 2227 if (!NEGATIVE(found) && foundsig != NULL) { 2228 bindrdataset(qpdb, qpnode, foundsig, now, nlocktype, 2229 isc_rwlocktype_none, 2230 sigrdataset DNS__DB_FLARG_PASS); 2231 } 2232 } 2233 2234 NODE_UNLOCK(lock, &nlocktype); 2235 2236 if (found == NULL) { 2237 return ISC_R_NOTFOUND; 2238 } 2239 2240 if (NEGATIVE(found)) { 2241 /* 2242 * We found a negative cache entry. 2243 */ 2244 if (NXDOMAIN(found)) { 2245 result = DNS_R_NCACHENXDOMAIN; 2246 } else { 2247 result = DNS_R_NCACHENXRRSET; 2248 } 2249 } 2250 2251 update_cachestats(qpdb, result); 2252 2253 return result; 2254 } 2255 2256 static isc_result_t 2257 setcachestats(dns_db_t *db, isc_stats_t *stats) { 2258 qpcache_t *qpdb = (qpcache_t *)db; 2259 2260 REQUIRE(VALID_QPDB(qpdb)); 2261 REQUIRE(stats != NULL); 2262 2263 isc_stats_attach(stats, &qpdb->cachestats); 2264 return ISC_R_SUCCESS; 2265 } 2266 2267 static dns_stats_t * 2268 getrrsetstats(dns_db_t *db) { 2269 qpcache_t *qpdb = (qpcache_t *)db; 2270 2271 REQUIRE(VALID_QPDB(qpdb)); 2272 2273 return qpdb->rrsetstats; 2274 } 2275 2276 static isc_result_t 2277 setservestalettl(dns_db_t *db, dns_ttl_t ttl) { 2278 qpcache_t *qpdb = (qpcache_t *)db; 2279 2280 REQUIRE(VALID_QPDB(qpdb)); 2281 2282 /* currently no bounds checking. 0 means disable. */ 2283 qpdb->common.serve_stale_ttl = ttl; 2284 return ISC_R_SUCCESS; 2285 } 2286 2287 static isc_result_t 2288 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { 2289 qpcache_t *qpdb = (qpcache_t *)db; 2290 2291 REQUIRE(VALID_QPDB(qpdb)); 2292 2293 *ttl = qpdb->common.serve_stale_ttl; 2294 return ISC_R_SUCCESS; 2295 } 2296 2297 static isc_result_t 2298 setservestalerefresh(dns_db_t *db, uint32_t interval) { 2299 qpcache_t *qpdb = (qpcache_t *)db; 2300 2301 REQUIRE(VALID_QPDB(qpdb)); 2302 2303 /* currently no bounds checking. 0 means disable. */ 2304 qpdb->serve_stale_refresh = interval; 2305 return ISC_R_SUCCESS; 2306 } 2307 2308 static isc_result_t 2309 getservestalerefresh(dns_db_t *db, uint32_t *interval) { 2310 qpcache_t *qpdb = (qpcache_t *)db; 2311 2312 REQUIRE(VALID_QPDB(qpdb)); 2313 2314 *interval = qpdb->serve_stale_refresh; 2315 return ISC_R_SUCCESS; 2316 } 2317 2318 static void 2319 expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { 2320 qpcache_t *qpdb = (qpcache_t *)db; 2321 qpcnode_t *qpnode = (qpcnode_t *)node; 2322 dns_slabheader_t *header = data; 2323 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2324 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 2325 2326 NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 2327 expireheader(header, &nlocktype, &tlocktype, 2328 dns_expire_flush DNS__DB_FILELINE); 2329 NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 2330 INSIST(tlocktype == isc_rwlocktype_none); 2331 } 2332 2333 static size_t 2334 rdataset_size(dns_slabheader_t *header) { 2335 if (!NONEXISTENT(header)) { 2336 return dns_rdataslab_size((unsigned char *)header, 2337 sizeof(*header)); 2338 } 2339 2340 return sizeof(*header); 2341 } 2342 2343 static size_t 2344 expire_lru_headers(qpcache_t *qpdb, unsigned int locknum, 2345 isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, 2346 size_t purgesize DNS__DB_FLARG) { 2347 dns_slabheader_t *header = NULL; 2348 size_t purged = 0; 2349 2350 for (header = ISC_LIST_TAIL(qpdb->lru[locknum]); 2351 header != NULL && header->last_used <= qpdb->last_used && 2352 purged <= purgesize; 2353 header = ISC_LIST_TAIL(qpdb->lru[locknum])) 2354 { 2355 size_t header_size = rdataset_size(header); 2356 2357 /* 2358 * Unlink the entry at this point to avoid checking it 2359 * again even if it's currently used someone else and 2360 * cannot be purged at this moment. This entry won't be 2361 * referenced any more (so unlinking is safe) since the 2362 * TTL will be reset to 0. 2363 */ 2364 ISC_LIST_UNLINK(qpdb->lru[locknum], header, link); 2365 expireheader(header, nlocktypep, tlocktypep, 2366 dns_expire_lru DNS__DB_FLARG_PASS); 2367 purged += header_size; 2368 } 2369 2370 return purged; 2371 } 2372 2373 /*% 2374 * Purge some expired and/or stale (i.e. unused for some period) cache entries 2375 * due to an overmem condition. To recover from this condition quickly, 2376 * we clean up entries up to the size of newly added rdata that triggered 2377 * the overmem; this is accessible via newheader. 2378 * 2379 * The LRU lists tails are processed in LRU order to the nearest second. 2380 * 2381 * A write lock on the tree must be held. 2382 */ 2383 static void 2384 overmem(qpcache_t *qpdb, dns_slabheader_t *newheader, 2385 isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { 2386 uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->node_lock_count; 2387 uint32_t locknum = locknum_start; 2388 size_t purgesize, purged = 0; 2389 isc_stdtime_t min_last_used = 0; 2390 size_t max_passes = 8; 2391 2392 /* 2393 * Maximum estimated size of the data being added: The size 2394 * of the rdataset, plus a new QP database node and nodename, 2395 * and a possible additional NSEC node and nodename. Also add 2396 * a 12k margin for a possible QP-trie chunk allocation. 2397 * (It's okay to overestimate, we want to get cache memory 2398 * down quickly.) 2399 */ 2400 purgesize = 2 * (sizeof(qpcnode_t) + 2401 dns_name_size(&HEADERNODE(newheader)->name)) + 2402 rdataset_size(newheader) + 12288; 2403 again: 2404 do { 2405 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2406 NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 2407 2408 purged += expire_lru_headers( 2409 qpdb, locknum, &nlocktype, tlocktypep, 2410 purgesize - purged DNS__DB_FLARG_PASS); 2411 2412 /* 2413 * Work out the oldest remaining last_used values of the list 2414 * tails as we walk across the array of lru lists. 2415 */ 2416 dns_slabheader_t *header = ISC_LIST_TAIL(qpdb->lru[locknum]); 2417 if (header != NULL && 2418 (min_last_used == 0 || header->last_used < min_last_used)) 2419 { 2420 min_last_used = header->last_used; 2421 } 2422 NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 2423 locknum = (locknum + 1) % qpdb->node_lock_count; 2424 } while (locknum != locknum_start && purged <= purgesize); 2425 2426 /* 2427 * Update qpdb->last_used if we have walked all the list tails and have 2428 * not freed the required amount of memory. 2429 */ 2430 if (purged < purgesize) { 2431 if (min_last_used != 0) { 2432 qpdb->last_used = min_last_used; 2433 if (max_passes-- > 0) { 2434 goto again; 2435 } 2436 } 2437 } 2438 } 2439 2440 /*% 2441 * These functions allow the heap code to rank the priority of each 2442 * element. It returns true if v1 happens "sooner" than v2. 2443 */ 2444 static bool 2445 ttl_sooner(void *v1, void *v2) { 2446 dns_slabheader_t *h1 = v1; 2447 dns_slabheader_t *h2 = v2; 2448 2449 return h1->ttl < h2->ttl; 2450 } 2451 2452 /*% 2453 * This function sets the heap index into the header. 2454 */ 2455 static void 2456 set_index(void *what, unsigned int idx) { 2457 dns_slabheader_t *h = what; 2458 2459 h->heap_index = idx; 2460 } 2461 2462 static void 2463 free_qpdb(qpcache_t *qpdb, bool log) { 2464 unsigned int i; 2465 char buf[DNS_NAME_FORMATSIZE]; 2466 dns_qp_t **treep = NULL; 2467 2468 for (;;) { 2469 /* 2470 * pick the next tree to (start to) destroy 2471 */ 2472 treep = &qpdb->tree; 2473 if (*treep == NULL) { 2474 treep = &qpdb->nsec; 2475 if (*treep == NULL) { 2476 break; 2477 } 2478 } 2479 2480 dns_qp_destroy(treep); 2481 INSIST(*treep == NULL); 2482 } 2483 2484 if (log) { 2485 if (dns_name_dynamic(&qpdb->common.origin)) { 2486 dns_name_format(&qpdb->common.origin, buf, sizeof(buf)); 2487 } else { 2488 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 2489 } 2490 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 2491 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 2492 "done free_qpdb(%s)", buf); 2493 } 2494 if (dns_name_dynamic(&qpdb->common.origin)) { 2495 dns_name_free(&qpdb->common.origin, qpdb->common.mctx); 2496 } 2497 for (i = 0; i < qpdb->node_lock_count; i++) { 2498 isc_refcount_destroy(&qpdb->node_locks[i].references); 2499 NODE_DESTROYLOCK(&qpdb->node_locks[i].lock); 2500 } 2501 2502 /* 2503 * Clean up LRU / re-signing order lists. 2504 */ 2505 if (qpdb->lru != NULL) { 2506 for (i = 0; i < qpdb->node_lock_count; i++) { 2507 INSIST(ISC_LIST_EMPTY(qpdb->lru[i])); 2508 } 2509 isc_mem_cput(qpdb->common.mctx, qpdb->lru, 2510 qpdb->node_lock_count, 2511 sizeof(dns_slabheaderlist_t)); 2512 } 2513 /* 2514 * Clean up dead node buckets. 2515 */ 2516 for (i = 0; i < qpdb->node_lock_count; i++) { 2517 INSIST(isc_queue_empty(&qpdb->deadnodes[i])); 2518 isc_queue_destroy(&qpdb->deadnodes[i]); 2519 } 2520 isc_mem_cput(qpdb->common.mctx, qpdb->deadnodes, qpdb->node_lock_count, 2521 sizeof(qpdb->deadnodes[0])); 2522 2523 /* 2524 * Clean up heap objects. 2525 */ 2526 if (qpdb->heaps != NULL) { 2527 for (i = 0; i < qpdb->node_lock_count; i++) { 2528 isc_heap_destroy(&qpdb->heaps[i]); 2529 } 2530 isc_mem_cput(qpdb->hmctx, qpdb->heaps, qpdb->node_lock_count, 2531 sizeof(isc_heap_t *)); 2532 } 2533 2534 if (qpdb->rrsetstats != NULL) { 2535 dns_stats_detach(&qpdb->rrsetstats); 2536 } 2537 if (qpdb->cachestats != NULL) { 2538 isc_stats_detach(&qpdb->cachestats); 2539 } 2540 if (qpdb->gluecachestats != NULL) { 2541 isc_stats_detach(&qpdb->gluecachestats); 2542 } 2543 2544 isc_mem_cput(qpdb->common.mctx, qpdb->node_locks, qpdb->node_lock_count, 2545 sizeof(db_nodelock_t)); 2546 TREE_DESTROYLOCK(&qpdb->tree_lock); 2547 isc_refcount_destroy(&qpdb->common.references); 2548 2549 isc_rwlock_destroy(&qpdb->lock); 2550 qpdb->common.magic = 0; 2551 qpdb->common.impmagic = 0; 2552 isc_mem_detach(&qpdb->hmctx); 2553 2554 isc_mem_putanddetach(&qpdb->common.mctx, qpdb, sizeof(*qpdb)); 2555 } 2556 2557 static void 2558 qpdb_destroy(dns_db_t *arg) { 2559 qpcache_t *qpdb = (qpcache_t *)arg; 2560 bool want_free = false; 2561 unsigned int i; 2562 unsigned int inactive = 0; 2563 2564 if (qpdb->origin_node != NULL) { 2565 qpcnode_detach(&qpdb->origin_node); 2566 } 2567 2568 /* 2569 * Even though there are no external direct references, there still 2570 * may be nodes in use. 2571 */ 2572 for (i = 0; i < qpdb->node_lock_count; i++) { 2573 isc_rwlocktype_t nodelock = isc_rwlocktype_none; 2574 NODE_WRLOCK(&qpdb->node_locks[i].lock, &nodelock); 2575 qpdb->node_locks[i].exiting = true; 2576 if (isc_refcount_current(&qpdb->node_locks[i].references) == 0) 2577 { 2578 inactive++; 2579 } 2580 NODE_UNLOCK(&qpdb->node_locks[i].lock, &nodelock); 2581 } 2582 2583 if (inactive != 0) { 2584 RWLOCK(&qpdb->lock, isc_rwlocktype_write); 2585 qpdb->active -= inactive; 2586 if (qpdb->active == 0) { 2587 want_free = true; 2588 } 2589 RWUNLOCK(&qpdb->lock, isc_rwlocktype_write); 2590 if (want_free) { 2591 char buf[DNS_NAME_FORMATSIZE]; 2592 if (dns_name_dynamic(&qpdb->common.origin)) { 2593 dns_name_format(&qpdb->common.origin, buf, 2594 sizeof(buf)); 2595 } else { 2596 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 2597 } 2598 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 2599 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 2600 "calling free_qpdb(%s)", buf); 2601 free_qpdb(qpdb, true); 2602 } 2603 } 2604 } 2605 2606 static void 2607 mark_ancient(dns_slabheader_t *header) { 2608 setttl(header, 0); 2609 mark(header, DNS_SLABHEADERATTR_ANCIENT); 2610 HEADERNODE(header)->dirty = 1; 2611 } 2612 2613 /*% 2614 * Clean up dead nodes. These are nodes which have no references, and 2615 * have no data. They are dead but we could not or chose not to delete 2616 * them when we deleted all the data at that node because we did not want 2617 * to wait for the tree write lock. 2618 */ 2619 static void 2620 cleanup_deadnodes(void *arg) { 2621 qpcache_t *qpdb = arg; 2622 uint16_t locknum = isc_tid(); 2623 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 2624 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2625 qpcnode_t *qpnode = NULL, *qpnext = NULL; 2626 isc_queue_t deadnodes; 2627 2628 INSIST(locknum < qpdb->node_lock_count); 2629 2630 isc_queue_init(&deadnodes); 2631 2632 TREE_WRLOCK(&qpdb->tree_lock, &tlocktype); 2633 NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 2634 2635 RUNTIME_CHECK(isc_queue_splice(&deadnodes, &qpdb->deadnodes[locknum])); 2636 isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) { 2637 decref(qpdb, qpnode, &nlocktype, &tlocktype, false); 2638 } 2639 2640 NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 2641 TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 2642 } 2643 2644 /* 2645 * This function is assumed to be called when a node is newly referenced 2646 * and can be in the deadnode list. In that case the node will be references 2647 * and cleanup_deadnodes() will remove it from the list when the cleaning 2648 * happens. 2649 * Note: while a new reference is gained in multiple places, there are only very 2650 * few cases where the node can be in the deadnode list (only empty nodes can 2651 * have been added to the list). 2652 */ 2653 static void 2654 reactivate_node(qpcache_t *qpdb, qpcnode_t *node, 2655 isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) { 2656 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2657 isc_rwlock_t *nodelock = &qpdb->node_locks[node->locknum].lock; 2658 2659 NODE_RDLOCK(nodelock, &nlocktype); 2660 newref(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS); 2661 NODE_UNLOCK(nodelock, &nlocktype); 2662 } 2663 2664 static qpcnode_t * 2665 new_qpcnode(qpcache_t *qpdb, const dns_name_t *name) { 2666 qpcnode_t *newdata = isc_mem_get(qpdb->common.mctx, sizeof(*newdata)); 2667 *newdata = (qpcnode_t){ 2668 .name = DNS_NAME_INITEMPTY, 2669 .references = ISC_REFCOUNT_INITIALIZER(1), 2670 .locknum = isc_random_uniform(qpdb->node_lock_count), 2671 }; 2672 2673 INSIST(newdata->locknum < qpdb->node_lock_count); 2674 2675 isc_mem_attach(qpdb->common.mctx, &newdata->mctx); 2676 dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name); 2677 2678 #ifdef DNS_DB_NODETRACE 2679 fprintf(stderr, "new_qpcnode:%s:%s:%d:%p->references = 1\n", __func__, 2680 __FILE__, __LINE__ + 1, name); 2681 #endif 2682 return newdata; 2683 } 2684 2685 static isc_result_t 2686 findnode(dns_db_t *db, const dns_name_t *name, bool create, 2687 dns_dbnode_t **nodep DNS__DB_FLARG) { 2688 qpcache_t *qpdb = (qpcache_t *)db; 2689 qpcnode_t *node = NULL; 2690 isc_result_t result; 2691 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 2692 2693 TREE_RDLOCK(&qpdb->tree_lock, &tlocktype); 2694 result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL); 2695 if (result != ISC_R_SUCCESS) { 2696 if (!create) { 2697 goto unlock; 2698 } 2699 /* 2700 * Try to upgrade the lock and if that fails unlock then relock. 2701 */ 2702 TREE_FORCEUPGRADE(&qpdb->tree_lock, &tlocktype); 2703 result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL); 2704 if (result != ISC_R_SUCCESS) { 2705 node = new_qpcnode(qpdb, name); 2706 result = dns_qp_insert(qpdb->tree, node, 0); 2707 INSIST(result == ISC_R_SUCCESS); 2708 qpcnode_unref(node); 2709 } 2710 } 2711 2712 reactivate_node(qpdb, node, tlocktype DNS__DB_FLARG_PASS); 2713 2714 *nodep = (dns_dbnode_t *)node; 2715 unlock: 2716 TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 2717 2718 return result; 2719 } 2720 2721 static void 2722 attachnode(dns_db_t *db, dns_dbnode_t *source, 2723 dns_dbnode_t **targetp DNS__DB_FLARG) { 2724 REQUIRE(VALID_QPDB((qpcache_t *)db)); 2725 REQUIRE(targetp != NULL && *targetp == NULL); 2726 2727 qpcache_t *qpdb = (qpcache_t *)db; 2728 qpcnode_t *node = (qpcnode_t *)source; 2729 2730 newref(qpdb, node, isc_rwlocktype_none, 2731 isc_rwlocktype_none DNS__DB_FLARG_PASS); 2732 2733 *targetp = source; 2734 } 2735 2736 static void 2737 detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG) { 2738 qpcache_t *qpdb = (qpcache_t *)db; 2739 qpcnode_t *node = NULL; 2740 bool want_free = false; 2741 bool inactive = false; 2742 db_nodelock_t *nodelock = NULL; 2743 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2744 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 2745 2746 REQUIRE(VALID_QPDB(qpdb)); 2747 REQUIRE(targetp != NULL && *targetp != NULL); 2748 2749 node = (qpcnode_t *)(*targetp); 2750 nodelock = &qpdb->node_locks[node->locknum]; 2751 2752 NODE_RDLOCK(&nodelock->lock, &nlocktype); 2753 2754 if (decref(qpdb, node, &nlocktype, &tlocktype, true DNS__DB_FLARG_PASS)) 2755 { 2756 if (isc_refcount_current(&nodelock->references) == 0 && 2757 nodelock->exiting) 2758 { 2759 inactive = true; 2760 } 2761 } 2762 2763 NODE_UNLOCK(&nodelock->lock, &nlocktype); 2764 INSIST(tlocktype == isc_rwlocktype_none); 2765 2766 *targetp = NULL; 2767 2768 if (inactive) { 2769 RWLOCK(&qpdb->lock, isc_rwlocktype_write); 2770 qpdb->active--; 2771 if (qpdb->active == 0) { 2772 want_free = true; 2773 } 2774 RWUNLOCK(&qpdb->lock, isc_rwlocktype_write); 2775 if (want_free) { 2776 char buf[DNS_NAME_FORMATSIZE]; 2777 if (dns_name_dynamic(&qpdb->common.origin)) { 2778 dns_name_format(&qpdb->common.origin, buf, 2779 sizeof(buf)); 2780 } else { 2781 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 2782 } 2783 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 2784 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 2785 "calling free_qpdb(%s)", buf); 2786 free_qpdb(qpdb, true); 2787 } 2788 } 2789 } 2790 2791 static isc_result_t 2792 createiterator(dns_db_t *db, unsigned int options ISC_ATTR_UNUSED, 2793 dns_dbiterator_t **iteratorp) { 2794 qpcache_t *qpdb = (qpcache_t *)db; 2795 qpc_dbit_t *qpdbiter = NULL; 2796 2797 REQUIRE(VALID_QPDB(qpdb)); 2798 2799 qpdbiter = isc_mem_get(qpdb->common.mctx, sizeof(*qpdbiter)); 2800 *qpdbiter = (qpc_dbit_t){ 2801 .common.methods = &dbiterator_methods, 2802 .common.magic = DNS_DBITERATOR_MAGIC, 2803 .paused = true, 2804 }; 2805 2806 qpdbiter->name = dns_fixedname_initname(&qpdbiter->fixed); 2807 dns_db_attach(db, &qpdbiter->common.db); 2808 dns_qpiter_init(qpdb->tree, &qpdbiter->iter); 2809 2810 *iteratorp = (dns_dbiterator_t *)qpdbiter; 2811 return ISC_R_SUCCESS; 2812 } 2813 2814 static isc_result_t 2815 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 2816 unsigned int options, isc_stdtime_t now, 2817 dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { 2818 qpcache_t *qpdb = (qpcache_t *)db; 2819 qpcnode_t *qpnode = (qpcnode_t *)node; 2820 qpc_rditer_t *iterator = NULL; 2821 2822 REQUIRE(VALID_QPDB(qpdb)); 2823 2824 UNUSED(version); 2825 2826 iterator = isc_mem_get(qpdb->common.mctx, sizeof(*iterator)); 2827 2828 if (now == 0) { 2829 now = isc_stdtime_now(); 2830 } 2831 2832 iterator->common.magic = DNS_RDATASETITER_MAGIC; 2833 iterator->common.methods = &rdatasetiter_methods; 2834 iterator->common.db = db; 2835 iterator->common.node = node; 2836 iterator->common.version = NULL; 2837 iterator->common.options = options; 2838 iterator->common.now = now; 2839 iterator->current = NULL; 2840 2841 newref(qpdb, qpnode, isc_rwlocktype_none, 2842 isc_rwlocktype_none DNS__DB_FLARG_PASS); 2843 2844 *iteratorp = (dns_rdatasetiter_t *)iterator; 2845 2846 return ISC_R_SUCCESS; 2847 } 2848 2849 static bool 2850 overmaxtype(qpcache_t *qpdb, uint32_t ntypes) { 2851 if (qpdb->maxtypepername == 0) { 2852 return false; 2853 } 2854 2855 return ntypes >= qpdb->maxtypepername; 2856 } 2857 2858 static bool 2859 prio_header(dns_slabheader_t *header) { 2860 if (NEGATIVE(header) && prio_type(DNS_TYPEPAIR_COVERS(header->type))) { 2861 return true; 2862 } 2863 2864 return prio_type(header->type); 2865 } 2866 2867 static isc_result_t 2868 add(qpcache_t *qpdb, qpcnode_t *qpnode, 2869 const dns_name_t *nodename ISC_ATTR_UNUSED, dns_slabheader_t *newheader, 2870 unsigned int options, bool loading, dns_rdataset_t *addedrdataset, 2871 isc_stdtime_t now, isc_rwlocktype_t nlocktype, 2872 isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 2873 dns_slabheader_t *topheader = NULL, *topheader_prev = NULL; 2874 dns_slabheader_t *header = NULL, *sigheader = NULL; 2875 dns_slabheader_t *prioheader = NULL, *expireheader = NULL; 2876 bool header_nx; 2877 bool newheader_nx; 2878 dns_typepair_t negtype = 0; 2879 dns_trust_t trust; 2880 int idx; 2881 uint32_t ntypes = 0; 2882 2883 if ((options & DNS_DBADD_FORCE) != 0) { 2884 trust = dns_trust_ultimate; 2885 } else { 2886 trust = newheader->trust; 2887 } 2888 2889 newheader_nx = NONEXISTENT(newheader) ? true : false; 2890 2891 if (!newheader_nx) { 2892 dns_rdatatype_t rdtype = DNS_TYPEPAIR_TYPE(newheader->type); 2893 dns_rdatatype_t covers = DNS_TYPEPAIR_COVERS(newheader->type); 2894 dns_typepair_t sigtype = DNS_SIGTYPE(covers); 2895 if (NEGATIVE(newheader)) { 2896 /* 2897 * We're adding a negative cache entry. 2898 */ 2899 if (covers == dns_rdatatype_any) { 2900 /* 2901 * If we're adding an negative cache entry 2902 * which covers all types (NXDOMAIN, 2903 * NODATA(QTYPE=ANY)), 2904 * 2905 * We make all other data ancient so that the 2906 * only rdataset that can be found at this 2907 * node is the negative cache entry. 2908 */ 2909 for (topheader = qpnode->data; 2910 topheader != NULL; 2911 topheader = topheader->next) 2912 { 2913 mark_ancient(topheader); 2914 } 2915 goto find_header; 2916 } 2917 /* 2918 * Otherwise look for any RRSIGs of the given 2919 * type so they can be marked ancient later. 2920 */ 2921 for (topheader = qpnode->data; topheader != NULL; 2922 topheader = topheader->next) 2923 { 2924 if (topheader->type == sigtype) { 2925 sigheader = topheader; 2926 break; 2927 } 2928 } 2929 negtype = DNS_TYPEPAIR_VALUE(covers, 0); 2930 } else { 2931 /* 2932 * We're adding something that isn't a 2933 * negative cache entry. Look for an extant 2934 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative 2935 * cache entry. If we're adding an RRSIG, also 2936 * check for an extant non-ancient NODATA ncache 2937 * entry which covers the same type as the RRSIG. 2938 */ 2939 for (topheader = qpnode->data; topheader != NULL; 2940 topheader = topheader->next) 2941 { 2942 if ((topheader->type == RDATATYPE_NCACHEANY) || 2943 (newheader->type == sigtype && 2944 topheader->type == 2945 DNS_TYPEPAIR_VALUE(0, covers))) 2946 { 2947 break; 2948 } 2949 } 2950 if (topheader != NULL && EXISTS(topheader) && 2951 ACTIVE(topheader, now)) 2952 { 2953 /* 2954 * Found one. 2955 */ 2956 if (trust < topheader->trust) { 2957 /* 2958 * The NXDOMAIN/NODATA(QTYPE=ANY) 2959 * is more trusted. 2960 */ 2961 dns_slabheader_destroy(&newheader); 2962 if (addedrdataset != NULL) { 2963 bindrdataset( 2964 qpdb, qpnode, topheader, 2965 now, nlocktype, 2966 tlocktype, 2967 addedrdataset 2968 DNS__DB_FLARG_PASS); 2969 } 2970 return DNS_R_UNCHANGED; 2971 } 2972 /* 2973 * The new rdataset is better. Expire the 2974 * ncache entry. 2975 */ 2976 mark_ancient(topheader); 2977 topheader = NULL; 2978 goto find_header; 2979 } 2980 negtype = DNS_TYPEPAIR_VALUE(0, rdtype); 2981 } 2982 } 2983 2984 for (topheader = qpnode->data; topheader != NULL; 2985 topheader = topheader->next) 2986 { 2987 if (ACTIVE(topheader, now)) { 2988 ++ntypes; 2989 expireheader = topheader; 2990 } 2991 if (prio_header(topheader)) { 2992 prioheader = topheader; 2993 } 2994 2995 if (topheader->type == newheader->type || 2996 topheader->type == negtype) 2997 { 2998 break; 2999 } 3000 topheader_prev = topheader; 3001 } 3002 3003 find_header: 3004 /* 3005 * If header isn't NULL, we've found the right type. There may be 3006 * IGNORE rdatasets between the top of the chain and the first real 3007 * data. We skip over them. 3008 */ 3009 header = topheader; 3010 while (header != NULL && IGNORE(header)) { 3011 header = header->down; 3012 } 3013 if (header != NULL) { 3014 header_nx = NONEXISTENT(header) ? true : false; 3015 3016 /* 3017 * Deleting an already non-existent rdataset has no effect. 3018 */ 3019 if (header_nx && newheader_nx) { 3020 dns_slabheader_destroy(&newheader); 3021 return DNS_R_UNCHANGED; 3022 } 3023 3024 /* 3025 * Trying to add an rdataset with lower trust to a cache 3026 * DB has no effect, provided that the cache data isn't 3027 * stale. If the cache data is stale, new lower trust 3028 * data will supersede it below. Unclear what the best 3029 * policy is here. 3030 */ 3031 if (trust < header->trust && (ACTIVE(header, now) || header_nx)) 3032 { 3033 dns_slabheader_destroy(&newheader); 3034 if (addedrdataset != NULL) { 3035 bindrdataset(qpdb, qpnode, header, now, 3036 nlocktype, tlocktype, 3037 addedrdataset DNS__DB_FLARG_PASS); 3038 } 3039 return DNS_R_UNCHANGED; 3040 } 3041 3042 /* 3043 * Don't replace existing NS, A and AAAA RRsets in the 3044 * cache if they are already exist. This prevents named 3045 * being locked to old servers. Don't lower trust of 3046 * existing record if the update is forced. Nothing 3047 * special to be done w.r.t stale data; it gets replaced 3048 * normally further down. 3049 */ 3050 if (ACTIVE(header, now) && header->type == dns_rdatatype_ns && 3051 !header_nx && !newheader_nx && 3052 header->trust >= newheader->trust && 3053 dns_rdataslab_equalx((unsigned char *)header, 3054 (unsigned char *)newheader, 3055 (unsigned int)(sizeof(*newheader)), 3056 qpdb->common.rdclass, 3057 (dns_rdatatype_t)header->type)) 3058 { 3059 /* 3060 * Honour the new ttl if it is less than the 3061 * older one. 3062 */ 3063 if (header->ttl > newheader->ttl) { 3064 setttl(header, newheader->ttl); 3065 } 3066 if (header->last_used != now) { 3067 ISC_LIST_UNLINK( 3068 qpdb->lru[HEADERNODE(header)->locknum], 3069 header, link); 3070 header->last_used = now; 3071 ISC_LIST_PREPEND( 3072 qpdb->lru[HEADERNODE(header)->locknum], 3073 header, link); 3074 } 3075 if (header->noqname == NULL && 3076 newheader->noqname != NULL) 3077 { 3078 header->noqname = newheader->noqname; 3079 newheader->noqname = NULL; 3080 } 3081 if (header->closest == NULL && 3082 newheader->closest != NULL) 3083 { 3084 header->closest = newheader->closest; 3085 newheader->closest = NULL; 3086 } 3087 dns_slabheader_destroy(&newheader); 3088 if (addedrdataset != NULL) { 3089 bindrdataset(qpdb, qpnode, header, now, 3090 nlocktype, tlocktype, 3091 addedrdataset DNS__DB_FLARG_PASS); 3092 } 3093 return ISC_R_SUCCESS; 3094 } 3095 3096 /* 3097 * If we have will be replacing a NS RRset force its TTL 3098 * to be no more than the current NS RRset's TTL. This 3099 * ensures the delegations that are withdrawn are honoured. 3100 */ 3101 if (ACTIVE(header, now) && header->type == dns_rdatatype_ns && 3102 !header_nx && !newheader_nx && 3103 header->trust <= newheader->trust) 3104 { 3105 if (newheader->ttl > header->ttl) { 3106 newheader->ttl = header->ttl; 3107 } 3108 } 3109 if (ACTIVE(header, now) && 3110 (options & DNS_DBADD_PREFETCH) == 0 && 3111 (header->type == dns_rdatatype_a || 3112 header->type == dns_rdatatype_aaaa || 3113 header->type == dns_rdatatype_ds || 3114 header->type == DNS_SIGTYPE(dns_rdatatype_ds)) && 3115 !header_nx && !newheader_nx && 3116 header->trust >= newheader->trust && 3117 dns_rdataslab_equal((unsigned char *)header, 3118 (unsigned char *)newheader, 3119 (unsigned int)(sizeof(*newheader)))) 3120 { 3121 /* 3122 * Honour the new ttl if it is less than the 3123 * older one. 3124 */ 3125 if (header->ttl > newheader->ttl) { 3126 setttl(header, newheader->ttl); 3127 } 3128 if (header->last_used != now) { 3129 ISC_LIST_UNLINK( 3130 qpdb->lru[HEADERNODE(header)->locknum], 3131 header, link); 3132 header->last_used = now; 3133 ISC_LIST_PREPEND( 3134 qpdb->lru[HEADERNODE(header)->locknum], 3135 header, link); 3136 } 3137 if (header->noqname == NULL && 3138 newheader->noqname != NULL) 3139 { 3140 header->noqname = newheader->noqname; 3141 newheader->noqname = NULL; 3142 } 3143 if (header->closest == NULL && 3144 newheader->closest != NULL) 3145 { 3146 header->closest = newheader->closest; 3147 newheader->closest = NULL; 3148 } 3149 dns_slabheader_destroy(&newheader); 3150 if (addedrdataset != NULL) { 3151 bindrdataset(qpdb, qpnode, header, now, 3152 nlocktype, tlocktype, 3153 addedrdataset DNS__DB_FLARG_PASS); 3154 } 3155 return ISC_R_SUCCESS; 3156 } 3157 3158 if (loading) { 3159 newheader->down = NULL; 3160 idx = HEADERNODE(newheader)->locknum; 3161 if (ZEROTTL(newheader)) { 3162 newheader->last_used = qpdb->last_used + 1; 3163 ISC_LIST_APPEND(qpdb->lru[idx], newheader, 3164 link); 3165 } else { 3166 ISC_LIST_PREPEND(qpdb->lru[idx], newheader, 3167 link); 3168 } 3169 INSIST(qpdb->heaps != NULL); 3170 isc_heap_insert(qpdb->heaps[idx], newheader); 3171 newheader->heap = qpdb->heaps[idx]; 3172 3173 /* 3174 * There are no other references to 'header' when 3175 * loading, so we MAY clean up 'header' now. 3176 * Since we don't generate changed records when 3177 * loading, we MUST clean up 'header' now. 3178 */ 3179 if (topheader_prev != NULL) { 3180 topheader_prev->next = newheader; 3181 } else { 3182 qpnode->data = newheader; 3183 } 3184 newheader->next = topheader->next; 3185 dns_slabheader_destroy(&header); 3186 } else { 3187 idx = HEADERNODE(newheader)->locknum; 3188 INSIST(qpdb->heaps != NULL); 3189 isc_heap_insert(qpdb->heaps[idx], newheader); 3190 newheader->heap = qpdb->heaps[idx]; 3191 if (ZEROTTL(newheader)) { 3192 newheader->last_used = qpdb->last_used + 1; 3193 ISC_LIST_APPEND(qpdb->lru[idx], newheader, 3194 link); 3195 } else { 3196 ISC_LIST_PREPEND(qpdb->lru[idx], newheader, 3197 link); 3198 } 3199 if (topheader_prev != NULL) { 3200 topheader_prev->next = newheader; 3201 } else { 3202 qpnode->data = newheader; 3203 } 3204 newheader->next = topheader->next; 3205 newheader->down = topheader; 3206 topheader->next = newheader; 3207 qpnode->dirty = 1; 3208 mark_ancient(header); 3209 if (sigheader != NULL) { 3210 mark_ancient(sigheader); 3211 } 3212 } 3213 } else { 3214 /* 3215 * No non-IGNORED rdatasets of the given type exist at 3216 * this node. 3217 */ 3218 3219 /* 3220 * If we're trying to delete the type, don't bother. 3221 */ 3222 if (newheader_nx) { 3223 dns_slabheader_destroy(&newheader); 3224 return DNS_R_UNCHANGED; 3225 } 3226 3227 idx = HEADERNODE(newheader)->locknum; 3228 isc_heap_insert(qpdb->heaps[idx], newheader); 3229 newheader->heap = qpdb->heaps[idx]; 3230 if (ZEROTTL(newheader)) { 3231 ISC_LIST_APPEND(qpdb->lru[idx], newheader, link); 3232 } else { 3233 ISC_LIST_PREPEND(qpdb->lru[idx], newheader, link); 3234 } 3235 3236 if (topheader != NULL) { 3237 /* 3238 * We have a list of rdatasets of the given type, 3239 * but they're all marked IGNORE. We simply insert 3240 * the new rdataset at the head of the list. 3241 * 3242 * Ignored rdatasets cannot occur during loading, so 3243 * we INSIST on it. 3244 */ 3245 INSIST(!loading); 3246 if (topheader_prev != NULL) { 3247 topheader_prev->next = newheader; 3248 } else { 3249 qpnode->data = newheader; 3250 } 3251 newheader->next = topheader->next; 3252 newheader->down = topheader; 3253 topheader->next = newheader; 3254 qpnode->dirty = 1; 3255 } else { 3256 /* 3257 * No rdatasets of the given type exist at the node. 3258 */ 3259 INSIST(newheader->down == NULL); 3260 3261 if (prio_header(newheader)) { 3262 /* This is a priority type, prepend it */ 3263 newheader->next = qpnode->data; 3264 qpnode->data = newheader; 3265 } else if (prioheader != NULL) { 3266 /* Append after the priority headers */ 3267 newheader->next = prioheader->next; 3268 prioheader->next = newheader; 3269 } else { 3270 /* There were no priority headers */ 3271 newheader->next = qpnode->data; 3272 qpnode->data = newheader; 3273 } 3274 3275 if (overmaxtype(qpdb, ntypes)) { 3276 if (expireheader == NULL) { 3277 expireheader = newheader; 3278 } 3279 if (NEGATIVE(newheader) && 3280 !prio_header(newheader)) 3281 { 3282 /* 3283 * Add the new non-priority negative 3284 * header to the database only 3285 * temporarily. 3286 */ 3287 expireheader = newheader; 3288 } 3289 3290 mark_ancient(expireheader); 3291 /* 3292 * FIXME: In theory, we should mark the RRSIG 3293 * and the header at the same time, but there is 3294 * no direct link between those two header, so 3295 * we would have to check the whole list again. 3296 */ 3297 } 3298 } 3299 } 3300 3301 if (addedrdataset != NULL) { 3302 bindrdataset(qpdb, qpnode, newheader, now, nlocktype, tlocktype, 3303 addedrdataset DNS__DB_FLARG_PASS); 3304 } 3305 3306 return ISC_R_SUCCESS; 3307 } 3308 3309 static isc_result_t 3310 addnoqname(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset, 3311 dns_rdataset_t *rdataset) { 3312 isc_result_t result; 3313 dns_slabheader_proof_t *noqname = NULL; 3314 dns_name_t name = DNS_NAME_INITEMPTY; 3315 dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; 3316 isc_region_t r1, r2; 3317 3318 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig); 3319 RUNTIME_CHECK(result == ISC_R_SUCCESS); 3320 3321 result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset); 3322 if (result != ISC_R_SUCCESS) { 3323 goto cleanup; 3324 } 3325 3326 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset); 3327 if (result != ISC_R_SUCCESS) { 3328 goto cleanup; 3329 } 3330 3331 noqname = isc_mem_get(mctx, sizeof(*noqname)); 3332 *noqname = (dns_slabheader_proof_t){ 3333 .neg = r1.base, 3334 .negsig = r2.base, 3335 .type = neg.type, 3336 .name = DNS_NAME_INITEMPTY, 3337 }; 3338 dns_name_dup(&name, mctx, &noqname->name); 3339 newheader->noqname = noqname; 3340 3341 cleanup: 3342 dns_rdataset_disassociate(&neg); 3343 dns_rdataset_disassociate(&negsig); 3344 3345 return result; 3346 } 3347 3348 static isc_result_t 3349 addclosest(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset, 3350 dns_rdataset_t *rdataset) { 3351 isc_result_t result; 3352 dns_slabheader_proof_t *closest = NULL; 3353 dns_name_t name = DNS_NAME_INITEMPTY; 3354 dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; 3355 isc_region_t r1, r2; 3356 3357 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig); 3358 RUNTIME_CHECK(result == ISC_R_SUCCESS); 3359 3360 result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset); 3361 if (result != ISC_R_SUCCESS) { 3362 goto cleanup; 3363 } 3364 3365 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset); 3366 if (result != ISC_R_SUCCESS) { 3367 goto cleanup; 3368 } 3369 3370 closest = isc_mem_get(mctx, sizeof(*closest)); 3371 *closest = (dns_slabheader_proof_t){ 3372 .neg = r1.base, 3373 .negsig = r2.base, 3374 .name = DNS_NAME_INITEMPTY, 3375 .type = neg.type, 3376 }; 3377 dns_name_dup(&name, mctx, &closest->name); 3378 newheader->closest = closest; 3379 3380 cleanup: 3381 dns_rdataset_disassociate(&neg); 3382 dns_rdataset_disassociate(&negsig); 3383 return result; 3384 } 3385 3386 static void 3387 expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum, 3388 isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, 3389 isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG); 3390 3391 static isc_result_t 3392 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 3393 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, 3394 dns_rdataset_t *addedrdataset DNS__DB_FLARG) { 3395 qpcache_t *qpdb = (qpcache_t *)db; 3396 qpcnode_t *qpnode = (qpcnode_t *)node; 3397 isc_region_t region; 3398 dns_slabheader_t *newheader = NULL; 3399 isc_result_t result; 3400 bool delegating = false; 3401 bool newnsec; 3402 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 3403 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3404 bool cache_is_overmem = false; 3405 dns_fixedname_t fixed; 3406 dns_name_t *name = NULL; 3407 3408 REQUIRE(VALID_QPDB(qpdb)); 3409 REQUIRE(version == NULL); 3410 3411 if (now == 0) { 3412 now = isc_stdtime_now(); 3413 } 3414 3415 result = dns_rdataslab_fromrdataset(rdataset, qpdb->common.mctx, 3416 ®ion, sizeof(dns_slabheader_t), 3417 qpdb->maxrrperset); 3418 if (result != ISC_R_SUCCESS) { 3419 if (result == DNS_R_TOOMANYRECORDS) { 3420 dns__db_logtoomanyrecords((dns_db_t *)qpdb, 3421 &qpnode->name, rdataset->type, 3422 "adding", qpdb->maxrrperset); 3423 } 3424 return result; 3425 } 3426 3427 name = dns_fixedname_initname(&fixed); 3428 dns_name_copy(&qpnode->name, name); 3429 dns_rdataset_getownercase(rdataset, name); 3430 3431 newheader = (dns_slabheader_t *)region.base; 3432 *newheader = (dns_slabheader_t){ 3433 .type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers), 3434 .trust = rdataset->trust, 3435 .last_used = now, 3436 .node = qpnode, 3437 }; 3438 3439 dns_slabheader_reset(newheader, db, node); 3440 setttl(newheader, rdataset->ttl + now); 3441 if (rdataset->ttl == 0U) { 3442 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL); 3443 } 3444 atomic_init(&newheader->count, 3445 atomic_fetch_add_relaxed(&init_count, 1)); 3446 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) { 3447 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_PREFETCH); 3448 } 3449 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) { 3450 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NEGATIVE); 3451 } 3452 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) { 3453 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NXDOMAIN); 3454 } 3455 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) { 3456 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_OPTOUT); 3457 } 3458 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { 3459 result = addnoqname(qpdb->common.mctx, newheader, 3460 qpdb->maxrrperset, rdataset); 3461 if (result != ISC_R_SUCCESS) { 3462 dns_slabheader_destroy(&newheader); 3463 return result; 3464 } 3465 } 3466 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { 3467 result = addclosest(qpdb->common.mctx, newheader, 3468 qpdb->maxrrperset, rdataset); 3469 if (result != ISC_R_SUCCESS) { 3470 dns_slabheader_destroy(&newheader); 3471 return result; 3472 } 3473 } 3474 3475 /* 3476 * If we're adding a delegation type (which would be an NS or DNAME 3477 * for a zone, but only DNAME counts for a cache), we need to set 3478 * the callback bit on the node. 3479 */ 3480 if (rdataset->type == dns_rdatatype_dname) { 3481 delegating = true; 3482 } 3483 3484 /* 3485 * Add to the auxiliary NSEC tree if we're adding an NSEC record. 3486 */ 3487 TREE_RDLOCK(&qpdb->tree_lock, &tlocktype); 3488 if (qpnode->nsec != DNS_DB_NSEC_HAS_NSEC && 3489 rdataset->type == dns_rdatatype_nsec) 3490 { 3491 newnsec = true; 3492 } else { 3493 newnsec = false; 3494 } 3495 TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 3496 3497 /* 3498 * If we're adding a delegation type, adding to the auxiliary NSEC 3499 * tree, or the DB is a cache in an overmem state, hold an 3500 * exclusive lock on the tree. In the latter case the lock does 3501 * not necessarily have to be acquired but it will help purge 3502 * ancient entries more effectively. 3503 */ 3504 if (isc_mem_isovermem(qpdb->common.mctx)) { 3505 cache_is_overmem = true; 3506 } 3507 if (delegating || newnsec || cache_is_overmem) { 3508 TREE_WRLOCK(&qpdb->tree_lock, &tlocktype); 3509 } 3510 3511 if (cache_is_overmem) { 3512 overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS); 3513 } 3514 3515 NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3516 3517 if (qpdb->rrsetstats != NULL) { 3518 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_STATCOUNT); 3519 update_rrsetstats(qpdb->rrsetstats, newheader->type, 3520 atomic_load_acquire(&newheader->attributes), 3521 true); 3522 } 3523 3524 expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype, now, 3525 cache_is_overmem DNS__DB_FLARG_PASS); 3526 3527 /* 3528 * If we've been holding a write lock on the tree just for 3529 * cleaning, we can release it now. However, we still need the 3530 * node lock. 3531 */ 3532 if (tlocktype == isc_rwlocktype_write && !delegating && !newnsec) { 3533 TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 3534 } 3535 3536 result = ISC_R_SUCCESS; 3537 if (newnsec) { 3538 qpcnode_t *nsecnode = NULL; 3539 3540 result = dns_qp_getname(qpdb->nsec, name, (void **)&nsecnode, 3541 NULL); 3542 if (result == ISC_R_SUCCESS) { 3543 result = ISC_R_SUCCESS; 3544 } else { 3545 INSIST(nsecnode == NULL); 3546 nsecnode = new_qpcnode(qpdb, name); 3547 nsecnode->nsec = DNS_DB_NSEC_NSEC; 3548 result = dns_qp_insert(qpdb->nsec, nsecnode, 0); 3549 INSIST(result == ISC_R_SUCCESS); 3550 qpcnode_detach(&nsecnode); 3551 } 3552 qpnode->nsec = DNS_DB_NSEC_HAS_NSEC; 3553 } 3554 3555 if (result == ISC_R_SUCCESS) { 3556 result = add(qpdb, qpnode, name, newheader, options, false, 3557 addedrdataset, now, nlocktype, 3558 tlocktype DNS__DB_FLARG_PASS); 3559 } 3560 if (result == ISC_R_SUCCESS && delegating) { 3561 qpnode->delegating = 1; 3562 } 3563 3564 NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3565 3566 if (tlocktype != isc_rwlocktype_none) { 3567 TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 3568 } 3569 INSIST(tlocktype == isc_rwlocktype_none); 3570 3571 return result; 3572 } 3573 3574 static isc_result_t 3575 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 3576 dns_rdatatype_t type, dns_rdatatype_t covers DNS__DB_FLARG) { 3577 qpcache_t *qpdb = (qpcache_t *)db; 3578 qpcnode_t *qpnode = (qpcnode_t *)node; 3579 isc_result_t result; 3580 dns_slabheader_t *newheader = NULL; 3581 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3582 3583 REQUIRE(VALID_QPDB(qpdb)); 3584 REQUIRE(version == NULL); 3585 3586 if (type == dns_rdatatype_any) { 3587 return ISC_R_NOTIMPLEMENTED; 3588 } 3589 if (type == dns_rdatatype_rrsig && covers == 0) { 3590 return ISC_R_NOTIMPLEMENTED; 3591 } 3592 3593 newheader = dns_slabheader_new(db, node); 3594 newheader->type = DNS_TYPEPAIR_VALUE(type, covers); 3595 setttl(newheader, 0); 3596 atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); 3597 3598 NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3599 result = add(qpdb, qpnode, NULL, newheader, DNS_DBADD_FORCE, false, 3600 NULL, 0, nlocktype, 3601 isc_rwlocktype_none DNS__DB_FLARG_PASS); 3602 NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3603 3604 return result; 3605 } 3606 3607 static unsigned int 3608 nodecount(dns_db_t *db, dns_dbtree_t tree) { 3609 qpcache_t *qpdb = (qpcache_t *)db; 3610 dns_qp_memusage_t mu; 3611 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 3612 3613 REQUIRE(VALID_QPDB(qpdb)); 3614 3615 TREE_RDLOCK(&qpdb->tree_lock, &tlocktype); 3616 switch (tree) { 3617 case dns_dbtree_main: 3618 mu = dns_qp_memusage(qpdb->tree); 3619 break; 3620 case dns_dbtree_nsec: 3621 mu = dns_qp_memusage(qpdb->nsec); 3622 break; 3623 default: 3624 UNREACHABLE(); 3625 } 3626 TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 3627 3628 return mu.leaves; 3629 } 3630 3631 static isc_result_t 3632 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) { 3633 qpcache_t *qpdb = (qpcache_t *)db; 3634 qpcnode_t *onode = NULL; 3635 isc_result_t result = ISC_R_SUCCESS; 3636 3637 REQUIRE(VALID_QPDB(qpdb)); 3638 REQUIRE(nodep != NULL && *nodep == NULL); 3639 3640 /* Note that the access to origin_node doesn't require a DB lock */ 3641 onode = (qpcnode_t *)qpdb->origin_node; 3642 if (onode != NULL) { 3643 newref(qpdb, onode, isc_rwlocktype_none, 3644 isc_rwlocktype_none DNS__DB_FLARG_PASS); 3645 *nodep = qpdb->origin_node; 3646 } else { 3647 result = ISC_R_NOTFOUND; 3648 } 3649 3650 return result; 3651 } 3652 3653 static void 3654 locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { 3655 qpcache_t *qpdb = (qpcache_t *)db; 3656 qpcnode_t *qpnode = (qpcnode_t *)node; 3657 3658 RWLOCK(&qpdb->node_locks[qpnode->locknum].lock, type); 3659 } 3660 3661 static void 3662 unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { 3663 qpcache_t *qpdb = (qpcache_t *)db; 3664 qpcnode_t *qpnode = (qpcnode_t *)node; 3665 3666 RWUNLOCK(&qpdb->node_locks[qpnode->locknum].lock, type); 3667 } 3668 3669 isc_result_t 3670 dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin, 3671 dns_dbtype_t type, dns_rdataclass_t rdclass, 3672 unsigned int argc, char *argv[], 3673 void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) { 3674 qpcache_t *qpdb = NULL; 3675 isc_mem_t *hmctx = mctx; 3676 isc_loop_t *loop = isc_loop(); 3677 int i; 3678 3679 /* This database implementation only supports cache semantics */ 3680 REQUIRE(type == dns_dbtype_cache); 3681 REQUIRE(loop != NULL); 3682 3683 qpdb = isc_mem_get(mctx, sizeof(*qpdb)); 3684 *qpdb = (qpcache_t){ 3685 .common.methods = &qpdb_cachemethods, 3686 .common.origin = DNS_NAME_INITEMPTY, 3687 .common.rdclass = rdclass, 3688 .common.attributes = DNS_DBATTR_CACHE, 3689 .loopmgr = isc_loop_getloopmgr(loop), 3690 }; 3691 3692 isc_refcount_init(&qpdb->common.references, 1); 3693 3694 /* 3695 * If argv[0] exists, it points to a memory context to use for heap 3696 */ 3697 if (argc != 0) { 3698 hmctx = (isc_mem_t *)argv[0]; 3699 } 3700 3701 isc_rwlock_init(&qpdb->lock); 3702 TREE_INITLOCK(&qpdb->tree_lock); 3703 3704 qpdb->node_lock_count = isc_loopmgr_nloops(qpdb->loopmgr); 3705 qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count, 3706 sizeof(db_nodelock_t)); 3707 3708 dns_rdatasetstats_create(mctx, &qpdb->rrsetstats); 3709 qpdb->lru = isc_mem_cget(mctx, qpdb->node_lock_count, 3710 sizeof(dns_slabheaderlist_t)); 3711 for (i = 0; i < (int)qpdb->node_lock_count; i++) { 3712 ISC_LIST_INIT(qpdb->lru[i]); 3713 } 3714 3715 /* 3716 * Create the heaps. 3717 */ 3718 qpdb->heaps = isc_mem_cget(hmctx, qpdb->node_lock_count, 3719 sizeof(isc_heap_t *)); 3720 for (i = 0; i < (int)qpdb->node_lock_count; i++) { 3721 isc_heap_create(hmctx, ttl_sooner, set_index, 0, 3722 &qpdb->heaps[i]); 3723 } 3724 3725 /* 3726 * Create deadnode lists. 3727 */ 3728 qpdb->deadnodes = isc_mem_cget(mctx, qpdb->node_lock_count, 3729 sizeof(qpdb->deadnodes[0])); 3730 for (i = 0; i < (int)(qpdb->node_lock_count); i++) { 3731 isc_queue_init(&qpdb->deadnodes[i]); 3732 } 3733 3734 qpdb->active = qpdb->node_lock_count; 3735 3736 for (i = 0; i < (int)(qpdb->node_lock_count); i++) { 3737 NODE_INITLOCK(&qpdb->node_locks[i].lock); 3738 isc_refcount_init(&qpdb->node_locks[i].references, 0); 3739 qpdb->node_locks[i].exiting = false; 3740 } 3741 3742 /* 3743 * Attach to the mctx. The database will persist so long as there 3744 * are references to it, and attaching to the mctx ensures that our 3745 * mctx won't disappear out from under us. 3746 */ 3747 isc_mem_attach(mctx, &qpdb->common.mctx); 3748 isc_mem_attach(hmctx, &qpdb->hmctx); 3749 3750 /* 3751 * Make a copy of the origin name. 3752 */ 3753 dns_name_dupwithoffsets(origin, mctx, &qpdb->common.origin); 3754 3755 /* 3756 * Make the qp tries. 3757 */ 3758 dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->tree); 3759 dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->nsec); 3760 3761 qpdb->common.magic = DNS_DB_MAGIC; 3762 qpdb->common.impmagic = QPDB_MAGIC; 3763 3764 *dbp = (dns_db_t *)qpdb; 3765 3766 return ISC_R_SUCCESS; 3767 } 3768 3769 /* 3770 * Rdataset Iterator Methods 3771 */ 3772 3773 static void 3774 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { 3775 qpc_rditer_t *iterator = NULL; 3776 3777 iterator = (qpc_rditer_t *)(*iteratorp); 3778 3779 dns__db_detachnode(iterator->common.db, 3780 &iterator->common.node DNS__DB_FLARG_PASS); 3781 isc_mem_put(iterator->common.db->mctx, iterator, sizeof(*iterator)); 3782 3783 *iteratorp = NULL; 3784 } 3785 3786 static bool 3787 iterator_active(qpcache_t *qpdb, qpc_rditer_t *iterator, 3788 dns_slabheader_t *header) { 3789 dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb); 3790 3791 /* 3792 * Is this a "this rdataset doesn't exist" record? 3793 */ 3794 if (NONEXISTENT(header)) { 3795 return false; 3796 } 3797 3798 /* 3799 * If this header is still active then return it. 3800 */ 3801 if (ACTIVE(header, iterator->common.now)) { 3802 return true; 3803 } 3804 3805 /* 3806 * If we are not returning stale records or the rdataset is 3807 * too old don't return it. 3808 */ 3809 if (!STALEOK(iterator) || (iterator->common.now > stale_ttl)) { 3810 return false; 3811 } 3812 return true; 3813 } 3814 3815 static isc_result_t 3816 rdatasetiter_first(dns_rdatasetiter_t *it DNS__DB_FLARG) { 3817 qpc_rditer_t *iterator = (qpc_rditer_t *)it; 3818 qpcache_t *qpdb = (qpcache_t *)(iterator->common.db); 3819 qpcnode_t *qpnode = iterator->common.node; 3820 dns_slabheader_t *header = NULL, *top_next = NULL; 3821 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3822 3823 NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3824 3825 for (header = qpnode->data; header != NULL; header = top_next) { 3826 top_next = header->next; 3827 do { 3828 if (EXPIREDOK(iterator)) { 3829 if (!NONEXISTENT(header)) { 3830 break; 3831 } 3832 header = header->down; 3833 } else if (!IGNORE(header)) { 3834 if (!iterator_active(qpdb, iterator, header)) { 3835 header = NULL; 3836 } 3837 break; 3838 } else { 3839 header = header->down; 3840 } 3841 } while (header != NULL); 3842 if (header != NULL) { 3843 break; 3844 } 3845 } 3846 3847 NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3848 3849 iterator->current = header; 3850 3851 if (header == NULL) { 3852 return ISC_R_NOMORE; 3853 } 3854 3855 return ISC_R_SUCCESS; 3856 } 3857 3858 static isc_result_t 3859 rdatasetiter_next(dns_rdatasetiter_t *it DNS__DB_FLARG) { 3860 qpc_rditer_t *iterator = (qpc_rditer_t *)it; 3861 qpcache_t *qpdb = (qpcache_t *)(iterator->common.db); 3862 qpcnode_t *qpnode = iterator->common.node; 3863 dns_slabheader_t *header = NULL, *top_next = NULL; 3864 dns_typepair_t type, negtype; 3865 dns_rdatatype_t rdtype, covers; 3866 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3867 bool expiredok = EXPIREDOK(iterator); 3868 3869 header = iterator->current; 3870 if (header == NULL) { 3871 return ISC_R_NOMORE; 3872 } 3873 3874 NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3875 3876 type = header->type; 3877 rdtype = DNS_TYPEPAIR_TYPE(header->type); 3878 if (NEGATIVE(header)) { 3879 covers = DNS_TYPEPAIR_COVERS(header->type); 3880 negtype = DNS_TYPEPAIR_VALUE(covers, 0); 3881 } else { 3882 negtype = DNS_TYPEPAIR_VALUE(0, rdtype); 3883 } 3884 3885 /* 3886 * Find the start of the header chain for the next type 3887 * by walking back up the list. 3888 */ 3889 top_next = header->next; 3890 while (top_next != NULL && 3891 (top_next->type == type || top_next->type == negtype)) 3892 { 3893 top_next = top_next->next; 3894 } 3895 if (expiredok) { 3896 /* 3897 * Keep walking down the list if possible or 3898 * start the next type. 3899 */ 3900 header = header->down != NULL ? header->down : top_next; 3901 } else { 3902 header = top_next; 3903 } 3904 for (; header != NULL; header = top_next) { 3905 top_next = header->next; 3906 do { 3907 if (expiredok) { 3908 if (!NONEXISTENT(header)) { 3909 break; 3910 } 3911 header = header->down; 3912 } else if (!IGNORE(header)) { 3913 if (!iterator_active(qpdb, iterator, header)) { 3914 header = NULL; 3915 } 3916 break; 3917 } else { 3918 header = header->down; 3919 } 3920 } while (header != NULL); 3921 if (header != NULL) { 3922 break; 3923 } 3924 /* 3925 * Find the start of the header chain for the next type 3926 * by walking back up the list. 3927 */ 3928 while (top_next != NULL && 3929 (top_next->type == type || top_next->type == negtype)) 3930 { 3931 top_next = top_next->next; 3932 } 3933 } 3934 3935 NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3936 3937 iterator->current = header; 3938 3939 if (header == NULL) { 3940 return ISC_R_NOMORE; 3941 } 3942 3943 return ISC_R_SUCCESS; 3944 } 3945 3946 static void 3947 rdatasetiter_current(dns_rdatasetiter_t *it, 3948 dns_rdataset_t *rdataset DNS__DB_FLARG) { 3949 qpc_rditer_t *iterator = (qpc_rditer_t *)it; 3950 qpcache_t *qpdb = (qpcache_t *)(iterator->common.db); 3951 qpcnode_t *qpnode = iterator->common.node; 3952 dns_slabheader_t *header = NULL; 3953 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3954 3955 header = iterator->current; 3956 REQUIRE(header != NULL); 3957 3958 NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3959 3960 bindrdataset(qpdb, qpnode, header, iterator->common.now, nlocktype, 3961 isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS); 3962 3963 NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 3964 } 3965 3966 /* 3967 * Database Iterator Methods 3968 */ 3969 3970 static void 3971 reference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) { 3972 qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 3973 qpcnode_t *node = qpdbiter->node; 3974 3975 if (node == NULL) { 3976 return; 3977 } 3978 3979 INSIST(qpdbiter->tree_locked != isc_rwlocktype_none); 3980 reactivate_node(qpdb, node, qpdbiter->tree_locked DNS__DB_FLARG_PASS); 3981 } 3982 3983 static void 3984 dereference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) { 3985 qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 3986 qpcnode_t *node = qpdbiter->node; 3987 isc_rwlock_t *lock = NULL; 3988 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3989 isc_rwlocktype_t tlocktype = qpdbiter->tree_locked; 3990 3991 if (node == NULL) { 3992 return; 3993 } 3994 3995 REQUIRE(tlocktype != isc_rwlocktype_write); 3996 3997 lock = &qpdb->node_locks[node->locknum].lock; 3998 NODE_RDLOCK(lock, &nlocktype); 3999 decref(qpdb, node, &nlocktype, &qpdbiter->tree_locked, 4000 false DNS__DB_FLARG_PASS); 4001 NODE_UNLOCK(lock, &nlocktype); 4002 4003 INSIST(qpdbiter->tree_locked == tlocktype); 4004 4005 qpdbiter->node = NULL; 4006 } 4007 4008 static void 4009 resume_iteration(qpc_dbit_t *qpdbiter, bool continuing) { 4010 qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 4011 4012 REQUIRE(qpdbiter->paused); 4013 REQUIRE(qpdbiter->tree_locked == isc_rwlocktype_none); 4014 4015 TREE_RDLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked); 4016 4017 /* 4018 * If we're being called from dbiterator_next or _prev, 4019 * then we may need to reinitialize the iterator to the current 4020 * name. The tree could have changed while it was unlocked, 4021 * would make the iterator traversal inconsistent. 4022 * 4023 * As long as the iterator is holding a reference to 4024 * qpdbiter->node, the node won't be removed from the tree, 4025 * so the lookup should always succeed. 4026 */ 4027 if (continuing && qpdbiter->node != NULL) { 4028 isc_result_t result; 4029 result = dns_qp_lookup(qpdb->tree, qpdbiter->name, NULL, 4030 &qpdbiter->iter, NULL, NULL, NULL); 4031 INSIST(result == ISC_R_SUCCESS); 4032 } 4033 4034 qpdbiter->paused = false; 4035 } 4036 4037 static void 4038 dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG) { 4039 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)(*iteratorp); 4040 qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 4041 dns_db_t *db = NULL; 4042 4043 if (qpdbiter->tree_locked == isc_rwlocktype_read) { 4044 TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked); 4045 } 4046 INSIST(qpdbiter->tree_locked == isc_rwlocktype_none); 4047 4048 dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4049 4050 dns_db_attach(qpdbiter->common.db, &db); 4051 dns_db_detach(&qpdbiter->common.db); 4052 4053 isc_mem_put(db->mctx, qpdbiter, sizeof(*qpdbiter)); 4054 dns_db_detach(&db); 4055 4056 *iteratorp = NULL; 4057 } 4058 4059 static isc_result_t 4060 dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4061 isc_result_t result; 4062 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4063 qpcache_t *qpdb = (qpcache_t *)iterator->db; 4064 4065 if (qpdbiter->result != ISC_R_SUCCESS && 4066 qpdbiter->result != ISC_R_NOTFOUND && 4067 qpdbiter->result != DNS_R_PARTIALMATCH && 4068 qpdbiter->result != ISC_R_NOMORE) 4069 { 4070 return qpdbiter->result; 4071 } 4072 4073 if (qpdbiter->paused) { 4074 resume_iteration(qpdbiter, false); 4075 } 4076 4077 dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4078 4079 dns_qpiter_init(qpdb->tree, &qpdbiter->iter); 4080 result = dns_qpiter_next(&qpdbiter->iter, NULL, 4081 (void **)&qpdbiter->node, NULL); 4082 4083 if (result == ISC_R_SUCCESS) { 4084 dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 4085 reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4086 } else { 4087 INSIST(result == ISC_R_NOMORE); /* The tree is empty. */ 4088 qpdbiter->node = NULL; 4089 } 4090 4091 qpdbiter->result = result; 4092 4093 if (result != ISC_R_SUCCESS) { 4094 ENSURE(!qpdbiter->paused); 4095 } 4096 4097 return result; 4098 } 4099 4100 static isc_result_t 4101 dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4102 isc_result_t result; 4103 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4104 qpcache_t *qpdb = (qpcache_t *)iterator->db; 4105 4106 if (qpdbiter->result != ISC_R_SUCCESS && 4107 qpdbiter->result != ISC_R_NOTFOUND && 4108 qpdbiter->result != DNS_R_PARTIALMATCH && 4109 qpdbiter->result != ISC_R_NOMORE) 4110 { 4111 return qpdbiter->result; 4112 } 4113 4114 if (qpdbiter->paused) { 4115 resume_iteration(qpdbiter, false); 4116 } 4117 4118 dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4119 4120 dns_qpiter_init(qpdb->tree, &qpdbiter->iter); 4121 result = dns_qpiter_prev(&qpdbiter->iter, NULL, 4122 (void **)&qpdbiter->node, NULL); 4123 4124 if (result == ISC_R_SUCCESS) { 4125 dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 4126 reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4127 } else { 4128 INSIST(result == ISC_R_NOMORE); /* The tree is empty. */ 4129 qpdbiter->node = NULL; 4130 } 4131 4132 qpdbiter->result = result; 4133 return result; 4134 } 4135 4136 static isc_result_t 4137 dbiterator_seek(dns_dbiterator_t *iterator, 4138 const dns_name_t *name DNS__DB_FLARG) { 4139 isc_result_t result; 4140 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4141 qpcache_t *qpdb = (qpcache_t *)iterator->db; 4142 4143 if (qpdbiter->result != ISC_R_SUCCESS && 4144 qpdbiter->result != ISC_R_NOTFOUND && 4145 qpdbiter->result != DNS_R_PARTIALMATCH && 4146 qpdbiter->result != ISC_R_NOMORE) 4147 { 4148 return qpdbiter->result; 4149 } 4150 4151 if (qpdbiter->paused) { 4152 resume_iteration(qpdbiter, false); 4153 } 4154 4155 dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4156 4157 result = dns_qp_lookup(qpdb->tree, name, NULL, &qpdbiter->iter, NULL, 4158 (void **)&qpdbiter->node, NULL); 4159 4160 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) { 4161 dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 4162 reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4163 } else { 4164 qpdbiter->node = NULL; 4165 } 4166 4167 qpdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS 4168 : result; 4169 return result; 4170 } 4171 4172 static isc_result_t 4173 dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4174 isc_result_t result; 4175 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4176 4177 REQUIRE(qpdbiter->node != NULL); 4178 4179 if (qpdbiter->result != ISC_R_SUCCESS) { 4180 return qpdbiter->result; 4181 } 4182 4183 if (qpdbiter->paused) { 4184 resume_iteration(qpdbiter, true); 4185 } 4186 4187 dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4188 4189 result = dns_qpiter_prev(&qpdbiter->iter, NULL, 4190 (void **)&qpdbiter->node, NULL); 4191 4192 if (result == ISC_R_SUCCESS) { 4193 dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 4194 reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4195 } else { 4196 INSIST(result == ISC_R_NOMORE); 4197 qpdbiter->node = NULL; 4198 } 4199 4200 qpdbiter->result = result; 4201 return result; 4202 } 4203 4204 static isc_result_t 4205 dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4206 isc_result_t result; 4207 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4208 4209 REQUIRE(qpdbiter->node != NULL); 4210 4211 if (qpdbiter->result != ISC_R_SUCCESS) { 4212 return qpdbiter->result; 4213 } 4214 4215 if (qpdbiter->paused) { 4216 resume_iteration(qpdbiter, true); 4217 } 4218 4219 dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4220 4221 result = dns_qpiter_next(&qpdbiter->iter, NULL, 4222 (void **)&qpdbiter->node, NULL); 4223 4224 if (result == ISC_R_SUCCESS) { 4225 dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 4226 reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 4227 } else { 4228 INSIST(result == ISC_R_NOMORE); 4229 qpdbiter->node = NULL; 4230 } 4231 4232 qpdbiter->result = result; 4233 return result; 4234 } 4235 4236 static isc_result_t 4237 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 4238 dns_name_t *name DNS__DB_FLARG) { 4239 qpcache_t *qpdb = (qpcache_t *)iterator->db; 4240 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4241 qpcnode_t *node = qpdbiter->node; 4242 4243 REQUIRE(qpdbiter->result == ISC_R_SUCCESS); 4244 REQUIRE(node != NULL); 4245 4246 if (qpdbiter->paused) { 4247 resume_iteration(qpdbiter, false); 4248 } 4249 4250 if (name != NULL) { 4251 dns_name_copy(&node->name, name); 4252 } 4253 4254 newref(qpdb, node, isc_rwlocktype_none, 4255 qpdbiter->tree_locked DNS__DB_FLARG_PASS); 4256 4257 *nodep = qpdbiter->node; 4258 return ISC_R_SUCCESS; 4259 } 4260 4261 static isc_result_t 4262 dbiterator_pause(dns_dbiterator_t *iterator) { 4263 qpcache_t *qpdb = (qpcache_t *)iterator->db; 4264 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4265 4266 if (qpdbiter->result != ISC_R_SUCCESS && 4267 qpdbiter->result != ISC_R_NOTFOUND && 4268 qpdbiter->result != DNS_R_PARTIALMATCH && 4269 qpdbiter->result != ISC_R_NOMORE) 4270 { 4271 return qpdbiter->result; 4272 } 4273 4274 if (qpdbiter->paused) { 4275 return ISC_R_SUCCESS; 4276 } 4277 4278 qpdbiter->paused = true; 4279 4280 if (qpdbiter->tree_locked == isc_rwlocktype_read) { 4281 TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked); 4282 } 4283 INSIST(qpdbiter->tree_locked == isc_rwlocktype_none); 4284 4285 return ISC_R_SUCCESS; 4286 } 4287 4288 static isc_result_t 4289 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) { 4290 qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 4291 4292 if (qpdbiter->result != ISC_R_SUCCESS) { 4293 return qpdbiter->result; 4294 } 4295 4296 dns_name_copy(dns_rootname, name); 4297 return ISC_R_SUCCESS; 4298 } 4299 4300 static void 4301 deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED, 4302 void *data) { 4303 dns_slabheader_t *header = data; 4304 qpcache_t *qpdb = (qpcache_t *)header->db; 4305 4306 if (header->heap != NULL && header->heap_index != 0) { 4307 isc_heap_delete(header->heap, header->heap_index); 4308 } 4309 4310 update_rrsetstats(qpdb->rrsetstats, header->type, 4311 atomic_load_acquire(&header->attributes), false); 4312 4313 if (ISC_LINK_LINKED(header, link)) { 4314 int idx = HEADERNODE(header)->locknum; 4315 ISC_LIST_UNLINK(qpdb->lru[idx], header, link); 4316 } 4317 4318 if (header->noqname != NULL) { 4319 dns_slabheader_freeproof(db->mctx, &header->noqname); 4320 } 4321 if (header->closest != NULL) { 4322 dns_slabheader_freeproof(db->mctx, &header->closest); 4323 } 4324 } 4325 4326 /* 4327 * Caller must be holding the node write lock. 4328 */ 4329 static void 4330 expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum, 4331 isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, 4332 isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) { 4333 isc_heap_t *heap = qpdb->heaps[locknum]; 4334 4335 for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) { 4336 dns_slabheader_t *header = isc_heap_element(heap, 1); 4337 4338 if (header == NULL) { 4339 /* No headers left on this TTL heap; exit cleaning */ 4340 return; 4341 } 4342 4343 dns_ttl_t ttl = header->ttl; 4344 4345 if (!cache_is_overmem) { 4346 /* Only account for stale TTL if cache is not overmem */ 4347 ttl += STALE_TTL(header, qpdb); 4348 } 4349 4350 if (ttl >= now - QPDB_VIRTUAL) { 4351 /* 4352 * The header at the top of this TTL heap is not yet 4353 * eligible for expiry, so none of the other headers on 4354 * the same heap can be eligible for expiry, either; 4355 * exit cleaning. 4356 */ 4357 return; 4358 } 4359 4360 expireheader(header, nlocktypep, tlocktypep, 4361 dns_expire_ttl DNS__DB_FLARG_PASS); 4362 } 4363 } 4364 4365 static void 4366 setmaxrrperset(dns_db_t *db, uint32_t value) { 4367 qpcache_t *qpdb = (qpcache_t *)db; 4368 4369 REQUIRE(VALID_QPDB(qpdb)); 4370 4371 qpdb->maxrrperset = value; 4372 } 4373 4374 static void 4375 setmaxtypepername(dns_db_t *db, uint32_t value) { 4376 qpcache_t *qpdb = (qpcache_t *)db; 4377 4378 REQUIRE(VALID_QPDB(qpdb)); 4379 4380 qpdb->maxtypepername = value; 4381 } 4382 4383 static dns_dbmethods_t qpdb_cachemethods = { 4384 .destroy = qpdb_destroy, 4385 .findnode = findnode, 4386 .find = find, 4387 .findzonecut = findzonecut, 4388 .attachnode = attachnode, 4389 .detachnode = detachnode, 4390 .createiterator = createiterator, 4391 .findrdataset = findrdataset, 4392 .allrdatasets = allrdatasets, 4393 .addrdataset = addrdataset, 4394 .deleterdataset = deleterdataset, 4395 .nodecount = nodecount, 4396 .getoriginnode = getoriginnode, 4397 .getrrsetstats = getrrsetstats, 4398 .setcachestats = setcachestats, 4399 .setservestalettl = setservestalettl, 4400 .getservestalettl = getservestalettl, 4401 .setservestalerefresh = setservestalerefresh, 4402 .getservestalerefresh = getservestalerefresh, 4403 .locknode = locknode, 4404 .unlocknode = unlocknode, 4405 .expiredata = expiredata, 4406 .deletedata = deletedata, 4407 .setmaxrrperset = setmaxrrperset, 4408 .setmaxtypepername = setmaxtypepername, 4409 }; 4410 4411 static void 4412 qpcnode_destroy(qpcnode_t *data) { 4413 dns_slabheader_t *current = NULL, *next = NULL; 4414 4415 for (current = data->data; current != NULL; current = next) { 4416 dns_slabheader_t *down = current->down, *down_next = NULL; 4417 4418 next = current->next; 4419 4420 for (down = current->down; down != NULL; down = down_next) { 4421 down_next = down->down; 4422 dns_slabheader_destroy(&down); 4423 } 4424 4425 dns_slabheader_destroy(¤t); 4426 } 4427 4428 dns_name_free(&data->name, data->mctx); 4429 isc_mem_putanddetach(&data->mctx, data, sizeof(qpcnode_t)); 4430 } 4431 4432 #ifdef DNS_DB_NODETRACE 4433 ISC_REFCOUNT_STATIC_TRACE_IMPL(qpcnode, qpcnode_destroy); 4434 #else 4435 ISC_REFCOUNT_STATIC_IMPL(qpcnode, qpcnode_destroy); 4436 #endif 4437