1 /* $NetBSD: rbtdb.c,v 1.21 2025/01/26 16:25:24 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 /*! \file */ 17 18 #include <inttypes.h> 19 #include <stdbool.h> 20 #include <sys/mman.h> 21 22 #include <isc/ascii.h> 23 #include <isc/async.h> 24 #include <isc/atomic.h> 25 #include <isc/crc64.h> 26 #include <isc/file.h> 27 #include <isc/hash.h> 28 #include <isc/hashmap.h> 29 #include <isc/heap.h> 30 #include <isc/hex.h> 31 #include <isc/loop.h> 32 #include <isc/mem.h> 33 #include <isc/mutex.h> 34 #include <isc/once.h> 35 #include <isc/random.h> 36 #include <isc/refcount.h> 37 #include <isc/result.h> 38 #include <isc/rwlock.h> 39 #include <isc/serial.h> 40 #include <isc/stdio.h> 41 #include <isc/string.h> 42 #include <isc/time.h> 43 #include <isc/urcu.h> 44 #include <isc/util.h> 45 46 #include <dns/callbacks.h> 47 #include <dns/db.h> 48 #include <dns/dbiterator.h> 49 #include <dns/fixedname.h> 50 #include <dns/log.h> 51 #include <dns/masterdump.h> 52 #include <dns/nsec.h> 53 #include <dns/nsec3.h> 54 #include <dns/rbt.h> 55 #include <dns/rdata.h> 56 #include <dns/rdataset.h> 57 #include <dns/rdatasetiter.h> 58 #include <dns/rdataslab.h> 59 #include <dns/rdatastruct.h> 60 #include <dns/stats.h> 61 #include <dns/time.h> 62 #include <dns/view.h> 63 #include <dns/zone.h> 64 #include <dns/zonekey.h> 65 66 #include "db_p.h" 67 #include "rbtdb_p.h" 68 69 #define CHECK(op) \ 70 do { \ 71 result = (op); \ 72 if (result != ISC_R_SUCCESS) \ 73 goto failure; \ 74 } while (0) 75 76 #define EXISTS(header) \ 77 ((atomic_load_acquire(&(header)->attributes) & \ 78 DNS_SLABHEADERATTR_NONEXISTENT) == 0) 79 #define NONEXISTENT(header) \ 80 ((atomic_load_acquire(&(header)->attributes) & \ 81 DNS_SLABHEADERATTR_NONEXISTENT) != 0) 82 #define IGNORE(header) \ 83 ((atomic_load_acquire(&(header)->attributes) & \ 84 DNS_SLABHEADERATTR_IGNORE) != 0) 85 #define NXDOMAIN(header) \ 86 ((atomic_load_acquire(&(header)->attributes) & \ 87 DNS_SLABHEADERATTR_NXDOMAIN) != 0) 88 #define STALE(header) \ 89 ((atomic_load_acquire(&(header)->attributes) & \ 90 DNS_SLABHEADERATTR_STALE) != 0) 91 #define STALE_WINDOW(header) \ 92 ((atomic_load_acquire(&(header)->attributes) & \ 93 DNS_SLABHEADERATTR_STALE_WINDOW) != 0) 94 #define RESIGN(header) \ 95 ((atomic_load_acquire(&(header)->attributes) & \ 96 DNS_SLABHEADERATTR_RESIGN) != 0) 97 #define OPTOUT(header) \ 98 ((atomic_load_acquire(&(header)->attributes) & \ 99 DNS_SLABHEADERATTR_OPTOUT) != 0) 100 #define NEGATIVE(header) \ 101 ((atomic_load_acquire(&(header)->attributes) & \ 102 DNS_SLABHEADERATTR_NEGATIVE) != 0) 103 #define PREFETCH(header) \ 104 ((atomic_load_acquire(&(header)->attributes) & \ 105 DNS_SLABHEADERATTR_PREFETCH) != 0) 106 #define CASESET(header) \ 107 ((atomic_load_acquire(&(header)->attributes) & \ 108 DNS_SLABHEADERATTR_CASESET) != 0) 109 #define ZEROTTL(header) \ 110 ((atomic_load_acquire(&(header)->attributes) & \ 111 DNS_SLABHEADERATTR_ZEROTTL) != 0) 112 #define ANCIENT(header) \ 113 ((atomic_load_acquire(&(header)->attributes) & \ 114 DNS_SLABHEADERATTR_ANCIENT) != 0) 115 #define STATCOUNT(header) \ 116 ((atomic_load_acquire(&(header)->attributes) & \ 117 DNS_SLABHEADERATTR_STATCOUNT) != 0) 118 119 #define STALE_TTL(header, rbtdb) \ 120 (NXDOMAIN(header) ? 0 : rbtdb->common.serve_stale_ttl) 121 122 #define ACTIVE(header, now) \ 123 (((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header))) 124 125 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */ 126 127 #define EXPIREDOK(rbtiterator) \ 128 (((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0) 129 130 #define STALEOK(rbtiterator) \ 131 (((rbtiterator)->common.options & DNS_DB_STALEOK) != 0) 132 133 #define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0) 134 135 #define RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, iterator) \ 136 ((iterator)->current == &(iterator)->nsec3chain && \ 137 (iterator)->node == (rbtdb)->nsec3_origin_node) 138 139 /*% 140 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps). 141 * There is a tradeoff issue about configuring this value: if this is too 142 * small, it may cause heavier contention between threads; if this is too large, 143 * LRU purge algorithm won't work well (entries tend to be purged prematurely). 144 * The default value should work well for most environments, but this can 145 * also be configurable at compilation time via the 146 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than 147 * 1 due to the assumption of dns__cacherbt_overmem(). 148 */ 149 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT 150 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 151 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1" 152 #else /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ 153 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT 154 #endif /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ 155 #else /* ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ 156 #define DEFAULT_CACHE_NODE_LOCK_COUNT 17 157 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ 158 159 /* 160 * This defines the number of headers that we try to expire each time the 161 * expire_ttl_headers() is run. The number should be small enough, so the 162 * TTL-based header expiration doesn't take too long, but it should be large 163 * enough, so we expire enough headers if their TTL is clustered. 164 */ 165 #define DNS_RBTDB_EXPIRE_TTL_COUNT 10 166 167 static void 168 delete_callback(void *data, void *arg); 169 static void 170 prune_tree(void *arg); 171 static void 172 free_gluetable(struct cds_lfht *glue_table); 173 174 static void 175 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG); 176 static isc_result_t 177 rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG); 178 static isc_result_t 179 rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG); 180 static void 181 rdatasetiter_current(dns_rdatasetiter_t *iterator, 182 dns_rdataset_t *rdataset DNS__DB_FLARG); 183 184 static dns_rdatasetitermethods_t rdatasetiter_methods = { 185 rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next, 186 rdatasetiter_current 187 }; 188 189 typedef struct rbtdb_rdatasetiter { 190 dns_rdatasetiter_t common; 191 dns_slabheader_t *current; 192 } rbtdb_rdatasetiter_t; 193 194 /* 195 * Note that these iterators, unless created with either DNS_DB_NSEC3ONLY or 196 * DNS_DB_NONSEC3, will transparently move between the last node of the 197 * "regular" RBT ("chain" field) and the root node of the NSEC3 RBT 198 * ("nsec3chain" field) of the database in question, as if the latter was a 199 * successor to the former in lexical order. The "current" field always holds 200 * the address of either "chain" or "nsec3chain", depending on which RBT is 201 * being traversed at given time. 202 */ 203 static void 204 dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG); 205 static isc_result_t 206 dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG); 207 static isc_result_t 208 dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG); 209 static isc_result_t 210 dbiterator_seek(dns_dbiterator_t *iterator, 211 const dns_name_t *name DNS__DB_FLARG); 212 static isc_result_t 213 dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG); 214 static isc_result_t 215 dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG); 216 static isc_result_t 217 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 218 dns_name_t *name DNS__DB_FLARG); 219 static isc_result_t 220 dbiterator_pause(dns_dbiterator_t *iterator); 221 static isc_result_t 222 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name); 223 224 static dns_dbiteratormethods_t dbiterator_methods = { 225 dbiterator_destroy, dbiterator_first, dbiterator_last, 226 dbiterator_seek, dbiterator_prev, dbiterator_next, 227 dbiterator_current, dbiterator_pause, dbiterator_origin 228 }; 229 230 /* 231 * If 'paused' is true, then the tree lock is not being held. 232 */ 233 typedef struct rbtdb_dbiterator { 234 dns_dbiterator_t common; 235 bool paused; 236 bool new_origin; 237 isc_rwlocktype_t tree_locked; 238 isc_result_t result; 239 dns_fixedname_t name; 240 dns_fixedname_t origin; 241 dns_rbtnodechain_t chain; 242 dns_rbtnodechain_t nsec3chain; 243 dns_rbtnodechain_t *current; 244 dns_rbtnode_t *node; 245 enum { full, nonsec3, nsec3only } nsec3mode; 246 } rbtdb_dbiterator_t; 247 248 static void 249 free_rbtdb(dns_rbtdb_t *rbtdb, bool log); 250 static void 251 setnsec3parameters(dns_db_t *db, dns_rbtdb_version_t *version); 252 253 /*% 254 * 'init_count' is used to initialize 'newheader->count' which inturn 255 * is used to determine where in the cycle rrset-order cyclic starts. 256 * We don't lock this as we don't care about simultaneous updates. 257 */ 258 static atomic_uint_fast16_t init_count = 0; 259 260 /* 261 * Locking 262 * 263 * If a routine is going to lock more than one lock in this module, then 264 * the locking must be done in the following order: 265 * 266 * Tree Lock 267 * 268 * Node Lock (Only one from the set may be locked at one time by 269 * any caller) 270 * 271 * Database Lock 272 * 273 * Failure to follow this hierarchy can result in deadlock. 274 */ 275 276 /* 277 * Deleting Nodes 278 * 279 * For zone databases the node for the origin of the zone MUST NOT be deleted. 280 */ 281 282 /* 283 * DB Routines 284 */ 285 286 static void 287 update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype, 288 const uint_least16_t hattributes, const bool increment) { 289 dns_rdatastatstype_t statattributes = 0; 290 dns_rdatastatstype_t base = 0; 291 dns_rdatastatstype_t type; 292 dns_slabheader_t *header = &(dns_slabheader_t){ 293 .type = htype, 294 .attributes = hattributes, 295 }; 296 297 if (!EXISTS(header) || !STATCOUNT(header)) { 298 return; 299 } 300 301 if (NEGATIVE(header)) { 302 if (NXDOMAIN(header)) { 303 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN; 304 } else { 305 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET; 306 base = DNS_TYPEPAIR_COVERS(header->type); 307 } 308 } else { 309 base = DNS_TYPEPAIR_TYPE(header->type); 310 } 311 312 if (STALE(header)) { 313 statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE; 314 } 315 if (ANCIENT(header)) { 316 statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT; 317 } 318 319 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes); 320 if (increment) { 321 dns_rdatasetstats_increment(stats, type); 322 } else { 323 dns_rdatasetstats_decrement(stats, type); 324 } 325 } 326 327 void 328 dns__rbtdb_setttl(dns_slabheader_t *header, dns_ttl_t newttl) { 329 dns_ttl_t oldttl = header->ttl; 330 331 header->ttl = newttl; 332 333 if (header->db == NULL || !dns_db_iscache(header->db)) { 334 return; 335 } 336 337 /* 338 * This is a cache. Adjust the heaps if necessary. 339 */ 340 if (header->heap == NULL || header->heap_index == 0 || newttl == oldttl) 341 { 342 return; 343 } 344 345 if (newttl < oldttl) { 346 isc_heap_increased(header->heap, header->heap_index); 347 } else { 348 isc_heap_decreased(header->heap, header->heap_index); 349 } 350 351 if (newttl == 0) { 352 isc_heap_delete(header->heap, header->heap_index); 353 } 354 } 355 356 /*% 357 * These functions allow the heap code to rank the priority of each 358 * element. It returns true if v1 happens "sooner" than v2. 359 */ 360 static bool 361 ttl_sooner(void *v1, void *v2) { 362 dns_slabheader_t *h1 = v1; 363 dns_slabheader_t *h2 = v2; 364 365 return h1->ttl < h2->ttl; 366 } 367 368 /*% 369 * Return which RRset should be resigned sooner. If the RRsets have the 370 * same signing time, prefer the other RRset over the SOA RRset. 371 */ 372 static bool 373 resign_sooner(void *v1, void *v2) { 374 dns_slabheader_t *h1 = v1; 375 dns_slabheader_t *h2 = v2; 376 377 return h1->resign < h2->resign || 378 (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) || 379 (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb && 380 h2->type == DNS_SIGTYPE(dns_rdatatype_soa)); 381 } 382 383 /*% 384 * This function sets the heap index into the header. 385 */ 386 static void 387 set_index(void *what, unsigned int idx) { 388 dns_slabheader_t *h = what; 389 390 h->heap_index = idx; 391 } 392 393 /*% 394 * Work out how many nodes can be deleted in the time between two 395 * requests to the nameserver. Smooth the resulting number and use it 396 * as a estimate for the number of nodes to be deleted in the next 397 * iteration. 398 */ 399 static unsigned int 400 adjust_quantum(unsigned int old, isc_time_t *start) { 401 unsigned int pps = dns_pps; /* packets per second */ 402 unsigned int interval; 403 uint64_t usecs; 404 isc_time_t end; 405 unsigned int nodes; 406 407 if (pps < 100) { 408 pps = 100; 409 } 410 end = isc_time_now(); 411 412 interval = 1000000 / pps; /* interval in usec */ 413 if (interval == 0) { 414 interval = 1; 415 } 416 usecs = isc_time_microdiff(&end, start); 417 if (usecs == 0) { 418 /* 419 * We were unable to measure the amount of time taken. 420 * Double the nodes deleted next time. 421 */ 422 old *= 2; 423 if (old > 1000) { 424 old = 1000; 425 } 426 return old; 427 } 428 nodes = old * interval; 429 nodes /= (unsigned int)usecs; 430 if (nodes == 0) { 431 nodes = 1; 432 } else if (nodes > 1000) { 433 nodes = 1000; 434 } 435 436 /* Smooth */ 437 nodes = (nodes + old * 3) / 4; 438 439 if (nodes != old) { 440 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 441 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 442 "adjust_quantum: old=%d, new=%d", old, nodes); 443 } 444 445 return nodes; 446 } 447 448 static void 449 free_rbtdb_callback(void *arg) { 450 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; 451 452 free_rbtdb(rbtdb, true); 453 } 454 455 static void 456 free_rbtdb(dns_rbtdb_t *rbtdb, bool log) { 457 unsigned int i; 458 isc_result_t result; 459 char buf[DNS_NAME_FORMATSIZE]; 460 dns_rbt_t **treep = NULL; 461 isc_time_t start; 462 463 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions)); 464 REQUIRE(rbtdb->future_version == NULL); 465 466 if (rbtdb->current_version != NULL) { 467 isc_refcount_decrementz(&rbtdb->current_version->references); 468 469 isc_refcount_destroy(&rbtdb->current_version->references); 470 UNLINK(rbtdb->open_versions, rbtdb->current_version, link); 471 isc_rwlock_destroy(&rbtdb->current_version->rwlock); 472 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version, 473 sizeof(*rbtdb->current_version)); 474 } 475 476 /* 477 * We assume the number of remaining dead nodes is reasonably small; 478 * the overhead of unlinking all nodes here should be negligible. 479 */ 480 for (i = 0; i < rbtdb->node_lock_count; i++) { 481 dns_rbtnode_t *node = NULL; 482 483 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); 484 while (node != NULL) { 485 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); 486 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); 487 } 488 } 489 490 rbtdb->quantum = (rbtdb->loop != NULL) ? 100 : 0; 491 492 for (;;) { 493 /* 494 * pick the next tree to (start to) destroy 495 */ 496 treep = &rbtdb->tree; 497 if (*treep == NULL) { 498 treep = &rbtdb->nsec; 499 if (*treep == NULL) { 500 treep = &rbtdb->nsec3; 501 /* 502 * we're finished after clear cutting 503 */ 504 if (*treep == NULL) { 505 break; 506 } 507 } 508 } 509 510 start = isc_time_now(); 511 result = dns_rbt_destroy(treep, rbtdb->quantum); 512 if (result == ISC_R_QUOTA) { 513 INSIST(rbtdb->loop != NULL); 514 if (rbtdb->quantum != 0) { 515 rbtdb->quantum = adjust_quantum(rbtdb->quantum, 516 &start); 517 } 518 isc_async_run(rbtdb->loop, free_rbtdb_callback, rbtdb); 519 return; 520 } 521 INSIST(result == ISC_R_SUCCESS && *treep == NULL); 522 } 523 524 if (log) { 525 if (dns_name_dynamic(&rbtdb->common.origin)) { 526 dns_name_format(&rbtdb->common.origin, buf, 527 sizeof(buf)); 528 } else { 529 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 530 } 531 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 532 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 533 "done free_rbtdb(%s)", buf); 534 } 535 if (dns_name_dynamic(&rbtdb->common.origin)) { 536 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx); 537 } 538 for (i = 0; i < rbtdb->node_lock_count; i++) { 539 isc_refcount_destroy(&rbtdb->node_locks[i].references); 540 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock); 541 } 542 543 /* 544 * Clean up LRU / re-signing order lists. 545 */ 546 if (rbtdb->lru != NULL) { 547 for (i = 0; i < rbtdb->node_lock_count; i++) { 548 INSIST(ISC_LIST_EMPTY(rbtdb->lru[i])); 549 } 550 isc_mem_cput(rbtdb->common.mctx, rbtdb->lru, 551 rbtdb->node_lock_count, 552 sizeof(dns_slabheaderlist_t)); 553 } 554 /* 555 * Clean up dead node buckets. 556 */ 557 if (rbtdb->deadnodes != NULL) { 558 for (i = 0; i < rbtdb->node_lock_count; i++) { 559 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i])); 560 } 561 isc_mem_cput(rbtdb->common.mctx, rbtdb->deadnodes, 562 rbtdb->node_lock_count, sizeof(dns_rbtnodelist_t)); 563 } 564 /* 565 * Clean up heap objects. 566 */ 567 if (rbtdb->heaps != NULL) { 568 for (i = 0; i < rbtdb->node_lock_count; i++) { 569 isc_heap_destroy(&rbtdb->heaps[i]); 570 } 571 isc_mem_cput(rbtdb->hmctx, rbtdb->heaps, rbtdb->node_lock_count, 572 sizeof(isc_heap_t *)); 573 } 574 575 if (rbtdb->rrsetstats != NULL) { 576 dns_stats_detach(&rbtdb->rrsetstats); 577 } 578 if (rbtdb->cachestats != NULL) { 579 isc_stats_detach(&rbtdb->cachestats); 580 } 581 if (rbtdb->gluecachestats != NULL) { 582 isc_stats_detach(&rbtdb->gluecachestats); 583 } 584 585 isc_mem_cput(rbtdb->common.mctx, rbtdb->node_locks, 586 rbtdb->node_lock_count, sizeof(db_nodelock_t)); 587 TREE_DESTROYLOCK(&rbtdb->tree_lock); 588 isc_refcount_destroy(&rbtdb->common.references); 589 if (rbtdb->loop != NULL) { 590 isc_loop_detach(&rbtdb->loop); 591 } 592 593 isc_rwlock_destroy(&rbtdb->lock); 594 rbtdb->common.magic = 0; 595 rbtdb->common.impmagic = 0; 596 isc_mem_detach(&rbtdb->hmctx); 597 598 if (rbtdb->common.update_listeners != NULL) { 599 INSIST(!cds_lfht_destroy(rbtdb->common.update_listeners, NULL)); 600 } 601 602 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb)); 603 } 604 605 void 606 dns__rbtdb_destroy(dns_db_t *arg) { 607 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; 608 bool want_free = false; 609 unsigned int i; 610 unsigned int inactive = 0; 611 612 /* XXX check for open versions here */ 613 614 if (rbtdb->soanode != NULL) { 615 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode); 616 } 617 if (rbtdb->nsnode != NULL) { 618 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode); 619 } 620 621 /* 622 * The current version's glue table needs to be freed early 623 * so the nodes are dereferenced before we check the active 624 * node count below. 625 */ 626 if (rbtdb->current_version != NULL) { 627 free_gluetable(rbtdb->current_version->glue_table); 628 } 629 630 /* 631 * Even though there are no external direct references, there still 632 * may be nodes in use. 633 */ 634 for (i = 0; i < rbtdb->node_lock_count; i++) { 635 isc_rwlocktype_t nodelock = isc_rwlocktype_none; 636 NODE_WRLOCK(&rbtdb->node_locks[i].lock, &nodelock); 637 rbtdb->node_locks[i].exiting = true; 638 if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0) 639 { 640 inactive++; 641 } 642 NODE_UNLOCK(&rbtdb->node_locks[i].lock, &nodelock); 643 } 644 645 if (inactive != 0) { 646 RWLOCK(&rbtdb->lock, isc_rwlocktype_write); 647 rbtdb->active -= inactive; 648 if (rbtdb->active == 0) { 649 want_free = true; 650 } 651 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); 652 if (want_free) { 653 char buf[DNS_NAME_FORMATSIZE]; 654 if (dns_name_dynamic(&rbtdb->common.origin)) { 655 dns_name_format(&rbtdb->common.origin, buf, 656 sizeof(buf)); 657 } else { 658 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 659 } 660 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 661 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 662 "calling free_rbtdb(%s)", buf); 663 free_rbtdb(rbtdb, true); 664 } 665 } 666 } 667 668 void 669 dns__rbtdb_currentversion(dns_db_t *db, dns_dbversion_t **versionp) { 670 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 671 dns_rbtdb_version_t *version = NULL; 672 673 REQUIRE(VALID_RBTDB(rbtdb)); 674 675 RWLOCK(&rbtdb->lock, isc_rwlocktype_read); 676 version = rbtdb->current_version; 677 isc_refcount_increment(&version->references); 678 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); 679 680 *versionp = (dns_dbversion_t *)version; 681 } 682 683 static dns_rbtdb_version_t * 684 allocate_version(isc_mem_t *mctx, uint32_t serial, unsigned int references, 685 bool writer) { 686 dns_rbtdb_version_t *version = isc_mem_get(mctx, sizeof(*version)); 687 *version = (dns_rbtdb_version_t){ 688 .serial = serial, 689 .writer = writer, 690 .changed_list = ISC_LIST_INITIALIZER, 691 .resigned_list = ISC_LIST_INITIALIZER, 692 .link = ISC_LINK_INITIALIZER, 693 .glue_table = cds_lfht_new(GLUETABLE_INIT_SIZE, 694 GLUETABLE_MIN_SIZE, 0, 695 CDS_LFHT_AUTO_RESIZE, NULL), 696 }; 697 698 isc_rwlock_init(&version->rwlock); 699 isc_refcount_init(&version->references, references); 700 701 return version; 702 } 703 704 isc_result_t 705 dns__rbtdb_newversion(dns_db_t *db, dns_dbversion_t **versionp) { 706 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 707 dns_rbtdb_version_t *version = NULL; 708 709 REQUIRE(VALID_RBTDB(rbtdb)); 710 REQUIRE(versionp != NULL && *versionp == NULL); 711 REQUIRE(rbtdb->future_version == NULL); 712 713 RWLOCK(&rbtdb->lock, isc_rwlocktype_write); 714 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */ 715 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1, 716 true); 717 version->rbtdb = rbtdb; 718 version->commit_ok = true; 719 version->secure = rbtdb->current_version->secure; 720 version->havensec3 = rbtdb->current_version->havensec3; 721 if (version->havensec3) { 722 version->flags = rbtdb->current_version->flags; 723 version->iterations = rbtdb->current_version->iterations; 724 version->hash = rbtdb->current_version->hash; 725 version->salt_length = rbtdb->current_version->salt_length; 726 memmove(version->salt, rbtdb->current_version->salt, 727 version->salt_length); 728 } else { 729 version->flags = 0; 730 version->iterations = 0; 731 version->hash = 0; 732 version->salt_length = 0; 733 memset(version->salt, 0, sizeof(version->salt)); 734 } 735 RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); 736 version->records = rbtdb->current_version->records; 737 version->xfrsize = rbtdb->current_version->xfrsize; 738 RWUNLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); 739 rbtdb->next_serial++; 740 rbtdb->future_version = version; 741 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); 742 743 *versionp = version; 744 745 return ISC_R_SUCCESS; 746 } 747 748 void 749 dns__rbtdb_attachversion(dns_db_t *db, dns_dbversion_t *source, 750 dns_dbversion_t **targetp) { 751 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 752 dns_rbtdb_version_t *rbtversion = source; 753 754 REQUIRE(VALID_RBTDB(rbtdb)); 755 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb); 756 757 isc_refcount_increment(&rbtversion->references); 758 759 *targetp = rbtversion; 760 } 761 762 static rbtdb_changed_t * 763 add_changed(dns_slabheader_t *header, 764 dns_rbtdb_version_t *version DNS__DB_FLARG) { 765 rbtdb_changed_t *changed = NULL; 766 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; 767 768 /* 769 * Caller must be holding the node lock if its reference must be 770 * protected by the lock. 771 */ 772 773 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed)); 774 775 RWLOCK(&rbtdb->lock, isc_rwlocktype_write); 776 777 REQUIRE(version->writer); 778 779 if (changed != NULL) { 780 dns_rbtnode_t *node = (dns_rbtnode_t *)header->node; 781 uint_fast32_t refs = isc_refcount_increment(&node->references); 782 #if DNS_DB_NODETRACE 783 fprintf(stderr, 784 "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", 785 func, file, line, node, refs + 1); 786 #else 787 UNUSED(refs); 788 #endif 789 changed->node = node; 790 changed->dirty = false; 791 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link); 792 } else { 793 version->commit_ok = false; 794 } 795 796 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); 797 798 return changed; 799 } 800 801 static void 802 rollback_node(dns_rbtnode_t *node, uint32_t serial) { 803 dns_slabheader_t *header = NULL, *dcurrent = NULL; 804 bool make_dirty = false; 805 806 /* 807 * Caller must hold the node lock. 808 */ 809 810 /* 811 * We set the IGNORE attribute on rdatasets with serial number 812 * 'serial'. When the reference count goes to zero, these rdatasets 813 * will be cleaned up; until that time, they will be ignored. 814 */ 815 for (header = node->data; header != NULL; header = header->next) { 816 if (header->serial == serial) { 817 DNS_SLABHEADER_SETATTR(header, 818 DNS_SLABHEADERATTR_IGNORE); 819 make_dirty = true; 820 } 821 for (dcurrent = header->down; dcurrent != NULL; 822 dcurrent = dcurrent->down) 823 { 824 if (dcurrent->serial == serial) { 825 DNS_SLABHEADER_SETATTR( 826 dcurrent, DNS_SLABHEADERATTR_IGNORE); 827 make_dirty = true; 828 } 829 } 830 } 831 if (make_dirty) { 832 node->dirty = 1; 833 } 834 } 835 836 void 837 dns__rbtdb_mark(dns_slabheader_t *header, uint_least16_t flag) { 838 uint_least16_t attributes = atomic_load_acquire(&header->attributes); 839 uint_least16_t newattributes = 0; 840 dns_stats_t *stats = NULL; 841 842 /* 843 * If we are already ancient there is nothing to do. 844 */ 845 do { 846 if ((attributes & flag) != 0) { 847 return; 848 } 849 newattributes = attributes | flag; 850 } while (!atomic_compare_exchange_weak_acq_rel( 851 &header->attributes, &attributes, newattributes)); 852 853 /* 854 * Decrement and increment the stats counter for the appropriate 855 * RRtype. 856 */ 857 stats = dns_db_getrrsetstats(header->db); 858 if (stats != NULL) { 859 update_rrsetstats(stats, header->type, attributes, false); 860 update_rrsetstats(stats, header->type, newattributes, true); 861 } 862 } 863 864 static void 865 mark_ancient(dns_slabheader_t *header) { 866 dns__rbtdb_setttl(header, 0); 867 dns__rbtdb_mark(header, DNS_SLABHEADERATTR_ANCIENT); 868 RBTDB_HEADERNODE(header)->dirty = 1; 869 } 870 871 static void 872 clean_stale_headers(dns_slabheader_t *top) { 873 dns_slabheader_t *d = NULL, *down_next = NULL; 874 875 for (d = top->down; d != NULL; d = down_next) { 876 down_next = d->down; 877 dns_slabheader_destroy(&d); 878 } 879 top->down = NULL; 880 } 881 882 static void 883 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { 884 dns_slabheader_t *current = NULL, *top_prev = NULL, *top_next = NULL; 885 886 /* 887 * Caller must be holding the node lock. 888 */ 889 890 for (current = node->data; current != NULL; current = top_next) { 891 top_next = current->next; 892 clean_stale_headers(current); 893 /* 894 * If current is nonexistent, ancient, or stale and 895 * we are not keeping stale, we can clean it up. 896 */ 897 if (NONEXISTENT(current) || ANCIENT(current) || 898 (STALE(current) && !KEEPSTALE(rbtdb))) 899 { 900 if (top_prev != NULL) { 901 top_prev->next = current->next; 902 } else { 903 node->data = current->next; 904 } 905 dns_slabheader_destroy(¤t); 906 } else { 907 top_prev = current; 908 } 909 } 910 node->dirty = 0; 911 } 912 913 static void 914 clean_zone_node(dns_rbtnode_t *node, uint32_t least_serial) { 915 dns_slabheader_t *current = NULL, *dcurrent = NULL; 916 dns_slabheader_t *down_next = NULL, *dparent = NULL; 917 dns_slabheader_t *top_prev = NULL, *top_next = NULL; 918 bool still_dirty = false; 919 920 /* 921 * Caller must be holding the node lock. 922 */ 923 REQUIRE(least_serial != 0); 924 925 for (current = node->data; current != NULL; current = top_next) { 926 top_next = current->next; 927 928 /* 929 * First, we clean up any instances of multiple rdatasets 930 * with the same serial number, or that have the IGNORE 931 * attribute. 932 */ 933 dparent = current; 934 for (dcurrent = current->down; dcurrent != NULL; 935 dcurrent = down_next) 936 { 937 down_next = dcurrent->down; 938 INSIST(dcurrent->serial <= dparent->serial); 939 if (dcurrent->serial == dparent->serial || 940 IGNORE(dcurrent)) 941 { 942 if (down_next != NULL) { 943 down_next->next = dparent; 944 } 945 dparent->down = down_next; 946 dns_slabheader_destroy(&dcurrent); 947 } else { 948 dparent = dcurrent; 949 } 950 } 951 952 /* 953 * We've now eliminated all IGNORE datasets with the possible 954 * exception of current, which we now check. 955 */ 956 if (IGNORE(current)) { 957 down_next = current->down; 958 if (down_next == NULL) { 959 if (top_prev != NULL) { 960 top_prev->next = current->next; 961 } else { 962 node->data = current->next; 963 } 964 dns_slabheader_destroy(¤t); 965 /* 966 * current no longer exists, so we can 967 * just continue with the loop. 968 */ 969 continue; 970 } else { 971 /* 972 * Pull up current->down, making it the new 973 * current. 974 */ 975 if (top_prev != NULL) { 976 top_prev->next = down_next; 977 } else { 978 node->data = down_next; 979 } 980 down_next->next = top_next; 981 dns_slabheader_destroy(¤t); 982 current = down_next; 983 } 984 } 985 986 /* 987 * We now try to find the first down node less than the 988 * least serial. 989 */ 990 dparent = current; 991 for (dcurrent = current->down; dcurrent != NULL; 992 dcurrent = down_next) 993 { 994 down_next = dcurrent->down; 995 if (dcurrent->serial < least_serial) { 996 break; 997 } 998 dparent = dcurrent; 999 } 1000 1001 /* 1002 * If there is a such an rdataset, delete it and any older 1003 * versions. 1004 */ 1005 if (dcurrent != NULL) { 1006 do { 1007 down_next = dcurrent->down; 1008 INSIST(dcurrent->serial <= least_serial); 1009 dns_slabheader_destroy(&dcurrent); 1010 dcurrent = down_next; 1011 } while (dcurrent != NULL); 1012 dparent->down = NULL; 1013 } 1014 1015 /* 1016 * Note. The serial number of 'current' might be less than 1017 * least_serial too, but we cannot delete it because it is 1018 * the most recent version, unless it is a NONEXISTENT 1019 * rdataset. 1020 */ 1021 if (current->down != NULL) { 1022 still_dirty = true; 1023 top_prev = current; 1024 } else { 1025 /* 1026 * If this is a NONEXISTENT rdataset, we can delete it. 1027 */ 1028 if (NONEXISTENT(current)) { 1029 if (top_prev != NULL) { 1030 top_prev->next = current->next; 1031 } else { 1032 node->data = current->next; 1033 } 1034 dns_slabheader_destroy(¤t); 1035 } else { 1036 top_prev = current; 1037 } 1038 } 1039 } 1040 if (!still_dirty) { 1041 node->dirty = 0; 1042 } 1043 } 1044 1045 /* 1046 * tree_lock(write) must be held. 1047 */ 1048 static void 1049 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { 1050 dns_rbtnode_t *nsecnode = NULL; 1051 dns_fixedname_t fname; 1052 dns_name_t *name = NULL; 1053 isc_result_t result = ISC_R_UNEXPECTED; 1054 1055 INSIST(!ISC_LINK_LINKED(node, deadlink)); 1056 1057 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) { 1058 char printname[DNS_NAME_FORMATSIZE]; 1059 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1060 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1061 "delete_node(): %p %s (bucket %d)", node, 1062 dns_rbt_formatnodename(node, printname, 1063 sizeof(printname)), 1064 node->locknum); 1065 } 1066 1067 switch (node->nsec) { 1068 case DNS_DB_NSEC_NORMAL: 1069 result = dns_rbt_deletenode(rbtdb->tree, node, false); 1070 break; 1071 case DNS_DB_NSEC_HAS_NSEC: 1072 /* 1073 * Though this may be wasteful, it has to be done before 1074 * node is deleted. 1075 */ 1076 name = dns_fixedname_initname(&fname); 1077 dns_rbt_fullnamefromnode(node, name); 1078 /* 1079 * Delete the corresponding node from the auxiliary NSEC 1080 * tree before deleting from the main tree. 1081 */ 1082 nsecnode = NULL; 1083 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode, 1084 NULL, DNS_RBTFIND_EMPTYDATA, NULL, 1085 NULL); 1086 if (result != ISC_R_SUCCESS) { 1087 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1088 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1089 "delete_node: " 1090 "dns_rbt_findnode(nsec): %s", 1091 isc_result_totext(result)); 1092 } else { 1093 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode, 1094 false); 1095 if (result != ISC_R_SUCCESS) { 1096 isc_log_write( 1097 dns_lctx, DNS_LOGCATEGORY_DATABASE, 1098 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1099 "delete_node(): " 1100 "dns_rbt_deletenode(nsecnode): %s", 1101 isc_result_totext(result)); 1102 } 1103 } 1104 result = dns_rbt_deletenode(rbtdb->tree, node, false); 1105 break; 1106 case DNS_DB_NSEC_NSEC: 1107 result = dns_rbt_deletenode(rbtdb->nsec, node, false); 1108 break; 1109 case DNS_DB_NSEC_NSEC3: 1110 result = dns_rbt_deletenode(rbtdb->nsec3, node, false); 1111 break; 1112 } 1113 if (result != ISC_R_SUCCESS) { 1114 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1115 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1116 "delete_node(): " 1117 "dns_rbt_deletenode: %s", 1118 isc_result_totext(result)); 1119 } 1120 } 1121 1122 /* 1123 * Caller must be holding the node lock. 1124 */ 1125 void 1126 dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1127 isc_rwlocktype_t nlocktype DNS__DB_FLARG) { 1128 uint_fast32_t refs; 1129 1130 if (nlocktype == isc_rwlocktype_write && 1131 ISC_LINK_LINKED(node, deadlink)) 1132 { 1133 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, 1134 deadlink); 1135 } 1136 1137 refs = isc_refcount_increment0(&node->references); 1138 #if DNS_DB_NODETRACE 1139 fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", 1140 func, file, line, node, refs + 1); 1141 #else 1142 UNUSED(refs); 1143 #endif 1144 1145 if (refs == 0) { 1146 /* this is the first reference to the node */ 1147 refs = isc_refcount_increment0( 1148 &rbtdb->node_locks[node->locknum].references); 1149 #if DNS_DB_NODETRACE 1150 fprintf(stderr, 1151 "incr:nodelock:%s:%s:%u:%p:%p->references = " 1152 "%" PRIuFAST32 "\n", 1153 func, file, line, node, 1154 &rbtdb->node_locks[node->locknum], refs + 1); 1155 #else 1156 UNUSED(refs); 1157 #endif 1158 } 1159 } 1160 1161 /*% 1162 * The tree lock must be held for the result to be valid. 1163 */ 1164 static bool 1165 is_last_node_on_its_level(dns_rbtnode_t *node) { 1166 return node->parent != NULL && node->parent->down == node && 1167 node->left == NULL && node->right == NULL; 1168 } 1169 1170 static void 1171 send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1172 isc_rwlocktype_t nlocktype DNS__DB_FLARG) { 1173 rbtdb_prune_t *prune = isc_mem_get(rbtdb->common.mctx, sizeof(*prune)); 1174 *prune = (rbtdb_prune_t){ .node = node }; 1175 1176 dns_db_attach((dns_db_t *)rbtdb, &prune->db); 1177 dns__rbtdb_newref(rbtdb, node, nlocktype DNS__DB_FLARG_PASS); 1178 1179 isc_async_run(rbtdb->loop, prune_tree, prune); 1180 } 1181 1182 /*% 1183 * Clean up dead nodes. These are nodes which have no references, and 1184 * have no data. They are dead but we could not or chose not to delete 1185 * them when we deleted all the data at that node because we did not want 1186 * to wait for the tree write lock. 1187 * 1188 * The caller must hold a tree write lock and bucketnum'th node (write) lock. 1189 */ 1190 static void 1191 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum DNS__DB_FLARG) { 1192 dns_rbtnode_t *node = NULL; 1193 int count = 10; /* XXXJT: should be adjustable */ 1194 1195 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); 1196 while (node != NULL && count > 0) { 1197 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink); 1198 1199 /* 1200 * We might have reactivated this node without a tree write 1201 * lock, so we couldn't remove this node from deadnodes then 1202 * and we have to do it now. 1203 */ 1204 if (isc_refcount_current(&node->references) != 0 || 1205 node->data != NULL) 1206 { 1207 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); 1208 count--; 1209 continue; 1210 } 1211 1212 if (is_last_node_on_its_level(node) && rbtdb->loop != NULL) { 1213 send_to_prune_tree( 1214 rbtdb, node, 1215 isc_rwlocktype_write DNS__DB_FLARG_PASS); 1216 } else if (node->down == NULL && node->data == NULL) { 1217 /* 1218 * Not a interior node and not needing to be 1219 * reactivated. 1220 */ 1221 delete_node(rbtdb, node); 1222 } else if (node->data == NULL) { 1223 /* 1224 * A interior node without data. Leave linked to 1225 * to be cleaned up when node->down becomes NULL. 1226 */ 1227 ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum], node, 1228 deadlink); 1229 } 1230 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); 1231 count--; 1232 } 1233 } 1234 1235 /* 1236 * This function is assumed to be called when a node is newly referenced 1237 * and can be in the deadnode list. In that case the node must be retrieved 1238 * from the list because it is going to be used. In addition, if the caller 1239 * happens to hold a write lock on the tree, it's a good chance to purge dead 1240 * nodes. 1241 * Note: while a new reference is gained in multiple places, there are only very 1242 * few cases where the node can be in the deadnode list (only empty nodes can 1243 * have been added to the list). 1244 */ 1245 static void 1246 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1247 isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 1248 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1249 isc_rwlock_t *nodelock = &rbtdb->node_locks[node->locknum].lock; 1250 bool maybe_cleanup = false; 1251 1252 POST(nlocktype); 1253 1254 NODE_RDLOCK(nodelock, &nlocktype); 1255 1256 /* 1257 * Check if we can possibly cleanup the dead node. If so, upgrade 1258 * the node lock below to perform the cleanup. 1259 */ 1260 if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) && 1261 tlocktype == isc_rwlocktype_write) 1262 { 1263 maybe_cleanup = true; 1264 } 1265 1266 if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) { 1267 /* 1268 * Upgrade the lock and test if we still need to unlink. 1269 */ 1270 NODE_FORCEUPGRADE(nodelock, &nlocktype); 1271 POST(nlocktype); 1272 if (ISC_LINK_LINKED(node, deadlink)) { 1273 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, 1274 deadlink); 1275 } 1276 if (maybe_cleanup) { 1277 cleanup_dead_nodes(rbtdb, 1278 node->locknum DNS__DB_FILELINE); 1279 } 1280 } 1281 1282 dns__rbtdb_newref(rbtdb, node, nlocktype DNS__DB_FLARG_PASS); 1283 1284 NODE_UNLOCK(nodelock, &nlocktype); 1285 } 1286 1287 /* 1288 * Caller must be holding the node lock; either the read or write lock. 1289 * Note that the lock must be held even when node references are 1290 * atomically modified; in that case the decrement operation itself does not 1291 * have to be protected, but we must avoid a race condition where multiple 1292 * threads are decreasing the reference to zero simultaneously and at least 1293 * one of them is going to free the node. 1294 * 1295 * This function returns true if and only if the node reference decreases 1296 * to zero. 1297 * 1298 * NOTE: Decrementing the reference count of a node to zero does not mean it 1299 * will be immediately freed. 1300 */ 1301 bool 1302 dns__rbtdb_decref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1303 uint32_t least_serial, isc_rwlocktype_t *nlocktypep, 1304 isc_rwlocktype_t *tlocktypep, bool tryupgrade, 1305 bool pruning DNS__DB_FLARG) { 1306 isc_result_t result; 1307 bool locked = *tlocktypep != isc_rwlocktype_none; 1308 bool write_locked = false; 1309 db_nodelock_t *nodelock = NULL; 1310 int bucket = node->locknum; 1311 bool no_reference = true; 1312 uint_fast32_t refs; 1313 1314 REQUIRE(*nlocktypep != isc_rwlocktype_none); 1315 1316 nodelock = &rbtdb->node_locks[bucket]; 1317 1318 #define KEEP_NODE(n, r, l) \ 1319 ((n)->data != NULL || ((l) && (n)->down != NULL) || \ 1320 (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node) 1321 1322 /* Handle easy and typical case first. */ 1323 if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) { 1324 refs = isc_refcount_decrement(&node->references); 1325 #if DNS_DB_NODETRACE 1326 fprintf(stderr, 1327 "decr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", 1328 func, file, line, node, refs - 1); 1329 #else 1330 UNUSED(refs); 1331 #endif 1332 if (refs == 1) { 1333 refs = isc_refcount_decrement(&nodelock->references); 1334 #if DNS_DB_NODETRACE 1335 fprintf(stderr, 1336 "decr:nodelock:%s:%s:%u:%p:%p->references = " 1337 "%" PRIuFAST32 "\n", 1338 func, file, line, node, nodelock, refs - 1); 1339 #else 1340 UNUSED(refs); 1341 #endif 1342 return true; 1343 } else { 1344 return false; 1345 } 1346 } 1347 1348 /* Upgrade the lock? */ 1349 if (*nlocktypep == isc_rwlocktype_read) { 1350 NODE_FORCEUPGRADE(&nodelock->lock, nlocktypep); 1351 } 1352 1353 refs = isc_refcount_decrement(&node->references); 1354 #if DNS_DB_NODETRACE 1355 fprintf(stderr, "decr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", 1356 func, file, line, node, refs - 1); 1357 #else 1358 UNUSED(refs); 1359 #endif 1360 if (refs > 1) { 1361 return false; 1362 } 1363 1364 if (node->dirty) { 1365 if (IS_CACHE(rbtdb)) { 1366 clean_cache_node(rbtdb, node); 1367 } else { 1368 if (least_serial == 0) { 1369 /* 1370 * Caller doesn't know the least serial. 1371 * Get it. 1372 */ 1373 RWLOCK(&rbtdb->lock, isc_rwlocktype_read); 1374 least_serial = rbtdb->least_serial; 1375 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); 1376 } 1377 clean_zone_node(node, least_serial); 1378 } 1379 } 1380 1381 /* 1382 * Attempt to switch to a write lock on the tree. If this fails, 1383 * we will add this node to a linked list of nodes in this locking 1384 * bucket which we will free later. 1385 * 1386 * Locking hierarchy notwithstanding, we don't need to free 1387 * the node lock before acquiring the tree write lock because 1388 * we only do a trylock. 1389 */ 1390 /* We are allowed to upgrade the tree lock */ 1391 switch (*tlocktypep) { 1392 case isc_rwlocktype_write: 1393 result = ISC_R_SUCCESS; 1394 break; 1395 case isc_rwlocktype_read: 1396 if (tryupgrade) { 1397 result = TREE_TRYUPGRADE(&rbtdb->tree_lock, tlocktypep); 1398 } else { 1399 result = ISC_R_LOCKBUSY; 1400 } 1401 break; 1402 case isc_rwlocktype_none: 1403 result = TREE_TRYWRLOCK(&rbtdb->tree_lock, tlocktypep); 1404 break; 1405 default: 1406 UNREACHABLE(); 1407 } 1408 RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_LOCKBUSY); 1409 if (result == ISC_R_SUCCESS) { 1410 write_locked = true; 1411 } 1412 1413 refs = isc_refcount_decrement(&nodelock->references); 1414 #if DNS_DB_NODETRACE 1415 fprintf(stderr, 1416 "decr:nodelock:%s:%s:%u:%p:%p->references = %" PRIuFAST32 "\n", 1417 func, file, line, node, nodelock, refs - 1); 1418 #else 1419 UNUSED(refs); 1420 #endif 1421 1422 if (KEEP_NODE(node, rbtdb, locked || write_locked)) { 1423 goto restore_locks; 1424 } 1425 1426 #undef KEEP_NODE 1427 1428 if (write_locked) { 1429 /* 1430 * If this node is the only one left on its RBTDB level, 1431 * attempt pruning the RBTDB (i.e. deleting empty nodes that 1432 * are ancestors of 'node' and are not interior nodes) starting 1433 * from this node (see prune_tree()). The main reason this is 1434 * not done immediately, but asynchronously, is that the 1435 * ancestors of 'node' are almost guaranteed to belong to 1436 * different node buckets and we don't want to do juggle locks 1437 * right now. 1438 * 1439 * Since prune_tree() also calls dns__rbtdb_decref(), check the 1440 * value of the 'pruning' parameter (which is only set to 1441 * 'true' in the dns__rbtdb_decref() call present in 1442 * prune_tree()) to prevent an infinite loop and to allow a 1443 * node sent to prune_tree() to be deleted by the delete_node() 1444 * call in the code branch below. 1445 */ 1446 if (!pruning && is_last_node_on_its_level(node) && 1447 rbtdb->loop != NULL) 1448 { 1449 send_to_prune_tree(rbtdb, node, 1450 *nlocktypep DNS__DB_FLARG_PASS); 1451 no_reference = false; 1452 } else { 1453 /* 1454 * The node can now be deleted. 1455 */ 1456 delete_node(rbtdb, node); 1457 } 1458 } else { 1459 INSIST(node->data == NULL); 1460 if (!ISC_LINK_LINKED(node, deadlink)) { 1461 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, 1462 deadlink); 1463 } 1464 } 1465 1466 restore_locks: 1467 /* 1468 * Relock a read lock, or unlock the write lock if no lock was held. 1469 */ 1470 if (!locked && write_locked) { 1471 TREE_UNLOCK(&rbtdb->tree_lock, tlocktypep); 1472 } 1473 1474 return no_reference; 1475 } 1476 1477 /* 1478 * Prune the RBTDB tree of trees. Start by attempting to delete a node that is 1479 * the only one left on its RBTDB level (see the send_to_prune_tree() call in 1480 * dns__rbtdb_decref()). Then, if the node has a parent (which can either 1481 * exist on the same RBTDB level or on an upper RBTDB level), check whether the 1482 * latter is an interior node (i.e. a node with a non-NULL 'down' pointer). If 1483 * the parent node is not an interior node, attempt deleting the parent node as 1484 * well and then move on to examining the parent node's parent, etc. Continue 1485 * traversing the RBTDB tree until a node is encountered that is still an 1486 * interior node after the previously-processed node gets deleted. 1487 * 1488 * It is acceptable for a node sent to this function to NOT be deleted in the 1489 * process (e.g. if it gets reactivated in the meantime). Furthermore, node 1490 * deletion is not a prerequisite for continuing RBTDB traversal. 1491 * 1492 * This function gets called once for every "starting node" and it continues 1493 * traversing the RBTDB until the stop condition is met. In the worst case, 1494 * the number of nodes processed by a single execution of this function is the 1495 * number of tree levels, which is at most the maximum number of domain name 1496 * labels (127); however, it should be much smaller in practice and deleting 1497 * empty RBTDB nodes is critical to keeping the amount of memory used by the 1498 * cache memory context within the configured limit anyway. 1499 */ 1500 static void 1501 prune_tree(void *arg) { 1502 rbtdb_prune_t *prune = (rbtdb_prune_t *)arg; 1503 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)prune->db; 1504 dns_rbtnode_t *node = prune->node; 1505 dns_rbtnode_t *parent = NULL; 1506 unsigned int locknum = node->locknum; 1507 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1508 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1509 1510 isc_mem_put(rbtdb->common.mctx, prune, sizeof(*prune)); 1511 1512 TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); 1513 NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1514 do { 1515 parent = node->parent; 1516 dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true, 1517 true DNS__DB_FILELINE); 1518 1519 /* 1520 * Check whether the parent is an interior node. Note that it 1521 * might have been one before the dns__rbtdb_decref() call on 1522 * the previous line, but decrementing the reference count for 1523 * 'node' could have caused 'node->parent->down' to become 1524 * NULL. 1525 */ 1526 if (parent != NULL && parent->down == NULL) { 1527 /* 1528 * Keep the node lock if possible; otherwise, release 1529 * the old lock and acquire one for the parent. 1530 */ 1531 if (parent->locknum != locknum) { 1532 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 1533 &nlocktype); 1534 locknum = parent->locknum; 1535 NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, 1536 &nlocktype); 1537 } 1538 1539 /* 1540 * We need to gain a reference to the parent node 1541 * before decrementing it in the next iteration. 1542 */ 1543 dns__rbtdb_newref(rbtdb, parent, 1544 nlocktype DNS__DB_FLARG_PASS); 1545 } else { 1546 parent = NULL; 1547 } 1548 1549 node = parent; 1550 } while (node != NULL); 1551 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1552 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 1553 1554 dns_db_detach((dns_db_t **)&rbtdb); 1555 } 1556 1557 static void 1558 make_least_version(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *version, 1559 rbtdb_changedlist_t *cleanup_list) { 1560 /* 1561 * Caller must be holding the database lock. 1562 */ 1563 1564 rbtdb->least_serial = version->serial; 1565 *cleanup_list = version->changed_list; 1566 ISC_LIST_INIT(version->changed_list); 1567 } 1568 1569 static void 1570 cleanup_nondirty(dns_rbtdb_version_t *version, 1571 rbtdb_changedlist_t *cleanup_list) { 1572 rbtdb_changed_t *changed = NULL, *next_changed = NULL; 1573 1574 /* 1575 * If the changed record is dirty, then 1576 * an update created multiple versions of 1577 * a given rdataset. We keep this list 1578 * until we're the least open version, at 1579 * which point it's safe to get rid of any 1580 * older versions. 1581 * 1582 * If the changed record isn't dirty, then 1583 * we don't need it anymore since we're 1584 * committing and not rolling back. 1585 * 1586 * The caller must be holding the database lock. 1587 */ 1588 for (changed = HEAD(version->changed_list); changed != NULL; 1589 changed = next_changed) 1590 { 1591 next_changed = NEXT(changed, link); 1592 if (!changed->dirty) { 1593 UNLINK(version->changed_list, changed, link); 1594 APPEND(*cleanup_list, changed, link); 1595 } 1596 } 1597 } 1598 1599 void 1600 dns__rbtdb_setsecure(dns_db_t *db, dns_rbtdb_version_t *version, 1601 dns_dbnode_t *origin) { 1602 dns_rdataset_t keyset; 1603 dns_rdataset_t nsecset, signsecset; 1604 bool haszonekey = false; 1605 bool hasnsec = false; 1606 isc_result_t result; 1607 1608 REQUIRE(version != NULL); 1609 1610 dns_rdataset_init(&keyset); 1611 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey, 1612 0, 0, &keyset, NULL); 1613 if (result == ISC_R_SUCCESS) { 1614 result = dns_rdataset_first(&keyset); 1615 while (result == ISC_R_SUCCESS) { 1616 dns_rdata_t keyrdata = DNS_RDATA_INIT; 1617 dns_rdataset_current(&keyset, &keyrdata); 1618 if (dns_zonekey_iszonekey(&keyrdata)) { 1619 haszonekey = true; 1620 break; 1621 } 1622 result = dns_rdataset_next(&keyset); 1623 } 1624 dns_rdataset_disassociate(&keyset); 1625 } 1626 if (!haszonekey) { 1627 version->secure = false; 1628 version->havensec3 = false; 1629 return; 1630 } 1631 1632 dns_rdataset_init(&nsecset); 1633 dns_rdataset_init(&signsecset); 1634 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0, 1635 0, &nsecset, &signsecset); 1636 if (result == ISC_R_SUCCESS) { 1637 if (dns_rdataset_isassociated(&signsecset)) { 1638 hasnsec = true; 1639 dns_rdataset_disassociate(&signsecset); 1640 } 1641 dns_rdataset_disassociate(&nsecset); 1642 } 1643 1644 setnsec3parameters(db, version); 1645 1646 /* 1647 * Do we have a valid NSEC/NSEC3 chain? 1648 */ 1649 if (version->havensec3 || hasnsec) { 1650 version->secure = true; 1651 } else { 1652 version->secure = false; 1653 } 1654 } 1655 1656 /*%< 1657 * Walk the origin node looking for NSEC3PARAM records. 1658 * Cache the nsec3 parameters. 1659 */ 1660 static void 1661 setnsec3parameters(dns_db_t *db, dns_rbtdb_version_t *version) { 1662 dns_rbtnode_t *node = NULL; 1663 dns_rdata_nsec3param_t nsec3param; 1664 dns_rdata_t rdata = DNS_RDATA_INIT; 1665 isc_region_t region; 1666 isc_result_t result; 1667 dns_slabheader_t *header = NULL, *header_next = NULL; 1668 unsigned char *raw; /* RDATASLAB */ 1669 unsigned int count, length; 1670 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1671 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1672 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1673 1674 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 1675 version->havensec3 = false; 1676 node = rbtdb->origin_node; 1677 NODE_RDLOCK(&(rbtdb->node_locks[node->locknum].lock), &nlocktype); 1678 for (header = node->data; header != NULL; header = header_next) { 1679 header_next = header->next; 1680 do { 1681 if (header->serial <= version->serial && 1682 !IGNORE(header)) 1683 { 1684 if (NONEXISTENT(header)) { 1685 header = NULL; 1686 } 1687 break; 1688 } else { 1689 header = header->down; 1690 } 1691 } while (header != NULL); 1692 1693 if (header != NULL && 1694 (header->type == dns_rdatatype_nsec3param)) 1695 { 1696 /* 1697 * Find A NSEC3PARAM with a supported algorithm. 1698 */ 1699 raw = dns_slabheader_raw(header); 1700 count = raw[0] * 256 + raw[1]; /* count */ 1701 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 1702 while (count-- > 0U) { 1703 length = raw[0] * 256 + raw[1]; 1704 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 1705 region.base = raw; 1706 region.length = length; 1707 raw += length; 1708 dns_rdata_fromregion( 1709 &rdata, rbtdb->common.rdclass, 1710 dns_rdatatype_nsec3param, ®ion); 1711 result = dns_rdata_tostruct(&rdata, &nsec3param, 1712 NULL); 1713 INSIST(result == ISC_R_SUCCESS); 1714 dns_rdata_reset(&rdata); 1715 1716 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG && 1717 !dns_nsec3_supportedhash(nsec3param.hash)) 1718 { 1719 continue; 1720 } 1721 1722 if (nsec3param.flags != 0) { 1723 continue; 1724 } 1725 1726 memmove(version->salt, nsec3param.salt, 1727 nsec3param.salt_length); 1728 version->hash = nsec3param.hash; 1729 version->salt_length = nsec3param.salt_length; 1730 version->iterations = nsec3param.iterations; 1731 version->flags = nsec3param.flags; 1732 version->havensec3 = true; 1733 /* 1734 * Look for a better algorithm than the 1735 * unknown test algorithm. 1736 */ 1737 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) { 1738 goto unlock; 1739 } 1740 } 1741 } 1742 } 1743 unlock: 1744 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), &nlocktype); 1745 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 1746 } 1747 1748 static void 1749 cleanup_dead_nodes_callback(void *arg) { 1750 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; 1751 bool again = false; 1752 unsigned int locknum; 1753 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1754 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1755 1756 TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); 1757 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) { 1758 NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1759 cleanup_dead_nodes(rbtdb, locknum DNS__DB_FILELINE); 1760 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) { 1761 again = true; 1762 } 1763 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1764 } 1765 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 1766 if (again) { 1767 isc_async_run(rbtdb->loop, cleanup_dead_nodes_callback, rbtdb); 1768 } else { 1769 dns_db_detach((dns_db_t **)&rbtdb); 1770 } 1771 } 1772 1773 void 1774 dns__rbtdb_closeversion(dns_db_t *db, dns_dbversion_t **versionp, 1775 bool commit DNS__DB_FLARG) { 1776 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1777 dns_rbtdb_version_t *version = NULL, *cleanup_version = NULL; 1778 dns_rbtdb_version_t *least_greater = NULL; 1779 bool rollback = false; 1780 rbtdb_changedlist_t cleanup_list; 1781 dns_slabheaderlist_t resigned_list; 1782 rbtdb_changed_t *changed = NULL, *next_changed = NULL; 1783 uint32_t serial, least_serial; 1784 dns_rbtnode_t *rbtnode = NULL; 1785 dns_slabheader_t *header = NULL; 1786 1787 REQUIRE(VALID_RBTDB(rbtdb)); 1788 version = (dns_rbtdb_version_t *)*versionp; 1789 INSIST(version->rbtdb == rbtdb); 1790 1791 ISC_LIST_INIT(cleanup_list); 1792 ISC_LIST_INIT(resigned_list); 1793 1794 if (isc_refcount_decrement(&version->references) > 1) { 1795 /* typical and easy case first */ 1796 if (commit) { 1797 RWLOCK(&rbtdb->lock, isc_rwlocktype_read); 1798 INSIST(!version->writer); 1799 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); 1800 } 1801 goto end; 1802 } 1803 1804 /* 1805 * Update the zone's secure status in version before making 1806 * it the current version. 1807 */ 1808 if (version->writer && commit && !IS_CACHE(rbtdb)) { 1809 dns__rbtdb_setsecure(db, version, rbtdb->origin_node); 1810 } 1811 1812 RWLOCK(&rbtdb->lock, isc_rwlocktype_write); 1813 serial = version->serial; 1814 if (version->writer) { 1815 if (commit) { 1816 unsigned int cur_ref; 1817 dns_rbtdb_version_t *cur_version = NULL; 1818 1819 INSIST(version->commit_ok); 1820 INSIST(version == rbtdb->future_version); 1821 /* 1822 * The current version is going to be replaced. 1823 * Release the (likely last) reference to it from the 1824 * DB itself and unlink it from the open list. 1825 */ 1826 cur_version = rbtdb->current_version; 1827 cur_ref = isc_refcount_decrement( 1828 &cur_version->references); 1829 if (cur_ref == 1) { 1830 (void)isc_refcount_current( 1831 &cur_version->references); 1832 if (cur_version->serial == rbtdb->least_serial) 1833 { 1834 INSIST(EMPTY( 1835 cur_version->changed_list)); 1836 } 1837 UNLINK(rbtdb->open_versions, cur_version, link); 1838 } 1839 if (EMPTY(rbtdb->open_versions)) { 1840 /* 1841 * We're going to become the least open 1842 * version. 1843 */ 1844 make_least_version(rbtdb, version, 1845 &cleanup_list); 1846 } else { 1847 /* 1848 * Some other open version is the 1849 * least version. We can't cleanup 1850 * records that were changed in this 1851 * version because the older versions 1852 * may still be in use by an open 1853 * version. 1854 * 1855 * We can, however, discard the 1856 * changed records for things that 1857 * we've added that didn't exist in 1858 * prior versions. 1859 */ 1860 cleanup_nondirty(version, &cleanup_list); 1861 } 1862 /* 1863 * If the (soon to be former) current version 1864 * isn't being used by anyone, we can clean 1865 * it up. 1866 */ 1867 if (cur_ref == 1) { 1868 cleanup_version = cur_version; 1869 APPENDLIST(version->changed_list, 1870 cleanup_version->changed_list, link); 1871 } 1872 /* 1873 * Become the current version. 1874 */ 1875 version->writer = false; 1876 rbtdb->current_version = version; 1877 rbtdb->current_serial = version->serial; 1878 rbtdb->future_version = NULL; 1879 1880 /* 1881 * Keep the current version in the open list, and 1882 * gain a reference for the DB itself (see the DB 1883 * creation function below). This must be the only 1884 * case where we need to increment the counter from 1885 * zero and need to use isc_refcount_increment0(). 1886 */ 1887 INSIST(isc_refcount_increment0(&version->references) == 1888 0); 1889 PREPEND(rbtdb->open_versions, rbtdb->current_version, 1890 link); 1891 resigned_list = version->resigned_list; 1892 ISC_LIST_INIT(version->resigned_list); 1893 } else { 1894 /* 1895 * We're rolling back this transaction. 1896 */ 1897 cleanup_list = version->changed_list; 1898 ISC_LIST_INIT(version->changed_list); 1899 resigned_list = version->resigned_list; 1900 ISC_LIST_INIT(version->resigned_list); 1901 rollback = true; 1902 cleanup_version = version; 1903 rbtdb->future_version = NULL; 1904 } 1905 } else { 1906 if (version != rbtdb->current_version) { 1907 /* 1908 * There are no external or internal references 1909 * to this version and it can be cleaned up. 1910 */ 1911 cleanup_version = version; 1912 1913 /* 1914 * Find the version with the least serial 1915 * number greater than ours. 1916 */ 1917 least_greater = PREV(version, link); 1918 if (least_greater == NULL) { 1919 least_greater = rbtdb->current_version; 1920 } 1921 1922 INSIST(version->serial < least_greater->serial); 1923 /* 1924 * Is this the least open version? 1925 */ 1926 if (version->serial == rbtdb->least_serial) { 1927 /* 1928 * Yes. Install the new least open 1929 * version. 1930 */ 1931 make_least_version(rbtdb, least_greater, 1932 &cleanup_list); 1933 } else { 1934 /* 1935 * Add any unexecuted cleanups to 1936 * those of the least greater version. 1937 */ 1938 APPENDLIST(least_greater->changed_list, 1939 version->changed_list, link); 1940 } 1941 } else if (version->serial == rbtdb->least_serial) { 1942 INSIST(EMPTY(version->changed_list)); 1943 } 1944 UNLINK(rbtdb->open_versions, version, link); 1945 } 1946 least_serial = rbtdb->least_serial; 1947 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1948 1949 if (cleanup_version != NULL) { 1950 isc_refcount_destroy(&cleanup_version->references); 1951 INSIST(EMPTY(cleanup_version->changed_list)); 1952 free_gluetable(cleanup_version->glue_table); 1953 isc_rwlock_destroy(&cleanup_version->rwlock); 1954 isc_mem_put(rbtdb->common.mctx, cleanup_version, 1955 sizeof(*cleanup_version)); 1956 } 1957 1958 /* 1959 * Commit/rollback re-signed headers. 1960 */ 1961 for (header = HEAD(resigned_list); header != NULL; 1962 header = HEAD(resigned_list)) 1963 { 1964 isc_rwlock_t *lock = NULL; 1965 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1966 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1967 1968 ISC_LIST_UNLINK(resigned_list, header, link); 1969 1970 lock = &rbtdb->node_locks[RBTDB_HEADERNODE(header)->locknum] 1971 .lock; 1972 NODE_WRLOCK(lock, &nlocktype); 1973 if (rollback && !IGNORE(header)) { 1974 dns__zonerbt_resigninsert( 1975 rbtdb, RBTDB_HEADERNODE(header)->locknum, 1976 header); 1977 } 1978 dns__rbtdb_decref(rbtdb, RBTDB_HEADERNODE(header), least_serial, 1979 &nlocktype, &tlocktype, true, 1980 false DNS__DB_FLARG_PASS); 1981 NODE_UNLOCK(lock, &nlocktype); 1982 INSIST(tlocktype == isc_rwlocktype_none); 1983 } 1984 1985 if (!EMPTY(cleanup_list)) { 1986 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1987 1988 if (rbtdb->loop == NULL) { 1989 /* 1990 * We acquire a tree write lock here in order to make 1991 * sure that stale nodes will be removed in 1992 * dns__rbtdb_decref(). If we didn't have the lock, 1993 * those nodes could miss the chance to be removed 1994 * until the server stops. The write lock is 1995 * expensive, but this should be rare enough 1996 * to justify the cost. 1997 */ 1998 TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); 1999 } 2000 2001 for (changed = HEAD(cleanup_list); changed != NULL; 2002 changed = next_changed) 2003 { 2004 isc_rwlock_t *lock = NULL; 2005 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2006 2007 next_changed = NEXT(changed, link); 2008 rbtnode = changed->node; 2009 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 2010 2011 NODE_WRLOCK(lock, &nlocktype); 2012 /* 2013 * This is a good opportunity to purge any dead nodes, 2014 * so use it. 2015 */ 2016 if (rbtdb->loop == NULL) { 2017 cleanup_dead_nodes( 2018 rbtdb, 2019 rbtnode->locknum DNS__DB_FLARG_PASS); 2020 } 2021 2022 if (rollback) { 2023 rollback_node(rbtnode, serial); 2024 } 2025 dns__rbtdb_decref(rbtdb, rbtnode, least_serial, 2026 &nlocktype, &tlocktype, true, 2027 false DNS__DB_FILELINE); 2028 2029 NODE_UNLOCK(lock, &nlocktype); 2030 2031 isc_mem_put(rbtdb->common.mctx, changed, 2032 sizeof(*changed)); 2033 } 2034 if (rbtdb->loop != NULL) { 2035 dns_db_attach((dns_db_t *)rbtdb, &(dns_db_t *){ NULL }); 2036 isc_async_run(rbtdb->loop, cleanup_dead_nodes_callback, 2037 rbtdb); 2038 } else { 2039 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 2040 } 2041 2042 INSIST(tlocktype == isc_rwlocktype_none); 2043 } 2044 2045 end: 2046 *versionp = NULL; 2047 } 2048 2049 isc_result_t 2050 dns__rbtdb_findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, 2051 const dns_name_t *name, bool create, 2052 dns_dbnode_t **nodep DNS__DB_FLARG) { 2053 dns_rbtnode_t *node = NULL; 2054 dns_name_t nodename; 2055 isc_result_t result; 2056 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 2057 2058 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3); 2059 2060 dns_name_init(&nodename, NULL); 2061 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 2062 result = dns_rbt_findnode(tree, name, NULL, &node, NULL, 2063 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 2064 if (result != ISC_R_SUCCESS) { 2065 if (!create) { 2066 if (result == DNS_R_PARTIALMATCH) { 2067 result = ISC_R_NOTFOUND; 2068 } 2069 goto unlock; 2070 } 2071 /* 2072 * Try to upgrade the lock and if that fails unlock then relock. 2073 */ 2074 TREE_FORCEUPGRADE(&rbtdb->tree_lock, &tlocktype); 2075 node = NULL; 2076 result = dns_rbt_addnode(tree, name, &node); 2077 if (result == ISC_R_SUCCESS) { 2078 dns_rbt_namefromnode(node, &nodename); 2079 node->locknum = node->hashval % rbtdb->node_lock_count; 2080 if (tree == rbtdb->tree) { 2081 dns__zonerbt_addwildcards(rbtdb, name, true); 2082 2083 if (dns_name_iswildcard(name)) { 2084 result = dns__zonerbt_wildcardmagic( 2085 rbtdb, name, true); 2086 if (result != ISC_R_SUCCESS) { 2087 goto unlock; 2088 } 2089 } 2090 } 2091 if (tree == rbtdb->nsec3) { 2092 node->nsec = DNS_DB_NSEC_NSEC3; 2093 } 2094 } else if (result == ISC_R_EXISTS) { 2095 result = ISC_R_SUCCESS; 2096 } else { 2097 goto unlock; 2098 } 2099 } 2100 2101 if (tree == rbtdb->nsec3) { 2102 INSIST(node->nsec == DNS_DB_NSEC_NSEC3); 2103 } 2104 2105 reactivate_node(rbtdb, node, tlocktype DNS__DB_FLARG_PASS); 2106 2107 *nodep = (dns_dbnode_t *)node; 2108 unlock: 2109 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 2110 2111 return result; 2112 } 2113 2114 isc_result_t 2115 dns__rbtdb_findnode(dns_db_t *db, const dns_name_t *name, bool create, 2116 dns_dbnode_t **nodep DNS__DB_FLARG) { 2117 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2118 2119 REQUIRE(VALID_RBTDB(rbtdb)); 2120 2121 return dns__rbtdb_findnodeintree(rbtdb, rbtdb->tree, name, create, 2122 nodep DNS__DB_FLARG_PASS); 2123 } 2124 2125 void 2126 dns__rbtdb_bindrdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 2127 dns_slabheader_t *header, isc_stdtime_t now, 2128 isc_rwlocktype_t locktype, 2129 dns_rdataset_t *rdataset DNS__DB_FLARG) { 2130 bool stale = STALE(header); 2131 bool ancient = ANCIENT(header); 2132 2133 /* 2134 * Caller must be holding the node reader lock. 2135 * XXXJT: technically, we need a writer lock, since we'll increment 2136 * the header count below. However, since the actual counter value 2137 * doesn't matter, we prioritize performance here. (We may want to 2138 * use atomic increment when available). 2139 */ 2140 2141 if (rdataset == NULL) { 2142 return; 2143 } 2144 2145 dns__rbtdb_newref(rbtdb, node, locktype DNS__DB_FLARG_PASS); 2146 2147 INSIST(rdataset->methods == NULL); /* We must be disassociated. */ 2148 2149 /* 2150 * Mark header stale or ancient if the RRset is no longer active. 2151 */ 2152 if (!ACTIVE(header, now)) { 2153 dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, rbtdb); 2154 /* 2155 * If this data is in the stale window keep it and if 2156 * DNS_DBFIND_STALEOK is not set we tell the caller to 2157 * skip this record. We skip the records with ZEROTTL 2158 * (these records should not be cached anyway). 2159 */ 2160 2161 if (KEEPSTALE(rbtdb) && stale_ttl > now) { 2162 stale = true; 2163 } else { 2164 /* 2165 * We are not keeping stale, or it is outside the 2166 * stale window. Mark ancient, i.e. ready for cleanup. 2167 */ 2168 ancient = true; 2169 } 2170 } 2171 2172 rdataset->methods = &dns_rdataslab_rdatasetmethods; 2173 rdataset->rdclass = rbtdb->common.rdclass; 2174 rdataset->type = DNS_TYPEPAIR_TYPE(header->type); 2175 rdataset->covers = DNS_TYPEPAIR_COVERS(header->type); 2176 rdataset->ttl = header->ttl - now; 2177 rdataset->trust = header->trust; 2178 2179 if (NEGATIVE(header)) { 2180 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE; 2181 } 2182 if (NXDOMAIN(header)) { 2183 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN; 2184 } 2185 if (OPTOUT(header)) { 2186 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT; 2187 } 2188 if (PREFETCH(header)) { 2189 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; 2190 } 2191 2192 if (stale && !ancient) { 2193 dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, rbtdb); 2194 if (stale_ttl > now) { 2195 rdataset->ttl = stale_ttl - now; 2196 } else { 2197 rdataset->ttl = 0; 2198 } 2199 if (STALE_WINDOW(header)) { 2200 rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; 2201 } 2202 rdataset->attributes |= DNS_RDATASETATTR_STALE; 2203 } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { 2204 rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; 2205 rdataset->ttl = header->ttl; 2206 } 2207 2208 rdataset->count = atomic_fetch_add_relaxed(&header->count, 1); 2209 2210 rdataset->slab.db = (dns_db_t *)rbtdb; 2211 rdataset->slab.node = (dns_dbnode_t *)node; 2212 rdataset->slab.raw = dns_slabheader_raw(header); 2213 rdataset->slab.iter_pos = NULL; 2214 rdataset->slab.iter_count = 0; 2215 2216 /* 2217 * Add noqname proof. 2218 */ 2219 rdataset->slab.noqname = header->noqname; 2220 if (header->noqname != NULL) { 2221 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME; 2222 } 2223 rdataset->slab.closest = header->closest; 2224 if (header->closest != NULL) { 2225 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST; 2226 } 2227 2228 /* 2229 * Copy out re-signing information. 2230 */ 2231 if (RESIGN(header)) { 2232 rdataset->attributes |= DNS_RDATASETATTR_RESIGN; 2233 rdataset->resign = (header->resign << 1) | header->resign_lsb; 2234 } else { 2235 rdataset->resign = 0; 2236 } 2237 } 2238 2239 void 2240 dns__rbtdb_attachnode(dns_db_t *db, dns_dbnode_t *source, 2241 dns_dbnode_t **targetp DNS__DB_FLARG) { 2242 REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); 2243 REQUIRE(targetp != NULL && *targetp == NULL); 2244 2245 dns_rbtnode_t *node = (dns_rbtnode_t *)source; 2246 uint_fast32_t refs = isc_refcount_increment(&node->references); 2247 2248 #if DNS_DB_NODETRACE 2249 fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", 2250 func, file, line, node, refs + 1); 2251 #else 2252 UNUSED(refs); 2253 #endif 2254 2255 *targetp = source; 2256 } 2257 2258 void 2259 dns__rbtdb_detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG) { 2260 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2261 dns_rbtnode_t *node = NULL; 2262 bool want_free = false; 2263 bool inactive = false; 2264 db_nodelock_t *nodelock = NULL; 2265 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 2266 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 2267 2268 REQUIRE(VALID_RBTDB(rbtdb)); 2269 REQUIRE(targetp != NULL && *targetp != NULL); 2270 2271 node = (dns_rbtnode_t *)(*targetp); 2272 nodelock = &rbtdb->node_locks[node->locknum]; 2273 2274 NODE_RDLOCK(&nodelock->lock, &nlocktype); 2275 2276 if (dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true, 2277 false DNS__DB_FLARG_PASS)) 2278 { 2279 if (isc_refcount_current(&nodelock->references) == 0 && 2280 nodelock->exiting) 2281 { 2282 inactive = true; 2283 } 2284 } 2285 2286 NODE_UNLOCK(&nodelock->lock, &nlocktype); 2287 INSIST(tlocktype == isc_rwlocktype_none); 2288 2289 *targetp = NULL; 2290 2291 if (inactive) { 2292 RWLOCK(&rbtdb->lock, isc_rwlocktype_write); 2293 rbtdb->active--; 2294 if (rbtdb->active == 0) { 2295 want_free = true; 2296 } 2297 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); 2298 if (want_free) { 2299 char buf[DNS_NAME_FORMATSIZE]; 2300 if (dns_name_dynamic(&rbtdb->common.origin)) { 2301 dns_name_format(&rbtdb->common.origin, buf, 2302 sizeof(buf)); 2303 } else { 2304 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 2305 } 2306 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 2307 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 2308 "calling free_rbtdb(%s)", buf); 2309 free_rbtdb(rbtdb, true); 2310 } 2311 } 2312 } 2313 2314 isc_result_t 2315 dns__rbtdb_createiterator(dns_db_t *db, unsigned int options, 2316 dns_dbiterator_t **iteratorp) { 2317 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2318 rbtdb_dbiterator_t *rbtdbiter = NULL; 2319 2320 REQUIRE(VALID_RBTDB(rbtdb)); 2321 REQUIRE((options & (DNS_DB_NSEC3ONLY | DNS_DB_NONSEC3)) != 2322 (DNS_DB_NSEC3ONLY | DNS_DB_NONSEC3)); 2323 2324 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter)); 2325 2326 rbtdbiter->common.methods = &dbiterator_methods; 2327 rbtdbiter->common.db = NULL; 2328 dns_db_attach(db, &rbtdbiter->common.db); 2329 rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) != 2330 0); 2331 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC; 2332 rbtdbiter->paused = true; 2333 rbtdbiter->tree_locked = isc_rwlocktype_none; 2334 rbtdbiter->result = ISC_R_SUCCESS; 2335 dns_fixedname_init(&rbtdbiter->name); 2336 dns_fixedname_init(&rbtdbiter->origin); 2337 rbtdbiter->node = NULL; 2338 if ((options & DNS_DB_NSEC3ONLY) != 0) { 2339 rbtdbiter->nsec3mode = nsec3only; 2340 } else if ((options & DNS_DB_NONSEC3) != 0) { 2341 rbtdbiter->nsec3mode = nonsec3; 2342 } else { 2343 rbtdbiter->nsec3mode = full; 2344 } 2345 dns_rbtnodechain_init(&rbtdbiter->chain); 2346 dns_rbtnodechain_init(&rbtdbiter->nsec3chain); 2347 if (rbtdbiter->nsec3mode == nsec3only) { 2348 rbtdbiter->current = &rbtdbiter->nsec3chain; 2349 } else { 2350 rbtdbiter->current = &rbtdbiter->chain; 2351 } 2352 2353 *iteratorp = (dns_dbiterator_t *)rbtdbiter; 2354 2355 return ISC_R_SUCCESS; 2356 } 2357 2358 isc_result_t 2359 dns__rbtdb_allrdatasets(dns_db_t *db, dns_dbnode_t *node, 2360 dns_dbversion_t *version, unsigned int options, 2361 isc_stdtime_t now, 2362 dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { 2363 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2364 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 2365 dns_rbtdb_version_t *rbtversion = version; 2366 rbtdb_rdatasetiter_t *iterator = NULL; 2367 uint_fast32_t refs; 2368 2369 REQUIRE(VALID_RBTDB(rbtdb)); 2370 2371 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator)); 2372 2373 if ((db->attributes & DNS_DBATTR_CACHE) == 0) { 2374 now = 0; 2375 if (rbtversion == NULL) { 2376 dns__rbtdb_currentversion( 2377 db, (dns_dbversion_t **)(void *)(&rbtversion)); 2378 } else { 2379 INSIST(rbtversion->rbtdb == rbtdb); 2380 2381 (void)isc_refcount_increment(&rbtversion->references); 2382 } 2383 } else { 2384 if (now == 0) { 2385 now = isc_stdtime_now(); 2386 } 2387 rbtversion = NULL; 2388 } 2389 2390 iterator->common.magic = DNS_RDATASETITER_MAGIC; 2391 iterator->common.methods = &rdatasetiter_methods; 2392 iterator->common.db = db; 2393 iterator->common.node = node; 2394 iterator->common.version = (dns_dbversion_t *)rbtversion; 2395 iterator->common.options = options; 2396 iterator->common.now = now; 2397 2398 refs = isc_refcount_increment(&rbtnode->references); 2399 #if DNS_DB_NODETRACE 2400 fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", 2401 func, file, line, node, refs + 1); 2402 #else 2403 UNUSED(refs); 2404 #endif 2405 2406 iterator->current = NULL; 2407 2408 *iteratorp = (dns_rdatasetiter_t *)iterator; 2409 2410 return ISC_R_SUCCESS; 2411 } 2412 2413 static bool 2414 cname_and_other_data(dns_rbtnode_t *node, uint32_t serial) { 2415 dns_slabheader_t *header = NULL, *header_next = NULL; 2416 bool cname = false, other_data = false; 2417 dns_rdatatype_t rdtype; 2418 2419 /* 2420 * The caller must hold the node lock. 2421 */ 2422 2423 /* 2424 * Look for CNAME and "other data" rdatasets active in our version. 2425 */ 2426 for (header = node->data; header != NULL; header = header_next) { 2427 header_next = header->next; 2428 if (!prio_type(header->type)) { 2429 /* 2430 * CNAME is in the priority list, so if we are done 2431 * with the priority list, we know there will not be 2432 * CNAME, so we are safe to skip the rest of the types. 2433 */ 2434 return false; 2435 } 2436 if (header->type == dns_rdatatype_cname) { 2437 /* 2438 * Look for an active extant CNAME. 2439 */ 2440 do { 2441 if (header->serial <= serial && !IGNORE(header)) 2442 { 2443 /* 2444 * Is this a "this rdataset doesn't 2445 * exist" record? 2446 */ 2447 if (NONEXISTENT(header)) { 2448 header = NULL; 2449 } 2450 break; 2451 } else { 2452 header = header->down; 2453 } 2454 } while (header != NULL); 2455 if (header != NULL) { 2456 cname = true; 2457 } 2458 } else { 2459 /* 2460 * Look for active extant "other data". 2461 * 2462 * "Other data" is any rdataset whose type is not 2463 * KEY, NSEC, SIG or RRSIG. 2464 */ 2465 rdtype = DNS_TYPEPAIR_TYPE(header->type); 2466 if (rdtype != dns_rdatatype_key && 2467 rdtype != dns_rdatatype_sig && 2468 rdtype != dns_rdatatype_nsec && 2469 rdtype != dns_rdatatype_rrsig) 2470 { 2471 /* 2472 * Is it active and extant? 2473 */ 2474 do { 2475 if (header->serial <= serial && 2476 !IGNORE(header)) 2477 { 2478 /* 2479 * Is this a "this rdataset 2480 * doesn't exist" record? 2481 */ 2482 if (NONEXISTENT(header)) { 2483 header = NULL; 2484 } 2485 break; 2486 } else { 2487 header = header->down; 2488 } 2489 } while (header != NULL); 2490 if (header != NULL) { 2491 other_data = true; 2492 } 2493 } 2494 } 2495 if (cname && other_data) { 2496 return true; 2497 } 2498 } 2499 2500 return false; 2501 } 2502 2503 static uint64_t 2504 recordsize(dns_slabheader_t *header, unsigned int namelen) { 2505 return dns_rdataslab_rdatasize((unsigned char *)header, 2506 sizeof(*header)) + 2507 sizeof(dns_ttl_t) + sizeof(dns_rdatatype_t) + 2508 sizeof(dns_rdataclass_t) + namelen; 2509 } 2510 2511 static void 2512 update_recordsandxfrsize(bool add, dns_rbtdb_version_t *rbtversion, 2513 dns_slabheader_t *header, unsigned int namelen) { 2514 unsigned char *hdr = (unsigned char *)header; 2515 size_t hdrsize = sizeof(*header); 2516 2517 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_write); 2518 if (add) { 2519 rbtversion->records += dns_rdataslab_count(hdr, hdrsize); 2520 rbtversion->xfrsize += recordsize(header, namelen); 2521 } else { 2522 rbtversion->records -= dns_rdataslab_count(hdr, hdrsize); 2523 rbtversion->xfrsize -= recordsize(header, namelen); 2524 } 2525 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write); 2526 } 2527 2528 static bool 2529 overmaxtype(dns_rbtdb_t *rbtdb, uint32_t ntypes) { 2530 if (rbtdb->maxtypepername == 0) { 2531 return false; 2532 } 2533 2534 return ntypes >= rbtdb->maxtypepername; 2535 } 2536 2537 static bool 2538 prio_header(dns_slabheader_t *header) { 2539 if (NEGATIVE(header) && prio_type(DNS_TYPEPAIR_COVERS(header->type))) { 2540 return true; 2541 } 2542 2543 return prio_type(header->type); 2544 } 2545 2546 isc_result_t 2547 dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, 2548 const dns_name_t *nodename, dns_rbtdb_version_t *rbtversion, 2549 dns_slabheader_t *newheader, unsigned int options, bool loading, 2550 dns_rdataset_t *addedrdataset, isc_stdtime_t now DNS__DB_FLARG) { 2551 rbtdb_changed_t *changed = NULL; 2552 dns_slabheader_t *topheader = NULL, *topheader_prev = NULL; 2553 dns_slabheader_t *header = NULL, *sigheader = NULL; 2554 dns_slabheader_t *prioheader = NULL, *expireheader = NULL; 2555 unsigned char *merged = NULL; 2556 isc_result_t result; 2557 bool header_nx; 2558 bool newheader_nx; 2559 bool merge; 2560 dns_rdatatype_t rdtype, covers; 2561 dns_typepair_t negtype = 0, sigtype; 2562 dns_trust_t trust; 2563 int idx; 2564 uint32_t ntypes = 0; 2565 2566 if ((options & DNS_DBADD_MERGE) != 0) { 2567 REQUIRE(rbtversion != NULL); 2568 merge = true; 2569 } else { 2570 merge = false; 2571 } 2572 2573 if ((options & DNS_DBADD_FORCE) != 0) { 2574 trust = dns_trust_ultimate; 2575 } else { 2576 trust = newheader->trust; 2577 } 2578 2579 if (rbtversion != NULL && !loading) { 2580 /* 2581 * We always add a changed record, even if no changes end up 2582 * being made to this node, because it's harmless and 2583 * simplifies the code. 2584 */ 2585 changed = add_changed(newheader, rbtversion DNS__DB_FLARG_PASS); 2586 if (changed == NULL) { 2587 dns_slabheader_destroy(&newheader); 2588 return ISC_R_NOMEMORY; 2589 } 2590 } 2591 2592 newheader_nx = NONEXISTENT(newheader) ? true : false; 2593 if (rbtversion == NULL && !newheader_nx) { 2594 rdtype = DNS_TYPEPAIR_TYPE(newheader->type); 2595 covers = DNS_TYPEPAIR_COVERS(newheader->type); 2596 sigtype = DNS_SIGTYPE(covers); 2597 if (NEGATIVE(newheader)) { 2598 /* 2599 * We're adding a negative cache entry. 2600 */ 2601 if (covers == dns_rdatatype_any) { 2602 /* 2603 * If we're adding an negative cache entry 2604 * which covers all types (NXDOMAIN, 2605 * NODATA(QTYPE=ANY)), 2606 * 2607 * We make all other data ancient so that the 2608 * only rdataset that can be found at this 2609 * node is the negative cache entry. 2610 */ 2611 for (topheader = rbtnode->data; 2612 topheader != NULL; 2613 topheader = topheader->next) 2614 { 2615 mark_ancient(topheader); 2616 } 2617 goto find_header; 2618 } 2619 /* 2620 * Otherwise look for any RRSIGs of the given 2621 * type so they can be marked ancient later. 2622 */ 2623 for (topheader = rbtnode->data; topheader != NULL; 2624 topheader = topheader->next) 2625 { 2626 if (topheader->type == sigtype) { 2627 sigheader = topheader; 2628 break; 2629 } 2630 } 2631 negtype = DNS_TYPEPAIR_VALUE(covers, 0); 2632 } else { 2633 /* 2634 * We're adding something that isn't a 2635 * negative cache entry. Look for an extant 2636 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative 2637 * cache entry. If we're adding an RRSIG, also 2638 * check for an extant non-ancient NODATA ncache 2639 * entry which covers the same type as the RRSIG. 2640 */ 2641 for (topheader = rbtnode->data; topheader != NULL; 2642 topheader = topheader->next) 2643 { 2644 if ((topheader->type == RDATATYPE_NCACHEANY) || 2645 (newheader->type == sigtype && 2646 topheader->type == 2647 DNS_TYPEPAIR_VALUE(0, covers))) 2648 { 2649 break; 2650 } 2651 } 2652 if (topheader != NULL && EXISTS(topheader) && 2653 ACTIVE(topheader, now)) 2654 { 2655 /* 2656 * Found one. 2657 */ 2658 if (trust < topheader->trust) { 2659 /* 2660 * The NXDOMAIN/NODATA(QTYPE=ANY) 2661 * is more trusted. 2662 */ 2663 dns_slabheader_destroy(&newheader); 2664 if (addedrdataset != NULL) { 2665 dns__rbtdb_bindrdataset( 2666 rbtdb, rbtnode, 2667 topheader, now, 2668 isc_rwlocktype_write, 2669 addedrdataset 2670 DNS__DB_FLARG_PASS); 2671 } 2672 return DNS_R_UNCHANGED; 2673 } 2674 /* 2675 * The new rdataset is better. Expire the 2676 * ncache entry. 2677 */ 2678 mark_ancient(topheader); 2679 topheader = NULL; 2680 goto find_header; 2681 } 2682 negtype = DNS_TYPEPAIR_VALUE(0, rdtype); 2683 } 2684 } 2685 2686 for (topheader = rbtnode->data; topheader != NULL; 2687 topheader = topheader->next) 2688 { 2689 if (IS_CACHE(rbtdb) && ACTIVE(topheader, now)) { 2690 ++ntypes; 2691 expireheader = topheader; 2692 } else if (!IS_CACHE(rbtdb)) { 2693 ++ntypes; 2694 } 2695 if (prio_header(topheader)) { 2696 prioheader = topheader; 2697 } 2698 if (topheader->type == newheader->type || 2699 topheader->type == negtype) 2700 { 2701 break; 2702 } 2703 topheader_prev = topheader; 2704 } 2705 2706 find_header: 2707 /* 2708 * If header isn't NULL, we've found the right type. There may be 2709 * IGNORE rdatasets between the top of the chain and the first real 2710 * data. We skip over them. 2711 */ 2712 header = topheader; 2713 while (header != NULL && IGNORE(header)) { 2714 header = header->down; 2715 } 2716 if (header != NULL) { 2717 header_nx = NONEXISTENT(header) ? true : false; 2718 2719 /* 2720 * Deleting an already non-existent rdataset has no effect. 2721 */ 2722 if (header_nx && newheader_nx) { 2723 dns_slabheader_destroy(&newheader); 2724 return DNS_R_UNCHANGED; 2725 } 2726 2727 /* 2728 * Trying to add an rdataset with lower trust to a cache 2729 * DB has no effect, provided that the cache data isn't 2730 * stale. If the cache data is stale, new lower trust 2731 * data will supersede it below. Unclear what the best 2732 * policy is here. 2733 */ 2734 if (rbtversion == NULL && trust < header->trust && 2735 (ACTIVE(header, now) || header_nx)) 2736 { 2737 dns_slabheader_destroy(&newheader); 2738 if (addedrdataset != NULL) { 2739 dns__rbtdb_bindrdataset( 2740 rbtdb, rbtnode, header, now, 2741 isc_rwlocktype_write, 2742 addedrdataset DNS__DB_FLARG_PASS); 2743 } 2744 return DNS_R_UNCHANGED; 2745 } 2746 2747 /* 2748 * Don't merge if a nonexistent rdataset is involved. 2749 */ 2750 if (merge && (header_nx || newheader_nx)) { 2751 merge = false; 2752 } 2753 2754 /* 2755 * If 'merge' is true, we'll try to create a new rdataset 2756 * that is the union of 'newheader' and 'header'. 2757 */ 2758 if (merge) { 2759 unsigned int flags = 0; 2760 INSIST(rbtversion->serial >= header->serial); 2761 merged = NULL; 2762 result = ISC_R_SUCCESS; 2763 2764 if ((options & DNS_DBADD_EXACT) != 0) { 2765 flags |= DNS_RDATASLAB_EXACT; 2766 } 2767 /* 2768 * TTL use here is irrelevant to the cache; 2769 * merge is only done with zonedbs. 2770 */ 2771 if ((options & DNS_DBADD_EXACTTTL) != 0 && 2772 newheader->ttl != header->ttl) 2773 { 2774 result = DNS_R_NOTEXACT; 2775 } else if (newheader->ttl != header->ttl) { 2776 flags |= DNS_RDATASLAB_FORCE; 2777 } 2778 if (result == ISC_R_SUCCESS) { 2779 result = dns_rdataslab_merge( 2780 (unsigned char *)header, 2781 (unsigned char *)newheader, 2782 (unsigned int)(sizeof(*newheader)), 2783 rbtdb->common.mctx, 2784 rbtdb->common.rdclass, 2785 (dns_rdatatype_t)header->type, flags, 2786 rbtdb->maxrrperset, &merged); 2787 } 2788 if (result == ISC_R_SUCCESS) { 2789 /* 2790 * If 'header' has the same serial number as 2791 * we do, we could clean it up now if we knew 2792 * that our caller had no references to it. 2793 * We don't know this, however, so we leave it 2794 * alone. It will get cleaned up when 2795 * clean_zone_node() runs. 2796 */ 2797 dns_slabheader_destroy(&newheader); 2798 newheader = (dns_slabheader_t *)merged; 2799 dns_slabheader_reset(newheader, 2800 (dns_db_t *)rbtdb, 2801 (dns_dbnode_t *)rbtnode); 2802 dns_slabheader_copycase(newheader, header); 2803 if (loading && RESIGN(newheader) && 2804 RESIGN(header) && 2805 resign_sooner(header, newheader)) 2806 { 2807 newheader->resign = header->resign; 2808 newheader->resign_lsb = 2809 header->resign_lsb; 2810 } 2811 } else { 2812 if (result == DNS_R_TOOMANYRECORDS) { 2813 dns__db_logtoomanyrecords( 2814 (dns_db_t *)rbtdb, nodename, 2815 (dns_rdatatype_t)(header->type), 2816 "updating", rbtdb->maxrrperset); 2817 } 2818 dns_slabheader_destroy(&newheader); 2819 return result; 2820 } 2821 } 2822 /* 2823 * Don't replace existing NS, A and AAAA RRsets in the 2824 * cache if they are already exist. This prevents named 2825 * being locked to old servers. Don't lower trust of 2826 * existing record if the update is forced. Nothing 2827 * special to be done w.r.t stale data; it gets replaced 2828 * normally further down. 2829 */ 2830 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 2831 header->type == dns_rdatatype_ns && !header_nx && 2832 !newheader_nx && header->trust >= newheader->trust && 2833 dns_rdataslab_equalx((unsigned char *)header, 2834 (unsigned char *)newheader, 2835 (unsigned int)(sizeof(*newheader)), 2836 rbtdb->common.rdclass, 2837 (dns_rdatatype_t)header->type)) 2838 { 2839 /* 2840 * Honour the new ttl if it is less than the 2841 * older one. 2842 */ 2843 if (header->ttl > newheader->ttl) { 2844 dns__rbtdb_setttl(header, newheader->ttl); 2845 } 2846 if (header->last_used != now) { 2847 ISC_LIST_UNLINK( 2848 rbtdb->lru[RBTDB_HEADERNODE(header) 2849 ->locknum], 2850 header, link); 2851 header->last_used = now; 2852 ISC_LIST_PREPEND( 2853 rbtdb->lru[RBTDB_HEADERNODE(header) 2854 ->locknum], 2855 header, link); 2856 } 2857 if (header->noqname == NULL && 2858 newheader->noqname != NULL) 2859 { 2860 header->noqname = newheader->noqname; 2861 newheader->noqname = NULL; 2862 } 2863 if (header->closest == NULL && 2864 newheader->closest != NULL) 2865 { 2866 header->closest = newheader->closest; 2867 newheader->closest = NULL; 2868 } 2869 dns_slabheader_destroy(&newheader); 2870 if (addedrdataset != NULL) { 2871 dns__rbtdb_bindrdataset( 2872 rbtdb, rbtnode, header, now, 2873 isc_rwlocktype_write, 2874 addedrdataset DNS__DB_FLARG_PASS); 2875 } 2876 return ISC_R_SUCCESS; 2877 } 2878 2879 /* 2880 * If we have will be replacing a NS RRset force its TTL 2881 * to be no more than the current NS RRset's TTL. This 2882 * ensures the delegations that are withdrawn are honoured. 2883 */ 2884 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 2885 header->type == dns_rdatatype_ns && !header_nx && 2886 !newheader_nx && header->trust <= newheader->trust) 2887 { 2888 if (newheader->ttl > header->ttl) { 2889 newheader->ttl = header->ttl; 2890 } 2891 } 2892 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 2893 (options & DNS_DBADD_PREFETCH) == 0 && 2894 (header->type == dns_rdatatype_a || 2895 header->type == dns_rdatatype_aaaa || 2896 header->type == dns_rdatatype_ds || 2897 header->type == DNS_SIGTYPE(dns_rdatatype_ds)) && 2898 !header_nx && !newheader_nx && 2899 header->trust >= newheader->trust && 2900 dns_rdataslab_equal((unsigned char *)header, 2901 (unsigned char *)newheader, 2902 (unsigned int)(sizeof(*newheader)))) 2903 { 2904 /* 2905 * Honour the new ttl if it is less than the 2906 * older one. 2907 */ 2908 if (header->ttl > newheader->ttl) { 2909 dns__rbtdb_setttl(header, newheader->ttl); 2910 } 2911 if (header->last_used != now) { 2912 ISC_LIST_UNLINK( 2913 rbtdb->lru[RBTDB_HEADERNODE(header) 2914 ->locknum], 2915 header, link); 2916 header->last_used = now; 2917 ISC_LIST_PREPEND( 2918 rbtdb->lru[RBTDB_HEADERNODE(header) 2919 ->locknum], 2920 header, link); 2921 } 2922 if (header->noqname == NULL && 2923 newheader->noqname != NULL) 2924 { 2925 header->noqname = newheader->noqname; 2926 newheader->noqname = NULL; 2927 } 2928 if (header->closest == NULL && 2929 newheader->closest != NULL) 2930 { 2931 header->closest = newheader->closest; 2932 newheader->closest = NULL; 2933 } 2934 dns_slabheader_destroy(&newheader); 2935 if (addedrdataset != NULL) { 2936 dns__rbtdb_bindrdataset( 2937 rbtdb, rbtnode, header, now, 2938 isc_rwlocktype_write, 2939 addedrdataset DNS__DB_FLARG_PASS); 2940 } 2941 return ISC_R_SUCCESS; 2942 } 2943 INSIST(rbtversion == NULL || 2944 rbtversion->serial >= topheader->serial); 2945 if (loading) { 2946 newheader->down = NULL; 2947 idx = RBTDB_HEADERNODE(newheader)->locknum; 2948 if (IS_CACHE(rbtdb)) { 2949 if (ZEROTTL(newheader)) { 2950 newheader->last_used = 2951 rbtdb->last_used + 1; 2952 ISC_LIST_APPEND(rbtdb->lru[idx], 2953 newheader, link); 2954 } else { 2955 ISC_LIST_PREPEND(rbtdb->lru[idx], 2956 newheader, link); 2957 } 2958 INSIST(rbtdb->heaps != NULL); 2959 isc_heap_insert(rbtdb->heaps[idx], newheader); 2960 newheader->heap = rbtdb->heaps[idx]; 2961 } else if (RESIGN(newheader)) { 2962 dns__zonerbt_resigninsert(rbtdb, idx, 2963 newheader); 2964 /* 2965 * Don't call resigndelete, we don't need 2966 * to reverse the delete. The free_slabheader 2967 * call below will clean up the heap entry. 2968 */ 2969 } 2970 2971 /* 2972 * There are no other references to 'header' when 2973 * loading, so we MAY clean up 'header' now. 2974 * Since we don't generate changed records when 2975 * loading, we MUST clean up 'header' now. 2976 */ 2977 if (topheader_prev != NULL) { 2978 topheader_prev->next = newheader; 2979 } else { 2980 rbtnode->data = newheader; 2981 } 2982 newheader->next = topheader->next; 2983 if (rbtversion != NULL && !header_nx) { 2984 update_recordsandxfrsize(false, rbtversion, 2985 header, 2986 nodename->length); 2987 } 2988 dns_slabheader_destroy(&header); 2989 } else { 2990 idx = RBTDB_HEADERNODE(newheader)->locknum; 2991 if (IS_CACHE(rbtdb)) { 2992 INSIST(rbtdb->heaps != NULL); 2993 isc_heap_insert(rbtdb->heaps[idx], newheader); 2994 newheader->heap = rbtdb->heaps[idx]; 2995 if (ZEROTTL(newheader)) { 2996 newheader->last_used = 2997 rbtdb->last_used + 1; 2998 ISC_LIST_APPEND(rbtdb->lru[idx], 2999 newheader, link); 3000 } else { 3001 ISC_LIST_PREPEND(rbtdb->lru[idx], 3002 newheader, link); 3003 } 3004 } else if (RESIGN(newheader)) { 3005 dns__zonerbt_resigninsert(rbtdb, idx, 3006 newheader); 3007 dns__zonerbt_resigndelete( 3008 rbtdb, rbtversion, 3009 header DNS__DB_FLARG_PASS); 3010 } 3011 if (topheader_prev != NULL) { 3012 topheader_prev->next = newheader; 3013 } else { 3014 rbtnode->data = newheader; 3015 } 3016 newheader->next = topheader->next; 3017 newheader->down = topheader; 3018 topheader->next = newheader; 3019 rbtnode->dirty = 1; 3020 if (changed != NULL) { 3021 changed->dirty = true; 3022 } 3023 if (rbtversion == NULL) { 3024 mark_ancient(header); 3025 if (sigheader != NULL) { 3026 mark_ancient(sigheader); 3027 } 3028 } 3029 if (rbtversion != NULL && !header_nx) { 3030 update_recordsandxfrsize(false, rbtversion, 3031 header, 3032 nodename->length); 3033 } 3034 } 3035 } else { 3036 /* 3037 * No non-IGNORED rdatasets of the given type exist at 3038 * this node. 3039 */ 3040 3041 /* 3042 * If we're trying to delete the type, don't bother. 3043 */ 3044 if (newheader_nx) { 3045 dns_slabheader_destroy(&newheader); 3046 return DNS_R_UNCHANGED; 3047 } 3048 3049 idx = RBTDB_HEADERNODE(newheader)->locknum; 3050 if (IS_CACHE(rbtdb)) { 3051 isc_heap_insert(rbtdb->heaps[idx], newheader); 3052 newheader->heap = rbtdb->heaps[idx]; 3053 if (ZEROTTL(newheader)) { 3054 ISC_LIST_APPEND(rbtdb->lru[idx], newheader, 3055 link); 3056 } else { 3057 ISC_LIST_PREPEND(rbtdb->lru[idx], newheader, 3058 link); 3059 } 3060 } else if (RESIGN(newheader)) { 3061 dns__zonerbt_resigninsert(rbtdb, idx, newheader); 3062 dns__zonerbt_resigndelete(rbtdb, rbtversion, 3063 header DNS__DB_FLARG_PASS); 3064 } 3065 3066 if (topheader != NULL) { 3067 /* 3068 * We have an list of rdatasets of the given type, 3069 * but they're all marked IGNORE. We simply insert 3070 * the new rdataset at the head of the list. 3071 * 3072 * Ignored rdatasets cannot occur during loading, so 3073 * we INSIST on it. 3074 */ 3075 INSIST(!loading); 3076 INSIST(rbtversion == NULL || 3077 rbtversion->serial >= topheader->serial); 3078 if (topheader_prev != NULL) { 3079 topheader_prev->next = newheader; 3080 } else { 3081 rbtnode->data = newheader; 3082 } 3083 newheader->next = topheader->next; 3084 newheader->down = topheader; 3085 topheader->next = newheader; 3086 rbtnode->dirty = 1; 3087 if (changed != NULL) { 3088 changed->dirty = true; 3089 } 3090 } else { 3091 /* 3092 * No rdatasets of the given type exist at the node. 3093 */ 3094 INSIST(newheader->down == NULL); 3095 3096 if (!IS_CACHE(rbtdb) && overmaxtype(rbtdb, ntypes)) { 3097 dns_slabheader_destroy(&newheader); 3098 return DNS_R_TOOMANYRECORDS; 3099 } 3100 3101 if (prio_header(newheader)) { 3102 /* This is a priority type, prepend it */ 3103 newheader->next = rbtnode->data; 3104 rbtnode->data = newheader; 3105 } else if (prioheader != NULL) { 3106 /* Append after the priority headers */ 3107 newheader->next = prioheader->next; 3108 prioheader->next = newheader; 3109 } else { 3110 /* There were no priority headers */ 3111 newheader->next = rbtnode->data; 3112 rbtnode->data = newheader; 3113 } 3114 3115 if (IS_CACHE(rbtdb) && overmaxtype(rbtdb, ntypes)) { 3116 if (expireheader == NULL) { 3117 expireheader = newheader; 3118 } 3119 if (NEGATIVE(newheader) && 3120 !prio_header(newheader)) 3121 { 3122 /* 3123 * Add the new non-priority negative 3124 * header to the database only 3125 * temporarily. 3126 */ 3127 expireheader = newheader; 3128 } 3129 3130 mark_ancient(expireheader); 3131 /* 3132 * FIXME: In theory, we should mark the RRSIG 3133 * and the header at the same time, but there is 3134 * no direct link between those two header, so 3135 * we would have to check the whole list again. 3136 */ 3137 } 3138 } 3139 } 3140 3141 if (rbtversion != NULL && !newheader_nx) { 3142 update_recordsandxfrsize(true, rbtversion, newheader, 3143 nodename->length); 3144 } 3145 3146 /* 3147 * Check if the node now contains CNAME and other data. 3148 */ 3149 if (rbtversion != NULL && 3150 cname_and_other_data(rbtnode, rbtversion->serial)) 3151 { 3152 return DNS_R_CNAMEANDOTHER; 3153 } 3154 3155 if (addedrdataset != NULL) { 3156 dns__rbtdb_bindrdataset(rbtdb, rbtnode, newheader, now, 3157 isc_rwlocktype_write, 3158 addedrdataset DNS__DB_FLARG_PASS); 3159 } 3160 3161 return ISC_R_SUCCESS; 3162 } 3163 3164 static bool 3165 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, dns_typepair_t type) { 3166 if (IS_CACHE(rbtdb)) { 3167 if (type == dns_rdatatype_dname) { 3168 return true; 3169 } else { 3170 return false; 3171 } 3172 } else if (type == dns_rdatatype_dname || 3173 (type == dns_rdatatype_ns && 3174 (node != rbtdb->origin_node || IS_STUB(rbtdb)))) 3175 { 3176 return true; 3177 } 3178 return false; 3179 } 3180 3181 static isc_result_t 3182 addnoqname(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset, 3183 dns_rdataset_t *rdataset) { 3184 isc_result_t result; 3185 dns_slabheader_proof_t *noqname = NULL; 3186 dns_name_t name = DNS_NAME_INITEMPTY; 3187 dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; 3188 isc_region_t r1, r2; 3189 3190 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig); 3191 RUNTIME_CHECK(result == ISC_R_SUCCESS); 3192 3193 result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset); 3194 if (result != ISC_R_SUCCESS) { 3195 goto cleanup; 3196 } 3197 3198 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset); 3199 if (result != ISC_R_SUCCESS) { 3200 goto cleanup; 3201 } 3202 3203 noqname = isc_mem_get(mctx, sizeof(*noqname)); 3204 *noqname = (dns_slabheader_proof_t){ 3205 .neg = r1.base, 3206 .negsig = r2.base, 3207 .type = neg.type, 3208 .name = DNS_NAME_INITEMPTY, 3209 }; 3210 dns_name_dup(&name, mctx, &noqname->name); 3211 newheader->noqname = noqname; 3212 3213 cleanup: 3214 dns_rdataset_disassociate(&neg); 3215 dns_rdataset_disassociate(&negsig); 3216 3217 return result; 3218 } 3219 3220 static isc_result_t 3221 addclosest(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset, 3222 dns_rdataset_t *rdataset) { 3223 isc_result_t result; 3224 dns_slabheader_proof_t *closest = NULL; 3225 dns_name_t name = DNS_NAME_INITEMPTY; 3226 dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; 3227 isc_region_t r1, r2; 3228 3229 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig); 3230 RUNTIME_CHECK(result == ISC_R_SUCCESS); 3231 3232 result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset); 3233 if (result != ISC_R_SUCCESS) { 3234 goto cleanup; 3235 } 3236 3237 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset); 3238 if (result != ISC_R_SUCCESS) { 3239 goto cleanup; 3240 } 3241 3242 closest = isc_mem_get(mctx, sizeof(*closest)); 3243 *closest = (dns_slabheader_proof_t){ 3244 .neg = r1.base, 3245 .negsig = r2.base, 3246 .name = DNS_NAME_INITEMPTY, 3247 .type = neg.type, 3248 }; 3249 dns_name_dup(&name, mctx, &closest->name); 3250 newheader->closest = closest; 3251 3252 cleanup: 3253 dns_rdataset_disassociate(&neg); 3254 dns_rdataset_disassociate(&negsig); 3255 return result; 3256 } 3257 3258 static void 3259 expire_ttl_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, 3260 isc_rwlocktype_t *tlocktypep, isc_stdtime_t now, 3261 bool cache_is_overmem DNS__DB_FLARG); 3262 3263 isc_result_t 3264 dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, 3265 dns_dbversion_t *version, isc_stdtime_t now, 3266 dns_rdataset_t *rdataset, unsigned int options, 3267 dns_rdataset_t *addedrdataset DNS__DB_FLARG) { 3268 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3269 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 3270 dns_rbtdb_version_t *rbtversion = version; 3271 isc_region_t region; 3272 dns_slabheader_t *newheader = NULL; 3273 isc_result_t result; 3274 bool delegating; 3275 bool newnsec; 3276 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 3277 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3278 bool cache_is_overmem = false; 3279 dns_fixedname_t fixed; 3280 dns_name_t *name = NULL; 3281 3282 REQUIRE(VALID_RBTDB(rbtdb)); 3283 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 3284 3285 if (!IS_CACHE(rbtdb)) { 3286 /* 3287 * SOA records are only allowed at top of zone. 3288 */ 3289 if (rdataset->type == dns_rdatatype_soa && 3290 node != rbtdb->origin_node) 3291 { 3292 return DNS_R_NOTZONETOP; 3293 } 3294 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 3295 REQUIRE((rbtnode->nsec == DNS_DB_NSEC_NSEC3 && 3296 (rdataset->type == dns_rdatatype_nsec3 || 3297 rdataset->covers == dns_rdatatype_nsec3)) || 3298 (rbtnode->nsec != DNS_DB_NSEC_NSEC3 && 3299 rdataset->type != dns_rdatatype_nsec3 && 3300 rdataset->covers != dns_rdatatype_nsec3)); 3301 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3302 } 3303 3304 if (rbtversion == NULL) { 3305 if (now == 0) { 3306 now = isc_stdtime_now(); 3307 } 3308 } else { 3309 now = 0; 3310 } 3311 3312 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 3313 ®ion, sizeof(dns_slabheader_t), 3314 rbtdb->maxrrperset); 3315 if (result != ISC_R_SUCCESS) { 3316 if (result == DNS_R_TOOMANYRECORDS) { 3317 name = dns_fixedname_initname(&fixed); 3318 dns__rbtdb_nodefullname(db, node, name); 3319 dns__db_logtoomanyrecords((dns_db_t *)rbtdb, name, 3320 rdataset->type, "adding", 3321 rbtdb->maxrrperset); 3322 } 3323 return result; 3324 } 3325 3326 name = dns_fixedname_initname(&fixed); 3327 dns__rbtdb_nodefullname(db, node, name); 3328 dns_rdataset_getownercase(rdataset, name); 3329 3330 newheader = (dns_slabheader_t *)region.base; 3331 *newheader = (dns_slabheader_t){ 3332 .type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers), 3333 .trust = rdataset->trust, 3334 .last_used = now, 3335 .node = rbtnode, 3336 }; 3337 3338 dns_slabheader_reset(newheader, db, node); 3339 dns__rbtdb_setttl(newheader, rdataset->ttl + now); 3340 if (rdataset->ttl == 0U) { 3341 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL); 3342 } 3343 atomic_init(&newheader->count, 3344 atomic_fetch_add_relaxed(&init_count, 1)); 3345 if (rbtversion != NULL) { 3346 newheader->serial = rbtversion->serial; 3347 now = 0; 3348 3349 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 3350 DNS_SLABHEADER_SETATTR(newheader, 3351 DNS_SLABHEADERATTR_RESIGN); 3352 newheader->resign = 3353 (isc_stdtime_t)(dns_time64_from32( 3354 rdataset->resign) >> 3355 1); 3356 newheader->resign_lsb = rdataset->resign & 0x1; 3357 } 3358 } else { 3359 newheader->serial = 1; 3360 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) { 3361 DNS_SLABHEADER_SETATTR(newheader, 3362 DNS_SLABHEADERATTR_PREFETCH); 3363 } 3364 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) { 3365 DNS_SLABHEADER_SETATTR(newheader, 3366 DNS_SLABHEADERATTR_NEGATIVE); 3367 } 3368 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) { 3369 DNS_SLABHEADER_SETATTR(newheader, 3370 DNS_SLABHEADERATTR_NXDOMAIN); 3371 } 3372 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) { 3373 DNS_SLABHEADER_SETATTR(newheader, 3374 DNS_SLABHEADERATTR_OPTOUT); 3375 } 3376 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { 3377 result = addnoqname(rbtdb->common.mctx, newheader, 3378 rbtdb->maxrrperset, rdataset); 3379 if (result != ISC_R_SUCCESS) { 3380 dns_slabheader_destroy(&newheader); 3381 return result; 3382 } 3383 } 3384 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { 3385 result = addclosest(rbtdb->common.mctx, newheader, 3386 rbtdb->maxrrperset, rdataset); 3387 if (result != ISC_R_SUCCESS) { 3388 dns_slabheader_destroy(&newheader); 3389 return result; 3390 } 3391 } 3392 } 3393 3394 /* 3395 * If we're adding a delegation type (e.g. NS or DNAME for a zone, 3396 * just DNAME for the cache), then we need to set the callback bit 3397 * on the node. 3398 */ 3399 if (delegating_type(rbtdb, rbtnode, rdataset->type)) { 3400 delegating = true; 3401 } else { 3402 delegating = false; 3403 } 3404 3405 /* 3406 * Add to the auxiliary NSEC tree if we're adding an NSEC record. 3407 */ 3408 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 3409 if (rbtnode->nsec != DNS_DB_NSEC_HAS_NSEC && 3410 rdataset->type == dns_rdatatype_nsec) 3411 { 3412 newnsec = true; 3413 } else { 3414 newnsec = false; 3415 } 3416 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3417 3418 /* 3419 * If we're adding a delegation type, adding to the auxiliary NSEC 3420 * tree, or the DB is a cache in an overmem state, hold an 3421 * exclusive lock on the tree. In the latter case the lock does 3422 * not necessarily have to be acquired but it will help purge 3423 * ancient entries more effectively. 3424 */ 3425 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx)) { 3426 cache_is_overmem = true; 3427 } 3428 if (delegating || newnsec || cache_is_overmem) { 3429 TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); 3430 } 3431 3432 if (cache_is_overmem) { 3433 dns__cacherbt_overmem(rbtdb, newheader, 3434 &tlocktype DNS__DB_FLARG_PASS); 3435 } 3436 3437 NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 3438 3439 if (rbtdb->rrsetstats != NULL) { 3440 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_STATCOUNT); 3441 update_rrsetstats(rbtdb->rrsetstats, newheader->type, 3442 atomic_load_acquire(&newheader->attributes), 3443 true); 3444 } 3445 3446 if (IS_CACHE(rbtdb)) { 3447 if (tlocktype == isc_rwlocktype_write) { 3448 cleanup_dead_nodes(rbtdb, 3449 rbtnode->locknum DNS__DB_FLARG_PASS); 3450 } 3451 3452 expire_ttl_headers(rbtdb, rbtnode->locknum, &tlocktype, now, 3453 cache_is_overmem DNS__DB_FLARG_PASS); 3454 3455 /* 3456 * If we've been holding a write lock on the tree just for 3457 * cleaning, we can release it now. However, we still need the 3458 * node lock. 3459 */ 3460 if (tlocktype == isc_rwlocktype_write && !delegating && 3461 !newnsec) 3462 { 3463 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3464 } 3465 } 3466 3467 result = ISC_R_SUCCESS; 3468 if (newnsec) { 3469 dns_rbtnode_t *nsecnode = NULL; 3470 3471 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); 3472 if (result == ISC_R_SUCCESS) { 3473 nsecnode->nsec = DNS_DB_NSEC_NSEC; 3474 rbtnode->nsec = DNS_DB_NSEC_HAS_NSEC; 3475 } else if (result == ISC_R_EXISTS) { 3476 rbtnode->nsec = DNS_DB_NSEC_HAS_NSEC; 3477 result = ISC_R_SUCCESS; 3478 } 3479 } 3480 3481 if (result == ISC_R_SUCCESS) { 3482 result = dns__rbtdb_add(rbtdb, rbtnode, name, rbtversion, 3483 newheader, options, false, 3484 addedrdataset, now DNS__DB_FLARG_PASS); 3485 } 3486 if (result == ISC_R_SUCCESS && delegating) { 3487 rbtnode->find_callback = 1; 3488 } 3489 3490 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 3491 3492 if (tlocktype != isc_rwlocktype_none) { 3493 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3494 } 3495 INSIST(tlocktype == isc_rwlocktype_none); 3496 3497 return result; 3498 } 3499 3500 isc_result_t 3501 dns__rbtdb_subtractrdataset(dns_db_t *db, dns_dbnode_t *node, 3502 dns_dbversion_t *version, dns_rdataset_t *rdataset, 3503 unsigned int options, 3504 dns_rdataset_t *newrdataset DNS__DB_FLARG) { 3505 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3506 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 3507 dns_rbtdb_version_t *rbtversion = version; 3508 dns_fixedname_t fname; 3509 dns_name_t *nodename = dns_fixedname_initname(&fname); 3510 dns_slabheader_t *topheader = NULL, *topheader_prev = NULL; 3511 dns_slabheader_t *header = NULL, *newheader = NULL; 3512 unsigned char *subresult = NULL; 3513 isc_region_t region; 3514 isc_result_t result; 3515 rbtdb_changed_t *changed = NULL; 3516 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 3517 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3518 3519 REQUIRE(VALID_RBTDB(rbtdb)); 3520 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb); 3521 3522 if (!IS_CACHE(rbtdb)) { 3523 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 3524 REQUIRE((rbtnode->nsec == DNS_DB_NSEC_NSEC3 && 3525 (rdataset->type == dns_rdatatype_nsec3 || 3526 rdataset->covers == dns_rdatatype_nsec3)) || 3527 (rbtnode->nsec != DNS_DB_NSEC_NSEC3 && 3528 rdataset->type != dns_rdatatype_nsec3 && 3529 rdataset->covers != dns_rdatatype_nsec3)); 3530 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3531 } 3532 3533 dns__rbtdb_nodefullname(db, node, nodename); 3534 3535 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 3536 ®ion, sizeof(dns_slabheader_t), 3537 0); 3538 if (result != ISC_R_SUCCESS) { 3539 return result; 3540 } 3541 3542 newheader = (dns_slabheader_t *)region.base; 3543 dns_slabheader_reset(newheader, db, node); 3544 dns__rbtdb_setttl(newheader, rdataset->ttl); 3545 newheader->type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers); 3546 atomic_init(&newheader->attributes, 0); 3547 newheader->serial = rbtversion->serial; 3548 newheader->trust = 0; 3549 newheader->noqname = NULL; 3550 newheader->closest = NULL; 3551 atomic_init(&newheader->count, 3552 atomic_fetch_add_relaxed(&init_count, 1)); 3553 newheader->last_used = 0; 3554 newheader->node = rbtnode; 3555 newheader->db = (dns_db_t *)rbtdb; 3556 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 3557 DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_RESIGN); 3558 newheader->resign = 3559 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> 3560 1); 3561 newheader->resign_lsb = rdataset->resign & 0x1; 3562 } else { 3563 newheader->resign = 0; 3564 newheader->resign_lsb = 0; 3565 } 3566 3567 NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 3568 3569 changed = add_changed(newheader, rbtversion DNS__DB_FLARG_PASS); 3570 if (changed == NULL) { 3571 dns_slabheader_destroy(&newheader); 3572 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 3573 &nlocktype); 3574 return ISC_R_NOMEMORY; 3575 } 3576 3577 for (topheader = rbtnode->data; topheader != NULL; 3578 topheader = topheader->next) 3579 { 3580 if (topheader->type == newheader->type) { 3581 break; 3582 } 3583 topheader_prev = topheader; 3584 } 3585 /* 3586 * If header isn't NULL, we've found the right type. There may be 3587 * IGNORE rdatasets between the top of the chain and the first real 3588 * data. We skip over them. 3589 */ 3590 header = topheader; 3591 while (header != NULL && IGNORE(header)) { 3592 header = header->down; 3593 } 3594 if (header != NULL && EXISTS(header)) { 3595 unsigned int flags = 0; 3596 subresult = NULL; 3597 result = ISC_R_SUCCESS; 3598 if ((options & DNS_DBSUB_EXACT) != 0) { 3599 flags |= DNS_RDATASLAB_EXACT; 3600 if (newheader->ttl != header->ttl) { 3601 result = DNS_R_NOTEXACT; 3602 } 3603 } 3604 if (result == ISC_R_SUCCESS) { 3605 result = dns_rdataslab_subtract( 3606 (unsigned char *)header, 3607 (unsigned char *)newheader, 3608 (unsigned int)(sizeof(*newheader)), 3609 rbtdb->common.mctx, rbtdb->common.rdclass, 3610 (dns_rdatatype_t)header->type, flags, 3611 &subresult); 3612 } 3613 if (result == ISC_R_SUCCESS) { 3614 dns_slabheader_destroy(&newheader); 3615 newheader = (dns_slabheader_t *)subresult; 3616 dns_slabheader_reset(newheader, db, node); 3617 dns_slabheader_copycase(newheader, header); 3618 if (RESIGN(header)) { 3619 DNS_SLABHEADER_SETATTR( 3620 newheader, DNS_SLABHEADERATTR_RESIGN); 3621 newheader->resign = header->resign; 3622 newheader->resign_lsb = header->resign_lsb; 3623 dns__zonerbt_resigninsert( 3624 rbtdb, rbtnode->locknum, newheader); 3625 } 3626 /* 3627 * We have to set the serial since the rdataslab 3628 * subtraction routine copies the reserved portion of 3629 * header, not newheader. 3630 */ 3631 newheader->serial = rbtversion->serial; 3632 /* 3633 * XXXJT: dns_rdataslab_subtract() copied the pointers 3634 * to additional info. We need to clear these fields 3635 * to avoid having duplicated references. 3636 */ 3637 update_recordsandxfrsize(true, rbtversion, newheader, 3638 nodename->length); 3639 } else if (result == DNS_R_NXRRSET) { 3640 /* 3641 * This subtraction would remove all of the rdata; 3642 * add a nonexistent header instead. 3643 */ 3644 dns_slabheader_destroy(&newheader); 3645 newheader = dns_slabheader_new((dns_db_t *)rbtdb, 3646 (dns_dbnode_t *)rbtnode); 3647 dns__rbtdb_setttl(newheader, 0); 3648 newheader->type = topheader->type; 3649 atomic_init(&newheader->attributes, 3650 DNS_SLABHEADERATTR_NONEXISTENT); 3651 newheader->serial = rbtversion->serial; 3652 } else { 3653 dns_slabheader_destroy(&newheader); 3654 goto unlock; 3655 } 3656 3657 /* 3658 * If we're here, we want to link newheader in front of 3659 * topheader. 3660 */ 3661 INSIST(rbtversion->serial >= topheader->serial); 3662 update_recordsandxfrsize(false, rbtversion, header, 3663 nodename->length); 3664 if (topheader_prev != NULL) { 3665 topheader_prev->next = newheader; 3666 } else { 3667 rbtnode->data = newheader; 3668 } 3669 newheader->next = topheader->next; 3670 newheader->down = topheader; 3671 topheader->next = newheader; 3672 rbtnode->dirty = 1; 3673 changed->dirty = true; 3674 dns__zonerbt_resigndelete(rbtdb, rbtversion, 3675 header DNS__DB_FLARG_PASS); 3676 } else { 3677 /* 3678 * The rdataset doesn't exist, so we don't need to do anything 3679 * to satisfy the deletion request. 3680 */ 3681 dns_slabheader_destroy(&newheader); 3682 if ((options & DNS_DBSUB_EXACT) != 0) { 3683 result = DNS_R_NOTEXACT; 3684 } else { 3685 result = DNS_R_UNCHANGED; 3686 } 3687 } 3688 3689 if (result == ISC_R_SUCCESS && newrdataset != NULL) { 3690 dns__rbtdb_bindrdataset(rbtdb, rbtnode, newheader, 0, 3691 isc_rwlocktype_write, 3692 newrdataset DNS__DB_FLARG_PASS); 3693 } 3694 3695 if (result == DNS_R_NXRRSET && newrdataset != NULL && 3696 (options & DNS_DBSUB_WANTOLD) != 0) 3697 { 3698 dns__rbtdb_bindrdataset(rbtdb, rbtnode, header, 0, 3699 isc_rwlocktype_write, 3700 newrdataset DNS__DB_FLARG_PASS); 3701 } 3702 3703 unlock: 3704 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 3705 3706 /* 3707 * Update the zone's secure status. If version is non-NULL 3708 * this is deferred until dns__rbtdb_closeversion() is called. 3709 */ 3710 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { 3711 RWLOCK(&rbtdb->lock, isc_rwlocktype_read); 3712 version = rbtdb->current_version; 3713 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); 3714 dns__rbtdb_setsecure(db, version, rbtdb->origin_node); 3715 } 3716 3717 return result; 3718 } 3719 3720 isc_result_t 3721 dns__rbtdb_deleterdataset(dns_db_t *db, dns_dbnode_t *node, 3722 dns_dbversion_t *version, dns_rdatatype_t type, 3723 dns_rdatatype_t covers DNS__DB_FLARG) { 3724 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3725 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 3726 dns_rbtdb_version_t *rbtversion = version; 3727 dns_fixedname_t fname; 3728 dns_name_t *nodename = dns_fixedname_initname(&fname); 3729 isc_result_t result; 3730 dns_slabheader_t *newheader = NULL; 3731 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3732 3733 REQUIRE(VALID_RBTDB(rbtdb)); 3734 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 3735 3736 if (type == dns_rdatatype_any) { 3737 return ISC_R_NOTIMPLEMENTED; 3738 } 3739 if (type == dns_rdatatype_rrsig && covers == 0) { 3740 return ISC_R_NOTIMPLEMENTED; 3741 } 3742 3743 newheader = dns_slabheader_new(db, node); 3744 newheader->type = DNS_TYPEPAIR_VALUE(type, covers); 3745 dns__rbtdb_setttl(newheader, 0); 3746 atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); 3747 if (rbtversion != NULL) { 3748 newheader->serial = rbtversion->serial; 3749 } 3750 3751 dns__rbtdb_nodefullname(db, node, nodename); 3752 3753 NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 3754 result = dns__rbtdb_add(rbtdb, rbtnode, nodename, rbtversion, newheader, 3755 DNS_DBADD_FORCE, false, NULL, 3756 0 DNS__DB_FLARG_PASS); 3757 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 3758 3759 /* 3760 * Update the zone's secure status. If version is non-NULL 3761 * this is deferred until dns__rbtdb_closeversion() is called. 3762 */ 3763 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { 3764 RWLOCK(&rbtdb->lock, isc_rwlocktype_read); 3765 version = rbtdb->current_version; 3766 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); 3767 dns__rbtdb_setsecure(db, version, rbtdb->origin_node); 3768 } 3769 3770 return result; 3771 } 3772 3773 static void 3774 delete_callback(void *data, void *arg) { 3775 dns_rbtdb_t *rbtdb = arg; 3776 dns_slabheader_t *current = NULL, *next = NULL; 3777 unsigned int locknum; 3778 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 3779 3780 current = data; 3781 locknum = RBTDB_HEADERNODE(current)->locknum; 3782 NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 3783 while (current != NULL) { 3784 next = current->next; 3785 dns_slabheader_destroy(¤t); 3786 current = next; 3787 } 3788 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 3789 } 3790 3791 unsigned int 3792 dns__rbtdb_nodecount(dns_db_t *db, dns_dbtree_t tree) { 3793 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3794 unsigned int count; 3795 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 3796 3797 REQUIRE(VALID_RBTDB(rbtdb)); 3798 3799 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 3800 switch (tree) { 3801 case dns_dbtree_main: 3802 count = dns_rbt_nodecount(rbtdb->tree); 3803 break; 3804 case dns_dbtree_nsec: 3805 count = dns_rbt_nodecount(rbtdb->nsec); 3806 break; 3807 case dns_dbtree_nsec3: 3808 count = dns_rbt_nodecount(rbtdb->nsec3); 3809 break; 3810 default: 3811 UNREACHABLE(); 3812 } 3813 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3814 3815 return count; 3816 } 3817 3818 void 3819 dns__rbtdb_setloop(dns_db_t *db, isc_loop_t *loop) { 3820 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3821 3822 REQUIRE(VALID_RBTDB(rbtdb)); 3823 3824 RWLOCK(&rbtdb->lock, isc_rwlocktype_write); 3825 if (rbtdb->loop != NULL) { 3826 isc_loop_detach(&rbtdb->loop); 3827 } 3828 if (loop != NULL) { 3829 isc_loop_attach(loop, &rbtdb->loop); 3830 } 3831 RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); 3832 } 3833 3834 isc_result_t 3835 dns__rbtdb_getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) { 3836 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3837 dns_rbtnode_t *onode = NULL; 3838 isc_result_t result = ISC_R_SUCCESS; 3839 3840 REQUIRE(VALID_RBTDB(rbtdb)); 3841 REQUIRE(nodep != NULL && *nodep == NULL); 3842 3843 /* Note that the access to origin_node doesn't require a DB lock */ 3844 onode = (dns_rbtnode_t *)rbtdb->origin_node; 3845 if (onode != NULL) { 3846 dns__rbtdb_newref(rbtdb, onode, 3847 isc_rwlocktype_none DNS__DB_FLARG_PASS); 3848 *nodep = rbtdb->origin_node; 3849 } else { 3850 INSIST(IS_CACHE(rbtdb)); 3851 result = ISC_R_NOTFOUND; 3852 } 3853 3854 return result; 3855 } 3856 3857 void 3858 dns__rbtdb_locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { 3859 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3860 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 3861 3862 RWLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, type); 3863 } 3864 3865 void 3866 dns__rbtdb_unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { 3867 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3868 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 3869 3870 RWUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, type); 3871 } 3872 3873 isc_result_t 3874 dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) { 3875 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 3876 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 3877 isc_result_t result; 3878 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 3879 3880 REQUIRE(VALID_RBTDB(rbtdb)); 3881 REQUIRE(node != NULL); 3882 REQUIRE(name != NULL); 3883 3884 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 3885 result = dns_rbt_fullnamefromnode(rbtnode, name); 3886 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 3887 3888 return result; 3889 } 3890 3891 isc_result_t 3892 dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, 3893 dns_rdataclass_t rdclass, unsigned int argc, char *argv[], 3894 void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) { 3895 dns_rbtdb_t *rbtdb = NULL; 3896 isc_result_t result; 3897 int i; 3898 dns_name_t name; 3899 isc_mem_t *hmctx = mctx; 3900 3901 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb)); 3902 *rbtdb = (dns_rbtdb_t){ 3903 .common.origin = DNS_NAME_INITEMPTY, 3904 .common.rdclass = rdclass, 3905 .current_serial = 1, 3906 .least_serial = 1, 3907 .next_serial = 2, 3908 .open_versions = ISC_LIST_INITIALIZER, 3909 }; 3910 3911 isc_refcount_init(&rbtdb->common.references, 1); 3912 3913 /* 3914 * If argv[0] exists, it points to a memory context to use for heap 3915 */ 3916 if (argc != 0) { 3917 hmctx = (isc_mem_t *)argv[0]; 3918 } 3919 3920 if (type == dns_dbtype_cache) { 3921 rbtdb->common.methods = &dns__rbtdb_cachemethods; 3922 rbtdb->common.attributes |= DNS_DBATTR_CACHE; 3923 } else if (type == dns_dbtype_stub) { 3924 rbtdb->common.methods = &dns__rbtdb_zonemethods; 3925 rbtdb->common.attributes |= DNS_DBATTR_STUB; 3926 } else { 3927 rbtdb->common.methods = &dns__rbtdb_zonemethods; 3928 } 3929 3930 isc_rwlock_init(&rbtdb->lock); 3931 TREE_INITLOCK(&rbtdb->tree_lock); 3932 3933 /* 3934 * Initialize node_lock_count in a generic way to support future 3935 * extension which allows the user to specify this value on creation. 3936 * Note that when specified for a cache DB it must be larger than 1 3937 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT. 3938 */ 3939 if (rbtdb->node_lock_count == 0) { 3940 if (IS_CACHE(rbtdb)) { 3941 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT; 3942 } else { 3943 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT; 3944 } 3945 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) { 3946 result = ISC_R_RANGE; 3947 goto cleanup_tree_lock; 3948 } 3949 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH)); 3950 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count * 3951 sizeof(db_nodelock_t)); 3952 3953 rbtdb->common.update_listeners = cds_lfht_new(16, 16, 0, 0, NULL); 3954 3955 if (IS_CACHE(rbtdb)) { 3956 dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats); 3957 rbtdb->lru = isc_mem_get(mctx, 3958 rbtdb->node_lock_count * 3959 sizeof(dns_slabheaderlist_t)); 3960 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 3961 ISC_LIST_INIT(rbtdb->lru[i]); 3962 } 3963 } 3964 3965 /* 3966 * Create the heaps. 3967 */ 3968 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count * 3969 sizeof(isc_heap_t *)); 3970 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 3971 rbtdb->heaps[i] = NULL; 3972 } 3973 3974 rbtdb->sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner; 3975 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 3976 isc_heap_create(hmctx, rbtdb->sooner, set_index, 0, 3977 &rbtdb->heaps[i]); 3978 } 3979 3980 /* 3981 * Create deadnode lists. 3982 */ 3983 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count * 3984 sizeof(dns_rbtnodelist_t)); 3985 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 3986 ISC_LIST_INIT(rbtdb->deadnodes[i]); 3987 } 3988 3989 rbtdb->active = rbtdb->node_lock_count; 3990 3991 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) { 3992 NODE_INITLOCK(&rbtdb->node_locks[i].lock); 3993 isc_refcount_init(&rbtdb->node_locks[i].references, 0); 3994 rbtdb->node_locks[i].exiting = false; 3995 } 3996 3997 /* 3998 * Attach to the mctx. The database will persist so long as there 3999 * are references to it, and attaching to the mctx ensures that our 4000 * mctx won't disappear out from under us. 4001 */ 4002 isc_mem_attach(mctx, &rbtdb->common.mctx); 4003 isc_mem_attach(hmctx, &rbtdb->hmctx); 4004 4005 /* 4006 * Make a copy of the origin name. 4007 */ 4008 dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin); 4009 4010 /* 4011 * Make the Red-Black Trees. 4012 */ 4013 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree); 4014 if (result != ISC_R_SUCCESS) { 4015 free_rbtdb(rbtdb, false); 4016 return result; 4017 } 4018 4019 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec); 4020 if (result != ISC_R_SUCCESS) { 4021 free_rbtdb(rbtdb, false); 4022 return result; 4023 } 4024 4025 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3); 4026 if (result != ISC_R_SUCCESS) { 4027 free_rbtdb(rbtdb, false); 4028 return result; 4029 } 4030 4031 /* 4032 * In order to set the node callback bit correctly in zone databases, 4033 * we need to know if the node has the origin name of the zone. 4034 * In loading_addrdataset() we could simply compare the new name 4035 * to the origin name, but this is expensive. Also, we don't know the 4036 * node name in dns__rbtdb_addrdataset(), so we need another way of 4037 * knowing the zone's top. 4038 * 4039 * We now explicitly create a node for the zone's origin, and then 4040 * we simply remember the node's address. This is safe, because 4041 * the top-of-zone node can never be deleted, nor can its address 4042 * change. 4043 */ 4044 if (!IS_CACHE(rbtdb)) { 4045 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin, 4046 &rbtdb->origin_node); 4047 if (result != ISC_R_SUCCESS) { 4048 INSIST(result != ISC_R_EXISTS); 4049 free_rbtdb(rbtdb, false); 4050 return result; 4051 } 4052 INSIST(rbtdb->origin_node != NULL); 4053 rbtdb->origin_node->nsec = DNS_DB_NSEC_NORMAL; 4054 /* 4055 * We need to give the origin node the right locknum. 4056 */ 4057 dns_name_init(&name, NULL); 4058 dns_rbt_namefromnode(rbtdb->origin_node, &name); 4059 rbtdb->origin_node->locknum = rbtdb->origin_node->hashval % 4060 rbtdb->node_lock_count; 4061 /* 4062 * Add an apex node to the NSEC3 tree so that NSEC3 searches 4063 * return partial matches when there is only a single NSEC3 4064 * record in the tree. 4065 */ 4066 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin, 4067 &rbtdb->nsec3_origin_node); 4068 if (result != ISC_R_SUCCESS) { 4069 INSIST(result != ISC_R_EXISTS); 4070 free_rbtdb(rbtdb, false); 4071 return result; 4072 } 4073 rbtdb->nsec3_origin_node->nsec = DNS_DB_NSEC_NSEC3; 4074 /* 4075 * We need to give the nsec3 origin node the right locknum. 4076 */ 4077 dns_name_init(&name, NULL); 4078 dns_rbt_namefromnode(rbtdb->nsec3_origin_node, &name); 4079 rbtdb->nsec3_origin_node->locknum = 4080 rbtdb->nsec3_origin_node->hashval % 4081 rbtdb->node_lock_count; 4082 } 4083 4084 /* 4085 * Version Initialization. 4086 */ 4087 rbtdb->current_version = allocate_version(mctx, 1, 1, false); 4088 rbtdb->current_version->rbtdb = rbtdb; 4089 4090 /* 4091 * Keep the current version in the open list so that list operation 4092 * won't happen in normal lookup operations. 4093 */ 4094 PREPEND(rbtdb->open_versions, rbtdb->current_version, link); 4095 4096 rbtdb->common.magic = DNS_DB_MAGIC; 4097 rbtdb->common.impmagic = RBTDB_MAGIC; 4098 4099 *dbp = (dns_db_t *)rbtdb; 4100 4101 return ISC_R_SUCCESS; 4102 4103 cleanup_tree_lock: 4104 TREE_DESTROYLOCK(&rbtdb->tree_lock); 4105 isc_rwlock_destroy(&rbtdb->lock); 4106 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb)); 4107 return result; 4108 } 4109 4110 /* 4111 * Rdataset Iterator Methods 4112 */ 4113 4114 static void 4115 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { 4116 rbtdb_rdatasetiter_t *rbtiterator = NULL; 4117 4118 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp); 4119 4120 if (rbtiterator->common.version != NULL) { 4121 dns__rbtdb_closeversion(rbtiterator->common.db, 4122 &rbtiterator->common.version, 4123 false DNS__DB_FLARG_PASS); 4124 } 4125 dns__db_detachnode(rbtiterator->common.db, 4126 &rbtiterator->common.node DNS__DB_FLARG_PASS); 4127 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator, 4128 sizeof(*rbtiterator)); 4129 4130 *iteratorp = NULL; 4131 } 4132 4133 static bool 4134 iterator_active(dns_rbtdb_t *rbtdb, rbtdb_rdatasetiter_t *rbtiterator, 4135 dns_slabheader_t *header) { 4136 dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, rbtdb); 4137 4138 /* 4139 * Is this a "this rdataset doesn't exist" record? 4140 */ 4141 if (NONEXISTENT(header)) { 4142 return false; 4143 } 4144 4145 /* 4146 * If this is a zone or this header still active then return it. 4147 */ 4148 if (!IS_CACHE(rbtdb) || ACTIVE(header, rbtiterator->common.now)) { 4149 return true; 4150 } 4151 4152 /* 4153 * If we are not returning stale records or the rdataset is 4154 * too old don't return it. 4155 */ 4156 if (!STALEOK(rbtiterator) || (rbtiterator->common.now > stale_ttl)) { 4157 return false; 4158 } 4159 return true; 4160 } 4161 4162 static isc_result_t 4163 rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG) { 4164 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 4165 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 4166 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 4167 dns_rbtdb_version_t *rbtversion = rbtiterator->common.version; 4168 dns_slabheader_t *header = NULL, *top_next = NULL; 4169 uint32_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial; 4170 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 4171 4172 NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 4173 4174 for (header = rbtnode->data; header != NULL; header = top_next) { 4175 top_next = header->next; 4176 do { 4177 if (EXPIREDOK(rbtiterator)) { 4178 if (!NONEXISTENT(header)) { 4179 break; 4180 } 4181 header = header->down; 4182 } else if (header->serial <= serial && !IGNORE(header)) 4183 { 4184 if (!iterator_active(rbtdb, rbtiterator, 4185 header)) 4186 { 4187 header = NULL; 4188 } 4189 break; 4190 } else { 4191 header = header->down; 4192 } 4193 } while (header != NULL); 4194 if (header != NULL) { 4195 break; 4196 } 4197 } 4198 4199 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 4200 4201 rbtiterator->current = header; 4202 4203 if (header == NULL) { 4204 return ISC_R_NOMORE; 4205 } 4206 4207 return ISC_R_SUCCESS; 4208 } 4209 4210 static isc_result_t 4211 rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG) { 4212 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 4213 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 4214 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 4215 dns_rbtdb_version_t *rbtversion = rbtiterator->common.version; 4216 dns_slabheader_t *header = NULL, *top_next = NULL; 4217 uint32_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial; 4218 dns_typepair_t type, negtype; 4219 dns_rdatatype_t rdtype, covers; 4220 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 4221 bool expiredok = EXPIREDOK(rbtiterator); 4222 4223 header = rbtiterator->current; 4224 if (header == NULL) { 4225 return ISC_R_NOMORE; 4226 } 4227 4228 NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 4229 4230 type = header->type; 4231 rdtype = DNS_TYPEPAIR_TYPE(header->type); 4232 if (NEGATIVE(header)) { 4233 covers = DNS_TYPEPAIR_COVERS(header->type); 4234 negtype = DNS_TYPEPAIR_VALUE(covers, 0); 4235 } else { 4236 negtype = DNS_TYPEPAIR_VALUE(0, rdtype); 4237 } 4238 4239 /* 4240 * Find the start of the header chain for the next type 4241 * by walking back up the list. 4242 */ 4243 top_next = header->next; 4244 while (top_next != NULL && 4245 (top_next->type == type || top_next->type == negtype)) 4246 { 4247 top_next = top_next->next; 4248 } 4249 if (expiredok) { 4250 /* 4251 * Keep walking down the list if possible or 4252 * start the next type. 4253 */ 4254 header = header->down != NULL ? header->down : top_next; 4255 } else { 4256 header = top_next; 4257 } 4258 for (; header != NULL; header = top_next) { 4259 top_next = header->next; 4260 do { 4261 if (expiredok) { 4262 if (!NONEXISTENT(header)) { 4263 break; 4264 } 4265 header = header->down; 4266 } else if (header->serial <= serial && !IGNORE(header)) 4267 { 4268 if (!iterator_active(rbtdb, rbtiterator, 4269 header)) 4270 { 4271 header = NULL; 4272 } 4273 break; 4274 } else { 4275 header = header->down; 4276 } 4277 } while (header != NULL); 4278 if (header != NULL) { 4279 break; 4280 } 4281 /* 4282 * Find the start of the header chain for the next type 4283 * by walking back up the list. 4284 */ 4285 while (top_next != NULL && 4286 (top_next->type == type || top_next->type == negtype)) 4287 { 4288 top_next = top_next->next; 4289 } 4290 } 4291 4292 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 4293 4294 rbtiterator->current = header; 4295 4296 if (header == NULL) { 4297 return ISC_R_NOMORE; 4298 } 4299 4300 return ISC_R_SUCCESS; 4301 } 4302 4303 static void 4304 rdatasetiter_current(dns_rdatasetiter_t *iterator, 4305 dns_rdataset_t *rdataset DNS__DB_FLARG) { 4306 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 4307 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 4308 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 4309 dns_slabheader_t *header = NULL; 4310 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 4311 4312 header = rbtiterator->current; 4313 REQUIRE(header != NULL); 4314 4315 NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 4316 4317 dns__rbtdb_bindrdataset(rbtdb, rbtnode, header, rbtiterator->common.now, 4318 isc_rwlocktype_read, 4319 rdataset DNS__DB_FLARG_PASS); 4320 4321 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 4322 } 4323 4324 /* 4325 * Database Iterator Methods 4326 */ 4327 4328 static void 4329 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter DNS__DB_FLARG) { 4330 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 4331 dns_rbtnode_t *node = rbtdbiter->node; 4332 4333 if (node == NULL) { 4334 return; 4335 } 4336 4337 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none); 4338 reactivate_node(rbtdb, node, rbtdbiter->tree_locked DNS__DB_FLARG_PASS); 4339 } 4340 4341 static void 4342 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter DNS__DB_FLARG) { 4343 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 4344 dns_rbtnode_t *node = rbtdbiter->node; 4345 isc_rwlock_t *lock = NULL; 4346 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 4347 isc_rwlocktype_t tlocktype = rbtdbiter->tree_locked; 4348 4349 if (node == NULL) { 4350 return; 4351 } 4352 4353 REQUIRE(tlocktype != isc_rwlocktype_write); 4354 4355 lock = &rbtdb->node_locks[node->locknum].lock; 4356 NODE_RDLOCK(lock, &nlocktype); 4357 dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &rbtdbiter->tree_locked, 4358 false, false DNS__DB_FLARG_PASS); 4359 NODE_UNLOCK(lock, &nlocktype); 4360 4361 INSIST(rbtdbiter->tree_locked == tlocktype); 4362 4363 rbtdbiter->node = NULL; 4364 } 4365 4366 static void 4367 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) { 4368 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 4369 4370 REQUIRE(rbtdbiter->paused); 4371 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none); 4372 4373 TREE_RDLOCK(&rbtdb->tree_lock, &rbtdbiter->tree_locked); 4374 4375 rbtdbiter->paused = false; 4376 } 4377 4378 static void 4379 dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG) { 4380 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp); 4381 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 4382 dns_db_t *db = NULL; 4383 4384 if (rbtdbiter->tree_locked == isc_rwlocktype_read) { 4385 TREE_UNLOCK(&rbtdb->tree_lock, &rbtdbiter->tree_locked); 4386 } 4387 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none); 4388 4389 dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4390 4391 dns_db_attach(rbtdbiter->common.db, &db); 4392 dns_db_detach(&rbtdbiter->common.db); 4393 4394 dns_rbtnodechain_reset(&rbtdbiter->chain); 4395 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 4396 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter)); 4397 dns_db_detach(&db); 4398 4399 *iteratorp = NULL; 4400 } 4401 4402 static isc_result_t 4403 dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4404 isc_result_t result; 4405 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4406 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4407 dns_name_t *name = NULL, *origin = NULL; 4408 4409 if (rbtdbiter->result != ISC_R_SUCCESS && 4410 rbtdbiter->result != ISC_R_NOTFOUND && 4411 rbtdbiter->result != DNS_R_PARTIALMATCH && 4412 rbtdbiter->result != ISC_R_NOMORE) 4413 { 4414 return rbtdbiter->result; 4415 } 4416 4417 if (rbtdbiter->paused) { 4418 resume_iteration(rbtdbiter); 4419 } 4420 4421 dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4422 4423 name = dns_fixedname_name(&rbtdbiter->name); 4424 origin = dns_fixedname_name(&rbtdbiter->origin); 4425 dns_rbtnodechain_reset(&rbtdbiter->chain); 4426 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 4427 4428 switch (rbtdbiter->nsec3mode) { 4429 case nsec3only: 4430 rbtdbiter->current = &rbtdbiter->nsec3chain; 4431 result = dns_rbtnodechain_first(rbtdbiter->current, 4432 rbtdb->nsec3, name, origin); 4433 break; 4434 case nonsec3: 4435 rbtdbiter->current = &rbtdbiter->chain; 4436 result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, 4437 name, origin); 4438 break; 4439 case full: 4440 rbtdbiter->current = &rbtdbiter->chain; 4441 result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, 4442 name, origin); 4443 if (result == ISC_R_NOTFOUND) { 4444 rbtdbiter->current = &rbtdbiter->nsec3chain; 4445 result = dns_rbtnodechain_first( 4446 rbtdbiter->current, rbtdb->nsec3, name, origin); 4447 } 4448 break; 4449 default: 4450 UNREACHABLE(); 4451 } 4452 4453 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 4454 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 4455 NULL, &rbtdbiter->node); 4456 4457 /* If we're in the NSEC3 tree, skip the origin */ 4458 if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { 4459 rbtdbiter->node = NULL; 4460 result = dns_rbtnodechain_next(rbtdbiter->current, name, 4461 origin); 4462 if (result == ISC_R_SUCCESS || 4463 result == DNS_R_NEWORIGIN) 4464 { 4465 result = dns_rbtnodechain_current( 4466 rbtdbiter->current, NULL, NULL, 4467 &rbtdbiter->node); 4468 } 4469 } 4470 if (result == ISC_R_SUCCESS) { 4471 rbtdbiter->new_origin = true; 4472 reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4473 } 4474 } else { 4475 INSIST(result == ISC_R_NOTFOUND); 4476 result = ISC_R_NOMORE; /* The tree is empty. */ 4477 } 4478 4479 rbtdbiter->result = result; 4480 4481 if (result != ISC_R_SUCCESS) { 4482 ENSURE(!rbtdbiter->paused); 4483 } 4484 4485 return result; 4486 } 4487 4488 static isc_result_t 4489 dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4490 isc_result_t result; 4491 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4492 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4493 dns_name_t *name = NULL, *origin = NULL; 4494 4495 if (rbtdbiter->result != ISC_R_SUCCESS && 4496 rbtdbiter->result != ISC_R_NOTFOUND && 4497 rbtdbiter->result != DNS_R_PARTIALMATCH && 4498 rbtdbiter->result != ISC_R_NOMORE) 4499 { 4500 return rbtdbiter->result; 4501 } 4502 4503 if (rbtdbiter->paused) { 4504 resume_iteration(rbtdbiter); 4505 } 4506 4507 dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4508 4509 name = dns_fixedname_name(&rbtdbiter->name); 4510 origin = dns_fixedname_name(&rbtdbiter->origin); 4511 dns_rbtnodechain_reset(&rbtdbiter->chain); 4512 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 4513 4514 switch (rbtdbiter->nsec3mode) { 4515 case nsec3only: 4516 rbtdbiter->current = &rbtdbiter->nsec3chain; 4517 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, 4518 name, origin); 4519 break; 4520 case nonsec3: 4521 rbtdbiter->current = &rbtdbiter->chain; 4522 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, 4523 name, origin); 4524 break; 4525 case full: 4526 rbtdbiter->current = &rbtdbiter->nsec3chain; 4527 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, 4528 name, origin); 4529 if (result == ISC_R_NOTFOUND) { 4530 rbtdbiter->current = &rbtdbiter->chain; 4531 result = dns_rbtnodechain_last( 4532 rbtdbiter->current, rbtdb->tree, name, origin); 4533 } 4534 break; 4535 default: 4536 UNREACHABLE(); 4537 } 4538 4539 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 4540 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 4541 NULL, &rbtdbiter->node); 4542 if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { 4543 /* 4544 * NSEC3 tree only has an origin node. 4545 */ 4546 rbtdbiter->node = NULL; 4547 switch (rbtdbiter->nsec3mode) { 4548 case nsec3only: 4549 result = ISC_R_NOMORE; 4550 break; 4551 case nonsec3: 4552 case full: 4553 rbtdbiter->current = &rbtdbiter->chain; 4554 result = dns_rbtnodechain_last( 4555 rbtdbiter->current, rbtdb->tree, name, 4556 origin); 4557 if (result == ISC_R_SUCCESS || 4558 result == DNS_R_NEWORIGIN) 4559 { 4560 result = dns_rbtnodechain_current( 4561 rbtdbiter->current, NULL, NULL, 4562 &rbtdbiter->node); 4563 } 4564 break; 4565 default: 4566 UNREACHABLE(); 4567 } 4568 } 4569 if (result == ISC_R_SUCCESS) { 4570 rbtdbiter->new_origin = true; 4571 reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4572 } 4573 } else { 4574 INSIST(result == ISC_R_NOTFOUND); 4575 result = ISC_R_NOMORE; /* The tree is empty. */ 4576 } 4577 4578 rbtdbiter->result = result; 4579 4580 return result; 4581 } 4582 4583 static isc_result_t 4584 dbiterator_seek(dns_dbiterator_t *iterator, 4585 const dns_name_t *name DNS__DB_FLARG) { 4586 isc_result_t result, tresult; 4587 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4588 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4589 dns_name_t *iname = NULL, *origin = NULL; 4590 4591 if (rbtdbiter->result != ISC_R_SUCCESS && 4592 rbtdbiter->result != ISC_R_NOTFOUND && 4593 rbtdbiter->result != DNS_R_PARTIALMATCH && 4594 rbtdbiter->result != ISC_R_NOMORE) 4595 { 4596 return rbtdbiter->result; 4597 } 4598 4599 if (rbtdbiter->paused) { 4600 resume_iteration(rbtdbiter); 4601 } 4602 4603 dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4604 4605 iname = dns_fixedname_name(&rbtdbiter->name); 4606 origin = dns_fixedname_name(&rbtdbiter->origin); 4607 dns_rbtnodechain_reset(&rbtdbiter->chain); 4608 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 4609 4610 switch (rbtdbiter->nsec3mode) { 4611 case nsec3only: 4612 rbtdbiter->current = &rbtdbiter->nsec3chain; 4613 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, 4614 &rbtdbiter->node, rbtdbiter->current, 4615 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 4616 break; 4617 case nonsec3: 4618 rbtdbiter->current = &rbtdbiter->chain; 4619 result = dns_rbt_findnode(rbtdb->tree, name, NULL, 4620 &rbtdbiter->node, rbtdbiter->current, 4621 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 4622 break; 4623 case full: 4624 /* 4625 * Stay on main chain if not found on either chain. 4626 */ 4627 rbtdbiter->current = &rbtdbiter->chain; 4628 result = dns_rbt_findnode(rbtdb->tree, name, NULL, 4629 &rbtdbiter->node, rbtdbiter->current, 4630 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 4631 if (result == DNS_R_PARTIALMATCH) { 4632 dns_rbtnode_t *node = NULL; 4633 tresult = dns_rbt_findnode( 4634 rbtdb->nsec3, name, NULL, &node, 4635 &rbtdbiter->nsec3chain, DNS_RBTFIND_EMPTYDATA, 4636 NULL, NULL); 4637 if (tresult == ISC_R_SUCCESS) { 4638 rbtdbiter->node = node; 4639 rbtdbiter->current = &rbtdbiter->nsec3chain; 4640 result = tresult; 4641 } 4642 } 4643 break; 4644 default: 4645 UNREACHABLE(); 4646 } 4647 4648 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) { 4649 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname, 4650 origin, NULL); 4651 if (tresult == ISC_R_SUCCESS) { 4652 rbtdbiter->new_origin = true; 4653 reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4654 } else { 4655 result = tresult; 4656 rbtdbiter->node = NULL; 4657 } 4658 } else { 4659 rbtdbiter->node = NULL; 4660 } 4661 4662 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS 4663 : result; 4664 4665 return result; 4666 } 4667 4668 static isc_result_t 4669 dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4670 isc_result_t result; 4671 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4672 dns_name_t *name = NULL, *origin = NULL; 4673 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4674 4675 REQUIRE(rbtdbiter->node != NULL); 4676 4677 if (rbtdbiter->result != ISC_R_SUCCESS) { 4678 return rbtdbiter->result; 4679 } 4680 4681 if (rbtdbiter->paused) { 4682 resume_iteration(rbtdbiter); 4683 } 4684 4685 dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4686 4687 name = dns_fixedname_name(&rbtdbiter->name); 4688 origin = dns_fixedname_name(&rbtdbiter->origin); 4689 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin); 4690 if (rbtdbiter->current == &rbtdbiter->nsec3chain && 4691 (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN)) 4692 { 4693 /* 4694 * If we're in the NSEC3 tree, it's empty or we've 4695 * reached the origin, then we're done with it. 4696 */ 4697 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 4698 NULL, &rbtdbiter->node); 4699 if (result == ISC_R_NOTFOUND || 4700 RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) 4701 { 4702 rbtdbiter->node = NULL; 4703 result = ISC_R_NOMORE; 4704 } 4705 } 4706 if (result == ISC_R_NOMORE && rbtdbiter->nsec3mode != nsec3only && 4707 &rbtdbiter->nsec3chain == rbtdbiter->current) 4708 { 4709 rbtdbiter->current = &rbtdbiter->chain; 4710 dns_rbtnodechain_reset(rbtdbiter->current); 4711 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, 4712 name, origin); 4713 if (result == ISC_R_NOTFOUND) { 4714 result = ISC_R_NOMORE; 4715 } 4716 } 4717 4718 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { 4719 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); 4720 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 4721 NULL, &rbtdbiter->node); 4722 } 4723 4724 if (result == ISC_R_SUCCESS) { 4725 reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4726 } 4727 4728 rbtdbiter->result = result; 4729 4730 return result; 4731 } 4732 4733 static isc_result_t 4734 dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) { 4735 isc_result_t result; 4736 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4737 dns_name_t *name = NULL, *origin = NULL; 4738 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4739 4740 REQUIRE(rbtdbiter->node != NULL); 4741 4742 if (rbtdbiter->result != ISC_R_SUCCESS) { 4743 return rbtdbiter->result; 4744 } 4745 4746 if (rbtdbiter->paused) { 4747 resume_iteration(rbtdbiter); 4748 } 4749 4750 name = dns_fixedname_name(&rbtdbiter->name); 4751 origin = dns_fixedname_name(&rbtdbiter->origin); 4752 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin); 4753 if (result == ISC_R_NOMORE && rbtdbiter->nsec3mode != nonsec3 && 4754 &rbtdbiter->chain == rbtdbiter->current) 4755 { 4756 rbtdbiter->current = &rbtdbiter->nsec3chain; 4757 dns_rbtnodechain_reset(rbtdbiter->current); 4758 result = dns_rbtnodechain_first(rbtdbiter->current, 4759 rbtdb->nsec3, name, origin); 4760 if (result == ISC_R_NOTFOUND) { 4761 result = ISC_R_NOMORE; 4762 } 4763 } 4764 4765 dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4766 4767 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { 4768 /* 4769 * If we've just started the NSEC3 tree, 4770 * skip over the origin. 4771 */ 4772 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); 4773 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 4774 NULL, &rbtdbiter->node); 4775 if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { 4776 rbtdbiter->node = NULL; 4777 result = dns_rbtnodechain_next(rbtdbiter->current, name, 4778 origin); 4779 if (result == ISC_R_SUCCESS || 4780 result == DNS_R_NEWORIGIN) 4781 { 4782 result = dns_rbtnodechain_current( 4783 rbtdbiter->current, NULL, NULL, 4784 &rbtdbiter->node); 4785 } 4786 } 4787 } 4788 if (result == ISC_R_SUCCESS) { 4789 reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); 4790 } 4791 4792 rbtdbiter->result = result; 4793 4794 return result; 4795 } 4796 4797 static isc_result_t 4798 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 4799 dns_name_t *name DNS__DB_FLARG) { 4800 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4801 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4802 dns_rbtnode_t *node = rbtdbiter->node; 4803 isc_result_t result; 4804 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name); 4805 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); 4806 4807 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS); 4808 REQUIRE(rbtdbiter->node != NULL); 4809 4810 if (rbtdbiter->paused) { 4811 resume_iteration(rbtdbiter); 4812 } 4813 4814 if (name != NULL) { 4815 if (rbtdbiter->common.relative_names) { 4816 origin = NULL; 4817 } 4818 result = dns_name_concatenate(nodename, origin, name, NULL); 4819 if (result != ISC_R_SUCCESS) { 4820 return result; 4821 } 4822 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin) { 4823 result = DNS_R_NEWORIGIN; 4824 } 4825 } else { 4826 result = ISC_R_SUCCESS; 4827 } 4828 4829 dns__rbtdb_newref(rbtdb, node, isc_rwlocktype_none DNS__DB_FLARG_PASS); 4830 4831 *nodep = rbtdbiter->node; 4832 4833 return result; 4834 } 4835 4836 static isc_result_t 4837 dbiterator_pause(dns_dbiterator_t *iterator) { 4838 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 4839 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4840 4841 if (rbtdbiter->result != ISC_R_SUCCESS && 4842 rbtdbiter->result != ISC_R_NOTFOUND && 4843 rbtdbiter->result != DNS_R_PARTIALMATCH && 4844 rbtdbiter->result != ISC_R_NOMORE) 4845 { 4846 return rbtdbiter->result; 4847 } 4848 4849 if (rbtdbiter->paused) { 4850 return ISC_R_SUCCESS; 4851 } 4852 4853 rbtdbiter->paused = true; 4854 4855 if (rbtdbiter->tree_locked == isc_rwlocktype_read) { 4856 TREE_UNLOCK(&rbtdb->tree_lock, &rbtdbiter->tree_locked); 4857 } 4858 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none); 4859 4860 return ISC_R_SUCCESS; 4861 } 4862 4863 static isc_result_t 4864 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) { 4865 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 4866 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); 4867 4868 if (rbtdbiter->result != ISC_R_SUCCESS) { 4869 return rbtdbiter->result; 4870 } 4871 4872 dns_name_copy(origin, name); 4873 return ISC_R_SUCCESS; 4874 } 4875 4876 static void 4877 freeglue(isc_mem_t *mctx, dns_glue_t *glue) { 4878 while (glue != NULL) { 4879 dns_glue_t *next = glue->next; 4880 4881 if (dns_rdataset_isassociated(&glue->rdataset_a)) { 4882 dns_rdataset_disassociate(&glue->rdataset_a); 4883 } 4884 if (dns_rdataset_isassociated(&glue->sigrdataset_a)) { 4885 dns_rdataset_disassociate(&glue->sigrdataset_a); 4886 } 4887 4888 if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) { 4889 dns_rdataset_disassociate(&glue->rdataset_aaaa); 4890 } 4891 if (dns_rdataset_isassociated(&glue->sigrdataset_aaaa)) { 4892 dns_rdataset_disassociate(&glue->sigrdataset_aaaa); 4893 } 4894 4895 dns_rdataset_invalidate(&glue->rdataset_a); 4896 dns_rdataset_invalidate(&glue->sigrdataset_a); 4897 dns_rdataset_invalidate(&glue->rdataset_aaaa); 4898 dns_rdataset_invalidate(&glue->sigrdataset_aaaa); 4899 4900 isc_mem_put(mctx, glue, sizeof(*glue)); 4901 4902 glue = next; 4903 } 4904 } 4905 4906 void 4907 dns__rbtdb_free_gluenode_rcu(struct rcu_head *rcu_head) { 4908 dns_gluenode_t *gluenode = caa_container_of(rcu_head, dns_gluenode_t, 4909 rcu_head); 4910 4911 freeglue(gluenode->mctx, gluenode->glue); 4912 4913 dns_db_detachnode(gluenode->db, (dns_dbnode_t **)&gluenode->node); 4914 4915 isc_mem_putanddetach(&gluenode->mctx, gluenode, sizeof(*gluenode)); 4916 } 4917 4918 void 4919 dns__rbtdb_free_gluenode(dns_gluenode_t *gluenode) { 4920 call_rcu(&gluenode->rcu_head, dns__rbtdb_free_gluenode_rcu); 4921 } 4922 4923 static void 4924 free_gluetable(struct cds_lfht *glue_table) { 4925 struct cds_lfht_iter iter; 4926 dns_gluenode_t *gluenode = NULL; 4927 4928 rcu_read_lock(); 4929 cds_lfht_for_each_entry(glue_table, &iter, gluenode, ht_node) { 4930 INSIST(!cds_lfht_del(glue_table, &gluenode->ht_node)); 4931 dns__rbtdb_free_gluenode(gluenode); 4932 } 4933 rcu_read_unlock(); 4934 4935 cds_lfht_destroy(glue_table, NULL); 4936 } 4937 4938 void 4939 dns__rbtdb_deletedata(dns_db_t *db ISC_ATTR_UNUSED, 4940 dns_dbnode_t *node ISC_ATTR_UNUSED, void *data) { 4941 dns_slabheader_t *header = data; 4942 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; 4943 4944 if (header->heap != NULL && header->heap_index != 0) { 4945 isc_heap_delete(header->heap, header->heap_index); 4946 } 4947 4948 if (IS_CACHE(rbtdb)) { 4949 update_rrsetstats(rbtdb->rrsetstats, header->type, 4950 atomic_load_acquire(&header->attributes), 4951 false); 4952 4953 if (ISC_LINK_LINKED(header, link)) { 4954 int idx = RBTDB_HEADERNODE(header)->locknum; 4955 INSIST(IS_CACHE(rbtdb)); 4956 ISC_LIST_UNLINK(rbtdb->lru[idx], header, link); 4957 } 4958 4959 if (header->noqname != NULL) { 4960 dns_slabheader_freeproof(db->mctx, &header->noqname); 4961 } 4962 if (header->closest != NULL) { 4963 dns_slabheader_freeproof(db->mctx, &header->closest); 4964 } 4965 } 4966 } 4967 4968 /* 4969 * Caller must be holding the node write lock. 4970 */ 4971 static void 4972 expire_ttl_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, 4973 isc_rwlocktype_t *tlocktypep, isc_stdtime_t now, 4974 bool cache_is_overmem DNS__DB_FLARG) { 4975 isc_heap_t *heap = rbtdb->heaps[locknum]; 4976 4977 for (size_t i = 0; i < DNS_RBTDB_EXPIRE_TTL_COUNT; i++) { 4978 dns_slabheader_t *header = isc_heap_element(heap, 1); 4979 4980 if (header == NULL) { 4981 /* No headers left on this TTL heap; exit cleaning */ 4982 return; 4983 } 4984 4985 dns_ttl_t ttl = header->ttl; 4986 4987 if (!cache_is_overmem) { 4988 /* Only account for stale TTL if cache is not overmem */ 4989 ttl += STALE_TTL(header, rbtdb); 4990 } 4991 4992 if (ttl >= now - RBTDB_VIRTUAL) { 4993 /* 4994 * The header at the top of this TTL heap is not yet 4995 * eligible for expiry, so none of the other headers on 4996 * the same heap can be eligible for expiry, either; 4997 * exit cleaning. 4998 */ 4999 return; 5000 } 5001 5002 dns__cacherbt_expireheader(header, tlocktypep, 5003 dns_expire_ttl DNS__DB_FLARG_PASS); 5004 } 5005 } 5006 5007 void 5008 dns__rbtdb_setmaxrrperset(dns_db_t *db, uint32_t value) { 5009 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5010 5011 REQUIRE(VALID_RBTDB(rbtdb)); 5012 5013 rbtdb->maxrrperset = value; 5014 } 5015 5016 void 5017 dns__rbtdb_setmaxtypepername(dns_db_t *db, uint32_t maxtypepername) { 5018 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5019 5020 REQUIRE(VALID_RBTDB(rbtdb)); 5021 5022 rbtdb->maxtypepername = maxtypepername; 5023 } 5024