1 /* $NetBSD: rbtdb.c,v 1.20 2024/09/22 00:14:06 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 /*! \file */ 17 18 #include <ctype.h> 19 #include <inttypes.h> 20 #include <stdbool.h> 21 #include <sys/mman.h> 22 23 #include <isc/atomic.h> 24 #include <isc/crc64.h> 25 #include <isc/event.h> 26 #include <isc/file.h> 27 #include <isc/hash.h> 28 #include <isc/heap.h> 29 #include <isc/hex.h> 30 #include <isc/mem.h> 31 #include <isc/mutex.h> 32 #include <isc/once.h> 33 #include <isc/print.h> 34 #include <isc/random.h> 35 #include <isc/refcount.h> 36 #include <isc/result.h> 37 #include <isc/rwlock.h> 38 #include <isc/serial.h> 39 #include <isc/stdio.h> 40 #include <isc/string.h> 41 #include <isc/task.h> 42 #include <isc/time.h> 43 #include <isc/util.h> 44 45 #include <dns/callbacks.h> 46 #include <dns/db.h> 47 #include <dns/dbiterator.h> 48 #include <dns/events.h> 49 #include <dns/fixedname.h> 50 #include <dns/log.h> 51 #include <dns/masterdump.h> 52 #include <dns/nsec.h> 53 #include <dns/nsec3.h> 54 #include <dns/rbt.h> 55 #include <dns/rdata.h> 56 #include <dns/rdataset.h> 57 #include <dns/rdatasetiter.h> 58 #include <dns/rdataslab.h> 59 #include <dns/rdatastruct.h> 60 #include <dns/stats.h> 61 #include <dns/time.h> 62 #include <dns/view.h> 63 #include <dns/zone.h> 64 #include <dns/zonekey.h> 65 66 #include "rbtdb.h" 67 68 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4') 69 70 #define CHECK(op) \ 71 do { \ 72 result = (op); \ 73 if (result != ISC_R_SUCCESS) \ 74 goto failure; \ 75 } while (0) 76 77 /*% 78 * Note that "impmagic" is not the first four 
bytes of the struct, so 79 * ISC_MAGIC_VALID cannot be used. 80 */ 81 #define VALID_RBTDB(rbtdb) \ 82 ((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC) 83 84 typedef uint32_t rbtdb_serial_t; 85 typedef uint32_t rbtdb_rdatatype_t; 86 87 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF)) 88 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16)) 89 #define RBTDB_RDATATYPE_VALUE(base, ext) \ 90 ((rbtdb_rdatatype_t)(((uint32_t)ext) << 16) | \ 91 (((uint32_t)base) & 0xffff)) 92 93 #define RBTDB_RDATATYPE_SIGNSEC \ 94 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec) 95 #define RBTDB_RDATATYPE_SIGNSEC3 \ 96 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3) 97 #define RBTDB_RDATATYPE_SIGNS \ 98 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns) 99 #define RBTDB_RDATATYPE_SIGCNAME \ 100 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname) 101 #define RBTDB_RDATATYPE_SIGDNAME \ 102 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname) 103 #define RBTDB_RDATATYPE_SIGDS \ 104 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds) 105 #define RBTDB_RDATATYPE_SIGSOA \ 106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa) 107 #define RBTDB_RDATATYPE_NCACHEANY RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any) 108 109 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0) 110 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l) 111 #define RBTDB_LOCK(l, t) RWLOCK((l), (t)) 112 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t)) 113 114 /* 115 * Since node locking is sensitive to both performance and memory footprint, 116 * we need some trick here. If we have both high-performance rwlock and 117 * high performance and small-memory reference counters, we use rwlock for 118 * node lock and isc_refcount for node references. In this case, we don't have 119 * to protect the access to the counters by locks. 
120 * Otherwise, we simply use ordinary mutex lock for node locking, and use 121 * simple integers as reference counters which are protected by the lock. 122 * In most cases, we can simply use wrapper macros such as NODE_LOCK and 123 * NODE_UNLOCK. In some other cases, however, we need to protect reference 124 * counters first and then protect other parts of a node as read-only data. 125 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also 126 * provided for these special cases. When we can use the efficient backend 127 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read). 128 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical 129 * section including the access to the reference counter. 130 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected 131 * section is also protected by NODE_STRONGLOCK(). 132 */ 133 typedef isc_rwlock_t nodelock_t; 134 135 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0) 136 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l) 137 #define NODE_LOCK(l, t) RWLOCK((l), (t)) 138 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t)) 139 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l) 140 #define NODE_DOWNGRADE(l) isc_rwlock_downgrade(l) 141 142 /*% 143 * Whether to rate-limit updating the LRU to avoid possible thread contention. 144 * Updating LRU requires write locking, so we don't do it every time the 145 * record is touched - only after some time passes. 146 */ 147 #ifndef DNS_RBTDB_LIMITLRUUPDATE 148 #define DNS_RBTDB_LIMITLRUUPDATE 1 149 #endif 150 151 /*% Time after which we update LRU for glue records, 5 minutes */ 152 #define DNS_RBTDB_LRUUPDATE_GLUE 300 153 /*% Time after which we update LRU for all other records, 10 minutes */ 154 #define DNS_RBTDB_LRUUPDATE_REGULAR 600 155 156 /* 157 * Allow clients with a virtual time of up to 5 minutes in the past to see 158 * records that would otherwise have expired. 
/*%
 * Side data hung off an rdataset header through its 'noqname' and
 * 'closest' members: an owner name, two opaque data pointers and an
 * rdata type.
 *
 * NOTE(review): 'neg' and 'negsig' presumably point at the stored data of
 * the negative-proof rdataset and of its signature -- confirm against the
 * code that fills them in (not visible from here).
 */
struct noqname {
	dns_name_t name;
	void *neg;
	void *negsig;
	dns_rdatatype_t type;
};

/*%
 * Per-rdataset bookkeeping header.
 *
 * 'attributes' is a bit mask of RDATASET_ATTR_* values and is only ever
 * read or modified through atomic operations (see the EXISTS(), STALE(),
 * RDATASET_ATTR_SET() ... helper macros).
 */
typedef struct rdatasetheader {
	/*%
	 * Locked by the owning node's lock.
	 */
	rbtdb_serial_t serial;
	/*
	 * ACTIVE() compares this field directly against the current time,
	 * i.e. for those callers it is an absolute expiry time rather than
	 * a relative TTL.
	 */
	dns_ttl_t rdh_ttl;
	/*
	 * Packed type: base type in the low 16 bits, extended (covered)
	 * type in the high 16 bits; see RBTDB_RDATATYPE_VALUE().
	 */
	rbtdb_rdatatype_t type;
	atomic_uint_least16_t attributes;
	dns_trust_t trust;
	atomic_uint_fast32_t last_refresh_fail_ts;
	struct noqname *noqname;
	struct noqname *closest;
	/* Tie-breaker used together with 'resign' by resign_sooner(). */
	unsigned int resign_lsb : 1;
	/*%
	 * We don't use the LIST macros for 'next' and 'down', because the
	 * LIST structure has both head and tail pointers, and is doubly
	 * linked.
	 */

	struct rdatasetheader *next;
	/*%<
	 * If this is the top header for an rdataset, 'next' points
	 * to the top header for the next rdataset (i.e., the next type).
	 * Otherwise, it points up to the header whose down pointer points
	 * at this header.
	 */

	struct rdatasetheader *down;
	/*%<
	 * Points to the header for the next older version of
	 * this rdataset.
	 */

	atomic_uint_fast32_t count;
	/*%<
	 * Monotonically increased every time this rdataset is bound so that
	 * it is used as the base of the starting point in DNS responses
	 * when the "cyclic" rrset-order is required.
	 */

	dns_rbtnode_t *node;
	isc_stdtime_t last_used;
	ISC_LINK(struct rdatasetheader) link;

	unsigned int heap_index;
	/*%<
	 * Used for TTL-based cache cleaning.  Written by set_index();
	 * set_ttl() treats the value 0 as "not on a heap".
	 */
	/* Resign time; primary sort key in resign_sooner(). */
	isc_stdtime_t resign;
	/*%
	 * Case vector (32 bytes, i.e. one bit per possible owner-name
	 * octet).  If the bit is set then the corresponding character in
	 * the owner name is to be rendered upper case.
	 *
	 * NOTE(review): the original comment says the character "needs to
	 * be AND'd with 0x20"; in ASCII it is clearing bit 0x20 that yields
	 * upper case -- confirm against setownercase()/getownercase().
	 */
	unsigned char upper[32];
} rdatasetheader_t;

typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
*/ 230 #define RDATASET_ATTR_STALE 0x0002 231 #define RDATASET_ATTR_IGNORE 0x0004 232 #define RDATASET_ATTR_RETAIN 0x0008 233 #define RDATASET_ATTR_NXDOMAIN 0x0010 234 #define RDATASET_ATTR_RESIGN 0x0020 235 #define RDATASET_ATTR_STATCOUNT 0x0040 236 #define RDATASET_ATTR_OPTOUT 0x0080 237 #define RDATASET_ATTR_NEGATIVE 0x0100 238 #define RDATASET_ATTR_PREFETCH 0x0200 239 #define RDATASET_ATTR_CASESET 0x0400 240 #define RDATASET_ATTR_ZEROTTL 0x0800 241 #define RDATASET_ATTR_CASEFULLYLOWER 0x1000 242 /*%< Ancient - awaiting cleanup. */ 243 #define RDATASET_ATTR_ANCIENT 0x2000 244 #define RDATASET_ATTR_STALE_WINDOW 0x4000 245 246 /* 247 * XXX 248 * When the cache will pre-expire data (due to memory low or other 249 * situations) before the rdataset's TTL has expired, it MUST 250 * respect the RETAIN bit and not expire the data until its TTL is 251 * expired. 252 */ 253 254 #define EXISTS(header) \ 255 ((atomic_load_acquire(&(header)->attributes) & \ 256 RDATASET_ATTR_NONEXISTENT) == 0) 257 #define NONEXISTENT(header) \ 258 ((atomic_load_acquire(&(header)->attributes) & \ 259 RDATASET_ATTR_NONEXISTENT) != 0) 260 #define IGNORE(header) \ 261 ((atomic_load_acquire(&(header)->attributes) & \ 262 RDATASET_ATTR_IGNORE) != 0) 263 #define RETAIN(header) \ 264 ((atomic_load_acquire(&(header)->attributes) & \ 265 RDATASET_ATTR_RETAIN) != 0) 266 #define NXDOMAIN(header) \ 267 ((atomic_load_acquire(&(header)->attributes) & \ 268 RDATASET_ATTR_NXDOMAIN) != 0) 269 #define STALE(header) \ 270 ((atomic_load_acquire(&(header)->attributes) & RDATASET_ATTR_STALE) != \ 271 0) 272 #define STALE_WINDOW(header) \ 273 ((atomic_load_acquire(&(header)->attributes) & \ 274 RDATASET_ATTR_STALE_WINDOW) != 0) 275 #define RESIGN(header) \ 276 ((atomic_load_acquire(&(header)->attributes) & \ 277 RDATASET_ATTR_RESIGN) != 0) 278 #define OPTOUT(header) \ 279 ((atomic_load_acquire(&(header)->attributes) & \ 280 RDATASET_ATTR_OPTOUT) != 0) 281 #define NEGATIVE(header) \ 282 
((atomic_load_acquire(&(header)->attributes) & \ 283 RDATASET_ATTR_NEGATIVE) != 0) 284 #define PREFETCH(header) \ 285 ((atomic_load_acquire(&(header)->attributes) & \ 286 RDATASET_ATTR_PREFETCH) != 0) 287 #define CASESET(header) \ 288 ((atomic_load_acquire(&(header)->attributes) & \ 289 RDATASET_ATTR_CASESET) != 0) 290 #define ZEROTTL(header) \ 291 ((atomic_load_acquire(&(header)->attributes) & \ 292 RDATASET_ATTR_ZEROTTL) != 0) 293 #define CASEFULLYLOWER(header) \ 294 ((atomic_load_acquire(&(header)->attributes) & \ 295 RDATASET_ATTR_CASEFULLYLOWER) != 0) 296 #define ANCIENT(header) \ 297 ((atomic_load_acquire(&(header)->attributes) & \ 298 RDATASET_ATTR_ANCIENT) != 0) 299 #define STATCOUNT(header) \ 300 ((atomic_load_acquire(&(header)->attributes) & \ 301 RDATASET_ATTR_STATCOUNT) != 0) 302 #define STALE_TTL(header, rbtdb) (NXDOMAIN(header) ? 0 : rbtdb->serve_stale_ttl) 303 304 #define RDATASET_ATTR_GET(header, attribute) \ 305 (atomic_load_acquire(&(header)->attributes) & attribute) 306 #define RDATASET_ATTR_SET(header, attribute) \ 307 atomic_fetch_or_release(&(header)->attributes, attribute) 308 #define RDATASET_ATTR_CLR(header, attribute) \ 309 atomic_fetch_and_release(&(header)->attributes, ~(attribute)) 310 311 #define ACTIVE(header, now) \ 312 (((header)->rdh_ttl > (now)) || \ 313 ((header)->rdh_ttl == (now) && ZEROTTL(header))) 314 315 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */ 316 #define RBTDB_GLUE_TABLE_INIT_BITS 2U 317 #define RBTDB_GLUE_TABLE_MAX_BITS 32U 318 #define RBTDB_GLUE_TABLE_OVERCOMMIT 3 319 320 #define GOLDEN_RATIO_32 0x61C88647 321 #define HASHSIZE(bits) (UINT64_C(1) << (bits)) 322 323 static uint32_t 324 hash_32(uint32_t val, unsigned int bits) { 325 REQUIRE(bits <= RBTDB_GLUE_TABLE_MAX_BITS); 326 /* High bits are more random. 
*/ 327 return (val * GOLDEN_RATIO_32 >> (32 - bits)); 328 } 329 330 #define EXPIREDOK(rbtiterator) \ 331 (((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0) 332 333 #define STALEOK(rbtiterator) \ 334 (((rbtiterator)->common.options & DNS_DB_STALEOK) != 0) 335 336 #define RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, iterator) \ 337 ((iterator)->current == &(iterator)->nsec3chain && \ 338 (iterator)->node == (rbtdb)->nsec3_origin_node) 339 340 /*% 341 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps). 342 * There is a tradeoff issue about configuring this value: if this is too 343 * small, it may cause heavier contention between threads; if this is too large, 344 * LRU purge algorithm won't work well (entries tend to be purged prematurely). 345 * The default value should work well for most environments, but this can 346 * also be configurable at compilation time via the 347 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than 348 * 1 due to the assumption of overmem_purge(). 349 */ 350 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT 351 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 352 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1" 353 #else /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ 354 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT 355 #endif /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ 356 #else /* ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ 357 #define DEFAULT_CACHE_NODE_LOCK_COUNT 17 358 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ 359 360 /* 361 * This defines the number of headers that we try to expire each time the 362 * expire_ttl_headers() is run. The number should be small enough, so the 363 * TTL-based header expiration doesn't take too long, but it should be large 364 * enough, so we expire enough headers if their TTL is clustered. 365 */ 366 #define DNS_RBTDB_EXPIRE_TTL_COUNT 10 367 368 typedef struct { 369 nodelock_t lock; 370 /* Protected in the refcount routines. 
/*%
 * One element of a version's 'changed_list': a node plus a 'dirty' flag.
 */
typedef struct rbtdb_changed {
	dns_rbtnode_t *node;
	bool dirty;
	ISC_LINK(struct rbtdb_changed) link;
} rbtdb_changed_t;

typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;

typedef enum { dns_db_insecure, dns_db_partial, dns_db_secure } dns_db_secure_t;

typedef struct dns_rbtdb dns_rbtdb_t;

/* Reason for expiring a record from cache */
typedef enum { expire_lru, expire_ttl, expire_flush } expire_t;

typedef struct rbtdb_glue rbtdb_glue_t;

/*%
 * Entry of a per-version glue hash table: buckets are singly linked
 * through 'next'.
 */
typedef struct rbtdb_glue_table_node {
	struct rbtdb_glue_table_node *next;
	dns_rbtnode_t *node;
	rbtdb_glue_t *glue_list;
} rbtdb_glue_table_node_t;

typedef enum {
	rdataset_ttl_fresh,
	rdataset_ttl_stale,
	rdataset_ttl_ancient
} rdataset_ttl_t;

/*%
 * A database version.  Versions are kept on the database's
 * 'open_versions' list via 'link'.
 */
typedef struct rbtdb_version {
	/* Not locked */
	rbtdb_serial_t serial;
	dns_rbtdb_t *rbtdb;
	/*
	 * Protected in the refcount routines.
	 * XXXJT: should we change the lock policy based on the refcount
	 * performance?
	 */
	isc_refcount_t references;
	/* Locked by database lock. */
	bool writer;
	bool commit_ok;
	rbtdb_changedlist_t changed_list;
	rdatasetheaderlist_t resigned_list;
	ISC_LINK(struct rbtdb_version) link;
	dns_db_secure_t secure;
	bool havensec3;
	/* NSEC3 parameters */
	dns_hash_t hash;
	uint8_t flags;
	uint16_t iterations;
	uint8_t salt_length;
	unsigned char salt[DNS_NSEC3_SALTSIZE];

	/*
	 * records and xfrsize are covered by rwlock.
	 */
	isc_rwlock_t rwlock;
	uint64_t records;
	uint64_t xfrsize;

	/*
	 * Glue table.
	 * NOTE(review): presumably 'glue_rwlock' guards the three members
	 * below and the table holds HASHSIZE(glue_table_bits) buckets --
	 * confirm against the glue cache code.
	 */
	isc_rwlock_t glue_rwlock;
	size_t glue_table_bits;
	size_t glue_table_nodecount;
	rbtdb_glue_table_node_t **glue_table;
} rbtdb_version_t;

typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;

/*%
 * The red-black-tree database object.  'common' holds the generic
 * dns_db_t part; VALID_RBTDB() checks 'common.impmagic'.
 */
struct dns_rbtdb {
	/* Unlocked. */
	dns_db_t common;
	/* Locks the data in this struct */
	isc_rwlock_t lock;
	/* Locks the tree structure (prevents nodes appearing/disappearing) */
	isc_rwlock_t tree_lock;
	/* Locks for individual tree nodes */
	unsigned int node_lock_count;
	rbtdb_nodelock_t *node_locks;
	dns_rbtnode_t *origin_node;
	dns_rbtnode_t *nsec3_origin_node;
	dns_stats_t *rrsetstats;     /* cache DB only */
	isc_stats_t *cachestats;     /* cache DB only */
	isc_stats_t *gluecachestats; /* zone DB only */
	/* Locked by lock. */
	unsigned int active;
	isc_refcount_t references;
	/* Bit mask of RBTDB_ATTR_* values. */
	unsigned int attributes;
	rbtdb_serial_t current_serial;
	rbtdb_serial_t least_serial;
	rbtdb_serial_t next_serial;
	uint32_t maxrrperset;
	uint32_t maxtypepername;
	rbtdb_version_t *current_version;
	rbtdb_version_t *future_version;
	rbtdb_versionlist_t open_versions;
	isc_task_t *task;
	isc_task_t *prunetask;
	dns_dbnode_t *soanode;
	dns_dbnode_t *nsnode;

	/*
	 * Maximum length of time to keep using a stale answer past its
	 * normal TTL expiry.
	 */
	dns_ttl_t serve_stale_ttl;

	/*
	 * The time after a failed lookup, where stale answers from cache
	 * may be used directly in a DNS response without attempting a
	 * new iterative lookup.
	 */
	uint32_t serve_stale_refresh;

	/*
	 * This is a linked list used to implement the LRU cache.  There will
	 * be node_lock_count linked lists here.  Nodes in bucket 1 will be
	 * placed on the linked list rdatasets[1].
	 */
	rdatasetheaderlist_t *rdatasets;

	/*
	 * Start point % node_lock_count for next LRU cleanup.
	 */
	atomic_uint lru_sweep;

	/*
	 * When performing LRU cleaning limit cleaning to headers that were
	 * last used at or before this.
	 */
	atomic_uint last_used;

	/*%
	 * Temporary storage for stale cache nodes and dynamically deleted
	 * nodes that await being cleaned up.  One list per node lock
	 * bucket (free_rbtdb() walks deadnodes[0 .. node_lock_count-1]).
	 */
	rbtnodelist_t *deadnodes;

	/*
	 * Heaps.  These are used for TTL based expiry in a cache,
	 * or for zone resigning in a zone DB.  hmctx is the memory
	 * context to use for the heap (which differs from the main
	 * database memory context in the case of a cache).
	 */
	isc_mem_t *hmctx;
	isc_heap_t **heaps;

	/* Locked by tree_lock. */
	dns_rbt_t *tree;
	dns_rbt_t *nsec;
	dns_rbt_t *nsec3;

	/*
	 * Unlocked.  Budget handed to dns_rbt_destroy2() by free_rbtdb()
	 * and re-tuned with adjust_quantum() between rounds.
	 */
	unsigned int quantum;
};

/* Values for dns_rbtdb.attributes. */
#define RBTDB_ATTR_LOADED  0x01
#define RBTDB_ATTR_LOADING 0x02

/* True when the database is configured to keep stale data around. */
#define KEEPSTALE(rbtdb) ((rbtdb)->serve_stale_ttl > 0)

/*%
 * Search Context
 */
typedef struct {
	dns_rbtdb_t *rbtdb;
	rbtdb_version_t *rbtversion;
	rbtdb_serial_t serial;
	unsigned int options;
	dns_rbtnodechain_t chain;
	bool copy_name;
	bool need_cleanup;
	bool wild;
	dns_rbtnode_t *zonecut;
	rdatasetheader_t *zonecut_rdataset;
	rdatasetheader_t *zonecut_sigrdataset;
	dns_fixedname_t zonecut_name;
	isc_stdtime_t now;
} rbtdb_search_t;

/*%
 * Load Context
 */
typedef struct {
	dns_rbtdb_t *rbtdb;
	isc_stdtime_t now;
} rbtdb_load_t;
/*%
 * Method table with every slot this file implements filled in; only the
 * 'addnoqname' and 'addclosest' slots are left NULL.
 */
static dns_rdatasetmethods_t rdataset_methods = { rdataset_disassociate,
						  rdataset_first,
						  rdataset_next,
						  rdataset_current,
						  rdataset_clone,
						  rdataset_count,
						  NULL, /* addnoqname */
						  rdataset_getnoqname,
						  NULL, /* addclosest */
						  rdataset_getclosest,
						  rdataset_settrust,
						  rdataset_expire,
						  rdataset_clearprefetch,
						  rdataset_setownercase,
						  rdataset_getownercase,
						  rdataset_addglue };

/*%
 * Reduced method table: shares the first six entries with
 * 'rdataset_methods' and leaves every remaining slot NULL.
 */
static dns_rdatasetmethods_t slab_methods = {
	rdataset_disassociate,
	rdataset_first,
	rdataset_next,
	rdataset_current,
	rdataset_clone,
	rdataset_count,
	NULL, /* addnoqname */
	NULL, /* getnoqname */
	NULL, /* addclosest */
	NULL, /* getclosest */
	NULL, /* settrust */
	NULL, /* expire */
	NULL, /* clearprefetch */
	NULL, /* setownercase */
	NULL, /* getownercase */
	NULL  /* addglue */
};

/* Rdataset iterator methods, collected in 'rdatasetiter_methods' below. */
static void
rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
static isc_result_t
rdatasetiter_first(dns_rdatasetiter_t *iterator);
static isc_result_t
rdatasetiter_next(dns_rdatasetiter_t *iterator);
static void
rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset);

static dns_rdatasetitermethods_t rdatasetiter_methods = {
	rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next,
	rdatasetiter_current
};

/*%
 * Rdataset iterator: the generic iterator part followed by the header
 * the iterator is currently positioned on.
 */
typedef struct rbtdb_rdatasetiter {
	dns_rdatasetiter_t common;
	rdatasetheader_t *current;
} rbtdb_rdatasetiter_t;
681 */ 682 static void 683 dbiterator_destroy(dns_dbiterator_t **iteratorp); 684 static isc_result_t 685 dbiterator_first(dns_dbiterator_t *iterator); 686 static isc_result_t 687 dbiterator_last(dns_dbiterator_t *iterator); 688 static isc_result_t 689 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name); 690 static isc_result_t 691 dbiterator_prev(dns_dbiterator_t *iterator); 692 static isc_result_t 693 dbiterator_next(dns_dbiterator_t *iterator); 694 static isc_result_t 695 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 696 dns_name_t *name); 697 static isc_result_t 698 dbiterator_pause(dns_dbiterator_t *iterator); 699 static isc_result_t 700 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name); 701 702 static dns_dbiteratormethods_t dbiterator_methods = { 703 dbiterator_destroy, dbiterator_first, dbiterator_last, 704 dbiterator_seek, dbiterator_prev, dbiterator_next, 705 dbiterator_current, dbiterator_pause, dbiterator_origin 706 }; 707 708 #define DELETION_BATCH_MAX 64 709 710 /* 711 * If 'paused' is true, then the tree lock is not being held. 
/*
 * Database iterator state.
 *
 * If 'paused' is true, then the tree lock is not being held.
 *
 * 'current' always holds the address of either 'chain' (the "regular"
 * RBT) or 'nsec3chain' (the NSEC3 RBT), depending on which tree is being
 * traversed; 'nsec3mode' records whether the iterator walks both trees
 * or was restricted to one of them when it was created.
 *
 * NOTE(review): 'deletions'/'delcnt' look like a bounded batch (at most
 * DELETION_BATCH_MAX entries) of nodes queued for later cleanup --
 * confirm against the iterator code.
 */
typedef struct rbtdb_dbiterator {
	dns_dbiterator_t common;
	bool paused;
	bool new_origin;
	isc_rwlocktype_t tree_locked;
	isc_result_t result;
	dns_fixedname_t name;
	dns_fixedname_t origin;
	dns_rbtnodechain_t chain;
	dns_rbtnodechain_t nsec3chain;
	dns_rbtnodechain_t *current;
	dns_rbtnode_t *node;
	dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
	int delcnt;
	enum { full, nonsec3, nsec3only } nsec3mode;
} rbtdb_dbiterator_t;

/* Database flavour tests based on the generic dns_db_t attribute bits. */
#define IS_STUB(rbtdb)	(((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)

static void
free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event);
static void
overmem(dns_db_t *db, bool over);
static void
setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
static void
setownercase(rdatasetheader_t *header, const dns_name_t *name);

/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *      Both init_count and header->count can be UINT32_MAX.
 *      The count on the returned rdataset however can't be as
 *      that indicates that the database does not implement cyclic
 *      processing.
 */
static atomic_uint_fast32_t init_count = 0;
775 */ 776 777 /* Fixed RRSet helper macros */ 778 779 #define DNS_RDATASET_LENGTH 2; 780 781 #if DNS_RDATASET_FIXED 782 #define DNS_RDATASET_ORDER 2 783 #define DNS_RDATASET_COUNT (count * 4) 784 #else /* !DNS_RDATASET_FIXED */ 785 #define DNS_RDATASET_ORDER 0 786 #define DNS_RDATASET_COUNT 0 787 #endif /* DNS_RDATASET_FIXED */ 788 789 /* 790 * DB Routines 791 */ 792 793 static void 794 attach(dns_db_t *source, dns_db_t **targetp) { 795 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source; 796 797 REQUIRE(VALID_RBTDB(rbtdb)); 798 799 isc_refcount_increment(&rbtdb->references); 800 801 *targetp = source; 802 } 803 804 static void 805 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) { 806 dns_rbtdb_t *rbtdb = event->ev_arg; 807 808 UNUSED(task); 809 810 free_rbtdb(rbtdb, true, event); 811 } 812 813 static void 814 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { 815 INSIST(IS_CACHE(rbtdb)); 816 817 if (rbtdb->cachestats == NULL) { 818 return; 819 } 820 821 switch (result) { 822 case DNS_R_COVERINGNSEC: 823 isc_stats_increment(rbtdb->cachestats, 824 dns_cachestatscounter_coveringnsec); 825 FALLTHROUGH; 826 case ISC_R_SUCCESS: 827 case DNS_R_CNAME: 828 case DNS_R_DNAME: 829 case DNS_R_DELEGATION: 830 case DNS_R_NCACHENXDOMAIN: 831 case DNS_R_NCACHENXRRSET: 832 isc_stats_increment(rbtdb->cachestats, 833 dns_cachestatscounter_hits); 834 break; 835 default: 836 isc_stats_increment(rbtdb->cachestats, 837 dns_cachestatscounter_misses); 838 } 839 } 840 841 static bool 842 do_stats(rdatasetheader_t *header) { 843 return (EXISTS(header) && STATCOUNT(header)); 844 } 845 846 static void 847 update_rrsetstats(dns_rbtdb_t *rbtdb, const rbtdb_rdatatype_t htype, 848 const uint_least16_t hattributes, const bool increment) { 849 dns_rdatastatstype_t statattributes = 0; 850 dns_rdatastatstype_t base = 0; 851 dns_rdatastatstype_t type; 852 rdatasetheader_t *header = &(rdatasetheader_t){ 853 .type = htype, 854 .attributes = hattributes, 855 }; 856 857 if (!do_stats(header)) { 
858 return; 859 } 860 861 /* At the moment we count statistics only for cache DB */ 862 INSIST(IS_CACHE(rbtdb)); 863 864 if (NEGATIVE(header)) { 865 if (NXDOMAIN(header)) { 866 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN; 867 } else { 868 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET; 869 base = RBTDB_RDATATYPE_EXT(header->type); 870 } 871 } else { 872 base = RBTDB_RDATATYPE_BASE(header->type); 873 } 874 875 if (STALE(header)) { 876 statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE; 877 } 878 if (ANCIENT(header)) { 879 statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT; 880 } 881 882 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes); 883 if (increment) { 884 dns_rdatasetstats_increment(rbtdb->rrsetstats, type); 885 } else { 886 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type); 887 } 888 } 889 890 static void 891 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) { 892 int idx; 893 isc_heap_t *heap; 894 dns_ttl_t oldttl; 895 896 if (!IS_CACHE(rbtdb)) { 897 header->rdh_ttl = newttl; 898 return; 899 } 900 901 oldttl = header->rdh_ttl; 902 header->rdh_ttl = newttl; 903 904 /* 905 * It's possible the rbtdb is not a cache. If this is the case, 906 * we will not have a heap, and we move on. If we do, though, 907 * we might need to adjust things. 
908 */ 909 if (header->heap_index == 0 || newttl == oldttl) { 910 return; 911 } 912 idx = header->node->locknum; 913 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL) { 914 return; 915 } 916 heap = rbtdb->heaps[idx]; 917 918 if (newttl < oldttl) { 919 isc_heap_increased(heap, header->heap_index); 920 } else { 921 isc_heap_decreased(heap, header->heap_index); 922 } 923 924 if (newttl == 0) { 925 isc_heap_delete(heap, header->heap_index); 926 } 927 } 928 929 static bool 930 prio_type(rbtdb_rdatatype_t type) { 931 switch (type) { 932 case dns_rdatatype_soa: 933 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa): 934 case dns_rdatatype_a: 935 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_a): 936 case dns_rdatatype_mx: 937 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_mx): 938 case dns_rdatatype_aaaa: 939 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_aaaa): 940 case dns_rdatatype_nsec: 941 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec): 942 case dns_rdatatype_nsec3: 943 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3): 944 case dns_rdatatype_ns: 945 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns): 946 case dns_rdatatype_ds: 947 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds): 948 case dns_rdatatype_cname: 949 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname): 950 case dns_rdatatype_dname: 951 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname): 952 case dns_rdatatype_svcb: 953 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_svcb): 954 case dns_rdatatype_https: 955 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_https): 956 case dns_rdatatype_dnskey: 957 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dnskey): 958 case dns_rdatatype_srv: 959 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_srv): 960 case dns_rdatatype_txt: 961 case 
RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_txt): 962 case dns_rdatatype_ptr: 963 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ptr): 964 case dns_rdatatype_naptr: 965 case RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_naptr): 966 return (true); 967 } 968 return (false); 969 } 970 971 /*% 972 * These functions allow the heap code to rank the priority of each 973 * element. It returns true if v1 happens "sooner" than v2. 974 */ 975 static bool 976 ttl_sooner(void *v1, void *v2) { 977 rdatasetheader_t *h1 = v1; 978 rdatasetheader_t *h2 = v2; 979 980 return (h1->rdh_ttl < h2->rdh_ttl); 981 } 982 983 /*% 984 * Return which RRset should be resigned sooner. If the RRsets have the 985 * same signing time, prefer the other RRset over the SOA RRset. 986 */ 987 static bool 988 resign_sooner(void *v1, void *v2) { 989 rdatasetheader_t *h1 = v1; 990 rdatasetheader_t *h2 = v2; 991 992 return (h1->resign < h2->resign || 993 (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) || 994 (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb && 995 h2->type == RBTDB_RDATATYPE_SIGSOA)); 996 } 997 998 /*% 999 * This function sets the heap index into the header. 1000 */ 1001 static void 1002 set_index(void *what, unsigned int idx) { 1003 rdatasetheader_t *h = what; 1004 1005 h->heap_index = idx; 1006 } 1007 1008 /*% 1009 * Work out how many nodes can be deleted in the time between two 1010 * requests to the nameserver. Smooth the resulting number and use it 1011 * as a estimate for the number of nodes to be deleted in the next 1012 * iteration. 
1013 */ 1014 static unsigned int 1015 adjust_quantum(unsigned int old, isc_time_t *start) { 1016 unsigned int pps = dns_pps; /* packets per second */ 1017 unsigned int interval; 1018 uint64_t usecs; 1019 isc_time_t end; 1020 unsigned int nodes; 1021 1022 if (pps < 100) { 1023 pps = 100; 1024 } 1025 isc_time_now(&end); 1026 1027 interval = 1000000 / pps; /* interval in usec */ 1028 if (interval == 0) { 1029 interval = 1; 1030 } 1031 usecs = isc_time_microdiff(&end, start); 1032 if (usecs == 0) { 1033 /* 1034 * We were unable to measure the amount of time taken. 1035 * Double the nodes deleted next time. 1036 */ 1037 old *= 2; 1038 if (old > 1000) { 1039 old = 1000; 1040 } 1041 return (old); 1042 } 1043 nodes = old * interval; 1044 nodes /= (unsigned int)usecs; 1045 if (nodes == 0) { 1046 nodes = 1; 1047 } else if (nodes > 1000) { 1048 nodes = 1000; 1049 } 1050 1051 /* Smooth */ 1052 nodes = (nodes + old * 3) / 4; 1053 1054 if (nodes != old) { 1055 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1056 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1057 "adjust_quantum: old=%d, new=%d", old, nodes); 1058 } 1059 1060 return (nodes); 1061 } 1062 1063 static void 1064 free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { 1065 unsigned int i; 1066 isc_result_t result; 1067 char buf[DNS_NAME_FORMATSIZE]; 1068 dns_rbt_t **treep; 1069 isc_time_t start; 1070 1071 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) { 1072 overmem((dns_db_t *)rbtdb, (bool)-1); 1073 } 1074 1075 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions)); 1076 REQUIRE(rbtdb->future_version == NULL); 1077 1078 if (rbtdb->current_version != NULL) { 1079 isc_refcount_decrementz(&rbtdb->current_version->references); 1080 UNLINK(rbtdb->open_versions, rbtdb->current_version, link); 1081 isc_rwlock_destroy(&rbtdb->current_version->glue_rwlock); 1082 isc_refcount_destroy(&rbtdb->current_version->references); 1083 isc_rwlock_destroy(&rbtdb->current_version->rwlock); 1084 
isc_mem_put(rbtdb->common.mctx, rbtdb->current_version, 1085 sizeof(rbtdb_version_t)); 1086 } 1087 1088 /* 1089 * We assume the number of remaining dead nodes is reasonably small; 1090 * the overhead of unlinking all nodes here should be negligible. 1091 */ 1092 for (i = 0; i < rbtdb->node_lock_count; i++) { 1093 dns_rbtnode_t *node; 1094 1095 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); 1096 while (node != NULL) { 1097 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); 1098 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); 1099 } 1100 } 1101 1102 if (event == NULL) { 1103 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0; 1104 } 1105 1106 for (;;) { 1107 /* 1108 * pick the next tree to (start to) destroy 1109 */ 1110 treep = &rbtdb->tree; 1111 if (*treep == NULL) { 1112 treep = &rbtdb->nsec; 1113 if (*treep == NULL) { 1114 treep = &rbtdb->nsec3; 1115 /* 1116 * we're finished after clear cutting 1117 */ 1118 if (*treep == NULL) { 1119 break; 1120 } 1121 } 1122 } 1123 1124 isc_time_now(&start); 1125 result = dns_rbt_destroy2(treep, rbtdb->quantum); 1126 if (result == ISC_R_QUOTA) { 1127 INSIST(rbtdb->task != NULL); 1128 if (rbtdb->quantum != 0) { 1129 rbtdb->quantum = adjust_quantum(rbtdb->quantum, 1130 &start); 1131 } 1132 if (event == NULL) { 1133 event = isc_event_allocate( 1134 rbtdb->common.mctx, NULL, 1135 DNS_EVENT_FREESTORAGE, 1136 free_rbtdb_callback, rbtdb, 1137 sizeof(isc_event_t)); 1138 } 1139 isc_task_send(rbtdb->task, &event); 1140 return; 1141 } 1142 INSIST(result == ISC_R_SUCCESS && *treep == NULL); 1143 } 1144 1145 if (event != NULL) { 1146 isc_event_free(&event); 1147 } 1148 if (log) { 1149 if (dns_name_dynamic(&rbtdb->common.origin)) { 1150 dns_name_format(&rbtdb->common.origin, buf, 1151 sizeof(buf)); 1152 } else { 1153 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 1154 } 1155 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1156 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1157 "done free_rbtdb(%s)", buf); 1158 } 1159 if (dns_name_dynamic(&rbtdb->common.origin)) 
{ 1160 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx); 1161 } 1162 for (i = 0; i < rbtdb->node_lock_count; i++) { 1163 isc_refcount_destroy(&rbtdb->node_locks[i].references); 1164 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock); 1165 } 1166 1167 /* 1168 * Clean up LRU / re-signing order lists. 1169 */ 1170 if (rbtdb->rdatasets != NULL) { 1171 for (i = 0; i < rbtdb->node_lock_count; i++) { 1172 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i])); 1173 } 1174 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets, 1175 rbtdb->node_lock_count * 1176 sizeof(rdatasetheaderlist_t)); 1177 } 1178 /* 1179 * Clean up dead node buckets. 1180 */ 1181 if (rbtdb->deadnodes != NULL) { 1182 for (i = 0; i < rbtdb->node_lock_count; i++) { 1183 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i])); 1184 } 1185 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes, 1186 rbtdb->node_lock_count * sizeof(rbtnodelist_t)); 1187 } 1188 /* 1189 * Clean up heap objects. 1190 */ 1191 if (rbtdb->heaps != NULL) { 1192 for (i = 0; i < rbtdb->node_lock_count; i++) { 1193 isc_heap_destroy(&rbtdb->heaps[i]); 1194 } 1195 isc_mem_put(rbtdb->hmctx, rbtdb->heaps, 1196 rbtdb->node_lock_count * sizeof(isc_heap_t *)); 1197 } 1198 1199 if (rbtdb->rrsetstats != NULL) { 1200 dns_stats_detach(&rbtdb->rrsetstats); 1201 } 1202 if (rbtdb->cachestats != NULL) { 1203 isc_stats_detach(&rbtdb->cachestats); 1204 } 1205 if (rbtdb->gluecachestats != NULL) { 1206 isc_stats_detach(&rbtdb->gluecachestats); 1207 } 1208 1209 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks, 1210 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t)); 1211 isc_rwlock_destroy(&rbtdb->tree_lock); 1212 isc_refcount_destroy(&rbtdb->references); 1213 if (rbtdb->task != NULL) { 1214 isc_task_detach(&rbtdb->task); 1215 } 1216 if (rbtdb->prunetask != NULL) { 1217 isc_task_detach(&rbtdb->prunetask); 1218 } 1219 1220 RBTDB_DESTROYLOCK(&rbtdb->lock); 1221 rbtdb->common.magic = 0; 1222 rbtdb->common.impmagic = 0; 1223 isc_mem_detach(&rbtdb->hmctx); 1224 1225 
INSIST(ISC_LIST_EMPTY(rbtdb->common.update_listeners)); 1226 1227 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb)); 1228 } 1229 1230 static void 1231 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) { 1232 bool want_free = false; 1233 unsigned int i; 1234 unsigned int inactive = 0; 1235 1236 /* XXX check for open versions here */ 1237 1238 if (rbtdb->soanode != NULL) { 1239 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode); 1240 } 1241 if (rbtdb->nsnode != NULL) { 1242 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode); 1243 } 1244 1245 /* 1246 * The current version's glue table needs to be freed early 1247 * so the nodes are dereferenced before we check the active 1248 * node count below. 1249 */ 1250 if (rbtdb->current_version != NULL) { 1251 free_gluetable(rbtdb->current_version); 1252 } 1253 1254 /* 1255 * Even though there are no external direct references, there still 1256 * may be nodes in use. 1257 */ 1258 for (i = 0; i < rbtdb->node_lock_count; i++) { 1259 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write); 1260 rbtdb->node_locks[i].exiting = true; 1261 if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0) 1262 { 1263 inactive++; 1264 } 1265 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write); 1266 } 1267 1268 if (inactive != 0) { 1269 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 1270 rbtdb->active -= inactive; 1271 if (rbtdb->active == 0) { 1272 want_free = true; 1273 } 1274 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1275 if (want_free) { 1276 char buf[DNS_NAME_FORMATSIZE]; 1277 if (dns_name_dynamic(&rbtdb->common.origin)) { 1278 dns_name_format(&rbtdb->common.origin, buf, 1279 sizeof(buf)); 1280 } else { 1281 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 1282 } 1283 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1284 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1285 "calling free_rbtdb(%s)", buf); 1286 free_rbtdb(rbtdb, true, NULL); 1287 } 1288 } 1289 } 1290 1291 static void 1292 detach(dns_db_t **dbp) { 1293 
REQUIRE(dbp != NULL && VALID_RBTDB((dns_rbtdb_t *)(*dbp))); 1294 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp); 1295 *dbp = NULL; 1296 1297 if (isc_refcount_decrement(&rbtdb->references) == 1) { 1298 maybe_free_rbtdb(rbtdb); 1299 } 1300 } 1301 1302 static void 1303 currentversion(dns_db_t *db, dns_dbversion_t **versionp) { 1304 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1305 rbtdb_version_t *version; 1306 1307 REQUIRE(VALID_RBTDB(rbtdb)); 1308 1309 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 1310 version = rbtdb->current_version; 1311 isc_refcount_increment(&version->references); 1312 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 1313 1314 *versionp = (dns_dbversion_t *)version; 1315 } 1316 1317 static rbtdb_version_t * 1318 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial, 1319 unsigned int references, bool writer) { 1320 rbtdb_version_t *version; 1321 size_t size; 1322 1323 version = isc_mem_get(mctx, sizeof(*version)); 1324 version->serial = serial; 1325 1326 isc_refcount_init(&version->references, references); 1327 isc_rwlock_init(&version->glue_rwlock, 0, 0); 1328 1329 version->glue_table_bits = RBTDB_GLUE_TABLE_INIT_BITS; 1330 version->glue_table_nodecount = 0U; 1331 1332 size = HASHSIZE(version->glue_table_bits) * 1333 sizeof(version->glue_table[0]); 1334 version->glue_table = isc_mem_get(mctx, size); 1335 memset(version->glue_table, 0, size); 1336 1337 version->writer = writer; 1338 version->commit_ok = false; 1339 ISC_LIST_INIT(version->changed_list); 1340 ISC_LIST_INIT(version->resigned_list); 1341 ISC_LINK_INIT(version, link); 1342 1343 return (version); 1344 } 1345 1346 static isc_result_t 1347 newversion(dns_db_t *db, dns_dbversion_t **versionp) { 1348 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1349 rbtdb_version_t *version; 1350 1351 REQUIRE(VALID_RBTDB(rbtdb)); 1352 REQUIRE(versionp != NULL && *versionp == NULL); 1353 REQUIRE(rbtdb->future_version == NULL); 1354 1355 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 1356 
RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */ 1357 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1, 1358 true); 1359 version->rbtdb = rbtdb; 1360 version->commit_ok = true; 1361 version->secure = rbtdb->current_version->secure; 1362 version->havensec3 = rbtdb->current_version->havensec3; 1363 if (version->havensec3) { 1364 version->flags = rbtdb->current_version->flags; 1365 version->iterations = rbtdb->current_version->iterations; 1366 version->hash = rbtdb->current_version->hash; 1367 version->salt_length = rbtdb->current_version->salt_length; 1368 memmove(version->salt, rbtdb->current_version->salt, 1369 version->salt_length); 1370 } else { 1371 version->flags = 0; 1372 version->iterations = 0; 1373 version->hash = 0; 1374 version->salt_length = 0; 1375 memset(version->salt, 0, sizeof(version->salt)); 1376 } 1377 isc_rwlock_init(&version->rwlock, 0, 0); 1378 RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); 1379 version->records = rbtdb->current_version->records; 1380 version->xfrsize = rbtdb->current_version->xfrsize; 1381 RWUNLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); 1382 rbtdb->next_serial++; 1383 rbtdb->future_version = version; 1384 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1385 1386 *versionp = version; 1387 1388 return (ISC_R_SUCCESS); 1389 } 1390 1391 static void 1392 attachversion(dns_db_t *db, dns_dbversion_t *source, 1393 dns_dbversion_t **targetp) { 1394 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1395 rbtdb_version_t *rbtversion = source; 1396 1397 REQUIRE(VALID_RBTDB(rbtdb)); 1398 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb); 1399 1400 isc_refcount_increment(&rbtversion->references); 1401 1402 *targetp = rbtversion; 1403 } 1404 1405 static rbtdb_changed_t * 1406 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, dns_rbtnode_t *node) { 1407 rbtdb_changed_t *changed; 1408 1409 /* 1410 * Caller must be holding the node lock if its reference must be 1411 * protected by 
the lock. 1412 */ 1413 1414 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed)); 1415 1416 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 1417 1418 REQUIRE(version->writer); 1419 1420 if (changed != NULL) { 1421 isc_refcount_increment(&node->references); 1422 changed->node = node; 1423 changed->dirty = false; 1424 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link); 1425 } else { 1426 version->commit_ok = false; 1427 } 1428 1429 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1430 1431 return (changed); 1432 } 1433 1434 static void 1435 free_noqname(isc_mem_t *mctx, struct noqname **noqname) { 1436 if (dns_name_dynamic(&(*noqname)->name)) { 1437 dns_name_free(&(*noqname)->name, mctx); 1438 } 1439 if ((*noqname)->neg != NULL) { 1440 isc_mem_put(mctx, (*noqname)->neg, 1441 dns_rdataslab_size((*noqname)->neg, 0)); 1442 } 1443 if ((*noqname)->negsig != NULL) { 1444 isc_mem_put(mctx, (*noqname)->negsig, 1445 dns_rdataslab_size((*noqname)->negsig, 0)); 1446 } 1447 isc_mem_put(mctx, *noqname, sizeof(**noqname)); 1448 *noqname = NULL; 1449 } 1450 1451 static void 1452 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) { 1453 ISC_LINK_INIT(h, link); 1454 h->heap_index = 0; 1455 atomic_init(&h->attributes, 0); 1456 atomic_init(&h->last_refresh_fail_ts, 0); 1457 1458 STATIC_ASSERT((sizeof(h->attributes) == 2), 1459 "The .attributes field of rdatasetheader_t needs to be " 1460 "16-bit int type exactly."); 1461 1462 #if TRACE_HEADER 1463 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) { 1464 fprintf(stderr, "initialized header: %p\n", h); 1465 } 1466 #else /* if TRACE_HEADER */ 1467 UNUSED(rbtdb); 1468 #endif /* if TRACE_HEADER */ 1469 } 1470 1471 static void 1472 update_newheader(rdatasetheader_t *newh, rdatasetheader_t *old) { 1473 if (CASESET(old)) { 1474 uint_least16_t attr = RDATASET_ATTR_GET( 1475 old, 1476 (RDATASET_ATTR_CASESET | RDATASET_ATTR_CASEFULLYLOWER)); 1477 RDATASET_ATTR_SET(newh, attr); 1478 memmove(newh->upper, 
old->upper, sizeof(old->upper)); 1479 } 1480 } 1481 1482 static rdatasetheader_t * 1483 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) { 1484 rdatasetheader_t *h; 1485 1486 h = isc_mem_get(mctx, sizeof(*h)); 1487 1488 #if TRACE_HEADER 1489 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) { 1490 fprintf(stderr, "allocated header: %p\n", h); 1491 } 1492 #endif /* if TRACE_HEADER */ 1493 memset(h->upper, 0xeb, sizeof(h->upper)); 1494 init_rdataset(rbtdb, h); 1495 h->rdh_ttl = 0; 1496 return (h); 1497 } 1498 1499 static void 1500 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) { 1501 unsigned int size; 1502 int idx; 1503 1504 update_rrsetstats(rbtdb, rdataset->type, 1505 atomic_load_acquire(&rdataset->attributes), false); 1506 1507 idx = rdataset->node->locknum; 1508 if (ISC_LINK_LINKED(rdataset, link)) { 1509 INSIST(IS_CACHE(rbtdb)); 1510 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link); 1511 } 1512 1513 if (rdataset->heap_index != 0) { 1514 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index); 1515 } 1516 rdataset->heap_index = 0; 1517 1518 if (rdataset->noqname != NULL) { 1519 free_noqname(mctx, &rdataset->noqname); 1520 } 1521 if (rdataset->closest != NULL) { 1522 free_noqname(mctx, &rdataset->closest); 1523 } 1524 1525 if (NONEXISTENT(rdataset)) { 1526 size = sizeof(*rdataset); 1527 } else { 1528 size = dns_rdataslab_size((unsigned char *)rdataset, 1529 sizeof(*rdataset)); 1530 } 1531 1532 isc_mem_put(mctx, rdataset, size); 1533 } 1534 1535 static void 1536 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) { 1537 rdatasetheader_t *header, *dcurrent; 1538 bool make_dirty = false; 1539 1540 /* 1541 * Caller must hold the node lock. 1542 */ 1543 1544 /* 1545 * We set the IGNORE attribute on rdatasets with serial number 1546 * 'serial'. When the reference count goes to zero, these rdatasets 1547 * will be cleaned up; until that time, they will be ignored. 
1548 */ 1549 for (header = node->data; header != NULL; header = header->next) { 1550 if (header->serial == serial) { 1551 RDATASET_ATTR_SET(header, RDATASET_ATTR_IGNORE); 1552 make_dirty = true; 1553 } 1554 for (dcurrent = header->down; dcurrent != NULL; 1555 dcurrent = dcurrent->down) 1556 { 1557 if (dcurrent->serial == serial) { 1558 RDATASET_ATTR_SET(dcurrent, 1559 RDATASET_ATTR_IGNORE); 1560 make_dirty = true; 1561 } 1562 } 1563 } 1564 if (make_dirty) { 1565 node->dirty = 1; 1566 } 1567 } 1568 1569 static void 1570 mark_header_ancient(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) { 1571 uint_least16_t attributes = atomic_load_acquire(&header->attributes); 1572 uint_least16_t newattributes = 0; 1573 1574 /* 1575 * If we are already ancient there is nothing to do. 1576 */ 1577 do { 1578 if ((attributes & RDATASET_ATTR_ANCIENT) != 0) { 1579 return; 1580 } 1581 newattributes = attributes | RDATASET_ATTR_ANCIENT; 1582 } while (!atomic_compare_exchange_weak_acq_rel( 1583 &header->attributes, &attributes, newattributes)); 1584 1585 /* 1586 * Decrement the stats counter for the appropriate RRtype. 1587 * If the STALE attribute is set, this will decrement the 1588 * stale type counter, otherwise it decrements the active 1589 * stats type counter. 1590 */ 1591 update_rrsetstats(rbtdb, header->type, attributes, false); 1592 header->node->dirty = 1; 1593 1594 /* Increment the stats counter for the ancient RRtype. */ 1595 update_rrsetstats(rbtdb, header->type, newattributes, true); 1596 } 1597 1598 static void 1599 mark_header_stale(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) { 1600 uint_least16_t attributes = atomic_load_acquire(&header->attributes); 1601 uint_least16_t newattributes = 0; 1602 1603 INSIST((attributes & RDATASET_ATTR_ZEROTTL) == 0); 1604 1605 /* 1606 * If we are already stale there is nothing to do. 
1607 */ 1608 do { 1609 if ((attributes & RDATASET_ATTR_STALE) != 0) { 1610 return; 1611 } 1612 newattributes = attributes | RDATASET_ATTR_STALE; 1613 } while (!atomic_compare_exchange_weak_acq_rel( 1614 &header->attributes, &attributes, newattributes)); 1615 1616 /* Decrement the stats counter for the appropriate RRtype. 1617 * If the ANCIENT attribute is set (although it is very 1618 * unlikely that an RRset goes from ANCIENT to STALE), this 1619 * will decrement the ancient stale type counter, otherwise it 1620 * decrements the active stats type counter. 1621 */ 1622 1623 update_rrsetstats(rbtdb, header->type, attributes, false); 1624 update_rrsetstats(rbtdb, header->type, newattributes, true); 1625 } 1626 1627 static void 1628 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, 1629 rdatasetheader_t *top) { 1630 rdatasetheader_t *d, *down_next; 1631 1632 for (d = top->down; d != NULL; d = down_next) { 1633 down_next = d->down; 1634 free_rdataset(rbtdb, mctx, d); 1635 } 1636 top->down = NULL; 1637 } 1638 1639 static void 1640 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { 1641 rdatasetheader_t *current, *top_prev, *top_next; 1642 isc_mem_t *mctx = rbtdb->common.mctx; 1643 1644 /* 1645 * Caller must be holding the node lock. 1646 */ 1647 1648 top_prev = NULL; 1649 for (current = node->data; current != NULL; current = top_next) { 1650 top_next = current->next; 1651 clean_stale_headers(rbtdb, mctx, current); 1652 /* 1653 * If current is nonexistent, ancient, or stale and 1654 * we are not keeping stale, we can clean it up. 
1655 */ 1656 if (NONEXISTENT(current) || ANCIENT(current) || 1657 (STALE(current) && !KEEPSTALE(rbtdb))) 1658 { 1659 if (top_prev != NULL) { 1660 top_prev->next = current->next; 1661 } else { 1662 node->data = current->next; 1663 } 1664 free_rdataset(rbtdb, mctx, current); 1665 } else { 1666 top_prev = current; 1667 } 1668 } 1669 node->dirty = 0; 1670 } 1671 1672 static void 1673 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1674 rbtdb_serial_t least_serial) { 1675 rdatasetheader_t *current, *dcurrent, *down_next, *dparent; 1676 rdatasetheader_t *top_prev, *top_next; 1677 isc_mem_t *mctx = rbtdb->common.mctx; 1678 bool still_dirty = false; 1679 1680 /* 1681 * Caller must be holding the node lock. 1682 */ 1683 REQUIRE(least_serial != 0); 1684 1685 top_prev = NULL; 1686 for (current = node->data; current != NULL; current = top_next) { 1687 top_next = current->next; 1688 1689 /* 1690 * First, we clean up any instances of multiple rdatasets 1691 * with the same serial number, or that have the IGNORE 1692 * attribute. 1693 */ 1694 dparent = current; 1695 for (dcurrent = current->down; dcurrent != NULL; 1696 dcurrent = down_next) 1697 { 1698 down_next = dcurrent->down; 1699 INSIST(dcurrent->serial <= dparent->serial); 1700 if (dcurrent->serial == dparent->serial || 1701 IGNORE(dcurrent)) 1702 { 1703 if (down_next != NULL) { 1704 down_next->next = dparent; 1705 } 1706 dparent->down = down_next; 1707 free_rdataset(rbtdb, mctx, dcurrent); 1708 } else { 1709 dparent = dcurrent; 1710 } 1711 } 1712 1713 /* 1714 * We've now eliminated all IGNORE datasets with the possible 1715 * exception of current, which we now check. 1716 */ 1717 if (IGNORE(current)) { 1718 down_next = current->down; 1719 if (down_next == NULL) { 1720 if (top_prev != NULL) { 1721 top_prev->next = current->next; 1722 } else { 1723 node->data = current->next; 1724 } 1725 free_rdataset(rbtdb, mctx, current); 1726 /* 1727 * current no longer exists, so we can 1728 * just continue with the loop. 
1729 */ 1730 continue; 1731 } else { 1732 /* 1733 * Pull up current->down, making it the new 1734 * current. 1735 */ 1736 if (top_prev != NULL) { 1737 top_prev->next = down_next; 1738 } else { 1739 node->data = down_next; 1740 } 1741 down_next->next = top_next; 1742 free_rdataset(rbtdb, mctx, current); 1743 current = down_next; 1744 } 1745 } 1746 1747 /* 1748 * We now try to find the first down node less than the 1749 * least serial. 1750 */ 1751 dparent = current; 1752 for (dcurrent = current->down; dcurrent != NULL; 1753 dcurrent = down_next) 1754 { 1755 down_next = dcurrent->down; 1756 if (dcurrent->serial < least_serial) { 1757 break; 1758 } 1759 dparent = dcurrent; 1760 } 1761 1762 /* 1763 * If there is a such an rdataset, delete it and any older 1764 * versions. 1765 */ 1766 if (dcurrent != NULL) { 1767 do { 1768 down_next = dcurrent->down; 1769 INSIST(dcurrent->serial <= least_serial); 1770 free_rdataset(rbtdb, mctx, dcurrent); 1771 dcurrent = down_next; 1772 } while (dcurrent != NULL); 1773 dparent->down = NULL; 1774 } 1775 1776 /* 1777 * Note. The serial number of 'current' might be less than 1778 * least_serial too, but we cannot delete it because it is 1779 * the most recent version, unless it is a NONEXISTENT 1780 * rdataset. 1781 */ 1782 if (current->down != NULL) { 1783 still_dirty = true; 1784 top_prev = current; 1785 } else { 1786 /* 1787 * If this is a NONEXISTENT rdataset, we can delete it. 1788 */ 1789 if (NONEXISTENT(current)) { 1790 if (top_prev != NULL) { 1791 top_prev->next = current->next; 1792 } else { 1793 node->data = current->next; 1794 } 1795 free_rdataset(rbtdb, mctx, current); 1796 } else { 1797 top_prev = current; 1798 } 1799 } 1800 } 1801 if (!still_dirty) { 1802 node->dirty = 0; 1803 } 1804 } 1805 1806 /* 1807 * tree_lock(write) must be held. 
1808 */ 1809 static void 1810 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { 1811 dns_rbtnode_t *nsecnode; 1812 dns_fixedname_t fname; 1813 dns_name_t *name; 1814 isc_result_t result = ISC_R_UNEXPECTED; 1815 1816 INSIST(!ISC_LINK_LINKED(node, deadlink)); 1817 1818 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) { 1819 char printname[DNS_NAME_FORMATSIZE]; 1820 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1821 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1822 "delete_node(): %p %s (bucket %d)", node, 1823 dns_rbt_formatnodename(node, printname, 1824 sizeof(printname)), 1825 node->locknum); 1826 } 1827 1828 switch (node->nsec) { 1829 case DNS_RBT_NSEC_NORMAL: 1830 result = dns_rbt_deletenode(rbtdb->tree, node, false); 1831 break; 1832 case DNS_RBT_NSEC_HAS_NSEC: 1833 /* 1834 * Though this may be wasteful, it has to be done before 1835 * node is deleted. 1836 */ 1837 name = dns_fixedname_initname(&fname); 1838 dns_rbt_fullnamefromnode(node, name); 1839 /* 1840 * Delete the corresponding node from the auxiliary NSEC 1841 * tree before deleting from the main tree. 
1842 */ 1843 nsecnode = NULL; 1844 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode, 1845 NULL, DNS_RBTFIND_EMPTYDATA, NULL, 1846 NULL); 1847 if (result != ISC_R_SUCCESS) { 1848 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1849 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1850 "delete_node: " 1851 "dns_rbt_findnode(nsec): %s", 1852 isc_result_totext(result)); 1853 } else { 1854 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode, 1855 false); 1856 if (result != ISC_R_SUCCESS) { 1857 isc_log_write( 1858 dns_lctx, DNS_LOGCATEGORY_DATABASE, 1859 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1860 "delete_node(): " 1861 "dns_rbt_deletenode(nsecnode): %s", 1862 isc_result_totext(result)); 1863 } 1864 } 1865 result = dns_rbt_deletenode(rbtdb->tree, node, false); 1866 break; 1867 case DNS_RBT_NSEC_NSEC: 1868 result = dns_rbt_deletenode(rbtdb->nsec, node, false); 1869 break; 1870 case DNS_RBT_NSEC_NSEC3: 1871 result = dns_rbt_deletenode(rbtdb->nsec3, node, false); 1872 break; 1873 } 1874 if (result != ISC_R_SUCCESS) { 1875 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1876 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1877 "delete_node(): " 1878 "dns_rbt_deletenode: %s", 1879 isc_result_totext(result)); 1880 } 1881 } 1882 1883 /* 1884 * Caller must be holding the node lock. 1885 */ 1886 static void 1887 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1888 isc_rwlocktype_t nlocktype) { 1889 if (nlocktype == isc_rwlocktype_write && 1890 ISC_LINK_LINKED(node, deadlink)) 1891 { 1892 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, 1893 deadlink); 1894 } 1895 if (isc_refcount_increment0(&node->references) == 0) { 1896 /* this is the first reference to the node */ 1897 isc_refcount_increment0( 1898 &rbtdb->node_locks[node->locknum].references); 1899 } 1900 } 1901 1902 /*% 1903 * The tree lock must be held for the result to be valid. 
 */
static bool
is_last_node_on_its_level(dns_rbtnode_t *node) {
	/*
	 * True when 'node' has a parent whose 'down' pointer refers to
	 * it and it has neither a left nor a right child.
	 */
	return (node->parent != NULL && node->parent->down == node &&
		node->left == NULL && node->right == NULL);
}

/*%
 * Queue 'node' for asynchronous pruning: allocate a DNS_EVENT_RBTPRUNE
 * event whose action is prune_tree() with 'node' as its argument, take
 * a new reference on the node (using node lock type 'nlocktype') and on
 * the database (stored as the event sender), and send the event to
 * rbtdb->prunetask.
 */
static void
send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		   isc_rwlocktype_t nlocktype) {
	isc_event_t *ev;
	dns_db_t *db;

	ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE,
				prune_tree, node, sizeof(isc_event_t));
	new_reference(rbtdb, node, nlocktype);
	db = NULL;
	attach((dns_db_t *)rbtdb, &db);
	ev->ev_sender = db;
	isc_task_send(rbtdb->prunetask, &ev);
}

/*%
 * Clean up dead nodes.  These are nodes which have no references, and
 * have no data.  They are dead but we could not or chose not to delete
 * them when we deleted all the data at that node because we did not want
 * to wait for the tree write lock.
 *
 * At most 10 entries of the bucket's dead-node list are examined per
 * call.
 *
 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
 */
static void
cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
	dns_rbtnode_t *node;
	int count = 10; /* XXXJT: should be adjustable */

	node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
	while (node != NULL && count > 0) {
		ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);

		/*
		 * We might have reactivated this node without a tree write
		 * lock, so we couldn't remove this node from deadnodes then
		 * and we have to do it now.
		 */
		if (isc_refcount_current(&node->references) != 0 ||
		    node->data != NULL)
		{
			node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
			count--;
			continue;
		}

		if (is_last_node_on_its_level(node) && rbtdb->task != NULL) {
			send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
		} else if (node->down == NULL && node->data == NULL) {
			/*
			 * Not an interior node and not needing to be
			 * reactivated.
			 */
			delete_node(rbtdb, node);
		} else if (node->data == NULL) {
			/*
			 * An interior node without data.  Leave linked
			 * to be cleaned up when node->down becomes NULL.
			 */
			ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum], node,
					deadlink);
		}
		node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
		count--;
	}
}

/*
 * This function is assumed to be called when a node is newly referenced
 * and can be in the deadnode list.  In that case the node must be retrieved
 * from the list because it is going to be used.  In addition, if the caller
 * happens to hold a write lock on the tree, it's a good chance to purge dead
 * nodes.
 * Note: while a new reference is gained in multiple places, there are only very
 * few cases where the node can be in the deadnode list (only empty nodes can
 * have been added to the list).
 *
 * 'treelocktype' is the type of tree lock the caller holds.  The node's
 * bucket lock is acquired and released inside this function.
 */
static void
reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		isc_rwlocktype_t treelocktype) {
	isc_rwlocktype_t locktype = isc_rwlocktype_read;
	nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
	bool maybe_cleanup = false;

	POST(locktype);

	NODE_LOCK(nodelock, locktype);

	/*
	 * Check if we can possibly cleanup the dead node.  If so, upgrade
	 * the node lock below to perform the cleanup.
	 */
	if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
	    treelocktype == isc_rwlocktype_write)
	{
		maybe_cleanup = true;
	}

	if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
		/*
		 * Upgrade the lock and test if we still need to unlink.
		 * The read lock is dropped before the write lock is
		 * taken, hence the re-check of the dead-list link.
		 */
		NODE_UNLOCK(nodelock, locktype);
		locktype = isc_rwlocktype_write;
		POST(locktype);
		NODE_LOCK(nodelock, locktype);
		if (ISC_LINK_LINKED(node, deadlink)) {
			ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
					deadlink);
		}
		if (maybe_cleanup) {
			cleanup_dead_nodes(rbtdb, node->locknum);
		}
	}

	new_reference(rbtdb, node, locktype);

	NODE_UNLOCK(nodelock, locktype);
}

/*
 * Caller must be holding the node lock; either the "strong", read or write
 * lock.  Note that the lock must be held even when node references are
 * atomically modified; in that case the decrement operation itself does not
 * have to be protected, but we must avoid a race condition where multiple
 * threads are decreasing the reference to zero simultaneously and at least
 * one of them is going to free the node.
 *
 * This function returns true if and only if the node reference decreases
 * to zero.
 *
 * NOTE: Decrementing the reference count of a node to zero does not mean it
 * will be immediately freed.
 */
static bool
decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		    rbtdb_serial_t least_serial, isc_rwlocktype_t nlock,
		    isc_rwlocktype_t tlock, bool pruning) {
	isc_result_t result;
	bool write_locked;
	bool locked = tlock != isc_rwlocktype_none;
	rbtdb_nodelock_t *nodelock;
	int bucket = node->locknum;
	bool no_reference = true;
	uint_fast32_t refs;

	nodelock = &rbtdb->node_locks[bucket];

/*
 * A node is kept if it still has data, if the tree is locked ('l') and
 * the node has a 'down' pointer, or if it is one of the database's
 * origin nodes.
 */
#define KEEP_NODE(n, r, l)                                  \
	((n)->data != NULL || ((l) && (n)->down != NULL) || \
	 (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node)

	/* Handle easy and typical case first. */
	if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) {
		if (isc_refcount_decrement(&node->references) == 1) {
			refs = isc_refcount_decrement(&nodelock->references);
			INSIST(refs > 0);
			return (true);
		} else {
			return (false);
		}
	}

	/* Upgrade the lock? */
	if (nlock == isc_rwlocktype_read) {
		NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
		NODE_LOCK(&nodelock->lock, isc_rwlocktype_write);
	}

	if (isc_refcount_decrement(&node->references) > 1) {
		/* Restore the lock? */
		if (nlock == isc_rwlocktype_read) {
			NODE_DOWNGRADE(&nodelock->lock);
		}
		return (false);
	}

	/*
	 * The node has no references left: clean it up if it is dirty,
	 * using the cache or zone cleaner as appropriate.
	 */
	if (node->dirty) {
		if (IS_CACHE(rbtdb)) {
			clean_cache_node(rbtdb, node);
		} else {
			if (least_serial == 0) {
				/*
				 * Caller doesn't know the least serial.
				 * Get it.
				 */
				RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
				least_serial = rbtdb->least_serial;
				RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
			}
			clean_zone_node(rbtdb, node, least_serial);
		}
	}

	/*
	 * Attempt to switch to a write lock on the tree.  If this fails,
	 * we will add this node to a linked list of nodes in this locking
	 * bucket which we will free later.
	 */
	if (tlock != isc_rwlocktype_write) {
		/*
		 * Locking hierarchy notwithstanding, we don't need to free
		 * the node lock before acquiring the tree write lock because
		 * we only do a trylock.
		 */
		if (tlock == isc_rwlocktype_read) {
			result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
		} else {
			result = isc_rwlock_trylock(&rbtdb->tree_lock,
						    isc_rwlocktype_write);
		}
		RUNTIME_CHECK(result == ISC_R_SUCCESS ||
			      result == ISC_R_LOCKBUSY);

		write_locked = (result == ISC_R_SUCCESS);
	} else {
		write_locked = true;
	}

	refs = isc_refcount_decrement(&nodelock->references);
	INSIST(refs > 0);

	if (KEEP_NODE(node, rbtdb, locked || write_locked)) {
		goto restore_locks;
	}

#undef KEEP_NODE

	if (write_locked) {
		/*
		 * If this node is the only one left on its RBTDB level,
		 * attempt pruning the RBTDB (i.e. deleting empty nodes that
		 * are ancestors of 'node' and are not interior nodes) starting
		 * from this node (see prune_tree()).  The main reason this is
		 * not done immediately, but asynchronously, is that the
		 * ancestors of 'node' are almost guaranteed to belong to
		 * different node buckets and we don't want to juggle locks
		 * right now.
		 *
		 * Since prune_tree() also calls decrement_reference(), check
		 * the value of the 'pruning' parameter (which is only set to
		 * 'true' in the decrement_reference() call present in
		 * prune_tree()) to prevent an infinite loop and to allow a
		 * node sent to prune_tree() to be deleted by the delete_node()
		 * call in the code branch below.
		 */
		if (!pruning && is_last_node_on_its_level(node) &&
		    rbtdb->task != NULL)
		{
			send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
			/*
			 * send_to_prune_tree() took a new reference on
			 * the node, so it is referenced again.
			 */
			no_reference = false;
		} else {
			/*
			 * The node can now be deleted.
			 */
			delete_node(rbtdb, node);
		}
	} else {
		/*
		 * No tree write lock: park the node on this bucket's
		 * dead-node list instead of deleting it now.
		 */
		INSIST(node->data == NULL);
		if (!ISC_LINK_LINKED(node, deadlink)) {
			ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
					deadlink);
		}
	}

restore_locks:
	/* Restore the lock? */
	if (nlock == isc_rwlocktype_read) {
		NODE_DOWNGRADE(&nodelock->lock);
	}

	/*
	 * Relock a read lock, or unlock the write lock if no lock was held.
	 */
	if (tlock == isc_rwlocktype_none) {
		if (write_locked) {
			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
		}
	}

	if (tlock == isc_rwlocktype_read) {
		if (write_locked) {
			isc_rwlock_downgrade(&rbtdb->tree_lock);
		}
	}

	return (no_reference);
}

/*
 * Prune the RBTDB tree of trees.  Start by attempting to delete a node that is
 * the only one left on its RBTDB level (see the send_to_prune_tree() call in
 * decrement_reference()).  Then, if the node has a parent (which can either
 * exist on the same RBTDB level or on an upper RBTDB level), check whether the
 * latter is an interior node (i.e. a node with a non-NULL 'down' pointer).  If
 * the parent node is not an interior node, attempt deleting the parent node as
 * well and then move on to examining the parent node's parent, etc.  Continue
 * traversing the RBTDB tree until a node is encountered that is still an
 * interior node after the previously-processed node gets deleted.
 *
 * It is acceptable for a node sent to this function to NOT be deleted in the
 * process (e.g. if it gets reactivated in the meantime).  Furthermore, node
 * deletion is not a prerequisite for continuing RBTDB traversal.
 *
 * This function gets called once for every "starting node" and it continues
 * traversing the RBTDB until the stop condition is met.
In the worst case, 2216 * the number of nodes processed by a single execution of this function is the 2217 * number of tree levels, which is at most the maximum number of domain name 2218 * labels (127); however, it should be much smaller in practice and deleting 2219 * empty RBTDB nodes is critical to keeping the amount of memory used by the 2220 * cache memory context within the configured limit anyway. 2221 */ 2222 static void 2223 prune_tree(isc_task_t *task, isc_event_t *event) { 2224 dns_rbtdb_t *rbtdb = event->ev_sender; 2225 dns_rbtnode_t *node = event->ev_arg; 2226 dns_rbtnode_t *parent = NULL; 2227 unsigned int locknum = node->locknum; 2228 2229 UNUSED(task); 2230 2231 isc_event_free(&event); 2232 2233 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2234 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 2235 do { 2236 parent = node->parent; 2237 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write, 2238 isc_rwlocktype_write, true); 2239 2240 /* 2241 * Check whether the parent is an interior node. Note that it 2242 * might have been one before the decrement_reference() call on 2243 * the previous line, but decrementing the reference count for 2244 * 'node' could have caused 'node->parent->down' to become 2245 * NULL. 2246 */ 2247 if (parent != NULL && parent->down == NULL) { 2248 /* 2249 * Keep the node lock if possible; otherwise, release 2250 * the old lock and acquire one for the parent. 2251 */ 2252 if (parent->locknum != locknum) { 2253 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 2254 isc_rwlocktype_write); 2255 locknum = parent->locknum; 2256 NODE_LOCK(&rbtdb->node_locks[locknum].lock, 2257 isc_rwlocktype_write); 2258 } 2259 2260 /* 2261 * We need to gain a reference to the parent node 2262 * before decrementing it in the next iteration. 
2263 */ 2264 new_reference(rbtdb, parent, isc_rwlocktype_write); 2265 } else { 2266 parent = NULL; 2267 } 2268 2269 node = parent; 2270 } while (node != NULL); 2271 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 2272 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2273 2274 detach((dns_db_t **)(void *)&rbtdb); 2275 } 2276 2277 static void 2278 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, 2279 rbtdb_changedlist_t *cleanup_list) { 2280 /* 2281 * Caller must be holding the database lock. 2282 */ 2283 2284 rbtdb->least_serial = version->serial; 2285 *cleanup_list = version->changed_list; 2286 ISC_LIST_INIT(version->changed_list); 2287 } 2288 2289 static void 2290 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) { 2291 rbtdb_changed_t *changed, *next_changed; 2292 2293 /* 2294 * If the changed record is dirty, then 2295 * an update created multiple versions of 2296 * a given rdataset. We keep this list 2297 * until we're the least open version, at 2298 * which point it's safe to get rid of any 2299 * older versions. 2300 * 2301 * If the changed record isn't dirty, then 2302 * we don't need it anymore since we're 2303 * committing and not rolling back. 2304 * 2305 * The caller must be holding the database lock. 
2306 */ 2307 for (changed = HEAD(version->changed_list); changed != NULL; 2308 changed = next_changed) 2309 { 2310 next_changed = NEXT(changed, link); 2311 if (!changed->dirty) { 2312 UNLINK(version->changed_list, changed, link); 2313 APPEND(*cleanup_list, changed, link); 2314 } 2315 } 2316 } 2317 2318 static void 2319 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) { 2320 dns_rdataset_t keyset; 2321 dns_rdataset_t nsecset, signsecset; 2322 bool haszonekey = false; 2323 bool hasnsec = false; 2324 isc_result_t result; 2325 2326 REQUIRE(version != NULL); 2327 2328 dns_rdataset_init(&keyset); 2329 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey, 2330 0, 0, &keyset, NULL); 2331 if (result == ISC_R_SUCCESS) { 2332 result = dns_rdataset_first(&keyset); 2333 while (result == ISC_R_SUCCESS) { 2334 dns_rdata_t keyrdata = DNS_RDATA_INIT; 2335 dns_rdataset_current(&keyset, &keyrdata); 2336 if (dns_zonekey_iszonekey(&keyrdata)) { 2337 haszonekey = true; 2338 break; 2339 } 2340 result = dns_rdataset_next(&keyset); 2341 } 2342 dns_rdataset_disassociate(&keyset); 2343 } 2344 if (!haszonekey) { 2345 version->secure = dns_db_insecure; 2346 version->havensec3 = false; 2347 return; 2348 } 2349 2350 dns_rdataset_init(&nsecset); 2351 dns_rdataset_init(&signsecset); 2352 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0, 2353 0, &nsecset, &signsecset); 2354 if (result == ISC_R_SUCCESS) { 2355 if (dns_rdataset_isassociated(&signsecset)) { 2356 hasnsec = true; 2357 dns_rdataset_disassociate(&signsecset); 2358 } 2359 dns_rdataset_disassociate(&nsecset); 2360 } 2361 2362 setnsec3parameters(db, version); 2363 2364 /* 2365 * Do we have a valid NSEC/NSEC3 chain? 2366 */ 2367 if (version->havensec3 || hasnsec) { 2368 version->secure = dns_db_secure; 2369 } else { 2370 version->secure = dns_db_insecure; 2371 } 2372 } 2373 2374 /*%< 2375 * Walk the origin node looking for NSEC3PARAM records. 2376 * Cache the nsec3 parameters. 
2377 */ 2378 static void 2379 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) { 2380 dns_rbtnode_t *node; 2381 dns_rdata_nsec3param_t nsec3param; 2382 dns_rdata_t rdata = DNS_RDATA_INIT; 2383 isc_region_t region; 2384 isc_result_t result; 2385 rdatasetheader_t *header, *header_next; 2386 unsigned char *raw; /* RDATASLAB */ 2387 unsigned int count, length; 2388 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2389 2390 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 2391 version->havensec3 = false; 2392 node = rbtdb->origin_node; 2393 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 2394 isc_rwlocktype_read); 2395 for (header = node->data; header != NULL; header = header_next) { 2396 header_next = header->next; 2397 do { 2398 if (header->serial <= version->serial && 2399 !IGNORE(header)) 2400 { 2401 if (NONEXISTENT(header)) { 2402 header = NULL; 2403 } 2404 break; 2405 } else { 2406 header = header->down; 2407 } 2408 } while (header != NULL); 2409 2410 if (header != NULL && 2411 (header->type == dns_rdatatype_nsec3param)) 2412 { 2413 /* 2414 * Find A NSEC3PARAM with a supported algorithm. 
2415 */ 2416 raw = (unsigned char *)header + sizeof(*header); 2417 count = raw[0] * 256 + raw[1]; /* count */ 2418 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 2419 while (count-- > 0U) { 2420 length = raw[0] * 256 + raw[1]; 2421 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 2422 region.base = raw; 2423 region.length = length; 2424 raw += length; 2425 dns_rdata_fromregion( 2426 &rdata, rbtdb->common.rdclass, 2427 dns_rdatatype_nsec3param, ®ion); 2428 result = dns_rdata_tostruct(&rdata, &nsec3param, 2429 NULL); 2430 INSIST(result == ISC_R_SUCCESS); 2431 dns_rdata_reset(&rdata); 2432 2433 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG && 2434 !dns_nsec3_supportedhash(nsec3param.hash)) 2435 { 2436 continue; 2437 } 2438 2439 if (nsec3param.flags != 0) { 2440 continue; 2441 } 2442 2443 memmove(version->salt, nsec3param.salt, 2444 nsec3param.salt_length); 2445 version->hash = nsec3param.hash; 2446 version->salt_length = nsec3param.salt_length; 2447 version->iterations = nsec3param.iterations; 2448 version->flags = nsec3param.flags; 2449 version->havensec3 = true; 2450 /* 2451 * Look for a better algorithm than the 2452 * unknown test algorithm. 
2453 */ 2454 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) { 2455 goto unlock; 2456 } 2457 } 2458 } 2459 } 2460 unlock: 2461 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 2462 isc_rwlocktype_read); 2463 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 2464 } 2465 2466 static void 2467 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) { 2468 dns_rbtdb_t *rbtdb = event->ev_arg; 2469 bool again = false; 2470 unsigned int locknum; 2471 2472 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2473 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) { 2474 NODE_LOCK(&rbtdb->node_locks[locknum].lock, 2475 isc_rwlocktype_write); 2476 cleanup_dead_nodes(rbtdb, locknum); 2477 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) { 2478 again = true; 2479 } 2480 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 2481 isc_rwlocktype_write); 2482 } 2483 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2484 if (again) { 2485 isc_task_send(task, &event); 2486 } else { 2487 isc_event_free(&event); 2488 if (isc_refcount_decrement(&rbtdb->references) == 1) { 2489 (void)isc_refcount_current(&rbtdb->references); 2490 maybe_free_rbtdb(rbtdb); 2491 } 2492 } 2493 } 2494 2495 static void 2496 closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) { 2497 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2498 rbtdb_version_t *version, *cleanup_version, *least_greater; 2499 bool rollback = false; 2500 rbtdb_changedlist_t cleanup_list; 2501 rdatasetheaderlist_t resigned_list; 2502 rbtdb_changed_t *changed, *next_changed; 2503 rbtdb_serial_t serial, least_serial; 2504 dns_rbtnode_t *rbtnode; 2505 rdatasetheader_t *header; 2506 2507 REQUIRE(VALID_RBTDB(rbtdb)); 2508 version = (rbtdb_version_t *)*versionp; 2509 INSIST(version->rbtdb == rbtdb); 2510 2511 cleanup_version = NULL; 2512 ISC_LIST_INIT(cleanup_list); 2513 ISC_LIST_INIT(resigned_list); 2514 2515 if (isc_refcount_decrement(&version->references) > 1) { 2516 /* typical and easy case first */ 2517 if (commit) 
{ 2518 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 2519 INSIST(!version->writer); 2520 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 2521 } 2522 goto end; 2523 } 2524 2525 /* 2526 * Update the zone's secure status in version before making 2527 * it the current version. 2528 */ 2529 if (version->writer && commit && !IS_CACHE(rbtdb)) { 2530 iszonesecure(db, version, rbtdb->origin_node); 2531 } 2532 2533 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 2534 serial = version->serial; 2535 if (version->writer) { 2536 if (commit) { 2537 unsigned cur_ref; 2538 rbtdb_version_t *cur_version; 2539 2540 INSIST(version->commit_ok); 2541 INSIST(version == rbtdb->future_version); 2542 /* 2543 * The current version is going to be replaced. 2544 * Release the (likely last) reference to it from the 2545 * DB itself and unlink it from the open list. 2546 */ 2547 cur_version = rbtdb->current_version; 2548 cur_ref = isc_refcount_decrement( 2549 &cur_version->references); 2550 if (cur_ref == 1) { 2551 (void)isc_refcount_current( 2552 &cur_version->references); 2553 if (cur_version->serial == rbtdb->least_serial) 2554 { 2555 INSIST(EMPTY( 2556 cur_version->changed_list)); 2557 } 2558 UNLINK(rbtdb->open_versions, cur_version, link); 2559 } 2560 if (EMPTY(rbtdb->open_versions)) { 2561 /* 2562 * We're going to become the least open 2563 * version. 2564 */ 2565 make_least_version(rbtdb, version, 2566 &cleanup_list); 2567 } else { 2568 /* 2569 * Some other open version is the 2570 * least version. We can't cleanup 2571 * records that were changed in this 2572 * version because the older versions 2573 * may still be in use by an open 2574 * version. 2575 * 2576 * We can, however, discard the 2577 * changed records for things that 2578 * we've added that didn't exist in 2579 * prior versions. 2580 */ 2581 cleanup_nondirty(version, &cleanup_list); 2582 } 2583 /* 2584 * If the (soon to be former) current version 2585 * isn't being used by anyone, we can clean 2586 * it up. 
2587 */ 2588 if (cur_ref == 1) { 2589 cleanup_version = cur_version; 2590 APPENDLIST(version->changed_list, 2591 cleanup_version->changed_list, link); 2592 } 2593 /* 2594 * Become the current version. 2595 */ 2596 version->writer = false; 2597 rbtdb->current_version = version; 2598 rbtdb->current_serial = version->serial; 2599 rbtdb->future_version = NULL; 2600 2601 /* 2602 * Keep the current version in the open list, and 2603 * gain a reference for the DB itself (see the DB 2604 * creation function below). This must be the only 2605 * case where we need to increment the counter from 2606 * zero and need to use isc_refcount_increment0(). 2607 */ 2608 INSIST(isc_refcount_increment0(&version->references) == 2609 0); 2610 PREPEND(rbtdb->open_versions, rbtdb->current_version, 2611 link); 2612 resigned_list = version->resigned_list; 2613 ISC_LIST_INIT(version->resigned_list); 2614 } else { 2615 /* 2616 * We're rolling back this transaction. 2617 */ 2618 cleanup_list = version->changed_list; 2619 ISC_LIST_INIT(version->changed_list); 2620 resigned_list = version->resigned_list; 2621 ISC_LIST_INIT(version->resigned_list); 2622 rollback = true; 2623 cleanup_version = version; 2624 rbtdb->future_version = NULL; 2625 } 2626 } else { 2627 if (version != rbtdb->current_version) { 2628 /* 2629 * There are no external or internal references 2630 * to this version and it can be cleaned up. 2631 */ 2632 cleanup_version = version; 2633 2634 /* 2635 * Find the version with the least serial 2636 * number greater than ours. 2637 */ 2638 least_greater = PREV(version, link); 2639 if (least_greater == NULL) { 2640 least_greater = rbtdb->current_version; 2641 } 2642 2643 INSIST(version->serial < least_greater->serial); 2644 /* 2645 * Is this the least open version? 2646 */ 2647 if (version->serial == rbtdb->least_serial) { 2648 /* 2649 * Yes. Install the new least open 2650 * version. 
2651 */ 2652 make_least_version(rbtdb, least_greater, 2653 &cleanup_list); 2654 } else { 2655 /* 2656 * Add any unexecuted cleanups to 2657 * those of the least greater version. 2658 */ 2659 APPENDLIST(least_greater->changed_list, 2660 version->changed_list, link); 2661 } 2662 } else if (version->serial == rbtdb->least_serial) { 2663 INSIST(EMPTY(version->changed_list)); 2664 } 2665 UNLINK(rbtdb->open_versions, version, link); 2666 } 2667 least_serial = rbtdb->least_serial; 2668 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 2669 2670 if (cleanup_version != NULL) { 2671 INSIST(EMPTY(cleanup_version->changed_list)); 2672 free_gluetable(cleanup_version); 2673 isc_rwlock_destroy(&cleanup_version->glue_rwlock); 2674 isc_rwlock_destroy(&cleanup_version->rwlock); 2675 isc_mem_put(rbtdb->common.mctx, cleanup_version, 2676 sizeof(*cleanup_version)); 2677 } 2678 2679 /* 2680 * Commit/rollback re-signed headers. 2681 */ 2682 for (header = HEAD(resigned_list); header != NULL; 2683 header = HEAD(resigned_list)) 2684 { 2685 nodelock_t *lock; 2686 2687 ISC_LIST_UNLINK(resigned_list, header, link); 2688 2689 lock = &rbtdb->node_locks[header->node->locknum].lock; 2690 NODE_LOCK(lock, isc_rwlocktype_write); 2691 if (rollback && !IGNORE(header)) { 2692 resign_insert(rbtdb, header->node->locknum, header); 2693 } 2694 decrement_reference(rbtdb, header->node, least_serial, 2695 isc_rwlocktype_write, isc_rwlocktype_none, 2696 false); 2697 NODE_UNLOCK(lock, isc_rwlocktype_write); 2698 } 2699 2700 if (!EMPTY(cleanup_list)) { 2701 isc_event_t *event = NULL; 2702 isc_rwlocktype_t tlock = isc_rwlocktype_none; 2703 2704 if (rbtdb->task != NULL) { 2705 event = isc_event_allocate(rbtdb->common.mctx, NULL, 2706 DNS_EVENT_RBTDEADNODES, 2707 cleanup_dead_nodes_callback, 2708 rbtdb, sizeof(isc_event_t)); 2709 } 2710 if (event == NULL) { 2711 /* 2712 * We acquire a tree write lock here in order to make 2713 * sure that stale nodes will be removed in 2714 * decrement_reference(). 
If we didn't have the lock, 2715 * those nodes could miss the chance to be removed 2716 * until the server stops. The write lock is 2717 * expensive, but this event should be rare enough 2718 * to justify the cost. 2719 */ 2720 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2721 tlock = isc_rwlocktype_write; 2722 } 2723 2724 for (changed = HEAD(cleanup_list); changed != NULL; 2725 changed = next_changed) 2726 { 2727 nodelock_t *lock; 2728 2729 next_changed = NEXT(changed, link); 2730 rbtnode = changed->node; 2731 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 2732 2733 NODE_LOCK(lock, isc_rwlocktype_write); 2734 /* 2735 * This is a good opportunity to purge any dead nodes, 2736 * so use it. 2737 */ 2738 if (event == NULL) { 2739 cleanup_dead_nodes(rbtdb, rbtnode->locknum); 2740 } 2741 2742 if (rollback) { 2743 rollback_node(rbtnode, serial); 2744 } 2745 decrement_reference(rbtdb, rbtnode, least_serial, 2746 isc_rwlocktype_write, tlock, false); 2747 2748 NODE_UNLOCK(lock, isc_rwlocktype_write); 2749 2750 isc_mem_put(rbtdb->common.mctx, changed, 2751 sizeof(*changed)); 2752 } 2753 if (event != NULL) { 2754 isc_refcount_increment(&rbtdb->references); 2755 isc_task_send(rbtdb->task, &event); 2756 } else { 2757 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2758 } 2759 } 2760 2761 end: 2762 *versionp = NULL; 2763 } 2764 2765 /* 2766 * Add the necessary magic for the wildcard name 'name' 2767 * to be found in 'rbtdb'. 2768 * 2769 * In order for wildcard matching to work correctly in 2770 * zone_find(), we must ensure that a node for the wildcarding 2771 * level exists in the database, and has its 'find_callback' 2772 * and 'wild' bits set. 2773 * 2774 * E.g. if the wildcard name is "*.sub.example." then we 2775 * must ensure that "sub.example." exists and is marked as 2776 * a wildcard level. 2777 * 2778 * tree_lock(write) must be held. 
2779 */ 2780 static isc_result_t 2781 add_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) { 2782 isc_result_t result; 2783 dns_name_t foundname; 2784 dns_offsets_t offsets; 2785 unsigned int n; 2786 dns_rbtnode_t *node = NULL; 2787 2788 dns_name_init(&foundname, offsets); 2789 n = dns_name_countlabels(name); 2790 INSIST(n >= 2); 2791 n--; 2792 dns_name_getlabelsequence(name, 1, n, &foundname); 2793 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node); 2794 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { 2795 return (result); 2796 } 2797 if (result == ISC_R_SUCCESS) { 2798 node->nsec = DNS_RBT_NSEC_NORMAL; 2799 } 2800 node->find_callback = 1; 2801 if (lock) { 2802 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, 2803 isc_rwlocktype_write); 2804 } 2805 node->wild = 1; 2806 if (lock) { 2807 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, 2808 isc_rwlocktype_write); 2809 } 2810 return (ISC_R_SUCCESS); 2811 } 2812 2813 /* 2814 * tree_lock(write) must be held. 
2815 */ 2816 static isc_result_t 2817 add_empty_wildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) { 2818 isc_result_t result; 2819 dns_name_t foundname; 2820 dns_offsets_t offsets; 2821 unsigned int n, l, i; 2822 2823 dns_name_init(&foundname, offsets); 2824 n = dns_name_countlabels(name); 2825 l = dns_name_countlabels(&rbtdb->common.origin); 2826 i = l + 1; 2827 while (i < n) { 2828 dns_rbtnode_t *node = NULL; /* dummy */ 2829 dns_name_getlabelsequence(name, n - i, i, &foundname); 2830 if (dns_name_iswildcard(&foundname)) { 2831 result = add_wildcard_magic(rbtdb, &foundname, lock); 2832 if (result != ISC_R_SUCCESS) { 2833 return (result); 2834 } 2835 result = dns_rbt_addnode(rbtdb->tree, &foundname, 2836 &node); 2837 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { 2838 return (result); 2839 } 2840 if (result == ISC_R_SUCCESS) { 2841 node->nsec = DNS_RBT_NSEC_NORMAL; 2842 } 2843 } 2844 i++; 2845 } 2846 return (ISC_R_SUCCESS); 2847 } 2848 2849 static isc_result_t 2850 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, const dns_name_t *name, 2851 bool create, dns_dbnode_t **nodep) { 2852 dns_rbtnode_t *node = NULL; 2853 dns_name_t nodename; 2854 isc_result_t result; 2855 isc_rwlocktype_t locktype = isc_rwlocktype_read; 2856 2857 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3); 2858 2859 dns_name_init(&nodename, NULL); 2860 RWLOCK(&rbtdb->tree_lock, locktype); 2861 result = dns_rbt_findnode(tree, name, NULL, &node, NULL, 2862 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 2863 if (result != ISC_R_SUCCESS) { 2864 RWUNLOCK(&rbtdb->tree_lock, locktype); 2865 if (!create) { 2866 if (result == DNS_R_PARTIALMATCH) { 2867 result = ISC_R_NOTFOUND; 2868 } 2869 return (result); 2870 } 2871 /* 2872 * It would be nice to try to upgrade the lock instead of 2873 * unlocking then relocking. 
2874 */ 2875 locktype = isc_rwlocktype_write; 2876 RWLOCK(&rbtdb->tree_lock, locktype); 2877 node = NULL; 2878 result = dns_rbt_addnode(tree, name, &node); 2879 if (result == ISC_R_SUCCESS) { 2880 dns_rbt_namefromnode(node, &nodename); 2881 node->locknum = node->hashval % rbtdb->node_lock_count; 2882 if (tree == rbtdb->tree) { 2883 add_empty_wildcards(rbtdb, name, true); 2884 2885 if (dns_name_iswildcard(name)) { 2886 result = add_wildcard_magic(rbtdb, name, 2887 true); 2888 if (result != ISC_R_SUCCESS) { 2889 RWUNLOCK(&rbtdb->tree_lock, 2890 locktype); 2891 return (result); 2892 } 2893 } 2894 } 2895 if (tree == rbtdb->nsec3) { 2896 node->nsec = DNS_RBT_NSEC_NSEC3; 2897 } 2898 } else if (result != ISC_R_EXISTS) { 2899 RWUNLOCK(&rbtdb->tree_lock, locktype); 2900 return (result); 2901 } 2902 } 2903 2904 if (tree == rbtdb->nsec3) { 2905 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3); 2906 } 2907 2908 reactivate_node(rbtdb, node, locktype); 2909 2910 RWUNLOCK(&rbtdb->tree_lock, locktype); 2911 2912 *nodep = (dns_dbnode_t *)node; 2913 2914 return (ISC_R_SUCCESS); 2915 } 2916 2917 static isc_result_t 2918 findnode(dns_db_t *db, const dns_name_t *name, bool create, 2919 dns_dbnode_t **nodep) { 2920 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2921 2922 REQUIRE(VALID_RBTDB(rbtdb)); 2923 2924 return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep)); 2925 } 2926 2927 static isc_result_t 2928 findnsec3node(dns_db_t *db, const dns_name_t *name, bool create, 2929 dns_dbnode_t **nodep) { 2930 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2931 2932 REQUIRE(VALID_RBTDB(rbtdb)); 2933 2934 return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep)); 2935 } 2936 2937 static isc_result_t 2938 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) { 2939 rbtdb_search_t *search = arg; 2940 rdatasetheader_t *header, *header_next; 2941 rdatasetheader_t *dname_header, *sigdname_header, *ns_header; 2942 rdatasetheader_t *found; 2943 isc_result_t result; 2944 dns_rbtnode_t 
*onode; 2945 2946 /* 2947 * We only want to remember the topmost zone cut, since it's the one 2948 * that counts, so we'll just continue if we've already found a 2949 * zonecut. 2950 */ 2951 if (search->zonecut != NULL) { 2952 return (DNS_R_CONTINUE); 2953 } 2954 2955 found = NULL; 2956 result = DNS_R_CONTINUE; 2957 onode = search->rbtdb->origin_node; 2958 2959 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock), 2960 isc_rwlocktype_read); 2961 2962 /* 2963 * Look for an NS or DNAME rdataset active in our version. 2964 */ 2965 ns_header = NULL; 2966 dname_header = NULL; 2967 sigdname_header = NULL; 2968 for (header = node->data; header != NULL; header = header_next) { 2969 header_next = header->next; 2970 if (header->type == dns_rdatatype_ns || 2971 header->type == dns_rdatatype_dname || 2972 header->type == RBTDB_RDATATYPE_SIGDNAME) 2973 { 2974 do { 2975 if (header->serial <= search->serial && 2976 !IGNORE(header)) 2977 { 2978 /* 2979 * Is this a "this rdataset doesn't 2980 * exist" record? 2981 */ 2982 if (NONEXISTENT(header)) { 2983 header = NULL; 2984 } 2985 break; 2986 } else { 2987 header = header->down; 2988 } 2989 } while (header != NULL); 2990 if (header != NULL) { 2991 if (header->type == dns_rdatatype_dname) { 2992 dname_header = header; 2993 } else if (header->type == 2994 RBTDB_RDATATYPE_SIGDNAME) 2995 { 2996 sigdname_header = header; 2997 } else if (node != onode || 2998 IS_STUB(search->rbtdb)) 2999 { 3000 /* 3001 * We've found an NS rdataset that 3002 * isn't at the origin node. We check 3003 * that they're not at the origin node, 3004 * because otherwise we'd erroneously 3005 * treat the zone top as if it were 3006 * a delegation. 3007 */ 3008 ns_header = header; 3009 } 3010 } 3011 } 3012 } 3013 3014 /* 3015 * Did we find anything? 3016 */ 3017 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) && 3018 ns_header != NULL) 3019 { 3020 /* 3021 * Note that NS has precedence over DNAME if both exist 3022 * in a zone. 
Otherwise DNAME take precedence over NS. 3023 */ 3024 found = ns_header; 3025 search->zonecut_sigrdataset = NULL; 3026 } else if (dname_header != NULL) { 3027 found = dname_header; 3028 search->zonecut_sigrdataset = sigdname_header; 3029 } else if (ns_header != NULL) { 3030 found = ns_header; 3031 search->zonecut_sigrdataset = NULL; 3032 } 3033 3034 if (found != NULL) { 3035 /* 3036 * We increment the reference count on node to ensure that 3037 * search->zonecut_rdataset will still be valid later. 3038 */ 3039 new_reference(search->rbtdb, node, isc_rwlocktype_read); 3040 search->zonecut = node; 3041 search->zonecut_rdataset = found; 3042 search->need_cleanup = true; 3043 /* 3044 * Since we've found a zonecut, anything beneath it is 3045 * glue and is not subject to wildcard matching, so we 3046 * may clear search->wild. 3047 */ 3048 search->wild = false; 3049 if ((search->options & DNS_DBFIND_GLUEOK) == 0) { 3050 /* 3051 * If the caller does not want to find glue, then 3052 * this is the best answer and the search should 3053 * stop now. 3054 */ 3055 result = DNS_R_PARTIALMATCH; 3056 } else { 3057 dns_name_t *zcname; 3058 3059 /* 3060 * The search will continue beneath the zone cut. 3061 * This may or may not be the best match. In case it 3062 * is, we need to remember the node name. 3063 */ 3064 zcname = dns_fixedname_name(&search->zonecut_name); 3065 dns_name_copy(name, zcname); 3066 search->copy_name = true; 3067 } 3068 } else { 3069 /* 3070 * There is no zonecut at this node which is active in this 3071 * version. 3072 * 3073 * If this is a "wild" node and the caller hasn't disabled 3074 * wildcard matching, remember that we've seen a wild node 3075 * in case we need to go searching for wildcard matches 3076 * later on. 
3077 */ 3078 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0) { 3079 search->wild = true; 3080 } 3081 } 3082 3083 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3084 isc_rwlocktype_read); 3085 3086 return (result); 3087 } 3088 3089 static void 3090 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, 3091 isc_stdtime_t now, isc_rwlocktype_t locktype, 3092 dns_rdataset_t *rdataset) { 3093 unsigned char *raw; /* RDATASLAB */ 3094 bool stale = STALE(header); 3095 bool ancient = ANCIENT(header); 3096 3097 /* 3098 * Caller must be holding the node reader lock. 3099 * XXXJT: technically, we need a writer lock, since we'll increment 3100 * the header count below. However, since the actual counter value 3101 * doesn't matter, we prioritize performance here. (We may want to 3102 * use atomic increment when available). 3103 */ 3104 3105 if (rdataset == NULL) { 3106 return; 3107 } 3108 3109 new_reference(rbtdb, node, locktype); 3110 3111 INSIST(rdataset->methods == NULL); /* We must be disassociated. */ 3112 3113 /* 3114 * Mark header stale or ancient if the RRset is no longer active. 3115 */ 3116 if (!ACTIVE(header, now)) { 3117 dns_ttl_t stale_ttl = header->rdh_ttl + 3118 STALE_TTL(header, rbtdb); 3119 /* 3120 * If this data is in the stale window keep it and if 3121 * DNS_DBFIND_STALEOK is not set we tell the caller to 3122 * skip this record. We skip the records with ZEROTTL 3123 * (these records should not be cached anyway). 3124 */ 3125 3126 if (KEEPSTALE(rbtdb) && stale_ttl > now) { 3127 stale = true; 3128 } else { 3129 /* 3130 * We are not keeping stale, or it is outside the 3131 * stale window. Mark ancient, i.e. ready for cleanup. 
3132 */ 3133 ancient = true; 3134 } 3135 } 3136 3137 rdataset->methods = &rdataset_methods; 3138 rdataset->rdclass = rbtdb->common.rdclass; 3139 rdataset->type = RBTDB_RDATATYPE_BASE(header->type); 3140 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type); 3141 rdataset->ttl = header->rdh_ttl - now; 3142 rdataset->trust = header->trust; 3143 3144 if (NEGATIVE(header)) { 3145 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE; 3146 } 3147 if (NXDOMAIN(header)) { 3148 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN; 3149 } 3150 if (OPTOUT(header)) { 3151 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT; 3152 } 3153 if (PREFETCH(header)) { 3154 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; 3155 } 3156 3157 if (stale && !ancient) { 3158 dns_ttl_t stale_ttl = header->rdh_ttl + 3159 STALE_TTL(header, rbtdb); 3160 if (stale_ttl > now) { 3161 rdataset->ttl = stale_ttl - now; 3162 } else { 3163 rdataset->ttl = 0; 3164 } 3165 if (STALE_WINDOW(header)) { 3166 rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; 3167 } 3168 rdataset->attributes |= DNS_RDATASETATTR_STALE; 3169 } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { 3170 rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; 3171 rdataset->ttl = header->rdh_ttl; 3172 } 3173 3174 rdataset->private1 = rbtdb; 3175 rdataset->private2 = node; 3176 raw = (unsigned char *)header + sizeof(*header); 3177 rdataset->private3 = raw; 3178 rdataset->count = atomic_fetch_add_relaxed(&header->count, 1); 3179 if (rdataset->count == UINT32_MAX) { 3180 rdataset->count = 0; 3181 } 3182 3183 /* 3184 * Reset iterator state. 3185 */ 3186 rdataset->privateuint4 = 0; 3187 rdataset->private5 = NULL; 3188 3189 /* 3190 * Add noqname proof. 
3191 */ 3192 rdataset->private6 = header->noqname; 3193 if (rdataset->private6 != NULL) { 3194 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME; 3195 } 3196 rdataset->private7 = header->closest; 3197 if (rdataset->private7 != NULL) { 3198 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST; 3199 } 3200 3201 /* 3202 * Copy out re-signing information. 3203 */ 3204 if (RESIGN(header)) { 3205 rdataset->attributes |= DNS_RDATASETATTR_RESIGN; 3206 rdataset->resign = (header->resign << 1) | header->resign_lsb; 3207 } else { 3208 rdataset->resign = 0; 3209 } 3210 } 3211 3212 static isc_result_t 3213 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, 3214 dns_name_t *foundname, dns_rdataset_t *rdataset, 3215 dns_rdataset_t *sigrdataset) { 3216 dns_name_t *zcname; 3217 rbtdb_rdatatype_t type; 3218 dns_rbtnode_t *node; 3219 3220 REQUIRE(search != NULL); 3221 REQUIRE(search->zonecut != NULL); 3222 REQUIRE(search->zonecut_rdataset != NULL); 3223 3224 /* 3225 * The caller MUST NOT be holding any node locks. 3226 */ 3227 3228 node = search->zonecut; 3229 type = search->zonecut_rdataset->type; 3230 3231 /* 3232 * If we have to set foundname, we do it before anything else. 3233 * If we were to set foundname after we had set nodep or bound the 3234 * rdataset, then we'd have to undo that work if dns_name_copy() 3235 * failed. By setting foundname first, there's nothing to undo if 3236 * we have trouble. 3237 */ 3238 if (foundname != NULL && search->copy_name) { 3239 zcname = dns_fixedname_name(&search->zonecut_name); 3240 dns_name_copy(zcname, foundname); 3241 } 3242 if (nodep != NULL) { 3243 /* 3244 * Note that we don't have to increment the node's reference 3245 * count here because we're going to use the reference we 3246 * already have in the search block. 
3247 */ 3248 *nodep = node; 3249 search->need_cleanup = false; 3250 } 3251 if (rdataset != NULL) { 3252 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3253 isc_rwlocktype_read); 3254 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset, 3255 search->now, isc_rwlocktype_read, rdataset); 3256 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL) 3257 { 3258 bind_rdataset(search->rbtdb, node, 3259 search->zonecut_sigrdataset, search->now, 3260 isc_rwlocktype_read, sigrdataset); 3261 } 3262 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3263 isc_rwlocktype_read); 3264 } 3265 3266 if (type == dns_rdatatype_dname) { 3267 return (DNS_R_DNAME); 3268 } 3269 return (DNS_R_DELEGATION); 3270 } 3271 3272 static bool 3273 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type, 3274 dns_rbtnode_t *node) { 3275 unsigned char *raw; /* RDATASLAB */ 3276 unsigned int count, size; 3277 dns_name_t ns_name; 3278 bool valid = false; 3279 dns_offsets_t offsets; 3280 isc_region_t region; 3281 rdatasetheader_t *header; 3282 3283 /* 3284 * No additional locking is required. 3285 */ 3286 3287 /* 3288 * Valid glue types are A, AAAA, A6. NS is also a valid glue type 3289 * if it occurs at a zone cut, but is not valid below it. 
3290 */ 3291 if (type == dns_rdatatype_ns) { 3292 if (node != search->zonecut) { 3293 return (false); 3294 } 3295 } else if (type != dns_rdatatype_a && type != dns_rdatatype_aaaa && 3296 type != dns_rdatatype_a6) 3297 { 3298 return (false); 3299 } 3300 3301 header = search->zonecut_rdataset; 3302 raw = (unsigned char *)header + sizeof(*header); 3303 count = raw[0] * 256 + raw[1]; 3304 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 3305 3306 while (count > 0) { 3307 count--; 3308 size = raw[0] * 256 + raw[1]; 3309 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 3310 region.base = raw; 3311 region.length = size; 3312 raw += size; 3313 /* 3314 * XXX Until we have rdata structures, we have no choice but 3315 * to directly access the rdata format. 3316 */ 3317 dns_name_init(&ns_name, offsets); 3318 dns_name_fromregion(&ns_name, ®ion); 3319 if (dns_name_compare(&ns_name, name) == 0) { 3320 valid = true; 3321 break; 3322 } 3323 } 3324 3325 return (valid); 3326 } 3327 3328 static bool 3329 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain, 3330 const dns_name_t *name) { 3331 dns_fixedname_t fnext; 3332 dns_fixedname_t forigin; 3333 dns_name_t *next; 3334 dns_name_t *origin; 3335 dns_name_t prefix; 3336 dns_rbtdb_t *rbtdb; 3337 dns_rbtnode_t *node; 3338 isc_result_t result; 3339 bool answer = false; 3340 rdatasetheader_t *header; 3341 3342 rbtdb = search->rbtdb; 3343 3344 dns_name_init(&prefix, NULL); 3345 next = dns_fixedname_initname(&fnext); 3346 origin = dns_fixedname_initname(&forigin); 3347 3348 result = dns_rbtnodechain_next(chain, NULL, NULL); 3349 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 3350 node = NULL; 3351 result = dns_rbtnodechain_current(chain, &prefix, origin, 3352 &node); 3353 if (result != ISC_R_SUCCESS) { 3354 break; 3355 } 3356 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3357 isc_rwlocktype_read); 3358 for (header = node->data; header != NULL; header = header->next) 3359 { 3360 if (header->serial <= 
search->serial && 3361 !IGNORE(header) && EXISTS(header)) 3362 { 3363 break; 3364 } 3365 } 3366 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3367 isc_rwlocktype_read); 3368 if (header != NULL) { 3369 break; 3370 } 3371 result = dns_rbtnodechain_next(chain, NULL, NULL); 3372 } 3373 if (result == ISC_R_SUCCESS) { 3374 result = dns_name_concatenate(&prefix, origin, next, NULL); 3375 } 3376 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name)) { 3377 answer = true; 3378 } 3379 return (answer); 3380 } 3381 3382 static bool 3383 activeemptynode(rbtdb_search_t *search, const dns_name_t *qname, 3384 dns_name_t *wname) { 3385 dns_fixedname_t fnext; 3386 dns_fixedname_t forigin; 3387 dns_fixedname_t fprev; 3388 dns_name_t *next; 3389 dns_name_t *origin; 3390 dns_name_t *prev; 3391 dns_name_t name; 3392 dns_name_t rname; 3393 dns_name_t tname; 3394 dns_rbtdb_t *rbtdb; 3395 dns_rbtnode_t *node; 3396 dns_rbtnodechain_t chain; 3397 bool check_next = true; 3398 bool check_prev = true; 3399 bool answer = false; 3400 isc_result_t result; 3401 rdatasetheader_t *header; 3402 unsigned int n; 3403 3404 rbtdb = search->rbtdb; 3405 3406 dns_name_init(&name, NULL); 3407 dns_name_init(&tname, NULL); 3408 dns_name_init(&rname, NULL); 3409 next = dns_fixedname_initname(&fnext); 3410 prev = dns_fixedname_initname(&fprev); 3411 origin = dns_fixedname_initname(&forigin); 3412 3413 /* 3414 * Find if qname is at or below a empty node. 3415 * Use our own copy of the chain. 
3416 */ 3417 3418 chain = search->chain; 3419 do { 3420 node = NULL; 3421 result = dns_rbtnodechain_current(&chain, &name, origin, &node); 3422 if (result != ISC_R_SUCCESS) { 3423 break; 3424 } 3425 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3426 isc_rwlocktype_read); 3427 for (header = node->data; header != NULL; header = header->next) 3428 { 3429 if (header->serial <= search->serial && 3430 !IGNORE(header) && EXISTS(header)) 3431 { 3432 break; 3433 } 3434 } 3435 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3436 isc_rwlocktype_read); 3437 if (header != NULL) { 3438 break; 3439 } 3440 result = dns_rbtnodechain_prev(&chain, NULL, NULL); 3441 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN); 3442 if (result == ISC_R_SUCCESS) { 3443 result = dns_name_concatenate(&name, origin, prev, NULL); 3444 } 3445 if (result != ISC_R_SUCCESS) { 3446 check_prev = false; 3447 } 3448 3449 result = dns_rbtnodechain_next(&chain, NULL, NULL); 3450 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 3451 node = NULL; 3452 result = dns_rbtnodechain_current(&chain, &name, origin, &node); 3453 if (result != ISC_R_SUCCESS) { 3454 break; 3455 } 3456 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3457 isc_rwlocktype_read); 3458 for (header = node->data; header != NULL; header = header->next) 3459 { 3460 if (header->serial <= search->serial && 3461 !IGNORE(header) && EXISTS(header)) 3462 { 3463 break; 3464 } 3465 } 3466 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3467 isc_rwlocktype_read); 3468 if (header != NULL) { 3469 break; 3470 } 3471 result = dns_rbtnodechain_next(&chain, NULL, NULL); 3472 } 3473 if (result == ISC_R_SUCCESS) { 3474 result = dns_name_concatenate(&name, origin, next, NULL); 3475 } 3476 if (result != ISC_R_SUCCESS) { 3477 check_next = false; 3478 } 3479 3480 dns_name_clone(qname, &rname); 3481 3482 /* 3483 * Remove the wildcard label to find the terminal name. 
3484 */ 3485 n = dns_name_countlabels(wname); 3486 dns_name_getlabelsequence(wname, 1, n - 1, &tname); 3487 3488 do { 3489 if ((check_prev && dns_name_issubdomain(prev, &rname)) || 3490 (check_next && dns_name_issubdomain(next, &rname))) 3491 { 3492 answer = true; 3493 break; 3494 } 3495 /* 3496 * Remove the left hand label. 3497 */ 3498 n = dns_name_countlabels(&rname); 3499 dns_name_getlabelsequence(&rname, 1, n - 1, &rname); 3500 } while (!dns_name_equal(&rname, &tname)); 3501 return (answer); 3502 } 3503 3504 static isc_result_t 3505 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep, 3506 const dns_name_t *qname) { 3507 unsigned int i, j; 3508 dns_rbtnode_t *node, *level_node, *wnode; 3509 rdatasetheader_t *header; 3510 isc_result_t result = ISC_R_NOTFOUND; 3511 dns_name_t name; 3512 dns_name_t *wname; 3513 dns_fixedname_t fwname; 3514 dns_rbtdb_t *rbtdb; 3515 bool done, wild, active; 3516 dns_rbtnodechain_t wchain; 3517 3518 /* 3519 * Caller must be holding the tree lock and MUST NOT be holding 3520 * any node locks. 3521 */ 3522 3523 /* 3524 * Examine each ancestor level. If the level's wild bit 3525 * is set, then construct the corresponding wildcard name and 3526 * search for it. If the wildcard node exists, and is active in 3527 * this version, we're done. If not, then we next check to see 3528 * if the ancestor is active in this version. If so, then there 3529 * can be no possible wildcard match and again we're done. If not, 3530 * continue the search. 3531 */ 3532 3533 rbtdb = search->rbtdb; 3534 i = search->chain.level_matches; 3535 done = false; 3536 node = *nodep; 3537 do { 3538 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3539 isc_rwlocktype_read); 3540 3541 /* 3542 * First we try to figure out if this node is active in 3543 * the search's version. We do this now, even though we 3544 * may not need the information, because it simplifies the 3545 * locking and code flow. 
3546 */ 3547 for (header = node->data; header != NULL; header = header->next) 3548 { 3549 if (header->serial <= search->serial && 3550 !IGNORE(header) && EXISTS(header) && 3551 !ANCIENT(header)) 3552 { 3553 break; 3554 } 3555 } 3556 if (header != NULL) { 3557 active = true; 3558 } else { 3559 active = false; 3560 } 3561 3562 if (node->wild) { 3563 wild = true; 3564 } else { 3565 wild = false; 3566 } 3567 3568 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3569 isc_rwlocktype_read); 3570 3571 if (wild) { 3572 /* 3573 * Construct the wildcard name for this level. 3574 */ 3575 dns_name_init(&name, NULL); 3576 dns_rbt_namefromnode(node, &name); 3577 wname = dns_fixedname_initname(&fwname); 3578 result = dns_name_concatenate(dns_wildcardname, &name, 3579 wname, NULL); 3580 j = i; 3581 while (result == ISC_R_SUCCESS && j != 0) { 3582 j--; 3583 level_node = search->chain.levels[j]; 3584 dns_name_init(&name, NULL); 3585 dns_rbt_namefromnode(level_node, &name); 3586 result = dns_name_concatenate(wname, &name, 3587 wname, NULL); 3588 } 3589 if (result != ISC_R_SUCCESS) { 3590 break; 3591 } 3592 3593 wnode = NULL; 3594 dns_rbtnodechain_init(&wchain); 3595 result = dns_rbt_findnode( 3596 rbtdb->tree, wname, NULL, &wnode, &wchain, 3597 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 3598 if (result == ISC_R_SUCCESS) { 3599 nodelock_t *lock; 3600 3601 /* 3602 * We have found the wildcard node. If it 3603 * is active in the search's version, we're 3604 * done. 
3605 */ 3606 lock = &rbtdb->node_locks[wnode->locknum].lock; 3607 NODE_LOCK(lock, isc_rwlocktype_read); 3608 for (header = wnode->data; header != NULL; 3609 header = header->next) 3610 { 3611 if (header->serial <= search->serial && 3612 !IGNORE(header) && EXISTS(header) && 3613 !ANCIENT(header)) 3614 { 3615 break; 3616 } 3617 } 3618 NODE_UNLOCK(lock, isc_rwlocktype_read); 3619 if (header != NULL || 3620 activeempty(search, &wchain, wname)) 3621 { 3622 if (activeemptynode(search, qname, 3623 wname)) 3624 { 3625 return (ISC_R_NOTFOUND); 3626 } 3627 /* 3628 * The wildcard node is active! 3629 * 3630 * Note: result is still ISC_R_SUCCESS 3631 * so we don't have to set it. 3632 */ 3633 *nodep = wnode; 3634 break; 3635 } 3636 } else if (result != ISC_R_NOTFOUND && 3637 result != DNS_R_PARTIALMATCH) 3638 { 3639 /* 3640 * An error has occurred. Bail out. 3641 */ 3642 break; 3643 } 3644 } 3645 3646 if (active) { 3647 /* 3648 * The level node is active. Any wildcarding 3649 * present at higher levels has no 3650 * effect and we're done. 
3651 */ 3652 result = ISC_R_NOTFOUND; 3653 break; 3654 } 3655 3656 if (i > 0) { 3657 i--; 3658 node = search->chain.levels[i]; 3659 } else { 3660 done = true; 3661 } 3662 } while (!done); 3663 3664 return (result); 3665 } 3666 3667 static bool 3668 matchparams(rdatasetheader_t *header, rbtdb_search_t *search) { 3669 dns_rdata_t rdata = DNS_RDATA_INIT; 3670 dns_rdata_nsec3_t nsec3; 3671 unsigned char *raw; /* RDATASLAB */ 3672 unsigned int rdlen, count; 3673 isc_region_t region; 3674 isc_result_t result; 3675 3676 REQUIRE(header->type == dns_rdatatype_nsec3); 3677 3678 raw = (unsigned char *)header + sizeof(*header); 3679 count = raw[0] * 256 + raw[1]; /* count */ 3680 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 3681 3682 while (count-- > 0) { 3683 rdlen = raw[0] * 256 + raw[1]; 3684 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 3685 region.base = raw; 3686 region.length = rdlen; 3687 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass, 3688 dns_rdatatype_nsec3, ®ion); 3689 raw += rdlen; 3690 result = dns_rdata_tostruct(&rdata, &nsec3, NULL); 3691 INSIST(result == ISC_R_SUCCESS); 3692 if (nsec3.hash == search->rbtversion->hash && 3693 nsec3.iterations == search->rbtversion->iterations && 3694 nsec3.salt_length == search->rbtversion->salt_length && 3695 memcmp(nsec3.salt, search->rbtversion->salt, 3696 nsec3.salt_length) == 0) 3697 { 3698 return (true); 3699 } 3700 dns_rdata_reset(&rdata); 3701 } 3702 return (false); 3703 } 3704 3705 /* 3706 * Find node of the NSEC/NSEC3 record that is 'name'. 
3707 */ 3708 static isc_result_t 3709 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search, 3710 dns_name_t *name, dns_name_t *origin, 3711 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain, 3712 bool *firstp) { 3713 dns_fixedname_t ftarget; 3714 dns_name_t *target; 3715 dns_rbtnode_t *nsecnode; 3716 isc_result_t result; 3717 3718 REQUIRE(nodep != NULL && *nodep == NULL); 3719 REQUIRE(type == dns_rdatatype_nsec3 || firstp != NULL); 3720 3721 if (type == dns_rdatatype_nsec3) { 3722 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL); 3723 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { 3724 return (result); 3725 } 3726 result = dns_rbtnodechain_current(&search->chain, name, origin, 3727 nodep); 3728 return (result); 3729 } 3730 3731 target = dns_fixedname_initname(&ftarget); 3732 3733 for (;;) { 3734 if (*firstp) { 3735 /* 3736 * Construct the name of the second node to check. 3737 * It is the first node sought in the NSEC tree. 3738 */ 3739 *firstp = false; 3740 dns_rbtnodechain_init(nsecchain); 3741 result = dns_name_concatenate(name, origin, target, 3742 NULL); 3743 if (result != ISC_R_SUCCESS) { 3744 return (result); 3745 } 3746 nsecnode = NULL; 3747 result = dns_rbt_findnode( 3748 search->rbtdb->nsec, target, NULL, &nsecnode, 3749 nsecchain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 3750 if (result == ISC_R_SUCCESS) { 3751 /* 3752 * Since this was the first loop, finding the 3753 * name in the NSEC tree implies that the first 3754 * node checked in the main tree had an 3755 * unacceptable NSEC record. 3756 * Try the previous node in the NSEC tree. 
3757 */ 3758 result = dns_rbtnodechain_prev(nsecchain, name, 3759 origin); 3760 if (result == DNS_R_NEWORIGIN) { 3761 result = ISC_R_SUCCESS; 3762 } 3763 } else if (result == ISC_R_NOTFOUND || 3764 result == DNS_R_PARTIALMATCH) 3765 { 3766 result = dns_rbtnodechain_current( 3767 nsecchain, name, origin, NULL); 3768 if (result == ISC_R_NOTFOUND) { 3769 result = ISC_R_NOMORE; 3770 } 3771 } 3772 } else { 3773 /* 3774 * This is a second or later trip through the auxiliary 3775 * tree for the name of a third or earlier NSEC node in 3776 * the main tree. Previous trips through the NSEC tree 3777 * must have found nodes in the main tree with NSEC 3778 * records. Perhaps they lacked signature records. 3779 */ 3780 result = dns_rbtnodechain_prev(nsecchain, name, origin); 3781 if (result == DNS_R_NEWORIGIN) { 3782 result = ISC_R_SUCCESS; 3783 } 3784 } 3785 if (result != ISC_R_SUCCESS) { 3786 return (result); 3787 } 3788 3789 /* 3790 * Construct the name to seek in the main tree. 3791 */ 3792 result = dns_name_concatenate(name, origin, target, NULL); 3793 if (result != ISC_R_SUCCESS) { 3794 return (result); 3795 } 3796 3797 *nodep = NULL; 3798 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL, 3799 nodep, &search->chain, 3800 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 3801 if (result == ISC_R_SUCCESS) { 3802 return (result); 3803 } 3804 3805 /* 3806 * There should always be a node in the main tree with the 3807 * same name as the node in the auxiliary NSEC tree, except for 3808 * nodes in the auxiliary tree that are awaiting deletion. 3809 */ 3810 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) { 3811 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 3812 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR, 3813 "previous_closest_nsec(): %s", 3814 isc_result_totext(result)); 3815 return (DNS_R_BADDB); 3816 } 3817 } 3818 } 3819 3820 /* 3821 * Find the NSEC/NSEC3 which is or before the current point on the 3822 * search chain. 
For NSEC3 records only NSEC3 records that match the 3823 * current NSEC3PARAM record are considered. 3824 */ 3825 static isc_result_t 3826 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep, 3827 dns_name_t *foundname, dns_rdataset_t *rdataset, 3828 dns_rdataset_t *sigrdataset, dns_rbt_t *tree, 3829 dns_db_secure_t secure) { 3830 dns_rbtnode_t *node, *prevnode; 3831 rdatasetheader_t *header, *header_next, *found, *foundsig; 3832 dns_rbtnodechain_t nsecchain; 3833 bool empty_node; 3834 isc_result_t result; 3835 dns_fixedname_t fname, forigin; 3836 dns_name_t *name, *origin; 3837 dns_rdatatype_t type; 3838 rbtdb_rdatatype_t sigtype; 3839 bool wraps; 3840 bool first = true; 3841 bool need_sig = (secure == dns_db_secure); 3842 3843 if (tree == search->rbtdb->nsec3) { 3844 type = dns_rdatatype_nsec3; 3845 sigtype = RBTDB_RDATATYPE_SIGNSEC3; 3846 wraps = true; 3847 } else { 3848 type = dns_rdatatype_nsec; 3849 sigtype = RBTDB_RDATATYPE_SIGNSEC; 3850 wraps = false; 3851 } 3852 3853 /* 3854 * Use the auxiliary tree only starting with the second node in the 3855 * hope that the original node will be right much of the time. 3856 */ 3857 name = dns_fixedname_initname(&fname); 3858 origin = dns_fixedname_initname(&forigin); 3859 again: 3860 node = NULL; 3861 prevnode = NULL; 3862 result = dns_rbtnodechain_current(&search->chain, name, origin, &node); 3863 if (result != ISC_R_SUCCESS) { 3864 return (result); 3865 } 3866 do { 3867 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3868 isc_rwlocktype_read); 3869 found = NULL; 3870 foundsig = NULL; 3871 empty_node = true; 3872 for (header = node->data; header != NULL; header = header_next) 3873 { 3874 header_next = header->next; 3875 /* 3876 * Look for an active, extant NSEC or RRSIG NSEC. 3877 */ 3878 do { 3879 if (header->serial <= search->serial && 3880 !IGNORE(header)) 3881 { 3882 /* 3883 * Is this a "this rdataset doesn't 3884 * exist" record? 
3885 */ 3886 if (NONEXISTENT(header)) { 3887 header = NULL; 3888 } 3889 break; 3890 } else { 3891 header = header->down; 3892 } 3893 } while (header != NULL); 3894 if (header != NULL) { 3895 /* 3896 * We now know that there is at least one 3897 * active rdataset at this node. 3898 */ 3899 empty_node = false; 3900 if (header->type == type) { 3901 found = header; 3902 if (foundsig != NULL) { 3903 break; 3904 } 3905 } else if (header->type == sigtype) { 3906 foundsig = header; 3907 if (found != NULL) { 3908 break; 3909 } 3910 } 3911 } 3912 } 3913 if (!empty_node) { 3914 if (found != NULL && search->rbtversion->havensec3 && 3915 found->type == dns_rdatatype_nsec3 && 3916 !matchparams(found, search)) 3917 { 3918 empty_node = true; 3919 found = NULL; 3920 foundsig = NULL; 3921 result = previous_closest_nsec( 3922 type, search, name, origin, &prevnode, 3923 NULL, NULL); 3924 } else if (found != NULL && 3925 (foundsig != NULL || !need_sig)) 3926 { 3927 /* 3928 * We've found the right NSEC/NSEC3 record. 3929 * 3930 * Note: for this to really be the right 3931 * NSEC record, it's essential that the NSEC 3932 * records of any nodes obscured by a zone 3933 * cut have been removed; we assume this is 3934 * the case. 3935 */ 3936 result = dns_name_concatenate(name, origin, 3937 foundname, NULL); 3938 if (result == ISC_R_SUCCESS) { 3939 if (nodep != NULL) { 3940 new_reference( 3941 search->rbtdb, node, 3942 isc_rwlocktype_read); 3943 *nodep = node; 3944 } 3945 bind_rdataset(search->rbtdb, node, 3946 found, search->now, 3947 isc_rwlocktype_read, 3948 rdataset); 3949 if (foundsig != NULL) { 3950 bind_rdataset( 3951 search->rbtdb, node, 3952 foundsig, search->now, 3953 isc_rwlocktype_read, 3954 sigrdataset); 3955 } 3956 } 3957 } else if (found == NULL && foundsig == NULL) { 3958 /* 3959 * This node is active, but has no NSEC or 3960 * RRSIG NSEC. That means it's glue or 3961 * other obscured zone data that isn't 3962 * relevant for our search. 
Treat the 3963 * node as if it were empty and keep looking. 3964 */ 3965 empty_node = true; 3966 result = previous_closest_nsec( 3967 type, search, name, origin, &prevnode, 3968 &nsecchain, &first); 3969 } else { 3970 /* 3971 * We found an active node, but either the 3972 * NSEC or the RRSIG NSEC is missing. This 3973 * shouldn't happen. 3974 */ 3975 result = DNS_R_BADDB; 3976 } 3977 } else { 3978 /* 3979 * This node isn't active. We've got to keep 3980 * looking. 3981 */ 3982 result = previous_closest_nsec(type, search, name, 3983 origin, &prevnode, 3984 &nsecchain, &first); 3985 } 3986 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3987 isc_rwlocktype_read); 3988 node = prevnode; 3989 prevnode = NULL; 3990 } while (empty_node && result == ISC_R_SUCCESS); 3991 3992 if (!first) { 3993 dns_rbtnodechain_invalidate(&nsecchain); 3994 } 3995 3996 if (result == ISC_R_NOMORE && wraps) { 3997 result = dns_rbtnodechain_last(&search->chain, tree, NULL, 3998 NULL); 3999 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 4000 wraps = false; 4001 goto again; 4002 } 4003 } 4004 4005 /* 4006 * If the result is ISC_R_NOMORE, then we got to the beginning of 4007 * the database and didn't find a NSEC record. This shouldn't 4008 * happen. 
4009 */ 4010 if (result == ISC_R_NOMORE) { 4011 result = DNS_R_BADDB; 4012 } 4013 4014 return (result); 4015 } 4016 4017 static isc_result_t 4018 zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 4019 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 4020 dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset, 4021 dns_rdataset_t *sigrdataset) { 4022 dns_rbtnode_t *node = NULL; 4023 isc_result_t result; 4024 rbtdb_search_t search; 4025 bool cname_ok = true; 4026 bool close_version = false; 4027 bool maybe_zonecut = false; 4028 bool at_zonecut = false; 4029 bool wild; 4030 bool empty_node; 4031 rdatasetheader_t *header, *header_next, *found, *nsecheader; 4032 rdatasetheader_t *foundsig, *cnamesig, *nsecsig; 4033 rbtdb_rdatatype_t sigtype; 4034 bool active; 4035 nodelock_t *lock; 4036 dns_rbt_t *tree; 4037 4038 search.rbtdb = (dns_rbtdb_t *)db; 4039 4040 REQUIRE(VALID_RBTDB(search.rbtdb)); 4041 INSIST(version == NULL || 4042 ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db); 4043 4044 /* 4045 * We don't care about 'now'. 4046 */ 4047 UNUSED(now); 4048 4049 /* 4050 * If the caller didn't supply a version, attach to the current 4051 * version. 4052 */ 4053 if (version == NULL) { 4054 currentversion(db, &version); 4055 close_version = true; 4056 } 4057 4058 search.rbtversion = version; 4059 search.serial = search.rbtversion->serial; 4060 search.options = options; 4061 search.copy_name = false; 4062 search.need_cleanup = false; 4063 search.wild = false; 4064 search.zonecut = NULL; 4065 dns_fixedname_init(&search.zonecut_name); 4066 dns_rbtnodechain_init(&search.chain); 4067 search.now = 0; 4068 4069 /* 4070 * 'wild' will be true iff. we've matched a wildcard. 4071 */ 4072 wild = false; 4073 4074 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 4075 4076 /* 4077 * Search down from the root of the tree. 
If, while going down, we 4078 * encounter a callback node, zone_zonecut_callback() will search the 4079 * rdatasets at the zone cut for active DNAME or NS rdatasets. 4080 */ 4081 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 4082 : search.rbtdb->tree; 4083 result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain, 4084 DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback, 4085 &search); 4086 4087 if (result == DNS_R_PARTIALMATCH) { 4088 partial_match: 4089 if (search.zonecut != NULL) { 4090 result = setup_delegation(&search, nodep, foundname, 4091 rdataset, sigrdataset); 4092 goto tree_exit; 4093 } 4094 4095 if (search.wild) { 4096 /* 4097 * At least one of the levels in the search chain 4098 * potentially has a wildcard. For each such level, 4099 * we must see if there's a matching wildcard active 4100 * in the current version. 4101 */ 4102 result = find_wildcard(&search, &node, name); 4103 if (result == ISC_R_SUCCESS) { 4104 dns_name_copy(name, foundname); 4105 wild = true; 4106 goto found; 4107 } else if (result != ISC_R_NOTFOUND) { 4108 goto tree_exit; 4109 } 4110 } 4111 4112 active = false; 4113 if ((options & DNS_DBFIND_FORCENSEC3) == 0) { 4114 /* 4115 * The NSEC3 tree won't have empty nodes, 4116 * so it isn't necessary to check for them. 4117 */ 4118 dns_rbtnodechain_t chain = search.chain; 4119 active = activeempty(&search, &chain, name); 4120 } 4121 4122 /* 4123 * If we're here, then the name does not exist, is not 4124 * beneath a zonecut, and there's no matching wildcard. 4125 */ 4126 if ((search.rbtversion->secure == dns_db_secure && 4127 !search.rbtversion->havensec3) || 4128 (search.options & DNS_DBFIND_FORCENSEC) != 0 || 4129 (search.options & DNS_DBFIND_FORCENSEC3) != 0) 4130 { 4131 result = find_closest_nsec(&search, nodep, foundname, 4132 rdataset, sigrdataset, tree, 4133 search.rbtversion->secure); 4134 if (result == ISC_R_SUCCESS) { 4135 result = active ? 
DNS_R_EMPTYNAME 4136 : DNS_R_NXDOMAIN; 4137 } 4138 } else { 4139 bool wantpartial = (options & DNS_DBFIND_WANTPARTIAL) != 4140 0; 4141 result = active ? DNS_R_EMPTYNAME 4142 : wantpartial ? DNS_R_PARTIALMATCH 4143 : DNS_R_NXDOMAIN; 4144 } 4145 goto tree_exit; 4146 } else if (result != ISC_R_SUCCESS) { 4147 goto tree_exit; 4148 } 4149 4150 found: 4151 /* 4152 * We have found a node whose name is the desired name, or we 4153 * have matched a wildcard. 4154 */ 4155 4156 if (search.zonecut != NULL) { 4157 /* 4158 * If we're beneath a zone cut, we don't want to look for 4159 * CNAMEs because they're not legitimate zone glue. 4160 */ 4161 cname_ok = false; 4162 } else { 4163 /* 4164 * The node may be a zone cut itself. If it might be one, 4165 * make sure we check for it later. 4166 * 4167 * DS records live above the zone cut in ordinary zone so 4168 * we want to ignore any referral. 4169 * 4170 * Stub zones don't have anything "above" the delegation so 4171 * we always return a referral. 4172 */ 4173 if (node->find_callback && 4174 ((node != search.rbtdb->origin_node && 4175 !dns_rdatatype_atparent(type)) || 4176 IS_STUB(search.rbtdb))) 4177 { 4178 maybe_zonecut = true; 4179 } 4180 } 4181 4182 /* 4183 * Certain DNSSEC types are not subject to CNAME matching 4184 * (RFC4035, section 2.5 and RFC3007). 4185 * 4186 * We don't check for RRSIG, because we don't store RRSIG records 4187 * directly. 4188 */ 4189 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 4190 cname_ok = false; 4191 } 4192 4193 /* 4194 * We now go looking for rdata... 
4195 */ 4196 4197 lock = &search.rbtdb->node_locks[node->locknum].lock; 4198 NODE_LOCK(lock, isc_rwlocktype_read); 4199 4200 found = NULL; 4201 foundsig = NULL; 4202 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 4203 nsecheader = NULL; 4204 nsecsig = NULL; 4205 cnamesig = NULL; 4206 empty_node = true; 4207 for (header = node->data; header != NULL; header = header_next) { 4208 header_next = header->next; 4209 /* 4210 * Look for an active, extant rdataset. 4211 */ 4212 do { 4213 if (header->serial <= search.serial && !IGNORE(header)) 4214 { 4215 /* 4216 * Is this a "this rdataset doesn't 4217 * exist" record? 4218 */ 4219 if (NONEXISTENT(header)) { 4220 header = NULL; 4221 } 4222 break; 4223 } else { 4224 header = header->down; 4225 } 4226 } while (header != NULL); 4227 if (header != NULL) { 4228 /* 4229 * We now know that there is at least one active 4230 * rdataset at this node. 4231 */ 4232 empty_node = false; 4233 4234 /* 4235 * Do special zone cut handling, if requested. 4236 */ 4237 if (maybe_zonecut && header->type == dns_rdatatype_ns) { 4238 /* 4239 * We increment the reference count on node to 4240 * ensure that search->zonecut_rdataset will 4241 * still be valid later. 4242 */ 4243 new_reference(search.rbtdb, node, 4244 isc_rwlocktype_read); 4245 search.zonecut = node; 4246 search.zonecut_rdataset = header; 4247 search.zonecut_sigrdataset = NULL; 4248 search.need_cleanup = true; 4249 maybe_zonecut = false; 4250 at_zonecut = true; 4251 /* 4252 * It is not clear if KEY should still be 4253 * allowed at the parent side of the zone 4254 * cut or not. It is needed for RFC3007 4255 * validated updates. 4256 */ 4257 if ((search.options & DNS_DBFIND_GLUEOK) == 0 && 4258 type != dns_rdatatype_nsec && 4259 type != dns_rdatatype_key) 4260 { 4261 /* 4262 * Glue is not OK, but any answer we 4263 * could return would be glue. Return 4264 * the delegation. 
4265 */ 4266 found = NULL; 4267 break; 4268 } 4269 if (found != NULL && foundsig != NULL) { 4270 break; 4271 } 4272 } 4273 4274 /* 4275 * If the NSEC3 record doesn't match the chain 4276 * we are using behave as if it isn't here. 4277 */ 4278 if (header->type == dns_rdatatype_nsec3 && 4279 !matchparams(header, &search)) 4280 { 4281 NODE_UNLOCK(lock, isc_rwlocktype_read); 4282 goto partial_match; 4283 } 4284 /* 4285 * If we found a type we were looking for, 4286 * remember it. 4287 */ 4288 if (header->type == type || type == dns_rdatatype_any || 4289 (header->type == dns_rdatatype_cname && cname_ok)) 4290 { 4291 /* 4292 * We've found the answer! 4293 */ 4294 found = header; 4295 if (header->type == dns_rdatatype_cname && 4296 cname_ok) 4297 { 4298 /* 4299 * We may be finding a CNAME instead 4300 * of the desired type. 4301 * 4302 * If we've already got the CNAME RRSIG, 4303 * use it, otherwise change sigtype 4304 * so that we find it. 4305 */ 4306 if (cnamesig != NULL) { 4307 foundsig = cnamesig; 4308 } else { 4309 sigtype = 4310 RBTDB_RDATATYPE_SIGCNAME; 4311 } 4312 } 4313 /* 4314 * If we've got all we need, end the search. 4315 */ 4316 if (!maybe_zonecut && foundsig != NULL) { 4317 break; 4318 } 4319 } else if (header->type == sigtype) { 4320 /* 4321 * We've found the RRSIG rdataset for our 4322 * target type. Remember it. 4323 */ 4324 foundsig = header; 4325 /* 4326 * If we've got all we need, end the search. 4327 */ 4328 if (!maybe_zonecut && found != NULL) { 4329 break; 4330 } 4331 } else if (header->type == dns_rdatatype_nsec && 4332 !search.rbtversion->havensec3) 4333 { 4334 /* 4335 * Remember a NSEC rdataset even if we're 4336 * not specifically looking for it, because 4337 * we might need it later. 4338 */ 4339 nsecheader = header; 4340 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC && 4341 !search.rbtversion->havensec3) 4342 { 4343 /* 4344 * If we need the NSEC rdataset, we'll also 4345 * need its signature. 
4346 */ 4347 nsecsig = header; 4348 } else if (cname_ok && 4349 header->type == RBTDB_RDATATYPE_SIGCNAME) 4350 { 4351 /* 4352 * If we get a CNAME match, we'll also need 4353 * its signature. 4354 */ 4355 cnamesig = header; 4356 } 4357 } 4358 } 4359 4360 if (empty_node) { 4361 /* 4362 * We have an exact match for the name, but there are no 4363 * active rdatasets in the desired version. That means that 4364 * this node doesn't exist in the desired version, and that 4365 * we really have a partial match. 4366 */ 4367 if (!wild) { 4368 NODE_UNLOCK(lock, isc_rwlocktype_read); 4369 goto partial_match; 4370 } 4371 } 4372 4373 /* 4374 * If we didn't find what we were looking for... 4375 */ 4376 if (found == NULL) { 4377 if (search.zonecut != NULL) { 4378 /* 4379 * We were trying to find glue at a node beneath a 4380 * zone cut, but didn't. 4381 * 4382 * Return the delegation. 4383 */ 4384 NODE_UNLOCK(lock, isc_rwlocktype_read); 4385 result = setup_delegation(&search, nodep, foundname, 4386 rdataset, sigrdataset); 4387 goto tree_exit; 4388 } 4389 /* 4390 * The desired type doesn't exist. 4391 */ 4392 result = DNS_R_NXRRSET; 4393 if (search.rbtversion->secure == dns_db_secure && 4394 !search.rbtversion->havensec3 && 4395 (nsecheader == NULL || nsecsig == NULL)) 4396 { 4397 /* 4398 * The zone is secure but there's no NSEC, 4399 * or the NSEC has no signature! 4400 */ 4401 if (!wild) { 4402 result = DNS_R_BADDB; 4403 goto node_exit; 4404 } 4405 4406 NODE_UNLOCK(lock, isc_rwlocktype_read); 4407 result = find_closest_nsec(&search, nodep, foundname, 4408 rdataset, sigrdataset, 4409 search.rbtdb->tree, 4410 search.rbtversion->secure); 4411 if (result == ISC_R_SUCCESS) { 4412 result = DNS_R_EMPTYWILD; 4413 } 4414 goto tree_exit; 4415 } 4416 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 && 4417 nsecheader == NULL) 4418 { 4419 /* 4420 * There's no NSEC record, and we were told 4421 * to find one. 
4422 */ 4423 result = DNS_R_BADDB; 4424 goto node_exit; 4425 } 4426 if (nodep != NULL) { 4427 new_reference(search.rbtdb, node, isc_rwlocktype_read); 4428 *nodep = node; 4429 } 4430 if ((search.rbtversion->secure == dns_db_secure && 4431 !search.rbtversion->havensec3) || 4432 (search.options & DNS_DBFIND_FORCENSEC) != 0) 4433 { 4434 bind_rdataset(search.rbtdb, node, nsecheader, 0, 4435 isc_rwlocktype_read, rdataset); 4436 if (nsecsig != NULL) { 4437 bind_rdataset(search.rbtdb, node, nsecsig, 0, 4438 isc_rwlocktype_read, sigrdataset); 4439 } 4440 } 4441 if (wild) { 4442 foundname->attributes |= DNS_NAMEATTR_WILDCARD; 4443 } 4444 goto node_exit; 4445 } 4446 4447 /* 4448 * We found what we were looking for, or we found a CNAME. 4449 */ 4450 4451 if (type != found->type && type != dns_rdatatype_any && 4452 found->type == dns_rdatatype_cname) 4453 { 4454 /* 4455 * We weren't doing an ANY query and we found a CNAME instead 4456 * of the type we were looking for, so we need to indicate 4457 * that result to the caller. 4458 */ 4459 result = DNS_R_CNAME; 4460 } else if (search.zonecut != NULL) { 4461 /* 4462 * If we're beneath a zone cut, we must indicate that the 4463 * result is glue, unless we're actually at the zone cut 4464 * and the type is NSEC or KEY. 4465 */ 4466 if (search.zonecut == node) { 4467 /* 4468 * It is not clear if KEY should still be 4469 * allowed at the parent side of the zone 4470 * cut or not. It is needed for RFC3007 4471 * validated updates. 4472 */ 4473 if (type == dns_rdatatype_nsec || 4474 type == dns_rdatatype_nsec3 || 4475 type == dns_rdatatype_key) 4476 { 4477 result = ISC_R_SUCCESS; 4478 } else if (type == dns_rdatatype_any) { 4479 result = DNS_R_ZONECUT; 4480 } else { 4481 result = DNS_R_GLUE; 4482 } 4483 } else { 4484 result = DNS_R_GLUE; 4485 } 4486 /* 4487 * We might have found data that isn't glue, but was occluded 4488 * by a dynamic update. If the caller cares about this, they 4489 * will have told us to validate glue. 
4490 * 4491 * XXX We should cache the glue validity state! 4492 */ 4493 if (result == DNS_R_GLUE && 4494 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 && 4495 !valid_glue(&search, foundname, type, node)) 4496 { 4497 NODE_UNLOCK(lock, isc_rwlocktype_read); 4498 result = setup_delegation(&search, nodep, foundname, 4499 rdataset, sigrdataset); 4500 goto tree_exit; 4501 } 4502 } else { 4503 /* 4504 * An ordinary successful query! 4505 */ 4506 result = ISC_R_SUCCESS; 4507 } 4508 4509 if (nodep != NULL) { 4510 if (!at_zonecut) { 4511 new_reference(search.rbtdb, node, isc_rwlocktype_read); 4512 } else { 4513 search.need_cleanup = false; 4514 } 4515 *nodep = node; 4516 } 4517 4518 if (type != dns_rdatatype_any) { 4519 bind_rdataset(search.rbtdb, node, found, 0, isc_rwlocktype_read, 4520 rdataset); 4521 if (foundsig != NULL) { 4522 bind_rdataset(search.rbtdb, node, foundsig, 0, 4523 isc_rwlocktype_read, sigrdataset); 4524 } 4525 } 4526 4527 if (wild) { 4528 foundname->attributes |= DNS_NAMEATTR_WILDCARD; 4529 } 4530 4531 node_exit: 4532 NODE_UNLOCK(lock, isc_rwlocktype_read); 4533 4534 tree_exit: 4535 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 4536 4537 /* 4538 * If we found a zonecut but aren't going to use it, we have to 4539 * let go of it. 
4540 */ 4541 if (search.need_cleanup) { 4542 node = search.zonecut; 4543 INSIST(node != NULL); 4544 lock = &(search.rbtdb->node_locks[node->locknum].lock); 4545 4546 NODE_LOCK(lock, isc_rwlocktype_read); 4547 decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read, 4548 isc_rwlocktype_none, false); 4549 NODE_UNLOCK(lock, isc_rwlocktype_read); 4550 } 4551 4552 if (close_version) { 4553 closeversion(db, &version, false); 4554 } 4555 4556 dns_rbtnodechain_reset(&search.chain); 4557 4558 return (result); 4559 } 4560 4561 static isc_result_t 4562 zone_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 4563 isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname, 4564 dns_name_t *dcname, dns_rdataset_t *rdataset, 4565 dns_rdataset_t *sigrdataset) { 4566 UNUSED(db); 4567 UNUSED(name); 4568 UNUSED(options); 4569 UNUSED(now); 4570 UNUSED(nodep); 4571 UNUSED(foundname); 4572 UNUSED(dcname); 4573 UNUSED(rdataset); 4574 UNUSED(sigrdataset); 4575 4576 FATAL_ERROR("zone_findzonecut() called!"); 4577 4578 UNREACHABLE(); 4579 return (ISC_R_NOTIMPLEMENTED); 4580 } 4581 4582 static bool 4583 check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header, 4584 isc_rwlocktype_t *locktype, nodelock_t *lock, 4585 rbtdb_search_t *search, rdatasetheader_t **header_prev) { 4586 if (!ACTIVE(header, search->now)) { 4587 dns_ttl_t stale = header->rdh_ttl + 4588 STALE_TTL(header, search->rbtdb); 4589 /* 4590 * If this data is in the stale window keep it and if 4591 * DNS_DBFIND_STALEOK is not set we tell the caller to 4592 * skip this record. We skip the records with ZEROTTL 4593 * (these records should not be cached anyway). 
4594 */ 4595 4596 RDATASET_ATTR_CLR(header, RDATASET_ATTR_STALE_WINDOW); 4597 if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && 4598 stale > search->now) 4599 { 4600 mark_header_stale(search->rbtdb, header); 4601 *header_prev = header; 4602 /* 4603 * If DNS_DBFIND_STALESTART is set then it means we 4604 * failed to resolve the name during recursion, in 4605 * this case we mark the time in which the refresh 4606 * failed. 4607 */ 4608 if ((search->options & DNS_DBFIND_STALESTART) != 0) { 4609 atomic_store_release( 4610 &header->last_refresh_fail_ts, 4611 search->now); 4612 } else if ((search->options & 4613 DNS_DBFIND_STALEENABLED) != 0 && 4614 search->now < 4615 (atomic_load_acquire( 4616 &header->last_refresh_fail_ts) + 4617 search->rbtdb->serve_stale_refresh)) 4618 { 4619 /* 4620 * If we are within interval between last 4621 * refresh failure time + 'stale-refresh-time', 4622 * then don't skip this stale entry but use it 4623 * instead. 4624 */ 4625 RDATASET_ATTR_SET(header, 4626 RDATASET_ATTR_STALE_WINDOW); 4627 return (false); 4628 } else if ((search->options & 4629 DNS_DBFIND_STALETIMEOUT) != 0) 4630 { 4631 /* 4632 * We want stale RRset due to timeout, so we 4633 * don't skip it. 4634 */ 4635 return (false); 4636 } 4637 return ((search->options & DNS_DBFIND_STALEOK) == 0); 4638 } 4639 4640 /* 4641 * This rdataset is stale. If no one else is using the 4642 * node, we can clean it up right now, otherwise we mark 4643 * it as ancient, and the node as dirty, so it will get 4644 * cleaned up later. 4645 */ 4646 if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) && 4647 (*locktype == isc_rwlocktype_write || 4648 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) 4649 { 4650 /* 4651 * We update the node's status only when we can 4652 * get write access; otherwise, we leave others 4653 * to this work. Periodical cleaning will 4654 * eventually take the job as the last resort. 4655 * We won't downgrade the lock, since other 4656 * rdatasets are probably stale, too. 
4657 */ 4658 *locktype = isc_rwlocktype_write; 4659 4660 if (isc_refcount_current(&node->references) == 0) { 4661 isc_mem_t *mctx; 4662 4663 /* 4664 * header->down can be non-NULL if the 4665 * refcount has just decremented to 0 4666 * but decrement_reference() has not 4667 * performed clean_cache_node(), in 4668 * which case we need to purge the stale 4669 * headers first. 4670 */ 4671 mctx = search->rbtdb->common.mctx; 4672 clean_stale_headers(search->rbtdb, mctx, 4673 header); 4674 if (*header_prev != NULL) { 4675 (*header_prev)->next = header->next; 4676 } else { 4677 node->data = header->next; 4678 } 4679 free_rdataset(search->rbtdb, mctx, header); 4680 } else { 4681 mark_header_ancient(search->rbtdb, header); 4682 *header_prev = header; 4683 } 4684 } else { 4685 *header_prev = header; 4686 } 4687 return (true); 4688 } 4689 return (false); 4690 } 4691 4692 static isc_result_t 4693 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) { 4694 rbtdb_search_t *search = arg; 4695 rdatasetheader_t *header, *header_prev, *header_next; 4696 rdatasetheader_t *dname_header, *sigdname_header; 4697 isc_result_t result; 4698 nodelock_t *lock; 4699 isc_rwlocktype_t locktype; 4700 4701 /* XXX comment */ 4702 4703 REQUIRE(search->zonecut == NULL); 4704 4705 /* 4706 * Keep compiler silent. 4707 */ 4708 UNUSED(name); 4709 4710 lock = &(search->rbtdb->node_locks[node->locknum].lock); 4711 locktype = isc_rwlocktype_read; 4712 NODE_LOCK(lock, locktype); 4713 4714 /* 4715 * Look for a DNAME or RRSIG DNAME rdataset. 4716 */ 4717 dname_header = NULL; 4718 sigdname_header = NULL; 4719 header_prev = NULL; 4720 for (header = node->data; header != NULL; header = header_next) { 4721 header_next = header->next; 4722 if (check_stale_header(node, header, &locktype, lock, search, 4723 &header_prev)) 4724 { 4725 /* Do nothing. 
*/ 4726 } else if (header->type == dns_rdatatype_dname && 4727 EXISTS(header) && !ANCIENT(header)) 4728 { 4729 dname_header = header; 4730 header_prev = header; 4731 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME && 4732 EXISTS(header) && !ANCIENT(header)) 4733 { 4734 sigdname_header = header; 4735 header_prev = header; 4736 } else { 4737 header_prev = header; 4738 } 4739 } 4740 4741 if (dname_header != NULL && 4742 (!DNS_TRUST_PENDING(dname_header->trust) || 4743 (search->options & DNS_DBFIND_PENDINGOK) != 0)) 4744 { 4745 /* 4746 * We increment the reference count on node to ensure that 4747 * search->zonecut_rdataset will still be valid later. 4748 */ 4749 new_reference(search->rbtdb, node, locktype); 4750 search->zonecut = node; 4751 search->zonecut_rdataset = dname_header; 4752 search->zonecut_sigrdataset = sigdname_header; 4753 search->need_cleanup = true; 4754 result = DNS_R_PARTIALMATCH; 4755 } else { 4756 result = DNS_R_CONTINUE; 4757 } 4758 4759 NODE_UNLOCK(lock, locktype); 4760 4761 return (result); 4762 } 4763 4764 static isc_result_t 4765 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, 4766 dns_dbnode_t **nodep, dns_name_t *foundname, 4767 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 4768 unsigned int i; 4769 dns_rbtnode_t *level_node; 4770 rdatasetheader_t *header, *header_prev, *header_next; 4771 rdatasetheader_t *found, *foundsig; 4772 isc_result_t result = ISC_R_NOTFOUND; 4773 dns_name_t name; 4774 dns_rbtdb_t *rbtdb; 4775 bool done; 4776 nodelock_t *lock; 4777 isc_rwlocktype_t locktype; 4778 4779 /* 4780 * Caller must be holding the tree lock. 4781 */ 4782 4783 rbtdb = search->rbtdb; 4784 i = search->chain.level_matches; 4785 done = false; 4786 do { 4787 locktype = isc_rwlocktype_read; 4788 lock = &rbtdb->node_locks[node->locknum].lock; 4789 NODE_LOCK(lock, locktype); 4790 4791 /* 4792 * Look for NS and RRSIG NS rdatasets. 
4793 */ 4794 found = NULL; 4795 foundsig = NULL; 4796 header_prev = NULL; 4797 for (header = node->data; header != NULL; header = header_next) 4798 { 4799 header_next = header->next; 4800 if (check_stale_header(node, header, &locktype, lock, 4801 search, &header_prev)) 4802 { 4803 /* Do nothing. */ 4804 } else if (EXISTS(header) && !ANCIENT(header)) { 4805 /* 4806 * We've found an extant rdataset. See if 4807 * we're interested in it. 4808 */ 4809 if (header->type == dns_rdatatype_ns) { 4810 found = header; 4811 if (foundsig != NULL) { 4812 break; 4813 } 4814 } else if (header->type == 4815 RBTDB_RDATATYPE_SIGNS) 4816 { 4817 foundsig = header; 4818 if (found != NULL) { 4819 break; 4820 } 4821 } 4822 header_prev = header; 4823 } else { 4824 header_prev = header; 4825 } 4826 } 4827 4828 if (found != NULL) { 4829 /* 4830 * If we have to set foundname, we do it before 4831 * anything else. If we were to set foundname after 4832 * we had set nodep or bound the rdataset, then we'd 4833 * have to undo that work if dns_name_concatenate() 4834 * failed. By setting foundname first, there's 4835 * nothing to undo if we have trouble. 
4836 */ 4837 if (foundname != NULL) { 4838 dns_name_init(&name, NULL); 4839 dns_rbt_namefromnode(node, &name); 4840 dns_name_copy(&name, foundname); 4841 while (i > 0) { 4842 i--; 4843 level_node = search->chain.levels[i]; 4844 dns_name_init(&name, NULL); 4845 dns_rbt_namefromnode(level_node, &name); 4846 result = dns_name_concatenate( 4847 foundname, &name, foundname, 4848 NULL); 4849 if (result != ISC_R_SUCCESS) { 4850 if (nodep != NULL) { 4851 *nodep = NULL; 4852 } 4853 goto node_exit; 4854 } 4855 } 4856 } 4857 result = DNS_R_DELEGATION; 4858 if (nodep != NULL) { 4859 new_reference(search->rbtdb, node, locktype); 4860 *nodep = node; 4861 } 4862 bind_rdataset(search->rbtdb, node, found, search->now, 4863 locktype, rdataset); 4864 if (foundsig != NULL) { 4865 bind_rdataset(search->rbtdb, node, foundsig, 4866 search->now, locktype, 4867 sigrdataset); 4868 } 4869 if (need_headerupdate(found, search->now) || 4870 (foundsig != NULL && 4871 need_headerupdate(foundsig, search->now))) 4872 { 4873 if (locktype != isc_rwlocktype_write) { 4874 NODE_UNLOCK(lock, locktype); 4875 NODE_LOCK(lock, isc_rwlocktype_write); 4876 locktype = isc_rwlocktype_write; 4877 POST(locktype); 4878 } 4879 if (need_headerupdate(found, search->now)) { 4880 update_header(search->rbtdb, found, 4881 search->now); 4882 } 4883 if (foundsig != NULL && 4884 need_headerupdate(foundsig, search->now)) 4885 { 4886 update_header(search->rbtdb, foundsig, 4887 search->now); 4888 } 4889 } 4890 } 4891 4892 node_exit: 4893 NODE_UNLOCK(lock, locktype); 4894 4895 if (found == NULL && i > 0) { 4896 i--; 4897 node = search->chain.levels[i]; 4898 } else { 4899 done = true; 4900 } 4901 } while (!done); 4902 4903 return (result); 4904 } 4905 4906 /* 4907 * Look for a potentially covering NSEC in the cache where `name` 4908 * is known not to exist. This uses the auxiliary NSEC tree to find 4909 * the potential NSEC owner. 
If found, we update 'foundname', 'nodep', 4910 * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC. 4911 * Otherwise, return ISC_R_NOTFOUND. 4912 */ 4913 static isc_result_t 4914 find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name, 4915 dns_dbnode_t **nodep, isc_stdtime_t now, 4916 dns_name_t *foundname, dns_rdataset_t *rdataset, 4917 dns_rdataset_t *sigrdataset) { 4918 dns_fixedname_t fprefix, forigin, ftarget, fixed; 4919 dns_name_t *prefix = NULL, *origin = NULL; 4920 dns_name_t *target = NULL, *fname = NULL; 4921 dns_rbtnode_t *node = NULL; 4922 dns_rbtnodechain_t chain; 4923 isc_result_t result; 4924 isc_rwlocktype_t locktype; 4925 nodelock_t *lock = NULL; 4926 rbtdb_rdatatype_t matchtype, sigmatchtype; 4927 rdatasetheader_t *found = NULL, *foundsig = NULL; 4928 rdatasetheader_t *header = NULL; 4929 rdatasetheader_t *header_next = NULL, *header_prev = NULL; 4930 4931 /* 4932 * Look for the node in the auxilary tree. 4933 */ 4934 dns_rbtnodechain_init(&chain); 4935 target = dns_fixedname_initname(&ftarget); 4936 result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node, 4937 &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 4938 if (result != DNS_R_PARTIALMATCH) { 4939 dns_rbtnodechain_reset(&chain); 4940 return (ISC_R_NOTFOUND); 4941 } 4942 4943 prefix = dns_fixedname_initname(&fprefix); 4944 origin = dns_fixedname_initname(&forigin); 4945 target = dns_fixedname_initname(&ftarget); 4946 fname = dns_fixedname_initname(&fixed); 4947 4948 locktype = isc_rwlocktype_read; 4949 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0); 4950 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, 4951 dns_rdatatype_nsec); 4952 4953 /* 4954 * Extract predecessor from chain. 
4955 */ 4956 result = dns_rbtnodechain_current(&chain, prefix, origin, NULL); 4957 dns_rbtnodechain_reset(&chain); 4958 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { 4959 return (ISC_R_NOTFOUND); 4960 } 4961 4962 result = dns_name_concatenate(prefix, origin, target, NULL); 4963 if (result != ISC_R_SUCCESS) { 4964 return (ISC_R_NOTFOUND); 4965 } 4966 4967 /* 4968 * Lookup the predecessor in the main tree. 4969 */ 4970 node = NULL; 4971 result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node, 4972 NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 4973 if (result != ISC_R_SUCCESS) { 4974 return (ISC_R_NOTFOUND); 4975 } 4976 4977 lock = &(search->rbtdb->node_locks[node->locknum].lock); 4978 NODE_LOCK(lock, locktype); 4979 for (header = node->data; header != NULL; header = header_next) { 4980 header_next = header->next; 4981 if (check_stale_header(node, header, &locktype, lock, search, 4982 &header_prev)) 4983 { 4984 continue; 4985 } 4986 if (NONEXISTENT(header) || 4987 RBTDB_RDATATYPE_BASE(header->type) == 0) 4988 { 4989 header_prev = header; 4990 continue; 4991 } 4992 if (header->type == matchtype) { 4993 found = header; 4994 if (foundsig != NULL) { 4995 break; 4996 } 4997 } else if (header->type == sigmatchtype) { 4998 foundsig = header; 4999 if (found != NULL) { 5000 break; 5001 } 5002 } 5003 header_prev = header; 5004 } 5005 if (found != NULL) { 5006 bind_rdataset(search->rbtdb, node, found, now, locktype, 5007 rdataset); 5008 if (foundsig != NULL) { 5009 bind_rdataset(search->rbtdb, node, foundsig, now, 5010 locktype, sigrdataset); 5011 } 5012 new_reference(search->rbtdb, node, locktype); 5013 5014 dns_name_copy(fname, foundname); 5015 5016 *nodep = node; 5017 result = DNS_R_COVERINGNSEC; 5018 } else { 5019 result = ISC_R_NOTFOUND; 5020 } 5021 NODE_UNLOCK(lock, locktype); 5022 return (result); 5023 } 5024 5025 static isc_result_t 5026 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 5027 dns_rdatatype_t type, unsigned 
int options, isc_stdtime_t now, 5028 dns_dbnode_t **nodep, dns_name_t *foundname, 5029 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 5030 dns_rbtnode_t *node = NULL; 5031 isc_result_t result; 5032 rbtdb_search_t search; 5033 bool cname_ok = true; 5034 bool found_noqname = false; 5035 bool all_negative = true; 5036 bool empty_node; 5037 nodelock_t *lock; 5038 isc_rwlocktype_t locktype; 5039 rdatasetheader_t *header, *header_prev, *header_next; 5040 rdatasetheader_t *found, *nsheader; 5041 rdatasetheader_t *foundsig, *nssig, *cnamesig; 5042 rdatasetheader_t *update, *updatesig; 5043 rdatasetheader_t *nsecheader, *nsecsig; 5044 rbtdb_rdatatype_t sigtype, negtype; 5045 5046 UNUSED(version); 5047 5048 search.rbtdb = (dns_rbtdb_t *)db; 5049 5050 REQUIRE(VALID_RBTDB(search.rbtdb)); 5051 REQUIRE(version == NULL); 5052 5053 if (now == 0) { 5054 isc_stdtime_get(&now); 5055 } 5056 5057 search.rbtversion = NULL; 5058 search.serial = 1; 5059 search.options = options; 5060 search.copy_name = false; 5061 search.need_cleanup = false; 5062 search.wild = false; 5063 search.zonecut = NULL; 5064 search.zonecut_rdataset = NULL; 5065 search.zonecut_sigrdataset = NULL; 5066 dns_fixedname_init(&search.zonecut_name); 5067 dns_rbtnodechain_init(&search.chain); 5068 search.now = now; 5069 update = NULL; 5070 updatesig = NULL; 5071 5072 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5073 5074 /* 5075 * Search down from the root of the tree. If, while going down, we 5076 * encounter a callback node, cache_zonecut_callback() will search the 5077 * rdatasets at the zone cut for a DNAME rdataset. 5078 */ 5079 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, 5080 &search.chain, DNS_RBTFIND_EMPTYDATA, 5081 cache_zonecut_callback, &search); 5082 5083 if (result == DNS_R_PARTIALMATCH) { 5084 /* 5085 * If dns_rbt_findnode discovered a covering DNAME skip 5086 * looking for a covering NSEC. 
5087 */ 5088 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 5089 (search.zonecut_rdataset == NULL || 5090 search.zonecut_rdataset->type != dns_rdatatype_dname)) 5091 { 5092 result = find_coveringnsec(&search, name, nodep, now, 5093 foundname, rdataset, 5094 sigrdataset); 5095 if (result == DNS_R_COVERINGNSEC) { 5096 goto tree_exit; 5097 } 5098 } 5099 if (search.zonecut != NULL) { 5100 result = setup_delegation(&search, nodep, foundname, 5101 rdataset, sigrdataset); 5102 goto tree_exit; 5103 } else { 5104 find_ns: 5105 result = find_deepest_zonecut(&search, node, nodep, 5106 foundname, rdataset, 5107 sigrdataset); 5108 goto tree_exit; 5109 } 5110 } else if (result != ISC_R_SUCCESS) { 5111 goto tree_exit; 5112 } 5113 5114 /* 5115 * Certain DNSSEC types are not subject to CNAME matching 5116 * (RFC4035, section 2.5 and RFC3007). 5117 * 5118 * We don't check for RRSIG, because we don't store RRSIG records 5119 * directly. 5120 */ 5121 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 5122 cname_ok = false; 5123 } 5124 5125 /* 5126 * We now go looking for rdata... 5127 */ 5128 5129 lock = &(search.rbtdb->node_locks[node->locknum].lock); 5130 locktype = isc_rwlocktype_read; 5131 NODE_LOCK(lock, locktype); 5132 5133 found = NULL; 5134 foundsig = NULL; 5135 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 5136 negtype = RBTDB_RDATATYPE_VALUE(0, type); 5137 nsheader = NULL; 5138 nsecheader = NULL; 5139 nssig = NULL; 5140 nsecsig = NULL; 5141 cnamesig = NULL; 5142 empty_node = true; 5143 header_prev = NULL; 5144 for (header = node->data; header != NULL; header = header_next) { 5145 header_next = header->next; 5146 if (check_stale_header(node, header, &locktype, lock, &search, 5147 &header_prev)) 5148 { 5149 /* Do nothing. */ 5150 } else if (EXISTS(header) && !ANCIENT(header)) { 5151 /* 5152 * We now know that there is at least one active 5153 * non-stale rdataset at this node. 
5154 */ 5155 empty_node = false; 5156 if (header->noqname != NULL && 5157 header->trust == dns_trust_secure) 5158 { 5159 found_noqname = true; 5160 } 5161 if (!NEGATIVE(header)) { 5162 all_negative = false; 5163 } 5164 5165 /* 5166 * If we found a type we were looking for, remember 5167 * it. 5168 */ 5169 if (header->type == type || 5170 (type == dns_rdatatype_any && 5171 RBTDB_RDATATYPE_BASE(header->type) != 0) || 5172 (cname_ok && header->type == dns_rdatatype_cname)) 5173 { 5174 /* 5175 * We've found the answer. 5176 */ 5177 found = header; 5178 if (header->type == dns_rdatatype_cname && 5179 cname_ok) 5180 { 5181 /* 5182 * If we've already got the 5183 * CNAME RRSIG, use it. 5184 */ 5185 if (cnamesig != NULL) { 5186 foundsig = cnamesig; 5187 } else { 5188 sigtype = 5189 RBTDB_RDATATYPE_SIGCNAME; 5190 } 5191 } 5192 } else if (header->type == sigtype) { 5193 /* 5194 * We've found the RRSIG rdataset for our 5195 * target type. Remember it. 5196 */ 5197 foundsig = header; 5198 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || 5199 header->type == negtype) 5200 { 5201 /* 5202 * We've found a negative cache entry. 5203 */ 5204 found = header; 5205 } else if (header->type == dns_rdatatype_ns) { 5206 /* 5207 * Remember a NS rdataset even if we're 5208 * not specifically looking for it, because 5209 * we might need it later. 5210 */ 5211 nsheader = header; 5212 } else if (header->type == RBTDB_RDATATYPE_SIGNS) { 5213 /* 5214 * If we need the NS rdataset, we'll also 5215 * need its signature. 5216 */ 5217 nssig = header; 5218 } else if (header->type == dns_rdatatype_nsec) { 5219 nsecheader = header; 5220 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) { 5221 nsecsig = header; 5222 } else if (cname_ok && 5223 header->type == RBTDB_RDATATYPE_SIGCNAME) 5224 { 5225 /* 5226 * If we get a CNAME match, we'll also need 5227 * its signature. 
5228 */ 5229 cnamesig = header; 5230 } 5231 header_prev = header; 5232 } else { 5233 header_prev = header; 5234 } 5235 } 5236 5237 if (empty_node) { 5238 /* 5239 * We have an exact match for the name, but there are no 5240 * extant rdatasets. That means that this node doesn't 5241 * meaningfully exist, and that we really have a partial match. 5242 */ 5243 NODE_UNLOCK(lock, locktype); 5244 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { 5245 result = find_coveringnsec(&search, name, nodep, now, 5246 foundname, rdataset, 5247 sigrdataset); 5248 if (result == DNS_R_COVERINGNSEC) { 5249 goto tree_exit; 5250 } 5251 } 5252 goto find_ns; 5253 } 5254 5255 /* 5256 * If we didn't find what we were looking for... 5257 */ 5258 if (found == NULL || 5259 (DNS_TRUST_ADDITIONAL(found->trust) && 5260 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || 5261 (found->trust == dns_trust_glue && 5262 ((options & DNS_DBFIND_GLUEOK) == 0)) || 5263 (DNS_TRUST_PENDING(found->trust) && 5264 ((options & DNS_DBFIND_PENDINGOK) == 0))) 5265 { 5266 /* 5267 * Return covering NODATA NSEC record. 5268 */ 5269 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 5270 nsecheader != NULL) 5271 { 5272 if (nodep != NULL) { 5273 new_reference(search.rbtdb, node, locktype); 5274 *nodep = node; 5275 } 5276 bind_rdataset(search.rbtdb, node, nsecheader, 5277 search.now, locktype, rdataset); 5278 if (need_headerupdate(nsecheader, search.now)) { 5279 update = nsecheader; 5280 } 5281 if (nsecsig != NULL) { 5282 bind_rdataset(search.rbtdb, node, nsecsig, 5283 search.now, locktype, 5284 sigrdataset); 5285 if (need_headerupdate(nsecsig, search.now)) { 5286 updatesig = nsecsig; 5287 } 5288 } 5289 result = DNS_R_COVERINGNSEC; 5290 goto node_exit; 5291 } 5292 5293 /* 5294 * This name was from a wild card. Look for a covering NSEC. 
5295 */ 5296 if (found == NULL && (found_noqname || all_negative) && 5297 (search.options & DNS_DBFIND_COVERINGNSEC) != 0) 5298 { 5299 NODE_UNLOCK(lock, locktype); 5300 result = find_coveringnsec(&search, name, nodep, now, 5301 foundname, rdataset, 5302 sigrdataset); 5303 if (result == DNS_R_COVERINGNSEC) { 5304 goto tree_exit; 5305 } 5306 goto find_ns; 5307 } 5308 5309 /* 5310 * If there is an NS rdataset at this node, then this is the 5311 * deepest zone cut. 5312 */ 5313 if (nsheader != NULL) { 5314 if (nodep != NULL) { 5315 new_reference(search.rbtdb, node, locktype); 5316 *nodep = node; 5317 } 5318 bind_rdataset(search.rbtdb, node, nsheader, search.now, 5319 locktype, rdataset); 5320 if (need_headerupdate(nsheader, search.now)) { 5321 update = nsheader; 5322 } 5323 if (nssig != NULL) { 5324 bind_rdataset(search.rbtdb, node, nssig, 5325 search.now, locktype, 5326 sigrdataset); 5327 if (need_headerupdate(nssig, search.now)) { 5328 updatesig = nssig; 5329 } 5330 } 5331 result = DNS_R_DELEGATION; 5332 goto node_exit; 5333 } 5334 5335 /* 5336 * Go find the deepest zone cut. 5337 */ 5338 NODE_UNLOCK(lock, locktype); 5339 goto find_ns; 5340 } 5341 5342 /* 5343 * We found what we were looking for, or we found a CNAME. 5344 */ 5345 5346 if (nodep != NULL) { 5347 new_reference(search.rbtdb, node, locktype); 5348 *nodep = node; 5349 } 5350 5351 if (NEGATIVE(found)) { 5352 /* 5353 * We found a negative cache entry. 5354 */ 5355 if (NXDOMAIN(found)) { 5356 result = DNS_R_NCACHENXDOMAIN; 5357 } else { 5358 result = DNS_R_NCACHENXRRSET; 5359 } 5360 } else if (type != found->type && type != dns_rdatatype_any && 5361 found->type == dns_rdatatype_cname) 5362 { 5363 /* 5364 * We weren't doing an ANY query and we found a CNAME instead 5365 * of the type we were looking for, so we need to indicate 5366 * that result to the caller. 5367 */ 5368 result = DNS_R_CNAME; 5369 } else { 5370 /* 5371 * An ordinary successful query! 
5372 */ 5373 result = ISC_R_SUCCESS; 5374 } 5375 5376 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || 5377 result == DNS_R_NCACHENXRRSET) 5378 { 5379 bind_rdataset(search.rbtdb, node, found, search.now, locktype, 5380 rdataset); 5381 if (need_headerupdate(found, search.now)) { 5382 update = found; 5383 } 5384 if (!NEGATIVE(found) && foundsig != NULL) { 5385 bind_rdataset(search.rbtdb, node, foundsig, search.now, 5386 locktype, sigrdataset); 5387 if (need_headerupdate(foundsig, search.now)) { 5388 updatesig = foundsig; 5389 } 5390 } 5391 } 5392 5393 node_exit: 5394 if ((update != NULL || updatesig != NULL) && 5395 locktype != isc_rwlocktype_write) 5396 { 5397 NODE_UNLOCK(lock, locktype); 5398 NODE_LOCK(lock, isc_rwlocktype_write); 5399 locktype = isc_rwlocktype_write; 5400 POST(locktype); 5401 } 5402 if (update != NULL && need_headerupdate(update, search.now)) { 5403 update_header(search.rbtdb, update, search.now); 5404 } 5405 if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { 5406 update_header(search.rbtdb, updatesig, search.now); 5407 } 5408 5409 NODE_UNLOCK(lock, locktype); 5410 5411 tree_exit: 5412 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5413 5414 /* 5415 * If we found a zonecut but aren't going to use it, we have to 5416 * let go of it. 
5417 */ 5418 if (search.need_cleanup) { 5419 node = search.zonecut; 5420 INSIST(node != NULL); 5421 lock = &(search.rbtdb->node_locks[node->locknum].lock); 5422 5423 NODE_LOCK(lock, isc_rwlocktype_read); 5424 decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read, 5425 isc_rwlocktype_none, false); 5426 NODE_UNLOCK(lock, isc_rwlocktype_read); 5427 } 5428 5429 dns_rbtnodechain_reset(&search.chain); 5430 5431 update_cachestats(search.rbtdb, result); 5432 return (result); 5433 } 5434 5435 static isc_result_t 5436 cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 5437 isc_stdtime_t now, dns_dbnode_t **nodep, 5438 dns_name_t *foundname, dns_name_t *dcname, 5439 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 5440 dns_rbtnode_t *node = NULL; 5441 nodelock_t *lock; 5442 isc_result_t result; 5443 rbtdb_search_t search; 5444 rdatasetheader_t *header, *header_prev, *header_next; 5445 rdatasetheader_t *found, *foundsig; 5446 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; 5447 isc_rwlocktype_t locktype; 5448 bool dcnull = (dcname == NULL); 5449 5450 search.rbtdb = (dns_rbtdb_t *)db; 5451 5452 REQUIRE(VALID_RBTDB(search.rbtdb)); 5453 5454 if (now == 0) { 5455 isc_stdtime_get(&now); 5456 } 5457 5458 search.rbtversion = NULL; 5459 search.serial = 1; 5460 search.options = options; 5461 search.copy_name = false; 5462 search.need_cleanup = false; 5463 search.wild = false; 5464 search.zonecut = NULL; 5465 dns_fixedname_init(&search.zonecut_name); 5466 dns_rbtnodechain_init(&search.chain); 5467 search.now = now; 5468 5469 if (dcnull) { 5470 dcname = foundname; 5471 } 5472 5473 if ((options & DNS_DBFIND_NOEXACT) != 0) { 5474 rbtoptions |= DNS_RBTFIND_NOEXACT; 5475 } 5476 5477 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5478 5479 /* 5480 * Search down from the root of the tree. 
5481 */ 5482 result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, 5483 &search.chain, rbtoptions, NULL, &search); 5484 5485 if (result == DNS_R_PARTIALMATCH) { 5486 result = find_deepest_zonecut(&search, node, nodep, foundname, 5487 rdataset, sigrdataset); 5488 goto tree_exit; 5489 } else if (result != ISC_R_SUCCESS) { 5490 goto tree_exit; 5491 } else if (!dcnull) { 5492 dns_name_copy(dcname, foundname); 5493 } 5494 5495 /* 5496 * We now go looking for an NS rdataset at the node. 5497 */ 5498 5499 lock = &(search.rbtdb->node_locks[node->locknum].lock); 5500 locktype = isc_rwlocktype_read; 5501 NODE_LOCK(lock, locktype); 5502 5503 found = NULL; 5504 foundsig = NULL; 5505 header_prev = NULL; 5506 for (header = node->data; header != NULL; header = header_next) { 5507 header_next = header->next; 5508 if (check_stale_header(node, header, &locktype, lock, &search, 5509 &header_prev)) 5510 { 5511 /* 5512 * The function dns_rbt_findnode found us the a matching 5513 * node for 'name' and stored the result in 'dcname'. 5514 * This is the deepest known zonecut in our database. 5515 * However, this node may be stale and if serve-stale 5516 * is not enabled (in other words 'stale-answer-enable' 5517 * is set to no), this node may not be used as a 5518 * zonecut we know about. If so, find the deepest 5519 * zonecut from this node up and return that instead. 5520 */ 5521 NODE_UNLOCK(lock, locktype); 5522 result = find_deepest_zonecut(&search, node, nodep, 5523 foundname, rdataset, 5524 sigrdataset); 5525 dns_name_copy(foundname, dcname); 5526 goto tree_exit; 5527 } else if (EXISTS(header) && !ANCIENT(header)) { 5528 /* 5529 * If we found a type we were looking for, remember 5530 * it. 5531 */ 5532 if (header->type == dns_rdatatype_ns) { 5533 /* 5534 * Remember a NS rdataset even if we're 5535 * not specifically looking for it, because 5536 * we might need it later. 
5537 */ 5538 found = header; 5539 } else if (header->type == RBTDB_RDATATYPE_SIGNS) { 5540 /* 5541 * If we need the NS rdataset, we'll also 5542 * need its signature. 5543 */ 5544 foundsig = header; 5545 } 5546 header_prev = header; 5547 } else { 5548 header_prev = header; 5549 } 5550 } 5551 5552 if (found == NULL) { 5553 /* 5554 * No NS records here. 5555 */ 5556 NODE_UNLOCK(lock, locktype); 5557 result = find_deepest_zonecut(&search, node, nodep, foundname, 5558 rdataset, sigrdataset); 5559 goto tree_exit; 5560 } 5561 5562 if (nodep != NULL) { 5563 new_reference(search.rbtdb, node, locktype); 5564 *nodep = node; 5565 } 5566 5567 bind_rdataset(search.rbtdb, node, found, search.now, locktype, 5568 rdataset); 5569 if (foundsig != NULL) { 5570 bind_rdataset(search.rbtdb, node, foundsig, search.now, 5571 locktype, sigrdataset); 5572 } 5573 5574 if (need_headerupdate(found, search.now) || 5575 (foundsig != NULL && need_headerupdate(foundsig, search.now))) 5576 { 5577 if (locktype != isc_rwlocktype_write) { 5578 NODE_UNLOCK(lock, locktype); 5579 NODE_LOCK(lock, isc_rwlocktype_write); 5580 locktype = isc_rwlocktype_write; 5581 POST(locktype); 5582 } 5583 if (need_headerupdate(found, search.now)) { 5584 update_header(search.rbtdb, found, search.now); 5585 } 5586 if (foundsig != NULL && need_headerupdate(foundsig, search.now)) 5587 { 5588 update_header(search.rbtdb, foundsig, search.now); 5589 } 5590 } 5591 5592 NODE_UNLOCK(lock, locktype); 5593 5594 tree_exit: 5595 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5596 5597 INSIST(!search.need_cleanup); 5598 5599 dns_rbtnodechain_reset(&search.chain); 5600 5601 if (result == DNS_R_DELEGATION) { 5602 result = ISC_R_SUCCESS; 5603 } 5604 5605 return (result); 5606 } 5607 5608 static void 5609 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) { 5610 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5611 dns_rbtnode_t *node = (dns_rbtnode_t *)source; 5612 5613 REQUIRE(VALID_RBTDB(rbtdb)); 5614 REQUIRE(targetp 
!= NULL && *targetp == NULL); 5615 5616 isc_refcount_increment(&node->references); 5617 5618 *targetp = source; 5619 } 5620 5621 static void 5622 detachnode(dns_db_t *db, dns_dbnode_t **targetp) { 5623 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5624 dns_rbtnode_t *node; 5625 bool want_free = false; 5626 bool inactive = false; 5627 rbtdb_nodelock_t *nodelock; 5628 5629 REQUIRE(VALID_RBTDB(rbtdb)); 5630 REQUIRE(targetp != NULL && *targetp != NULL); 5631 5632 node = (dns_rbtnode_t *)(*targetp); 5633 nodelock = &rbtdb->node_locks[node->locknum]; 5634 5635 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read); 5636 5637 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, 5638 isc_rwlocktype_none, false)) 5639 { 5640 if (isc_refcount_current(&nodelock->references) == 0 && 5641 nodelock->exiting) 5642 { 5643 inactive = true; 5644 } 5645 } 5646 5647 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read); 5648 5649 *targetp = NULL; 5650 5651 if (inactive) { 5652 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 5653 rbtdb->active--; 5654 if (rbtdb->active == 0) { 5655 want_free = true; 5656 } 5657 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 5658 if (want_free) { 5659 char buf[DNS_NAME_FORMATSIZE]; 5660 if (dns_name_dynamic(&rbtdb->common.origin)) { 5661 dns_name_format(&rbtdb->common.origin, buf, 5662 sizeof(buf)); 5663 } else { 5664 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 5665 } 5666 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 5667 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 5668 "calling free_rbtdb(%s)", buf); 5669 free_rbtdb(rbtdb, true, NULL); 5670 } 5671 } 5672 } 5673 5674 static isc_result_t 5675 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) { 5676 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5677 dns_rbtnode_t *rbtnode = node; 5678 rdatasetheader_t *header; 5679 bool force_expire = false; 5680 /* 5681 * These are the category and module used by the cache cleaner. 
5682 */ 5683 bool log = false; 5684 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE; 5685 isc_logmodule_t *module = DNS_LOGMODULE_CACHE; 5686 int level = ISC_LOG_DEBUG(2); 5687 char printname[DNS_NAME_FORMATSIZE]; 5688 5689 REQUIRE(VALID_RBTDB(rbtdb)); 5690 5691 /* 5692 * Caller must hold a tree lock. 5693 */ 5694 5695 if (now == 0) { 5696 isc_stdtime_get(&now); 5697 } 5698 5699 if (isc_mem_isovermem(rbtdb->common.mctx)) { 5700 /* 5701 * Force expire with 25% probability. 5702 * XXXDCL Could stand to have a better policy, like LRU. 5703 */ 5704 force_expire = (rbtnode->down == NULL && 5705 (isc_random32() % 4) == 0); 5706 5707 /* 5708 * Note that 'log' can be true IFF overmem is also true. 5709 * overmem can currently only be true for cache 5710 * databases -- hence all of the "overmem cache" log strings. 5711 */ 5712 log = isc_log_wouldlog(dns_lctx, level); 5713 if (log) { 5714 isc_log_write( 5715 dns_lctx, category, module, level, 5716 "overmem cache: %s %s", 5717 force_expire ? "FORCE" : "check", 5718 dns_rbt_formatnodename(rbtnode, printname, 5719 sizeof(printname))); 5720 } 5721 } 5722 5723 /* 5724 * We may not need write access, but this code path is not performance 5725 * sensitive, so it should be okay to always lock as a writer. 5726 */ 5727 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5728 isc_rwlocktype_write); 5729 5730 for (header = rbtnode->data; header != NULL; header = header->next) { 5731 if (header->rdh_ttl + STALE_TTL(header, rbtdb) <= 5732 now - RBTDB_VIRTUAL) 5733 { 5734 /* 5735 * We don't check if refcurrent(rbtnode) == 0 and try 5736 * to free like we do in cache_find(), because 5737 * refcurrent(rbtnode) must be non-zero. This is so 5738 * because 'node' is an argument to the function. 
5739 */ 5740 mark_header_ancient(rbtdb, header); 5741 if (log) { 5742 isc_log_write(dns_lctx, category, module, level, 5743 "overmem cache: ancient %s", 5744 printname); 5745 } 5746 } else if (force_expire) { 5747 if (!RETAIN(header)) { 5748 set_ttl(rbtdb, header, 0); 5749 mark_header_ancient(rbtdb, header); 5750 } else if (log) { 5751 isc_log_write(dns_lctx, category, module, level, 5752 "overmem cache: " 5753 "reprieve by RETAIN() %s", 5754 printname); 5755 } 5756 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log) { 5757 isc_log_write(dns_lctx, category, module, level, 5758 "overmem cache: saved %s", printname); 5759 } 5760 } 5761 5762 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5763 isc_rwlocktype_write); 5764 5765 return (ISC_R_SUCCESS); 5766 } 5767 5768 static void 5769 overmem(dns_db_t *db, bool over) { 5770 /* This is an empty callback. See adb.c:water() */ 5771 5772 UNUSED(db); 5773 UNUSED(over); 5774 5775 return; 5776 } 5777 5778 static void 5779 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) { 5780 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5781 dns_rbtnode_t *rbtnode = node; 5782 bool first; 5783 uint32_t refs; 5784 5785 REQUIRE(VALID_RBTDB(rbtdb)); 5786 5787 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5788 isc_rwlocktype_read); 5789 5790 refs = isc_refcount_current(&rbtnode->references); 5791 fprintf(out, "node %p, %" PRIu32 " references, locknum = %u\n", rbtnode, 5792 refs, rbtnode->locknum); 5793 if (rbtnode->data != NULL) { 5794 rdatasetheader_t *current, *top_next; 5795 5796 for (current = rbtnode->data; current != NULL; 5797 current = top_next) 5798 { 5799 top_next = current->next; 5800 first = true; 5801 fprintf(out, "\ttype %u", current->type); 5802 do { 5803 uint_least16_t attributes = atomic_load_acquire( 5804 ¤t->attributes); 5805 if (!first) { 5806 fprintf(out, "\t"); 5807 } 5808 first = false; 5809 fprintf(out, 5810 "\tserial = %lu, ttl = %u, " 5811 "trust = %u, attributes = %" PRIuLEAST16 5812 ", " 5813 
"resign = %u\n", 5814 (unsigned long)current->serial, 5815 current->rdh_ttl, current->trust, 5816 attributes, 5817 (current->resign << 1) | 5818 current->resign_lsb); 5819 current = current->down; 5820 } while (current != NULL); 5821 } 5822 } else { 5823 fprintf(out, "(empty)\n"); 5824 } 5825 5826 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5827 isc_rwlocktype_read); 5828 } 5829 5830 static isc_result_t 5831 createiterator(dns_db_t *db, unsigned int options, 5832 dns_dbiterator_t **iteratorp) { 5833 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5834 rbtdb_dbiterator_t *rbtdbiter; 5835 5836 REQUIRE(VALID_RBTDB(rbtdb)); 5837 REQUIRE((options & (DNS_DB_NSEC3ONLY | DNS_DB_NONSEC3)) != 5838 (DNS_DB_NSEC3ONLY | DNS_DB_NONSEC3)); 5839 5840 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter)); 5841 5842 rbtdbiter->common.methods = &dbiterator_methods; 5843 rbtdbiter->common.db = NULL; 5844 dns_db_attach(db, &rbtdbiter->common.db); 5845 rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) != 5846 0); 5847 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC; 5848 rbtdbiter->common.cleaning = false; 5849 rbtdbiter->paused = true; 5850 rbtdbiter->tree_locked = isc_rwlocktype_none; 5851 rbtdbiter->result = ISC_R_SUCCESS; 5852 dns_fixedname_init(&rbtdbiter->name); 5853 dns_fixedname_init(&rbtdbiter->origin); 5854 rbtdbiter->node = NULL; 5855 rbtdbiter->delcnt = 0; 5856 if ((options & DNS_DB_NSEC3ONLY) != 0) { 5857 rbtdbiter->nsec3mode = nsec3only; 5858 } else if ((options & DNS_DB_NONSEC3) != 0) { 5859 rbtdbiter->nsec3mode = nonsec3; 5860 } else { 5861 rbtdbiter->nsec3mode = full; 5862 } 5863 5864 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions)); 5865 dns_rbtnodechain_init(&rbtdbiter->chain); 5866 dns_rbtnodechain_init(&rbtdbiter->nsec3chain); 5867 if (rbtdbiter->nsec3mode == nsec3only) { 5868 rbtdbiter->current = &rbtdbiter->nsec3chain; 5869 } else { 5870 rbtdbiter->current = &rbtdbiter->chain; 5871 } 5872 5873 *iteratorp = 
(dns_dbiterator_t *)rbtdbiter; 5874 5875 return (ISC_R_SUCCESS); 5876 } 5877 5878 static isc_result_t 5879 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 5880 dns_rdatatype_t type, dns_rdatatype_t covers, 5881 isc_stdtime_t now, dns_rdataset_t *rdataset, 5882 dns_rdataset_t *sigrdataset) { 5883 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5884 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 5885 rdatasetheader_t *header, *header_next, *found, *foundsig; 5886 rbtdb_serial_t serial; 5887 rbtdb_version_t *rbtversion = version; 5888 bool close_version = false; 5889 rbtdb_rdatatype_t matchtype, sigmatchtype; 5890 5891 REQUIRE(VALID_RBTDB(rbtdb)); 5892 REQUIRE(type != dns_rdatatype_any); 5893 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 5894 5895 if (rbtversion == NULL) { 5896 currentversion(db, (dns_dbversion_t **)(void *)(&rbtversion)); 5897 close_version = true; 5898 } 5899 serial = rbtversion->serial; 5900 now = 0; 5901 5902 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5903 isc_rwlocktype_read); 5904 5905 found = NULL; 5906 foundsig = NULL; 5907 matchtype = RBTDB_RDATATYPE_VALUE(type, covers); 5908 if (covers == 0) { 5909 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 5910 } else { 5911 sigmatchtype = 0; 5912 } 5913 5914 for (header = rbtnode->data; header != NULL; header = header_next) { 5915 header_next = header->next; 5916 do { 5917 if (header->serial <= serial && !IGNORE(header)) { 5918 /* 5919 * Is this a "this rdataset doesn't 5920 * exist" record? 5921 */ 5922 if (NONEXISTENT(header)) { 5923 header = NULL; 5924 } 5925 break; 5926 } else { 5927 header = header->down; 5928 } 5929 } while (header != NULL); 5930 if (header != NULL) { 5931 /* 5932 * We have an active, extant rdataset. If it's a 5933 * type we're looking for, remember it. 
5934 */ 5935 if (header->type == matchtype) { 5936 found = header; 5937 if (foundsig != NULL) { 5938 break; 5939 } 5940 } else if (header->type == sigmatchtype) { 5941 foundsig = header; 5942 if (found != NULL) { 5943 break; 5944 } 5945 } 5946 } 5947 } 5948 if (found != NULL) { 5949 bind_rdataset(rbtdb, rbtnode, found, now, isc_rwlocktype_read, 5950 rdataset); 5951 if (foundsig != NULL) { 5952 bind_rdataset(rbtdb, rbtnode, foundsig, now, 5953 isc_rwlocktype_read, sigrdataset); 5954 } 5955 } 5956 5957 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5958 isc_rwlocktype_read); 5959 5960 if (close_version) { 5961 closeversion(db, (dns_dbversion_t **)(void *)(&rbtversion), 5962 false); 5963 } 5964 5965 if (found == NULL) { 5966 return (ISC_R_NOTFOUND); 5967 } 5968 5969 return (ISC_R_SUCCESS); 5970 } 5971 5972 static isc_result_t 5973 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 5974 dns_rdatatype_t type, dns_rdatatype_t covers, 5975 isc_stdtime_t now, dns_rdataset_t *rdataset, 5976 dns_rdataset_t *sigrdataset) { 5977 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5978 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 5979 rdatasetheader_t *header, *header_next, *found, *foundsig; 5980 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype; 5981 isc_result_t result; 5982 nodelock_t *lock; 5983 isc_rwlocktype_t locktype; 5984 5985 REQUIRE(VALID_RBTDB(rbtdb)); 5986 REQUIRE(type != dns_rdatatype_any); 5987 5988 UNUSED(version); 5989 5990 result = ISC_R_SUCCESS; 5991 5992 if (now == 0) { 5993 isc_stdtime_get(&now); 5994 } 5995 5996 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 5997 locktype = isc_rwlocktype_read; 5998 NODE_LOCK(lock, locktype); 5999 6000 found = NULL; 6001 foundsig = NULL; 6002 matchtype = RBTDB_RDATATYPE_VALUE(type, covers); 6003 negtype = RBTDB_RDATATYPE_VALUE(0, type); 6004 if (covers == 0) { 6005 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 6006 } else { 6007 sigmatchtype = 0; 6008 } 6009 6010 for 
(header = rbtnode->data; header != NULL; header = header_next) { 6011 header_next = header->next; 6012 if (!ACTIVE(header, now)) { 6013 if ((header->rdh_ttl + STALE_TTL(header, rbtdb) < 6014 now - RBTDB_VIRTUAL) && 6015 (locktype == isc_rwlocktype_write || 6016 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) 6017 { 6018 /* 6019 * We update the node's status only when we 6020 * can get write access. 6021 */ 6022 locktype = isc_rwlocktype_write; 6023 6024 /* 6025 * We don't check if refcurrent(rbtnode) == 0 6026 * and try to free like we do in cache_find(), 6027 * because refcurrent(rbtnode) must be 6028 * non-zero. This is so because 'node' is an 6029 * argument to the function. 6030 */ 6031 mark_header_ancient(rbtdb, header); 6032 } 6033 } else if (EXISTS(header) && !ANCIENT(header)) { 6034 if (header->type == matchtype) { 6035 found = header; 6036 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || 6037 header->type == negtype) 6038 { 6039 found = header; 6040 } else if (header->type == sigmatchtype) { 6041 foundsig = header; 6042 } 6043 } 6044 } 6045 if (found != NULL) { 6046 bind_rdataset(rbtdb, rbtnode, found, now, locktype, rdataset); 6047 if (!NEGATIVE(found) && foundsig != NULL) { 6048 bind_rdataset(rbtdb, rbtnode, foundsig, now, locktype, 6049 sigrdataset); 6050 } 6051 } 6052 6053 NODE_UNLOCK(lock, locktype); 6054 6055 if (found == NULL) { 6056 return (ISC_R_NOTFOUND); 6057 } 6058 6059 if (NEGATIVE(found)) { 6060 /* 6061 * We found a negative cache entry. 
6062 */ 6063 if (NXDOMAIN(found)) { 6064 result = DNS_R_NCACHENXDOMAIN; 6065 } else { 6066 result = DNS_R_NCACHENXRRSET; 6067 } 6068 } 6069 6070 update_cachestats(rbtdb, result); 6071 6072 return (result); 6073 } 6074 6075 static isc_result_t 6076 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 6077 unsigned int options, isc_stdtime_t now, 6078 dns_rdatasetiter_t **iteratorp) { 6079 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 6080 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 6081 rbtdb_version_t *rbtversion = version; 6082 rbtdb_rdatasetiter_t *iterator; 6083 6084 REQUIRE(VALID_RBTDB(rbtdb)); 6085 6086 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator)); 6087 6088 if ((db->attributes & DNS_DBATTR_CACHE) == 0) { 6089 now = 0; 6090 if (rbtversion == NULL) { 6091 currentversion( 6092 db, (dns_dbversion_t **)(void *)(&rbtversion)); 6093 } else { 6094 INSIST(rbtversion->rbtdb == rbtdb); 6095 6096 (void)isc_refcount_increment(&rbtversion->references); 6097 } 6098 } else { 6099 if (now == 0) { 6100 isc_stdtime_get(&now); 6101 } 6102 rbtversion = NULL; 6103 } 6104 6105 iterator->common.magic = DNS_RDATASETITER_MAGIC; 6106 iterator->common.methods = &rdatasetiter_methods; 6107 iterator->common.db = db; 6108 iterator->common.node = node; 6109 iterator->common.version = (dns_dbversion_t *)rbtversion; 6110 iterator->common.options = options; 6111 iterator->common.now = now; 6112 6113 isc_refcount_increment(&rbtnode->references); 6114 6115 iterator->current = NULL; 6116 6117 *iteratorp = (dns_rdatasetiter_t *)iterator; 6118 6119 return (ISC_R_SUCCESS); 6120 } 6121 6122 static bool 6123 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) { 6124 rdatasetheader_t *header, *header_next; 6125 bool cname = false, other_data = false; 6126 dns_rdatatype_t rdtype; 6127 6128 /* 6129 * The caller must hold the node lock. 6130 */ 6131 6132 /* 6133 * Look for CNAME and "other data" rdatasets active in our version. 
6134 */ 6135 for (header = node->data; header != NULL; header = header_next) { 6136 header_next = header->next; 6137 if (!prio_type(header->type)) { 6138 /* 6139 * CNAME is in the priority list, so if we are done 6140 * with the priority list, we know there will not be 6141 * CNAME, so we are safe to skip the rest of the types. 6142 */ 6143 return (false); 6144 } 6145 if (header->type == dns_rdatatype_cname) { 6146 /* 6147 * Look for an active extant CNAME. 6148 */ 6149 do { 6150 if (header->serial <= serial && !IGNORE(header)) 6151 { 6152 /* 6153 * Is this a "this rdataset doesn't 6154 * exist" record? 6155 */ 6156 if (NONEXISTENT(header)) { 6157 header = NULL; 6158 } 6159 break; 6160 } else { 6161 header = header->down; 6162 } 6163 } while (header != NULL); 6164 if (header != NULL) { 6165 cname = true; 6166 } 6167 } else { 6168 /* 6169 * Look for active extant "other data". 6170 * 6171 * "Other data" is any rdataset whose type is not 6172 * KEY, NSEC, SIG or RRSIG. 6173 */ 6174 rdtype = RBTDB_RDATATYPE_BASE(header->type); 6175 if (rdtype != dns_rdatatype_key && 6176 rdtype != dns_rdatatype_sig && 6177 rdtype != dns_rdatatype_nsec && 6178 rdtype != dns_rdatatype_rrsig) 6179 { 6180 /* 6181 * Is it active and extant? 6182 */ 6183 do { 6184 if (header->serial <= serial && 6185 !IGNORE(header)) 6186 { 6187 /* 6188 * Is this a "this rdataset 6189 * doesn't exist" record? 
6190 */ 6191 if (NONEXISTENT(header)) { 6192 header = NULL; 6193 } 6194 break; 6195 } else { 6196 header = header->down; 6197 } 6198 } while (header != NULL); 6199 if (header != NULL) { 6200 other_data = true; 6201 } 6202 } 6203 } 6204 if (cname && other_data) { 6205 return (true); 6206 } 6207 } 6208 6209 return (false); 6210 } 6211 6212 static void 6213 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) { 6214 INSIST(!IS_CACHE(rbtdb)); 6215 INSIST(newheader->heap_index == 0); 6216 INSIST(!ISC_LINK_LINKED(newheader, link)); 6217 6218 isc_heap_insert(rbtdb->heaps[idx], newheader); 6219 } 6220 6221 /* 6222 * node write lock must be held. 6223 */ 6224 static void 6225 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, 6226 rdatasetheader_t *header) { 6227 /* 6228 * Remove the old header from the heap 6229 */ 6230 if (header != NULL && header->heap_index != 0) { 6231 isc_heap_delete(rbtdb->heaps[header->node->locknum], 6232 header->heap_index); 6233 header->heap_index = 0; 6234 if (version != NULL) { 6235 new_reference(rbtdb, header->node, 6236 isc_rwlocktype_write); 6237 ISC_LIST_APPEND(version->resigned_list, header, link); 6238 } 6239 } 6240 } 6241 6242 static uint64_t 6243 recordsize(rdatasetheader_t *header, unsigned int namelen) { 6244 return (dns_rdataslab_rdatasize((unsigned char *)header, 6245 sizeof(*header)) + 6246 sizeof(dns_ttl_t) + sizeof(dns_rdatatype_t) + 6247 sizeof(dns_rdataclass_t) + namelen); 6248 } 6249 6250 static void 6251 update_recordsandxfrsize(bool add, rbtdb_version_t *rbtversion, 6252 rdatasetheader_t *header, unsigned int namelen) { 6253 unsigned char *hdr = (unsigned char *)header; 6254 size_t hdrsize = sizeof(*header); 6255 6256 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_write); 6257 if (add) { 6258 rbtversion->records += dns_rdataslab_count(hdr, hdrsize); 6259 rbtversion->xfrsize += recordsize(header, namelen); 6260 } else { 6261 rbtversion->records -= dns_rdataslab_count(hdr, hdrsize); 6262 
rbtversion->xfrsize -= recordsize(header, namelen); 6263 } 6264 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write); 6265 } 6266 6267 static bool 6268 overmaxtype(dns_rbtdb_t *rbtdb, uint32_t ntypes) { 6269 if (rbtdb->maxtypepername == 0) { 6270 return (false); 6271 } 6272 6273 return (ntypes >= rbtdb->maxtypepername); 6274 } 6275 6276 static bool 6277 prio_header(rdatasetheader_t *header) { 6278 if (NEGATIVE(header) && prio_type(RBTDB_RDATATYPE_EXT(header->type))) { 6279 return (true); 6280 } 6281 6282 return (prio_type(header->type)); 6283 } 6284 6285 /* 6286 * write lock on rbtnode must be held. 6287 */ 6288 static isc_result_t 6289 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, 6290 rbtdb_version_t *rbtversion, rdatasetheader_t *newheader, 6291 unsigned int options, bool loading, dns_rdataset_t *addedrdataset, 6292 isc_stdtime_t now) { 6293 rbtdb_changed_t *changed = NULL; 6294 rdatasetheader_t *topheader = NULL, *topheader_prev = NULL; 6295 rdatasetheader_t *header = NULL, *sigheader = NULL; 6296 rdatasetheader_t *prioheader = NULL, *expireheader = NULL; 6297 unsigned char *merged = NULL; 6298 isc_result_t result; 6299 bool header_nx; 6300 bool newheader_nx; 6301 bool merge; 6302 dns_rdatatype_t rdtype, covers; 6303 rbtdb_rdatatype_t negtype, sigtype; 6304 dns_trust_t trust; 6305 int idx; 6306 uint32_t ntypes = 0; 6307 6308 /* 6309 * Add an rdatasetheader_t to a node. 6310 */ 6311 6312 /* 6313 * Caller must be holding the node lock. 6314 */ 6315 6316 if ((options & DNS_DBADD_MERGE) != 0) { 6317 REQUIRE(rbtversion != NULL); 6318 merge = true; 6319 } else { 6320 merge = false; 6321 } 6322 6323 if ((options & DNS_DBADD_FORCE) != 0) { 6324 trust = dns_trust_ultimate; 6325 } else { 6326 trust = newheader->trust; 6327 } 6328 6329 if (rbtversion != NULL && !loading) { 6330 /* 6331 * We always add a changed record, even if no changes end up 6332 * being made to this node, because it's harmless and 6333 * simplifies the code. 
6334 */ 6335 changed = add_changed(rbtdb, rbtversion, rbtnode); 6336 if (changed == NULL) { 6337 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6338 return (ISC_R_NOMEMORY); 6339 } 6340 } 6341 6342 newheader_nx = NONEXISTENT(newheader) ? true : false; 6343 topheader_prev = NULL; 6344 sigheader = NULL; 6345 negtype = 0; 6346 if (rbtversion == NULL && !newheader_nx) { 6347 rdtype = RBTDB_RDATATYPE_BASE(newheader->type); 6348 covers = RBTDB_RDATATYPE_EXT(newheader->type); 6349 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers); 6350 if (NEGATIVE(newheader)) { 6351 /* 6352 * We're adding a negative cache entry. 6353 */ 6354 if (covers == dns_rdatatype_any) { 6355 /* 6356 * If we're adding an negative cache entry 6357 * which covers all types (NXDOMAIN, 6358 * NODATA(QTYPE=ANY)), 6359 * 6360 * We make all other data ancient so that the 6361 * only rdataset that can be found at this 6362 * node is the negative cache entry. 6363 */ 6364 for (topheader = rbtnode->data; 6365 topheader != NULL; 6366 topheader = topheader->next) 6367 { 6368 set_ttl(rbtdb, topheader, 0); 6369 mark_header_ancient(rbtdb, topheader); 6370 } 6371 goto find_header; 6372 } 6373 /* 6374 * Otherwise look for any RRSIGs of the given 6375 * type so they can be marked ancient later. 6376 */ 6377 for (topheader = rbtnode->data; topheader != NULL; 6378 topheader = topheader->next) 6379 { 6380 if (topheader->type == sigtype) { 6381 sigheader = topheader; 6382 break; 6383 } 6384 } 6385 negtype = RBTDB_RDATATYPE_VALUE(covers, 0); 6386 } else { 6387 /* 6388 * We're adding something that isn't a 6389 * negative cache entry. Look for an extant 6390 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative 6391 * cache entry. If we're adding an RRSIG, also 6392 * check for an extant non-ancient NODATA ncache 6393 * entry which covers the same type as the RRSIG. 
6394 */ 6395 for (topheader = rbtnode->data; topheader != NULL; 6396 topheader = topheader->next) 6397 { 6398 if ((topheader->type == 6399 RBTDB_RDATATYPE_NCACHEANY) || 6400 (newheader->type == sigtype && 6401 topheader->type == 6402 RBTDB_RDATATYPE_VALUE(0, covers))) 6403 { 6404 break; 6405 } 6406 } 6407 if (topheader != NULL && EXISTS(topheader) && 6408 ACTIVE(topheader, now)) 6409 { 6410 /* 6411 * Found one. 6412 */ 6413 if (trust < topheader->trust) { 6414 /* 6415 * The NXDOMAIN/NODATA(QTYPE=ANY) 6416 * is more trusted. 6417 */ 6418 free_rdataset(rbtdb, rbtdb->common.mctx, 6419 newheader); 6420 if (addedrdataset != NULL) { 6421 bind_rdataset( 6422 rbtdb, rbtnode, 6423 topheader, now, 6424 isc_rwlocktype_write, 6425 addedrdataset); 6426 } 6427 return (DNS_R_UNCHANGED); 6428 } 6429 /* 6430 * The new rdataset is better. Expire the 6431 * ncache entry. 6432 */ 6433 set_ttl(rbtdb, topheader, 0); 6434 mark_header_ancient(rbtdb, topheader); 6435 topheader = NULL; 6436 goto find_header; 6437 } 6438 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype); 6439 } 6440 } 6441 6442 for (topheader = rbtnode->data; topheader != NULL; 6443 topheader = topheader->next) 6444 { 6445 if (IS_CACHE(rbtdb) && ACTIVE(topheader, now)) { 6446 ++ntypes; 6447 expireheader = topheader; 6448 } else if (!IS_CACHE(rbtdb)) { 6449 ++ntypes; 6450 } 6451 if (prio_header(topheader)) { 6452 prioheader = topheader; 6453 } 6454 if (topheader->type == newheader->type || 6455 topheader->type == negtype) 6456 { 6457 break; 6458 } 6459 topheader_prev = topheader; 6460 } 6461 6462 find_header: 6463 /* 6464 * If header isn't NULL, we've found the right type. There may be 6465 * IGNORE rdatasets between the top of the chain and the first real 6466 * data. We skip over them. 6467 */ 6468 header = topheader; 6469 while (header != NULL && IGNORE(header)) { 6470 header = header->down; 6471 } 6472 if (header != NULL) { 6473 header_nx = NONEXISTENT(header) ? 
true : false; 6474 6475 /* 6476 * Deleting an already non-existent rdataset has no effect. 6477 */ 6478 if (header_nx && newheader_nx) { 6479 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6480 return (DNS_R_UNCHANGED); 6481 } 6482 6483 /* 6484 * Trying to add an rdataset with lower trust to a cache 6485 * DB has no effect, provided that the cache data isn't 6486 * stale. If the cache data is stale, new lower trust 6487 * data will supersede it below. Unclear what the best 6488 * policy is here. 6489 */ 6490 if (rbtversion == NULL && trust < header->trust && 6491 (ACTIVE(header, now) || header_nx)) 6492 { 6493 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6494 if (addedrdataset != NULL) { 6495 bind_rdataset(rbtdb, rbtnode, header, now, 6496 isc_rwlocktype_write, 6497 addedrdataset); 6498 } 6499 return (DNS_R_UNCHANGED); 6500 } 6501 6502 /* 6503 * Don't merge if a nonexistent rdataset is involved. 6504 */ 6505 if (merge && (header_nx || newheader_nx)) { 6506 merge = false; 6507 } 6508 6509 /* 6510 * If 'merge' is true, we'll try to create a new rdataset 6511 * that is the union of 'newheader' and 'header'. 6512 */ 6513 if (merge) { 6514 unsigned int flags = 0; 6515 INSIST(rbtversion->serial >= header->serial); 6516 merged = NULL; 6517 result = ISC_R_SUCCESS; 6518 6519 if ((options & DNS_DBADD_EXACT) != 0) { 6520 flags |= DNS_RDATASLAB_EXACT; 6521 } 6522 /* 6523 * TTL use here is irrelevant to the cache; 6524 * merge is only done with zonedbs. 
6525 */ 6526 if ((options & DNS_DBADD_EXACTTTL) != 0 && 6527 newheader->rdh_ttl != header->rdh_ttl) 6528 { 6529 result = DNS_R_NOTEXACT; 6530 } else if (newheader->rdh_ttl != header->rdh_ttl) { 6531 flags |= DNS_RDATASLAB_FORCE; 6532 } 6533 if (result == ISC_R_SUCCESS) { 6534 result = dns_rdataslab_merge( 6535 (unsigned char *)header, 6536 (unsigned char *)newheader, 6537 (unsigned int)(sizeof(*newheader)), 6538 rbtdb->common.mctx, 6539 rbtdb->common.rdclass, 6540 (dns_rdatatype_t)header->type, flags, 6541 rbtdb->maxrrperset, &merged); 6542 } 6543 if (result == ISC_R_SUCCESS) { 6544 /* 6545 * If 'header' has the same serial number as 6546 * we do, we could clean it up now if we knew 6547 * that our caller had no references to it. 6548 * We don't know this, however, so we leave it 6549 * alone. It will get cleaned up when 6550 * clean_zone_node() runs. 6551 */ 6552 free_rdataset(rbtdb, rbtdb->common.mctx, 6553 newheader); 6554 newheader = (rdatasetheader_t *)merged; 6555 init_rdataset(rbtdb, newheader); 6556 update_newheader(newheader, header); 6557 if (loading && RESIGN(newheader) && 6558 RESIGN(header) && 6559 resign_sooner(header, newheader)) 6560 { 6561 newheader->resign = header->resign; 6562 newheader->resign_lsb = 6563 header->resign_lsb; 6564 } 6565 } else { 6566 free_rdataset(rbtdb, rbtdb->common.mctx, 6567 newheader); 6568 return (result); 6569 } 6570 } 6571 /* 6572 * Don't replace existing NS, A and AAAA RRsets in the 6573 * cache if they are already exist. This prevents named 6574 * being locked to old servers. Don't lower trust of 6575 * existing record if the update is forced. Nothing 6576 * special to be done w.r.t stale data; it gets replaced 6577 * normally further down. 
6578 */ 6579 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 6580 header->type == dns_rdatatype_ns && !header_nx && 6581 !newheader_nx && header->trust >= newheader->trust && 6582 dns_rdataslab_equalx((unsigned char *)header, 6583 (unsigned char *)newheader, 6584 (unsigned int)(sizeof(*newheader)), 6585 rbtdb->common.rdclass, 6586 (dns_rdatatype_t)header->type)) 6587 { 6588 /* 6589 * Honour the new ttl if it is less than the 6590 * older one. 6591 */ 6592 if (header->rdh_ttl > newheader->rdh_ttl) { 6593 set_ttl(rbtdb, header, newheader->rdh_ttl); 6594 } 6595 if (header->last_used != now) { 6596 update_header(rbtdb, header, now); 6597 } 6598 if (header->noqname == NULL && 6599 newheader->noqname != NULL) 6600 { 6601 header->noqname = newheader->noqname; 6602 newheader->noqname = NULL; 6603 } 6604 if (header->closest == NULL && 6605 newheader->closest != NULL) 6606 { 6607 header->closest = newheader->closest; 6608 newheader->closest = NULL; 6609 } 6610 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6611 if (addedrdataset != NULL) { 6612 bind_rdataset(rbtdb, rbtnode, header, now, 6613 isc_rwlocktype_write, 6614 addedrdataset); 6615 } 6616 return (ISC_R_SUCCESS); 6617 } 6618 /* 6619 * If we have will be replacing a NS RRset force its TTL 6620 * to be no more than the current NS RRset's TTL. This 6621 * ensures the delegations that are withdrawn are honoured. 
6622 */ 6623 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 6624 header->type == dns_rdatatype_ns && !header_nx && 6625 !newheader_nx && header->trust <= newheader->trust) 6626 { 6627 if (newheader->rdh_ttl > header->rdh_ttl) { 6628 newheader->rdh_ttl = header->rdh_ttl; 6629 } 6630 } 6631 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 6632 (options & DNS_DBADD_PREFETCH) == 0 && 6633 (header->type == dns_rdatatype_a || 6634 header->type == dns_rdatatype_aaaa || 6635 header->type == dns_rdatatype_ds || 6636 header->type == RBTDB_RDATATYPE_SIGDS) && 6637 !header_nx && !newheader_nx && 6638 header->trust >= newheader->trust && 6639 dns_rdataslab_equal((unsigned char *)header, 6640 (unsigned char *)newheader, 6641 (unsigned int)(sizeof(*newheader)))) 6642 { 6643 /* 6644 * Honour the new ttl if it is less than the 6645 * older one. 6646 */ 6647 if (header->rdh_ttl > newheader->rdh_ttl) { 6648 set_ttl(rbtdb, header, newheader->rdh_ttl); 6649 } 6650 if (header->last_used != now) { 6651 update_header(rbtdb, header, now); 6652 } 6653 if (header->noqname == NULL && 6654 newheader->noqname != NULL) 6655 { 6656 header->noqname = newheader->noqname; 6657 newheader->noqname = NULL; 6658 } 6659 if (header->closest == NULL && 6660 newheader->closest != NULL) 6661 { 6662 header->closest = newheader->closest; 6663 newheader->closest = NULL; 6664 } 6665 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6666 if (addedrdataset != NULL) { 6667 bind_rdataset(rbtdb, rbtnode, header, now, 6668 isc_rwlocktype_write, 6669 addedrdataset); 6670 } 6671 return (ISC_R_SUCCESS); 6672 } 6673 INSIST(rbtversion == NULL || 6674 rbtversion->serial >= topheader->serial); 6675 if (loading) { 6676 newheader->down = NULL; 6677 idx = newheader->node->locknum; 6678 if (IS_CACHE(rbtdb)) { 6679 if (ZEROTTL(newheader)) { 6680 newheader->last_used = 6681 atomic_load(&rbtdb->last_used) + 6682 1; 6683 ISC_LIST_APPEND(rbtdb->rdatasets[idx], 6684 newheader, link); 6685 } else { 6686 
ISC_LIST_PREPEND(rbtdb->rdatasets[idx], 6687 newheader, link); 6688 } 6689 INSIST(rbtdb->heaps != NULL); 6690 isc_heap_insert(rbtdb->heaps[idx], newheader); 6691 } else if (RESIGN(newheader)) { 6692 resign_insert(rbtdb, idx, newheader); 6693 /* 6694 * Don't call resign_delete as we don't need 6695 * to reverse the delete. The free_rdataset 6696 * call below will clean up the heap entry. 6697 */ 6698 } 6699 6700 /* 6701 * There are no other references to 'header' when 6702 * loading, so we MAY clean up 'header' now. 6703 * Since we don't generate changed records when 6704 * loading, we MUST clean up 'header' now. 6705 */ 6706 if (topheader_prev != NULL) { 6707 topheader_prev->next = newheader; 6708 } else { 6709 rbtnode->data = newheader; 6710 } 6711 newheader->next = topheader->next; 6712 if (rbtversion != NULL && !header_nx) { 6713 update_recordsandxfrsize(false, rbtversion, 6714 header, 6715 nodename->length); 6716 } 6717 free_rdataset(rbtdb, rbtdb->common.mctx, header); 6718 } else { 6719 idx = newheader->node->locknum; 6720 if (IS_CACHE(rbtdb)) { 6721 INSIST(rbtdb->heaps != NULL); 6722 isc_heap_insert(rbtdb->heaps[idx], newheader); 6723 if (ZEROTTL(newheader)) { 6724 newheader->last_used = 6725 atomic_load(&rbtdb->last_used) + 6726 1; 6727 ISC_LIST_APPEND(rbtdb->rdatasets[idx], 6728 newheader, link); 6729 } else { 6730 ISC_LIST_PREPEND(rbtdb->rdatasets[idx], 6731 newheader, link); 6732 } 6733 } else if (RESIGN(newheader)) { 6734 resign_insert(rbtdb, idx, newheader); 6735 resign_delete(rbtdb, rbtversion, header); 6736 } 6737 if (topheader_prev != NULL) { 6738 topheader_prev->next = newheader; 6739 } else { 6740 rbtnode->data = newheader; 6741 } 6742 newheader->next = topheader->next; 6743 newheader->down = topheader; 6744 topheader->next = newheader; 6745 rbtnode->dirty = 1; 6746 if (changed != NULL) { 6747 changed->dirty = true; 6748 } 6749 if (rbtversion == NULL) { 6750 set_ttl(rbtdb, header, 0); 6751 mark_header_ancient(rbtdb, header); 6752 if (sigheader != 
NULL) { 6753 set_ttl(rbtdb, sigheader, 0); 6754 mark_header_ancient(rbtdb, sigheader); 6755 } 6756 } 6757 if (rbtversion != NULL && !header_nx) { 6758 update_recordsandxfrsize(false, rbtversion, 6759 header, 6760 nodename->length); 6761 } 6762 } 6763 } else { 6764 /* 6765 * No non-IGNORED rdatasets of the given type exist at 6766 * this node. 6767 */ 6768 6769 /* 6770 * If we're trying to delete the type, don't bother. 6771 */ 6772 if (newheader_nx) { 6773 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6774 return (DNS_R_UNCHANGED); 6775 } 6776 6777 idx = newheader->node->locknum; 6778 if (IS_CACHE(rbtdb)) { 6779 isc_heap_insert(rbtdb->heaps[idx], newheader); 6780 if (ZEROTTL(newheader)) { 6781 ISC_LIST_APPEND(rbtdb->rdatasets[idx], 6782 newheader, link); 6783 } else { 6784 ISC_LIST_PREPEND(rbtdb->rdatasets[idx], 6785 newheader, link); 6786 } 6787 } else if (RESIGN(newheader)) { 6788 resign_insert(rbtdb, idx, newheader); 6789 resign_delete(rbtdb, rbtversion, header); 6790 } 6791 6792 if (topheader != NULL) { 6793 /* 6794 * We have an list of rdatasets of the given type, 6795 * but they're all marked IGNORE. We simply insert 6796 * the new rdataset at the head of the list. 6797 * 6798 * Ignored rdatasets cannot occur during loading, so 6799 * we INSIST on it. 6800 */ 6801 INSIST(!loading); 6802 INSIST(rbtversion == NULL || 6803 rbtversion->serial >= topheader->serial); 6804 if (topheader_prev != NULL) { 6805 topheader_prev->next = newheader; 6806 } else { 6807 rbtnode->data = newheader; 6808 } 6809 newheader->next = topheader->next; 6810 newheader->down = topheader; 6811 topheader->next = newheader; 6812 rbtnode->dirty = 1; 6813 if (changed != NULL) { 6814 changed->dirty = true; 6815 } 6816 } else { 6817 /* 6818 * No rdatasets of the given type exist at the node. 
6819 */ 6820 if (!IS_CACHE(rbtdb) && overmaxtype(rbtdb, ntypes)) { 6821 free_rdataset(rbtdb, rbtdb->common.mctx, 6822 newheader); 6823 return (DNS_R_TOOMANYRECORDS); 6824 } 6825 6826 newheader->down = NULL; 6827 6828 if (prio_header(newheader)) { 6829 /* This is a priority type, prepend it */ 6830 newheader->next = rbtnode->data; 6831 rbtnode->data = newheader; 6832 } else if (prioheader != NULL) { 6833 /* Append after the priority headers */ 6834 newheader->next = prioheader->next; 6835 prioheader->next = newheader; 6836 } else { 6837 /* There were no priority headers */ 6838 newheader->next = rbtnode->data; 6839 rbtnode->data = newheader; 6840 } 6841 6842 if (IS_CACHE(rbtdb) && overmaxtype(rbtdb, ntypes)) { 6843 if (expireheader == NULL) { 6844 expireheader = newheader; 6845 } 6846 if (NEGATIVE(newheader) && 6847 !prio_header(newheader)) 6848 { 6849 /* 6850 * Add the new non-priority negative 6851 * header to the database only 6852 * temporarily. 6853 */ 6854 expireheader = newheader; 6855 } 6856 6857 set_ttl(rbtdb, expireheader, 0); 6858 mark_header_ancient(rbtdb, expireheader); 6859 /* 6860 * FIXME: In theory, we should mark the RRSIG 6861 * and the header at the same time, but there is 6862 * no direct link between those two header, so 6863 * we would have to check the whole list again. 6864 */ 6865 } 6866 } 6867 } 6868 6869 if (rbtversion != NULL && !newheader_nx) { 6870 update_recordsandxfrsize(true, rbtversion, newheader, 6871 nodename->length); 6872 } 6873 6874 /* 6875 * Check if the node now contains CNAME and other data. 
6876 */ 6877 if (rbtversion != NULL && 6878 cname_and_other_data(rbtnode, rbtversion->serial)) 6879 { 6880 return (DNS_R_CNAMEANDOTHER); 6881 } 6882 6883 if (addedrdataset != NULL) { 6884 bind_rdataset(rbtdb, rbtnode, newheader, now, 6885 isc_rwlocktype_write, addedrdataset); 6886 } 6887 6888 return (ISC_R_SUCCESS); 6889 } 6890 6891 static bool 6892 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 6893 rbtdb_rdatatype_t type) { 6894 if (IS_CACHE(rbtdb)) { 6895 if (type == dns_rdatatype_dname) { 6896 return (true); 6897 } else { 6898 return (false); 6899 } 6900 } else if (type == dns_rdatatype_dname || 6901 (type == dns_rdatatype_ns && 6902 (node != rbtdb->origin_node || IS_STUB(rbtdb)))) 6903 { 6904 return (true); 6905 } 6906 return (false); 6907 } 6908 6909 static isc_result_t 6910 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, 6911 uint32_t maxrrperset, dns_rdataset_t *rdataset) { 6912 struct noqname *noqname; 6913 isc_mem_t *mctx = rbtdb->common.mctx; 6914 dns_name_t name; 6915 dns_rdataset_t neg, negsig; 6916 isc_result_t result; 6917 isc_region_t r; 6918 6919 dns_name_init(&name, NULL); 6920 dns_rdataset_init(&neg); 6921 dns_rdataset_init(&negsig); 6922 6923 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig); 6924 RUNTIME_CHECK(result == ISC_R_SUCCESS); 6925 6926 noqname = isc_mem_get(mctx, sizeof(*noqname)); 6927 dns_name_init(&noqname->name, NULL); 6928 noqname->neg = NULL; 6929 noqname->negsig = NULL; 6930 noqname->type = neg.type; 6931 dns_name_dup(&name, mctx, &noqname->name); 6932 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0, maxrrperset); 6933 if (result != ISC_R_SUCCESS) { 6934 goto cleanup; 6935 } 6936 noqname->neg = r.base; 6937 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0, maxrrperset); 6938 if (result != ISC_R_SUCCESS) { 6939 goto cleanup; 6940 } 6941 noqname->negsig = r.base; 6942 dns_rdataset_disassociate(&neg); 6943 dns_rdataset_disassociate(&negsig); 6944 newheader->noqname = noqname; 6945 
return (ISC_R_SUCCESS); 6946 6947 cleanup: 6948 dns_rdataset_disassociate(&neg); 6949 dns_rdataset_disassociate(&negsig); 6950 free_noqname(mctx, &noqname); 6951 return (result); 6952 } 6953 6954 static isc_result_t 6955 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, 6956 uint32_t maxrrperset, dns_rdataset_t *rdataset) { 6957 struct noqname *closest; 6958 isc_mem_t *mctx = rbtdb->common.mctx; 6959 dns_name_t name; 6960 dns_rdataset_t neg, negsig; 6961 isc_result_t result; 6962 isc_region_t r; 6963 6964 dns_name_init(&name, NULL); 6965 dns_rdataset_init(&neg); 6966 dns_rdataset_init(&negsig); 6967 6968 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig); 6969 RUNTIME_CHECK(result == ISC_R_SUCCESS); 6970 6971 closest = isc_mem_get(mctx, sizeof(*closest)); 6972 dns_name_init(&closest->name, NULL); 6973 closest->neg = NULL; 6974 closest->negsig = NULL; 6975 closest->type = neg.type; 6976 dns_name_dup(&name, mctx, &closest->name); 6977 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0, maxrrperset); 6978 if (result != ISC_R_SUCCESS) { 6979 goto cleanup; 6980 } 6981 closest->neg = r.base; 6982 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0, maxrrperset); 6983 if (result != ISC_R_SUCCESS) { 6984 goto cleanup; 6985 } 6986 closest->negsig = r.base; 6987 dns_rdataset_disassociate(&neg); 6988 dns_rdataset_disassociate(&negsig); 6989 newheader->closest = closest; 6990 return (ISC_R_SUCCESS); 6991 6992 cleanup: 6993 dns_rdataset_disassociate(&neg); 6994 dns_rdataset_disassociate(&negsig); 6995 free_noqname(mctx, &closest); 6996 return (result); 6997 } 6998 6999 static dns_dbmethods_t zone_methods; 7000 7001 static size_t 7002 rdataset_size(rdatasetheader_t *header) { 7003 if (!NONEXISTENT(header)) { 7004 return (dns_rdataslab_size((unsigned char *)header, 7005 sizeof(*header))); 7006 } 7007 7008 return (sizeof(*header)); 7009 } 7010 7011 static void 7012 expire_ttl_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, bool tree_locked, 7013 
isc_stdtime_t now); 7014 7015 static isc_result_t 7016 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 7017 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, 7018 dns_rdataset_t *addedrdataset) { 7019 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7020 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 7021 rbtdb_version_t *rbtversion = version; 7022 isc_region_t region; 7023 rdatasetheader_t *newheader; 7024 isc_result_t result; 7025 bool delegating; 7026 bool newnsec; 7027 bool tree_locked = false; 7028 bool cache_is_overmem = false; 7029 dns_fixedname_t fixed; 7030 dns_name_t *name; 7031 7032 REQUIRE(VALID_RBTDB(rbtdb)); 7033 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 7034 7035 if (rbtdb->common.methods == &zone_methods) { 7036 /* 7037 * SOA records are only allowed at top of zone. 7038 */ 7039 if (rdataset->type == dns_rdatatype_soa && 7040 node != rbtdb->origin_node) 7041 { 7042 return (DNS_R_NOTZONETOP); 7043 } 7044 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7045 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 && 7046 (rdataset->type == dns_rdatatype_nsec3 || 7047 rdataset->covers == dns_rdatatype_nsec3)) || 7048 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 && 7049 rdataset->type != dns_rdatatype_nsec3 && 7050 rdataset->covers != dns_rdatatype_nsec3))); 7051 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7052 } 7053 7054 if (rbtversion == NULL) { 7055 if (now == 0) { 7056 isc_stdtime_get(&now); 7057 } 7058 } else { 7059 now = 0; 7060 } 7061 7062 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 7063 ®ion, sizeof(rdatasetheader_t), 7064 rbtdb->maxrrperset); 7065 if (result != ISC_R_SUCCESS) { 7066 return (result); 7067 } 7068 7069 name = dns_fixedname_initname(&fixed); 7070 nodefullname(db, node, name); 7071 dns_rdataset_getownercase(rdataset, name); 7072 7073 newheader = (rdatasetheader_t *)region.base; 7074 init_rdataset(rbtdb, newheader); 7075 setownercase(newheader, name); 7076 set_ttl(rbtdb, 
newheader, rdataset->ttl + now); 7077 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type, 7078 rdataset->covers); 7079 atomic_init(&newheader->attributes, 0); 7080 if (rdataset->ttl == 0U) { 7081 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_ZEROTTL); 7082 } 7083 newheader->noqname = NULL; 7084 newheader->closest = NULL; 7085 atomic_init(&newheader->count, 7086 atomic_fetch_add_relaxed(&init_count, 1)); 7087 newheader->trust = rdataset->trust; 7088 newheader->last_used = now; 7089 newheader->node = rbtnode; 7090 if (rbtversion != NULL) { 7091 newheader->serial = rbtversion->serial; 7092 now = 0; 7093 7094 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 7095 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN); 7096 newheader->resign = 7097 (isc_stdtime_t)(dns_time64_from32( 7098 rdataset->resign) >> 7099 1); 7100 newheader->resign_lsb = rdataset->resign & 0x1; 7101 } else { 7102 newheader->resign = 0; 7103 newheader->resign_lsb = 0; 7104 } 7105 } else { 7106 newheader->serial = 1; 7107 newheader->resign = 0; 7108 newheader->resign_lsb = 0; 7109 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) { 7110 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_PREFETCH); 7111 } 7112 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) { 7113 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NEGATIVE); 7114 } 7115 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) { 7116 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NXDOMAIN); 7117 } 7118 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) { 7119 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_OPTOUT); 7120 } 7121 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { 7122 result = addnoqname(rbtdb, newheader, 7123 rbtdb->maxrrperset, rdataset); 7124 if (result != ISC_R_SUCCESS) { 7125 free_rdataset(rbtdb, rbtdb->common.mctx, 7126 newheader); 7127 return (result); 7128 } 7129 } 7130 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { 7131 result = addclosest(rbtdb, newheader, 7132 
rbtdb->maxrrperset, rdataset); 7133 if (result != ISC_R_SUCCESS) { 7134 free_rdataset(rbtdb, rbtdb->common.mctx, 7135 newheader); 7136 return (result); 7137 } 7138 } 7139 } 7140 7141 /* 7142 * If we're adding a delegation type (e.g. NS or DNAME for a zone, 7143 * just DNAME for the cache), then we need to set the callback bit 7144 * on the node. 7145 */ 7146 if (delegating_type(rbtdb, rbtnode, rdataset->type)) { 7147 delegating = true; 7148 } else { 7149 delegating = false; 7150 } 7151 7152 /* 7153 * Add to the auxiliary NSEC tree if we're adding an NSEC record. 7154 */ 7155 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7156 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC && 7157 rdataset->type == dns_rdatatype_nsec) 7158 { 7159 newnsec = true; 7160 } else { 7161 newnsec = false; 7162 } 7163 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7164 7165 /* 7166 * If we're adding a delegation type, adding to the auxiliary NSEC 7167 * tree, or the DB is a cache in an overmem state, hold an 7168 * exclusive lock on the tree. In the latter case the lock does 7169 * not necessarily have to be acquired but it will help purge 7170 * ancient entries more effectively. 
7171 */ 7172 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx)) { 7173 cache_is_overmem = true; 7174 } 7175 if (delegating || newnsec || cache_is_overmem) { 7176 tree_locked = true; 7177 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 7178 } 7179 7180 if (cache_is_overmem) { 7181 overmem_purge(rbtdb, newheader, tree_locked); 7182 } 7183 7184 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7185 isc_rwlocktype_write); 7186 7187 if (rbtdb->rrsetstats != NULL) { 7188 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_STATCOUNT); 7189 update_rrsetstats(rbtdb, newheader->type, 7190 atomic_load_acquire(&newheader->attributes), 7191 true); 7192 } 7193 7194 if (IS_CACHE(rbtdb)) { 7195 if (tree_locked) { 7196 cleanup_dead_nodes(rbtdb, rbtnode->locknum); 7197 } 7198 7199 expire_ttl_headers(rbtdb, rbtnode->locknum, tree_locked, now); 7200 7201 /* 7202 * If we've been holding a write lock on the tree just for 7203 * cleaning, we can release it now. However, we still need the 7204 * node lock. 
7205 */ 7206 if (tree_locked && !delegating && !newnsec) { 7207 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 7208 tree_locked = false; 7209 } 7210 } 7211 7212 result = ISC_R_SUCCESS; 7213 if (newnsec) { 7214 dns_rbtnode_t *nsecnode; 7215 7216 nsecnode = NULL; 7217 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); 7218 if (result == ISC_R_SUCCESS) { 7219 nsecnode->nsec = DNS_RBT_NSEC_NSEC; 7220 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC; 7221 } else if (result == ISC_R_EXISTS) { 7222 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC; 7223 result = ISC_R_SUCCESS; 7224 } 7225 } 7226 7227 if (result == ISC_R_SUCCESS) { 7228 result = add32(rbtdb, rbtnode, name, rbtversion, newheader, 7229 options, false, addedrdataset, now); 7230 } 7231 if (result == ISC_R_SUCCESS && delegating) { 7232 rbtnode->find_callback = 1; 7233 } 7234 7235 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7236 isc_rwlocktype_write); 7237 7238 if (tree_locked) { 7239 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 7240 } 7241 7242 return (result); 7243 } 7244 7245 static isc_result_t 7246 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 7247 dns_rdataset_t *rdataset, unsigned int options, 7248 dns_rdataset_t *newrdataset) { 7249 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7250 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 7251 rbtdb_version_t *rbtversion = version; 7252 dns_fixedname_t fname; 7253 dns_name_t *nodename = dns_fixedname_initname(&fname); 7254 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader; 7255 unsigned char *subresult; 7256 isc_region_t region; 7257 isc_result_t result; 7258 rbtdb_changed_t *changed; 7259 7260 REQUIRE(VALID_RBTDB(rbtdb)); 7261 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb); 7262 7263 if (rbtdb->common.methods == &zone_methods) { 7264 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7265 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 && 7266 (rdataset->type == dns_rdatatype_nsec3 || 7267 rdataset->covers == 
dns_rdatatype_nsec3)) || 7268 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 && 7269 rdataset->type != dns_rdatatype_nsec3 && 7270 rdataset->covers != dns_rdatatype_nsec3))); 7271 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7272 } 7273 7274 nodefullname(db, node, nodename); 7275 7276 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 7277 ®ion, sizeof(rdatasetheader_t), 7278 0); 7279 if (result != ISC_R_SUCCESS) { 7280 return (result); 7281 } 7282 newheader = (rdatasetheader_t *)region.base; 7283 init_rdataset(rbtdb, newheader); 7284 set_ttl(rbtdb, newheader, rdataset->ttl); 7285 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type, 7286 rdataset->covers); 7287 atomic_init(&newheader->attributes, 0); 7288 newheader->serial = rbtversion->serial; 7289 newheader->trust = 0; 7290 newheader->noqname = NULL; 7291 newheader->closest = NULL; 7292 atomic_init(&newheader->count, 7293 atomic_fetch_add_relaxed(&init_count, 1)); 7294 newheader->last_used = 0; 7295 newheader->node = rbtnode; 7296 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 7297 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN); 7298 newheader->resign = 7299 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> 7300 1); 7301 newheader->resign_lsb = rdataset->resign & 0x1; 7302 } else { 7303 newheader->resign = 0; 7304 newheader->resign_lsb = 0; 7305 } 7306 7307 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7308 isc_rwlocktype_write); 7309 7310 changed = add_changed(rbtdb, rbtversion, rbtnode); 7311 if (changed == NULL) { 7312 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7313 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7314 isc_rwlocktype_write); 7315 return (ISC_R_NOMEMORY); 7316 } 7317 7318 topheader_prev = NULL; 7319 for (topheader = rbtnode->data; topheader != NULL; 7320 topheader = topheader->next) 7321 { 7322 if (topheader->type == newheader->type) { 7323 break; 7324 } 7325 topheader_prev = topheader; 7326 } 7327 /* 7328 * If header isn't NULL, we've 
found the right type. There may be 7329 * IGNORE rdatasets between the top of the chain and the first real 7330 * data. We skip over them. 7331 */ 7332 header = topheader; 7333 while (header != NULL && IGNORE(header)) { 7334 header = header->down; 7335 } 7336 if (header != NULL && EXISTS(header)) { 7337 unsigned int flags = 0; 7338 subresult = NULL; 7339 result = ISC_R_SUCCESS; 7340 if ((options & DNS_DBSUB_EXACT) != 0) { 7341 flags |= DNS_RDATASLAB_EXACT; 7342 if (newheader->rdh_ttl != header->rdh_ttl) { 7343 result = DNS_R_NOTEXACT; 7344 } 7345 } 7346 if (result == ISC_R_SUCCESS) { 7347 result = dns_rdataslab_subtract( 7348 (unsigned char *)header, 7349 (unsigned char *)newheader, 7350 (unsigned int)(sizeof(*newheader)), 7351 rbtdb->common.mctx, rbtdb->common.rdclass, 7352 (dns_rdatatype_t)header->type, flags, 7353 &subresult); 7354 } 7355 if (result == ISC_R_SUCCESS) { 7356 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7357 newheader = (rdatasetheader_t *)subresult; 7358 init_rdataset(rbtdb, newheader); 7359 update_newheader(newheader, header); 7360 if (RESIGN(header)) { 7361 RDATASET_ATTR_SET(newheader, 7362 RDATASET_ATTR_RESIGN); 7363 newheader->resign = header->resign; 7364 newheader->resign_lsb = header->resign_lsb; 7365 resign_insert(rbtdb, rbtnode->locknum, 7366 newheader); 7367 } 7368 /* 7369 * We have to set the serial since the rdataslab 7370 * subtraction routine copies the reserved portion of 7371 * header, not newheader. 7372 */ 7373 newheader->serial = rbtversion->serial; 7374 /* 7375 * XXXJT: dns_rdataslab_subtract() copied the pointers 7376 * to additional info. We need to clear these fields 7377 * to avoid having duplicated references. 7378 */ 7379 update_recordsandxfrsize(true, rbtversion, newheader, 7380 nodename->length); 7381 } else if (result == DNS_R_NXRRSET) { 7382 /* 7383 * This subtraction would remove all of the rdata; 7384 * add a nonexistent header instead. 
7385 */ 7386 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7387 newheader = new_rdataset(rbtdb, rbtdb->common.mctx); 7388 if (newheader == NULL) { 7389 result = ISC_R_NOMEMORY; 7390 goto unlock; 7391 } 7392 init_rdataset(rbtdb, newheader); 7393 set_ttl(rbtdb, newheader, 0); 7394 newheader->type = topheader->type; 7395 atomic_init(&newheader->attributes, 7396 RDATASET_ATTR_NONEXISTENT); 7397 newheader->trust = 0; 7398 newheader->serial = rbtversion->serial; 7399 newheader->noqname = NULL; 7400 newheader->closest = NULL; 7401 atomic_init(&newheader->count, 0); 7402 newheader->node = rbtnode; 7403 newheader->resign = 0; 7404 newheader->resign_lsb = 0; 7405 newheader->last_used = 0; 7406 } else { 7407 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7408 goto unlock; 7409 } 7410 7411 /* 7412 * If we're here, we want to link newheader in front of 7413 * topheader. 7414 */ 7415 INSIST(rbtversion->serial >= topheader->serial); 7416 update_recordsandxfrsize(false, rbtversion, header, 7417 nodename->length); 7418 if (topheader_prev != NULL) { 7419 topheader_prev->next = newheader; 7420 } else { 7421 rbtnode->data = newheader; 7422 } 7423 newheader->next = topheader->next; 7424 newheader->down = topheader; 7425 topheader->next = newheader; 7426 rbtnode->dirty = 1; 7427 changed->dirty = true; 7428 resign_delete(rbtdb, rbtversion, header); 7429 } else { 7430 /* 7431 * The rdataset doesn't exist, so we don't need to do anything 7432 * to satisfy the deletion request. 
7433 */ 7434 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7435 if ((options & DNS_DBSUB_EXACT) != 0) { 7436 result = DNS_R_NOTEXACT; 7437 } else { 7438 result = DNS_R_UNCHANGED; 7439 } 7440 } 7441 7442 if (result == ISC_R_SUCCESS && newrdataset != NULL) { 7443 bind_rdataset(rbtdb, rbtnode, newheader, 0, 7444 isc_rwlocktype_write, newrdataset); 7445 } 7446 7447 if (result == DNS_R_NXRRSET && newrdataset != NULL && 7448 (options & DNS_DBSUB_WANTOLD) != 0) 7449 { 7450 bind_rdataset(rbtdb, rbtnode, header, 0, isc_rwlocktype_write, 7451 newrdataset); 7452 } 7453 7454 unlock: 7455 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7456 isc_rwlocktype_write); 7457 7458 return (result); 7459 } 7460 7461 static isc_result_t 7462 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 7463 dns_rdatatype_t type, dns_rdatatype_t covers) { 7464 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7465 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 7466 rbtdb_version_t *rbtversion = version; 7467 dns_fixedname_t fname; 7468 dns_name_t *nodename = dns_fixedname_initname(&fname); 7469 isc_result_t result; 7470 rdatasetheader_t *newheader; 7471 7472 REQUIRE(VALID_RBTDB(rbtdb)); 7473 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 7474 7475 if (type == dns_rdatatype_any) { 7476 return (ISC_R_NOTIMPLEMENTED); 7477 } 7478 if (type == dns_rdatatype_rrsig && covers == 0) { 7479 return (ISC_R_NOTIMPLEMENTED); 7480 } 7481 7482 newheader = new_rdataset(rbtdb, rbtdb->common.mctx); 7483 if (newheader == NULL) { 7484 return (ISC_R_NOMEMORY); 7485 } 7486 init_rdataset(rbtdb, newheader); 7487 set_ttl(rbtdb, newheader, 0); 7488 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers); 7489 atomic_init(&newheader->attributes, RDATASET_ATTR_NONEXISTENT); 7490 newheader->trust = 0; 7491 newheader->noqname = NULL; 7492 newheader->closest = NULL; 7493 if (rbtversion != NULL) { 7494 newheader->serial = rbtversion->serial; 7495 } else { 7496 newheader->serial = 0; 7497 } 7498 
atomic_init(&newheader->count, 0); 7499 newheader->last_used = 0; 7500 newheader->node = rbtnode; 7501 7502 nodefullname(db, node, nodename); 7503 7504 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7505 isc_rwlocktype_write); 7506 result = add32(rbtdb, rbtnode, nodename, rbtversion, newheader, 7507 DNS_DBADD_FORCE, false, NULL, 0); 7508 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7509 isc_rwlocktype_write); 7510 7511 return (result); 7512 } 7513 7514 /* 7515 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC 7516 */ 7517 static isc_result_t 7518 loadnode(dns_rbtdb_t *rbtdb, const dns_name_t *name, dns_rbtnode_t **nodep, 7519 bool hasnsec) { 7520 isc_result_t noderesult, nsecresult, tmpresult; 7521 dns_rbtnode_t *nsecnode = NULL, *node = NULL; 7522 7523 noderesult = dns_rbt_addnode(rbtdb->tree, name, &node); 7524 if (!hasnsec) { 7525 goto done; 7526 } 7527 if (noderesult == ISC_R_EXISTS) { 7528 /* 7529 * Add a node to the auxiliary NSEC tree for an old node 7530 * just now getting an NSEC record. 7531 */ 7532 if (node->nsec == DNS_RBT_NSEC_HAS_NSEC) { 7533 goto done; 7534 } 7535 } else if (noderesult != ISC_R_SUCCESS) { 7536 goto done; 7537 } 7538 7539 /* 7540 * Build the auxiliary tree for NSECs as we go. 7541 * This tree speeds searches for closest NSECs that would otherwise 7542 * need to examine many irrelevant nodes in large TLDs. 7543 * 7544 * Add nodes to the auxiliary tree after corresponding nodes have 7545 * been added to the main tree. 
7546 */ 7547 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); 7548 if (nsecresult == ISC_R_SUCCESS) { 7549 nsecnode->nsec = DNS_RBT_NSEC_NSEC; 7550 node->nsec = DNS_RBT_NSEC_HAS_NSEC; 7551 goto done; 7552 } 7553 7554 if (nsecresult == ISC_R_EXISTS) { 7555 #if 1 /* 0 */ 7556 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 7557 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 7558 "addnode: NSEC node already exists"); 7559 #endif /* if 1 */ 7560 node->nsec = DNS_RBT_NSEC_HAS_NSEC; 7561 goto done; 7562 } 7563 7564 if (noderesult == ISC_R_SUCCESS) { 7565 /* 7566 * Remove the node we just added above. 7567 */ 7568 tmpresult = dns_rbt_deletenode(rbtdb->tree, node, false); 7569 if (tmpresult != ISC_R_SUCCESS) { 7570 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 7571 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 7572 "loading_addrdataset: " 7573 "dns_rbt_deletenode: %s after " 7574 "dns_rbt_addnode(NSEC): %s", 7575 isc_result_totext(tmpresult), 7576 isc_result_totext(noderesult)); 7577 } 7578 } 7579 7580 /* 7581 * Set the error condition to be returned. 7582 */ 7583 noderesult = nsecresult; 7584 7585 done: 7586 if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS) { 7587 *nodep = node; 7588 } 7589 7590 return (noderesult); 7591 } 7592 7593 static isc_result_t 7594 loading_addrdataset(void *arg, const dns_name_t *name, 7595 dns_rdataset_t *rdataset) { 7596 rbtdb_load_t *loadctx = arg; 7597 dns_rbtdb_t *rbtdb = loadctx->rbtdb; 7598 dns_rbtnode_t *node; 7599 isc_result_t result; 7600 isc_region_t region; 7601 rdatasetheader_t *newheader; 7602 7603 REQUIRE(rdataset->rdclass == rbtdb->common.rdclass); 7604 7605 /* 7606 * SOA records are only allowed at top of zone. 
7607 */ 7608 if (rdataset->type == dns_rdatatype_soa && !IS_CACHE(rbtdb) && 7609 !dns_name_equal(name, &rbtdb->common.origin)) 7610 { 7611 return (DNS_R_NOTZONETOP); 7612 } 7613 7614 if (rdataset->type != dns_rdatatype_nsec3 && 7615 rdataset->covers != dns_rdatatype_nsec3) 7616 { 7617 add_empty_wildcards(rbtdb, name, false); 7618 } 7619 7620 if (dns_name_iswildcard(name)) { 7621 /* 7622 * NS record owners cannot legally be wild cards. 7623 */ 7624 if (rdataset->type == dns_rdatatype_ns) { 7625 return (DNS_R_INVALIDNS); 7626 } 7627 /* 7628 * NSEC3 record owners cannot legally be wild cards. 7629 */ 7630 if (rdataset->type == dns_rdatatype_nsec3) { 7631 return (DNS_R_INVALIDNSEC3); 7632 } 7633 result = add_wildcard_magic(rbtdb, name, false); 7634 if (result != ISC_R_SUCCESS) { 7635 return (result); 7636 } 7637 } 7638 7639 node = NULL; 7640 if (rdataset->type == dns_rdatatype_nsec3 || 7641 rdataset->covers == dns_rdatatype_nsec3) 7642 { 7643 result = dns_rbt_addnode(rbtdb->nsec3, name, &node); 7644 if (result == ISC_R_SUCCESS) { 7645 node->nsec = DNS_RBT_NSEC_NSEC3; 7646 } 7647 } else if (rdataset->type == dns_rdatatype_nsec) { 7648 result = loadnode(rbtdb, name, &node, true); 7649 } else { 7650 result = loadnode(rbtdb, name, &node, false); 7651 } 7652 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { 7653 return (result); 7654 } 7655 if (result == ISC_R_SUCCESS) { 7656 node->locknum = node->hashval % rbtdb->node_lock_count; 7657 } 7658 7659 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 7660 ®ion, sizeof(rdatasetheader_t), 7661 rbtdb->maxrrperset); 7662 if (result != ISC_R_SUCCESS) { 7663 return (result); 7664 } 7665 newheader = (rdatasetheader_t *)region.base; 7666 init_rdataset(rbtdb, newheader); 7667 set_ttl(rbtdb, newheader, rdataset->ttl + loadctx->now); /* XXX overflow 7668 * check */ 7669 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type, 7670 rdataset->covers); 7671 atomic_init(&newheader->attributes, 0); 7672 newheader->trust = 
rdataset->trust; 7673 newheader->serial = 1; 7674 newheader->noqname = NULL; 7675 newheader->closest = NULL; 7676 atomic_init(&newheader->count, 7677 atomic_fetch_add_relaxed(&init_count, 1)); 7678 newheader->last_used = 0; 7679 newheader->node = node; 7680 setownercase(newheader, name); 7681 7682 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 7683 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN); 7684 newheader->resign = 7685 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> 7686 1); 7687 newheader->resign_lsb = rdataset->resign & 0x1; 7688 } else { 7689 newheader->resign = 0; 7690 newheader->resign_lsb = 0; 7691 } 7692 7693 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write); 7694 result = add32(rbtdb, node, name, rbtdb->current_version, newheader, 7695 DNS_DBADD_MERGE, true, NULL, 0); 7696 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, 7697 isc_rwlocktype_write); 7698 7699 if (result == ISC_R_SUCCESS && 7700 delegating_type(rbtdb, node, rdataset->type)) 7701 { 7702 node->find_callback = 1; 7703 } else if (result == DNS_R_UNCHANGED) { 7704 result = ISC_R_SUCCESS; 7705 } 7706 7707 return (result); 7708 } 7709 7710 static isc_result_t 7711 beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) { 7712 rbtdb_load_t *loadctx; 7713 dns_rbtdb_t *rbtdb; 7714 rbtdb = (dns_rbtdb_t *)db; 7715 7716 REQUIRE(DNS_CALLBACK_VALID(callbacks)); 7717 REQUIRE(VALID_RBTDB(rbtdb)); 7718 7719 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx)); 7720 7721 loadctx->rbtdb = rbtdb; 7722 if (IS_CACHE(rbtdb)) { 7723 isc_stdtime_get(&loadctx->now); 7724 } else { 7725 loadctx->now = 0; 7726 } 7727 7728 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 7729 7730 REQUIRE((rbtdb->attributes & 7731 (RBTDB_ATTR_LOADED | RBTDB_ATTR_LOADING)) == 0); 7732 rbtdb->attributes |= RBTDB_ATTR_LOADING; 7733 7734 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7735 7736 callbacks->add = loading_addrdataset; 7737 callbacks->add_private = loadctx; 7738 7739 
return (ISC_R_SUCCESS); 7740 } 7741 7742 static isc_result_t 7743 endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) { 7744 rbtdb_load_t *loadctx; 7745 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7746 7747 REQUIRE(VALID_RBTDB(rbtdb)); 7748 REQUIRE(DNS_CALLBACK_VALID(callbacks)); 7749 loadctx = callbacks->add_private; 7750 REQUIRE(loadctx != NULL); 7751 REQUIRE(loadctx->rbtdb == rbtdb); 7752 7753 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 7754 7755 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0); 7756 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0); 7757 7758 rbtdb->attributes &= ~RBTDB_ATTR_LOADING; 7759 rbtdb->attributes |= RBTDB_ATTR_LOADED; 7760 7761 /* 7762 * If there's a KEY rdataset at the zone origin containing a 7763 * zone key, we consider the zone secure. 7764 */ 7765 if (!IS_CACHE(rbtdb) && rbtdb->origin_node != NULL) { 7766 dns_dbversion_t *version = rbtdb->current_version; 7767 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7768 iszonesecure(db, version, rbtdb->origin_node); 7769 } else { 7770 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7771 } 7772 7773 callbacks->add = NULL; 7774 callbacks->add_private = NULL; 7775 7776 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx)); 7777 7778 return (ISC_R_SUCCESS); 7779 } 7780 7781 static isc_result_t 7782 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename, 7783 dns_masterformat_t masterformat) { 7784 dns_rbtdb_t *rbtdb; 7785 rbtdb_version_t *rbtversion = version; 7786 7787 rbtdb = (dns_rbtdb_t *)db; 7788 7789 REQUIRE(VALID_RBTDB(rbtdb)); 7790 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 7791 7792 return (dns_master_dump(rbtdb->common.mctx, db, version, 7793 &dns_master_style_default, filename, 7794 masterformat, NULL)); 7795 } 7796 7797 static void 7798 delete_callback(void *data, void *arg) { 7799 dns_rbtdb_t *rbtdb = arg; 7800 rdatasetheader_t *current, *next; 7801 unsigned int locknum; 7802 7803 current = data; 7804 locknum = 
current->node->locknum; 7805 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 7806 while (current != NULL) { 7807 next = current->next; 7808 free_rdataset(rbtdb, rbtdb->common.mctx, current); 7809 current = next; 7810 } 7811 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 7812 } 7813 7814 static bool 7815 issecure(dns_db_t *db) { 7816 dns_rbtdb_t *rbtdb; 7817 bool secure; 7818 7819 rbtdb = (dns_rbtdb_t *)db; 7820 7821 REQUIRE(VALID_RBTDB(rbtdb)); 7822 7823 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 7824 secure = (rbtdb->current_version->secure == dns_db_secure); 7825 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 7826 7827 return (secure); 7828 } 7829 7830 static bool 7831 isdnssec(dns_db_t *db) { 7832 dns_rbtdb_t *rbtdb; 7833 bool dnssec; 7834 7835 rbtdb = (dns_rbtdb_t *)db; 7836 7837 REQUIRE(VALID_RBTDB(rbtdb)); 7838 7839 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 7840 dnssec = (rbtdb->current_version->secure != dns_db_insecure); 7841 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 7842 7843 return (dnssec); 7844 } 7845 7846 static unsigned int 7847 nodecount(dns_db_t *db, dns_dbtree_t tree) { 7848 dns_rbtdb_t *rbtdb; 7849 unsigned int count; 7850 7851 rbtdb = (dns_rbtdb_t *)db; 7852 7853 REQUIRE(VALID_RBTDB(rbtdb)); 7854 7855 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7856 switch (tree) { 7857 case dns_dbtree_main: 7858 count = dns_rbt_nodecount(rbtdb->tree); 7859 break; 7860 case dns_dbtree_nsec: 7861 count = dns_rbt_nodecount(rbtdb->nsec); 7862 break; 7863 case dns_dbtree_nsec3: 7864 count = dns_rbt_nodecount(rbtdb->nsec3); 7865 break; 7866 default: 7867 UNREACHABLE(); 7868 } 7869 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7870 7871 return (count); 7872 } 7873 7874 static size_t 7875 hashsize(dns_db_t *db) { 7876 dns_rbtdb_t *rbtdb; 7877 size_t size; 7878 7879 rbtdb = (dns_rbtdb_t *)db; 7880 7881 REQUIRE(VALID_RBTDB(rbtdb)); 7882 7883 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7884 size = 
dns_rbt_hashsize(rbtdb->tree); 7885 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7886 7887 return (size); 7888 } 7889 7890 static void 7891 settask(dns_db_t *db, isc_task_t *task, isc_task_t *prunetask) { 7892 dns_rbtdb_t *rbtdb; 7893 7894 rbtdb = (dns_rbtdb_t *)db; 7895 7896 REQUIRE(VALID_RBTDB(rbtdb)); 7897 7898 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 7899 if (rbtdb->task != NULL) { 7900 isc_task_detach(&rbtdb->task); 7901 } 7902 if (task != NULL) { 7903 isc_task_attach(task, &rbtdb->task); 7904 } 7905 if (rbtdb->prunetask != NULL) { 7906 isc_task_detach(&rbtdb->prunetask); 7907 } 7908 if (prunetask != NULL) { 7909 isc_task_attach(prunetask, &rbtdb->prunetask); 7910 } 7911 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7912 } 7913 7914 static bool 7915 ispersistent(dns_db_t *db) { 7916 UNUSED(db); 7917 return (false); 7918 } 7919 7920 static isc_result_t 7921 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) { 7922 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7923 dns_rbtnode_t *onode; 7924 isc_result_t result = ISC_R_SUCCESS; 7925 7926 REQUIRE(VALID_RBTDB(rbtdb)); 7927 REQUIRE(nodep != NULL && *nodep == NULL); 7928 7929 /* Note that the access to origin_node doesn't require a DB lock */ 7930 onode = (dns_rbtnode_t *)rbtdb->origin_node; 7931 if (onode != NULL) { 7932 new_reference(rbtdb, onode, isc_rwlocktype_none); 7933 *nodep = rbtdb->origin_node; 7934 } else { 7935 INSIST(IS_CACHE(rbtdb)); 7936 result = ISC_R_NOTFOUND; 7937 } 7938 7939 return (result); 7940 } 7941 7942 static isc_result_t 7943 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash, 7944 uint8_t *flags, uint16_t *iterations, unsigned char *salt, 7945 size_t *salt_length) { 7946 dns_rbtdb_t *rbtdb; 7947 isc_result_t result = ISC_R_NOTFOUND; 7948 rbtdb_version_t *rbtversion = version; 7949 7950 rbtdb = (dns_rbtdb_t *)db; 7951 7952 REQUIRE(VALID_RBTDB(rbtdb)); 7953 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 7954 7955 RBTDB_LOCK(&rbtdb->lock, 
isc_rwlocktype_read); 7956 if (rbtversion == NULL) { 7957 rbtversion = rbtdb->current_version; 7958 } 7959 7960 if (rbtversion->havensec3) { 7961 if (hash != NULL) { 7962 *hash = rbtversion->hash; 7963 } 7964 if (salt != NULL && salt_length != NULL) { 7965 REQUIRE(*salt_length >= rbtversion->salt_length); 7966 memmove(salt, rbtversion->salt, 7967 rbtversion->salt_length); 7968 } 7969 if (salt_length != NULL) { 7970 *salt_length = rbtversion->salt_length; 7971 } 7972 if (iterations != NULL) { 7973 *iterations = rbtversion->iterations; 7974 } 7975 if (flags != NULL) { 7976 *flags = rbtversion->flags; 7977 } 7978 result = ISC_R_SUCCESS; 7979 } 7980 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 7981 7982 return (result); 7983 } 7984 7985 static isc_result_t 7986 getsize(dns_db_t *db, dns_dbversion_t *version, uint64_t *records, 7987 uint64_t *xfrsize) { 7988 dns_rbtdb_t *rbtdb; 7989 isc_result_t result = ISC_R_SUCCESS; 7990 rbtdb_version_t *rbtversion = version; 7991 7992 rbtdb = (dns_rbtdb_t *)db; 7993 7994 REQUIRE(VALID_RBTDB(rbtdb)); 7995 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 7996 7997 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 7998 if (rbtversion == NULL) { 7999 rbtversion = rbtdb->current_version; 8000 } 8001 8002 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_read); 8003 if (records != NULL) { 8004 *records = rbtversion->records; 8005 } 8006 8007 if (xfrsize != NULL) { 8008 *xfrsize = rbtversion->xfrsize; 8009 } 8010 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_read); 8011 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 8012 8013 return (result); 8014 } 8015 8016 static isc_result_t 8017 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { 8018 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8019 rdatasetheader_t *header, oldheader; 8020 8021 REQUIRE(VALID_RBTDB(rbtdb)); 8022 REQUIRE(!IS_CACHE(rbtdb)); 8023 REQUIRE(rdataset != NULL); 8024 8025 header = rdataset->private3; 8026 header--; 8027 8028 
NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock, 8029 isc_rwlocktype_write); 8030 8031 oldheader = *header; 8032 /* 8033 * Only break the heap invariant (by adjusting resign and resign_lsb) 8034 * if we are going to be restoring it by calling isc_heap_increased 8035 * or isc_heap_decreased. 8036 */ 8037 if (resign != 0) { 8038 header->resign = (isc_stdtime_t)(dns_time64_from32(resign) >> 8039 1); 8040 header->resign_lsb = resign & 0x1; 8041 } 8042 if (header->heap_index != 0) { 8043 INSIST(RESIGN(header)); 8044 if (resign == 0) { 8045 isc_heap_delete(rbtdb->heaps[header->node->locknum], 8046 header->heap_index); 8047 header->heap_index = 0; 8048 } else if (resign_sooner(header, &oldheader)) { 8049 isc_heap_increased(rbtdb->heaps[header->node->locknum], 8050 header->heap_index); 8051 } else if (resign_sooner(&oldheader, header)) { 8052 isc_heap_decreased(rbtdb->heaps[header->node->locknum], 8053 header->heap_index); 8054 } 8055 } else if (resign != 0) { 8056 RDATASET_ATTR_SET(header, RDATASET_ATTR_RESIGN); 8057 resign_insert(rbtdb, header->node->locknum, header); 8058 } 8059 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock, 8060 isc_rwlocktype_write); 8061 return (ISC_R_SUCCESS); 8062 } 8063 8064 static isc_result_t 8065 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, dns_name_t *foundname) { 8066 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8067 rdatasetheader_t *header = NULL, *this; 8068 unsigned int i; 8069 isc_result_t result = ISC_R_NOTFOUND; 8070 unsigned int locknum = 0; 8071 8072 REQUIRE(VALID_RBTDB(rbtdb)); 8073 8074 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8075 8076 for (i = 0; i < rbtdb->node_lock_count; i++) { 8077 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read); 8078 8079 /* 8080 * Find for the earliest signing time among all of the 8081 * heaps, each of which is covered by a different bucket 8082 * lock. 
8083 */ 8084 this = isc_heap_element(rbtdb->heaps[i], 1); 8085 if (this == NULL) { 8086 /* Nothing found; unlock and try the next heap. */ 8087 NODE_UNLOCK(&rbtdb->node_locks[i].lock, 8088 isc_rwlocktype_read); 8089 continue; 8090 } 8091 8092 if (header == NULL) { 8093 /* 8094 * Found a signing time: retain the bucket lock and 8095 * preserve the lock number so we can unlock it 8096 * later. 8097 */ 8098 header = this; 8099 locknum = i; 8100 } else if (resign_sooner(this, header)) { 8101 /* 8102 * Found an earlier signing time; release the 8103 * previous bucket lock and retain this one instead. 8104 */ 8105 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 8106 isc_rwlocktype_read); 8107 header = this; 8108 locknum = i; 8109 } else { 8110 /* 8111 * Earliest signing time in this heap isn't 8112 * an improvement; unlock and try the next heap. 8113 */ 8114 NODE_UNLOCK(&rbtdb->node_locks[i].lock, 8115 isc_rwlocktype_read); 8116 } 8117 } 8118 8119 if (header != NULL) { 8120 /* 8121 * Found something; pass back the answer and unlock 8122 * the bucket. 
8123 */ 8124 bind_rdataset(rbtdb, header->node, header, 0, 8125 isc_rwlocktype_read, rdataset); 8126 8127 if (foundname != NULL) { 8128 dns_rbt_fullnamefromnode(header->node, foundname); 8129 } 8130 8131 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 8132 isc_rwlocktype_read); 8133 8134 result = ISC_R_SUCCESS; 8135 } 8136 8137 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8138 8139 return (result); 8140 } 8141 8142 static void 8143 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version) { 8144 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version; 8145 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8146 dns_rbtnode_t *node; 8147 rdatasetheader_t *header; 8148 8149 REQUIRE(VALID_RBTDB(rbtdb)); 8150 REQUIRE(rdataset != NULL); 8151 REQUIRE(rdataset->methods == &rdataset_methods); 8152 REQUIRE(rbtdb->future_version == rbtversion); 8153 REQUIRE(rbtversion != NULL); 8154 REQUIRE(rbtversion->writer); 8155 REQUIRE(rbtversion->rbtdb == rbtdb); 8156 8157 node = rdataset->private2; 8158 INSIST(node != NULL); 8159 header = rdataset->private3; 8160 INSIST(header != NULL); 8161 header--; 8162 8163 if (header->heap_index == 0) { 8164 return; 8165 } 8166 8167 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 8168 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write); 8169 /* 8170 * Delete from heap and save to re-signed list so that it can 8171 * be restored if we backout of this change. 
8172 */ 8173 resign_delete(rbtdb, rbtversion, header); 8174 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, 8175 isc_rwlocktype_write); 8176 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 8177 } 8178 8179 static isc_result_t 8180 setcachestats(dns_db_t *db, isc_stats_t *stats) { 8181 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8182 8183 REQUIRE(VALID_RBTDB(rbtdb)); 8184 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 8185 REQUIRE(stats != NULL); 8186 8187 isc_stats_attach(stats, &rbtdb->cachestats); 8188 return (ISC_R_SUCCESS); 8189 } 8190 8191 static isc_result_t 8192 setgluecachestats(dns_db_t *db, isc_stats_t *stats) { 8193 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8194 8195 REQUIRE(VALID_RBTDB(rbtdb)); 8196 REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb)); 8197 REQUIRE(stats != NULL); 8198 8199 isc_stats_attach(stats, &rbtdb->gluecachestats); 8200 return (ISC_R_SUCCESS); 8201 } 8202 8203 static void 8204 setmaxrrperset(dns_db_t *db, uint32_t maxrrperset) { 8205 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8206 8207 REQUIRE(VALID_RBTDB(rbtdb)); 8208 8209 rbtdb->maxrrperset = maxrrperset; 8210 } 8211 8212 static void 8213 setmaxtypepername(dns_db_t *db, uint32_t maxtypepername) { 8214 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8215 8216 REQUIRE(VALID_RBTDB(rbtdb)); 8217 8218 rbtdb->maxtypepername = maxtypepername; 8219 } 8220 8221 static dns_stats_t * 8222 getrrsetstats(dns_db_t *db) { 8223 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8224 8225 REQUIRE(VALID_RBTDB(rbtdb)); 8226 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 8227 8228 return (rbtdb->rrsetstats); 8229 } 8230 8231 static isc_result_t 8232 nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) { 8233 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8234 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 8235 isc_result_t result; 8236 8237 REQUIRE(VALID_RBTDB(rbtdb)); 8238 REQUIRE(node != NULL); 8239 REQUIRE(name != NULL); 8240 8241 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8242 result = 
dns_rbt_fullnamefromnode(rbtnode, name); 8243 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8244 8245 return (result); 8246 } 8247 8248 static isc_result_t 8249 setservestalettl(dns_db_t *db, dns_ttl_t ttl) { 8250 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8251 8252 REQUIRE(VALID_RBTDB(rbtdb)); 8253 REQUIRE(IS_CACHE(rbtdb)); 8254 8255 /* currently no bounds checking. 0 means disable. */ 8256 rbtdb->serve_stale_ttl = ttl; 8257 return (ISC_R_SUCCESS); 8258 } 8259 8260 static isc_result_t 8261 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { 8262 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8263 8264 REQUIRE(VALID_RBTDB(rbtdb)); 8265 REQUIRE(IS_CACHE(rbtdb)); 8266 8267 *ttl = rbtdb->serve_stale_ttl; 8268 return (ISC_R_SUCCESS); 8269 } 8270 8271 static isc_result_t 8272 setservestalerefresh(dns_db_t *db, uint32_t interval) { 8273 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8274 8275 REQUIRE(VALID_RBTDB(rbtdb)); 8276 REQUIRE(IS_CACHE(rbtdb)); 8277 8278 /* currently no bounds checking. 0 means disable. 
*/ 8279 rbtdb->serve_stale_refresh = interval; 8280 return (ISC_R_SUCCESS); 8281 } 8282 8283 static isc_result_t 8284 getservestalerefresh(dns_db_t *db, uint32_t *interval) { 8285 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8286 8287 REQUIRE(VALID_RBTDB(rbtdb)); 8288 REQUIRE(IS_CACHE(rbtdb)); 8289 8290 *interval = rbtdb->serve_stale_refresh; 8291 return (ISC_R_SUCCESS); 8292 } 8293 8294 static dns_dbmethods_t zone_methods = { attach, 8295 detach, 8296 beginload, 8297 endload, 8298 dump, 8299 currentversion, 8300 newversion, 8301 attachversion, 8302 closeversion, 8303 findnode, 8304 zone_find, 8305 zone_findzonecut, 8306 attachnode, 8307 detachnode, 8308 expirenode, 8309 printnode, 8310 createiterator, 8311 zone_findrdataset, 8312 allrdatasets, 8313 addrdataset, 8314 subtractrdataset, 8315 deleterdataset, 8316 issecure, 8317 nodecount, 8318 ispersistent, 8319 overmem, 8320 settask, 8321 getoriginnode, 8322 NULL, /* transfernode */ 8323 getnsec3parameters, 8324 findnsec3node, 8325 setsigningtime, 8326 getsigningtime, 8327 resigned, 8328 isdnssec, 8329 NULL, /* getrrsetstats */ 8330 NULL, /* rpz_attach */ 8331 NULL, /* rpz_ready */ 8332 NULL, /* findnodeext */ 8333 NULL, /* findext */ 8334 NULL, /* setcachestats */ 8335 hashsize, 8336 nodefullname, 8337 getsize, 8338 NULL, /* setservestalettl */ 8339 NULL, /* getservestalettl */ 8340 NULL, /* setservestalerefresh */ 8341 NULL, /* getservestalerefresh */ 8342 setgluecachestats, 8343 setmaxrrperset, 8344 setmaxtypepername }; 8345 8346 static dns_dbmethods_t cache_methods = { attach, 8347 detach, 8348 beginload, 8349 endload, 8350 dump, 8351 currentversion, 8352 newversion, 8353 attachversion, 8354 closeversion, 8355 findnode, 8356 cache_find, 8357 cache_findzonecut, 8358 attachnode, 8359 detachnode, 8360 expirenode, 8361 printnode, 8362 createiterator, 8363 cache_findrdataset, 8364 allrdatasets, 8365 addrdataset, 8366 subtractrdataset, 8367 deleterdataset, 8368 issecure, 8369 nodecount, 8370 ispersistent, 8371 overmem, 8372 
settask, 8373 getoriginnode, 8374 NULL, /* transfernode */ 8375 NULL, /* getnsec3parameters */ 8376 NULL, /* findnsec3node */ 8377 NULL, /* setsigningtime */ 8378 NULL, /* getsigningtime */ 8379 NULL, /* resigned */ 8380 isdnssec, 8381 getrrsetstats, 8382 NULL, /* rpz_attach */ 8383 NULL, /* rpz_ready */ 8384 NULL, /* findnodeext */ 8385 NULL, /* findext */ 8386 setcachestats, 8387 hashsize, 8388 nodefullname, 8389 NULL, /* getsize */ 8390 setservestalettl, 8391 getservestalettl, 8392 setservestalerefresh, 8393 getservestalerefresh, 8394 NULL, 8395 setmaxrrperset, 8396 setmaxtypepername }; 8397 8398 isc_result_t 8399 dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, 8400 dns_rdataclass_t rdclass, unsigned int argc, char *argv[], 8401 void *driverarg, dns_db_t **dbp) { 8402 dns_rbtdb_t *rbtdb; 8403 isc_result_t result; 8404 int i; 8405 dns_name_t name; 8406 bool (*sooner)(void *, void *); 8407 isc_mem_t *hmctx = mctx; 8408 8409 /* Keep the compiler happy. */ 8410 UNUSED(driverarg); 8411 8412 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb)); 8413 8414 /* 8415 * If argv[0] exists, it points to a memory context to use for heap 8416 */ 8417 if (argc != 0) { 8418 hmctx = (isc_mem_t *)argv[0]; 8419 } 8420 8421 memset(rbtdb, '\0', sizeof(*rbtdb)); 8422 dns_name_init(&rbtdb->common.origin, NULL); 8423 rbtdb->common.attributes = 0; 8424 if (type == dns_dbtype_cache) { 8425 rbtdb->common.methods = &cache_methods; 8426 rbtdb->common.attributes |= DNS_DBATTR_CACHE; 8427 } else if (type == dns_dbtype_stub) { 8428 rbtdb->common.methods = &zone_methods; 8429 rbtdb->common.attributes |= DNS_DBATTR_STUB; 8430 } else { 8431 rbtdb->common.methods = &zone_methods; 8432 } 8433 rbtdb->common.rdclass = rdclass; 8434 rbtdb->common.mctx = NULL; 8435 8436 ISC_LIST_INIT(rbtdb->common.update_listeners); 8437 8438 RBTDB_INITLOCK(&rbtdb->lock); 8439 8440 isc_rwlock_init(&rbtdb->tree_lock, 0, 0); 8441 8442 /* 8443 * Initialize node_lock_count in a generic way to support 
future 8444 * extension which allows the user to specify this value on creation. 8445 * Note that when specified for a cache DB it must be larger than 1 8446 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT. 8447 */ 8448 if (rbtdb->node_lock_count == 0) { 8449 if (IS_CACHE(rbtdb)) { 8450 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT; 8451 } else { 8452 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT; 8453 } 8454 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) { 8455 result = ISC_R_RANGE; 8456 goto cleanup_tree_lock; 8457 } 8458 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH)); 8459 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count * 8460 sizeof(rbtdb_nodelock_t)); 8461 8462 rbtdb->cachestats = NULL; 8463 rbtdb->gluecachestats = NULL; 8464 8465 rbtdb->rrsetstats = NULL; 8466 if (IS_CACHE(rbtdb)) { 8467 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats); 8468 if (result != ISC_R_SUCCESS) { 8469 goto cleanup_node_locks; 8470 } 8471 rbtdb->rdatasets = isc_mem_get( 8472 mctx, 8473 rbtdb->node_lock_count * sizeof(rdatasetheaderlist_t)); 8474 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8475 ISC_LIST_INIT(rbtdb->rdatasets[i]); 8476 } 8477 } else { 8478 rbtdb->rdatasets = NULL; 8479 } 8480 8481 /* 8482 * Create the heaps. 8483 */ 8484 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count * 8485 sizeof(isc_heap_t *)); 8486 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8487 rbtdb->heaps[i] = NULL; 8488 } 8489 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner; 8490 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8491 isc_heap_create(hmctx, sooner, set_index, 0, &rbtdb->heaps[i]); 8492 } 8493 8494 /* 8495 * Create deadnode lists. 
8496 */ 8497 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count * 8498 sizeof(rbtnodelist_t)); 8499 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8500 ISC_LIST_INIT(rbtdb->deadnodes[i]); 8501 } 8502 8503 rbtdb->active = rbtdb->node_lock_count; 8504 8505 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) { 8506 NODE_INITLOCK(&rbtdb->node_locks[i].lock); 8507 isc_refcount_init(&rbtdb->node_locks[i].references, 0); 8508 rbtdb->node_locks[i].exiting = false; 8509 } 8510 8511 /* 8512 * Attach to the mctx. The database will persist so long as there 8513 * are references to it, and attaching to the mctx ensures that our 8514 * mctx won't disappear out from under us. 8515 */ 8516 isc_mem_attach(mctx, &rbtdb->common.mctx); 8517 isc_mem_attach(hmctx, &rbtdb->hmctx); 8518 8519 /* 8520 * Make a copy of the origin name. 8521 */ 8522 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin); 8523 if (result != ISC_R_SUCCESS) { 8524 free_rbtdb(rbtdb, false, NULL); 8525 return (result); 8526 } 8527 8528 /* 8529 * Make the Red-Black Trees. 8530 */ 8531 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree); 8532 if (result != ISC_R_SUCCESS) { 8533 free_rbtdb(rbtdb, false, NULL); 8534 return (result); 8535 } 8536 8537 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec); 8538 if (result != ISC_R_SUCCESS) { 8539 free_rbtdb(rbtdb, false, NULL); 8540 return (result); 8541 } 8542 8543 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3); 8544 if (result != ISC_R_SUCCESS) { 8545 free_rbtdb(rbtdb, false, NULL); 8546 return (result); 8547 } 8548 8549 /* 8550 * In order to set the node callback bit correctly in zone databases, 8551 * we need to know if the node has the origin name of the zone. 8552 * In loading_addrdataset() we could simply compare the new name 8553 * to the origin name, but this is expensive. 
Also, we don't know the 8554 * node name in addrdataset(), so we need another way of knowing the 8555 * zone's top. 8556 * 8557 * We now explicitly create a node for the zone's origin, and then 8558 * we simply remember the node's address. This is safe, because 8559 * the top-of-zone node can never be deleted, nor can its address 8560 * change. 8561 */ 8562 if (!IS_CACHE(rbtdb)) { 8563 rbtdb->origin_node = NULL; 8564 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin, 8565 &rbtdb->origin_node); 8566 if (result != ISC_R_SUCCESS) { 8567 INSIST(result != ISC_R_EXISTS); 8568 free_rbtdb(rbtdb, false, NULL); 8569 return (result); 8570 } 8571 INSIST(rbtdb->origin_node != NULL); 8572 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL; 8573 /* 8574 * We need to give the origin node the right locknum. 8575 */ 8576 dns_name_init(&name, NULL); 8577 dns_rbt_namefromnode(rbtdb->origin_node, &name); 8578 rbtdb->origin_node->locknum = rbtdb->origin_node->hashval % 8579 rbtdb->node_lock_count; 8580 /* 8581 * Add an apex node to the NSEC3 tree so that NSEC3 searches 8582 * return partial matches when there is only a single NSEC3 8583 * record in the tree. 8584 */ 8585 rbtdb->nsec3_origin_node = NULL; 8586 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin, 8587 &rbtdb->nsec3_origin_node); 8588 if (result != ISC_R_SUCCESS) { 8589 INSIST(result != ISC_R_EXISTS); 8590 free_rbtdb(rbtdb, false, NULL); 8591 return (result); 8592 } 8593 rbtdb->nsec3_origin_node->nsec = DNS_RBT_NSEC_NSEC3; 8594 /* 8595 * We need to give the nsec3 origin node the right locknum. 8596 */ 8597 dns_name_init(&name, NULL); 8598 dns_rbt_namefromnode(rbtdb->nsec3_origin_node, &name); 8599 rbtdb->nsec3_origin_node->locknum = 8600 rbtdb->nsec3_origin_node->hashval % 8601 rbtdb->node_lock_count; 8602 } 8603 8604 /* 8605 * Misc. Initialization. 
8606 */ 8607 isc_refcount_init(&rbtdb->references, 1); 8608 rbtdb->attributes = 0; 8609 rbtdb->task = NULL; 8610 rbtdb->prunetask = NULL; 8611 rbtdb->serve_stale_ttl = 0; 8612 8613 /* 8614 * Version Initialization. 8615 */ 8616 rbtdb->current_serial = 1; 8617 rbtdb->least_serial = 1; 8618 rbtdb->next_serial = 2; 8619 rbtdb->current_version = allocate_version(mctx, 1, 1, false); 8620 rbtdb->current_version->rbtdb = rbtdb; 8621 rbtdb->current_version->secure = dns_db_insecure; 8622 rbtdb->current_version->havensec3 = false; 8623 rbtdb->current_version->flags = 0; 8624 rbtdb->current_version->iterations = 0; 8625 rbtdb->current_version->hash = 0; 8626 rbtdb->current_version->salt_length = 0; 8627 memset(rbtdb->current_version->salt, 0, 8628 sizeof(rbtdb->current_version->salt)); 8629 isc_rwlock_init(&rbtdb->current_version->rwlock, 0, 0); 8630 rbtdb->current_version->records = 0; 8631 rbtdb->current_version->xfrsize = 0; 8632 rbtdb->future_version = NULL; 8633 ISC_LIST_INIT(rbtdb->open_versions); 8634 /* 8635 * Keep the current version in the open list so that list operation 8636 * won't happen in normal lookup operations. 
8637 */ 8638 PREPEND(rbtdb->open_versions, rbtdb->current_version, link); 8639 8640 rbtdb->common.magic = DNS_DB_MAGIC; 8641 rbtdb->common.impmagic = RBTDB_MAGIC; 8642 8643 *dbp = (dns_db_t *)rbtdb; 8644 8645 return (ISC_R_SUCCESS); 8646 8647 cleanup_node_locks: 8648 isc_mem_put(mctx, rbtdb->node_locks, 8649 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t)); 8650 8651 cleanup_tree_lock: 8652 isc_rwlock_destroy(&rbtdb->tree_lock); 8653 RBTDB_DESTROYLOCK(&rbtdb->lock); 8654 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb)); 8655 return (result); 8656 } 8657 8658 /* 8659 * Slabbed Rdataset Methods 8660 */ 8661 8662 static void 8663 rdataset_disassociate(dns_rdataset_t *rdataset) { 8664 dns_db_t *db = rdataset->private1; 8665 dns_dbnode_t *node = rdataset->private2; 8666 8667 detachnode(db, &node); 8668 } 8669 8670 static isc_result_t 8671 rdataset_first(dns_rdataset_t *rdataset) { 8672 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 8673 unsigned int count; 8674 8675 count = raw[0] * 256 + raw[1]; 8676 if (count == 0) { 8677 rdataset->private5 = NULL; 8678 return (ISC_R_NOMORE); 8679 } 8680 8681 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) { 8682 raw += DNS_RDATASET_COUNT; 8683 } 8684 8685 raw += DNS_RDATASET_LENGTH; 8686 8687 /* 8688 * The privateuint4 field is the number of rdata beyond the 8689 * cursor position, so we decrement the total count by one 8690 * before storing it. 8691 * 8692 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the 8693 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points 8694 * to the first entry in the offset table. 
8695 */ 8696 count--; 8697 rdataset->privateuint4 = count; 8698 rdataset->private5 = raw; 8699 8700 return (ISC_R_SUCCESS); 8701 } 8702 8703 static isc_result_t 8704 rdataset_next(dns_rdataset_t *rdataset) { 8705 unsigned int count; 8706 unsigned int length; 8707 unsigned char *raw; /* RDATASLAB */ 8708 8709 count = rdataset->privateuint4; 8710 if (count == 0) { 8711 return (ISC_R_NOMORE); 8712 } 8713 count--; 8714 rdataset->privateuint4 = count; 8715 8716 /* 8717 * Skip forward one record (length + 4) or one offset (4). 8718 */ 8719 raw = rdataset->private5; 8720 #if DNS_RDATASET_FIXED 8721 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) 8722 #endif /* DNS_RDATASET_FIXED */ 8723 { 8724 length = raw[0] * 256 + raw[1]; 8725 raw += length; 8726 } 8727 8728 rdataset->private5 = raw + DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 8729 8730 return (ISC_R_SUCCESS); 8731 } 8732 8733 static void 8734 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) { 8735 unsigned char *raw = rdataset->private5; /* RDATASLAB */ 8736 unsigned int length; 8737 isc_region_t r; 8738 unsigned int flags = 0; 8739 8740 REQUIRE(raw != NULL); 8741 8742 /* 8743 * Find the start of the record if not already in private5 8744 * then skip the length and order fields. 
8745 */ 8746 #if DNS_RDATASET_FIXED 8747 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) { 8748 unsigned int offset; 8749 offset = ((unsigned int)raw[0] << 24) + 8750 ((unsigned int)raw[1] << 16) + 8751 ((unsigned int)raw[2] << 8) + (unsigned int)raw[3]; 8752 raw = rdataset->private3; 8753 raw += offset; 8754 } 8755 #endif /* if DNS_RDATASET_FIXED */ 8756 8757 length = raw[0] * 256 + raw[1]; 8758 8759 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 8760 8761 if (rdataset->type == dns_rdatatype_rrsig) { 8762 if (*raw & DNS_RDATASLAB_OFFLINE) { 8763 flags |= DNS_RDATA_OFFLINE; 8764 } 8765 length--; 8766 raw++; 8767 } 8768 r.length = length; 8769 r.base = raw; 8770 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r); 8771 rdata->flags |= flags; 8772 } 8773 8774 static void 8775 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) { 8776 dns_db_t *db = source->private1; 8777 dns_dbnode_t *node = source->private2; 8778 dns_dbnode_t *cloned_node = NULL; 8779 8780 attachnode(db, node, &cloned_node); 8781 INSIST(!ISC_LINK_LINKED(target, link)); 8782 *target = *source; 8783 ISC_LINK_INIT(target, link); 8784 8785 /* 8786 * Reset iterator state. 
8787 */ 8788 target->privateuint4 = 0; 8789 target->private5 = NULL; 8790 } 8791 8792 static unsigned int 8793 rdataset_count(dns_rdataset_t *rdataset) { 8794 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 8795 unsigned int count; 8796 8797 count = raw[0] * 256 + raw[1]; 8798 8799 return (count); 8800 } 8801 8802 static isc_result_t 8803 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name, 8804 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) { 8805 dns_db_t *db = rdataset->private1; 8806 dns_dbnode_t *node = rdataset->private2; 8807 dns_dbnode_t *cloned_node; 8808 const struct noqname *noqname = rdataset->private6; 8809 8810 cloned_node = NULL; 8811 attachnode(db, node, &cloned_node); 8812 nsec->methods = &slab_methods; 8813 nsec->rdclass = db->rdclass; 8814 nsec->type = noqname->type; 8815 nsec->covers = 0; 8816 nsec->ttl = rdataset->ttl; 8817 nsec->trust = rdataset->trust; 8818 nsec->private1 = rdataset->private1; 8819 nsec->private2 = rdataset->private2; 8820 nsec->private3 = noqname->neg; 8821 nsec->privateuint4 = 0; 8822 nsec->private5 = NULL; 8823 nsec->private6 = NULL; 8824 nsec->private7 = NULL; 8825 8826 cloned_node = NULL; 8827 attachnode(db, node, &cloned_node); 8828 nsecsig->methods = &slab_methods; 8829 nsecsig->rdclass = db->rdclass; 8830 nsecsig->type = dns_rdatatype_rrsig; 8831 nsecsig->covers = noqname->type; 8832 nsecsig->ttl = rdataset->ttl; 8833 nsecsig->trust = rdataset->trust; 8834 nsecsig->private1 = rdataset->private1; 8835 nsecsig->private2 = rdataset->private2; 8836 nsecsig->private3 = noqname->negsig; 8837 nsecsig->privateuint4 = 0; 8838 nsecsig->private5 = NULL; 8839 nsec->private6 = NULL; 8840 nsec->private7 = NULL; 8841 8842 dns_name_clone(&noqname->name, name); 8843 8844 return (ISC_R_SUCCESS); 8845 } 8846 8847 static isc_result_t 8848 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name, 8849 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) { 8850 dns_db_t *db = rdataset->private1; 8851 dns_dbnode_t *node 
= rdataset->private2; 8852 dns_dbnode_t *cloned_node; 8853 const struct noqname *closest = rdataset->private7; 8854 8855 cloned_node = NULL; 8856 attachnode(db, node, &cloned_node); 8857 nsec->methods = &slab_methods; 8858 nsec->rdclass = db->rdclass; 8859 nsec->type = closest->type; 8860 nsec->covers = 0; 8861 nsec->ttl = rdataset->ttl; 8862 nsec->trust = rdataset->trust; 8863 nsec->private1 = rdataset->private1; 8864 nsec->private2 = rdataset->private2; 8865 nsec->private3 = closest->neg; 8866 nsec->privateuint4 = 0; 8867 nsec->private5 = NULL; 8868 nsec->private6 = NULL; 8869 nsec->private7 = NULL; 8870 8871 cloned_node = NULL; 8872 attachnode(db, node, &cloned_node); 8873 nsecsig->methods = &slab_methods; 8874 nsecsig->rdclass = db->rdclass; 8875 nsecsig->type = dns_rdatatype_rrsig; 8876 nsecsig->covers = closest->type; 8877 nsecsig->ttl = rdataset->ttl; 8878 nsecsig->trust = rdataset->trust; 8879 nsecsig->private1 = rdataset->private1; 8880 nsecsig->private2 = rdataset->private2; 8881 nsecsig->private3 = closest->negsig; 8882 nsecsig->privateuint4 = 0; 8883 nsecsig->private5 = NULL; 8884 nsec->private6 = NULL; 8885 nsec->private7 = NULL; 8886 8887 dns_name_clone(&closest->name, name); 8888 8889 return (ISC_R_SUCCESS); 8890 } 8891 8892 static void 8893 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) { 8894 dns_rbtdb_t *rbtdb = rdataset->private1; 8895 dns_rbtnode_t *rbtnode = rdataset->private2; 8896 rdatasetheader_t *header = rdataset->private3; 8897 8898 header--; 8899 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8900 isc_rwlocktype_write); 8901 header->trust = rdataset->trust = trust; 8902 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8903 isc_rwlocktype_write); 8904 } 8905 8906 static void 8907 rdataset_expire(dns_rdataset_t *rdataset) { 8908 dns_rbtdb_t *rbtdb = rdataset->private1; 8909 dns_rbtnode_t *rbtnode = rdataset->private2; 8910 rdatasetheader_t *header = rdataset->private3; 8911 8912 header--; 8913 
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8914 isc_rwlocktype_write); 8915 expire_header(rbtdb, header, false, expire_flush); 8916 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8917 isc_rwlocktype_write); 8918 } 8919 8920 static void 8921 rdataset_clearprefetch(dns_rdataset_t *rdataset) { 8922 dns_rbtdb_t *rbtdb = rdataset->private1; 8923 dns_rbtnode_t *rbtnode = rdataset->private2; 8924 rdatasetheader_t *header = rdataset->private3; 8925 8926 header--; 8927 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8928 isc_rwlocktype_write); 8929 RDATASET_ATTR_CLR(header, RDATASET_ATTR_PREFETCH); 8930 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8931 isc_rwlocktype_write); 8932 } 8933 8934 /* 8935 * Rdataset Iterator Methods 8936 */ 8937 8938 static void 8939 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) { 8940 rbtdb_rdatasetiter_t *rbtiterator; 8941 8942 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp); 8943 8944 if (rbtiterator->common.version != NULL) { 8945 closeversion(rbtiterator->common.db, 8946 &rbtiterator->common.version, false); 8947 } 8948 detachnode(rbtiterator->common.db, &rbtiterator->common.node); 8949 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator, 8950 sizeof(*rbtiterator)); 8951 8952 *iteratorp = NULL; 8953 } 8954 8955 static bool 8956 iterator_active(dns_rbtdb_t *rbtdb, rbtdb_rdatasetiter_t *rbtiterator, 8957 rdatasetheader_t *header) { 8958 dns_ttl_t stale_ttl = header->rdh_ttl + STALE_TTL(header, rbtdb); 8959 8960 /* 8961 * Is this a "this rdataset doesn't exist" record? 8962 */ 8963 if (NONEXISTENT(header)) { 8964 return (false); 8965 } 8966 8967 /* 8968 * If this is a zone or this header still active then return it. 8969 */ 8970 if (!IS_CACHE(rbtdb) || ACTIVE(header, rbtiterator->common.now)) { 8971 return (true); 8972 } 8973 8974 /* 8975 * If we are not returning stale records or the rdataset is 8976 * too old don't return it. 
8977 */ 8978 if (!STALEOK(rbtiterator) || (rbtiterator->common.now > stale_ttl)) { 8979 return (false); 8980 } 8981 return (true); 8982 } 8983 8984 static isc_result_t 8985 rdatasetiter_first(dns_rdatasetiter_t *iterator) { 8986 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 8987 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 8988 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 8989 rbtdb_version_t *rbtversion = rbtiterator->common.version; 8990 rdatasetheader_t *header, *top_next; 8991 rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial; 8992 8993 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 8994 isc_rwlocktype_read); 8995 8996 for (header = rbtnode->data; header != NULL; header = top_next) { 8997 top_next = header->next; 8998 do { 8999 if (EXPIREDOK(rbtiterator)) { 9000 if (!NONEXISTENT(header)) { 9001 break; 9002 } 9003 header = header->down; 9004 } else if (header->serial <= serial && !IGNORE(header)) 9005 { 9006 if (!iterator_active(rbtdb, rbtiterator, 9007 header)) 9008 { 9009 header = NULL; 9010 } 9011 break; 9012 } else { 9013 header = header->down; 9014 } 9015 } while (header != NULL); 9016 if (header != NULL) { 9017 break; 9018 } 9019 } 9020 9021 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9022 isc_rwlocktype_read); 9023 9024 rbtiterator->current = header; 9025 9026 if (header == NULL) { 9027 return (ISC_R_NOMORE); 9028 } 9029 9030 return (ISC_R_SUCCESS); 9031 } 9032 9033 static isc_result_t 9034 rdatasetiter_next(dns_rdatasetiter_t *iterator) { 9035 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 9036 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 9037 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 9038 rbtdb_version_t *rbtversion = rbtiterator->common.version; 9039 rdatasetheader_t *header, *top_next; 9040 rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 
1 : rbtversion->serial; 9041 rbtdb_rdatatype_t type, negtype; 9042 dns_rdatatype_t rdtype, covers; 9043 bool expiredok = EXPIREDOK(rbtiterator); 9044 9045 header = rbtiterator->current; 9046 if (header == NULL) { 9047 return (ISC_R_NOMORE); 9048 } 9049 9050 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9051 isc_rwlocktype_read); 9052 9053 type = header->type; 9054 rdtype = RBTDB_RDATATYPE_BASE(header->type); 9055 if (NEGATIVE(header)) { 9056 covers = RBTDB_RDATATYPE_EXT(header->type); 9057 negtype = RBTDB_RDATATYPE_VALUE(covers, 0); 9058 } else { 9059 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype); 9060 } 9061 9062 /* 9063 * Find the start of the header chain for the next type 9064 * by walking back up the list. 9065 */ 9066 top_next = header->next; 9067 while (top_next != NULL && 9068 (top_next->type == type || top_next->type == negtype)) 9069 { 9070 top_next = top_next->next; 9071 } 9072 if (expiredok) { 9073 /* 9074 * Keep walking down the list if possible or 9075 * start the next type. 9076 */ 9077 header = header->down != NULL ? header->down : top_next; 9078 } else { 9079 header = top_next; 9080 } 9081 for (; header != NULL; header = top_next) { 9082 top_next = header->next; 9083 do { 9084 if (expiredok) { 9085 if (!NONEXISTENT(header)) { 9086 break; 9087 } 9088 header = header->down; 9089 } else if (header->serial <= serial && !IGNORE(header)) 9090 { 9091 if (!iterator_active(rbtdb, rbtiterator, 9092 header)) 9093 { 9094 header = NULL; 9095 } 9096 break; 9097 } else { 9098 header = header->down; 9099 } 9100 } while (header != NULL); 9101 if (header != NULL) { 9102 break; 9103 } 9104 /* 9105 * Find the start of the header chain for the next type 9106 * by walking back up the list. 
9107 */ 9108 while (top_next != NULL && 9109 (top_next->type == type || top_next->type == negtype)) 9110 { 9111 top_next = top_next->next; 9112 } 9113 } 9114 9115 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9116 isc_rwlocktype_read); 9117 9118 rbtiterator->current = header; 9119 9120 if (header == NULL) { 9121 return (ISC_R_NOMORE); 9122 } 9123 9124 return (ISC_R_SUCCESS); 9125 } 9126 9127 static void 9128 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) { 9129 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 9130 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 9131 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 9132 rdatasetheader_t *header; 9133 9134 header = rbtiterator->current; 9135 REQUIRE(header != NULL); 9136 9137 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9138 isc_rwlocktype_read); 9139 9140 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now, 9141 isc_rwlocktype_read, rdataset); 9142 9143 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9144 isc_rwlocktype_read); 9145 } 9146 9147 /* 9148 * Database Iterator Methods 9149 */ 9150 9151 static void 9152 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) { 9153 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9154 dns_rbtnode_t *node = rbtdbiter->node; 9155 9156 if (node == NULL) { 9157 return; 9158 } 9159 9160 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none); 9161 reactivate_node(rbtdb, node, rbtdbiter->tree_locked); 9162 } 9163 9164 static void 9165 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) { 9166 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9167 dns_rbtnode_t *node = rbtdbiter->node; 9168 nodelock_t *lock; 9169 9170 if (node == NULL) { 9171 return; 9172 } 9173 9174 lock = &rbtdb->node_locks[node->locknum].lock; 9175 NODE_LOCK(lock, isc_rwlocktype_read); 9176 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, 9177 rbtdbiter->tree_locked, false); 9178 
NODE_UNLOCK(lock, isc_rwlocktype_read); 9179 9180 rbtdbiter->node = NULL; 9181 } 9182 9183 static void 9184 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) { 9185 dns_rbtnode_t *node; 9186 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9187 bool was_read_locked = false; 9188 nodelock_t *lock; 9189 int i; 9190 9191 if (rbtdbiter->delcnt != 0) { 9192 /* 9193 * Note that "%d node of %d in tree" can report things like 9194 * "flush_deletions: 59 nodes of 41 in tree". This means 9195 * That some nodes appear on the deletions list more than 9196 * once. Only the last occurrence will actually be deleted. 9197 */ 9198 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 9199 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 9200 "flush_deletions: %d nodes of %d in tree", 9201 rbtdbiter->delcnt, 9202 dns_rbt_nodecount(rbtdb->tree)); 9203 9204 if (rbtdbiter->tree_locked == isc_rwlocktype_read) { 9205 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9206 was_read_locked = true; 9207 } 9208 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 9209 rbtdbiter->tree_locked = isc_rwlocktype_write; 9210 9211 for (i = 0; i < rbtdbiter->delcnt; i++) { 9212 node = rbtdbiter->deletions[i]; 9213 lock = &rbtdb->node_locks[node->locknum].lock; 9214 9215 NODE_LOCK(lock, isc_rwlocktype_read); 9216 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, 9217 rbtdbiter->tree_locked, false); 9218 NODE_UNLOCK(lock, isc_rwlocktype_read); 9219 } 9220 9221 rbtdbiter->delcnt = 0; 9222 9223 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 9224 if (was_read_locked) { 9225 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9226 rbtdbiter->tree_locked = isc_rwlocktype_read; 9227 } else { 9228 rbtdbiter->tree_locked = isc_rwlocktype_none; 9229 } 9230 } 9231 } 9232 9233 static void 9234 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) { 9235 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9236 9237 REQUIRE(rbtdbiter->paused); 9238 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none); 9239 9240 
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9241 rbtdbiter->tree_locked = isc_rwlocktype_read; 9242 9243 rbtdbiter->paused = false; 9244 } 9245 9246 static void 9247 dbiterator_destroy(dns_dbiterator_t **iteratorp) { 9248 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp); 9249 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9250 dns_db_t *db = NULL; 9251 9252 if (rbtdbiter->tree_locked == isc_rwlocktype_read) { 9253 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9254 rbtdbiter->tree_locked = isc_rwlocktype_none; 9255 } else { 9256 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none); 9257 } 9258 9259 dereference_iter_node(rbtdbiter); 9260 9261 flush_deletions(rbtdbiter); 9262 9263 dns_db_attach(rbtdbiter->common.db, &db); 9264 dns_db_detach(&rbtdbiter->common.db); 9265 9266 dns_rbtnodechain_reset(&rbtdbiter->chain); 9267 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9268 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter)); 9269 dns_db_detach(&db); 9270 9271 *iteratorp = NULL; 9272 } 9273 9274 static isc_result_t 9275 dbiterator_first(dns_dbiterator_t *iterator) { 9276 isc_result_t result; 9277 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9278 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9279 dns_name_t *name, *origin; 9280 9281 if (rbtdbiter->result != ISC_R_SUCCESS && 9282 rbtdbiter->result != ISC_R_NOTFOUND && 9283 rbtdbiter->result != DNS_R_PARTIALMATCH && 9284 rbtdbiter->result != ISC_R_NOMORE) 9285 { 9286 return (rbtdbiter->result); 9287 } 9288 9289 if (rbtdbiter->paused) { 9290 resume_iteration(rbtdbiter); 9291 } 9292 9293 dereference_iter_node(rbtdbiter); 9294 9295 name = dns_fixedname_name(&rbtdbiter->name); 9296 origin = dns_fixedname_name(&rbtdbiter->origin); 9297 dns_rbtnodechain_reset(&rbtdbiter->chain); 9298 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9299 9300 switch (rbtdbiter->nsec3mode) { 9301 case nsec3only: 9302 rbtdbiter->current = &rbtdbiter->nsec3chain; 9303 result = 
dns_rbtnodechain_first(rbtdbiter->current, 9304 rbtdb->nsec3, name, origin); 9305 break; 9306 case nonsec3: 9307 rbtdbiter->current = &rbtdbiter->chain; 9308 result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, 9309 name, origin); 9310 break; 9311 case full: 9312 rbtdbiter->current = &rbtdbiter->chain; 9313 result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, 9314 name, origin); 9315 if (result == ISC_R_NOTFOUND) { 9316 rbtdbiter->current = &rbtdbiter->nsec3chain; 9317 result = dns_rbtnodechain_first( 9318 rbtdbiter->current, rbtdb->nsec3, name, origin); 9319 } 9320 break; 9321 default: 9322 UNREACHABLE(); 9323 } 9324 9325 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 9326 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9327 NULL, &rbtdbiter->node); 9328 9329 /* If we're in the NSEC3 tree, skip the origin */ 9330 if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { 9331 rbtdbiter->node = NULL; 9332 result = dns_rbtnodechain_next(rbtdbiter->current, name, 9333 origin); 9334 if (result == ISC_R_SUCCESS || 9335 result == DNS_R_NEWORIGIN) 9336 { 9337 result = dns_rbtnodechain_current( 9338 rbtdbiter->current, NULL, NULL, 9339 &rbtdbiter->node); 9340 } 9341 } 9342 if (result == ISC_R_SUCCESS) { 9343 rbtdbiter->new_origin = true; 9344 reference_iter_node(rbtdbiter); 9345 } 9346 } else { 9347 INSIST(result == ISC_R_NOTFOUND); 9348 result = ISC_R_NOMORE; /* The tree is empty. 
*/ 9349 } 9350 9351 rbtdbiter->result = result; 9352 9353 if (result != ISC_R_SUCCESS) { 9354 ENSURE(!rbtdbiter->paused); 9355 } 9356 9357 return (result); 9358 } 9359 9360 static isc_result_t 9361 dbiterator_last(dns_dbiterator_t *iterator) { 9362 isc_result_t result; 9363 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9364 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9365 dns_name_t *name, *origin; 9366 9367 if (rbtdbiter->result != ISC_R_SUCCESS && 9368 rbtdbiter->result != ISC_R_NOTFOUND && 9369 rbtdbiter->result != DNS_R_PARTIALMATCH && 9370 rbtdbiter->result != ISC_R_NOMORE) 9371 { 9372 return (rbtdbiter->result); 9373 } 9374 9375 if (rbtdbiter->paused) { 9376 resume_iteration(rbtdbiter); 9377 } 9378 9379 dereference_iter_node(rbtdbiter); 9380 9381 name = dns_fixedname_name(&rbtdbiter->name); 9382 origin = dns_fixedname_name(&rbtdbiter->origin); 9383 dns_rbtnodechain_reset(&rbtdbiter->chain); 9384 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9385 9386 switch (rbtdbiter->nsec3mode) { 9387 case nsec3only: 9388 rbtdbiter->current = &rbtdbiter->nsec3chain; 9389 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, 9390 name, origin); 9391 break; 9392 case nonsec3: 9393 rbtdbiter->current = &rbtdbiter->chain; 9394 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, 9395 name, origin); 9396 break; 9397 case full: 9398 rbtdbiter->current = &rbtdbiter->nsec3chain; 9399 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, 9400 name, origin); 9401 if (result == ISC_R_NOTFOUND) { 9402 rbtdbiter->current = &rbtdbiter->chain; 9403 result = dns_rbtnodechain_last( 9404 rbtdbiter->current, rbtdb->tree, name, origin); 9405 } 9406 break; 9407 default: 9408 UNREACHABLE(); 9409 } 9410 9411 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 9412 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9413 NULL, &rbtdbiter->node); 9414 if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { 9415 /* 9416 * NSEC3 
tree only has an origin node. 9417 */ 9418 rbtdbiter->node = NULL; 9419 switch (rbtdbiter->nsec3mode) { 9420 case nsec3only: 9421 result = ISC_R_NOMORE; 9422 break; 9423 case nonsec3: 9424 case full: 9425 rbtdbiter->current = &rbtdbiter->chain; 9426 result = dns_rbtnodechain_last( 9427 rbtdbiter->current, rbtdb->tree, name, 9428 origin); 9429 if (result == ISC_R_SUCCESS || 9430 result == DNS_R_NEWORIGIN) 9431 { 9432 result = dns_rbtnodechain_current( 9433 rbtdbiter->current, NULL, NULL, 9434 &rbtdbiter->node); 9435 } 9436 break; 9437 default: 9438 UNREACHABLE(); 9439 } 9440 } 9441 if (result == ISC_R_SUCCESS) { 9442 rbtdbiter->new_origin = true; 9443 reference_iter_node(rbtdbiter); 9444 } 9445 } else { 9446 INSIST(result == ISC_R_NOTFOUND); 9447 result = ISC_R_NOMORE; /* The tree is empty. */ 9448 } 9449 9450 rbtdbiter->result = result; 9451 9452 return (result); 9453 } 9454 9455 static isc_result_t 9456 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name) { 9457 isc_result_t result, tresult; 9458 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9459 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9460 dns_name_t *iname, *origin; 9461 9462 if (rbtdbiter->result != ISC_R_SUCCESS && 9463 rbtdbiter->result != ISC_R_NOTFOUND && 9464 rbtdbiter->result != DNS_R_PARTIALMATCH && 9465 rbtdbiter->result != ISC_R_NOMORE) 9466 { 9467 return (rbtdbiter->result); 9468 } 9469 9470 if (rbtdbiter->paused) { 9471 resume_iteration(rbtdbiter); 9472 } 9473 9474 dereference_iter_node(rbtdbiter); 9475 9476 iname = dns_fixedname_name(&rbtdbiter->name); 9477 origin = dns_fixedname_name(&rbtdbiter->origin); 9478 dns_rbtnodechain_reset(&rbtdbiter->chain); 9479 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9480 9481 switch (rbtdbiter->nsec3mode) { 9482 case nsec3only: 9483 rbtdbiter->current = &rbtdbiter->nsec3chain; 9484 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, 9485 &rbtdbiter->node, rbtdbiter->current, 9486 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 
9487 break; 9488 case nonsec3: 9489 rbtdbiter->current = &rbtdbiter->chain; 9490 result = dns_rbt_findnode(rbtdb->tree, name, NULL, 9491 &rbtdbiter->node, rbtdbiter->current, 9492 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 9493 break; 9494 case full: 9495 /* 9496 * Stay on main chain if not found on either chain. 9497 */ 9498 rbtdbiter->current = &rbtdbiter->chain; 9499 result = dns_rbt_findnode(rbtdb->tree, name, NULL, 9500 &rbtdbiter->node, rbtdbiter->current, 9501 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 9502 if (result == DNS_R_PARTIALMATCH) { 9503 dns_rbtnode_t *node = NULL; 9504 tresult = dns_rbt_findnode( 9505 rbtdb->nsec3, name, NULL, &node, 9506 &rbtdbiter->nsec3chain, DNS_RBTFIND_EMPTYDATA, 9507 NULL, NULL); 9508 if (tresult == ISC_R_SUCCESS) { 9509 rbtdbiter->node = node; 9510 rbtdbiter->current = &rbtdbiter->nsec3chain; 9511 result = tresult; 9512 } 9513 } 9514 break; 9515 default: 9516 UNREACHABLE(); 9517 } 9518 9519 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) { 9520 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname, 9521 origin, NULL); 9522 if (tresult == ISC_R_SUCCESS) { 9523 rbtdbiter->new_origin = true; 9524 reference_iter_node(rbtdbiter); 9525 } else { 9526 result = tresult; 9527 rbtdbiter->node = NULL; 9528 } 9529 } else { 9530 rbtdbiter->node = NULL; 9531 } 9532 9533 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ? 
ISC_R_SUCCESS 9534 : result; 9535 9536 return (result); 9537 } 9538 9539 static isc_result_t 9540 dbiterator_prev(dns_dbiterator_t *iterator) { 9541 isc_result_t result; 9542 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9543 dns_name_t *name, *origin; 9544 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9545 9546 REQUIRE(rbtdbiter->node != NULL); 9547 9548 if (rbtdbiter->result != ISC_R_SUCCESS) { 9549 return (rbtdbiter->result); 9550 } 9551 9552 if (rbtdbiter->paused) { 9553 resume_iteration(rbtdbiter); 9554 } 9555 9556 dereference_iter_node(rbtdbiter); 9557 9558 name = dns_fixedname_name(&rbtdbiter->name); 9559 origin = dns_fixedname_name(&rbtdbiter->origin); 9560 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin); 9561 if (rbtdbiter->current == &rbtdbiter->nsec3chain && 9562 (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN)) 9563 { 9564 /* 9565 * If we're in the NSEC3 tree, it's empty or we've 9566 * reached the origin, then we're done with it. 
9567 */ 9568 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9569 NULL, &rbtdbiter->node); 9570 if (result == ISC_R_NOTFOUND || 9571 RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) 9572 { 9573 rbtdbiter->node = NULL; 9574 result = ISC_R_NOMORE; 9575 } 9576 } 9577 if (result == ISC_R_NOMORE && rbtdbiter->nsec3mode != nsec3only && 9578 &rbtdbiter->nsec3chain == rbtdbiter->current) 9579 { 9580 rbtdbiter->current = &rbtdbiter->chain; 9581 dns_rbtnodechain_reset(rbtdbiter->current); 9582 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, 9583 name, origin); 9584 if (result == ISC_R_NOTFOUND) { 9585 result = ISC_R_NOMORE; 9586 } 9587 } 9588 9589 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { 9590 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); 9591 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9592 NULL, &rbtdbiter->node); 9593 } 9594 9595 if (result == ISC_R_SUCCESS) { 9596 reference_iter_node(rbtdbiter); 9597 } 9598 9599 rbtdbiter->result = result; 9600 9601 return (result); 9602 } 9603 9604 static isc_result_t 9605 dbiterator_next(dns_dbiterator_t *iterator) { 9606 isc_result_t result; 9607 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9608 dns_name_t *name, *origin; 9609 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9610 9611 REQUIRE(rbtdbiter->node != NULL); 9612 9613 if (rbtdbiter->result != ISC_R_SUCCESS) { 9614 return (rbtdbiter->result); 9615 } 9616 9617 if (rbtdbiter->paused) { 9618 resume_iteration(rbtdbiter); 9619 } 9620 9621 name = dns_fixedname_name(&rbtdbiter->name); 9622 origin = dns_fixedname_name(&rbtdbiter->origin); 9623 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin); 9624 if (result == ISC_R_NOMORE && rbtdbiter->nsec3mode != nonsec3 && 9625 &rbtdbiter->chain == rbtdbiter->current) 9626 { 9627 rbtdbiter->current = &rbtdbiter->nsec3chain; 9628 dns_rbtnodechain_reset(rbtdbiter->current); 9629 result = dns_rbtnodechain_first(rbtdbiter->current, 9630 
rbtdb->nsec3, name, origin); 9631 if (result == ISC_R_NOTFOUND) { 9632 result = ISC_R_NOMORE; 9633 } 9634 } 9635 9636 dereference_iter_node(rbtdbiter); 9637 9638 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { 9639 /* 9640 * If we've just started the NSEC3 tree, 9641 * skip over the origin. 9642 */ 9643 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); 9644 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9645 NULL, &rbtdbiter->node); 9646 if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { 9647 rbtdbiter->node = NULL; 9648 result = dns_rbtnodechain_next(rbtdbiter->current, name, 9649 origin); 9650 if (result == ISC_R_SUCCESS || 9651 result == DNS_R_NEWORIGIN) 9652 { 9653 result = dns_rbtnodechain_current( 9654 rbtdbiter->current, NULL, NULL, 9655 &rbtdbiter->node); 9656 } 9657 } 9658 } 9659 if (result == ISC_R_SUCCESS) { 9660 reference_iter_node(rbtdbiter); 9661 } 9662 9663 rbtdbiter->result = result; 9664 9665 return (result); 9666 } 9667 9668 static isc_result_t 9669 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 9670 dns_name_t *name) { 9671 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9672 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9673 dns_rbtnode_t *node = rbtdbiter->node; 9674 isc_result_t result; 9675 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name); 9676 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); 9677 9678 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS); 9679 REQUIRE(rbtdbiter->node != NULL); 9680 9681 if (rbtdbiter->paused) { 9682 resume_iteration(rbtdbiter); 9683 } 9684 9685 if (name != NULL) { 9686 if (rbtdbiter->common.relative_names) { 9687 origin = NULL; 9688 } 9689 result = dns_name_concatenate(nodename, origin, name, NULL); 9690 if (result != ISC_R_SUCCESS) { 9691 return (result); 9692 } 9693 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin) { 9694 result = DNS_R_NEWORIGIN; 9695 } 9696 } else { 9697 result = ISC_R_SUCCESS; 9698 } 9699 
9700 new_reference(rbtdb, node, isc_rwlocktype_none); 9701 9702 *nodep = rbtdbiter->node; 9703 9704 if (iterator->cleaning && result == ISC_R_SUCCESS) { 9705 isc_result_t expire_result; 9706 9707 /* 9708 * If the deletion array is full, flush it before trying 9709 * to expire the current node. The current node can't 9710 * fully deleted while the iteration cursor is still on it. 9711 */ 9712 if (rbtdbiter->delcnt == DELETION_BATCH_MAX) { 9713 flush_deletions(rbtdbiter); 9714 } 9715 9716 expire_result = expirenode(iterator->db, *nodep, 0); 9717 9718 /* 9719 * expirenode() currently always returns success. 9720 */ 9721 if (expire_result == ISC_R_SUCCESS && node->down == NULL) { 9722 rbtdbiter->deletions[rbtdbiter->delcnt++] = node; 9723 isc_refcount_increment(&node->references); 9724 } 9725 } 9726 9727 return (result); 9728 } 9729 9730 static isc_result_t 9731 dbiterator_pause(dns_dbiterator_t *iterator) { 9732 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9733 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9734 9735 if (rbtdbiter->result != ISC_R_SUCCESS && 9736 rbtdbiter->result != ISC_R_NOTFOUND && 9737 rbtdbiter->result != DNS_R_PARTIALMATCH && 9738 rbtdbiter->result != ISC_R_NOMORE) 9739 { 9740 return (rbtdbiter->result); 9741 } 9742 9743 if (rbtdbiter->paused) { 9744 return (ISC_R_SUCCESS); 9745 } 9746 9747 rbtdbiter->paused = true; 9748 9749 if (rbtdbiter->tree_locked != isc_rwlocktype_none) { 9750 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read); 9751 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9752 rbtdbiter->tree_locked = isc_rwlocktype_none; 9753 } 9754 9755 flush_deletions(rbtdbiter); 9756 9757 return (ISC_R_SUCCESS); 9758 } 9759 9760 static isc_result_t 9761 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) { 9762 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9763 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); 9764 9765 if (rbtdbiter->result != ISC_R_SUCCESS) { 9766 return 
(rbtdbiter->result); 9767 } 9768 9769 dns_name_copy(origin, name); 9770 return (ISC_R_SUCCESS); 9771 } 9772 9773 static void 9774 setownercase(rdatasetheader_t *header, const dns_name_t *name) { 9775 unsigned int i; 9776 bool fully_lower; 9777 9778 /* 9779 * We do not need to worry about label lengths as they are all 9780 * less than or equal to 63. 9781 */ 9782 memset(header->upper, 0, sizeof(header->upper)); 9783 fully_lower = true; 9784 for (i = 0; i < name->length; i++) { 9785 if (isupper(name->ndata[i])) { 9786 header->upper[i / 8] |= 1 << (i % 8); 9787 fully_lower = false; 9788 } 9789 } 9790 RDATASET_ATTR_SET(header, RDATASET_ATTR_CASESET); 9791 if (fully_lower) { 9792 RDATASET_ATTR_SET(header, RDATASET_ATTR_CASEFULLYLOWER); 9793 } 9794 } 9795 9796 static void 9797 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name) { 9798 dns_rbtdb_t *rbtdb = rdataset->private1; 9799 dns_rbtnode_t *rbtnode = rdataset->private2; 9800 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 9801 rdatasetheader_t *header; 9802 9803 header = (struct rdatasetheader *)(raw - sizeof(*header)); 9804 9805 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9806 isc_rwlocktype_write); 9807 setownercase(header, name); 9808 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9809 isc_rwlocktype_write); 9810 } 9811 9812 static void 9813 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name) { 9814 dns_rbtdb_t *rbtdb = rdataset->private1; 9815 dns_rbtnode_t *rbtnode = rdataset->private2; 9816 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 9817 rdatasetheader_t *header = NULL; 9818 uint8_t mask = (1 << 7); 9819 uint8_t bits = 0; 9820 9821 header = (struct rdatasetheader *)(raw - sizeof(*header)); 9822 9823 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9824 isc_rwlocktype_read); 9825 9826 if (!CASESET(header)) { 9827 goto unlock; 9828 } 9829 9830 if (CASEFULLYLOWER(header)) { 9831 for (size_t i = 0; i < name->length; i++) { 9832 
name->ndata[i] = tolower(name->ndata[i]); 9833 } 9834 } else { 9835 for (size_t i = 0; i < name->length; i++) { 9836 if (mask == (1 << 7)) { 9837 bits = header->upper[i / 8]; 9838 mask = 1; 9839 } else { 9840 mask <<= 1; 9841 } 9842 9843 name->ndata[i] = ((bits & mask) != 0) 9844 ? toupper(name->ndata[i]) 9845 : tolower(name->ndata[i]); 9846 } 9847 } 9848 9849 unlock: 9850 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9851 isc_rwlocktype_read); 9852 } 9853 9854 struct rbtdb_glue { 9855 struct rbtdb_glue *next; 9856 dns_fixedname_t fixedname; 9857 dns_rdataset_t rdataset_a; 9858 dns_rdataset_t sigrdataset_a; 9859 dns_rdataset_t rdataset_aaaa; 9860 dns_rdataset_t sigrdataset_aaaa; 9861 }; 9862 9863 typedef struct { 9864 rbtdb_glue_t *glue_list; 9865 dns_rbtdb_t *rbtdb; 9866 rbtdb_version_t *rbtversion; 9867 } rbtdb_glue_additionaldata_ctx_t; 9868 9869 static void 9870 free_gluelist(rbtdb_glue_t *glue_list, dns_rbtdb_t *rbtdb) { 9871 rbtdb_glue_t *cur, *cur_next; 9872 9873 if (glue_list == (void *)-1) { 9874 return; 9875 } 9876 9877 cur = glue_list; 9878 while (cur != NULL) { 9879 cur_next = cur->next; 9880 9881 if (dns_rdataset_isassociated(&cur->rdataset_a)) { 9882 dns_rdataset_disassociate(&cur->rdataset_a); 9883 } 9884 if (dns_rdataset_isassociated(&cur->sigrdataset_a)) { 9885 dns_rdataset_disassociate(&cur->sigrdataset_a); 9886 } 9887 9888 if (dns_rdataset_isassociated(&cur->rdataset_aaaa)) { 9889 dns_rdataset_disassociate(&cur->rdataset_aaaa); 9890 } 9891 if (dns_rdataset_isassociated(&cur->sigrdataset_aaaa)) { 9892 dns_rdataset_disassociate(&cur->sigrdataset_aaaa); 9893 } 9894 9895 dns_rdataset_invalidate(&cur->rdataset_a); 9896 dns_rdataset_invalidate(&cur->sigrdataset_a); 9897 dns_rdataset_invalidate(&cur->rdataset_aaaa); 9898 dns_rdataset_invalidate(&cur->sigrdataset_aaaa); 9899 9900 isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur)); 9901 cur = cur_next; 9902 } 9903 } 9904 9905 static void 9906 free_gluetable(rbtdb_version_t *version) { 9907 
dns_rbtdb_t *rbtdb; 9908 size_t size, i; 9909 9910 RWLOCK(&version->glue_rwlock, isc_rwlocktype_write); 9911 9912 rbtdb = version->rbtdb; 9913 9914 for (i = 0; i < HASHSIZE(version->glue_table_bits); i++) { 9915 rbtdb_glue_table_node_t *cur, *cur_next; 9916 9917 cur = version->glue_table[i]; 9918 while (cur != NULL) { 9919 cur_next = cur->next; 9920 /* isc_refcount_decrement(&cur->node->references); */ 9921 cur->node = NULL; 9922 free_gluelist(cur->glue_list, rbtdb); 9923 cur->glue_list = NULL; 9924 isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur)); 9925 cur = cur_next; 9926 } 9927 version->glue_table[i] = NULL; 9928 } 9929 9930 size = HASHSIZE(version->glue_table_bits) * 9931 sizeof(*version->glue_table); 9932 isc_mem_put(rbtdb->common.mctx, version->glue_table, size); 9933 9934 RWUNLOCK(&version->glue_rwlock, isc_rwlocktype_write); 9935 } 9936 9937 static uint32_t 9938 rehash_bits(rbtdb_version_t *version, size_t newcount) { 9939 uint32_t oldbits = version->glue_table_bits; 9940 uint32_t newbits = oldbits; 9941 9942 while (newcount >= HASHSIZE(newbits) && 9943 newbits < RBTDB_GLUE_TABLE_MAX_BITS) 9944 { 9945 newbits += 1; 9946 } 9947 9948 return (newbits); 9949 } 9950 9951 /*% 9952 * Write lock (version->glue_rwlock) must be held. 
9953 */ 9954 static void 9955 rehash_gluetable(rbtdb_version_t *version) { 9956 uint32_t oldbits, newbits; 9957 size_t newsize, oldcount, i; 9958 rbtdb_glue_table_node_t **oldtable; 9959 9960 oldbits = version->glue_table_bits; 9961 oldcount = HASHSIZE(oldbits); 9962 oldtable = version->glue_table; 9963 9964 newbits = rehash_bits(version, version->glue_table_nodecount); 9965 newsize = HASHSIZE(newbits) * sizeof(version->glue_table[0]); 9966 9967 version->glue_table = isc_mem_get(version->rbtdb->common.mctx, newsize); 9968 version->glue_table_bits = newbits; 9969 memset(version->glue_table, 0, newsize); 9970 9971 for (i = 0; i < oldcount; i++) { 9972 rbtdb_glue_table_node_t *gluenode; 9973 rbtdb_glue_table_node_t *nextgluenode; 9974 for (gluenode = oldtable[i]; gluenode != NULL; 9975 gluenode = nextgluenode) 9976 { 9977 uint32_t hash = isc_hash32( 9978 &gluenode->node, sizeof(gluenode->node), true); 9979 uint32_t idx = hash_32(hash, newbits); 9980 nextgluenode = gluenode->next; 9981 gluenode->next = version->glue_table[idx]; 9982 version->glue_table[idx] = gluenode; 9983 } 9984 } 9985 9986 isc_mem_put(version->rbtdb->common.mctx, oldtable, 9987 oldcount * sizeof(*version->glue_table)); 9988 9989 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_ZONE, 9990 ISC_LOG_DEBUG(3), 9991 "rehash_gluetable(): " 9992 "resized glue table from %zu to " 9993 "%zu", 9994 oldcount, newsize / sizeof(version->glue_table[0])); 9995 } 9996 9997 static void 9998 maybe_rehash_gluetable(rbtdb_version_t *version) { 9999 size_t overcommit = HASHSIZE(version->glue_table_bits) * 10000 RBTDB_GLUE_TABLE_OVERCOMMIT; 10001 if (version->glue_table_nodecount < overcommit) { 10002 return; 10003 } 10004 10005 rehash_gluetable(version); 10006 } 10007 10008 static isc_result_t 10009 glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype, 10010 dns_rdataset_t *unused) { 10011 rbtdb_glue_additionaldata_ctx_t *ctx; 10012 isc_result_t result; 10013 dns_fixedname_t 
fixedname_a; 10014 dns_name_t *name_a = NULL; 10015 dns_rdataset_t rdataset_a, sigrdataset_a; 10016 dns_rbtnode_t *node_a = NULL; 10017 dns_fixedname_t fixedname_aaaa; 10018 dns_name_t *name_aaaa = NULL; 10019 dns_rdataset_t rdataset_aaaa, sigrdataset_aaaa; 10020 dns_rbtnode_t *node_aaaa = NULL; 10021 rbtdb_glue_t *glue = NULL; 10022 dns_name_t *gluename = NULL; 10023 10024 UNUSED(unused); 10025 10026 /* 10027 * NS records want addresses in additional records. 10028 */ 10029 INSIST(qtype == dns_rdatatype_a); 10030 10031 ctx = (rbtdb_glue_additionaldata_ctx_t *)arg; 10032 10033 name_a = dns_fixedname_initname(&fixedname_a); 10034 dns_rdataset_init(&rdataset_a); 10035 dns_rdataset_init(&sigrdataset_a); 10036 10037 name_aaaa = dns_fixedname_initname(&fixedname_aaaa); 10038 dns_rdataset_init(&rdataset_aaaa); 10039 dns_rdataset_init(&sigrdataset_aaaa); 10040 10041 result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion, 10042 dns_rdatatype_a, DNS_DBFIND_GLUEOK, 0, 10043 (dns_dbnode_t **)&node_a, name_a, &rdataset_a, 10044 &sigrdataset_a); 10045 if (result == DNS_R_GLUE) { 10046 glue = isc_mem_get(ctx->rbtdb->common.mctx, sizeof(*glue)); 10047 10048 gluename = dns_fixedname_initname(&glue->fixedname); 10049 dns_name_copy(name_a, gluename); 10050 10051 dns_rdataset_init(&glue->rdataset_a); 10052 dns_rdataset_init(&glue->sigrdataset_a); 10053 dns_rdataset_init(&glue->rdataset_aaaa); 10054 dns_rdataset_init(&glue->sigrdataset_aaaa); 10055 10056 dns_rdataset_clone(&rdataset_a, &glue->rdataset_a); 10057 if (dns_rdataset_isassociated(&sigrdataset_a)) { 10058 dns_rdataset_clone(&sigrdataset_a, 10059 &glue->sigrdataset_a); 10060 } 10061 } 10062 10063 result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion, 10064 dns_rdatatype_aaaa, DNS_DBFIND_GLUEOK, 0, 10065 (dns_dbnode_t **)&node_aaaa, name_aaaa, 10066 &rdataset_aaaa, &sigrdataset_aaaa); 10067 if (result == DNS_R_GLUE) { 10068 if (glue == NULL) { 10069 glue = isc_mem_get(ctx->rbtdb->common.mctx, 10070 
sizeof(*glue)); 10071 10072 gluename = dns_fixedname_initname(&glue->fixedname); 10073 dns_name_copy(name_aaaa, gluename); 10074 10075 dns_rdataset_init(&glue->rdataset_a); 10076 dns_rdataset_init(&glue->sigrdataset_a); 10077 dns_rdataset_init(&glue->rdataset_aaaa); 10078 dns_rdataset_init(&glue->sigrdataset_aaaa); 10079 } else { 10080 INSIST(node_a == node_aaaa); 10081 INSIST(dns_name_equal(name_a, name_aaaa)); 10082 } 10083 10084 dns_rdataset_clone(&rdataset_aaaa, &glue->rdataset_aaaa); 10085 if (dns_rdataset_isassociated(&sigrdataset_aaaa)) { 10086 dns_rdataset_clone(&sigrdataset_aaaa, 10087 &glue->sigrdataset_aaaa); 10088 } 10089 } 10090 10091 if (glue != NULL) { 10092 glue->next = ctx->glue_list; 10093 ctx->glue_list = glue; 10094 } 10095 10096 result = ISC_R_SUCCESS; 10097 10098 if (dns_rdataset_isassociated(&rdataset_a)) { 10099 rdataset_disassociate(&rdataset_a); 10100 } 10101 if (dns_rdataset_isassociated(&sigrdataset_a)) { 10102 rdataset_disassociate(&sigrdataset_a); 10103 } 10104 10105 if (dns_rdataset_isassociated(&rdataset_aaaa)) { 10106 rdataset_disassociate(&rdataset_aaaa); 10107 } 10108 if (dns_rdataset_isassociated(&sigrdataset_aaaa)) { 10109 rdataset_disassociate(&sigrdataset_aaaa); 10110 } 10111 10112 if (node_a != NULL) { 10113 detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_a); 10114 } 10115 if (node_aaaa != NULL) { 10116 detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_aaaa); 10117 } 10118 10119 return (result); 10120 } 10121 10122 static isc_result_t 10123 rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version, 10124 dns_message_t *msg) { 10125 dns_rbtdb_t *rbtdb = rdataset->private1; 10126 dns_rbtnode_t *node = rdataset->private2; 10127 rbtdb_version_t *rbtversion = version; 10128 uint32_t idx; 10129 rbtdb_glue_table_node_t *cur; 10130 bool found = false; 10131 bool restarted = false; 10132 rbtdb_glue_t *ge; 10133 rbtdb_glue_additionaldata_ctx_t ctx; 10134 isc_result_t result; 10135 uint64_t hash; 10136 10137 
REQUIRE(rdataset->type == dns_rdatatype_ns); 10138 REQUIRE(rbtdb == rbtversion->rbtdb); 10139 REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb)); 10140 10141 /* 10142 * The glue table cache that forms a part of the DB version 10143 * structure is not explicitly bounded and there's no cache 10144 * cleaning. The zone data size itself is an implicit bound. 10145 * 10146 * The key into the glue hashtable is the node pointer. This is 10147 * because the glue hashtable is a property of the DB version, 10148 * and the glue is keyed for the ownername/NS tuple. We don't 10149 * bother with using an expensive dns_name_t comparison here as 10150 * the node pointer is a fixed value that won't change for a DB 10151 * version and can be compared directly. 10152 */ 10153 hash = isc_hash_function(&node, sizeof(node), true); 10154 10155 restart: 10156 /* 10157 * First, check if we have the additional entries already cached 10158 * in the glue table. 10159 */ 10160 RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read); 10161 10162 idx = hash_32(hash, rbtversion->glue_table_bits); 10163 10164 for (cur = rbtversion->glue_table[idx]; cur != NULL; cur = cur->next) { 10165 if (cur->node == node) { 10166 break; 10167 } 10168 } 10169 10170 if (cur == NULL) { 10171 goto no_glue; 10172 } 10173 /* 10174 * We found a cached result. Add it to the message and 10175 * return. 10176 */ 10177 found = true; 10178 ge = cur->glue_list; 10179 10180 /* 10181 * (void *) -1 is a special value that means no glue is 10182 * present in the zone. 
10183 */ 10184 if (ge == (void *)-1) { 10185 if (!restarted && (rbtdb->gluecachestats != NULL)) { 10186 isc_stats_increment( 10187 rbtdb->gluecachestats, 10188 dns_gluecachestatscounter_hits_absent); 10189 } 10190 goto no_glue; 10191 } else { 10192 if (!restarted && (rbtdb->gluecachestats != NULL)) { 10193 isc_stats_increment( 10194 rbtdb->gluecachestats, 10195 dns_gluecachestatscounter_hits_present); 10196 } 10197 } 10198 10199 for (; ge != NULL; ge = ge->next) { 10200 dns_name_t *name = NULL; 10201 dns_rdataset_t *rdataset_a = NULL; 10202 dns_rdataset_t *sigrdataset_a = NULL; 10203 dns_rdataset_t *rdataset_aaaa = NULL; 10204 dns_rdataset_t *sigrdataset_aaaa = NULL; 10205 dns_name_t *gluename = dns_fixedname_name(&ge->fixedname); 10206 10207 result = dns_message_gettempname(msg, &name); 10208 if (result != ISC_R_SUCCESS) { 10209 goto no_glue; 10210 } 10211 10212 dns_name_copy(gluename, name); 10213 10214 if (dns_rdataset_isassociated(&ge->rdataset_a)) { 10215 result = dns_message_gettemprdataset(msg, &rdataset_a); 10216 if (result != ISC_R_SUCCESS) { 10217 dns_message_puttempname(msg, &name); 10218 goto no_glue; 10219 } 10220 } 10221 10222 if (dns_rdataset_isassociated(&ge->sigrdataset_a)) { 10223 result = dns_message_gettemprdataset(msg, 10224 &sigrdataset_a); 10225 if (result != ISC_R_SUCCESS) { 10226 if (rdataset_a != NULL) { 10227 dns_message_puttemprdataset( 10228 msg, &rdataset_a); 10229 } 10230 dns_message_puttempname(msg, &name); 10231 goto no_glue; 10232 } 10233 } 10234 10235 if (dns_rdataset_isassociated(&ge->rdataset_aaaa)) { 10236 result = dns_message_gettemprdataset(msg, 10237 &rdataset_aaaa); 10238 if (result != ISC_R_SUCCESS) { 10239 dns_message_puttempname(msg, &name); 10240 if (rdataset_a != NULL) { 10241 dns_message_puttemprdataset( 10242 msg, &rdataset_a); 10243 } 10244 if (sigrdataset_a != NULL) { 10245 dns_message_puttemprdataset( 10246 msg, &sigrdataset_a); 10247 } 10248 goto no_glue; 10249 } 10250 } 10251 10252 if 
(dns_rdataset_isassociated(&ge->sigrdataset_aaaa)) { 10253 result = dns_message_gettemprdataset(msg, 10254 &sigrdataset_aaaa); 10255 if (result != ISC_R_SUCCESS) { 10256 dns_message_puttempname(msg, &name); 10257 if (rdataset_a != NULL) { 10258 dns_message_puttemprdataset( 10259 msg, &rdataset_a); 10260 } 10261 if (sigrdataset_a != NULL) { 10262 dns_message_puttemprdataset( 10263 msg, &sigrdataset_a); 10264 } 10265 if (rdataset_aaaa != NULL) { 10266 dns_message_puttemprdataset( 10267 msg, &rdataset_aaaa); 10268 } 10269 goto no_glue; 10270 } 10271 } 10272 10273 if (rdataset_a != NULL) { 10274 dns_rdataset_clone(&ge->rdataset_a, rdataset_a); 10275 ISC_LIST_APPEND(name->list, rdataset_a, link); 10276 } 10277 10278 if (sigrdataset_a != NULL) { 10279 dns_rdataset_clone(&ge->sigrdataset_a, sigrdataset_a); 10280 ISC_LIST_APPEND(name->list, sigrdataset_a, link); 10281 } 10282 10283 if (rdataset_aaaa != NULL) { 10284 dns_rdataset_clone(&ge->rdataset_aaaa, rdataset_aaaa); 10285 ISC_LIST_APPEND(name->list, rdataset_aaaa, link); 10286 } 10287 if (sigrdataset_aaaa != NULL) { 10288 dns_rdataset_clone(&ge->sigrdataset_aaaa, 10289 sigrdataset_aaaa); 10290 ISC_LIST_APPEND(name->list, sigrdataset_aaaa, link); 10291 } 10292 10293 dns_message_addname(msg, name, DNS_SECTION_ADDITIONAL); 10294 } 10295 10296 no_glue: 10297 RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read); 10298 10299 if (found) { 10300 return (ISC_R_SUCCESS); 10301 } 10302 10303 if (restarted) { 10304 return (ISC_R_FAILURE); 10305 } 10306 10307 /* 10308 * No cached glue was found in the table. Cache it and restart 10309 * this function. 10310 * 10311 * Due to the gap between the read lock and the write lock, it's 10312 * possible that we may cache a duplicate glue table entry, but 10313 * we don't care. 
10314 */ 10315 10316 ctx.glue_list = NULL; 10317 ctx.rbtdb = rbtdb; 10318 ctx.rbtversion = rbtversion; 10319 10320 RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write); 10321 10322 maybe_rehash_gluetable(rbtversion); 10323 idx = hash_32(hash, rbtversion->glue_table_bits); 10324 10325 (void)dns_rdataset_additionaldata(rdataset, dns_rootname, 10326 glue_nsdname_cb, &ctx); 10327 10328 cur = isc_mem_get(rbtdb->common.mctx, sizeof(*cur)); 10329 10330 /* 10331 * XXXMUKS: it looks like the dns_dbversion is not destroyed 10332 * when named is terminated by a keyboard break. This doesn't 10333 * cleanup the node reference and keeps the process dangling. 10334 */ 10335 /* isc_refcount_increment0(&node->references); */ 10336 cur->node = node; 10337 10338 if (ctx.glue_list == NULL) { 10339 /* 10340 * No glue was found. Cache it so. 10341 */ 10342 cur->glue_list = (void *)-1; 10343 if (rbtdb->gluecachestats != NULL) { 10344 isc_stats_increment( 10345 rbtdb->gluecachestats, 10346 dns_gluecachestatscounter_inserts_absent); 10347 } 10348 } else { 10349 cur->glue_list = ctx.glue_list; 10350 if (rbtdb->gluecachestats != NULL) { 10351 isc_stats_increment( 10352 rbtdb->gluecachestats, 10353 dns_gluecachestatscounter_inserts_present); 10354 } 10355 } 10356 10357 cur->next = rbtversion->glue_table[idx]; 10358 rbtversion->glue_table[idx] = cur; 10359 rbtversion->glue_table_nodecount++; 10360 10361 RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write); 10362 10363 restarted = true; 10364 goto restart; 10365 10366 /* UNREACHABLE */ 10367 } 10368 10369 /*% 10370 * Routines for LRU-based cache management. 10371 */ 10372 10373 /*% 10374 * See if a given cache entry that is being reused needs to be updated 10375 * in the LRU-list. From the LRU management point of view, this function is 10376 * expected to return true for almost all cases. 
When used with threads, 10377 * however, this may cause a non-negligible performance penalty because a 10378 * writer lock will have to be acquired before updating the list. 10379 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this 10380 * function returns true if the entry has not been updated for some period of 10381 * time. We differentiate the NS or glue address case and the others since 10382 * experiments have shown that the former tends to be accessed relatively 10383 * infrequently and the cost of cache miss is higher (e.g., a missing NS records 10384 * may cause external queries at a higher level zone, involving more 10385 * transactions). 10386 * 10387 * Caller must hold the node (read or write) lock. 10388 */ 10389 static bool 10390 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) { 10391 if (RDATASET_ATTR_GET(header, (RDATASET_ATTR_NONEXISTENT | 10392 RDATASET_ATTR_ANCIENT | 10393 RDATASET_ATTR_ZEROTTL)) != 0) 10394 { 10395 return (false); 10396 } 10397 10398 #if DNS_RBTDB_LIMITLRUUPDATE 10399 if (header->type == dns_rdatatype_ns || 10400 (header->trust == dns_trust_glue && 10401 (header->type == dns_rdatatype_a || 10402 header->type == dns_rdatatype_aaaa))) 10403 { 10404 /* 10405 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE 10406 * seconds have passed since the previous update time. 10407 */ 10408 return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now); 10409 } 10410 10411 /* 10412 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds 10413 * have passed. 10414 */ 10415 return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now); 10416 #else 10417 UNUSED(now); 10418 10419 return (true); 10420 #endif /* if DNS_RBTDB_LIMITLRUUPDATE */ 10421 } 10422 10423 /*% 10424 * Update the timestamp of a given cache entry and move it to the head 10425 * of the corresponding LRU list. 10426 * 10427 * Caller must hold the node (write) lock. 
10428 * 10429 * Note that the we do NOT touch the heap here, as the TTL has not changed. 10430 */ 10431 static void 10432 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) { 10433 INSIST(IS_CACHE(rbtdb)); 10434 10435 /* To be checked: can we really assume this? XXXMLG */ 10436 INSIST(ISC_LINK_LINKED(header, link)); 10437 10438 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link); 10439 header->last_used = now; 10440 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link); 10441 } 10442 10443 static size_t 10444 expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize, 10445 bool tree_locked) { 10446 rdatasetheader_t *header; 10447 size_t purged = 0; 10448 10449 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); 10450 header != NULL && 10451 header->last_used <= atomic_load(&rbtdb->last_used) && 10452 purged <= purgesize; 10453 header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum])) 10454 { 10455 /* 10456 * Unlink the entry at this point to avoid checking it 10457 * again even if it's currently used someone else and 10458 * cannot be purged at this moment. This entry won't be 10459 * referenced any more (so unlinking is safe) since the 10460 * TTL will be reset to 0. 10461 */ 10462 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link); 10463 size_t header_size = rdataset_size(header); 10464 expire_header(rbtdb, header, tree_locked, expire_lru); 10465 purged += header_size; 10466 } 10467 10468 return (purged); 10469 } 10470 10471 /*% 10472 * Purge some stale (i.e. unused for some period - LRU based cleaning) cache 10473 * entries under the overmem condition. To recover from this condition quickly, 10474 * we cleanup entries up to the size of newly added rdata (passed as purgesize). 10475 * 10476 * The LRU lists tails are processed in LRU order to the nearest second. 10477 * 10478 * A write lock on the tree must be held. 
10479 */ 10480 static void 10481 overmem_purge(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, 10482 bool tree_locked) { 10483 uint32_t locknum_start = atomic_fetch_add(&rbtdb->lru_sweep, 1) % 10484 rbtdb->node_lock_count; 10485 uint32_t locknum = locknum_start; 10486 /* Size of added data, possible node and possible ENT node. */ 10487 size_t purgesize = rdataset_size(newheader) + 10488 2 * dns__rbtnode_getsize(newheader->node); 10489 size_t purged = 0; 10490 isc_stdtime_t min_last_used = 0; 10491 size_t max_passes = 8; 10492 10493 again: 10494 do { 10495 NODE_LOCK(&rbtdb->node_locks[locknum].lock, 10496 isc_rwlocktype_write); 10497 10498 purged += expire_lru_headers(rbtdb, locknum, purgesize - purged, 10499 tree_locked); 10500 10501 /* 10502 * Work out the oldest remaining last_used values of the list 10503 * tails as we walk across the array of lru lists. 10504 */ 10505 rdatasetheader_t *header = 10506 ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); 10507 if (header != NULL && 10508 (min_last_used == 0 || header->last_used < min_last_used)) 10509 { 10510 min_last_used = header->last_used; 10511 } 10512 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 10513 isc_rwlocktype_write); 10514 locknum = (locknum + 1) % rbtdb->node_lock_count; 10515 } while (locknum != locknum_start && purged <= purgesize); 10516 10517 /* 10518 * Update rbtdb->last_used if we have walked all the list tails and have 10519 * not freed the required amount of memory. 10520 */ 10521 if (purged < purgesize) { 10522 if (min_last_used != 0) { 10523 atomic_store(&rbtdb->last_used, min_last_used); 10524 if (max_passes-- > 0) { 10525 goto again; 10526 } 10527 } 10528 } 10529 } 10530 10531 static void 10532 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked, 10533 expire_t reason) { 10534 set_ttl(rbtdb, header, 0); 10535 mark_header_ancient(rbtdb, header); 10536 10537 /* 10538 * Caller must hold the node (write) lock. 
10539 */ 10540 10541 if (isc_refcount_current(&header->node->references) == 0) { 10542 /* 10543 * If no one else is using the node, we can clean it up now. 10544 * We first need to gain a new reference to the node to meet a 10545 * requirement of decrement_reference(). 10546 */ 10547 new_reference(rbtdb, header->node, isc_rwlocktype_write); 10548 decrement_reference(rbtdb, header->node, 0, 10549 isc_rwlocktype_write, 10550 tree_locked ? isc_rwlocktype_write 10551 : isc_rwlocktype_none, 10552 false); 10553 10554 if (rbtdb->cachestats == NULL) { 10555 return; 10556 } 10557 10558 switch (reason) { 10559 case expire_ttl: 10560 isc_stats_increment(rbtdb->cachestats, 10561 dns_cachestatscounter_deletettl); 10562 break; 10563 case expire_lru: 10564 isc_stats_increment(rbtdb->cachestats, 10565 dns_cachestatscounter_deletelru); 10566 break; 10567 default: 10568 break; 10569 } 10570 } 10571 } 10572 10573 /* 10574 * Caller must be holding the node write lock. 10575 */ 10576 static void 10577 expire_ttl_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, bool tree_locked, 10578 isc_stdtime_t now) { 10579 isc_heap_t *heap = rbtdb->heaps[locknum]; 10580 10581 for (size_t i = 0; i < DNS_RBTDB_EXPIRE_TTL_COUNT; i++) { 10582 rdatasetheader_t *header = isc_heap_element(heap, 1); 10583 10584 if (header == NULL) { 10585 /* No headers left on this TTL heap; exit cleaning */ 10586 return; 10587 } 10588 10589 dns_ttl_t ttl = header->rdh_ttl; 10590 10591 if (!isc_mem_isovermem(rbtdb->common.mctx)) { 10592 /* Only account for stale TTL if cache is not overmem */ 10593 ttl += STALE_TTL(header, rbtdb); 10594 } 10595 10596 if (ttl >= now - RBTDB_VIRTUAL) { 10597 /* 10598 * The header at the top of this TTL heap is not yet 10599 * eligible for expiry, so none of the other headers on 10600 * the same heap can be eligible for expiry, either; 10601 * exit cleaning. 10602 */ 10603 return; 10604 } 10605 10606 expire_header(rbtdb, header, tree_locked, expire_ttl); 10607 } 10608 } 10609