1 /* $NetBSD: rbtdb.c,v 1.17 2023/06/26 22:03:00 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 /*! \file */ 17 18 #include <ctype.h> 19 #include <inttypes.h> 20 #include <stdbool.h> 21 22 #include <isc/atomic.h> 23 #include <isc/crc64.h> 24 #include <isc/event.h> 25 #include <isc/file.h> 26 #include <isc/hash.h> 27 #include <isc/heap.h> 28 #include <isc/hex.h> 29 #include <isc/mem.h> 30 #include <isc/mutex.h> 31 #include <isc/once.h> 32 #include <isc/platform.h> 33 #include <isc/print.h> 34 #include <isc/random.h> 35 #include <isc/refcount.h> 36 #include <isc/rwlock.h> 37 #include <isc/serial.h> 38 #include <isc/socket.h> 39 #include <isc/stdio.h> 40 #include <isc/string.h> 41 #include <isc/task.h> 42 #include <isc/time.h> 43 #include <isc/util.h> 44 45 #include <dns/callbacks.h> 46 #include <dns/db.h> 47 #include <dns/dbiterator.h> 48 #include <dns/events.h> 49 #include <dns/fixedname.h> 50 #include <dns/lib.h> 51 #include <dns/log.h> 52 #include <dns/masterdump.h> 53 #include <dns/nsec.h> 54 #include <dns/nsec3.h> 55 #include <dns/rbt.h> 56 #include <dns/rdata.h> 57 #include <dns/rdataset.h> 58 #include <dns/rdatasetiter.h> 59 #include <dns/rdataslab.h> 60 #include <dns/rdatastruct.h> 61 #include <dns/result.h> 62 #include <dns/stats.h> 63 #include <dns/time.h> 64 #include <dns/version.h> 65 #include <dns/view.h> 66 #include <dns/zone.h> 67 #include <dns/zonekey.h> 68 69 #ifndef WIN32 70 #include <sys/mman.h> 71 #else /* ifndef WIN32 */ 72 #define PROT_READ 0x01 73 #define PROT_WRITE 0x02 74 #define MAP_PRIVATE 0x0002 75 #define MAP_FAILED ((void *)-1) 76 #endif /* ifndef WIN32 */ 77 78 #include "rbtdb.h" 79 80 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4') 81 82 #define CHECK(op) \ 83 do { \ 84 result = (op); \ 85 if (result != ISC_R_SUCCESS) \ 86 goto failure; \ 87 } while (0) 88 89 /* 90 * This is the map file header for RBTDB images. It is populated, and then 91 * written, as the LAST thing done to the file. Writing this last (with 92 * zeros in the header area initially) will ensure that the header is only 93 * valid when the RBTDB image is also valid. 94 */ 95 typedef struct rbtdb_file_header rbtdb_file_header_t; 96 97 /* Header length, always the same size regardless of structure size */ 98 #define RBTDB_HEADER_LENGTH 1024 99 100 struct rbtdb_file_header { 101 char version1[32]; 102 uint32_t ptrsize; 103 unsigned int bigendian : 1; 104 uint64_t tree; 105 uint64_t nsec; 106 uint64_t nsec3; 107 108 char version2[32]; /* repeated; must match version1 */ 109 }; 110 111 /*% 112 * Note that "impmagic" is not the first four bytes of the struct, so 113 * ISC_MAGIC_VALID cannot be used. 114 */ 115 #define VALID_RBTDB(rbtdb) \ 116 ((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC) 117 118 typedef uint32_t rbtdb_serial_t; 119 typedef uint32_t rbtdb_rdatatype_t; 120 121 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type)&0xFFFF)) 122 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16)) 123 #define RBTDB_RDATATYPE_VALUE(base, ext) \ 124 ((rbtdb_rdatatype_t)(((uint32_t)ext) << 16) | \ 125 (((uint32_t)base) & 0xffff)) 126 127 #define RBTDB_RDATATYPE_SIGNSEC \ 128 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec) 129 #define RBTDB_RDATATYPE_SIGNSEC3 \ 130 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3) 131 #define RBTDB_RDATATYPE_SIGNS \ 132 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns) 133 #define RBTDB_RDATATYPE_SIGCNAME \ 134 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname) 135 #define RBTDB_RDATATYPE_SIGDNAME \ 136 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname) 137 #define RBTDB_RDATATYPE_SIGDS \ 138 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds) 139 #define RBTDB_RDATATYPE_SIGSOA \ 140 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa) 141 #define RBTDB_RDATATYPE_NCACHEANY RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any) 142 143 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0) 144 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l) 145 #define RBTDB_LOCK(l, t) RWLOCK((l), (t)) 146 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t)) 147 148 /* 149 * Since node locking is sensitive to both performance and memory footprint, 150 * we need some trick here. If we have both high-performance rwlock and 151 * high performance and small-memory reference counters, we use rwlock for 152 * node lock and isc_refcount for node references. In this case, we don't have 153 * to protect the access to the counters by locks. 154 * Otherwise, we simply use ordinary mutex lock for node locking, and use 155 * simple integers as reference counters which is protected by the lock. 156 * In most cases, we can simply use wrapper macros such as NODE_LOCK and 157 * NODE_UNLOCK. In some other cases, however, we need to protect reference 158 * counters first and then protect other parts of a node as read-only data. 159 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also 160 * provided for these special cases. When we can use the efficient backend 161 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read). 162 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical 163 * section including the access to the reference counter. 164 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected 165 * section is also protected by NODE_STRONGLOCK(). 166 */ 167 typedef isc_rwlock_t nodelock_t; 168 169 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0) 170 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l) 171 #define NODE_LOCK(l, t) RWLOCK((l), (t)) 172 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t)) 173 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l) 174 #define NODE_DOWNGRADE(l) isc_rwlock_downgrade(l) 175 176 /*% 177 * Whether to rate-limit updating the LRU to avoid possible thread contention. 178 * Updating LRU requires write locking, so we don't do it every time the 179 * record is touched - only after some time passes. 180 */ 181 #ifndef DNS_RBTDB_LIMITLRUUPDATE 182 #define DNS_RBTDB_LIMITLRUUPDATE 1 183 #endif 184 185 /*% Time after which we update LRU for glue records, 5 minutes */ 186 #define DNS_RBTDB_LRUUPDATE_GLUE 300 187 /*% Time after which we update LRU for all other records, 10 minutes */ 188 #define DNS_RBTDB_LRUUPDATE_REGULAR 600 189 190 /* 191 * Allow clients with a virtual time of up to 5 minutes in the past to see 192 * records that would have otherwise have expired. 193 */ 194 #define RBTDB_VIRTUAL 300 195 196 struct noqname { 197 dns_name_t name; 198 void *neg; 199 void *negsig; 200 dns_rdatatype_t type; 201 }; 202 203 typedef struct rdatasetheader { 204 /*% 205 * Locked by the owning node's lock. 206 */ 207 rbtdb_serial_t serial; 208 dns_ttl_t rdh_ttl; 209 rbtdb_rdatatype_t type; 210 atomic_uint_least16_t attributes; 211 dns_trust_t trust; 212 atomic_uint_fast32_t last_refresh_fail_ts; 213 struct noqname *noqname; 214 struct noqname *closest; 215 unsigned int is_mmapped : 1; 216 unsigned int next_is_relative : 1; 217 unsigned int node_is_relative : 1; 218 unsigned int resign_lsb : 1; 219 /*%< 220 * We don't use the LIST macros, because the LIST structure has 221 * both head and tail pointers, and is doubly linked. 222 */ 223 224 struct rdatasetheader *next; 225 /*%< 226 * If this is the top header for an rdataset, 'next' points 227 * to the top header for the next rdataset (i.e., the next type). 228 * Otherwise, it points up to the header whose down pointer points 229 * at this header. 230 */ 231 232 struct rdatasetheader *down; 233 /*%< 234 * Points to the header for the next older version of 235 * this rdataset. 236 */ 237 238 atomic_uint_fast32_t count; 239 /*%< 240 * Monotonously increased every time this rdataset is bound so that 241 * it is used as the base of the starting point in DNS responses 242 * when the "cyclic" rrset-order is required. 243 */ 244 245 dns_rbtnode_t *node; 246 isc_stdtime_t last_used; 247 ISC_LINK(struct rdatasetheader) link; 248 249 unsigned int heap_index; 250 /*%< 251 * Used for TTL-based cache cleaning. 252 */ 253 isc_stdtime_t resign; 254 /*%< 255 * Case vector. If the bit is set then the corresponding 256 * character in the owner name needs to be AND'd with 0x20, 257 * rendering that character upper case. 258 */ 259 unsigned char upper[32]; 260 } rdatasetheader_t; 261 262 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t; 263 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t; 264 265 #define RDATASET_ATTR_NONEXISTENT 0x0001 266 /*%< May be potentially served as stale data. */ 267 #define RDATASET_ATTR_STALE 0x0002 268 #define RDATASET_ATTR_IGNORE 0x0004 269 #define RDATASET_ATTR_RETAIN 0x0008 270 #define RDATASET_ATTR_NXDOMAIN 0x0010 271 #define RDATASET_ATTR_RESIGN 0x0020 272 #define RDATASET_ATTR_STATCOUNT 0x0040 273 #define RDATASET_ATTR_OPTOUT 0x0080 274 #define RDATASET_ATTR_NEGATIVE 0x0100 275 #define RDATASET_ATTR_PREFETCH 0x0200 276 #define RDATASET_ATTR_CASESET 0x0400 277 #define RDATASET_ATTR_ZEROTTL 0x0800 278 #define RDATASET_ATTR_CASEFULLYLOWER 0x1000 279 /*%< Ancient - awaiting cleanup. */ 280 #define RDATASET_ATTR_ANCIENT 0x2000 281 #define RDATASET_ATTR_STALE_WINDOW 0x4000 282 283 /* 284 * XXX 285 * When the cache will pre-expire data (due to memory low or other 286 * situations) before the rdataset's TTL has expired, it MUST 287 * respect the RETAIN bit and not expire the data until its TTL is 288 * expired. 289 */ 290 291 #undef IGNORE /* WIN32 winbase.h defines this. */ 292 293 #define EXISTS(header) \ 294 ((atomic_load_acquire(&(header)->attributes) & \ 295 RDATASET_ATTR_NONEXISTENT) == 0) 296 #define NONEXISTENT(header) \ 297 ((atomic_load_acquire(&(header)->attributes) & \ 298 RDATASET_ATTR_NONEXISTENT) != 0) 299 #define IGNORE(header) \ 300 ((atomic_load_acquire(&(header)->attributes) & \ 301 RDATASET_ATTR_IGNORE) != 0) 302 #define RETAIN(header) \ 303 ((atomic_load_acquire(&(header)->attributes) & \ 304 RDATASET_ATTR_RETAIN) != 0) 305 #define NXDOMAIN(header) \ 306 ((atomic_load_acquire(&(header)->attributes) & \ 307 RDATASET_ATTR_NXDOMAIN) != 0) 308 #define STALE(header) \ 309 ((atomic_load_acquire(&(header)->attributes) & RDATASET_ATTR_STALE) != \ 310 0) 311 #define STALE_WINDOW(header) \ 312 ((atomic_load_acquire(&(header)->attributes) & \ 313 RDATASET_ATTR_STALE_WINDOW) != 0) 314 #define RESIGN(header) \ 315 ((atomic_load_acquire(&(header)->attributes) & \ 316 RDATASET_ATTR_RESIGN) != 0) 317 #define OPTOUT(header) \ 318 ((atomic_load_acquire(&(header)->attributes) & \ 319 RDATASET_ATTR_OPTOUT) != 0) 320 #define NEGATIVE(header) \ 321 ((atomic_load_acquire(&(header)->attributes) & \ 322 RDATASET_ATTR_NEGATIVE) != 0) 323 #define PREFETCH(header) \ 324 ((atomic_load_acquire(&(header)->attributes) & \ 325 RDATASET_ATTR_PREFETCH) != 0) 326 #define CASESET(header) \ 327 ((atomic_load_acquire(&(header)->attributes) & \ 328 RDATASET_ATTR_CASESET) != 0) 329 #define ZEROTTL(header) \ 330 ((atomic_load_acquire(&(header)->attributes) & \ 331 RDATASET_ATTR_ZEROTTL) != 0) 332 #define CASEFULLYLOWER(header) \ 333 ((atomic_load_acquire(&(header)->attributes) & \ 334 RDATASET_ATTR_CASEFULLYLOWER) != 0) 335 #define ANCIENT(header) \ 336 ((atomic_load_acquire(&(header)->attributes) & \ 337 RDATASET_ATTR_ANCIENT) != 0) 338 #define STATCOUNT(header) \ 339 ((atomic_load_acquire(&(header)->attributes) & \ 340 RDATASET_ATTR_STATCOUNT) != 0) 341 342 #define RDATASET_ATTR_GET(header, attribute) \ 343 (atomic_load_acquire(&(header)->attributes) & attribute) 344 #define RDATASET_ATTR_SET(header, attribute) \ 345 atomic_fetch_or_release(&(header)->attributes, attribute) 346 #define RDATASET_ATTR_CLR(header, attribute) \ 347 atomic_fetch_and_release(&(header)->attributes, ~(attribute)) 348 349 #define ACTIVE(header, now) \ 350 (((header)->rdh_ttl > (now)) || \ 351 ((header)->rdh_ttl == (now) && ZEROTTL(header))) 352 353 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */ 354 #define RBTDB_GLUE_TABLE_INIT_BITS 2U 355 #define RBTDB_GLUE_TABLE_MAX_BITS 32U 356 #define RBTDB_GLUE_TABLE_OVERCOMMIT 3 357 358 #define GOLDEN_RATIO_32 0x61C88647 359 #define HASHSIZE(bits) (UINT64_C(1) << (bits)) 360 361 static uint32_t 362 hash_32(uint32_t val, unsigned int bits) { 363 REQUIRE(bits <= RBTDB_GLUE_TABLE_MAX_BITS); 364 /* High bits are more random. */ 365 return (val * GOLDEN_RATIO_32 >> (32 - bits)); 366 } 367 368 #define EXPIREDOK(rbtiterator) \ 369 (((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0) 370 371 #define STALEOK(rbtiterator) \ 372 (((rbtiterator)->common.options & DNS_DB_STALEOK) != 0) 373 374 /*% 375 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps). 376 * There is a tradeoff issue about configuring this value: if this is too 377 * small, it may cause heavier contention between threads; if this is too large, 378 * LRU purge algorithm won't work well (entries tend to be purged prematurely). 379 * The default value should work well for most environments, but this can 380 * also be configurable at compilation time via the 381 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than 382 * 1 due to the assumption of overmem_purge(). 383 */ 384 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT 385 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 386 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1" 387 #else /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ 388 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT 389 #endif /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ 390 #else /* ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ 391 #define DEFAULT_CACHE_NODE_LOCK_COUNT 17 392 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ 393 394 typedef struct { 395 nodelock_t lock; 396 /* Protected in the refcount routines. */ 397 isc_refcount_t references; 398 /* Locked by lock. */ 399 bool exiting; 400 } rbtdb_nodelock_t; 401 402 typedef struct rbtdb_changed { 403 dns_rbtnode_t *node; 404 bool dirty; 405 ISC_LINK(struct rbtdb_changed) link; 406 } rbtdb_changed_t; 407 408 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t; 409 410 typedef enum { dns_db_insecure, dns_db_partial, dns_db_secure } dns_db_secure_t; 411 412 typedef struct dns_rbtdb dns_rbtdb_t; 413 414 /* Reason for expiring a record from cache */ 415 typedef enum { expire_lru, expire_ttl, expire_flush } expire_t; 416 417 typedef struct rbtdb_glue rbtdb_glue_t; 418 419 typedef struct rbtdb_glue_table_node { 420 struct rbtdb_glue_table_node *next; 421 dns_rbtnode_t *node; 422 rbtdb_glue_t *glue_list; 423 } rbtdb_glue_table_node_t; 424 425 typedef enum { 426 rdataset_ttl_fresh, 427 rdataset_ttl_stale, 428 rdataset_ttl_ancient 429 } rdataset_ttl_t; 430 431 typedef struct rbtdb_version { 432 /* Not locked */ 433 rbtdb_serial_t serial; 434 dns_rbtdb_t *rbtdb; 435 /* 436 * Protected in the refcount routines. 437 * XXXJT: should we change the lock policy based on the refcount 438 * performance? 439 */ 440 isc_refcount_t references; 441 /* Locked by database lock. */ 442 bool writer; 443 bool commit_ok; 444 rbtdb_changedlist_t changed_list; 445 rdatasetheaderlist_t resigned_list; 446 ISC_LINK(struct rbtdb_version) link; 447 dns_db_secure_t secure; 448 bool havensec3; 449 /* NSEC3 parameters */ 450 dns_hash_t hash; 451 uint8_t flags; 452 uint16_t iterations; 453 uint8_t salt_length; 454 unsigned char salt[DNS_NSEC3_SALTSIZE]; 455 456 /* 457 * records and xfrsize are covered by rwlock. 458 */ 459 isc_rwlock_t rwlock; 460 uint64_t records; 461 uint64_t xfrsize; 462 463 isc_rwlock_t glue_rwlock; 464 size_t glue_table_bits; 465 size_t glue_table_nodecount; 466 rbtdb_glue_table_node_t **glue_table; 467 } rbtdb_version_t; 468 469 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t; 470 471 struct dns_rbtdb { 472 /* Unlocked. */ 473 dns_db_t common; 474 /* Locks the data in this struct */ 475 isc_rwlock_t lock; 476 /* Locks the tree structure (prevents nodes appearing/disappearing) */ 477 isc_rwlock_t tree_lock; 478 /* Locks for individual tree nodes */ 479 unsigned int node_lock_count; 480 rbtdb_nodelock_t *node_locks; 481 dns_rbtnode_t *origin_node; 482 dns_rbtnode_t *nsec3_origin_node; 483 dns_stats_t *rrsetstats; /* cache DB only */ 484 isc_stats_t *cachestats; /* cache DB only */ 485 isc_stats_t *gluecachestats; /* zone DB only */ 486 /* Locked by lock. */ 487 unsigned int active; 488 isc_refcount_t references; 489 unsigned int attributes; 490 rbtdb_serial_t current_serial; 491 rbtdb_serial_t least_serial; 492 rbtdb_serial_t next_serial; 493 rbtdb_version_t *current_version; 494 rbtdb_version_t *future_version; 495 rbtdb_versionlist_t open_versions; 496 isc_task_t *task; 497 dns_dbnode_t *soanode; 498 dns_dbnode_t *nsnode; 499 500 /* 501 * Maximum length of time to keep using a stale answer past its 502 * normal TTL expiry. 503 */ 504 dns_ttl_t serve_stale_ttl; 505 506 /* 507 * The time after a failed lookup, where stale answers from cache 508 * may be used directly in a DNS response without attempting a 509 * new iterative lookup. 510 */ 511 uint32_t serve_stale_refresh; 512 513 /* 514 * This is a linked list used to implement the LRU cache. There will 515 * be node_lock_count linked lists here. Nodes in bucket 1 will be 516 * placed on the linked list rdatasets[1]. 517 */ 518 rdatasetheaderlist_t *rdatasets; 519 520 /*% 521 * Temporary storage for stale cache nodes and dynamically deleted 522 * nodes that await being cleaned up. 523 */ 524 rbtnodelist_t *deadnodes; 525 526 /* 527 * Heaps. These are used for TTL based expiry in a cache, 528 * or for zone resigning in a zone DB. hmctx is the memory 529 * context to use for the heap (which differs from the main 530 * database memory context in the case of a cache). 531 */ 532 isc_mem_t *hmctx; 533 isc_heap_t **heaps; 534 535 /* 536 * Base values for the mmap() code. 537 */ 538 void *mmap_location; 539 size_t mmap_size; 540 541 /* Locked by tree_lock. */ 542 dns_rbt_t *tree; 543 dns_rbt_t *nsec; 544 dns_rbt_t *nsec3; 545 546 /* Unlocked */ 547 unsigned int quantum; 548 }; 549 550 #define RBTDB_ATTR_LOADED 0x01 551 #define RBTDB_ATTR_LOADING 0x02 552 553 #define KEEPSTALE(rbtdb) ((rbtdb)->serve_stale_ttl > 0) 554 555 /*% 556 * Search Context 557 */ 558 typedef struct { 559 dns_rbtdb_t *rbtdb; 560 rbtdb_version_t *rbtversion; 561 rbtdb_serial_t serial; 562 unsigned int options; 563 dns_rbtnodechain_t chain; 564 bool copy_name; 565 bool need_cleanup; 566 bool wild; 567 dns_rbtnode_t *zonecut; 568 rdatasetheader_t *zonecut_rdataset; 569 rdatasetheader_t *zonecut_sigrdataset; 570 dns_fixedname_t zonecut_name; 571 isc_stdtime_t now; 572 } rbtdb_search_t; 573 574 /*% 575 * Load Context 576 */ 577 typedef struct { 578 dns_rbtdb_t *rbtdb; 579 isc_stdtime_t now; 580 } rbtdb_load_t; 581 582 static void 583 delete_callback(void *data, void *arg); 584 static void 585 rdataset_disassociate(dns_rdataset_t *rdataset); 586 static isc_result_t 587 rdataset_first(dns_rdataset_t *rdataset); 588 static isc_result_t 589 rdataset_next(dns_rdataset_t *rdataset); 590 static void 591 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata); 592 static void 593 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target); 594 static unsigned int 595 rdataset_count(dns_rdataset_t *rdataset); 596 static isc_result_t 597 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name, 598 dns_rdataset_t *neg, dns_rdataset_t *negsig); 599 static isc_result_t 600 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name, 601 dns_rdataset_t *neg, dns_rdataset_t *negsig); 602 static bool 603 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now); 604 static void 605 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now); 606 static void 607 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked, 608 expire_t reason); 609 static void 610 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize, 611 bool tree_locked); 612 static void 613 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader); 614 static void 615 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, 616 rdatasetheader_t *header); 617 static void 618 prune_tree(isc_task_t *task, isc_event_t *event); 619 static void 620 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust); 621 static void 622 rdataset_expire(dns_rdataset_t *rdataset); 623 static void 624 rdataset_clearprefetch(dns_rdataset_t *rdataset); 625 static void 626 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name); 627 static void 628 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name); 629 static isc_result_t 630 rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version, 631 dns_message_t *msg); 632 static void 633 free_gluetable(rbtdb_version_t *version); 634 static isc_result_t 635 nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name); 636 637 static dns_rdatasetmethods_t rdataset_methods = { rdataset_disassociate, 638 rdataset_first, 639 rdataset_next, 640 rdataset_current, 641 rdataset_clone, 642 rdataset_count, 643 NULL, /* addnoqname */ 644 rdataset_getnoqname, 645 NULL, /* addclosest */ 646 rdataset_getclosest, 647 rdataset_settrust, 648 rdataset_expire, 649 rdataset_clearprefetch, 650 rdataset_setownercase, 651 rdataset_getownercase, 652 rdataset_addglue }; 653 654 static dns_rdatasetmethods_t slab_methods = { 655 rdataset_disassociate, 656 rdataset_first, 657 rdataset_next, 658 rdataset_current, 659 rdataset_clone, 660 rdataset_count, 661 NULL, /* addnoqname */ 662 NULL, /* getnoqname */ 663 NULL, /* addclosest */ 664 NULL, /* getclosest */ 665 NULL, /* settrust */ 666 NULL, /* expire */ 667 NULL, /* clearprefetch */ 668 NULL, /* setownercase */ 669 NULL, /* getownercase */ 670 NULL /* addglue */ 671 }; 672 673 static void 674 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp); 675 static isc_result_t 676 rdatasetiter_first(dns_rdatasetiter_t *iterator); 677 static isc_result_t 678 rdatasetiter_next(dns_rdatasetiter_t *iterator); 679 static void 680 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset); 681 682 static dns_rdatasetitermethods_t rdatasetiter_methods = { 683 rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next, 684 rdatasetiter_current 685 }; 686 687 typedef struct rbtdb_rdatasetiter { 688 dns_rdatasetiter_t common; 689 rdatasetheader_t *current; 690 } rbtdb_rdatasetiter_t; 691 692 /* 693 * Note that these iterators, unless created with either DNS_DB_NSEC3ONLY or 694 * DNS_DB_NONSEC3, will transparently move between the last node of the 695 * "regular" RBT ("chain" field) and the root node of the NSEC3 RBT 696 * ("nsec3chain" field) of the database in question, as if the latter was a 697 * successor to the former in lexical order. The "current" field always holds 698 * the address of either "chain" or "nsec3chain", depending on which RBT is 699 * being traversed at given time. 700 */ 701 static void 702 dbiterator_destroy(dns_dbiterator_t **iteratorp); 703 static isc_result_t 704 dbiterator_first(dns_dbiterator_t *iterator); 705 static isc_result_t 706 dbiterator_last(dns_dbiterator_t *iterator); 707 static isc_result_t 708 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name); 709 static isc_result_t 710 dbiterator_prev(dns_dbiterator_t *iterator); 711 static isc_result_t 712 dbiterator_next(dns_dbiterator_t *iterator); 713 static isc_result_t 714 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 715 dns_name_t *name); 716 static isc_result_t 717 dbiterator_pause(dns_dbiterator_t *iterator); 718 static isc_result_t 719 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name); 720 721 static dns_dbiteratormethods_t dbiterator_methods = { 722 dbiterator_destroy, dbiterator_first, dbiterator_last, 723 dbiterator_seek, dbiterator_prev, dbiterator_next, 724 dbiterator_current, dbiterator_pause, dbiterator_origin 725 }; 726 727 #define DELETION_BATCH_MAX 64 728 729 /* 730 * If 'paused' is true, then the tree lock is not being held. 731 */ 732 typedef struct rbtdb_dbiterator { 733 dns_dbiterator_t common; 734 bool paused; 735 bool new_origin; 736 isc_rwlocktype_t tree_locked; 737 isc_result_t result; 738 dns_fixedname_t name; 739 dns_fixedname_t origin; 740 dns_rbtnodechain_t chain; 741 dns_rbtnodechain_t nsec3chain; 742 dns_rbtnodechain_t *current; 743 dns_rbtnode_t *node; 744 dns_rbtnode_t *deletions[DELETION_BATCH_MAX]; 745 int delcnt; 746 bool nsec3only; 747 bool nonsec3; 748 } rbtdb_dbiterator_t; 749 750 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0) 751 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0) 752 753 static void 754 free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event); 755 static void 756 overmem(dns_db_t *db, bool over); 757 static void 758 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version); 759 static void 760 setownercase(rdatasetheader_t *header, const dns_name_t *name); 761 762 static bool 763 match_header_version(rbtdb_file_header_t *header); 764 765 /* Pad to 32 bytes */ 766 static char FILE_VERSION[32] = "\0"; 767 768 /*% 769 * 'init_count' is used to initialize 'newheader->count' which inturn 770 * is used to determine where in the cycle rrset-order cyclic starts. 771 * We don't lock this as we don't care about simultaneous updates. 772 * 773 * Note: 774 * Both init_count and header->count can be UINT32_MAX. 775 * The count on the returned rdataset however can't be as 776 * that indicates that the database does not implement cyclic 777 * processing. 778 */ 779 static atomic_uint_fast32_t init_count = 0; 780 781 /* 782 * Locking 783 * 784 * If a routine is going to lock more than one lock in this module, then 785 * the locking must be done in the following order: 786 * 787 * Tree Lock 788 * 789 * Node Lock (Only one from the set may be locked at one time by 790 * any caller) 791 * 792 * Database Lock 793 * 794 * Failure to follow this hierarchy can result in deadlock. 795 */ 796 797 /* 798 * Deleting Nodes 799 * 800 * For zone databases the node for the origin of the zone MUST NOT be deleted. 801 */ 802 803 /* 804 * Debugging routines 805 */ 806 #ifdef DEBUG 807 static void 808 hexdump(const char *desc, unsigned char *data, size_t size) { 809 char hexdump[BUFSIZ * 2 + 1]; 810 isc_buffer_t b; 811 isc_region_t r; 812 isc_result_t result; 813 size_t bytes; 814 815 fprintf(stderr, "%s: ", desc); 816 do { 817 isc_buffer_init(&b, hexdump, sizeof(hexdump)); 818 r.base = data; 819 r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size; 820 result = isc_hex_totext(&r, 0, "", &b); 821 RUNTIME_CHECK(result == ISC_R_SUCCESS); 822 isc_buffer_putuint8(&b, 0); 823 fprintf(stderr, "%s", hexdump); 824 data += bytes; 825 size -= bytes; 826 } while (size > 0); 827 fprintf(stderr, "\n"); 828 } 829 #endif /* ifdef DEBUG */ 830 831 /* Fixed RRSet helper macros */ 832 833 #define DNS_RDATASET_LENGTH 2; 834 835 #if DNS_RDATASET_FIXED 836 #define DNS_RDATASET_ORDER 2 837 #define DNS_RDATASET_COUNT (count * 4) 838 #else /* !DNS_RDATASET_FIXED */ 839 #define DNS_RDATASET_ORDER 0 840 #define DNS_RDATASET_COUNT 0 841 #endif /* DNS_RDATASET_FIXED */ 842 843 /* 844 * DB Routines 845 */ 846 847 static void 848 attach(dns_db_t *source, dns_db_t **targetp) { 849 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source; 850 851 REQUIRE(VALID_RBTDB(rbtdb)); 852 853 isc_refcount_increment(&rbtdb->references); 854 855 *targetp = source; 856 } 857 858 static void 859 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) { 860 dns_rbtdb_t *rbtdb = event->ev_arg; 861 862 UNUSED(task); 863 864 free_rbtdb(rbtdb, true, event); 865 } 866 867 static void 868 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { 869 INSIST(IS_CACHE(rbtdb)); 870 871 if (rbtdb->cachestats == NULL) { 872 return; 873 } 874 875 switch (result) { 876 case ISC_R_SUCCESS: 877 case DNS_R_CNAME: 878 case DNS_R_DNAME: 879 case DNS_R_DELEGATION: 880 case DNS_R_NCACHENXDOMAIN: 881 case DNS_R_NCACHENXRRSET: 882 isc_stats_increment(rbtdb->cachestats, 883 dns_cachestatscounter_hits); 884 break; 885 default: 886 isc_stats_increment(rbtdb->cachestats, 887 dns_cachestatscounter_misses); 888 } 889 } 890 891 static bool 892 do_stats(rdatasetheader_t *header) { 893 return (EXISTS(header) && STATCOUNT(header)); 894 } 895 896 static void 897 update_rrsetstats(dns_rbtdb_t *rbtdb, const rbtdb_rdatatype_t htype, 898 const uint_least16_t hattributes, const bool increment) { 899 dns_rdatastatstype_t statattributes = 0; 900 dns_rdatastatstype_t base = 0; 901 dns_rdatastatstype_t type; 902 rdatasetheader_t *header = &(rdatasetheader_t){ 903 .type = htype, 904 .attributes = hattributes, 905 }; 906 907 if (!do_stats(header)) { 908 return; 909 } 910 911 /* At the moment we count statistics only for cache DB */ 912 INSIST(IS_CACHE(rbtdb)); 913 914 if (NEGATIVE(header)) { 915 if (NXDOMAIN(header)) { 916 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN; 917 } else { 918 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET; 919 base = RBTDB_RDATATYPE_EXT(header->type); 920 } 921 } else { 922 base = RBTDB_RDATATYPE_BASE(header->type); 923 } 924 925 if (STALE(header)) { 926 statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE; 927 } 928 if (ANCIENT(header)) { 929 statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT; 930 } 931 932 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes); 933 if (increment) { 934 dns_rdatasetstats_increment(rbtdb->rrsetstats, type); 935 } else { 936 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type); 937 } 938 } 939 940 static void 941 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) { 942 int idx; 943 isc_heap_t *heap; 944 dns_ttl_t oldttl; 945 946 if (!IS_CACHE(rbtdb)) { 947 header->rdh_ttl = newttl; 948 return; 949 } 950 951 oldttl = header->rdh_ttl; 952 header->rdh_ttl = newttl; 953 954 /* 955 * It's possible the rbtdb is not a cache. If this is the case, 956 * we will not have a heap, and we move on. If we do, though, 957 * we might need to adjust things. 958 */ 959 if (header->heap_index == 0 || newttl == oldttl) { 960 return; 961 } 962 idx = header->node->locknum; 963 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL) { 964 return; 965 } 966 heap = rbtdb->heaps[idx]; 967 968 if (newttl < oldttl) { 969 isc_heap_increased(heap, header->heap_index); 970 } else { 971 isc_heap_decreased(heap, header->heap_index); 972 } 973 } 974 975 /*% 976 * These functions allow the heap code to rank the priority of each 977 * element. It returns true if v1 happens "sooner" than v2. 978 */ 979 static bool 980 ttl_sooner(void *v1, void *v2) { 981 rdatasetheader_t *h1 = v1; 982 rdatasetheader_t *h2 = v2; 983 984 return (h1->rdh_ttl < h2->rdh_ttl); 985 } 986 987 /*% 988 * Return which RRset should be resigned sooner. If the RRsets have the 989 * same signing time, prefer the other RRset over the SOA RRset. 990 */ 991 static bool 992 resign_sooner(void *v1, void *v2) { 993 rdatasetheader_t *h1 = v1; 994 rdatasetheader_t *h2 = v2; 995 996 return (h1->resign < h2->resign || 997 (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) || 998 (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb && 999 h2->type == RBTDB_RDATATYPE_SIGSOA)); 1000 } 1001 1002 /*% 1003 * This function sets the heap index into the header. 1004 */ 1005 static void 1006 set_index(void *what, unsigned int idx) { 1007 rdatasetheader_t *h = what; 1008 1009 h->heap_index = idx; 1010 } 1011 1012 /*% 1013 * Work out how many nodes can be deleted in the time between two 1014 * requests to the nameserver. Smooth the resulting number and use it 1015 * as a estimate for the number of nodes to be deleted in the next 1016 * iteration. 1017 */ 1018 static unsigned int 1019 adjust_quantum(unsigned int old, isc_time_t *start) { 1020 unsigned int pps = dns_pps; /* packets per second */ 1021 unsigned int interval; 1022 uint64_t usecs; 1023 isc_time_t end; 1024 unsigned int nodes; 1025 1026 if (pps < 100) { 1027 pps = 100; 1028 } 1029 isc_time_now(&end); 1030 1031 interval = 1000000 / pps; /* interval in usec */ 1032 if (interval == 0) { 1033 interval = 1; 1034 } 1035 usecs = isc_time_microdiff(&end, start); 1036 if (usecs == 0) { 1037 /* 1038 * We were unable to measure the amount of time taken. 1039 * Double the nodes deleted next time. 1040 */ 1041 old *= 2; 1042 if (old > 1000) { 1043 old = 1000; 1044 } 1045 return (old); 1046 } 1047 nodes = old * interval; 1048 nodes /= (unsigned int)usecs; 1049 if (nodes == 0) { 1050 nodes = 1; 1051 } else if (nodes > 1000) { 1052 nodes = 1000; 1053 } 1054 1055 /* Smooth */ 1056 nodes = (nodes + old * 3) / 4; 1057 1058 if (nodes != old) { 1059 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1060 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1061 "adjust_quantum: old=%d, new=%d", old, nodes); 1062 } 1063 1064 return (nodes); 1065 } 1066 1067 static void 1068 free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { 1069 unsigned int i; 1070 isc_result_t result; 1071 char buf[DNS_NAME_FORMATSIZE]; 1072 dns_rbt_t **treep; 1073 isc_time_t start; 1074 1075 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) { 1076 overmem((dns_db_t *)rbtdb, (bool)-1); 1077 } 1078 1079 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions)); 1080 REQUIRE(rbtdb->future_version == NULL); 1081 1082 if (rbtdb->current_version != NULL) { 1083 isc_refcount_decrementz(&rbtdb->current_version->references); 1084 UNLINK(rbtdb->open_versions, rbtdb->current_version, link); 1085 isc_rwlock_destroy(&rbtdb->current_version->glue_rwlock); 1086 isc_refcount_destroy(&rbtdb->current_version->references); 1087 isc_rwlock_destroy(&rbtdb->current_version->rwlock); 1088 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version, 1089 sizeof(rbtdb_version_t)); 1090 } 1091 1092 /* 1093 * We assume the number of remaining dead nodes is reasonably small; 1094 * the overhead of unlinking all nodes here should be negligible. 1095 */ 1096 for (i = 0; i < rbtdb->node_lock_count; i++) { 1097 dns_rbtnode_t *node; 1098 1099 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); 1100 while (node != NULL) { 1101 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); 1102 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); 1103 } 1104 } 1105 1106 if (event == NULL) { 1107 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0; 1108 } 1109 1110 for (;;) { 1111 /* 1112 * pick the next tree to (start to) destroy 1113 */ 1114 treep = &rbtdb->tree; 1115 if (*treep == NULL) { 1116 treep = &rbtdb->nsec; 1117 if (*treep == NULL) { 1118 treep = &rbtdb->nsec3; 1119 /* 1120 * we're finished after clear cutting 1121 */ 1122 if (*treep == NULL) { 1123 break; 1124 } 1125 } 1126 } 1127 1128 isc_time_now(&start); 1129 result = dns_rbt_destroy2(treep, rbtdb->quantum); 1130 if (result == ISC_R_QUOTA) { 1131 INSIST(rbtdb->task != NULL); 1132 if (rbtdb->quantum != 0) { 1133 rbtdb->quantum = adjust_quantum(rbtdb->quantum, 1134 &start); 1135 } 1136 if (event == NULL) { 1137 event = isc_event_allocate( 1138 rbtdb->common.mctx, NULL, 1139 DNS_EVENT_FREESTORAGE, 1140 free_rbtdb_callback, rbtdb, 1141 sizeof(isc_event_t)); 1142 } 1143 isc_task_send(rbtdb->task, &event); 1144 return; 1145 } 1146 INSIST(result == ISC_R_SUCCESS && *treep == NULL); 1147 } 1148 1149 if (event != NULL) { 1150 isc_event_free(&event); 1151 } 1152 if (log) { 1153 if (dns_name_dynamic(&rbtdb->common.origin)) { 1154 dns_name_format(&rbtdb->common.origin, buf, 1155 sizeof(buf)); 1156 } else { 1157 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 1158 } 1159 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1160 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1161 "done free_rbtdb(%s)", buf); 1162 } 1163 if (dns_name_dynamic(&rbtdb->common.origin)) { 1164 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx); 1165 } 1166 for (i = 0; i < rbtdb->node_lock_count; i++) { 1167 isc_refcount_destroy(&rbtdb->node_locks[i].references); 1168 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock); 1169 } 1170 1171 /* 1172 * Clean up LRU / re-signing order lists. 1173 */ 1174 if (rbtdb->rdatasets != NULL) { 1175 for (i = 0; i < rbtdb->node_lock_count; i++) { 1176 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i])); 1177 } 1178 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets, 1179 rbtdb->node_lock_count * 1180 sizeof(rdatasetheaderlist_t)); 1181 } 1182 /* 1183 * Clean up dead node buckets. 1184 */ 1185 if (rbtdb->deadnodes != NULL) { 1186 for (i = 0; i < rbtdb->node_lock_count; i++) { 1187 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i])); 1188 } 1189 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes, 1190 rbtdb->node_lock_count * sizeof(rbtnodelist_t)); 1191 } 1192 /* 1193 * Clean up heap objects. 1194 */ 1195 if (rbtdb->heaps != NULL) { 1196 for (i = 0; i < rbtdb->node_lock_count; i++) { 1197 isc_heap_destroy(&rbtdb->heaps[i]); 1198 } 1199 isc_mem_put(rbtdb->hmctx, rbtdb->heaps, 1200 rbtdb->node_lock_count * sizeof(isc_heap_t *)); 1201 } 1202 1203 if (rbtdb->rrsetstats != NULL) { 1204 dns_stats_detach(&rbtdb->rrsetstats); 1205 } 1206 if (rbtdb->cachestats != NULL) { 1207 isc_stats_detach(&rbtdb->cachestats); 1208 } 1209 if (rbtdb->gluecachestats != NULL) { 1210 isc_stats_detach(&rbtdb->gluecachestats); 1211 } 1212 1213 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks, 1214 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t)); 1215 isc_rwlock_destroy(&rbtdb->tree_lock); 1216 isc_refcount_destroy(&rbtdb->references); 1217 if (rbtdb->task != NULL) { 1218 isc_task_detach(&rbtdb->task); 1219 } 1220 1221 RBTDB_DESTROYLOCK(&rbtdb->lock); 1222 rbtdb->common.magic = 0; 1223 rbtdb->common.impmagic = 0; 1224 isc_mem_detach(&rbtdb->hmctx); 1225 1226 if (rbtdb->mmap_location != NULL) { 1227 isc_file_munmap(rbtdb->mmap_location, (size_t)rbtdb->mmap_size); 1228 } 1229 1230 INSIST(ISC_LIST_EMPTY(rbtdb->common.update_listeners)); 1231 1232 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb)); 1233 } 1234 1235 static void 1236 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) { 1237 bool want_free = false; 1238 unsigned int i; 1239 unsigned int inactive = 0; 1240 1241 /* XXX check for open versions here */ 1242 1243 if (rbtdb->soanode != NULL) { 1244 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode); 1245 } 1246 if (rbtdb->nsnode != NULL) { 1247 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode); 1248 } 1249 1250 /* 1251 * The current version's glue table needs to be freed early 1252 * so the nodes are dereferenced before we check the active 1253 * node count below. 1254 */ 1255 if (rbtdb->current_version != NULL) { 1256 free_gluetable(rbtdb->current_version); 1257 } 1258 1259 /* 1260 * Even though there are no external direct references, there still 1261 * may be nodes in use. 1262 */ 1263 for (i = 0; i < rbtdb->node_lock_count; i++) { 1264 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write); 1265 rbtdb->node_locks[i].exiting = true; 1266 if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0) 1267 { 1268 inactive++; 1269 } 1270 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write); 1271 } 1272 1273 if (inactive != 0) { 1274 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 1275 rbtdb->active -= inactive; 1276 if (rbtdb->active == 0) { 1277 want_free = true; 1278 } 1279 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1280 if (want_free) { 1281 char buf[DNS_NAME_FORMATSIZE]; 1282 if (dns_name_dynamic(&rbtdb->common.origin)) { 1283 dns_name_format(&rbtdb->common.origin, buf, 1284 sizeof(buf)); 1285 } else { 1286 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 1287 } 1288 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1289 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1290 "calling free_rbtdb(%s)", buf); 1291 free_rbtdb(rbtdb, true, NULL); 1292 } 1293 } 1294 } 1295 1296 static void 1297 detach(dns_db_t **dbp) { 1298 REQUIRE(dbp != NULL && VALID_RBTDB((dns_rbtdb_t *)(*dbp))); 1299 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp); 1300 *dbp = NULL; 1301 1302 if (isc_refcount_decrement(&rbtdb->references) == 1) { 1303 maybe_free_rbtdb(rbtdb); 1304 } 1305 } 1306 1307 static void 1308 currentversion(dns_db_t *db, dns_dbversion_t **versionp) { 1309 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1310 rbtdb_version_t *version; 1311 1312 REQUIRE(VALID_RBTDB(rbtdb)); 1313 1314 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 1315 version = rbtdb->current_version; 1316 isc_refcount_increment(&version->references); 1317 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 1318 1319 *versionp = (dns_dbversion_t *)version; 1320 } 1321 1322 static rbtdb_version_t * 1323 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial, 1324 unsigned int references, bool writer) { 1325 rbtdb_version_t *version; 1326 size_t size; 1327 1328 version = isc_mem_get(mctx, sizeof(*version)); 1329 version->serial = serial; 1330 1331 isc_refcount_init(&version->references, references); 1332 isc_rwlock_init(&version->glue_rwlock, 0, 0); 1333 1334 version->glue_table_bits = RBTDB_GLUE_TABLE_INIT_BITS; 1335 version->glue_table_nodecount = 0U; 1336 1337 size = HASHSIZE(version->glue_table_bits) * 1338 sizeof(version->glue_table[0]); 1339 version->glue_table = isc_mem_get(mctx, size); 1340 memset(version->glue_table, 0, size); 1341 1342 version->writer = writer; 1343 version->commit_ok = false; 1344 ISC_LIST_INIT(version->changed_list); 1345 ISC_LIST_INIT(version->resigned_list); 1346 ISC_LINK_INIT(version, link); 1347 1348 return (version); 1349 } 1350 1351 static isc_result_t 1352 newversion(dns_db_t *db, dns_dbversion_t **versionp) { 1353 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1354 rbtdb_version_t *version; 1355 1356 REQUIRE(VALID_RBTDB(rbtdb)); 1357 REQUIRE(versionp != NULL && *versionp == NULL); 1358 REQUIRE(rbtdb->future_version == NULL); 1359 1360 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 1361 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */ 1362 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1, 1363 true); 1364 version->rbtdb = rbtdb; 1365 version->commit_ok = true; 1366 version->secure = rbtdb->current_version->secure; 1367 version->havensec3 = rbtdb->current_version->havensec3; 1368 if (version->havensec3) { 1369 version->flags = rbtdb->current_version->flags; 1370 version->iterations = rbtdb->current_version->iterations; 1371 version->hash = rbtdb->current_version->hash; 1372 version->salt_length = rbtdb->current_version->salt_length; 1373 memmove(version->salt, rbtdb->current_version->salt, 1374 version->salt_length); 1375 } else { 1376 version->flags = 0; 1377 version->iterations = 0; 1378 version->hash = 0; 1379 version->salt_length = 0; 1380 memset(version->salt, 0, sizeof(version->salt)); 1381 } 1382 isc_rwlock_init(&version->rwlock, 0, 0); 1383 RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); 1384 version->records = rbtdb->current_version->records; 1385 version->xfrsize = rbtdb->current_version->xfrsize; 1386 RWUNLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); 1387 rbtdb->next_serial++; 1388 rbtdb->future_version = version; 1389 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1390 1391 *versionp = version; 1392 1393 return (ISC_R_SUCCESS); 1394 } 1395 1396 static void 1397 attachversion(dns_db_t *db, dns_dbversion_t *source, 1398 dns_dbversion_t **targetp) { 1399 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1400 rbtdb_version_t *rbtversion = source; 1401 1402 REQUIRE(VALID_RBTDB(rbtdb)); 1403 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb); 1404 1405 isc_refcount_increment(&rbtversion->references); 1406 1407 *targetp = rbtversion; 1408 } 1409 1410 static rbtdb_changed_t * 1411 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, dns_rbtnode_t *node) { 1412 rbtdb_changed_t *changed; 1413 1414 /* 1415 * Caller must be holding the node lock if its reference must be 1416 * protected by the lock. 1417 */ 1418 1419 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed)); 1420 1421 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 1422 1423 REQUIRE(version->writer); 1424 1425 if (changed != NULL) { 1426 isc_refcount_increment(&node->references); 1427 changed->node = node; 1428 changed->dirty = false; 1429 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link); 1430 } else { 1431 version->commit_ok = false; 1432 } 1433 1434 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 1435 1436 return (changed); 1437 } 1438 1439 static void 1440 free_noqname(isc_mem_t *mctx, struct noqname **noqname) { 1441 if (dns_name_dynamic(&(*noqname)->name)) { 1442 dns_name_free(&(*noqname)->name, mctx); 1443 } 1444 if ((*noqname)->neg != NULL) { 1445 isc_mem_put(mctx, (*noqname)->neg, 1446 dns_rdataslab_size((*noqname)->neg, 0)); 1447 } 1448 if ((*noqname)->negsig != NULL) { 1449 isc_mem_put(mctx, (*noqname)->negsig, 1450 dns_rdataslab_size((*noqname)->negsig, 0)); 1451 } 1452 isc_mem_put(mctx, *noqname, sizeof(**noqname)); 1453 *noqname = NULL; 1454 } 1455 1456 static void 1457 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) { 1458 ISC_LINK_INIT(h, link); 1459 h->heap_index = 0; 1460 h->is_mmapped = 0; 1461 h->next_is_relative = 0; 1462 h->node_is_relative = 0; 1463 atomic_init(&h->attributes, 0); 1464 atomic_init(&h->last_refresh_fail_ts, 0); 1465 1466 STATIC_ASSERT((sizeof(h->attributes) == 2), 1467 "The .attributes field of rdatasetheader_t needs to be " 1468 "16-bit int type exactly."); 1469 1470 #if TRACE_HEADER 1471 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) { 1472 fprintf(stderr, "initialized header: %p\n", h); 1473 } 1474 #else /* if TRACE_HEADER */ 1475 UNUSED(rbtdb); 1476 #endif /* if TRACE_HEADER */ 1477 } 1478 1479 /* 1480 * Update the copied values of 'next' and 'node' if they are relative. 1481 */ 1482 static void 1483 update_newheader(rdatasetheader_t *newh, rdatasetheader_t *old) { 1484 char *p; 1485 1486 if (old->next_is_relative) { 1487 p = (char *)old; 1488 p += (uintptr_t)old->next; 1489 newh->next = (rdatasetheader_t *)p; 1490 } 1491 if (old->node_is_relative) { 1492 p = (char *)old; 1493 p += (uintptr_t)old->node; 1494 newh->node = (dns_rbtnode_t *)p; 1495 } 1496 if (CASESET(old)) { 1497 uint_least16_t attr = RDATASET_ATTR_GET( 1498 old, 1499 (RDATASET_ATTR_CASESET | RDATASET_ATTR_CASEFULLYLOWER)); 1500 RDATASET_ATTR_SET(newh, attr); 1501 memmove(newh->upper, old->upper, sizeof(old->upper)); 1502 } 1503 } 1504 1505 static rdatasetheader_t * 1506 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) { 1507 rdatasetheader_t *h; 1508 1509 h = isc_mem_get(mctx, sizeof(*h)); 1510 1511 #if TRACE_HEADER 1512 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) { 1513 fprintf(stderr, "allocated header: %p\n", h); 1514 } 1515 #endif /* if TRACE_HEADER */ 1516 memset(h->upper, 0xeb, sizeof(h->upper)); 1517 init_rdataset(rbtdb, h); 1518 h->rdh_ttl = 0; 1519 return (h); 1520 } 1521 1522 static void 1523 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) { 1524 unsigned int size; 1525 int idx; 1526 1527 update_rrsetstats(rbtdb, rdataset->type, 1528 atomic_load_acquire(&rdataset->attributes), false); 1529 1530 idx = rdataset->node->locknum; 1531 if (ISC_LINK_LINKED(rdataset, link)) { 1532 INSIST(IS_CACHE(rbtdb)); 1533 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link); 1534 } 1535 1536 if (rdataset->heap_index != 0) { 1537 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index); 1538 } 1539 rdataset->heap_index = 0; 1540 1541 if (rdataset->noqname != NULL) { 1542 free_noqname(mctx, &rdataset->noqname); 1543 } 1544 if (rdataset->closest != NULL) { 1545 free_noqname(mctx, &rdataset->closest); 1546 } 1547 1548 if (NONEXISTENT(rdataset)) { 1549 size = sizeof(*rdataset); 1550 } else { 1551 size = dns_rdataslab_size((unsigned char *)rdataset, 1552 sizeof(*rdataset)); 1553 } 1554 1555 if (rdataset->is_mmapped == 1) { 1556 return; 1557 } 1558 1559 isc_mem_put(mctx, rdataset, size); 1560 } 1561 1562 static void 1563 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) { 1564 rdatasetheader_t *header, *dcurrent; 1565 bool make_dirty = false; 1566 1567 /* 1568 * Caller must hold the node lock. 1569 */ 1570 1571 /* 1572 * We set the IGNORE attribute on rdatasets with serial number 1573 * 'serial'. When the reference count goes to zero, these rdatasets 1574 * will be cleaned up; until that time, they will be ignored. 1575 */ 1576 for (header = node->data; header != NULL; header = header->next) { 1577 if (header->serial == serial) { 1578 RDATASET_ATTR_SET(header, RDATASET_ATTR_IGNORE); 1579 make_dirty = true; 1580 } 1581 for (dcurrent = header->down; dcurrent != NULL; 1582 dcurrent = dcurrent->down) 1583 { 1584 if (dcurrent->serial == serial) { 1585 RDATASET_ATTR_SET(dcurrent, 1586 RDATASET_ATTR_IGNORE); 1587 make_dirty = true; 1588 } 1589 } 1590 } 1591 if (make_dirty) { 1592 node->dirty = 1; 1593 } 1594 } 1595 1596 static void 1597 mark_header_ancient(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) { 1598 uint_least16_t attributes = atomic_load_acquire(&header->attributes); 1599 uint_least16_t newattributes = 0; 1600 1601 /* 1602 * If we are already ancient there is nothing to do. 1603 */ 1604 do { 1605 if ((attributes & RDATASET_ATTR_ANCIENT) != 0) { 1606 return; 1607 } 1608 newattributes = attributes | RDATASET_ATTR_ANCIENT; 1609 } while (!atomic_compare_exchange_weak_acq_rel( 1610 &header->attributes, &attributes, newattributes)); 1611 1612 /* 1613 * Decrement the stats counter for the appropriate RRtype. 1614 * If the STALE attribute is set, this will decrement the 1615 * stale type counter, otherwise it decrements the active 1616 * stats type counter. 1617 */ 1618 update_rrsetstats(rbtdb, header->type, attributes, false); 1619 header->node->dirty = 1; 1620 1621 /* Increment the stats counter for the ancient RRtype. */ 1622 update_rrsetstats(rbtdb, header->type, newattributes, true); 1623 } 1624 1625 static void 1626 mark_header_stale(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) { 1627 uint_least16_t attributes = atomic_load_acquire(&header->attributes); 1628 uint_least16_t newattributes = 0; 1629 1630 INSIST((attributes & RDATASET_ATTR_ZEROTTL) == 0); 1631 1632 /* 1633 * If we are already stale there is nothing to do. 1634 */ 1635 do { 1636 if ((attributes & RDATASET_ATTR_STALE) != 0) { 1637 return; 1638 } 1639 newattributes = attributes | RDATASET_ATTR_STALE; 1640 } while (!atomic_compare_exchange_weak_acq_rel( 1641 &header->attributes, &attributes, newattributes)); 1642 1643 /* Decrement the stats counter for the appropriate RRtype. 1644 * If the ANCIENT attribute is set (although it is very 1645 * unlikely that an RRset goes from ANCIENT to STALE), this 1646 * will decrement the ancient stale type counter, otherwise it 1647 * decrements the active stats type counter. 1648 */ 1649 1650 update_rrsetstats(rbtdb, header->type, attributes, false); 1651 update_rrsetstats(rbtdb, header->type, newattributes, true); 1652 } 1653 1654 static void 1655 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, 1656 rdatasetheader_t *top) { 1657 rdatasetheader_t *d, *down_next; 1658 1659 for (d = top->down; d != NULL; d = down_next) { 1660 down_next = d->down; 1661 free_rdataset(rbtdb, mctx, d); 1662 } 1663 top->down = NULL; 1664 } 1665 1666 static void 1667 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { 1668 rdatasetheader_t *current, *top_prev, *top_next; 1669 isc_mem_t *mctx = rbtdb->common.mctx; 1670 1671 /* 1672 * Caller must be holding the node lock. 1673 */ 1674 1675 top_prev = NULL; 1676 for (current = node->data; current != NULL; current = top_next) { 1677 top_next = current->next; 1678 clean_stale_headers(rbtdb, mctx, current); 1679 /* 1680 * If current is nonexistent, ancient, or stale and 1681 * we are not keeping stale, we can clean it up. 1682 */ 1683 if (NONEXISTENT(current) || ANCIENT(current) || 1684 (STALE(current) && !KEEPSTALE(rbtdb))) 1685 { 1686 if (top_prev != NULL) { 1687 top_prev->next = current->next; 1688 } else { 1689 node->data = current->next; 1690 } 1691 free_rdataset(rbtdb, mctx, current); 1692 } else { 1693 top_prev = current; 1694 } 1695 } 1696 node->dirty = 0; 1697 } 1698 1699 static void 1700 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1701 rbtdb_serial_t least_serial) { 1702 rdatasetheader_t *current, *dcurrent, *down_next, *dparent; 1703 rdatasetheader_t *top_prev, *top_next; 1704 isc_mem_t *mctx = rbtdb->common.mctx; 1705 bool still_dirty = false; 1706 1707 /* 1708 * Caller must be holding the node lock. 1709 */ 1710 REQUIRE(least_serial != 0); 1711 1712 top_prev = NULL; 1713 for (current = node->data; current != NULL; current = top_next) { 1714 top_next = current->next; 1715 1716 /* 1717 * First, we clean up any instances of multiple rdatasets 1718 * with the same serial number, or that have the IGNORE 1719 * attribute. 1720 */ 1721 dparent = current; 1722 for (dcurrent = current->down; dcurrent != NULL; 1723 dcurrent = down_next) 1724 { 1725 down_next = dcurrent->down; 1726 INSIST(dcurrent->serial <= dparent->serial); 1727 if (dcurrent->serial == dparent->serial || 1728 IGNORE(dcurrent)) 1729 { 1730 if (down_next != NULL) { 1731 down_next->next = dparent; 1732 } 1733 dparent->down = down_next; 1734 free_rdataset(rbtdb, mctx, dcurrent); 1735 } else { 1736 dparent = dcurrent; 1737 } 1738 } 1739 1740 /* 1741 * We've now eliminated all IGNORE datasets with the possible 1742 * exception of current, which we now check. 1743 */ 1744 if (IGNORE(current)) { 1745 down_next = current->down; 1746 if (down_next == NULL) { 1747 if (top_prev != NULL) { 1748 top_prev->next = current->next; 1749 } else { 1750 node->data = current->next; 1751 } 1752 free_rdataset(rbtdb, mctx, current); 1753 /* 1754 * current no longer exists, so we can 1755 * just continue with the loop. 1756 */ 1757 continue; 1758 } else { 1759 /* 1760 * Pull up current->down, making it the new 1761 * current. 1762 */ 1763 if (top_prev != NULL) { 1764 top_prev->next = down_next; 1765 } else { 1766 node->data = down_next; 1767 } 1768 down_next->next = top_next; 1769 free_rdataset(rbtdb, mctx, current); 1770 current = down_next; 1771 } 1772 } 1773 1774 /* 1775 * We now try to find the first down node less than the 1776 * least serial. 1777 */ 1778 dparent = current; 1779 for (dcurrent = current->down; dcurrent != NULL; 1780 dcurrent = down_next) 1781 { 1782 down_next = dcurrent->down; 1783 if (dcurrent->serial < least_serial) { 1784 break; 1785 } 1786 dparent = dcurrent; 1787 } 1788 1789 /* 1790 * If there is a such an rdataset, delete it and any older 1791 * versions. 1792 */ 1793 if (dcurrent != NULL) { 1794 do { 1795 down_next = dcurrent->down; 1796 INSIST(dcurrent->serial <= least_serial); 1797 free_rdataset(rbtdb, mctx, dcurrent); 1798 dcurrent = down_next; 1799 } while (dcurrent != NULL); 1800 dparent->down = NULL; 1801 } 1802 1803 /* 1804 * Note. The serial number of 'current' might be less than 1805 * least_serial too, but we cannot delete it because it is 1806 * the most recent version, unless it is a NONEXISTENT 1807 * rdataset. 1808 */ 1809 if (current->down != NULL) { 1810 still_dirty = true; 1811 top_prev = current; 1812 } else { 1813 /* 1814 * If this is a NONEXISTENT rdataset, we can delete it. 1815 */ 1816 if (NONEXISTENT(current)) { 1817 if (top_prev != NULL) { 1818 top_prev->next = current->next; 1819 } else { 1820 node->data = current->next; 1821 } 1822 free_rdataset(rbtdb, mctx, current); 1823 } else { 1824 top_prev = current; 1825 } 1826 } 1827 } 1828 if (!still_dirty) { 1829 node->dirty = 0; 1830 } 1831 } 1832 1833 /* 1834 * tree_lock(write) must be held. 1835 */ 1836 static void 1837 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { 1838 dns_rbtnode_t *nsecnode; 1839 dns_fixedname_t fname; 1840 dns_name_t *name; 1841 isc_result_t result = ISC_R_UNEXPECTED; 1842 1843 INSIST(!ISC_LINK_LINKED(node, deadlink)); 1844 1845 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) { 1846 char printname[DNS_NAME_FORMATSIZE]; 1847 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1848 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 1849 "delete_node(): %p %s (bucket %d)", node, 1850 dns_rbt_formatnodename(node, printname, 1851 sizeof(printname)), 1852 node->locknum); 1853 } 1854 1855 switch (node->nsec) { 1856 case DNS_RBT_NSEC_NORMAL: 1857 /* 1858 * Though this may be wasteful, it has to be done before 1859 * node is deleted. 1860 */ 1861 name = dns_fixedname_initname(&fname); 1862 dns_rbt_fullnamefromnode(node, name); 1863 1864 result = dns_rbt_deletenode(rbtdb->tree, node, false); 1865 break; 1866 case DNS_RBT_NSEC_HAS_NSEC: 1867 name = dns_fixedname_initname(&fname); 1868 dns_rbt_fullnamefromnode(node, name); 1869 /* 1870 * Delete the corresponding node from the auxiliary NSEC 1871 * tree before deleting from the main tree. 1872 */ 1873 nsecnode = NULL; 1874 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode, 1875 NULL, DNS_RBTFIND_EMPTYDATA, NULL, 1876 NULL); 1877 if (result != ISC_R_SUCCESS) { 1878 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1879 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1880 "delete_node: " 1881 "dns_rbt_findnode(nsec): %s", 1882 isc_result_totext(result)); 1883 } else { 1884 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode, 1885 false); 1886 if (result != ISC_R_SUCCESS) { 1887 isc_log_write( 1888 dns_lctx, DNS_LOGCATEGORY_DATABASE, 1889 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1890 "delete_node(): " 1891 "dns_rbt_deletenode(nsecnode): %s", 1892 isc_result_totext(result)); 1893 } 1894 } 1895 result = dns_rbt_deletenode(rbtdb->tree, node, false); 1896 break; 1897 case DNS_RBT_NSEC_NSEC: 1898 result = dns_rbt_deletenode(rbtdb->nsec, node, false); 1899 break; 1900 case DNS_RBT_NSEC_NSEC3: 1901 result = dns_rbt_deletenode(rbtdb->nsec3, node, false); 1902 break; 1903 } 1904 if (result != ISC_R_SUCCESS) { 1905 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1906 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 1907 "delete_node(): " 1908 "dns_rbt_deletenode: %s", 1909 isc_result_totext(result)); 1910 } 1911 } 1912 1913 /* 1914 * Caller must be holding the node lock. 1915 */ 1916 static void 1917 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1918 isc_rwlocktype_t locktype) { 1919 if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink)) 1920 { 1921 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, 1922 deadlink); 1923 } 1924 if (isc_refcount_increment0(&node->references) == 0) { 1925 /* this is the first reference to the node */ 1926 isc_refcount_increment0( 1927 &rbtdb->node_locks[node->locknum].references); 1928 } 1929 } 1930 1931 /*% 1932 * The tree lock must be held for the result to be valid. 1933 */ 1934 static bool 1935 is_leaf(dns_rbtnode_t *node) { 1936 return (node->parent != NULL && node->parent->down == node && 1937 node->left == NULL && node->right == NULL); 1938 } 1939 1940 static void 1941 send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 1942 isc_rwlocktype_t locktype) { 1943 isc_event_t *ev; 1944 dns_db_t *db; 1945 1946 ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE, 1947 prune_tree, node, sizeof(isc_event_t)); 1948 new_reference(rbtdb, node, locktype); 1949 db = NULL; 1950 attach((dns_db_t *)rbtdb, &db); 1951 ev->ev_sender = db; 1952 isc_task_send(rbtdb->task, &ev); 1953 } 1954 1955 /*% 1956 * Clean up dead nodes. These are nodes which have no references, and 1957 * have no data. They are dead but we could not or chose not to delete 1958 * them when we deleted all the data at that node because we did not want 1959 * to wait for the tree write lock. 1960 * 1961 * The caller must hold a tree write lock and bucketnum'th node (write) lock. 1962 */ 1963 static void 1964 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) { 1965 dns_rbtnode_t *node; 1966 int count = 10; /* XXXJT: should be adjustable */ 1967 1968 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); 1969 while (node != NULL && count > 0) { 1970 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink); 1971 1972 /* 1973 * We might have reactivated this node without a tree write 1974 * lock, so we couldn't remove this node from deadnodes then 1975 * and we have to do it now. 1976 */ 1977 if (isc_refcount_current(&node->references) != 0 || 1978 node->data != NULL) 1979 { 1980 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); 1981 count--; 1982 continue; 1983 } 1984 1985 if (is_leaf(node) && rbtdb->task != NULL) { 1986 send_to_prune_tree(rbtdb, node, isc_rwlocktype_write); 1987 } else if (node->down == NULL && node->data == NULL) { 1988 /* 1989 * Not a interior node and not needing to be 1990 * reactivated. 1991 */ 1992 delete_node(rbtdb, node); 1993 } else if (node->data == NULL) { 1994 /* 1995 * A interior node without data. Leave linked to 1996 * to be cleaned up when node->down becomes NULL. 1997 */ 1998 ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum], node, 1999 deadlink); 2000 } 2001 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]); 2002 count--; 2003 } 2004 } 2005 2006 /* 2007 * This function is assumed to be called when a node is newly referenced 2008 * and can be in the deadnode list. In that case the node must be retrieved 2009 * from the list because it is going to be used. In addition, if the caller 2010 * happens to hold a write lock on the tree, it's a good chance to purge dead 2011 * nodes. 2012 * Note: while a new reference is gained in multiple places, there are only very 2013 * few cases where the node can be in the deadnode list (only empty nodes can 2014 * have been added to the list). 2015 */ 2016 static void 2017 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 2018 isc_rwlocktype_t treelocktype) { 2019 isc_rwlocktype_t locktype = isc_rwlocktype_read; 2020 nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock; 2021 bool maybe_cleanup = false; 2022 2023 POST(locktype); 2024 2025 NODE_LOCK(nodelock, locktype); 2026 2027 /* 2028 * Check if we can possibly cleanup the dead node. If so, upgrade 2029 * the node lock below to perform the cleanup. 2030 */ 2031 if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) && 2032 treelocktype == isc_rwlocktype_write) 2033 { 2034 maybe_cleanup = true; 2035 } 2036 2037 if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) { 2038 /* 2039 * Upgrade the lock and test if we still need to unlink. 2040 */ 2041 NODE_UNLOCK(nodelock, locktype); 2042 locktype = isc_rwlocktype_write; 2043 POST(locktype); 2044 NODE_LOCK(nodelock, locktype); 2045 if (ISC_LINK_LINKED(node, deadlink)) { 2046 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, 2047 deadlink); 2048 } 2049 if (maybe_cleanup) { 2050 cleanup_dead_nodes(rbtdb, node->locknum); 2051 } 2052 } 2053 2054 new_reference(rbtdb, node, locktype); 2055 2056 NODE_UNLOCK(nodelock, locktype); 2057 } 2058 2059 /* 2060 * Caller must be holding the node lock; either the "strong", read or write 2061 * lock. Note that the lock must be held even when node references are 2062 * atomically modified; in that case the decrement operation itself does not 2063 * have to be protected, but we must avoid a race condition where multiple 2064 * threads are decreasing the reference to zero simultaneously and at least 2065 * one of them is going to free the node. 2066 * 2067 * This function returns true if and only if the node reference decreases 2068 * to zero. 2069 * 2070 * NOTE: Decrementing the reference count of a node to zero does not mean it 2071 * will be immediately freed. 2072 */ 2073 static bool 2074 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 2075 rbtdb_serial_t least_serial, isc_rwlocktype_t nlock, 2076 isc_rwlocktype_t tlock, bool pruning) { 2077 isc_result_t result; 2078 bool write_locked; 2079 bool locked = tlock != isc_rwlocktype_none; 2080 rbtdb_nodelock_t *nodelock; 2081 int bucket = node->locknum; 2082 bool no_reference = true; 2083 uint_fast32_t refs; 2084 2085 nodelock = &rbtdb->node_locks[bucket]; 2086 2087 #define KEEP_NODE(n, r, l) \ 2088 ((n)->data != NULL || ((l) && (n)->down != NULL) || \ 2089 (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node) 2090 2091 /* Handle easy and typical case first. */ 2092 if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) { 2093 if (isc_refcount_decrement(&node->references) == 1) { 2094 refs = isc_refcount_decrement(&nodelock->references); 2095 INSIST(refs > 0); 2096 return (true); 2097 } else { 2098 return (false); 2099 } 2100 } 2101 2102 /* Upgrade the lock? */ 2103 if (nlock == isc_rwlocktype_read) { 2104 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read); 2105 NODE_LOCK(&nodelock->lock, isc_rwlocktype_write); 2106 } 2107 2108 if (isc_refcount_decrement(&node->references) > 1) { 2109 /* Restore the lock? */ 2110 if (nlock == isc_rwlocktype_read) { 2111 NODE_DOWNGRADE(&nodelock->lock); 2112 } 2113 return (false); 2114 } 2115 2116 if (node->dirty) { 2117 if (IS_CACHE(rbtdb)) { 2118 clean_cache_node(rbtdb, node); 2119 } else { 2120 if (least_serial == 0) { 2121 /* 2122 * Caller doesn't know the least serial. 2123 * Get it. 2124 */ 2125 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 2126 least_serial = rbtdb->least_serial; 2127 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 2128 } 2129 clean_zone_node(rbtdb, node, least_serial); 2130 } 2131 } 2132 2133 /* 2134 * Attempt to switch to a write lock on the tree. If this fails, 2135 * we will add this node to a linked list of nodes in this locking 2136 * bucket which we will free later. 2137 */ 2138 if (tlock != isc_rwlocktype_write) { 2139 /* 2140 * Locking hierarchy notwithstanding, we don't need to free 2141 * the node lock before acquiring the tree write lock because 2142 * we only do a trylock. 2143 */ 2144 if (tlock == isc_rwlocktype_read) { 2145 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock); 2146 } else { 2147 result = isc_rwlock_trylock(&rbtdb->tree_lock, 2148 isc_rwlocktype_write); 2149 } 2150 RUNTIME_CHECK(result == ISC_R_SUCCESS || 2151 result == ISC_R_LOCKBUSY); 2152 2153 write_locked = (result == ISC_R_SUCCESS); 2154 } else { 2155 write_locked = true; 2156 } 2157 2158 refs = isc_refcount_decrement(&nodelock->references); 2159 INSIST(refs > 0); 2160 2161 if (KEEP_NODE(node, rbtdb, locked || write_locked)) { 2162 goto restore_locks; 2163 } 2164 2165 #undef KEEP_NODE 2166 2167 if (write_locked) { 2168 /* 2169 * We can now delete the node. 2170 */ 2171 2172 /* 2173 * If this node is the only one in the level it's in, deleting 2174 * this node may recursively make its parent the only node in 2175 * the parent level; if so, and if no one is currently using 2176 * the parent node, this is almost the only opportunity to 2177 * clean it up. But the recursive cleanup is not that trivial 2178 * since the child and parent may be in different lock buckets, 2179 * which would cause a lock order reversal problem. To avoid 2180 * the trouble, we'll dispatch a separate event for batch 2181 * cleaning. We need to check whether we're deleting the node 2182 * as a result of pruning to avoid infinite dispatching. 2183 * Note: pruning happens only when a task has been set for the 2184 * rbtdb. If the user of the rbtdb chooses not to set a task, 2185 * it's their responsibility to purge stale leaves (e.g. by 2186 * periodic walk-through). 2187 */ 2188 if (!pruning && is_leaf(node) && rbtdb->task != NULL) { 2189 send_to_prune_tree(rbtdb, node, isc_rwlocktype_write); 2190 no_reference = false; 2191 } else { 2192 delete_node(rbtdb, node); 2193 } 2194 } else { 2195 INSIST(node->data == NULL); 2196 if (!ISC_LINK_LINKED(node, deadlink)) { 2197 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, 2198 deadlink); 2199 } 2200 } 2201 2202 restore_locks: 2203 /* Restore the lock? */ 2204 if (nlock == isc_rwlocktype_read) { 2205 NODE_DOWNGRADE(&nodelock->lock); 2206 } 2207 2208 /* 2209 * Relock a read lock, or unlock the write lock if no lock was held. 2210 */ 2211 if (tlock == isc_rwlocktype_none) { 2212 if (write_locked) { 2213 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2214 } 2215 } 2216 2217 if (tlock == isc_rwlocktype_read) { 2218 if (write_locked) { 2219 isc_rwlock_downgrade(&rbtdb->tree_lock); 2220 } 2221 } 2222 2223 return (no_reference); 2224 } 2225 2226 /* 2227 * Prune the tree by recursively cleaning-up single leaves. In the worst 2228 * case, the number of iteration is the number of tree levels, which is at 2229 * most the maximum number of domain name labels, i.e, 127. In practice, this 2230 * should be much smaller (only a few times), and even the worst case would be 2231 * acceptable for a single event. 2232 */ 2233 static void 2234 prune_tree(isc_task_t *task, isc_event_t *event) { 2235 dns_rbtdb_t *rbtdb = event->ev_sender; 2236 dns_rbtnode_t *node = event->ev_arg; 2237 dns_rbtnode_t *parent; 2238 unsigned int locknum; 2239 2240 UNUSED(task); 2241 2242 isc_event_free(&event); 2243 2244 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2245 locknum = node->locknum; 2246 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 2247 do { 2248 parent = node->parent; 2249 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write, 2250 isc_rwlocktype_write, true); 2251 2252 if (parent != NULL && parent->down == NULL) { 2253 /* 2254 * node was the only down child of the parent and has 2255 * just been removed. We'll then need to examine the 2256 * parent. Keep the lock if possible; otherwise, 2257 * release the old lock and acquire one for the parent. 2258 */ 2259 if (parent->locknum != locknum) { 2260 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 2261 isc_rwlocktype_write); 2262 locknum = parent->locknum; 2263 NODE_LOCK(&rbtdb->node_locks[locknum].lock, 2264 isc_rwlocktype_write); 2265 } 2266 2267 /* 2268 * We need to gain a reference to the node before 2269 * decrementing it in the next iteration. 2270 */ 2271 if (ISC_LINK_LINKED(parent, deadlink)) { 2272 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum], 2273 parent, deadlink); 2274 } 2275 new_reference(rbtdb, parent, isc_rwlocktype_write); 2276 } else { 2277 parent = NULL; 2278 } 2279 2280 node = parent; 2281 } while (node != NULL); 2282 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 2283 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2284 2285 detach((dns_db_t **)(void *)&rbtdb); 2286 } 2287 2288 static void 2289 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, 2290 rbtdb_changedlist_t *cleanup_list) { 2291 /* 2292 * Caller must be holding the database lock. 2293 */ 2294 2295 rbtdb->least_serial = version->serial; 2296 *cleanup_list = version->changed_list; 2297 ISC_LIST_INIT(version->changed_list); 2298 } 2299 2300 static void 2301 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) { 2302 rbtdb_changed_t *changed, *next_changed; 2303 2304 /* 2305 * If the changed record is dirty, then 2306 * an update created multiple versions of 2307 * a given rdataset. We keep this list 2308 * until we're the least open version, at 2309 * which point it's safe to get rid of any 2310 * older versions. 2311 * 2312 * If the changed record isn't dirty, then 2313 * we don't need it anymore since we're 2314 * committing and not rolling back. 2315 * 2316 * The caller must be holding the database lock. 2317 */ 2318 for (changed = HEAD(version->changed_list); changed != NULL; 2319 changed = next_changed) 2320 { 2321 next_changed = NEXT(changed, link); 2322 if (!changed->dirty) { 2323 UNLINK(version->changed_list, changed, link); 2324 APPEND(*cleanup_list, changed, link); 2325 } 2326 } 2327 } 2328 2329 static void 2330 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) { 2331 dns_rdataset_t keyset; 2332 dns_rdataset_t nsecset, signsecset; 2333 bool haszonekey = false; 2334 bool hasnsec = false; 2335 isc_result_t result; 2336 2337 dns_rdataset_init(&keyset); 2338 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey, 2339 0, 0, &keyset, NULL); 2340 if (result == ISC_R_SUCCESS) { 2341 result = dns_rdataset_first(&keyset); 2342 while (result == ISC_R_SUCCESS) { 2343 dns_rdata_t keyrdata = DNS_RDATA_INIT; 2344 dns_rdataset_current(&keyset, &keyrdata); 2345 if (dns_zonekey_iszonekey(&keyrdata)) { 2346 haszonekey = true; 2347 break; 2348 } 2349 result = dns_rdataset_next(&keyset); 2350 } 2351 dns_rdataset_disassociate(&keyset); 2352 } 2353 if (!haszonekey) { 2354 version->secure = dns_db_insecure; 2355 version->havensec3 = false; 2356 return; 2357 } 2358 2359 dns_rdataset_init(&nsecset); 2360 dns_rdataset_init(&signsecset); 2361 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0, 2362 0, &nsecset, &signsecset); 2363 if (result == ISC_R_SUCCESS) { 2364 if (dns_rdataset_isassociated(&signsecset)) { 2365 hasnsec = true; 2366 dns_rdataset_disassociate(&signsecset); 2367 } 2368 dns_rdataset_disassociate(&nsecset); 2369 } 2370 2371 setnsec3parameters(db, version); 2372 2373 /* 2374 * Do we have a valid NSEC/NSEC3 chain? 2375 */ 2376 if (version->havensec3 || hasnsec) { 2377 version->secure = dns_db_secure; 2378 } else { 2379 version->secure = dns_db_insecure; 2380 } 2381 } 2382 2383 /*%< 2384 * Walk the origin node looking for NSEC3PARAM records. 2385 * Cache the nsec3 parameters. 2386 */ 2387 static void 2388 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) { 2389 dns_rbtnode_t *node; 2390 dns_rdata_nsec3param_t nsec3param; 2391 dns_rdata_t rdata = DNS_RDATA_INIT; 2392 isc_region_t region; 2393 isc_result_t result; 2394 rdatasetheader_t *header, *header_next; 2395 unsigned char *raw; /* RDATASLAB */ 2396 unsigned int count, length; 2397 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2398 2399 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 2400 version->havensec3 = false; 2401 node = rbtdb->origin_node; 2402 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 2403 isc_rwlocktype_read); 2404 for (header = node->data; header != NULL; header = header_next) { 2405 header_next = header->next; 2406 do { 2407 if (header->serial <= version->serial && 2408 !IGNORE(header)) 2409 { 2410 if (NONEXISTENT(header)) { 2411 header = NULL; 2412 } 2413 break; 2414 } else { 2415 header = header->down; 2416 } 2417 } while (header != NULL); 2418 2419 if (header != NULL && 2420 (header->type == dns_rdatatype_nsec3param)) 2421 { 2422 /* 2423 * Find A NSEC3PARAM with a supported algorithm. 2424 */ 2425 raw = (unsigned char *)header + sizeof(*header); 2426 count = raw[0] * 256 + raw[1]; /* count */ 2427 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 2428 while (count-- > 0U) { 2429 length = raw[0] * 256 + raw[1]; 2430 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 2431 region.base = raw; 2432 region.length = length; 2433 raw += length; 2434 dns_rdata_fromregion( 2435 &rdata, rbtdb->common.rdclass, 2436 dns_rdatatype_nsec3param, ®ion); 2437 result = dns_rdata_tostruct(&rdata, &nsec3param, 2438 NULL); 2439 INSIST(result == ISC_R_SUCCESS); 2440 dns_rdata_reset(&rdata); 2441 2442 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG && 2443 !dns_nsec3_supportedhash(nsec3param.hash)) 2444 { 2445 continue; 2446 } 2447 2448 if (nsec3param.flags != 0) { 2449 continue; 2450 } 2451 2452 memmove(version->salt, nsec3param.salt, 2453 nsec3param.salt_length); 2454 version->hash = nsec3param.hash; 2455 version->salt_length = nsec3param.salt_length; 2456 version->iterations = nsec3param.iterations; 2457 version->flags = nsec3param.flags; 2458 version->havensec3 = true; 2459 /* 2460 * Look for a better algorithm than the 2461 * unknown test algorithm. 2462 */ 2463 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) { 2464 goto unlock; 2465 } 2466 } 2467 } 2468 } 2469 unlock: 2470 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 2471 isc_rwlocktype_read); 2472 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 2473 } 2474 2475 static void 2476 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) { 2477 dns_rbtdb_t *rbtdb = event->ev_arg; 2478 bool again = false; 2479 unsigned int locknum; 2480 2481 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2482 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) { 2483 NODE_LOCK(&rbtdb->node_locks[locknum].lock, 2484 isc_rwlocktype_write); 2485 cleanup_dead_nodes(rbtdb, locknum); 2486 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) { 2487 again = true; 2488 } 2489 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 2490 isc_rwlocktype_write); 2491 } 2492 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2493 if (again) { 2494 isc_task_send(task, &event); 2495 } else { 2496 isc_event_free(&event); 2497 if (isc_refcount_decrement(&rbtdb->references) == 1) { 2498 (void)isc_refcount_current(&rbtdb->references); 2499 maybe_free_rbtdb(rbtdb); 2500 } 2501 } 2502 } 2503 2504 static void 2505 closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) { 2506 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2507 rbtdb_version_t *version, *cleanup_version, *least_greater; 2508 bool rollback = false; 2509 rbtdb_changedlist_t cleanup_list; 2510 rdatasetheaderlist_t resigned_list; 2511 rbtdb_changed_t *changed, *next_changed; 2512 rbtdb_serial_t serial, least_serial; 2513 dns_rbtnode_t *rbtnode; 2514 rdatasetheader_t *header; 2515 2516 REQUIRE(VALID_RBTDB(rbtdb)); 2517 version = (rbtdb_version_t *)*versionp; 2518 INSIST(version->rbtdb == rbtdb); 2519 2520 cleanup_version = NULL; 2521 ISC_LIST_INIT(cleanup_list); 2522 ISC_LIST_INIT(resigned_list); 2523 2524 if (isc_refcount_decrement(&version->references) > 1) { 2525 /* typical and easy case first */ 2526 if (commit) { 2527 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 2528 INSIST(!version->writer); 2529 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 2530 } 2531 goto end; 2532 } 2533 2534 /* 2535 * Update the zone's secure status in version before making 2536 * it the current version. 2537 */ 2538 if (version->writer && commit && !IS_CACHE(rbtdb)) { 2539 iszonesecure(db, version, rbtdb->origin_node); 2540 } 2541 2542 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 2543 serial = version->serial; 2544 if (version->writer) { 2545 if (commit) { 2546 unsigned cur_ref; 2547 rbtdb_version_t *cur_version; 2548 2549 INSIST(version->commit_ok); 2550 INSIST(version == rbtdb->future_version); 2551 /* 2552 * The current version is going to be replaced. 2553 * Release the (likely last) reference to it from the 2554 * DB itself and unlink it from the open list. 2555 */ 2556 cur_version = rbtdb->current_version; 2557 cur_ref = isc_refcount_decrement( 2558 &cur_version->references); 2559 if (cur_ref == 1) { 2560 (void)isc_refcount_current( 2561 &cur_version->references); 2562 if (cur_version->serial == rbtdb->least_serial) 2563 { 2564 INSIST(EMPTY( 2565 cur_version->changed_list)); 2566 } 2567 UNLINK(rbtdb->open_versions, cur_version, link); 2568 } 2569 if (EMPTY(rbtdb->open_versions)) { 2570 /* 2571 * We're going to become the least open 2572 * version. 2573 */ 2574 make_least_version(rbtdb, version, 2575 &cleanup_list); 2576 } else { 2577 /* 2578 * Some other open version is the 2579 * least version. We can't cleanup 2580 * records that were changed in this 2581 * version because the older versions 2582 * may still be in use by an open 2583 * version. 2584 * 2585 * We can, however, discard the 2586 * changed records for things that 2587 * we've added that didn't exist in 2588 * prior versions. 2589 */ 2590 cleanup_nondirty(version, &cleanup_list); 2591 } 2592 /* 2593 * If the (soon to be former) current version 2594 * isn't being used by anyone, we can clean 2595 * it up. 2596 */ 2597 if (cur_ref == 1) { 2598 cleanup_version = cur_version; 2599 APPENDLIST(version->changed_list, 2600 cleanup_version->changed_list, link); 2601 } 2602 /* 2603 * Become the current version. 2604 */ 2605 version->writer = false; 2606 rbtdb->current_version = version; 2607 rbtdb->current_serial = version->serial; 2608 rbtdb->future_version = NULL; 2609 2610 /* 2611 * Keep the current version in the open list, and 2612 * gain a reference for the DB itself (see the DB 2613 * creation function below). This must be the only 2614 * case where we need to increment the counter from 2615 * zero and need to use isc_refcount_increment0(). 2616 */ 2617 INSIST(isc_refcount_increment0(&version->references) == 2618 0); 2619 PREPEND(rbtdb->open_versions, rbtdb->current_version, 2620 link); 2621 resigned_list = version->resigned_list; 2622 ISC_LIST_INIT(version->resigned_list); 2623 } else { 2624 /* 2625 * We're rolling back this transaction. 2626 */ 2627 cleanup_list = version->changed_list; 2628 ISC_LIST_INIT(version->changed_list); 2629 resigned_list = version->resigned_list; 2630 ISC_LIST_INIT(version->resigned_list); 2631 rollback = true; 2632 cleanup_version = version; 2633 rbtdb->future_version = NULL; 2634 } 2635 } else { 2636 if (version != rbtdb->current_version) { 2637 /* 2638 * There are no external or internal references 2639 * to this version and it can be cleaned up. 2640 */ 2641 cleanup_version = version; 2642 2643 /* 2644 * Find the version with the least serial 2645 * number greater than ours. 2646 */ 2647 least_greater = PREV(version, link); 2648 if (least_greater == NULL) { 2649 least_greater = rbtdb->current_version; 2650 } 2651 2652 INSIST(version->serial < least_greater->serial); 2653 /* 2654 * Is this the least open version? 2655 */ 2656 if (version->serial == rbtdb->least_serial) { 2657 /* 2658 * Yes. Install the new least open 2659 * version. 2660 */ 2661 make_least_version(rbtdb, least_greater, 2662 &cleanup_list); 2663 } else { 2664 /* 2665 * Add any unexecuted cleanups to 2666 * those of the least greater version. 2667 */ 2668 APPENDLIST(least_greater->changed_list, 2669 version->changed_list, link); 2670 } 2671 } else if (version->serial == rbtdb->least_serial) { 2672 INSIST(EMPTY(version->changed_list)); 2673 } 2674 UNLINK(rbtdb->open_versions, version, link); 2675 } 2676 least_serial = rbtdb->least_serial; 2677 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 2678 2679 if (cleanup_version != NULL) { 2680 INSIST(EMPTY(cleanup_version->changed_list)); 2681 free_gluetable(cleanup_version); 2682 isc_rwlock_destroy(&cleanup_version->glue_rwlock); 2683 isc_rwlock_destroy(&cleanup_version->rwlock); 2684 isc_mem_put(rbtdb->common.mctx, cleanup_version, 2685 sizeof(*cleanup_version)); 2686 } 2687 2688 /* 2689 * Commit/rollback re-signed headers. 2690 */ 2691 for (header = HEAD(resigned_list); header != NULL; 2692 header = HEAD(resigned_list)) 2693 { 2694 nodelock_t *lock; 2695 2696 ISC_LIST_UNLINK(resigned_list, header, link); 2697 2698 lock = &rbtdb->node_locks[header->node->locknum].lock; 2699 NODE_LOCK(lock, isc_rwlocktype_write); 2700 if (rollback && !IGNORE(header)) { 2701 resign_insert(rbtdb, header->node->locknum, header); 2702 } 2703 decrement_reference(rbtdb, header->node, least_serial, 2704 isc_rwlocktype_write, isc_rwlocktype_none, 2705 false); 2706 NODE_UNLOCK(lock, isc_rwlocktype_write); 2707 } 2708 2709 if (!EMPTY(cleanup_list)) { 2710 isc_event_t *event = NULL; 2711 isc_rwlocktype_t tlock = isc_rwlocktype_none; 2712 2713 if (rbtdb->task != NULL) { 2714 event = isc_event_allocate(rbtdb->common.mctx, NULL, 2715 DNS_EVENT_RBTDEADNODES, 2716 cleanup_dead_nodes_callback, 2717 rbtdb, sizeof(isc_event_t)); 2718 } 2719 if (event == NULL) { 2720 /* 2721 * We acquire a tree write lock here in order to make 2722 * sure that stale nodes will be removed in 2723 * decrement_reference(). If we didn't have the lock, 2724 * those nodes could miss the chance to be removed 2725 * until the server stops. The write lock is 2726 * expensive, but this event should be rare enough 2727 * to justify the cost. 2728 */ 2729 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2730 tlock = isc_rwlocktype_write; 2731 } 2732 2733 for (changed = HEAD(cleanup_list); changed != NULL; 2734 changed = next_changed) 2735 { 2736 nodelock_t *lock; 2737 2738 next_changed = NEXT(changed, link); 2739 rbtnode = changed->node; 2740 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 2741 2742 NODE_LOCK(lock, isc_rwlocktype_write); 2743 /* 2744 * This is a good opportunity to purge any dead nodes, 2745 * so use it. 2746 */ 2747 if (event == NULL) { 2748 cleanup_dead_nodes(rbtdb, rbtnode->locknum); 2749 } 2750 2751 if (rollback) { 2752 rollback_node(rbtnode, serial); 2753 } 2754 decrement_reference(rbtdb, rbtnode, least_serial, 2755 isc_rwlocktype_write, tlock, false); 2756 2757 NODE_UNLOCK(lock, isc_rwlocktype_write); 2758 2759 isc_mem_put(rbtdb->common.mctx, changed, 2760 sizeof(*changed)); 2761 } 2762 if (event != NULL) { 2763 isc_refcount_increment(&rbtdb->references); 2764 isc_task_send(rbtdb->task, &event); 2765 } else { 2766 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 2767 } 2768 } 2769 2770 end: 2771 *versionp = NULL; 2772 } 2773 2774 /* 2775 * Add the necessary magic for the wildcard name 'name' 2776 * to be found in 'rbtdb'. 2777 * 2778 * In order for wildcard matching to work correctly in 2779 * zone_find(), we must ensure that a node for the wildcarding 2780 * level exists in the database, and has its 'find_callback' 2781 * and 'wild' bits set. 2782 * 2783 * E.g. if the wildcard name is "*.sub.example." then we 2784 * must ensure that "sub.example." exists and is marked as 2785 * a wildcard level. 2786 * 2787 * tree_lock(write) must be held. 2788 */ 2789 static isc_result_t 2790 add_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) { 2791 isc_result_t result; 2792 dns_name_t foundname; 2793 dns_offsets_t offsets; 2794 unsigned int n; 2795 dns_rbtnode_t *node = NULL; 2796 2797 dns_name_init(&foundname, offsets); 2798 n = dns_name_countlabels(name); 2799 INSIST(n >= 2); 2800 n--; 2801 dns_name_getlabelsequence(name, 1, n, &foundname); 2802 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node); 2803 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { 2804 return (result); 2805 } 2806 if (result == ISC_R_SUCCESS) { 2807 node->nsec = DNS_RBT_NSEC_NORMAL; 2808 } 2809 node->find_callback = 1; 2810 if (lock) { 2811 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, 2812 isc_rwlocktype_write); 2813 } 2814 node->wild = 1; 2815 if (lock) { 2816 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, 2817 isc_rwlocktype_write); 2818 } 2819 return (ISC_R_SUCCESS); 2820 } 2821 2822 /* 2823 * tree_lock(write) must be held. 2824 */ 2825 static isc_result_t 2826 add_empty_wildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) { 2827 isc_result_t result; 2828 dns_name_t foundname; 2829 dns_offsets_t offsets; 2830 unsigned int n, l, i; 2831 2832 dns_name_init(&foundname, offsets); 2833 n = dns_name_countlabels(name); 2834 l = dns_name_countlabels(&rbtdb->common.origin); 2835 i = l + 1; 2836 while (i < n) { 2837 dns_rbtnode_t *node = NULL; /* dummy */ 2838 dns_name_getlabelsequence(name, n - i, i, &foundname); 2839 if (dns_name_iswildcard(&foundname)) { 2840 result = add_wildcard_magic(rbtdb, &foundname, lock); 2841 if (result != ISC_R_SUCCESS) { 2842 return (result); 2843 } 2844 result = dns_rbt_addnode(rbtdb->tree, &foundname, 2845 &node); 2846 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { 2847 return (result); 2848 } 2849 if (result == ISC_R_SUCCESS) { 2850 node->nsec = DNS_RBT_NSEC_NORMAL; 2851 } 2852 } 2853 i++; 2854 } 2855 return (ISC_R_SUCCESS); 2856 } 2857 2858 static isc_result_t 2859 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, const dns_name_t *name, 2860 bool create, dns_dbnode_t **nodep) { 2861 dns_rbtnode_t *node = NULL; 2862 dns_name_t nodename; 2863 isc_result_t result; 2864 isc_rwlocktype_t locktype = isc_rwlocktype_read; 2865 2866 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3); 2867 2868 dns_name_init(&nodename, NULL); 2869 RWLOCK(&rbtdb->tree_lock, locktype); 2870 result = dns_rbt_findnode(tree, name, NULL, &node, NULL, 2871 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 2872 if (result != ISC_R_SUCCESS) { 2873 RWUNLOCK(&rbtdb->tree_lock, locktype); 2874 if (!create) { 2875 if (result == DNS_R_PARTIALMATCH) { 2876 result = ISC_R_NOTFOUND; 2877 } 2878 return (result); 2879 } 2880 /* 2881 * It would be nice to try to upgrade the lock instead of 2882 * unlocking then relocking. 2883 */ 2884 locktype = isc_rwlocktype_write; 2885 RWLOCK(&rbtdb->tree_lock, locktype); 2886 node = NULL; 2887 result = dns_rbt_addnode(tree, name, &node); 2888 if (result == ISC_R_SUCCESS) { 2889 dns_rbt_namefromnode(node, &nodename); 2890 node->locknum = node->hashval % rbtdb->node_lock_count; 2891 if (tree == rbtdb->tree) { 2892 add_empty_wildcards(rbtdb, name, true); 2893 2894 if (dns_name_iswildcard(name)) { 2895 result = add_wildcard_magic(rbtdb, name, 2896 true); 2897 if (result != ISC_R_SUCCESS) { 2898 RWUNLOCK(&rbtdb->tree_lock, 2899 locktype); 2900 return (result); 2901 } 2902 } 2903 } 2904 if (tree == rbtdb->nsec3) { 2905 node->nsec = DNS_RBT_NSEC_NSEC3; 2906 } 2907 } else if (result != ISC_R_EXISTS) { 2908 RWUNLOCK(&rbtdb->tree_lock, locktype); 2909 return (result); 2910 } 2911 } 2912 2913 if (tree == rbtdb->nsec3) { 2914 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3); 2915 } 2916 2917 reactivate_node(rbtdb, node, locktype); 2918 2919 RWUNLOCK(&rbtdb->tree_lock, locktype); 2920 2921 *nodep = (dns_dbnode_t *)node; 2922 2923 return (ISC_R_SUCCESS); 2924 } 2925 2926 static isc_result_t 2927 findnode(dns_db_t *db, const dns_name_t *name, bool create, 2928 dns_dbnode_t **nodep) { 2929 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2930 2931 REQUIRE(VALID_RBTDB(rbtdb)); 2932 2933 return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep)); 2934 } 2935 2936 static isc_result_t 2937 findnsec3node(dns_db_t *db, const dns_name_t *name, bool create, 2938 dns_dbnode_t **nodep) { 2939 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 2940 2941 REQUIRE(VALID_RBTDB(rbtdb)); 2942 2943 return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep)); 2944 } 2945 2946 static isc_result_t 2947 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) { 2948 rbtdb_search_t *search = arg; 2949 rdatasetheader_t *header, *header_next; 2950 rdatasetheader_t *dname_header, *sigdname_header, *ns_header; 2951 rdatasetheader_t *found; 2952 isc_result_t result; 2953 dns_rbtnode_t *onode; 2954 2955 /* 2956 * We only want to remember the topmost zone cut, since it's the one 2957 * that counts, so we'll just continue if we've already found a 2958 * zonecut. 2959 */ 2960 if (search->zonecut != NULL) { 2961 return (DNS_R_CONTINUE); 2962 } 2963 2964 found = NULL; 2965 result = DNS_R_CONTINUE; 2966 onode = search->rbtdb->origin_node; 2967 2968 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock), 2969 isc_rwlocktype_read); 2970 2971 /* 2972 * Look for an NS or DNAME rdataset active in our version. 2973 */ 2974 ns_header = NULL; 2975 dname_header = NULL; 2976 sigdname_header = NULL; 2977 for (header = node->data; header != NULL; header = header_next) { 2978 header_next = header->next; 2979 if (header->type == dns_rdatatype_ns || 2980 header->type == dns_rdatatype_dname || 2981 header->type == RBTDB_RDATATYPE_SIGDNAME) 2982 { 2983 do { 2984 if (header->serial <= search->serial && 2985 !IGNORE(header)) 2986 { 2987 /* 2988 * Is this a "this rdataset doesn't 2989 * exist" record? 2990 */ 2991 if (NONEXISTENT(header)) { 2992 header = NULL; 2993 } 2994 break; 2995 } else { 2996 header = header->down; 2997 } 2998 } while (header != NULL); 2999 if (header != NULL) { 3000 if (header->type == dns_rdatatype_dname) { 3001 dname_header = header; 3002 } else if (header->type == 3003 RBTDB_RDATATYPE_SIGDNAME) 3004 { 3005 sigdname_header = header; 3006 } else if (node != onode || 3007 IS_STUB(search->rbtdb)) 3008 { 3009 /* 3010 * We've found an NS rdataset that 3011 * isn't at the origin node. We check 3012 * that they're not at the origin node, 3013 * because otherwise we'd erroneously 3014 * treat the zone top as if it were 3015 * a delegation. 3016 */ 3017 ns_header = header; 3018 } 3019 } 3020 } 3021 } 3022 3023 /* 3024 * Did we find anything? 3025 */ 3026 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) && 3027 ns_header != NULL) 3028 { 3029 /* 3030 * Note that NS has precedence over DNAME if both exist 3031 * in a zone. Otherwise DNAME take precedence over NS. 3032 */ 3033 found = ns_header; 3034 search->zonecut_sigrdataset = NULL; 3035 } else if (dname_header != NULL) { 3036 found = dname_header; 3037 search->zonecut_sigrdataset = sigdname_header; 3038 } else if (ns_header != NULL) { 3039 found = ns_header; 3040 search->zonecut_sigrdataset = NULL; 3041 } 3042 3043 if (found != NULL) { 3044 /* 3045 * We increment the reference count on node to ensure that 3046 * search->zonecut_rdataset will still be valid later. 3047 */ 3048 new_reference(search->rbtdb, node, isc_rwlocktype_read); 3049 search->zonecut = node; 3050 search->zonecut_rdataset = found; 3051 search->need_cleanup = true; 3052 /* 3053 * Since we've found a zonecut, anything beneath it is 3054 * glue and is not subject to wildcard matching, so we 3055 * may clear search->wild. 3056 */ 3057 search->wild = false; 3058 if ((search->options & DNS_DBFIND_GLUEOK) == 0) { 3059 /* 3060 * If the caller does not want to find glue, then 3061 * this is the best answer and the search should 3062 * stop now. 3063 */ 3064 result = DNS_R_PARTIALMATCH; 3065 } else { 3066 dns_name_t *zcname; 3067 3068 /* 3069 * The search will continue beneath the zone cut. 3070 * This may or may not be the best match. In case it 3071 * is, we need to remember the node name. 3072 */ 3073 zcname = dns_fixedname_name(&search->zonecut_name); 3074 dns_name_copynf(name, zcname); 3075 search->copy_name = true; 3076 } 3077 } else { 3078 /* 3079 * There is no zonecut at this node which is active in this 3080 * version. 3081 * 3082 * If this is a "wild" node and the caller hasn't disabled 3083 * wildcard matching, remember that we've seen a wild node 3084 * in case we need to go searching for wildcard matches 3085 * later on. 3086 */ 3087 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0) { 3088 search->wild = true; 3089 } 3090 } 3091 3092 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3093 isc_rwlocktype_read); 3094 3095 return (result); 3096 } 3097 3098 static void 3099 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, 3100 isc_stdtime_t now, isc_rwlocktype_t locktype, 3101 dns_rdataset_t *rdataset) { 3102 unsigned char *raw; /* RDATASLAB */ 3103 bool stale = STALE(header); 3104 bool ancient = ANCIENT(header); 3105 3106 /* 3107 * Caller must be holding the node reader lock. 3108 * XXXJT: technically, we need a writer lock, since we'll increment 3109 * the header count below. However, since the actual counter value 3110 * doesn't matter, we prioritize performance here. (We may want to 3111 * use atomic increment when available). 3112 */ 3113 3114 if (rdataset == NULL) { 3115 return; 3116 } 3117 3118 new_reference(rbtdb, node, locktype); 3119 3120 INSIST(rdataset->methods == NULL); /* We must be disassociated. */ 3121 3122 /* 3123 * Mark header stale or ancient if the RRset is no longer active. 3124 */ 3125 if (!ACTIVE(header, now)) { 3126 dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl; 3127 /* 3128 * If this data is in the stale window keep it and if 3129 * DNS_DBFIND_STALEOK is not set we tell the caller to 3130 * skip this record. We skip the records with ZEROTTL 3131 * (these records should not be cached anyway). 3132 */ 3133 3134 if (KEEPSTALE(rbtdb) && stale_ttl > now) { 3135 stale = true; 3136 } else { 3137 /* 3138 * We are not keeping stale, or it is outside the 3139 * stale window. Mark ancient, i.e. ready for cleanup. 3140 */ 3141 ancient = true; 3142 } 3143 } 3144 3145 rdataset->methods = &rdataset_methods; 3146 rdataset->rdclass = rbtdb->common.rdclass; 3147 rdataset->type = RBTDB_RDATATYPE_BASE(header->type); 3148 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type); 3149 rdataset->ttl = header->rdh_ttl - now; 3150 rdataset->trust = header->trust; 3151 3152 if (NEGATIVE(header)) { 3153 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE; 3154 } 3155 if (NXDOMAIN(header)) { 3156 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN; 3157 } 3158 if (OPTOUT(header)) { 3159 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT; 3160 } 3161 if (PREFETCH(header)) { 3162 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; 3163 } 3164 3165 if (stale && !ancient) { 3166 dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl; 3167 if (stale_ttl > now) { 3168 rdataset->ttl = stale_ttl - now; 3169 } else { 3170 rdataset->ttl = 0; 3171 } 3172 if (STALE_WINDOW(header)) { 3173 rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; 3174 } 3175 rdataset->attributes |= DNS_RDATASETATTR_STALE; 3176 } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { 3177 rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; 3178 rdataset->ttl = header->rdh_ttl; 3179 } 3180 3181 rdataset->private1 = rbtdb; 3182 rdataset->private2 = node; 3183 raw = (unsigned char *)header + sizeof(*header); 3184 rdataset->private3 = raw; 3185 rdataset->count = atomic_fetch_add_relaxed(&header->count, 1); 3186 if (rdataset->count == UINT32_MAX) { 3187 rdataset->count = 0; 3188 } 3189 3190 /* 3191 * Reset iterator state. 3192 */ 3193 rdataset->privateuint4 = 0; 3194 rdataset->private5 = NULL; 3195 3196 /* 3197 * Add noqname proof. 3198 */ 3199 rdataset->private6 = header->noqname; 3200 if (rdataset->private6 != NULL) { 3201 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME; 3202 } 3203 rdataset->private7 = header->closest; 3204 if (rdataset->private7 != NULL) { 3205 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST; 3206 } 3207 3208 /* 3209 * Copy out re-signing information. 3210 */ 3211 if (RESIGN(header)) { 3212 rdataset->attributes |= DNS_RDATASETATTR_RESIGN; 3213 rdataset->resign = (header->resign << 1) | header->resign_lsb; 3214 } else { 3215 rdataset->resign = 0; 3216 } 3217 } 3218 3219 static isc_result_t 3220 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, 3221 dns_name_t *foundname, dns_rdataset_t *rdataset, 3222 dns_rdataset_t *sigrdataset) { 3223 dns_name_t *zcname; 3224 rbtdb_rdatatype_t type; 3225 dns_rbtnode_t *node; 3226 3227 /* 3228 * The caller MUST NOT be holding any node locks. 3229 */ 3230 3231 node = search->zonecut; 3232 type = search->zonecut_rdataset->type; 3233 3234 /* 3235 * If we have to set foundname, we do it before anything else. 3236 * If we were to set foundname after we had set nodep or bound the 3237 * rdataset, then we'd have to undo that work if dns_name_copy() 3238 * failed. By setting foundname first, there's nothing to undo if 3239 * we have trouble. 3240 */ 3241 if (foundname != NULL && search->copy_name) { 3242 zcname = dns_fixedname_name(&search->zonecut_name); 3243 dns_name_copynf(zcname, foundname); 3244 } 3245 if (nodep != NULL) { 3246 /* 3247 * Note that we don't have to increment the node's reference 3248 * count here because we're going to use the reference we 3249 * already have in the search block. 3250 */ 3251 *nodep = node; 3252 search->need_cleanup = false; 3253 } 3254 if (rdataset != NULL) { 3255 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3256 isc_rwlocktype_read); 3257 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset, 3258 search->now, isc_rwlocktype_read, rdataset); 3259 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL) 3260 { 3261 bind_rdataset(search->rbtdb, node, 3262 search->zonecut_sigrdataset, search->now, 3263 isc_rwlocktype_read, sigrdataset); 3264 } 3265 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3266 isc_rwlocktype_read); 3267 } 3268 3269 if (type == dns_rdatatype_dname) { 3270 return (DNS_R_DNAME); 3271 } 3272 return (DNS_R_DELEGATION); 3273 } 3274 3275 static bool 3276 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type, 3277 dns_rbtnode_t *node) { 3278 unsigned char *raw; /* RDATASLAB */ 3279 unsigned int count, size; 3280 dns_name_t ns_name; 3281 bool valid = false; 3282 dns_offsets_t offsets; 3283 isc_region_t region; 3284 rdatasetheader_t *header; 3285 3286 /* 3287 * No additional locking is required. 3288 */ 3289 3290 /* 3291 * Valid glue types are A, AAAA, A6. NS is also a valid glue type 3292 * if it occurs at a zone cut, but is not valid below it. 3293 */ 3294 if (type == dns_rdatatype_ns) { 3295 if (node != search->zonecut) { 3296 return (false); 3297 } 3298 } else if (type != dns_rdatatype_a && type != dns_rdatatype_aaaa && 3299 type != dns_rdatatype_a6) 3300 { 3301 return (false); 3302 } 3303 3304 header = search->zonecut_rdataset; 3305 raw = (unsigned char *)header + sizeof(*header); 3306 count = raw[0] * 256 + raw[1]; 3307 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 3308 3309 while (count > 0) { 3310 count--; 3311 size = raw[0] * 256 + raw[1]; 3312 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 3313 region.base = raw; 3314 region.length = size; 3315 raw += size; 3316 /* 3317 * XXX Until we have rdata structures, we have no choice but 3318 * to directly access the rdata format. 3319 */ 3320 dns_name_init(&ns_name, offsets); 3321 dns_name_fromregion(&ns_name, ®ion); 3322 if (dns_name_compare(&ns_name, name) == 0) { 3323 valid = true; 3324 break; 3325 } 3326 } 3327 3328 return (valid); 3329 } 3330 3331 static bool 3332 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain, 3333 const dns_name_t *name) { 3334 dns_fixedname_t fnext; 3335 dns_fixedname_t forigin; 3336 dns_name_t *next; 3337 dns_name_t *origin; 3338 dns_name_t prefix; 3339 dns_rbtdb_t *rbtdb; 3340 dns_rbtnode_t *node; 3341 isc_result_t result; 3342 bool answer = false; 3343 rdatasetheader_t *header; 3344 3345 rbtdb = search->rbtdb; 3346 3347 dns_name_init(&prefix, NULL); 3348 next = dns_fixedname_initname(&fnext); 3349 origin = dns_fixedname_initname(&forigin); 3350 3351 result = dns_rbtnodechain_next(chain, NULL, NULL); 3352 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 3353 node = NULL; 3354 result = dns_rbtnodechain_current(chain, &prefix, origin, 3355 &node); 3356 if (result != ISC_R_SUCCESS) { 3357 break; 3358 } 3359 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3360 isc_rwlocktype_read); 3361 for (header = node->data; header != NULL; header = header->next) 3362 { 3363 if (header->serial <= search->serial && 3364 !IGNORE(header) && EXISTS(header)) 3365 { 3366 break; 3367 } 3368 } 3369 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3370 isc_rwlocktype_read); 3371 if (header != NULL) { 3372 break; 3373 } 3374 result = dns_rbtnodechain_next(chain, NULL, NULL); 3375 } 3376 if (result == ISC_R_SUCCESS) { 3377 result = dns_name_concatenate(&prefix, origin, next, NULL); 3378 } 3379 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name)) { 3380 answer = true; 3381 } 3382 return (answer); 3383 } 3384 3385 static bool 3386 activeemptynode(rbtdb_search_t *search, const dns_name_t *qname, 3387 dns_name_t *wname) { 3388 dns_fixedname_t fnext; 3389 dns_fixedname_t forigin; 3390 dns_fixedname_t fprev; 3391 dns_name_t *next; 3392 dns_name_t *origin; 3393 dns_name_t *prev; 3394 dns_name_t name; 3395 dns_name_t rname; 3396 dns_name_t tname; 3397 dns_rbtdb_t *rbtdb; 3398 dns_rbtnode_t *node; 3399 dns_rbtnodechain_t chain; 3400 bool check_next = true; 3401 bool check_prev = true; 3402 bool answer = false; 3403 isc_result_t result; 3404 rdatasetheader_t *header; 3405 unsigned int n; 3406 3407 rbtdb = search->rbtdb; 3408 3409 dns_name_init(&name, NULL); 3410 dns_name_init(&tname, NULL); 3411 dns_name_init(&rname, NULL); 3412 next = dns_fixedname_initname(&fnext); 3413 prev = dns_fixedname_initname(&fprev); 3414 origin = dns_fixedname_initname(&forigin); 3415 3416 /* 3417 * Find if qname is at or below a empty node. 3418 * Use our own copy of the chain. 3419 */ 3420 3421 chain = search->chain; 3422 do { 3423 node = NULL; 3424 result = dns_rbtnodechain_current(&chain, &name, origin, &node); 3425 if (result != ISC_R_SUCCESS) { 3426 break; 3427 } 3428 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3429 isc_rwlocktype_read); 3430 for (header = node->data; header != NULL; header = header->next) 3431 { 3432 if (header->serial <= search->serial && 3433 !IGNORE(header) && EXISTS(header)) 3434 { 3435 break; 3436 } 3437 } 3438 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3439 isc_rwlocktype_read); 3440 if (header != NULL) { 3441 break; 3442 } 3443 result = dns_rbtnodechain_prev(&chain, NULL, NULL); 3444 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN); 3445 if (result == ISC_R_SUCCESS) { 3446 result = dns_name_concatenate(&name, origin, prev, NULL); 3447 } 3448 if (result != ISC_R_SUCCESS) { 3449 check_prev = false; 3450 } 3451 3452 result = dns_rbtnodechain_next(&chain, NULL, NULL); 3453 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 3454 node = NULL; 3455 result = dns_rbtnodechain_current(&chain, &name, origin, &node); 3456 if (result != ISC_R_SUCCESS) { 3457 break; 3458 } 3459 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3460 isc_rwlocktype_read); 3461 for (header = node->data; header != NULL; header = header->next) 3462 { 3463 if (header->serial <= search->serial && 3464 !IGNORE(header) && EXISTS(header)) 3465 { 3466 break; 3467 } 3468 } 3469 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3470 isc_rwlocktype_read); 3471 if (header != NULL) { 3472 break; 3473 } 3474 result = dns_rbtnodechain_next(&chain, NULL, NULL); 3475 } 3476 if (result == ISC_R_SUCCESS) { 3477 result = dns_name_concatenate(&name, origin, next, NULL); 3478 } 3479 if (result != ISC_R_SUCCESS) { 3480 check_next = false; 3481 } 3482 3483 dns_name_clone(qname, &rname); 3484 3485 /* 3486 * Remove the wildcard label to find the terminal name. 3487 */ 3488 n = dns_name_countlabels(wname); 3489 dns_name_getlabelsequence(wname, 1, n - 1, &tname); 3490 3491 do { 3492 if ((check_prev && dns_name_issubdomain(prev, &rname)) || 3493 (check_next && dns_name_issubdomain(next, &rname))) 3494 { 3495 answer = true; 3496 break; 3497 } 3498 /* 3499 * Remove the left hand label. 3500 */ 3501 n = dns_name_countlabels(&rname); 3502 dns_name_getlabelsequence(&rname, 1, n - 1, &rname); 3503 } while (!dns_name_equal(&rname, &tname)); 3504 return (answer); 3505 } 3506 3507 static isc_result_t 3508 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep, 3509 const dns_name_t *qname) { 3510 unsigned int i, j; 3511 dns_rbtnode_t *node, *level_node, *wnode; 3512 rdatasetheader_t *header; 3513 isc_result_t result = ISC_R_NOTFOUND; 3514 dns_name_t name; 3515 dns_name_t *wname; 3516 dns_fixedname_t fwname; 3517 dns_rbtdb_t *rbtdb; 3518 bool done, wild, active; 3519 dns_rbtnodechain_t wchain; 3520 3521 /* 3522 * Caller must be holding the tree lock and MUST NOT be holding 3523 * any node locks. 3524 */ 3525 3526 /* 3527 * Examine each ancestor level. If the level's wild bit 3528 * is set, then construct the corresponding wildcard name and 3529 * search for it. If the wildcard node exists, and is active in 3530 * this version, we're done. If not, then we next check to see 3531 * if the ancestor is active in this version. If so, then there 3532 * can be no possible wildcard match and again we're done. If not, 3533 * continue the search. 3534 */ 3535 3536 rbtdb = search->rbtdb; 3537 i = search->chain.level_matches; 3538 done = false; 3539 node = *nodep; 3540 do { 3541 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock), 3542 isc_rwlocktype_read); 3543 3544 /* 3545 * First we try to figure out if this node is active in 3546 * the search's version. We do this now, even though we 3547 * may not need the information, because it simplifies the 3548 * locking and code flow. 3549 */ 3550 for (header = node->data; header != NULL; header = header->next) 3551 { 3552 if (header->serial <= search->serial && 3553 !IGNORE(header) && EXISTS(header) && 3554 !ANCIENT(header)) 3555 { 3556 break; 3557 } 3558 } 3559 if (header != NULL) { 3560 active = true; 3561 } else { 3562 active = false; 3563 } 3564 3565 if (node->wild) { 3566 wild = true; 3567 } else { 3568 wild = false; 3569 } 3570 3571 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), 3572 isc_rwlocktype_read); 3573 3574 if (wild) { 3575 /* 3576 * Construct the wildcard name for this level. 3577 */ 3578 dns_name_init(&name, NULL); 3579 dns_rbt_namefromnode(node, &name); 3580 wname = dns_fixedname_initname(&fwname); 3581 result = dns_name_concatenate(dns_wildcardname, &name, 3582 wname, NULL); 3583 j = i; 3584 while (result == ISC_R_SUCCESS && j != 0) { 3585 j--; 3586 level_node = search->chain.levels[j]; 3587 dns_name_init(&name, NULL); 3588 dns_rbt_namefromnode(level_node, &name); 3589 result = dns_name_concatenate(wname, &name, 3590 wname, NULL); 3591 } 3592 if (result != ISC_R_SUCCESS) { 3593 break; 3594 } 3595 3596 wnode = NULL; 3597 dns_rbtnodechain_init(&wchain); 3598 result = dns_rbt_findnode( 3599 rbtdb->tree, wname, NULL, &wnode, &wchain, 3600 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 3601 if (result == ISC_R_SUCCESS) { 3602 nodelock_t *lock; 3603 3604 /* 3605 * We have found the wildcard node. If it 3606 * is active in the search's version, we're 3607 * done. 3608 */ 3609 lock = &rbtdb->node_locks[wnode->locknum].lock; 3610 NODE_LOCK(lock, isc_rwlocktype_read); 3611 for (header = wnode->data; header != NULL; 3612 header = header->next) 3613 { 3614 if (header->serial <= search->serial && 3615 !IGNORE(header) && EXISTS(header) && 3616 !ANCIENT(header)) 3617 { 3618 break; 3619 } 3620 } 3621 NODE_UNLOCK(lock, isc_rwlocktype_read); 3622 if (header != NULL || 3623 activeempty(search, &wchain, wname)) 3624 { 3625 if (activeemptynode(search, qname, 3626 wname)) 3627 { 3628 return (ISC_R_NOTFOUND); 3629 } 3630 /* 3631 * The wildcard node is active! 3632 * 3633 * Note: result is still ISC_R_SUCCESS 3634 * so we don't have to set it. 3635 */ 3636 *nodep = wnode; 3637 break; 3638 } 3639 } else if (result != ISC_R_NOTFOUND && 3640 result != DNS_R_PARTIALMATCH) 3641 { 3642 /* 3643 * An error has occurred. Bail out. 3644 */ 3645 break; 3646 } 3647 } 3648 3649 if (active) { 3650 /* 3651 * The level node is active. Any wildcarding 3652 * present at higher levels has no 3653 * effect and we're done. 3654 */ 3655 result = ISC_R_NOTFOUND; 3656 break; 3657 } 3658 3659 if (i > 0) { 3660 i--; 3661 node = search->chain.levels[i]; 3662 } else { 3663 done = true; 3664 } 3665 } while (!done); 3666 3667 return (result); 3668 } 3669 3670 static bool 3671 matchparams(rdatasetheader_t *header, rbtdb_search_t *search) { 3672 dns_rdata_t rdata = DNS_RDATA_INIT; 3673 dns_rdata_nsec3_t nsec3; 3674 unsigned char *raw; /* RDATASLAB */ 3675 unsigned int rdlen, count; 3676 isc_region_t region; 3677 isc_result_t result; 3678 3679 REQUIRE(header->type == dns_rdatatype_nsec3); 3680 3681 raw = (unsigned char *)header + sizeof(*header); 3682 count = raw[0] * 256 + raw[1]; /* count */ 3683 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; 3684 3685 while (count-- > 0) { 3686 rdlen = raw[0] * 256 + raw[1]; 3687 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 3688 region.base = raw; 3689 region.length = rdlen; 3690 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass, 3691 dns_rdatatype_nsec3, ®ion); 3692 raw += rdlen; 3693 result = dns_rdata_tostruct(&rdata, &nsec3, NULL); 3694 INSIST(result == ISC_R_SUCCESS); 3695 if (nsec3.hash == search->rbtversion->hash && 3696 nsec3.iterations == search->rbtversion->iterations && 3697 nsec3.salt_length == search->rbtversion->salt_length && 3698 memcmp(nsec3.salt, search->rbtversion->salt, 3699 nsec3.salt_length) == 0) 3700 { 3701 return (true); 3702 } 3703 dns_rdata_reset(&rdata); 3704 } 3705 return (false); 3706 } 3707 3708 /* 3709 * Find node of the NSEC/NSEC3 record that is 'name'. 3710 */ 3711 static isc_result_t 3712 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search, 3713 dns_name_t *name, dns_name_t *origin, 3714 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain, 3715 bool *firstp) { 3716 dns_fixedname_t ftarget; 3717 dns_name_t *target; 3718 dns_rbtnode_t *nsecnode; 3719 isc_result_t result; 3720 3721 REQUIRE(nodep != NULL && *nodep == NULL); 3722 REQUIRE(type == dns_rdatatype_nsec3 || firstp != NULL); 3723 3724 if (type == dns_rdatatype_nsec3) { 3725 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL); 3726 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { 3727 return (result); 3728 } 3729 result = dns_rbtnodechain_current(&search->chain, name, origin, 3730 nodep); 3731 return (result); 3732 } 3733 3734 target = dns_fixedname_initname(&ftarget); 3735 3736 for (;;) { 3737 if (*firstp) { 3738 /* 3739 * Construct the name of the second node to check. 3740 * It is the first node sought in the NSEC tree. 3741 */ 3742 *firstp = false; 3743 dns_rbtnodechain_init(nsecchain); 3744 result = dns_name_concatenate(name, origin, target, 3745 NULL); 3746 if (result != ISC_R_SUCCESS) { 3747 return (result); 3748 } 3749 nsecnode = NULL; 3750 result = dns_rbt_findnode( 3751 search->rbtdb->nsec, target, NULL, &nsecnode, 3752 nsecchain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 3753 if (result == ISC_R_SUCCESS) { 3754 /* 3755 * Since this was the first loop, finding the 3756 * name in the NSEC tree implies that the first 3757 * node checked in the main tree had an 3758 * unacceptable NSEC record. 3759 * Try the previous node in the NSEC tree. 3760 */ 3761 result = dns_rbtnodechain_prev(nsecchain, name, 3762 origin); 3763 if (result == DNS_R_NEWORIGIN) { 3764 result = ISC_R_SUCCESS; 3765 } 3766 } else if (result == ISC_R_NOTFOUND || 3767 result == DNS_R_PARTIALMATCH) 3768 { 3769 result = dns_rbtnodechain_current( 3770 nsecchain, name, origin, NULL); 3771 if (result == ISC_R_NOTFOUND) { 3772 result = ISC_R_NOMORE; 3773 } 3774 } 3775 } else { 3776 /* 3777 * This is a second or later trip through the auxiliary 3778 * tree for the name of a third or earlier NSEC node in 3779 * the main tree. Previous trips through the NSEC tree 3780 * must have found nodes in the main tree with NSEC 3781 * records. Perhaps they lacked signature records. 3782 */ 3783 result = dns_rbtnodechain_prev(nsecchain, name, origin); 3784 if (result == DNS_R_NEWORIGIN) { 3785 result = ISC_R_SUCCESS; 3786 } 3787 } 3788 if (result != ISC_R_SUCCESS) { 3789 return (result); 3790 } 3791 3792 /* 3793 * Construct the name to seek in the main tree. 3794 */ 3795 result = dns_name_concatenate(name, origin, target, NULL); 3796 if (result != ISC_R_SUCCESS) { 3797 return (result); 3798 } 3799 3800 *nodep = NULL; 3801 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL, 3802 nodep, &search->chain, 3803 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 3804 if (result == ISC_R_SUCCESS) { 3805 return (result); 3806 } 3807 3808 /* 3809 * There should always be a node in the main tree with the 3810 * same name as the node in the auxiliary NSEC tree, except for 3811 * nodes in the auxiliary tree that are awaiting deletion. 3812 */ 3813 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) { 3814 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 3815 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR, 3816 "previous_closest_nsec(): %s", 3817 isc_result_totext(result)); 3818 return (DNS_R_BADDB); 3819 } 3820 } 3821 } 3822 3823 /* 3824 * Find the NSEC/NSEC3 which is or before the current point on the 3825 * search chain. For NSEC3 records only NSEC3 records that match the 3826 * current NSEC3PARAM record are considered. 3827 */ 3828 static isc_result_t 3829 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep, 3830 dns_name_t *foundname, dns_rdataset_t *rdataset, 3831 dns_rdataset_t *sigrdataset, dns_rbt_t *tree, 3832 dns_db_secure_t secure) { 3833 dns_rbtnode_t *node, *prevnode; 3834 rdatasetheader_t *header, *header_next, *found, *foundsig; 3835 dns_rbtnodechain_t nsecchain; 3836 bool empty_node; 3837 isc_result_t result; 3838 dns_fixedname_t fname, forigin; 3839 dns_name_t *name, *origin; 3840 dns_rdatatype_t type; 3841 rbtdb_rdatatype_t sigtype; 3842 bool wraps; 3843 bool first = true; 3844 bool need_sig = (secure == dns_db_secure); 3845 3846 if (tree == search->rbtdb->nsec3) { 3847 type = dns_rdatatype_nsec3; 3848 sigtype = RBTDB_RDATATYPE_SIGNSEC3; 3849 wraps = true; 3850 } else { 3851 type = dns_rdatatype_nsec; 3852 sigtype = RBTDB_RDATATYPE_SIGNSEC; 3853 wraps = false; 3854 } 3855 3856 /* 3857 * Use the auxiliary tree only starting with the second node in the 3858 * hope that the original node will be right much of the time. 3859 */ 3860 name = dns_fixedname_initname(&fname); 3861 origin = dns_fixedname_initname(&forigin); 3862 again: 3863 node = NULL; 3864 prevnode = NULL; 3865 result = dns_rbtnodechain_current(&search->chain, name, origin, &node); 3866 if (result != ISC_R_SUCCESS) { 3867 return (result); 3868 } 3869 do { 3870 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3871 isc_rwlocktype_read); 3872 found = NULL; 3873 foundsig = NULL; 3874 empty_node = true; 3875 for (header = node->data; header != NULL; header = header_next) 3876 { 3877 header_next = header->next; 3878 /* 3879 * Look for an active, extant NSEC or RRSIG NSEC. 3880 */ 3881 do { 3882 if (header->serial <= search->serial && 3883 !IGNORE(header)) 3884 { 3885 /* 3886 * Is this a "this rdataset doesn't 3887 * exist" record? 3888 */ 3889 if (NONEXISTENT(header)) { 3890 header = NULL; 3891 } 3892 break; 3893 } else { 3894 header = header->down; 3895 } 3896 } while (header != NULL); 3897 if (header != NULL) { 3898 /* 3899 * We now know that there is at least one 3900 * active rdataset at this node. 3901 */ 3902 empty_node = false; 3903 if (header->type == type) { 3904 found = header; 3905 if (foundsig != NULL) { 3906 break; 3907 } 3908 } else if (header->type == sigtype) { 3909 foundsig = header; 3910 if (found != NULL) { 3911 break; 3912 } 3913 } 3914 } 3915 } 3916 if (!empty_node) { 3917 if (found != NULL && search->rbtversion->havensec3 && 3918 found->type == dns_rdatatype_nsec3 && 3919 !matchparams(found, search)) 3920 { 3921 empty_node = true; 3922 found = NULL; 3923 foundsig = NULL; 3924 result = previous_closest_nsec( 3925 type, search, name, origin, &prevnode, 3926 NULL, NULL); 3927 } else if (found != NULL && 3928 (foundsig != NULL || !need_sig)) 3929 { 3930 /* 3931 * We've found the right NSEC/NSEC3 record. 3932 * 3933 * Note: for this to really be the right 3934 * NSEC record, it's essential that the NSEC 3935 * records of any nodes obscured by a zone 3936 * cut have been removed; we assume this is 3937 * the case. 3938 */ 3939 result = dns_name_concatenate(name, origin, 3940 foundname, NULL); 3941 if (result == ISC_R_SUCCESS) { 3942 if (nodep != NULL) { 3943 new_reference( 3944 search->rbtdb, node, 3945 isc_rwlocktype_read); 3946 *nodep = node; 3947 } 3948 bind_rdataset(search->rbtdb, node, 3949 found, search->now, 3950 isc_rwlocktype_read, 3951 rdataset); 3952 if (foundsig != NULL) { 3953 bind_rdataset( 3954 search->rbtdb, node, 3955 foundsig, search->now, 3956 isc_rwlocktype_read, 3957 sigrdataset); 3958 } 3959 } 3960 } else if (found == NULL && foundsig == NULL) { 3961 /* 3962 * This node is active, but has no NSEC or 3963 * RRSIG NSEC. That means it's glue or 3964 * other obscured zone data that isn't 3965 * relevant for our search. Treat the 3966 * node as if it were empty and keep looking. 3967 */ 3968 empty_node = true; 3969 result = previous_closest_nsec( 3970 type, search, name, origin, &prevnode, 3971 &nsecchain, &first); 3972 } else { 3973 /* 3974 * We found an active node, but either the 3975 * NSEC or the RRSIG NSEC is missing. This 3976 * shouldn't happen. 3977 */ 3978 result = DNS_R_BADDB; 3979 } 3980 } else { 3981 /* 3982 * This node isn't active. We've got to keep 3983 * looking. 3984 */ 3985 result = previous_closest_nsec(type, search, name, 3986 origin, &prevnode, 3987 &nsecchain, &first); 3988 } 3989 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 3990 isc_rwlocktype_read); 3991 node = prevnode; 3992 prevnode = NULL; 3993 } while (empty_node && result == ISC_R_SUCCESS); 3994 3995 if (!first) { 3996 dns_rbtnodechain_invalidate(&nsecchain); 3997 } 3998 3999 if (result == ISC_R_NOMORE && wraps) { 4000 result = dns_rbtnodechain_last(&search->chain, tree, NULL, 4001 NULL); 4002 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 4003 wraps = false; 4004 goto again; 4005 } 4006 } 4007 4008 /* 4009 * If the result is ISC_R_NOMORE, then we got to the beginning of 4010 * the database and didn't find a NSEC record. This shouldn't 4011 * happen. 4012 */ 4013 if (result == ISC_R_NOMORE) { 4014 result = DNS_R_BADDB; 4015 } 4016 4017 return (result); 4018 } 4019 4020 static isc_result_t 4021 zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 4022 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 4023 dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset, 4024 dns_rdataset_t *sigrdataset) { 4025 dns_rbtnode_t *node = NULL; 4026 isc_result_t result; 4027 rbtdb_search_t search; 4028 bool cname_ok = true; 4029 bool close_version = false; 4030 bool maybe_zonecut = false; 4031 bool at_zonecut = false; 4032 bool wild; 4033 bool empty_node; 4034 rdatasetheader_t *header, *header_next, *found, *nsecheader; 4035 rdatasetheader_t *foundsig, *cnamesig, *nsecsig; 4036 rbtdb_rdatatype_t sigtype; 4037 bool active; 4038 nodelock_t *lock; 4039 dns_rbt_t *tree; 4040 4041 search.rbtdb = (dns_rbtdb_t *)db; 4042 4043 REQUIRE(VALID_RBTDB(search.rbtdb)); 4044 INSIST(version == NULL || 4045 ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db); 4046 4047 /* 4048 * We don't care about 'now'. 4049 */ 4050 UNUSED(now); 4051 4052 /* 4053 * If the caller didn't supply a version, attach to the current 4054 * version. 4055 */ 4056 if (version == NULL) { 4057 currentversion(db, &version); 4058 close_version = true; 4059 } 4060 4061 search.rbtversion = version; 4062 search.serial = search.rbtversion->serial; 4063 search.options = options; 4064 search.copy_name = false; 4065 search.need_cleanup = false; 4066 search.wild = false; 4067 search.zonecut = NULL; 4068 dns_fixedname_init(&search.zonecut_name); 4069 dns_rbtnodechain_init(&search.chain); 4070 search.now = 0; 4071 4072 /* 4073 * 'wild' will be true iff. we've matched a wildcard. 4074 */ 4075 wild = false; 4076 4077 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 4078 4079 /* 4080 * Search down from the root of the tree. If, while going down, we 4081 * encounter a callback node, zone_zonecut_callback() will search the 4082 * rdatasets at the zone cut for active DNAME or NS rdatasets. 4083 */ 4084 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 4085 : search.rbtdb->tree; 4086 result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain, 4087 DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback, 4088 &search); 4089 4090 if (result == DNS_R_PARTIALMATCH) { 4091 partial_match: 4092 if (search.zonecut != NULL) { 4093 result = setup_delegation(&search, nodep, foundname, 4094 rdataset, sigrdataset); 4095 goto tree_exit; 4096 } 4097 4098 if (search.wild) { 4099 /* 4100 * At least one of the levels in the search chain 4101 * potentially has a wildcard. For each such level, 4102 * we must see if there's a matching wildcard active 4103 * in the current version. 4104 */ 4105 result = find_wildcard(&search, &node, name); 4106 if (result == ISC_R_SUCCESS) { 4107 dns_name_copynf(name, foundname); 4108 wild = true; 4109 goto found; 4110 } else if (result != ISC_R_NOTFOUND) { 4111 goto tree_exit; 4112 } 4113 } 4114 4115 active = false; 4116 if ((options & DNS_DBFIND_FORCENSEC3) == 0) { 4117 /* 4118 * The NSEC3 tree won't have empty nodes, 4119 * so it isn't necessary to check for them. 4120 */ 4121 dns_rbtnodechain_t chain = search.chain; 4122 active = activeempty(&search, &chain, name); 4123 } 4124 4125 /* 4126 * If we're here, then the name does not exist, is not 4127 * beneath a zonecut, and there's no matching wildcard. 4128 */ 4129 if ((search.rbtversion->secure == dns_db_secure && 4130 !search.rbtversion->havensec3) || 4131 (search.options & DNS_DBFIND_FORCENSEC) != 0 || 4132 (search.options & DNS_DBFIND_FORCENSEC3) != 0) 4133 { 4134 result = find_closest_nsec(&search, nodep, foundname, 4135 rdataset, sigrdataset, tree, 4136 search.rbtversion->secure); 4137 if (result == ISC_R_SUCCESS) { 4138 result = active ? DNS_R_EMPTYNAME 4139 : DNS_R_NXDOMAIN; 4140 } 4141 } else { 4142 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN; 4143 } 4144 goto tree_exit; 4145 } else if (result != ISC_R_SUCCESS) { 4146 goto tree_exit; 4147 } 4148 4149 found: 4150 /* 4151 * We have found a node whose name is the desired name, or we 4152 * have matched a wildcard. 4153 */ 4154 4155 if (search.zonecut != NULL) { 4156 /* 4157 * If we're beneath a zone cut, we don't want to look for 4158 * CNAMEs because they're not legitimate zone glue. 4159 */ 4160 cname_ok = false; 4161 } else { 4162 /* 4163 * The node may be a zone cut itself. If it might be one, 4164 * make sure we check for it later. 4165 * 4166 * DS records live above the zone cut in ordinary zone so 4167 * we want to ignore any referral. 4168 * 4169 * Stub zones don't have anything "above" the delegation so 4170 * we always return a referral. 4171 */ 4172 if (node->find_callback && 4173 ((node != search.rbtdb->origin_node && 4174 !dns_rdatatype_atparent(type)) || 4175 IS_STUB(search.rbtdb))) 4176 { 4177 maybe_zonecut = true; 4178 } 4179 } 4180 4181 /* 4182 * Certain DNSSEC types are not subject to CNAME matching 4183 * (RFC4035, section 2.5 and RFC3007). 4184 * 4185 * We don't check for RRSIG, because we don't store RRSIG records 4186 * directly. 4187 */ 4188 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 4189 cname_ok = false; 4190 } 4191 4192 /* 4193 * We now go looking for rdata... 4194 */ 4195 4196 lock = &search.rbtdb->node_locks[node->locknum].lock; 4197 NODE_LOCK(lock, isc_rwlocktype_read); 4198 4199 found = NULL; 4200 foundsig = NULL; 4201 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 4202 nsecheader = NULL; 4203 nsecsig = NULL; 4204 cnamesig = NULL; 4205 empty_node = true; 4206 for (header = node->data; header != NULL; header = header_next) { 4207 header_next = header->next; 4208 /* 4209 * Look for an active, extant rdataset. 4210 */ 4211 do { 4212 if (header->serial <= search.serial && !IGNORE(header)) 4213 { 4214 /* 4215 * Is this a "this rdataset doesn't 4216 * exist" record? 4217 */ 4218 if (NONEXISTENT(header)) { 4219 header = NULL; 4220 } 4221 break; 4222 } else { 4223 header = header->down; 4224 } 4225 } while (header != NULL); 4226 if (header != NULL) { 4227 /* 4228 * We now know that there is at least one active 4229 * rdataset at this node. 4230 */ 4231 empty_node = false; 4232 4233 /* 4234 * Do special zone cut handling, if requested. 4235 */ 4236 if (maybe_zonecut && header->type == dns_rdatatype_ns) { 4237 /* 4238 * We increment the reference count on node to 4239 * ensure that search->zonecut_rdataset will 4240 * still be valid later. 4241 */ 4242 new_reference(search.rbtdb, node, 4243 isc_rwlocktype_read); 4244 search.zonecut = node; 4245 search.zonecut_rdataset = header; 4246 search.zonecut_sigrdataset = NULL; 4247 search.need_cleanup = true; 4248 maybe_zonecut = false; 4249 at_zonecut = true; 4250 /* 4251 * It is not clear if KEY should still be 4252 * allowed at the parent side of the zone 4253 * cut or not. It is needed for RFC3007 4254 * validated updates. 4255 */ 4256 if ((search.options & DNS_DBFIND_GLUEOK) == 0 && 4257 type != dns_rdatatype_nsec && 4258 type != dns_rdatatype_key) 4259 { 4260 /* 4261 * Glue is not OK, but any answer we 4262 * could return would be glue. Return 4263 * the delegation. 4264 */ 4265 found = NULL; 4266 break; 4267 } 4268 if (found != NULL && foundsig != NULL) { 4269 break; 4270 } 4271 } 4272 4273 /* 4274 * If the NSEC3 record doesn't match the chain 4275 * we are using behave as if it isn't here. 4276 */ 4277 if (header->type == dns_rdatatype_nsec3 && 4278 !matchparams(header, &search)) 4279 { 4280 NODE_UNLOCK(lock, isc_rwlocktype_read); 4281 goto partial_match; 4282 } 4283 /* 4284 * If we found a type we were looking for, 4285 * remember it. 4286 */ 4287 if (header->type == type || type == dns_rdatatype_any || 4288 (header->type == dns_rdatatype_cname && cname_ok)) 4289 { 4290 /* 4291 * We've found the answer! 4292 */ 4293 found = header; 4294 if (header->type == dns_rdatatype_cname && 4295 cname_ok) 4296 { 4297 /* 4298 * We may be finding a CNAME instead 4299 * of the desired type. 4300 * 4301 * If we've already got the CNAME RRSIG, 4302 * use it, otherwise change sigtype 4303 * so that we find it. 4304 */ 4305 if (cnamesig != NULL) { 4306 foundsig = cnamesig; 4307 } else { 4308 sigtype = 4309 RBTDB_RDATATYPE_SIGCNAME; 4310 } 4311 } 4312 /* 4313 * If we've got all we need, end the search. 4314 */ 4315 if (!maybe_zonecut && foundsig != NULL) { 4316 break; 4317 } 4318 } else if (header->type == sigtype) { 4319 /* 4320 * We've found the RRSIG rdataset for our 4321 * target type. Remember it. 4322 */ 4323 foundsig = header; 4324 /* 4325 * If we've got all we need, end the search. 4326 */ 4327 if (!maybe_zonecut && found != NULL) { 4328 break; 4329 } 4330 } else if (header->type == dns_rdatatype_nsec && 4331 !search.rbtversion->havensec3) 4332 { 4333 /* 4334 * Remember a NSEC rdataset even if we're 4335 * not specifically looking for it, because 4336 * we might need it later. 4337 */ 4338 nsecheader = header; 4339 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC && 4340 !search.rbtversion->havensec3) 4341 { 4342 /* 4343 * If we need the NSEC rdataset, we'll also 4344 * need its signature. 4345 */ 4346 nsecsig = header; 4347 } else if (cname_ok && 4348 header->type == RBTDB_RDATATYPE_SIGCNAME) 4349 { 4350 /* 4351 * If we get a CNAME match, we'll also need 4352 * its signature. 4353 */ 4354 cnamesig = header; 4355 } 4356 } 4357 } 4358 4359 if (empty_node) { 4360 /* 4361 * We have an exact match for the name, but there are no 4362 * active rdatasets in the desired version. That means that 4363 * this node doesn't exist in the desired version, and that 4364 * we really have a partial match. 4365 */ 4366 if (!wild) { 4367 NODE_UNLOCK(lock, isc_rwlocktype_read); 4368 goto partial_match; 4369 } 4370 } 4371 4372 /* 4373 * If we didn't find what we were looking for... 4374 */ 4375 if (found == NULL) { 4376 if (search.zonecut != NULL) { 4377 /* 4378 * We were trying to find glue at a node beneath a 4379 * zone cut, but didn't. 4380 * 4381 * Return the delegation. 4382 */ 4383 NODE_UNLOCK(lock, isc_rwlocktype_read); 4384 result = setup_delegation(&search, nodep, foundname, 4385 rdataset, sigrdataset); 4386 goto tree_exit; 4387 } 4388 /* 4389 * The desired type doesn't exist. 4390 */ 4391 result = DNS_R_NXRRSET; 4392 if (search.rbtversion->secure == dns_db_secure && 4393 !search.rbtversion->havensec3 && 4394 (nsecheader == NULL || nsecsig == NULL)) 4395 { 4396 /* 4397 * The zone is secure but there's no NSEC, 4398 * or the NSEC has no signature! 4399 */ 4400 if (!wild) { 4401 result = DNS_R_BADDB; 4402 goto node_exit; 4403 } 4404 4405 NODE_UNLOCK(lock, isc_rwlocktype_read); 4406 result = find_closest_nsec(&search, nodep, foundname, 4407 rdataset, sigrdataset, 4408 search.rbtdb->tree, 4409 search.rbtversion->secure); 4410 if (result == ISC_R_SUCCESS) { 4411 result = DNS_R_EMPTYWILD; 4412 } 4413 goto tree_exit; 4414 } 4415 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 && 4416 nsecheader == NULL) 4417 { 4418 /* 4419 * There's no NSEC record, and we were told 4420 * to find one. 4421 */ 4422 result = DNS_R_BADDB; 4423 goto node_exit; 4424 } 4425 if (nodep != NULL) { 4426 new_reference(search.rbtdb, node, isc_rwlocktype_read); 4427 *nodep = node; 4428 } 4429 if ((search.rbtversion->secure == dns_db_secure && 4430 !search.rbtversion->havensec3) || 4431 (search.options & DNS_DBFIND_FORCENSEC) != 0) 4432 { 4433 bind_rdataset(search.rbtdb, node, nsecheader, 0, 4434 isc_rwlocktype_read, rdataset); 4435 if (nsecsig != NULL) { 4436 bind_rdataset(search.rbtdb, node, nsecsig, 0, 4437 isc_rwlocktype_read, sigrdataset); 4438 } 4439 } 4440 if (wild) { 4441 foundname->attributes |= DNS_NAMEATTR_WILDCARD; 4442 } 4443 goto node_exit; 4444 } 4445 4446 /* 4447 * We found what we were looking for, or we found a CNAME. 4448 */ 4449 4450 if (type != found->type && type != dns_rdatatype_any && 4451 found->type == dns_rdatatype_cname) 4452 { 4453 /* 4454 * We weren't doing an ANY query and we found a CNAME instead 4455 * of the type we were looking for, so we need to indicate 4456 * that result to the caller. 4457 */ 4458 result = DNS_R_CNAME; 4459 } else if (search.zonecut != NULL) { 4460 /* 4461 * If we're beneath a zone cut, we must indicate that the 4462 * result is glue, unless we're actually at the zone cut 4463 * and the type is NSEC or KEY. 4464 */ 4465 if (search.zonecut == node) { 4466 /* 4467 * It is not clear if KEY should still be 4468 * allowed at the parent side of the zone 4469 * cut or not. It is needed for RFC3007 4470 * validated updates. 4471 */ 4472 if (type == dns_rdatatype_nsec || 4473 type == dns_rdatatype_nsec3 || 4474 type == dns_rdatatype_key) 4475 { 4476 result = ISC_R_SUCCESS; 4477 } else if (type == dns_rdatatype_any) { 4478 result = DNS_R_ZONECUT; 4479 } else { 4480 result = DNS_R_GLUE; 4481 } 4482 } else { 4483 result = DNS_R_GLUE; 4484 } 4485 /* 4486 * We might have found data that isn't glue, but was occluded 4487 * by a dynamic update. If the caller cares about this, they 4488 * will have told us to validate glue. 4489 * 4490 * XXX We should cache the glue validity state! 4491 */ 4492 if (result == DNS_R_GLUE && 4493 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 && 4494 !valid_glue(&search, foundname, type, node)) 4495 { 4496 NODE_UNLOCK(lock, isc_rwlocktype_read); 4497 result = setup_delegation(&search, nodep, foundname, 4498 rdataset, sigrdataset); 4499 goto tree_exit; 4500 } 4501 } else { 4502 /* 4503 * An ordinary successful query! 4504 */ 4505 result = ISC_R_SUCCESS; 4506 } 4507 4508 if (nodep != NULL) { 4509 if (!at_zonecut) { 4510 new_reference(search.rbtdb, node, isc_rwlocktype_read); 4511 } else { 4512 search.need_cleanup = false; 4513 } 4514 *nodep = node; 4515 } 4516 4517 if (type != dns_rdatatype_any) { 4518 bind_rdataset(search.rbtdb, node, found, 0, isc_rwlocktype_read, 4519 rdataset); 4520 if (foundsig != NULL) { 4521 bind_rdataset(search.rbtdb, node, foundsig, 0, 4522 isc_rwlocktype_read, sigrdataset); 4523 } 4524 } 4525 4526 if (wild) { 4527 foundname->attributes |= DNS_NAMEATTR_WILDCARD; 4528 } 4529 4530 node_exit: 4531 NODE_UNLOCK(lock, isc_rwlocktype_read); 4532 4533 tree_exit: 4534 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 4535 4536 /* 4537 * If we found a zonecut but aren't going to use it, we have to 4538 * let go of it. 4539 */ 4540 if (search.need_cleanup) { 4541 node = search.zonecut; 4542 INSIST(node != NULL); 4543 lock = &(search.rbtdb->node_locks[node->locknum].lock); 4544 4545 NODE_LOCK(lock, isc_rwlocktype_read); 4546 decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read, 4547 isc_rwlocktype_none, false); 4548 NODE_UNLOCK(lock, isc_rwlocktype_read); 4549 } 4550 4551 if (close_version) { 4552 closeversion(db, &version, false); 4553 } 4554 4555 dns_rbtnodechain_reset(&search.chain); 4556 4557 return (result); 4558 } 4559 4560 static isc_result_t 4561 zone_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 4562 isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname, 4563 dns_name_t *dcname, dns_rdataset_t *rdataset, 4564 dns_rdataset_t *sigrdataset) { 4565 UNUSED(db); 4566 UNUSED(name); 4567 UNUSED(options); 4568 UNUSED(now); 4569 UNUSED(nodep); 4570 UNUSED(foundname); 4571 UNUSED(dcname); 4572 UNUSED(rdataset); 4573 UNUSED(sigrdataset); 4574 4575 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!"); 4576 4577 UNREACHABLE(); 4578 return (ISC_R_NOTIMPLEMENTED); 4579 } 4580 4581 static bool 4582 check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header, 4583 isc_rwlocktype_t *locktype, nodelock_t *lock, 4584 rbtdb_search_t *search, rdatasetheader_t **header_prev) { 4585 if (!ACTIVE(header, search->now)) { 4586 dns_ttl_t stale = header->rdh_ttl + 4587 search->rbtdb->serve_stale_ttl; 4588 /* 4589 * If this data is in the stale window keep it and if 4590 * DNS_DBFIND_STALEOK is not set we tell the caller to 4591 * skip this record. We skip the records with ZEROTTL 4592 * (these records should not be cached anyway). 4593 */ 4594 4595 RDATASET_ATTR_CLR(header, RDATASET_ATTR_STALE_WINDOW); 4596 if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && 4597 stale > search->now) 4598 { 4599 mark_header_stale(search->rbtdb, header); 4600 *header_prev = header; 4601 /* 4602 * If DNS_DBFIND_STALESTART is set then it means we 4603 * failed to resolve the name during recursion, in 4604 * this case we mark the time in which the refresh 4605 * failed. 4606 */ 4607 if ((search->options & DNS_DBFIND_STALESTART) != 0) { 4608 atomic_store_release( 4609 &header->last_refresh_fail_ts, 4610 search->now); 4611 } else if ((search->options & 4612 DNS_DBFIND_STALEENABLED) != 0 && 4613 search->now < 4614 (atomic_load_acquire( 4615 &header->last_refresh_fail_ts) + 4616 search->rbtdb->serve_stale_refresh)) 4617 { 4618 /* 4619 * If we are within interval between last 4620 * refresh failure time + 'stale-refresh-time', 4621 * then don't skip this stale entry but use it 4622 * instead. 4623 */ 4624 RDATASET_ATTR_SET(header, 4625 RDATASET_ATTR_STALE_WINDOW); 4626 return (false); 4627 } else if ((search->options & 4628 DNS_DBFIND_STALETIMEOUT) != 0) 4629 { 4630 /* 4631 * We want stale RRset due to timeout, so we 4632 * don't skip it. 4633 */ 4634 return (false); 4635 } 4636 return ((search->options & DNS_DBFIND_STALEOK) == 0); 4637 } 4638 4639 /* 4640 * This rdataset is stale. If no one else is using the 4641 * node, we can clean it up right now, otherwise we mark 4642 * it as ancient, and the node as dirty, so it will get 4643 * cleaned up later. 4644 */ 4645 if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) && 4646 (*locktype == isc_rwlocktype_write || 4647 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) 4648 { 4649 /* 4650 * We update the node's status only when we can 4651 * get write access; otherwise, we leave others 4652 * to this work. Periodical cleaning will 4653 * eventually take the job as the last resort. 4654 * We won't downgrade the lock, since other 4655 * rdatasets are probably stale, too. 4656 */ 4657 *locktype = isc_rwlocktype_write; 4658 4659 if (isc_refcount_current(&node->references) == 0) { 4660 isc_mem_t *mctx; 4661 4662 /* 4663 * header->down can be non-NULL if the 4664 * refcount has just decremented to 0 4665 * but decrement_reference() has not 4666 * performed clean_cache_node(), in 4667 * which case we need to purge the stale 4668 * headers first. 4669 */ 4670 mctx = search->rbtdb->common.mctx; 4671 clean_stale_headers(search->rbtdb, mctx, 4672 header); 4673 if (*header_prev != NULL) { 4674 (*header_prev)->next = header->next; 4675 } else { 4676 node->data = header->next; 4677 } 4678 free_rdataset(search->rbtdb, mctx, header); 4679 } else { 4680 mark_header_ancient(search->rbtdb, header); 4681 *header_prev = header; 4682 } 4683 } else { 4684 *header_prev = header; 4685 } 4686 return (true); 4687 } 4688 return (false); 4689 } 4690 4691 static isc_result_t 4692 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) { 4693 rbtdb_search_t *search = arg; 4694 rdatasetheader_t *header, *header_prev, *header_next; 4695 rdatasetheader_t *dname_header, *sigdname_header; 4696 isc_result_t result; 4697 nodelock_t *lock; 4698 isc_rwlocktype_t locktype; 4699 4700 /* XXX comment */ 4701 4702 REQUIRE(search->zonecut == NULL); 4703 4704 /* 4705 * Keep compiler silent. 4706 */ 4707 UNUSED(name); 4708 4709 lock = &(search->rbtdb->node_locks[node->locknum].lock); 4710 locktype = isc_rwlocktype_read; 4711 NODE_LOCK(lock, locktype); 4712 4713 /* 4714 * Look for a DNAME or RRSIG DNAME rdataset. 4715 */ 4716 dname_header = NULL; 4717 sigdname_header = NULL; 4718 header_prev = NULL; 4719 for (header = node->data; header != NULL; header = header_next) { 4720 header_next = header->next; 4721 if (check_stale_header(node, header, &locktype, lock, search, 4722 &header_prev)) 4723 { 4724 /* Do nothing. */ 4725 } else if (header->type == dns_rdatatype_dname && 4726 EXISTS(header) && !ANCIENT(header)) 4727 { 4728 dname_header = header; 4729 header_prev = header; 4730 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME && 4731 EXISTS(header) && !ANCIENT(header)) 4732 { 4733 sigdname_header = header; 4734 header_prev = header; 4735 } else { 4736 header_prev = header; 4737 } 4738 } 4739 4740 if (dname_header != NULL && 4741 (!DNS_TRUST_PENDING(dname_header->trust) || 4742 (search->options & DNS_DBFIND_PENDINGOK) != 0)) 4743 { 4744 /* 4745 * We increment the reference count on node to ensure that 4746 * search->zonecut_rdataset will still be valid later. 4747 */ 4748 new_reference(search->rbtdb, node, locktype); 4749 search->zonecut = node; 4750 search->zonecut_rdataset = dname_header; 4751 search->zonecut_sigrdataset = sigdname_header; 4752 search->need_cleanup = true; 4753 result = DNS_R_PARTIALMATCH; 4754 } else { 4755 result = DNS_R_CONTINUE; 4756 } 4757 4758 NODE_UNLOCK(lock, locktype); 4759 4760 return (result); 4761 } 4762 4763 static isc_result_t 4764 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, 4765 dns_dbnode_t **nodep, dns_name_t *foundname, 4766 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 4767 unsigned int i; 4768 dns_rbtnode_t *level_node; 4769 rdatasetheader_t *header, *header_prev, *header_next; 4770 rdatasetheader_t *found, *foundsig; 4771 isc_result_t result = ISC_R_NOTFOUND; 4772 dns_name_t name; 4773 dns_rbtdb_t *rbtdb; 4774 bool done; 4775 nodelock_t *lock; 4776 isc_rwlocktype_t locktype; 4777 4778 /* 4779 * Caller must be holding the tree lock. 4780 */ 4781 4782 rbtdb = search->rbtdb; 4783 i = search->chain.level_matches; 4784 done = false; 4785 do { 4786 locktype = isc_rwlocktype_read; 4787 lock = &rbtdb->node_locks[node->locknum].lock; 4788 NODE_LOCK(lock, locktype); 4789 4790 /* 4791 * Look for NS and RRSIG NS rdatasets. 4792 */ 4793 found = NULL; 4794 foundsig = NULL; 4795 header_prev = NULL; 4796 for (header = node->data; header != NULL; header = header_next) 4797 { 4798 header_next = header->next; 4799 if (check_stale_header(node, header, &locktype, lock, 4800 search, &header_prev)) 4801 { 4802 /* Do nothing. */ 4803 } else if (EXISTS(header) && !ANCIENT(header)) { 4804 /* 4805 * We've found an extant rdataset. See if 4806 * we're interested in it. 4807 */ 4808 if (header->type == dns_rdatatype_ns) { 4809 found = header; 4810 if (foundsig != NULL) { 4811 break; 4812 } 4813 } else if (header->type == 4814 RBTDB_RDATATYPE_SIGNS) 4815 { 4816 foundsig = header; 4817 if (found != NULL) { 4818 break; 4819 } 4820 } 4821 header_prev = header; 4822 } else { 4823 header_prev = header; 4824 } 4825 } 4826 4827 if (found != NULL) { 4828 /* 4829 * If we have to set foundname, we do it before 4830 * anything else. If we were to set foundname after 4831 * we had set nodep or bound the rdataset, then we'd 4832 * have to undo that work if dns_name_concatenate() 4833 * failed. By setting foundname first, there's 4834 * nothing to undo if we have trouble. 4835 */ 4836 if (foundname != NULL) { 4837 dns_name_init(&name, NULL); 4838 dns_rbt_namefromnode(node, &name); 4839 dns_name_copynf(&name, foundname); 4840 while (i > 0) { 4841 i--; 4842 level_node = search->chain.levels[i]; 4843 dns_name_init(&name, NULL); 4844 dns_rbt_namefromnode(level_node, &name); 4845 result = dns_name_concatenate( 4846 foundname, &name, foundname, 4847 NULL); 4848 if (result != ISC_R_SUCCESS) { 4849 if (nodep != NULL) { 4850 *nodep = NULL; 4851 } 4852 goto node_exit; 4853 } 4854 } 4855 } 4856 result = DNS_R_DELEGATION; 4857 if (nodep != NULL) { 4858 new_reference(search->rbtdb, node, locktype); 4859 *nodep = node; 4860 } 4861 bind_rdataset(search->rbtdb, node, found, search->now, 4862 locktype, rdataset); 4863 if (foundsig != NULL) { 4864 bind_rdataset(search->rbtdb, node, foundsig, 4865 search->now, locktype, 4866 sigrdataset); 4867 } 4868 if (need_headerupdate(found, search->now) || 4869 (foundsig != NULL && 4870 need_headerupdate(foundsig, search->now))) 4871 { 4872 if (locktype != isc_rwlocktype_write) { 4873 NODE_UNLOCK(lock, locktype); 4874 NODE_LOCK(lock, isc_rwlocktype_write); 4875 locktype = isc_rwlocktype_write; 4876 POST(locktype); 4877 } 4878 if (need_headerupdate(found, search->now)) { 4879 update_header(search->rbtdb, found, 4880 search->now); 4881 } 4882 if (foundsig != NULL && 4883 need_headerupdate(foundsig, search->now)) 4884 { 4885 update_header(search->rbtdb, foundsig, 4886 search->now); 4887 } 4888 } 4889 } 4890 4891 node_exit: 4892 NODE_UNLOCK(lock, locktype); 4893 4894 if (found == NULL && i > 0) { 4895 i--; 4896 node = search->chain.levels[i]; 4897 } else { 4898 done = true; 4899 } 4900 } while (!done); 4901 4902 return (result); 4903 } 4904 4905 static isc_result_t 4906 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep, 4907 isc_stdtime_t now, dns_name_t *foundname, 4908 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 4909 dns_rbtnode_t *node; 4910 rdatasetheader_t *header, *header_next, *header_prev; 4911 rdatasetheader_t *found, *foundsig; 4912 bool empty_node; 4913 isc_result_t result; 4914 dns_fixedname_t fname, forigin; 4915 dns_name_t *name, *origin; 4916 rbtdb_rdatatype_t matchtype, sigmatchtype; 4917 nodelock_t *lock; 4918 isc_rwlocktype_t locktype; 4919 dns_rbtnodechain_t chain; 4920 4921 chain = search->chain; 4922 4923 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0); 4924 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, 4925 dns_rdatatype_nsec); 4926 4927 do { 4928 node = NULL; 4929 name = dns_fixedname_initname(&fname); 4930 origin = dns_fixedname_initname(&forigin); 4931 result = dns_rbtnodechain_current(&chain, name, origin, &node); 4932 if (result != ISC_R_SUCCESS) { 4933 return (result); 4934 } 4935 locktype = isc_rwlocktype_read; 4936 lock = &(search->rbtdb->node_locks[node->locknum].lock); 4937 NODE_LOCK(lock, locktype); 4938 found = NULL; 4939 foundsig = NULL; 4940 empty_node = true; 4941 header_prev = NULL; 4942 for (header = node->data; header != NULL; header = header_next) 4943 { 4944 header_next = header->next; 4945 if (check_stale_header(node, header, &locktype, lock, 4946 search, &header_prev)) 4947 { 4948 continue; 4949 } 4950 if (NONEXISTENT(header) || 4951 RBTDB_RDATATYPE_BASE(header->type) == 0) 4952 { 4953 header_prev = header; 4954 continue; 4955 } 4956 /* 4957 * Don't stop on provable noqname / RRSIG. 4958 */ 4959 if (header->noqname == NULL && 4960 RBTDB_RDATATYPE_BASE(header->type) != 4961 dns_rdatatype_rrsig) 4962 { 4963 empty_node = false; 4964 } 4965 if (header->type == matchtype) { 4966 found = header; 4967 } else if (header->type == sigmatchtype) { 4968 foundsig = header; 4969 } 4970 header_prev = header; 4971 } 4972 if (found != NULL) { 4973 result = dns_name_concatenate(name, origin, foundname, 4974 NULL); 4975 if (result != ISC_R_SUCCESS) { 4976 goto unlock_node; 4977 } 4978 bind_rdataset(search->rbtdb, node, found, now, locktype, 4979 rdataset); 4980 if (foundsig != NULL) { 4981 bind_rdataset(search->rbtdb, node, foundsig, 4982 now, locktype, sigrdataset); 4983 } 4984 new_reference(search->rbtdb, node, locktype); 4985 *nodep = node; 4986 result = DNS_R_COVERINGNSEC; 4987 } else if (!empty_node) { 4988 result = ISC_R_NOTFOUND; 4989 } else { 4990 result = dns_rbtnodechain_prev(&chain, NULL, NULL); 4991 } 4992 unlock_node: 4993 NODE_UNLOCK(lock, locktype); 4994 } while (empty_node && result == ISC_R_SUCCESS); 4995 return (result); 4996 } 4997 4998 static isc_result_t 4999 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 5000 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 5001 dns_dbnode_t **nodep, dns_name_t *foundname, 5002 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 5003 dns_rbtnode_t *node = NULL; 5004 isc_result_t result; 5005 rbtdb_search_t search; 5006 bool cname_ok = true; 5007 bool empty_node; 5008 nodelock_t *lock; 5009 isc_rwlocktype_t locktype; 5010 rdatasetheader_t *header, *header_prev, *header_next; 5011 rdatasetheader_t *found, *nsheader; 5012 rdatasetheader_t *foundsig, *nssig, *cnamesig; 5013 rdatasetheader_t *update, *updatesig; 5014 rdatasetheader_t *nsecheader, *nsecsig; 5015 rbtdb_rdatatype_t sigtype, negtype; 5016 5017 UNUSED(version); 5018 5019 search.rbtdb = (dns_rbtdb_t *)db; 5020 5021 REQUIRE(VALID_RBTDB(search.rbtdb)); 5022 REQUIRE(version == NULL); 5023 5024 if (now == 0) { 5025 isc_stdtime_get(&now); 5026 } 5027 5028 search.rbtversion = NULL; 5029 search.serial = 1; 5030 search.options = options; 5031 search.copy_name = false; 5032 search.need_cleanup = false; 5033 search.wild = false; 5034 search.zonecut = NULL; 5035 dns_fixedname_init(&search.zonecut_name); 5036 dns_rbtnodechain_init(&search.chain); 5037 search.now = now; 5038 update = NULL; 5039 updatesig = NULL; 5040 5041 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5042 5043 /* 5044 * Search down from the root of the tree. If, while going down, we 5045 * encounter a callback node, cache_zonecut_callback() will search the 5046 * rdatasets at the zone cut for a DNAME rdataset. 5047 */ 5048 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, 5049 &search.chain, DNS_RBTFIND_EMPTYDATA, 5050 cache_zonecut_callback, &search); 5051 5052 if (result == DNS_R_PARTIALMATCH) { 5053 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { 5054 result = find_coveringnsec(&search, nodep, now, 5055 foundname, rdataset, 5056 sigrdataset); 5057 if (result == DNS_R_COVERINGNSEC) { 5058 goto tree_exit; 5059 } 5060 } 5061 if (search.zonecut != NULL) { 5062 result = setup_delegation(&search, nodep, foundname, 5063 rdataset, sigrdataset); 5064 goto tree_exit; 5065 } else { 5066 find_ns: 5067 result = find_deepest_zonecut(&search, node, nodep, 5068 foundname, rdataset, 5069 sigrdataset); 5070 goto tree_exit; 5071 } 5072 } else if (result != ISC_R_SUCCESS) { 5073 goto tree_exit; 5074 } 5075 5076 /* 5077 * Certain DNSSEC types are not subject to CNAME matching 5078 * (RFC4035, section 2.5 and RFC3007). 5079 * 5080 * We don't check for RRSIG, because we don't store RRSIG records 5081 * directly. 5082 */ 5083 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 5084 cname_ok = false; 5085 } 5086 5087 /* 5088 * We now go looking for rdata... 5089 */ 5090 5091 lock = &(search.rbtdb->node_locks[node->locknum].lock); 5092 locktype = isc_rwlocktype_read; 5093 NODE_LOCK(lock, locktype); 5094 5095 found = NULL; 5096 foundsig = NULL; 5097 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 5098 negtype = RBTDB_RDATATYPE_VALUE(0, type); 5099 nsheader = NULL; 5100 nsecheader = NULL; 5101 nssig = NULL; 5102 nsecsig = NULL; 5103 cnamesig = NULL; 5104 empty_node = true; 5105 header_prev = NULL; 5106 for (header = node->data; header != NULL; header = header_next) { 5107 header_next = header->next; 5108 if (check_stale_header(node, header, &locktype, lock, &search, 5109 &header_prev)) 5110 { 5111 /* Do nothing. */ 5112 } else if (EXISTS(header) && !ANCIENT(header)) { 5113 /* 5114 * We now know that there is at least one active 5115 * non-stale rdataset at this node. 5116 */ 5117 empty_node = false; 5118 5119 /* 5120 * If we found a type we were looking for, remember 5121 * it. 5122 */ 5123 if (header->type == type || 5124 (type == dns_rdatatype_any && 5125 RBTDB_RDATATYPE_BASE(header->type) != 0) || 5126 (cname_ok && header->type == dns_rdatatype_cname)) 5127 { 5128 /* 5129 * We've found the answer. 5130 */ 5131 found = header; 5132 if (header->type == dns_rdatatype_cname && 5133 cname_ok && cnamesig != NULL) 5134 { 5135 /* 5136 * If we've already got the 5137 * CNAME RRSIG, use it. 5138 */ 5139 foundsig = cnamesig; 5140 } 5141 } else if (header->type == sigtype) { 5142 /* 5143 * We've found the RRSIG rdataset for our 5144 * target type. Remember it. 5145 */ 5146 foundsig = header; 5147 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || 5148 header->type == negtype) 5149 { 5150 /* 5151 * We've found a negative cache entry. 5152 */ 5153 found = header; 5154 } else if (header->type == dns_rdatatype_ns) { 5155 /* 5156 * Remember a NS rdataset even if we're 5157 * not specifically looking for it, because 5158 * we might need it later. 5159 */ 5160 nsheader = header; 5161 } else if (header->type == RBTDB_RDATATYPE_SIGNS) { 5162 /* 5163 * If we need the NS rdataset, we'll also 5164 * need its signature. 5165 */ 5166 nssig = header; 5167 } else if (header->type == dns_rdatatype_nsec) { 5168 nsecheader = header; 5169 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) { 5170 nsecsig = header; 5171 } else if (cname_ok && 5172 header->type == RBTDB_RDATATYPE_SIGCNAME) 5173 { 5174 /* 5175 * If we get a CNAME match, we'll also need 5176 * its signature. 5177 */ 5178 cnamesig = header; 5179 } 5180 header_prev = header; 5181 } else { 5182 header_prev = header; 5183 } 5184 } 5185 5186 if (empty_node) { 5187 /* 5188 * We have an exact match for the name, but there are no 5189 * extant rdatasets. That means that this node doesn't 5190 * meaningfully exist, and that we really have a partial match. 5191 */ 5192 NODE_UNLOCK(lock, locktype); 5193 goto find_ns; 5194 } 5195 5196 /* 5197 * If we didn't find what we were looking for... 5198 */ 5199 if (found == NULL || 5200 (DNS_TRUST_ADDITIONAL(found->trust) && 5201 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || 5202 (found->trust == dns_trust_glue && 5203 ((options & DNS_DBFIND_GLUEOK) == 0)) || 5204 (DNS_TRUST_PENDING(found->trust) && 5205 ((options & DNS_DBFIND_PENDINGOK) == 0))) 5206 { 5207 /* 5208 * Return covering NODATA NSEC record. 5209 */ 5210 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 5211 nsecheader != NULL) 5212 { 5213 if (nodep != NULL) { 5214 new_reference(search.rbtdb, node, locktype); 5215 *nodep = node; 5216 } 5217 bind_rdataset(search.rbtdb, node, nsecheader, 5218 search.now, locktype, rdataset); 5219 if (need_headerupdate(nsecheader, search.now)) { 5220 update = nsecheader; 5221 } 5222 if (nsecsig != NULL) { 5223 bind_rdataset(search.rbtdb, node, nsecsig, 5224 search.now, locktype, 5225 sigrdataset); 5226 if (need_headerupdate(nsecsig, search.now)) { 5227 updatesig = nsecsig; 5228 } 5229 } 5230 result = DNS_R_COVERINGNSEC; 5231 goto node_exit; 5232 } 5233 5234 /* 5235 * If there is an NS rdataset at this node, then this is the 5236 * deepest zone cut. 5237 */ 5238 if (nsheader != NULL) { 5239 if (nodep != NULL) { 5240 new_reference(search.rbtdb, node, locktype); 5241 *nodep = node; 5242 } 5243 bind_rdataset(search.rbtdb, node, nsheader, search.now, 5244 locktype, rdataset); 5245 if (need_headerupdate(nsheader, search.now)) { 5246 update = nsheader; 5247 } 5248 if (nssig != NULL) { 5249 bind_rdataset(search.rbtdb, node, nssig, 5250 search.now, locktype, 5251 sigrdataset); 5252 if (need_headerupdate(nssig, search.now)) { 5253 updatesig = nssig; 5254 } 5255 } 5256 result = DNS_R_DELEGATION; 5257 goto node_exit; 5258 } 5259 5260 /* 5261 * Go find the deepest zone cut. 5262 */ 5263 NODE_UNLOCK(lock, locktype); 5264 goto find_ns; 5265 } 5266 5267 /* 5268 * We found what we were looking for, or we found a CNAME. 5269 */ 5270 5271 if (nodep != NULL) { 5272 new_reference(search.rbtdb, node, locktype); 5273 *nodep = node; 5274 } 5275 5276 if (NEGATIVE(found)) { 5277 /* 5278 * We found a negative cache entry. 5279 */ 5280 if (NXDOMAIN(found)) { 5281 result = DNS_R_NCACHENXDOMAIN; 5282 } else { 5283 result = DNS_R_NCACHENXRRSET; 5284 } 5285 } else if (type != found->type && type != dns_rdatatype_any && 5286 found->type == dns_rdatatype_cname) 5287 { 5288 /* 5289 * We weren't doing an ANY query and we found a CNAME instead 5290 * of the type we were looking for, so we need to indicate 5291 * that result to the caller. 5292 */ 5293 result = DNS_R_CNAME; 5294 } else { 5295 /* 5296 * An ordinary successful query! 5297 */ 5298 result = ISC_R_SUCCESS; 5299 } 5300 5301 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || 5302 result == DNS_R_NCACHENXRRSET) 5303 { 5304 bind_rdataset(search.rbtdb, node, found, search.now, locktype, 5305 rdataset); 5306 if (need_headerupdate(found, search.now)) { 5307 update = found; 5308 } 5309 if (!NEGATIVE(found) && foundsig != NULL) { 5310 bind_rdataset(search.rbtdb, node, foundsig, search.now, 5311 locktype, sigrdataset); 5312 if (need_headerupdate(foundsig, search.now)) { 5313 updatesig = foundsig; 5314 } 5315 } 5316 } 5317 5318 node_exit: 5319 if ((update != NULL || updatesig != NULL) && 5320 locktype != isc_rwlocktype_write) 5321 { 5322 NODE_UNLOCK(lock, locktype); 5323 NODE_LOCK(lock, isc_rwlocktype_write); 5324 locktype = isc_rwlocktype_write; 5325 POST(locktype); 5326 } 5327 if (update != NULL && need_headerupdate(update, search.now)) { 5328 update_header(search.rbtdb, update, search.now); 5329 } 5330 if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { 5331 update_header(search.rbtdb, updatesig, search.now); 5332 } 5333 5334 NODE_UNLOCK(lock, locktype); 5335 5336 tree_exit: 5337 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5338 5339 /* 5340 * If we found a zonecut but aren't going to use it, we have to 5341 * let go of it. 5342 */ 5343 if (search.need_cleanup) { 5344 node = search.zonecut; 5345 INSIST(node != NULL); 5346 lock = &(search.rbtdb->node_locks[node->locknum].lock); 5347 5348 NODE_LOCK(lock, isc_rwlocktype_read); 5349 decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read, 5350 isc_rwlocktype_none, false); 5351 NODE_UNLOCK(lock, isc_rwlocktype_read); 5352 } 5353 5354 dns_rbtnodechain_reset(&search.chain); 5355 5356 update_cachestats(search.rbtdb, result); 5357 return (result); 5358 } 5359 5360 static isc_result_t 5361 cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 5362 isc_stdtime_t now, dns_dbnode_t **nodep, 5363 dns_name_t *foundname, dns_name_t *dcname, 5364 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { 5365 dns_rbtnode_t *node = NULL; 5366 nodelock_t *lock; 5367 isc_result_t result; 5368 rbtdb_search_t search; 5369 rdatasetheader_t *header, *header_prev, *header_next; 5370 rdatasetheader_t *found, *foundsig; 5371 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; 5372 isc_rwlocktype_t locktype; 5373 bool dcnull = (dcname == NULL); 5374 5375 search.rbtdb = (dns_rbtdb_t *)db; 5376 5377 REQUIRE(VALID_RBTDB(search.rbtdb)); 5378 5379 if (now == 0) { 5380 isc_stdtime_get(&now); 5381 } 5382 5383 search.rbtversion = NULL; 5384 search.serial = 1; 5385 search.options = options; 5386 search.copy_name = false; 5387 search.need_cleanup = false; 5388 search.wild = false; 5389 search.zonecut = NULL; 5390 dns_fixedname_init(&search.zonecut_name); 5391 dns_rbtnodechain_init(&search.chain); 5392 search.now = now; 5393 5394 if (dcnull) { 5395 dcname = foundname; 5396 } 5397 5398 if ((options & DNS_DBFIND_NOEXACT) != 0) { 5399 rbtoptions |= DNS_RBTFIND_NOEXACT; 5400 } 5401 5402 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5403 5404 /* 5405 * Search down from the root of the tree. 5406 */ 5407 result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, 5408 &search.chain, rbtoptions, NULL, &search); 5409 5410 if (result == DNS_R_PARTIALMATCH) { 5411 result = find_deepest_zonecut(&search, node, nodep, foundname, 5412 rdataset, sigrdataset); 5413 goto tree_exit; 5414 } else if (result != ISC_R_SUCCESS) { 5415 goto tree_exit; 5416 } else if (!dcnull) { 5417 dns_name_copynf(dcname, foundname); 5418 } 5419 5420 /* 5421 * We now go looking for an NS rdataset at the node. 5422 */ 5423 5424 lock = &(search.rbtdb->node_locks[node->locknum].lock); 5425 locktype = isc_rwlocktype_read; 5426 NODE_LOCK(lock, locktype); 5427 5428 found = NULL; 5429 foundsig = NULL; 5430 header_prev = NULL; 5431 for (header = node->data; header != NULL; header = header_next) { 5432 header_next = header->next; 5433 if (check_stale_header(node, header, &locktype, lock, &search, 5434 &header_prev)) 5435 { 5436 /* 5437 * The function dns_rbt_findnode found us the a matching 5438 * node for 'name' and stored the result in 'dcname'. 5439 * This is the deepest known zonecut in our database. 5440 * However, this node may be stale and if serve-stale 5441 * is not enabled (in other words 'stale-answer-enable' 5442 * is set to no), this node may not be used as a 5443 * zonecut we know about. If so, find the deepest 5444 * zonecut from this node up and return that instead. 5445 */ 5446 NODE_UNLOCK(lock, locktype); 5447 result = find_deepest_zonecut(&search, node, nodep, 5448 foundname, rdataset, 5449 sigrdataset); 5450 dns_name_copynf(foundname, dcname); 5451 goto tree_exit; 5452 } else if (EXISTS(header) && !ANCIENT(header)) { 5453 /* 5454 * If we found a type we were looking for, remember 5455 * it. 5456 */ 5457 if (header->type == dns_rdatatype_ns) { 5458 /* 5459 * Remember a NS rdataset even if we're 5460 * not specifically looking for it, because 5461 * we might need it later. 5462 */ 5463 found = header; 5464 } else if (header->type == RBTDB_RDATATYPE_SIGNS) { 5465 /* 5466 * If we need the NS rdataset, we'll also 5467 * need its signature. 5468 */ 5469 foundsig = header; 5470 } 5471 header_prev = header; 5472 } else { 5473 header_prev = header; 5474 } 5475 } 5476 5477 if (found == NULL) { 5478 /* 5479 * No NS records here. 5480 */ 5481 NODE_UNLOCK(lock, locktype); 5482 result = find_deepest_zonecut(&search, node, nodep, foundname, 5483 rdataset, sigrdataset); 5484 goto tree_exit; 5485 } 5486 5487 if (nodep != NULL) { 5488 new_reference(search.rbtdb, node, locktype); 5489 *nodep = node; 5490 } 5491 5492 bind_rdataset(search.rbtdb, node, found, search.now, locktype, 5493 rdataset); 5494 if (foundsig != NULL) { 5495 bind_rdataset(search.rbtdb, node, foundsig, search.now, 5496 locktype, sigrdataset); 5497 } 5498 5499 if (need_headerupdate(found, search.now) || 5500 (foundsig != NULL && need_headerupdate(foundsig, search.now))) 5501 { 5502 if (locktype != isc_rwlocktype_write) { 5503 NODE_UNLOCK(lock, locktype); 5504 NODE_LOCK(lock, isc_rwlocktype_write); 5505 locktype = isc_rwlocktype_write; 5506 POST(locktype); 5507 } 5508 if (need_headerupdate(found, search.now)) { 5509 update_header(search.rbtdb, found, search.now); 5510 } 5511 if (foundsig != NULL && need_headerupdate(foundsig, search.now)) 5512 { 5513 update_header(search.rbtdb, foundsig, search.now); 5514 } 5515 } 5516 5517 NODE_UNLOCK(lock, locktype); 5518 5519 tree_exit: 5520 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); 5521 5522 INSIST(!search.need_cleanup); 5523 5524 dns_rbtnodechain_reset(&search.chain); 5525 5526 if (result == DNS_R_DELEGATION) { 5527 result = ISC_R_SUCCESS; 5528 } 5529 5530 return (result); 5531 } 5532 5533 static void 5534 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) { 5535 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5536 dns_rbtnode_t *node = (dns_rbtnode_t *)source; 5537 5538 REQUIRE(VALID_RBTDB(rbtdb)); 5539 REQUIRE(targetp != NULL && *targetp == NULL); 5540 5541 isc_refcount_increment(&node->references); 5542 5543 *targetp = source; 5544 } 5545 5546 static void 5547 detachnode(dns_db_t *db, dns_dbnode_t **targetp) { 5548 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5549 dns_rbtnode_t *node; 5550 bool want_free = false; 5551 bool inactive = false; 5552 rbtdb_nodelock_t *nodelock; 5553 5554 REQUIRE(VALID_RBTDB(rbtdb)); 5555 REQUIRE(targetp != NULL && *targetp != NULL); 5556 5557 node = (dns_rbtnode_t *)(*targetp); 5558 nodelock = &rbtdb->node_locks[node->locknum]; 5559 5560 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read); 5561 5562 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, 5563 isc_rwlocktype_none, false)) 5564 { 5565 if (isc_refcount_current(&nodelock->references) == 0 && 5566 nodelock->exiting) 5567 { 5568 inactive = true; 5569 } 5570 } 5571 5572 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read); 5573 5574 *targetp = NULL; 5575 5576 if (inactive) { 5577 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 5578 rbtdb->active--; 5579 if (rbtdb->active == 0) { 5580 want_free = true; 5581 } 5582 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 5583 if (want_free) { 5584 char buf[DNS_NAME_FORMATSIZE]; 5585 if (dns_name_dynamic(&rbtdb->common.origin)) { 5586 dns_name_format(&rbtdb->common.origin, buf, 5587 sizeof(buf)); 5588 } else { 5589 strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 5590 } 5591 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 5592 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 5593 "calling free_rbtdb(%s)", buf); 5594 free_rbtdb(rbtdb, true, NULL); 5595 } 5596 } 5597 } 5598 5599 static isc_result_t 5600 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) { 5601 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5602 dns_rbtnode_t *rbtnode = node; 5603 rdatasetheader_t *header; 5604 bool force_expire = false; 5605 /* 5606 * These are the category and module used by the cache cleaner. 5607 */ 5608 bool log = false; 5609 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE; 5610 isc_logmodule_t *module = DNS_LOGMODULE_CACHE; 5611 int level = ISC_LOG_DEBUG(2); 5612 char printname[DNS_NAME_FORMATSIZE]; 5613 5614 REQUIRE(VALID_RBTDB(rbtdb)); 5615 5616 /* 5617 * Caller must hold a tree lock. 5618 */ 5619 5620 if (now == 0) { 5621 isc_stdtime_get(&now); 5622 } 5623 5624 if (isc_mem_isovermem(rbtdb->common.mctx)) { 5625 /* 5626 * Force expire with 25% probability. 5627 * XXXDCL Could stand to have a better policy, like LRU. 5628 */ 5629 force_expire = (rbtnode->down == NULL && 5630 (isc_random32() % 4) == 0); 5631 5632 /* 5633 * Note that 'log' can be true IFF overmem is also true. 5634 * overmem can currently only be true for cache 5635 * databases -- hence all of the "overmem cache" log strings. 5636 */ 5637 log = isc_log_wouldlog(dns_lctx, level); 5638 if (log) { 5639 isc_log_write( 5640 dns_lctx, category, module, level, 5641 "overmem cache: %s %s", 5642 force_expire ? "FORCE" : "check", 5643 dns_rbt_formatnodename(rbtnode, printname, 5644 sizeof(printname))); 5645 } 5646 } 5647 5648 /* 5649 * We may not need write access, but this code path is not performance 5650 * sensitive, so it should be okay to always lock as a writer. 5651 */ 5652 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5653 isc_rwlocktype_write); 5654 5655 for (header = rbtnode->data; header != NULL; header = header->next) { 5656 if (header->rdh_ttl + rbtdb->serve_stale_ttl <= 5657 now - RBTDB_VIRTUAL) 5658 { 5659 /* 5660 * We don't check if refcurrent(rbtnode) == 0 and try 5661 * to free like we do in cache_find(), because 5662 * refcurrent(rbtnode) must be non-zero. This is so 5663 * because 'node' is an argument to the function. 5664 */ 5665 mark_header_ancient(rbtdb, header); 5666 if (log) { 5667 isc_log_write(dns_lctx, category, module, level, 5668 "overmem cache: ancient %s", 5669 printname); 5670 } 5671 } else if (force_expire) { 5672 if (!RETAIN(header)) { 5673 set_ttl(rbtdb, header, 0); 5674 mark_header_ancient(rbtdb, header); 5675 } else if (log) { 5676 isc_log_write(dns_lctx, category, module, level, 5677 "overmem cache: " 5678 "reprieve by RETAIN() %s", 5679 printname); 5680 } 5681 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log) { 5682 isc_log_write(dns_lctx, category, module, level, 5683 "overmem cache: saved %s", printname); 5684 } 5685 } 5686 5687 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5688 isc_rwlocktype_write); 5689 5690 return (ISC_R_SUCCESS); 5691 } 5692 5693 static void 5694 overmem(dns_db_t *db, bool over) { 5695 /* This is an empty callback. See adb.c:water() */ 5696 5697 UNUSED(db); 5698 UNUSED(over); 5699 5700 return; 5701 } 5702 5703 static void 5704 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) { 5705 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5706 dns_rbtnode_t *rbtnode = node; 5707 bool first; 5708 uint32_t refs; 5709 5710 REQUIRE(VALID_RBTDB(rbtdb)); 5711 5712 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5713 isc_rwlocktype_read); 5714 5715 refs = isc_refcount_current(&rbtnode->references); 5716 fprintf(out, "node %p, %" PRIu32 " references, locknum = %u\n", rbtnode, 5717 refs, rbtnode->locknum); 5718 if (rbtnode->data != NULL) { 5719 rdatasetheader_t *current, *top_next; 5720 5721 for (current = rbtnode->data; current != NULL; 5722 current = top_next) 5723 { 5724 top_next = current->next; 5725 first = true; 5726 fprintf(out, "\ttype %u", current->type); 5727 do { 5728 uint_least16_t attributes = atomic_load_acquire( 5729 ¤t->attributes); 5730 if (!first) { 5731 fprintf(out, "\t"); 5732 } 5733 first = false; 5734 fprintf(out, 5735 "\tserial = %lu, ttl = %u, " 5736 "trust = %u, attributes = %" PRIuLEAST16 5737 ", " 5738 "resign = %u\n", 5739 (unsigned long)current->serial, 5740 current->rdh_ttl, current->trust, 5741 attributes, 5742 (current->resign << 1) | 5743 current->resign_lsb); 5744 current = current->down; 5745 } while (current != NULL); 5746 } 5747 } else { 5748 fprintf(out, "(empty)\n"); 5749 } 5750 5751 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5752 isc_rwlocktype_read); 5753 } 5754 5755 static isc_result_t 5756 createiterator(dns_db_t *db, unsigned int options, 5757 dns_dbiterator_t **iteratorp) { 5758 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5759 rbtdb_dbiterator_t *rbtdbiter; 5760 5761 REQUIRE(VALID_RBTDB(rbtdb)); 5762 5763 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter)); 5764 5765 rbtdbiter->common.methods = &dbiterator_methods; 5766 rbtdbiter->common.db = NULL; 5767 dns_db_attach(db, &rbtdbiter->common.db); 5768 rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) != 5769 0); 5770 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC; 5771 rbtdbiter->common.cleaning = false; 5772 rbtdbiter->paused = true; 5773 rbtdbiter->tree_locked = isc_rwlocktype_none; 5774 rbtdbiter->result = ISC_R_SUCCESS; 5775 dns_fixedname_init(&rbtdbiter->name); 5776 dns_fixedname_init(&rbtdbiter->origin); 5777 rbtdbiter->node = NULL; 5778 rbtdbiter->delcnt = 0; 5779 rbtdbiter->nsec3only = ((options & DNS_DB_NSEC3ONLY) != 0); 5780 rbtdbiter->nonsec3 = ((options & DNS_DB_NONSEC3) != 0); 5781 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions)); 5782 dns_rbtnodechain_init(&rbtdbiter->chain); 5783 dns_rbtnodechain_init(&rbtdbiter->nsec3chain); 5784 if (rbtdbiter->nsec3only) { 5785 rbtdbiter->current = &rbtdbiter->nsec3chain; 5786 } else { 5787 rbtdbiter->current = &rbtdbiter->chain; 5788 } 5789 5790 *iteratorp = (dns_dbiterator_t *)rbtdbiter; 5791 5792 return (ISC_R_SUCCESS); 5793 } 5794 5795 static isc_result_t 5796 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 5797 dns_rdatatype_t type, dns_rdatatype_t covers, 5798 isc_stdtime_t now, dns_rdataset_t *rdataset, 5799 dns_rdataset_t *sigrdataset) { 5800 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5801 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 5802 rdatasetheader_t *header, *header_next, *found, *foundsig; 5803 rbtdb_serial_t serial; 5804 rbtdb_version_t *rbtversion = version; 5805 bool close_version = false; 5806 rbtdb_rdatatype_t matchtype, sigmatchtype; 5807 5808 REQUIRE(VALID_RBTDB(rbtdb)); 5809 REQUIRE(type != dns_rdatatype_any); 5810 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 5811 5812 if (rbtversion == NULL) { 5813 currentversion(db, (dns_dbversion_t **)(void *)(&rbtversion)); 5814 close_version = true; 5815 } 5816 serial = rbtversion->serial; 5817 now = 0; 5818 5819 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5820 isc_rwlocktype_read); 5821 5822 found = NULL; 5823 foundsig = NULL; 5824 matchtype = RBTDB_RDATATYPE_VALUE(type, covers); 5825 if (covers == 0) { 5826 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 5827 } else { 5828 sigmatchtype = 0; 5829 } 5830 5831 for (header = rbtnode->data; header != NULL; header = header_next) { 5832 header_next = header->next; 5833 do { 5834 if (header->serial <= serial && !IGNORE(header)) { 5835 /* 5836 * Is this a "this rdataset doesn't 5837 * exist" record? 5838 */ 5839 if (NONEXISTENT(header)) { 5840 header = NULL; 5841 } 5842 break; 5843 } else { 5844 header = header->down; 5845 } 5846 } while (header != NULL); 5847 if (header != NULL) { 5848 /* 5849 * We have an active, extant rdataset. If it's a 5850 * type we're looking for, remember it. 5851 */ 5852 if (header->type == matchtype) { 5853 found = header; 5854 if (foundsig != NULL) { 5855 break; 5856 } 5857 } else if (header->type == sigmatchtype) { 5858 foundsig = header; 5859 if (found != NULL) { 5860 break; 5861 } 5862 } 5863 } 5864 } 5865 if (found != NULL) { 5866 bind_rdataset(rbtdb, rbtnode, found, now, isc_rwlocktype_read, 5867 rdataset); 5868 if (foundsig != NULL) { 5869 bind_rdataset(rbtdb, rbtnode, foundsig, now, 5870 isc_rwlocktype_read, sigrdataset); 5871 } 5872 } 5873 5874 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 5875 isc_rwlocktype_read); 5876 5877 if (close_version) { 5878 closeversion(db, (dns_dbversion_t **)(void *)(&rbtversion), 5879 false); 5880 } 5881 5882 if (found == NULL) { 5883 return (ISC_R_NOTFOUND); 5884 } 5885 5886 return (ISC_R_SUCCESS); 5887 } 5888 5889 static isc_result_t 5890 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 5891 dns_rdatatype_t type, dns_rdatatype_t covers, 5892 isc_stdtime_t now, dns_rdataset_t *rdataset, 5893 dns_rdataset_t *sigrdataset) { 5894 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5895 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 5896 rdatasetheader_t *header, *header_next, *found, *foundsig; 5897 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype; 5898 isc_result_t result; 5899 nodelock_t *lock; 5900 isc_rwlocktype_t locktype; 5901 5902 REQUIRE(VALID_RBTDB(rbtdb)); 5903 REQUIRE(type != dns_rdatatype_any); 5904 5905 UNUSED(version); 5906 5907 result = ISC_R_SUCCESS; 5908 5909 if (now == 0) { 5910 isc_stdtime_get(&now); 5911 } 5912 5913 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 5914 locktype = isc_rwlocktype_read; 5915 NODE_LOCK(lock, locktype); 5916 5917 found = NULL; 5918 foundsig = NULL; 5919 matchtype = RBTDB_RDATATYPE_VALUE(type, covers); 5920 negtype = RBTDB_RDATATYPE_VALUE(0, type); 5921 if (covers == 0) { 5922 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); 5923 } else { 5924 sigmatchtype = 0; 5925 } 5926 5927 for (header = rbtnode->data; header != NULL; header = header_next) { 5928 header_next = header->next; 5929 if (!ACTIVE(header, now)) { 5930 if ((header->rdh_ttl + rbtdb->serve_stale_ttl < 5931 now - RBTDB_VIRTUAL) && 5932 (locktype == isc_rwlocktype_write || 5933 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) 5934 { 5935 /* 5936 * We update the node's status only when we 5937 * can get write access. 5938 */ 5939 locktype = isc_rwlocktype_write; 5940 5941 /* 5942 * We don't check if refcurrent(rbtnode) == 0 5943 * and try to free like we do in cache_find(), 5944 * because refcurrent(rbtnode) must be 5945 * non-zero. This is so because 'node' is an 5946 * argument to the function. 5947 */ 5948 mark_header_ancient(rbtdb, header); 5949 } 5950 } else if (EXISTS(header) && !ANCIENT(header)) { 5951 if (header->type == matchtype) { 5952 found = header; 5953 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY || 5954 header->type == negtype) 5955 { 5956 found = header; 5957 } else if (header->type == sigmatchtype) { 5958 foundsig = header; 5959 } 5960 } 5961 } 5962 if (found != NULL) { 5963 bind_rdataset(rbtdb, rbtnode, found, now, locktype, rdataset); 5964 if (!NEGATIVE(found) && foundsig != NULL) { 5965 bind_rdataset(rbtdb, rbtnode, foundsig, now, locktype, 5966 sigrdataset); 5967 } 5968 } 5969 5970 NODE_UNLOCK(lock, locktype); 5971 5972 if (found == NULL) { 5973 return (ISC_R_NOTFOUND); 5974 } 5975 5976 if (NEGATIVE(found)) { 5977 /* 5978 * We found a negative cache entry. 5979 */ 5980 if (NXDOMAIN(found)) { 5981 result = DNS_R_NCACHENXDOMAIN; 5982 } else { 5983 result = DNS_R_NCACHENXRRSET; 5984 } 5985 } 5986 5987 update_cachestats(rbtdb, result); 5988 5989 return (result); 5990 } 5991 5992 static isc_result_t 5993 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 5994 unsigned int options, isc_stdtime_t now, 5995 dns_rdatasetiter_t **iteratorp) { 5996 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 5997 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 5998 rbtdb_version_t *rbtversion = version; 5999 rbtdb_rdatasetiter_t *iterator; 6000 6001 REQUIRE(VALID_RBTDB(rbtdb)); 6002 6003 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator)); 6004 6005 if ((db->attributes & DNS_DBATTR_CACHE) == 0) { 6006 now = 0; 6007 if (rbtversion == NULL) { 6008 currentversion( 6009 db, (dns_dbversion_t **)(void *)(&rbtversion)); 6010 } else { 6011 INSIST(rbtversion->rbtdb == rbtdb); 6012 6013 (void)isc_refcount_increment(&rbtversion->references); 6014 } 6015 } else { 6016 if (now == 0) { 6017 isc_stdtime_get(&now); 6018 } 6019 rbtversion = NULL; 6020 } 6021 6022 iterator->common.magic = DNS_RDATASETITER_MAGIC; 6023 iterator->common.methods = &rdatasetiter_methods; 6024 iterator->common.db = db; 6025 iterator->common.node = node; 6026 iterator->common.version = (dns_dbversion_t *)rbtversion; 6027 iterator->common.options = options; 6028 iterator->common.now = now; 6029 6030 isc_refcount_increment(&rbtnode->references); 6031 6032 iterator->current = NULL; 6033 6034 *iteratorp = (dns_rdatasetiter_t *)iterator; 6035 6036 return (ISC_R_SUCCESS); 6037 } 6038 6039 static bool 6040 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) { 6041 rdatasetheader_t *header, *header_next; 6042 bool cname, other_data; 6043 dns_rdatatype_t rdtype; 6044 6045 /* 6046 * The caller must hold the node lock. 6047 */ 6048 6049 /* 6050 * Look for CNAME and "other data" rdatasets active in our version. 6051 */ 6052 cname = false; 6053 other_data = false; 6054 for (header = node->data; header != NULL; header = header_next) { 6055 header_next = header->next; 6056 if (header->type == dns_rdatatype_cname) { 6057 /* 6058 * Look for an active extant CNAME. 6059 */ 6060 do { 6061 if (header->serial <= serial && !IGNORE(header)) 6062 { 6063 /* 6064 * Is this a "this rdataset doesn't 6065 * exist" record? 6066 */ 6067 if (NONEXISTENT(header)) { 6068 header = NULL; 6069 } 6070 break; 6071 } else { 6072 header = header->down; 6073 } 6074 } while (header != NULL); 6075 if (header != NULL) { 6076 cname = true; 6077 } 6078 } else { 6079 /* 6080 * Look for active extant "other data". 6081 * 6082 * "Other data" is any rdataset whose type is not 6083 * KEY, NSEC, SIG or RRSIG. 6084 */ 6085 rdtype = RBTDB_RDATATYPE_BASE(header->type); 6086 if (rdtype != dns_rdatatype_key && 6087 rdtype != dns_rdatatype_sig && 6088 rdtype != dns_rdatatype_nsec && 6089 rdtype != dns_rdatatype_rrsig) 6090 { 6091 /* 6092 * Is it active and extant? 6093 */ 6094 do { 6095 if (header->serial <= serial && 6096 !IGNORE(header)) 6097 { 6098 /* 6099 * Is this a "this rdataset 6100 * doesn't exist" record? 6101 */ 6102 if (NONEXISTENT(header)) { 6103 header = NULL; 6104 } 6105 break; 6106 } else { 6107 header = header->down; 6108 } 6109 } while (header != NULL); 6110 if (header != NULL) { 6111 other_data = true; 6112 } 6113 } 6114 } 6115 } 6116 6117 if (cname && other_data) { 6118 return (true); 6119 } 6120 6121 return (false); 6122 } 6123 6124 static void 6125 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) { 6126 INSIST(!IS_CACHE(rbtdb)); 6127 INSIST(newheader->heap_index == 0); 6128 INSIST(!ISC_LINK_LINKED(newheader, link)); 6129 6130 isc_heap_insert(rbtdb->heaps[idx], newheader); 6131 } 6132 6133 /* 6134 * node write lock must be held. 6135 */ 6136 static void 6137 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, 6138 rdatasetheader_t *header) { 6139 /* 6140 * Remove the old header from the heap 6141 */ 6142 if (header != NULL && header->heap_index != 0) { 6143 isc_heap_delete(rbtdb->heaps[header->node->locknum], 6144 header->heap_index); 6145 header->heap_index = 0; 6146 if (version != NULL) { 6147 new_reference(rbtdb, header->node, 6148 isc_rwlocktype_write); 6149 ISC_LIST_APPEND(version->resigned_list, header, link); 6150 } 6151 } 6152 } 6153 6154 static uint64_t 6155 recordsize(rdatasetheader_t *header, unsigned int namelen) { 6156 return (dns_rdataslab_rdatasize((unsigned char *)header, 6157 sizeof(*header)) + 6158 sizeof(dns_ttl_t) + sizeof(dns_rdatatype_t) + 6159 sizeof(dns_rdataclass_t) + namelen); 6160 } 6161 6162 static void 6163 update_recordsandxfrsize(bool add, rbtdb_version_t *rbtversion, 6164 rdatasetheader_t *header, unsigned int namelen) { 6165 unsigned char *hdr = (unsigned char *)header; 6166 size_t hdrsize = sizeof(*header); 6167 6168 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_write); 6169 if (add) { 6170 rbtversion->records += dns_rdataslab_count(hdr, hdrsize); 6171 rbtversion->xfrsize += recordsize(header, namelen); 6172 } else { 6173 rbtversion->records -= dns_rdataslab_count(hdr, hdrsize); 6174 rbtversion->xfrsize -= recordsize(header, namelen); 6175 } 6176 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write); 6177 } 6178 6179 /* 6180 * write lock on rbtnode must be held. 6181 */ 6182 static isc_result_t 6183 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename, 6184 rbtdb_version_t *rbtversion, rdatasetheader_t *newheader, 6185 unsigned int options, bool loading, dns_rdataset_t *addedrdataset, 6186 isc_stdtime_t now) { 6187 rbtdb_changed_t *changed = NULL; 6188 rdatasetheader_t *topheader = NULL, *topheader_prev = NULL; 6189 rdatasetheader_t *header = NULL, *sigheader = NULL; 6190 unsigned char *merged = NULL; 6191 isc_result_t result; 6192 bool header_nx; 6193 bool newheader_nx; 6194 bool merge; 6195 dns_rdatatype_t rdtype, covers; 6196 rbtdb_rdatatype_t negtype, sigtype; 6197 dns_trust_t trust; 6198 int idx; 6199 6200 /* 6201 * Add an rdatasetheader_t to a node. 6202 */ 6203 6204 /* 6205 * Caller must be holding the node lock. 6206 */ 6207 6208 if ((options & DNS_DBADD_MERGE) != 0) { 6209 REQUIRE(rbtversion != NULL); 6210 merge = true; 6211 } else { 6212 merge = false; 6213 } 6214 6215 if ((options & DNS_DBADD_FORCE) != 0) { 6216 trust = dns_trust_ultimate; 6217 } else { 6218 trust = newheader->trust; 6219 } 6220 6221 if (rbtversion != NULL && !loading) { 6222 /* 6223 * We always add a changed record, even if no changes end up 6224 * being made to this node, because it's harmless and 6225 * simplifies the code. 6226 */ 6227 changed = add_changed(rbtdb, rbtversion, rbtnode); 6228 if (changed == NULL) { 6229 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6230 return (ISC_R_NOMEMORY); 6231 } 6232 } 6233 6234 newheader_nx = NONEXISTENT(newheader) ? true : false; 6235 topheader_prev = NULL; 6236 sigheader = NULL; 6237 negtype = 0; 6238 if (rbtversion == NULL && !newheader_nx) { 6239 rdtype = RBTDB_RDATATYPE_BASE(newheader->type); 6240 covers = RBTDB_RDATATYPE_EXT(newheader->type); 6241 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers); 6242 if (NEGATIVE(newheader)) { 6243 /* 6244 * We're adding a negative cache entry. 6245 */ 6246 if (covers == dns_rdatatype_any) { 6247 /* 6248 * If we're adding an negative cache entry 6249 * which covers all types (NXDOMAIN, 6250 * NODATA(QTYPE=ANY)), 6251 * 6252 * We make all other data ancient so that the 6253 * only rdataset that can be found at this 6254 * node is the negative cache entry. 6255 */ 6256 for (topheader = rbtnode->data; 6257 topheader != NULL; 6258 topheader = topheader->next) 6259 { 6260 set_ttl(rbtdb, topheader, 0); 6261 mark_header_ancient(rbtdb, topheader); 6262 } 6263 goto find_header; 6264 } 6265 /* 6266 * Otherwise look for any RRSIGs of the given 6267 * type so they can be marked ancient later. 6268 */ 6269 for (topheader = rbtnode->data; topheader != NULL; 6270 topheader = topheader->next) 6271 { 6272 if (topheader->type == sigtype) { 6273 sigheader = topheader; 6274 } 6275 } 6276 negtype = RBTDB_RDATATYPE_VALUE(covers, 0); 6277 } else { 6278 /* 6279 * We're adding something that isn't a 6280 * negative cache entry. Look for an extant 6281 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative 6282 * cache entry. If we're adding an RRSIG, also 6283 * check for an extant non-ancient NODATA ncache 6284 * entry which covers the same type as the RRSIG. 6285 */ 6286 for (topheader = rbtnode->data; topheader != NULL; 6287 topheader = topheader->next) 6288 { 6289 if ((topheader->type == 6290 RBTDB_RDATATYPE_NCACHEANY) || 6291 (newheader->type == sigtype && 6292 topheader->type == 6293 RBTDB_RDATATYPE_VALUE(0, covers))) 6294 { 6295 break; 6296 } 6297 } 6298 if (topheader != NULL && EXISTS(topheader) && 6299 ACTIVE(topheader, now)) 6300 { 6301 /* 6302 * Found one. 6303 */ 6304 if (trust < topheader->trust) { 6305 /* 6306 * The NXDOMAIN/NODATA(QTYPE=ANY) 6307 * is more trusted. 6308 */ 6309 free_rdataset(rbtdb, rbtdb->common.mctx, 6310 newheader); 6311 if (addedrdataset != NULL) { 6312 bind_rdataset( 6313 rbtdb, rbtnode, 6314 topheader, now, 6315 isc_rwlocktype_write, 6316 addedrdataset); 6317 } 6318 return (DNS_R_UNCHANGED); 6319 } 6320 /* 6321 * The new rdataset is better. Expire the 6322 * ncache entry. 6323 */ 6324 set_ttl(rbtdb, topheader, 0); 6325 mark_header_ancient(rbtdb, topheader); 6326 topheader = NULL; 6327 goto find_header; 6328 } 6329 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype); 6330 } 6331 } 6332 6333 for (topheader = rbtnode->data; topheader != NULL; 6334 topheader = topheader->next) 6335 { 6336 if (topheader->type == newheader->type || 6337 topheader->type == negtype) 6338 { 6339 break; 6340 } 6341 topheader_prev = topheader; 6342 } 6343 6344 find_header: 6345 /* 6346 * If header isn't NULL, we've found the right type. There may be 6347 * IGNORE rdatasets between the top of the chain and the first real 6348 * data. We skip over them. 6349 */ 6350 header = topheader; 6351 while (header != NULL && IGNORE(header)) { 6352 header = header->down; 6353 } 6354 if (header != NULL) { 6355 header_nx = NONEXISTENT(header) ? true : false; 6356 6357 /* 6358 * Deleting an already non-existent rdataset has no effect. 6359 */ 6360 if (header_nx && newheader_nx) { 6361 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6362 return (DNS_R_UNCHANGED); 6363 } 6364 6365 /* 6366 * Trying to add an rdataset with lower trust to a cache 6367 * DB has no effect, provided that the cache data isn't 6368 * stale. If the cache data is stale, new lower trust 6369 * data will supersede it below. Unclear what the best 6370 * policy is here. 6371 */ 6372 if (rbtversion == NULL && trust < header->trust && 6373 (ACTIVE(header, now) || header_nx)) 6374 { 6375 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6376 if (addedrdataset != NULL) { 6377 bind_rdataset(rbtdb, rbtnode, header, now, 6378 isc_rwlocktype_write, 6379 addedrdataset); 6380 } 6381 return (DNS_R_UNCHANGED); 6382 } 6383 6384 /* 6385 * Don't merge if a nonexistent rdataset is involved. 6386 */ 6387 if (merge && (header_nx || newheader_nx)) { 6388 merge = false; 6389 } 6390 6391 /* 6392 * If 'merge' is true, we'll try to create a new rdataset 6393 * that is the union of 'newheader' and 'header'. 6394 */ 6395 if (merge) { 6396 unsigned int flags = 0; 6397 INSIST(rbtversion->serial >= header->serial); 6398 merged = NULL; 6399 result = ISC_R_SUCCESS; 6400 6401 if ((options & DNS_DBADD_EXACT) != 0) { 6402 flags |= DNS_RDATASLAB_EXACT; 6403 } 6404 /* 6405 * TTL use here is irrelevant to the cache; 6406 * merge is only done with zonedbs. 6407 */ 6408 if ((options & DNS_DBADD_EXACTTTL) != 0 && 6409 newheader->rdh_ttl != header->rdh_ttl) 6410 { 6411 result = DNS_R_NOTEXACT; 6412 } else if (newheader->rdh_ttl != header->rdh_ttl) { 6413 flags |= DNS_RDATASLAB_FORCE; 6414 } 6415 if (result == ISC_R_SUCCESS) { 6416 result = dns_rdataslab_merge( 6417 (unsigned char *)header, 6418 (unsigned char *)newheader, 6419 (unsigned int)(sizeof(*newheader)), 6420 rbtdb->common.mctx, 6421 rbtdb->common.rdclass, 6422 (dns_rdatatype_t)header->type, flags, 6423 &merged); 6424 } 6425 if (result == ISC_R_SUCCESS) { 6426 /* 6427 * If 'header' has the same serial number as 6428 * we do, we could clean it up now if we knew 6429 * that our caller had no references to it. 6430 * We don't know this, however, so we leave it 6431 * alone. It will get cleaned up when 6432 * clean_zone_node() runs. 6433 */ 6434 free_rdataset(rbtdb, rbtdb->common.mctx, 6435 newheader); 6436 newheader = (rdatasetheader_t *)merged; 6437 init_rdataset(rbtdb, newheader); 6438 update_newheader(newheader, header); 6439 if (loading && RESIGN(newheader) && 6440 RESIGN(header) && 6441 resign_sooner(header, newheader)) 6442 { 6443 newheader->resign = header->resign; 6444 newheader->resign_lsb = 6445 header->resign_lsb; 6446 } 6447 } else { 6448 free_rdataset(rbtdb, rbtdb->common.mctx, 6449 newheader); 6450 return (result); 6451 } 6452 } 6453 /* 6454 * Don't replace existing NS, A and AAAA RRsets in the 6455 * cache if they are already exist. This prevents named 6456 * being locked to old servers. Don't lower trust of 6457 * existing record if the update is forced. Nothing 6458 * special to be done w.r.t stale data; it gets replaced 6459 * normally further down. 6460 */ 6461 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 6462 header->type == dns_rdatatype_ns && !header_nx && 6463 !newheader_nx && header->trust >= newheader->trust && 6464 dns_rdataslab_equalx((unsigned char *)header, 6465 (unsigned char *)newheader, 6466 (unsigned int)(sizeof(*newheader)), 6467 rbtdb->common.rdclass, 6468 (dns_rdatatype_t)header->type)) 6469 { 6470 /* 6471 * Honour the new ttl if it is less than the 6472 * older one. 6473 */ 6474 if (header->rdh_ttl > newheader->rdh_ttl) { 6475 set_ttl(rbtdb, header, newheader->rdh_ttl); 6476 } 6477 if (header->noqname == NULL && 6478 newheader->noqname != NULL) 6479 { 6480 header->noqname = newheader->noqname; 6481 newheader->noqname = NULL; 6482 } 6483 if (header->closest == NULL && 6484 newheader->closest != NULL) 6485 { 6486 header->closest = newheader->closest; 6487 newheader->closest = NULL; 6488 } 6489 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6490 if (addedrdataset != NULL) { 6491 bind_rdataset(rbtdb, rbtnode, header, now, 6492 isc_rwlocktype_write, 6493 addedrdataset); 6494 } 6495 return (ISC_R_SUCCESS); 6496 } 6497 /* 6498 * If we have will be replacing a NS RRset force its TTL 6499 * to be no more than the current NS RRset's TTL. This 6500 * ensures the delegations that are withdrawn are honoured. 6501 */ 6502 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 6503 header->type == dns_rdatatype_ns && !header_nx && 6504 !newheader_nx && header->trust <= newheader->trust) 6505 { 6506 if (newheader->rdh_ttl > header->rdh_ttl) { 6507 newheader->rdh_ttl = header->rdh_ttl; 6508 } 6509 } 6510 if (IS_CACHE(rbtdb) && ACTIVE(header, now) && 6511 (options & DNS_DBADD_PREFETCH) == 0 && 6512 (header->type == dns_rdatatype_a || 6513 header->type == dns_rdatatype_aaaa || 6514 header->type == dns_rdatatype_ds || 6515 header->type == RBTDB_RDATATYPE_SIGDS) && 6516 !header_nx && !newheader_nx && 6517 header->trust >= newheader->trust && 6518 dns_rdataslab_equal((unsigned char *)header, 6519 (unsigned char *)newheader, 6520 (unsigned int)(sizeof(*newheader)))) 6521 { 6522 /* 6523 * Honour the new ttl if it is less than the 6524 * older one. 6525 */ 6526 if (header->rdh_ttl > newheader->rdh_ttl) { 6527 set_ttl(rbtdb, header, newheader->rdh_ttl); 6528 } 6529 if (header->noqname == NULL && 6530 newheader->noqname != NULL) 6531 { 6532 header->noqname = newheader->noqname; 6533 newheader->noqname = NULL; 6534 } 6535 if (header->closest == NULL && 6536 newheader->closest != NULL) 6537 { 6538 header->closest = newheader->closest; 6539 newheader->closest = NULL; 6540 } 6541 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6542 if (addedrdataset != NULL) { 6543 bind_rdataset(rbtdb, rbtnode, header, now, 6544 isc_rwlocktype_write, 6545 addedrdataset); 6546 } 6547 return (ISC_R_SUCCESS); 6548 } 6549 INSIST(rbtversion == NULL || 6550 rbtversion->serial >= topheader->serial); 6551 if (loading) { 6552 newheader->down = NULL; 6553 idx = newheader->node->locknum; 6554 if (IS_CACHE(rbtdb)) { 6555 if (ZEROTTL(newheader)) { 6556 ISC_LIST_APPEND(rbtdb->rdatasets[idx], 6557 newheader, link); 6558 } else { 6559 ISC_LIST_PREPEND(rbtdb->rdatasets[idx], 6560 newheader, link); 6561 } 6562 INSIST(rbtdb->heaps != NULL); 6563 isc_heap_insert(rbtdb->heaps[idx], newheader); 6564 } else if (RESIGN(newheader)) { 6565 resign_insert(rbtdb, idx, newheader); 6566 /* 6567 * Don't call resign_delete as we don't need 6568 * to reverse the delete. The free_rdataset 6569 * call below will clean up the heap entry. 6570 */ 6571 } 6572 6573 /* 6574 * There are no other references to 'header' when 6575 * loading, so we MAY clean up 'header' now. 6576 * Since we don't generate changed records when 6577 * loading, we MUST clean up 'header' now. 6578 */ 6579 if (topheader_prev != NULL) { 6580 topheader_prev->next = newheader; 6581 } else { 6582 rbtnode->data = newheader; 6583 } 6584 newheader->next = topheader->next; 6585 if (rbtversion != NULL && !header_nx) { 6586 update_recordsandxfrsize(false, rbtversion, 6587 header, 6588 nodename->length); 6589 } 6590 free_rdataset(rbtdb, rbtdb->common.mctx, header); 6591 } else { 6592 idx = newheader->node->locknum; 6593 if (IS_CACHE(rbtdb)) { 6594 INSIST(rbtdb->heaps != NULL); 6595 isc_heap_insert(rbtdb->heaps[idx], newheader); 6596 if (ZEROTTL(newheader)) { 6597 ISC_LIST_APPEND(rbtdb->rdatasets[idx], 6598 newheader, link); 6599 } else { 6600 ISC_LIST_PREPEND(rbtdb->rdatasets[idx], 6601 newheader, link); 6602 } 6603 } else if (RESIGN(newheader)) { 6604 resign_insert(rbtdb, idx, newheader); 6605 resign_delete(rbtdb, rbtversion, header); 6606 } 6607 if (topheader_prev != NULL) { 6608 topheader_prev->next = newheader; 6609 } else { 6610 rbtnode->data = newheader; 6611 } 6612 newheader->next = topheader->next; 6613 newheader->down = topheader; 6614 topheader->next = newheader; 6615 rbtnode->dirty = 1; 6616 if (changed != NULL) { 6617 changed->dirty = true; 6618 } 6619 if (rbtversion == NULL) { 6620 set_ttl(rbtdb, header, 0); 6621 mark_header_ancient(rbtdb, header); 6622 if (sigheader != NULL) { 6623 set_ttl(rbtdb, sigheader, 0); 6624 mark_header_ancient(rbtdb, sigheader); 6625 } 6626 } 6627 if (rbtversion != NULL && !header_nx) { 6628 update_recordsandxfrsize(false, rbtversion, 6629 header, 6630 nodename->length); 6631 } 6632 } 6633 } else { 6634 /* 6635 * No non-IGNORED rdatasets of the given type exist at 6636 * this node. 6637 */ 6638 6639 /* 6640 * If we're trying to delete the type, don't bother. 6641 */ 6642 if (newheader_nx) { 6643 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 6644 return (DNS_R_UNCHANGED); 6645 } 6646 6647 idx = newheader->node->locknum; 6648 if (IS_CACHE(rbtdb)) { 6649 isc_heap_insert(rbtdb->heaps[idx], newheader); 6650 if (ZEROTTL(newheader)) { 6651 ISC_LIST_APPEND(rbtdb->rdatasets[idx], 6652 newheader, link); 6653 } else { 6654 ISC_LIST_PREPEND(rbtdb->rdatasets[idx], 6655 newheader, link); 6656 } 6657 } else if (RESIGN(newheader)) { 6658 resign_insert(rbtdb, idx, newheader); 6659 resign_delete(rbtdb, rbtversion, header); 6660 } 6661 6662 if (topheader != NULL) { 6663 /* 6664 * We have an list of rdatasets of the given type, 6665 * but they're all marked IGNORE. We simply insert 6666 * the new rdataset at the head of the list. 6667 * 6668 * Ignored rdatasets cannot occur during loading, so 6669 * we INSIST on it. 6670 */ 6671 INSIST(!loading); 6672 INSIST(rbtversion == NULL || 6673 rbtversion->serial >= topheader->serial); 6674 if (topheader_prev != NULL) { 6675 topheader_prev->next = newheader; 6676 } else { 6677 rbtnode->data = newheader; 6678 } 6679 newheader->next = topheader->next; 6680 newheader->down = topheader; 6681 topheader->next = newheader; 6682 rbtnode->dirty = 1; 6683 if (changed != NULL) { 6684 changed->dirty = true; 6685 } 6686 } else { 6687 /* 6688 * No rdatasets of the given type exist at the node. 6689 */ 6690 newheader->next = rbtnode->data; 6691 newheader->down = NULL; 6692 rbtnode->data = newheader; 6693 } 6694 } 6695 6696 if (rbtversion != NULL && !newheader_nx) { 6697 update_recordsandxfrsize(true, rbtversion, newheader, 6698 nodename->length); 6699 } 6700 6701 /* 6702 * Check if the node now contains CNAME and other data. 6703 */ 6704 if (rbtversion != NULL && 6705 cname_and_other_data(rbtnode, rbtversion->serial)) 6706 { 6707 return (DNS_R_CNAMEANDOTHER); 6708 } 6709 6710 if (addedrdataset != NULL) { 6711 bind_rdataset(rbtdb, rbtnode, newheader, now, 6712 isc_rwlocktype_write, addedrdataset); 6713 } 6714 6715 return (ISC_R_SUCCESS); 6716 } 6717 6718 static bool 6719 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, 6720 rbtdb_rdatatype_t type) { 6721 if (IS_CACHE(rbtdb)) { 6722 if (type == dns_rdatatype_dname) { 6723 return (true); 6724 } else { 6725 return (false); 6726 } 6727 } else if (type == dns_rdatatype_dname || 6728 (type == dns_rdatatype_ns && 6729 (node != rbtdb->origin_node || IS_STUB(rbtdb)))) 6730 { 6731 return (true); 6732 } 6733 return (false); 6734 } 6735 6736 static isc_result_t 6737 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, 6738 dns_rdataset_t *rdataset) { 6739 struct noqname *noqname; 6740 isc_mem_t *mctx = rbtdb->common.mctx; 6741 dns_name_t name; 6742 dns_rdataset_t neg, negsig; 6743 isc_result_t result; 6744 isc_region_t r; 6745 6746 dns_name_init(&name, NULL); 6747 dns_rdataset_init(&neg); 6748 dns_rdataset_init(&negsig); 6749 6750 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig); 6751 RUNTIME_CHECK(result == ISC_R_SUCCESS); 6752 6753 noqname = isc_mem_get(mctx, sizeof(*noqname)); 6754 dns_name_init(&noqname->name, NULL); 6755 noqname->neg = NULL; 6756 noqname->negsig = NULL; 6757 noqname->type = neg.type; 6758 dns_name_dup(&name, mctx, &noqname->name); 6759 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0); 6760 if (result != ISC_R_SUCCESS) { 6761 goto cleanup; 6762 } 6763 noqname->neg = r.base; 6764 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0); 6765 if (result != ISC_R_SUCCESS) { 6766 goto cleanup; 6767 } 6768 noqname->negsig = r.base; 6769 dns_rdataset_disassociate(&neg); 6770 dns_rdataset_disassociate(&negsig); 6771 newheader->noqname = noqname; 6772 return (ISC_R_SUCCESS); 6773 6774 cleanup: 6775 dns_rdataset_disassociate(&neg); 6776 dns_rdataset_disassociate(&negsig); 6777 free_noqname(mctx, &noqname); 6778 return (result); 6779 } 6780 6781 static isc_result_t 6782 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, 6783 dns_rdataset_t *rdataset) { 6784 struct noqname *closest; 6785 isc_mem_t *mctx = rbtdb->common.mctx; 6786 dns_name_t name; 6787 dns_rdataset_t neg, negsig; 6788 isc_result_t result; 6789 isc_region_t r; 6790 6791 dns_name_init(&name, NULL); 6792 dns_rdataset_init(&neg); 6793 dns_rdataset_init(&negsig); 6794 6795 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig); 6796 RUNTIME_CHECK(result == ISC_R_SUCCESS); 6797 6798 closest = isc_mem_get(mctx, sizeof(*closest)); 6799 dns_name_init(&closest->name, NULL); 6800 closest->neg = NULL; 6801 closest->negsig = NULL; 6802 closest->type = neg.type; 6803 dns_name_dup(&name, mctx, &closest->name); 6804 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0); 6805 if (result != ISC_R_SUCCESS) { 6806 goto cleanup; 6807 } 6808 closest->neg = r.base; 6809 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0); 6810 if (result != ISC_R_SUCCESS) { 6811 goto cleanup; 6812 } 6813 closest->negsig = r.base; 6814 dns_rdataset_disassociate(&neg); 6815 dns_rdataset_disassociate(&negsig); 6816 newheader->closest = closest; 6817 return (ISC_R_SUCCESS); 6818 6819 cleanup: 6820 dns_rdataset_disassociate(&neg); 6821 dns_rdataset_disassociate(&negsig); 6822 free_noqname(mctx, &closest); 6823 return (result); 6824 } 6825 6826 static dns_dbmethods_t zone_methods; 6827 6828 static size_t 6829 rdataset_size(rdatasetheader_t *header) { 6830 if (!NONEXISTENT(header)) { 6831 return (dns_rdataslab_size((unsigned char *)header, 6832 sizeof(*header))); 6833 } 6834 6835 return (sizeof(*header)); 6836 } 6837 6838 static isc_result_t 6839 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 6840 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, 6841 dns_rdataset_t *addedrdataset) { 6842 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 6843 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 6844 rbtdb_version_t *rbtversion = version; 6845 isc_region_t region; 6846 rdatasetheader_t *newheader; 6847 rdatasetheader_t *header; 6848 isc_result_t result; 6849 bool delegating; 6850 bool newnsec; 6851 bool tree_locked = false; 6852 bool cache_is_overmem = false; 6853 dns_fixedname_t fixed; 6854 dns_name_t *name; 6855 6856 REQUIRE(VALID_RBTDB(rbtdb)); 6857 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 6858 6859 if (rbtdb->common.methods == &zone_methods) { 6860 /* 6861 * SOA records are only allowed at top of zone. 6862 */ 6863 if (rdataset->type == dns_rdatatype_soa && 6864 node != rbtdb->origin_node) 6865 { 6866 return (DNS_R_NOTZONETOP); 6867 } 6868 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 6869 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 && 6870 (rdataset->type == dns_rdatatype_nsec3 || 6871 rdataset->covers == dns_rdatatype_nsec3)) || 6872 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 && 6873 rdataset->type != dns_rdatatype_nsec3 && 6874 rdataset->covers != dns_rdatatype_nsec3))); 6875 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 6876 } 6877 6878 if (rbtversion == NULL) { 6879 if (now == 0) { 6880 isc_stdtime_get(&now); 6881 } 6882 } else { 6883 now = 0; 6884 } 6885 6886 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 6887 ®ion, sizeof(rdatasetheader_t)); 6888 if (result != ISC_R_SUCCESS) { 6889 return (result); 6890 } 6891 6892 name = dns_fixedname_initname(&fixed); 6893 nodefullname(db, node, name); 6894 dns_rdataset_getownercase(rdataset, name); 6895 6896 newheader = (rdatasetheader_t *)region.base; 6897 init_rdataset(rbtdb, newheader); 6898 setownercase(newheader, name); 6899 set_ttl(rbtdb, newheader, rdataset->ttl + now); 6900 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type, 6901 rdataset->covers); 6902 atomic_init(&newheader->attributes, 0); 6903 if (rdataset->ttl == 0U) { 6904 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_ZEROTTL); 6905 } 6906 newheader->noqname = NULL; 6907 newheader->closest = NULL; 6908 atomic_init(&newheader->count, 6909 atomic_fetch_add_relaxed(&init_count, 1)); 6910 newheader->trust = rdataset->trust; 6911 newheader->last_used = now; 6912 newheader->node = rbtnode; 6913 if (rbtversion != NULL) { 6914 newheader->serial = rbtversion->serial; 6915 now = 0; 6916 6917 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 6918 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN); 6919 newheader->resign = 6920 (isc_stdtime_t)(dns_time64_from32( 6921 rdataset->resign) >> 6922 1); 6923 newheader->resign_lsb = rdataset->resign & 0x1; 6924 } else { 6925 newheader->resign = 0; 6926 newheader->resign_lsb = 0; 6927 } 6928 } else { 6929 newheader->serial = 1; 6930 newheader->resign = 0; 6931 newheader->resign_lsb = 0; 6932 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) { 6933 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_PREFETCH); 6934 } 6935 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) { 6936 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NEGATIVE); 6937 } 6938 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) { 6939 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NXDOMAIN); 6940 } 6941 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) { 6942 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_OPTOUT); 6943 } 6944 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { 6945 result = addnoqname(rbtdb, newheader, rdataset); 6946 if (result != ISC_R_SUCCESS) { 6947 free_rdataset(rbtdb, rbtdb->common.mctx, 6948 newheader); 6949 return (result); 6950 } 6951 } 6952 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { 6953 result = addclosest(rbtdb, newheader, rdataset); 6954 if (result != ISC_R_SUCCESS) { 6955 free_rdataset(rbtdb, rbtdb->common.mctx, 6956 newheader); 6957 return (result); 6958 } 6959 } 6960 } 6961 6962 /* 6963 * If we're adding a delegation type (e.g. NS or DNAME for a zone, 6964 * just DNAME for the cache), then we need to set the callback bit 6965 * on the node. 6966 */ 6967 if (delegating_type(rbtdb, rbtnode, rdataset->type)) { 6968 delegating = true; 6969 } else { 6970 delegating = false; 6971 } 6972 6973 /* 6974 * Add to the auxiliary NSEC tree if we're adding an NSEC record. 6975 */ 6976 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 6977 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC && 6978 rdataset->type == dns_rdatatype_nsec) 6979 { 6980 newnsec = true; 6981 } else { 6982 newnsec = false; 6983 } 6984 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 6985 6986 /* 6987 * If we're adding a delegation type, adding to the auxiliary NSEC 6988 * tree, or the DB is a cache in an overmem state, hold an 6989 * exclusive lock on the tree. In the latter case the lock does 6990 * not necessarily have to be acquired but it will help purge 6991 * ancient entries more effectively. 6992 */ 6993 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx)) { 6994 cache_is_overmem = true; 6995 } 6996 if (delegating || newnsec || cache_is_overmem) { 6997 tree_locked = true; 6998 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 6999 } 7000 7001 if (cache_is_overmem) { 7002 overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader), 7003 tree_locked); 7004 } 7005 7006 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7007 isc_rwlocktype_write); 7008 7009 if (rbtdb->rrsetstats != NULL) { 7010 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_STATCOUNT); 7011 update_rrsetstats(rbtdb, newheader->type, 7012 atomic_load_acquire(&newheader->attributes), 7013 true); 7014 } 7015 7016 if (IS_CACHE(rbtdb)) { 7017 if (tree_locked) { 7018 cleanup_dead_nodes(rbtdb, rbtnode->locknum); 7019 } 7020 7021 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1); 7022 if (header != NULL) { 7023 dns_ttl_t rdh_ttl = header->rdh_ttl; 7024 7025 /* Only account for stale TTL if cache is not overmem */ 7026 if (!cache_is_overmem) { 7027 rdh_ttl += rbtdb->serve_stale_ttl; 7028 } 7029 7030 if (rdh_ttl < now - RBTDB_VIRTUAL) { 7031 expire_header(rbtdb, header, tree_locked, 7032 expire_ttl); 7033 } 7034 } 7035 7036 /* 7037 * If we've been holding a write lock on the tree just for 7038 * cleaning, we can release it now. However, we still need the 7039 * node lock. 7040 */ 7041 if (tree_locked && !delegating && !newnsec) { 7042 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 7043 tree_locked = false; 7044 } 7045 } 7046 7047 result = ISC_R_SUCCESS; 7048 if (newnsec) { 7049 dns_rbtnode_t *nsecnode; 7050 7051 nsecnode = NULL; 7052 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); 7053 if (result == ISC_R_SUCCESS) { 7054 nsecnode->nsec = DNS_RBT_NSEC_NSEC; 7055 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC; 7056 } else if (result == ISC_R_EXISTS) { 7057 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC; 7058 result = ISC_R_SUCCESS; 7059 } 7060 } 7061 7062 if (result == ISC_R_SUCCESS) { 7063 result = add32(rbtdb, rbtnode, name, rbtversion, newheader, 7064 options, false, addedrdataset, now); 7065 } 7066 if (result == ISC_R_SUCCESS && delegating) { 7067 rbtnode->find_callback = 1; 7068 } 7069 7070 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7071 isc_rwlocktype_write); 7072 7073 if (tree_locked) { 7074 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 7075 } 7076 7077 /* 7078 * Update the zone's secure status. If version is non-NULL 7079 * this is deferred until closeversion() is called. 7080 */ 7081 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { 7082 iszonesecure(db, version, rbtdb->origin_node); 7083 } 7084 7085 return (result); 7086 } 7087 7088 static isc_result_t 7089 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 7090 dns_rdataset_t *rdataset, unsigned int options, 7091 dns_rdataset_t *newrdataset) { 7092 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7093 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 7094 rbtdb_version_t *rbtversion = version; 7095 dns_fixedname_t fname; 7096 dns_name_t *nodename = dns_fixedname_initname(&fname); 7097 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader; 7098 unsigned char *subresult; 7099 isc_region_t region; 7100 isc_result_t result; 7101 rbtdb_changed_t *changed; 7102 7103 REQUIRE(VALID_RBTDB(rbtdb)); 7104 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb); 7105 7106 if (rbtdb->common.methods == &zone_methods) { 7107 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7108 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 && 7109 (rdataset->type == dns_rdatatype_nsec3 || 7110 rdataset->covers == dns_rdatatype_nsec3)) || 7111 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 && 7112 rdataset->type != dns_rdatatype_nsec3 && 7113 rdataset->covers != dns_rdatatype_nsec3))); 7114 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 7115 } 7116 7117 nodefullname(db, node, nodename); 7118 7119 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 7120 ®ion, sizeof(rdatasetheader_t)); 7121 if (result != ISC_R_SUCCESS) { 7122 return (result); 7123 } 7124 newheader = (rdatasetheader_t *)region.base; 7125 init_rdataset(rbtdb, newheader); 7126 set_ttl(rbtdb, newheader, rdataset->ttl); 7127 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type, 7128 rdataset->covers); 7129 atomic_init(&newheader->attributes, 0); 7130 newheader->serial = rbtversion->serial; 7131 newheader->trust = 0; 7132 newheader->noqname = NULL; 7133 newheader->closest = NULL; 7134 atomic_init(&newheader->count, 7135 atomic_fetch_add_relaxed(&init_count, 1)); 7136 newheader->last_used = 0; 7137 newheader->node = rbtnode; 7138 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 7139 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN); 7140 newheader->resign = 7141 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> 7142 1); 7143 newheader->resign_lsb = rdataset->resign & 0x1; 7144 } else { 7145 newheader->resign = 0; 7146 newheader->resign_lsb = 0; 7147 } 7148 7149 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7150 isc_rwlocktype_write); 7151 7152 changed = add_changed(rbtdb, rbtversion, rbtnode); 7153 if (changed == NULL) { 7154 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7155 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7156 isc_rwlocktype_write); 7157 return (ISC_R_NOMEMORY); 7158 } 7159 7160 topheader_prev = NULL; 7161 for (topheader = rbtnode->data; topheader != NULL; 7162 topheader = topheader->next) 7163 { 7164 if (topheader->type == newheader->type) { 7165 break; 7166 } 7167 topheader_prev = topheader; 7168 } 7169 /* 7170 * If header isn't NULL, we've found the right type. There may be 7171 * IGNORE rdatasets between the top of the chain and the first real 7172 * data. We skip over them. 7173 */ 7174 header = topheader; 7175 while (header != NULL && IGNORE(header)) { 7176 header = header->down; 7177 } 7178 if (header != NULL && EXISTS(header)) { 7179 unsigned int flags = 0; 7180 subresult = NULL; 7181 result = ISC_R_SUCCESS; 7182 if ((options & DNS_DBSUB_EXACT) != 0) { 7183 flags |= DNS_RDATASLAB_EXACT; 7184 if (newheader->rdh_ttl != header->rdh_ttl) { 7185 result = DNS_R_NOTEXACT; 7186 } 7187 } 7188 if (result == ISC_R_SUCCESS) { 7189 result = dns_rdataslab_subtract( 7190 (unsigned char *)header, 7191 (unsigned char *)newheader, 7192 (unsigned int)(sizeof(*newheader)), 7193 rbtdb->common.mctx, rbtdb->common.rdclass, 7194 (dns_rdatatype_t)header->type, flags, 7195 &subresult); 7196 } 7197 if (result == ISC_R_SUCCESS) { 7198 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7199 newheader = (rdatasetheader_t *)subresult; 7200 init_rdataset(rbtdb, newheader); 7201 update_newheader(newheader, header); 7202 if (RESIGN(header)) { 7203 RDATASET_ATTR_SET(newheader, 7204 RDATASET_ATTR_RESIGN); 7205 newheader->resign = header->resign; 7206 newheader->resign_lsb = header->resign_lsb; 7207 resign_insert(rbtdb, rbtnode->locknum, 7208 newheader); 7209 } 7210 /* 7211 * We have to set the serial since the rdataslab 7212 * subtraction routine copies the reserved portion of 7213 * header, not newheader. 7214 */ 7215 newheader->serial = rbtversion->serial; 7216 /* 7217 * XXXJT: dns_rdataslab_subtract() copied the pointers 7218 * to additional info. We need to clear these fields 7219 * to avoid having duplicated references. 7220 */ 7221 update_recordsandxfrsize(true, rbtversion, newheader, 7222 nodename->length); 7223 } else if (result == DNS_R_NXRRSET) { 7224 /* 7225 * This subtraction would remove all of the rdata; 7226 * add a nonexistent header instead. 7227 */ 7228 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7229 newheader = new_rdataset(rbtdb, rbtdb->common.mctx); 7230 if (newheader == NULL) { 7231 result = ISC_R_NOMEMORY; 7232 goto unlock; 7233 } 7234 init_rdataset(rbtdb, newheader); 7235 set_ttl(rbtdb, newheader, 0); 7236 newheader->type = topheader->type; 7237 atomic_init(&newheader->attributes, 7238 RDATASET_ATTR_NONEXISTENT); 7239 newheader->trust = 0; 7240 newheader->serial = rbtversion->serial; 7241 newheader->noqname = NULL; 7242 newheader->closest = NULL; 7243 atomic_init(&newheader->count, 0); 7244 newheader->node = rbtnode; 7245 newheader->resign = 0; 7246 newheader->resign_lsb = 0; 7247 newheader->last_used = 0; 7248 } else { 7249 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7250 goto unlock; 7251 } 7252 7253 /* 7254 * If we're here, we want to link newheader in front of 7255 * topheader. 7256 */ 7257 INSIST(rbtversion->serial >= topheader->serial); 7258 update_recordsandxfrsize(false, rbtversion, header, 7259 nodename->length); 7260 if (topheader_prev != NULL) { 7261 topheader_prev->next = newheader; 7262 } else { 7263 rbtnode->data = newheader; 7264 } 7265 newheader->next = topheader->next; 7266 newheader->down = topheader; 7267 topheader->next = newheader; 7268 rbtnode->dirty = 1; 7269 changed->dirty = true; 7270 resign_delete(rbtdb, rbtversion, header); 7271 } else { 7272 /* 7273 * The rdataset doesn't exist, so we don't need to do anything 7274 * to satisfy the deletion request. 7275 */ 7276 free_rdataset(rbtdb, rbtdb->common.mctx, newheader); 7277 if ((options & DNS_DBSUB_EXACT) != 0) { 7278 result = DNS_R_NOTEXACT; 7279 } else { 7280 result = DNS_R_UNCHANGED; 7281 } 7282 } 7283 7284 if (result == ISC_R_SUCCESS && newrdataset != NULL) { 7285 bind_rdataset(rbtdb, rbtnode, newheader, 0, 7286 isc_rwlocktype_write, newrdataset); 7287 } 7288 7289 if (result == DNS_R_NXRRSET && newrdataset != NULL && 7290 (options & DNS_DBSUB_WANTOLD) != 0) 7291 { 7292 bind_rdataset(rbtdb, rbtnode, header, 0, isc_rwlocktype_write, 7293 newrdataset); 7294 } 7295 7296 unlock: 7297 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7298 isc_rwlocktype_write); 7299 7300 /* 7301 * Update the zone's secure status. If version is non-NULL 7302 * this is deferred until closeversion() is called. 7303 */ 7304 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { 7305 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 7306 version = rbtdb->current_version; 7307 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 7308 iszonesecure(db, version, rbtdb->origin_node); 7309 } 7310 7311 return (result); 7312 } 7313 7314 static isc_result_t 7315 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 7316 dns_rdatatype_t type, dns_rdatatype_t covers) { 7317 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7318 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 7319 rbtdb_version_t *rbtversion = version; 7320 dns_fixedname_t fname; 7321 dns_name_t *nodename = dns_fixedname_initname(&fname); 7322 isc_result_t result; 7323 rdatasetheader_t *newheader; 7324 7325 REQUIRE(VALID_RBTDB(rbtdb)); 7326 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 7327 7328 if (type == dns_rdatatype_any) { 7329 return (ISC_R_NOTIMPLEMENTED); 7330 } 7331 if (type == dns_rdatatype_rrsig && covers == 0) { 7332 return (ISC_R_NOTIMPLEMENTED); 7333 } 7334 7335 newheader = new_rdataset(rbtdb, rbtdb->common.mctx); 7336 if (newheader == NULL) { 7337 return (ISC_R_NOMEMORY); 7338 } 7339 init_rdataset(rbtdb, newheader); 7340 set_ttl(rbtdb, newheader, 0); 7341 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers); 7342 atomic_init(&newheader->attributes, RDATASET_ATTR_NONEXISTENT); 7343 newheader->trust = 0; 7344 newheader->noqname = NULL; 7345 newheader->closest = NULL; 7346 if (rbtversion != NULL) { 7347 newheader->serial = rbtversion->serial; 7348 } else { 7349 newheader->serial = 0; 7350 } 7351 atomic_init(&newheader->count, 0); 7352 newheader->last_used = 0; 7353 newheader->node = rbtnode; 7354 7355 nodefullname(db, node, nodename); 7356 7357 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7358 isc_rwlocktype_write); 7359 result = add32(rbtdb, rbtnode, nodename, rbtversion, newheader, 7360 DNS_DBADD_FORCE, false, NULL, 0); 7361 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 7362 isc_rwlocktype_write); 7363 7364 /* 7365 * Update the zone's secure status. If version is non-NULL 7366 * this is deferred until closeversion() is called. 7367 */ 7368 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { 7369 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 7370 version = rbtdb->current_version; 7371 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 7372 iszonesecure(db, version, rbtdb->origin_node); 7373 } 7374 7375 return (result); 7376 } 7377 7378 /* 7379 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC 7380 */ 7381 static isc_result_t 7382 loadnode(dns_rbtdb_t *rbtdb, const dns_name_t *name, dns_rbtnode_t **nodep, 7383 bool hasnsec) { 7384 isc_result_t noderesult, nsecresult, tmpresult; 7385 dns_rbtnode_t *nsecnode = NULL, *node = NULL; 7386 7387 noderesult = dns_rbt_addnode(rbtdb->tree, name, &node); 7388 if (!hasnsec) { 7389 goto done; 7390 } 7391 if (noderesult == ISC_R_EXISTS) { 7392 /* 7393 * Add a node to the auxiliary NSEC tree for an old node 7394 * just now getting an NSEC record. 7395 */ 7396 if (node->nsec == DNS_RBT_NSEC_HAS_NSEC) { 7397 goto done; 7398 } 7399 } else if (noderesult != ISC_R_SUCCESS) { 7400 goto done; 7401 } 7402 7403 /* 7404 * Build the auxiliary tree for NSECs as we go. 7405 * This tree speeds searches for closest NSECs that would otherwise 7406 * need to examine many irrelevant nodes in large TLDs. 7407 * 7408 * Add nodes to the auxiliary tree after corresponding nodes have 7409 * been added to the main tree. 7410 */ 7411 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); 7412 if (nsecresult == ISC_R_SUCCESS) { 7413 nsecnode->nsec = DNS_RBT_NSEC_NSEC; 7414 node->nsec = DNS_RBT_NSEC_HAS_NSEC; 7415 goto done; 7416 } 7417 7418 if (nsecresult == ISC_R_EXISTS) { 7419 #if 1 /* 0 */ 7420 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 7421 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 7422 "addnode: NSEC node already exists"); 7423 #endif /* if 1 */ 7424 node->nsec = DNS_RBT_NSEC_HAS_NSEC; 7425 goto done; 7426 } 7427 7428 if (noderesult == ISC_R_SUCCESS) { 7429 /* 7430 * Remove the node we just added above. 7431 */ 7432 tmpresult = dns_rbt_deletenode(rbtdb->tree, node, false); 7433 if (tmpresult != ISC_R_SUCCESS) { 7434 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 7435 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 7436 "loading_addrdataset: " 7437 "dns_rbt_deletenode: %s after " 7438 "dns_rbt_addnode(NSEC): %s", 7439 isc_result_totext(tmpresult), 7440 isc_result_totext(noderesult)); 7441 } 7442 } 7443 7444 /* 7445 * Set the error condition to be returned. 7446 */ 7447 noderesult = nsecresult; 7448 7449 done: 7450 if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS) { 7451 *nodep = node; 7452 } 7453 7454 return (noderesult); 7455 } 7456 7457 static isc_result_t 7458 loading_addrdataset(void *arg, const dns_name_t *name, 7459 dns_rdataset_t *rdataset) { 7460 rbtdb_load_t *loadctx = arg; 7461 dns_rbtdb_t *rbtdb = loadctx->rbtdb; 7462 dns_rbtnode_t *node; 7463 isc_result_t result; 7464 isc_region_t region; 7465 rdatasetheader_t *newheader; 7466 7467 REQUIRE(rdataset->rdclass == rbtdb->common.rdclass); 7468 7469 /* 7470 * SOA records are only allowed at top of zone. 7471 */ 7472 if (rdataset->type == dns_rdatatype_soa && !IS_CACHE(rbtdb) && 7473 !dns_name_equal(name, &rbtdb->common.origin)) 7474 { 7475 return (DNS_R_NOTZONETOP); 7476 } 7477 7478 if (rdataset->type != dns_rdatatype_nsec3 && 7479 rdataset->covers != dns_rdatatype_nsec3) 7480 { 7481 add_empty_wildcards(rbtdb, name, false); 7482 } 7483 7484 if (dns_name_iswildcard(name)) { 7485 /* 7486 * NS record owners cannot legally be wild cards. 7487 */ 7488 if (rdataset->type == dns_rdatatype_ns) { 7489 return (DNS_R_INVALIDNS); 7490 } 7491 /* 7492 * NSEC3 record owners cannot legally be wild cards. 7493 */ 7494 if (rdataset->type == dns_rdatatype_nsec3) { 7495 return (DNS_R_INVALIDNSEC3); 7496 } 7497 result = add_wildcard_magic(rbtdb, name, false); 7498 if (result != ISC_R_SUCCESS) { 7499 return (result); 7500 } 7501 } 7502 7503 node = NULL; 7504 if (rdataset->type == dns_rdatatype_nsec3 || 7505 rdataset->covers == dns_rdatatype_nsec3) 7506 { 7507 result = dns_rbt_addnode(rbtdb->nsec3, name, &node); 7508 if (result == ISC_R_SUCCESS) { 7509 node->nsec = DNS_RBT_NSEC_NSEC3; 7510 } 7511 } else if (rdataset->type == dns_rdatatype_nsec) { 7512 result = loadnode(rbtdb, name, &node, true); 7513 } else { 7514 result = loadnode(rbtdb, name, &node, false); 7515 } 7516 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { 7517 return (result); 7518 } 7519 if (result == ISC_R_SUCCESS) { 7520 node->locknum = node->hashval % rbtdb->node_lock_count; 7521 } 7522 7523 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, 7524 ®ion, sizeof(rdatasetheader_t)); 7525 if (result != ISC_R_SUCCESS) { 7526 return (result); 7527 } 7528 newheader = (rdatasetheader_t *)region.base; 7529 init_rdataset(rbtdb, newheader); 7530 set_ttl(rbtdb, newheader, rdataset->ttl + loadctx->now); /* XXX overflow 7531 * check */ 7532 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type, 7533 rdataset->covers); 7534 atomic_init(&newheader->attributes, 0); 7535 newheader->trust = rdataset->trust; 7536 newheader->serial = 1; 7537 newheader->noqname = NULL; 7538 newheader->closest = NULL; 7539 atomic_init(&newheader->count, 7540 atomic_fetch_add_relaxed(&init_count, 1)); 7541 newheader->last_used = 0; 7542 newheader->node = node; 7543 setownercase(newheader, name); 7544 7545 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { 7546 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN); 7547 newheader->resign = 7548 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> 7549 1); 7550 newheader->resign_lsb = rdataset->resign & 0x1; 7551 } else { 7552 newheader->resign = 0; 7553 newheader->resign_lsb = 0; 7554 } 7555 7556 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write); 7557 result = add32(rbtdb, node, name, rbtdb->current_version, newheader, 7558 DNS_DBADD_MERGE, true, NULL, 0); 7559 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, 7560 isc_rwlocktype_write); 7561 7562 if (result == ISC_R_SUCCESS && 7563 delegating_type(rbtdb, node, rdataset->type)) 7564 { 7565 node->find_callback = 1; 7566 } else if (result == DNS_R_UNCHANGED) { 7567 result = ISC_R_SUCCESS; 7568 } 7569 7570 return (result); 7571 } 7572 7573 static isc_result_t 7574 rbt_datafixer(dns_rbtnode_t *rbtnode, void *base, size_t filesize, void *arg, 7575 uint64_t *crc) { 7576 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; 7577 rdatasetheader_t *header; 7578 unsigned char *limit = ((unsigned char *)base) + filesize; 7579 7580 REQUIRE(rbtnode != NULL); 7581 REQUIRE(VALID_RBTDB(rbtdb)); 7582 7583 for (header = rbtnode->data; header != NULL; header = header->next) { 7584 unsigned char *p = (unsigned char *)header; 7585 size_t size = dns_rdataslab_size(p, sizeof(*header)); 7586 isc_crc64_update(crc, p, size); 7587 #ifdef DEBUG 7588 hexdump("hashing header", p, sizeof(rdatasetheader_t)); 7589 hexdump("hashing slab", p + sizeof(rdatasetheader_t), 7590 size - sizeof(rdatasetheader_t)); 7591 #endif /* ifdef DEBUG */ 7592 header->serial = 1; 7593 header->is_mmapped = 1; 7594 header->node = rbtnode; 7595 header->node_is_relative = 0; 7596 7597 if (RESIGN(header) && 7598 (header->resign != 0 || header->resign_lsb != 0)) 7599 { 7600 int idx = header->node->locknum; 7601 isc_heap_insert(rbtdb->heaps[idx], header); 7602 } 7603 7604 if (header->next != NULL) { 7605 size_t cooked = dns_rbt_serialize_align(size); 7606 if ((uintptr_t)header->next != 7607 (p - (unsigned char *)base) + cooked) 7608 { 7609 return (ISC_R_INVALIDFILE); 7610 } 7611 header->next = (rdatasetheader_t *)(p + cooked); 7612 header->next_is_relative = 0; 7613 if ((header->next < (rdatasetheader_t *)base) || 7614 (header->next > (rdatasetheader_t *)limit)) 7615 { 7616 return (ISC_R_INVALIDFILE); 7617 } 7618 } 7619 7620 update_recordsandxfrsize(true, rbtdb->current_version, header, 7621 rbtnode->fullnamelen); 7622 } 7623 7624 /* We're done deserializing; clear fullnamelen */ 7625 rbtnode->fullnamelen = 0; 7626 7627 return (ISC_R_SUCCESS); 7628 } 7629 7630 /* 7631 * Load the RBT database from the image in 'f' 7632 */ 7633 static isc_result_t 7634 deserialize(void *arg, FILE *f, off_t offset) { 7635 isc_result_t result; 7636 rbtdb_load_t *loadctx = arg; 7637 dns_rbtdb_t *rbtdb = loadctx->rbtdb; 7638 rbtdb_file_header_t *header; 7639 int fd; 7640 off_t filesize = 0; 7641 char *base; 7642 dns_rbt_t *tree = NULL, *nsec = NULL, *nsec3 = NULL; 7643 int protect, flags; 7644 dns_rbtnode_t *origin_node = NULL; 7645 7646 REQUIRE(VALID_RBTDB(rbtdb)); 7647 7648 /* 7649 * TODO CKB: since this is read-write (had to be to add nodes later) 7650 * we will need to lock the file or the nodes in it before modifying 7651 * the nodes in the file. 7652 */ 7653 7654 /* Map in the whole file in one go */ 7655 fd = fileno(f); 7656 isc_file_getsizefd(fd, &filesize); 7657 protect = PROT_READ | PROT_WRITE; 7658 flags = MAP_PRIVATE; 7659 #ifdef MAP_FILE 7660 flags |= MAP_FILE; 7661 #endif /* ifdef MAP_FILE */ 7662 7663 base = isc_file_mmap(NULL, filesize, protect, flags, fd, 0); 7664 if (base == NULL || base == MAP_FAILED) { 7665 return (ISC_R_FAILURE); 7666 } 7667 7668 header = (rbtdb_file_header_t *)(base + offset); 7669 if (!match_header_version(header)) { 7670 result = ISC_R_INVALIDFILE; 7671 goto cleanup; 7672 } 7673 7674 if (header->tree != 0) { 7675 result = dns_rbt_deserialize_tree( 7676 base, filesize, (off_t)header->tree, rbtdb->common.mctx, 7677 delete_callback, rbtdb, rbt_datafixer, rbtdb, NULL, 7678 &tree); 7679 if (result != ISC_R_SUCCESS) { 7680 goto cleanup; 7681 } 7682 7683 result = dns_rbt_findnode(tree, &rbtdb->common.origin, NULL, 7684 &origin_node, NULL, 7685 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 7686 if (result != ISC_R_SUCCESS) { 7687 goto cleanup; 7688 } 7689 } 7690 7691 if (header->nsec != 0) { 7692 result = dns_rbt_deserialize_tree( 7693 base, filesize, (off_t)header->nsec, rbtdb->common.mctx, 7694 delete_callback, rbtdb, rbt_datafixer, rbtdb, NULL, 7695 &nsec); 7696 if (result != ISC_R_SUCCESS) { 7697 goto cleanup; 7698 } 7699 } 7700 7701 if (header->nsec3 != 0) { 7702 result = dns_rbt_deserialize_tree( 7703 base, filesize, (off_t)header->nsec3, 7704 rbtdb->common.mctx, delete_callback, rbtdb, 7705 rbt_datafixer, rbtdb, NULL, &nsec3); 7706 if (result != ISC_R_SUCCESS) { 7707 goto cleanup; 7708 } 7709 } 7710 7711 /* 7712 * We have a successfully loaded all the rbt trees now update 7713 * rbtdb to use them. 7714 */ 7715 7716 rbtdb->mmap_location = base; 7717 rbtdb->mmap_size = (size_t)filesize; 7718 7719 if (tree != NULL) { 7720 dns_rbt_destroy(&rbtdb->tree); 7721 rbtdb->tree = tree; 7722 rbtdb->origin_node = origin_node; 7723 } 7724 7725 if (nsec != NULL) { 7726 dns_rbt_destroy(&rbtdb->nsec); 7727 rbtdb->nsec = nsec; 7728 } 7729 7730 if (nsec3 != NULL) { 7731 dns_rbt_destroy(&rbtdb->nsec3); 7732 rbtdb->nsec3 = nsec3; 7733 } 7734 7735 return (ISC_R_SUCCESS); 7736 7737 cleanup: 7738 if (tree != NULL) { 7739 dns_rbt_destroy(&tree); 7740 } 7741 if (nsec != NULL) { 7742 dns_rbt_destroy(&nsec); 7743 } 7744 if (nsec3 != NULL) { 7745 dns_rbt_destroy(&nsec3); 7746 } 7747 isc_file_munmap(base, (size_t)filesize); 7748 return (result); 7749 } 7750 7751 static isc_result_t 7752 beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) { 7753 rbtdb_load_t *loadctx; 7754 dns_rbtdb_t *rbtdb; 7755 rbtdb = (dns_rbtdb_t *)db; 7756 7757 REQUIRE(DNS_CALLBACK_VALID(callbacks)); 7758 REQUIRE(VALID_RBTDB(rbtdb)); 7759 7760 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx)); 7761 7762 loadctx->rbtdb = rbtdb; 7763 if (IS_CACHE(rbtdb)) { 7764 isc_stdtime_get(&loadctx->now); 7765 } else { 7766 loadctx->now = 0; 7767 } 7768 7769 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 7770 7771 REQUIRE((rbtdb->attributes & 7772 (RBTDB_ATTR_LOADED | RBTDB_ATTR_LOADING)) == 0); 7773 rbtdb->attributes |= RBTDB_ATTR_LOADING; 7774 7775 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7776 7777 callbacks->add = loading_addrdataset; 7778 callbacks->add_private = loadctx; 7779 callbacks->deserialize = deserialize; 7780 callbacks->deserialize_private = loadctx; 7781 7782 return (ISC_R_SUCCESS); 7783 } 7784 7785 static isc_result_t 7786 endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) { 7787 rbtdb_load_t *loadctx; 7788 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 7789 7790 REQUIRE(VALID_RBTDB(rbtdb)); 7791 REQUIRE(DNS_CALLBACK_VALID(callbacks)); 7792 loadctx = callbacks->add_private; 7793 REQUIRE(loadctx != NULL); 7794 REQUIRE(loadctx->rbtdb == rbtdb); 7795 7796 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 7797 7798 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0); 7799 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0); 7800 7801 rbtdb->attributes &= ~RBTDB_ATTR_LOADING; 7802 rbtdb->attributes |= RBTDB_ATTR_LOADED; 7803 7804 /* 7805 * If there's a KEY rdataset at the zone origin containing a 7806 * zone key, we consider the zone secure. 7807 */ 7808 if (!IS_CACHE(rbtdb) && rbtdb->origin_node != NULL) { 7809 dns_dbversion_t *version = rbtdb->current_version; 7810 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7811 iszonesecure(db, version, rbtdb->origin_node); 7812 } else { 7813 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 7814 } 7815 7816 callbacks->add = NULL; 7817 callbacks->add_private = NULL; 7818 callbacks->deserialize = NULL; 7819 callbacks->deserialize_private = NULL; 7820 7821 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx)); 7822 7823 return (ISC_R_SUCCESS); 7824 } 7825 7826 /* 7827 * helper function to handle writing out the rdataset data pointed to 7828 * by the void *data pointer in the dns_rbtnode 7829 */ 7830 static isc_result_t 7831 rbt_datawriter(FILE *rbtfile, unsigned char *data, void *arg, uint64_t *crc) { 7832 rbtdb_version_t *version = (rbtdb_version_t *)arg; 7833 rbtdb_serial_t serial; 7834 rdatasetheader_t newheader; 7835 rdatasetheader_t *header = (rdatasetheader_t *)data, *next; 7836 off_t where; 7837 size_t cooked, size; 7838 unsigned char *p; 7839 isc_result_t result = ISC_R_SUCCESS; 7840 char pad[sizeof(char *)]; 7841 uintptr_t off; 7842 7843 REQUIRE(rbtfile != NULL); 7844 REQUIRE(data != NULL); 7845 REQUIRE(version != NULL); 7846 7847 serial = version->serial; 7848 7849 for (; header != NULL; header = next) { 7850 next = header->next; 7851 do { 7852 if (header->serial <= serial && !IGNORE(header)) { 7853 if (NONEXISTENT(header)) { 7854 header = NULL; 7855 } 7856 break; 7857 } else { 7858 header = header->down; 7859 } 7860 } while (header != NULL); 7861 7862 if (header == NULL) { 7863 continue; 7864 } 7865 7866 CHECK(isc_stdio_tell(rbtfile, &where)); 7867 size = dns_rdataslab_size((unsigned char *)header, 7868 sizeof(rdatasetheader_t)); 7869 7870 p = (unsigned char *)header; 7871 memmove(&newheader, p, sizeof(rdatasetheader_t)); 7872 newheader.down = NULL; 7873 newheader.next = NULL; 7874 off = where; 7875 if ((off_t)off != where) { 7876 return (ISC_R_RANGE); 7877 } 7878 newheader.node = (dns_rbtnode_t *)off; 7879 newheader.node_is_relative = 1; 7880 newheader.serial = 1; 7881 7882 /* 7883 * Round size up to the next pointer sized offset so it 7884 * will be properly aligned when read back in. 7885 */ 7886 cooked = dns_rbt_serialize_align(size); 7887 if (next != NULL) { 7888 newheader.next = (rdatasetheader_t *)(off + cooked); 7889 newheader.next_is_relative = 1; 7890 } 7891 7892 #ifdef DEBUG 7893 hexdump("writing header", (unsigned char *)&newheader, 7894 sizeof(rdatasetheader_t)); 7895 hexdump("writing slab", p + sizeof(rdatasetheader_t), 7896 size - sizeof(rdatasetheader_t)); 7897 #endif /* ifdef DEBUG */ 7898 isc_crc64_update(crc, (unsigned char *)&newheader, 7899 sizeof(rdatasetheader_t)); 7900 CHECK(isc_stdio_write(&newheader, sizeof(rdatasetheader_t), 1, 7901 rbtfile, NULL)); 7902 7903 isc_crc64_update(crc, p + sizeof(rdatasetheader_t), 7904 size - sizeof(rdatasetheader_t)); 7905 CHECK(isc_stdio_write(p + sizeof(rdatasetheader_t), 7906 size - sizeof(rdatasetheader_t), 1, 7907 rbtfile, NULL)); 7908 /* 7909 * Pad to force alignment. 7910 */ 7911 if (size != (size_t)cooked) { 7912 memset(pad, 0, sizeof(pad)); 7913 CHECK(isc_stdio_write(pad, cooked - size, 1, rbtfile, 7914 NULL)); 7915 } 7916 } 7917 7918 failure: 7919 return (result); 7920 } 7921 7922 /* 7923 * Write out a zeroed header as a placeholder. Doing this ensures 7924 * that the file will not read while it is partially written, should 7925 * writing fail or be interrupted. 7926 */ 7927 static isc_result_t 7928 rbtdb_zero_header(FILE *rbtfile) { 7929 char buffer[RBTDB_HEADER_LENGTH]; 7930 isc_result_t result; 7931 7932 memset(buffer, 0, RBTDB_HEADER_LENGTH); 7933 result = isc_stdio_write(buffer, 1, RBTDB_HEADER_LENGTH, rbtfile, NULL); 7934 fflush(rbtfile); 7935 7936 return (result); 7937 } 7938 7939 static isc_once_t once = ISC_ONCE_INIT; 7940 7941 static void 7942 init_file_version(void) { 7943 int n; 7944 7945 memset(FILE_VERSION, 0, sizeof(FILE_VERSION)); 7946 n = snprintf(FILE_VERSION, sizeof(FILE_VERSION), "RBTDB Image %s %s", 7947 dns_major, dns_mapapi); 7948 INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION)); 7949 } 7950 7951 /* 7952 * Write the file header out, recording the locations of the three 7953 * RBT's used in the rbtdb: tree, nsec, and nsec3, and including NodeDump 7954 * version information and any information stored in the rbtdb object 7955 * itself that should be stored here. 7956 */ 7957 static isc_result_t 7958 rbtdb_write_header(FILE *rbtfile, off_t tree_location, off_t nsec_location, 7959 off_t nsec3_location) { 7960 rbtdb_file_header_t header; 7961 isc_result_t result; 7962 7963 RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS); 7964 7965 memset(&header, 0, sizeof(rbtdb_file_header_t)); 7966 memmove(header.version1, FILE_VERSION, sizeof(header.version1)); 7967 memmove(header.version2, FILE_VERSION, sizeof(header.version2)); 7968 header.ptrsize = (uint32_t)sizeof(void *); 7969 header.bigendian = (1 == htonl(1)) ? 1 : 0; 7970 header.tree = (uint64_t)tree_location; 7971 header.nsec = (uint64_t)nsec_location; 7972 header.nsec3 = (uint64_t)nsec3_location; 7973 result = isc_stdio_write(&header, 1, sizeof(rbtdb_file_header_t), 7974 rbtfile, NULL); 7975 fflush(rbtfile); 7976 7977 return (result); 7978 } 7979 7980 static bool 7981 match_header_version(rbtdb_file_header_t *header) { 7982 RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS); 7983 7984 if (memcmp(header->version1, FILE_VERSION, sizeof(header->version1)) != 7985 0 || 7986 memcmp(header->version2, FILE_VERSION, sizeof(header->version1)) != 7987 0) 7988 { 7989 return (false); 7990 } 7991 7992 return (true); 7993 } 7994 7995 static isc_result_t 7996 serialize(dns_db_t *db, dns_dbversion_t *ver, FILE *rbtfile) { 7997 rbtdb_version_t *version = (rbtdb_version_t *)ver; 7998 dns_rbtdb_t *rbtdb; 7999 isc_result_t result; 8000 off_t tree_location, nsec_location, nsec3_location, header_location; 8001 8002 rbtdb = (dns_rbtdb_t *)db; 8003 8004 REQUIRE(VALID_RBTDB(rbtdb)); 8005 REQUIRE(rbtfile != NULL); 8006 8007 /* Ensure we're writing to a plain file */ 8008 CHECK(isc_file_isplainfilefd(fileno(rbtfile))); 8009 8010 /* 8011 * first, write out a zeroed header to store rbtdb information 8012 * 8013 * then for each of the three trees, store the current position 8014 * in the file and call dns_rbt_serialize_tree 8015 * 8016 * finally, write out the rbtdb header, storing the locations of the 8017 * rbtheaders 8018 * 8019 * NOTE: need to do something better with the return codes, &= will 8020 * not work. 8021 */ 8022 CHECK(isc_stdio_tell(rbtfile, &header_location)); 8023 CHECK(rbtdb_zero_header(rbtfile)); 8024 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->tree, rbt_datawriter, 8025 version, &tree_location)); 8026 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec, rbt_datawriter, 8027 version, &nsec_location)); 8028 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec3, rbt_datawriter, 8029 version, &nsec3_location)); 8030 8031 CHECK(isc_stdio_seek(rbtfile, header_location, SEEK_SET)); 8032 CHECK(rbtdb_write_header(rbtfile, tree_location, nsec_location, 8033 nsec3_location)); 8034 failure: 8035 return (result); 8036 } 8037 8038 static isc_result_t 8039 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename, 8040 dns_masterformat_t masterformat) { 8041 dns_rbtdb_t *rbtdb; 8042 rbtdb_version_t *rbtversion = version; 8043 8044 rbtdb = (dns_rbtdb_t *)db; 8045 8046 REQUIRE(VALID_RBTDB(rbtdb)); 8047 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 8048 8049 return (dns_master_dump(rbtdb->common.mctx, db, version, 8050 &dns_master_style_default, filename, 8051 masterformat, NULL)); 8052 } 8053 8054 static void 8055 delete_callback(void *data, void *arg) { 8056 dns_rbtdb_t *rbtdb = arg; 8057 rdatasetheader_t *current, *next; 8058 unsigned int locknum; 8059 8060 current = data; 8061 locknum = current->node->locknum; 8062 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 8063 while (current != NULL) { 8064 next = current->next; 8065 free_rdataset(rbtdb, rbtdb->common.mctx, current); 8066 current = next; 8067 } 8068 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); 8069 } 8070 8071 static bool 8072 issecure(dns_db_t *db) { 8073 dns_rbtdb_t *rbtdb; 8074 bool secure; 8075 8076 rbtdb = (dns_rbtdb_t *)db; 8077 8078 REQUIRE(VALID_RBTDB(rbtdb)); 8079 8080 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 8081 secure = (rbtdb->current_version->secure == dns_db_secure); 8082 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 8083 8084 return (secure); 8085 } 8086 8087 static bool 8088 isdnssec(dns_db_t *db) { 8089 dns_rbtdb_t *rbtdb; 8090 bool dnssec; 8091 8092 rbtdb = (dns_rbtdb_t *)db; 8093 8094 REQUIRE(VALID_RBTDB(rbtdb)); 8095 8096 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 8097 dnssec = (rbtdb->current_version->secure != dns_db_insecure); 8098 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 8099 8100 return (dnssec); 8101 } 8102 8103 static unsigned int 8104 nodecount(dns_db_t *db) { 8105 dns_rbtdb_t *rbtdb; 8106 unsigned int count; 8107 8108 rbtdb = (dns_rbtdb_t *)db; 8109 8110 REQUIRE(VALID_RBTDB(rbtdb)); 8111 8112 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8113 count = dns_rbt_nodecount(rbtdb->tree); 8114 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8115 8116 return (count); 8117 } 8118 8119 static size_t 8120 hashsize(dns_db_t *db) { 8121 dns_rbtdb_t *rbtdb; 8122 size_t size; 8123 8124 rbtdb = (dns_rbtdb_t *)db; 8125 8126 REQUIRE(VALID_RBTDB(rbtdb)); 8127 8128 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8129 size = dns_rbt_hashsize(rbtdb->tree); 8130 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8131 8132 return (size); 8133 } 8134 8135 static isc_result_t 8136 adjusthashsize(dns_db_t *db, size_t size) { 8137 isc_result_t result; 8138 dns_rbtdb_t *rbtdb; 8139 8140 rbtdb = (dns_rbtdb_t *)db; 8141 8142 REQUIRE(VALID_RBTDB(rbtdb)); 8143 8144 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 8145 result = dns_rbt_adjusthashsize(rbtdb->tree, size); 8146 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 8147 8148 return (result); 8149 } 8150 8151 static void 8152 settask(dns_db_t *db, isc_task_t *task) { 8153 dns_rbtdb_t *rbtdb; 8154 8155 rbtdb = (dns_rbtdb_t *)db; 8156 8157 REQUIRE(VALID_RBTDB(rbtdb)); 8158 8159 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write); 8160 if (rbtdb->task != NULL) { 8161 isc_task_detach(&rbtdb->task); 8162 } 8163 if (task != NULL) { 8164 isc_task_attach(task, &rbtdb->task); 8165 } 8166 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write); 8167 } 8168 8169 static bool 8170 ispersistent(dns_db_t *db) { 8171 UNUSED(db); 8172 return (false); 8173 } 8174 8175 static isc_result_t 8176 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) { 8177 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8178 dns_rbtnode_t *onode; 8179 isc_result_t result = ISC_R_SUCCESS; 8180 8181 REQUIRE(VALID_RBTDB(rbtdb)); 8182 REQUIRE(nodep != NULL && *nodep == NULL); 8183 8184 /* Note that the access to origin_node doesn't require a DB lock */ 8185 onode = (dns_rbtnode_t *)rbtdb->origin_node; 8186 if (onode != NULL) { 8187 new_reference(rbtdb, onode, isc_rwlocktype_none); 8188 *nodep = rbtdb->origin_node; 8189 } else { 8190 INSIST(IS_CACHE(rbtdb)); 8191 result = ISC_R_NOTFOUND; 8192 } 8193 8194 return (result); 8195 } 8196 8197 static isc_result_t 8198 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash, 8199 uint8_t *flags, uint16_t *iterations, unsigned char *salt, 8200 size_t *salt_length) { 8201 dns_rbtdb_t *rbtdb; 8202 isc_result_t result = ISC_R_NOTFOUND; 8203 rbtdb_version_t *rbtversion = version; 8204 8205 rbtdb = (dns_rbtdb_t *)db; 8206 8207 REQUIRE(VALID_RBTDB(rbtdb)); 8208 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 8209 8210 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 8211 if (rbtversion == NULL) { 8212 rbtversion = rbtdb->current_version; 8213 } 8214 8215 if (rbtversion->havensec3) { 8216 if (hash != NULL) { 8217 *hash = rbtversion->hash; 8218 } 8219 if (salt != NULL && salt_length != NULL) { 8220 REQUIRE(*salt_length >= rbtversion->salt_length); 8221 memmove(salt, rbtversion->salt, 8222 rbtversion->salt_length); 8223 } 8224 if (salt_length != NULL) { 8225 *salt_length = rbtversion->salt_length; 8226 } 8227 if (iterations != NULL) { 8228 *iterations = rbtversion->iterations; 8229 } 8230 if (flags != NULL) { 8231 *flags = rbtversion->flags; 8232 } 8233 result = ISC_R_SUCCESS; 8234 } 8235 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 8236 8237 return (result); 8238 } 8239 8240 static isc_result_t 8241 getsize(dns_db_t *db, dns_dbversion_t *version, uint64_t *records, 8242 uint64_t *xfrsize) { 8243 dns_rbtdb_t *rbtdb; 8244 isc_result_t result = ISC_R_SUCCESS; 8245 rbtdb_version_t *rbtversion = version; 8246 8247 rbtdb = (dns_rbtdb_t *)db; 8248 8249 REQUIRE(VALID_RBTDB(rbtdb)); 8250 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); 8251 8252 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read); 8253 if (rbtversion == NULL) { 8254 rbtversion = rbtdb->current_version; 8255 } 8256 8257 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_read); 8258 if (records != NULL) { 8259 *records = rbtversion->records; 8260 } 8261 8262 if (xfrsize != NULL) { 8263 *xfrsize = rbtversion->xfrsize; 8264 } 8265 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_read); 8266 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read); 8267 8268 return (result); 8269 } 8270 8271 static isc_result_t 8272 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { 8273 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8274 rdatasetheader_t *header, oldheader; 8275 8276 REQUIRE(VALID_RBTDB(rbtdb)); 8277 REQUIRE(!IS_CACHE(rbtdb)); 8278 REQUIRE(rdataset != NULL); 8279 8280 header = rdataset->private3; 8281 header--; 8282 8283 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock, 8284 isc_rwlocktype_write); 8285 8286 oldheader = *header; 8287 /* 8288 * Only break the heap invariant (by adjusting resign and resign_lsb) 8289 * if we are going to be restoring it by calling isc_heap_increased 8290 * or isc_heap_decreased. 8291 */ 8292 if (resign != 0) { 8293 header->resign = (isc_stdtime_t)(dns_time64_from32(resign) >> 8294 1); 8295 header->resign_lsb = resign & 0x1; 8296 } 8297 if (header->heap_index != 0) { 8298 INSIST(RESIGN(header)); 8299 if (resign == 0) { 8300 isc_heap_delete(rbtdb->heaps[header->node->locknum], 8301 header->heap_index); 8302 header->heap_index = 0; 8303 } else if (resign_sooner(header, &oldheader)) { 8304 isc_heap_increased(rbtdb->heaps[header->node->locknum], 8305 header->heap_index); 8306 } else if (resign_sooner(&oldheader, header)) { 8307 isc_heap_decreased(rbtdb->heaps[header->node->locknum], 8308 header->heap_index); 8309 } 8310 } else if (resign != 0) { 8311 RDATASET_ATTR_SET(header, RDATASET_ATTR_RESIGN); 8312 resign_insert(rbtdb, header->node->locknum, header); 8313 } 8314 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock, 8315 isc_rwlocktype_write); 8316 return (ISC_R_SUCCESS); 8317 } 8318 8319 static isc_result_t 8320 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, dns_name_t *foundname) { 8321 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8322 rdatasetheader_t *header = NULL, *this; 8323 unsigned int i; 8324 isc_result_t result = ISC_R_NOTFOUND; 8325 unsigned int locknum = 0; 8326 8327 REQUIRE(VALID_RBTDB(rbtdb)); 8328 8329 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8330 8331 for (i = 0; i < rbtdb->node_lock_count; i++) { 8332 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read); 8333 8334 /* 8335 * Find for the earliest signing time among all of the 8336 * heaps, each of which is covered by a different bucket 8337 * lock. 8338 */ 8339 this = isc_heap_element(rbtdb->heaps[i], 1); 8340 if (this == NULL) { 8341 /* Nothing found; unlock and try the next heap. */ 8342 NODE_UNLOCK(&rbtdb->node_locks[i].lock, 8343 isc_rwlocktype_read); 8344 continue; 8345 } 8346 8347 if (header == NULL) { 8348 /* 8349 * Found a signing time: retain the bucket lock and 8350 * preserve the lock number so we can unlock it 8351 * later. 8352 */ 8353 header = this; 8354 locknum = i; 8355 } else if (resign_sooner(this, header)) { 8356 /* 8357 * Found an earlier signing time; release the 8358 * previous bucket lock and retain this one instead. 8359 */ 8360 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 8361 isc_rwlocktype_read); 8362 header = this; 8363 locknum = i; 8364 } else { 8365 /* 8366 * Earliest signing time in this heap isn't 8367 * an improvement; unlock and try the next heap. 8368 */ 8369 NODE_UNLOCK(&rbtdb->node_locks[i].lock, 8370 isc_rwlocktype_read); 8371 } 8372 } 8373 8374 if (header != NULL) { 8375 /* 8376 * Found something; pass back the answer and unlock 8377 * the bucket. 8378 */ 8379 bind_rdataset(rbtdb, header->node, header, 0, 8380 isc_rwlocktype_read, rdataset); 8381 8382 if (foundname != NULL) { 8383 dns_rbt_fullnamefromnode(header->node, foundname); 8384 } 8385 8386 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 8387 isc_rwlocktype_read); 8388 8389 result = ISC_R_SUCCESS; 8390 } 8391 8392 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8393 8394 return (result); 8395 } 8396 8397 static void 8398 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version) { 8399 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version; 8400 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8401 dns_rbtnode_t *node; 8402 rdatasetheader_t *header; 8403 8404 REQUIRE(VALID_RBTDB(rbtdb)); 8405 REQUIRE(rdataset != NULL); 8406 REQUIRE(rdataset->methods == &rdataset_methods); 8407 REQUIRE(rbtdb->future_version == rbtversion); 8408 REQUIRE(rbtversion != NULL); 8409 REQUIRE(rbtversion->writer); 8410 REQUIRE(rbtversion->rbtdb == rbtdb); 8411 8412 node = rdataset->private2; 8413 INSIST(node != NULL); 8414 header = rdataset->private3; 8415 INSIST(header != NULL); 8416 header--; 8417 8418 if (header->heap_index == 0) { 8419 return; 8420 } 8421 8422 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 8423 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write); 8424 /* 8425 * Delete from heap and save to re-signed list so that it can 8426 * be restored if we backout of this change. 8427 */ 8428 resign_delete(rbtdb, rbtversion, header); 8429 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, 8430 isc_rwlocktype_write); 8431 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 8432 } 8433 8434 static isc_result_t 8435 setcachestats(dns_db_t *db, isc_stats_t *stats) { 8436 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8437 8438 REQUIRE(VALID_RBTDB(rbtdb)); 8439 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 8440 REQUIRE(stats != NULL); 8441 8442 isc_stats_attach(stats, &rbtdb->cachestats); 8443 return (ISC_R_SUCCESS); 8444 } 8445 8446 static isc_result_t 8447 setgluecachestats(dns_db_t *db, isc_stats_t *stats) { 8448 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8449 8450 REQUIRE(VALID_RBTDB(rbtdb)); 8451 REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb)); 8452 REQUIRE(stats != NULL); 8453 8454 isc_stats_attach(stats, &rbtdb->gluecachestats); 8455 return (ISC_R_SUCCESS); 8456 } 8457 8458 static dns_stats_t * 8459 getrrsetstats(dns_db_t *db) { 8460 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8461 8462 REQUIRE(VALID_RBTDB(rbtdb)); 8463 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 8464 8465 return (rbtdb->rrsetstats); 8466 } 8467 8468 static isc_result_t 8469 nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) { 8470 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8471 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 8472 isc_result_t result; 8473 8474 REQUIRE(VALID_RBTDB(rbtdb)); 8475 REQUIRE(node != NULL); 8476 REQUIRE(name != NULL); 8477 8478 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8479 result = dns_rbt_fullnamefromnode(rbtnode, name); 8480 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 8481 8482 return (result); 8483 } 8484 8485 static isc_result_t 8486 setservestalettl(dns_db_t *db, dns_ttl_t ttl) { 8487 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8488 8489 REQUIRE(VALID_RBTDB(rbtdb)); 8490 REQUIRE(IS_CACHE(rbtdb)); 8491 8492 /* currently no bounds checking. 0 means disable. */ 8493 rbtdb->serve_stale_ttl = ttl; 8494 return (ISC_R_SUCCESS); 8495 } 8496 8497 static isc_result_t 8498 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { 8499 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8500 8501 REQUIRE(VALID_RBTDB(rbtdb)); 8502 REQUIRE(IS_CACHE(rbtdb)); 8503 8504 *ttl = rbtdb->serve_stale_ttl; 8505 return (ISC_R_SUCCESS); 8506 } 8507 8508 static isc_result_t 8509 setservestalerefresh(dns_db_t *db, uint32_t interval) { 8510 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8511 8512 REQUIRE(VALID_RBTDB(rbtdb)); 8513 REQUIRE(IS_CACHE(rbtdb)); 8514 8515 /* currently no bounds checking. 0 means disable. */ 8516 rbtdb->serve_stale_refresh = interval; 8517 return (ISC_R_SUCCESS); 8518 } 8519 8520 static isc_result_t 8521 getservestalerefresh(dns_db_t *db, uint32_t *interval) { 8522 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 8523 8524 REQUIRE(VALID_RBTDB(rbtdb)); 8525 REQUIRE(IS_CACHE(rbtdb)); 8526 8527 *interval = rbtdb->serve_stale_refresh; 8528 return (ISC_R_SUCCESS); 8529 } 8530 8531 static dns_dbmethods_t zone_methods = { attach, 8532 detach, 8533 beginload, 8534 endload, 8535 serialize, 8536 dump, 8537 currentversion, 8538 newversion, 8539 attachversion, 8540 closeversion, 8541 findnode, 8542 zone_find, 8543 zone_findzonecut, 8544 attachnode, 8545 detachnode, 8546 expirenode, 8547 printnode, 8548 createiterator, 8549 zone_findrdataset, 8550 allrdatasets, 8551 addrdataset, 8552 subtractrdataset, 8553 deleterdataset, 8554 issecure, 8555 nodecount, 8556 ispersistent, 8557 overmem, 8558 settask, 8559 getoriginnode, 8560 NULL, /* transfernode */ 8561 getnsec3parameters, 8562 findnsec3node, 8563 setsigningtime, 8564 getsigningtime, 8565 resigned, 8566 isdnssec, 8567 NULL, /* getrrsetstats */ 8568 NULL, /* rpz_attach */ 8569 NULL, /* rpz_ready */ 8570 NULL, /* findnodeext */ 8571 NULL, /* findext */ 8572 NULL, /* setcachestats */ 8573 hashsize, 8574 nodefullname, 8575 getsize, 8576 NULL, /* setservestalettl */ 8577 NULL, /* getservestalettl */ 8578 NULL, /* setservestalerefresh */ 8579 NULL, /* getservestalerefresh */ 8580 setgluecachestats, 8581 adjusthashsize }; 8582 8583 static dns_dbmethods_t cache_methods = { attach, 8584 detach, 8585 beginload, 8586 endload, 8587 NULL, /* serialize */ 8588 dump, 8589 currentversion, 8590 newversion, 8591 attachversion, 8592 closeversion, 8593 findnode, 8594 cache_find, 8595 cache_findzonecut, 8596 attachnode, 8597 detachnode, 8598 expirenode, 8599 printnode, 8600 createiterator, 8601 cache_findrdataset, 8602 allrdatasets, 8603 addrdataset, 8604 subtractrdataset, 8605 deleterdataset, 8606 issecure, 8607 nodecount, 8608 ispersistent, 8609 overmem, 8610 settask, 8611 getoriginnode, 8612 NULL, /* transfernode */ 8613 NULL, /* getnsec3parameters */ 8614 NULL, /* findnsec3node */ 8615 NULL, /* setsigningtime */ 8616 NULL, /* getsigningtime */ 8617 NULL, /* resigned */ 8618 isdnssec, 8619 getrrsetstats, 8620 NULL, /* rpz_attach */ 8621 NULL, /* rpz_ready */ 8622 NULL, /* findnodeext */ 8623 NULL, /* findext */ 8624 setcachestats, 8625 hashsize, 8626 nodefullname, 8627 NULL, /* getsize */ 8628 setservestalettl, 8629 getservestalettl, 8630 setservestalerefresh, 8631 getservestalerefresh, 8632 NULL, 8633 adjusthashsize }; 8634 8635 isc_result_t 8636 dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, 8637 dns_rdataclass_t rdclass, unsigned int argc, char *argv[], 8638 void *driverarg, dns_db_t **dbp) { 8639 dns_rbtdb_t *rbtdb; 8640 isc_result_t result; 8641 int i; 8642 dns_name_t name; 8643 bool (*sooner)(void *, void *); 8644 isc_mem_t *hmctx = mctx; 8645 8646 /* Keep the compiler happy. */ 8647 UNUSED(driverarg); 8648 8649 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb)); 8650 8651 /* 8652 * If argv[0] exists, it points to a memory context to use for heap 8653 */ 8654 if (argc != 0) { 8655 hmctx = (isc_mem_t *)argv[0]; 8656 } 8657 8658 memset(rbtdb, '\0', sizeof(*rbtdb)); 8659 dns_name_init(&rbtdb->common.origin, NULL); 8660 rbtdb->common.attributes = 0; 8661 if (type == dns_dbtype_cache) { 8662 rbtdb->common.methods = &cache_methods; 8663 rbtdb->common.attributes |= DNS_DBATTR_CACHE; 8664 } else if (type == dns_dbtype_stub) { 8665 rbtdb->common.methods = &zone_methods; 8666 rbtdb->common.attributes |= DNS_DBATTR_STUB; 8667 } else { 8668 rbtdb->common.methods = &zone_methods; 8669 } 8670 rbtdb->common.rdclass = rdclass; 8671 rbtdb->common.mctx = NULL; 8672 8673 ISC_LIST_INIT(rbtdb->common.update_listeners); 8674 8675 RBTDB_INITLOCK(&rbtdb->lock); 8676 8677 isc_rwlock_init(&rbtdb->tree_lock, 0, 0); 8678 8679 /* 8680 * Initialize node_lock_count in a generic way to support future 8681 * extension which allows the user to specify this value on creation. 8682 * Note that when specified for a cache DB it must be larger than 1 8683 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT. 8684 */ 8685 if (rbtdb->node_lock_count == 0) { 8686 if (IS_CACHE(rbtdb)) { 8687 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT; 8688 } else { 8689 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT; 8690 } 8691 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) { 8692 result = ISC_R_RANGE; 8693 goto cleanup_tree_lock; 8694 } 8695 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH)); 8696 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count * 8697 sizeof(rbtdb_nodelock_t)); 8698 8699 rbtdb->cachestats = NULL; 8700 rbtdb->gluecachestats = NULL; 8701 8702 rbtdb->rrsetstats = NULL; 8703 if (IS_CACHE(rbtdb)) { 8704 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats); 8705 if (result != ISC_R_SUCCESS) { 8706 goto cleanup_node_locks; 8707 } 8708 rbtdb->rdatasets = isc_mem_get( 8709 mctx, 8710 rbtdb->node_lock_count * sizeof(rdatasetheaderlist_t)); 8711 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8712 ISC_LIST_INIT(rbtdb->rdatasets[i]); 8713 } 8714 } else { 8715 rbtdb->rdatasets = NULL; 8716 } 8717 8718 /* 8719 * Create the heaps. 8720 */ 8721 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count * 8722 sizeof(isc_heap_t *)); 8723 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8724 rbtdb->heaps[i] = NULL; 8725 } 8726 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner; 8727 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8728 isc_heap_create(hmctx, sooner, set_index, 0, &rbtdb->heaps[i]); 8729 } 8730 8731 /* 8732 * Create deadnode lists. 8733 */ 8734 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count * 8735 sizeof(rbtnodelist_t)); 8736 for (i = 0; i < (int)rbtdb->node_lock_count; i++) { 8737 ISC_LIST_INIT(rbtdb->deadnodes[i]); 8738 } 8739 8740 rbtdb->active = rbtdb->node_lock_count; 8741 8742 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) { 8743 NODE_INITLOCK(&rbtdb->node_locks[i].lock); 8744 isc_refcount_init(&rbtdb->node_locks[i].references, 0); 8745 rbtdb->node_locks[i].exiting = false; 8746 } 8747 8748 /* 8749 * Attach to the mctx. The database will persist so long as there 8750 * are references to it, and attaching to the mctx ensures that our 8751 * mctx won't disappear out from under us. 8752 */ 8753 isc_mem_attach(mctx, &rbtdb->common.mctx); 8754 isc_mem_attach(hmctx, &rbtdb->hmctx); 8755 8756 /* 8757 * Make a copy of the origin name. 8758 */ 8759 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin); 8760 if (result != ISC_R_SUCCESS) { 8761 free_rbtdb(rbtdb, false, NULL); 8762 return (result); 8763 } 8764 8765 /* 8766 * Make the Red-Black Trees. 8767 */ 8768 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree); 8769 if (result != ISC_R_SUCCESS) { 8770 free_rbtdb(rbtdb, false, NULL); 8771 return (result); 8772 } 8773 8774 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec); 8775 if (result != ISC_R_SUCCESS) { 8776 free_rbtdb(rbtdb, false, NULL); 8777 return (result); 8778 } 8779 8780 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3); 8781 if (result != ISC_R_SUCCESS) { 8782 free_rbtdb(rbtdb, false, NULL); 8783 return (result); 8784 } 8785 8786 /* 8787 * In order to set the node callback bit correctly in zone databases, 8788 * we need to know if the node has the origin name of the zone. 8789 * In loading_addrdataset() we could simply compare the new name 8790 * to the origin name, but this is expensive. Also, we don't know the 8791 * node name in addrdataset(), so we need another way of knowing the 8792 * zone's top. 8793 * 8794 * We now explicitly create a node for the zone's origin, and then 8795 * we simply remember the node's address. This is safe, because 8796 * the top-of-zone node can never be deleted, nor can its address 8797 * change. 8798 */ 8799 if (!IS_CACHE(rbtdb)) { 8800 rbtdb->origin_node = NULL; 8801 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin, 8802 &rbtdb->origin_node); 8803 if (result != ISC_R_SUCCESS) { 8804 INSIST(result != ISC_R_EXISTS); 8805 free_rbtdb(rbtdb, false, NULL); 8806 return (result); 8807 } 8808 INSIST(rbtdb->origin_node != NULL); 8809 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL; 8810 /* 8811 * We need to give the origin node the right locknum. 8812 */ 8813 dns_name_init(&name, NULL); 8814 dns_rbt_namefromnode(rbtdb->origin_node, &name); 8815 rbtdb->origin_node->locknum = rbtdb->origin_node->hashval % 8816 rbtdb->node_lock_count; 8817 /* 8818 * Add an apex node to the NSEC3 tree so that NSEC3 searches 8819 * return partial matches when there is only a single NSEC3 8820 * record in the tree. 8821 */ 8822 rbtdb->nsec3_origin_node = NULL; 8823 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin, 8824 &rbtdb->nsec3_origin_node); 8825 if (result != ISC_R_SUCCESS) { 8826 INSIST(result != ISC_R_EXISTS); 8827 free_rbtdb(rbtdb, false, NULL); 8828 return (result); 8829 } 8830 rbtdb->nsec3_origin_node->nsec = DNS_RBT_NSEC_NSEC3; 8831 /* 8832 * We need to give the nsec3 origin node the right locknum. 8833 */ 8834 dns_name_init(&name, NULL); 8835 dns_rbt_namefromnode(rbtdb->nsec3_origin_node, &name); 8836 rbtdb->nsec3_origin_node->locknum = 8837 rbtdb->nsec3_origin_node->hashval % 8838 rbtdb->node_lock_count; 8839 } 8840 8841 /* 8842 * Misc. Initialization. 8843 */ 8844 isc_refcount_init(&rbtdb->references, 1); 8845 rbtdb->attributes = 0; 8846 rbtdb->task = NULL; 8847 rbtdb->serve_stale_ttl = 0; 8848 8849 /* 8850 * Version Initialization. 8851 */ 8852 rbtdb->current_serial = 1; 8853 rbtdb->least_serial = 1; 8854 rbtdb->next_serial = 2; 8855 rbtdb->current_version = allocate_version(mctx, 1, 1, false); 8856 rbtdb->current_version->rbtdb = rbtdb; 8857 rbtdb->current_version->secure = dns_db_insecure; 8858 rbtdb->current_version->havensec3 = false; 8859 rbtdb->current_version->flags = 0; 8860 rbtdb->current_version->iterations = 0; 8861 rbtdb->current_version->hash = 0; 8862 rbtdb->current_version->salt_length = 0; 8863 memset(rbtdb->current_version->salt, 0, 8864 sizeof(rbtdb->current_version->salt)); 8865 isc_rwlock_init(&rbtdb->current_version->rwlock, 0, 0); 8866 rbtdb->current_version->records = 0; 8867 rbtdb->current_version->xfrsize = 0; 8868 rbtdb->future_version = NULL; 8869 ISC_LIST_INIT(rbtdb->open_versions); 8870 /* 8871 * Keep the current version in the open list so that list operation 8872 * won't happen in normal lookup operations. 8873 */ 8874 PREPEND(rbtdb->open_versions, rbtdb->current_version, link); 8875 8876 rbtdb->common.magic = DNS_DB_MAGIC; 8877 rbtdb->common.impmagic = RBTDB_MAGIC; 8878 8879 *dbp = (dns_db_t *)rbtdb; 8880 8881 return (ISC_R_SUCCESS); 8882 8883 cleanup_node_locks: 8884 isc_mem_put(mctx, rbtdb->node_locks, 8885 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t)); 8886 8887 cleanup_tree_lock: 8888 isc_rwlock_destroy(&rbtdb->tree_lock); 8889 RBTDB_DESTROYLOCK(&rbtdb->lock); 8890 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb)); 8891 return (result); 8892 } 8893 8894 /* 8895 * Slabbed Rdataset Methods 8896 */ 8897 8898 static void 8899 rdataset_disassociate(dns_rdataset_t *rdataset) { 8900 dns_db_t *db = rdataset->private1; 8901 dns_dbnode_t *node = rdataset->private2; 8902 8903 detachnode(db, &node); 8904 } 8905 8906 static isc_result_t 8907 rdataset_first(dns_rdataset_t *rdataset) { 8908 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 8909 unsigned int count; 8910 8911 count = raw[0] * 256 + raw[1]; 8912 if (count == 0) { 8913 rdataset->private5 = NULL; 8914 return (ISC_R_NOMORE); 8915 } 8916 8917 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) { 8918 raw += DNS_RDATASET_COUNT; 8919 } 8920 8921 raw += DNS_RDATASET_LENGTH; 8922 8923 /* 8924 * The privateuint4 field is the number of rdata beyond the 8925 * cursor position, so we decrement the total count by one 8926 * before storing it. 8927 * 8928 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the 8929 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points 8930 * to the first entry in the offset table. 8931 */ 8932 count--; 8933 rdataset->privateuint4 = count; 8934 rdataset->private5 = raw; 8935 8936 return (ISC_R_SUCCESS); 8937 } 8938 8939 static isc_result_t 8940 rdataset_next(dns_rdataset_t *rdataset) { 8941 unsigned int count; 8942 unsigned int length; 8943 unsigned char *raw; /* RDATASLAB */ 8944 8945 count = rdataset->privateuint4; 8946 if (count == 0) { 8947 return (ISC_R_NOMORE); 8948 } 8949 count--; 8950 rdataset->privateuint4 = count; 8951 8952 /* 8953 * Skip forward one record (length + 4) or one offset (4). 8954 */ 8955 raw = rdataset->private5; 8956 #if DNS_RDATASET_FIXED 8957 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) 8958 #endif /* DNS_RDATASET_FIXED */ 8959 { 8960 length = raw[0] * 256 + raw[1]; 8961 raw += length; 8962 } 8963 8964 rdataset->private5 = raw + DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 8965 8966 return (ISC_R_SUCCESS); 8967 } 8968 8969 static void 8970 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) { 8971 unsigned char *raw = rdataset->private5; /* RDATASLAB */ 8972 unsigned int length; 8973 isc_region_t r; 8974 unsigned int flags = 0; 8975 8976 REQUIRE(raw != NULL); 8977 8978 /* 8979 * Find the start of the record if not already in private5 8980 * then skip the length and order fields. 8981 */ 8982 #if DNS_RDATASET_FIXED 8983 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) { 8984 unsigned int offset; 8985 offset = ((unsigned int)raw[0] << 24) + 8986 ((unsigned int)raw[1] << 16) + 8987 ((unsigned int)raw[2] << 8) + (unsigned int)raw[3]; 8988 raw = rdataset->private3; 8989 raw += offset; 8990 } 8991 #endif /* if DNS_RDATASET_FIXED */ 8992 8993 length = raw[0] * 256 + raw[1]; 8994 8995 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; 8996 8997 if (rdataset->type == dns_rdatatype_rrsig) { 8998 if (*raw & DNS_RDATASLAB_OFFLINE) { 8999 flags |= DNS_RDATA_OFFLINE; 9000 } 9001 length--; 9002 raw++; 9003 } 9004 r.length = length; 9005 r.base = raw; 9006 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r); 9007 rdata->flags |= flags; 9008 } 9009 9010 static void 9011 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) { 9012 dns_db_t *db = source->private1; 9013 dns_dbnode_t *node = source->private2; 9014 dns_dbnode_t *cloned_node = NULL; 9015 9016 attachnode(db, node, &cloned_node); 9017 INSIST(!ISC_LINK_LINKED(target, link)); 9018 *target = *source; 9019 ISC_LINK_INIT(target, link); 9020 9021 /* 9022 * Reset iterator state. 9023 */ 9024 target->privateuint4 = 0; 9025 target->private5 = NULL; 9026 } 9027 9028 static unsigned int 9029 rdataset_count(dns_rdataset_t *rdataset) { 9030 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 9031 unsigned int count; 9032 9033 count = raw[0] * 256 + raw[1]; 9034 9035 return (count); 9036 } 9037 9038 static isc_result_t 9039 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name, 9040 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) { 9041 dns_db_t *db = rdataset->private1; 9042 dns_dbnode_t *node = rdataset->private2; 9043 dns_dbnode_t *cloned_node; 9044 const struct noqname *noqname = rdataset->private6; 9045 9046 cloned_node = NULL; 9047 attachnode(db, node, &cloned_node); 9048 nsec->methods = &slab_methods; 9049 nsec->rdclass = db->rdclass; 9050 nsec->type = noqname->type; 9051 nsec->covers = 0; 9052 nsec->ttl = rdataset->ttl; 9053 nsec->trust = rdataset->trust; 9054 nsec->private1 = rdataset->private1; 9055 nsec->private2 = rdataset->private2; 9056 nsec->private3 = noqname->neg; 9057 nsec->privateuint4 = 0; 9058 nsec->private5 = NULL; 9059 nsec->private6 = NULL; 9060 nsec->private7 = NULL; 9061 9062 cloned_node = NULL; 9063 attachnode(db, node, &cloned_node); 9064 nsecsig->methods = &slab_methods; 9065 nsecsig->rdclass = db->rdclass; 9066 nsecsig->type = dns_rdatatype_rrsig; 9067 nsecsig->covers = noqname->type; 9068 nsecsig->ttl = rdataset->ttl; 9069 nsecsig->trust = rdataset->trust; 9070 nsecsig->private1 = rdataset->private1; 9071 nsecsig->private2 = rdataset->private2; 9072 nsecsig->private3 = noqname->negsig; 9073 nsecsig->privateuint4 = 0; 9074 nsecsig->private5 = NULL; 9075 nsec->private6 = NULL; 9076 nsec->private7 = NULL; 9077 9078 dns_name_clone(&noqname->name, name); 9079 9080 return (ISC_R_SUCCESS); 9081 } 9082 9083 static isc_result_t 9084 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name, 9085 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) { 9086 dns_db_t *db = rdataset->private1; 9087 dns_dbnode_t *node = rdataset->private2; 9088 dns_dbnode_t *cloned_node; 9089 const struct noqname *closest = rdataset->private7; 9090 9091 cloned_node = NULL; 9092 attachnode(db, node, &cloned_node); 9093 nsec->methods = &slab_methods; 9094 nsec->rdclass = db->rdclass; 9095 nsec->type = closest->type; 9096 nsec->covers = 0; 9097 nsec->ttl = rdataset->ttl; 9098 nsec->trust = rdataset->trust; 9099 nsec->private1 = rdataset->private1; 9100 nsec->private2 = rdataset->private2; 9101 nsec->private3 = closest->neg; 9102 nsec->privateuint4 = 0; 9103 nsec->private5 = NULL; 9104 nsec->private6 = NULL; 9105 nsec->private7 = NULL; 9106 9107 cloned_node = NULL; 9108 attachnode(db, node, &cloned_node); 9109 nsecsig->methods = &slab_methods; 9110 nsecsig->rdclass = db->rdclass; 9111 nsecsig->type = dns_rdatatype_rrsig; 9112 nsecsig->covers = closest->type; 9113 nsecsig->ttl = rdataset->ttl; 9114 nsecsig->trust = rdataset->trust; 9115 nsecsig->private1 = rdataset->private1; 9116 nsecsig->private2 = rdataset->private2; 9117 nsecsig->private3 = closest->negsig; 9118 nsecsig->privateuint4 = 0; 9119 nsecsig->private5 = NULL; 9120 nsec->private6 = NULL; 9121 nsec->private7 = NULL; 9122 9123 dns_name_clone(&closest->name, name); 9124 9125 return (ISC_R_SUCCESS); 9126 } 9127 9128 static void 9129 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) { 9130 dns_rbtdb_t *rbtdb = rdataset->private1; 9131 dns_rbtnode_t *rbtnode = rdataset->private2; 9132 rdatasetheader_t *header = rdataset->private3; 9133 9134 header--; 9135 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9136 isc_rwlocktype_write); 9137 header->trust = rdataset->trust = trust; 9138 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9139 isc_rwlocktype_write); 9140 } 9141 9142 static void 9143 rdataset_expire(dns_rdataset_t *rdataset) { 9144 dns_rbtdb_t *rbtdb = rdataset->private1; 9145 dns_rbtnode_t *rbtnode = rdataset->private2; 9146 rdatasetheader_t *header = rdataset->private3; 9147 9148 header--; 9149 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9150 isc_rwlocktype_write); 9151 expire_header(rbtdb, header, false, expire_flush); 9152 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9153 isc_rwlocktype_write); 9154 } 9155 9156 static void 9157 rdataset_clearprefetch(dns_rdataset_t *rdataset) { 9158 dns_rbtdb_t *rbtdb = rdataset->private1; 9159 dns_rbtnode_t *rbtnode = rdataset->private2; 9160 rdatasetheader_t *header = rdataset->private3; 9161 9162 header--; 9163 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9164 isc_rwlocktype_write); 9165 RDATASET_ATTR_CLR(header, RDATASET_ATTR_PREFETCH); 9166 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9167 isc_rwlocktype_write); 9168 } 9169 9170 /* 9171 * Rdataset Iterator Methods 9172 */ 9173 9174 static void 9175 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) { 9176 rbtdb_rdatasetiter_t *rbtiterator; 9177 9178 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp); 9179 9180 if (rbtiterator->common.version != NULL) { 9181 closeversion(rbtiterator->common.db, 9182 &rbtiterator->common.version, false); 9183 } 9184 detachnode(rbtiterator->common.db, &rbtiterator->common.node); 9185 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator, 9186 sizeof(*rbtiterator)); 9187 9188 *iteratorp = NULL; 9189 } 9190 9191 static bool 9192 iterator_active(dns_rbtdb_t *rbtdb, rbtdb_rdatasetiter_t *rbtiterator, 9193 rdatasetheader_t *header) { 9194 dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl; 9195 9196 /* 9197 * Is this a "this rdataset doesn't exist" record? 9198 */ 9199 if (NONEXISTENT(header)) { 9200 return (false); 9201 } 9202 9203 /* 9204 * If this is a zone or this header still active then return it. 9205 */ 9206 if (!IS_CACHE(rbtdb) || ACTIVE(header, rbtiterator->common.now)) { 9207 return (true); 9208 } 9209 9210 /* 9211 * If we are not returning stale records or the rdataset is 9212 * too old don't return it. 9213 */ 9214 if (!STALEOK(rbtiterator) || (rbtiterator->common.now > stale_ttl)) { 9215 return (false); 9216 } 9217 return (true); 9218 } 9219 9220 static isc_result_t 9221 rdatasetiter_first(dns_rdatasetiter_t *iterator) { 9222 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 9223 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 9224 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 9225 rbtdb_version_t *rbtversion = rbtiterator->common.version; 9226 rdatasetheader_t *header, *top_next; 9227 rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial; 9228 9229 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9230 isc_rwlocktype_read); 9231 9232 for (header = rbtnode->data; header != NULL; header = top_next) { 9233 top_next = header->next; 9234 do { 9235 if (EXPIREDOK(rbtiterator)) { 9236 if (!NONEXISTENT(header)) { 9237 break; 9238 } 9239 header = header->down; 9240 } else if (header->serial <= serial && !IGNORE(header)) 9241 { 9242 if (!iterator_active(rbtdb, rbtiterator, 9243 header)) 9244 { 9245 header = NULL; 9246 } 9247 break; 9248 } else { 9249 header = header->down; 9250 } 9251 } while (header != NULL); 9252 if (header != NULL) { 9253 break; 9254 } 9255 } 9256 9257 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9258 isc_rwlocktype_read); 9259 9260 rbtiterator->current = header; 9261 9262 if (header == NULL) { 9263 return (ISC_R_NOMORE); 9264 } 9265 9266 return (ISC_R_SUCCESS); 9267 } 9268 9269 static isc_result_t 9270 rdatasetiter_next(dns_rdatasetiter_t *iterator) { 9271 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 9272 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 9273 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 9274 rbtdb_version_t *rbtversion = rbtiterator->common.version; 9275 rdatasetheader_t *header, *top_next; 9276 rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial; 9277 rbtdb_rdatatype_t type, negtype; 9278 dns_rdatatype_t rdtype, covers; 9279 bool expiredok = EXPIREDOK(rbtiterator); 9280 9281 header = rbtiterator->current; 9282 if (header == NULL) { 9283 return (ISC_R_NOMORE); 9284 } 9285 9286 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9287 isc_rwlocktype_read); 9288 9289 type = header->type; 9290 rdtype = RBTDB_RDATATYPE_BASE(header->type); 9291 if (NEGATIVE(header)) { 9292 covers = RBTDB_RDATATYPE_EXT(header->type); 9293 negtype = RBTDB_RDATATYPE_VALUE(covers, 0); 9294 } else { 9295 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype); 9296 } 9297 9298 /* 9299 * Find the start of the header chain for the next type 9300 * by walking back up the list. 9301 */ 9302 top_next = header->next; 9303 while (top_next != NULL && 9304 (top_next->type == type || top_next->type == negtype)) 9305 { 9306 top_next = top_next->next; 9307 } 9308 if (expiredok) { 9309 /* 9310 * Keep walking down the list if possible or 9311 * start the next type. 9312 */ 9313 header = header->down != NULL ? header->down : top_next; 9314 } else { 9315 header = top_next; 9316 } 9317 for (; header != NULL; header = top_next) { 9318 top_next = header->next; 9319 do { 9320 if (expiredok) { 9321 if (!NONEXISTENT(header)) { 9322 break; 9323 } 9324 header = header->down; 9325 } else if (header->serial <= serial && !IGNORE(header)) 9326 { 9327 if (!iterator_active(rbtdb, rbtiterator, 9328 header)) 9329 { 9330 header = NULL; 9331 } 9332 break; 9333 } else { 9334 header = header->down; 9335 } 9336 } while (header != NULL); 9337 if (header != NULL) { 9338 break; 9339 } 9340 /* 9341 * Find the start of the header chain for the next type 9342 * by walking back up the list. 9343 */ 9344 while (top_next != NULL && 9345 (top_next->type == type || top_next->type == negtype)) 9346 { 9347 top_next = top_next->next; 9348 } 9349 } 9350 9351 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9352 isc_rwlocktype_read); 9353 9354 rbtiterator->current = header; 9355 9356 if (header == NULL) { 9357 return (ISC_R_NOMORE); 9358 } 9359 9360 return (ISC_R_SUCCESS); 9361 } 9362 9363 static void 9364 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) { 9365 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; 9366 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); 9367 dns_rbtnode_t *rbtnode = rbtiterator->common.node; 9368 rdatasetheader_t *header; 9369 9370 header = rbtiterator->current; 9371 REQUIRE(header != NULL); 9372 9373 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9374 isc_rwlocktype_read); 9375 9376 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now, 9377 isc_rwlocktype_read, rdataset); 9378 9379 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9380 isc_rwlocktype_read); 9381 } 9382 9383 /* 9384 * Database Iterator Methods 9385 */ 9386 9387 static void 9388 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) { 9389 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9390 dns_rbtnode_t *node = rbtdbiter->node; 9391 9392 if (node == NULL) { 9393 return; 9394 } 9395 9396 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none); 9397 reactivate_node(rbtdb, node, rbtdbiter->tree_locked); 9398 } 9399 9400 static void 9401 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) { 9402 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9403 dns_rbtnode_t *node = rbtdbiter->node; 9404 nodelock_t *lock; 9405 9406 if (node == NULL) { 9407 return; 9408 } 9409 9410 lock = &rbtdb->node_locks[node->locknum].lock; 9411 NODE_LOCK(lock, isc_rwlocktype_read); 9412 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, 9413 rbtdbiter->tree_locked, false); 9414 NODE_UNLOCK(lock, isc_rwlocktype_read); 9415 9416 rbtdbiter->node = NULL; 9417 } 9418 9419 static void 9420 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) { 9421 dns_rbtnode_t *node; 9422 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9423 bool was_read_locked = false; 9424 nodelock_t *lock; 9425 int i; 9426 9427 if (rbtdbiter->delcnt != 0) { 9428 /* 9429 * Note that "%d node of %d in tree" can report things like 9430 * "flush_deletions: 59 nodes of 41 in tree". This means 9431 * That some nodes appear on the deletions list more than 9432 * once. Only the last occurrence will actually be deleted. 9433 */ 9434 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 9435 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 9436 "flush_deletions: %d nodes of %d in tree", 9437 rbtdbiter->delcnt, 9438 dns_rbt_nodecount(rbtdb->tree)); 9439 9440 if (rbtdbiter->tree_locked == isc_rwlocktype_read) { 9441 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9442 was_read_locked = true; 9443 } 9444 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 9445 rbtdbiter->tree_locked = isc_rwlocktype_write; 9446 9447 for (i = 0; i < rbtdbiter->delcnt; i++) { 9448 node = rbtdbiter->deletions[i]; 9449 lock = &rbtdb->node_locks[node->locknum].lock; 9450 9451 NODE_LOCK(lock, isc_rwlocktype_read); 9452 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, 9453 rbtdbiter->tree_locked, false); 9454 NODE_UNLOCK(lock, isc_rwlocktype_read); 9455 } 9456 9457 rbtdbiter->delcnt = 0; 9458 9459 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); 9460 if (was_read_locked) { 9461 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9462 rbtdbiter->tree_locked = isc_rwlocktype_read; 9463 } else { 9464 rbtdbiter->tree_locked = isc_rwlocktype_none; 9465 } 9466 } 9467 } 9468 9469 static void 9470 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) { 9471 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9472 9473 REQUIRE(rbtdbiter->paused); 9474 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none); 9475 9476 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9477 rbtdbiter->tree_locked = isc_rwlocktype_read; 9478 9479 rbtdbiter->paused = false; 9480 } 9481 9482 static void 9483 dbiterator_destroy(dns_dbiterator_t **iteratorp) { 9484 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp); 9485 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; 9486 dns_db_t *db = NULL; 9487 9488 if (rbtdbiter->tree_locked == isc_rwlocktype_read) { 9489 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9490 rbtdbiter->tree_locked = isc_rwlocktype_none; 9491 } else { 9492 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none); 9493 } 9494 9495 dereference_iter_node(rbtdbiter); 9496 9497 flush_deletions(rbtdbiter); 9498 9499 dns_db_attach(rbtdbiter->common.db, &db); 9500 dns_db_detach(&rbtdbiter->common.db); 9501 9502 dns_rbtnodechain_reset(&rbtdbiter->chain); 9503 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9504 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter)); 9505 dns_db_detach(&db); 9506 9507 *iteratorp = NULL; 9508 } 9509 9510 static isc_result_t 9511 dbiterator_first(dns_dbiterator_t *iterator) { 9512 isc_result_t result; 9513 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9514 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9515 dns_name_t *name, *origin; 9516 9517 if (rbtdbiter->result != ISC_R_SUCCESS && 9518 rbtdbiter->result != ISC_R_NOTFOUND && 9519 rbtdbiter->result != DNS_R_PARTIALMATCH && 9520 rbtdbiter->result != ISC_R_NOMORE) 9521 { 9522 return (rbtdbiter->result); 9523 } 9524 9525 if (rbtdbiter->paused) { 9526 resume_iteration(rbtdbiter); 9527 } 9528 9529 dereference_iter_node(rbtdbiter); 9530 9531 name = dns_fixedname_name(&rbtdbiter->name); 9532 origin = dns_fixedname_name(&rbtdbiter->origin); 9533 dns_rbtnodechain_reset(&rbtdbiter->chain); 9534 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9535 9536 if (rbtdbiter->nsec3only) { 9537 rbtdbiter->current = &rbtdbiter->nsec3chain; 9538 result = dns_rbtnodechain_first(rbtdbiter->current, 9539 rbtdb->nsec3, name, origin); 9540 } else { 9541 rbtdbiter->current = &rbtdbiter->chain; 9542 result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, 9543 name, origin); 9544 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) { 9545 rbtdbiter->current = &rbtdbiter->nsec3chain; 9546 result = dns_rbtnodechain_first( 9547 rbtdbiter->current, rbtdb->nsec3, name, origin); 9548 } 9549 } 9550 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 9551 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9552 NULL, &rbtdbiter->node); 9553 if (result == ISC_R_SUCCESS) { 9554 rbtdbiter->new_origin = true; 9555 reference_iter_node(rbtdbiter); 9556 } 9557 } else { 9558 INSIST(result == ISC_R_NOTFOUND); 9559 result = ISC_R_NOMORE; /* The tree is empty. */ 9560 } 9561 9562 rbtdbiter->result = result; 9563 9564 if (result != ISC_R_SUCCESS) { 9565 ENSURE(!rbtdbiter->paused); 9566 } 9567 9568 return (result); 9569 } 9570 9571 static isc_result_t 9572 dbiterator_last(dns_dbiterator_t *iterator) { 9573 isc_result_t result; 9574 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9575 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9576 dns_name_t *name, *origin; 9577 9578 if (rbtdbiter->result != ISC_R_SUCCESS && 9579 rbtdbiter->result != ISC_R_NOTFOUND && 9580 rbtdbiter->result != DNS_R_PARTIALMATCH && 9581 rbtdbiter->result != ISC_R_NOMORE) 9582 { 9583 return (rbtdbiter->result); 9584 } 9585 9586 if (rbtdbiter->paused) { 9587 resume_iteration(rbtdbiter); 9588 } 9589 9590 dereference_iter_node(rbtdbiter); 9591 9592 name = dns_fixedname_name(&rbtdbiter->name); 9593 origin = dns_fixedname_name(&rbtdbiter->origin); 9594 dns_rbtnodechain_reset(&rbtdbiter->chain); 9595 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9596 9597 result = ISC_R_NOTFOUND; 9598 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) { 9599 rbtdbiter->current = &rbtdbiter->nsec3chain; 9600 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, 9601 name, origin); 9602 } 9603 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) { 9604 rbtdbiter->current = &rbtdbiter->chain; 9605 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, 9606 name, origin); 9607 } 9608 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { 9609 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9610 NULL, &rbtdbiter->node); 9611 if (result == ISC_R_SUCCESS) { 9612 rbtdbiter->new_origin = true; 9613 reference_iter_node(rbtdbiter); 9614 } 9615 } else { 9616 INSIST(result == ISC_R_NOTFOUND); 9617 result = ISC_R_NOMORE; /* The tree is empty. */ 9618 } 9619 9620 rbtdbiter->result = result; 9621 9622 return (result); 9623 } 9624 9625 static isc_result_t 9626 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name) { 9627 isc_result_t result, tresult; 9628 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9629 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9630 dns_name_t *iname, *origin; 9631 9632 if (rbtdbiter->result != ISC_R_SUCCESS && 9633 rbtdbiter->result != ISC_R_NOTFOUND && 9634 rbtdbiter->result != DNS_R_PARTIALMATCH && 9635 rbtdbiter->result != ISC_R_NOMORE) 9636 { 9637 return (rbtdbiter->result); 9638 } 9639 9640 if (rbtdbiter->paused) { 9641 resume_iteration(rbtdbiter); 9642 } 9643 9644 dereference_iter_node(rbtdbiter); 9645 9646 iname = dns_fixedname_name(&rbtdbiter->name); 9647 origin = dns_fixedname_name(&rbtdbiter->origin); 9648 dns_rbtnodechain_reset(&rbtdbiter->chain); 9649 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); 9650 9651 if (rbtdbiter->nsec3only) { 9652 rbtdbiter->current = &rbtdbiter->nsec3chain; 9653 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, 9654 &rbtdbiter->node, rbtdbiter->current, 9655 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 9656 } else if (rbtdbiter->nonsec3) { 9657 rbtdbiter->current = &rbtdbiter->chain; 9658 result = dns_rbt_findnode(rbtdb->tree, name, NULL, 9659 &rbtdbiter->node, rbtdbiter->current, 9660 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 9661 } else { 9662 /* 9663 * Stay on main chain if not found on either chain. 9664 */ 9665 rbtdbiter->current = &rbtdbiter->chain; 9666 result = dns_rbt_findnode(rbtdb->tree, name, NULL, 9667 &rbtdbiter->node, rbtdbiter->current, 9668 DNS_RBTFIND_EMPTYDATA, NULL, NULL); 9669 if (result == DNS_R_PARTIALMATCH) { 9670 dns_rbtnode_t *node = NULL; 9671 tresult = dns_rbt_findnode( 9672 rbtdb->nsec3, name, NULL, &node, 9673 &rbtdbiter->nsec3chain, DNS_RBTFIND_EMPTYDATA, 9674 NULL, NULL); 9675 if (tresult == ISC_R_SUCCESS) { 9676 rbtdbiter->node = node; 9677 rbtdbiter->current = &rbtdbiter->nsec3chain; 9678 result = tresult; 9679 } 9680 } 9681 } 9682 9683 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) { 9684 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname, 9685 origin, NULL); 9686 if (tresult == ISC_R_SUCCESS) { 9687 rbtdbiter->new_origin = true; 9688 reference_iter_node(rbtdbiter); 9689 } else { 9690 result = tresult; 9691 rbtdbiter->node = NULL; 9692 } 9693 } else { 9694 rbtdbiter->node = NULL; 9695 } 9696 9697 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS 9698 : result; 9699 9700 return (result); 9701 } 9702 9703 static isc_result_t 9704 dbiterator_prev(dns_dbiterator_t *iterator) { 9705 isc_result_t result; 9706 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9707 dns_name_t *name, *origin; 9708 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9709 9710 REQUIRE(rbtdbiter->node != NULL); 9711 9712 if (rbtdbiter->result != ISC_R_SUCCESS) { 9713 return (rbtdbiter->result); 9714 } 9715 9716 if (rbtdbiter->paused) { 9717 resume_iteration(rbtdbiter); 9718 } 9719 9720 name = dns_fixedname_name(&rbtdbiter->name); 9721 origin = dns_fixedname_name(&rbtdbiter->origin); 9722 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin); 9723 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only && 9724 !rbtdbiter->nonsec3 && &rbtdbiter->nsec3chain == rbtdbiter->current) 9725 { 9726 rbtdbiter->current = &rbtdbiter->chain; 9727 dns_rbtnodechain_reset(rbtdbiter->current); 9728 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, 9729 name, origin); 9730 if (result == ISC_R_NOTFOUND) { 9731 result = ISC_R_NOMORE; 9732 } 9733 } 9734 9735 dereference_iter_node(rbtdbiter); 9736 9737 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { 9738 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); 9739 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9740 NULL, &rbtdbiter->node); 9741 } 9742 9743 if (result == ISC_R_SUCCESS) { 9744 reference_iter_node(rbtdbiter); 9745 } 9746 9747 rbtdbiter->result = result; 9748 9749 return (result); 9750 } 9751 9752 static isc_result_t 9753 dbiterator_next(dns_dbiterator_t *iterator) { 9754 isc_result_t result; 9755 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9756 dns_name_t *name, *origin; 9757 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9758 9759 REQUIRE(rbtdbiter->node != NULL); 9760 9761 if (rbtdbiter->result != ISC_R_SUCCESS) { 9762 return (rbtdbiter->result); 9763 } 9764 9765 if (rbtdbiter->paused) { 9766 resume_iteration(rbtdbiter); 9767 } 9768 9769 name = dns_fixedname_name(&rbtdbiter->name); 9770 origin = dns_fixedname_name(&rbtdbiter->origin); 9771 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin); 9772 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only && 9773 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) 9774 { 9775 rbtdbiter->current = &rbtdbiter->nsec3chain; 9776 dns_rbtnodechain_reset(rbtdbiter->current); 9777 result = dns_rbtnodechain_first(rbtdbiter->current, 9778 rbtdb->nsec3, name, origin); 9779 if (result == ISC_R_NOTFOUND) { 9780 result = ISC_R_NOMORE; 9781 } 9782 } 9783 9784 dereference_iter_node(rbtdbiter); 9785 9786 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { 9787 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); 9788 result = dns_rbtnodechain_current(rbtdbiter->current, NULL, 9789 NULL, &rbtdbiter->node); 9790 } 9791 if (result == ISC_R_SUCCESS) { 9792 reference_iter_node(rbtdbiter); 9793 } 9794 9795 rbtdbiter->result = result; 9796 9797 return (result); 9798 } 9799 9800 static isc_result_t 9801 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 9802 dns_name_t *name) { 9803 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9804 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9805 dns_rbtnode_t *node = rbtdbiter->node; 9806 isc_result_t result; 9807 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name); 9808 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); 9809 9810 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS); 9811 REQUIRE(rbtdbiter->node != NULL); 9812 9813 if (rbtdbiter->paused) { 9814 resume_iteration(rbtdbiter); 9815 } 9816 9817 if (name != NULL) { 9818 if (rbtdbiter->common.relative_names) { 9819 origin = NULL; 9820 } 9821 result = dns_name_concatenate(nodename, origin, name, NULL); 9822 if (result != ISC_R_SUCCESS) { 9823 return (result); 9824 } 9825 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin) { 9826 result = DNS_R_NEWORIGIN; 9827 } 9828 } else { 9829 result = ISC_R_SUCCESS; 9830 } 9831 9832 new_reference(rbtdb, node, isc_rwlocktype_none); 9833 9834 *nodep = rbtdbiter->node; 9835 9836 if (iterator->cleaning && result == ISC_R_SUCCESS) { 9837 isc_result_t expire_result; 9838 9839 /* 9840 * If the deletion array is full, flush it before trying 9841 * to expire the current node. The current node can't 9842 * fully deleted while the iteration cursor is still on it. 9843 */ 9844 if (rbtdbiter->delcnt == DELETION_BATCH_MAX) { 9845 flush_deletions(rbtdbiter); 9846 } 9847 9848 expire_result = expirenode(iterator->db, *nodep, 0); 9849 9850 /* 9851 * expirenode() currently always returns success. 9852 */ 9853 if (expire_result == ISC_R_SUCCESS && node->down == NULL) { 9854 rbtdbiter->deletions[rbtdbiter->delcnt++] = node; 9855 isc_refcount_increment(&node->references); 9856 } 9857 } 9858 9859 return (result); 9860 } 9861 9862 static isc_result_t 9863 dbiterator_pause(dns_dbiterator_t *iterator) { 9864 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; 9865 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9866 9867 if (rbtdbiter->result != ISC_R_SUCCESS && 9868 rbtdbiter->result != ISC_R_NOTFOUND && 9869 rbtdbiter->result != DNS_R_PARTIALMATCH && 9870 rbtdbiter->result != ISC_R_NOMORE) 9871 { 9872 return (rbtdbiter->result); 9873 } 9874 9875 if (rbtdbiter->paused) { 9876 return (ISC_R_SUCCESS); 9877 } 9878 9879 rbtdbiter->paused = true; 9880 9881 if (rbtdbiter->tree_locked != isc_rwlocktype_none) { 9882 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read); 9883 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); 9884 rbtdbiter->tree_locked = isc_rwlocktype_none; 9885 } 9886 9887 flush_deletions(rbtdbiter); 9888 9889 return (ISC_R_SUCCESS); 9890 } 9891 9892 static isc_result_t 9893 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) { 9894 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; 9895 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); 9896 9897 if (rbtdbiter->result != ISC_R_SUCCESS) { 9898 return (rbtdbiter->result); 9899 } 9900 9901 dns_name_copynf(origin, name); 9902 return (ISC_R_SUCCESS); 9903 } 9904 9905 static void 9906 setownercase(rdatasetheader_t *header, const dns_name_t *name) { 9907 unsigned int i; 9908 bool fully_lower; 9909 9910 /* 9911 * We do not need to worry about label lengths as they are all 9912 * less than or equal to 63. 9913 */ 9914 memset(header->upper, 0, sizeof(header->upper)); 9915 fully_lower = true; 9916 for (i = 0; i < name->length; i++) { 9917 if (isupper(name->ndata[i])) { 9918 header->upper[i / 8] |= 1 << (i % 8); 9919 fully_lower = false; 9920 } 9921 } 9922 RDATASET_ATTR_SET(header, RDATASET_ATTR_CASESET); 9923 if (ISC_LIKELY(fully_lower)) { 9924 RDATASET_ATTR_SET(header, RDATASET_ATTR_CASEFULLYLOWER); 9925 } 9926 } 9927 9928 static void 9929 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name) { 9930 dns_rbtdb_t *rbtdb = rdataset->private1; 9931 dns_rbtnode_t *rbtnode = rdataset->private2; 9932 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 9933 rdatasetheader_t *header; 9934 9935 header = (struct rdatasetheader *)(raw - sizeof(*header)); 9936 9937 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9938 isc_rwlocktype_write); 9939 setownercase(header, name); 9940 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9941 isc_rwlocktype_write); 9942 } 9943 9944 static void 9945 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name) { 9946 dns_rbtdb_t *rbtdb = rdataset->private1; 9947 dns_rbtnode_t *rbtnode = rdataset->private2; 9948 unsigned char *raw = rdataset->private3; /* RDATASLAB */ 9949 rdatasetheader_t *header = NULL; 9950 uint8_t mask = (1 << 7); 9951 uint8_t bits = 0; 9952 9953 header = (struct rdatasetheader *)(raw - sizeof(*header)); 9954 9955 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9956 isc_rwlocktype_read); 9957 9958 if (!CASESET(header)) { 9959 goto unlock; 9960 } 9961 9962 if (ISC_LIKELY(CASEFULLYLOWER(header))) { 9963 for (size_t i = 0; i < name->length; i++) { 9964 name->ndata[i] = tolower(name->ndata[i]); 9965 } 9966 } else { 9967 for (size_t i = 0; i < name->length; i++) { 9968 if (mask == (1 << 7)) { 9969 bits = header->upper[i / 8]; 9970 mask = 1; 9971 } else { 9972 mask <<= 1; 9973 } 9974 9975 name->ndata[i] = ((bits & mask) != 0) 9976 ? toupper(name->ndata[i]) 9977 : tolower(name->ndata[i]); 9978 } 9979 } 9980 9981 unlock: 9982 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, 9983 isc_rwlocktype_read); 9984 } 9985 9986 struct rbtdb_glue { 9987 struct rbtdb_glue *next; 9988 dns_fixedname_t fixedname; 9989 dns_rdataset_t rdataset_a; 9990 dns_rdataset_t sigrdataset_a; 9991 dns_rdataset_t rdataset_aaaa; 9992 dns_rdataset_t sigrdataset_aaaa; 9993 }; 9994 9995 typedef struct { 9996 rbtdb_glue_t *glue_list; 9997 dns_rbtdb_t *rbtdb; 9998 rbtdb_version_t *rbtversion; 9999 } rbtdb_glue_additionaldata_ctx_t; 10000 10001 static void 10002 free_gluelist(rbtdb_glue_t *glue_list, dns_rbtdb_t *rbtdb) { 10003 rbtdb_glue_t *cur, *cur_next; 10004 10005 if (glue_list == (void *)-1) { 10006 return; 10007 } 10008 10009 cur = glue_list; 10010 while (cur != NULL) { 10011 cur_next = cur->next; 10012 10013 if (dns_rdataset_isassociated(&cur->rdataset_a)) { 10014 dns_rdataset_disassociate(&cur->rdataset_a); 10015 } 10016 if (dns_rdataset_isassociated(&cur->sigrdataset_a)) { 10017 dns_rdataset_disassociate(&cur->sigrdataset_a); 10018 } 10019 10020 if (dns_rdataset_isassociated(&cur->rdataset_aaaa)) { 10021 dns_rdataset_disassociate(&cur->rdataset_aaaa); 10022 } 10023 if (dns_rdataset_isassociated(&cur->sigrdataset_aaaa)) { 10024 dns_rdataset_disassociate(&cur->sigrdataset_aaaa); 10025 } 10026 10027 dns_rdataset_invalidate(&cur->rdataset_a); 10028 dns_rdataset_invalidate(&cur->sigrdataset_a); 10029 dns_rdataset_invalidate(&cur->rdataset_aaaa); 10030 dns_rdataset_invalidate(&cur->sigrdataset_aaaa); 10031 10032 isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur)); 10033 cur = cur_next; 10034 } 10035 } 10036 10037 static void 10038 free_gluetable(rbtdb_version_t *version) { 10039 dns_rbtdb_t *rbtdb; 10040 size_t size, i; 10041 10042 RWLOCK(&version->glue_rwlock, isc_rwlocktype_write); 10043 10044 rbtdb = version->rbtdb; 10045 10046 for (i = 0; i < HASHSIZE(version->glue_table_bits); i++) { 10047 rbtdb_glue_table_node_t *cur, *cur_next; 10048 10049 cur = version->glue_table[i]; 10050 while (cur != NULL) { 10051 cur_next = cur->next; 10052 /* isc_refcount_decrement(&cur->node->references); */ 10053 cur->node = NULL; 10054 free_gluelist(cur->glue_list, rbtdb); 10055 cur->glue_list = NULL; 10056 isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur)); 10057 cur = cur_next; 10058 } 10059 version->glue_table[i] = NULL; 10060 } 10061 10062 size = HASHSIZE(version->glue_table_bits) * 10063 sizeof(*version->glue_table); 10064 isc_mem_put(rbtdb->common.mctx, version->glue_table, size); 10065 10066 RWUNLOCK(&version->glue_rwlock, isc_rwlocktype_write); 10067 } 10068 10069 static uint32_t 10070 rehash_bits(rbtdb_version_t *version, size_t newcount) { 10071 uint32_t oldbits = version->glue_table_bits; 10072 uint32_t newbits = oldbits; 10073 10074 while (newcount >= HASHSIZE(newbits) && 10075 newbits <= RBTDB_GLUE_TABLE_MAX_BITS) 10076 { 10077 newbits += 1; 10078 } 10079 10080 return (newbits); 10081 } 10082 10083 /*% 10084 * Write lock (version->glue_rwlock) must be held. 10085 */ 10086 static void 10087 rehash_gluetable(rbtdb_version_t *version) { 10088 uint32_t oldbits, newbits; 10089 size_t newsize, oldcount, i; 10090 rbtdb_glue_table_node_t **oldtable; 10091 10092 oldbits = version->glue_table_bits; 10093 oldcount = HASHSIZE(oldbits); 10094 oldtable = version->glue_table; 10095 10096 newbits = rehash_bits(version, version->glue_table_nodecount); 10097 newsize = HASHSIZE(newbits) * sizeof(version->glue_table[0]); 10098 10099 version->glue_table = isc_mem_get(version->rbtdb->common.mctx, newsize); 10100 version->glue_table_bits = newbits; 10101 memset(version->glue_table, 0, newsize); 10102 10103 for (i = 0; i < oldcount; i++) { 10104 rbtdb_glue_table_node_t *gluenode; 10105 rbtdb_glue_table_node_t *nextgluenode; 10106 for (gluenode = oldtable[i]; gluenode != NULL; 10107 gluenode = nextgluenode) 10108 { 10109 uint32_t hash = isc_hash32( 10110 &gluenode->node, sizeof(gluenode->node), true); 10111 uint32_t idx = hash_32(hash, newbits); 10112 nextgluenode = gluenode->next; 10113 gluenode->next = version->glue_table[idx]; 10114 version->glue_table[idx] = gluenode; 10115 } 10116 } 10117 10118 isc_mem_put(version->rbtdb->common.mctx, oldtable, 10119 oldcount * sizeof(*version->glue_table)); 10120 10121 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_ZONE, 10122 ISC_LOG_DEBUG(3), 10123 "rehash_gluetable(): " 10124 "resized glue table from %zu to " 10125 "%zu", 10126 oldcount, newsize / sizeof(version->glue_table[0])); 10127 } 10128 10129 static void 10130 maybe_rehash_gluetable(rbtdb_version_t *version) { 10131 size_t overcommit = HASHSIZE(version->glue_table_bits) * 10132 RBTDB_GLUE_TABLE_OVERCOMMIT; 10133 if (ISC_LIKELY(version->glue_table_nodecount < overcommit)) { 10134 return; 10135 } 10136 10137 rehash_gluetable(version); 10138 } 10139 10140 static isc_result_t 10141 glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype) { 10142 rbtdb_glue_additionaldata_ctx_t *ctx; 10143 isc_result_t result; 10144 dns_fixedname_t fixedname_a; 10145 dns_name_t *name_a = NULL; 10146 dns_rdataset_t rdataset_a, sigrdataset_a; 10147 dns_rbtnode_t *node_a = NULL; 10148 dns_fixedname_t fixedname_aaaa; 10149 dns_name_t *name_aaaa = NULL; 10150 dns_rdataset_t rdataset_aaaa, sigrdataset_aaaa; 10151 dns_rbtnode_t *node_aaaa = NULL; 10152 rbtdb_glue_t *glue = NULL; 10153 dns_name_t *gluename = NULL; 10154 10155 /* 10156 * NS records want addresses in additional records. 10157 */ 10158 INSIST(qtype == dns_rdatatype_a); 10159 10160 ctx = (rbtdb_glue_additionaldata_ctx_t *)arg; 10161 10162 name_a = dns_fixedname_initname(&fixedname_a); 10163 dns_rdataset_init(&rdataset_a); 10164 dns_rdataset_init(&sigrdataset_a); 10165 10166 name_aaaa = dns_fixedname_initname(&fixedname_aaaa); 10167 dns_rdataset_init(&rdataset_aaaa); 10168 dns_rdataset_init(&sigrdataset_aaaa); 10169 10170 result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion, 10171 dns_rdatatype_a, DNS_DBFIND_GLUEOK, 0, 10172 (dns_dbnode_t **)&node_a, name_a, &rdataset_a, 10173 &sigrdataset_a); 10174 if (result == DNS_R_GLUE) { 10175 glue = isc_mem_get(ctx->rbtdb->common.mctx, sizeof(*glue)); 10176 10177 gluename = dns_fixedname_initname(&glue->fixedname); 10178 dns_name_copynf(name_a, gluename); 10179 10180 dns_rdataset_init(&glue->rdataset_a); 10181 dns_rdataset_init(&glue->sigrdataset_a); 10182 dns_rdataset_init(&glue->rdataset_aaaa); 10183 dns_rdataset_init(&glue->sigrdataset_aaaa); 10184 10185 dns_rdataset_clone(&rdataset_a, &glue->rdataset_a); 10186 if (dns_rdataset_isassociated(&sigrdataset_a)) { 10187 dns_rdataset_clone(&sigrdataset_a, 10188 &glue->sigrdataset_a); 10189 } 10190 } 10191 10192 result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion, 10193 dns_rdatatype_aaaa, DNS_DBFIND_GLUEOK, 0, 10194 (dns_dbnode_t **)&node_aaaa, name_aaaa, 10195 &rdataset_aaaa, &sigrdataset_aaaa); 10196 if (result == DNS_R_GLUE) { 10197 if (glue == NULL) { 10198 glue = isc_mem_get(ctx->rbtdb->common.mctx, 10199 sizeof(*glue)); 10200 10201 gluename = dns_fixedname_initname(&glue->fixedname); 10202 dns_name_copynf(name_aaaa, gluename); 10203 10204 dns_rdataset_init(&glue->rdataset_a); 10205 dns_rdataset_init(&glue->sigrdataset_a); 10206 dns_rdataset_init(&glue->rdataset_aaaa); 10207 dns_rdataset_init(&glue->sigrdataset_aaaa); 10208 } else { 10209 INSIST(node_a == node_aaaa); 10210 INSIST(dns_name_equal(name_a, name_aaaa)); 10211 } 10212 10213 dns_rdataset_clone(&rdataset_aaaa, &glue->rdataset_aaaa); 10214 if (dns_rdataset_isassociated(&sigrdataset_aaaa)) { 10215 dns_rdataset_clone(&sigrdataset_aaaa, 10216 &glue->sigrdataset_aaaa); 10217 } 10218 } 10219 10220 if (glue != NULL) { 10221 glue->next = ctx->glue_list; 10222 ctx->glue_list = glue; 10223 } 10224 10225 result = ISC_R_SUCCESS; 10226 10227 if (dns_rdataset_isassociated(&rdataset_a)) { 10228 rdataset_disassociate(&rdataset_a); 10229 } 10230 if (dns_rdataset_isassociated(&sigrdataset_a)) { 10231 rdataset_disassociate(&sigrdataset_a); 10232 } 10233 10234 if (dns_rdataset_isassociated(&rdataset_aaaa)) { 10235 rdataset_disassociate(&rdataset_aaaa); 10236 } 10237 if (dns_rdataset_isassociated(&sigrdataset_aaaa)) { 10238 rdataset_disassociate(&sigrdataset_aaaa); 10239 } 10240 10241 if (node_a != NULL) { 10242 detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_a); 10243 } 10244 if (node_aaaa != NULL) { 10245 detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_aaaa); 10246 } 10247 10248 return (result); 10249 } 10250 10251 static isc_result_t 10252 rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version, 10253 dns_message_t *msg) { 10254 dns_rbtdb_t *rbtdb = rdataset->private1; 10255 dns_rbtnode_t *node = rdataset->private2; 10256 rbtdb_version_t *rbtversion = version; 10257 uint32_t idx; 10258 rbtdb_glue_table_node_t *cur; 10259 bool found = false; 10260 bool restarted = false; 10261 rbtdb_glue_t *ge; 10262 rbtdb_glue_additionaldata_ctx_t ctx; 10263 isc_result_t result; 10264 uint64_t hash; 10265 10266 REQUIRE(rdataset->type == dns_rdatatype_ns); 10267 REQUIRE(rbtdb == rbtversion->rbtdb); 10268 REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb)); 10269 10270 /* 10271 * The glue table cache that forms a part of the DB version 10272 * structure is not explicitly bounded and there's no cache 10273 * cleaning. The zone data size itself is an implicit bound. 10274 * 10275 * The key into the glue hashtable is the node pointer. This is 10276 * because the glue hashtable is a property of the DB version, 10277 * and the glue is keyed for the ownername/NS tuple. We don't 10278 * bother with using an expensive dns_name_t comparison here as 10279 * the node pointer is a fixed value that won't change for a DB 10280 * version and can be compared directly. 10281 */ 10282 hash = isc_hash_function(&node, sizeof(node), true); 10283 10284 restart: 10285 /* 10286 * First, check if we have the additional entries already cached 10287 * in the glue table. 10288 */ 10289 RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read); 10290 10291 idx = hash_32(hash, rbtversion->glue_table_bits); 10292 10293 for (cur = rbtversion->glue_table[idx]; cur != NULL; cur = cur->next) { 10294 if (cur->node == node) { 10295 break; 10296 } 10297 } 10298 10299 if (cur == NULL) { 10300 goto no_glue; 10301 } 10302 /* 10303 * We found a cached result. Add it to the message and 10304 * return. 10305 */ 10306 found = true; 10307 ge = cur->glue_list; 10308 10309 /* 10310 * (void *) -1 is a special value that means no glue is 10311 * present in the zone. 10312 */ 10313 if (ge == (void *)-1) { 10314 if (!restarted && (rbtdb->gluecachestats != NULL)) { 10315 isc_stats_increment( 10316 rbtdb->gluecachestats, 10317 dns_gluecachestatscounter_hits_absent); 10318 } 10319 goto no_glue; 10320 } else { 10321 if (!restarted && (rbtdb->gluecachestats != NULL)) { 10322 isc_stats_increment( 10323 rbtdb->gluecachestats, 10324 dns_gluecachestatscounter_hits_present); 10325 } 10326 } 10327 10328 for (; ge != NULL; ge = ge->next) { 10329 dns_name_t *name = NULL; 10330 dns_rdataset_t *rdataset_a = NULL; 10331 dns_rdataset_t *sigrdataset_a = NULL; 10332 dns_rdataset_t *rdataset_aaaa = NULL; 10333 dns_rdataset_t *sigrdataset_aaaa = NULL; 10334 dns_name_t *gluename = dns_fixedname_name(&ge->fixedname); 10335 10336 result = dns_message_gettempname(msg, &name); 10337 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) { 10338 goto no_glue; 10339 } 10340 10341 dns_name_copynf(gluename, name); 10342 10343 if (dns_rdataset_isassociated(&ge->rdataset_a)) { 10344 result = dns_message_gettemprdataset(msg, &rdataset_a); 10345 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) { 10346 dns_message_puttempname(msg, &name); 10347 goto no_glue; 10348 } 10349 } 10350 10351 if (dns_rdataset_isassociated(&ge->sigrdataset_a)) { 10352 result = dns_message_gettemprdataset(msg, 10353 &sigrdataset_a); 10354 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) { 10355 if (rdataset_a != NULL) { 10356 dns_message_puttemprdataset( 10357 msg, &rdataset_a); 10358 } 10359 dns_message_puttempname(msg, &name); 10360 goto no_glue; 10361 } 10362 } 10363 10364 if (dns_rdataset_isassociated(&ge->rdataset_aaaa)) { 10365 result = dns_message_gettemprdataset(msg, 10366 &rdataset_aaaa); 10367 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) { 10368 dns_message_puttempname(msg, &name); 10369 if (rdataset_a != NULL) { 10370 dns_message_puttemprdataset( 10371 msg, &rdataset_a); 10372 } 10373 if (sigrdataset_a != NULL) { 10374 dns_message_puttemprdataset( 10375 msg, &sigrdataset_a); 10376 } 10377 goto no_glue; 10378 } 10379 } 10380 10381 if (dns_rdataset_isassociated(&ge->sigrdataset_aaaa)) { 10382 result = dns_message_gettemprdataset(msg, 10383 &sigrdataset_aaaa); 10384 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) { 10385 dns_message_puttempname(msg, &name); 10386 if (rdataset_a != NULL) { 10387 dns_message_puttemprdataset( 10388 msg, &rdataset_a); 10389 } 10390 if (sigrdataset_a != NULL) { 10391 dns_message_puttemprdataset( 10392 msg, &sigrdataset_a); 10393 } 10394 if (rdataset_aaaa != NULL) { 10395 dns_message_puttemprdataset( 10396 msg, &rdataset_aaaa); 10397 } 10398 goto no_glue; 10399 } 10400 } 10401 10402 if (ISC_LIKELY(rdataset_a != NULL)) { 10403 dns_rdataset_clone(&ge->rdataset_a, rdataset_a); 10404 ISC_LIST_APPEND(name->list, rdataset_a, link); 10405 } 10406 10407 if (sigrdataset_a != NULL) { 10408 dns_rdataset_clone(&ge->sigrdataset_a, sigrdataset_a); 10409 ISC_LIST_APPEND(name->list, sigrdataset_a, link); 10410 } 10411 10412 if (rdataset_aaaa != NULL) { 10413 dns_rdataset_clone(&ge->rdataset_aaaa, rdataset_aaaa); 10414 ISC_LIST_APPEND(name->list, rdataset_aaaa, link); 10415 } 10416 if (sigrdataset_aaaa != NULL) { 10417 dns_rdataset_clone(&ge->sigrdataset_aaaa, 10418 sigrdataset_aaaa); 10419 ISC_LIST_APPEND(name->list, sigrdataset_aaaa, link); 10420 } 10421 10422 dns_message_addname(msg, name, DNS_SECTION_ADDITIONAL); 10423 } 10424 10425 no_glue: 10426 RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read); 10427 10428 if (found) { 10429 return (ISC_R_SUCCESS); 10430 } 10431 10432 if (restarted) { 10433 return (ISC_R_FAILURE); 10434 } 10435 10436 /* 10437 * No cached glue was found in the table. Cache it and restart 10438 * this function. 10439 * 10440 * Due to the gap between the read lock and the write lock, it's 10441 * possible that we may cache a duplicate glue table entry, but 10442 * we don't care. 10443 */ 10444 10445 ctx.glue_list = NULL; 10446 ctx.rbtdb = rbtdb; 10447 ctx.rbtversion = rbtversion; 10448 10449 RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write); 10450 10451 maybe_rehash_gluetable(rbtversion); 10452 idx = hash_32(hash, rbtversion->glue_table_bits); 10453 10454 (void)dns_rdataset_additionaldata(rdataset, glue_nsdname_cb, &ctx); 10455 10456 cur = isc_mem_get(rbtdb->common.mctx, sizeof(*cur)); 10457 10458 /* 10459 * XXXMUKS: it looks like the dns_dbversion is not destroyed 10460 * when named is terminated by a keyboard break. This doesn't 10461 * cleanup the node reference and keeps the process dangling. 10462 */ 10463 /* isc_refcount_increment0(&node->references); */ 10464 cur->node = node; 10465 10466 if (ctx.glue_list == NULL) { 10467 /* 10468 * No glue was found. Cache it so. 10469 */ 10470 cur->glue_list = (void *)-1; 10471 if (rbtdb->gluecachestats != NULL) { 10472 isc_stats_increment( 10473 rbtdb->gluecachestats, 10474 dns_gluecachestatscounter_inserts_absent); 10475 } 10476 } else { 10477 cur->glue_list = ctx.glue_list; 10478 if (rbtdb->gluecachestats != NULL) { 10479 isc_stats_increment( 10480 rbtdb->gluecachestats, 10481 dns_gluecachestatscounter_inserts_present); 10482 } 10483 } 10484 10485 cur->next = rbtversion->glue_table[idx]; 10486 rbtversion->glue_table[idx] = cur; 10487 rbtversion->glue_table_nodecount++; 10488 10489 RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write); 10490 10491 restarted = true; 10492 goto restart; 10493 10494 /* UNREACHABLE */ 10495 } 10496 10497 /*% 10498 * Routines for LRU-based cache management. 10499 */ 10500 10501 /*% 10502 * See if a given cache entry that is being reused needs to be updated 10503 * in the LRU-list. From the LRU management point of view, this function is 10504 * expected to return true for almost all cases. When used with threads, 10505 * however, this may cause a non-negligible performance penalty because a 10506 * writer lock will have to be acquired before updating the list. 10507 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this 10508 * function returns true if the entry has not been updated for some period of 10509 * time. We differentiate the NS or glue address case and the others since 10510 * experiments have shown that the former tends to be accessed relatively 10511 * infrequently and the cost of cache miss is higher (e.g., a missing NS records 10512 * may cause external queries at a higher level zone, involving more 10513 * transactions). 10514 * 10515 * Caller must hold the node (read or write) lock. 10516 */ 10517 static bool 10518 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) { 10519 if (RDATASET_ATTR_GET(header, (RDATASET_ATTR_NONEXISTENT | 10520 RDATASET_ATTR_ANCIENT | 10521 RDATASET_ATTR_ZEROTTL)) != 0) 10522 { 10523 return (false); 10524 } 10525 10526 #if DNS_RBTDB_LIMITLRUUPDATE 10527 if (header->type == dns_rdatatype_ns || 10528 (header->trust == dns_trust_glue && 10529 (header->type == dns_rdatatype_a || 10530 header->type == dns_rdatatype_aaaa))) 10531 { 10532 /* 10533 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE 10534 * seconds have passed since the previous update time. 10535 */ 10536 return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now); 10537 } 10538 10539 /* 10540 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds 10541 * have passed. 10542 */ 10543 return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now); 10544 #else 10545 UNUSED(now); 10546 10547 return (true); 10548 #endif /* if DNS_RBTDB_LIMITLRUUPDATE */ 10549 } 10550 10551 /*% 10552 * Update the timestamp of a given cache entry and move it to the head 10553 * of the corresponding LRU list. 10554 * 10555 * Caller must hold the node (write) lock. 10556 * 10557 * Note that the we do NOT touch the heap here, as the TTL has not changed. 10558 */ 10559 static void 10560 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) { 10561 INSIST(IS_CACHE(rbtdb)); 10562 10563 /* To be checked: can we really assume this? XXXMLG */ 10564 INSIST(ISC_LINK_LINKED(header, link)); 10565 10566 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link); 10567 header->last_used = now; 10568 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link); 10569 } 10570 10571 static size_t 10572 expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize, 10573 bool tree_locked) { 10574 rdatasetheader_t *header, *header_prev; 10575 size_t purged = 0; 10576 10577 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); 10578 header != NULL && purged <= purgesize; header = header_prev) 10579 { 10580 header_prev = ISC_LIST_PREV(header, link); 10581 /* 10582 * Unlink the entry at this point to avoid checking it 10583 * again even if it's currently used someone else and 10584 * cannot be purged at this moment. This entry won't be 10585 * referenced any more (so unlinking is safe) since the 10586 * TTL was reset to 0. 10587 */ 10588 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link); 10589 size_t header_size = rdataset_size(header); 10590 expire_header(rbtdb, header, tree_locked, expire_lru); 10591 purged += header_size; 10592 } 10593 10594 return (purged); 10595 } 10596 10597 /*% 10598 * Purge some stale (i.e. unused for some period - LRU based cleaning) cache 10599 * entries under the overmem condition. To recover from this condition quickly, 10600 * we cleanup entries up to the size of newly added rdata (passed as purgesize). 10601 * 10602 * This process is triggered while adding a new entry, and we specifically avoid 10603 * purging entries in the same LRU bucket as the one to which the new entry will 10604 * belong. Otherwise, we might purge entries of the same name of different RR 10605 * types while adding RRsets from a single response (consider the case where 10606 * we're adding A and AAAA glue records of the same NS name). 10607 */ 10608 static void 10609 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize, 10610 bool tree_locked) { 10611 unsigned int locknum; 10612 size_t purged = 0; 10613 10614 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count; 10615 locknum != locknum_start && purged <= purgesize; 10616 locknum = (locknum + 1) % rbtdb->node_lock_count) 10617 { 10618 NODE_LOCK(&rbtdb->node_locks[locknum].lock, 10619 isc_rwlocktype_write); 10620 10621 purged += expire_lru_headers(rbtdb, locknum, purgesize - purged, 10622 tree_locked); 10623 10624 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, 10625 isc_rwlocktype_write); 10626 } 10627 } 10628 10629 static void 10630 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked, 10631 expire_t reason) { 10632 set_ttl(rbtdb, header, 0); 10633 mark_header_ancient(rbtdb, header); 10634 10635 /* 10636 * Caller must hold the node (write) lock. 10637 */ 10638 10639 if (isc_refcount_current(&header->node->references) == 0) { 10640 /* 10641 * If no one else is using the node, we can clean it up now. 10642 * We first need to gain a new reference to the node to meet a 10643 * requirement of decrement_reference(). 10644 */ 10645 new_reference(rbtdb, header->node, isc_rwlocktype_write); 10646 decrement_reference(rbtdb, header->node, 0, 10647 isc_rwlocktype_write, 10648 tree_locked ? isc_rwlocktype_write 10649 : isc_rwlocktype_none, 10650 false); 10651 10652 if (rbtdb->cachestats == NULL) { 10653 return; 10654 } 10655 10656 switch (reason) { 10657 case expire_ttl: 10658 isc_stats_increment(rbtdb->cachestats, 10659 dns_cachestatscounter_deletettl); 10660 break; 10661 case expire_lru: 10662 isc_stats_increment(rbtdb->cachestats, 10663 dns_cachestatscounter_deletelru); 10664 break; 10665 default: 10666 break; 10667 } 10668 } 10669 } 10670