1*bcda20f6Schristos /* $NetBSD: qpcache.c,v 1.2 2025/01/26 16:25:24 christos Exp $ */ 29689912eSchristos 39689912eSchristos /* 49689912eSchristos * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 59689912eSchristos * 69689912eSchristos * SPDX-License-Identifier: MPL-2.0 79689912eSchristos * 89689912eSchristos * This Source Code Form is subject to the terms of the Mozilla Public 99689912eSchristos * License, v. 2.0. If a copy of the MPL was not distributed with this 109689912eSchristos * file, you can obtain one at https://mozilla.org/MPL/2.0/. 119689912eSchristos * 129689912eSchristos * See the COPYRIGHT file distributed with this work for additional 139689912eSchristos * information regarding copyright ownership. 149689912eSchristos */ 159689912eSchristos 169689912eSchristos /*! \file */ 179689912eSchristos 189689912eSchristos #include <inttypes.h> 199689912eSchristos #include <stdbool.h> 209689912eSchristos #include <sys/mman.h> 219689912eSchristos 229689912eSchristos #include <isc/ascii.h> 239689912eSchristos #include <isc/async.h> 249689912eSchristos #include <isc/atomic.h> 259689912eSchristos #include <isc/crc64.h> 269689912eSchristos #include <isc/file.h> 279689912eSchristos #include <isc/hash.h> 289689912eSchristos #include <isc/hashmap.h> 299689912eSchristos #include <isc/heap.h> 309689912eSchristos #include <isc/hex.h> 319689912eSchristos #include <isc/loop.h> 329689912eSchristos #include <isc/mem.h> 339689912eSchristos #include <isc/mutex.h> 349689912eSchristos #include <isc/once.h> 359689912eSchristos #include <isc/queue.h> 369689912eSchristos #include <isc/random.h> 379689912eSchristos #include <isc/refcount.h> 389689912eSchristos #include <isc/result.h> 399689912eSchristos #include <isc/rwlock.h> 409689912eSchristos #include <isc/stdio.h> 419689912eSchristos #include <isc/string.h> 429689912eSchristos #include <isc/time.h> 439689912eSchristos #include <isc/urcu.h> 449689912eSchristos #include <isc/util.h> 459689912eSchristos 469689912eSchristos 
#include <dns/callbacks.h>
#include <dns/db.h>
#include <dns/dbiterator.h>
#include <dns/fixedname.h>
#include <dns/log.h>
#include <dns/masterdump.h>
#include <dns/nsec.h>
#include <dns/qp.h>
#include <dns/rdata.h>
#include <dns/rdataset.h>
#include <dns/rdatasetiter.h>
#include <dns/rdataslab.h>
#include <dns/rdatastruct.h>
#include <dns/stats.h>
#include <dns/time.h>
#include <dns/view.h>
#include <dns/zonekey.h>

#include "db_p.h"
#include "qpcache_p.h"

/*
 * Evaluate 'op', store it in the local variable 'result', and jump to the
 * local label 'failure' unless it returned ISC_R_SUCCESS.  The enclosing
 * function must provide both 'result' and 'failure'.
 */
#define CHECK(op)                              \
	do {                                   \
		result = (op);                 \
		if (result != ISC_R_SUCCESS) { \
			goto failure;          \
		}                              \
	} while (0)

/*
 * Slab header attribute tests.  Each macro performs an acquire load of
 * 'header->attributes' and tests a single DNS_SLABHEADERATTR_* bit.
 * EXISTS() is the logical inverse of NONEXISTENT().
 */
#define EXISTS(header)                                 \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header)                            \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_NONEXISTENT) != 0)
#define IGNORE(header)                                 \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_IGNORE) != 0)
#define NXDOMAIN(header)                               \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_NXDOMAIN) != 0)
#define STALE(header)                                  \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_STALE) != 0)
#define STALE_WINDOW(header)                           \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_STALE_WINDOW) != 0)
#define OPTOUT(header)                                 \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_OPTOUT) != 0)
#define NEGATIVE(header)                               \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_NEGATIVE) != 0)
#define PREFETCH(header)                               \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_PREFETCH) != 0)
#define ZEROTTL(header)                                \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_ZEROTTL) != 0)
#define ANCIENT(header)                                \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_ANCIENT) != 0)
#define STATCOUNT(header)                              \
	((atomic_load_acquire(&(header)->attributes) & \
	  DNS_SLABHEADERATTR_STATCOUNT) != 0)

/*
 * Stale TTL to apply to 'header': zero for NXDOMAIN headers, otherwise the
 * database-wide serve_stale_ttl.  The 'qpdb' argument is parenthesized so
 * that any pointer-valued expression may be passed safely.
 */
#define STALE_TTL(header, qpdb) \
	(NXDOMAIN(header) ? 0 : (qpdb)->common.serve_stale_ttl)

/*
 * True if the header's TTL is still in the future relative to 'now', or is
 * exactly 'now' and the header carries the ZEROTTL attribute.
 */
#define ACTIVE(header, now) \
	(((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header)))

/* Iterator option tests. */
#define EXPIREDOK(iterator) \
	(((iterator)->common.options & DNS_DB_EXPIREDOK) != 0)

#define STALEOK(iterator) (((iterator)->common.options & DNS_DB_STALEOK) != 0)

/* True if the database is configured with a non-zero serve_stale_ttl. */
#define KEEPSTALE(qpdb) ((qpdb)->common.serve_stale_ttl > 0)

/*%
 * Note that "impmagic" is not the first four bytes of the struct, so
 * ISC_MAGIC_VALID cannot be used.
 */
#define QPDB_MAGIC ISC_MAGIC('Q', 'P', 'D', '4')
#define VALID_QPDB(qpdb) \
	((qpdb) != NULL && (qpdb)->common.impmagic == QPDB_MAGIC)

/* The qpcnode_t that a slab header is attached to. */
#define HEADERNODE(h) ((qpcnode_t *)((h)->node))

/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.
 */
#define QPDB_VIRTUAL 300

/*%
 * Whether to rate-limit updating the LRU to avoid possible thread contention.
 * Updating LRU requires write locking, so we don't do it every time the
 * record is touched - only after some time passes.
1449689912eSchristos */ 1459689912eSchristos #ifndef DNS_QPDB_LIMITLRUUPDATE 1469689912eSchristos #define DNS_QPDB_LIMITLRUUPDATE 1 1479689912eSchristos #endif 1489689912eSchristos 1499689912eSchristos /*% Time after which we update LRU for glue records, 5 minutes */ 1509689912eSchristos #define DNS_QPDB_LRUUPDATE_GLUE 300 1519689912eSchristos /*% Time after which we update LRU for all other records, 10 minutes */ 1529689912eSchristos #define DNS_QPDB_LRUUPDATE_REGULAR 600 1539689912eSchristos 1549689912eSchristos /* 1559689912eSchristos * This defines the number of headers that we try to expire each time the 1569689912eSchristos * expire_ttl_headers() is run. The number should be small enough, so the 1579689912eSchristos * TTL-based header expiration doesn't take too long, but it should be large 1589689912eSchristos * enough, so we expire enough headers if their TTL is clustered. 1599689912eSchristos */ 1609689912eSchristos #define DNS_QPDB_EXPIRE_TTL_COUNT 10 1619689912eSchristos 1629689912eSchristos /*% 1639689912eSchristos * This is the structure that is used for each node in the qp trie of trees. 1649689912eSchristos */ 1659689912eSchristos typedef struct qpcnode qpcnode_t; 1669689912eSchristos struct qpcnode { 1679689912eSchristos dns_name_t name; 1689689912eSchristos isc_mem_t *mctx; 1699689912eSchristos 1709689912eSchristos uint8_t : 0; 1719689912eSchristos unsigned int delegating : 1; 1729689912eSchristos unsigned int nsec : 2; /*%< range is 0..3 */ 1739689912eSchristos uint8_t : 0; 1749689912eSchristos 1759689912eSchristos isc_refcount_t references; 1769689912eSchristos isc_refcount_t erefs; 1779689912eSchristos uint16_t locknum; 1789689912eSchristos void *data; 1799689912eSchristos 1809689912eSchristos /*% 1819689912eSchristos * NOTE: The 'dirty' flag is protected by the node lock, so 1829689912eSchristos * this bitfield has to be separated from the one above. 
1839689912eSchristos * We don't want it to share the same qword with bits 1849689912eSchristos * that can be accessed without the node lock. 1859689912eSchristos */ 1869689912eSchristos uint8_t : 0; 1879689912eSchristos uint8_t dirty : 1; 1889689912eSchristos uint8_t : 0; 1899689912eSchristos 1909689912eSchristos /*% 1919689912eSchristos * Used for dead nodes cleaning. This linked list is used to mark nodes 1929689912eSchristos * which have no data any longer, but we cannot unlink at that exact 1939689912eSchristos * moment because we did not or could not obtain a write lock on the 1949689912eSchristos * tree. 1959689912eSchristos */ 1969689912eSchristos isc_queue_node_t deadlink; 1979689912eSchristos }; 1989689912eSchristos 1999689912eSchristos typedef struct qpcache qpcache_t; 2009689912eSchristos struct qpcache { 2019689912eSchristos /* Unlocked. */ 2029689912eSchristos dns_db_t common; 2039689912eSchristos /* Loopmgr */ 2049689912eSchristos isc_loopmgr_t *loopmgr; 2059689912eSchristos /* Locks the data in this struct */ 2069689912eSchristos isc_rwlock_t lock; 2079689912eSchristos /* Locks the tree structure (prevents nodes appearing/disappearing) */ 2089689912eSchristos isc_rwlock_t tree_lock; 2099689912eSchristos /* Locks for individual tree nodes */ 2109689912eSchristos unsigned int node_lock_count; 2119689912eSchristos db_nodelock_t *node_locks; 2129689912eSchristos qpcnode_t *origin_node; 2139689912eSchristos dns_stats_t *rrsetstats; /* cache DB only */ 2149689912eSchristos isc_stats_t *cachestats; /* cache DB only */ 2159689912eSchristos isc_stats_t *gluecachestats; /* zone DB only */ 2169689912eSchristos /* Locked by lock. 
*/ 2179689912eSchristos unsigned int active; 2189689912eSchristos 2199689912eSchristos uint32_t maxrrperset; /* Maximum RRs per RRset */ 2209689912eSchristos uint32_t maxtypepername; /* Maximum number of RR types per owner */ 2219689912eSchristos 2229689912eSchristos /* 2239689912eSchristos * The time after a failed lookup, where stale answers from cache 2249689912eSchristos * may be used directly in a DNS response without attempting a 2259689912eSchristos * new iterative lookup. 2269689912eSchristos */ 2279689912eSchristos uint32_t serve_stale_refresh; 2289689912eSchristos 2299689912eSchristos /* 2309689912eSchristos * This is an array of linked lists used to implement the LRU cache. 2319689912eSchristos * There will be node_lock_count linked lists here. Nodes in bucket 1 2329689912eSchristos * will be placed on the linked list lru[1]. 2339689912eSchristos */ 2349689912eSchristos dns_slabheaderlist_t *lru; 2359689912eSchristos 2369689912eSchristos /* 2379689912eSchristos * Start point % node_lock_count for next LRU cleanup. 2389689912eSchristos */ 2399689912eSchristos atomic_uint lru_sweep; 2409689912eSchristos 2419689912eSchristos /* 2429689912eSchristos * When performing LRU cleaning limit cleaning to headers that were 2439689912eSchristos * last used at or before this. 2449689912eSchristos */ 2459689912eSchristos _Atomic(isc_stdtime_t) last_used; 2469689912eSchristos 2479689912eSchristos /*% 2489689912eSchristos * Temporary storage for stale cache nodes and dynamically deleted 2499689912eSchristos * nodes that await being cleaned up. 2509689912eSchristos */ 2519689912eSchristos isc_queue_t *deadnodes; 2529689912eSchristos 2539689912eSchristos /* 2549689912eSchristos * Heaps. These are used for TTL based expiry in a cache, 2559689912eSchristos * or for zone resigning in a zone DB. hmctx is the memory 2569689912eSchristos * context to use for the heap (which differs from the main 2579689912eSchristos * database memory context in the case of a cache). 
2589689912eSchristos */ 2599689912eSchristos isc_mem_t *hmctx; 2609689912eSchristos isc_heap_t **heaps; 2619689912eSchristos 2629689912eSchristos /* Locked by tree_lock. */ 2639689912eSchristos dns_qp_t *tree; 2649689912eSchristos dns_qp_t *nsec; 2659689912eSchristos }; 2669689912eSchristos 2679689912eSchristos /*% 2689689912eSchristos * Search Context 2699689912eSchristos */ 2709689912eSchristos typedef struct { 2719689912eSchristos qpcache_t *qpdb; 2729689912eSchristos unsigned int options; 2739689912eSchristos dns_qpchain_t chain; 2749689912eSchristos dns_qpiter_t iter; 2759689912eSchristos bool need_cleanup; 2769689912eSchristos qpcnode_t *zonecut; 2779689912eSchristos dns_slabheader_t *zonecut_header; 2789689912eSchristos dns_slabheader_t *zonecut_sigheader; 2799689912eSchristos isc_stdtime_t now; 2809689912eSchristos } qpc_search_t; 2819689912eSchristos 2829689912eSchristos #ifdef DNS_DB_NODETRACE 2839689912eSchristos #define qpcnode_ref(ptr) qpcnode__ref(ptr, __func__, __FILE__, __LINE__) 2849689912eSchristos #define qpcnode_unref(ptr) qpcnode__unref(ptr, __func__, __FILE__, __LINE__) 2859689912eSchristos #define qpcnode_attach(ptr, ptrp) \ 2869689912eSchristos qpcnode__attach(ptr, ptrp, __func__, __FILE__, __LINE__) 2879689912eSchristos #define qpcnode_detach(ptrp) qpcnode__detach(ptrp, __func__, __FILE__, __LINE__) 2889689912eSchristos ISC_REFCOUNT_STATIC_TRACE_DECL(qpcnode); 2899689912eSchristos #else 2909689912eSchristos ISC_REFCOUNT_STATIC_DECL(qpcnode); 2919689912eSchristos #endif 2929689912eSchristos 2939689912eSchristos /* QP methods */ 2949689912eSchristos static void 2959689912eSchristos qp_attach(void *uctx, void *pval, uint32_t ival); 2969689912eSchristos static void 2979689912eSchristos qp_detach(void *uctx, void *pval, uint32_t ival); 2989689912eSchristos static size_t 2999689912eSchristos qp_makekey(dns_qpkey_t key, void *uctx, void *pval, uint32_t ival); 3009689912eSchristos static void 3019689912eSchristos qp_triename(void *uctx, char *buf, 
size_t size); 3029689912eSchristos 3039689912eSchristos static dns_qpmethods_t qpmethods = { 3049689912eSchristos qp_attach, 3059689912eSchristos qp_detach, 3069689912eSchristos qp_makekey, 3079689912eSchristos qp_triename, 3089689912eSchristos }; 3099689912eSchristos 3109689912eSchristos static void 3119689912eSchristos qp_attach(void *uctx ISC_ATTR_UNUSED, void *pval, 3129689912eSchristos uint32_t ival ISC_ATTR_UNUSED) { 3139689912eSchristos qpcnode_t *data = pval; 3149689912eSchristos qpcnode_ref(data); 3159689912eSchristos } 3169689912eSchristos 3179689912eSchristos static void 3189689912eSchristos qp_detach(void *uctx ISC_ATTR_UNUSED, void *pval, 3199689912eSchristos uint32_t ival ISC_ATTR_UNUSED) { 3209689912eSchristos qpcnode_t *data = pval; 3219689912eSchristos qpcnode_detach(&data); 3229689912eSchristos } 3239689912eSchristos 3249689912eSchristos static size_t 3259689912eSchristos qp_makekey(dns_qpkey_t key, void *uctx ISC_ATTR_UNUSED, void *pval, 3269689912eSchristos uint32_t ival ISC_ATTR_UNUSED) { 3279689912eSchristos qpcnode_t *data = pval; 3289689912eSchristos return dns_qpkey_fromname(key, &data->name); 3299689912eSchristos } 3309689912eSchristos 3319689912eSchristos static void 3329689912eSchristos qp_triename(void *uctx, char *buf, size_t size) { 3339689912eSchristos UNUSED(uctx); 3349689912eSchristos snprintf(buf, size, "qpdb-lite"); 3359689912eSchristos } 3369689912eSchristos 3379689912eSchristos static void 3389689912eSchristos rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG); 3399689912eSchristos static isc_result_t 3409689912eSchristos rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG); 3419689912eSchristos static isc_result_t 3429689912eSchristos rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG); 3439689912eSchristos static void 3449689912eSchristos rdatasetiter_current(dns_rdatasetiter_t *iterator, 3459689912eSchristos dns_rdataset_t *rdataset DNS__DB_FLARG); 3469689912eSchristos 3479689912eSchristos 
static dns_rdatasetitermethods_t rdatasetiter_methods = { 3489689912eSchristos rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next, 3499689912eSchristos rdatasetiter_current 3509689912eSchristos }; 3519689912eSchristos 3529689912eSchristos typedef struct qpc_rditer { 3539689912eSchristos dns_rdatasetiter_t common; 3549689912eSchristos dns_slabheader_t *current; 3559689912eSchristos } qpc_rditer_t; 3569689912eSchristos 3579689912eSchristos static void 3589689912eSchristos dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG); 3599689912eSchristos static isc_result_t 3609689912eSchristos dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG); 3619689912eSchristos static isc_result_t 3629689912eSchristos dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG); 3639689912eSchristos static isc_result_t 3649689912eSchristos dbiterator_seek(dns_dbiterator_t *iterator, 3659689912eSchristos const dns_name_t *name DNS__DB_FLARG); 3669689912eSchristos static isc_result_t 3679689912eSchristos dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG); 3689689912eSchristos static isc_result_t 3699689912eSchristos dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG); 3709689912eSchristos static isc_result_t 3719689912eSchristos dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 3729689912eSchristos dns_name_t *name DNS__DB_FLARG); 3739689912eSchristos static isc_result_t 3749689912eSchristos dbiterator_pause(dns_dbiterator_t *iterator); 3759689912eSchristos static isc_result_t 3769689912eSchristos dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name); 3779689912eSchristos 3789689912eSchristos static dns_dbiteratormethods_t dbiterator_methods = { 3799689912eSchristos dbiterator_destroy, dbiterator_first, dbiterator_last, 3809689912eSchristos dbiterator_seek, dbiterator_prev, dbiterator_next, 3819689912eSchristos dbiterator_current, dbiterator_pause, dbiterator_origin 3829689912eSchristos }; 3839689912eSchristos 
3849689912eSchristos /* 3859689912eSchristos * Note that the QP cache database only needs a single QP iterator, because 3869689912eSchristos * unlike the QP zone database, NSEC3 records are cached in the main tree. 3879689912eSchristos * 3889689912eSchristos * If we ever implement synth-from-dnssec using NSEC3 records, we'll need 3899689912eSchristos * to have a separate tree for NSEC3 records, and to copy in the more complex 3909689912eSchristos * iterator implementation from qpzone.c. 3919689912eSchristos */ 3929689912eSchristos typedef struct qpc_dbit { 3939689912eSchristos dns_dbiterator_t common; 3949689912eSchristos bool paused; 3959689912eSchristos isc_rwlocktype_t tree_locked; 3969689912eSchristos isc_result_t result; 3979689912eSchristos dns_fixedname_t fixed; 3989689912eSchristos dns_name_t *name; 3999689912eSchristos dns_qpiter_t iter; 4009689912eSchristos qpcnode_t *node; 4019689912eSchristos } qpc_dbit_t; 4029689912eSchristos 4039689912eSchristos static void 4049689912eSchristos free_qpdb(qpcache_t *qpdb, bool log); 4059689912eSchristos 4069689912eSchristos static dns_dbmethods_t qpdb_cachemethods; 4079689912eSchristos 4089689912eSchristos /*% 4099689912eSchristos * 'init_count' is used to initialize 'newheader->count' which in turn 4109689912eSchristos * is used to determine where in the cycle rrset-order cyclic starts. 4119689912eSchristos * We don't lock this as we don't care about simultaneous updates. 
4129689912eSchristos */ 4139689912eSchristos static atomic_uint_fast16_t init_count = 0; 4149689912eSchristos 4159689912eSchristos /* 4169689912eSchristos * Locking 4179689912eSchristos * 4189689912eSchristos * If a routine is going to lock more than one lock in this module, then 4199689912eSchristos * the locking must be done in the following order: 4209689912eSchristos * 4219689912eSchristos * Tree Lock 4229689912eSchristos * 4239689912eSchristos * Node Lock (Only one from the set may be locked at one time by 4249689912eSchristos * any caller) 4259689912eSchristos * 4269689912eSchristos * Database Lock 4279689912eSchristos * 4289689912eSchristos * Failure to follow this hierarchy can result in deadlock. 4299689912eSchristos */ 4309689912eSchristos 4319689912eSchristos /*% 4329689912eSchristos * Routines for LRU-based cache management. 4339689912eSchristos */ 4349689912eSchristos 4359689912eSchristos /*% 4369689912eSchristos * See if a given cache entry that is being reused needs to be updated 4379689912eSchristos * in the LRU-list. From the LRU management point of view, this function is 4389689912eSchristos * expected to return true for almost all cases. When used with threads, 4399689912eSchristos * however, this may cause a non-negligible performance penalty because a 4409689912eSchristos * writer lock will have to be acquired before updating the list. 4419689912eSchristos * If DNS_QPDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this 4429689912eSchristos * function returns true if the entry has not been updated for some period of 4439689912eSchristos * time. We differentiate the NS or glue address case and the others since 4449689912eSchristos * experiments have shown that the former tends to be accessed relatively 4459689912eSchristos * infrequently and the cost of cache miss is higher (e.g., a missing NS records 4469689912eSchristos * may cause external queries at a higher level zone, involving more 4479689912eSchristos * transactions). 
4489689912eSchristos * 4499689912eSchristos * Caller must hold the node (read or write) lock. 4509689912eSchristos */ 4519689912eSchristos static bool 4529689912eSchristos need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) { 4539689912eSchristos if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT | 4549689912eSchristos DNS_SLABHEADERATTR_ANCIENT | 4559689912eSchristos DNS_SLABHEADERATTR_ZEROTTL)) != 0) 4569689912eSchristos { 4579689912eSchristos return false; 4589689912eSchristos } 4599689912eSchristos 4609689912eSchristos #if DNS_QPDB_LIMITLRUUPDATE 4619689912eSchristos if (header->type == dns_rdatatype_ns || 4629689912eSchristos (header->trust == dns_trust_glue && 4639689912eSchristos (header->type == dns_rdatatype_a || 4649689912eSchristos header->type == dns_rdatatype_aaaa))) 4659689912eSchristos { 4669689912eSchristos /* 4679689912eSchristos * Glue records are updated if at least DNS_QPDB_LRUUPDATE_GLUE 4689689912eSchristos * seconds have passed since the previous update time. 4699689912eSchristos */ 4709689912eSchristos return header->last_used + DNS_QPDB_LRUUPDATE_GLUE <= now; 4719689912eSchristos } 4729689912eSchristos 4739689912eSchristos /* 4749689912eSchristos * Other records are updated if DNS_QPDB_LRUUPDATE_REGULAR seconds 4759689912eSchristos * have passed. 4769689912eSchristos */ 4779689912eSchristos return header->last_used + DNS_QPDB_LRUUPDATE_REGULAR <= now; 4789689912eSchristos #else 4799689912eSchristos UNUSED(now); 4809689912eSchristos 4819689912eSchristos return true; 4829689912eSchristos #endif /* if DNS_QPDB_LIMITLRUUPDATE */ 4839689912eSchristos } 4849689912eSchristos 4859689912eSchristos /*% 4869689912eSchristos * Update the timestamp of a given cache entry and move it to the head 4879689912eSchristos * of the corresponding LRU list. 4889689912eSchristos * 4899689912eSchristos * Caller must hold the node (write) lock. 
4909689912eSchristos * 4919689912eSchristos * Note that the we do NOT touch the heap here, as the TTL has not changed. 4929689912eSchristos */ 4939689912eSchristos static void 4949689912eSchristos update_header(qpcache_t *qpdb, dns_slabheader_t *header, isc_stdtime_t now) { 4959689912eSchristos /* To be checked: can we really assume this? XXXMLG */ 4969689912eSchristos INSIST(ISC_LINK_LINKED(header, link)); 4979689912eSchristos 4989689912eSchristos ISC_LIST_UNLINK(qpdb->lru[HEADERNODE(header)->locknum], header, link); 4999689912eSchristos header->last_used = now; 5009689912eSchristos ISC_LIST_PREPEND(qpdb->lru[HEADERNODE(header)->locknum], header, link); 5019689912eSchristos } 5029689912eSchristos 5039689912eSchristos /* 5049689912eSchristos * Locking: 5059689912eSchristos * If a routine is going to lock more than one lock in this module, then 5069689912eSchristos * the locking must be done in the following order: 5079689912eSchristos * 5089689912eSchristos * Tree Lock 5099689912eSchristos * 5109689912eSchristos * Node Lock (Only one from the set may be locked at one time by 5119689912eSchristos * any caller) 5129689912eSchristos * 5139689912eSchristos * Database Lock 5149689912eSchristos * 5159689912eSchristos * Failure to follow this hierarchy can result in deadlock. 5169689912eSchristos * 5179689912eSchristos * Deleting Nodes: 5189689912eSchristos * For zone databases the node for the origin of the zone MUST NOT be deleted. 
5199689912eSchristos */ 5209689912eSchristos 5219689912eSchristos /* 5229689912eSchristos * DB Routines 5239689912eSchristos */ 5249689912eSchristos 5259689912eSchristos static void 5269689912eSchristos clean_stale_headers(dns_slabheader_t *top) { 5279689912eSchristos dns_slabheader_t *d = NULL, *down_next = NULL; 5289689912eSchristos 5299689912eSchristos for (d = top->down; d != NULL; d = down_next) { 5309689912eSchristos down_next = d->down; 5319689912eSchristos dns_slabheader_destroy(&d); 5329689912eSchristos } 5339689912eSchristos top->down = NULL; 5349689912eSchristos } 5359689912eSchristos 5369689912eSchristos static void 5379689912eSchristos clean_cache_node(qpcache_t *qpdb, qpcnode_t *node) { 5389689912eSchristos dns_slabheader_t *current = NULL, *top_prev = NULL, *top_next = NULL; 5399689912eSchristos 5409689912eSchristos /* 5419689912eSchristos * Caller must be holding the node lock. 5429689912eSchristos */ 5439689912eSchristos 5449689912eSchristos for (current = node->data; current != NULL; current = top_next) { 5459689912eSchristos top_next = current->next; 5469689912eSchristos clean_stale_headers(current); 5479689912eSchristos /* 5489689912eSchristos * If current is nonexistent, ancient, or stale and 5499689912eSchristos * we are not keeping stale, we can clean it up. 
5509689912eSchristos */ 5519689912eSchristos if (NONEXISTENT(current) || ANCIENT(current) || 5529689912eSchristos (STALE(current) && !KEEPSTALE(qpdb))) 5539689912eSchristos { 5549689912eSchristos if (top_prev != NULL) { 5559689912eSchristos top_prev->next = current->next; 5569689912eSchristos } else { 5579689912eSchristos node->data = current->next; 5589689912eSchristos } 5599689912eSchristos dns_slabheader_destroy(¤t); 5609689912eSchristos } else { 5619689912eSchristos top_prev = current; 5629689912eSchristos } 5639689912eSchristos } 5649689912eSchristos node->dirty = 0; 5659689912eSchristos } 5669689912eSchristos 5679689912eSchristos /* 5689689912eSchristos * tree_lock(write) must be held. 5699689912eSchristos */ 5709689912eSchristos static void 5719689912eSchristos delete_node(qpcache_t *qpdb, qpcnode_t *node) { 5729689912eSchristos isc_result_t result = ISC_R_UNEXPECTED; 5739689912eSchristos 5749689912eSchristos if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) { 5759689912eSchristos char printname[DNS_NAME_FORMATSIZE]; 5769689912eSchristos dns_name_format(&node->name, printname, sizeof(printname)); 5779689912eSchristos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 5789689912eSchristos DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 5799689912eSchristos "delete_node(): %p %s (bucket %d)", node, 5809689912eSchristos printname, node->locknum); 5819689912eSchristos } 5829689912eSchristos 5839689912eSchristos switch (node->nsec) { 5849689912eSchristos case DNS_DB_NSEC_HAS_NSEC: 5859689912eSchristos /* 5869689912eSchristos * Delete the corresponding node from the auxiliary NSEC 5879689912eSchristos * tree before deleting from the main tree. 
5889689912eSchristos */ 5899689912eSchristos result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL); 5909689912eSchristos if (result != ISC_R_SUCCESS) { 5919689912eSchristos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 5929689912eSchristos DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 5939689912eSchristos "delete_node(): " 5949689912eSchristos "dns_qp_deletename: %s", 5959689912eSchristos isc_result_totext(result)); 5969689912eSchristos } 5979689912eSchristos /* FALLTHROUGH */ 5989689912eSchristos case DNS_DB_NSEC_NORMAL: 5999689912eSchristos result = dns_qp_deletename(qpdb->tree, &node->name, NULL, NULL); 6009689912eSchristos break; 6019689912eSchristos case DNS_DB_NSEC_NSEC: 6029689912eSchristos result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL); 6039689912eSchristos break; 6049689912eSchristos } 6059689912eSchristos if (result != ISC_R_SUCCESS) { 6069689912eSchristos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 6079689912eSchristos DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, 6089689912eSchristos "delete_node(): " 6099689912eSchristos "dns_qp_deletename: %s", 6109689912eSchristos isc_result_totext(result)); 6119689912eSchristos } 6129689912eSchristos } 6139689912eSchristos 6149689912eSchristos /* 6159689912eSchristos * The caller must specify its currect node and tree lock status. 6169689912eSchristos * It's okay for neither lock to be held if there are existing external 6179689912eSchristos * references to the node, but if this is the first external reference, 6189689912eSchristos * then the caller must be holding at least one lock. 
6199689912eSchristos */ 6209689912eSchristos static void 6219689912eSchristos newref(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t nlocktype, 6229689912eSchristos isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 6239689912eSchristos uint_fast32_t refs; 6249689912eSchristos 6259689912eSchristos qpcnode_ref(node); 6269689912eSchristos refs = isc_refcount_increment0(&node->erefs); 6279689912eSchristos 6289689912eSchristos #if DNS_DB_NODETRACE 6299689912eSchristos fprintf(stderr, "incr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n", 6309689912eSchristos func, file, line, node, refs + 1); 6319689912eSchristos #endif 6329689912eSchristos 6339689912eSchristos if (refs == 0) { 6349689912eSchristos /* 6359689912eSchristos * this is the first external reference to the node. 6369689912eSchristos * 6379689912eSchristos * we need to hold the node or tree lock to avoid 6389689912eSchristos * incrementing the reference count while also deleting 6399689912eSchristos * the node. delete_node() is always protected by both 6409689912eSchristos * tree and node locks being write-locked. 
6419689912eSchristos */ 6429689912eSchristos INSIST(nlocktype != isc_rwlocktype_none || 6439689912eSchristos tlocktype != isc_rwlocktype_none); 6449689912eSchristos 6459689912eSchristos refs = isc_refcount_increment0( 6469689912eSchristos &qpdb->node_locks[node->locknum].references); 6479689912eSchristos #if DNS_DB_NODETRACE 6489689912eSchristos fprintf(stderr, 6499689912eSchristos "incr:nodelock:%s:%s:%u:%p:%p->references = " 6509689912eSchristos "%" PRIuFAST32 "\n", 6519689912eSchristos func, file, line, node, 6529689912eSchristos &qpdb->node_locks[node->locknum], refs + 1); 6539689912eSchristos #else 6549689912eSchristos UNUSED(refs); 6559689912eSchristos #endif 6569689912eSchristos } 6579689912eSchristos } 6589689912eSchristos 6599689912eSchristos static void 6609689912eSchristos cleanup_deadnodes(void *arg); 6619689912eSchristos 6629689912eSchristos /* 6639689912eSchristos * Caller must be holding the node lock; either the read or write lock. 6649689912eSchristos * Note that the lock must be held even when node references are 6659689912eSchristos * atomically modified; in that case the decrement operation itself does not 6669689912eSchristos * have to be protected, but we must avoid a race condition where multiple 6679689912eSchristos * threads are decreasing the reference to zero simultaneously and at least 6689689912eSchristos * one of them is going to free the node. 6699689912eSchristos * 6709689912eSchristos * This decrements both the internal and external node reference counters. 6719689912eSchristos * If the external reference count drops to zero, then the node lock 6729689912eSchristos * reference count is also decremented. 6739689912eSchristos * 6749689912eSchristos * This function returns true if and only if the node reference decreases 6759689912eSchristos * to zero. (NOTE: Decrementing the reference count of a node to zero does 6769689912eSchristos * not mean it will be immediately freed.) 
6779689912eSchristos */ 6789689912eSchristos static bool 6799689912eSchristos decref(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep, 6809689912eSchristos isc_rwlocktype_t *tlocktypep, bool tryupgrade DNS__DB_FLARG) { 6819689912eSchristos isc_result_t result; 6829689912eSchristos bool locked = *tlocktypep != isc_rwlocktype_none; 6839689912eSchristos bool write_locked = false; 6849689912eSchristos db_nodelock_t *nodelock = NULL; 6859689912eSchristos int bucket = node->locknum; 6869689912eSchristos uint_fast32_t refs; 6879689912eSchristos 6889689912eSchristos REQUIRE(*nlocktypep != isc_rwlocktype_none); 6899689912eSchristos 6909689912eSchristos nodelock = &qpdb->node_locks[bucket]; 6919689912eSchristos 6929689912eSchristos #define KEEP_NODE(n, r) ((n)->data != NULL || (n) == (r)->origin_node) 6939689912eSchristos 6949689912eSchristos /* Handle easy and typical case first. */ 6959689912eSchristos if (!node->dirty && KEEP_NODE(node, qpdb)) { 6969689912eSchristos bool no_reference = false; 6979689912eSchristos 6989689912eSchristos refs = isc_refcount_decrement(&node->erefs); 6999689912eSchristos #if DNS_DB_NODETRACE 7009689912eSchristos fprintf(stderr, 7019689912eSchristos "decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n", 7029689912eSchristos func, file, line, node, refs - 1); 7039689912eSchristos #else 7049689912eSchristos UNUSED(refs); 7059689912eSchristos #endif 7069689912eSchristos if (refs == 1) { 7079689912eSchristos refs = isc_refcount_decrement(&nodelock->references); 7089689912eSchristos #if DNS_DB_NODETRACE 7099689912eSchristos fprintf(stderr, 7109689912eSchristos "decr:nodelock:%s:%s:%u:%p:%p->references = " 7119689912eSchristos "%" PRIuFAST32 "\n", 7129689912eSchristos func, file, line, node, nodelock, refs - 1); 7139689912eSchristos #else 7149689912eSchristos UNUSED(refs); 7159689912eSchristos #endif 7169689912eSchristos no_reference = true; 7179689912eSchristos } 7189689912eSchristos 7199689912eSchristos qpcnode_unref(node); 
7209689912eSchristos return no_reference; 7219689912eSchristos } 7229689912eSchristos 7239689912eSchristos /* Upgrade the lock? */ 7249689912eSchristos if (*nlocktypep == isc_rwlocktype_read) { 7259689912eSchristos NODE_FORCEUPGRADE(&nodelock->lock, nlocktypep); 7269689912eSchristos } 7279689912eSchristos 7289689912eSchristos refs = isc_refcount_decrement(&node->erefs); 7299689912eSchristos #if DNS_DB_NODETRACE 7309689912eSchristos fprintf(stderr, "decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n", 7319689912eSchristos func, file, line, node, refs - 1); 7329689912eSchristos #endif 7339689912eSchristos 7349689912eSchristos if (refs > 1) { 7359689912eSchristos qpcnode_unref(node); 7369689912eSchristos return false; 7379689912eSchristos } 7389689912eSchristos 7399689912eSchristos INSIST(refs == 1); 7409689912eSchristos 7419689912eSchristos if (node->dirty) { 7429689912eSchristos clean_cache_node(qpdb, node); 7439689912eSchristos } 7449689912eSchristos 7459689912eSchristos /* 7469689912eSchristos * Attempt to switch to a write lock on the tree. If this fails, 7479689912eSchristos * we will add this node to a linked list of nodes in this locking 7489689912eSchristos * bucket which we will free later. 7499689912eSchristos * 7509689912eSchristos * Locking hierarchy notwithstanding, we don't need to free 7519689912eSchristos * the node lock before acquiring the tree write lock because 7529689912eSchristos * we only do a trylock. 
7539689912eSchristos */ 7549689912eSchristos /* We are allowed to upgrade the tree lock */ 7559689912eSchristos 7569689912eSchristos switch (*tlocktypep) { 7579689912eSchristos case isc_rwlocktype_write: 7589689912eSchristos result = ISC_R_SUCCESS; 7599689912eSchristos break; 7609689912eSchristos case isc_rwlocktype_read: 7619689912eSchristos if (tryupgrade) { 7629689912eSchristos result = TREE_TRYUPGRADE(&qpdb->tree_lock, tlocktypep); 7639689912eSchristos } else { 7649689912eSchristos result = ISC_R_LOCKBUSY; 7659689912eSchristos } 7669689912eSchristos break; 7679689912eSchristos case isc_rwlocktype_none: 7689689912eSchristos result = TREE_TRYWRLOCK(&qpdb->tree_lock, tlocktypep); 7699689912eSchristos break; 7709689912eSchristos default: 7719689912eSchristos UNREACHABLE(); 7729689912eSchristos } 7739689912eSchristos RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_LOCKBUSY); 7749689912eSchristos if (result == ISC_R_SUCCESS) { 7759689912eSchristos write_locked = true; 7769689912eSchristos } 7779689912eSchristos 7789689912eSchristos refs = isc_refcount_decrement(&nodelock->references); 7799689912eSchristos #if DNS_DB_NODETRACE 7809689912eSchristos fprintf(stderr, 7819689912eSchristos "decr:nodelock:%s:%s:%u:%p:%p->references = %" PRIuFAST32 "\n", 7829689912eSchristos func, file, line, node, nodelock, refs - 1); 7839689912eSchristos #else 7849689912eSchristos UNUSED(refs); 7859689912eSchristos #endif 7869689912eSchristos 7879689912eSchristos if (KEEP_NODE(node, qpdb)) { 7889689912eSchristos goto restore_locks; 7899689912eSchristos } 7909689912eSchristos 7919689912eSchristos #undef KEEP_NODE 7929689912eSchristos 7939689912eSchristos if (write_locked) { 7949689912eSchristos /* 7959689912eSchristos * We can now delete the node. 
7969689912eSchristos */ 7979689912eSchristos delete_node(qpdb, node); 7989689912eSchristos } else { 7999689912eSchristos newref(qpdb, node, *nlocktypep, *tlocktypep DNS__DB_FLARG_PASS); 8009689912eSchristos 8019689912eSchristos isc_queue_node_init(&node->deadlink); 8029689912eSchristos if (!isc_queue_enqueue_entry(&qpdb->deadnodes[bucket], node, 8039689912eSchristos deadlink)) 8049689912eSchristos { 8059689912eSchristos /* Queue was empty, trigger new cleaning */ 8069689912eSchristos isc_loop_t *loop = isc_loop_get(qpdb->loopmgr, bucket); 8079689912eSchristos 8089689912eSchristos isc_async_run(loop, cleanup_deadnodes, qpdb); 8099689912eSchristos } 8109689912eSchristos } 8119689912eSchristos 8129689912eSchristos restore_locks: 8139689912eSchristos /* 8149689912eSchristos * Relock a read lock, or unlock the write lock if no lock was held. 8159689912eSchristos */ 8169689912eSchristos if (!locked && write_locked) { 8179689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, tlocktypep); 8189689912eSchristos } 8199689912eSchristos 8209689912eSchristos qpcnode_unref(node); 8219689912eSchristos return true; 8229689912eSchristos } 8239689912eSchristos 8249689912eSchristos static void 8259689912eSchristos update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype, 8269689912eSchristos const uint_least16_t hattributes, const bool increment) { 8279689912eSchristos dns_rdatastatstype_t statattributes = 0; 8289689912eSchristos dns_rdatastatstype_t base = 0; 8299689912eSchristos dns_rdatastatstype_t type; 8309689912eSchristos dns_slabheader_t *header = &(dns_slabheader_t){ 8319689912eSchristos .type = htype, 8329689912eSchristos .attributes = hattributes, 8339689912eSchristos }; 8349689912eSchristos 8359689912eSchristos if (!EXISTS(header) || !STATCOUNT(header)) { 8369689912eSchristos return; 8379689912eSchristos } 8389689912eSchristos 8399689912eSchristos if (NEGATIVE(header)) { 8409689912eSchristos if (NXDOMAIN(header)) { 8419689912eSchristos statattributes = 
DNS_RDATASTATSTYPE_ATTR_NXDOMAIN; 8429689912eSchristos } else { 8439689912eSchristos statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET; 8449689912eSchristos base = DNS_TYPEPAIR_COVERS(header->type); 8459689912eSchristos } 8469689912eSchristos } else { 8479689912eSchristos base = DNS_TYPEPAIR_TYPE(header->type); 8489689912eSchristos } 8499689912eSchristos 8509689912eSchristos if (STALE(header)) { 8519689912eSchristos statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE; 8529689912eSchristos } 8539689912eSchristos if (ANCIENT(header)) { 8549689912eSchristos statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT; 8559689912eSchristos } 8569689912eSchristos 8579689912eSchristos type = DNS_RDATASTATSTYPE_VALUE(base, statattributes); 8589689912eSchristos if (increment) { 8599689912eSchristos dns_rdatasetstats_increment(stats, type); 8609689912eSchristos } else { 8619689912eSchristos dns_rdatasetstats_decrement(stats, type); 8629689912eSchristos } 8639689912eSchristos } 8649689912eSchristos 8659689912eSchristos static void 8669689912eSchristos mark(dns_slabheader_t *header, uint_least16_t flag) { 8679689912eSchristos uint_least16_t attributes = atomic_load_acquire(&header->attributes); 8689689912eSchristos uint_least16_t newattributes = 0; 8699689912eSchristos dns_stats_t *stats = NULL; 8709689912eSchristos 8719689912eSchristos /* 8729689912eSchristos * If we are already ancient there is nothing to do. 8739689912eSchristos */ 8749689912eSchristos do { 8759689912eSchristos if ((attributes & flag) != 0) { 8769689912eSchristos return; 8779689912eSchristos } 8789689912eSchristos newattributes = attributes | flag; 8799689912eSchristos } while (!atomic_compare_exchange_weak_acq_rel( 8809689912eSchristos &header->attributes, &attributes, newattributes)); 8819689912eSchristos 8829689912eSchristos /* 8839689912eSchristos * Decrement and increment the stats counter for the appropriate 8849689912eSchristos * RRtype. 
8859689912eSchristos */ 8869689912eSchristos stats = dns_db_getrrsetstats(header->db); 8879689912eSchristos if (stats != NULL) { 8889689912eSchristos update_rrsetstats(stats, header->type, attributes, false); 8899689912eSchristos update_rrsetstats(stats, header->type, newattributes, true); 8909689912eSchristos } 8919689912eSchristos } 8929689912eSchristos 8939689912eSchristos static void 8949689912eSchristos setttl(dns_slabheader_t *header, dns_ttl_t newttl) { 8959689912eSchristos dns_ttl_t oldttl = header->ttl; 8969689912eSchristos 8979689912eSchristos header->ttl = newttl; 8989689912eSchristos 8999689912eSchristos if (header->db == NULL || !dns_db_iscache(header->db)) { 9009689912eSchristos return; 9019689912eSchristos } 9029689912eSchristos 9039689912eSchristos /* 9049689912eSchristos * This is a cache. Adjust the heaps if necessary. 9059689912eSchristos */ 9069689912eSchristos if (header->heap == NULL || header->heap_index == 0 || newttl == oldttl) 9079689912eSchristos { 9089689912eSchristos return; 9099689912eSchristos } 9109689912eSchristos 9119689912eSchristos if (newttl < oldttl) { 9129689912eSchristos isc_heap_increased(header->heap, header->heap_index); 9139689912eSchristos } else { 9149689912eSchristos isc_heap_decreased(header->heap, header->heap_index); 9159689912eSchristos } 9169689912eSchristos 9179689912eSchristos if (newttl == 0) { 9189689912eSchristos isc_heap_delete(header->heap, header->heap_index); 9199689912eSchristos } 9209689912eSchristos } 9219689912eSchristos 9229689912eSchristos /* 9239689912eSchristos * Caller must hold the node (write) lock. 
9249689912eSchristos */ 9259689912eSchristos static void 9269689912eSchristos expireheader(dns_slabheader_t *header, isc_rwlocktype_t *nlocktypep, 9279689912eSchristos isc_rwlocktype_t *tlocktypep, dns_expire_t reason DNS__DB_FLARG) { 9289689912eSchristos setttl(header, 0); 9299689912eSchristos mark(header, DNS_SLABHEADERATTR_ANCIENT); 9309689912eSchristos HEADERNODE(header)->dirty = 1; 9319689912eSchristos 9329689912eSchristos if (isc_refcount_current(&HEADERNODE(header)->erefs) == 0) { 9339689912eSchristos qpcache_t *qpdb = (qpcache_t *)header->db; 9349689912eSchristos 9359689912eSchristos /* 9369689912eSchristos * If no one else is using the node, we can clean it up now. 9379689912eSchristos * We first need to gain a new reference to the node to meet a 9389689912eSchristos * requirement of decref(). 9399689912eSchristos */ 9409689912eSchristos newref(qpdb, HEADERNODE(header), *nlocktypep, 9419689912eSchristos *tlocktypep DNS__DB_FLARG_PASS); 9429689912eSchristos decref(qpdb, HEADERNODE(header), nlocktypep, tlocktypep, 9439689912eSchristos true DNS__DB_FLARG_PASS); 9449689912eSchristos 9459689912eSchristos if (qpdb->cachestats == NULL) { 9469689912eSchristos return; 9479689912eSchristos } 9489689912eSchristos 9499689912eSchristos switch (reason) { 9509689912eSchristos case dns_expire_ttl: 9519689912eSchristos isc_stats_increment(qpdb->cachestats, 9529689912eSchristos dns_cachestatscounter_deletettl); 9539689912eSchristos break; 9549689912eSchristos case dns_expire_lru: 9559689912eSchristos isc_stats_increment(qpdb->cachestats, 9569689912eSchristos dns_cachestatscounter_deletelru); 9579689912eSchristos break; 9589689912eSchristos default: 9599689912eSchristos break; 9609689912eSchristos } 9619689912eSchristos } 9629689912eSchristos } 9639689912eSchristos 9649689912eSchristos static void 9659689912eSchristos update_cachestats(qpcache_t *qpdb, isc_result_t result) { 9669689912eSchristos if (qpdb->cachestats == NULL) { 9679689912eSchristos return; 
9689689912eSchristos } 9699689912eSchristos 9709689912eSchristos switch (result) { 9719689912eSchristos case DNS_R_COVERINGNSEC: 9729689912eSchristos isc_stats_increment(qpdb->cachestats, 9739689912eSchristos dns_cachestatscounter_coveringnsec); 9749689912eSchristos FALLTHROUGH; 9759689912eSchristos case ISC_R_SUCCESS: 9769689912eSchristos case DNS_R_CNAME: 9779689912eSchristos case DNS_R_DNAME: 9789689912eSchristos case DNS_R_DELEGATION: 9799689912eSchristos case DNS_R_NCACHENXDOMAIN: 9809689912eSchristos case DNS_R_NCACHENXRRSET: 9819689912eSchristos isc_stats_increment(qpdb->cachestats, 9829689912eSchristos dns_cachestatscounter_hits); 9839689912eSchristos break; 9849689912eSchristos default: 9859689912eSchristos isc_stats_increment(qpdb->cachestats, 9869689912eSchristos dns_cachestatscounter_misses); 9879689912eSchristos } 9889689912eSchristos } 9899689912eSchristos 9909689912eSchristos static void 9919689912eSchristos bindrdataset(qpcache_t *qpdb, qpcnode_t *node, dns_slabheader_t *header, 9929689912eSchristos isc_stdtime_t now, isc_rwlocktype_t nlocktype, 9939689912eSchristos isc_rwlocktype_t tlocktype, 9949689912eSchristos dns_rdataset_t *rdataset DNS__DB_FLARG) { 9959689912eSchristos bool stale = STALE(header); 9969689912eSchristos bool ancient = ANCIENT(header); 9979689912eSchristos 9989689912eSchristos /* 9999689912eSchristos * Caller must be holding the node reader lock. 10009689912eSchristos * XXXJT: technically, we need a writer lock, since we'll increment 10019689912eSchristos * the header count below. However, since the actual counter value 10029689912eSchristos * doesn't matter, we prioritize performance here. (We may want to 10039689912eSchristos * use atomic increment when available). 
10049689912eSchristos */ 10059689912eSchristos 10069689912eSchristos if (rdataset == NULL) { 10079689912eSchristos return; 10089689912eSchristos } 10099689912eSchristos 10109689912eSchristos newref(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS); 10119689912eSchristos 10129689912eSchristos INSIST(rdataset->methods == NULL); /* We must be disassociated. */ 10139689912eSchristos 10149689912eSchristos /* 10159689912eSchristos * Mark header stale or ancient if the RRset is no longer active. 10169689912eSchristos */ 10179689912eSchristos if (!ACTIVE(header, now)) { 10189689912eSchristos dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb); 10199689912eSchristos /* 10209689912eSchristos * If this data is in the stale window keep it and if 10219689912eSchristos * DNS_DBFIND_STALEOK is not set we tell the caller to 10229689912eSchristos * skip this record. We skip the records with ZEROTTL 10239689912eSchristos * (these records should not be cached anyway). 10249689912eSchristos */ 10259689912eSchristos 10269689912eSchristos if (KEEPSTALE(qpdb) && stale_ttl > now) { 10279689912eSchristos stale = true; 10289689912eSchristos } else { 10299689912eSchristos /* 10309689912eSchristos * We are not keeping stale, or it is outside the 10319689912eSchristos * stale window. Mark ancient, i.e. ready for cleanup. 
10329689912eSchristos */ 10339689912eSchristos ancient = true; 10349689912eSchristos } 10359689912eSchristos } 10369689912eSchristos 10379689912eSchristos rdataset->methods = &dns_rdataslab_rdatasetmethods; 10389689912eSchristos rdataset->rdclass = qpdb->common.rdclass; 10399689912eSchristos rdataset->type = DNS_TYPEPAIR_TYPE(header->type); 10409689912eSchristos rdataset->covers = DNS_TYPEPAIR_COVERS(header->type); 10419689912eSchristos rdataset->ttl = header->ttl - now; 10429689912eSchristos rdataset->trust = header->trust; 10439689912eSchristos rdataset->resign = 0; 10449689912eSchristos 10459689912eSchristos if (NEGATIVE(header)) { 10469689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE; 10479689912eSchristos } 10489689912eSchristos if (NXDOMAIN(header)) { 10499689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN; 10509689912eSchristos } 10519689912eSchristos if (OPTOUT(header)) { 10529689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_OPTOUT; 10539689912eSchristos } 10549689912eSchristos if (PREFETCH(header)) { 10559689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; 10569689912eSchristos } 10579689912eSchristos 10589689912eSchristos if (stale && !ancient) { 10599689912eSchristos dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb); 10609689912eSchristos if (stale_ttl > now) { 10619689912eSchristos rdataset->ttl = stale_ttl - now; 10629689912eSchristos } else { 10639689912eSchristos rdataset->ttl = 0; 10649689912eSchristos } 10659689912eSchristos if (STALE_WINDOW(header)) { 10669689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; 10679689912eSchristos } 10689689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_STALE; 10699689912eSchristos } else if (!ACTIVE(header, now)) { 10709689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; 10719689912eSchristos rdataset->ttl = header->ttl; 10729689912eSchristos } 10739689912eSchristos 10749689912eSchristos rdataset->count = 
atomic_fetch_add_relaxed(&header->count, 1); 10759689912eSchristos 10769689912eSchristos rdataset->slab.db = (dns_db_t *)qpdb; 10779689912eSchristos rdataset->slab.node = (dns_dbnode_t *)node; 10789689912eSchristos rdataset->slab.raw = dns_slabheader_raw(header); 10799689912eSchristos rdataset->slab.iter_pos = NULL; 10809689912eSchristos rdataset->slab.iter_count = 0; 10819689912eSchristos 10829689912eSchristos /* 10839689912eSchristos * Add noqname proof. 10849689912eSchristos */ 10859689912eSchristos rdataset->slab.noqname = header->noqname; 10869689912eSchristos if (header->noqname != NULL) { 10879689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_NOQNAME; 10889689912eSchristos } 10899689912eSchristos rdataset->slab.closest = header->closest; 10909689912eSchristos if (header->closest != NULL) { 10919689912eSchristos rdataset->attributes |= DNS_RDATASETATTR_CLOSEST; 10929689912eSchristos } 10939689912eSchristos } 10949689912eSchristos 10959689912eSchristos static isc_result_t 10969689912eSchristos setup_delegation(qpc_search_t *search, dns_dbnode_t **nodep, 10979689912eSchristos dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset, 10989689912eSchristos isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 10999689912eSchristos dns_typepair_t type; 11009689912eSchristos qpcnode_t *node = NULL; 11019689912eSchristos 11029689912eSchristos REQUIRE(search != NULL); 11039689912eSchristos REQUIRE(search->zonecut != NULL); 11049689912eSchristos REQUIRE(search->zonecut_header != NULL); 11059689912eSchristos 11069689912eSchristos /* 11079689912eSchristos * The caller MUST NOT be holding any node locks. 
11089689912eSchristos */ 11099689912eSchristos 11109689912eSchristos node = search->zonecut; 11119689912eSchristos type = search->zonecut_header->type; 11129689912eSchristos 11139689912eSchristos if (nodep != NULL) { 11149689912eSchristos /* 11159689912eSchristos * Note that we don't have to increment the node's reference 11169689912eSchristos * count here because we're going to use the reference we 11179689912eSchristos * already have in the search block. 11189689912eSchristos */ 11199689912eSchristos *nodep = node; 11209689912eSchristos search->need_cleanup = false; 11219689912eSchristos } 11229689912eSchristos if (rdataset != NULL) { 11239689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 11249689912eSchristos NODE_RDLOCK(&(search->qpdb->node_locks[node->locknum].lock), 11259689912eSchristos &nlocktype); 11269689912eSchristos bindrdataset(search->qpdb, node, search->zonecut_header, 11279689912eSchristos search->now, nlocktype, tlocktype, 11289689912eSchristos rdataset DNS__DB_FLARG_PASS); 11299689912eSchristos if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { 11309689912eSchristos bindrdataset(search->qpdb, node, 11319689912eSchristos search->zonecut_sigheader, search->now, 11329689912eSchristos nlocktype, tlocktype, 11339689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 11349689912eSchristos } 11359689912eSchristos NODE_UNLOCK(&(search->qpdb->node_locks[node->locknum].lock), 11369689912eSchristos &nlocktype); 11379689912eSchristos } 11389689912eSchristos 11399689912eSchristos if (type == dns_rdatatype_dname) { 11409689912eSchristos return DNS_R_DNAME; 11419689912eSchristos } 11429689912eSchristos return DNS_R_DELEGATION; 11439689912eSchristos } 11449689912eSchristos 11459689912eSchristos static bool 11469689912eSchristos check_stale_header(qpcnode_t *node, dns_slabheader_t *header, 11479689912eSchristos isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, 11489689912eSchristos qpc_search_t *search, dns_slabheader_t **header_prev) { 
11499689912eSchristos if (!ACTIVE(header, search->now)) { 11509689912eSchristos dns_ttl_t stale = header->ttl + STALE_TTL(header, search->qpdb); 11519689912eSchristos /* 11529689912eSchristos * If this data is in the stale window keep it and if 11539689912eSchristos * DNS_DBFIND_STALEOK is not set we tell the caller to 11549689912eSchristos * skip this record. We skip the records with ZEROTTL 11559689912eSchristos * (these records should not be cached anyway). 11569689912eSchristos */ 11579689912eSchristos 11589689912eSchristos DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); 11599689912eSchristos if (!ZEROTTL(header) && KEEPSTALE(search->qpdb) && 11609689912eSchristos stale > search->now) 11619689912eSchristos { 11629689912eSchristos mark(header, DNS_SLABHEADERATTR_STALE); 11639689912eSchristos *header_prev = header; 11649689912eSchristos /* 11659689912eSchristos * If DNS_DBFIND_STALESTART is set then it means we 11669689912eSchristos * failed to resolve the name during recursion, in 11679689912eSchristos * this case we mark the time in which the refresh 11689689912eSchristos * failed. 11699689912eSchristos */ 11709689912eSchristos if ((search->options & DNS_DBFIND_STALESTART) != 0) { 11719689912eSchristos atomic_store_release( 11729689912eSchristos &header->last_refresh_fail_ts, 11739689912eSchristos search->now); 11749689912eSchristos } else if ((search->options & 11759689912eSchristos DNS_DBFIND_STALEENABLED) != 0 && 11769689912eSchristos search->now < 11779689912eSchristos (atomic_load_acquire( 11789689912eSchristos &header->last_refresh_fail_ts) + 11799689912eSchristos search->qpdb->serve_stale_refresh)) 11809689912eSchristos { 11819689912eSchristos /* 11829689912eSchristos * If we are within interval between last 11839689912eSchristos * refresh failure time + 'stale-refresh-time', 11849689912eSchristos * then don't skip this stale entry but use it 11859689912eSchristos * instead. 
11869689912eSchristos */ 11879689912eSchristos DNS_SLABHEADER_SETATTR( 11889689912eSchristos header, 11899689912eSchristos DNS_SLABHEADERATTR_STALE_WINDOW); 11909689912eSchristos return false; 11919689912eSchristos } else if ((search->options & 11929689912eSchristos DNS_DBFIND_STALETIMEOUT) != 0) 11939689912eSchristos { 11949689912eSchristos /* 11959689912eSchristos * We want stale RRset due to timeout, so we 11969689912eSchristos * don't skip it. 11979689912eSchristos */ 11989689912eSchristos return false; 11999689912eSchristos } 12009689912eSchristos return (search->options & DNS_DBFIND_STALEOK) == 0; 12019689912eSchristos } 12029689912eSchristos 12039689912eSchristos /* 12049689912eSchristos * This rdataset is stale. If no one else is using the 12059689912eSchristos * node, we can clean it up right now, otherwise we mark 12069689912eSchristos * it as ancient, and the node as dirty, so it will get 12079689912eSchristos * cleaned up later. 12089689912eSchristos */ 12099689912eSchristos if ((header->ttl < search->now - QPDB_VIRTUAL) && 12109689912eSchristos (*nlocktypep == isc_rwlocktype_write || 12119689912eSchristos NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) 12129689912eSchristos { 12139689912eSchristos /* 12149689912eSchristos * We update the node's status only when we can 12159689912eSchristos * get write access; otherwise, we leave others 12169689912eSchristos * to this work. Periodical cleaning will 12179689912eSchristos * eventually take the job as the last resort. 12189689912eSchristos * We won't downgrade the lock, since other 12199689912eSchristos * rdatasets are probably stale, too. 
12209689912eSchristos */ 12219689912eSchristos 12229689912eSchristos if (isc_refcount_current(&node->references) == 0) { 12239689912eSchristos /* 12249689912eSchristos * header->down can be non-NULL if the 12259689912eSchristos * refcount has just decremented to 0 12269689912eSchristos * but decref() has not 12279689912eSchristos * performed clean_cache_node(), in 12289689912eSchristos * which case we need to purge the stale 12299689912eSchristos * headers first. 12309689912eSchristos */ 12319689912eSchristos clean_stale_headers(header); 12329689912eSchristos if (*header_prev != NULL) { 12339689912eSchristos (*header_prev)->next = header->next; 12349689912eSchristos } else { 12359689912eSchristos node->data = header->next; 12369689912eSchristos } 12379689912eSchristos dns_slabheader_destroy(&header); 12389689912eSchristos } else { 12399689912eSchristos mark(header, DNS_SLABHEADERATTR_ANCIENT); 12409689912eSchristos HEADERNODE(header)->dirty = 1; 12419689912eSchristos *header_prev = header; 12429689912eSchristos } 12439689912eSchristos } else { 12449689912eSchristos *header_prev = header; 12459689912eSchristos } 12469689912eSchristos return true; 12479689912eSchristos } 12489689912eSchristos return false; 12499689912eSchristos } 12509689912eSchristos 12519689912eSchristos static isc_result_t 12529689912eSchristos check_zonecut(qpcnode_t *node, void *arg DNS__DB_FLARG) { 12539689912eSchristos qpc_search_t *search = arg; 12549689912eSchristos dns_slabheader_t *header = NULL; 12559689912eSchristos dns_slabheader_t *header_prev = NULL, *header_next = NULL; 12569689912eSchristos dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; 12579689912eSchristos isc_result_t result; 12589689912eSchristos isc_rwlock_t *lock = NULL; 12599689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 12609689912eSchristos 12619689912eSchristos REQUIRE(search->zonecut == NULL); 12629689912eSchristos 12639689912eSchristos lock = 
&(search->qpdb->node_locks[node->locknum].lock); 12649689912eSchristos NODE_RDLOCK(lock, &nlocktype); 12659689912eSchristos 12669689912eSchristos /* 12679689912eSchristos * Look for a DNAME or RRSIG DNAME rdataset. 12689689912eSchristos */ 12699689912eSchristos for (header = node->data; header != NULL; header = header_next) { 12709689912eSchristos header_next = header->next; 12719689912eSchristos if (check_stale_header(node, header, &nlocktype, lock, search, 12729689912eSchristos &header_prev)) 12739689912eSchristos { 12749689912eSchristos /* Do nothing. */ 12759689912eSchristos } else if (header->type == dns_rdatatype_dname && 12769689912eSchristos EXISTS(header) && !ANCIENT(header)) 12779689912eSchristos { 12789689912eSchristos dname_header = header; 12799689912eSchristos header_prev = header; 12809689912eSchristos } else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) && 12819689912eSchristos EXISTS(header) && !ANCIENT(header)) 12829689912eSchristos { 12839689912eSchristos sigdname_header = header; 12849689912eSchristos header_prev = header; 12859689912eSchristos } else { 12869689912eSchristos header_prev = header; 12879689912eSchristos } 12889689912eSchristos } 12899689912eSchristos 12909689912eSchristos if (dname_header != NULL && 12919689912eSchristos (!DNS_TRUST_PENDING(dname_header->trust) || 12929689912eSchristos (search->options & DNS_DBFIND_PENDINGOK) != 0)) 12939689912eSchristos { 12949689912eSchristos /* 12959689912eSchristos * We increment the reference count on node to ensure that 12969689912eSchristos * search->zonecut_header will still be valid later. 
12979689912eSchristos */ 12989689912eSchristos newref(search->qpdb, node, nlocktype, 12999689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 13009689912eSchristos search->zonecut = node; 13019689912eSchristos search->zonecut_header = dname_header; 13029689912eSchristos search->zonecut_sigheader = sigdname_header; 13039689912eSchristos search->need_cleanup = true; 13049689912eSchristos result = DNS_R_PARTIALMATCH; 13059689912eSchristos } else { 13069689912eSchristos result = DNS_R_CONTINUE; 13079689912eSchristos } 13089689912eSchristos 13099689912eSchristos NODE_UNLOCK(lock, &nlocktype); 13109689912eSchristos 13119689912eSchristos return result; 13129689912eSchristos } 13139689912eSchristos 13149689912eSchristos static isc_result_t 13159689912eSchristos find_deepest_zonecut(qpc_search_t *search, qpcnode_t *node, 13169689912eSchristos dns_dbnode_t **nodep, dns_name_t *foundname, 13179689912eSchristos dns_rdataset_t *rdataset, 13189689912eSchristos dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 13199689912eSchristos isc_result_t result = ISC_R_NOTFOUND; 13209689912eSchristos qpcache_t *qpdb = NULL; 13219689912eSchristos 13229689912eSchristos /* 13239689912eSchristos * Caller must be holding the tree lock. 
13249689912eSchristos */ 13259689912eSchristos 13269689912eSchristos qpdb = search->qpdb; 13279689912eSchristos 13289689912eSchristos for (int i = dns_qpchain_length(&search->chain) - 1; i >= 0; i--) { 13299689912eSchristos dns_slabheader_t *header = NULL; 13309689912eSchristos dns_slabheader_t *header_prev = NULL, *header_next = NULL; 13319689912eSchristos dns_slabheader_t *found = NULL, *foundsig = NULL; 13329689912eSchristos isc_rwlock_t *lock = NULL; 13339689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 13349689912eSchristos 13359689912eSchristos dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL); 13369689912eSchristos lock = &qpdb->node_locks[node->locknum].lock; 13379689912eSchristos 13389689912eSchristos NODE_RDLOCK(lock, &nlocktype); 13399689912eSchristos 13409689912eSchristos /* 13419689912eSchristos * Look for NS and RRSIG NS rdatasets. 13429689912eSchristos */ 13439689912eSchristos for (header = node->data; header != NULL; header = header_next) 13449689912eSchristos { 13459689912eSchristos header_next = header->next; 13469689912eSchristos if (check_stale_header(node, header, &nlocktype, lock, 13479689912eSchristos search, &header_prev)) 13489689912eSchristos { 13499689912eSchristos /* Do nothing. */ 13509689912eSchristos } else if (EXISTS(header) && !ANCIENT(header)) { 13519689912eSchristos /* 13529689912eSchristos * We've found an extant rdataset. See if 13539689912eSchristos * we're interested in it. 
13549689912eSchristos */ 13559689912eSchristos if (header->type == dns_rdatatype_ns) { 13569689912eSchristos found = header; 13579689912eSchristos if (foundsig != NULL) { 13589689912eSchristos break; 13599689912eSchristos } 13609689912eSchristos } else if (header->type == 13619689912eSchristos DNS_SIGTYPE(dns_rdatatype_ns)) 13629689912eSchristos { 13639689912eSchristos foundsig = header; 13649689912eSchristos if (found != NULL) { 13659689912eSchristos break; 13669689912eSchristos } 13679689912eSchristos } 13689689912eSchristos header_prev = header; 13699689912eSchristos } else { 13709689912eSchristos header_prev = header; 13719689912eSchristos } 13729689912eSchristos } 13739689912eSchristos 13749689912eSchristos if (found != NULL) { 13759689912eSchristos /* 13769689912eSchristos * If we have to set foundname, we do it before 13779689912eSchristos * anything else. 13789689912eSchristos */ 13799689912eSchristos if (foundname != NULL) { 13809689912eSchristos dns_name_copy(&node->name, foundname); 13819689912eSchristos } 13829689912eSchristos result = DNS_R_DELEGATION; 13839689912eSchristos if (nodep != NULL) { 13849689912eSchristos newref(search->qpdb, node, nlocktype, 13859689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 13869689912eSchristos *nodep = node; 13879689912eSchristos } 13889689912eSchristos bindrdataset(search->qpdb, node, found, search->now, 13899689912eSchristos nlocktype, isc_rwlocktype_none, 13909689912eSchristos rdataset DNS__DB_FLARG_PASS); 13919689912eSchristos if (foundsig != NULL) { 13929689912eSchristos bindrdataset(search->qpdb, node, foundsig, 13939689912eSchristos search->now, nlocktype, 13949689912eSchristos isc_rwlocktype_none, 13959689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 13969689912eSchristos } 13979689912eSchristos if (need_headerupdate(found, search->now) || 13989689912eSchristos (foundsig != NULL && 13999689912eSchristos need_headerupdate(foundsig, search->now))) 14009689912eSchristos { 14019689912eSchristos if 
(nlocktype != isc_rwlocktype_write) { 14029689912eSchristos NODE_FORCEUPGRADE(lock, &nlocktype); 14039689912eSchristos POST(nlocktype); 14049689912eSchristos } 14059689912eSchristos if (need_headerupdate(found, search->now)) { 14069689912eSchristos update_header(search->qpdb, found, 14079689912eSchristos search->now); 14089689912eSchristos } 14099689912eSchristos if (foundsig != NULL && 14109689912eSchristos need_headerupdate(foundsig, search->now)) 14119689912eSchristos { 14129689912eSchristos update_header(search->qpdb, foundsig, 14139689912eSchristos search->now); 14149689912eSchristos } 14159689912eSchristos } 14169689912eSchristos } 14179689912eSchristos 14189689912eSchristos NODE_UNLOCK(lock, &nlocktype); 14199689912eSchristos 14209689912eSchristos if (found != NULL) { 14219689912eSchristos break; 14229689912eSchristos } 14239689912eSchristos } 14249689912eSchristos 14259689912eSchristos return result; 14269689912eSchristos } 14279689912eSchristos 14289689912eSchristos /* 14299689912eSchristos * Look for a potentially covering NSEC in the cache where `name` 14309689912eSchristos * is known not to exist. This uses the auxiliary NSEC tree to find 14319689912eSchristos * the potential NSEC owner. If found, we update 'foundname', 'nodep', 14329689912eSchristos * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC. 14339689912eSchristos * Otherwise, return ISC_R_NOTFOUND. 
14349689912eSchristos */ 14359689912eSchristos static isc_result_t 14369689912eSchristos find_coveringnsec(qpc_search_t *search, const dns_name_t *name, 14379689912eSchristos dns_dbnode_t **nodep, isc_stdtime_t now, 14389689912eSchristos dns_name_t *foundname, dns_rdataset_t *rdataset, 14399689912eSchristos dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 14409689912eSchristos dns_fixedname_t fpredecessor, fixed; 14419689912eSchristos dns_name_t *predecessor = NULL, *fname = NULL; 14429689912eSchristos qpcnode_t *node = NULL; 14439689912eSchristos dns_qpiter_t iter; 14449689912eSchristos isc_result_t result; 14459689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 14469689912eSchristos isc_rwlock_t *lock = NULL; 14479689912eSchristos dns_typepair_t matchtype, sigmatchtype; 14489689912eSchristos dns_slabheader_t *found = NULL, *foundsig = NULL; 14499689912eSchristos dns_slabheader_t *header = NULL; 14509689912eSchristos dns_slabheader_t *header_next = NULL, *header_prev = NULL; 14519689912eSchristos 14529689912eSchristos /* 14539689912eSchristos * Look for the node in the auxilary tree. 14549689912eSchristos */ 14559689912eSchristos result = dns_qp_lookup(search->qpdb->nsec, name, NULL, &iter, NULL, 14569689912eSchristos (void **)&node, NULL); 14579689912eSchristos if (result != DNS_R_PARTIALMATCH) { 14589689912eSchristos return ISC_R_NOTFOUND; 14599689912eSchristos } 14609689912eSchristos 14619689912eSchristos fname = dns_fixedname_initname(&fixed); 14629689912eSchristos predecessor = dns_fixedname_initname(&fpredecessor); 14639689912eSchristos matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0); 14649689912eSchristos sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec); 14659689912eSchristos 14669689912eSchristos /* 14679689912eSchristos * Extract predecessor from iterator. 
14689689912eSchristos */ 14699689912eSchristos result = dns_qpiter_current(&iter, predecessor, NULL, NULL); 14709689912eSchristos if (result != ISC_R_SUCCESS) { 14719689912eSchristos return ISC_R_NOTFOUND; 14729689912eSchristos } 14739689912eSchristos 14749689912eSchristos /* 14759689912eSchristos * Lookup the predecessor in the main tree. 14769689912eSchristos */ 14779689912eSchristos node = NULL; 14789689912eSchristos result = dns_qp_getname(search->qpdb->tree, predecessor, (void **)&node, 14799689912eSchristos NULL); 14809689912eSchristos if (result != ISC_R_SUCCESS) { 14819689912eSchristos return result; 14829689912eSchristos } 14839689912eSchristos dns_name_copy(&node->name, fname); 14849689912eSchristos 14859689912eSchristos lock = &(search->qpdb->node_locks[node->locknum].lock); 14869689912eSchristos NODE_RDLOCK(lock, &nlocktype); 14879689912eSchristos for (header = node->data; header != NULL; header = header_next) { 14889689912eSchristos header_next = header->next; 14899689912eSchristos if (check_stale_header(node, header, &nlocktype, lock, search, 14909689912eSchristos &header_prev)) 14919689912eSchristos { 14929689912eSchristos continue; 14939689912eSchristos } 14949689912eSchristos if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0) 14959689912eSchristos { 14969689912eSchristos header_prev = header; 14979689912eSchristos continue; 14989689912eSchristos } 14999689912eSchristos if (header->type == matchtype) { 15009689912eSchristos found = header; 15019689912eSchristos if (foundsig != NULL) { 15029689912eSchristos break; 15039689912eSchristos } 15049689912eSchristos } else if (header->type == sigmatchtype) { 15059689912eSchristos foundsig = header; 15069689912eSchristos if (found != NULL) { 15079689912eSchristos break; 15089689912eSchristos } 15099689912eSchristos } 15109689912eSchristos header_prev = header; 15119689912eSchristos } 15129689912eSchristos if (found != NULL) { 15139689912eSchristos bindrdataset(search->qpdb, node, found, now, 
nlocktype, 15149689912eSchristos isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS); 15159689912eSchristos if (foundsig != NULL) { 15169689912eSchristos bindrdataset(search->qpdb, node, foundsig, now, 15179689912eSchristos nlocktype, isc_rwlocktype_none, 15189689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 15199689912eSchristos } 15209689912eSchristos newref(search->qpdb, node, nlocktype, 15219689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 15229689912eSchristos 15239689912eSchristos dns_name_copy(fname, foundname); 15249689912eSchristos 15259689912eSchristos *nodep = node; 15269689912eSchristos result = DNS_R_COVERINGNSEC; 15279689912eSchristos } else { 15289689912eSchristos result = ISC_R_NOTFOUND; 15299689912eSchristos } 15309689912eSchristos NODE_UNLOCK(lock, &nlocktype); 15319689912eSchristos return result; 15329689912eSchristos } 15339689912eSchristos 15349689912eSchristos static isc_result_t 15359689912eSchristos find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 15369689912eSchristos dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 15379689912eSchristos dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset, 15389689912eSchristos dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 15399689912eSchristos qpcnode_t *node = NULL; 15409689912eSchristos isc_result_t result; 15419689912eSchristos qpc_search_t search; 15429689912eSchristos bool cname_ok = true; 15439689912eSchristos bool found_noqname = false; 15449689912eSchristos bool all_negative = true; 15459689912eSchristos bool empty_node; 15469689912eSchristos isc_rwlock_t *lock = NULL; 15479689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 15489689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 15499689912eSchristos dns_slabheader_t *header = NULL; 15509689912eSchristos dns_slabheader_t *header_prev = NULL, *header_next = NULL; 15519689912eSchristos dns_slabheader_t *found = NULL, *nsheader = NULL; 15529689912eSchristos 
dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; 15539689912eSchristos dns_slabheader_t *update = NULL, *updatesig = NULL; 15549689912eSchristos dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; 15559689912eSchristos dns_typepair_t sigtype, negtype; 15569689912eSchristos 15579689912eSchristos UNUSED(version); 15589689912eSchristos 15599689912eSchristos REQUIRE(VALID_QPDB((qpcache_t *)db)); 15609689912eSchristos REQUIRE(version == NULL); 15619689912eSchristos 15629689912eSchristos if (now == 0) { 15639689912eSchristos now = isc_stdtime_now(); 15649689912eSchristos } 15659689912eSchristos 15669689912eSchristos search = (qpc_search_t){ 15679689912eSchristos .qpdb = (qpcache_t *)db, 15689689912eSchristos .options = options, 15699689912eSchristos .now = now, 15709689912eSchristos }; 15719689912eSchristos 15729689912eSchristos TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype); 15739689912eSchristos 15749689912eSchristos /* 15759689912eSchristos * Search down from the root of the tree. 15769689912eSchristos */ 15779689912eSchristos result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL, 15789689912eSchristos &search.chain, (void **)&node, NULL); 15799689912eSchristos if (result != ISC_R_NOTFOUND && foundname != NULL) { 15809689912eSchristos dns_name_copy(&node->name, foundname); 15819689912eSchristos } 15829689912eSchristos 15839689912eSchristos /* 15849689912eSchristos * Check the QP chain to see if there's a node above us with a 15859689912eSchristos * active DNAME or NS rdatasets. 15869689912eSchristos * 15879689912eSchristos * We're only interested in nodes above QNAME, so if the result 15889689912eSchristos * was success, then we skip the last item in the chain. 
15899689912eSchristos */ 15909689912eSchristos unsigned int len = dns_qpchain_length(&search.chain); 15919689912eSchristos if (result == ISC_R_SUCCESS) { 15929689912eSchristos len--; 15939689912eSchristos } 15949689912eSchristos 15959689912eSchristos for (unsigned int i = 0; i < len; i++) { 15969689912eSchristos isc_result_t zcresult; 15979689912eSchristos qpcnode_t *encloser = NULL; 15989689912eSchristos 15999689912eSchristos dns_qpchain_node(&search.chain, i, NULL, (void **)&encloser, 16009689912eSchristos NULL); 16019689912eSchristos 16029689912eSchristos zcresult = check_zonecut(encloser, 16039689912eSchristos (void *)&search DNS__DB_FLARG_PASS); 16049689912eSchristos if (zcresult != DNS_R_CONTINUE) { 16059689912eSchristos result = DNS_R_PARTIALMATCH; 16069689912eSchristos search.chain.len = i - 1; 16079689912eSchristos node = encloser; 16089689912eSchristos if (foundname != NULL) { 16099689912eSchristos dns_name_copy(&node->name, foundname); 16109689912eSchristos } 16119689912eSchristos break; 16129689912eSchristos } 16139689912eSchristos } 16149689912eSchristos 16159689912eSchristos if (result == DNS_R_PARTIALMATCH) { 16169689912eSchristos /* 16179689912eSchristos * If we discovered a covering DNAME skip looking for a covering 16189689912eSchristos * NSEC. 
16199689912eSchristos */ 16209689912eSchristos if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 16219689912eSchristos (search.zonecut_header == NULL || 16229689912eSchristos search.zonecut_header->type != dns_rdatatype_dname)) 16239689912eSchristos { 16249689912eSchristos result = find_coveringnsec( 16259689912eSchristos &search, name, nodep, now, foundname, rdataset, 16269689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 16279689912eSchristos if (result == DNS_R_COVERINGNSEC) { 16289689912eSchristos goto tree_exit; 16299689912eSchristos } 16309689912eSchristos } 16319689912eSchristos if (search.zonecut != NULL) { 16329689912eSchristos result = setup_delegation(&search, nodep, rdataset, 16339689912eSchristos sigrdataset, 16349689912eSchristos tlocktype DNS__DB_FLARG_PASS); 16359689912eSchristos goto tree_exit; 16369689912eSchristos } else { 16379689912eSchristos find_ns: 16389689912eSchristos result = find_deepest_zonecut( 16399689912eSchristos &search, node, nodep, foundname, rdataset, 16409689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 16419689912eSchristos goto tree_exit; 16429689912eSchristos } 16439689912eSchristos } else if (result != ISC_R_SUCCESS) { 16449689912eSchristos goto tree_exit; 16459689912eSchristos } 16469689912eSchristos 16479689912eSchristos /* 16489689912eSchristos * Certain DNSSEC types are not subject to CNAME matching 16499689912eSchristos * (RFC4035, section 2.5 and RFC3007). 16509689912eSchristos * 16519689912eSchristos * We don't check for RRSIG, because we don't store RRSIG records 16529689912eSchristos * directly. 16539689912eSchristos */ 16549689912eSchristos if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 16559689912eSchristos cname_ok = false; 16569689912eSchristos } 16579689912eSchristos 16589689912eSchristos /* 16599689912eSchristos * We now go looking for rdata... 
16609689912eSchristos */ 16619689912eSchristos 16629689912eSchristos lock = &(search.qpdb->node_locks[node->locknum].lock); 16639689912eSchristos NODE_RDLOCK(lock, &nlocktype); 16649689912eSchristos 16659689912eSchristos /* 16669689912eSchristos * These pointers need to be reset here in case we did 16679689912eSchristos * 'goto find_ns' from somewhere below. 16689689912eSchristos */ 16699689912eSchristos found = NULL; 16709689912eSchristos foundsig = NULL; 16719689912eSchristos sigtype = DNS_SIGTYPE(type); 16729689912eSchristos negtype = DNS_TYPEPAIR_VALUE(0, type); 16739689912eSchristos nsheader = NULL; 16749689912eSchristos nsecheader = NULL; 16759689912eSchristos nssig = NULL; 16769689912eSchristos nsecsig = NULL; 16779689912eSchristos cnamesig = NULL; 16789689912eSchristos empty_node = true; 16799689912eSchristos header_prev = NULL; 16809689912eSchristos for (header = node->data; header != NULL; header = header_next) { 16819689912eSchristos header_next = header->next; 16829689912eSchristos if (check_stale_header(node, header, &nlocktype, lock, &search, 16839689912eSchristos &header_prev)) 16849689912eSchristos { 16859689912eSchristos /* Do nothing. */ 16869689912eSchristos } else if (EXISTS(header) && !ANCIENT(header)) { 16879689912eSchristos /* 16889689912eSchristos * We now know that there is at least one active 16899689912eSchristos * non-stale rdataset at this node. 16909689912eSchristos */ 16919689912eSchristos empty_node = false; 16929689912eSchristos if (header->noqname != NULL && 16939689912eSchristos header->trust == dns_trust_secure) 16949689912eSchristos { 16959689912eSchristos found_noqname = true; 16969689912eSchristos } 16979689912eSchristos if (!NEGATIVE(header)) { 16989689912eSchristos all_negative = false; 16999689912eSchristos } 17009689912eSchristos 17019689912eSchristos /* 17029689912eSchristos * If we found a type we were looking for, remember 17039689912eSchristos * it. 
17049689912eSchristos */ 17059689912eSchristos if (header->type == type || 17069689912eSchristos (type == dns_rdatatype_any && 17079689912eSchristos DNS_TYPEPAIR_TYPE(header->type) != 0) || 17089689912eSchristos (cname_ok && header->type == dns_rdatatype_cname)) 17099689912eSchristos { 17109689912eSchristos /* 17119689912eSchristos * We've found the answer. 17129689912eSchristos */ 17139689912eSchristos found = header; 17149689912eSchristos if (header->type == dns_rdatatype_cname && 17159689912eSchristos cname_ok) 17169689912eSchristos { 17179689912eSchristos /* 17189689912eSchristos * If we've already got the 17199689912eSchristos * CNAME RRSIG, use it. 17209689912eSchristos */ 17219689912eSchristos if (cnamesig != NULL) { 17229689912eSchristos foundsig = cnamesig; 17239689912eSchristos } else { 17249689912eSchristos sigtype = DNS_SIGTYPE( 17259689912eSchristos dns_rdatatype_cname); 17269689912eSchristos } 17279689912eSchristos } 17289689912eSchristos } else if (header->type == sigtype) { 17299689912eSchristos /* 17309689912eSchristos * We've found the RRSIG rdataset for our 17319689912eSchristos * target type. Remember it. 17329689912eSchristos */ 17339689912eSchristos foundsig = header; 17349689912eSchristos } else if (header->type == RDATATYPE_NCACHEANY || 17359689912eSchristos header->type == negtype) 17369689912eSchristos { 17379689912eSchristos /* 17389689912eSchristos * We've found a negative cache entry. 17399689912eSchristos */ 17409689912eSchristos found = header; 17419689912eSchristos } else if (header->type == dns_rdatatype_ns) { 17429689912eSchristos /* 17439689912eSchristos * Remember a NS rdataset even if we're 17449689912eSchristos * not specifically looking for it, because 17459689912eSchristos * we might need it later. 
17469689912eSchristos */ 17479689912eSchristos nsheader = header; 17489689912eSchristos } else if (header->type == 17499689912eSchristos DNS_SIGTYPE(dns_rdatatype_ns)) 17509689912eSchristos { 17519689912eSchristos /* 17529689912eSchristos * If we need the NS rdataset, we'll also 17539689912eSchristos * need its signature. 17549689912eSchristos */ 17559689912eSchristos nssig = header; 17569689912eSchristos } else if (header->type == dns_rdatatype_nsec) { 17579689912eSchristos nsecheader = header; 17589689912eSchristos } else if (header->type == 17599689912eSchristos DNS_SIGTYPE(dns_rdatatype_nsec)) 17609689912eSchristos { 17619689912eSchristos nsecsig = header; 17629689912eSchristos } else if (cname_ok && 17639689912eSchristos header->type == 17649689912eSchristos DNS_SIGTYPE(dns_rdatatype_cname)) 17659689912eSchristos { 17669689912eSchristos /* 17679689912eSchristos * If we get a CNAME match, we'll also need 17689689912eSchristos * its signature. 17699689912eSchristos */ 17709689912eSchristos cnamesig = header; 17719689912eSchristos } 17729689912eSchristos header_prev = header; 17739689912eSchristos } else { 17749689912eSchristos header_prev = header; 17759689912eSchristos } 17769689912eSchristos } 17779689912eSchristos 17789689912eSchristos if (empty_node) { 17799689912eSchristos /* 17809689912eSchristos * We have an exact match for the name, but there are no 17819689912eSchristos * extant rdatasets. That means that this node doesn't 17829689912eSchristos * meaningfully exist, and that we really have a partial match. 
17839689912eSchristos */ 17849689912eSchristos NODE_UNLOCK(lock, &nlocktype); 17859689912eSchristos if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { 17869689912eSchristos result = find_coveringnsec( 17879689912eSchristos &search, name, nodep, now, foundname, rdataset, 17889689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 17899689912eSchristos if (result == DNS_R_COVERINGNSEC) { 17909689912eSchristos goto tree_exit; 17919689912eSchristos } 17929689912eSchristos } 17939689912eSchristos goto find_ns; 17949689912eSchristos } 17959689912eSchristos 17969689912eSchristos /* 17979689912eSchristos * If we didn't find what we were looking for... 17989689912eSchristos */ 17999689912eSchristos if (found == NULL || 18009689912eSchristos (DNS_TRUST_ADDITIONAL(found->trust) && 18019689912eSchristos ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || 18029689912eSchristos (found->trust == dns_trust_glue && 18039689912eSchristos ((options & DNS_DBFIND_GLUEOK) == 0)) || 18049689912eSchristos (DNS_TRUST_PENDING(found->trust) && 18059689912eSchristos ((options & DNS_DBFIND_PENDINGOK) == 0))) 18069689912eSchristos { 18079689912eSchristos /* 18089689912eSchristos * Return covering NODATA NSEC record. 
18099689912eSchristos */ 18109689912eSchristos if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 18119689912eSchristos nsecheader != NULL) 18129689912eSchristos { 18139689912eSchristos if (nodep != NULL) { 18149689912eSchristos newref(search.qpdb, node, nlocktype, 18159689912eSchristos tlocktype DNS__DB_FLARG_PASS); 18169689912eSchristos *nodep = node; 18179689912eSchristos } 18189689912eSchristos bindrdataset(search.qpdb, node, nsecheader, search.now, 18199689912eSchristos nlocktype, tlocktype, 18209689912eSchristos rdataset DNS__DB_FLARG_PASS); 18219689912eSchristos if (need_headerupdate(nsecheader, search.now)) { 18229689912eSchristos update = nsecheader; 18239689912eSchristos } 18249689912eSchristos if (nsecsig != NULL) { 18259689912eSchristos bindrdataset(search.qpdb, node, nsecsig, 18269689912eSchristos search.now, nlocktype, tlocktype, 18279689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 18289689912eSchristos if (need_headerupdate(nsecsig, search.now)) { 18299689912eSchristos updatesig = nsecsig; 18309689912eSchristos } 18319689912eSchristos } 18329689912eSchristos result = DNS_R_COVERINGNSEC; 18339689912eSchristos goto node_exit; 18349689912eSchristos } 18359689912eSchristos 18369689912eSchristos /* 18379689912eSchristos * This name was from a wild card. Look for a covering NSEC. 
18389689912eSchristos */ 18399689912eSchristos if (found == NULL && (found_noqname || all_negative) && 18409689912eSchristos (search.options & DNS_DBFIND_COVERINGNSEC) != 0) 18419689912eSchristos { 18429689912eSchristos NODE_UNLOCK(lock, &nlocktype); 18439689912eSchristos result = find_coveringnsec( 18449689912eSchristos &search, name, nodep, now, foundname, rdataset, 18459689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 18469689912eSchristos if (result == DNS_R_COVERINGNSEC) { 18479689912eSchristos goto tree_exit; 18489689912eSchristos } 18499689912eSchristos goto find_ns; 18509689912eSchristos } 18519689912eSchristos 18529689912eSchristos /* 18539689912eSchristos * If there is an NS rdataset at this node, then this is the 18549689912eSchristos * deepest zone cut. 18559689912eSchristos */ 18569689912eSchristos if (nsheader != NULL) { 18579689912eSchristos if (nodep != NULL) { 18589689912eSchristos newref(search.qpdb, node, nlocktype, 18599689912eSchristos tlocktype DNS__DB_FLARG_PASS); 18609689912eSchristos *nodep = node; 18619689912eSchristos } 18629689912eSchristos bindrdataset(search.qpdb, node, nsheader, search.now, 18639689912eSchristos nlocktype, tlocktype, 18649689912eSchristos rdataset DNS__DB_FLARG_PASS); 18659689912eSchristos if (need_headerupdate(nsheader, search.now)) { 18669689912eSchristos update = nsheader; 18679689912eSchristos } 18689689912eSchristos if (nssig != NULL) { 18699689912eSchristos bindrdataset(search.qpdb, node, nssig, 18709689912eSchristos search.now, nlocktype, tlocktype, 18719689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 18729689912eSchristos if (need_headerupdate(nssig, search.now)) { 18739689912eSchristos updatesig = nssig; 18749689912eSchristos } 18759689912eSchristos } 18769689912eSchristos result = DNS_R_DELEGATION; 18779689912eSchristos goto node_exit; 18789689912eSchristos } 18799689912eSchristos 18809689912eSchristos /* 18819689912eSchristos * Go find the deepest zone cut. 
18829689912eSchristos */ 18839689912eSchristos NODE_UNLOCK(lock, &nlocktype); 18849689912eSchristos goto find_ns; 18859689912eSchristos } 18869689912eSchristos 18879689912eSchristos /* 18889689912eSchristos * We found what we were looking for, or we found a CNAME. 18899689912eSchristos */ 18909689912eSchristos 18919689912eSchristos if (nodep != NULL) { 18929689912eSchristos newref(search.qpdb, node, nlocktype, 18939689912eSchristos tlocktype DNS__DB_FLARG_PASS); 18949689912eSchristos *nodep = node; 18959689912eSchristos } 18969689912eSchristos 18979689912eSchristos if (NEGATIVE(found)) { 18989689912eSchristos /* 18999689912eSchristos * We found a negative cache entry. 19009689912eSchristos */ 19019689912eSchristos if (NXDOMAIN(found)) { 19029689912eSchristos result = DNS_R_NCACHENXDOMAIN; 19039689912eSchristos } else { 19049689912eSchristos result = DNS_R_NCACHENXRRSET; 19059689912eSchristos } 19069689912eSchristos } else if (type != found->type && type != dns_rdatatype_any && 19079689912eSchristos found->type == dns_rdatatype_cname) 19089689912eSchristos { 19099689912eSchristos /* 19109689912eSchristos * We weren't doing an ANY query and we found a CNAME instead 19119689912eSchristos * of the type we were looking for, so we need to indicate 19129689912eSchristos * that result to the caller. 19139689912eSchristos */ 19149689912eSchristos result = DNS_R_CNAME; 19159689912eSchristos } else { 19169689912eSchristos /* 19179689912eSchristos * An ordinary successful query! 
19189689912eSchristos */ 19199689912eSchristos result = ISC_R_SUCCESS; 19209689912eSchristos } 19219689912eSchristos 19229689912eSchristos if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || 19239689912eSchristos result == DNS_R_NCACHENXRRSET) 19249689912eSchristos { 19259689912eSchristos bindrdataset(search.qpdb, node, found, search.now, nlocktype, 19269689912eSchristos tlocktype, rdataset DNS__DB_FLARG_PASS); 19279689912eSchristos if (need_headerupdate(found, search.now)) { 19289689912eSchristos update = found; 19299689912eSchristos } 19309689912eSchristos if (!NEGATIVE(found) && foundsig != NULL) { 19319689912eSchristos bindrdataset(search.qpdb, node, foundsig, search.now, 19329689912eSchristos nlocktype, tlocktype, 19339689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 19349689912eSchristos if (need_headerupdate(foundsig, search.now)) { 19359689912eSchristos updatesig = foundsig; 19369689912eSchristos } 19379689912eSchristos } 19389689912eSchristos } 19399689912eSchristos 19409689912eSchristos node_exit: 19419689912eSchristos if ((update != NULL || updatesig != NULL) && 19429689912eSchristos nlocktype != isc_rwlocktype_write) 19439689912eSchristos { 19449689912eSchristos NODE_FORCEUPGRADE(lock, &nlocktype); 19459689912eSchristos POST(nlocktype); 19469689912eSchristos } 19479689912eSchristos if (update != NULL && need_headerupdate(update, search.now)) { 19489689912eSchristos update_header(search.qpdb, update, search.now); 19499689912eSchristos } 19509689912eSchristos if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { 19519689912eSchristos update_header(search.qpdb, updatesig, search.now); 19529689912eSchristos } 19539689912eSchristos 19549689912eSchristos NODE_UNLOCK(lock, &nlocktype); 19559689912eSchristos 19569689912eSchristos tree_exit: 19579689912eSchristos TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype); 19589689912eSchristos 19599689912eSchristos /* 19609689912eSchristos * If we found a zonecut but aren't going to use it, 
we have to 19619689912eSchristos * let go of it. 19629689912eSchristos */ 19639689912eSchristos if (search.need_cleanup) { 19649689912eSchristos node = search.zonecut; 19659689912eSchristos INSIST(node != NULL); 19669689912eSchristos lock = &(search.qpdb->node_locks[node->locknum].lock); 19679689912eSchristos 19689689912eSchristos NODE_RDLOCK(lock, &nlocktype); 19699689912eSchristos decref(search.qpdb, node, &nlocktype, &tlocktype, 19709689912eSchristos true DNS__DB_FLARG_PASS); 19719689912eSchristos NODE_UNLOCK(lock, &nlocktype); 19729689912eSchristos INSIST(tlocktype == isc_rwlocktype_none); 19739689912eSchristos } 19749689912eSchristos 19759689912eSchristos update_cachestats(search.qpdb, result); 19769689912eSchristos return result; 19779689912eSchristos } 19789689912eSchristos 19799689912eSchristos static isc_result_t 19809689912eSchristos findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 19819689912eSchristos isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname, 19829689912eSchristos dns_name_t *dcname, dns_rdataset_t *rdataset, 19839689912eSchristos dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 19849689912eSchristos qpcnode_t *node = NULL; 19859689912eSchristos isc_rwlock_t *lock = NULL; 19869689912eSchristos isc_result_t result; 19879689912eSchristos qpc_search_t search; 19889689912eSchristos dns_slabheader_t *header = NULL; 19899689912eSchristos dns_slabheader_t *header_prev = NULL, *header_next = NULL; 19909689912eSchristos dns_slabheader_t *found = NULL, *foundsig = NULL; 19919689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 19929689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 19939689912eSchristos bool dcnull = (dcname == NULL); 19949689912eSchristos 19959689912eSchristos REQUIRE(VALID_QPDB((qpcache_t *)db)); 19969689912eSchristos 19979689912eSchristos if (now == 0) { 19989689912eSchristos now = isc_stdtime_now(); 19999689912eSchristos } 20009689912eSchristos 20019689912eSchristos search 
= (qpc_search_t){ 20029689912eSchristos .qpdb = (qpcache_t *)db, 20039689912eSchristos .options = options, 20049689912eSchristos .now = now, 20059689912eSchristos }; 20069689912eSchristos 20079689912eSchristos if (dcnull) { 20089689912eSchristos dcname = foundname; 20099689912eSchristos } 20109689912eSchristos 20119689912eSchristos TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype); 20129689912eSchristos 20139689912eSchristos /* 20149689912eSchristos * Search down from the root of the tree. 20159689912eSchristos */ 20169689912eSchristos result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL, 20179689912eSchristos &search.chain, (void **)&node, NULL); 20189689912eSchristos if (result != ISC_R_NOTFOUND) { 20199689912eSchristos dns_name_copy(&node->name, dcname); 20209689912eSchristos } 20219689912eSchristos if ((options & DNS_DBFIND_NOEXACT) != 0 && result == ISC_R_SUCCESS) { 20229689912eSchristos int len = dns_qpchain_length(&search.chain); 20239689912eSchristos if (len >= 2) { 20249689912eSchristos node = NULL; 20259689912eSchristos dns_qpchain_node(&search.chain, len - 2, NULL, 20269689912eSchristos (void **)&node, NULL); 20279689912eSchristos search.chain.len = len - 1; 20289689912eSchristos result = DNS_R_PARTIALMATCH; 20299689912eSchristos } else { 20309689912eSchristos result = ISC_R_NOTFOUND; 20319689912eSchristos } 20329689912eSchristos } 20339689912eSchristos 20349689912eSchristos if (result == DNS_R_PARTIALMATCH) { 20359689912eSchristos result = find_deepest_zonecut(&search, node, nodep, foundname, 20369689912eSchristos rdataset, 20379689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 20389689912eSchristos goto tree_exit; 20399689912eSchristos } else if (result != ISC_R_SUCCESS) { 20409689912eSchristos goto tree_exit; 20419689912eSchristos } else if (!dcnull) { 20429689912eSchristos dns_name_copy(dcname, foundname); 20439689912eSchristos } 20449689912eSchristos 20459689912eSchristos /* 20469689912eSchristos * We now go looking for an NS rdataset at the 
node. 20479689912eSchristos */ 20489689912eSchristos 20499689912eSchristos lock = &(search.qpdb->node_locks[node->locknum].lock); 20509689912eSchristos NODE_RDLOCK(lock, &nlocktype); 20519689912eSchristos 20529689912eSchristos for (header = node->data; header != NULL; header = header_next) { 20539689912eSchristos header_next = header->next; 20549689912eSchristos if (check_stale_header(node, header, &nlocktype, lock, &search, 20559689912eSchristos &header_prev)) 20569689912eSchristos { 20579689912eSchristos /* 20589689912eSchristos * The function dns_qp_lookup found us a matching 20599689912eSchristos * node for 'name' and stored the result in 'dcname'. 20609689912eSchristos * This is the deepest known zonecut in our database. 20619689912eSchristos * However, this node may be stale and if serve-stale 20629689912eSchristos * is not enabled (in other words 'stale-answer-enable' 20639689912eSchristos * is set to no), this node may not be used as a 20649689912eSchristos * zonecut we know about. If so, find the deepest 20659689912eSchristos * zonecut from this node up and return that instead. 20669689912eSchristos */ 20679689912eSchristos NODE_UNLOCK(lock, &nlocktype); 20689689912eSchristos result = find_deepest_zonecut( 20699689912eSchristos &search, node, nodep, foundname, rdataset, 20709689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 20719689912eSchristos dns_name_copy(foundname, dcname); 20729689912eSchristos goto tree_exit; 20739689912eSchristos } else if (EXISTS(header) && !ANCIENT(header)) { 20749689912eSchristos /* 20759689912eSchristos * If we found a type we were looking for, remember 20769689912eSchristos * it. 20779689912eSchristos */ 20789689912eSchristos if (header->type == dns_rdatatype_ns) { 20799689912eSchristos /* 20809689912eSchristos * Remember a NS rdataset even if we're 20819689912eSchristos * not specifically looking for it, because 20829689912eSchristos * we might need it later. 
20839689912eSchristos */ 20849689912eSchristos found = header; 20859689912eSchristos } else if (header->type == 20869689912eSchristos DNS_SIGTYPE(dns_rdatatype_ns)) 20879689912eSchristos { 20889689912eSchristos /* 20899689912eSchristos * If we need the NS rdataset, we'll also 20909689912eSchristos * need its signature. 20919689912eSchristos */ 20929689912eSchristos foundsig = header; 20939689912eSchristos } 20949689912eSchristos header_prev = header; 20959689912eSchristos } else { 20969689912eSchristos header_prev = header; 20979689912eSchristos } 20989689912eSchristos } 20999689912eSchristos 21009689912eSchristos if (found == NULL) { 21019689912eSchristos /* 21029689912eSchristos * No NS records here. 21039689912eSchristos */ 21049689912eSchristos NODE_UNLOCK(lock, &nlocktype); 21059689912eSchristos result = find_deepest_zonecut(&search, node, nodep, foundname, 21069689912eSchristos rdataset, 21079689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 21089689912eSchristos goto tree_exit; 21099689912eSchristos } 21109689912eSchristos 21119689912eSchristos if (nodep != NULL) { 21129689912eSchristos newref(search.qpdb, node, nlocktype, 21139689912eSchristos tlocktype DNS__DB_FLARG_PASS); 21149689912eSchristos *nodep = node; 21159689912eSchristos } 21169689912eSchristos 21179689912eSchristos bindrdataset(search.qpdb, node, found, search.now, nlocktype, tlocktype, 21189689912eSchristos rdataset DNS__DB_FLARG_PASS); 21199689912eSchristos if (foundsig != NULL) { 21209689912eSchristos bindrdataset(search.qpdb, node, foundsig, search.now, nlocktype, 21219689912eSchristos tlocktype, sigrdataset DNS__DB_FLARG_PASS); 21229689912eSchristos } 21239689912eSchristos 21249689912eSchristos if (need_headerupdate(found, search.now) || 21259689912eSchristos (foundsig != NULL && need_headerupdate(foundsig, search.now))) 21269689912eSchristos { 21279689912eSchristos if (nlocktype != isc_rwlocktype_write) { 21289689912eSchristos NODE_FORCEUPGRADE(lock, &nlocktype); 21299689912eSchristos 
POST(nlocktype); 21309689912eSchristos } 21319689912eSchristos if (need_headerupdate(found, search.now)) { 21329689912eSchristos update_header(search.qpdb, found, search.now); 21339689912eSchristos } 21349689912eSchristos if (foundsig != NULL && need_headerupdate(foundsig, search.now)) 21359689912eSchristos { 21369689912eSchristos update_header(search.qpdb, foundsig, search.now); 21379689912eSchristos } 21389689912eSchristos } 21399689912eSchristos 21409689912eSchristos NODE_UNLOCK(lock, &nlocktype); 21419689912eSchristos 21429689912eSchristos tree_exit: 21439689912eSchristos TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype); 21449689912eSchristos 21459689912eSchristos INSIST(!search.need_cleanup); 21469689912eSchristos 21479689912eSchristos if (result == DNS_R_DELEGATION) { 21489689912eSchristos result = ISC_R_SUCCESS; 21499689912eSchristos } 21509689912eSchristos 21519689912eSchristos return result; 21529689912eSchristos } 21539689912eSchristos 21549689912eSchristos static isc_result_t 21559689912eSchristos findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 21569689912eSchristos dns_rdatatype_t type, dns_rdatatype_t covers, isc_stdtime_t now, 21579689912eSchristos dns_rdataset_t *rdataset, 21589689912eSchristos dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 21599689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 21609689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 21619689912eSchristos dns_slabheader_t *header = NULL, *header_next = NULL; 21629689912eSchristos dns_slabheader_t *found = NULL, *foundsig = NULL; 21639689912eSchristos dns_typepair_t matchtype, sigmatchtype, negtype; 21649689912eSchristos isc_result_t result; 21659689912eSchristos isc_rwlock_t *lock = NULL; 21669689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 21679689912eSchristos 21689689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 21699689912eSchristos REQUIRE(type != dns_rdatatype_any); 21709689912eSchristos 21719689912eSchristos UNUSED(version); 
21729689912eSchristos 21739689912eSchristos result = ISC_R_SUCCESS; 21749689912eSchristos 21759689912eSchristos if (now == 0) { 21769689912eSchristos now = isc_stdtime_now(); 21779689912eSchristos } 21789689912eSchristos 21799689912eSchristos lock = &qpdb->node_locks[qpnode->locknum].lock; 21809689912eSchristos NODE_RDLOCK(lock, &nlocktype); 21819689912eSchristos 21829689912eSchristos matchtype = DNS_TYPEPAIR_VALUE(type, covers); 21839689912eSchristos negtype = DNS_TYPEPAIR_VALUE(0, type); 21849689912eSchristos if (covers == 0) { 21859689912eSchristos sigmatchtype = DNS_SIGTYPE(type); 21869689912eSchristos } else { 21879689912eSchristos sigmatchtype = 0; 21889689912eSchristos } 21899689912eSchristos 21909689912eSchristos for (header = qpnode->data; header != NULL; header = header_next) { 21919689912eSchristos header_next = header->next; 21929689912eSchristos if (!ACTIVE(header, now)) { 21939689912eSchristos if ((header->ttl + STALE_TTL(header, qpdb) < 21949689912eSchristos now - QPDB_VIRTUAL) && 21959689912eSchristos (nlocktype == isc_rwlocktype_write || 21969689912eSchristos NODE_TRYUPGRADE(lock, &nlocktype) == 21979689912eSchristos ISC_R_SUCCESS)) 21989689912eSchristos { 21999689912eSchristos /* 22009689912eSchristos * We update the node's status only when we 22019689912eSchristos * can get write access. 22029689912eSchristos * 22039689912eSchristos * We don't check if refcurrent(qpnode) == 0 22049689912eSchristos * and try to free like we do in find(), 22059689912eSchristos * because refcurrent(qpnode) must be 22069689912eSchristos * non-zero. This is so because 'node' is an 22079689912eSchristos * argument to the function. 
22089689912eSchristos */ 22099689912eSchristos mark(header, DNS_SLABHEADERATTR_ANCIENT); 22109689912eSchristos HEADERNODE(header)->dirty = 1; 22119689912eSchristos } 22129689912eSchristos } else if (EXISTS(header) && !ANCIENT(header)) { 22139689912eSchristos if (header->type == matchtype) { 22149689912eSchristos found = header; 22159689912eSchristos } else if (header->type == RDATATYPE_NCACHEANY || 22169689912eSchristos header->type == negtype) 22179689912eSchristos { 22189689912eSchristos found = header; 22199689912eSchristos } else if (header->type == sigmatchtype) { 22209689912eSchristos foundsig = header; 22219689912eSchristos } 22229689912eSchristos } 22239689912eSchristos } 22249689912eSchristos if (found != NULL) { 22259689912eSchristos bindrdataset(qpdb, qpnode, found, now, nlocktype, 22269689912eSchristos isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS); 22279689912eSchristos if (!NEGATIVE(found) && foundsig != NULL) { 22289689912eSchristos bindrdataset(qpdb, qpnode, foundsig, now, nlocktype, 22299689912eSchristos isc_rwlocktype_none, 22309689912eSchristos sigrdataset DNS__DB_FLARG_PASS); 22319689912eSchristos } 22329689912eSchristos } 22339689912eSchristos 22349689912eSchristos NODE_UNLOCK(lock, &nlocktype); 22359689912eSchristos 22369689912eSchristos if (found == NULL) { 22379689912eSchristos return ISC_R_NOTFOUND; 22389689912eSchristos } 22399689912eSchristos 22409689912eSchristos if (NEGATIVE(found)) { 22419689912eSchristos /* 22429689912eSchristos * We found a negative cache entry. 
22439689912eSchristos */ 22449689912eSchristos if (NXDOMAIN(found)) { 22459689912eSchristos result = DNS_R_NCACHENXDOMAIN; 22469689912eSchristos } else { 22479689912eSchristos result = DNS_R_NCACHENXRRSET; 22489689912eSchristos } 22499689912eSchristos } 22509689912eSchristos 22519689912eSchristos update_cachestats(qpdb, result); 22529689912eSchristos 22539689912eSchristos return result; 22549689912eSchristos } 22559689912eSchristos 22569689912eSchristos static isc_result_t 22579689912eSchristos setcachestats(dns_db_t *db, isc_stats_t *stats) { 22589689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 22599689912eSchristos 22609689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 22619689912eSchristos REQUIRE(stats != NULL); 22629689912eSchristos 22639689912eSchristos isc_stats_attach(stats, &qpdb->cachestats); 22649689912eSchristos return ISC_R_SUCCESS; 22659689912eSchristos } 22669689912eSchristos 22679689912eSchristos static dns_stats_t * 22689689912eSchristos getrrsetstats(dns_db_t *db) { 22699689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 22709689912eSchristos 22719689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 22729689912eSchristos 22739689912eSchristos return qpdb->rrsetstats; 22749689912eSchristos } 22759689912eSchristos 22769689912eSchristos static isc_result_t 22779689912eSchristos setservestalettl(dns_db_t *db, dns_ttl_t ttl) { 22789689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 22799689912eSchristos 22809689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 22819689912eSchristos 22829689912eSchristos /* currently no bounds checking. 0 means disable. 
*/ 22839689912eSchristos qpdb->common.serve_stale_ttl = ttl; 22849689912eSchristos return ISC_R_SUCCESS; 22859689912eSchristos } 22869689912eSchristos 22879689912eSchristos static isc_result_t 22889689912eSchristos getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { 22899689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 22909689912eSchristos 22919689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 22929689912eSchristos 22939689912eSchristos *ttl = qpdb->common.serve_stale_ttl; 22949689912eSchristos return ISC_R_SUCCESS; 22959689912eSchristos } 22969689912eSchristos 22979689912eSchristos static isc_result_t 22989689912eSchristos setservestalerefresh(dns_db_t *db, uint32_t interval) { 22999689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 23009689912eSchristos 23019689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 23029689912eSchristos 23039689912eSchristos /* currently no bounds checking. 0 means disable. */ 23049689912eSchristos qpdb->serve_stale_refresh = interval; 23059689912eSchristos return ISC_R_SUCCESS; 23069689912eSchristos } 23079689912eSchristos 23089689912eSchristos static isc_result_t 23099689912eSchristos getservestalerefresh(dns_db_t *db, uint32_t *interval) { 23109689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 23119689912eSchristos 23129689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 23139689912eSchristos 23149689912eSchristos *interval = qpdb->serve_stale_refresh; 23159689912eSchristos return ISC_R_SUCCESS; 23169689912eSchristos } 23179689912eSchristos 23189689912eSchristos static void 23199689912eSchristos expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { 23209689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 23219689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 23229689912eSchristos dns_slabheader_t *header = data; 23239689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 23249689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 23259689912eSchristos 23269689912eSchristos 
NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 23279689912eSchristos expireheader(header, &nlocktype, &tlocktype, 23289689912eSchristos dns_expire_flush DNS__DB_FILELINE); 23299689912eSchristos NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 23309689912eSchristos INSIST(tlocktype == isc_rwlocktype_none); 23319689912eSchristos } 23329689912eSchristos 23339689912eSchristos static size_t 23349689912eSchristos rdataset_size(dns_slabheader_t *header) { 23359689912eSchristos if (!NONEXISTENT(header)) { 23369689912eSchristos return dns_rdataslab_size((unsigned char *)header, 23379689912eSchristos sizeof(*header)); 23389689912eSchristos } 23399689912eSchristos 23409689912eSchristos return sizeof(*header); 23419689912eSchristos } 23429689912eSchristos 23439689912eSchristos static size_t 23449689912eSchristos expire_lru_headers(qpcache_t *qpdb, unsigned int locknum, 23459689912eSchristos isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, 23469689912eSchristos size_t purgesize DNS__DB_FLARG) { 23479689912eSchristos dns_slabheader_t *header = NULL; 23489689912eSchristos size_t purged = 0; 23499689912eSchristos 23509689912eSchristos for (header = ISC_LIST_TAIL(qpdb->lru[locknum]); 23519689912eSchristos header != NULL && header->last_used <= qpdb->last_used && 23529689912eSchristos purged <= purgesize; 23539689912eSchristos header = ISC_LIST_TAIL(qpdb->lru[locknum])) 23549689912eSchristos { 23559689912eSchristos size_t header_size = rdataset_size(header); 23569689912eSchristos 23579689912eSchristos /* 23589689912eSchristos * Unlink the entry at this point to avoid checking it 23599689912eSchristos * again even if it's currently used someone else and 23609689912eSchristos * cannot be purged at this moment. This entry won't be 23619689912eSchristos * referenced any more (so unlinking is safe) since the 23629689912eSchristos * TTL will be reset to 0. 
23639689912eSchristos */ 23649689912eSchristos ISC_LIST_UNLINK(qpdb->lru[locknum], header, link); 23659689912eSchristos expireheader(header, nlocktypep, tlocktypep, 23669689912eSchristos dns_expire_lru DNS__DB_FLARG_PASS); 23679689912eSchristos purged += header_size; 23689689912eSchristos } 23699689912eSchristos 23709689912eSchristos return purged; 23719689912eSchristos } 23729689912eSchristos 23739689912eSchristos /*% 23749689912eSchristos * Purge some expired and/or stale (i.e. unused for some period) cache entries 23759689912eSchristos * due to an overmem condition. To recover from this condition quickly, 23769689912eSchristos * we clean up entries up to the size of newly added rdata that triggered 23779689912eSchristos * the overmem; this is accessible via newheader. 23789689912eSchristos * 23799689912eSchristos * The LRU lists tails are processed in LRU order to the nearest second. 23809689912eSchristos * 23819689912eSchristos * A write lock on the tree must be held. 23829689912eSchristos */ 23839689912eSchristos static void 23849689912eSchristos overmem(qpcache_t *qpdb, dns_slabheader_t *newheader, 23859689912eSchristos isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { 23869689912eSchristos uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->node_lock_count; 23879689912eSchristos uint32_t locknum = locknum_start; 23889689912eSchristos size_t purgesize, purged = 0; 23899689912eSchristos isc_stdtime_t min_last_used = 0; 23909689912eSchristos size_t max_passes = 8; 23919689912eSchristos 23929689912eSchristos /* 23939689912eSchristos * Maximum estimated size of the data being added: The size 23949689912eSchristos * of the rdataset, plus a new QP database node and nodename, 23959689912eSchristos * and a possible additional NSEC node and nodename. Also add 23969689912eSchristos * a 12k margin for a possible QP-trie chunk allocation. 23979689912eSchristos * (It's okay to overestimate, we want to get cache memory 23989689912eSchristos * down quickly.) 
23999689912eSchristos */ 24009689912eSchristos purgesize = 2 * (sizeof(qpcnode_t) + 24019689912eSchristos dns_name_size(&HEADERNODE(newheader)->name)) + 24029689912eSchristos rdataset_size(newheader) + 12288; 24039689912eSchristos again: 24049689912eSchristos do { 24059689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 24069689912eSchristos NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 24079689912eSchristos 24089689912eSchristos purged += expire_lru_headers( 24099689912eSchristos qpdb, locknum, &nlocktype, tlocktypep, 24109689912eSchristos purgesize - purged DNS__DB_FLARG_PASS); 24119689912eSchristos 24129689912eSchristos /* 24139689912eSchristos * Work out the oldest remaining last_used values of the list 24149689912eSchristos * tails as we walk across the array of lru lists. 24159689912eSchristos */ 24169689912eSchristos dns_slabheader_t *header = ISC_LIST_TAIL(qpdb->lru[locknum]); 24179689912eSchristos if (header != NULL && 24189689912eSchristos (min_last_used == 0 || header->last_used < min_last_used)) 24199689912eSchristos { 24209689912eSchristos min_last_used = header->last_used; 24219689912eSchristos } 24229689912eSchristos NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 24239689912eSchristos locknum = (locknum + 1) % qpdb->node_lock_count; 24249689912eSchristos } while (locknum != locknum_start && purged <= purgesize); 24259689912eSchristos 24269689912eSchristos /* 24279689912eSchristos * Update qpdb->last_used if we have walked all the list tails and have 24289689912eSchristos * not freed the required amount of memory. 
24299689912eSchristos */ 24309689912eSchristos if (purged < purgesize) { 24319689912eSchristos if (min_last_used != 0) { 24329689912eSchristos qpdb->last_used = min_last_used; 24339689912eSchristos if (max_passes-- > 0) { 24349689912eSchristos goto again; 24359689912eSchristos } 24369689912eSchristos } 24379689912eSchristos } 24389689912eSchristos } 24399689912eSchristos 24409689912eSchristos /*% 24419689912eSchristos * These functions allow the heap code to rank the priority of each 24429689912eSchristos * element. It returns true if v1 happens "sooner" than v2. 24439689912eSchristos */ 24449689912eSchristos static bool 24459689912eSchristos ttl_sooner(void *v1, void *v2) { 24469689912eSchristos dns_slabheader_t *h1 = v1; 24479689912eSchristos dns_slabheader_t *h2 = v2; 24489689912eSchristos 24499689912eSchristos return h1->ttl < h2->ttl; 24509689912eSchristos } 24519689912eSchristos 24529689912eSchristos /*% 24539689912eSchristos * This function sets the heap index into the header. 24549689912eSchristos */ 24559689912eSchristos static void 24569689912eSchristos set_index(void *what, unsigned int idx) { 24579689912eSchristos dns_slabheader_t *h = what; 24589689912eSchristos 24599689912eSchristos h->heap_index = idx; 24609689912eSchristos } 24619689912eSchristos 24629689912eSchristos static void 24639689912eSchristos free_qpdb(qpcache_t *qpdb, bool log) { 24649689912eSchristos unsigned int i; 24659689912eSchristos char buf[DNS_NAME_FORMATSIZE]; 24669689912eSchristos dns_qp_t **treep = NULL; 24679689912eSchristos 24689689912eSchristos for (;;) { 24699689912eSchristos /* 24709689912eSchristos * pick the next tree to (start to) destroy 24719689912eSchristos */ 24729689912eSchristos treep = &qpdb->tree; 24739689912eSchristos if (*treep == NULL) { 24749689912eSchristos treep = &qpdb->nsec; 24759689912eSchristos if (*treep == NULL) { 24769689912eSchristos break; 24779689912eSchristos } 24789689912eSchristos } 24799689912eSchristos 24809689912eSchristos 
dns_qp_destroy(treep); 24819689912eSchristos INSIST(*treep == NULL); 24829689912eSchristos } 24839689912eSchristos 24849689912eSchristos if (log) { 24859689912eSchristos if (dns_name_dynamic(&qpdb->common.origin)) { 24869689912eSchristos dns_name_format(&qpdb->common.origin, buf, sizeof(buf)); 24879689912eSchristos } else { 24889689912eSchristos strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 24899689912eSchristos } 24909689912eSchristos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 24919689912eSchristos DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 24929689912eSchristos "done free_qpdb(%s)", buf); 24939689912eSchristos } 24949689912eSchristos if (dns_name_dynamic(&qpdb->common.origin)) { 24959689912eSchristos dns_name_free(&qpdb->common.origin, qpdb->common.mctx); 24969689912eSchristos } 24979689912eSchristos for (i = 0; i < qpdb->node_lock_count; i++) { 24989689912eSchristos isc_refcount_destroy(&qpdb->node_locks[i].references); 24999689912eSchristos NODE_DESTROYLOCK(&qpdb->node_locks[i].lock); 25009689912eSchristos } 25019689912eSchristos 25029689912eSchristos /* 25039689912eSchristos * Clean up LRU / re-signing order lists. 25049689912eSchristos */ 25059689912eSchristos if (qpdb->lru != NULL) { 25069689912eSchristos for (i = 0; i < qpdb->node_lock_count; i++) { 25079689912eSchristos INSIST(ISC_LIST_EMPTY(qpdb->lru[i])); 25089689912eSchristos } 25099689912eSchristos isc_mem_cput(qpdb->common.mctx, qpdb->lru, 25109689912eSchristos qpdb->node_lock_count, 25119689912eSchristos sizeof(dns_slabheaderlist_t)); 25129689912eSchristos } 25139689912eSchristos /* 25149689912eSchristos * Clean up dead node buckets. 
25159689912eSchristos */ 25169689912eSchristos for (i = 0; i < qpdb->node_lock_count; i++) { 25179689912eSchristos INSIST(isc_queue_empty(&qpdb->deadnodes[i])); 25189689912eSchristos isc_queue_destroy(&qpdb->deadnodes[i]); 25199689912eSchristos } 25209689912eSchristos isc_mem_cput(qpdb->common.mctx, qpdb->deadnodes, qpdb->node_lock_count, 25219689912eSchristos sizeof(qpdb->deadnodes[0])); 25229689912eSchristos 25239689912eSchristos /* 25249689912eSchristos * Clean up heap objects. 25259689912eSchristos */ 25269689912eSchristos if (qpdb->heaps != NULL) { 25279689912eSchristos for (i = 0; i < qpdb->node_lock_count; i++) { 25289689912eSchristos isc_heap_destroy(&qpdb->heaps[i]); 25299689912eSchristos } 25309689912eSchristos isc_mem_cput(qpdb->hmctx, qpdb->heaps, qpdb->node_lock_count, 25319689912eSchristos sizeof(isc_heap_t *)); 25329689912eSchristos } 25339689912eSchristos 25349689912eSchristos if (qpdb->rrsetstats != NULL) { 25359689912eSchristos dns_stats_detach(&qpdb->rrsetstats); 25369689912eSchristos } 25379689912eSchristos if (qpdb->cachestats != NULL) { 25389689912eSchristos isc_stats_detach(&qpdb->cachestats); 25399689912eSchristos } 25409689912eSchristos if (qpdb->gluecachestats != NULL) { 25419689912eSchristos isc_stats_detach(&qpdb->gluecachestats); 25429689912eSchristos } 25439689912eSchristos 25449689912eSchristos isc_mem_cput(qpdb->common.mctx, qpdb->node_locks, qpdb->node_lock_count, 25459689912eSchristos sizeof(db_nodelock_t)); 25469689912eSchristos TREE_DESTROYLOCK(&qpdb->tree_lock); 25479689912eSchristos isc_refcount_destroy(&qpdb->common.references); 25489689912eSchristos 25499689912eSchristos isc_rwlock_destroy(&qpdb->lock); 25509689912eSchristos qpdb->common.magic = 0; 25519689912eSchristos qpdb->common.impmagic = 0; 25529689912eSchristos isc_mem_detach(&qpdb->hmctx); 25539689912eSchristos 25549689912eSchristos isc_mem_putanddetach(&qpdb->common.mctx, qpdb, sizeof(*qpdb)); 25559689912eSchristos } 25569689912eSchristos 25579689912eSchristos static 
void 25589689912eSchristos qpdb_destroy(dns_db_t *arg) { 25599689912eSchristos qpcache_t *qpdb = (qpcache_t *)arg; 25609689912eSchristos bool want_free = false; 25619689912eSchristos unsigned int i; 25629689912eSchristos unsigned int inactive = 0; 25639689912eSchristos 25649689912eSchristos if (qpdb->origin_node != NULL) { 25659689912eSchristos qpcnode_detach(&qpdb->origin_node); 25669689912eSchristos } 25679689912eSchristos 25689689912eSchristos /* 25699689912eSchristos * Even though there are no external direct references, there still 25709689912eSchristos * may be nodes in use. 25719689912eSchristos */ 25729689912eSchristos for (i = 0; i < qpdb->node_lock_count; i++) { 25739689912eSchristos isc_rwlocktype_t nodelock = isc_rwlocktype_none; 25749689912eSchristos NODE_WRLOCK(&qpdb->node_locks[i].lock, &nodelock); 25759689912eSchristos qpdb->node_locks[i].exiting = true; 25769689912eSchristos if (isc_refcount_current(&qpdb->node_locks[i].references) == 0) 25779689912eSchristos { 25789689912eSchristos inactive++; 25799689912eSchristos } 25809689912eSchristos NODE_UNLOCK(&qpdb->node_locks[i].lock, &nodelock); 25819689912eSchristos } 25829689912eSchristos 25839689912eSchristos if (inactive != 0) { 25849689912eSchristos RWLOCK(&qpdb->lock, isc_rwlocktype_write); 25859689912eSchristos qpdb->active -= inactive; 25869689912eSchristos if (qpdb->active == 0) { 25879689912eSchristos want_free = true; 25889689912eSchristos } 25899689912eSchristos RWUNLOCK(&qpdb->lock, isc_rwlocktype_write); 25909689912eSchristos if (want_free) { 25919689912eSchristos char buf[DNS_NAME_FORMATSIZE]; 25929689912eSchristos if (dns_name_dynamic(&qpdb->common.origin)) { 25939689912eSchristos dns_name_format(&qpdb->common.origin, buf, 25949689912eSchristos sizeof(buf)); 25959689912eSchristos } else { 25969689912eSchristos strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 25979689912eSchristos } 25989689912eSchristos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 25999689912eSchristos DNS_LOGMODULE_CACHE, 
ISC_LOG_DEBUG(1), 26009689912eSchristos "calling free_qpdb(%s)", buf); 26019689912eSchristos free_qpdb(qpdb, true); 26029689912eSchristos } 26039689912eSchristos } 26049689912eSchristos } 26059689912eSchristos 26069689912eSchristos static void 26079689912eSchristos mark_ancient(dns_slabheader_t *header) { 26089689912eSchristos setttl(header, 0); 26099689912eSchristos mark(header, DNS_SLABHEADERATTR_ANCIENT); 26109689912eSchristos HEADERNODE(header)->dirty = 1; 26119689912eSchristos } 26129689912eSchristos 26139689912eSchristos /*% 26149689912eSchristos * Clean up dead nodes. These are nodes which have no references, and 26159689912eSchristos * have no data. They are dead but we could not or chose not to delete 26169689912eSchristos * them when we deleted all the data at that node because we did not want 26179689912eSchristos * to wait for the tree write lock. 26189689912eSchristos */ 26199689912eSchristos static void 26209689912eSchristos cleanup_deadnodes(void *arg) { 26219689912eSchristos qpcache_t *qpdb = arg; 26229689912eSchristos uint16_t locknum = isc_tid(); 26239689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 26249689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 26259689912eSchristos qpcnode_t *qpnode = NULL, *qpnext = NULL; 26269689912eSchristos isc_queue_t deadnodes; 26279689912eSchristos 26289689912eSchristos INSIST(locknum < qpdb->node_lock_count); 26299689912eSchristos 26309689912eSchristos isc_queue_init(&deadnodes); 26319689912eSchristos 26329689912eSchristos TREE_WRLOCK(&qpdb->tree_lock, &tlocktype); 26339689912eSchristos NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 26349689912eSchristos 26359689912eSchristos RUNTIME_CHECK(isc_queue_splice(&deadnodes, &qpdb->deadnodes[locknum])); 26369689912eSchristos isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) { 26379689912eSchristos decref(qpdb, qpnode, &nlocktype, &tlocktype, false); 26389689912eSchristos } 26399689912eSchristos 
26409689912eSchristos NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype); 26419689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 26429689912eSchristos } 26439689912eSchristos 26449689912eSchristos /* 26459689912eSchristos * This function is assumed to be called when a node is newly referenced 26469689912eSchristos * and can be in the deadnode list. In that case the node will be references 26479689912eSchristos * and cleanup_deadnodes() will remove it from the list when the cleaning 26489689912eSchristos * happens. 26499689912eSchristos * Note: while a new reference is gained in multiple places, there are only very 26509689912eSchristos * few cases where the node can be in the deadnode list (only empty nodes can 26519689912eSchristos * have been added to the list). 26529689912eSchristos */ 26539689912eSchristos static void 26549689912eSchristos reactivate_node(qpcache_t *qpdb, qpcnode_t *node, 26559689912eSchristos isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) { 26569689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 26579689912eSchristos isc_rwlock_t *nodelock = &qpdb->node_locks[node->locknum].lock; 26589689912eSchristos 26599689912eSchristos NODE_RDLOCK(nodelock, &nlocktype); 26609689912eSchristos newref(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS); 26619689912eSchristos NODE_UNLOCK(nodelock, &nlocktype); 26629689912eSchristos } 26639689912eSchristos 26649689912eSchristos static qpcnode_t * 26659689912eSchristos new_qpcnode(qpcache_t *qpdb, const dns_name_t *name) { 26669689912eSchristos qpcnode_t *newdata = isc_mem_get(qpdb->common.mctx, sizeof(*newdata)); 26679689912eSchristos *newdata = (qpcnode_t){ 26689689912eSchristos .name = DNS_NAME_INITEMPTY, 26699689912eSchristos .references = ISC_REFCOUNT_INITIALIZER(1), 26709689912eSchristos .locknum = isc_random_uniform(qpdb->node_lock_count), 26719689912eSchristos }; 26729689912eSchristos 26739689912eSchristos INSIST(newdata->locknum < qpdb->node_lock_count); 
26749689912eSchristos 26759689912eSchristos isc_mem_attach(qpdb->common.mctx, &newdata->mctx); 26769689912eSchristos dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name); 26779689912eSchristos 26789689912eSchristos #ifdef DNS_DB_NODETRACE 26799689912eSchristos fprintf(stderr, "new_qpcnode:%s:%s:%d:%p->references = 1\n", __func__, 26809689912eSchristos __FILE__, __LINE__ + 1, name); 26819689912eSchristos #endif 26829689912eSchristos return newdata; 26839689912eSchristos } 26849689912eSchristos 26859689912eSchristos static isc_result_t 26869689912eSchristos findnode(dns_db_t *db, const dns_name_t *name, bool create, 26879689912eSchristos dns_dbnode_t **nodep DNS__DB_FLARG) { 26889689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 26899689912eSchristos qpcnode_t *node = NULL; 26909689912eSchristos isc_result_t result; 26919689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 26929689912eSchristos 26939689912eSchristos TREE_RDLOCK(&qpdb->tree_lock, &tlocktype); 26949689912eSchristos result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL); 26959689912eSchristos if (result != ISC_R_SUCCESS) { 26969689912eSchristos if (!create) { 26979689912eSchristos goto unlock; 26989689912eSchristos } 26999689912eSchristos /* 27009689912eSchristos * Try to upgrade the lock and if that fails unlock then relock. 
27019689912eSchristos */ 27029689912eSchristos TREE_FORCEUPGRADE(&qpdb->tree_lock, &tlocktype); 27039689912eSchristos result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL); 27049689912eSchristos if (result != ISC_R_SUCCESS) { 27059689912eSchristos node = new_qpcnode(qpdb, name); 27069689912eSchristos result = dns_qp_insert(qpdb->tree, node, 0); 27079689912eSchristos INSIST(result == ISC_R_SUCCESS); 27089689912eSchristos qpcnode_unref(node); 27099689912eSchristos } 27109689912eSchristos } 27119689912eSchristos 27129689912eSchristos reactivate_node(qpdb, node, tlocktype DNS__DB_FLARG_PASS); 27139689912eSchristos 27149689912eSchristos *nodep = (dns_dbnode_t *)node; 27159689912eSchristos unlock: 27169689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 27179689912eSchristos 27189689912eSchristos return result; 27199689912eSchristos } 27209689912eSchristos 27219689912eSchristos static void 27229689912eSchristos attachnode(dns_db_t *db, dns_dbnode_t *source, 27239689912eSchristos dns_dbnode_t **targetp DNS__DB_FLARG) { 27249689912eSchristos REQUIRE(VALID_QPDB((qpcache_t *)db)); 27259689912eSchristos REQUIRE(targetp != NULL && *targetp == NULL); 27269689912eSchristos 27279689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 27289689912eSchristos qpcnode_t *node = (qpcnode_t *)source; 27299689912eSchristos 27309689912eSchristos newref(qpdb, node, isc_rwlocktype_none, 27319689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 27329689912eSchristos 27339689912eSchristos *targetp = source; 27349689912eSchristos } 27359689912eSchristos 27369689912eSchristos static void 27379689912eSchristos detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG) { 27389689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 27399689912eSchristos qpcnode_t *node = NULL; 27409689912eSchristos bool want_free = false; 27419689912eSchristos bool inactive = false; 27429689912eSchristos db_nodelock_t *nodelock = NULL; 27439689912eSchristos isc_rwlocktype_t nlocktype = 
isc_rwlocktype_none; 27449689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 27459689912eSchristos 27469689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 27479689912eSchristos REQUIRE(targetp != NULL && *targetp != NULL); 27489689912eSchristos 27499689912eSchristos node = (qpcnode_t *)(*targetp); 27509689912eSchristos nodelock = &qpdb->node_locks[node->locknum]; 27519689912eSchristos 27529689912eSchristos NODE_RDLOCK(&nodelock->lock, &nlocktype); 27539689912eSchristos 27549689912eSchristos if (decref(qpdb, node, &nlocktype, &tlocktype, true DNS__DB_FLARG_PASS)) 27559689912eSchristos { 27569689912eSchristos if (isc_refcount_current(&nodelock->references) == 0 && 27579689912eSchristos nodelock->exiting) 27589689912eSchristos { 27599689912eSchristos inactive = true; 27609689912eSchristos } 27619689912eSchristos } 27629689912eSchristos 27639689912eSchristos NODE_UNLOCK(&nodelock->lock, &nlocktype); 27649689912eSchristos INSIST(tlocktype == isc_rwlocktype_none); 27659689912eSchristos 27669689912eSchristos *targetp = NULL; 27679689912eSchristos 27689689912eSchristos if (inactive) { 27699689912eSchristos RWLOCK(&qpdb->lock, isc_rwlocktype_write); 27709689912eSchristos qpdb->active--; 27719689912eSchristos if (qpdb->active == 0) { 27729689912eSchristos want_free = true; 27739689912eSchristos } 27749689912eSchristos RWUNLOCK(&qpdb->lock, isc_rwlocktype_write); 27759689912eSchristos if (want_free) { 27769689912eSchristos char buf[DNS_NAME_FORMATSIZE]; 27779689912eSchristos if (dns_name_dynamic(&qpdb->common.origin)) { 27789689912eSchristos dns_name_format(&qpdb->common.origin, buf, 27799689912eSchristos sizeof(buf)); 27809689912eSchristos } else { 27819689912eSchristos strlcpy(buf, "<UNKNOWN>", sizeof(buf)); 27829689912eSchristos } 27839689912eSchristos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 27849689912eSchristos DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), 27859689912eSchristos "calling free_qpdb(%s)", buf); 27869689912eSchristos free_qpdb(qpdb, true); 
27879689912eSchristos } 27889689912eSchristos } 27899689912eSchristos } 27909689912eSchristos 27919689912eSchristos static isc_result_t 27929689912eSchristos createiterator(dns_db_t *db, unsigned int options ISC_ATTR_UNUSED, 27939689912eSchristos dns_dbiterator_t **iteratorp) { 27949689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 27959689912eSchristos qpc_dbit_t *qpdbiter = NULL; 27969689912eSchristos 27979689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 27989689912eSchristos 27999689912eSchristos qpdbiter = isc_mem_get(qpdb->common.mctx, sizeof(*qpdbiter)); 28009689912eSchristos *qpdbiter = (qpc_dbit_t){ 28019689912eSchristos .common.methods = &dbiterator_methods, 28029689912eSchristos .common.magic = DNS_DBITERATOR_MAGIC, 28039689912eSchristos .paused = true, 28049689912eSchristos }; 28059689912eSchristos 28069689912eSchristos qpdbiter->name = dns_fixedname_initname(&qpdbiter->fixed); 28079689912eSchristos dns_db_attach(db, &qpdbiter->common.db); 28089689912eSchristos dns_qpiter_init(qpdb->tree, &qpdbiter->iter); 28099689912eSchristos 28109689912eSchristos *iteratorp = (dns_dbiterator_t *)qpdbiter; 28119689912eSchristos return ISC_R_SUCCESS; 28129689912eSchristos } 28139689912eSchristos 28149689912eSchristos static isc_result_t 28159689912eSchristos allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 28169689912eSchristos unsigned int options, isc_stdtime_t now, 28179689912eSchristos dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { 28189689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 28199689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 28209689912eSchristos qpc_rditer_t *iterator = NULL; 28219689912eSchristos 28229689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 28239689912eSchristos 28249689912eSchristos UNUSED(version); 28259689912eSchristos 28269689912eSchristos iterator = isc_mem_get(qpdb->common.mctx, sizeof(*iterator)); 28279689912eSchristos 28289689912eSchristos if (now == 0) { 28299689912eSchristos now = isc_stdtime_now(); 
28309689912eSchristos } 28319689912eSchristos 28329689912eSchristos iterator->common.magic = DNS_RDATASETITER_MAGIC; 28339689912eSchristos iterator->common.methods = &rdatasetiter_methods; 28349689912eSchristos iterator->common.db = db; 28359689912eSchristos iterator->common.node = node; 28369689912eSchristos iterator->common.version = NULL; 28379689912eSchristos iterator->common.options = options; 28389689912eSchristos iterator->common.now = now; 28399689912eSchristos iterator->current = NULL; 28409689912eSchristos 28419689912eSchristos newref(qpdb, qpnode, isc_rwlocktype_none, 28429689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 28439689912eSchristos 28449689912eSchristos *iteratorp = (dns_rdatasetiter_t *)iterator; 28459689912eSchristos 28469689912eSchristos return ISC_R_SUCCESS; 28479689912eSchristos } 28489689912eSchristos 28499689912eSchristos static bool 28509689912eSchristos overmaxtype(qpcache_t *qpdb, uint32_t ntypes) { 28519689912eSchristos if (qpdb->maxtypepername == 0) { 28529689912eSchristos return false; 28539689912eSchristos } 28549689912eSchristos 28559689912eSchristos return ntypes >= qpdb->maxtypepername; 28569689912eSchristos } 28579689912eSchristos 28589689912eSchristos static bool 28599689912eSchristos prio_header(dns_slabheader_t *header) { 28609689912eSchristos if (NEGATIVE(header) && prio_type(DNS_TYPEPAIR_COVERS(header->type))) { 28619689912eSchristos return true; 28629689912eSchristos } 28639689912eSchristos 28649689912eSchristos return prio_type(header->type); 28659689912eSchristos } 28669689912eSchristos 28679689912eSchristos static isc_result_t 28689689912eSchristos add(qpcache_t *qpdb, qpcnode_t *qpnode, 28699689912eSchristos const dns_name_t *nodename ISC_ATTR_UNUSED, dns_slabheader_t *newheader, 28709689912eSchristos unsigned int options, bool loading, dns_rdataset_t *addedrdataset, 28719689912eSchristos isc_stdtime_t now, isc_rwlocktype_t nlocktype, 28729689912eSchristos isc_rwlocktype_t tlocktype DNS__DB_FLARG) { 
28739689912eSchristos dns_slabheader_t *topheader = NULL, *topheader_prev = NULL; 28749689912eSchristos dns_slabheader_t *header = NULL, *sigheader = NULL; 28759689912eSchristos dns_slabheader_t *prioheader = NULL, *expireheader = NULL; 28769689912eSchristos bool header_nx; 28779689912eSchristos bool newheader_nx; 28789689912eSchristos dns_typepair_t negtype = 0; 28799689912eSchristos dns_trust_t trust; 28809689912eSchristos int idx; 28819689912eSchristos uint32_t ntypes = 0; 28829689912eSchristos 28839689912eSchristos if ((options & DNS_DBADD_FORCE) != 0) { 28849689912eSchristos trust = dns_trust_ultimate; 28859689912eSchristos } else { 28869689912eSchristos trust = newheader->trust; 28879689912eSchristos } 28889689912eSchristos 28899689912eSchristos newheader_nx = NONEXISTENT(newheader) ? true : false; 28909689912eSchristos 28919689912eSchristos if (!newheader_nx) { 28929689912eSchristos dns_rdatatype_t rdtype = DNS_TYPEPAIR_TYPE(newheader->type); 28939689912eSchristos dns_rdatatype_t covers = DNS_TYPEPAIR_COVERS(newheader->type); 28949689912eSchristos dns_typepair_t sigtype = DNS_SIGTYPE(covers); 28959689912eSchristos if (NEGATIVE(newheader)) { 28969689912eSchristos /* 28979689912eSchristos * We're adding a negative cache entry. 28989689912eSchristos */ 28999689912eSchristos if (covers == dns_rdatatype_any) { 29009689912eSchristos /* 29019689912eSchristos * If we're adding an negative cache entry 29029689912eSchristos * which covers all types (NXDOMAIN, 29039689912eSchristos * NODATA(QTYPE=ANY)), 29049689912eSchristos * 29059689912eSchristos * We make all other data ancient so that the 29069689912eSchristos * only rdataset that can be found at this 29079689912eSchristos * node is the negative cache entry. 
29089689912eSchristos */ 29099689912eSchristos for (topheader = qpnode->data; 29109689912eSchristos topheader != NULL; 29119689912eSchristos topheader = topheader->next) 29129689912eSchristos { 29139689912eSchristos mark_ancient(topheader); 29149689912eSchristos } 29159689912eSchristos goto find_header; 29169689912eSchristos } 29179689912eSchristos /* 29189689912eSchristos * Otherwise look for any RRSIGs of the given 29199689912eSchristos * type so they can be marked ancient later. 29209689912eSchristos */ 29219689912eSchristos for (topheader = qpnode->data; topheader != NULL; 29229689912eSchristos topheader = topheader->next) 29239689912eSchristos { 29249689912eSchristos if (topheader->type == sigtype) { 29259689912eSchristos sigheader = topheader; 29269689912eSchristos break; 29279689912eSchristos } 29289689912eSchristos } 29299689912eSchristos negtype = DNS_TYPEPAIR_VALUE(covers, 0); 29309689912eSchristos } else { 29319689912eSchristos /* 29329689912eSchristos * We're adding something that isn't a 29339689912eSchristos * negative cache entry. Look for an extant 29349689912eSchristos * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative 29359689912eSchristos * cache entry. If we're adding an RRSIG, also 29369689912eSchristos * check for an extant non-ancient NODATA ncache 29379689912eSchristos * entry which covers the same type as the RRSIG. 
29389689912eSchristos */ 29399689912eSchristos for (topheader = qpnode->data; topheader != NULL; 29409689912eSchristos topheader = topheader->next) 29419689912eSchristos { 29429689912eSchristos if ((topheader->type == RDATATYPE_NCACHEANY) || 29439689912eSchristos (newheader->type == sigtype && 29449689912eSchristos topheader->type == 29459689912eSchristos DNS_TYPEPAIR_VALUE(0, covers))) 29469689912eSchristos { 29479689912eSchristos break; 29489689912eSchristos } 29499689912eSchristos } 29509689912eSchristos if (topheader != NULL && EXISTS(topheader) && 29519689912eSchristos ACTIVE(topheader, now)) 29529689912eSchristos { 29539689912eSchristos /* 29549689912eSchristos * Found one. 29559689912eSchristos */ 29569689912eSchristos if (trust < topheader->trust) { 29579689912eSchristos /* 29589689912eSchristos * The NXDOMAIN/NODATA(QTYPE=ANY) 29599689912eSchristos * is more trusted. 29609689912eSchristos */ 29619689912eSchristos dns_slabheader_destroy(&newheader); 29629689912eSchristos if (addedrdataset != NULL) { 29639689912eSchristos bindrdataset( 29649689912eSchristos qpdb, qpnode, topheader, 29659689912eSchristos now, nlocktype, 29669689912eSchristos tlocktype, 29679689912eSchristos addedrdataset 29689689912eSchristos DNS__DB_FLARG_PASS); 29699689912eSchristos } 29709689912eSchristos return DNS_R_UNCHANGED; 29719689912eSchristos } 29729689912eSchristos /* 29739689912eSchristos * The new rdataset is better. Expire the 29749689912eSchristos * ncache entry. 
29759689912eSchristos */ 29769689912eSchristos mark_ancient(topheader); 29779689912eSchristos topheader = NULL; 29789689912eSchristos goto find_header; 29799689912eSchristos } 29809689912eSchristos negtype = DNS_TYPEPAIR_VALUE(0, rdtype); 29819689912eSchristos } 29829689912eSchristos } 29839689912eSchristos 29849689912eSchristos for (topheader = qpnode->data; topheader != NULL; 29859689912eSchristos topheader = topheader->next) 29869689912eSchristos { 29879689912eSchristos if (ACTIVE(topheader, now)) { 29889689912eSchristos ++ntypes; 29899689912eSchristos expireheader = topheader; 29909689912eSchristos } 29919689912eSchristos if (prio_header(topheader)) { 29929689912eSchristos prioheader = topheader; 29939689912eSchristos } 29949689912eSchristos 29959689912eSchristos if (topheader->type == newheader->type || 29969689912eSchristos topheader->type == negtype) 29979689912eSchristos { 29989689912eSchristos break; 29999689912eSchristos } 30009689912eSchristos topheader_prev = topheader; 30019689912eSchristos } 30029689912eSchristos 30039689912eSchristos find_header: 30049689912eSchristos /* 30059689912eSchristos * If header isn't NULL, we've found the right type. There may be 30069689912eSchristos * IGNORE rdatasets between the top of the chain and the first real 30079689912eSchristos * data. We skip over them. 30089689912eSchristos */ 30099689912eSchristos header = topheader; 30109689912eSchristos while (header != NULL && IGNORE(header)) { 30119689912eSchristos header = header->down; 30129689912eSchristos } 30139689912eSchristos if (header != NULL) { 30149689912eSchristos header_nx = NONEXISTENT(header) ? true : false; 30159689912eSchristos 30169689912eSchristos /* 30179689912eSchristos * Deleting an already non-existent rdataset has no effect. 
30189689912eSchristos */ 30199689912eSchristos if (header_nx && newheader_nx) { 30209689912eSchristos dns_slabheader_destroy(&newheader); 30219689912eSchristos return DNS_R_UNCHANGED; 30229689912eSchristos } 30239689912eSchristos 30249689912eSchristos /* 30259689912eSchristos * Trying to add an rdataset with lower trust to a cache 30269689912eSchristos * DB has no effect, provided that the cache data isn't 30279689912eSchristos * stale. If the cache data is stale, new lower trust 30289689912eSchristos * data will supersede it below. Unclear what the best 30299689912eSchristos * policy is here. 30309689912eSchristos */ 30319689912eSchristos if (trust < header->trust && (ACTIVE(header, now) || header_nx)) 30329689912eSchristos { 30339689912eSchristos dns_slabheader_destroy(&newheader); 30349689912eSchristos if (addedrdataset != NULL) { 30359689912eSchristos bindrdataset(qpdb, qpnode, header, now, 30369689912eSchristos nlocktype, tlocktype, 30379689912eSchristos addedrdataset DNS__DB_FLARG_PASS); 30389689912eSchristos } 30399689912eSchristos return DNS_R_UNCHANGED; 30409689912eSchristos } 30419689912eSchristos 30429689912eSchristos /* 30439689912eSchristos * Don't replace existing NS, A and AAAA RRsets in the 30449689912eSchristos * cache if they are already exist. This prevents named 30459689912eSchristos * being locked to old servers. Don't lower trust of 30469689912eSchristos * existing record if the update is forced. Nothing 30479689912eSchristos * special to be done w.r.t stale data; it gets replaced 30489689912eSchristos * normally further down. 
30499689912eSchristos */ 30509689912eSchristos if (ACTIVE(header, now) && header->type == dns_rdatatype_ns && 30519689912eSchristos !header_nx && !newheader_nx && 30529689912eSchristos header->trust >= newheader->trust && 30539689912eSchristos dns_rdataslab_equalx((unsigned char *)header, 30549689912eSchristos (unsigned char *)newheader, 30559689912eSchristos (unsigned int)(sizeof(*newheader)), 30569689912eSchristos qpdb->common.rdclass, 30579689912eSchristos (dns_rdatatype_t)header->type)) 30589689912eSchristos { 30599689912eSchristos /* 30609689912eSchristos * Honour the new ttl if it is less than the 30619689912eSchristos * older one. 30629689912eSchristos */ 30639689912eSchristos if (header->ttl > newheader->ttl) { 30649689912eSchristos setttl(header, newheader->ttl); 30659689912eSchristos } 30669689912eSchristos if (header->last_used != now) { 30679689912eSchristos ISC_LIST_UNLINK( 30689689912eSchristos qpdb->lru[HEADERNODE(header)->locknum], 30699689912eSchristos header, link); 30709689912eSchristos header->last_used = now; 30719689912eSchristos ISC_LIST_PREPEND( 30729689912eSchristos qpdb->lru[HEADERNODE(header)->locknum], 30739689912eSchristos header, link); 30749689912eSchristos } 30759689912eSchristos if (header->noqname == NULL && 30769689912eSchristos newheader->noqname != NULL) 30779689912eSchristos { 30789689912eSchristos header->noqname = newheader->noqname; 30799689912eSchristos newheader->noqname = NULL; 30809689912eSchristos } 30819689912eSchristos if (header->closest == NULL && 30829689912eSchristos newheader->closest != NULL) 30839689912eSchristos { 30849689912eSchristos header->closest = newheader->closest; 30859689912eSchristos newheader->closest = NULL; 30869689912eSchristos } 30879689912eSchristos dns_slabheader_destroy(&newheader); 30889689912eSchristos if (addedrdataset != NULL) { 30899689912eSchristos bindrdataset(qpdb, qpnode, header, now, 30909689912eSchristos nlocktype, tlocktype, 30919689912eSchristos addedrdataset 
DNS__DB_FLARG_PASS); 30929689912eSchristos } 30939689912eSchristos return ISC_R_SUCCESS; 30949689912eSchristos } 30959689912eSchristos 30969689912eSchristos /* 30979689912eSchristos * If we have will be replacing a NS RRset force its TTL 30989689912eSchristos * to be no more than the current NS RRset's TTL. This 30999689912eSchristos * ensures the delegations that are withdrawn are honoured. 31009689912eSchristos */ 31019689912eSchristos if (ACTIVE(header, now) && header->type == dns_rdatatype_ns && 31029689912eSchristos !header_nx && !newheader_nx && 31039689912eSchristos header->trust <= newheader->trust) 31049689912eSchristos { 31059689912eSchristos if (newheader->ttl > header->ttl) { 31069689912eSchristos newheader->ttl = header->ttl; 31079689912eSchristos } 31089689912eSchristos } 31099689912eSchristos if (ACTIVE(header, now) && 31109689912eSchristos (options & DNS_DBADD_PREFETCH) == 0 && 31119689912eSchristos (header->type == dns_rdatatype_a || 31129689912eSchristos header->type == dns_rdatatype_aaaa || 31139689912eSchristos header->type == dns_rdatatype_ds || 31149689912eSchristos header->type == DNS_SIGTYPE(dns_rdatatype_ds)) && 31159689912eSchristos !header_nx && !newheader_nx && 31169689912eSchristos header->trust >= newheader->trust && 31179689912eSchristos dns_rdataslab_equal((unsigned char *)header, 31189689912eSchristos (unsigned char *)newheader, 31199689912eSchristos (unsigned int)(sizeof(*newheader)))) 31209689912eSchristos { 31219689912eSchristos /* 31229689912eSchristos * Honour the new ttl if it is less than the 31239689912eSchristos * older one. 
31249689912eSchristos */ 31259689912eSchristos if (header->ttl > newheader->ttl) { 31269689912eSchristos setttl(header, newheader->ttl); 31279689912eSchristos } 31289689912eSchristos if (header->last_used != now) { 31299689912eSchristos ISC_LIST_UNLINK( 31309689912eSchristos qpdb->lru[HEADERNODE(header)->locknum], 31319689912eSchristos header, link); 31329689912eSchristos header->last_used = now; 31339689912eSchristos ISC_LIST_PREPEND( 31349689912eSchristos qpdb->lru[HEADERNODE(header)->locknum], 31359689912eSchristos header, link); 31369689912eSchristos } 31379689912eSchristos if (header->noqname == NULL && 31389689912eSchristos newheader->noqname != NULL) 31399689912eSchristos { 31409689912eSchristos header->noqname = newheader->noqname; 31419689912eSchristos newheader->noqname = NULL; 31429689912eSchristos } 31439689912eSchristos if (header->closest == NULL && 31449689912eSchristos newheader->closest != NULL) 31459689912eSchristos { 31469689912eSchristos header->closest = newheader->closest; 31479689912eSchristos newheader->closest = NULL; 31489689912eSchristos } 31499689912eSchristos dns_slabheader_destroy(&newheader); 31509689912eSchristos if (addedrdataset != NULL) { 31519689912eSchristos bindrdataset(qpdb, qpnode, header, now, 31529689912eSchristos nlocktype, tlocktype, 31539689912eSchristos addedrdataset DNS__DB_FLARG_PASS); 31549689912eSchristos } 31559689912eSchristos return ISC_R_SUCCESS; 31569689912eSchristos } 31579689912eSchristos 31589689912eSchristos if (loading) { 31599689912eSchristos newheader->down = NULL; 31609689912eSchristos idx = HEADERNODE(newheader)->locknum; 31619689912eSchristos if (ZEROTTL(newheader)) { 31629689912eSchristos newheader->last_used = qpdb->last_used + 1; 31639689912eSchristos ISC_LIST_APPEND(qpdb->lru[idx], newheader, 31649689912eSchristos link); 31659689912eSchristos } else { 31669689912eSchristos ISC_LIST_PREPEND(qpdb->lru[idx], newheader, 31679689912eSchristos link); 31689689912eSchristos } 31699689912eSchristos 
INSIST(qpdb->heaps != NULL); 31709689912eSchristos isc_heap_insert(qpdb->heaps[idx], newheader); 31719689912eSchristos newheader->heap = qpdb->heaps[idx]; 31729689912eSchristos 31739689912eSchristos /* 31749689912eSchristos * There are no other references to 'header' when 31759689912eSchristos * loading, so we MAY clean up 'header' now. 31769689912eSchristos * Since we don't generate changed records when 31779689912eSchristos * loading, we MUST clean up 'header' now. 31789689912eSchristos */ 31799689912eSchristos if (topheader_prev != NULL) { 31809689912eSchristos topheader_prev->next = newheader; 31819689912eSchristos } else { 31829689912eSchristos qpnode->data = newheader; 31839689912eSchristos } 31849689912eSchristos newheader->next = topheader->next; 31859689912eSchristos dns_slabheader_destroy(&header); 31869689912eSchristos } else { 31879689912eSchristos idx = HEADERNODE(newheader)->locknum; 31889689912eSchristos INSIST(qpdb->heaps != NULL); 31899689912eSchristos isc_heap_insert(qpdb->heaps[idx], newheader); 31909689912eSchristos newheader->heap = qpdb->heaps[idx]; 31919689912eSchristos if (ZEROTTL(newheader)) { 31929689912eSchristos newheader->last_used = qpdb->last_used + 1; 31939689912eSchristos ISC_LIST_APPEND(qpdb->lru[idx], newheader, 31949689912eSchristos link); 31959689912eSchristos } else { 31969689912eSchristos ISC_LIST_PREPEND(qpdb->lru[idx], newheader, 31979689912eSchristos link); 31989689912eSchristos } 31999689912eSchristos if (topheader_prev != NULL) { 32009689912eSchristos topheader_prev->next = newheader; 32019689912eSchristos } else { 32029689912eSchristos qpnode->data = newheader; 32039689912eSchristos } 32049689912eSchristos newheader->next = topheader->next; 32059689912eSchristos newheader->down = topheader; 32069689912eSchristos topheader->next = newheader; 32079689912eSchristos qpnode->dirty = 1; 32089689912eSchristos mark_ancient(header); 32099689912eSchristos if (sigheader != NULL) { 32109689912eSchristos mark_ancient(sigheader); 
32119689912eSchristos } 32129689912eSchristos } 32139689912eSchristos } else { 32149689912eSchristos /* 32159689912eSchristos * No non-IGNORED rdatasets of the given type exist at 32169689912eSchristos * this node. 32179689912eSchristos */ 32189689912eSchristos 32199689912eSchristos /* 32209689912eSchristos * If we're trying to delete the type, don't bother. 32219689912eSchristos */ 32229689912eSchristos if (newheader_nx) { 32239689912eSchristos dns_slabheader_destroy(&newheader); 32249689912eSchristos return DNS_R_UNCHANGED; 32259689912eSchristos } 32269689912eSchristos 32279689912eSchristos idx = HEADERNODE(newheader)->locknum; 32289689912eSchristos isc_heap_insert(qpdb->heaps[idx], newheader); 32299689912eSchristos newheader->heap = qpdb->heaps[idx]; 32309689912eSchristos if (ZEROTTL(newheader)) { 32319689912eSchristos ISC_LIST_APPEND(qpdb->lru[idx], newheader, link); 32329689912eSchristos } else { 32339689912eSchristos ISC_LIST_PREPEND(qpdb->lru[idx], newheader, link); 32349689912eSchristos } 32359689912eSchristos 32369689912eSchristos if (topheader != NULL) { 32379689912eSchristos /* 32389689912eSchristos * We have a list of rdatasets of the given type, 32399689912eSchristos * but they're all marked IGNORE. We simply insert 32409689912eSchristos * the new rdataset at the head of the list. 32419689912eSchristos * 32429689912eSchristos * Ignored rdatasets cannot occur during loading, so 32439689912eSchristos * we INSIST on it. 
32449689912eSchristos */ 32459689912eSchristos INSIST(!loading); 32469689912eSchristos if (topheader_prev != NULL) { 32479689912eSchristos topheader_prev->next = newheader; 32489689912eSchristos } else { 32499689912eSchristos qpnode->data = newheader; 32509689912eSchristos } 32519689912eSchristos newheader->next = topheader->next; 32529689912eSchristos newheader->down = topheader; 32539689912eSchristos topheader->next = newheader; 32549689912eSchristos qpnode->dirty = 1; 32559689912eSchristos } else { 32569689912eSchristos /* 32579689912eSchristos * No rdatasets of the given type exist at the node. 32589689912eSchristos */ 32599689912eSchristos INSIST(newheader->down == NULL); 32609689912eSchristos 32619689912eSchristos if (prio_header(newheader)) { 32629689912eSchristos /* This is a priority type, prepend it */ 32639689912eSchristos newheader->next = qpnode->data; 32649689912eSchristos qpnode->data = newheader; 32659689912eSchristos } else if (prioheader != NULL) { 32669689912eSchristos /* Append after the priority headers */ 32679689912eSchristos newheader->next = prioheader->next; 32689689912eSchristos prioheader->next = newheader; 32699689912eSchristos } else { 32709689912eSchristos /* There were no priority headers */ 32719689912eSchristos newheader->next = qpnode->data; 32729689912eSchristos qpnode->data = newheader; 32739689912eSchristos } 32749689912eSchristos 32759689912eSchristos if (overmaxtype(qpdb, ntypes)) { 32769689912eSchristos if (expireheader == NULL) { 32779689912eSchristos expireheader = newheader; 32789689912eSchristos } 32799689912eSchristos if (NEGATIVE(newheader) && 32809689912eSchristos !prio_header(newheader)) 32819689912eSchristos { 32829689912eSchristos /* 32839689912eSchristos * Add the new non-priority negative 32849689912eSchristos * header to the database only 32859689912eSchristos * temporarily. 
32869689912eSchristos */ 32879689912eSchristos expireheader = newheader; 32889689912eSchristos } 32899689912eSchristos 32909689912eSchristos mark_ancient(expireheader); 32919689912eSchristos /* 32929689912eSchristos * FIXME: In theory, we should mark the RRSIG 32939689912eSchristos * and the header at the same time, but there is 32949689912eSchristos * no direct link between those two header, so 32959689912eSchristos * we would have to check the whole list again. 32969689912eSchristos */ 32979689912eSchristos } 32989689912eSchristos } 32999689912eSchristos } 33009689912eSchristos 33019689912eSchristos if (addedrdataset != NULL) { 33029689912eSchristos bindrdataset(qpdb, qpnode, newheader, now, nlocktype, tlocktype, 33039689912eSchristos addedrdataset DNS__DB_FLARG_PASS); 33049689912eSchristos } 33059689912eSchristos 33069689912eSchristos return ISC_R_SUCCESS; 33079689912eSchristos } 33089689912eSchristos 33099689912eSchristos static isc_result_t 33109689912eSchristos addnoqname(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset, 33119689912eSchristos dns_rdataset_t *rdataset) { 33129689912eSchristos isc_result_t result; 33139689912eSchristos dns_slabheader_proof_t *noqname = NULL; 33149689912eSchristos dns_name_t name = DNS_NAME_INITEMPTY; 33159689912eSchristos dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; 33169689912eSchristos isc_region_t r1, r2; 33179689912eSchristos 33189689912eSchristos result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig); 33199689912eSchristos RUNTIME_CHECK(result == ISC_R_SUCCESS); 33209689912eSchristos 33219689912eSchristos result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset); 33229689912eSchristos if (result != ISC_R_SUCCESS) { 33239689912eSchristos goto cleanup; 33249689912eSchristos } 33259689912eSchristos 33269689912eSchristos result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset); 33279689912eSchristos if (result != ISC_R_SUCCESS) { 33289689912eSchristos 
goto cleanup; 33299689912eSchristos } 33309689912eSchristos 33319689912eSchristos noqname = isc_mem_get(mctx, sizeof(*noqname)); 33329689912eSchristos *noqname = (dns_slabheader_proof_t){ 33339689912eSchristos .neg = r1.base, 33349689912eSchristos .negsig = r2.base, 33359689912eSchristos .type = neg.type, 33369689912eSchristos .name = DNS_NAME_INITEMPTY, 33379689912eSchristos }; 33389689912eSchristos dns_name_dup(&name, mctx, &noqname->name); 33399689912eSchristos newheader->noqname = noqname; 33409689912eSchristos 33419689912eSchristos cleanup: 33429689912eSchristos dns_rdataset_disassociate(&neg); 33439689912eSchristos dns_rdataset_disassociate(&negsig); 33449689912eSchristos 33459689912eSchristos return result; 33469689912eSchristos } 33479689912eSchristos 33489689912eSchristos static isc_result_t 33499689912eSchristos addclosest(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset, 33509689912eSchristos dns_rdataset_t *rdataset) { 33519689912eSchristos isc_result_t result; 33529689912eSchristos dns_slabheader_proof_t *closest = NULL; 33539689912eSchristos dns_name_t name = DNS_NAME_INITEMPTY; 33549689912eSchristos dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; 33559689912eSchristos isc_region_t r1, r2; 33569689912eSchristos 33579689912eSchristos result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig); 33589689912eSchristos RUNTIME_CHECK(result == ISC_R_SUCCESS); 33599689912eSchristos 33609689912eSchristos result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset); 33619689912eSchristos if (result != ISC_R_SUCCESS) { 33629689912eSchristos goto cleanup; 33639689912eSchristos } 33649689912eSchristos 33659689912eSchristos result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset); 33669689912eSchristos if (result != ISC_R_SUCCESS) { 33679689912eSchristos goto cleanup; 33689689912eSchristos } 33699689912eSchristos 33709689912eSchristos closest = isc_mem_get(mctx, sizeof(*closest)); 
33719689912eSchristos *closest = (dns_slabheader_proof_t){ 33729689912eSchristos .neg = r1.base, 33739689912eSchristos .negsig = r2.base, 33749689912eSchristos .name = DNS_NAME_INITEMPTY, 33759689912eSchristos .type = neg.type, 33769689912eSchristos }; 33779689912eSchristos dns_name_dup(&name, mctx, &closest->name); 33789689912eSchristos newheader->closest = closest; 33799689912eSchristos 33809689912eSchristos cleanup: 33819689912eSchristos dns_rdataset_disassociate(&neg); 33829689912eSchristos dns_rdataset_disassociate(&negsig); 33839689912eSchristos return result; 33849689912eSchristos } 33859689912eSchristos 33869689912eSchristos static void 33879689912eSchristos expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum, 33889689912eSchristos isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, 33899689912eSchristos isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG); 33909689912eSchristos 33919689912eSchristos static isc_result_t 33929689912eSchristos addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 33939689912eSchristos isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, 33949689912eSchristos dns_rdataset_t *addedrdataset DNS__DB_FLARG) { 33959689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 33969689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 33979689912eSchristos isc_region_t region; 33989689912eSchristos dns_slabheader_t *newheader = NULL; 33999689912eSchristos isc_result_t result; 34009689912eSchristos bool delegating = false; 34019689912eSchristos bool newnsec; 34029689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 34039689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 34049689912eSchristos bool cache_is_overmem = false; 34059689912eSchristos dns_fixedname_t fixed; 34069689912eSchristos dns_name_t *name = NULL; 34079689912eSchristos 34089689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 34099689912eSchristos REQUIRE(version == NULL); 34109689912eSchristos 
34119689912eSchristos if (now == 0) { 34129689912eSchristos now = isc_stdtime_now(); 34139689912eSchristos } 34149689912eSchristos 34159689912eSchristos result = dns_rdataslab_fromrdataset(rdataset, qpdb->common.mctx, 34169689912eSchristos ®ion, sizeof(dns_slabheader_t), 34179689912eSchristos qpdb->maxrrperset); 34189689912eSchristos if (result != ISC_R_SUCCESS) { 34199689912eSchristos if (result == DNS_R_TOOMANYRECORDS) { 34209689912eSchristos dns__db_logtoomanyrecords((dns_db_t *)qpdb, 34219689912eSchristos &qpnode->name, rdataset->type, 34229689912eSchristos "adding", qpdb->maxrrperset); 34239689912eSchristos } 34249689912eSchristos return result; 34259689912eSchristos } 34269689912eSchristos 34279689912eSchristos name = dns_fixedname_initname(&fixed); 34289689912eSchristos dns_name_copy(&qpnode->name, name); 34299689912eSchristos dns_rdataset_getownercase(rdataset, name); 34309689912eSchristos 34319689912eSchristos newheader = (dns_slabheader_t *)region.base; 34329689912eSchristos *newheader = (dns_slabheader_t){ 34339689912eSchristos .type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers), 34349689912eSchristos .trust = rdataset->trust, 34359689912eSchristos .last_used = now, 34369689912eSchristos .node = qpnode, 34379689912eSchristos }; 34389689912eSchristos 34399689912eSchristos dns_slabheader_reset(newheader, db, node); 34409689912eSchristos setttl(newheader, rdataset->ttl + now); 34419689912eSchristos if (rdataset->ttl == 0U) { 34429689912eSchristos DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL); 34439689912eSchristos } 34449689912eSchristos atomic_init(&newheader->count, 34459689912eSchristos atomic_fetch_add_relaxed(&init_count, 1)); 34469689912eSchristos if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) { 34479689912eSchristos DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_PREFETCH); 34489689912eSchristos } 34499689912eSchristos if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) { 34509689912eSchristos 
DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NEGATIVE); 34519689912eSchristos } 34529689912eSchristos if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) { 34539689912eSchristos DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NXDOMAIN); 34549689912eSchristos } 34559689912eSchristos if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) { 34569689912eSchristos DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_OPTOUT); 34579689912eSchristos } 34589689912eSchristos if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { 34599689912eSchristos result = addnoqname(qpdb->common.mctx, newheader, 34609689912eSchristos qpdb->maxrrperset, rdataset); 34619689912eSchristos if (result != ISC_R_SUCCESS) { 34629689912eSchristos dns_slabheader_destroy(&newheader); 34639689912eSchristos return result; 34649689912eSchristos } 34659689912eSchristos } 34669689912eSchristos if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { 34679689912eSchristos result = addclosest(qpdb->common.mctx, newheader, 34689689912eSchristos qpdb->maxrrperset, rdataset); 34699689912eSchristos if (result != ISC_R_SUCCESS) { 34709689912eSchristos dns_slabheader_destroy(&newheader); 34719689912eSchristos return result; 34729689912eSchristos } 34739689912eSchristos } 34749689912eSchristos 34759689912eSchristos /* 34769689912eSchristos * If we're adding a delegation type (which would be an NS or DNAME 34779689912eSchristos * for a zone, but only DNAME counts for a cache), we need to set 34789689912eSchristos * the callback bit on the node. 34799689912eSchristos */ 34809689912eSchristos if (rdataset->type == dns_rdatatype_dname) { 34819689912eSchristos delegating = true; 34829689912eSchristos } 34839689912eSchristos 34849689912eSchristos /* 34859689912eSchristos * Add to the auxiliary NSEC tree if we're adding an NSEC record. 
34869689912eSchristos */ 34879689912eSchristos TREE_RDLOCK(&qpdb->tree_lock, &tlocktype); 34889689912eSchristos if (qpnode->nsec != DNS_DB_NSEC_HAS_NSEC && 34899689912eSchristos rdataset->type == dns_rdatatype_nsec) 34909689912eSchristos { 34919689912eSchristos newnsec = true; 34929689912eSchristos } else { 34939689912eSchristos newnsec = false; 34949689912eSchristos } 34959689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 34969689912eSchristos 34979689912eSchristos /* 34989689912eSchristos * If we're adding a delegation type, adding to the auxiliary NSEC 34999689912eSchristos * tree, or the DB is a cache in an overmem state, hold an 35009689912eSchristos * exclusive lock on the tree. In the latter case the lock does 35019689912eSchristos * not necessarily have to be acquired but it will help purge 35029689912eSchristos * ancient entries more effectively. 35039689912eSchristos */ 35049689912eSchristos if (isc_mem_isovermem(qpdb->common.mctx)) { 35059689912eSchristos cache_is_overmem = true; 35069689912eSchristos } 35079689912eSchristos if (delegating || newnsec || cache_is_overmem) { 35089689912eSchristos TREE_WRLOCK(&qpdb->tree_lock, &tlocktype); 35099689912eSchristos } 35109689912eSchristos 35119689912eSchristos if (cache_is_overmem) { 35129689912eSchristos overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS); 35139689912eSchristos } 35149689912eSchristos 35159689912eSchristos NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 35169689912eSchristos 35179689912eSchristos if (qpdb->rrsetstats != NULL) { 35189689912eSchristos DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_STATCOUNT); 35199689912eSchristos update_rrsetstats(qpdb->rrsetstats, newheader->type, 35209689912eSchristos atomic_load_acquire(&newheader->attributes), 35219689912eSchristos true); 35229689912eSchristos } 35239689912eSchristos 35249689912eSchristos expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype, now, 35259689912eSchristos cache_is_overmem 
DNS__DB_FLARG_PASS); 35269689912eSchristos 35279689912eSchristos /* 35289689912eSchristos * If we've been holding a write lock on the tree just for 35299689912eSchristos * cleaning, we can release it now. However, we still need the 35309689912eSchristos * node lock. 35319689912eSchristos */ 35329689912eSchristos if (tlocktype == isc_rwlocktype_write && !delegating && !newnsec) { 35339689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 35349689912eSchristos } 35359689912eSchristos 35369689912eSchristos result = ISC_R_SUCCESS; 35379689912eSchristos if (newnsec) { 35389689912eSchristos qpcnode_t *nsecnode = NULL; 35399689912eSchristos 35409689912eSchristos result = dns_qp_getname(qpdb->nsec, name, (void **)&nsecnode, 35419689912eSchristos NULL); 35429689912eSchristos if (result == ISC_R_SUCCESS) { 35439689912eSchristos result = ISC_R_SUCCESS; 35449689912eSchristos } else { 35459689912eSchristos INSIST(nsecnode == NULL); 35469689912eSchristos nsecnode = new_qpcnode(qpdb, name); 35479689912eSchristos nsecnode->nsec = DNS_DB_NSEC_NSEC; 35489689912eSchristos result = dns_qp_insert(qpdb->nsec, nsecnode, 0); 35499689912eSchristos INSIST(result == ISC_R_SUCCESS); 35509689912eSchristos qpcnode_detach(&nsecnode); 35519689912eSchristos } 35529689912eSchristos qpnode->nsec = DNS_DB_NSEC_HAS_NSEC; 35539689912eSchristos } 35549689912eSchristos 35559689912eSchristos if (result == ISC_R_SUCCESS) { 35569689912eSchristos result = add(qpdb, qpnode, name, newheader, options, false, 35579689912eSchristos addedrdataset, now, nlocktype, 35589689912eSchristos tlocktype DNS__DB_FLARG_PASS); 35599689912eSchristos } 35609689912eSchristos if (result == ISC_R_SUCCESS && delegating) { 35619689912eSchristos qpnode->delegating = 1; 35629689912eSchristos } 35639689912eSchristos 35649689912eSchristos NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 35659689912eSchristos 35669689912eSchristos if (tlocktype != isc_rwlocktype_none) { 35679689912eSchristos 
TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 35689689912eSchristos } 35699689912eSchristos INSIST(tlocktype == isc_rwlocktype_none); 35709689912eSchristos 35719689912eSchristos return result; 35729689912eSchristos } 35739689912eSchristos 35749689912eSchristos static isc_result_t 35759689912eSchristos deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 35769689912eSchristos dns_rdatatype_t type, dns_rdatatype_t covers DNS__DB_FLARG) { 35779689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 35789689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 35799689912eSchristos isc_result_t result; 35809689912eSchristos dns_slabheader_t *newheader = NULL; 35819689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 35829689912eSchristos 35839689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 35849689912eSchristos REQUIRE(version == NULL); 35859689912eSchristos 35869689912eSchristos if (type == dns_rdatatype_any) { 35879689912eSchristos return ISC_R_NOTIMPLEMENTED; 35889689912eSchristos } 35899689912eSchristos if (type == dns_rdatatype_rrsig && covers == 0) { 35909689912eSchristos return ISC_R_NOTIMPLEMENTED; 35919689912eSchristos } 35929689912eSchristos 35939689912eSchristos newheader = dns_slabheader_new(db, node); 35949689912eSchristos newheader->type = DNS_TYPEPAIR_VALUE(type, covers); 35959689912eSchristos setttl(newheader, 0); 35969689912eSchristos atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); 35979689912eSchristos 35989689912eSchristos NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 35999689912eSchristos result = add(qpdb, qpnode, NULL, newheader, DNS_DBADD_FORCE, false, 36009689912eSchristos NULL, 0, nlocktype, 36019689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 36029689912eSchristos NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 36039689912eSchristos 36049689912eSchristos return result; 36059689912eSchristos } 36069689912eSchristos 36079689912eSchristos static unsigned 
int 36089689912eSchristos nodecount(dns_db_t *db, dns_dbtree_t tree) { 36099689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 36109689912eSchristos dns_qp_memusage_t mu; 36119689912eSchristos isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 36129689912eSchristos 36139689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 36149689912eSchristos 36159689912eSchristos TREE_RDLOCK(&qpdb->tree_lock, &tlocktype); 36169689912eSchristos switch (tree) { 36179689912eSchristos case dns_dbtree_main: 36189689912eSchristos mu = dns_qp_memusage(qpdb->tree); 36199689912eSchristos break; 36209689912eSchristos case dns_dbtree_nsec: 36219689912eSchristos mu = dns_qp_memusage(qpdb->nsec); 36229689912eSchristos break; 36239689912eSchristos default: 36249689912eSchristos UNREACHABLE(); 36259689912eSchristos } 36269689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &tlocktype); 36279689912eSchristos 36289689912eSchristos return mu.leaves; 36299689912eSchristos } 36309689912eSchristos 36319689912eSchristos static isc_result_t 36329689912eSchristos getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) { 36339689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 36349689912eSchristos qpcnode_t *onode = NULL; 36359689912eSchristos isc_result_t result = ISC_R_SUCCESS; 36369689912eSchristos 36379689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 36389689912eSchristos REQUIRE(nodep != NULL && *nodep == NULL); 36399689912eSchristos 36409689912eSchristos /* Note that the access to origin_node doesn't require a DB lock */ 36419689912eSchristos onode = (qpcnode_t *)qpdb->origin_node; 36429689912eSchristos if (onode != NULL) { 36439689912eSchristos newref(qpdb, onode, isc_rwlocktype_none, 36449689912eSchristos isc_rwlocktype_none DNS__DB_FLARG_PASS); 36459689912eSchristos *nodep = qpdb->origin_node; 36469689912eSchristos } else { 36479689912eSchristos result = ISC_R_NOTFOUND; 36489689912eSchristos } 36499689912eSchristos 36509689912eSchristos return result; 36519689912eSchristos } 36529689912eSchristos 
36539689912eSchristos static void 36549689912eSchristos locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { 36559689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 36569689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 36579689912eSchristos 36589689912eSchristos RWLOCK(&qpdb->node_locks[qpnode->locknum].lock, type); 36599689912eSchristos } 36609689912eSchristos 36619689912eSchristos static void 36629689912eSchristos unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { 36639689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 36649689912eSchristos qpcnode_t *qpnode = (qpcnode_t *)node; 36659689912eSchristos 36669689912eSchristos RWUNLOCK(&qpdb->node_locks[qpnode->locknum].lock, type); 36679689912eSchristos } 36689689912eSchristos 36699689912eSchristos isc_result_t 36709689912eSchristos dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin, 36719689912eSchristos dns_dbtype_t type, dns_rdataclass_t rdclass, 36729689912eSchristos unsigned int argc, char *argv[], 36739689912eSchristos void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) { 36749689912eSchristos qpcache_t *qpdb = NULL; 36759689912eSchristos isc_mem_t *hmctx = mctx; 36769689912eSchristos isc_loop_t *loop = isc_loop(); 36779689912eSchristos int i; 36789689912eSchristos 36799689912eSchristos /* This database implementation only supports cache semantics */ 36809689912eSchristos REQUIRE(type == dns_dbtype_cache); 36819689912eSchristos REQUIRE(loop != NULL); 36829689912eSchristos 36839689912eSchristos qpdb = isc_mem_get(mctx, sizeof(*qpdb)); 36849689912eSchristos *qpdb = (qpcache_t){ 36859689912eSchristos .common.methods = &qpdb_cachemethods, 36869689912eSchristos .common.origin = DNS_NAME_INITEMPTY, 36879689912eSchristos .common.rdclass = rdclass, 36889689912eSchristos .common.attributes = DNS_DBATTR_CACHE, 36899689912eSchristos .loopmgr = isc_loop_getloopmgr(loop), 36909689912eSchristos }; 36919689912eSchristos 36929689912eSchristos 
isc_refcount_init(&qpdb->common.references, 1); 36939689912eSchristos 36949689912eSchristos /* 36959689912eSchristos * If argv[0] exists, it points to a memory context to use for heap 36969689912eSchristos */ 36979689912eSchristos if (argc != 0) { 36989689912eSchristos hmctx = (isc_mem_t *)argv[0]; 36999689912eSchristos } 37009689912eSchristos 37019689912eSchristos isc_rwlock_init(&qpdb->lock); 37029689912eSchristos TREE_INITLOCK(&qpdb->tree_lock); 37039689912eSchristos 37049689912eSchristos qpdb->node_lock_count = isc_loopmgr_nloops(qpdb->loopmgr); 37059689912eSchristos qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count, 37069689912eSchristos sizeof(db_nodelock_t)); 37079689912eSchristos 37089689912eSchristos dns_rdatasetstats_create(mctx, &qpdb->rrsetstats); 37099689912eSchristos qpdb->lru = isc_mem_cget(mctx, qpdb->node_lock_count, 37109689912eSchristos sizeof(dns_slabheaderlist_t)); 37119689912eSchristos for (i = 0; i < (int)qpdb->node_lock_count; i++) { 37129689912eSchristos ISC_LIST_INIT(qpdb->lru[i]); 37139689912eSchristos } 37149689912eSchristos 37159689912eSchristos /* 37169689912eSchristos * Create the heaps. 37179689912eSchristos */ 37189689912eSchristos qpdb->heaps = isc_mem_cget(hmctx, qpdb->node_lock_count, 37199689912eSchristos sizeof(isc_heap_t *)); 37209689912eSchristos for (i = 0; i < (int)qpdb->node_lock_count; i++) { 37219689912eSchristos isc_heap_create(hmctx, ttl_sooner, set_index, 0, 37229689912eSchristos &qpdb->heaps[i]); 37239689912eSchristos } 37249689912eSchristos 37259689912eSchristos /* 37269689912eSchristos * Create deadnode lists. 
37279689912eSchristos */ 37289689912eSchristos qpdb->deadnodes = isc_mem_cget(mctx, qpdb->node_lock_count, 37299689912eSchristos sizeof(qpdb->deadnodes[0])); 37309689912eSchristos for (i = 0; i < (int)(qpdb->node_lock_count); i++) { 37319689912eSchristos isc_queue_init(&qpdb->deadnodes[i]); 37329689912eSchristos } 37339689912eSchristos 37349689912eSchristos qpdb->active = qpdb->node_lock_count; 37359689912eSchristos 37369689912eSchristos for (i = 0; i < (int)(qpdb->node_lock_count); i++) { 37379689912eSchristos NODE_INITLOCK(&qpdb->node_locks[i].lock); 37389689912eSchristos isc_refcount_init(&qpdb->node_locks[i].references, 0); 37399689912eSchristos qpdb->node_locks[i].exiting = false; 37409689912eSchristos } 37419689912eSchristos 37429689912eSchristos /* 37439689912eSchristos * Attach to the mctx. The database will persist so long as there 37449689912eSchristos * are references to it, and attaching to the mctx ensures that our 37459689912eSchristos * mctx won't disappear out from under us. 37469689912eSchristos */ 37479689912eSchristos isc_mem_attach(mctx, &qpdb->common.mctx); 37489689912eSchristos isc_mem_attach(hmctx, &qpdb->hmctx); 37499689912eSchristos 37509689912eSchristos /* 37519689912eSchristos * Make a copy of the origin name. 37529689912eSchristos */ 37539689912eSchristos dns_name_dupwithoffsets(origin, mctx, &qpdb->common.origin); 37549689912eSchristos 37559689912eSchristos /* 37569689912eSchristos * Make the qp tries. 
37579689912eSchristos */ 37589689912eSchristos dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->tree); 37599689912eSchristos dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->nsec); 37609689912eSchristos 37619689912eSchristos qpdb->common.magic = DNS_DB_MAGIC; 37629689912eSchristos qpdb->common.impmagic = QPDB_MAGIC; 37639689912eSchristos 37649689912eSchristos *dbp = (dns_db_t *)qpdb; 37659689912eSchristos 37669689912eSchristos return ISC_R_SUCCESS; 37679689912eSchristos } 37689689912eSchristos 37699689912eSchristos /* 37709689912eSchristos * Rdataset Iterator Methods 37719689912eSchristos */ 37729689912eSchristos 37739689912eSchristos static void 37749689912eSchristos rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { 37759689912eSchristos qpc_rditer_t *iterator = NULL; 37769689912eSchristos 37779689912eSchristos iterator = (qpc_rditer_t *)(*iteratorp); 37789689912eSchristos 37799689912eSchristos dns__db_detachnode(iterator->common.db, 37809689912eSchristos &iterator->common.node DNS__DB_FLARG_PASS); 37819689912eSchristos isc_mem_put(iterator->common.db->mctx, iterator, sizeof(*iterator)); 37829689912eSchristos 37839689912eSchristos *iteratorp = NULL; 37849689912eSchristos } 37859689912eSchristos 37869689912eSchristos static bool 37879689912eSchristos iterator_active(qpcache_t *qpdb, qpc_rditer_t *iterator, 37889689912eSchristos dns_slabheader_t *header) { 37899689912eSchristos dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb); 37909689912eSchristos 37919689912eSchristos /* 37929689912eSchristos * Is this a "this rdataset doesn't exist" record? 37939689912eSchristos */ 37949689912eSchristos if (NONEXISTENT(header)) { 37959689912eSchristos return false; 37969689912eSchristos } 37979689912eSchristos 37989689912eSchristos /* 37999689912eSchristos * If this header is still active then return it. 
38009689912eSchristos */ 38019689912eSchristos if (ACTIVE(header, iterator->common.now)) { 38029689912eSchristos return true; 38039689912eSchristos } 38049689912eSchristos 38059689912eSchristos /* 38069689912eSchristos * If we are not returning stale records or the rdataset is 38079689912eSchristos * too old don't return it. 38089689912eSchristos */ 38099689912eSchristos if (!STALEOK(iterator) || (iterator->common.now > stale_ttl)) { 38109689912eSchristos return false; 38119689912eSchristos } 38129689912eSchristos return true; 38139689912eSchristos } 38149689912eSchristos 38159689912eSchristos static isc_result_t 38169689912eSchristos rdatasetiter_first(dns_rdatasetiter_t *it DNS__DB_FLARG) { 38179689912eSchristos qpc_rditer_t *iterator = (qpc_rditer_t *)it; 38189689912eSchristos qpcache_t *qpdb = (qpcache_t *)(iterator->common.db); 38199689912eSchristos qpcnode_t *qpnode = iterator->common.node; 38209689912eSchristos dns_slabheader_t *header = NULL, *top_next = NULL; 38219689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 38229689912eSchristos 38239689912eSchristos NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 38249689912eSchristos 38259689912eSchristos for (header = qpnode->data; header != NULL; header = top_next) { 38269689912eSchristos top_next = header->next; 38279689912eSchristos do { 38289689912eSchristos if (EXPIREDOK(iterator)) { 38299689912eSchristos if (!NONEXISTENT(header)) { 38309689912eSchristos break; 38319689912eSchristos } 38329689912eSchristos header = header->down; 38339689912eSchristos } else if (!IGNORE(header)) { 38349689912eSchristos if (!iterator_active(qpdb, iterator, header)) { 38359689912eSchristos header = NULL; 38369689912eSchristos } 38379689912eSchristos break; 38389689912eSchristos } else { 38399689912eSchristos header = header->down; 38409689912eSchristos } 38419689912eSchristos } while (header != NULL); 38429689912eSchristos if (header != NULL) { 38439689912eSchristos break; 38449689912eSchristos 
} 38459689912eSchristos } 38469689912eSchristos 38479689912eSchristos NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 38489689912eSchristos 38499689912eSchristos iterator->current = header; 38509689912eSchristos 38519689912eSchristos if (header == NULL) { 38529689912eSchristos return ISC_R_NOMORE; 38539689912eSchristos } 38549689912eSchristos 38559689912eSchristos return ISC_R_SUCCESS; 38569689912eSchristos } 38579689912eSchristos 38589689912eSchristos static isc_result_t 38599689912eSchristos rdatasetiter_next(dns_rdatasetiter_t *it DNS__DB_FLARG) { 38609689912eSchristos qpc_rditer_t *iterator = (qpc_rditer_t *)it; 38619689912eSchristos qpcache_t *qpdb = (qpcache_t *)(iterator->common.db); 38629689912eSchristos qpcnode_t *qpnode = iterator->common.node; 38639689912eSchristos dns_slabheader_t *header = NULL, *top_next = NULL; 38649689912eSchristos dns_typepair_t type, negtype; 38659689912eSchristos dns_rdatatype_t rdtype, covers; 38669689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 38679689912eSchristos bool expiredok = EXPIREDOK(iterator); 38689689912eSchristos 38699689912eSchristos header = iterator->current; 38709689912eSchristos if (header == NULL) { 38719689912eSchristos return ISC_R_NOMORE; 38729689912eSchristos } 38739689912eSchristos 38749689912eSchristos NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 38759689912eSchristos 38769689912eSchristos type = header->type; 38779689912eSchristos rdtype = DNS_TYPEPAIR_TYPE(header->type); 38789689912eSchristos if (NEGATIVE(header)) { 38799689912eSchristos covers = DNS_TYPEPAIR_COVERS(header->type); 38809689912eSchristos negtype = DNS_TYPEPAIR_VALUE(covers, 0); 38819689912eSchristos } else { 38829689912eSchristos negtype = DNS_TYPEPAIR_VALUE(0, rdtype); 38839689912eSchristos } 38849689912eSchristos 38859689912eSchristos /* 38869689912eSchristos * Find the start of the header chain for the next type 38879689912eSchristos * by walking back up the list. 
38889689912eSchristos */ 38899689912eSchristos top_next = header->next; 38909689912eSchristos while (top_next != NULL && 38919689912eSchristos (top_next->type == type || top_next->type == negtype)) 38929689912eSchristos { 38939689912eSchristos top_next = top_next->next; 38949689912eSchristos } 38959689912eSchristos if (expiredok) { 38969689912eSchristos /* 38979689912eSchristos * Keep walking down the list if possible or 38989689912eSchristos * start the next type. 38999689912eSchristos */ 39009689912eSchristos header = header->down != NULL ? header->down : top_next; 39019689912eSchristos } else { 39029689912eSchristos header = top_next; 39039689912eSchristos } 39049689912eSchristos for (; header != NULL; header = top_next) { 39059689912eSchristos top_next = header->next; 39069689912eSchristos do { 39079689912eSchristos if (expiredok) { 39089689912eSchristos if (!NONEXISTENT(header)) { 39099689912eSchristos break; 39109689912eSchristos } 39119689912eSchristos header = header->down; 39129689912eSchristos } else if (!IGNORE(header)) { 39139689912eSchristos if (!iterator_active(qpdb, iterator, header)) { 39149689912eSchristos header = NULL; 39159689912eSchristos } 39169689912eSchristos break; 39179689912eSchristos } else { 39189689912eSchristos header = header->down; 39199689912eSchristos } 39209689912eSchristos } while (header != NULL); 39219689912eSchristos if (header != NULL) { 39229689912eSchristos break; 39239689912eSchristos } 39249689912eSchristos /* 39259689912eSchristos * Find the start of the header chain for the next type 39269689912eSchristos * by walking back up the list. 
39279689912eSchristos */ 39289689912eSchristos while (top_next != NULL && 39299689912eSchristos (top_next->type == type || top_next->type == negtype)) 39309689912eSchristos { 39319689912eSchristos top_next = top_next->next; 39329689912eSchristos } 39339689912eSchristos } 39349689912eSchristos 39359689912eSchristos NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 39369689912eSchristos 39379689912eSchristos iterator->current = header; 39389689912eSchristos 39399689912eSchristos if (header == NULL) { 39409689912eSchristos return ISC_R_NOMORE; 39419689912eSchristos } 39429689912eSchristos 39439689912eSchristos return ISC_R_SUCCESS; 39449689912eSchristos } 39459689912eSchristos 39469689912eSchristos static void 39479689912eSchristos rdatasetiter_current(dns_rdatasetiter_t *it, 39489689912eSchristos dns_rdataset_t *rdataset DNS__DB_FLARG) { 39499689912eSchristos qpc_rditer_t *iterator = (qpc_rditer_t *)it; 39509689912eSchristos qpcache_t *qpdb = (qpcache_t *)(iterator->common.db); 39519689912eSchristos qpcnode_t *qpnode = iterator->common.node; 39529689912eSchristos dns_slabheader_t *header = NULL; 39539689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 39549689912eSchristos 39559689912eSchristos header = iterator->current; 39569689912eSchristos REQUIRE(header != NULL); 39579689912eSchristos 39589689912eSchristos NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 39599689912eSchristos 39609689912eSchristos bindrdataset(qpdb, qpnode, header, iterator->common.now, nlocktype, 39619689912eSchristos isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS); 39629689912eSchristos 39639689912eSchristos NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype); 39649689912eSchristos } 39659689912eSchristos 39669689912eSchristos /* 39679689912eSchristos * Database Iterator Methods 39689689912eSchristos */ 39699689912eSchristos 39709689912eSchristos static void 39719689912eSchristos reference_iter_node(qpc_dbit_t *qpdbiter 
DNS__DB_FLARG) { 39729689912eSchristos qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 39739689912eSchristos qpcnode_t *node = qpdbiter->node; 39749689912eSchristos 39759689912eSchristos if (node == NULL) { 39769689912eSchristos return; 39779689912eSchristos } 39789689912eSchristos 39799689912eSchristos INSIST(qpdbiter->tree_locked != isc_rwlocktype_none); 39809689912eSchristos reactivate_node(qpdb, node, qpdbiter->tree_locked DNS__DB_FLARG_PASS); 39819689912eSchristos } 39829689912eSchristos 39839689912eSchristos static void 39849689912eSchristos dereference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) { 39859689912eSchristos qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 39869689912eSchristos qpcnode_t *node = qpdbiter->node; 39879689912eSchristos isc_rwlock_t *lock = NULL; 39889689912eSchristos isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 39899689912eSchristos isc_rwlocktype_t tlocktype = qpdbiter->tree_locked; 39909689912eSchristos 39919689912eSchristos if (node == NULL) { 39929689912eSchristos return; 39939689912eSchristos } 39949689912eSchristos 39959689912eSchristos REQUIRE(tlocktype != isc_rwlocktype_write); 39969689912eSchristos 39979689912eSchristos lock = &qpdb->node_locks[node->locknum].lock; 39989689912eSchristos NODE_RDLOCK(lock, &nlocktype); 39999689912eSchristos decref(qpdb, node, &nlocktype, &qpdbiter->tree_locked, 40009689912eSchristos false DNS__DB_FLARG_PASS); 40019689912eSchristos NODE_UNLOCK(lock, &nlocktype); 40029689912eSchristos 40039689912eSchristos INSIST(qpdbiter->tree_locked == tlocktype); 40049689912eSchristos 40059689912eSchristos qpdbiter->node = NULL; 40069689912eSchristos } 40079689912eSchristos 40089689912eSchristos static void 40099689912eSchristos resume_iteration(qpc_dbit_t *qpdbiter, bool continuing) { 40109689912eSchristos qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 40119689912eSchristos 40129689912eSchristos REQUIRE(qpdbiter->paused); 40139689912eSchristos REQUIRE(qpdbiter->tree_locked == 
isc_rwlocktype_none); 40149689912eSchristos 40159689912eSchristos TREE_RDLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked); 40169689912eSchristos 40179689912eSchristos /* 40189689912eSchristos * If we're being called from dbiterator_next or _prev, 40199689912eSchristos * then we may need to reinitialize the iterator to the current 40209689912eSchristos * name. The tree could have changed while it was unlocked, 40219689912eSchristos * would make the iterator traversal inconsistent. 40229689912eSchristos * 40239689912eSchristos * As long as the iterator is holding a reference to 40249689912eSchristos * qpdbiter->node, the node won't be removed from the tree, 40259689912eSchristos * so the lookup should always succeed. 40269689912eSchristos */ 40279689912eSchristos if (continuing && qpdbiter->node != NULL) { 40289689912eSchristos isc_result_t result; 40299689912eSchristos result = dns_qp_lookup(qpdb->tree, qpdbiter->name, NULL, 40309689912eSchristos &qpdbiter->iter, NULL, NULL, NULL); 40319689912eSchristos INSIST(result == ISC_R_SUCCESS); 40329689912eSchristos } 40339689912eSchristos 40349689912eSchristos qpdbiter->paused = false; 40359689912eSchristos } 40369689912eSchristos 40379689912eSchristos static void 40389689912eSchristos dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG) { 40399689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)(*iteratorp); 40409689912eSchristos qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db; 40419689912eSchristos dns_db_t *db = NULL; 40429689912eSchristos 40439689912eSchristos if (qpdbiter->tree_locked == isc_rwlocktype_read) { 40449689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked); 40459689912eSchristos } 40469689912eSchristos INSIST(qpdbiter->tree_locked == isc_rwlocktype_none); 40479689912eSchristos 40489689912eSchristos dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 40499689912eSchristos 40509689912eSchristos dns_db_attach(qpdbiter->common.db, &db); 40519689912eSchristos 
dns_db_detach(&qpdbiter->common.db); 40529689912eSchristos 40539689912eSchristos isc_mem_put(db->mctx, qpdbiter, sizeof(*qpdbiter)); 40549689912eSchristos dns_db_detach(&db); 40559689912eSchristos 40569689912eSchristos *iteratorp = NULL; 40579689912eSchristos } 40589689912eSchristos 40599689912eSchristos static isc_result_t 40609689912eSchristos dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG) { 40619689912eSchristos isc_result_t result; 40629689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 40639689912eSchristos qpcache_t *qpdb = (qpcache_t *)iterator->db; 40649689912eSchristos 40659689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS && 40669689912eSchristos qpdbiter->result != ISC_R_NOTFOUND && 40679689912eSchristos qpdbiter->result != DNS_R_PARTIALMATCH && 40689689912eSchristos qpdbiter->result != ISC_R_NOMORE) 40699689912eSchristos { 40709689912eSchristos return qpdbiter->result; 40719689912eSchristos } 40729689912eSchristos 40739689912eSchristos if (qpdbiter->paused) { 40749689912eSchristos resume_iteration(qpdbiter, false); 40759689912eSchristos } 40769689912eSchristos 40779689912eSchristos dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 40789689912eSchristos 40799689912eSchristos dns_qpiter_init(qpdb->tree, &qpdbiter->iter); 40809689912eSchristos result = dns_qpiter_next(&qpdbiter->iter, NULL, 40819689912eSchristos (void **)&qpdbiter->node, NULL); 40829689912eSchristos 40839689912eSchristos if (result == ISC_R_SUCCESS) { 40849689912eSchristos dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 40859689912eSchristos reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 40869689912eSchristos } else { 40879689912eSchristos INSIST(result == ISC_R_NOMORE); /* The tree is empty. 
*/ 40889689912eSchristos qpdbiter->node = NULL; 40899689912eSchristos } 40909689912eSchristos 40919689912eSchristos qpdbiter->result = result; 40929689912eSchristos 40939689912eSchristos if (result != ISC_R_SUCCESS) { 40949689912eSchristos ENSURE(!qpdbiter->paused); 40959689912eSchristos } 40969689912eSchristos 40979689912eSchristos return result; 40989689912eSchristos } 40999689912eSchristos 41009689912eSchristos static isc_result_t 41019689912eSchristos dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG) { 41029689912eSchristos isc_result_t result; 41039689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 41049689912eSchristos qpcache_t *qpdb = (qpcache_t *)iterator->db; 41059689912eSchristos 41069689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS && 41079689912eSchristos qpdbiter->result != ISC_R_NOTFOUND && 41089689912eSchristos qpdbiter->result != DNS_R_PARTIALMATCH && 41099689912eSchristos qpdbiter->result != ISC_R_NOMORE) 41109689912eSchristos { 41119689912eSchristos return qpdbiter->result; 41129689912eSchristos } 41139689912eSchristos 41149689912eSchristos if (qpdbiter->paused) { 41159689912eSchristos resume_iteration(qpdbiter, false); 41169689912eSchristos } 41179689912eSchristos 41189689912eSchristos dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 41199689912eSchristos 41209689912eSchristos dns_qpiter_init(qpdb->tree, &qpdbiter->iter); 41219689912eSchristos result = dns_qpiter_prev(&qpdbiter->iter, NULL, 41229689912eSchristos (void **)&qpdbiter->node, NULL); 41239689912eSchristos 41249689912eSchristos if (result == ISC_R_SUCCESS) { 41259689912eSchristos dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 41269689912eSchristos reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 41279689912eSchristos } else { 41289689912eSchristos INSIST(result == ISC_R_NOMORE); /* The tree is empty. 
*/ 41299689912eSchristos qpdbiter->node = NULL; 41309689912eSchristos } 41319689912eSchristos 41329689912eSchristos qpdbiter->result = result; 41339689912eSchristos return result; 41349689912eSchristos } 41359689912eSchristos 41369689912eSchristos static isc_result_t 41379689912eSchristos dbiterator_seek(dns_dbiterator_t *iterator, 41389689912eSchristos const dns_name_t *name DNS__DB_FLARG) { 41399689912eSchristos isc_result_t result; 41409689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 41419689912eSchristos qpcache_t *qpdb = (qpcache_t *)iterator->db; 41429689912eSchristos 41439689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS && 41449689912eSchristos qpdbiter->result != ISC_R_NOTFOUND && 41459689912eSchristos qpdbiter->result != DNS_R_PARTIALMATCH && 41469689912eSchristos qpdbiter->result != ISC_R_NOMORE) 41479689912eSchristos { 41489689912eSchristos return qpdbiter->result; 41499689912eSchristos } 41509689912eSchristos 41519689912eSchristos if (qpdbiter->paused) { 41529689912eSchristos resume_iteration(qpdbiter, false); 41539689912eSchristos } 41549689912eSchristos 41559689912eSchristos dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 41569689912eSchristos 41579689912eSchristos result = dns_qp_lookup(qpdb->tree, name, NULL, &qpdbiter->iter, NULL, 41589689912eSchristos (void **)&qpdbiter->node, NULL); 41599689912eSchristos 41609689912eSchristos if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) { 41619689912eSchristos dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 41629689912eSchristos reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 41639689912eSchristos } else { 41649689912eSchristos qpdbiter->node = NULL; 41659689912eSchristos } 41669689912eSchristos 41679689912eSchristos qpdbiter->result = (result == DNS_R_PARTIALMATCH) ? 
ISC_R_SUCCESS 41689689912eSchristos : result; 41699689912eSchristos return result; 41709689912eSchristos } 41719689912eSchristos 41729689912eSchristos static isc_result_t 41739689912eSchristos dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) { 41749689912eSchristos isc_result_t result; 41759689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 41769689912eSchristos 41779689912eSchristos REQUIRE(qpdbiter->node != NULL); 41789689912eSchristos 41799689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS) { 41809689912eSchristos return qpdbiter->result; 41819689912eSchristos } 41829689912eSchristos 41839689912eSchristos if (qpdbiter->paused) { 41849689912eSchristos resume_iteration(qpdbiter, true); 41859689912eSchristos } 41869689912eSchristos 41879689912eSchristos dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 41889689912eSchristos 41899689912eSchristos result = dns_qpiter_prev(&qpdbiter->iter, NULL, 41909689912eSchristos (void **)&qpdbiter->node, NULL); 41919689912eSchristos 41929689912eSchristos if (result == ISC_R_SUCCESS) { 41939689912eSchristos dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 41949689912eSchristos reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 41959689912eSchristos } else { 41969689912eSchristos INSIST(result == ISC_R_NOMORE); 41979689912eSchristos qpdbiter->node = NULL; 41989689912eSchristos } 41999689912eSchristos 42009689912eSchristos qpdbiter->result = result; 42019689912eSchristos return result; 42029689912eSchristos } 42039689912eSchristos 42049689912eSchristos static isc_result_t 42059689912eSchristos dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) { 42069689912eSchristos isc_result_t result; 42079689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 42089689912eSchristos 42099689912eSchristos REQUIRE(qpdbiter->node != NULL); 42109689912eSchristos 42119689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS) { 42129689912eSchristos return qpdbiter->result; 42139689912eSchristos } 
42149689912eSchristos 42159689912eSchristos if (qpdbiter->paused) { 42169689912eSchristos resume_iteration(qpdbiter, true); 42179689912eSchristos } 42189689912eSchristos 42199689912eSchristos dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 42209689912eSchristos 42219689912eSchristos result = dns_qpiter_next(&qpdbiter->iter, NULL, 42229689912eSchristos (void **)&qpdbiter->node, NULL); 42239689912eSchristos 42249689912eSchristos if (result == ISC_R_SUCCESS) { 42259689912eSchristos dns_name_copy(&qpdbiter->node->name, qpdbiter->name); 42269689912eSchristos reference_iter_node(qpdbiter DNS__DB_FLARG_PASS); 42279689912eSchristos } else { 42289689912eSchristos INSIST(result == ISC_R_NOMORE); 42299689912eSchristos qpdbiter->node = NULL; 42309689912eSchristos } 42319689912eSchristos 42329689912eSchristos qpdbiter->result = result; 42339689912eSchristos return result; 42349689912eSchristos } 42359689912eSchristos 42369689912eSchristos static isc_result_t 42379689912eSchristos dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, 42389689912eSchristos dns_name_t *name DNS__DB_FLARG) { 42399689912eSchristos qpcache_t *qpdb = (qpcache_t *)iterator->db; 42409689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 42419689912eSchristos qpcnode_t *node = qpdbiter->node; 42429689912eSchristos 42439689912eSchristos REQUIRE(qpdbiter->result == ISC_R_SUCCESS); 42449689912eSchristos REQUIRE(node != NULL); 42459689912eSchristos 42469689912eSchristos if (qpdbiter->paused) { 42479689912eSchristos resume_iteration(qpdbiter, false); 42489689912eSchristos } 42499689912eSchristos 42509689912eSchristos if (name != NULL) { 42519689912eSchristos dns_name_copy(&node->name, name); 42529689912eSchristos } 42539689912eSchristos 42549689912eSchristos newref(qpdb, node, isc_rwlocktype_none, 42559689912eSchristos qpdbiter->tree_locked DNS__DB_FLARG_PASS); 42569689912eSchristos 42579689912eSchristos *nodep = qpdbiter->node; 42589689912eSchristos return ISC_R_SUCCESS; 
42599689912eSchristos } 42609689912eSchristos 42619689912eSchristos static isc_result_t 42629689912eSchristos dbiterator_pause(dns_dbiterator_t *iterator) { 42639689912eSchristos qpcache_t *qpdb = (qpcache_t *)iterator->db; 42649689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 42659689912eSchristos 42669689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS && 42679689912eSchristos qpdbiter->result != ISC_R_NOTFOUND && 42689689912eSchristos qpdbiter->result != DNS_R_PARTIALMATCH && 42699689912eSchristos qpdbiter->result != ISC_R_NOMORE) 42709689912eSchristos { 42719689912eSchristos return qpdbiter->result; 42729689912eSchristos } 42739689912eSchristos 42749689912eSchristos if (qpdbiter->paused) { 42759689912eSchristos return ISC_R_SUCCESS; 42769689912eSchristos } 42779689912eSchristos 42789689912eSchristos qpdbiter->paused = true; 42799689912eSchristos 42809689912eSchristos if (qpdbiter->tree_locked == isc_rwlocktype_read) { 42819689912eSchristos TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked); 42829689912eSchristos } 42839689912eSchristos INSIST(qpdbiter->tree_locked == isc_rwlocktype_none); 42849689912eSchristos 42859689912eSchristos return ISC_R_SUCCESS; 42869689912eSchristos } 42879689912eSchristos 42889689912eSchristos static isc_result_t 42899689912eSchristos dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) { 42909689912eSchristos qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator; 42919689912eSchristos 42929689912eSchristos if (qpdbiter->result != ISC_R_SUCCESS) { 42939689912eSchristos return qpdbiter->result; 42949689912eSchristos } 42959689912eSchristos 42969689912eSchristos dns_name_copy(dns_rootname, name); 42979689912eSchristos return ISC_R_SUCCESS; 42989689912eSchristos } 42999689912eSchristos 43009689912eSchristos static void 43019689912eSchristos deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED, 43029689912eSchristos void *data) { 43039689912eSchristos dns_slabheader_t *header = data; 
43049689912eSchristos qpcache_t *qpdb = (qpcache_t *)header->db; 43059689912eSchristos 43069689912eSchristos if (header->heap != NULL && header->heap_index != 0) { 43079689912eSchristos isc_heap_delete(header->heap, header->heap_index); 43089689912eSchristos } 43099689912eSchristos 43109689912eSchristos update_rrsetstats(qpdb->rrsetstats, header->type, 43119689912eSchristos atomic_load_acquire(&header->attributes), false); 43129689912eSchristos 43139689912eSchristos if (ISC_LINK_LINKED(header, link)) { 43149689912eSchristos int idx = HEADERNODE(header)->locknum; 43159689912eSchristos ISC_LIST_UNLINK(qpdb->lru[idx], header, link); 43169689912eSchristos } 43179689912eSchristos 43189689912eSchristos if (header->noqname != NULL) { 43199689912eSchristos dns_slabheader_freeproof(db->mctx, &header->noqname); 43209689912eSchristos } 43219689912eSchristos if (header->closest != NULL) { 43229689912eSchristos dns_slabheader_freeproof(db->mctx, &header->closest); 43239689912eSchristos } 43249689912eSchristos } 43259689912eSchristos 43269689912eSchristos /* 43279689912eSchristos * Caller must be holding the node write lock. 
43289689912eSchristos */ 43299689912eSchristos static void 43309689912eSchristos expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum, 43319689912eSchristos isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, 43329689912eSchristos isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) { 43339689912eSchristos isc_heap_t *heap = qpdb->heaps[locknum]; 43349689912eSchristos 43359689912eSchristos for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) { 43369689912eSchristos dns_slabheader_t *header = isc_heap_element(heap, 1); 43379689912eSchristos 43389689912eSchristos if (header == NULL) { 43399689912eSchristos /* No headers left on this TTL heap; exit cleaning */ 43409689912eSchristos return; 43419689912eSchristos } 43429689912eSchristos 43439689912eSchristos dns_ttl_t ttl = header->ttl; 43449689912eSchristos 43459689912eSchristos if (!cache_is_overmem) { 43469689912eSchristos /* Only account for stale TTL if cache is not overmem */ 43479689912eSchristos ttl += STALE_TTL(header, qpdb); 43489689912eSchristos } 43499689912eSchristos 43509689912eSchristos if (ttl >= now - QPDB_VIRTUAL) { 43519689912eSchristos /* 43529689912eSchristos * The header at the top of this TTL heap is not yet 43539689912eSchristos * eligible for expiry, so none of the other headers on 43549689912eSchristos * the same heap can be eligible for expiry, either; 43559689912eSchristos * exit cleaning. 
43569689912eSchristos */ 43579689912eSchristos return; 43589689912eSchristos } 43599689912eSchristos 43609689912eSchristos expireheader(header, nlocktypep, tlocktypep, 43619689912eSchristos dns_expire_ttl DNS__DB_FLARG_PASS); 43629689912eSchristos } 43639689912eSchristos } 43649689912eSchristos 43659689912eSchristos static void 43669689912eSchristos setmaxrrperset(dns_db_t *db, uint32_t value) { 43679689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 43689689912eSchristos 43699689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 43709689912eSchristos 43719689912eSchristos qpdb->maxrrperset = value; 43729689912eSchristos } 43739689912eSchristos 43749689912eSchristos static void 43759689912eSchristos setmaxtypepername(dns_db_t *db, uint32_t value) { 43769689912eSchristos qpcache_t *qpdb = (qpcache_t *)db; 43779689912eSchristos 43789689912eSchristos REQUIRE(VALID_QPDB(qpdb)); 43799689912eSchristos 43809689912eSchristos qpdb->maxtypepername = value; 43819689912eSchristos } 43829689912eSchristos 43839689912eSchristos static dns_dbmethods_t qpdb_cachemethods = { 43849689912eSchristos .destroy = qpdb_destroy, 43859689912eSchristos .findnode = findnode, 43869689912eSchristos .find = find, 43879689912eSchristos .findzonecut = findzonecut, 43889689912eSchristos .attachnode = attachnode, 43899689912eSchristos .detachnode = detachnode, 43909689912eSchristos .createiterator = createiterator, 43919689912eSchristos .findrdataset = findrdataset, 43929689912eSchristos .allrdatasets = allrdatasets, 43939689912eSchristos .addrdataset = addrdataset, 43949689912eSchristos .deleterdataset = deleterdataset, 43959689912eSchristos .nodecount = nodecount, 43969689912eSchristos .getoriginnode = getoriginnode, 43979689912eSchristos .getrrsetstats = getrrsetstats, 43989689912eSchristos .setcachestats = setcachestats, 43999689912eSchristos .setservestalettl = setservestalettl, 44009689912eSchristos .getservestalettl = getservestalettl, 44019689912eSchristos .setservestalerefresh = setservestalerefresh, 
44029689912eSchristos .getservestalerefresh = getservestalerefresh, 44039689912eSchristos .locknode = locknode, 44049689912eSchristos .unlocknode = unlocknode, 44059689912eSchristos .expiredata = expiredata, 44069689912eSchristos .deletedata = deletedata, 44079689912eSchristos .setmaxrrperset = setmaxrrperset, 44089689912eSchristos .setmaxtypepername = setmaxtypepername, 44099689912eSchristos }; 44109689912eSchristos 44119689912eSchristos static void 44129689912eSchristos qpcnode_destroy(qpcnode_t *data) { 44139689912eSchristos dns_slabheader_t *current = NULL, *next = NULL; 44149689912eSchristos 44159689912eSchristos for (current = data->data; current != NULL; current = next) { 44169689912eSchristos dns_slabheader_t *down = current->down, *down_next = NULL; 44179689912eSchristos 44189689912eSchristos next = current->next; 44199689912eSchristos 44209689912eSchristos for (down = current->down; down != NULL; down = down_next) { 44219689912eSchristos down_next = down->down; 44229689912eSchristos dns_slabheader_destroy(&down); 44239689912eSchristos } 44249689912eSchristos 44259689912eSchristos dns_slabheader_destroy(¤t); 44269689912eSchristos } 44279689912eSchristos 44289689912eSchristos dns_name_free(&data->name, data->mctx); 44299689912eSchristos isc_mem_putanddetach(&data->mctx, data, sizeof(qpcnode_t)); 44309689912eSchristos } 44319689912eSchristos 44329689912eSchristos #ifdef DNS_DB_NODETRACE 44339689912eSchristos ISC_REFCOUNT_STATIC_TRACE_IMPL(qpcnode, qpcnode_destroy); 44349689912eSchristos #else 44359689912eSchristos ISC_REFCOUNT_STATIC_IMPL(qpcnode, qpcnode_destroy); 44369689912eSchristos #endif 4437