xref: /netbsd-src/external/mpl/bind/dist/lib/dns/qpcache.c (revision bcda20f65a8566e103791ec395f7f499ef322704)
1 /*	$NetBSD: qpcache.c,v 1.2 2025/01/26 16:25:24 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 /*! \file */
17 
18 #include <inttypes.h>
19 #include <stdbool.h>
20 #include <sys/mman.h>
21 
22 #include <isc/ascii.h>
23 #include <isc/async.h>
24 #include <isc/atomic.h>
25 #include <isc/crc64.h>
26 #include <isc/file.h>
27 #include <isc/hash.h>
28 #include <isc/hashmap.h>
29 #include <isc/heap.h>
30 #include <isc/hex.h>
31 #include <isc/loop.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/once.h>
35 #include <isc/queue.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/result.h>
39 #include <isc/rwlock.h>
40 #include <isc/stdio.h>
41 #include <isc/string.h>
42 #include <isc/time.h>
43 #include <isc/urcu.h>
44 #include <isc/util.h>
45 
46 #include <dns/callbacks.h>
47 #include <dns/db.h>
48 #include <dns/dbiterator.h>
49 #include <dns/fixedname.h>
50 #include <dns/log.h>
51 #include <dns/masterdump.h>
52 #include <dns/nsec.h>
53 #include <dns/qp.h>
54 #include <dns/rdata.h>
55 #include <dns/rdataset.h>
56 #include <dns/rdatasetiter.h>
57 #include <dns/rdataslab.h>
58 #include <dns/rdatastruct.h>
59 #include <dns/stats.h>
60 #include <dns/time.h>
61 #include <dns/view.h>
62 #include <dns/zonekey.h>
63 
64 #include "db_p.h"
65 #include "qpcache_p.h"
66 
67 #define CHECK(op)                            \
68 	do {                                 \
69 		result = (op);               \
70 		if (result != ISC_R_SUCCESS) \
71 			goto failure;        \
72 	} while (0)
73 
74 #define EXISTS(header)                                 \
75 	((atomic_load_acquire(&(header)->attributes) & \
76 	  DNS_SLABHEADERATTR_NONEXISTENT) == 0)
77 #define NONEXISTENT(header)                            \
78 	((atomic_load_acquire(&(header)->attributes) & \
79 	  DNS_SLABHEADERATTR_NONEXISTENT) != 0)
80 #define IGNORE(header)                                 \
81 	((atomic_load_acquire(&(header)->attributes) & \
82 	  DNS_SLABHEADERATTR_IGNORE) != 0)
83 #define NXDOMAIN(header)                               \
84 	((atomic_load_acquire(&(header)->attributes) & \
85 	  DNS_SLABHEADERATTR_NXDOMAIN) != 0)
86 #define STALE(header)                                  \
87 	((atomic_load_acquire(&(header)->attributes) & \
88 	  DNS_SLABHEADERATTR_STALE) != 0)
89 #define STALE_WINDOW(header)                           \
90 	((atomic_load_acquire(&(header)->attributes) & \
91 	  DNS_SLABHEADERATTR_STALE_WINDOW) != 0)
92 #define OPTOUT(header)                                 \
93 	((atomic_load_acquire(&(header)->attributes) & \
94 	  DNS_SLABHEADERATTR_OPTOUT) != 0)
95 #define NEGATIVE(header)                               \
96 	((atomic_load_acquire(&(header)->attributes) & \
97 	  DNS_SLABHEADERATTR_NEGATIVE) != 0)
98 #define PREFETCH(header)                               \
99 	((atomic_load_acquire(&(header)->attributes) & \
100 	  DNS_SLABHEADERATTR_PREFETCH) != 0)
101 #define ZEROTTL(header)                                \
102 	((atomic_load_acquire(&(header)->attributes) & \
103 	  DNS_SLABHEADERATTR_ZEROTTL) != 0)
104 #define ANCIENT(header)                                \
105 	((atomic_load_acquire(&(header)->attributes) & \
106 	  DNS_SLABHEADERATTR_ANCIENT) != 0)
107 #define STATCOUNT(header)                              \
108 	((atomic_load_acquire(&(header)->attributes) & \
109 	  DNS_SLABHEADERATTR_STATCOUNT) != 0)
110 
111 #define STALE_TTL(header, qpdb) \
112 	(NXDOMAIN(header) ? 0 : qpdb->common.serve_stale_ttl)
113 
114 #define ACTIVE(header, now) \
115 	(((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header)))
116 
117 #define EXPIREDOK(iterator) \
118 	(((iterator)->common.options & DNS_DB_EXPIREDOK) != 0)
119 
120 #define STALEOK(iterator) (((iterator)->common.options & DNS_DB_STALEOK) != 0)
121 
122 #define KEEPSTALE(qpdb) ((qpdb)->common.serve_stale_ttl > 0)
123 
124 /*%
125  * Note that "impmagic" is not the first four bytes of the struct, so
126  * ISC_MAGIC_VALID cannot be used.
127  */
128 #define QPDB_MAGIC ISC_MAGIC('Q', 'P', 'D', '4')
129 #define VALID_QPDB(qpdb) \
130 	((qpdb) != NULL && (qpdb)->common.impmagic == QPDB_MAGIC)
131 
132 #define HEADERNODE(h) ((qpcnode_t *)((h)->node))
133 
134 /*
135  * Allow clients with a virtual time of up to 5 minutes in the past to see
136  * records that would have otherwise have expired.
137  */
138 #define QPDB_VIRTUAL 300
139 
140 /*%
141  * Whether to rate-limit updating the LRU to avoid possible thread contention.
142  * Updating LRU requires write locking, so we don't do it every time the
143  * record is touched - only after some time passes.
144  */
145 #ifndef DNS_QPDB_LIMITLRUUPDATE
146 #define DNS_QPDB_LIMITLRUUPDATE 1
147 #endif
148 
149 /*% Time after which we update LRU for glue records, 5 minutes */
150 #define DNS_QPDB_LRUUPDATE_GLUE 300
151 /*% Time after which we update LRU for all other records, 10 minutes */
152 #define DNS_QPDB_LRUUPDATE_REGULAR 600
153 
154 /*
155  * This defines the number of headers that we try to expire each time the
156  * expire_ttl_headers() is run.  The number should be small enough, so the
157  * TTL-based header expiration doesn't take too long, but it should be large
158  * enough, so we expire enough headers if their TTL is clustered.
159  */
160 #define DNS_QPDB_EXPIRE_TTL_COUNT 10
161 
162 /*%
163  * This is the structure that is used for each node in the qp trie of trees.
164  */
165 typedef struct qpcnode qpcnode_t;
166 struct qpcnode {
167 	dns_name_t name;
168 	isc_mem_t *mctx;
169 
170 	uint8_t			: 0;
171 	unsigned int delegating : 1;
172 	unsigned int nsec	: 2; /*%< range is 0..3 */
173 	uint8_t			: 0;
174 
175 	isc_refcount_t references;
176 	isc_refcount_t erefs;
177 	uint16_t locknum;
178 	void *data;
179 
180 	/*%
181 	 * NOTE: The 'dirty' flag is protected by the node lock, so
182 	 * this bitfield has to be separated from the one above.
183 	 * We don't want it to share the same qword with bits
184 	 * that can be accessed without the node lock.
185 	 */
186 	uint8_t	      : 0;
187 	uint8_t dirty : 1;
188 	uint8_t	      : 0;
189 
190 	/*%
191 	 * Used for dead nodes cleaning.  This linked list is used to mark nodes
192 	 * which have no data any longer, but we cannot unlink at that exact
193 	 * moment because we did not or could not obtain a write lock on the
194 	 * tree.
195 	 */
196 	isc_queue_node_t deadlink;
197 };
198 
199 typedef struct qpcache qpcache_t;
200 struct qpcache {
201 	/* Unlocked. */
202 	dns_db_t common;
203 	/* Loopmgr */
204 	isc_loopmgr_t *loopmgr;
205 	/* Locks the data in this struct */
206 	isc_rwlock_t lock;
207 	/* Locks the tree structure (prevents nodes appearing/disappearing) */
208 	isc_rwlock_t tree_lock;
209 	/* Locks for individual tree nodes */
210 	unsigned int node_lock_count;
211 	db_nodelock_t *node_locks;
212 	qpcnode_t *origin_node;
213 	dns_stats_t *rrsetstats;     /* cache DB only */
214 	isc_stats_t *cachestats;     /* cache DB only */
215 	isc_stats_t *gluecachestats; /* zone DB only */
216 	/* Locked by lock. */
217 	unsigned int active;
218 
219 	uint32_t maxrrperset;	 /* Maximum RRs per RRset */
220 	uint32_t maxtypepername; /* Maximum number of RR types per owner */
221 
222 	/*
223 	 * The time after a failed lookup, where stale answers from cache
224 	 * may be used directly in a DNS response without attempting a
225 	 * new iterative lookup.
226 	 */
227 	uint32_t serve_stale_refresh;
228 
229 	/*
230 	 * This is an array of linked lists used to implement the LRU cache.
231 	 * There will be node_lock_count linked lists here.  Nodes in bucket 1
232 	 * will be placed on the linked list lru[1].
233 	 */
234 	dns_slabheaderlist_t *lru;
235 
236 	/*
237 	 * Start point % node_lock_count for next LRU cleanup.
238 	 */
239 	atomic_uint lru_sweep;
240 
241 	/*
242 	 * When performing LRU cleaning limit cleaning to headers that were
243 	 * last used at or before this.
244 	 */
245 	_Atomic(isc_stdtime_t) last_used;
246 
247 	/*%
248 	 * Temporary storage for stale cache nodes and dynamically deleted
249 	 * nodes that await being cleaned up.
250 	 */
251 	isc_queue_t *deadnodes;
252 
253 	/*
254 	 * Heaps.  These are used for TTL based expiry in a cache,
255 	 * or for zone resigning in a zone DB.  hmctx is the memory
256 	 * context to use for the heap (which differs from the main
257 	 * database memory context in the case of a cache).
258 	 */
259 	isc_mem_t *hmctx;
260 	isc_heap_t **heaps;
261 
262 	/* Locked by tree_lock. */
263 	dns_qp_t *tree;
264 	dns_qp_t *nsec;
265 };
266 
267 /*%
268  * Search Context
269  */
270 typedef struct {
271 	qpcache_t *qpdb;
272 	unsigned int options;
273 	dns_qpchain_t chain;
274 	dns_qpiter_t iter;
275 	bool need_cleanup;
276 	qpcnode_t *zonecut;
277 	dns_slabheader_t *zonecut_header;
278 	dns_slabheader_t *zonecut_sigheader;
279 	isc_stdtime_t now;
280 } qpc_search_t;
281 
282 #ifdef DNS_DB_NODETRACE
283 #define qpcnode_ref(ptr)   qpcnode__ref(ptr, __func__, __FILE__, __LINE__)
284 #define qpcnode_unref(ptr) qpcnode__unref(ptr, __func__, __FILE__, __LINE__)
285 #define qpcnode_attach(ptr, ptrp) \
286 	qpcnode__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
287 #define qpcnode_detach(ptrp) qpcnode__detach(ptrp, __func__, __FILE__, __LINE__)
288 ISC_REFCOUNT_STATIC_TRACE_DECL(qpcnode);
289 #else
290 ISC_REFCOUNT_STATIC_DECL(qpcnode);
291 #endif
292 
293 /* QP methods */
294 static void
295 qp_attach(void *uctx, void *pval, uint32_t ival);
296 static void
297 qp_detach(void *uctx, void *pval, uint32_t ival);
298 static size_t
299 qp_makekey(dns_qpkey_t key, void *uctx, void *pval, uint32_t ival);
300 static void
301 qp_triename(void *uctx, char *buf, size_t size);
302 
303 static dns_qpmethods_t qpmethods = {
304 	qp_attach,
305 	qp_detach,
306 	qp_makekey,
307 	qp_triename,
308 };
309 
310 static void
311 qp_attach(void *uctx ISC_ATTR_UNUSED, void *pval,
312 	  uint32_t ival ISC_ATTR_UNUSED) {
313 	qpcnode_t *data = pval;
314 	qpcnode_ref(data);
315 }
316 
317 static void
318 qp_detach(void *uctx ISC_ATTR_UNUSED, void *pval,
319 	  uint32_t ival ISC_ATTR_UNUSED) {
320 	qpcnode_t *data = pval;
321 	qpcnode_detach(&data);
322 }
323 
324 static size_t
325 qp_makekey(dns_qpkey_t key, void *uctx ISC_ATTR_UNUSED, void *pval,
326 	   uint32_t ival ISC_ATTR_UNUSED) {
327 	qpcnode_t *data = pval;
328 	return dns_qpkey_fromname(key, &data->name);
329 }
330 
331 static void
332 qp_triename(void *uctx, char *buf, size_t size) {
333 	UNUSED(uctx);
334 	snprintf(buf, size, "qpdb-lite");
335 }
336 
337 static void
338 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG);
339 static isc_result_t
340 rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG);
341 static isc_result_t
342 rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG);
343 static void
344 rdatasetiter_current(dns_rdatasetiter_t *iterator,
345 		     dns_rdataset_t *rdataset DNS__DB_FLARG);
346 
347 static dns_rdatasetitermethods_t rdatasetiter_methods = {
348 	rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next,
349 	rdatasetiter_current
350 };
351 
352 typedef struct qpc_rditer {
353 	dns_rdatasetiter_t common;
354 	dns_slabheader_t *current;
355 } qpc_rditer_t;
356 
357 static void
358 dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG);
359 static isc_result_t
360 dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG);
361 static isc_result_t
362 dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG);
363 static isc_result_t
364 dbiterator_seek(dns_dbiterator_t *iterator,
365 		const dns_name_t *name DNS__DB_FLARG);
366 static isc_result_t
367 dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG);
368 static isc_result_t
369 dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG);
370 static isc_result_t
371 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
372 		   dns_name_t *name DNS__DB_FLARG);
373 static isc_result_t
374 dbiterator_pause(dns_dbiterator_t *iterator);
375 static isc_result_t
376 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name);
377 
378 static dns_dbiteratormethods_t dbiterator_methods = {
379 	dbiterator_destroy, dbiterator_first, dbiterator_last,
380 	dbiterator_seek,    dbiterator_prev,  dbiterator_next,
381 	dbiterator_current, dbiterator_pause, dbiterator_origin
382 };
383 
384 /*
385  * Note that the QP cache database only needs a single QP iterator, because
386  * unlike the QP zone database, NSEC3 records are cached in the main tree.
387  *
388  * If we ever implement synth-from-dnssec using NSEC3 records, we'll need
389  * to have a separate tree for NSEC3 records, and to copy in the more complex
390  * iterator implementation from qpzone.c.
391  */
392 typedef struct qpc_dbit {
393 	dns_dbiterator_t common;
394 	bool paused;
395 	isc_rwlocktype_t tree_locked;
396 	isc_result_t result;
397 	dns_fixedname_t fixed;
398 	dns_name_t *name;
399 	dns_qpiter_t iter;
400 	qpcnode_t *node;
401 } qpc_dbit_t;
402 
403 static void
404 free_qpdb(qpcache_t *qpdb, bool log);
405 
406 static dns_dbmethods_t qpdb_cachemethods;
407 
408 /*%
409  * 'init_count' is used to initialize 'newheader->count' which in turn
410  * is used to determine where in the cycle rrset-order cyclic starts.
411  * We don't lock this as we don't care about simultaneous updates.
412  */
413 static atomic_uint_fast16_t init_count = 0;
414 
415 /*
416  * Locking
417  *
418  * If a routine is going to lock more than one lock in this module, then
419  * the locking must be done in the following order:
420  *
421  *      Tree Lock
422  *
423  *      Node Lock       (Only one from the set may be locked at one time by
424  *                       any caller)
425  *
426  *      Database Lock
427  *
428  * Failure to follow this hierarchy can result in deadlock.
429  */
430 
431 /*%
432  * Routines for LRU-based cache management.
433  */
434 
435 /*%
436  * See if a given cache entry that is being reused needs to be updated
437  * in the LRU-list.  From the LRU management point of view, this function is
438  * expected to return true for almost all cases.  When used with threads,
439  * however, this may cause a non-negligible performance penalty because a
440  * writer lock will have to be acquired before updating the list.
441  * If DNS_QPDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
442  * function returns true if the entry has not been updated for some period of
443  * time.  We differentiate the NS or glue address case and the others since
444  * experiments have shown that the former tends to be accessed relatively
445  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
446  * may cause external queries at a higher level zone, involving more
447  * transactions).
448  *
449  * Caller must hold the node (read or write) lock.
450  */
451 static bool
452 need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) {
453 	if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT |
454 					    DNS_SLABHEADERATTR_ANCIENT |
455 					    DNS_SLABHEADERATTR_ZEROTTL)) != 0)
456 	{
457 		return false;
458 	}
459 
460 #if DNS_QPDB_LIMITLRUUPDATE
461 	if (header->type == dns_rdatatype_ns ||
462 	    (header->trust == dns_trust_glue &&
463 	     (header->type == dns_rdatatype_a ||
464 	      header->type == dns_rdatatype_aaaa)))
465 	{
466 		/*
467 		 * Glue records are updated if at least DNS_QPDB_LRUUPDATE_GLUE
468 		 * seconds have passed since the previous update time.
469 		 */
470 		return header->last_used + DNS_QPDB_LRUUPDATE_GLUE <= now;
471 	}
472 
473 	/*
474 	 * Other records are updated if DNS_QPDB_LRUUPDATE_REGULAR seconds
475 	 * have passed.
476 	 */
477 	return header->last_used + DNS_QPDB_LRUUPDATE_REGULAR <= now;
478 #else
479 	UNUSED(now);
480 
481 	return true;
482 #endif /* if DNS_QPDB_LIMITLRUUPDATE */
483 }
484 
485 /*%
486  * Update the timestamp of a given cache entry and move it to the head
487  * of the corresponding LRU list.
488  *
489  * Caller must hold the node (write) lock.
490  *
491  * Note that the we do NOT touch the heap here, as the TTL has not changed.
492  */
493 static void
494 update_header(qpcache_t *qpdb, dns_slabheader_t *header, isc_stdtime_t now) {
495 	/* To be checked: can we really assume this? XXXMLG */
496 	INSIST(ISC_LINK_LINKED(header, link));
497 
498 	ISC_LIST_UNLINK(qpdb->lru[HEADERNODE(header)->locknum], header, link);
499 	header->last_used = now;
500 	ISC_LIST_PREPEND(qpdb->lru[HEADERNODE(header)->locknum], header, link);
501 }
502 
503 /*
504  * Locking:
505  * If a routine is going to lock more than one lock in this module, then
506  * the locking must be done in the following order:
507  *
508  *      Tree Lock
509  *
510  *      Node Lock       (Only one from the set may be locked at one time by
511  *                       any caller)
512  *
513  *      Database Lock
514  *
515  * Failure to follow this hierarchy can result in deadlock.
516  *
517  * Deleting Nodes:
518  * For zone databases the node for the origin of the zone MUST NOT be deleted.
519  */
520 
521 /*
522  * DB Routines
523  */
524 
525 static void
526 clean_stale_headers(dns_slabheader_t *top) {
527 	dns_slabheader_t *d = NULL, *down_next = NULL;
528 
529 	for (d = top->down; d != NULL; d = down_next) {
530 		down_next = d->down;
531 		dns_slabheader_destroy(&d);
532 	}
533 	top->down = NULL;
534 }
535 
536 static void
537 clean_cache_node(qpcache_t *qpdb, qpcnode_t *node) {
538 	dns_slabheader_t *current = NULL, *top_prev = NULL, *top_next = NULL;
539 
540 	/*
541 	 * Caller must be holding the node lock.
542 	 */
543 
544 	for (current = node->data; current != NULL; current = top_next) {
545 		top_next = current->next;
546 		clean_stale_headers(current);
547 		/*
548 		 * If current is nonexistent, ancient, or stale and
549 		 * we are not keeping stale, we can clean it up.
550 		 */
551 		if (NONEXISTENT(current) || ANCIENT(current) ||
552 		    (STALE(current) && !KEEPSTALE(qpdb)))
553 		{
554 			if (top_prev != NULL) {
555 				top_prev->next = current->next;
556 			} else {
557 				node->data = current->next;
558 			}
559 			dns_slabheader_destroy(&current);
560 		} else {
561 			top_prev = current;
562 		}
563 	}
564 	node->dirty = 0;
565 }
566 
567 /*
568  * tree_lock(write) must be held.
569  */
570 static void
571 delete_node(qpcache_t *qpdb, qpcnode_t *node) {
572 	isc_result_t result = ISC_R_UNEXPECTED;
573 
574 	if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
575 		char printname[DNS_NAME_FORMATSIZE];
576 		dns_name_format(&node->name, printname, sizeof(printname));
577 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
578 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
579 			      "delete_node(): %p %s (bucket %d)", node,
580 			      printname, node->locknum);
581 	}
582 
583 	switch (node->nsec) {
584 	case DNS_DB_NSEC_HAS_NSEC:
585 		/*
586 		 * Delete the corresponding node from the auxiliary NSEC
587 		 * tree before deleting from the main tree.
588 		 */
589 		result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL);
590 		if (result != ISC_R_SUCCESS) {
591 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
592 				      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
593 				      "delete_node(): "
594 				      "dns_qp_deletename: %s",
595 				      isc_result_totext(result));
596 		}
597 		/* FALLTHROUGH */
598 	case DNS_DB_NSEC_NORMAL:
599 		result = dns_qp_deletename(qpdb->tree, &node->name, NULL, NULL);
600 		break;
601 	case DNS_DB_NSEC_NSEC:
602 		result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL);
603 		break;
604 	}
605 	if (result != ISC_R_SUCCESS) {
606 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
607 			      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
608 			      "delete_node(): "
609 			      "dns_qp_deletename: %s",
610 			      isc_result_totext(result));
611 	}
612 }
613 
614 /*
615  * The caller must specify its currect node and tree lock status.
616  * It's okay for neither lock to be held if there are existing external
617  * references to the node, but if this is the first external reference,
618  * then the caller must be holding at least one lock.
619  */
620 static void
621 newref(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t nlocktype,
622        isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
623 	uint_fast32_t refs;
624 
625 	qpcnode_ref(node);
626 	refs = isc_refcount_increment0(&node->erefs);
627 
628 #if DNS_DB_NODETRACE
629 	fprintf(stderr, "incr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n",
630 		func, file, line, node, refs + 1);
631 #endif
632 
633 	if (refs == 0) {
634 		/*
635 		 * this is the first external reference to the node.
636 		 *
637 		 * we need to hold the node or tree lock to avoid
638 		 * incrementing the reference count while also deleting
639 		 * the node. delete_node() is always protected by both
640 		 * tree and node locks being write-locked.
641 		 */
642 		INSIST(nlocktype != isc_rwlocktype_none ||
643 		       tlocktype != isc_rwlocktype_none);
644 
645 		refs = isc_refcount_increment0(
646 			&qpdb->node_locks[node->locknum].references);
647 #if DNS_DB_NODETRACE
648 		fprintf(stderr,
649 			"incr:nodelock:%s:%s:%u:%p:%p->references = "
650 			"%" PRIuFAST32 "\n",
651 			func, file, line, node,
652 			&qpdb->node_locks[node->locknum], refs + 1);
653 #else
654 		UNUSED(refs);
655 #endif
656 	}
657 }
658 
659 static void
660 cleanup_deadnodes(void *arg);
661 
662 /*
663  * Caller must be holding the node lock; either the read or write lock.
664  * Note that the lock must be held even when node references are
665  * atomically modified; in that case the decrement operation itself does not
666  * have to be protected, but we must avoid a race condition where multiple
667  * threads are decreasing the reference to zero simultaneously and at least
668  * one of them is going to free the node.
669  *
670  * This decrements both the internal and external node reference counters.
671  * If the external reference count drops to zero, then the node lock
672  * reference count is also decremented.
673  *
674  * This function returns true if and only if the node reference decreases
675  * to zero.  (NOTE: Decrementing the reference count of a node to zero does
676  * not mean it will be immediately freed.)
677  */
678 static bool
679 decref(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep,
680        isc_rwlocktype_t *tlocktypep, bool tryupgrade DNS__DB_FLARG) {
681 	isc_result_t result;
682 	bool locked = *tlocktypep != isc_rwlocktype_none;
683 	bool write_locked = false;
684 	db_nodelock_t *nodelock = NULL;
685 	int bucket = node->locknum;
686 	uint_fast32_t refs;
687 
688 	REQUIRE(*nlocktypep != isc_rwlocktype_none);
689 
690 	nodelock = &qpdb->node_locks[bucket];
691 
692 #define KEEP_NODE(n, r) ((n)->data != NULL || (n) == (r)->origin_node)
693 
694 	/* Handle easy and typical case first. */
695 	if (!node->dirty && KEEP_NODE(node, qpdb)) {
696 		bool no_reference = false;
697 
698 		refs = isc_refcount_decrement(&node->erefs);
699 #if DNS_DB_NODETRACE
700 		fprintf(stderr,
701 			"decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n",
702 			func, file, line, node, refs - 1);
703 #else
704 		UNUSED(refs);
705 #endif
706 		if (refs == 1) {
707 			refs = isc_refcount_decrement(&nodelock->references);
708 #if DNS_DB_NODETRACE
709 			fprintf(stderr,
710 				"decr:nodelock:%s:%s:%u:%p:%p->references = "
711 				"%" PRIuFAST32 "\n",
712 				func, file, line, node, nodelock, refs - 1);
713 #else
714 			UNUSED(refs);
715 #endif
716 			no_reference = true;
717 		}
718 
719 		qpcnode_unref(node);
720 		return no_reference;
721 	}
722 
723 	/* Upgrade the lock? */
724 	if (*nlocktypep == isc_rwlocktype_read) {
725 		NODE_FORCEUPGRADE(&nodelock->lock, nlocktypep);
726 	}
727 
728 	refs = isc_refcount_decrement(&node->erefs);
729 #if DNS_DB_NODETRACE
730 	fprintf(stderr, "decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n",
731 		func, file, line, node, refs - 1);
732 #endif
733 
734 	if (refs > 1) {
735 		qpcnode_unref(node);
736 		return false;
737 	}
738 
739 	INSIST(refs == 1);
740 
741 	if (node->dirty) {
742 		clean_cache_node(qpdb, node);
743 	}
744 
745 	/*
746 	 * Attempt to switch to a write lock on the tree.  If this fails,
747 	 * we will add this node to a linked list of nodes in this locking
748 	 * bucket which we will free later.
749 	 *
750 	 * Locking hierarchy notwithstanding, we don't need to free
751 	 * the node lock before acquiring the tree write lock because
752 	 * we only do a trylock.
753 	 */
754 	/* We are allowed to upgrade the tree lock */
755 
756 	switch (*tlocktypep) {
757 	case isc_rwlocktype_write:
758 		result = ISC_R_SUCCESS;
759 		break;
760 	case isc_rwlocktype_read:
761 		if (tryupgrade) {
762 			result = TREE_TRYUPGRADE(&qpdb->tree_lock, tlocktypep);
763 		} else {
764 			result = ISC_R_LOCKBUSY;
765 		}
766 		break;
767 	case isc_rwlocktype_none:
768 		result = TREE_TRYWRLOCK(&qpdb->tree_lock, tlocktypep);
769 		break;
770 	default:
771 		UNREACHABLE();
772 	}
773 	RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_LOCKBUSY);
774 	if (result == ISC_R_SUCCESS) {
775 		write_locked = true;
776 	}
777 
778 	refs = isc_refcount_decrement(&nodelock->references);
779 #if DNS_DB_NODETRACE
780 	fprintf(stderr,
781 		"decr:nodelock:%s:%s:%u:%p:%p->references = %" PRIuFAST32 "\n",
782 		func, file, line, node, nodelock, refs - 1);
783 #else
784 	UNUSED(refs);
785 #endif
786 
787 	if (KEEP_NODE(node, qpdb)) {
788 		goto restore_locks;
789 	}
790 
791 #undef KEEP_NODE
792 
793 	if (write_locked) {
794 		/*
795 		 * We can now delete the node.
796 		 */
797 		delete_node(qpdb, node);
798 	} else {
799 		newref(qpdb, node, *nlocktypep, *tlocktypep DNS__DB_FLARG_PASS);
800 
801 		isc_queue_node_init(&node->deadlink);
802 		if (!isc_queue_enqueue_entry(&qpdb->deadnodes[bucket], node,
803 					     deadlink))
804 		{
805 			/* Queue was empty, trigger new cleaning */
806 			isc_loop_t *loop = isc_loop_get(qpdb->loopmgr, bucket);
807 
808 			isc_async_run(loop, cleanup_deadnodes, qpdb);
809 		}
810 	}
811 
812 restore_locks:
813 	/*
814 	 * Relock a read lock, or unlock the write lock if no lock was held.
815 	 */
816 	if (!locked && write_locked) {
817 		TREE_UNLOCK(&qpdb->tree_lock, tlocktypep);
818 	}
819 
820 	qpcnode_unref(node);
821 	return true;
822 }
823 
824 static void
825 update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype,
826 		  const uint_least16_t hattributes, const bool increment) {
827 	dns_rdatastatstype_t statattributes = 0;
828 	dns_rdatastatstype_t base = 0;
829 	dns_rdatastatstype_t type;
830 	dns_slabheader_t *header = &(dns_slabheader_t){
831 		.type = htype,
832 		.attributes = hattributes,
833 	};
834 
835 	if (!EXISTS(header) || !STATCOUNT(header)) {
836 		return;
837 	}
838 
839 	if (NEGATIVE(header)) {
840 		if (NXDOMAIN(header)) {
841 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
842 		} else {
843 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
844 			base = DNS_TYPEPAIR_COVERS(header->type);
845 		}
846 	} else {
847 		base = DNS_TYPEPAIR_TYPE(header->type);
848 	}
849 
850 	if (STALE(header)) {
851 		statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE;
852 	}
853 	if (ANCIENT(header)) {
854 		statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT;
855 	}
856 
857 	type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
858 	if (increment) {
859 		dns_rdatasetstats_increment(stats, type);
860 	} else {
861 		dns_rdatasetstats_decrement(stats, type);
862 	}
863 }
864 
865 static void
866 mark(dns_slabheader_t *header, uint_least16_t flag) {
867 	uint_least16_t attributes = atomic_load_acquire(&header->attributes);
868 	uint_least16_t newattributes = 0;
869 	dns_stats_t *stats = NULL;
870 
871 	/*
872 	 * If we are already ancient there is nothing to do.
873 	 */
874 	do {
875 		if ((attributes & flag) != 0) {
876 			return;
877 		}
878 		newattributes = attributes | flag;
879 	} while (!atomic_compare_exchange_weak_acq_rel(
880 		&header->attributes, &attributes, newattributes));
881 
882 	/*
883 	 * Decrement and increment the stats counter for the appropriate
884 	 * RRtype.
885 	 */
886 	stats = dns_db_getrrsetstats(header->db);
887 	if (stats != NULL) {
888 		update_rrsetstats(stats, header->type, attributes, false);
889 		update_rrsetstats(stats, header->type, newattributes, true);
890 	}
891 }
892 
893 static void
894 setttl(dns_slabheader_t *header, dns_ttl_t newttl) {
895 	dns_ttl_t oldttl = header->ttl;
896 
897 	header->ttl = newttl;
898 
899 	if (header->db == NULL || !dns_db_iscache(header->db)) {
900 		return;
901 	}
902 
903 	/*
904 	 * This is a cache. Adjust the heaps if necessary.
905 	 */
906 	if (header->heap == NULL || header->heap_index == 0 || newttl == oldttl)
907 	{
908 		return;
909 	}
910 
911 	if (newttl < oldttl) {
912 		isc_heap_increased(header->heap, header->heap_index);
913 	} else {
914 		isc_heap_decreased(header->heap, header->heap_index);
915 	}
916 
917 	if (newttl == 0) {
918 		isc_heap_delete(header->heap, header->heap_index);
919 	}
920 }
921 
922 /*
923  * Caller must hold the node (write) lock.
924  */
925 static void
926 expireheader(dns_slabheader_t *header, isc_rwlocktype_t *nlocktypep,
927 	     isc_rwlocktype_t *tlocktypep, dns_expire_t reason DNS__DB_FLARG) {
928 	setttl(header, 0);
929 	mark(header, DNS_SLABHEADERATTR_ANCIENT);
930 	HEADERNODE(header)->dirty = 1;
931 
932 	if (isc_refcount_current(&HEADERNODE(header)->erefs) == 0) {
933 		qpcache_t *qpdb = (qpcache_t *)header->db;
934 
935 		/*
936 		 * If no one else is using the node, we can clean it up now.
937 		 * We first need to gain a new reference to the node to meet a
938 		 * requirement of decref().
939 		 */
940 		newref(qpdb, HEADERNODE(header), *nlocktypep,
941 		       *tlocktypep DNS__DB_FLARG_PASS);
942 		decref(qpdb, HEADERNODE(header), nlocktypep, tlocktypep,
943 		       true DNS__DB_FLARG_PASS);
944 
945 		if (qpdb->cachestats == NULL) {
946 			return;
947 		}
948 
949 		switch (reason) {
950 		case dns_expire_ttl:
951 			isc_stats_increment(qpdb->cachestats,
952 					    dns_cachestatscounter_deletettl);
953 			break;
954 		case dns_expire_lru:
955 			isc_stats_increment(qpdb->cachestats,
956 					    dns_cachestatscounter_deletelru);
957 			break;
958 		default:
959 			break;
960 		}
961 	}
962 }
963 
964 static void
965 update_cachestats(qpcache_t *qpdb, isc_result_t result) {
966 	if (qpdb->cachestats == NULL) {
967 		return;
968 	}
969 
970 	switch (result) {
971 	case DNS_R_COVERINGNSEC:
972 		isc_stats_increment(qpdb->cachestats,
973 				    dns_cachestatscounter_coveringnsec);
974 		FALLTHROUGH;
975 	case ISC_R_SUCCESS:
976 	case DNS_R_CNAME:
977 	case DNS_R_DNAME:
978 	case DNS_R_DELEGATION:
979 	case DNS_R_NCACHENXDOMAIN:
980 	case DNS_R_NCACHENXRRSET:
981 		isc_stats_increment(qpdb->cachestats,
982 				    dns_cachestatscounter_hits);
983 		break;
984 	default:
985 		isc_stats_increment(qpdb->cachestats,
986 				    dns_cachestatscounter_misses);
987 	}
988 }
989 
990 static void
991 bindrdataset(qpcache_t *qpdb, qpcnode_t *node, dns_slabheader_t *header,
992 	     isc_stdtime_t now, isc_rwlocktype_t nlocktype,
993 	     isc_rwlocktype_t tlocktype,
994 	     dns_rdataset_t *rdataset DNS__DB_FLARG) {
995 	bool stale = STALE(header);
996 	bool ancient = ANCIENT(header);
997 
998 	/*
999 	 * Caller must be holding the node reader lock.
1000 	 * XXXJT: technically, we need a writer lock, since we'll increment
1001 	 * the header count below.  However, since the actual counter value
1002 	 * doesn't matter, we prioritize performance here.  (We may want to
1003 	 * use atomic increment when available).
1004 	 */
1005 
1006 	if (rdataset == NULL) {
1007 		return;
1008 	}
1009 
1010 	newref(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS);
1011 
1012 	INSIST(rdataset->methods == NULL); /* We must be disassociated. */
1013 
1014 	/*
1015 	 * Mark header stale or ancient if the RRset is no longer active.
1016 	 */
1017 	if (!ACTIVE(header, now)) {
1018 		dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb);
1019 		/*
1020 		 * If this data is in the stale window keep it and if
1021 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
1022 		 * skip this record.  We skip the records with ZEROTTL
1023 		 * (these records should not be cached anyway).
1024 		 */
1025 
1026 		if (KEEPSTALE(qpdb) && stale_ttl > now) {
1027 			stale = true;
1028 		} else {
1029 			/*
1030 			 * We are not keeping stale, or it is outside the
1031 			 * stale window. Mark ancient, i.e. ready for cleanup.
1032 			 */
1033 			ancient = true;
1034 		}
1035 	}
1036 
1037 	rdataset->methods = &dns_rdataslab_rdatasetmethods;
1038 	rdataset->rdclass = qpdb->common.rdclass;
1039 	rdataset->type = DNS_TYPEPAIR_TYPE(header->type);
1040 	rdataset->covers = DNS_TYPEPAIR_COVERS(header->type);
1041 	rdataset->ttl = header->ttl - now;
1042 	rdataset->trust = header->trust;
1043 	rdataset->resign = 0;
1044 
1045 	if (NEGATIVE(header)) {
1046 		rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
1047 	}
1048 	if (NXDOMAIN(header)) {
1049 		rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
1050 	}
1051 	if (OPTOUT(header)) {
1052 		rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
1053 	}
1054 	if (PREFETCH(header)) {
1055 		rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
1056 	}
1057 
1058 	if (stale && !ancient) {
1059 		dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb);
1060 		if (stale_ttl > now) {
1061 			rdataset->ttl = stale_ttl - now;
1062 		} else {
1063 			rdataset->ttl = 0;
1064 		}
1065 		if (STALE_WINDOW(header)) {
1066 			rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW;
1067 		}
1068 		rdataset->attributes |= DNS_RDATASETATTR_STALE;
1069 	} else if (!ACTIVE(header, now)) {
1070 		rdataset->attributes |= DNS_RDATASETATTR_ANCIENT;
1071 		rdataset->ttl = header->ttl;
1072 	}
1073 
1074 	rdataset->count = atomic_fetch_add_relaxed(&header->count, 1);
1075 
1076 	rdataset->slab.db = (dns_db_t *)qpdb;
1077 	rdataset->slab.node = (dns_dbnode_t *)node;
1078 	rdataset->slab.raw = dns_slabheader_raw(header);
1079 	rdataset->slab.iter_pos = NULL;
1080 	rdataset->slab.iter_count = 0;
1081 
1082 	/*
1083 	 * Add noqname proof.
1084 	 */
1085 	rdataset->slab.noqname = header->noqname;
1086 	if (header->noqname != NULL) {
1087 		rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
1088 	}
1089 	rdataset->slab.closest = header->closest;
1090 	if (header->closest != NULL) {
1091 		rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
1092 	}
1093 }
1094 
1095 static isc_result_t
1096 setup_delegation(qpc_search_t *search, dns_dbnode_t **nodep,
1097 		 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
1098 		 isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
1099 	dns_typepair_t type;
1100 	qpcnode_t *node = NULL;
1101 
1102 	REQUIRE(search != NULL);
1103 	REQUIRE(search->zonecut != NULL);
1104 	REQUIRE(search->zonecut_header != NULL);
1105 
1106 	/*
1107 	 * The caller MUST NOT be holding any node locks.
1108 	 */
1109 
1110 	node = search->zonecut;
1111 	type = search->zonecut_header->type;
1112 
1113 	if (nodep != NULL) {
1114 		/*
1115 		 * Note that we don't have to increment the node's reference
1116 		 * count here because we're going to use the reference we
1117 		 * already have in the search block.
1118 		 */
1119 		*nodep = node;
1120 		search->need_cleanup = false;
1121 	}
1122 	if (rdataset != NULL) {
1123 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
1124 		NODE_RDLOCK(&(search->qpdb->node_locks[node->locknum].lock),
1125 			    &nlocktype);
1126 		bindrdataset(search->qpdb, node, search->zonecut_header,
1127 			     search->now, nlocktype, tlocktype,
1128 			     rdataset DNS__DB_FLARG_PASS);
1129 		if (sigrdataset != NULL && search->zonecut_sigheader != NULL) {
1130 			bindrdataset(search->qpdb, node,
1131 				     search->zonecut_sigheader, search->now,
1132 				     nlocktype, tlocktype,
1133 				     sigrdataset DNS__DB_FLARG_PASS);
1134 		}
1135 		NODE_UNLOCK(&(search->qpdb->node_locks[node->locknum].lock),
1136 			    &nlocktype);
1137 	}
1138 
1139 	if (type == dns_rdatatype_dname) {
1140 		return DNS_R_DNAME;
1141 	}
1142 	return DNS_R_DELEGATION;
1143 }
1144 
1145 static bool
1146 check_stale_header(qpcnode_t *node, dns_slabheader_t *header,
1147 		   isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock,
1148 		   qpc_search_t *search, dns_slabheader_t **header_prev) {
1149 	if (!ACTIVE(header, search->now)) {
1150 		dns_ttl_t stale = header->ttl + STALE_TTL(header, search->qpdb);
1151 		/*
1152 		 * If this data is in the stale window keep it and if
1153 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
1154 		 * skip this record.  We skip the records with ZEROTTL
1155 		 * (these records should not be cached anyway).
1156 		 */
1157 
1158 		DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW);
1159 		if (!ZEROTTL(header) && KEEPSTALE(search->qpdb) &&
1160 		    stale > search->now)
1161 		{
1162 			mark(header, DNS_SLABHEADERATTR_STALE);
1163 			*header_prev = header;
1164 			/*
1165 			 * If DNS_DBFIND_STALESTART is set then it means we
1166 			 * failed to resolve the name during recursion, in
1167 			 * this case we mark the time in which the refresh
1168 			 * failed.
1169 			 */
1170 			if ((search->options & DNS_DBFIND_STALESTART) != 0) {
1171 				atomic_store_release(
1172 					&header->last_refresh_fail_ts,
1173 					search->now);
1174 			} else if ((search->options &
1175 				    DNS_DBFIND_STALEENABLED) != 0 &&
1176 				   search->now <
1177 					   (atomic_load_acquire(
1178 						    &header->last_refresh_fail_ts) +
1179 					    search->qpdb->serve_stale_refresh))
1180 			{
1181 				/*
1182 				 * If we are within interval between last
1183 				 * refresh failure time + 'stale-refresh-time',
1184 				 * then don't skip this stale entry but use it
1185 				 * instead.
1186 				 */
1187 				DNS_SLABHEADER_SETATTR(
1188 					header,
1189 					DNS_SLABHEADERATTR_STALE_WINDOW);
1190 				return false;
1191 			} else if ((search->options &
1192 				    DNS_DBFIND_STALETIMEOUT) != 0)
1193 			{
1194 				/*
1195 				 * We want stale RRset due to timeout, so we
1196 				 * don't skip it.
1197 				 */
1198 				return false;
1199 			}
1200 			return (search->options & DNS_DBFIND_STALEOK) == 0;
1201 		}
1202 
1203 		/*
1204 		 * This rdataset is stale.  If no one else is using the
1205 		 * node, we can clean it up right now, otherwise we mark
1206 		 * it as ancient, and the node as dirty, so it will get
1207 		 * cleaned up later.
1208 		 */
1209 		if ((header->ttl < search->now - QPDB_VIRTUAL) &&
1210 		    (*nlocktypep == isc_rwlocktype_write ||
1211 		     NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS))
1212 		{
1213 			/*
1214 			 * We update the node's status only when we can
1215 			 * get write access; otherwise, we leave others
1216 			 * to this work.  Periodical cleaning will
1217 			 * eventually take the job as the last resort.
1218 			 * We won't downgrade the lock, since other
1219 			 * rdatasets are probably stale, too.
1220 			 */
1221 
1222 			if (isc_refcount_current(&node->references) == 0) {
1223 				/*
1224 				 * header->down can be non-NULL if the
1225 				 * refcount has just decremented to 0
1226 				 * but decref() has not
1227 				 * performed clean_cache_node(), in
1228 				 * which case we need to purge the stale
1229 				 * headers first.
1230 				 */
1231 				clean_stale_headers(header);
1232 				if (*header_prev != NULL) {
1233 					(*header_prev)->next = header->next;
1234 				} else {
1235 					node->data = header->next;
1236 				}
1237 				dns_slabheader_destroy(&header);
1238 			} else {
1239 				mark(header, DNS_SLABHEADERATTR_ANCIENT);
1240 				HEADERNODE(header)->dirty = 1;
1241 				*header_prev = header;
1242 			}
1243 		} else {
1244 			*header_prev = header;
1245 		}
1246 		return true;
1247 	}
1248 	return false;
1249 }
1250 
1251 static isc_result_t
1252 check_zonecut(qpcnode_t *node, void *arg DNS__DB_FLARG) {
1253 	qpc_search_t *search = arg;
1254 	dns_slabheader_t *header = NULL;
1255 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
1256 	dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL;
1257 	isc_result_t result;
1258 	isc_rwlock_t *lock = NULL;
1259 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
1260 
1261 	REQUIRE(search->zonecut == NULL);
1262 
1263 	lock = &(search->qpdb->node_locks[node->locknum].lock);
1264 	NODE_RDLOCK(lock, &nlocktype);
1265 
1266 	/*
1267 	 * Look for a DNAME or RRSIG DNAME rdataset.
1268 	 */
1269 	for (header = node->data; header != NULL; header = header_next) {
1270 		header_next = header->next;
1271 		if (check_stale_header(node, header, &nlocktype, lock, search,
1272 				       &header_prev))
1273 		{
1274 			/* Do nothing. */
1275 		} else if (header->type == dns_rdatatype_dname &&
1276 			   EXISTS(header) && !ANCIENT(header))
1277 		{
1278 			dname_header = header;
1279 			header_prev = header;
1280 		} else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) &&
1281 			   EXISTS(header) && !ANCIENT(header))
1282 		{
1283 			sigdname_header = header;
1284 			header_prev = header;
1285 		} else {
1286 			header_prev = header;
1287 		}
1288 	}
1289 
1290 	if (dname_header != NULL &&
1291 	    (!DNS_TRUST_PENDING(dname_header->trust) ||
1292 	     (search->options & DNS_DBFIND_PENDINGOK) != 0))
1293 	{
1294 		/*
1295 		 * We increment the reference count on node to ensure that
1296 		 * search->zonecut_header will still be valid later.
1297 		 */
1298 		newref(search->qpdb, node, nlocktype,
1299 		       isc_rwlocktype_none DNS__DB_FLARG_PASS);
1300 		search->zonecut = node;
1301 		search->zonecut_header = dname_header;
1302 		search->zonecut_sigheader = sigdname_header;
1303 		search->need_cleanup = true;
1304 		result = DNS_R_PARTIALMATCH;
1305 	} else {
1306 		result = DNS_R_CONTINUE;
1307 	}
1308 
1309 	NODE_UNLOCK(lock, &nlocktype);
1310 
1311 	return result;
1312 }
1313 
1314 static isc_result_t
1315 find_deepest_zonecut(qpc_search_t *search, qpcnode_t *node,
1316 		     dns_dbnode_t **nodep, dns_name_t *foundname,
1317 		     dns_rdataset_t *rdataset,
1318 		     dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
1319 	isc_result_t result = ISC_R_NOTFOUND;
1320 	qpcache_t *qpdb = NULL;
1321 
1322 	/*
1323 	 * Caller must be holding the tree lock.
1324 	 */
1325 
1326 	qpdb = search->qpdb;
1327 
1328 	for (int i = dns_qpchain_length(&search->chain) - 1; i >= 0; i--) {
1329 		dns_slabheader_t *header = NULL;
1330 		dns_slabheader_t *header_prev = NULL, *header_next = NULL;
1331 		dns_slabheader_t *found = NULL, *foundsig = NULL;
1332 		isc_rwlock_t *lock = NULL;
1333 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
1334 
1335 		dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL);
1336 		lock = &qpdb->node_locks[node->locknum].lock;
1337 
1338 		NODE_RDLOCK(lock, &nlocktype);
1339 
1340 		/*
1341 		 * Look for NS and RRSIG NS rdatasets.
1342 		 */
1343 		for (header = node->data; header != NULL; header = header_next)
1344 		{
1345 			header_next = header->next;
1346 			if (check_stale_header(node, header, &nlocktype, lock,
1347 					       search, &header_prev))
1348 			{
1349 				/* Do nothing. */
1350 			} else if (EXISTS(header) && !ANCIENT(header)) {
1351 				/*
1352 				 * We've found an extant rdataset.  See if
1353 				 * we're interested in it.
1354 				 */
1355 				if (header->type == dns_rdatatype_ns) {
1356 					found = header;
1357 					if (foundsig != NULL) {
1358 						break;
1359 					}
1360 				} else if (header->type ==
1361 					   DNS_SIGTYPE(dns_rdatatype_ns))
1362 				{
1363 					foundsig = header;
1364 					if (found != NULL) {
1365 						break;
1366 					}
1367 				}
1368 				header_prev = header;
1369 			} else {
1370 				header_prev = header;
1371 			}
1372 		}
1373 
1374 		if (found != NULL) {
1375 			/*
1376 			 * If we have to set foundname, we do it before
1377 			 * anything else.
1378 			 */
1379 			if (foundname != NULL) {
1380 				dns_name_copy(&node->name, foundname);
1381 			}
1382 			result = DNS_R_DELEGATION;
1383 			if (nodep != NULL) {
1384 				newref(search->qpdb, node, nlocktype,
1385 				       isc_rwlocktype_none DNS__DB_FLARG_PASS);
1386 				*nodep = node;
1387 			}
1388 			bindrdataset(search->qpdb, node, found, search->now,
1389 				     nlocktype, isc_rwlocktype_none,
1390 				     rdataset DNS__DB_FLARG_PASS);
1391 			if (foundsig != NULL) {
1392 				bindrdataset(search->qpdb, node, foundsig,
1393 					     search->now, nlocktype,
1394 					     isc_rwlocktype_none,
1395 					     sigrdataset DNS__DB_FLARG_PASS);
1396 			}
1397 			if (need_headerupdate(found, search->now) ||
1398 			    (foundsig != NULL &&
1399 			     need_headerupdate(foundsig, search->now)))
1400 			{
1401 				if (nlocktype != isc_rwlocktype_write) {
1402 					NODE_FORCEUPGRADE(lock, &nlocktype);
1403 					POST(nlocktype);
1404 				}
1405 				if (need_headerupdate(found, search->now)) {
1406 					update_header(search->qpdb, found,
1407 						      search->now);
1408 				}
1409 				if (foundsig != NULL &&
1410 				    need_headerupdate(foundsig, search->now))
1411 				{
1412 					update_header(search->qpdb, foundsig,
1413 						      search->now);
1414 				}
1415 			}
1416 		}
1417 
1418 		NODE_UNLOCK(lock, &nlocktype);
1419 
1420 		if (found != NULL) {
1421 			break;
1422 		}
1423 	}
1424 
1425 	return result;
1426 }
1427 
1428 /*
1429  * Look for a potentially covering NSEC in the cache where `name`
1430  * is known not to exist.  This uses the auxiliary NSEC tree to find
1431  * the potential NSEC owner. If found, we update 'foundname', 'nodep',
1432  * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC.
1433  * Otherwise, return ISC_R_NOTFOUND.
1434  */
1435 static isc_result_t
1436 find_coveringnsec(qpc_search_t *search, const dns_name_t *name,
1437 		  dns_dbnode_t **nodep, isc_stdtime_t now,
1438 		  dns_name_t *foundname, dns_rdataset_t *rdataset,
1439 		  dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
1440 	dns_fixedname_t fpredecessor, fixed;
1441 	dns_name_t *predecessor = NULL, *fname = NULL;
1442 	qpcnode_t *node = NULL;
1443 	dns_qpiter_t iter;
1444 	isc_result_t result;
1445 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
1446 	isc_rwlock_t *lock = NULL;
1447 	dns_typepair_t matchtype, sigmatchtype;
1448 	dns_slabheader_t *found = NULL, *foundsig = NULL;
1449 	dns_slabheader_t *header = NULL;
1450 	dns_slabheader_t *header_next = NULL, *header_prev = NULL;
1451 
1452 	/*
1453 	 * Look for the node in the auxilary tree.
1454 	 */
1455 	result = dns_qp_lookup(search->qpdb->nsec, name, NULL, &iter, NULL,
1456 			       (void **)&node, NULL);
1457 	if (result != DNS_R_PARTIALMATCH) {
1458 		return ISC_R_NOTFOUND;
1459 	}
1460 
1461 	fname = dns_fixedname_initname(&fixed);
1462 	predecessor = dns_fixedname_initname(&fpredecessor);
1463 	matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0);
1464 	sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec);
1465 
1466 	/*
1467 	 * Extract predecessor from iterator.
1468 	 */
1469 	result = dns_qpiter_current(&iter, predecessor, NULL, NULL);
1470 	if (result != ISC_R_SUCCESS) {
1471 		return ISC_R_NOTFOUND;
1472 	}
1473 
1474 	/*
1475 	 * Lookup the predecessor in the main tree.
1476 	 */
1477 	node = NULL;
1478 	result = dns_qp_getname(search->qpdb->tree, predecessor, (void **)&node,
1479 				NULL);
1480 	if (result != ISC_R_SUCCESS) {
1481 		return result;
1482 	}
1483 	dns_name_copy(&node->name, fname);
1484 
1485 	lock = &(search->qpdb->node_locks[node->locknum].lock);
1486 	NODE_RDLOCK(lock, &nlocktype);
1487 	for (header = node->data; header != NULL; header = header_next) {
1488 		header_next = header->next;
1489 		if (check_stale_header(node, header, &nlocktype, lock, search,
1490 				       &header_prev))
1491 		{
1492 			continue;
1493 		}
1494 		if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0)
1495 		{
1496 			header_prev = header;
1497 			continue;
1498 		}
1499 		if (header->type == matchtype) {
1500 			found = header;
1501 			if (foundsig != NULL) {
1502 				break;
1503 			}
1504 		} else if (header->type == sigmatchtype) {
1505 			foundsig = header;
1506 			if (found != NULL) {
1507 				break;
1508 			}
1509 		}
1510 		header_prev = header;
1511 	}
1512 	if (found != NULL) {
1513 		bindrdataset(search->qpdb, node, found, now, nlocktype,
1514 			     isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS);
1515 		if (foundsig != NULL) {
1516 			bindrdataset(search->qpdb, node, foundsig, now,
1517 				     nlocktype, isc_rwlocktype_none,
1518 				     sigrdataset DNS__DB_FLARG_PASS);
1519 		}
1520 		newref(search->qpdb, node, nlocktype,
1521 		       isc_rwlocktype_none DNS__DB_FLARG_PASS);
1522 
1523 		dns_name_copy(fname, foundname);
1524 
1525 		*nodep = node;
1526 		result = DNS_R_COVERINGNSEC;
1527 	} else {
1528 		result = ISC_R_NOTFOUND;
1529 	}
1530 	NODE_UNLOCK(lock, &nlocktype);
1531 	return result;
1532 }
1533 
1534 static isc_result_t
1535 find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
1536      dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
1537      dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset,
1538      dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
1539 	qpcnode_t *node = NULL;
1540 	isc_result_t result;
1541 	qpc_search_t search;
1542 	bool cname_ok = true;
1543 	bool found_noqname = false;
1544 	bool all_negative = true;
1545 	bool empty_node;
1546 	isc_rwlock_t *lock = NULL;
1547 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
1548 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
1549 	dns_slabheader_t *header = NULL;
1550 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
1551 	dns_slabheader_t *found = NULL, *nsheader = NULL;
1552 	dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL;
1553 	dns_slabheader_t *update = NULL, *updatesig = NULL;
1554 	dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL;
1555 	dns_typepair_t sigtype, negtype;
1556 
1557 	UNUSED(version);
1558 
1559 	REQUIRE(VALID_QPDB((qpcache_t *)db));
1560 	REQUIRE(version == NULL);
1561 
1562 	if (now == 0) {
1563 		now = isc_stdtime_now();
1564 	}
1565 
1566 	search = (qpc_search_t){
1567 		.qpdb = (qpcache_t *)db,
1568 		.options = options,
1569 		.now = now,
1570 	};
1571 
1572 	TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype);
1573 
1574 	/*
1575 	 * Search down from the root of the tree.
1576 	 */
1577 	result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL,
1578 			       &search.chain, (void **)&node, NULL);
1579 	if (result != ISC_R_NOTFOUND && foundname != NULL) {
1580 		dns_name_copy(&node->name, foundname);
1581 	}
1582 
1583 	/*
1584 	 * Check the QP chain to see if there's a node above us with a
1585 	 * active DNAME or NS rdatasets.
1586 	 *
1587 	 * We're only interested in nodes above QNAME, so if the result
1588 	 * was success, then we skip the last item in the chain.
1589 	 */
1590 	unsigned int len = dns_qpchain_length(&search.chain);
1591 	if (result == ISC_R_SUCCESS) {
1592 		len--;
1593 	}
1594 
1595 	for (unsigned int i = 0; i < len; i++) {
1596 		isc_result_t zcresult;
1597 		qpcnode_t *encloser = NULL;
1598 
1599 		dns_qpchain_node(&search.chain, i, NULL, (void **)&encloser,
1600 				 NULL);
1601 
1602 		zcresult = check_zonecut(encloser,
1603 					 (void *)&search DNS__DB_FLARG_PASS);
1604 		if (zcresult != DNS_R_CONTINUE) {
1605 			result = DNS_R_PARTIALMATCH;
1606 			search.chain.len = i - 1;
1607 			node = encloser;
1608 			if (foundname != NULL) {
1609 				dns_name_copy(&node->name, foundname);
1610 			}
1611 			break;
1612 		}
1613 	}
1614 
1615 	if (result == DNS_R_PARTIALMATCH) {
1616 		/*
1617 		 * If we discovered a covering DNAME skip looking for a covering
1618 		 * NSEC.
1619 		 */
1620 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
1621 		    (search.zonecut_header == NULL ||
1622 		     search.zonecut_header->type != dns_rdatatype_dname))
1623 		{
1624 			result = find_coveringnsec(
1625 				&search, name, nodep, now, foundname, rdataset,
1626 				sigrdataset DNS__DB_FLARG_PASS);
1627 			if (result == DNS_R_COVERINGNSEC) {
1628 				goto tree_exit;
1629 			}
1630 		}
1631 		if (search.zonecut != NULL) {
1632 			result = setup_delegation(&search, nodep, rdataset,
1633 						  sigrdataset,
1634 						  tlocktype DNS__DB_FLARG_PASS);
1635 			goto tree_exit;
1636 		} else {
1637 		find_ns:
1638 			result = find_deepest_zonecut(
1639 				&search, node, nodep, foundname, rdataset,
1640 				sigrdataset DNS__DB_FLARG_PASS);
1641 			goto tree_exit;
1642 		}
1643 	} else if (result != ISC_R_SUCCESS) {
1644 		goto tree_exit;
1645 	}
1646 
1647 	/*
1648 	 * Certain DNSSEC types are not subject to CNAME matching
1649 	 * (RFC4035, section 2.5 and RFC3007).
1650 	 *
1651 	 * We don't check for RRSIG, because we don't store RRSIG records
1652 	 * directly.
1653 	 */
1654 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
1655 		cname_ok = false;
1656 	}
1657 
1658 	/*
1659 	 * We now go looking for rdata...
1660 	 */
1661 
1662 	lock = &(search.qpdb->node_locks[node->locknum].lock);
1663 	NODE_RDLOCK(lock, &nlocktype);
1664 
1665 	/*
1666 	 * These pointers need to be reset here in case we did
1667 	 * 'goto find_ns' from somewhere below.
1668 	 */
1669 	found = NULL;
1670 	foundsig = NULL;
1671 	sigtype = DNS_SIGTYPE(type);
1672 	negtype = DNS_TYPEPAIR_VALUE(0, type);
1673 	nsheader = NULL;
1674 	nsecheader = NULL;
1675 	nssig = NULL;
1676 	nsecsig = NULL;
1677 	cnamesig = NULL;
1678 	empty_node = true;
1679 	header_prev = NULL;
1680 	for (header = node->data; header != NULL; header = header_next) {
1681 		header_next = header->next;
1682 		if (check_stale_header(node, header, &nlocktype, lock, &search,
1683 				       &header_prev))
1684 		{
1685 			/* Do nothing. */
1686 		} else if (EXISTS(header) && !ANCIENT(header)) {
1687 			/*
1688 			 * We now know that there is at least one active
1689 			 * non-stale rdataset at this node.
1690 			 */
1691 			empty_node = false;
1692 			if (header->noqname != NULL &&
1693 			    header->trust == dns_trust_secure)
1694 			{
1695 				found_noqname = true;
1696 			}
1697 			if (!NEGATIVE(header)) {
1698 				all_negative = false;
1699 			}
1700 
1701 			/*
1702 			 * If we found a type we were looking for, remember
1703 			 * it.
1704 			 */
1705 			if (header->type == type ||
1706 			    (type == dns_rdatatype_any &&
1707 			     DNS_TYPEPAIR_TYPE(header->type) != 0) ||
1708 			    (cname_ok && header->type == dns_rdatatype_cname))
1709 			{
1710 				/*
1711 				 * We've found the answer.
1712 				 */
1713 				found = header;
1714 				if (header->type == dns_rdatatype_cname &&
1715 				    cname_ok)
1716 				{
1717 					/*
1718 					 * If we've already got the
1719 					 * CNAME RRSIG, use it.
1720 					 */
1721 					if (cnamesig != NULL) {
1722 						foundsig = cnamesig;
1723 					} else {
1724 						sigtype = DNS_SIGTYPE(
1725 							dns_rdatatype_cname);
1726 					}
1727 				}
1728 			} else if (header->type == sigtype) {
1729 				/*
1730 				 * We've found the RRSIG rdataset for our
1731 				 * target type.  Remember it.
1732 				 */
1733 				foundsig = header;
1734 			} else if (header->type == RDATATYPE_NCACHEANY ||
1735 				   header->type == negtype)
1736 			{
1737 				/*
1738 				 * We've found a negative cache entry.
1739 				 */
1740 				found = header;
1741 			} else if (header->type == dns_rdatatype_ns) {
1742 				/*
1743 				 * Remember a NS rdataset even if we're
1744 				 * not specifically looking for it, because
1745 				 * we might need it later.
1746 				 */
1747 				nsheader = header;
1748 			} else if (header->type ==
1749 				   DNS_SIGTYPE(dns_rdatatype_ns))
1750 			{
1751 				/*
1752 				 * If we need the NS rdataset, we'll also
1753 				 * need its signature.
1754 				 */
1755 				nssig = header;
1756 			} else if (header->type == dns_rdatatype_nsec) {
1757 				nsecheader = header;
1758 			} else if (header->type ==
1759 				   DNS_SIGTYPE(dns_rdatatype_nsec))
1760 			{
1761 				nsecsig = header;
1762 			} else if (cname_ok &&
1763 				   header->type ==
1764 					   DNS_SIGTYPE(dns_rdatatype_cname))
1765 			{
1766 				/*
1767 				 * If we get a CNAME match, we'll also need
1768 				 * its signature.
1769 				 */
1770 				cnamesig = header;
1771 			}
1772 			header_prev = header;
1773 		} else {
1774 			header_prev = header;
1775 		}
1776 	}
1777 
1778 	if (empty_node) {
1779 		/*
1780 		 * We have an exact match for the name, but there are no
1781 		 * extant rdatasets.  That means that this node doesn't
1782 		 * meaningfully exist, and that we really have a partial match.
1783 		 */
1784 		NODE_UNLOCK(lock, &nlocktype);
1785 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
1786 			result = find_coveringnsec(
1787 				&search, name, nodep, now, foundname, rdataset,
1788 				sigrdataset DNS__DB_FLARG_PASS);
1789 			if (result == DNS_R_COVERINGNSEC) {
1790 				goto tree_exit;
1791 			}
1792 		}
1793 		goto find_ns;
1794 	}
1795 
1796 	/*
1797 	 * If we didn't find what we were looking for...
1798 	 */
1799 	if (found == NULL ||
1800 	    (DNS_TRUST_ADDITIONAL(found->trust) &&
1801 	     ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
1802 	    (found->trust == dns_trust_glue &&
1803 	     ((options & DNS_DBFIND_GLUEOK) == 0)) ||
1804 	    (DNS_TRUST_PENDING(found->trust) &&
1805 	     ((options & DNS_DBFIND_PENDINGOK) == 0)))
1806 	{
1807 		/*
1808 		 * Return covering NODATA NSEC record.
1809 		 */
1810 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
1811 		    nsecheader != NULL)
1812 		{
1813 			if (nodep != NULL) {
1814 				newref(search.qpdb, node, nlocktype,
1815 				       tlocktype DNS__DB_FLARG_PASS);
1816 				*nodep = node;
1817 			}
1818 			bindrdataset(search.qpdb, node, nsecheader, search.now,
1819 				     nlocktype, tlocktype,
1820 				     rdataset DNS__DB_FLARG_PASS);
1821 			if (need_headerupdate(nsecheader, search.now)) {
1822 				update = nsecheader;
1823 			}
1824 			if (nsecsig != NULL) {
1825 				bindrdataset(search.qpdb, node, nsecsig,
1826 					     search.now, nlocktype, tlocktype,
1827 					     sigrdataset DNS__DB_FLARG_PASS);
1828 				if (need_headerupdate(nsecsig, search.now)) {
1829 					updatesig = nsecsig;
1830 				}
1831 			}
1832 			result = DNS_R_COVERINGNSEC;
1833 			goto node_exit;
1834 		}
1835 
1836 		/*
1837 		 * This name was from a wild card.  Look for a covering NSEC.
1838 		 */
1839 		if (found == NULL && (found_noqname || all_negative) &&
1840 		    (search.options & DNS_DBFIND_COVERINGNSEC) != 0)
1841 		{
1842 			NODE_UNLOCK(lock, &nlocktype);
1843 			result = find_coveringnsec(
1844 				&search, name, nodep, now, foundname, rdataset,
1845 				sigrdataset DNS__DB_FLARG_PASS);
1846 			if (result == DNS_R_COVERINGNSEC) {
1847 				goto tree_exit;
1848 			}
1849 			goto find_ns;
1850 		}
1851 
1852 		/*
1853 		 * If there is an NS rdataset at this node, then this is the
1854 		 * deepest zone cut.
1855 		 */
1856 		if (nsheader != NULL) {
1857 			if (nodep != NULL) {
1858 				newref(search.qpdb, node, nlocktype,
1859 				       tlocktype DNS__DB_FLARG_PASS);
1860 				*nodep = node;
1861 			}
1862 			bindrdataset(search.qpdb, node, nsheader, search.now,
1863 				     nlocktype, tlocktype,
1864 				     rdataset DNS__DB_FLARG_PASS);
1865 			if (need_headerupdate(nsheader, search.now)) {
1866 				update = nsheader;
1867 			}
1868 			if (nssig != NULL) {
1869 				bindrdataset(search.qpdb, node, nssig,
1870 					     search.now, nlocktype, tlocktype,
1871 					     sigrdataset DNS__DB_FLARG_PASS);
1872 				if (need_headerupdate(nssig, search.now)) {
1873 					updatesig = nssig;
1874 				}
1875 			}
1876 			result = DNS_R_DELEGATION;
1877 			goto node_exit;
1878 		}
1879 
1880 		/*
1881 		 * Go find the deepest zone cut.
1882 		 */
1883 		NODE_UNLOCK(lock, &nlocktype);
1884 		goto find_ns;
1885 	}
1886 
1887 	/*
1888 	 * We found what we were looking for, or we found a CNAME.
1889 	 */
1890 
1891 	if (nodep != NULL) {
1892 		newref(search.qpdb, node, nlocktype,
1893 		       tlocktype DNS__DB_FLARG_PASS);
1894 		*nodep = node;
1895 	}
1896 
1897 	if (NEGATIVE(found)) {
1898 		/*
1899 		 * We found a negative cache entry.
1900 		 */
1901 		if (NXDOMAIN(found)) {
1902 			result = DNS_R_NCACHENXDOMAIN;
1903 		} else {
1904 			result = DNS_R_NCACHENXRRSET;
1905 		}
1906 	} else if (type != found->type && type != dns_rdatatype_any &&
1907 		   found->type == dns_rdatatype_cname)
1908 	{
1909 		/*
1910 		 * We weren't doing an ANY query and we found a CNAME instead
1911 		 * of the type we were looking for, so we need to indicate
1912 		 * that result to the caller.
1913 		 */
1914 		result = DNS_R_CNAME;
1915 	} else {
1916 		/*
1917 		 * An ordinary successful query!
1918 		 */
1919 		result = ISC_R_SUCCESS;
1920 	}
1921 
1922 	if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
1923 	    result == DNS_R_NCACHENXRRSET)
1924 	{
1925 		bindrdataset(search.qpdb, node, found, search.now, nlocktype,
1926 			     tlocktype, rdataset DNS__DB_FLARG_PASS);
1927 		if (need_headerupdate(found, search.now)) {
1928 			update = found;
1929 		}
1930 		if (!NEGATIVE(found) && foundsig != NULL) {
1931 			bindrdataset(search.qpdb, node, foundsig, search.now,
1932 				     nlocktype, tlocktype,
1933 				     sigrdataset DNS__DB_FLARG_PASS);
1934 			if (need_headerupdate(foundsig, search.now)) {
1935 				updatesig = foundsig;
1936 			}
1937 		}
1938 	}
1939 
1940 node_exit:
1941 	if ((update != NULL || updatesig != NULL) &&
1942 	    nlocktype != isc_rwlocktype_write)
1943 	{
1944 		NODE_FORCEUPGRADE(lock, &nlocktype);
1945 		POST(nlocktype);
1946 	}
1947 	if (update != NULL && need_headerupdate(update, search.now)) {
1948 		update_header(search.qpdb, update, search.now);
1949 	}
1950 	if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
1951 		update_header(search.qpdb, updatesig, search.now);
1952 	}
1953 
1954 	NODE_UNLOCK(lock, &nlocktype);
1955 
1956 tree_exit:
1957 	TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype);
1958 
1959 	/*
1960 	 * If we found a zonecut but aren't going to use it, we have to
1961 	 * let go of it.
1962 	 */
1963 	if (search.need_cleanup) {
1964 		node = search.zonecut;
1965 		INSIST(node != NULL);
1966 		lock = &(search.qpdb->node_locks[node->locknum].lock);
1967 
1968 		NODE_RDLOCK(lock, &nlocktype);
1969 		decref(search.qpdb, node, &nlocktype, &tlocktype,
1970 		       true DNS__DB_FLARG_PASS);
1971 		NODE_UNLOCK(lock, &nlocktype);
1972 		INSIST(tlocktype == isc_rwlocktype_none);
1973 	}
1974 
1975 	update_cachestats(search.qpdb, result);
1976 	return result;
1977 }
1978 
1979 static isc_result_t
1980 findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
1981 	    isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname,
1982 	    dns_name_t *dcname, dns_rdataset_t *rdataset,
1983 	    dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
1984 	qpcnode_t *node = NULL;
1985 	isc_rwlock_t *lock = NULL;
1986 	isc_result_t result;
1987 	qpc_search_t search;
1988 	dns_slabheader_t *header = NULL;
1989 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
1990 	dns_slabheader_t *found = NULL, *foundsig = NULL;
1991 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
1992 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
1993 	bool dcnull = (dcname == NULL);
1994 
1995 	REQUIRE(VALID_QPDB((qpcache_t *)db));
1996 
1997 	if (now == 0) {
1998 		now = isc_stdtime_now();
1999 	}
2000 
2001 	search = (qpc_search_t){
2002 		.qpdb = (qpcache_t *)db,
2003 		.options = options,
2004 		.now = now,
2005 	};
2006 
2007 	if (dcnull) {
2008 		dcname = foundname;
2009 	}
2010 
2011 	TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype);
2012 
2013 	/*
2014 	 * Search down from the root of the tree.
2015 	 */
2016 	result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL,
2017 			       &search.chain, (void **)&node, NULL);
2018 	if (result != ISC_R_NOTFOUND) {
2019 		dns_name_copy(&node->name, dcname);
2020 	}
2021 	if ((options & DNS_DBFIND_NOEXACT) != 0 && result == ISC_R_SUCCESS) {
2022 		int len = dns_qpchain_length(&search.chain);
2023 		if (len >= 2) {
2024 			node = NULL;
2025 			dns_qpchain_node(&search.chain, len - 2, NULL,
2026 					 (void **)&node, NULL);
2027 			search.chain.len = len - 1;
2028 			result = DNS_R_PARTIALMATCH;
2029 		} else {
2030 			result = ISC_R_NOTFOUND;
2031 		}
2032 	}
2033 
2034 	if (result == DNS_R_PARTIALMATCH) {
2035 		result = find_deepest_zonecut(&search, node, nodep, foundname,
2036 					      rdataset,
2037 					      sigrdataset DNS__DB_FLARG_PASS);
2038 		goto tree_exit;
2039 	} else if (result != ISC_R_SUCCESS) {
2040 		goto tree_exit;
2041 	} else if (!dcnull) {
2042 		dns_name_copy(dcname, foundname);
2043 	}
2044 
2045 	/*
2046 	 * We now go looking for an NS rdataset at the node.
2047 	 */
2048 
2049 	lock = &(search.qpdb->node_locks[node->locknum].lock);
2050 	NODE_RDLOCK(lock, &nlocktype);
2051 
2052 	for (header = node->data; header != NULL; header = header_next) {
2053 		header_next = header->next;
2054 		if (check_stale_header(node, header, &nlocktype, lock, &search,
2055 				       &header_prev))
2056 		{
2057 			/*
2058 			 * The function dns_qp_lookup found us a matching
2059 			 * node for 'name' and stored the result in 'dcname'.
2060 			 * This is the deepest known zonecut in our database.
2061 			 * However, this node may be stale and if serve-stale
2062 			 * is not enabled (in other words 'stale-answer-enable'
2063 			 * is set to no), this node may not be used as a
2064 			 * zonecut we know about. If so, find the deepest
2065 			 * zonecut from this node up and return that instead.
2066 			 */
2067 			NODE_UNLOCK(lock, &nlocktype);
2068 			result = find_deepest_zonecut(
2069 				&search, node, nodep, foundname, rdataset,
2070 				sigrdataset DNS__DB_FLARG_PASS);
2071 			dns_name_copy(foundname, dcname);
2072 			goto tree_exit;
2073 		} else if (EXISTS(header) && !ANCIENT(header)) {
2074 			/*
2075 			 * If we found a type we were looking for, remember
2076 			 * it.
2077 			 */
2078 			if (header->type == dns_rdatatype_ns) {
2079 				/*
2080 				 * Remember a NS rdataset even if we're
2081 				 * not specifically looking for it, because
2082 				 * we might need it later.
2083 				 */
2084 				found = header;
2085 			} else if (header->type ==
2086 				   DNS_SIGTYPE(dns_rdatatype_ns))
2087 			{
2088 				/*
2089 				 * If we need the NS rdataset, we'll also
2090 				 * need its signature.
2091 				 */
2092 				foundsig = header;
2093 			}
2094 			header_prev = header;
2095 		} else {
2096 			header_prev = header;
2097 		}
2098 	}
2099 
2100 	if (found == NULL) {
2101 		/*
2102 		 * No NS records here.
2103 		 */
2104 		NODE_UNLOCK(lock, &nlocktype);
2105 		result = find_deepest_zonecut(&search, node, nodep, foundname,
2106 					      rdataset,
2107 					      sigrdataset DNS__DB_FLARG_PASS);
2108 		goto tree_exit;
2109 	}
2110 
2111 	if (nodep != NULL) {
2112 		newref(search.qpdb, node, nlocktype,
2113 		       tlocktype DNS__DB_FLARG_PASS);
2114 		*nodep = node;
2115 	}
2116 
2117 	bindrdataset(search.qpdb, node, found, search.now, nlocktype, tlocktype,
2118 		     rdataset DNS__DB_FLARG_PASS);
2119 	if (foundsig != NULL) {
2120 		bindrdataset(search.qpdb, node, foundsig, search.now, nlocktype,
2121 			     tlocktype, sigrdataset DNS__DB_FLARG_PASS);
2122 	}
2123 
2124 	if (need_headerupdate(found, search.now) ||
2125 	    (foundsig != NULL && need_headerupdate(foundsig, search.now)))
2126 	{
2127 		if (nlocktype != isc_rwlocktype_write) {
2128 			NODE_FORCEUPGRADE(lock, &nlocktype);
2129 			POST(nlocktype);
2130 		}
2131 		if (need_headerupdate(found, search.now)) {
2132 			update_header(search.qpdb, found, search.now);
2133 		}
2134 		if (foundsig != NULL && need_headerupdate(foundsig, search.now))
2135 		{
2136 			update_header(search.qpdb, foundsig, search.now);
2137 		}
2138 	}
2139 
2140 	NODE_UNLOCK(lock, &nlocktype);
2141 
2142 tree_exit:
2143 	TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype);
2144 
2145 	INSIST(!search.need_cleanup);
2146 
2147 	if (result == DNS_R_DELEGATION) {
2148 		result = ISC_R_SUCCESS;
2149 	}
2150 
2151 	return result;
2152 }
2153 
2154 static isc_result_t
2155 findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
2156 	     dns_rdatatype_t type, dns_rdatatype_t covers, isc_stdtime_t now,
2157 	     dns_rdataset_t *rdataset,
2158 	     dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
2159 	qpcache_t *qpdb = (qpcache_t *)db;
2160 	qpcnode_t *qpnode = (qpcnode_t *)node;
2161 	dns_slabheader_t *header = NULL, *header_next = NULL;
2162 	dns_slabheader_t *found = NULL, *foundsig = NULL;
2163 	dns_typepair_t matchtype, sigmatchtype, negtype;
2164 	isc_result_t result;
2165 	isc_rwlock_t *lock = NULL;
2166 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
2167 
2168 	REQUIRE(VALID_QPDB(qpdb));
2169 	REQUIRE(type != dns_rdatatype_any);
2170 
2171 	UNUSED(version);
2172 
2173 	result = ISC_R_SUCCESS;
2174 
2175 	if (now == 0) {
2176 		now = isc_stdtime_now();
2177 	}
2178 
2179 	lock = &qpdb->node_locks[qpnode->locknum].lock;
2180 	NODE_RDLOCK(lock, &nlocktype);
2181 
2182 	matchtype = DNS_TYPEPAIR_VALUE(type, covers);
2183 	negtype = DNS_TYPEPAIR_VALUE(0, type);
2184 	if (covers == 0) {
2185 		sigmatchtype = DNS_SIGTYPE(type);
2186 	} else {
2187 		sigmatchtype = 0;
2188 	}
2189 
2190 	for (header = qpnode->data; header != NULL; header = header_next) {
2191 		header_next = header->next;
2192 		if (!ACTIVE(header, now)) {
2193 			if ((header->ttl + STALE_TTL(header, qpdb) <
2194 			     now - QPDB_VIRTUAL) &&
2195 			    (nlocktype == isc_rwlocktype_write ||
2196 			     NODE_TRYUPGRADE(lock, &nlocktype) ==
2197 				     ISC_R_SUCCESS))
2198 			{
2199 				/*
2200 				 * We update the node's status only when we
2201 				 * can get write access.
2202 				 *
2203 				 * We don't check if refcurrent(qpnode) == 0
2204 				 * and try to free like we do in find(),
2205 				 * because refcurrent(qpnode) must be
2206 				 * non-zero.  This is so because 'node' is an
2207 				 * argument to the function.
2208 				 */
2209 				mark(header, DNS_SLABHEADERATTR_ANCIENT);
2210 				HEADERNODE(header)->dirty = 1;
2211 			}
2212 		} else if (EXISTS(header) && !ANCIENT(header)) {
2213 			if (header->type == matchtype) {
2214 				found = header;
2215 			} else if (header->type == RDATATYPE_NCACHEANY ||
2216 				   header->type == negtype)
2217 			{
2218 				found = header;
2219 			} else if (header->type == sigmatchtype) {
2220 				foundsig = header;
2221 			}
2222 		}
2223 	}
2224 	if (found != NULL) {
2225 		bindrdataset(qpdb, qpnode, found, now, nlocktype,
2226 			     isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS);
2227 		if (!NEGATIVE(found) && foundsig != NULL) {
2228 			bindrdataset(qpdb, qpnode, foundsig, now, nlocktype,
2229 				     isc_rwlocktype_none,
2230 				     sigrdataset DNS__DB_FLARG_PASS);
2231 		}
2232 	}
2233 
2234 	NODE_UNLOCK(lock, &nlocktype);
2235 
2236 	if (found == NULL) {
2237 		return ISC_R_NOTFOUND;
2238 	}
2239 
2240 	if (NEGATIVE(found)) {
2241 		/*
2242 		 * We found a negative cache entry.
2243 		 */
2244 		if (NXDOMAIN(found)) {
2245 			result = DNS_R_NCACHENXDOMAIN;
2246 		} else {
2247 			result = DNS_R_NCACHENXRRSET;
2248 		}
2249 	}
2250 
2251 	update_cachestats(qpdb, result);
2252 
2253 	return result;
2254 }
2255 
2256 static isc_result_t
2257 setcachestats(dns_db_t *db, isc_stats_t *stats) {
2258 	qpcache_t *qpdb = (qpcache_t *)db;
2259 
2260 	REQUIRE(VALID_QPDB(qpdb));
2261 	REQUIRE(stats != NULL);
2262 
2263 	isc_stats_attach(stats, &qpdb->cachestats);
2264 	return ISC_R_SUCCESS;
2265 }
2266 
2267 static dns_stats_t *
2268 getrrsetstats(dns_db_t *db) {
2269 	qpcache_t *qpdb = (qpcache_t *)db;
2270 
2271 	REQUIRE(VALID_QPDB(qpdb));
2272 
2273 	return qpdb->rrsetstats;
2274 }
2275 
2276 static isc_result_t
2277 setservestalettl(dns_db_t *db, dns_ttl_t ttl) {
2278 	qpcache_t *qpdb = (qpcache_t *)db;
2279 
2280 	REQUIRE(VALID_QPDB(qpdb));
2281 
2282 	/* currently no bounds checking.  0 means disable. */
2283 	qpdb->common.serve_stale_ttl = ttl;
2284 	return ISC_R_SUCCESS;
2285 }
2286 
2287 static isc_result_t
2288 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
2289 	qpcache_t *qpdb = (qpcache_t *)db;
2290 
2291 	REQUIRE(VALID_QPDB(qpdb));
2292 
2293 	*ttl = qpdb->common.serve_stale_ttl;
2294 	return ISC_R_SUCCESS;
2295 }
2296 
2297 static isc_result_t
2298 setservestalerefresh(dns_db_t *db, uint32_t interval) {
2299 	qpcache_t *qpdb = (qpcache_t *)db;
2300 
2301 	REQUIRE(VALID_QPDB(qpdb));
2302 
2303 	/* currently no bounds checking.  0 means disable. */
2304 	qpdb->serve_stale_refresh = interval;
2305 	return ISC_R_SUCCESS;
2306 }
2307 
2308 static isc_result_t
2309 getservestalerefresh(dns_db_t *db, uint32_t *interval) {
2310 	qpcache_t *qpdb = (qpcache_t *)db;
2311 
2312 	REQUIRE(VALID_QPDB(qpdb));
2313 
2314 	*interval = qpdb->serve_stale_refresh;
2315 	return ISC_R_SUCCESS;
2316 }
2317 
2318 static void
2319 expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) {
2320 	qpcache_t *qpdb = (qpcache_t *)db;
2321 	qpcnode_t *qpnode = (qpcnode_t *)node;
2322 	dns_slabheader_t *header = data;
2323 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
2324 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
2325 
2326 	NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
2327 	expireheader(header, &nlocktype, &tlocktype,
2328 		     dns_expire_flush DNS__DB_FILELINE);
2329 	NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
2330 	INSIST(tlocktype == isc_rwlocktype_none);
2331 }
2332 
2333 static size_t
2334 rdataset_size(dns_slabheader_t *header) {
2335 	if (!NONEXISTENT(header)) {
2336 		return dns_rdataslab_size((unsigned char *)header,
2337 					  sizeof(*header));
2338 	}
2339 
2340 	return sizeof(*header);
2341 }
2342 
2343 static size_t
2344 expire_lru_headers(qpcache_t *qpdb, unsigned int locknum,
2345 		   isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
2346 		   size_t purgesize DNS__DB_FLARG) {
2347 	dns_slabheader_t *header = NULL;
2348 	size_t purged = 0;
2349 
2350 	for (header = ISC_LIST_TAIL(qpdb->lru[locknum]);
2351 	     header != NULL && header->last_used <= qpdb->last_used &&
2352 	     purged <= purgesize;
2353 	     header = ISC_LIST_TAIL(qpdb->lru[locknum]))
2354 	{
2355 		size_t header_size = rdataset_size(header);
2356 
2357 		/*
2358 		 * Unlink the entry at this point to avoid checking it
2359 		 * again even if it's currently used someone else and
2360 		 * cannot be purged at this moment.  This entry won't be
2361 		 * referenced any more (so unlinking is safe) since the
2362 		 * TTL will be reset to 0.
2363 		 */
2364 		ISC_LIST_UNLINK(qpdb->lru[locknum], header, link);
2365 		expireheader(header, nlocktypep, tlocktypep,
2366 			     dns_expire_lru DNS__DB_FLARG_PASS);
2367 		purged += header_size;
2368 	}
2369 
2370 	return purged;
2371 }
2372 
2373 /*%
2374  * Purge some expired and/or stale (i.e. unused for some period) cache entries
2375  * due to an overmem condition.  To recover from this condition quickly,
2376  * we clean up entries up to the size of newly added rdata that triggered
2377  * the overmem; this is accessible via newheader.
2378  *
2379  * The LRU lists tails are processed in LRU order to the nearest second.
2380  *
2381  * A write lock on the tree must be held.
2382  */
2383 static void
2384 overmem(qpcache_t *qpdb, dns_slabheader_t *newheader,
2385 	isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
2386 	uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->node_lock_count;
2387 	uint32_t locknum = locknum_start;
2388 	size_t purgesize, purged = 0;
2389 	isc_stdtime_t min_last_used = 0;
2390 	size_t max_passes = 8;
2391 
2392 	/*
2393 	 * Maximum estimated size of the data being added: The size
2394 	 * of the rdataset, plus a new QP database node and nodename,
2395 	 * and a possible additional NSEC node and nodename. Also add
2396 	 * a 12k margin for a possible QP-trie chunk allocation.
2397 	 * (It's okay to overestimate, we want to get cache memory
2398 	 * down quickly.)
2399 	 */
2400 	purgesize = 2 * (sizeof(qpcnode_t) +
2401 			 dns_name_size(&HEADERNODE(newheader)->name)) +
2402 		    rdataset_size(newheader) + 12288;
2403 again:
2404 	do {
2405 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
2406 		NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
2407 
2408 		purged += expire_lru_headers(
2409 			qpdb, locknum, &nlocktype, tlocktypep,
2410 			purgesize - purged DNS__DB_FLARG_PASS);
2411 
2412 		/*
2413 		 * Work out the oldest remaining last_used values of the list
2414 		 * tails as we walk across the array of lru lists.
2415 		 */
2416 		dns_slabheader_t *header = ISC_LIST_TAIL(qpdb->lru[locknum]);
2417 		if (header != NULL &&
2418 		    (min_last_used == 0 || header->last_used < min_last_used))
2419 		{
2420 			min_last_used = header->last_used;
2421 		}
2422 		NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
2423 		locknum = (locknum + 1) % qpdb->node_lock_count;
2424 	} while (locknum != locknum_start && purged <= purgesize);
2425 
2426 	/*
2427 	 * Update qpdb->last_used if we have walked all the list tails and have
2428 	 * not freed the required amount of memory.
2429 	 */
2430 	if (purged < purgesize) {
2431 		if (min_last_used != 0) {
2432 			qpdb->last_used = min_last_used;
2433 			if (max_passes-- > 0) {
2434 				goto again;
2435 			}
2436 		}
2437 	}
2438 }
2439 
2440 /*%
2441  * These functions allow the heap code to rank the priority of each
2442  * element.  It returns true if v1 happens "sooner" than v2.
2443  */
2444 static bool
2445 ttl_sooner(void *v1, void *v2) {
2446 	dns_slabheader_t *h1 = v1;
2447 	dns_slabheader_t *h2 = v2;
2448 
2449 	return h1->ttl < h2->ttl;
2450 }
2451 
2452 /*%
2453  * This function sets the heap index into the header.
2454  */
2455 static void
2456 set_index(void *what, unsigned int idx) {
2457 	dns_slabheader_t *h = what;
2458 
2459 	h->heap_index = idx;
2460 }
2461 
2462 static void
2463 free_qpdb(qpcache_t *qpdb, bool log) {
2464 	unsigned int i;
2465 	char buf[DNS_NAME_FORMATSIZE];
2466 	dns_qp_t **treep = NULL;
2467 
2468 	for (;;) {
2469 		/*
2470 		 * pick the next tree to (start to) destroy
2471 		 */
2472 		treep = &qpdb->tree;
2473 		if (*treep == NULL) {
2474 			treep = &qpdb->nsec;
2475 			if (*treep == NULL) {
2476 				break;
2477 			}
2478 		}
2479 
2480 		dns_qp_destroy(treep);
2481 		INSIST(*treep == NULL);
2482 	}
2483 
2484 	if (log) {
2485 		if (dns_name_dynamic(&qpdb->common.origin)) {
2486 			dns_name_format(&qpdb->common.origin, buf, sizeof(buf));
2487 		} else {
2488 			strlcpy(buf, "<UNKNOWN>", sizeof(buf));
2489 		}
2490 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
2491 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
2492 			      "done free_qpdb(%s)", buf);
2493 	}
2494 	if (dns_name_dynamic(&qpdb->common.origin)) {
2495 		dns_name_free(&qpdb->common.origin, qpdb->common.mctx);
2496 	}
2497 	for (i = 0; i < qpdb->node_lock_count; i++) {
2498 		isc_refcount_destroy(&qpdb->node_locks[i].references);
2499 		NODE_DESTROYLOCK(&qpdb->node_locks[i].lock);
2500 	}
2501 
2502 	/*
2503 	 * Clean up LRU / re-signing order lists.
2504 	 */
2505 	if (qpdb->lru != NULL) {
2506 		for (i = 0; i < qpdb->node_lock_count; i++) {
2507 			INSIST(ISC_LIST_EMPTY(qpdb->lru[i]));
2508 		}
2509 		isc_mem_cput(qpdb->common.mctx, qpdb->lru,
2510 			     qpdb->node_lock_count,
2511 			     sizeof(dns_slabheaderlist_t));
2512 	}
2513 	/*
2514 	 * Clean up dead node buckets.
2515 	 */
2516 	for (i = 0; i < qpdb->node_lock_count; i++) {
2517 		INSIST(isc_queue_empty(&qpdb->deadnodes[i]));
2518 		isc_queue_destroy(&qpdb->deadnodes[i]);
2519 	}
2520 	isc_mem_cput(qpdb->common.mctx, qpdb->deadnodes, qpdb->node_lock_count,
2521 		     sizeof(qpdb->deadnodes[0]));
2522 
2523 	/*
2524 	 * Clean up heap objects.
2525 	 */
2526 	if (qpdb->heaps != NULL) {
2527 		for (i = 0; i < qpdb->node_lock_count; i++) {
2528 			isc_heap_destroy(&qpdb->heaps[i]);
2529 		}
2530 		isc_mem_cput(qpdb->hmctx, qpdb->heaps, qpdb->node_lock_count,
2531 			     sizeof(isc_heap_t *));
2532 	}
2533 
2534 	if (qpdb->rrsetstats != NULL) {
2535 		dns_stats_detach(&qpdb->rrsetstats);
2536 	}
2537 	if (qpdb->cachestats != NULL) {
2538 		isc_stats_detach(&qpdb->cachestats);
2539 	}
2540 	if (qpdb->gluecachestats != NULL) {
2541 		isc_stats_detach(&qpdb->gluecachestats);
2542 	}
2543 
2544 	isc_mem_cput(qpdb->common.mctx, qpdb->node_locks, qpdb->node_lock_count,
2545 		     sizeof(db_nodelock_t));
2546 	TREE_DESTROYLOCK(&qpdb->tree_lock);
2547 	isc_refcount_destroy(&qpdb->common.references);
2548 
2549 	isc_rwlock_destroy(&qpdb->lock);
2550 	qpdb->common.magic = 0;
2551 	qpdb->common.impmagic = 0;
2552 	isc_mem_detach(&qpdb->hmctx);
2553 
2554 	isc_mem_putanddetach(&qpdb->common.mctx, qpdb, sizeof(*qpdb));
2555 }
2556 
2557 static void
2558 qpdb_destroy(dns_db_t *arg) {
2559 	qpcache_t *qpdb = (qpcache_t *)arg;
2560 	bool want_free = false;
2561 	unsigned int i;
2562 	unsigned int inactive = 0;
2563 
2564 	if (qpdb->origin_node != NULL) {
2565 		qpcnode_detach(&qpdb->origin_node);
2566 	}
2567 
2568 	/*
2569 	 * Even though there are no external direct references, there still
2570 	 * may be nodes in use.
2571 	 */
2572 	for (i = 0; i < qpdb->node_lock_count; i++) {
2573 		isc_rwlocktype_t nodelock = isc_rwlocktype_none;
2574 		NODE_WRLOCK(&qpdb->node_locks[i].lock, &nodelock);
2575 		qpdb->node_locks[i].exiting = true;
2576 		if (isc_refcount_current(&qpdb->node_locks[i].references) == 0)
2577 		{
2578 			inactive++;
2579 		}
2580 		NODE_UNLOCK(&qpdb->node_locks[i].lock, &nodelock);
2581 	}
2582 
2583 	if (inactive != 0) {
2584 		RWLOCK(&qpdb->lock, isc_rwlocktype_write);
2585 		qpdb->active -= inactive;
2586 		if (qpdb->active == 0) {
2587 			want_free = true;
2588 		}
2589 		RWUNLOCK(&qpdb->lock, isc_rwlocktype_write);
2590 		if (want_free) {
2591 			char buf[DNS_NAME_FORMATSIZE];
2592 			if (dns_name_dynamic(&qpdb->common.origin)) {
2593 				dns_name_format(&qpdb->common.origin, buf,
2594 						sizeof(buf));
2595 			} else {
2596 				strlcpy(buf, "<UNKNOWN>", sizeof(buf));
2597 			}
2598 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
2599 				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
2600 				      "calling free_qpdb(%s)", buf);
2601 			free_qpdb(qpdb, true);
2602 		}
2603 	}
2604 }
2605 
2606 static void
2607 mark_ancient(dns_slabheader_t *header) {
2608 	setttl(header, 0);
2609 	mark(header, DNS_SLABHEADERATTR_ANCIENT);
2610 	HEADERNODE(header)->dirty = 1;
2611 }
2612 
2613 /*%
2614  * Clean up dead nodes.  These are nodes which have no references, and
2615  * have no data.  They are dead but we could not or chose not to delete
2616  * them when we deleted all the data at that node because we did not want
2617  * to wait for the tree write lock.
2618  */
2619 static void
2620 cleanup_deadnodes(void *arg) {
2621 	qpcache_t *qpdb = arg;
2622 	uint16_t locknum = isc_tid();
2623 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
2624 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
2625 	qpcnode_t *qpnode = NULL, *qpnext = NULL;
2626 	isc_queue_t deadnodes;
2627 
2628 	INSIST(locknum < qpdb->node_lock_count);
2629 
2630 	isc_queue_init(&deadnodes);
2631 
2632 	TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
2633 	NODE_WRLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
2634 
2635 	RUNTIME_CHECK(isc_queue_splice(&deadnodes, &qpdb->deadnodes[locknum]));
2636 	isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) {
2637 		decref(qpdb, qpnode, &nlocktype, &tlocktype, false);
2638 	}
2639 
2640 	NODE_UNLOCK(&qpdb->node_locks[locknum].lock, &nlocktype);
2641 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
2642 }
2643 
2644 /*
2645  * This function is assumed to be called when a node is newly referenced
2646  * and can be in the deadnode list.  In that case the node will be references
2647  * and cleanup_deadnodes() will remove it from the list when the cleaning
2648  * happens.
2649  * Note: while a new reference is gained in multiple places, there are only very
2650  * few cases where the node can be in the deadnode list (only empty nodes can
2651  * have been added to the list).
2652  */
2653 static void
2654 reactivate_node(qpcache_t *qpdb, qpcnode_t *node,
2655 		isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) {
2656 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
2657 	isc_rwlock_t *nodelock = &qpdb->node_locks[node->locknum].lock;
2658 
2659 	NODE_RDLOCK(nodelock, &nlocktype);
2660 	newref(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS);
2661 	NODE_UNLOCK(nodelock, &nlocktype);
2662 }
2663 
2664 static qpcnode_t *
2665 new_qpcnode(qpcache_t *qpdb, const dns_name_t *name) {
2666 	qpcnode_t *newdata = isc_mem_get(qpdb->common.mctx, sizeof(*newdata));
2667 	*newdata = (qpcnode_t){
2668 		.name = DNS_NAME_INITEMPTY,
2669 		.references = ISC_REFCOUNT_INITIALIZER(1),
2670 		.locknum = isc_random_uniform(qpdb->node_lock_count),
2671 	};
2672 
2673 	INSIST(newdata->locknum < qpdb->node_lock_count);
2674 
2675 	isc_mem_attach(qpdb->common.mctx, &newdata->mctx);
2676 	dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name);
2677 
2678 #ifdef DNS_DB_NODETRACE
2679 	fprintf(stderr, "new_qpcnode:%s:%s:%d:%p->references = 1\n", __func__,
2680 		__FILE__, __LINE__ + 1, name);
2681 #endif
2682 	return newdata;
2683 }
2684 
2685 static isc_result_t
2686 findnode(dns_db_t *db, const dns_name_t *name, bool create,
2687 	 dns_dbnode_t **nodep DNS__DB_FLARG) {
2688 	qpcache_t *qpdb = (qpcache_t *)db;
2689 	qpcnode_t *node = NULL;
2690 	isc_result_t result;
2691 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
2692 
2693 	TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
2694 	result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL);
2695 	if (result != ISC_R_SUCCESS) {
2696 		if (!create) {
2697 			goto unlock;
2698 		}
2699 		/*
2700 		 * Try to upgrade the lock and if that fails unlock then relock.
2701 		 */
2702 		TREE_FORCEUPGRADE(&qpdb->tree_lock, &tlocktype);
2703 		result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL);
2704 		if (result != ISC_R_SUCCESS) {
2705 			node = new_qpcnode(qpdb, name);
2706 			result = dns_qp_insert(qpdb->tree, node, 0);
2707 			INSIST(result == ISC_R_SUCCESS);
2708 			qpcnode_unref(node);
2709 		}
2710 	}
2711 
2712 	reactivate_node(qpdb, node, tlocktype DNS__DB_FLARG_PASS);
2713 
2714 	*nodep = (dns_dbnode_t *)node;
2715 unlock:
2716 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
2717 
2718 	return result;
2719 }
2720 
2721 static void
2722 attachnode(dns_db_t *db, dns_dbnode_t *source,
2723 	   dns_dbnode_t **targetp DNS__DB_FLARG) {
2724 	REQUIRE(VALID_QPDB((qpcache_t *)db));
2725 	REQUIRE(targetp != NULL && *targetp == NULL);
2726 
2727 	qpcache_t *qpdb = (qpcache_t *)db;
2728 	qpcnode_t *node = (qpcnode_t *)source;
2729 
2730 	newref(qpdb, node, isc_rwlocktype_none,
2731 	       isc_rwlocktype_none DNS__DB_FLARG_PASS);
2732 
2733 	*targetp = source;
2734 }
2735 
2736 static void
2737 detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG) {
2738 	qpcache_t *qpdb = (qpcache_t *)db;
2739 	qpcnode_t *node = NULL;
2740 	bool want_free = false;
2741 	bool inactive = false;
2742 	db_nodelock_t *nodelock = NULL;
2743 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
2744 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
2745 
2746 	REQUIRE(VALID_QPDB(qpdb));
2747 	REQUIRE(targetp != NULL && *targetp != NULL);
2748 
2749 	node = (qpcnode_t *)(*targetp);
2750 	nodelock = &qpdb->node_locks[node->locknum];
2751 
2752 	NODE_RDLOCK(&nodelock->lock, &nlocktype);
2753 
2754 	if (decref(qpdb, node, &nlocktype, &tlocktype, true DNS__DB_FLARG_PASS))
2755 	{
2756 		if (isc_refcount_current(&nodelock->references) == 0 &&
2757 		    nodelock->exiting)
2758 		{
2759 			inactive = true;
2760 		}
2761 	}
2762 
2763 	NODE_UNLOCK(&nodelock->lock, &nlocktype);
2764 	INSIST(tlocktype == isc_rwlocktype_none);
2765 
2766 	*targetp = NULL;
2767 
2768 	if (inactive) {
2769 		RWLOCK(&qpdb->lock, isc_rwlocktype_write);
2770 		qpdb->active--;
2771 		if (qpdb->active == 0) {
2772 			want_free = true;
2773 		}
2774 		RWUNLOCK(&qpdb->lock, isc_rwlocktype_write);
2775 		if (want_free) {
2776 			char buf[DNS_NAME_FORMATSIZE];
2777 			if (dns_name_dynamic(&qpdb->common.origin)) {
2778 				dns_name_format(&qpdb->common.origin, buf,
2779 						sizeof(buf));
2780 			} else {
2781 				strlcpy(buf, "<UNKNOWN>", sizeof(buf));
2782 			}
2783 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
2784 				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
2785 				      "calling free_qpdb(%s)", buf);
2786 			free_qpdb(qpdb, true);
2787 		}
2788 	}
2789 }
2790 
2791 static isc_result_t
2792 createiterator(dns_db_t *db, unsigned int options ISC_ATTR_UNUSED,
2793 	       dns_dbiterator_t **iteratorp) {
2794 	qpcache_t *qpdb = (qpcache_t *)db;
2795 	qpc_dbit_t *qpdbiter = NULL;
2796 
2797 	REQUIRE(VALID_QPDB(qpdb));
2798 
2799 	qpdbiter = isc_mem_get(qpdb->common.mctx, sizeof(*qpdbiter));
2800 	*qpdbiter = (qpc_dbit_t){
2801 		.common.methods = &dbiterator_methods,
2802 		.common.magic = DNS_DBITERATOR_MAGIC,
2803 		.paused = true,
2804 	};
2805 
2806 	qpdbiter->name = dns_fixedname_initname(&qpdbiter->fixed);
2807 	dns_db_attach(db, &qpdbiter->common.db);
2808 	dns_qpiter_init(qpdb->tree, &qpdbiter->iter);
2809 
2810 	*iteratorp = (dns_dbiterator_t *)qpdbiter;
2811 	return ISC_R_SUCCESS;
2812 }
2813 
2814 static isc_result_t
2815 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
2816 	     unsigned int options, isc_stdtime_t now,
2817 	     dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) {
2818 	qpcache_t *qpdb = (qpcache_t *)db;
2819 	qpcnode_t *qpnode = (qpcnode_t *)node;
2820 	qpc_rditer_t *iterator = NULL;
2821 
2822 	REQUIRE(VALID_QPDB(qpdb));
2823 
2824 	UNUSED(version);
2825 
2826 	iterator = isc_mem_get(qpdb->common.mctx, sizeof(*iterator));
2827 
2828 	if (now == 0) {
2829 		now = isc_stdtime_now();
2830 	}
2831 
2832 	iterator->common.magic = DNS_RDATASETITER_MAGIC;
2833 	iterator->common.methods = &rdatasetiter_methods;
2834 	iterator->common.db = db;
2835 	iterator->common.node = node;
2836 	iterator->common.version = NULL;
2837 	iterator->common.options = options;
2838 	iterator->common.now = now;
2839 	iterator->current = NULL;
2840 
2841 	newref(qpdb, qpnode, isc_rwlocktype_none,
2842 	       isc_rwlocktype_none DNS__DB_FLARG_PASS);
2843 
2844 	*iteratorp = (dns_rdatasetiter_t *)iterator;
2845 
2846 	return ISC_R_SUCCESS;
2847 }
2848 
2849 static bool
2850 overmaxtype(qpcache_t *qpdb, uint32_t ntypes) {
2851 	if (qpdb->maxtypepername == 0) {
2852 		return false;
2853 	}
2854 
2855 	return ntypes >= qpdb->maxtypepername;
2856 }
2857 
2858 static bool
2859 prio_header(dns_slabheader_t *header) {
2860 	if (NEGATIVE(header) && prio_type(DNS_TYPEPAIR_COVERS(header->type))) {
2861 		return true;
2862 	}
2863 
2864 	return prio_type(header->type);
2865 }
2866 
2867 static isc_result_t
2868 add(qpcache_t *qpdb, qpcnode_t *qpnode,
2869     const dns_name_t *nodename ISC_ATTR_UNUSED, dns_slabheader_t *newheader,
2870     unsigned int options, bool loading, dns_rdataset_t *addedrdataset,
2871     isc_stdtime_t now, isc_rwlocktype_t nlocktype,
2872     isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
2873 	dns_slabheader_t *topheader = NULL, *topheader_prev = NULL;
2874 	dns_slabheader_t *header = NULL, *sigheader = NULL;
2875 	dns_slabheader_t *prioheader = NULL, *expireheader = NULL;
2876 	bool header_nx;
2877 	bool newheader_nx;
2878 	dns_typepair_t negtype = 0;
2879 	dns_trust_t trust;
2880 	int idx;
2881 	uint32_t ntypes = 0;
2882 
2883 	if ((options & DNS_DBADD_FORCE) != 0) {
2884 		trust = dns_trust_ultimate;
2885 	} else {
2886 		trust = newheader->trust;
2887 	}
2888 
2889 	newheader_nx = NONEXISTENT(newheader) ? true : false;
2890 
2891 	if (!newheader_nx) {
2892 		dns_rdatatype_t rdtype = DNS_TYPEPAIR_TYPE(newheader->type);
2893 		dns_rdatatype_t covers = DNS_TYPEPAIR_COVERS(newheader->type);
2894 		dns_typepair_t sigtype = DNS_SIGTYPE(covers);
2895 		if (NEGATIVE(newheader)) {
2896 			/*
2897 			 * We're adding a negative cache entry.
2898 			 */
2899 			if (covers == dns_rdatatype_any) {
2900 				/*
2901 				 * If we're adding an negative cache entry
2902 				 * which covers all types (NXDOMAIN,
2903 				 * NODATA(QTYPE=ANY)),
2904 				 *
2905 				 * We make all other data ancient so that the
2906 				 * only rdataset that can be found at this
2907 				 * node is the negative cache entry.
2908 				 */
2909 				for (topheader = qpnode->data;
2910 				     topheader != NULL;
2911 				     topheader = topheader->next)
2912 				{
2913 					mark_ancient(topheader);
2914 				}
2915 				goto find_header;
2916 			}
2917 			/*
2918 			 * Otherwise look for any RRSIGs of the given
2919 			 * type so they can be marked ancient later.
2920 			 */
2921 			for (topheader = qpnode->data; topheader != NULL;
2922 			     topheader = topheader->next)
2923 			{
2924 				if (topheader->type == sigtype) {
2925 					sigheader = topheader;
2926 					break;
2927 				}
2928 			}
2929 			negtype = DNS_TYPEPAIR_VALUE(covers, 0);
2930 		} else {
2931 			/*
2932 			 * We're adding something that isn't a
2933 			 * negative cache entry.  Look for an extant
2934 			 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative
2935 			 * cache entry.  If we're adding an RRSIG, also
2936 			 * check for an extant non-ancient NODATA ncache
2937 			 * entry which covers the same type as the RRSIG.
2938 			 */
2939 			for (topheader = qpnode->data; topheader != NULL;
2940 			     topheader = topheader->next)
2941 			{
2942 				if ((topheader->type == RDATATYPE_NCACHEANY) ||
2943 				    (newheader->type == sigtype &&
2944 				     topheader->type ==
2945 					     DNS_TYPEPAIR_VALUE(0, covers)))
2946 				{
2947 					break;
2948 				}
2949 			}
2950 			if (topheader != NULL && EXISTS(topheader) &&
2951 			    ACTIVE(topheader, now))
2952 			{
2953 				/*
2954 				 * Found one.
2955 				 */
2956 				if (trust < topheader->trust) {
2957 					/*
2958 					 * The NXDOMAIN/NODATA(QTYPE=ANY)
2959 					 * is more trusted.
2960 					 */
2961 					dns_slabheader_destroy(&newheader);
2962 					if (addedrdataset != NULL) {
2963 						bindrdataset(
2964 							qpdb, qpnode, topheader,
2965 							now, nlocktype,
2966 							tlocktype,
2967 							addedrdataset
2968 								DNS__DB_FLARG_PASS);
2969 					}
2970 					return DNS_R_UNCHANGED;
2971 				}
2972 				/*
2973 				 * The new rdataset is better.  Expire the
2974 				 * ncache entry.
2975 				 */
2976 				mark_ancient(topheader);
2977 				topheader = NULL;
2978 				goto find_header;
2979 			}
2980 			negtype = DNS_TYPEPAIR_VALUE(0, rdtype);
2981 		}
2982 	}
2983 
2984 	for (topheader = qpnode->data; topheader != NULL;
2985 	     topheader = topheader->next)
2986 	{
2987 		if (ACTIVE(topheader, now)) {
2988 			++ntypes;
2989 			expireheader = topheader;
2990 		}
2991 		if (prio_header(topheader)) {
2992 			prioheader = topheader;
2993 		}
2994 
2995 		if (topheader->type == newheader->type ||
2996 		    topheader->type == negtype)
2997 		{
2998 			break;
2999 		}
3000 		topheader_prev = topheader;
3001 	}
3002 
3003 find_header:
3004 	/*
3005 	 * If header isn't NULL, we've found the right type.  There may be
3006 	 * IGNORE rdatasets between the top of the chain and the first real
3007 	 * data.  We skip over them.
3008 	 */
3009 	header = topheader;
3010 	while (header != NULL && IGNORE(header)) {
3011 		header = header->down;
3012 	}
3013 	if (header != NULL) {
3014 		header_nx = NONEXISTENT(header) ? true : false;
3015 
3016 		/*
3017 		 * Deleting an already non-existent rdataset has no effect.
3018 		 */
3019 		if (header_nx && newheader_nx) {
3020 			dns_slabheader_destroy(&newheader);
3021 			return DNS_R_UNCHANGED;
3022 		}
3023 
3024 		/*
3025 		 * Trying to add an rdataset with lower trust to a cache
3026 		 * DB has no effect, provided that the cache data isn't
3027 		 * stale. If the cache data is stale, new lower trust
3028 		 * data will supersede it below. Unclear what the best
3029 		 * policy is here.
3030 		 */
3031 		if (trust < header->trust && (ACTIVE(header, now) || header_nx))
3032 		{
3033 			dns_slabheader_destroy(&newheader);
3034 			if (addedrdataset != NULL) {
3035 				bindrdataset(qpdb, qpnode, header, now,
3036 					     nlocktype, tlocktype,
3037 					     addedrdataset DNS__DB_FLARG_PASS);
3038 			}
3039 			return DNS_R_UNCHANGED;
3040 		}
3041 
3042 		/*
3043 		 * Don't replace existing NS, A and AAAA RRsets in the
3044 		 * cache if they are already exist. This prevents named
3045 		 * being locked to old servers. Don't lower trust of
3046 		 * existing record if the update is forced. Nothing
3047 		 * special to be done w.r.t stale data; it gets replaced
3048 		 * normally further down.
3049 		 */
3050 		if (ACTIVE(header, now) && header->type == dns_rdatatype_ns &&
3051 		    !header_nx && !newheader_nx &&
3052 		    header->trust >= newheader->trust &&
3053 		    dns_rdataslab_equalx((unsigned char *)header,
3054 					 (unsigned char *)newheader,
3055 					 (unsigned int)(sizeof(*newheader)),
3056 					 qpdb->common.rdclass,
3057 					 (dns_rdatatype_t)header->type))
3058 		{
3059 			/*
3060 			 * Honour the new ttl if it is less than the
3061 			 * older one.
3062 			 */
3063 			if (header->ttl > newheader->ttl) {
3064 				setttl(header, newheader->ttl);
3065 			}
3066 			if (header->last_used != now) {
3067 				ISC_LIST_UNLINK(
3068 					qpdb->lru[HEADERNODE(header)->locknum],
3069 					header, link);
3070 				header->last_used = now;
3071 				ISC_LIST_PREPEND(
3072 					qpdb->lru[HEADERNODE(header)->locknum],
3073 					header, link);
3074 			}
3075 			if (header->noqname == NULL &&
3076 			    newheader->noqname != NULL)
3077 			{
3078 				header->noqname = newheader->noqname;
3079 				newheader->noqname = NULL;
3080 			}
3081 			if (header->closest == NULL &&
3082 			    newheader->closest != NULL)
3083 			{
3084 				header->closest = newheader->closest;
3085 				newheader->closest = NULL;
3086 			}
3087 			dns_slabheader_destroy(&newheader);
3088 			if (addedrdataset != NULL) {
3089 				bindrdataset(qpdb, qpnode, header, now,
3090 					     nlocktype, tlocktype,
3091 					     addedrdataset DNS__DB_FLARG_PASS);
3092 			}
3093 			return ISC_R_SUCCESS;
3094 		}
3095 
3096 		/*
3097 		 * If we have will be replacing a NS RRset force its TTL
3098 		 * to be no more than the current NS RRset's TTL.  This
3099 		 * ensures the delegations that are withdrawn are honoured.
3100 		 */
3101 		if (ACTIVE(header, now) && header->type == dns_rdatatype_ns &&
3102 		    !header_nx && !newheader_nx &&
3103 		    header->trust <= newheader->trust)
3104 		{
3105 			if (newheader->ttl > header->ttl) {
3106 				newheader->ttl = header->ttl;
3107 			}
3108 		}
3109 		if (ACTIVE(header, now) &&
3110 		    (options & DNS_DBADD_PREFETCH) == 0 &&
3111 		    (header->type == dns_rdatatype_a ||
3112 		     header->type == dns_rdatatype_aaaa ||
3113 		     header->type == dns_rdatatype_ds ||
3114 		     header->type == DNS_SIGTYPE(dns_rdatatype_ds)) &&
3115 		    !header_nx && !newheader_nx &&
3116 		    header->trust >= newheader->trust &&
3117 		    dns_rdataslab_equal((unsigned char *)header,
3118 					(unsigned char *)newheader,
3119 					(unsigned int)(sizeof(*newheader))))
3120 		{
3121 			/*
3122 			 * Honour the new ttl if it is less than the
3123 			 * older one.
3124 			 */
3125 			if (header->ttl > newheader->ttl) {
3126 				setttl(header, newheader->ttl);
3127 			}
3128 			if (header->last_used != now) {
3129 				ISC_LIST_UNLINK(
3130 					qpdb->lru[HEADERNODE(header)->locknum],
3131 					header, link);
3132 				header->last_used = now;
3133 				ISC_LIST_PREPEND(
3134 					qpdb->lru[HEADERNODE(header)->locknum],
3135 					header, link);
3136 			}
3137 			if (header->noqname == NULL &&
3138 			    newheader->noqname != NULL)
3139 			{
3140 				header->noqname = newheader->noqname;
3141 				newheader->noqname = NULL;
3142 			}
3143 			if (header->closest == NULL &&
3144 			    newheader->closest != NULL)
3145 			{
3146 				header->closest = newheader->closest;
3147 				newheader->closest = NULL;
3148 			}
3149 			dns_slabheader_destroy(&newheader);
3150 			if (addedrdataset != NULL) {
3151 				bindrdataset(qpdb, qpnode, header, now,
3152 					     nlocktype, tlocktype,
3153 					     addedrdataset DNS__DB_FLARG_PASS);
3154 			}
3155 			return ISC_R_SUCCESS;
3156 		}
3157 
3158 		if (loading) {
3159 			newheader->down = NULL;
3160 			idx = HEADERNODE(newheader)->locknum;
3161 			if (ZEROTTL(newheader)) {
3162 				newheader->last_used = qpdb->last_used + 1;
3163 				ISC_LIST_APPEND(qpdb->lru[idx], newheader,
3164 						link);
3165 			} else {
3166 				ISC_LIST_PREPEND(qpdb->lru[idx], newheader,
3167 						 link);
3168 			}
3169 			INSIST(qpdb->heaps != NULL);
3170 			isc_heap_insert(qpdb->heaps[idx], newheader);
3171 			newheader->heap = qpdb->heaps[idx];
3172 
3173 			/*
3174 			 * There are no other references to 'header' when
3175 			 * loading, so we MAY clean up 'header' now.
3176 			 * Since we don't generate changed records when
3177 			 * loading, we MUST clean up 'header' now.
3178 			 */
3179 			if (topheader_prev != NULL) {
3180 				topheader_prev->next = newheader;
3181 			} else {
3182 				qpnode->data = newheader;
3183 			}
3184 			newheader->next = topheader->next;
3185 			dns_slabheader_destroy(&header);
3186 		} else {
3187 			idx = HEADERNODE(newheader)->locknum;
3188 			INSIST(qpdb->heaps != NULL);
3189 			isc_heap_insert(qpdb->heaps[idx], newheader);
3190 			newheader->heap = qpdb->heaps[idx];
3191 			if (ZEROTTL(newheader)) {
3192 				newheader->last_used = qpdb->last_used + 1;
3193 				ISC_LIST_APPEND(qpdb->lru[idx], newheader,
3194 						link);
3195 			} else {
3196 				ISC_LIST_PREPEND(qpdb->lru[idx], newheader,
3197 						 link);
3198 			}
3199 			if (topheader_prev != NULL) {
3200 				topheader_prev->next = newheader;
3201 			} else {
3202 				qpnode->data = newheader;
3203 			}
3204 			newheader->next = topheader->next;
3205 			newheader->down = topheader;
3206 			topheader->next = newheader;
3207 			qpnode->dirty = 1;
3208 			mark_ancient(header);
3209 			if (sigheader != NULL) {
3210 				mark_ancient(sigheader);
3211 			}
3212 		}
3213 	} else {
3214 		/*
3215 		 * No non-IGNORED rdatasets of the given type exist at
3216 		 * this node.
3217 		 */
3218 
3219 		/*
3220 		 * If we're trying to delete the type, don't bother.
3221 		 */
3222 		if (newheader_nx) {
3223 			dns_slabheader_destroy(&newheader);
3224 			return DNS_R_UNCHANGED;
3225 		}
3226 
3227 		idx = HEADERNODE(newheader)->locknum;
3228 		isc_heap_insert(qpdb->heaps[idx], newheader);
3229 		newheader->heap = qpdb->heaps[idx];
3230 		if (ZEROTTL(newheader)) {
3231 			ISC_LIST_APPEND(qpdb->lru[idx], newheader, link);
3232 		} else {
3233 			ISC_LIST_PREPEND(qpdb->lru[idx], newheader, link);
3234 		}
3235 
3236 		if (topheader != NULL) {
3237 			/*
3238 			 * We have a list of rdatasets of the given type,
3239 			 * but they're all marked IGNORE.  We simply insert
3240 			 * the new rdataset at the head of the list.
3241 			 *
3242 			 * Ignored rdatasets cannot occur during loading, so
3243 			 * we INSIST on it.
3244 			 */
3245 			INSIST(!loading);
3246 			if (topheader_prev != NULL) {
3247 				topheader_prev->next = newheader;
3248 			} else {
3249 				qpnode->data = newheader;
3250 			}
3251 			newheader->next = topheader->next;
3252 			newheader->down = topheader;
3253 			topheader->next = newheader;
3254 			qpnode->dirty = 1;
3255 		} else {
3256 			/*
3257 			 * No rdatasets of the given type exist at the node.
3258 			 */
3259 			INSIST(newheader->down == NULL);
3260 
3261 			if (prio_header(newheader)) {
3262 				/* This is a priority type, prepend it */
3263 				newheader->next = qpnode->data;
3264 				qpnode->data = newheader;
3265 			} else if (prioheader != NULL) {
3266 				/* Append after the priority headers */
3267 				newheader->next = prioheader->next;
3268 				prioheader->next = newheader;
3269 			} else {
3270 				/* There were no priority headers */
3271 				newheader->next = qpnode->data;
3272 				qpnode->data = newheader;
3273 			}
3274 
3275 			if (overmaxtype(qpdb, ntypes)) {
3276 				if (expireheader == NULL) {
3277 					expireheader = newheader;
3278 				}
3279 				if (NEGATIVE(newheader) &&
3280 				    !prio_header(newheader))
3281 				{
3282 					/*
3283 					 * Add the new non-priority negative
3284 					 * header to the database only
3285 					 * temporarily.
3286 					 */
3287 					expireheader = newheader;
3288 				}
3289 
3290 				mark_ancient(expireheader);
3291 				/*
3292 				 * FIXME: In theory, we should mark the RRSIG
3293 				 * and the header at the same time, but there is
3294 				 * no direct link between those two header, so
3295 				 * we would have to check the whole list again.
3296 				 */
3297 			}
3298 		}
3299 	}
3300 
3301 	if (addedrdataset != NULL) {
3302 		bindrdataset(qpdb, qpnode, newheader, now, nlocktype, tlocktype,
3303 			     addedrdataset DNS__DB_FLARG_PASS);
3304 	}
3305 
3306 	return ISC_R_SUCCESS;
3307 }
3308 
3309 static isc_result_t
3310 addnoqname(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset,
3311 	   dns_rdataset_t *rdataset) {
3312 	isc_result_t result;
3313 	dns_slabheader_proof_t *noqname = NULL;
3314 	dns_name_t name = DNS_NAME_INITEMPTY;
3315 	dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT;
3316 	isc_region_t r1, r2;
3317 
3318 	result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
3319 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
3320 
3321 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset);
3322 	if (result != ISC_R_SUCCESS) {
3323 		goto cleanup;
3324 	}
3325 
3326 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset);
3327 	if (result != ISC_R_SUCCESS) {
3328 		goto cleanup;
3329 	}
3330 
3331 	noqname = isc_mem_get(mctx, sizeof(*noqname));
3332 	*noqname = (dns_slabheader_proof_t){
3333 		.neg = r1.base,
3334 		.negsig = r2.base,
3335 		.type = neg.type,
3336 		.name = DNS_NAME_INITEMPTY,
3337 	};
3338 	dns_name_dup(&name, mctx, &noqname->name);
3339 	newheader->noqname = noqname;
3340 
3341 cleanup:
3342 	dns_rdataset_disassociate(&neg);
3343 	dns_rdataset_disassociate(&negsig);
3344 
3345 	return result;
3346 }
3347 
3348 static isc_result_t
3349 addclosest(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset,
3350 	   dns_rdataset_t *rdataset) {
3351 	isc_result_t result;
3352 	dns_slabheader_proof_t *closest = NULL;
3353 	dns_name_t name = DNS_NAME_INITEMPTY;
3354 	dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT;
3355 	isc_region_t r1, r2;
3356 
3357 	result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
3358 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
3359 
3360 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset);
3361 	if (result != ISC_R_SUCCESS) {
3362 		goto cleanup;
3363 	}
3364 
3365 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset);
3366 	if (result != ISC_R_SUCCESS) {
3367 		goto cleanup;
3368 	}
3369 
3370 	closest = isc_mem_get(mctx, sizeof(*closest));
3371 	*closest = (dns_slabheader_proof_t){
3372 		.neg = r1.base,
3373 		.negsig = r2.base,
3374 		.name = DNS_NAME_INITEMPTY,
3375 		.type = neg.type,
3376 	};
3377 	dns_name_dup(&name, mctx, &closest->name);
3378 	newheader->closest = closest;
3379 
3380 cleanup:
3381 	dns_rdataset_disassociate(&neg);
3382 	dns_rdataset_disassociate(&negsig);
3383 	return result;
3384 }
3385 
3386 static void
3387 expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
3388 		   isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
3389 		   isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG);
3390 
3391 static isc_result_t
3392 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3393 	    isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
3394 	    dns_rdataset_t *addedrdataset DNS__DB_FLARG) {
3395 	qpcache_t *qpdb = (qpcache_t *)db;
3396 	qpcnode_t *qpnode = (qpcnode_t *)node;
3397 	isc_region_t region;
3398 	dns_slabheader_t *newheader = NULL;
3399 	isc_result_t result;
3400 	bool delegating = false;
3401 	bool newnsec;
3402 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
3403 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
3404 	bool cache_is_overmem = false;
3405 	dns_fixedname_t fixed;
3406 	dns_name_t *name = NULL;
3407 
3408 	REQUIRE(VALID_QPDB(qpdb));
3409 	REQUIRE(version == NULL);
3410 
3411 	if (now == 0) {
3412 		now = isc_stdtime_now();
3413 	}
3414 
3415 	result = dns_rdataslab_fromrdataset(rdataset, qpdb->common.mctx,
3416 					    &region, sizeof(dns_slabheader_t),
3417 					    qpdb->maxrrperset);
3418 	if (result != ISC_R_SUCCESS) {
3419 		if (result == DNS_R_TOOMANYRECORDS) {
3420 			dns__db_logtoomanyrecords((dns_db_t *)qpdb,
3421 						  &qpnode->name, rdataset->type,
3422 						  "adding", qpdb->maxrrperset);
3423 		}
3424 		return result;
3425 	}
3426 
3427 	name = dns_fixedname_initname(&fixed);
3428 	dns_name_copy(&qpnode->name, name);
3429 	dns_rdataset_getownercase(rdataset, name);
3430 
3431 	newheader = (dns_slabheader_t *)region.base;
3432 	*newheader = (dns_slabheader_t){
3433 		.type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers),
3434 		.trust = rdataset->trust,
3435 		.last_used = now,
3436 		.node = qpnode,
3437 	};
3438 
3439 	dns_slabheader_reset(newheader, db, node);
3440 	setttl(newheader, rdataset->ttl + now);
3441 	if (rdataset->ttl == 0U) {
3442 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL);
3443 	}
3444 	atomic_init(&newheader->count,
3445 		    atomic_fetch_add_relaxed(&init_count, 1));
3446 	if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) {
3447 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_PREFETCH);
3448 	}
3449 	if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) {
3450 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NEGATIVE);
3451 	}
3452 	if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) {
3453 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NXDOMAIN);
3454 	}
3455 	if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) {
3456 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_OPTOUT);
3457 	}
3458 	if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
3459 		result = addnoqname(qpdb->common.mctx, newheader,
3460 				    qpdb->maxrrperset, rdataset);
3461 		if (result != ISC_R_SUCCESS) {
3462 			dns_slabheader_destroy(&newheader);
3463 			return result;
3464 		}
3465 	}
3466 	if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
3467 		result = addclosest(qpdb->common.mctx, newheader,
3468 				    qpdb->maxrrperset, rdataset);
3469 		if (result != ISC_R_SUCCESS) {
3470 			dns_slabheader_destroy(&newheader);
3471 			return result;
3472 		}
3473 	}
3474 
3475 	/*
3476 	 * If we're adding a delegation type (which would be an NS or DNAME
3477 	 * for a zone, but only DNAME counts for a cache), we need to set
3478 	 * the callback bit on the node.
3479 	 */
3480 	if (rdataset->type == dns_rdatatype_dname) {
3481 		delegating = true;
3482 	}
3483 
3484 	/*
3485 	 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
3486 	 */
3487 	TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
3488 	if (qpnode->nsec != DNS_DB_NSEC_HAS_NSEC &&
3489 	    rdataset->type == dns_rdatatype_nsec)
3490 	{
3491 		newnsec = true;
3492 	} else {
3493 		newnsec = false;
3494 	}
3495 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
3496 
3497 	/*
3498 	 * If we're adding a delegation type, adding to the auxiliary NSEC
3499 	 * tree, or the DB is a cache in an overmem state, hold an
3500 	 * exclusive lock on the tree.  In the latter case the lock does
3501 	 * not necessarily have to be acquired but it will help purge
3502 	 * ancient entries more effectively.
3503 	 */
3504 	if (isc_mem_isovermem(qpdb->common.mctx)) {
3505 		cache_is_overmem = true;
3506 	}
3507 	if (delegating || newnsec || cache_is_overmem) {
3508 		TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
3509 	}
3510 
3511 	if (cache_is_overmem) {
3512 		overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS);
3513 	}
3514 
3515 	NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3516 
3517 	if (qpdb->rrsetstats != NULL) {
3518 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_STATCOUNT);
3519 		update_rrsetstats(qpdb->rrsetstats, newheader->type,
3520 				  atomic_load_acquire(&newheader->attributes),
3521 				  true);
3522 	}
3523 
3524 	expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype, now,
3525 			   cache_is_overmem DNS__DB_FLARG_PASS);
3526 
3527 	/*
3528 	 * If we've been holding a write lock on the tree just for
3529 	 * cleaning, we can release it now.  However, we still need the
3530 	 * node lock.
3531 	 */
3532 	if (tlocktype == isc_rwlocktype_write && !delegating && !newnsec) {
3533 		TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
3534 	}
3535 
3536 	result = ISC_R_SUCCESS;
3537 	if (newnsec) {
3538 		qpcnode_t *nsecnode = NULL;
3539 
3540 		result = dns_qp_getname(qpdb->nsec, name, (void **)&nsecnode,
3541 					NULL);
3542 		if (result == ISC_R_SUCCESS) {
3543 			result = ISC_R_SUCCESS;
3544 		} else {
3545 			INSIST(nsecnode == NULL);
3546 			nsecnode = new_qpcnode(qpdb, name);
3547 			nsecnode->nsec = DNS_DB_NSEC_NSEC;
3548 			result = dns_qp_insert(qpdb->nsec, nsecnode, 0);
3549 			INSIST(result == ISC_R_SUCCESS);
3550 			qpcnode_detach(&nsecnode);
3551 		}
3552 		qpnode->nsec = DNS_DB_NSEC_HAS_NSEC;
3553 	}
3554 
3555 	if (result == ISC_R_SUCCESS) {
3556 		result = add(qpdb, qpnode, name, newheader, options, false,
3557 			     addedrdataset, now, nlocktype,
3558 			     tlocktype DNS__DB_FLARG_PASS);
3559 	}
3560 	if (result == ISC_R_SUCCESS && delegating) {
3561 		qpnode->delegating = 1;
3562 	}
3563 
3564 	NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3565 
3566 	if (tlocktype != isc_rwlocktype_none) {
3567 		TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
3568 	}
3569 	INSIST(tlocktype == isc_rwlocktype_none);
3570 
3571 	return result;
3572 }
3573 
3574 static isc_result_t
3575 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3576 	       dns_rdatatype_t type, dns_rdatatype_t covers DNS__DB_FLARG) {
3577 	qpcache_t *qpdb = (qpcache_t *)db;
3578 	qpcnode_t *qpnode = (qpcnode_t *)node;
3579 	isc_result_t result;
3580 	dns_slabheader_t *newheader = NULL;
3581 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
3582 
3583 	REQUIRE(VALID_QPDB(qpdb));
3584 	REQUIRE(version == NULL);
3585 
3586 	if (type == dns_rdatatype_any) {
3587 		return ISC_R_NOTIMPLEMENTED;
3588 	}
3589 	if (type == dns_rdatatype_rrsig && covers == 0) {
3590 		return ISC_R_NOTIMPLEMENTED;
3591 	}
3592 
3593 	newheader = dns_slabheader_new(db, node);
3594 	newheader->type = DNS_TYPEPAIR_VALUE(type, covers);
3595 	setttl(newheader, 0);
3596 	atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT);
3597 
3598 	NODE_WRLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3599 	result = add(qpdb, qpnode, NULL, newheader, DNS_DBADD_FORCE, false,
3600 		     NULL, 0, nlocktype,
3601 		     isc_rwlocktype_none DNS__DB_FLARG_PASS);
3602 	NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3603 
3604 	return result;
3605 }
3606 
3607 static unsigned int
3608 nodecount(dns_db_t *db, dns_dbtree_t tree) {
3609 	qpcache_t *qpdb = (qpcache_t *)db;
3610 	dns_qp_memusage_t mu;
3611 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
3612 
3613 	REQUIRE(VALID_QPDB(qpdb));
3614 
3615 	TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
3616 	switch (tree) {
3617 	case dns_dbtree_main:
3618 		mu = dns_qp_memusage(qpdb->tree);
3619 		break;
3620 	case dns_dbtree_nsec:
3621 		mu = dns_qp_memusage(qpdb->nsec);
3622 		break;
3623 	default:
3624 		UNREACHABLE();
3625 	}
3626 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
3627 
3628 	return mu.leaves;
3629 }
3630 
3631 static isc_result_t
3632 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
3633 	qpcache_t *qpdb = (qpcache_t *)db;
3634 	qpcnode_t *onode = NULL;
3635 	isc_result_t result = ISC_R_SUCCESS;
3636 
3637 	REQUIRE(VALID_QPDB(qpdb));
3638 	REQUIRE(nodep != NULL && *nodep == NULL);
3639 
3640 	/* Note that the access to origin_node doesn't require a DB lock */
3641 	onode = (qpcnode_t *)qpdb->origin_node;
3642 	if (onode != NULL) {
3643 		newref(qpdb, onode, isc_rwlocktype_none,
3644 		       isc_rwlocktype_none DNS__DB_FLARG_PASS);
3645 		*nodep = qpdb->origin_node;
3646 	} else {
3647 		result = ISC_R_NOTFOUND;
3648 	}
3649 
3650 	return result;
3651 }
3652 
3653 static void
3654 locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) {
3655 	qpcache_t *qpdb = (qpcache_t *)db;
3656 	qpcnode_t *qpnode = (qpcnode_t *)node;
3657 
3658 	RWLOCK(&qpdb->node_locks[qpnode->locknum].lock, type);
3659 }
3660 
3661 static void
3662 unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) {
3663 	qpcache_t *qpdb = (qpcache_t *)db;
3664 	qpcnode_t *qpnode = (qpcnode_t *)node;
3665 
3666 	RWUNLOCK(&qpdb->node_locks[qpnode->locknum].lock, type);
3667 }
3668 
3669 isc_result_t
3670 dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
3671 		    dns_dbtype_t type, dns_rdataclass_t rdclass,
3672 		    unsigned int argc, char *argv[],
3673 		    void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) {
3674 	qpcache_t *qpdb = NULL;
3675 	isc_mem_t *hmctx = mctx;
3676 	isc_loop_t *loop = isc_loop();
3677 	int i;
3678 
3679 	/* This database implementation only supports cache semantics */
3680 	REQUIRE(type == dns_dbtype_cache);
3681 	REQUIRE(loop != NULL);
3682 
3683 	qpdb = isc_mem_get(mctx, sizeof(*qpdb));
3684 	*qpdb = (qpcache_t){
3685 		.common.methods = &qpdb_cachemethods,
3686 		.common.origin = DNS_NAME_INITEMPTY,
3687 		.common.rdclass = rdclass,
3688 		.common.attributes = DNS_DBATTR_CACHE,
3689 		.loopmgr = isc_loop_getloopmgr(loop),
3690 	};
3691 
3692 	isc_refcount_init(&qpdb->common.references, 1);
3693 
3694 	/*
3695 	 * If argv[0] exists, it points to a memory context to use for heap
3696 	 */
3697 	if (argc != 0) {
3698 		hmctx = (isc_mem_t *)argv[0];
3699 	}
3700 
3701 	isc_rwlock_init(&qpdb->lock);
3702 	TREE_INITLOCK(&qpdb->tree_lock);
3703 
3704 	qpdb->node_lock_count = isc_loopmgr_nloops(qpdb->loopmgr);
3705 	qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count,
3706 					sizeof(db_nodelock_t));
3707 
3708 	dns_rdatasetstats_create(mctx, &qpdb->rrsetstats);
3709 	qpdb->lru = isc_mem_cget(mctx, qpdb->node_lock_count,
3710 				 sizeof(dns_slabheaderlist_t));
3711 	for (i = 0; i < (int)qpdb->node_lock_count; i++) {
3712 		ISC_LIST_INIT(qpdb->lru[i]);
3713 	}
3714 
3715 	/*
3716 	 * Create the heaps.
3717 	 */
3718 	qpdb->heaps = isc_mem_cget(hmctx, qpdb->node_lock_count,
3719 				   sizeof(isc_heap_t *));
3720 	for (i = 0; i < (int)qpdb->node_lock_count; i++) {
3721 		isc_heap_create(hmctx, ttl_sooner, set_index, 0,
3722 				&qpdb->heaps[i]);
3723 	}
3724 
3725 	/*
3726 	 * Create deadnode lists.
3727 	 */
3728 	qpdb->deadnodes = isc_mem_cget(mctx, qpdb->node_lock_count,
3729 				       sizeof(qpdb->deadnodes[0]));
3730 	for (i = 0; i < (int)(qpdb->node_lock_count); i++) {
3731 		isc_queue_init(&qpdb->deadnodes[i]);
3732 	}
3733 
3734 	qpdb->active = qpdb->node_lock_count;
3735 
3736 	for (i = 0; i < (int)(qpdb->node_lock_count); i++) {
3737 		NODE_INITLOCK(&qpdb->node_locks[i].lock);
3738 		isc_refcount_init(&qpdb->node_locks[i].references, 0);
3739 		qpdb->node_locks[i].exiting = false;
3740 	}
3741 
3742 	/*
3743 	 * Attach to the mctx.  The database will persist so long as there
3744 	 * are references to it, and attaching to the mctx ensures that our
3745 	 * mctx won't disappear out from under us.
3746 	 */
3747 	isc_mem_attach(mctx, &qpdb->common.mctx);
3748 	isc_mem_attach(hmctx, &qpdb->hmctx);
3749 
3750 	/*
3751 	 * Make a copy of the origin name.
3752 	 */
3753 	dns_name_dupwithoffsets(origin, mctx, &qpdb->common.origin);
3754 
3755 	/*
3756 	 * Make the qp tries.
3757 	 */
3758 	dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->tree);
3759 	dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->nsec);
3760 
3761 	qpdb->common.magic = DNS_DB_MAGIC;
3762 	qpdb->common.impmagic = QPDB_MAGIC;
3763 
3764 	*dbp = (dns_db_t *)qpdb;
3765 
3766 	return ISC_R_SUCCESS;
3767 }
3768 
3769 /*
3770  * Rdataset Iterator Methods
3771  */
3772 
3773 static void
3774 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) {
3775 	qpc_rditer_t *iterator = NULL;
3776 
3777 	iterator = (qpc_rditer_t *)(*iteratorp);
3778 
3779 	dns__db_detachnode(iterator->common.db,
3780 			   &iterator->common.node DNS__DB_FLARG_PASS);
3781 	isc_mem_put(iterator->common.db->mctx, iterator, sizeof(*iterator));
3782 
3783 	*iteratorp = NULL;
3784 }
3785 
3786 static bool
3787 iterator_active(qpcache_t *qpdb, qpc_rditer_t *iterator,
3788 		dns_slabheader_t *header) {
3789 	dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb);
3790 
3791 	/*
3792 	 * Is this a "this rdataset doesn't exist" record?
3793 	 */
3794 	if (NONEXISTENT(header)) {
3795 		return false;
3796 	}
3797 
3798 	/*
3799 	 * If this header is still active then return it.
3800 	 */
3801 	if (ACTIVE(header, iterator->common.now)) {
3802 		return true;
3803 	}
3804 
3805 	/*
3806 	 * If we are not returning stale records or the rdataset is
3807 	 * too old don't return it.
3808 	 */
3809 	if (!STALEOK(iterator) || (iterator->common.now > stale_ttl)) {
3810 		return false;
3811 	}
3812 	return true;
3813 }
3814 
3815 static isc_result_t
3816 rdatasetiter_first(dns_rdatasetiter_t *it DNS__DB_FLARG) {
3817 	qpc_rditer_t *iterator = (qpc_rditer_t *)it;
3818 	qpcache_t *qpdb = (qpcache_t *)(iterator->common.db);
3819 	qpcnode_t *qpnode = iterator->common.node;
3820 	dns_slabheader_t *header = NULL, *top_next = NULL;
3821 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
3822 
3823 	NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3824 
3825 	for (header = qpnode->data; header != NULL; header = top_next) {
3826 		top_next = header->next;
3827 		do {
3828 			if (EXPIREDOK(iterator)) {
3829 				if (!NONEXISTENT(header)) {
3830 					break;
3831 				}
3832 				header = header->down;
3833 			} else if (!IGNORE(header)) {
3834 				if (!iterator_active(qpdb, iterator, header)) {
3835 					header = NULL;
3836 				}
3837 				break;
3838 			} else {
3839 				header = header->down;
3840 			}
3841 		} while (header != NULL);
3842 		if (header != NULL) {
3843 			break;
3844 		}
3845 	}
3846 
3847 	NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3848 
3849 	iterator->current = header;
3850 
3851 	if (header == NULL) {
3852 		return ISC_R_NOMORE;
3853 	}
3854 
3855 	return ISC_R_SUCCESS;
3856 }
3857 
3858 static isc_result_t
3859 rdatasetiter_next(dns_rdatasetiter_t *it DNS__DB_FLARG) {
3860 	qpc_rditer_t *iterator = (qpc_rditer_t *)it;
3861 	qpcache_t *qpdb = (qpcache_t *)(iterator->common.db);
3862 	qpcnode_t *qpnode = iterator->common.node;
3863 	dns_slabheader_t *header = NULL, *top_next = NULL;
3864 	dns_typepair_t type, negtype;
3865 	dns_rdatatype_t rdtype, covers;
3866 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
3867 	bool expiredok = EXPIREDOK(iterator);
3868 
3869 	header = iterator->current;
3870 	if (header == NULL) {
3871 		return ISC_R_NOMORE;
3872 	}
3873 
3874 	NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3875 
3876 	type = header->type;
3877 	rdtype = DNS_TYPEPAIR_TYPE(header->type);
3878 	if (NEGATIVE(header)) {
3879 		covers = DNS_TYPEPAIR_COVERS(header->type);
3880 		negtype = DNS_TYPEPAIR_VALUE(covers, 0);
3881 	} else {
3882 		negtype = DNS_TYPEPAIR_VALUE(0, rdtype);
3883 	}
3884 
3885 	/*
3886 	 * Find the start of the header chain for the next type
3887 	 * by walking back up the list.
3888 	 */
3889 	top_next = header->next;
3890 	while (top_next != NULL &&
3891 	       (top_next->type == type || top_next->type == negtype))
3892 	{
3893 		top_next = top_next->next;
3894 	}
3895 	if (expiredok) {
3896 		/*
3897 		 * Keep walking down the list if possible or
3898 		 * start the next type.
3899 		 */
3900 		header = header->down != NULL ? header->down : top_next;
3901 	} else {
3902 		header = top_next;
3903 	}
3904 	for (; header != NULL; header = top_next) {
3905 		top_next = header->next;
3906 		do {
3907 			if (expiredok) {
3908 				if (!NONEXISTENT(header)) {
3909 					break;
3910 				}
3911 				header = header->down;
3912 			} else if (!IGNORE(header)) {
3913 				if (!iterator_active(qpdb, iterator, header)) {
3914 					header = NULL;
3915 				}
3916 				break;
3917 			} else {
3918 				header = header->down;
3919 			}
3920 		} while (header != NULL);
3921 		if (header != NULL) {
3922 			break;
3923 		}
3924 		/*
3925 		 * Find the start of the header chain for the next type
3926 		 * by walking back up the list.
3927 		 */
3928 		while (top_next != NULL &&
3929 		       (top_next->type == type || top_next->type == negtype))
3930 		{
3931 			top_next = top_next->next;
3932 		}
3933 	}
3934 
3935 	NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3936 
3937 	iterator->current = header;
3938 
3939 	if (header == NULL) {
3940 		return ISC_R_NOMORE;
3941 	}
3942 
3943 	return ISC_R_SUCCESS;
3944 }
3945 
3946 static void
3947 rdatasetiter_current(dns_rdatasetiter_t *it,
3948 		     dns_rdataset_t *rdataset DNS__DB_FLARG) {
3949 	qpc_rditer_t *iterator = (qpc_rditer_t *)it;
3950 	qpcache_t *qpdb = (qpcache_t *)(iterator->common.db);
3951 	qpcnode_t *qpnode = iterator->common.node;
3952 	dns_slabheader_t *header = NULL;
3953 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
3954 
3955 	header = iterator->current;
3956 	REQUIRE(header != NULL);
3957 
3958 	NODE_RDLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3959 
3960 	bindrdataset(qpdb, qpnode, header, iterator->common.now, nlocktype,
3961 		     isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS);
3962 
3963 	NODE_UNLOCK(&qpdb->node_locks[qpnode->locknum].lock, &nlocktype);
3964 }
3965 
3966 /*
3967  * Database Iterator Methods
3968  */
3969 
3970 static void
3971 reference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) {
3972 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
3973 	qpcnode_t *node = qpdbiter->node;
3974 
3975 	if (node == NULL) {
3976 		return;
3977 	}
3978 
3979 	INSIST(qpdbiter->tree_locked != isc_rwlocktype_none);
3980 	reactivate_node(qpdb, node, qpdbiter->tree_locked DNS__DB_FLARG_PASS);
3981 }
3982 
3983 static void
3984 dereference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) {
3985 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
3986 	qpcnode_t *node = qpdbiter->node;
3987 	isc_rwlock_t *lock = NULL;
3988 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
3989 	isc_rwlocktype_t tlocktype = qpdbiter->tree_locked;
3990 
3991 	if (node == NULL) {
3992 		return;
3993 	}
3994 
3995 	REQUIRE(tlocktype != isc_rwlocktype_write);
3996 
3997 	lock = &qpdb->node_locks[node->locknum].lock;
3998 	NODE_RDLOCK(lock, &nlocktype);
3999 	decref(qpdb, node, &nlocktype, &qpdbiter->tree_locked,
4000 	       false DNS__DB_FLARG_PASS);
4001 	NODE_UNLOCK(lock, &nlocktype);
4002 
4003 	INSIST(qpdbiter->tree_locked == tlocktype);
4004 
4005 	qpdbiter->node = NULL;
4006 }
4007 
4008 static void
4009 resume_iteration(qpc_dbit_t *qpdbiter, bool continuing) {
4010 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
4011 
4012 	REQUIRE(qpdbiter->paused);
4013 	REQUIRE(qpdbiter->tree_locked == isc_rwlocktype_none);
4014 
4015 	TREE_RDLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked);
4016 
4017 	/*
4018 	 * If we're being called from dbiterator_next or _prev,
4019 	 * then we may need to reinitialize the iterator to the current
4020 	 * name. The tree could have changed while it was unlocked,
4021 	 * would make the iterator traversal inconsistent.
4022 	 *
4023 	 * As long as the iterator is holding a reference to
4024 	 * qpdbiter->node, the node won't be removed from the tree,
4025 	 * so the lookup should always succeed.
4026 	 */
4027 	if (continuing && qpdbiter->node != NULL) {
4028 		isc_result_t result;
4029 		result = dns_qp_lookup(qpdb->tree, qpdbiter->name, NULL,
4030 				       &qpdbiter->iter, NULL, NULL, NULL);
4031 		INSIST(result == ISC_R_SUCCESS);
4032 	}
4033 
4034 	qpdbiter->paused = false;
4035 }
4036 
4037 static void
4038 dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG) {
4039 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)(*iteratorp);
4040 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
4041 	dns_db_t *db = NULL;
4042 
4043 	if (qpdbiter->tree_locked == isc_rwlocktype_read) {
4044 		TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked);
4045 	}
4046 	INSIST(qpdbiter->tree_locked == isc_rwlocktype_none);
4047 
4048 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4049 
4050 	dns_db_attach(qpdbiter->common.db, &db);
4051 	dns_db_detach(&qpdbiter->common.db);
4052 
4053 	isc_mem_put(db->mctx, qpdbiter, sizeof(*qpdbiter));
4054 	dns_db_detach(&db);
4055 
4056 	*iteratorp = NULL;
4057 }
4058 
4059 static isc_result_t
4060 dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG) {
4061 	isc_result_t result;
4062 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4063 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
4064 
4065 	if (qpdbiter->result != ISC_R_SUCCESS &&
4066 	    qpdbiter->result != ISC_R_NOTFOUND &&
4067 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
4068 	    qpdbiter->result != ISC_R_NOMORE)
4069 	{
4070 		return qpdbiter->result;
4071 	}
4072 
4073 	if (qpdbiter->paused) {
4074 		resume_iteration(qpdbiter, false);
4075 	}
4076 
4077 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4078 
4079 	dns_qpiter_init(qpdb->tree, &qpdbiter->iter);
4080 	result = dns_qpiter_next(&qpdbiter->iter, NULL,
4081 				 (void **)&qpdbiter->node, NULL);
4082 
4083 	if (result == ISC_R_SUCCESS) {
4084 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
4085 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4086 	} else {
4087 		INSIST(result == ISC_R_NOMORE); /* The tree is empty. */
4088 		qpdbiter->node = NULL;
4089 	}
4090 
4091 	qpdbiter->result = result;
4092 
4093 	if (result != ISC_R_SUCCESS) {
4094 		ENSURE(!qpdbiter->paused);
4095 	}
4096 
4097 	return result;
4098 }
4099 
4100 static isc_result_t
4101 dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG) {
4102 	isc_result_t result;
4103 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4104 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
4105 
4106 	if (qpdbiter->result != ISC_R_SUCCESS &&
4107 	    qpdbiter->result != ISC_R_NOTFOUND &&
4108 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
4109 	    qpdbiter->result != ISC_R_NOMORE)
4110 	{
4111 		return qpdbiter->result;
4112 	}
4113 
4114 	if (qpdbiter->paused) {
4115 		resume_iteration(qpdbiter, false);
4116 	}
4117 
4118 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4119 
4120 	dns_qpiter_init(qpdb->tree, &qpdbiter->iter);
4121 	result = dns_qpiter_prev(&qpdbiter->iter, NULL,
4122 				 (void **)&qpdbiter->node, NULL);
4123 
4124 	if (result == ISC_R_SUCCESS) {
4125 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
4126 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4127 	} else {
4128 		INSIST(result == ISC_R_NOMORE); /* The tree is empty. */
4129 		qpdbiter->node = NULL;
4130 	}
4131 
4132 	qpdbiter->result = result;
4133 	return result;
4134 }
4135 
4136 static isc_result_t
4137 dbiterator_seek(dns_dbiterator_t *iterator,
4138 		const dns_name_t *name DNS__DB_FLARG) {
4139 	isc_result_t result;
4140 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4141 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
4142 
4143 	if (qpdbiter->result != ISC_R_SUCCESS &&
4144 	    qpdbiter->result != ISC_R_NOTFOUND &&
4145 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
4146 	    qpdbiter->result != ISC_R_NOMORE)
4147 	{
4148 		return qpdbiter->result;
4149 	}
4150 
4151 	if (qpdbiter->paused) {
4152 		resume_iteration(qpdbiter, false);
4153 	}
4154 
4155 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4156 
4157 	result = dns_qp_lookup(qpdb->tree, name, NULL, &qpdbiter->iter, NULL,
4158 			       (void **)&qpdbiter->node, NULL);
4159 
4160 	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
4161 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
4162 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4163 	} else {
4164 		qpdbiter->node = NULL;
4165 	}
4166 
4167 	qpdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS
4168 							  : result;
4169 	return result;
4170 }
4171 
4172 static isc_result_t
4173 dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) {
4174 	isc_result_t result;
4175 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4176 
4177 	REQUIRE(qpdbiter->node != NULL);
4178 
4179 	if (qpdbiter->result != ISC_R_SUCCESS) {
4180 		return qpdbiter->result;
4181 	}
4182 
4183 	if (qpdbiter->paused) {
4184 		resume_iteration(qpdbiter, true);
4185 	}
4186 
4187 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4188 
4189 	result = dns_qpiter_prev(&qpdbiter->iter, NULL,
4190 				 (void **)&qpdbiter->node, NULL);
4191 
4192 	if (result == ISC_R_SUCCESS) {
4193 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
4194 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4195 	} else {
4196 		INSIST(result == ISC_R_NOMORE);
4197 		qpdbiter->node = NULL;
4198 	}
4199 
4200 	qpdbiter->result = result;
4201 	return result;
4202 }
4203 
4204 static isc_result_t
4205 dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) {
4206 	isc_result_t result;
4207 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4208 
4209 	REQUIRE(qpdbiter->node != NULL);
4210 
4211 	if (qpdbiter->result != ISC_R_SUCCESS) {
4212 		return qpdbiter->result;
4213 	}
4214 
4215 	if (qpdbiter->paused) {
4216 		resume_iteration(qpdbiter, true);
4217 	}
4218 
4219 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4220 
4221 	result = dns_qpiter_next(&qpdbiter->iter, NULL,
4222 				 (void **)&qpdbiter->node, NULL);
4223 
4224 	if (result == ISC_R_SUCCESS) {
4225 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
4226 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
4227 	} else {
4228 		INSIST(result == ISC_R_NOMORE);
4229 		qpdbiter->node = NULL;
4230 	}
4231 
4232 	qpdbiter->result = result;
4233 	return result;
4234 }
4235 
4236 static isc_result_t
4237 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
4238 		   dns_name_t *name DNS__DB_FLARG) {
4239 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
4240 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4241 	qpcnode_t *node = qpdbiter->node;
4242 
4243 	REQUIRE(qpdbiter->result == ISC_R_SUCCESS);
4244 	REQUIRE(node != NULL);
4245 
4246 	if (qpdbiter->paused) {
4247 		resume_iteration(qpdbiter, false);
4248 	}
4249 
4250 	if (name != NULL) {
4251 		dns_name_copy(&node->name, name);
4252 	}
4253 
4254 	newref(qpdb, node, isc_rwlocktype_none,
4255 	       qpdbiter->tree_locked DNS__DB_FLARG_PASS);
4256 
4257 	*nodep = qpdbiter->node;
4258 	return ISC_R_SUCCESS;
4259 }
4260 
4261 static isc_result_t
4262 dbiterator_pause(dns_dbiterator_t *iterator) {
4263 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
4264 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4265 
4266 	if (qpdbiter->result != ISC_R_SUCCESS &&
4267 	    qpdbiter->result != ISC_R_NOTFOUND &&
4268 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
4269 	    qpdbiter->result != ISC_R_NOMORE)
4270 	{
4271 		return qpdbiter->result;
4272 	}
4273 
4274 	if (qpdbiter->paused) {
4275 		return ISC_R_SUCCESS;
4276 	}
4277 
4278 	qpdbiter->paused = true;
4279 
4280 	if (qpdbiter->tree_locked == isc_rwlocktype_read) {
4281 		TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked);
4282 	}
4283 	INSIST(qpdbiter->tree_locked == isc_rwlocktype_none);
4284 
4285 	return ISC_R_SUCCESS;
4286 }
4287 
4288 static isc_result_t
4289 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
4290 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
4291 
4292 	if (qpdbiter->result != ISC_R_SUCCESS) {
4293 		return qpdbiter->result;
4294 	}
4295 
4296 	dns_name_copy(dns_rootname, name);
4297 	return ISC_R_SUCCESS;
4298 }
4299 
4300 static void
4301 deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
4302 	   void *data) {
4303 	dns_slabheader_t *header = data;
4304 	qpcache_t *qpdb = (qpcache_t *)header->db;
4305 
4306 	if (header->heap != NULL && header->heap_index != 0) {
4307 		isc_heap_delete(header->heap, header->heap_index);
4308 	}
4309 
4310 	update_rrsetstats(qpdb->rrsetstats, header->type,
4311 			  atomic_load_acquire(&header->attributes), false);
4312 
4313 	if (ISC_LINK_LINKED(header, link)) {
4314 		int idx = HEADERNODE(header)->locknum;
4315 		ISC_LIST_UNLINK(qpdb->lru[idx], header, link);
4316 	}
4317 
4318 	if (header->noqname != NULL) {
4319 		dns_slabheader_freeproof(db->mctx, &header->noqname);
4320 	}
4321 	if (header->closest != NULL) {
4322 		dns_slabheader_freeproof(db->mctx, &header->closest);
4323 	}
4324 }
4325 
4326 /*
4327  * Caller must be holding the node write lock.
4328  */
4329 static void
4330 expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
4331 		   isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
4332 		   isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) {
4333 	isc_heap_t *heap = qpdb->heaps[locknum];
4334 
4335 	for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) {
4336 		dns_slabheader_t *header = isc_heap_element(heap, 1);
4337 
4338 		if (header == NULL) {
4339 			/* No headers left on this TTL heap; exit cleaning */
4340 			return;
4341 		}
4342 
4343 		dns_ttl_t ttl = header->ttl;
4344 
4345 		if (!cache_is_overmem) {
4346 			/* Only account for stale TTL if cache is not overmem */
4347 			ttl += STALE_TTL(header, qpdb);
4348 		}
4349 
4350 		if (ttl >= now - QPDB_VIRTUAL) {
4351 			/*
4352 			 * The header at the top of this TTL heap is not yet
4353 			 * eligible for expiry, so none of the other headers on
4354 			 * the same heap can be eligible for expiry, either;
4355 			 * exit cleaning.
4356 			 */
4357 			return;
4358 		}
4359 
4360 		expireheader(header, nlocktypep, tlocktypep,
4361 			     dns_expire_ttl DNS__DB_FLARG_PASS);
4362 	}
4363 }
4364 
4365 static void
4366 setmaxrrperset(dns_db_t *db, uint32_t value) {
4367 	qpcache_t *qpdb = (qpcache_t *)db;
4368 
4369 	REQUIRE(VALID_QPDB(qpdb));
4370 
4371 	qpdb->maxrrperset = value;
4372 }
4373 
4374 static void
4375 setmaxtypepername(dns_db_t *db, uint32_t value) {
4376 	qpcache_t *qpdb = (qpcache_t *)db;
4377 
4378 	REQUIRE(VALID_QPDB(qpdb));
4379 
4380 	qpdb->maxtypepername = value;
4381 }
4382 
4383 static dns_dbmethods_t qpdb_cachemethods = {
4384 	.destroy = qpdb_destroy,
4385 	.findnode = findnode,
4386 	.find = find,
4387 	.findzonecut = findzonecut,
4388 	.attachnode = attachnode,
4389 	.detachnode = detachnode,
4390 	.createiterator = createiterator,
4391 	.findrdataset = findrdataset,
4392 	.allrdatasets = allrdatasets,
4393 	.addrdataset = addrdataset,
4394 	.deleterdataset = deleterdataset,
4395 	.nodecount = nodecount,
4396 	.getoriginnode = getoriginnode,
4397 	.getrrsetstats = getrrsetstats,
4398 	.setcachestats = setcachestats,
4399 	.setservestalettl = setservestalettl,
4400 	.getservestalettl = getservestalettl,
4401 	.setservestalerefresh = setservestalerefresh,
4402 	.getservestalerefresh = getservestalerefresh,
4403 	.locknode = locknode,
4404 	.unlocknode = unlocknode,
4405 	.expiredata = expiredata,
4406 	.deletedata = deletedata,
4407 	.setmaxrrperset = setmaxrrperset,
4408 	.setmaxtypepername = setmaxtypepername,
4409 };
4410 
4411 static void
4412 qpcnode_destroy(qpcnode_t *data) {
4413 	dns_slabheader_t *current = NULL, *next = NULL;
4414 
4415 	for (current = data->data; current != NULL; current = next) {
4416 		dns_slabheader_t *down = current->down, *down_next = NULL;
4417 
4418 		next = current->next;
4419 
4420 		for (down = current->down; down != NULL; down = down_next) {
4421 			down_next = down->down;
4422 			dns_slabheader_destroy(&down);
4423 		}
4424 
4425 		dns_slabheader_destroy(&current);
4426 	}
4427 
4428 	dns_name_free(&data->name, data->mctx);
4429 	isc_mem_putanddetach(&data->mctx, data, sizeof(qpcnode_t));
4430 }
4431 
4432 #ifdef DNS_DB_NODETRACE
4433 ISC_REFCOUNT_STATIC_TRACE_IMPL(qpcnode, qpcnode_destroy);
4434 #else
4435 ISC_REFCOUNT_STATIC_IMPL(qpcnode, qpcnode_destroy);
4436 #endif
4437