1 /* $NetBSD: rbtdb.c,v 1.1 2024/02/18 20:57:33 christos Exp $ */
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16 /*! \file */
17
18 #include <ctype.h>
19 #include <inttypes.h>
20 #include <stdbool.h>
21
22 #include <isc/atomic.h>
23 #include <isc/crc64.h>
24 #include <isc/event.h>
25 #include <isc/file.h>
26 #include <isc/hash.h>
27 #include <isc/heap.h>
28 #include <isc/hex.h>
29 #include <isc/mem.h>
30 #include <isc/mutex.h>
31 #include <isc/once.h>
32 #include <isc/platform.h>
33 #include <isc/print.h>
34 #include <isc/random.h>
35 #include <isc/refcount.h>
36 #include <isc/rwlock.h>
37 #include <isc/serial.h>
38 #include <isc/socket.h>
39 #include <isc/stdio.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/callbacks.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/time.h>
64 #include <dns/version.h>
65 #include <dns/view.h>
66 #include <dns/zone.h>
67 #include <dns/zonekey.h>
68
69 #ifndef WIN32
70 #include <sys/mman.h>
71 #else /* ifndef WIN32 */
72 #define PROT_READ 0x01
73 #define PROT_WRITE 0x02
74 #define MAP_PRIVATE 0x0002
75 #define MAP_FAILED ((void *)-1)
76 #endif /* ifndef WIN32 */
77
78 #include "rbtdb.h"
79
/* Implementation magic that VALID_RBTDB() compares with common.impmagic. */
#define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')

/*
 * Evaluate 'op' once, leave its value in the caller's 'result' variable and
 * jump to the caller's 'failure' label when it is not ISC_R_SUCCESS.
 */
#define CHECK(op)                              \
	do {                                   \
		result = (op);                 \
		if (result != ISC_R_SUCCESS) { \
			goto failure;          \
		}                              \
	} while (0)
88
89 /*
90 * This is the map file header for RBTDB images. It is populated, and then
91 * written, as the LAST thing done to the file. Writing this last (with
92 * zeros in the header area initially) will ensure that the header is only
93 * valid when the RBTDB image is also valid.
94 */
typedef struct rbtdb_file_header rbtdb_file_header_t;

/*
 * Size reserved for the header in the file. It is a fixed constant and does
 * not follow sizeof(rbtdb_file_header_t).
 */
#define RBTDB_HEADER_LENGTH 1024

/*
 * Map file header. The field order must not change: the structure describes
 * data stored in the image file.
 */
struct rbtdb_file_header {
	/* Version string; must equal 'version2' below. */
	char version1[32];
	/* NOTE(review): presumably the writer's pointer size - confirm. */
	uint32_t ptrsize;
	/* NOTE(review): presumably set when the writer was big-endian. */
	unsigned int bigendian : 1;
	/*
	 * One value per tree of the database (main, NSEC, NSEC3).
	 * NOTE(review): presumably offsets into the image - confirm.
	 */
	uint64_t tree;
	uint64_t nsec;
	uint64_t nsec3;

	/* Repeated copy of 'version1'; the two must match. */
	char version2[32];
};
110
/*%
 * True when 'rbtdb' is non-NULL and carries the RBTDB implementation magic.
 *
 * "impmagic" is not the first four bytes of the struct, so ISC_MAGIC_VALID
 * cannot be used here.
 */
#define VALID_RBTDB(rbtdb) \
	((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC)
117
typedef uint32_t rbtdb_serial_t;
/* A pair of 16-bit type codes: "base" in the low half, "ext" in the high. */
typedef uint32_t rbtdb_rdatatype_t;

/* Low 16 bits of a packed type. */
#define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type)&0xFFFF))
/* High 16 bits of a packed type. */
#define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
/*
 * Pack 'base' and 'ext' into one rbtdb_rdatatype_t. Both arguments are
 * parenthesized before the cast so that an expression argument is evaluated
 * as a whole, and the result cast covers the complete value.
 */
#define RBTDB_RDATATYPE_VALUE(base, ext)                 \
	((rbtdb_rdatatype_t)((((uint32_t)(ext)) << 16) | \
			     (((uint32_t)(base)) & 0xffff)))
126
/*
 * Pre-packed type values: RRSIG as the base type with the named type in the
 * extension half.
 */
#define RBTDB_RDATATYPE_SIGNSEC \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_SIGDS \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
#define RBTDB_RDATATYPE_SIGSOA \
	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa)
/* Base type 0 with ANY in the extension half. */
#define RBTDB_RDATATYPE_NCACHEANY RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
142
/* Thin wrappers mapping the database lock operations onto isc_rwlock. */
#define RBTDB_INITLOCK(l)    isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)     RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)   RWUNLOCK((l), (t))
147
148 /*
149 * Since node locking is sensitive to both performance and memory footprint,
150 * we need some trick here. If we have both high-performance rwlock and
151 * high performance and small-memory reference counters, we use rwlock for
152 * node lock and isc_refcount for node references. In this case, we don't have
153 * to protect the access to the counters by locks.
154 * Otherwise, we simply use ordinary mutex lock for node locking, and use
155 * simple integers as reference counters which is protected by the lock.
156 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
157 * NODE_UNLOCK. In some other cases, however, we need to protect reference
158 * counters first and then protect other parts of a node as read-only data.
159 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
160 * provided for these special cases. When we can use the efficient backend
161 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
162 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
163 * section including the access to the reference counter.
164 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
165 * section is also protected by NODE_STRONGLOCK().
166 */
167 typedef isc_rwlock_t nodelock_t;
168
169 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
170 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
171 #define NODE_LOCK(l, t) RWLOCK((l), (t))
172 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
173 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
174 #define NODE_DOWNGRADE(l) isc_rwlock_downgrade(l)
175
/*%
 * Whether LRU updates are rate-limited to avoid possible thread contention.
 * Updating the LRU requires write locking, so it is not done every time a
 * record is touched - only after some time has passed. May be overridden
 * at compilation time.
 */
#ifndef DNS_RBTDB_LIMITLRUUPDATE
#define DNS_RBTDB_LIMITLRUUPDATE 1
#endif

/*% Time after which we update LRU for glue records, 5 minutes */
#define DNS_RBTDB_LRUUPDATE_GLUE 300
/*% Time after which we update LRU for all other records, 10 minutes */
#define DNS_RBTDB_LRUUPDATE_REGULAR 600

/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL 300
195
196 struct noqname {
197 dns_name_t name;
198 void *neg;
199 void *negsig;
200 dns_rdatatype_t type;
201 };
202
203 typedef struct rdatasetheader {
204 /*%
205 * Locked by the owning node's lock.
206 */
207 rbtdb_serial_t serial;
208 dns_ttl_t rdh_ttl;
209 rbtdb_rdatatype_t type;
210 atomic_uint_least16_t attributes;
211 dns_trust_t trust;
212 atomic_uint_fast32_t last_refresh_fail_ts;
213 struct noqname *noqname;
214 struct noqname *closest;
215 unsigned int is_mmapped : 1;
216 unsigned int next_is_relative : 1;
217 unsigned int node_is_relative : 1;
218 unsigned int resign_lsb : 1;
219 /*%<
220 * We don't use the LIST macros, because the LIST structure has
221 * both head and tail pointers, and is doubly linked.
222 */
223
224 struct rdatasetheader *next;
225 /*%<
226 * If this is the top header for an rdataset, 'next' points
227 * to the top header for the next rdataset (i.e., the next type).
228 * Otherwise, it points up to the header whose down pointer points
229 * at this header.
230 */
231
232 struct rdatasetheader *down;
233 /*%<
234 * Points to the header for the next older version of
235 * this rdataset.
236 */
237
238 atomic_uint_fast32_t count;
239 /*%<
240 * Monotonously increased every time this rdataset is bound so that
241 * it is used as the base of the starting point in DNS responses
242 * when the "cyclic" rrset-order is required.
243 */
244
245 dns_rbtnode_t *node;
246 isc_stdtime_t last_used;
247 ISC_LINK(struct rdatasetheader) link;
248
249 unsigned int heap_index;
250 /*%<
251 * Used for TTL-based cache cleaning.
252 */
253 isc_stdtime_t resign;
254 /*%<
255 * Case vector. If the bit is set then the corresponding
256 * character in the owner name needs to be AND'd with 0x20,
257 * rendering that character upper case.
258 */
259 unsigned char upper[32];
260 } rdatasetheader_t;
261
262 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
263 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
264
/* Bits stored in rdatasetheader_t.attributes. */
#define RDATASET_ATTR_NONEXISTENT    0x0001
/*%< May be potentially served as stale data. */
#define RDATASET_ATTR_STALE	     0x0002
#define RDATASET_ATTR_IGNORE	     0x0004
#define RDATASET_ATTR_RETAIN	     0x0008
#define RDATASET_ATTR_NXDOMAIN	     0x0010
#define RDATASET_ATTR_RESIGN	     0x0020
#define RDATASET_ATTR_STATCOUNT	     0x0040
#define RDATASET_ATTR_OPTOUT	     0x0080
#define RDATASET_ATTR_NEGATIVE	     0x0100
#define RDATASET_ATTR_PREFETCH	     0x0200
#define RDATASET_ATTR_CASESET	     0x0400
#define RDATASET_ATTR_ZEROTTL	     0x0800
#define RDATASET_ATTR_CASEFULLYLOWER 0x1000
/*%< Ancient - awaiting cleanup. */
#define RDATASET_ATTR_ANCIENT	     0x2000
#define RDATASET_ATTR_STALE_WINDOW   0x4000
282
283 /*
284 * XXX
285 * When the cache will pre-expire data (due to memory low or other
286 * situations) before the rdataset's TTL has expired, it MUST
287 * respect the RETAIN bit and not expire the data until its TTL is
288 * expired.
289 */
290
#undef IGNORE /* WIN32 winbase.h defines this. */

/*
 * Attribute predicates. Each one reads header->attributes through
 * RDATASET_ATTR_GET() (an atomic_load_acquire) and tests a single flag.
 * EXISTS() is the only one that is true when its flag is clear.
 */
#define EXISTS(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_NONEXISTENT) != 0)
#define IGNORE(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_IGNORE) != 0)
#define RETAIN(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_NXDOMAIN) != 0)
#define STALE(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_STALE) != 0)
#define STALE_WINDOW(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_STALE_WINDOW) != 0)
#define RESIGN(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_RESIGN) != 0)
#define OPTOUT(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_OPTOUT) != 0)
#define NEGATIVE(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_NEGATIVE) != 0)
#define PREFETCH(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_PREFETCH) != 0)
#define CASESET(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_CASESET) != 0)
#define ZEROTTL(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_ZEROTTL) != 0)
#define CASEFULLYLOWER(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_CASEFULLYLOWER) != 0)
#define ANCIENT(header) (RDATASET_ATTR_GET(header, RDATASET_ATTR_ANCIENT) != 0)
#define STATCOUNT(header) \
	(RDATASET_ATTR_GET(header, RDATASET_ATTR_STATCOUNT) != 0)
341
/*
 * Atomic accessors for header->attributes.
 *
 * RDATASET_ATTR_GET yields the masked attribute bits (acquire load).
 * RDATASET_ATTR_SET ORs 'attribute' in and RDATASET_ATTR_CLR masks it out
 * (release read-modify-write). 'attribute' is parenthesized everywhere so
 * that a combined mask such as (A | B) is applied as a whole.
 */
#define RDATASET_ATTR_GET(header, attribute) \
	(atomic_load_acquire(&(header)->attributes) & (attribute))
#define RDATASET_ATTR_SET(header, attribute) \
	atomic_fetch_or_release(&(header)->attributes, (attribute))
#define RDATASET_ATTR_CLR(header, attribute) \
	atomic_fetch_and_release(&(header)->attributes, ~(attribute))

/*
 * True while the header's rdh_ttl lies after 'now', or equals 'now' and the
 * header is flagged ZEROTTL.
 */
#define ACTIVE(header, now)             \
	(((header)->rdh_ttl > (now)) || \
	 ((header)->rdh_ttl == (now) && ZEROTTL(header)))
352
#define DEFAULT_NODE_LOCK_COUNT	    7 /*%< Should be prime. */

/* Glue table sizing, expressed in hash bits. */
#define RBTDB_GLUE_TABLE_INIT_BITS  2U
#define RBTDB_GLUE_TABLE_MAX_BITS   32U
#define RBTDB_GLUE_TABLE_OVERCOMMIT 3

/* Multiplier used by hash_32(). */
#define GOLDEN_RATIO_32 0x61C88647
/* Number of slots addressed by a 'bits'-bit hash (computed in 64 bits). */
#define HASHSIZE(bits)	(UINT64_C(1) << (bits))
360
361 static uint32_t
hash_32(uint32_t val,unsigned int bits)362 hash_32(uint32_t val, unsigned int bits) {
363 REQUIRE(bits <= RBTDB_GLUE_TABLE_MAX_BITS);
364 /* High bits are more random. */
365 return (val * GOLDEN_RATIO_32 >> (32 - bits));
366 }
367
/* Option tests on an iterator's common.options bit mask. */
#define EXPIREDOK(rbtiterator) \
	(((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0)

#define STALEOK(rbtiterator) \
	(((rbtiterator)->common.options & DNS_DB_STALEOK) != 0)
373
374 /*%
375 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
376 * There is a tradeoff issue about configuring this value: if this is too
377 * small, it may cause heavier contention between threads; if this is too large,
378 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
379 * The default value should work well for most environments, but this can
380 * also be configurable at compilation time via the
381 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
382 * 1 due to the assumption of overmem_purge().
383 */
/*
 * Use the compile-time override when one is supplied (it must be larger
 * than 1); otherwise fall back to 17.
 */
#ifndef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#define DEFAULT_CACHE_NODE_LOCK_COUNT 17
#elif DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT > 1 */
#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
393
394 typedef struct {
395 nodelock_t lock;
396 /* Protected in the refcount routines. */
397 isc_refcount_t references;
398 /* Locked by lock. */
399 bool exiting;
400 } rbtdb_nodelock_t;
401
402 typedef struct rbtdb_changed {
403 dns_rbtnode_t *node;
404 bool dirty;
405 ISC_LINK(struct rbtdb_changed) link;
406 } rbtdb_changed_t;
407
408 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
409
/* Security status recorded for a version (rbtdb_version_t.secure). */
typedef enum {
	dns_db_insecure,
	dns_db_partial,
	dns_db_secure
} dns_db_secure_t;

typedef struct dns_rbtdb dns_rbtdb_t;

/* Reason for expiring a record from cache */
typedef enum {
	expire_lru,
	expire_ttl,
	expire_flush
} expire_t;

typedef struct rbtdb_glue rbtdb_glue_t;
418
419 typedef struct rbtdb_glue_table_node {
420 struct rbtdb_glue_table_node *next;
421 dns_rbtnode_t *node;
422 rbtdb_glue_t *glue_list;
423 } rbtdb_glue_table_node_t;
424
/* Freshness classes for an rdataset, in declaration order 0, 1, 2. */
typedef enum {
	rdataset_ttl_fresh = 0,
	rdataset_ttl_stale,
	rdataset_ttl_ancient
} rdataset_ttl_t;
430
431 typedef struct rbtdb_version {
432 /* Not locked */
433 rbtdb_serial_t serial;
434 dns_rbtdb_t *rbtdb;
435 /*
436 * Protected in the refcount routines.
437 * XXXJT: should we change the lock policy based on the refcount
438 * performance?
439 */
440 isc_refcount_t references;
441 /* Locked by database lock. */
442 bool writer;
443 bool commit_ok;
444 rbtdb_changedlist_t changed_list;
445 rdatasetheaderlist_t resigned_list;
446 ISC_LINK(struct rbtdb_version) link;
447 dns_db_secure_t secure;
448 bool havensec3;
449 /* NSEC3 parameters */
450 dns_hash_t hash;
451 uint8_t flags;
452 uint16_t iterations;
453 uint8_t salt_length;
454 unsigned char salt[DNS_NSEC3_SALTSIZE];
455
456 /*
457 * records and xfrsize are covered by rwlock.
458 */
459 isc_rwlock_t rwlock;
460 uint64_t records;
461 uint64_t xfrsize;
462
463 isc_rwlock_t glue_rwlock;
464 size_t glue_table_bits;
465 size_t glue_table_nodecount;
466 rbtdb_glue_table_node_t **glue_table;
467 } rbtdb_version_t;
468
469 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
470
471 struct dns_rbtdb {
472 /* Unlocked. */
473 dns_db_t common;
474 /* Locks the data in this struct */
475 isc_rwlock_t lock;
476 /* Locks the tree structure (prevents nodes appearing/disappearing) */
477 isc_rwlock_t tree_lock;
478 /* Locks for individual tree nodes */
479 unsigned int node_lock_count;
480 rbtdb_nodelock_t *node_locks;
481 dns_rbtnode_t *origin_node;
482 dns_rbtnode_t *nsec3_origin_node;
483 dns_stats_t *rrsetstats; /* cache DB only */
484 isc_stats_t *cachestats; /* cache DB only */
485 isc_stats_t *gluecachestats; /* zone DB only */
486 /* Locked by lock. */
487 unsigned int active;
488 isc_refcount_t references;
489 unsigned int attributes;
490 rbtdb_serial_t current_serial;
491 rbtdb_serial_t least_serial;
492 rbtdb_serial_t next_serial;
493 rbtdb_version_t *current_version;
494 rbtdb_version_t *future_version;
495 rbtdb_versionlist_t open_versions;
496 isc_task_t *task;
497 dns_dbnode_t *soanode;
498 dns_dbnode_t *nsnode;
499
500 /*
501 * Maximum length of time to keep using a stale answer past its
502 * normal TTL expiry.
503 */
504 dns_ttl_t serve_stale_ttl;
505
506 /*
507 * The time after a failed lookup, where stale answers from cache
508 * may be used directly in a DNS response without attempting a
509 * new iterative lookup.
510 */
511 uint32_t serve_stale_refresh;
512
513 /*
514 * This is a linked list used to implement the LRU cache. There will
515 * be node_lock_count linked lists here. Nodes in bucket 1 will be
516 * placed on the linked list rdatasets[1].
517 */
518 rdatasetheaderlist_t *rdatasets;
519
520 /*%
521 * Temporary storage for stale cache nodes and dynamically deleted
522 * nodes that await being cleaned up.
523 */
524 rbtnodelist_t *deadnodes;
525
526 /* List of nodes from which recursive tree pruning can be started from.
527 * Locked by tree_lock. */
528 rbtnodelist_t prunenodes;
529
530 /*
531 * Heaps. These are used for TTL based expiry in a cache,
532 * or for zone resigning in a zone DB. hmctx is the memory
533 * context to use for the heap (which differs from the main
534 * database memory context in the case of a cache).
535 */
536 isc_mem_t *hmctx;
537 isc_heap_t **heaps;
538
539 /*
540 * Base values for the mmap() code.
541 */
542 void *mmap_location;
543 size_t mmap_size;
544
545 /* Locked by tree_lock. */
546 dns_rbt_t *tree;
547 dns_rbt_t *nsec;
548 dns_rbt_t *nsec3;
549
550 /* Unlocked */
551 unsigned int quantum;
552 };
553
/* Loading state flags. */
#define RBTDB_ATTR_LOADED  0x01
#define RBTDB_ATTR_LOADING 0x02

/* True when the database has a non-zero serve_stale_ttl. */
#define KEEPSTALE(rbtdb) ((rbtdb)->serve_stale_ttl > 0)
558
559 /*%
560 * Search Context
561 */
562 typedef struct {
563 dns_rbtdb_t *rbtdb;
564 rbtdb_version_t *rbtversion;
565 rbtdb_serial_t serial;
566 unsigned int options;
567 dns_rbtnodechain_t chain;
568 bool copy_name;
569 bool need_cleanup;
570 bool wild;
571 dns_rbtnode_t *zonecut;
572 rdatasetheader_t *zonecut_rdataset;
573 rdatasetheader_t *zonecut_sigrdataset;
574 dns_fixedname_t zonecut_name;
575 isc_stdtime_t now;
576 } rbtdb_search_t;
577
578 /*%
579 * Load Context
580 */
581 typedef struct {
582 dns_rbtdb_t *rbtdb;
583 isc_stdtime_t now;
584 } rbtdb_load_t;
585
586 static void
587 delete_callback(void *data, void *arg);
588 static void
589 rdataset_disassociate(dns_rdataset_t *rdataset);
590 static isc_result_t
591 rdataset_first(dns_rdataset_t *rdataset);
592 static isc_result_t
593 rdataset_next(dns_rdataset_t *rdataset);
594 static void
595 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
596 static void
597 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
598 static unsigned int
599 rdataset_count(dns_rdataset_t *rdataset);
600 static isc_result_t
601 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
602 dns_rdataset_t *neg, dns_rdataset_t *negsig);
603 static isc_result_t
604 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
605 dns_rdataset_t *neg, dns_rdataset_t *negsig);
606 static bool
607 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now);
608 static void
609 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now);
610 static void
611 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
612 expire_t reason);
613 static void
614 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
615 bool tree_locked);
616 static void
617 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader);
618 static void
619 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
620 rdatasetheader_t *header);
621 static void
622 prune_tree(isc_task_t *task, isc_event_t *event);
623 static void
624 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
625 static void
626 rdataset_expire(dns_rdataset_t *rdataset);
627 static void
628 rdataset_clearprefetch(dns_rdataset_t *rdataset);
629 static void
630 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name);
631 static void
632 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name);
633 static isc_result_t
634 rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version,
635 dns_message_t *msg);
636 static void
637 free_gluetable(rbtdb_version_t *version);
638 static isc_result_t
639 nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name);
640
641 static dns_rdatasetmethods_t rdataset_methods = { rdataset_disassociate,
642 rdataset_first,
643 rdataset_next,
644 rdataset_current,
645 rdataset_clone,
646 rdataset_count,
647 NULL, /* addnoqname */
648 rdataset_getnoqname,
649 NULL, /* addclosest */
650 rdataset_getclosest,
651 rdataset_settrust,
652 rdataset_expire,
653 rdataset_clearprefetch,
654 rdataset_setownercase,
655 rdataset_getownercase,
656 rdataset_addglue };
657
658 static dns_rdatasetmethods_t slab_methods = {
659 rdataset_disassociate,
660 rdataset_first,
661 rdataset_next,
662 rdataset_current,
663 rdataset_clone,
664 rdataset_count,
665 NULL, /* addnoqname */
666 NULL, /* getnoqname */
667 NULL, /* addclosest */
668 NULL, /* getclosest */
669 NULL, /* settrust */
670 NULL, /* expire */
671 NULL, /* clearprefetch */
672 NULL, /* setownercase */
673 NULL, /* getownercase */
674 NULL /* addglue */
675 };
676
677 static void
678 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
679 static isc_result_t
680 rdatasetiter_first(dns_rdatasetiter_t *iterator);
681 static isc_result_t
682 rdatasetiter_next(dns_rdatasetiter_t *iterator);
683 static void
684 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset);
685
686 static dns_rdatasetitermethods_t rdatasetiter_methods = {
687 rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next,
688 rdatasetiter_current
689 };
690
691 typedef struct rbtdb_rdatasetiter {
692 dns_rdatasetiter_t common;
693 rdatasetheader_t *current;
694 } rbtdb_rdatasetiter_t;
695
696 /*
697 * Note that these iterators, unless created with either DNS_DB_NSEC3ONLY or
698 * DNS_DB_NONSEC3, will transparently move between the last node of the
699 * "regular" RBT ("chain" field) and the root node of the NSEC3 RBT
700 * ("nsec3chain" field) of the database in question, as if the latter was a
701 * successor to the former in lexical order. The "current" field always holds
702 * the address of either "chain" or "nsec3chain", depending on which RBT is
703 * being traversed at given time.
704 */
705 static void
706 dbiterator_destroy(dns_dbiterator_t **iteratorp);
707 static isc_result_t
708 dbiterator_first(dns_dbiterator_t *iterator);
709 static isc_result_t
710 dbiterator_last(dns_dbiterator_t *iterator);
711 static isc_result_t
712 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name);
713 static isc_result_t
714 dbiterator_prev(dns_dbiterator_t *iterator);
715 static isc_result_t
716 dbiterator_next(dns_dbiterator_t *iterator);
717 static isc_result_t
718 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
719 dns_name_t *name);
720 static isc_result_t
721 dbiterator_pause(dns_dbiterator_t *iterator);
722 static isc_result_t
723 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name);
724
725 static dns_dbiteratormethods_t dbiterator_methods = {
726 dbiterator_destroy, dbiterator_first, dbiterator_last,
727 dbiterator_seek, dbiterator_prev, dbiterator_next,
728 dbiterator_current, dbiterator_pause, dbiterator_origin
729 };
730
731 #define DELETION_BATCH_MAX 64
732
733 /*
734 * If 'paused' is true, then the tree lock is not being held.
735 */
736 typedef struct rbtdb_dbiterator {
737 dns_dbiterator_t common;
738 bool paused;
739 bool new_origin;
740 isc_rwlocktype_t tree_locked;
741 isc_result_t result;
742 dns_fixedname_t name;
743 dns_fixedname_t origin;
744 dns_rbtnodechain_t chain;
745 dns_rbtnodechain_t nsec3chain;
746 dns_rbtnodechain_t *current;
747 dns_rbtnode_t *node;
748 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
749 int delcnt;
750 bool nsec3only;
751 bool nonsec3;
752 } rbtdb_dbiterator_t;
753
/* Database kind tests on common.attributes. */
#define IS_STUB(rbtdb)	(((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
756
757 static void
758 free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event);
759 static void
760 overmem(dns_db_t *db, bool over);
761 static void
762 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
763 static void
764 setownercase(rdatasetheader_t *header, const dns_name_t *name);
765
766 static bool
767 match_header_version(rbtdb_file_header_t *header);
768
769 /* Pad to 32 bytes */
770 static char FILE_VERSION[32] = "\0";
771
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *      Both init_count and header->count can be UINT32_MAX.
 *      The count on the returned rdataset however can't be, as
 *      that indicates that the database does not implement cyclic
 *      processing.
 */
static atomic_uint_fast32_t init_count = 0;
784
785 /*
786 * Locking
787 *
788 * If a routine is going to lock more than one lock in this module, then
789 * the locking must be done in the following order:
790 *
791 * Tree Lock
792 *
793 * Node Lock (Only one from the set may be locked at one time by
794 * any caller)
795 *
796 * Database Lock
797 *
798 * Failure to follow this hierarchy can result in deadlock.
799 */
800
801 /*
802 * Deleting Nodes
803 *
804 * For zone databases the node for the origin of the zone MUST NOT be deleted.
805 */
806
807 /*
808 * Debugging routines
809 */
#ifdef DEBUG
/*
 * Write "<desc>: " followed by the hex encoding of 'size' bytes starting at
 * 'data' and a newline to stderr. The input is converted in passes of at
 * most BUFSIZ bytes; one pass always runs, even when 'size' is 0.
 */
static void
hexdump(const char *desc, unsigned char *data, size_t size) {
	/* Two hex digits per input byte plus the terminating NUL. */
	char text[BUFSIZ * 2 + 1];

	fprintf(stderr, "%s: ", desc);
	for (;;) {
		isc_buffer_t out;
		isc_region_t in;
		isc_result_t result;
		size_t chunk = (size > BUFSIZ) ? BUFSIZ : size;

		isc_buffer_init(&out, text, sizeof(text));
		in.base = data;
		in.length = chunk;
		result = isc_hex_totext(&in, 0, "", &out);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
		isc_buffer_putuint8(&out, 0);
		fprintf(stderr, "%s", text);

		data += chunk;
		size -= chunk;
		if (size == 0) {
			break;
		}
	}
	fprintf(stderr, "\n");
}
#endif /* ifdef DEBUG */
834
/* Fixed RRSet helper macros */

/*
 * Defined without a trailing semicolon so the macro can be used inside a
 * larger expression, not only as the last token of a statement.
 */
#define DNS_RDATASET_LENGTH 2

#if DNS_RDATASET_FIXED
#define DNS_RDATASET_ORDER 2
/* Expands in terms of a variable named 'count' at the point of use. */
#define DNS_RDATASET_COUNT (count * 4)
#else /* !DNS_RDATASET_FIXED */
#define DNS_RDATASET_ORDER 0
#define DNS_RDATASET_COUNT 0
#endif /* DNS_RDATASET_FIXED */
846
847 /*
848 * DB Routines
849 */
850
851 static void
attach(dns_db_t * source,dns_db_t ** targetp)852 attach(dns_db_t *source, dns_db_t **targetp) {
853 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
854
855 REQUIRE(VALID_RBTDB(rbtdb));
856
857 isc_refcount_increment(&rbtdb->references);
858
859 *targetp = source;
860 }
861
862 static void
free_rbtdb_callback(isc_task_t * task,isc_event_t * event)863 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
864 dns_rbtdb_t *rbtdb = event->ev_arg;
865
866 UNUSED(task);
867
868 free_rbtdb(rbtdb, true, event);
869 }
870
871 static void
update_cachestats(dns_rbtdb_t * rbtdb,isc_result_t result)872 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
873 INSIST(IS_CACHE(rbtdb));
874
875 if (rbtdb->cachestats == NULL) {
876 return;
877 }
878
879 switch (result) {
880 case ISC_R_SUCCESS:
881 case DNS_R_CNAME:
882 case DNS_R_DNAME:
883 case DNS_R_DELEGATION:
884 case DNS_R_NCACHENXDOMAIN:
885 case DNS_R_NCACHENXRRSET:
886 isc_stats_increment(rbtdb->cachestats,
887 dns_cachestatscounter_hits);
888 break;
889 default:
890 isc_stats_increment(rbtdb->cachestats,
891 dns_cachestatscounter_misses);
892 }
893 }
894
895 static bool
do_stats(rdatasetheader_t * header)896 do_stats(rdatasetheader_t *header) {
897 return (EXISTS(header) && STATCOUNT(header));
898 }
899
900 static void
update_rrsetstats(dns_rbtdb_t * rbtdb,const rbtdb_rdatatype_t htype,const uint_least16_t hattributes,const bool increment)901 update_rrsetstats(dns_rbtdb_t *rbtdb, const rbtdb_rdatatype_t htype,
902 const uint_least16_t hattributes, const bool increment) {
903 dns_rdatastatstype_t statattributes = 0;
904 dns_rdatastatstype_t base = 0;
905 dns_rdatastatstype_t type;
906 rdatasetheader_t *header = &(rdatasetheader_t){
907 .type = htype,
908 .attributes = hattributes,
909 };
910
911 if (!do_stats(header)) {
912 return;
913 }
914
915 /* At the moment we count statistics only for cache DB */
916 INSIST(IS_CACHE(rbtdb));
917
918 if (NEGATIVE(header)) {
919 if (NXDOMAIN(header)) {
920 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
921 } else {
922 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
923 base = RBTDB_RDATATYPE_EXT(header->type);
924 }
925 } else {
926 base = RBTDB_RDATATYPE_BASE(header->type);
927 }
928
929 if (STALE(header)) {
930 statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE;
931 }
932 if (ANCIENT(header)) {
933 statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT;
934 }
935
936 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
937 if (increment) {
938 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
939 } else {
940 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
941 }
942 }
943
944 static void
set_ttl(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,dns_ttl_t newttl)945 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
946 int idx;
947 isc_heap_t *heap;
948 dns_ttl_t oldttl;
949
950 if (!IS_CACHE(rbtdb)) {
951 header->rdh_ttl = newttl;
952 return;
953 }
954
955 oldttl = header->rdh_ttl;
956 header->rdh_ttl = newttl;
957
958 /*
959 * It's possible the rbtdb is not a cache. If this is the case,
960 * we will not have a heap, and we move on. If we do, though,
961 * we might need to adjust things.
962 */
963 if (header->heap_index == 0 || newttl == oldttl) {
964 return;
965 }
966 idx = header->node->locknum;
967 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL) {
968 return;
969 }
970 heap = rbtdb->heaps[idx];
971
972 if (newttl < oldttl) {
973 isc_heap_increased(heap, header->heap_index);
974 } else {
975 isc_heap_decreased(heap, header->heap_index);
976 }
977 }
978
979 /*%
980 * These functions allow the heap code to rank the priority of each
981 * element. It returns true if v1 happens "sooner" than v2.
982 */
983 static bool
ttl_sooner(void * v1,void * v2)984 ttl_sooner(void *v1, void *v2) {
985 rdatasetheader_t *h1 = v1;
986 rdatasetheader_t *h2 = v2;
987
988 return (h1->rdh_ttl < h2->rdh_ttl);
989 }
990
991 /*%
992 * Return which RRset should be resigned sooner. If the RRsets have the
993 * same signing time, prefer the other RRset over the SOA RRset.
994 */
995 static bool
resign_sooner(void * v1,void * v2)996 resign_sooner(void *v1, void *v2) {
997 rdatasetheader_t *h1 = v1;
998 rdatasetheader_t *h2 = v2;
999
1000 return (h1->resign < h2->resign ||
1001 (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) ||
1002 (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb &&
1003 h2->type == RBTDB_RDATATYPE_SIGSOA));
1004 }
1005
1006 /*%
1007 * This function sets the heap index into the header.
1008 */
1009 static void
set_index(void * what,unsigned int idx)1010 set_index(void *what, unsigned int idx) {
1011 rdatasetheader_t *h = what;
1012
1013 h->heap_index = idx;
1014 }
1015
1016 /*%
1017 * Work out how many nodes can be deleted in the time between two
1018 * requests to the nameserver. Smooth the resulting number and use it
1019 * as a estimate for the number of nodes to be deleted in the next
1020 * iteration.
1021 */
1022 static unsigned int
adjust_quantum(unsigned int old,isc_time_t * start)1023 adjust_quantum(unsigned int old, isc_time_t *start) {
1024 unsigned int pps = dns_pps; /* packets per second */
1025 unsigned int interval;
1026 uint64_t usecs;
1027 isc_time_t end;
1028 unsigned int nodes;
1029
1030 if (pps < 100) {
1031 pps = 100;
1032 }
1033 isc_time_now(&end);
1034
1035 interval = 1000000 / pps; /* interval in usec */
1036 if (interval == 0) {
1037 interval = 1;
1038 }
1039 usecs = isc_time_microdiff(&end, start);
1040 if (usecs == 0) {
1041 /*
1042 * We were unable to measure the amount of time taken.
1043 * Double the nodes deleted next time.
1044 */
1045 old *= 2;
1046 if (old > 1000) {
1047 old = 1000;
1048 }
1049 return (old);
1050 }
1051 nodes = old * interval;
1052 nodes /= (unsigned int)usecs;
1053 if (nodes == 0) {
1054 nodes = 1;
1055 } else if (nodes > 1000) {
1056 nodes = 1000;
1057 }
1058
1059 /* Smooth */
1060 nodes = (nodes + old * 3) / 4;
1061
1062 if (nodes != old) {
1063 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1064 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1065 "adjust_quantum: old=%d, new=%d", old, nodes);
1066 }
1067
1068 return (nodes);
1069 }
1070
/*
 * Tear down 'rbtdb' and release everything it owns.
 *
 * 'log'   - when true, write a debug message once teardown is complete.
 * 'event' - when NULL, the destroy quantum is (re)initialised here and,
 *           if the work has to be deferred, a DNS_EVENT_FREESTORAGE
 *           event is allocated.  A non-NULL event is reused for the
 *           deferral, or freed once all trees are gone.
 *
 * The trees are destroyed with dns_rbt_destroy2() using rbtdb->quantum.
 * If that call returns ISC_R_QUOTA, an event naming
 * free_rbtdb_callback is sent to rbtdb->task and this function returns
 * without freeing the rest.  Presumably the callback re-enters this
 * function with that event -- confirm against free_rbtdb_callback().
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
	unsigned int i;
	isc_result_t result;
	char buf[DNS_NAME_FORMATSIZE];
	dns_rbtnode_t *node = NULL;
	dns_rbt_t **treep;
	isc_time_t start;

	/*
	 * NOTE(review): (bool)-1 evaluates to true; why overmem() is
	 * called this way at teardown is not visible from here.
	 */
	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
		overmem((dns_db_t *)rbtdb, (bool)-1);
	}

	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
	REQUIRE(rbtdb->future_version == NULL);

	/*
	 * Drop the database's own reference on the current version,
	 * unlink it from the open versions list, and free it.
	 */
	if (rbtdb->current_version != NULL) {
		isc_refcount_decrementz(&rbtdb->current_version->references);
		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
		isc_rwlock_destroy(&rbtdb->current_version->glue_rwlock);
		isc_refcount_destroy(&rbtdb->current_version->references);
		isc_rwlock_destroy(&rbtdb->current_version->rwlock);
		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
			    sizeof(rbtdb_version_t));
	}

	/*
	 * We assume the number of remaining dead nodes is reasonably small;
	 * the overhead of unlinking all nodes here should be negligible.
	 */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		while (node != NULL) {
			ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
			node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		}
	}

	/* Likewise empty the list of nodes queued for pruning. */
	node = ISC_LIST_HEAD(rbtdb->prunenodes);
	while (node != NULL) {
		ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink);
		node = ISC_LIST_HEAD(rbtdb->prunenodes);
	}

	/*
	 * No event yet: pick the starting quantum.  It is 100 when a
	 * task is available and 0 otherwise.
	 */
	if (event == NULL) {
		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
	}

	for (;;) {
		/*
		 * pick the next tree to (start to) destroy
		 */
		treep = &rbtdb->tree;
		if (*treep == NULL) {
			treep = &rbtdb->nsec;
			if (*treep == NULL) {
				treep = &rbtdb->nsec3;
				/*
				 * we're finished after clear cutting
				 */
				if (*treep == NULL) {
					break;
				}
			}
		}

		isc_time_now(&start);
		result = dns_rbt_destroy2(treep, rbtdb->quantum);
		if (result == ISC_R_QUOTA) {
			/*
			 * The tree was not fully destroyed within the
			 * quantum: retune the quantum from the elapsed
			 * time and hand the remaining work to the task.
			 */
			INSIST(rbtdb->task != NULL);
			if (rbtdb->quantum != 0) {
				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
								&start);
			}
			if (event == NULL) {
				event = isc_event_allocate(
					rbtdb->common.mctx, NULL,
					DNS_EVENT_FREESTORAGE,
					free_rbtdb_callback, rbtdb,
					sizeof(isc_event_t));
			}
			isc_task_send(rbtdb->task, &event);
			return;
		}
		INSIST(result == ISC_R_SUCCESS && *treep == NULL);
	}

	/* All trees are gone; a leftover event is no longer needed. */
	if (event != NULL) {
		isc_event_free(&event);
	}
	if (log) {
		if (dns_name_dynamic(&rbtdb->common.origin)) {
			dns_name_format(&rbtdb->common.origin, buf,
					sizeof(buf));
		} else {
			strlcpy(buf, "<UNKNOWN>", sizeof(buf));
		}
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
			      "done free_rbtdb(%s)", buf);
	}
	if (dns_name_dynamic(&rbtdb->common.origin)) {
		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
	}
	/* Destroy the per-bucket reference counters and node locks. */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		isc_refcount_destroy(&rbtdb->node_locks[i].references);
		NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
	}

	/*
	 * Clean up LRU / re-signing order lists.
	 */
	if (rbtdb->rdatasets != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++) {
			INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
		}
		isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
			    rbtdb->node_lock_count *
				    sizeof(rdatasetheaderlist_t));
	}
	/*
	 * Clean up dead node buckets.
	 */
	if (rbtdb->deadnodes != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++) {
			INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
		}
		isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
			    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
	}
	/*
	 * Clean up heap objects.  Note that the heaps array is returned
	 * to rbtdb->hmctx, not to rbtdb->common.mctx.
	 */
	if (rbtdb->heaps != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++) {
			isc_heap_destroy(&rbtdb->heaps[i]);
		}
		isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
			    rbtdb->node_lock_count * sizeof(isc_heap_t *));
	}

	/* Detach from whichever statistics sets are present. */
	if (rbtdb->rrsetstats != NULL) {
		dns_stats_detach(&rbtdb->rrsetstats);
	}
	if (rbtdb->cachestats != NULL) {
		isc_stats_detach(&rbtdb->cachestats);
	}
	if (rbtdb->gluecachestats != NULL) {
		isc_stats_detach(&rbtdb->gluecachestats);
	}

	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
	isc_rwlock_destroy(&rbtdb->tree_lock);
	isc_refcount_destroy(&rbtdb->references);
	if (rbtdb->task != NULL) {
		isc_task_detach(&rbtdb->task);
	}

	RBTDB_DESTROYLOCK(&rbtdb->lock);
	/* Clear the magic numbers so the object no longer looks valid. */
	rbtdb->common.magic = 0;
	rbtdb->common.impmagic = 0;
	isc_mem_detach(&rbtdb->hmctx);

	if (rbtdb->mmap_location != NULL) {
		isc_file_munmap(rbtdb->mmap_location, (size_t)rbtdb->mmap_size);
	}

	INSIST(ISC_LIST_EMPTY(rbtdb->common.update_listeners));

	/* Finally free the database object and drop its memory context. */
	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
}
1243
1244 static void
maybe_free_rbtdb(dns_rbtdb_t * rbtdb)1245 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
1246 bool want_free = false;
1247 unsigned int i;
1248 unsigned int inactive = 0;
1249
1250 /* XXX check for open versions here */
1251
1252 if (rbtdb->soanode != NULL) {
1253 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1254 }
1255 if (rbtdb->nsnode != NULL) {
1256 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1257 }
1258
1259 /*
1260 * The current version's glue table needs to be freed early
1261 * so the nodes are dereferenced before we check the active
1262 * node count below.
1263 */
1264 if (rbtdb->current_version != NULL) {
1265 free_gluetable(rbtdb->current_version);
1266 }
1267
1268 /*
1269 * Even though there are no external direct references, there still
1270 * may be nodes in use.
1271 */
1272 for (i = 0; i < rbtdb->node_lock_count; i++) {
1273 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1274 rbtdb->node_locks[i].exiting = true;
1275 if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0)
1276 {
1277 inactive++;
1278 }
1279 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1280 }
1281
1282 if (inactive != 0) {
1283 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1284 rbtdb->active -= inactive;
1285 if (rbtdb->active == 0) {
1286 want_free = true;
1287 }
1288 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1289 if (want_free) {
1290 char buf[DNS_NAME_FORMATSIZE];
1291 if (dns_name_dynamic(&rbtdb->common.origin)) {
1292 dns_name_format(&rbtdb->common.origin, buf,
1293 sizeof(buf));
1294 } else {
1295 strlcpy(buf, "<UNKNOWN>", sizeof(buf));
1296 }
1297 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1298 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1299 "calling free_rbtdb(%s)", buf);
1300 free_rbtdb(rbtdb, true, NULL);
1301 }
1302 }
1303 }
1304
1305 static void
detach(dns_db_t ** dbp)1306 detach(dns_db_t **dbp) {
1307 REQUIRE(dbp != NULL && VALID_RBTDB((dns_rbtdb_t *)(*dbp)));
1308 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1309 *dbp = NULL;
1310
1311 if (isc_refcount_decrement(&rbtdb->references) == 1) {
1312 maybe_free_rbtdb(rbtdb);
1313 }
1314 }
1315
1316 static void
currentversion(dns_db_t * db,dns_dbversion_t ** versionp)1317 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1318 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1319 rbtdb_version_t *version;
1320
1321 REQUIRE(VALID_RBTDB(rbtdb));
1322
1323 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1324 version = rbtdb->current_version;
1325 isc_refcount_increment(&version->references);
1326 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1327
1328 *versionp = (dns_dbversion_t *)version;
1329 }
1330
1331 static rbtdb_version_t *
allocate_version(isc_mem_t * mctx,rbtdb_serial_t serial,unsigned int references,bool writer)1332 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1333 unsigned int references, bool writer) {
1334 rbtdb_version_t *version;
1335 size_t size;
1336
1337 version = isc_mem_get(mctx, sizeof(*version));
1338 version->serial = serial;
1339
1340 isc_refcount_init(&version->references, references);
1341 isc_rwlock_init(&version->glue_rwlock, 0, 0);
1342
1343 version->glue_table_bits = RBTDB_GLUE_TABLE_INIT_BITS;
1344 version->glue_table_nodecount = 0U;
1345
1346 size = HASHSIZE(version->glue_table_bits) *
1347 sizeof(version->glue_table[0]);
1348 version->glue_table = isc_mem_get(mctx, size);
1349 memset(version->glue_table, 0, size);
1350
1351 version->writer = writer;
1352 version->commit_ok = false;
1353 ISC_LIST_INIT(version->changed_list);
1354 ISC_LIST_INIT(version->resigned_list);
1355 ISC_LINK_INIT(version, link);
1356
1357 return (version);
1358 }
1359
1360 static isc_result_t
newversion(dns_db_t * db,dns_dbversion_t ** versionp)1361 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1362 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1363 rbtdb_version_t *version;
1364
1365 REQUIRE(VALID_RBTDB(rbtdb));
1366 REQUIRE(versionp != NULL && *versionp == NULL);
1367 REQUIRE(rbtdb->future_version == NULL);
1368
1369 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1370 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1371 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1372 true);
1373 version->rbtdb = rbtdb;
1374 version->commit_ok = true;
1375 version->secure = rbtdb->current_version->secure;
1376 version->havensec3 = rbtdb->current_version->havensec3;
1377 if (version->havensec3) {
1378 version->flags = rbtdb->current_version->flags;
1379 version->iterations = rbtdb->current_version->iterations;
1380 version->hash = rbtdb->current_version->hash;
1381 version->salt_length = rbtdb->current_version->salt_length;
1382 memmove(version->salt, rbtdb->current_version->salt,
1383 version->salt_length);
1384 } else {
1385 version->flags = 0;
1386 version->iterations = 0;
1387 version->hash = 0;
1388 version->salt_length = 0;
1389 memset(version->salt, 0, sizeof(version->salt));
1390 }
1391 isc_rwlock_init(&version->rwlock, 0, 0);
1392 RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read);
1393 version->records = rbtdb->current_version->records;
1394 version->xfrsize = rbtdb->current_version->xfrsize;
1395 RWUNLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read);
1396 rbtdb->next_serial++;
1397 rbtdb->future_version = version;
1398 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1399
1400 *versionp = version;
1401
1402 return (ISC_R_SUCCESS);
1403 }
1404
1405 static void
attachversion(dns_db_t * db,dns_dbversion_t * source,dns_dbversion_t ** targetp)1406 attachversion(dns_db_t *db, dns_dbversion_t *source,
1407 dns_dbversion_t **targetp) {
1408 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1409 rbtdb_version_t *rbtversion = source;
1410
1411 REQUIRE(VALID_RBTDB(rbtdb));
1412 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1413
1414 isc_refcount_increment(&rbtversion->references);
1415
1416 *targetp = rbtversion;
1417 }
1418
1419 static rbtdb_changed_t *
add_changed(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,dns_rbtnode_t * node)1420 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, dns_rbtnode_t *node) {
1421 rbtdb_changed_t *changed;
1422
1423 /*
1424 * Caller must be holding the node lock if its reference must be
1425 * protected by the lock.
1426 */
1427
1428 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1429
1430 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1431
1432 REQUIRE(version->writer);
1433
1434 if (changed != NULL) {
1435 isc_refcount_increment(&node->references);
1436 changed->node = node;
1437 changed->dirty = false;
1438 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1439 } else {
1440 version->commit_ok = false;
1441 }
1442
1443 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1444
1445 return (changed);
1446 }
1447
1448 static void
free_noqname(isc_mem_t * mctx,struct noqname ** noqname)1449 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1450 if (dns_name_dynamic(&(*noqname)->name)) {
1451 dns_name_free(&(*noqname)->name, mctx);
1452 }
1453 if ((*noqname)->neg != NULL) {
1454 isc_mem_put(mctx, (*noqname)->neg,
1455 dns_rdataslab_size((*noqname)->neg, 0));
1456 }
1457 if ((*noqname)->negsig != NULL) {
1458 isc_mem_put(mctx, (*noqname)->negsig,
1459 dns_rdataslab_size((*noqname)->negsig, 0));
1460 }
1461 isc_mem_put(mctx, *noqname, sizeof(**noqname));
1462 *noqname = NULL;
1463 }
1464
1465 static void
init_rdataset(dns_rbtdb_t * rbtdb,rdatasetheader_t * h)1466 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
1467 ISC_LINK_INIT(h, link);
1468 h->heap_index = 0;
1469 h->is_mmapped = 0;
1470 h->next_is_relative = 0;
1471 h->node_is_relative = 0;
1472 atomic_init(&h->attributes, 0);
1473 atomic_init(&h->last_refresh_fail_ts, 0);
1474
1475 STATIC_ASSERT((sizeof(h->attributes) == 2),
1476 "The .attributes field of rdatasetheader_t needs to be "
1477 "16-bit int type exactly.");
1478
1479 #if TRACE_HEADER
1480 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
1481 fprintf(stderr, "initialized header: %p\n", h);
1482 }
1483 #else /* if TRACE_HEADER */
1484 UNUSED(rbtdb);
1485 #endif /* if TRACE_HEADER */
1486 }
1487
1488 /*
1489 * Update the copied values of 'next' and 'node' if they are relative.
1490 */
1491 static void
update_newheader(rdatasetheader_t * newh,rdatasetheader_t * old)1492 update_newheader(rdatasetheader_t *newh, rdatasetheader_t *old) {
1493 char *p;
1494
1495 if (old->next_is_relative) {
1496 p = (char *)old;
1497 p += (uintptr_t)old->next;
1498 newh->next = (rdatasetheader_t *)p;
1499 }
1500 if (old->node_is_relative) {
1501 p = (char *)old;
1502 p += (uintptr_t)old->node;
1503 newh->node = (dns_rbtnode_t *)p;
1504 }
1505 if (CASESET(old)) {
1506 uint_least16_t attr = RDATASET_ATTR_GET(
1507 old,
1508 (RDATASET_ATTR_CASESET | RDATASET_ATTR_CASEFULLYLOWER));
1509 RDATASET_ATTR_SET(newh, attr);
1510 memmove(newh->upper, old->upper, sizeof(old->upper));
1511 }
1512 }
1513
1514 static rdatasetheader_t *
new_rdataset(dns_rbtdb_t * rbtdb,isc_mem_t * mctx)1515 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
1516 rdatasetheader_t *h;
1517
1518 h = isc_mem_get(mctx, sizeof(*h));
1519
1520 #if TRACE_HEADER
1521 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
1522 fprintf(stderr, "allocated header: %p\n", h);
1523 }
1524 #endif /* if TRACE_HEADER */
1525 memset(h->upper, 0xeb, sizeof(h->upper));
1526 init_rdataset(rbtdb, h);
1527 h->rdh_ttl = 0;
1528 return (h);
1529 }
1530
1531 static void
free_rdataset(dns_rbtdb_t * rbtdb,isc_mem_t * mctx,rdatasetheader_t * rdataset)1532 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
1533 unsigned int size;
1534 int idx;
1535
1536 update_rrsetstats(rbtdb, rdataset->type,
1537 atomic_load_acquire(&rdataset->attributes), false);
1538
1539 idx = rdataset->node->locknum;
1540 if (ISC_LINK_LINKED(rdataset, link)) {
1541 INSIST(IS_CACHE(rbtdb));
1542 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1543 }
1544
1545 if (rdataset->heap_index != 0) {
1546 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1547 }
1548 rdataset->heap_index = 0;
1549
1550 if (rdataset->noqname != NULL) {
1551 free_noqname(mctx, &rdataset->noqname);
1552 }
1553 if (rdataset->closest != NULL) {
1554 free_noqname(mctx, &rdataset->closest);
1555 }
1556
1557 if (NONEXISTENT(rdataset)) {
1558 size = sizeof(*rdataset);
1559 } else {
1560 size = dns_rdataslab_size((unsigned char *)rdataset,
1561 sizeof(*rdataset));
1562 }
1563
1564 if (rdataset->is_mmapped == 1) {
1565 return;
1566 }
1567
1568 isc_mem_put(mctx, rdataset, size);
1569 }
1570
1571 static void
rollback_node(dns_rbtnode_t * node,rbtdb_serial_t serial)1572 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1573 rdatasetheader_t *header, *dcurrent;
1574 bool make_dirty = false;
1575
1576 /*
1577 * Caller must hold the node lock.
1578 */
1579
1580 /*
1581 * We set the IGNORE attribute on rdatasets with serial number
1582 * 'serial'. When the reference count goes to zero, these rdatasets
1583 * will be cleaned up; until that time, they will be ignored.
1584 */
1585 for (header = node->data; header != NULL; header = header->next) {
1586 if (header->serial == serial) {
1587 RDATASET_ATTR_SET(header, RDATASET_ATTR_IGNORE);
1588 make_dirty = true;
1589 }
1590 for (dcurrent = header->down; dcurrent != NULL;
1591 dcurrent = dcurrent->down)
1592 {
1593 if (dcurrent->serial == serial) {
1594 RDATASET_ATTR_SET(dcurrent,
1595 RDATASET_ATTR_IGNORE);
1596 make_dirty = true;
1597 }
1598 }
1599 }
1600 if (make_dirty) {
1601 node->dirty = 1;
1602 }
1603 }
1604
1605 static void
mark_header_ancient(dns_rbtdb_t * rbtdb,rdatasetheader_t * header)1606 mark_header_ancient(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
1607 uint_least16_t attributes = atomic_load_acquire(&header->attributes);
1608 uint_least16_t newattributes = 0;
1609
1610 /*
1611 * If we are already ancient there is nothing to do.
1612 */
1613 do {
1614 if ((attributes & RDATASET_ATTR_ANCIENT) != 0) {
1615 return;
1616 }
1617 newattributes = attributes | RDATASET_ATTR_ANCIENT;
1618 } while (!atomic_compare_exchange_weak_acq_rel(
1619 &header->attributes, &attributes, newattributes));
1620
1621 /*
1622 * Decrement the stats counter for the appropriate RRtype.
1623 * If the STALE attribute is set, this will decrement the
1624 * stale type counter, otherwise it decrements the active
1625 * stats type counter.
1626 */
1627 update_rrsetstats(rbtdb, header->type, attributes, false);
1628 header->node->dirty = 1;
1629
1630 /* Increment the stats counter for the ancient RRtype. */
1631 update_rrsetstats(rbtdb, header->type, newattributes, true);
1632 }
1633
1634 static void
mark_header_stale(dns_rbtdb_t * rbtdb,rdatasetheader_t * header)1635 mark_header_stale(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
1636 uint_least16_t attributes = atomic_load_acquire(&header->attributes);
1637 uint_least16_t newattributes = 0;
1638
1639 INSIST((attributes & RDATASET_ATTR_ZEROTTL) == 0);
1640
1641 /*
1642 * If we are already stale there is nothing to do.
1643 */
1644 do {
1645 if ((attributes & RDATASET_ATTR_STALE) != 0) {
1646 return;
1647 }
1648 newattributes = attributes | RDATASET_ATTR_STALE;
1649 } while (!atomic_compare_exchange_weak_acq_rel(
1650 &header->attributes, &attributes, newattributes));
1651
1652 /* Decrement the stats counter for the appropriate RRtype.
1653 * If the ANCIENT attribute is set (although it is very
1654 * unlikely that an RRset goes from ANCIENT to STALE), this
1655 * will decrement the ancient stale type counter, otherwise it
1656 * decrements the active stats type counter.
1657 */
1658
1659 update_rrsetstats(rbtdb, header->type, attributes, false);
1660 update_rrsetstats(rbtdb, header->type, newattributes, true);
1661 }
1662
1663 static void
clean_stale_headers(dns_rbtdb_t * rbtdb,isc_mem_t * mctx,rdatasetheader_t * top)1664 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx,
1665 rdatasetheader_t *top) {
1666 rdatasetheader_t *d, *down_next;
1667
1668 for (d = top->down; d != NULL; d = down_next) {
1669 down_next = d->down;
1670 free_rdataset(rbtdb, mctx, d);
1671 }
1672 top->down = NULL;
1673 }
1674
1675 static void
clean_cache_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1676 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1677 rdatasetheader_t *current, *top_prev, *top_next;
1678 isc_mem_t *mctx = rbtdb->common.mctx;
1679
1680 /*
1681 * Caller must be holding the node lock.
1682 */
1683
1684 top_prev = NULL;
1685 for (current = node->data; current != NULL; current = top_next) {
1686 top_next = current->next;
1687 clean_stale_headers(rbtdb, mctx, current);
1688 /*
1689 * If current is nonexistent, ancient, or stale and
1690 * we are not keeping stale, we can clean it up.
1691 */
1692 if (NONEXISTENT(current) || ANCIENT(current) ||
1693 (STALE(current) && !KEEPSTALE(rbtdb)))
1694 {
1695 if (top_prev != NULL) {
1696 top_prev->next = current->next;
1697 } else {
1698 node->data = current->next;
1699 }
1700 free_rdataset(rbtdb, mctx, current);
1701 } else {
1702 top_prev = current;
1703 }
1704 }
1705 node->dirty = 0;
1706 }
1707
/*
 * Clean up the rdataset headers of the zone node 'node'.
 *
 * node->data is a list of top-level headers linked through 'next';
 * each carries a chain of further headers linked through 'down'.  For
 * every top-level header this function:
 *
 *   1. frees chain entries that have the same serial as the entry
 *      above them, or that carry the IGNORE attribute;
 *   2. removes the top-level header itself if it is IGNOREd, promoting
 *      its first chain entry (if any) into its place;
 *   3. frees the first chain entry with a serial below 'least_serial'
 *      and everything beneath it;
 *   4. frees the top-level header if it has no chain left and is
 *      NONEXISTENT.
 *
 * The node's dirty flag is cleared unless some top-level header still
 * has a non-empty chain.
 *
 * 'least_serial' must be non-zero.  Caller must be holding the node
 * lock.
 */
static void
clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		rbtdb_serial_t least_serial) {
	rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
	rdatasetheader_t *top_prev, *top_next;
	isc_mem_t *mctx = rbtdb->common.mctx;
	bool still_dirty = false;

	/*
	 * Caller must be holding the node lock.
	 */
	REQUIRE(least_serial != 0);

	/* top_prev is the last top-level header kept in the list. */
	top_prev = NULL;
	for (current = node->data; current != NULL; current = top_next) {
		top_next = current->next;

		/*
		 * First, we clean up any instances of multiple rdatasets
		 * with the same serial number, or that have the IGNORE
		 * attribute.
		 */
		dparent = current;
		for (dcurrent = current->down; dcurrent != NULL;
		     dcurrent = down_next)
		{
			down_next = dcurrent->down;
			INSIST(dcurrent->serial <= dparent->serial);
			if (dcurrent->serial == dparent->serial ||
			    IGNORE(dcurrent))
			{
				/*
				 * Unlink dcurrent; the entry below it
				 * gets its 'next' pointed at dparent.
				 */
				if (down_next != NULL) {
					down_next->next = dparent;
				}
				dparent->down = down_next;
				free_rdataset(rbtdb, mctx, dcurrent);
			} else {
				dparent = dcurrent;
			}
		}

		/*
		 * We've now eliminated all IGNORE datasets with the possible
		 * exception of current, which we now check.
		 */
		if (IGNORE(current)) {
			down_next = current->down;
			if (down_next == NULL) {
				if (top_prev != NULL) {
					top_prev->next = current->next;
				} else {
					node->data = current->next;
				}
				free_rdataset(rbtdb, mctx, current);
				/*
				 * current no longer exists, so we can
				 * just continue with the loop.
				 */
				continue;
			} else {
				/*
				 * Pull up current->down, making it the new
				 * current.
				 */
				if (top_prev != NULL) {
					top_prev->next = down_next;
				} else {
					node->data = down_next;
				}
				down_next->next = top_next;
				free_rdataset(rbtdb, mctx, current);
				current = down_next;
			}
		}

		/*
		 * We now try to find the first down node less than the
		 * least serial.
		 */
		dparent = current;
		for (dcurrent = current->down; dcurrent != NULL;
		     dcurrent = down_next)
		{
			down_next = dcurrent->down;
			if (dcurrent->serial < least_serial) {
				break;
			}
			dparent = dcurrent;
		}

		/*
		 * If there is such an rdataset, delete it and any older
		 * versions.
		 */
		if (dcurrent != NULL) {
			do {
				down_next = dcurrent->down;
				INSIST(dcurrent->serial <= least_serial);
				free_rdataset(rbtdb, mctx, dcurrent);
				dcurrent = down_next;
			} while (dcurrent != NULL);
			/* dparent is now the end of the chain. */
			dparent->down = NULL;
		}

		/*
		 * Note.  The serial number of 'current' might be less than
		 * least_serial too, but we cannot delete it because it is
		 * the most recent version, unless it is a NONEXISTENT
		 * rdataset.
		 */
		if (current->down != NULL) {
			/* Versions remain below; the node stays dirty. */
			still_dirty = true;
			top_prev = current;
		} else {
			/*
			 * If this is a NONEXISTENT rdataset, we can delete it.
			 */
			if (NONEXISTENT(current)) {
				if (top_prev != NULL) {
					top_prev->next = current->next;
				} else {
					node->data = current->next;
				}
				free_rdataset(rbtdb, mctx, current);
			} else {
				top_prev = current;
			}
		}
	}
	if (!still_dirty) {
		node->dirty = 0;
	}
}
1841
1842 /*
1843 * tree_lock(write) must be held.
1844 */
1845 static void
delete_node(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node)1846 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1847 dns_rbtnode_t *nsecnode;
1848 dns_fixedname_t fname;
1849 dns_name_t *name;
1850 isc_result_t result = ISC_R_UNEXPECTED;
1851
1852 INSIST(!ISC_LINK_LINKED(node, deadlink));
1853
1854 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1855 char printname[DNS_NAME_FORMATSIZE];
1856 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1857 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1858 "delete_node(): %p %s (bucket %d)", node,
1859 dns_rbt_formatnodename(node, printname,
1860 sizeof(printname)),
1861 node->locknum);
1862 }
1863
1864 switch (node->nsec) {
1865 case DNS_RBT_NSEC_NORMAL:
1866 /*
1867 * Though this may be wasteful, it has to be done before
1868 * node is deleted.
1869 */
1870 name = dns_fixedname_initname(&fname);
1871 dns_rbt_fullnamefromnode(node, name);
1872
1873 result = dns_rbt_deletenode(rbtdb->tree, node, false);
1874 break;
1875 case DNS_RBT_NSEC_HAS_NSEC:
1876 name = dns_fixedname_initname(&fname);
1877 dns_rbt_fullnamefromnode(node, name);
1878 /*
1879 * Delete the corresponding node from the auxiliary NSEC
1880 * tree before deleting from the main tree.
1881 */
1882 nsecnode = NULL;
1883 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1884 NULL, DNS_RBTFIND_EMPTYDATA, NULL,
1885 NULL);
1886 if (result != ISC_R_SUCCESS) {
1887 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1888 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1889 "delete_node: "
1890 "dns_rbt_findnode(nsec): %s",
1891 isc_result_totext(result));
1892 } else {
1893 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1894 false);
1895 if (result != ISC_R_SUCCESS) {
1896 isc_log_write(
1897 dns_lctx, DNS_LOGCATEGORY_DATABASE,
1898 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1899 "delete_node(): "
1900 "dns_rbt_deletenode(nsecnode): %s",
1901 isc_result_totext(result));
1902 }
1903 }
1904 result = dns_rbt_deletenode(rbtdb->tree, node, false);
1905 break;
1906 case DNS_RBT_NSEC_NSEC:
1907 result = dns_rbt_deletenode(rbtdb->nsec, node, false);
1908 break;
1909 case DNS_RBT_NSEC_NSEC3:
1910 result = dns_rbt_deletenode(rbtdb->nsec3, node, false);
1911 break;
1912 }
1913 if (result != ISC_R_SUCCESS) {
1914 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1915 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1916 "delete_node(): "
1917 "dns_rbt_deletenode: %s",
1918 isc_result_totext(result));
1919 }
1920 }
1921
1922 /*
1923 * Caller must be holding the node lock.
1924 */
1925 static void
new_reference(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,isc_rwlocktype_t locktype)1926 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1927 isc_rwlocktype_t locktype) {
1928 if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink))
1929 {
1930 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
1931 deadlink);
1932 }
1933 if (isc_refcount_increment0(&node->references) == 0) {
1934 /* this is the first reference to the node */
1935 isc_refcount_increment0(
1936 &rbtdb->node_locks[node->locknum].references);
1937 }
1938 }
1939
1940 /*%
1941 * The tree lock must be held for the result to be valid.
1942 */
1943 static bool
is_leaf(dns_rbtnode_t * node)1944 is_leaf(dns_rbtnode_t *node) {
1945 return (node->parent != NULL && node->parent->down == node &&
1946 node->left == NULL && node->right == NULL);
1947 }
1948
1949 /*%
1950 * The tree lock must be held when this function is called as it reads and
1951 * updates rbtdb->prunenodes.
1952 */
1953 static void
send_to_prune_tree(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,isc_rwlocktype_t locktype)1954 send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1955 isc_rwlocktype_t locktype) {
1956 bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL);
1957
1958 INSIST(locktype == isc_rwlocktype_write);
1959
1960 new_reference(rbtdb, node, locktype);
1961 INSIST(!ISC_LINK_LINKED(node, prunelink));
1962 ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink);
1963
1964 if (!pruning_queued) {
1965 isc_event_t *ev = NULL;
1966 dns_db_t *db = NULL;
1967
1968 attach((dns_db_t *)rbtdb, &db);
1969
1970 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1971 DNS_EVENT_RBTPRUNE, prune_tree, db,
1972 sizeof(isc_event_t));
1973 isc_task_send(rbtdb->task, &ev);
1974 }
1975 }
1976
1977 /*%
1978 * Clean up dead nodes. These are nodes which have no references, and
1979 * have no data. They are dead but we could not or chose not to delete
1980 * them when we deleted all the data at that node because we did not want
1981 * to wait for the tree write lock.
1982 *
1983 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1984 */
1985 static void
cleanup_dead_nodes(dns_rbtdb_t * rbtdb,int bucketnum)1986 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1987 dns_rbtnode_t *node;
1988 int count = 10; /* XXXJT: should be adjustable */
1989
1990 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1991 while (node != NULL && count > 0) {
1992 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1993
1994 /*
1995 * We might have reactivated this node without a tree write
1996 * lock, so we couldn't remove this node from deadnodes then
1997 * and we have to do it now.
1998 */
1999 if (isc_refcount_current(&node->references) != 0 ||
2000 node->data != NULL)
2001 {
2002 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
2003 count--;
2004 continue;
2005 }
2006
2007 if (is_leaf(node) && rbtdb->task != NULL) {
2008 send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
2009 } else if (node->down == NULL && node->data == NULL) {
2010 /*
2011 * Not a interior node and not needing to be
2012 * reactivated.
2013 */
2014 delete_node(rbtdb, node);
2015 } else if (node->data == NULL) {
2016 /*
2017 * A interior node without data. Leave linked to
2018 * to be cleaned up when node->down becomes NULL.
2019 */
2020 ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum], node,
2021 deadlink);
2022 }
2023 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
2024 count--;
2025 }
2026 }
2027
/*
 * Take a new reference on 'node', which may currently be on its
 * bucket's dead-node list.
 *
 * This function is assumed to be called when a node is newly referenced
 * and can be in the deadnode list.  In that case the node must be
 * removed from the list because it is going to be used.  In addition,
 * if the caller happens to hold a write lock on the tree
 * ('treelocktype' is write), it is a good chance to purge dead nodes.
 *
 * The node lock is acquired and released inside this function.
 *
 * Note: while a new reference is gained in multiple places, there are
 * only very few cases where the node can be in the deadnode list (only
 * empty nodes can have been added to the list).
 */
static void
reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		isc_rwlocktype_t treelocktype) {
	isc_rwlocktype_t locktype = isc_rwlocktype_read;
	nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
	bool maybe_cleanup = false;

	POST(locktype);

	/* Start with the node lock held for reading only. */
	NODE_LOCK(nodelock, locktype);

	/*
	 * Check if we can possibly cleanup the dead node.  If so, upgrade
	 * the node lock below to perform the cleanup.
	 */
	if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
	    treelocktype == isc_rwlocktype_write)
	{
		maybe_cleanup = true;
	}

	if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
		/*
		 * Upgrade the lock and test if we still need to unlink.
		 * The lock is dropped and re-acquired, so the linked
		 * state is checked again once the write lock is held.
		 */
		NODE_UNLOCK(nodelock, locktype);
		locktype = isc_rwlocktype_write;
		POST(locktype);
		NODE_LOCK(nodelock, locktype);
		if (ISC_LINK_LINKED(node, deadlink)) {
			ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
					deadlink);
		}
		if (maybe_cleanup) {
			cleanup_dead_nodes(rbtdb, node->locknum);
		}
	}

	/* 'locktype' is whichever lock mode is held at this point. */
	new_reference(rbtdb, node, locktype);

	NODE_UNLOCK(nodelock, locktype);
}
2080
/*
 * Release one reference to 'node'.
 *
 * Caller must be holding the node lock; either the "strong", read or write
 * lock.  Note that the lock must be held even when node references are
 * atomically modified; in that case the decrement operation itself does not
 * have to be protected, but we must avoid a race condition where multiple
 * threads are decreasing the reference to zero simultaneously and at least
 * one of them is going to free the node.
 *
 * Parameters:
 *	least_serial	passed to clean_zone_node() for a dirty zone node;
 *			0 means "unknown", in which case the value is read
 *			from rbtdb->least_serial under the database lock.
 *	nlock		type of node lock the caller holds.  A read lock is
 *			released and re-acquired as a write lock when the
 *			slow path is taken, and downgraded again before
 *			returning.
 *	tlock		type of tree lock the caller holds (none, read or
 *			write).  If it is not the write lock, a non-blocking
 *			attempt is made to obtain it; whatever was obtained
 *			is given back before returning.
 *	pruning		true when called from prune_tree(); prevents a leaf
 *			from being dispatched to send_to_prune_tree() again.
 *
 * This function returns true if the node reference decreased to zero,
 * except when the node was handed to send_to_prune_tree(), in which case
 * false is returned (presumably because a new reference is taken for the
 * prune list -- confirm against send_to_prune_tree()).  It returns false
 * whenever other references remain.
 *
 * NOTE: Decrementing the reference count of a node to zero does not mean it
 * will be immediately freed.
 */
static bool
decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		    rbtdb_serial_t least_serial, isc_rwlocktype_t nlock,
		    isc_rwlocktype_t tlock, bool pruning) {
	isc_result_t result;
	bool write_locked;
	bool locked = tlock != isc_rwlocktype_none;
	rbtdb_nodelock_t *nodelock;
	int bucket = node->locknum;
	bool no_reference = true;
	uint_fast32_t refs;

	nodelock = &rbtdb->node_locks[bucket];

	/*
	 * A node is kept if it has data, if it is one of the two origin
	 * nodes, or -- but only when 'l' says the tree is locked, so that
	 * ->down can be read -- if it has a subtree below it.
	 */
#define KEEP_NODE(n, r, l)                                  \
	((n)->data != NULL || ((l) && (n)->down != NULL) || \
	 (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node)

	/* Handle easy and typical case first. */
	if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) {
		if (isc_refcount_decrement(&node->references) == 1) {
			/*
			 * That was the last reference to the node, so the
			 * bucket loses one referenced node as well.
			 */
			refs = isc_refcount_decrement(&nodelock->references);
			INSIST(refs > 0);
			return (true);
		} else {
			return (false);
		}
	}

	/*
	 * Upgrade the lock?  The read lock is dropped before the write lock
	 * is taken, so the upgrade is not atomic.
	 */
	if (nlock == isc_rwlocktype_read) {
		NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
		NODE_LOCK(&nodelock->lock, isc_rwlocktype_write);
	}

	if (isc_refcount_decrement(&node->references) > 1) {
		/* Restore the lock? */
		if (nlock == isc_rwlocktype_read) {
			NODE_DOWNGRADE(&nodelock->lock);
		}
		return (false);
	}

	/*
	 * The last reference is gone.  A dirty node is cleaned first, with
	 * the cache or zone variant depending on the database type.
	 */
	if (node->dirty) {
		if (IS_CACHE(rbtdb)) {
			clean_cache_node(rbtdb, node);
		} else {
			if (least_serial == 0) {
				/*
				 * Caller doesn't know the least serial.
				 * Get it.
				 */
				RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
				least_serial = rbtdb->least_serial;
				RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
			}
			clean_zone_node(rbtdb, node, least_serial);
		}
	}

	/*
	 * Attempt to switch to a write lock on the tree.  If this fails,
	 * we will add this node to a linked list of nodes in this locking
	 * bucket which we will free later.
	 */
	if (tlock != isc_rwlocktype_write) {
		/*
		 * Locking hierarchy notwithstanding, we don't need to free
		 * the node lock before acquiring the tree write lock because
		 * we only do a trylock.
		 */
		if (tlock == isc_rwlocktype_read) {
			result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
		} else {
			result = isc_rwlock_trylock(&rbtdb->tree_lock,
						    isc_rwlocktype_write);
		}
		RUNTIME_CHECK(result == ISC_R_SUCCESS ||
			      result == ISC_R_LOCKBUSY);

		write_locked = (result == ISC_R_SUCCESS);
	} else {
		write_locked = true;
	}

	refs = isc_refcount_decrement(&nodelock->references);
	INSIST(refs > 0);

	/*
	 * Re-evaluate with the (possibly newly acquired) tree lock: cleaning
	 * above may have changed ->data, and ->down is only consulted when
	 * the tree is locked.
	 */
	if (KEEP_NODE(node, rbtdb, locked || write_locked)) {
		goto restore_locks;
	}

#undef KEEP_NODE

	if (write_locked) {
		/*
		 * We can now delete the node.
		 */

		/*
		 * If this node is the only one in the level it's in, deleting
		 * this node may recursively make its parent the only node in
		 * the parent level; if so, and if no one is currently using
		 * the parent node, this is almost the only opportunity to
		 * clean it up.  But the recursive cleanup is not that trivial
		 * since the child and parent may be in different lock buckets,
		 * which would cause a lock order reversal problem.  To avoid
		 * the trouble, we'll dispatch a separate event for batch
		 * cleaning.  We need to check whether we're deleting the node
		 * as a result of pruning to avoid infinite dispatching.
		 * Note: pruning happens only when a task has been set for the
		 * rbtdb.  If the user of the rbtdb chooses not to set a task,
		 * it's their responsibility to purge stale leaves (e.g. by
		 * periodic walk-through).
		 */
		if (!pruning && is_leaf(node) && rbtdb->task != NULL) {
			send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
			no_reference = false;
		} else {
			delete_node(rbtdb, node);
		}
	} else {
		/*
		 * No tree write lock: the node cannot be deleted now, so it
		 * is parked on this bucket's dead-node list (unless it is
		 * already there).
		 */
		INSIST(node->data == NULL);
		if (!ISC_LINK_LINKED(node, deadlink)) {
			ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
					deadlink);
		}
	}

restore_locks:
	/* Restore the lock? */
	if (nlock == isc_rwlocktype_read) {
		NODE_DOWNGRADE(&nodelock->lock);
	}

	/*
	 * Relock a read lock, or unlock the write lock if no lock was held.
	 */
	if (tlock == isc_rwlocktype_none) {
		if (write_locked) {
			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
		}
	}

	if (tlock == isc_rwlocktype_read) {
		if (write_locked) {
			isc_rwlock_downgrade(&rbtdb->tree_lock);
		}
	}

	return (no_reference);
}
2247
/*
 * Prune the tree by recursively cleaning up single leaves.  Go through all
 * nodes stored in the rbtdb->prunenodes list; for each of them, in the worst
 * case, it will be necessary to traverse a number of tree levels equal to the
 * maximum legal number of domain name labels (127); in practice, the number of
 * tree levels to traverse will virtually always be much smaller (a few levels
 * at most).  While holding the tree lock throughout this entire operation is
 * less than ideal, so is splitting the latter up by queueing a separate
 * prune_tree() run for each node to start pruning from (as queueing requires
 * allocating memory and can therefore potentially be exploited to exhaust
 * available memory).  Also note that actually freeing up the memory used by
 * RBTDB nodes (which is what this function does) is essential to keeping cache
 * memory use in check, so since the tree lock needs to be acquired anyway,
 * freeing as many nodes as possible before the tree lock gets released is
 * prudent.
 *
 * This is a task event action: 'task' is unused, 'event->ev_arg' is the
 * database, and the event is freed here.  On completion the database is
 * detached (presumably releasing a reference taken when the event was
 * queued -- confirm against send_to_prune_tree()).
 */
static void
prune_tree(isc_task_t *task, isc_event_t *event) {
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg;
	dns_rbtnode_t *node = NULL;
	dns_rbtnode_t *parent = NULL;
	unsigned int locknum;

	UNUSED(task);

	isc_event_free(&event);

	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);

	/*
	 * Each pass of the outer loop starts from the current head of the
	 * prune list; the inner loop then climbs from that node towards the
	 * root for as long as removing a node leaves its parent childless.
	 */
	while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) {
		locknum = node->locknum;
		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
			  isc_rwlocktype_write);
		do {
			/*
			 * Only the starting node is expected to be on the
			 * prune list; parents reached while climbing need
			 * not be.
			 */
			if (ISC_LINK_LINKED(node, prunelink)) {
				ISC_LIST_UNLINK(rbtdb->prunenodes, node,
						prunelink);
			}

			/*
			 * Remember the parent before the reference is
			 * dropped, as the node may be deleted by the call.
			 */
			parent = node->parent;
			decrement_reference(rbtdb, node, 0,
					    isc_rwlocktype_write,
					    isc_rwlocktype_write, true);

			if (parent != NULL && parent->down == NULL) {
				/*
				 * node was the only down child of the parent
				 * and has just been removed.  We'll then need
				 * to examine the parent.  Keep the lock if
				 * possible; otherwise, release the old lock and
				 * acquire one for the parent.
				 */
				if (parent->locknum != locknum) {
					NODE_UNLOCK(
						&rbtdb->node_locks[locknum].lock,
						isc_rwlocktype_write);
					locknum = parent->locknum;
					NODE_LOCK(
						&rbtdb->node_locks[locknum].lock,
						isc_rwlocktype_write);
				}

				/*
				 * We need to gain a reference to the node
				 * before decrementing it in the next iteration.
				 */
				if (ISC_LINK_LINKED(parent, deadlink)) {
					ISC_LIST_UNLINK(
						rbtdb->deadnodes[locknum],
						parent, deadlink);
				}
				new_reference(rbtdb, parent,
					      isc_rwlocktype_write);
			} else {
				parent = NULL;
			}

			node = parent;
		} while (node != NULL);
		/* 'locknum' names whichever bucket lock is held last. */
		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
			    isc_rwlocktype_write);
	}
	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);

	detach((dns_db_t **)(void *)&rbtdb);
}
2334
2335 static void
make_least_version(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,rbtdb_changedlist_t * cleanup_list)2336 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
2337 rbtdb_changedlist_t *cleanup_list) {
2338 /*
2339 * Caller must be holding the database lock.
2340 */
2341
2342 rbtdb->least_serial = version->serial;
2343 *cleanup_list = version->changed_list;
2344 ISC_LIST_INIT(version->changed_list);
2345 }
2346
2347 static void
cleanup_nondirty(rbtdb_version_t * version,rbtdb_changedlist_t * cleanup_list)2348 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
2349 rbtdb_changed_t *changed, *next_changed;
2350
2351 /*
2352 * If the changed record is dirty, then
2353 * an update created multiple versions of
2354 * a given rdataset. We keep this list
2355 * until we're the least open version, at
2356 * which point it's safe to get rid of any
2357 * older versions.
2358 *
2359 * If the changed record isn't dirty, then
2360 * we don't need it anymore since we're
2361 * committing and not rolling back.
2362 *
2363 * The caller must be holding the database lock.
2364 */
2365 for (changed = HEAD(version->changed_list); changed != NULL;
2366 changed = next_changed)
2367 {
2368 next_changed = NEXT(changed, link);
2369 if (!changed->dirty) {
2370 UNLINK(version->changed_list, changed, link);
2371 APPEND(*cleanup_list, changed, link);
2372 }
2373 }
2374 }
2375
2376 static void
iszonesecure(dns_db_t * db,rbtdb_version_t * version,dns_dbnode_t * origin)2377 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
2378 dns_rdataset_t keyset;
2379 dns_rdataset_t nsecset, signsecset;
2380 bool haszonekey = false;
2381 bool hasnsec = false;
2382 isc_result_t result;
2383
2384 dns_rdataset_init(&keyset);
2385 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2386 0, 0, &keyset, NULL);
2387 if (result == ISC_R_SUCCESS) {
2388 result = dns_rdataset_first(&keyset);
2389 while (result == ISC_R_SUCCESS) {
2390 dns_rdata_t keyrdata = DNS_RDATA_INIT;
2391 dns_rdataset_current(&keyset, &keyrdata);
2392 if (dns_zonekey_iszonekey(&keyrdata)) {
2393 haszonekey = true;
2394 break;
2395 }
2396 result = dns_rdataset_next(&keyset);
2397 }
2398 dns_rdataset_disassociate(&keyset);
2399 }
2400 if (!haszonekey) {
2401 version->secure = dns_db_insecure;
2402 version->havensec3 = false;
2403 return;
2404 }
2405
2406 dns_rdataset_init(&nsecset);
2407 dns_rdataset_init(&signsecset);
2408 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0,
2409 0, &nsecset, &signsecset);
2410 if (result == ISC_R_SUCCESS) {
2411 if (dns_rdataset_isassociated(&signsecset)) {
2412 hasnsec = true;
2413 dns_rdataset_disassociate(&signsecset);
2414 }
2415 dns_rdataset_disassociate(&nsecset);
2416 }
2417
2418 setnsec3parameters(db, version);
2419
2420 /*
2421 * Do we have a valid NSEC/NSEC3 chain?
2422 */
2423 if (version->havensec3 || hasnsec) {
2424 version->secure = dns_db_secure;
2425 } else {
2426 version->secure = dns_db_insecure;
2427 }
2428 }
2429
2430 /*%<
2431 * Walk the origin node looking for NSEC3PARAM records.
2432 * Cache the nsec3 parameters.
2433 */
2434 static void
setnsec3parameters(dns_db_t * db,rbtdb_version_t * version)2435 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2436 dns_rbtnode_t *node;
2437 dns_rdata_nsec3param_t nsec3param;
2438 dns_rdata_t rdata = DNS_RDATA_INIT;
2439 isc_region_t region;
2440 isc_result_t result;
2441 rdatasetheader_t *header, *header_next;
2442 unsigned char *raw; /* RDATASLAB */
2443 unsigned int count, length;
2444 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2445
2446 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2447 version->havensec3 = false;
2448 node = rbtdb->origin_node;
2449 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2450 isc_rwlocktype_read);
2451 for (header = node->data; header != NULL; header = header_next) {
2452 header_next = header->next;
2453 do {
2454 if (header->serial <= version->serial &&
2455 !IGNORE(header))
2456 {
2457 if (NONEXISTENT(header)) {
2458 header = NULL;
2459 }
2460 break;
2461 } else {
2462 header = header->down;
2463 }
2464 } while (header != NULL);
2465
2466 if (header != NULL &&
2467 (header->type == dns_rdatatype_nsec3param))
2468 {
2469 /*
2470 * Find A NSEC3PARAM with a supported algorithm.
2471 */
2472 raw = (unsigned char *)header + sizeof(*header);
2473 count = raw[0] * 256 + raw[1]; /* count */
2474 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
2475 while (count-- > 0U) {
2476 length = raw[0] * 256 + raw[1];
2477 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
2478 region.base = raw;
2479 region.length = length;
2480 raw += length;
2481 dns_rdata_fromregion(
2482 &rdata, rbtdb->common.rdclass,
2483 dns_rdatatype_nsec3param, ®ion);
2484 result = dns_rdata_tostruct(&rdata, &nsec3param,
2485 NULL);
2486 INSIST(result == ISC_R_SUCCESS);
2487 dns_rdata_reset(&rdata);
2488
2489 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2490 !dns_nsec3_supportedhash(nsec3param.hash))
2491 {
2492 continue;
2493 }
2494
2495 if (nsec3param.flags != 0) {
2496 continue;
2497 }
2498
2499 memmove(version->salt, nsec3param.salt,
2500 nsec3param.salt_length);
2501 version->hash = nsec3param.hash;
2502 version->salt_length = nsec3param.salt_length;
2503 version->iterations = nsec3param.iterations;
2504 version->flags = nsec3param.flags;
2505 version->havensec3 = true;
2506 /*
2507 * Look for a better algorithm than the
2508 * unknown test algorithm.
2509 */
2510 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) {
2511 goto unlock;
2512 }
2513 }
2514 }
2515 }
2516 unlock:
2517 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2518 isc_rwlocktype_read);
2519 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2520 }
2521
2522 static void
cleanup_dead_nodes_callback(isc_task_t * task,isc_event_t * event)2523 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2524 dns_rbtdb_t *rbtdb = event->ev_arg;
2525 bool again = false;
2526 unsigned int locknum;
2527
2528 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2529 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2530 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2531 isc_rwlocktype_write);
2532 cleanup_dead_nodes(rbtdb, locknum);
2533 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) {
2534 again = true;
2535 }
2536 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2537 isc_rwlocktype_write);
2538 }
2539 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2540 if (again) {
2541 isc_task_send(task, &event);
2542 } else {
2543 isc_event_free(&event);
2544 if (isc_refcount_decrement(&rbtdb->references) == 1) {
2545 (void)isc_refcount_current(&rbtdb->references);
2546 maybe_free_rbtdb(rbtdb);
2547 }
2548 }
2549 }
2550
/*
 * Close the version referenced by '*versionp' and set '*versionp' to NULL.
 *
 * If other references to the version remain, only its reference count is
 * decremented.  Otherwise, under the database write lock:
 *
 *   - a writer version closed with 'commit' set becomes
 *     rbtdb->current_version, and the previous current version is cleaned
 *     up if the database held the last reference to it;
 *   - a writer version closed without 'commit' is rolled back and cleaned
 *     up;
 *   - a reader version other than the current version is cleaned up, with
 *     its pending changed records either processed now (if it was the
 *     least open version) or passed on to the next newer version.
 *
 * Afterwards, re-signed headers are committed or rolled back and every
 * node on the resulting cleanup list has its reference released.  When the
 * database has a task, purging of dead nodes is deferred to
 * cleanup_dead_nodes_callback(); otherwise the tree write lock is taken and
 * the purge happens inline.
 */
static void
closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
	rbtdb_version_t *version, *cleanup_version, *least_greater;
	bool rollback = false;
	rbtdb_changedlist_t cleanup_list;
	rdatasetheaderlist_t resigned_list;
	rbtdb_changed_t *changed, *next_changed;
	rbtdb_serial_t serial, least_serial;
	dns_rbtnode_t *rbtnode;
	rdatasetheader_t *header;

	REQUIRE(VALID_RBTDB(rbtdb));
	version = (rbtdb_version_t *)*versionp;
	INSIST(version->rbtdb == rbtdb);

	cleanup_version = NULL;
	ISC_LIST_INIT(cleanup_list);
	ISC_LIST_INIT(resigned_list);

	if (isc_refcount_decrement(&version->references) > 1) {
		/* typical and easy case first */
		if (commit) {
			/* Only a writer version may be committed. */
			RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
			INSIST(!version->writer);
			RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
		}
		goto end;
	}

	/*
	 * Update the zone's secure status in version before making
	 * it the current version.
	 */
	if (version->writer && commit && !IS_CACHE(rbtdb)) {
		iszonesecure(db, version, rbtdb->origin_node);
	}

	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
	serial = version->serial;
	if (version->writer) {
		if (commit) {
			unsigned cur_ref;
			rbtdb_version_t *cur_version;

			INSIST(version->commit_ok);
			INSIST(version == rbtdb->future_version);
			/*
			 * The current version is going to be replaced.
			 * Release the (likely last) reference to it from the
			 * DB itself and unlink it from the open list.
			 */
			cur_version = rbtdb->current_version;
			cur_ref = isc_refcount_decrement(
				&cur_version->references);
			if (cur_ref == 1) {
				(void)isc_refcount_current(
					&cur_version->references);
				if (cur_version->serial == rbtdb->least_serial)
				{
					INSIST(EMPTY(
						cur_version->changed_list));
				}
				UNLINK(rbtdb->open_versions, cur_version, link);
			}
			if (EMPTY(rbtdb->open_versions)) {
				/*
				 * We're going to become the least open
				 * version.
				 */
				make_least_version(rbtdb, version,
						   &cleanup_list);
			} else {
				/*
				 * Some other open version is the
				 * least version.  We can't cleanup
				 * records that were changed in this
				 * version because the older versions
				 * may still be in use by an open
				 * version.
				 *
				 * We can, however, discard the
				 * changed records for things that
				 * we've added that didn't exist in
				 * prior versions.
				 */
				cleanup_nondirty(version, &cleanup_list);
			}
			/*
			 * If the (soon to be former) current version
			 * isn't being used by anyone, we can clean
			 * it up.
			 */
			if (cur_ref == 1) {
				cleanup_version = cur_version;
				APPENDLIST(version->changed_list,
					   cleanup_version->changed_list, link);
			}
			/*
			 * Become the current version.
			 */
			version->writer = false;
			rbtdb->current_version = version;
			rbtdb->current_serial = version->serial;
			rbtdb->future_version = NULL;

			/*
			 * Keep the current version in the open list, and
			 * gain a reference for the DB itself (see the DB
			 * creation function below).  This must be the only
			 * case where we need to increment the counter from
			 * zero and need to use isc_refcount_increment0().
			 */
			INSIST(isc_refcount_increment0(&version->references) ==
			       0);
			PREPEND(rbtdb->open_versions, rbtdb->current_version,
				link);
			resigned_list = version->resigned_list;
			ISC_LIST_INIT(version->resigned_list);
		} else {
			/*
			 * We're rolling back this transaction.
			 */
			cleanup_list = version->changed_list;
			ISC_LIST_INIT(version->changed_list);
			resigned_list = version->resigned_list;
			ISC_LIST_INIT(version->resigned_list);
			rollback = true;
			cleanup_version = version;
			rbtdb->future_version = NULL;
		}
	} else {
		if (version != rbtdb->current_version) {
			/*
			 * There are no external or internal references
			 * to this version and it can be cleaned up.
			 */
			cleanup_version = version;

			/*
			 * Find the version with the least serial
			 * number greater than ours.
			 */
			least_greater = PREV(version, link);
			if (least_greater == NULL) {
				least_greater = rbtdb->current_version;
			}

			INSIST(version->serial < least_greater->serial);
			/*
			 * Is this the least open version?
			 */
			if (version->serial == rbtdb->least_serial) {
				/*
				 * Yes.  Install the new least open
				 * version.
				 */
				make_least_version(rbtdb, least_greater,
						   &cleanup_list);
			} else {
				/*
				 * Add any unexecuted cleanups to
				 * those of the least greater version.
				 */
				APPENDLIST(least_greater->changed_list,
					   version->changed_list, link);
			}
		} else if (version->serial == rbtdb->least_serial) {
			INSIST(EMPTY(version->changed_list));
		}
		UNLINK(rbtdb->open_versions, version, link);
	}
	/*
	 * Snapshot the least serial while the database lock is still held;
	 * it is passed to decrement_reference() below.
	 */
	least_serial = rbtdb->least_serial;
	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);

	if (cleanup_version != NULL) {
		/* All of its changed records must have been moved away. */
		INSIST(EMPTY(cleanup_version->changed_list));
		free_gluetable(cleanup_version);
		isc_rwlock_destroy(&cleanup_version->glue_rwlock);
		isc_rwlock_destroy(&cleanup_version->rwlock);
		isc_mem_put(rbtdb->common.mctx, cleanup_version,
			    sizeof(*cleanup_version));
	}

	/*
	 * Commit/rollback re-signed headers.
	 */
	for (header = HEAD(resigned_list); header != NULL;
	     header = HEAD(resigned_list))
	{
		nodelock_t *lock;

		ISC_LIST_UNLINK(resigned_list, header, link);

		lock = &rbtdb->node_locks[header->node->locknum].lock;
		NODE_LOCK(lock, isc_rwlocktype_write);
		/* On rollback the header goes back into the resign set. */
		if (rollback && !IGNORE(header)) {
			resign_insert(rbtdb, header->node->locknum, header);
		}
		decrement_reference(rbtdb, header->node, least_serial,
				    isc_rwlocktype_write, isc_rwlocktype_none,
				    false);
		NODE_UNLOCK(lock, isc_rwlocktype_write);
	}

	if (!EMPTY(cleanup_list)) {
		isc_event_t *event = NULL;
		isc_rwlocktype_t tlock = isc_rwlocktype_none;

		/*
		 * With a task, dead nodes are purged later by
		 * cleanup_dead_nodes_callback(); the event is allocated now
		 * and sent once the cleanup list has been processed.
		 */
		if (rbtdb->task != NULL) {
			event = isc_event_allocate(rbtdb->common.mctx, NULL,
						   DNS_EVENT_RBTDEADNODES,
						   cleanup_dead_nodes_callback,
						   rbtdb, sizeof(isc_event_t));
		}
		if (event == NULL) {
			/*
			 * We acquire a tree write lock here in order to make
			 * sure that stale nodes will be removed in
			 * decrement_reference().  If we didn't have the lock,
			 * those nodes could miss the chance to be removed
			 * until the server stops.  The write lock is
			 * expensive, but this event should be rare enough
			 * to justify the cost.
			 */
			RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
			tlock = isc_rwlocktype_write;
		}

		for (changed = HEAD(cleanup_list); changed != NULL;
		     changed = next_changed)
		{
			nodelock_t *lock;

			/* 'changed' is freed at the end of the iteration. */
			next_changed = NEXT(changed, link);
			rbtnode = changed->node;
			lock = &rbtdb->node_locks[rbtnode->locknum].lock;

			NODE_LOCK(lock, isc_rwlocktype_write);
			/*
			 * This is a good opportunity to purge any dead nodes,
			 * so use it.
			 */
			if (event == NULL) {
				cleanup_dead_nodes(rbtdb, rbtnode->locknum);
			}

			if (rollback) {
				rollback_node(rbtnode, serial);
			}
			decrement_reference(rbtdb, rbtnode, least_serial,
					    isc_rwlocktype_write, tlock, false);

			NODE_UNLOCK(lock, isc_rwlocktype_write);

			isc_mem_put(rbtdb->common.mctx, changed,
				    sizeof(*changed));
		}
		if (event != NULL) {
			/*
			 * Reference held on behalf of the queued event; it
			 * is dropped by cleanup_dead_nodes_callback().
			 */
			isc_refcount_increment(&rbtdb->references);
			isc_task_send(rbtdb->task, &event);
		} else {
			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
		}
	}

end:
	*versionp = NULL;
}
2820
2821 /*
2822 * Add the necessary magic for the wildcard name 'name'
2823 * to be found in 'rbtdb'.
2824 *
2825 * In order for wildcard matching to work correctly in
2826 * zone_find(), we must ensure that a node for the wildcarding
2827 * level exists in the database, and has its 'find_callback'
2828 * and 'wild' bits set.
2829 *
2830 * E.g. if the wildcard name is "*.sub.example." then we
2831 * must ensure that "sub.example." exists and is marked as
2832 * a wildcard level.
2833 *
2834 * tree_lock(write) must be held.
2835 */
2836 static isc_result_t
add_wildcard_magic(dns_rbtdb_t * rbtdb,const dns_name_t * name,bool lock)2837 add_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) {
2838 isc_result_t result;
2839 dns_name_t foundname;
2840 dns_offsets_t offsets;
2841 unsigned int n;
2842 dns_rbtnode_t *node = NULL;
2843
2844 dns_name_init(&foundname, offsets);
2845 n = dns_name_countlabels(name);
2846 INSIST(n >= 2);
2847 n--;
2848 dns_name_getlabelsequence(name, 1, n, &foundname);
2849 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2850 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
2851 return (result);
2852 }
2853 if (result == ISC_R_SUCCESS) {
2854 node->nsec = DNS_RBT_NSEC_NORMAL;
2855 }
2856 node->find_callback = 1;
2857 if (lock) {
2858 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
2859 isc_rwlocktype_write);
2860 }
2861 node->wild = 1;
2862 if (lock) {
2863 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
2864 isc_rwlocktype_write);
2865 }
2866 return (ISC_R_SUCCESS);
2867 }
2868
2869 /*
2870 * tree_lock(write) must be held.
2871 */
2872 static isc_result_t
add_empty_wildcards(dns_rbtdb_t * rbtdb,const dns_name_t * name,bool lock)2873 add_empty_wildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) {
2874 isc_result_t result;
2875 dns_name_t foundname;
2876 dns_offsets_t offsets;
2877 unsigned int n, l, i;
2878
2879 dns_name_init(&foundname, offsets);
2880 n = dns_name_countlabels(name);
2881 l = dns_name_countlabels(&rbtdb->common.origin);
2882 i = l + 1;
2883 while (i < n) {
2884 dns_rbtnode_t *node = NULL; /* dummy */
2885 dns_name_getlabelsequence(name, n - i, i, &foundname);
2886 if (dns_name_iswildcard(&foundname)) {
2887 result = add_wildcard_magic(rbtdb, &foundname, lock);
2888 if (result != ISC_R_SUCCESS) {
2889 return (result);
2890 }
2891 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2892 &node);
2893 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
2894 return (result);
2895 }
2896 if (result == ISC_R_SUCCESS) {
2897 node->nsec = DNS_RBT_NSEC_NORMAL;
2898 }
2899 }
2900 i++;
2901 }
2902 return (ISC_R_SUCCESS);
2903 }
2904
2905 static isc_result_t
findnodeintree(dns_rbtdb_t * rbtdb,dns_rbt_t * tree,const dns_name_t * name,bool create,dns_dbnode_t ** nodep)2906 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, const dns_name_t *name,
2907 bool create, dns_dbnode_t **nodep) {
2908 dns_rbtnode_t *node = NULL;
2909 dns_name_t nodename;
2910 isc_result_t result;
2911 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2912
2913 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2914
2915 dns_name_init(&nodename, NULL);
2916 RWLOCK(&rbtdb->tree_lock, locktype);
2917 result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2918 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2919 if (result != ISC_R_SUCCESS) {
2920 RWUNLOCK(&rbtdb->tree_lock, locktype);
2921 if (!create) {
2922 if (result == DNS_R_PARTIALMATCH) {
2923 result = ISC_R_NOTFOUND;
2924 }
2925 return (result);
2926 }
2927 /*
2928 * It would be nice to try to upgrade the lock instead of
2929 * unlocking then relocking.
2930 */
2931 locktype = isc_rwlocktype_write;
2932 RWLOCK(&rbtdb->tree_lock, locktype);
2933 node = NULL;
2934 result = dns_rbt_addnode(tree, name, &node);
2935 if (result == ISC_R_SUCCESS) {
2936 dns_rbt_namefromnode(node, &nodename);
2937 node->locknum = node->hashval % rbtdb->node_lock_count;
2938 if (tree == rbtdb->tree) {
2939 add_empty_wildcards(rbtdb, name, true);
2940
2941 if (dns_name_iswildcard(name)) {
2942 result = add_wildcard_magic(rbtdb, name,
2943 true);
2944 if (result != ISC_R_SUCCESS) {
2945 RWUNLOCK(&rbtdb->tree_lock,
2946 locktype);
2947 return (result);
2948 }
2949 }
2950 }
2951 if (tree == rbtdb->nsec3) {
2952 node->nsec = DNS_RBT_NSEC_NSEC3;
2953 }
2954 } else if (result != ISC_R_EXISTS) {
2955 RWUNLOCK(&rbtdb->tree_lock, locktype);
2956 return (result);
2957 }
2958 }
2959
2960 if (tree == rbtdb->nsec3) {
2961 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2962 }
2963
2964 reactivate_node(rbtdb, node, locktype);
2965
2966 RWUNLOCK(&rbtdb->tree_lock, locktype);
2967
2968 *nodep = (dns_dbnode_t *)node;
2969
2970 return (ISC_R_SUCCESS);
2971 }
2972
2973 static isc_result_t
findnode(dns_db_t * db,const dns_name_t * name,bool create,dns_dbnode_t ** nodep)2974 findnode(dns_db_t *db, const dns_name_t *name, bool create,
2975 dns_dbnode_t **nodep) {
2976 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2977
2978 REQUIRE(VALID_RBTDB(rbtdb));
2979
2980 return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2981 }
2982
2983 static isc_result_t
findnsec3node(dns_db_t * db,const dns_name_t * name,bool create,dns_dbnode_t ** nodep)2984 findnsec3node(dns_db_t *db, const dns_name_t *name, bool create,
2985 dns_dbnode_t **nodep) {
2986 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2987
2988 REQUIRE(VALID_RBTDB(rbtdb));
2989
2990 return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2991 }
2992
2993 static isc_result_t
zone_zonecut_callback(dns_rbtnode_t * node,dns_name_t * name,void * arg)2994 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2995 rbtdb_search_t *search = arg;
2996 rdatasetheader_t *header, *header_next;
2997 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2998 rdatasetheader_t *found;
2999 isc_result_t result;
3000 dns_rbtnode_t *onode;
3001
3002 /*
3003 * We only want to remember the topmost zone cut, since it's the one
3004 * that counts, so we'll just continue if we've already found a
3005 * zonecut.
3006 */
3007 if (search->zonecut != NULL) {
3008 return (DNS_R_CONTINUE);
3009 }
3010
3011 found = NULL;
3012 result = DNS_R_CONTINUE;
3013 onode = search->rbtdb->origin_node;
3014
3015 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3016 isc_rwlocktype_read);
3017
3018 /*
3019 * Look for an NS or DNAME rdataset active in our version.
3020 */
3021 ns_header = NULL;
3022 dname_header = NULL;
3023 sigdname_header = NULL;
3024 for (header = node->data; header != NULL; header = header_next) {
3025 header_next = header->next;
3026 if (header->type == dns_rdatatype_ns ||
3027 header->type == dns_rdatatype_dname ||
3028 header->type == RBTDB_RDATATYPE_SIGDNAME)
3029 {
3030 do {
3031 if (header->serial <= search->serial &&
3032 !IGNORE(header))
3033 {
3034 /*
3035 * Is this a "this rdataset doesn't
3036 * exist" record?
3037 */
3038 if (NONEXISTENT(header)) {
3039 header = NULL;
3040 }
3041 break;
3042 } else {
3043 header = header->down;
3044 }
3045 } while (header != NULL);
3046 if (header != NULL) {
3047 if (header->type == dns_rdatatype_dname) {
3048 dname_header = header;
3049 } else if (header->type ==
3050 RBTDB_RDATATYPE_SIGDNAME)
3051 {
3052 sigdname_header = header;
3053 } else if (node != onode ||
3054 IS_STUB(search->rbtdb))
3055 {
3056 /*
3057 * We've found an NS rdataset that
3058 * isn't at the origin node. We check
3059 * that they're not at the origin node,
3060 * because otherwise we'd erroneously
3061 * treat the zone top as if it were
3062 * a delegation.
3063 */
3064 ns_header = header;
3065 }
3066 }
3067 }
3068 }
3069
3070 /*
3071 * Did we find anything?
3072 */
3073 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
3074 ns_header != NULL)
3075 {
3076 /*
3077 * Note that NS has precedence over DNAME if both exist
3078 * in a zone. Otherwise DNAME take precedence over NS.
3079 */
3080 found = ns_header;
3081 search->zonecut_sigrdataset = NULL;
3082 } else if (dname_header != NULL) {
3083 found = dname_header;
3084 search->zonecut_sigrdataset = sigdname_header;
3085 } else if (ns_header != NULL) {
3086 found = ns_header;
3087 search->zonecut_sigrdataset = NULL;
3088 }
3089
3090 if (found != NULL) {
3091 /*
3092 * We increment the reference count on node to ensure that
3093 * search->zonecut_rdataset will still be valid later.
3094 */
3095 new_reference(search->rbtdb, node, isc_rwlocktype_read);
3096 search->zonecut = node;
3097 search->zonecut_rdataset = found;
3098 search->need_cleanup = true;
3099 /*
3100 * Since we've found a zonecut, anything beneath it is
3101 * glue and is not subject to wildcard matching, so we
3102 * may clear search->wild.
3103 */
3104 search->wild = false;
3105 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
3106 /*
3107 * If the caller does not want to find glue, then
3108 * this is the best answer and the search should
3109 * stop now.
3110 */
3111 result = DNS_R_PARTIALMATCH;
3112 } else {
3113 dns_name_t *zcname;
3114
3115 /*
3116 * The search will continue beneath the zone cut.
3117 * This may or may not be the best match. In case it
3118 * is, we need to remember the node name.
3119 */
3120 zcname = dns_fixedname_name(&search->zonecut_name);
3121 dns_name_copynf(name, zcname);
3122 search->copy_name = true;
3123 }
3124 } else {
3125 /*
3126 * There is no zonecut at this node which is active in this
3127 * version.
3128 *
3129 * If this is a "wild" node and the caller hasn't disabled
3130 * wildcard matching, remember that we've seen a wild node
3131 * in case we need to go searching for wildcard matches
3132 * later on.
3133 */
3134 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0) {
3135 search->wild = true;
3136 }
3137 }
3138
3139 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3140 isc_rwlocktype_read);
3141
3142 return (result);
3143 }
3144
3145 static void
bind_rdataset(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rdatasetheader_t * header,isc_stdtime_t now,isc_rwlocktype_t locktype,dns_rdataset_t * rdataset)3146 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header,
3147 isc_stdtime_t now, isc_rwlocktype_t locktype,
3148 dns_rdataset_t *rdataset) {
3149 unsigned char *raw; /* RDATASLAB */
3150 bool stale = STALE(header);
3151 bool ancient = ANCIENT(header);
3152
3153 /*
3154 * Caller must be holding the node reader lock.
3155 * XXXJT: technically, we need a writer lock, since we'll increment
3156 * the header count below. However, since the actual counter value
3157 * doesn't matter, we prioritize performance here. (We may want to
3158 * use atomic increment when available).
3159 */
3160
3161 if (rdataset == NULL) {
3162 return;
3163 }
3164
3165 new_reference(rbtdb, node, locktype);
3166
3167 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
3168
3169 /*
3170 * Mark header stale or ancient if the RRset is no longer active.
3171 */
3172 if (!ACTIVE(header, now)) {
3173 dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl;
3174 /*
3175 * If this data is in the stale window keep it and if
3176 * DNS_DBFIND_STALEOK is not set we tell the caller to
3177 * skip this record. We skip the records with ZEROTTL
3178 * (these records should not be cached anyway).
3179 */
3180
3181 if (KEEPSTALE(rbtdb) && stale_ttl > now) {
3182 stale = true;
3183 } else {
3184 /*
3185 * We are not keeping stale, or it is outside the
3186 * stale window. Mark ancient, i.e. ready for cleanup.
3187 */
3188 ancient = true;
3189 }
3190 }
3191
3192 rdataset->methods = &rdataset_methods;
3193 rdataset->rdclass = rbtdb->common.rdclass;
3194 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
3195 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
3196 rdataset->ttl = header->rdh_ttl - now;
3197 rdataset->trust = header->trust;
3198
3199 if (NEGATIVE(header)) {
3200 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
3201 }
3202 if (NXDOMAIN(header)) {
3203 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
3204 }
3205 if (OPTOUT(header)) {
3206 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
3207 }
3208 if (PREFETCH(header)) {
3209 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
3210 }
3211
3212 if (stale && !ancient) {
3213 dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl;
3214 if (stale_ttl > now) {
3215 rdataset->ttl = stale_ttl - now;
3216 } else {
3217 rdataset->ttl = 0;
3218 }
3219 if (STALE_WINDOW(header)) {
3220 rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW;
3221 }
3222 rdataset->attributes |= DNS_RDATASETATTR_STALE;
3223 } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) {
3224 rdataset->attributes |= DNS_RDATASETATTR_ANCIENT;
3225 rdataset->ttl = header->rdh_ttl;
3226 }
3227
3228 rdataset->private1 = rbtdb;
3229 rdataset->private2 = node;
3230 raw = (unsigned char *)header + sizeof(*header);
3231 rdataset->private3 = raw;
3232 rdataset->count = atomic_fetch_add_relaxed(&header->count, 1);
3233 if (rdataset->count == UINT32_MAX) {
3234 rdataset->count = 0;
3235 }
3236
3237 /*
3238 * Reset iterator state.
3239 */
3240 rdataset->privateuint4 = 0;
3241 rdataset->private5 = NULL;
3242
3243 /*
3244 * Add noqname proof.
3245 */
3246 rdataset->private6 = header->noqname;
3247 if (rdataset->private6 != NULL) {
3248 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
3249 }
3250 rdataset->private7 = header->closest;
3251 if (rdataset->private7 != NULL) {
3252 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
3253 }
3254
3255 /*
3256 * Copy out re-signing information.
3257 */
3258 if (RESIGN(header)) {
3259 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
3260 rdataset->resign = (header->resign << 1) | header->resign_lsb;
3261 } else {
3262 rdataset->resign = 0;
3263 }
3264 }
3265
3266 static isc_result_t
setup_delegation(rbtdb_search_t * search,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)3267 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
3268 dns_name_t *foundname, dns_rdataset_t *rdataset,
3269 dns_rdataset_t *sigrdataset) {
3270 dns_name_t *zcname;
3271 rbtdb_rdatatype_t type;
3272 dns_rbtnode_t *node;
3273
3274 /*
3275 * The caller MUST NOT be holding any node locks.
3276 */
3277
3278 node = search->zonecut;
3279 type = search->zonecut_rdataset->type;
3280
3281 /*
3282 * If we have to set foundname, we do it before anything else.
3283 * If we were to set foundname after we had set nodep or bound the
3284 * rdataset, then we'd have to undo that work if dns_name_copy()
3285 * failed. By setting foundname first, there's nothing to undo if
3286 * we have trouble.
3287 */
3288 if (foundname != NULL && search->copy_name) {
3289 zcname = dns_fixedname_name(&search->zonecut_name);
3290 dns_name_copynf(zcname, foundname);
3291 }
3292 if (nodep != NULL) {
3293 /*
3294 * Note that we don't have to increment the node's reference
3295 * count here because we're going to use the reference we
3296 * already have in the search block.
3297 */
3298 *nodep = node;
3299 search->need_cleanup = false;
3300 }
3301 if (rdataset != NULL) {
3302 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3303 isc_rwlocktype_read);
3304 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
3305 search->now, isc_rwlocktype_read, rdataset);
3306 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
3307 {
3308 bind_rdataset(search->rbtdb, node,
3309 search->zonecut_sigrdataset, search->now,
3310 isc_rwlocktype_read, sigrdataset);
3311 }
3312 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3313 isc_rwlocktype_read);
3314 }
3315
3316 if (type == dns_rdatatype_dname) {
3317 return (DNS_R_DNAME);
3318 }
3319 return (DNS_R_DELEGATION);
3320 }
3321
3322 static bool
valid_glue(rbtdb_search_t * search,dns_name_t * name,rbtdb_rdatatype_t type,dns_rbtnode_t * node)3323 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
3324 dns_rbtnode_t *node) {
3325 unsigned char *raw; /* RDATASLAB */
3326 unsigned int count, size;
3327 dns_name_t ns_name;
3328 bool valid = false;
3329 dns_offsets_t offsets;
3330 isc_region_t region;
3331 rdatasetheader_t *header;
3332
3333 /*
3334 * No additional locking is required.
3335 */
3336
3337 /*
3338 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
3339 * if it occurs at a zone cut, but is not valid below it.
3340 */
3341 if (type == dns_rdatatype_ns) {
3342 if (node != search->zonecut) {
3343 return (false);
3344 }
3345 } else if (type != dns_rdatatype_a && type != dns_rdatatype_aaaa &&
3346 type != dns_rdatatype_a6)
3347 {
3348 return (false);
3349 }
3350
3351 header = search->zonecut_rdataset;
3352 raw = (unsigned char *)header + sizeof(*header);
3353 count = raw[0] * 256 + raw[1];
3354 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
3355
3356 while (count > 0) {
3357 count--;
3358 size = raw[0] * 256 + raw[1];
3359 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
3360 region.base = raw;
3361 region.length = size;
3362 raw += size;
3363 /*
3364 * XXX Until we have rdata structures, we have no choice but
3365 * to directly access the rdata format.
3366 */
3367 dns_name_init(&ns_name, offsets);
3368 dns_name_fromregion(&ns_name, ®ion);
3369 if (dns_name_compare(&ns_name, name) == 0) {
3370 valid = true;
3371 break;
3372 }
3373 }
3374
3375 return (valid);
3376 }
3377
3378 static bool
activeempty(rbtdb_search_t * search,dns_rbtnodechain_t * chain,const dns_name_t * name)3379 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
3380 const dns_name_t *name) {
3381 dns_fixedname_t fnext;
3382 dns_fixedname_t forigin;
3383 dns_name_t *next;
3384 dns_name_t *origin;
3385 dns_name_t prefix;
3386 dns_rbtdb_t *rbtdb;
3387 dns_rbtnode_t *node;
3388 isc_result_t result;
3389 bool answer = false;
3390 rdatasetheader_t *header;
3391
3392 rbtdb = search->rbtdb;
3393
3394 dns_name_init(&prefix, NULL);
3395 next = dns_fixedname_initname(&fnext);
3396 origin = dns_fixedname_initname(&forigin);
3397
3398 result = dns_rbtnodechain_next(chain, NULL, NULL);
3399 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3400 node = NULL;
3401 result = dns_rbtnodechain_current(chain, &prefix, origin,
3402 &node);
3403 if (result != ISC_R_SUCCESS) {
3404 break;
3405 }
3406 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3407 isc_rwlocktype_read);
3408 for (header = node->data; header != NULL; header = header->next)
3409 {
3410 if (header->serial <= search->serial &&
3411 !IGNORE(header) && EXISTS(header))
3412 {
3413 break;
3414 }
3415 }
3416 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3417 isc_rwlocktype_read);
3418 if (header != NULL) {
3419 break;
3420 }
3421 result = dns_rbtnodechain_next(chain, NULL, NULL);
3422 }
3423 if (result == ISC_R_SUCCESS) {
3424 result = dns_name_concatenate(&prefix, origin, next, NULL);
3425 }
3426 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name)) {
3427 answer = true;
3428 }
3429 return (answer);
3430 }
3431
3432 static bool
activeemptynode(rbtdb_search_t * search,const dns_name_t * qname,dns_name_t * wname)3433 activeemptynode(rbtdb_search_t *search, const dns_name_t *qname,
3434 dns_name_t *wname) {
3435 dns_fixedname_t fnext;
3436 dns_fixedname_t forigin;
3437 dns_fixedname_t fprev;
3438 dns_name_t *next;
3439 dns_name_t *origin;
3440 dns_name_t *prev;
3441 dns_name_t name;
3442 dns_name_t rname;
3443 dns_name_t tname;
3444 dns_rbtdb_t *rbtdb;
3445 dns_rbtnode_t *node;
3446 dns_rbtnodechain_t chain;
3447 bool check_next = true;
3448 bool check_prev = true;
3449 bool answer = false;
3450 isc_result_t result;
3451 rdatasetheader_t *header;
3452 unsigned int n;
3453
3454 rbtdb = search->rbtdb;
3455
3456 dns_name_init(&name, NULL);
3457 dns_name_init(&tname, NULL);
3458 dns_name_init(&rname, NULL);
3459 next = dns_fixedname_initname(&fnext);
3460 prev = dns_fixedname_initname(&fprev);
3461 origin = dns_fixedname_initname(&forigin);
3462
3463 /*
3464 * Find if qname is at or below a empty node.
3465 * Use our own copy of the chain.
3466 */
3467
3468 chain = search->chain;
3469 do {
3470 node = NULL;
3471 result = dns_rbtnodechain_current(&chain, &name, origin, &node);
3472 if (result != ISC_R_SUCCESS) {
3473 break;
3474 }
3475 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3476 isc_rwlocktype_read);
3477 for (header = node->data; header != NULL; header = header->next)
3478 {
3479 if (header->serial <= search->serial &&
3480 !IGNORE(header) && EXISTS(header))
3481 {
3482 break;
3483 }
3484 }
3485 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3486 isc_rwlocktype_read);
3487 if (header != NULL) {
3488 break;
3489 }
3490 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3491 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3492 if (result == ISC_R_SUCCESS) {
3493 result = dns_name_concatenate(&name, origin, prev, NULL);
3494 }
3495 if (result != ISC_R_SUCCESS) {
3496 check_prev = false;
3497 }
3498
3499 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3500 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3501 node = NULL;
3502 result = dns_rbtnodechain_current(&chain, &name, origin, &node);
3503 if (result != ISC_R_SUCCESS) {
3504 break;
3505 }
3506 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3507 isc_rwlocktype_read);
3508 for (header = node->data; header != NULL; header = header->next)
3509 {
3510 if (header->serial <= search->serial &&
3511 !IGNORE(header) && EXISTS(header))
3512 {
3513 break;
3514 }
3515 }
3516 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3517 isc_rwlocktype_read);
3518 if (header != NULL) {
3519 break;
3520 }
3521 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3522 }
3523 if (result == ISC_R_SUCCESS) {
3524 result = dns_name_concatenate(&name, origin, next, NULL);
3525 }
3526 if (result != ISC_R_SUCCESS) {
3527 check_next = false;
3528 }
3529
3530 dns_name_clone(qname, &rname);
3531
3532 /*
3533 * Remove the wildcard label to find the terminal name.
3534 */
3535 n = dns_name_countlabels(wname);
3536 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3537
3538 do {
3539 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3540 (check_next && dns_name_issubdomain(next, &rname)))
3541 {
3542 answer = true;
3543 break;
3544 }
3545 /*
3546 * Remove the left hand label.
3547 */
3548 n = dns_name_countlabels(&rname);
3549 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3550 } while (!dns_name_equal(&rname, &tname));
3551 return (answer);
3552 }
3553
3554 static isc_result_t
find_wildcard(rbtdb_search_t * search,dns_rbtnode_t ** nodep,const dns_name_t * qname)3555 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3556 const dns_name_t *qname) {
3557 unsigned int i, j;
3558 dns_rbtnode_t *node, *level_node, *wnode;
3559 rdatasetheader_t *header;
3560 isc_result_t result = ISC_R_NOTFOUND;
3561 dns_name_t name;
3562 dns_name_t *wname;
3563 dns_fixedname_t fwname;
3564 dns_rbtdb_t *rbtdb;
3565 bool done, wild, active;
3566 dns_rbtnodechain_t wchain;
3567
3568 /*
3569 * Caller must be holding the tree lock and MUST NOT be holding
3570 * any node locks.
3571 */
3572
3573 /*
3574 * Examine each ancestor level. If the level's wild bit
3575 * is set, then construct the corresponding wildcard name and
3576 * search for it. If the wildcard node exists, and is active in
3577 * this version, we're done. If not, then we next check to see
3578 * if the ancestor is active in this version. If so, then there
3579 * can be no possible wildcard match and again we're done. If not,
3580 * continue the search.
3581 */
3582
3583 rbtdb = search->rbtdb;
3584 i = search->chain.level_matches;
3585 done = false;
3586 node = *nodep;
3587 do {
3588 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3589 isc_rwlocktype_read);
3590
3591 /*
3592 * First we try to figure out if this node is active in
3593 * the search's version. We do this now, even though we
3594 * may not need the information, because it simplifies the
3595 * locking and code flow.
3596 */
3597 for (header = node->data; header != NULL; header = header->next)
3598 {
3599 if (header->serial <= search->serial &&
3600 !IGNORE(header) && EXISTS(header) &&
3601 !ANCIENT(header))
3602 {
3603 break;
3604 }
3605 }
3606 if (header != NULL) {
3607 active = true;
3608 } else {
3609 active = false;
3610 }
3611
3612 if (node->wild) {
3613 wild = true;
3614 } else {
3615 wild = false;
3616 }
3617
3618 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3619 isc_rwlocktype_read);
3620
3621 if (wild) {
3622 /*
3623 * Construct the wildcard name for this level.
3624 */
3625 dns_name_init(&name, NULL);
3626 dns_rbt_namefromnode(node, &name);
3627 wname = dns_fixedname_initname(&fwname);
3628 result = dns_name_concatenate(dns_wildcardname, &name,
3629 wname, NULL);
3630 j = i;
3631 while (result == ISC_R_SUCCESS && j != 0) {
3632 j--;
3633 level_node = search->chain.levels[j];
3634 dns_name_init(&name, NULL);
3635 dns_rbt_namefromnode(level_node, &name);
3636 result = dns_name_concatenate(wname, &name,
3637 wname, NULL);
3638 }
3639 if (result != ISC_R_SUCCESS) {
3640 break;
3641 }
3642
3643 wnode = NULL;
3644 dns_rbtnodechain_init(&wchain);
3645 result = dns_rbt_findnode(
3646 rbtdb->tree, wname, NULL, &wnode, &wchain,
3647 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
3648 if (result == ISC_R_SUCCESS) {
3649 nodelock_t *lock;
3650
3651 /*
3652 * We have found the wildcard node. If it
3653 * is active in the search's version, we're
3654 * done.
3655 */
3656 lock = &rbtdb->node_locks[wnode->locknum].lock;
3657 NODE_LOCK(lock, isc_rwlocktype_read);
3658 for (header = wnode->data; header != NULL;
3659 header = header->next)
3660 {
3661 if (header->serial <= search->serial &&
3662 !IGNORE(header) && EXISTS(header) &&
3663 !ANCIENT(header))
3664 {
3665 break;
3666 }
3667 }
3668 NODE_UNLOCK(lock, isc_rwlocktype_read);
3669 if (header != NULL ||
3670 activeempty(search, &wchain, wname))
3671 {
3672 if (activeemptynode(search, qname,
3673 wname))
3674 {
3675 return (ISC_R_NOTFOUND);
3676 }
3677 /*
3678 * The wildcard node is active!
3679 *
3680 * Note: result is still ISC_R_SUCCESS
3681 * so we don't have to set it.
3682 */
3683 *nodep = wnode;
3684 break;
3685 }
3686 } else if (result != ISC_R_NOTFOUND &&
3687 result != DNS_R_PARTIALMATCH)
3688 {
3689 /*
3690 * An error has occurred. Bail out.
3691 */
3692 break;
3693 }
3694 }
3695
3696 if (active) {
3697 /*
3698 * The level node is active. Any wildcarding
3699 * present at higher levels has no
3700 * effect and we're done.
3701 */
3702 result = ISC_R_NOTFOUND;
3703 break;
3704 }
3705
3706 if (i > 0) {
3707 i--;
3708 node = search->chain.levels[i];
3709 } else {
3710 done = true;
3711 }
3712 } while (!done);
3713
3714 return (result);
3715 }
3716
3717 static bool
matchparams(rdatasetheader_t * header,rbtdb_search_t * search)3718 matchparams(rdatasetheader_t *header, rbtdb_search_t *search) {
3719 dns_rdata_t rdata = DNS_RDATA_INIT;
3720 dns_rdata_nsec3_t nsec3;
3721 unsigned char *raw; /* RDATASLAB */
3722 unsigned int rdlen, count;
3723 isc_region_t region;
3724 isc_result_t result;
3725
3726 REQUIRE(header->type == dns_rdatatype_nsec3);
3727
3728 raw = (unsigned char *)header + sizeof(*header);
3729 count = raw[0] * 256 + raw[1]; /* count */
3730 raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
3731
3732 while (count-- > 0) {
3733 rdlen = raw[0] * 256 + raw[1];
3734 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
3735 region.base = raw;
3736 region.length = rdlen;
3737 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3738 dns_rdatatype_nsec3, ®ion);
3739 raw += rdlen;
3740 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3741 INSIST(result == ISC_R_SUCCESS);
3742 if (nsec3.hash == search->rbtversion->hash &&
3743 nsec3.iterations == search->rbtversion->iterations &&
3744 nsec3.salt_length == search->rbtversion->salt_length &&
3745 memcmp(nsec3.salt, search->rbtversion->salt,
3746 nsec3.salt_length) == 0)
3747 {
3748 return (true);
3749 }
3750 dns_rdata_reset(&rdata);
3751 }
3752 return (false);
3753 }
3754
3755 /*
3756 * Find node of the NSEC/NSEC3 record that is 'name'.
3757 */
3758 static isc_result_t
previous_closest_nsec(dns_rdatatype_t type,rbtdb_search_t * search,dns_name_t * name,dns_name_t * origin,dns_rbtnode_t ** nodep,dns_rbtnodechain_t * nsecchain,bool * firstp)3759 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3760 dns_name_t *name, dns_name_t *origin,
3761 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3762 bool *firstp) {
3763 dns_fixedname_t ftarget;
3764 dns_name_t *target;
3765 dns_rbtnode_t *nsecnode;
3766 isc_result_t result;
3767
3768 REQUIRE(nodep != NULL && *nodep == NULL);
3769 REQUIRE(type == dns_rdatatype_nsec3 || firstp != NULL);
3770
3771 if (type == dns_rdatatype_nsec3) {
3772 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3773 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) {
3774 return (result);
3775 }
3776 result = dns_rbtnodechain_current(&search->chain, name, origin,
3777 nodep);
3778 return (result);
3779 }
3780
3781 target = dns_fixedname_initname(&ftarget);
3782
3783 for (;;) {
3784 if (*firstp) {
3785 /*
3786 * Construct the name of the second node to check.
3787 * It is the first node sought in the NSEC tree.
3788 */
3789 *firstp = false;
3790 dns_rbtnodechain_init(nsecchain);
3791 result = dns_name_concatenate(name, origin, target,
3792 NULL);
3793 if (result != ISC_R_SUCCESS) {
3794 return (result);
3795 }
3796 nsecnode = NULL;
3797 result = dns_rbt_findnode(
3798 search->rbtdb->nsec, target, NULL, &nsecnode,
3799 nsecchain, DNS_RBTFIND_EMPTYDATA, NULL, NULL);
3800 if (result == ISC_R_SUCCESS) {
3801 /*
3802 * Since this was the first loop, finding the
3803 * name in the NSEC tree implies that the first
3804 * node checked in the main tree had an
3805 * unacceptable NSEC record.
3806 * Try the previous node in the NSEC tree.
3807 */
3808 result = dns_rbtnodechain_prev(nsecchain, name,
3809 origin);
3810 if (result == DNS_R_NEWORIGIN) {
3811 result = ISC_R_SUCCESS;
3812 }
3813 } else if (result == ISC_R_NOTFOUND ||
3814 result == DNS_R_PARTIALMATCH)
3815 {
3816 result = dns_rbtnodechain_current(
3817 nsecchain, name, origin, NULL);
3818 if (result == ISC_R_NOTFOUND) {
3819 result = ISC_R_NOMORE;
3820 }
3821 }
3822 } else {
3823 /*
3824 * This is a second or later trip through the auxiliary
3825 * tree for the name of a third or earlier NSEC node in
3826 * the main tree. Previous trips through the NSEC tree
3827 * must have found nodes in the main tree with NSEC
3828 * records. Perhaps they lacked signature records.
3829 */
3830 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3831 if (result == DNS_R_NEWORIGIN) {
3832 result = ISC_R_SUCCESS;
3833 }
3834 }
3835 if (result != ISC_R_SUCCESS) {
3836 return (result);
3837 }
3838
3839 /*
3840 * Construct the name to seek in the main tree.
3841 */
3842 result = dns_name_concatenate(name, origin, target, NULL);
3843 if (result != ISC_R_SUCCESS) {
3844 return (result);
3845 }
3846
3847 *nodep = NULL;
3848 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3849 nodep, &search->chain,
3850 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
3851 if (result == ISC_R_SUCCESS) {
3852 return (result);
3853 }
3854
3855 /*
3856 * There should always be a node in the main tree with the
3857 * same name as the node in the auxiliary NSEC tree, except for
3858 * nodes in the auxiliary tree that are awaiting deletion.
3859 */
3860 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3861 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3862 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3863 "previous_closest_nsec(): %s",
3864 isc_result_totext(result));
3865 return (DNS_R_BADDB);
3866 }
3867 }
3868 }
3869
3870 /*
3871 * Find the NSEC/NSEC3 which is or before the current point on the
3872 * search chain. For NSEC3 records only NSEC3 records that match the
3873 * current NSEC3PARAM record are considered.
3874 */
3875 static isc_result_t
find_closest_nsec(rbtdb_search_t * search,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset,dns_rbt_t * tree,dns_db_secure_t secure)3876 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3877 dns_name_t *foundname, dns_rdataset_t *rdataset,
3878 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3879 dns_db_secure_t secure) {
3880 dns_rbtnode_t *node, *prevnode;
3881 rdatasetheader_t *header, *header_next, *found, *foundsig;
3882 dns_rbtnodechain_t nsecchain;
3883 bool empty_node;
3884 isc_result_t result;
3885 dns_fixedname_t fname, forigin;
3886 dns_name_t *name, *origin;
3887 dns_rdatatype_t type;
3888 rbtdb_rdatatype_t sigtype;
3889 bool wraps;
3890 bool first = true;
3891 bool need_sig = (secure == dns_db_secure);
3892
3893 if (tree == search->rbtdb->nsec3) {
3894 type = dns_rdatatype_nsec3;
3895 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3896 wraps = true;
3897 } else {
3898 type = dns_rdatatype_nsec;
3899 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3900 wraps = false;
3901 }
3902
3903 /*
3904 * Use the auxiliary tree only starting with the second node in the
3905 * hope that the original node will be right much of the time.
3906 */
3907 name = dns_fixedname_initname(&fname);
3908 origin = dns_fixedname_initname(&forigin);
3909 again:
3910 node = NULL;
3911 prevnode = NULL;
3912 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3913 if (result != ISC_R_SUCCESS) {
3914 return (result);
3915 }
3916 do {
3917 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3918 isc_rwlocktype_read);
3919 found = NULL;
3920 foundsig = NULL;
3921 empty_node = true;
3922 for (header = node->data; header != NULL; header = header_next)
3923 {
3924 header_next = header->next;
3925 /*
3926 * Look for an active, extant NSEC or RRSIG NSEC.
3927 */
3928 do {
3929 if (header->serial <= search->serial &&
3930 !IGNORE(header))
3931 {
3932 /*
3933 * Is this a "this rdataset doesn't
3934 * exist" record?
3935 */
3936 if (NONEXISTENT(header)) {
3937 header = NULL;
3938 }
3939 break;
3940 } else {
3941 header = header->down;
3942 }
3943 } while (header != NULL);
3944 if (header != NULL) {
3945 /*
3946 * We now know that there is at least one
3947 * active rdataset at this node.
3948 */
3949 empty_node = false;
3950 if (header->type == type) {
3951 found = header;
3952 if (foundsig != NULL) {
3953 break;
3954 }
3955 } else if (header->type == sigtype) {
3956 foundsig = header;
3957 if (found != NULL) {
3958 break;
3959 }
3960 }
3961 }
3962 }
3963 if (!empty_node) {
3964 if (found != NULL && search->rbtversion->havensec3 &&
3965 found->type == dns_rdatatype_nsec3 &&
3966 !matchparams(found, search))
3967 {
3968 empty_node = true;
3969 found = NULL;
3970 foundsig = NULL;
3971 result = previous_closest_nsec(
3972 type, search, name, origin, &prevnode,
3973 NULL, NULL);
3974 } else if (found != NULL &&
3975 (foundsig != NULL || !need_sig))
3976 {
3977 /*
3978 * We've found the right NSEC/NSEC3 record.
3979 *
3980 * Note: for this to really be the right
3981 * NSEC record, it's essential that the NSEC
3982 * records of any nodes obscured by a zone
3983 * cut have been removed; we assume this is
3984 * the case.
3985 */
3986 result = dns_name_concatenate(name, origin,
3987 foundname, NULL);
3988 if (result == ISC_R_SUCCESS) {
3989 if (nodep != NULL) {
3990 new_reference(
3991 search->rbtdb, node,
3992 isc_rwlocktype_read);
3993 *nodep = node;
3994 }
3995 bind_rdataset(search->rbtdb, node,
3996 found, search->now,
3997 isc_rwlocktype_read,
3998 rdataset);
3999 if (foundsig != NULL) {
4000 bind_rdataset(
4001 search->rbtdb, node,
4002 foundsig, search->now,
4003 isc_rwlocktype_read,
4004 sigrdataset);
4005 }
4006 }
4007 } else if (found == NULL && foundsig == NULL) {
4008 /*
4009 * This node is active, but has no NSEC or
4010 * RRSIG NSEC. That means it's glue or
4011 * other obscured zone data that isn't
4012 * relevant for our search. Treat the
4013 * node as if it were empty and keep looking.
4014 */
4015 empty_node = true;
4016 result = previous_closest_nsec(
4017 type, search, name, origin, &prevnode,
4018 &nsecchain, &first);
4019 } else {
4020 /*
4021 * We found an active node, but either the
4022 * NSEC or the RRSIG NSEC is missing. This
4023 * shouldn't happen.
4024 */
4025 result = DNS_R_BADDB;
4026 }
4027 } else {
4028 /*
4029 * This node isn't active. We've got to keep
4030 * looking.
4031 */
4032 result = previous_closest_nsec(type, search, name,
4033 origin, &prevnode,
4034 &nsecchain, &first);
4035 }
4036 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
4037 isc_rwlocktype_read);
4038 node = prevnode;
4039 prevnode = NULL;
4040 } while (empty_node && result == ISC_R_SUCCESS);
4041
4042 if (!first) {
4043 dns_rbtnodechain_invalidate(&nsecchain);
4044 }
4045
4046 if (result == ISC_R_NOMORE && wraps) {
4047 result = dns_rbtnodechain_last(&search->chain, tree, NULL,
4048 NULL);
4049 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
4050 wraps = false;
4051 goto again;
4052 }
4053 }
4054
4055 /*
4056 * If the result is ISC_R_NOMORE, then we got to the beginning of
4057 * the database and didn't find a NSEC record. This shouldn't
4058 * happen.
4059 */
4060 if (result == ISC_R_NOMORE) {
4061 result = DNS_R_BADDB;
4062 }
4063
4064 return (result);
4065 }
4066
4067 static isc_result_t
zone_find(dns_db_t * db,const dns_name_t * name,dns_dbversion_t * version,dns_rdatatype_t type,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4068 zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
4069 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4070 dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset,
4071 dns_rdataset_t *sigrdataset) {
4072 dns_rbtnode_t *node = NULL;
4073 isc_result_t result;
4074 rbtdb_search_t search;
4075 bool cname_ok = true;
4076 bool close_version = false;
4077 bool maybe_zonecut = false;
4078 bool at_zonecut = false;
4079 bool wild;
4080 bool empty_node;
4081 rdatasetheader_t *header, *header_next, *found, *nsecheader;
4082 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
4083 rbtdb_rdatatype_t sigtype;
4084 bool active;
4085 nodelock_t *lock;
4086 dns_rbt_t *tree;
4087
4088 search.rbtdb = (dns_rbtdb_t *)db;
4089
4090 REQUIRE(VALID_RBTDB(search.rbtdb));
4091 INSIST(version == NULL ||
4092 ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
4093
4094 /*
4095 * We don't care about 'now'.
4096 */
4097 UNUSED(now);
4098
4099 /*
4100 * If the caller didn't supply a version, attach to the current
4101 * version.
4102 */
4103 if (version == NULL) {
4104 currentversion(db, &version);
4105 close_version = true;
4106 }
4107
4108 search.rbtversion = version;
4109 search.serial = search.rbtversion->serial;
4110 search.options = options;
4111 search.copy_name = false;
4112 search.need_cleanup = false;
4113 search.wild = false;
4114 search.zonecut = NULL;
4115 dns_fixedname_init(&search.zonecut_name);
4116 dns_rbtnodechain_init(&search.chain);
4117 search.now = 0;
4118
4119 /*
4120 * 'wild' will be true iff. we've matched a wildcard.
4121 */
4122 wild = false;
4123
4124 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4125
4126 /*
4127 * Search down from the root of the tree. If, while going down, we
4128 * encounter a callback node, zone_zonecut_callback() will search the
4129 * rdatasets at the zone cut for active DNAME or NS rdatasets.
4130 */
4131 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3
4132 : search.rbtdb->tree;
4133 result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain,
4134 DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback,
4135 &search);
4136
4137 if (result == DNS_R_PARTIALMATCH) {
4138 partial_match:
4139 if (search.zonecut != NULL) {
4140 result = setup_delegation(&search, nodep, foundname,
4141 rdataset, sigrdataset);
4142 goto tree_exit;
4143 }
4144
4145 if (search.wild) {
4146 /*
4147 * At least one of the levels in the search chain
4148 * potentially has a wildcard. For each such level,
4149 * we must see if there's a matching wildcard active
4150 * in the current version.
4151 */
4152 result = find_wildcard(&search, &node, name);
4153 if (result == ISC_R_SUCCESS) {
4154 dns_name_copynf(name, foundname);
4155 wild = true;
4156 goto found;
4157 } else if (result != ISC_R_NOTFOUND) {
4158 goto tree_exit;
4159 }
4160 }
4161
4162 active = false;
4163 if ((options & DNS_DBFIND_FORCENSEC3) == 0) {
4164 /*
4165 * The NSEC3 tree won't have empty nodes,
4166 * so it isn't necessary to check for them.
4167 */
4168 dns_rbtnodechain_t chain = search.chain;
4169 active = activeempty(&search, &chain, name);
4170 }
4171
4172 /*
4173 * If we're here, then the name does not exist, is not
4174 * beneath a zonecut, and there's no matching wildcard.
4175 */
4176 if ((search.rbtversion->secure == dns_db_secure &&
4177 !search.rbtversion->havensec3) ||
4178 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
4179 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
4180 {
4181 result = find_closest_nsec(&search, nodep, foundname,
4182 rdataset, sigrdataset, tree,
4183 search.rbtversion->secure);
4184 if (result == ISC_R_SUCCESS) {
4185 result = active ? DNS_R_EMPTYNAME
4186 : DNS_R_NXDOMAIN;
4187 }
4188 } else {
4189 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
4190 }
4191 goto tree_exit;
4192 } else if (result != ISC_R_SUCCESS) {
4193 goto tree_exit;
4194 }
4195
4196 found:
4197 /*
4198 * We have found a node whose name is the desired name, or we
4199 * have matched a wildcard.
4200 */
4201
4202 if (search.zonecut != NULL) {
4203 /*
4204 * If we're beneath a zone cut, we don't want to look for
4205 * CNAMEs because they're not legitimate zone glue.
4206 */
4207 cname_ok = false;
4208 } else {
4209 /*
4210 * The node may be a zone cut itself. If it might be one,
4211 * make sure we check for it later.
4212 *
4213 * DS records live above the zone cut in ordinary zone so
4214 * we want to ignore any referral.
4215 *
4216 * Stub zones don't have anything "above" the delegation so
4217 * we always return a referral.
4218 */
4219 if (node->find_callback &&
4220 ((node != search.rbtdb->origin_node &&
4221 !dns_rdatatype_atparent(type)) ||
4222 IS_STUB(search.rbtdb)))
4223 {
4224 maybe_zonecut = true;
4225 }
4226 }
4227
4228 /*
4229 * Certain DNSSEC types are not subject to CNAME matching
4230 * (RFC4035, section 2.5 and RFC3007).
4231 *
4232 * We don't check for RRSIG, because we don't store RRSIG records
4233 * directly.
4234 */
4235 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
4236 cname_ok = false;
4237 }
4238
4239 /*
4240 * We now go looking for rdata...
4241 */
4242
4243 lock = &search.rbtdb->node_locks[node->locknum].lock;
4244 NODE_LOCK(lock, isc_rwlocktype_read);
4245
4246 found = NULL;
4247 foundsig = NULL;
4248 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4249 nsecheader = NULL;
4250 nsecsig = NULL;
4251 cnamesig = NULL;
4252 empty_node = true;
4253 for (header = node->data; header != NULL; header = header_next) {
4254 header_next = header->next;
4255 /*
4256 * Look for an active, extant rdataset.
4257 */
4258 do {
4259 if (header->serial <= search.serial && !IGNORE(header))
4260 {
4261 /*
4262 * Is this a "this rdataset doesn't
4263 * exist" record?
4264 */
4265 if (NONEXISTENT(header)) {
4266 header = NULL;
4267 }
4268 break;
4269 } else {
4270 header = header->down;
4271 }
4272 } while (header != NULL);
4273 if (header != NULL) {
4274 /*
4275 * We now know that there is at least one active
4276 * rdataset at this node.
4277 */
4278 empty_node = false;
4279
4280 /*
4281 * Do special zone cut handling, if requested.
4282 */
4283 if (maybe_zonecut && header->type == dns_rdatatype_ns) {
4284 /*
4285 * We increment the reference count on node to
4286 * ensure that search->zonecut_rdataset will
4287 * still be valid later.
4288 */
4289 new_reference(search.rbtdb, node,
4290 isc_rwlocktype_read);
4291 search.zonecut = node;
4292 search.zonecut_rdataset = header;
4293 search.zonecut_sigrdataset = NULL;
4294 search.need_cleanup = true;
4295 maybe_zonecut = false;
4296 at_zonecut = true;
4297 /*
4298 * It is not clear if KEY should still be
4299 * allowed at the parent side of the zone
4300 * cut or not. It is needed for RFC3007
4301 * validated updates.
4302 */
4303 if ((search.options & DNS_DBFIND_GLUEOK) == 0 &&
4304 type != dns_rdatatype_nsec &&
4305 type != dns_rdatatype_key)
4306 {
4307 /*
4308 * Glue is not OK, but any answer we
4309 * could return would be glue. Return
4310 * the delegation.
4311 */
4312 found = NULL;
4313 break;
4314 }
4315 if (found != NULL && foundsig != NULL) {
4316 break;
4317 }
4318 }
4319
4320 /*
4321 * If the NSEC3 record doesn't match the chain
4322 * we are using behave as if it isn't here.
4323 */
4324 if (header->type == dns_rdatatype_nsec3 &&
4325 !matchparams(header, &search))
4326 {
4327 NODE_UNLOCK(lock, isc_rwlocktype_read);
4328 goto partial_match;
4329 }
4330 /*
4331 * If we found a type we were looking for,
4332 * remember it.
4333 */
4334 if (header->type == type || type == dns_rdatatype_any ||
4335 (header->type == dns_rdatatype_cname && cname_ok))
4336 {
4337 /*
4338 * We've found the answer!
4339 */
4340 found = header;
4341 if (header->type == dns_rdatatype_cname &&
4342 cname_ok)
4343 {
4344 /*
4345 * We may be finding a CNAME instead
4346 * of the desired type.
4347 *
4348 * If we've already got the CNAME RRSIG,
4349 * use it, otherwise change sigtype
4350 * so that we find it.
4351 */
4352 if (cnamesig != NULL) {
4353 foundsig = cnamesig;
4354 } else {
4355 sigtype =
4356 RBTDB_RDATATYPE_SIGCNAME;
4357 }
4358 }
4359 /*
4360 * If we've got all we need, end the search.
4361 */
4362 if (!maybe_zonecut && foundsig != NULL) {
4363 break;
4364 }
4365 } else if (header->type == sigtype) {
4366 /*
4367 * We've found the RRSIG rdataset for our
4368 * target type. Remember it.
4369 */
4370 foundsig = header;
4371 /*
4372 * If we've got all we need, end the search.
4373 */
4374 if (!maybe_zonecut && found != NULL) {
4375 break;
4376 }
4377 } else if (header->type == dns_rdatatype_nsec &&
4378 !search.rbtversion->havensec3)
4379 {
4380 /*
4381 * Remember a NSEC rdataset even if we're
4382 * not specifically looking for it, because
4383 * we might need it later.
4384 */
4385 nsecheader = header;
4386 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
4387 !search.rbtversion->havensec3)
4388 {
4389 /*
4390 * If we need the NSEC rdataset, we'll also
4391 * need its signature.
4392 */
4393 nsecsig = header;
4394 } else if (cname_ok &&
4395 header->type == RBTDB_RDATATYPE_SIGCNAME)
4396 {
4397 /*
4398 * If we get a CNAME match, we'll also need
4399 * its signature.
4400 */
4401 cnamesig = header;
4402 }
4403 }
4404 }
4405
4406 if (empty_node) {
4407 /*
4408 * We have an exact match for the name, but there are no
4409 * active rdatasets in the desired version. That means that
4410 * this node doesn't exist in the desired version, and that
4411 * we really have a partial match.
4412 */
4413 if (!wild) {
4414 NODE_UNLOCK(lock, isc_rwlocktype_read);
4415 goto partial_match;
4416 }
4417 }
4418
4419 /*
4420 * If we didn't find what we were looking for...
4421 */
4422 if (found == NULL) {
4423 if (search.zonecut != NULL) {
4424 /*
4425 * We were trying to find glue at a node beneath a
4426 * zone cut, but didn't.
4427 *
4428 * Return the delegation.
4429 */
4430 NODE_UNLOCK(lock, isc_rwlocktype_read);
4431 result = setup_delegation(&search, nodep, foundname,
4432 rdataset, sigrdataset);
4433 goto tree_exit;
4434 }
4435 /*
4436 * The desired type doesn't exist.
4437 */
4438 result = DNS_R_NXRRSET;
4439 if (search.rbtversion->secure == dns_db_secure &&
4440 !search.rbtversion->havensec3 &&
4441 (nsecheader == NULL || nsecsig == NULL))
4442 {
4443 /*
4444 * The zone is secure but there's no NSEC,
4445 * or the NSEC has no signature!
4446 */
4447 if (!wild) {
4448 result = DNS_R_BADDB;
4449 goto node_exit;
4450 }
4451
4452 NODE_UNLOCK(lock, isc_rwlocktype_read);
4453 result = find_closest_nsec(&search, nodep, foundname,
4454 rdataset, sigrdataset,
4455 search.rbtdb->tree,
4456 search.rbtversion->secure);
4457 if (result == ISC_R_SUCCESS) {
4458 result = DNS_R_EMPTYWILD;
4459 }
4460 goto tree_exit;
4461 }
4462 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
4463 nsecheader == NULL)
4464 {
4465 /*
4466 * There's no NSEC record, and we were told
4467 * to find one.
4468 */
4469 result = DNS_R_BADDB;
4470 goto node_exit;
4471 }
4472 if (nodep != NULL) {
4473 new_reference(search.rbtdb, node, isc_rwlocktype_read);
4474 *nodep = node;
4475 }
4476 if ((search.rbtversion->secure == dns_db_secure &&
4477 !search.rbtversion->havensec3) ||
4478 (search.options & DNS_DBFIND_FORCENSEC) != 0)
4479 {
4480 bind_rdataset(search.rbtdb, node, nsecheader, 0,
4481 isc_rwlocktype_read, rdataset);
4482 if (nsecsig != NULL) {
4483 bind_rdataset(search.rbtdb, node, nsecsig, 0,
4484 isc_rwlocktype_read, sigrdataset);
4485 }
4486 }
4487 if (wild) {
4488 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4489 }
4490 goto node_exit;
4491 }
4492
4493 /*
4494 * We found what we were looking for, or we found a CNAME.
4495 */
4496
4497 if (type != found->type && type != dns_rdatatype_any &&
4498 found->type == dns_rdatatype_cname)
4499 {
4500 /*
4501 * We weren't doing an ANY query and we found a CNAME instead
4502 * of the type we were looking for, so we need to indicate
4503 * that result to the caller.
4504 */
4505 result = DNS_R_CNAME;
4506 } else if (search.zonecut != NULL) {
4507 /*
4508 * If we're beneath a zone cut, we must indicate that the
4509 * result is glue, unless we're actually at the zone cut
4510 * and the type is NSEC or KEY.
4511 */
4512 if (search.zonecut == node) {
4513 /*
4514 * It is not clear if KEY should still be
4515 * allowed at the parent side of the zone
4516 * cut or not. It is needed for RFC3007
4517 * validated updates.
4518 */
4519 if (type == dns_rdatatype_nsec ||
4520 type == dns_rdatatype_nsec3 ||
4521 type == dns_rdatatype_key)
4522 {
4523 result = ISC_R_SUCCESS;
4524 } else if (type == dns_rdatatype_any) {
4525 result = DNS_R_ZONECUT;
4526 } else {
4527 result = DNS_R_GLUE;
4528 }
4529 } else {
4530 result = DNS_R_GLUE;
4531 }
4532 /*
4533 * We might have found data that isn't glue, but was occluded
4534 * by a dynamic update. If the caller cares about this, they
4535 * will have told us to validate glue.
4536 *
4537 * XXX We should cache the glue validity state!
4538 */
4539 if (result == DNS_R_GLUE &&
4540 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4541 !valid_glue(&search, foundname, type, node))
4542 {
4543 NODE_UNLOCK(lock, isc_rwlocktype_read);
4544 result = setup_delegation(&search, nodep, foundname,
4545 rdataset, sigrdataset);
4546 goto tree_exit;
4547 }
4548 } else {
4549 /*
4550 * An ordinary successful query!
4551 */
4552 result = ISC_R_SUCCESS;
4553 }
4554
4555 if (nodep != NULL) {
4556 if (!at_zonecut) {
4557 new_reference(search.rbtdb, node, isc_rwlocktype_read);
4558 } else {
4559 search.need_cleanup = false;
4560 }
4561 *nodep = node;
4562 }
4563
4564 if (type != dns_rdatatype_any) {
4565 bind_rdataset(search.rbtdb, node, found, 0, isc_rwlocktype_read,
4566 rdataset);
4567 if (foundsig != NULL) {
4568 bind_rdataset(search.rbtdb, node, foundsig, 0,
4569 isc_rwlocktype_read, sigrdataset);
4570 }
4571 }
4572
4573 if (wild) {
4574 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4575 }
4576
4577 node_exit:
4578 NODE_UNLOCK(lock, isc_rwlocktype_read);
4579
4580 tree_exit:
4581 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4582
4583 /*
4584 * If we found a zonecut but aren't going to use it, we have to
4585 * let go of it.
4586 */
4587 if (search.need_cleanup) {
4588 node = search.zonecut;
4589 INSIST(node != NULL);
4590 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4591
4592 NODE_LOCK(lock, isc_rwlocktype_read);
4593 decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
4594 isc_rwlocktype_none, false);
4595 NODE_UNLOCK(lock, isc_rwlocktype_read);
4596 }
4597
4598 if (close_version) {
4599 closeversion(db, &version, false);
4600 }
4601
4602 dns_rbtnodechain_reset(&search.chain);
4603
4604 return (result);
4605 }
4606
4607 static isc_result_t
zone_findzonecut(dns_db_t * db,const dns_name_t * name,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_name_t * dcname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4608 zone_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
4609 isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname,
4610 dns_name_t *dcname, dns_rdataset_t *rdataset,
4611 dns_rdataset_t *sigrdataset) {
4612 UNUSED(db);
4613 UNUSED(name);
4614 UNUSED(options);
4615 UNUSED(now);
4616 UNUSED(nodep);
4617 UNUSED(foundname);
4618 UNUSED(dcname);
4619 UNUSED(rdataset);
4620 UNUSED(sigrdataset);
4621
4622 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4623
4624 UNREACHABLE();
4625 return (ISC_R_NOTIMPLEMENTED);
4626 }
4627
4628 static bool
check_stale_header(dns_rbtnode_t * node,rdatasetheader_t * header,isc_rwlocktype_t * locktype,nodelock_t * lock,rbtdb_search_t * search,rdatasetheader_t ** header_prev)4629 check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
4630 isc_rwlocktype_t *locktype, nodelock_t *lock,
4631 rbtdb_search_t *search, rdatasetheader_t **header_prev) {
4632 if (!ACTIVE(header, search->now)) {
4633 dns_ttl_t stale = header->rdh_ttl +
4634 search->rbtdb->serve_stale_ttl;
4635 /*
4636 * If this data is in the stale window keep it and if
4637 * DNS_DBFIND_STALEOK is not set we tell the caller to
4638 * skip this record. We skip the records with ZEROTTL
4639 * (these records should not be cached anyway).
4640 */
4641
4642 RDATASET_ATTR_CLR(header, RDATASET_ATTR_STALE_WINDOW);
4643 if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) &&
4644 stale > search->now)
4645 {
4646 mark_header_stale(search->rbtdb, header);
4647 *header_prev = header;
4648 /*
4649 * If DNS_DBFIND_STALESTART is set then it means we
4650 * failed to resolve the name during recursion, in
4651 * this case we mark the time in which the refresh
4652 * failed.
4653 */
4654 if ((search->options & DNS_DBFIND_STALESTART) != 0) {
4655 atomic_store_release(
4656 &header->last_refresh_fail_ts,
4657 search->now);
4658 } else if ((search->options &
4659 DNS_DBFIND_STALEENABLED) != 0 &&
4660 search->now <
4661 (atomic_load_acquire(
4662 &header->last_refresh_fail_ts) +
4663 search->rbtdb->serve_stale_refresh))
4664 {
4665 /*
4666 * If we are within interval between last
4667 * refresh failure time + 'stale-refresh-time',
4668 * then don't skip this stale entry but use it
4669 * instead.
4670 */
4671 RDATASET_ATTR_SET(header,
4672 RDATASET_ATTR_STALE_WINDOW);
4673 return (false);
4674 } else if ((search->options &
4675 DNS_DBFIND_STALETIMEOUT) != 0)
4676 {
4677 /*
4678 * We want stale RRset due to timeout, so we
4679 * don't skip it.
4680 */
4681 return (false);
4682 }
4683 return ((search->options & DNS_DBFIND_STALEOK) == 0);
4684 }
4685
4686 /*
4687 * This rdataset is stale. If no one else is using the
4688 * node, we can clean it up right now, otherwise we mark
4689 * it as ancient, and the node as dirty, so it will get
4690 * cleaned up later.
4691 */
4692 if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) &&
4693 (*locktype == isc_rwlocktype_write ||
4694 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS))
4695 {
4696 /*
4697 * We update the node's status only when we can
4698 * get write access; otherwise, we leave others
4699 * to this work. Periodical cleaning will
4700 * eventually take the job as the last resort.
4701 * We won't downgrade the lock, since other
4702 * rdatasets are probably stale, too.
4703 */
4704 *locktype = isc_rwlocktype_write;
4705
4706 if (isc_refcount_current(&node->references) == 0) {
4707 isc_mem_t *mctx;
4708
4709 /*
4710 * header->down can be non-NULL if the
4711 * refcount has just decremented to 0
4712 * but decrement_reference() has not
4713 * performed clean_cache_node(), in
4714 * which case we need to purge the stale
4715 * headers first.
4716 */
4717 mctx = search->rbtdb->common.mctx;
4718 clean_stale_headers(search->rbtdb, mctx,
4719 header);
4720 if (*header_prev != NULL) {
4721 (*header_prev)->next = header->next;
4722 } else {
4723 node->data = header->next;
4724 }
4725 free_rdataset(search->rbtdb, mctx, header);
4726 } else {
4727 mark_header_ancient(search->rbtdb, header);
4728 *header_prev = header;
4729 }
4730 } else {
4731 *header_prev = header;
4732 }
4733 return (true);
4734 }
4735 return (false);
4736 }
4737
4738 static isc_result_t
cache_zonecut_callback(dns_rbtnode_t * node,dns_name_t * name,void * arg)4739 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4740 rbtdb_search_t *search = arg;
4741 rdatasetheader_t *header, *header_prev, *header_next;
4742 rdatasetheader_t *dname_header, *sigdname_header;
4743 isc_result_t result;
4744 nodelock_t *lock;
4745 isc_rwlocktype_t locktype;
4746
4747 /* XXX comment */
4748
4749 REQUIRE(search->zonecut == NULL);
4750
4751 /*
4752 * Keep compiler silent.
4753 */
4754 UNUSED(name);
4755
4756 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4757 locktype = isc_rwlocktype_read;
4758 NODE_LOCK(lock, locktype);
4759
4760 /*
4761 * Look for a DNAME or RRSIG DNAME rdataset.
4762 */
4763 dname_header = NULL;
4764 sigdname_header = NULL;
4765 header_prev = NULL;
4766 for (header = node->data; header != NULL; header = header_next) {
4767 header_next = header->next;
4768 if (check_stale_header(node, header, &locktype, lock, search,
4769 &header_prev))
4770 {
4771 /* Do nothing. */
4772 } else if (header->type == dns_rdatatype_dname &&
4773 EXISTS(header) && !ANCIENT(header))
4774 {
4775 dname_header = header;
4776 header_prev = header;
4777 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4778 EXISTS(header) && !ANCIENT(header))
4779 {
4780 sigdname_header = header;
4781 header_prev = header;
4782 } else {
4783 header_prev = header;
4784 }
4785 }
4786
4787 if (dname_header != NULL &&
4788 (!DNS_TRUST_PENDING(dname_header->trust) ||
4789 (search->options & DNS_DBFIND_PENDINGOK) != 0))
4790 {
4791 /*
4792 * We increment the reference count on node to ensure that
4793 * search->zonecut_rdataset will still be valid later.
4794 */
4795 new_reference(search->rbtdb, node, locktype);
4796 search->zonecut = node;
4797 search->zonecut_rdataset = dname_header;
4798 search->zonecut_sigrdataset = sigdname_header;
4799 search->need_cleanup = true;
4800 result = DNS_R_PARTIALMATCH;
4801 } else {
4802 result = DNS_R_CONTINUE;
4803 }
4804
4805 NODE_UNLOCK(lock, locktype);
4806
4807 return (result);
4808 }
4809
4810 static isc_result_t
find_deepest_zonecut(rbtdb_search_t * search,dns_rbtnode_t * node,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4811 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4812 dns_dbnode_t **nodep, dns_name_t *foundname,
4813 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
4814 unsigned int i;
4815 dns_rbtnode_t *level_node;
4816 rdatasetheader_t *header, *header_prev, *header_next;
4817 rdatasetheader_t *found, *foundsig;
4818 isc_result_t result = ISC_R_NOTFOUND;
4819 dns_name_t name;
4820 dns_rbtdb_t *rbtdb;
4821 bool done;
4822 nodelock_t *lock;
4823 isc_rwlocktype_t locktype;
4824
4825 /*
4826 * Caller must be holding the tree lock.
4827 */
4828
4829 rbtdb = search->rbtdb;
4830 i = search->chain.level_matches;
4831 done = false;
4832 do {
4833 locktype = isc_rwlocktype_read;
4834 lock = &rbtdb->node_locks[node->locknum].lock;
4835 NODE_LOCK(lock, locktype);
4836
4837 /*
4838 * Look for NS and RRSIG NS rdatasets.
4839 */
4840 found = NULL;
4841 foundsig = NULL;
4842 header_prev = NULL;
4843 for (header = node->data; header != NULL; header = header_next)
4844 {
4845 header_next = header->next;
4846 if (check_stale_header(node, header, &locktype, lock,
4847 search, &header_prev))
4848 {
4849 /* Do nothing. */
4850 } else if (EXISTS(header) && !ANCIENT(header)) {
4851 /*
4852 * We've found an extant rdataset. See if
4853 * we're interested in it.
4854 */
4855 if (header->type == dns_rdatatype_ns) {
4856 found = header;
4857 if (foundsig != NULL) {
4858 break;
4859 }
4860 } else if (header->type ==
4861 RBTDB_RDATATYPE_SIGNS)
4862 {
4863 foundsig = header;
4864 if (found != NULL) {
4865 break;
4866 }
4867 }
4868 header_prev = header;
4869 } else {
4870 header_prev = header;
4871 }
4872 }
4873
4874 if (found != NULL) {
4875 /*
4876 * If we have to set foundname, we do it before
4877 * anything else. If we were to set foundname after
4878 * we had set nodep or bound the rdataset, then we'd
4879 * have to undo that work if dns_name_concatenate()
4880 * failed. By setting foundname first, there's
4881 * nothing to undo if we have trouble.
4882 */
4883 if (foundname != NULL) {
4884 dns_name_init(&name, NULL);
4885 dns_rbt_namefromnode(node, &name);
4886 dns_name_copynf(&name, foundname);
4887 while (i > 0) {
4888 i--;
4889 level_node = search->chain.levels[i];
4890 dns_name_init(&name, NULL);
4891 dns_rbt_namefromnode(level_node, &name);
4892 result = dns_name_concatenate(
4893 foundname, &name, foundname,
4894 NULL);
4895 if (result != ISC_R_SUCCESS) {
4896 if (nodep != NULL) {
4897 *nodep = NULL;
4898 }
4899 goto node_exit;
4900 }
4901 }
4902 }
4903 result = DNS_R_DELEGATION;
4904 if (nodep != NULL) {
4905 new_reference(search->rbtdb, node, locktype);
4906 *nodep = node;
4907 }
4908 bind_rdataset(search->rbtdb, node, found, search->now,
4909 locktype, rdataset);
4910 if (foundsig != NULL) {
4911 bind_rdataset(search->rbtdb, node, foundsig,
4912 search->now, locktype,
4913 sigrdataset);
4914 }
4915 if (need_headerupdate(found, search->now) ||
4916 (foundsig != NULL &&
4917 need_headerupdate(foundsig, search->now)))
4918 {
4919 if (locktype != isc_rwlocktype_write) {
4920 NODE_UNLOCK(lock, locktype);
4921 NODE_LOCK(lock, isc_rwlocktype_write);
4922 locktype = isc_rwlocktype_write;
4923 POST(locktype);
4924 }
4925 if (need_headerupdate(found, search->now)) {
4926 update_header(search->rbtdb, found,
4927 search->now);
4928 }
4929 if (foundsig != NULL &&
4930 need_headerupdate(foundsig, search->now))
4931 {
4932 update_header(search->rbtdb, foundsig,
4933 search->now);
4934 }
4935 }
4936 }
4937
4938 node_exit:
4939 NODE_UNLOCK(lock, locktype);
4940
4941 if (found == NULL && i > 0) {
4942 i--;
4943 node = search->chain.levels[i];
4944 } else {
4945 done = true;
4946 }
4947 } while (!done);
4948
4949 return (result);
4950 }
4951
4952 static isc_result_t
find_coveringnsec(rbtdb_search_t * search,dns_dbnode_t ** nodep,isc_stdtime_t now,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)4953 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4954 isc_stdtime_t now, dns_name_t *foundname,
4955 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
4956 dns_rbtnode_t *node;
4957 rdatasetheader_t *header, *header_next, *header_prev;
4958 rdatasetheader_t *found, *foundsig;
4959 bool empty_node;
4960 isc_result_t result;
4961 dns_fixedname_t fname, forigin;
4962 dns_name_t *name, *origin;
4963 rbtdb_rdatatype_t matchtype, sigmatchtype;
4964 nodelock_t *lock;
4965 isc_rwlocktype_t locktype;
4966 dns_rbtnodechain_t chain;
4967
4968 chain = search->chain;
4969
4970 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4971 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4972 dns_rdatatype_nsec);
4973
4974 do {
4975 node = NULL;
4976 name = dns_fixedname_initname(&fname);
4977 origin = dns_fixedname_initname(&forigin);
4978 result = dns_rbtnodechain_current(&chain, name, origin, &node);
4979 if (result != ISC_R_SUCCESS) {
4980 return (result);
4981 }
4982 locktype = isc_rwlocktype_read;
4983 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4984 NODE_LOCK(lock, locktype);
4985 found = NULL;
4986 foundsig = NULL;
4987 empty_node = true;
4988 header_prev = NULL;
4989 for (header = node->data; header != NULL; header = header_next)
4990 {
4991 header_next = header->next;
4992 if (check_stale_header(node, header, &locktype, lock,
4993 search, &header_prev))
4994 {
4995 continue;
4996 }
4997 if (NONEXISTENT(header) ||
4998 RBTDB_RDATATYPE_BASE(header->type) == 0)
4999 {
5000 header_prev = header;
5001 continue;
5002 }
5003 /*
5004 * Don't stop on provable noqname / RRSIG.
5005 */
5006 if (header->noqname == NULL &&
5007 RBTDB_RDATATYPE_BASE(header->type) !=
5008 dns_rdatatype_rrsig)
5009 {
5010 empty_node = false;
5011 }
5012 if (header->type == matchtype) {
5013 found = header;
5014 } else if (header->type == sigmatchtype) {
5015 foundsig = header;
5016 }
5017 header_prev = header;
5018 }
5019 if (found != NULL) {
5020 result = dns_name_concatenate(name, origin, foundname,
5021 NULL);
5022 if (result != ISC_R_SUCCESS) {
5023 goto unlock_node;
5024 }
5025 bind_rdataset(search->rbtdb, node, found, now, locktype,
5026 rdataset);
5027 if (foundsig != NULL) {
5028 bind_rdataset(search->rbtdb, node, foundsig,
5029 now, locktype, sigrdataset);
5030 }
5031 new_reference(search->rbtdb, node, locktype);
5032 *nodep = node;
5033 result = DNS_R_COVERINGNSEC;
5034 } else if (!empty_node) {
5035 result = ISC_R_NOTFOUND;
5036 } else {
5037 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
5038 }
5039 unlock_node:
5040 NODE_UNLOCK(lock, locktype);
5041 } while (empty_node && result == ISC_R_SUCCESS);
5042 return (result);
5043 }
5044
5045 static isc_result_t
cache_find(dns_db_t * db,const dns_name_t * name,dns_dbversion_t * version,dns_rdatatype_t type,unsigned int options,isc_stdtime_t now,dns_dbnode_t ** nodep,dns_name_t * foundname,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5046 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
5047 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
5048 dns_dbnode_t **nodep, dns_name_t *foundname,
5049 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
5050 dns_rbtnode_t *node = NULL;
5051 isc_result_t result;
5052 rbtdb_search_t search;
5053 bool cname_ok = true;
5054 bool empty_node;
5055 nodelock_t *lock;
5056 isc_rwlocktype_t locktype;
5057 rdatasetheader_t *header, *header_prev, *header_next;
5058 rdatasetheader_t *found, *nsheader;
5059 rdatasetheader_t *foundsig, *nssig, *cnamesig;
5060 rdatasetheader_t *update, *updatesig;
5061 rdatasetheader_t *nsecheader, *nsecsig;
5062 rbtdb_rdatatype_t sigtype, negtype;
5063
5064 UNUSED(version);
5065
5066 search.rbtdb = (dns_rbtdb_t *)db;
5067
5068 REQUIRE(VALID_RBTDB(search.rbtdb));
5069 REQUIRE(version == NULL);
5070
5071 if (now == 0) {
5072 isc_stdtime_get(&now);
5073 }
5074
5075 search.rbtversion = NULL;
5076 search.serial = 1;
5077 search.options = options;
5078 search.copy_name = false;
5079 search.need_cleanup = false;
5080 search.wild = false;
5081 search.zonecut = NULL;
5082 dns_fixedname_init(&search.zonecut_name);
5083 dns_rbtnodechain_init(&search.chain);
5084 search.now = now;
5085 update = NULL;
5086 updatesig = NULL;
5087
5088 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5089
5090 /*
5091 * Search down from the root of the tree. If, while going down, we
5092 * encounter a callback node, cache_zonecut_callback() will search the
5093 * rdatasets at the zone cut for a DNAME rdataset.
5094 */
5095 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5096 &search.chain, DNS_RBTFIND_EMPTYDATA,
5097 cache_zonecut_callback, &search);
5098
5099 if (result == DNS_R_PARTIALMATCH) {
5100 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
5101 result = find_coveringnsec(&search, nodep, now,
5102 foundname, rdataset,
5103 sigrdataset);
5104 if (result == DNS_R_COVERINGNSEC) {
5105 goto tree_exit;
5106 }
5107 }
5108 if (search.zonecut != NULL) {
5109 result = setup_delegation(&search, nodep, foundname,
5110 rdataset, sigrdataset);
5111 goto tree_exit;
5112 } else {
5113 find_ns:
5114 result = find_deepest_zonecut(&search, node, nodep,
5115 foundname, rdataset,
5116 sigrdataset);
5117 goto tree_exit;
5118 }
5119 } else if (result != ISC_R_SUCCESS) {
5120 goto tree_exit;
5121 }
5122
5123 /*
5124 * Certain DNSSEC types are not subject to CNAME matching
5125 * (RFC4035, section 2.5 and RFC3007).
5126 *
5127 * We don't check for RRSIG, because we don't store RRSIG records
5128 * directly.
5129 */
5130 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
5131 cname_ok = false;
5132 }
5133
5134 /*
5135 * We now go looking for rdata...
5136 */
5137
5138 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5139 locktype = isc_rwlocktype_read;
5140 NODE_LOCK(lock, locktype);
5141
5142 found = NULL;
5143 foundsig = NULL;
5144 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5145 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5146 nsheader = NULL;
5147 nsecheader = NULL;
5148 nssig = NULL;
5149 nsecsig = NULL;
5150 cnamesig = NULL;
5151 empty_node = true;
5152 header_prev = NULL;
5153 for (header = node->data; header != NULL; header = header_next) {
5154 header_next = header->next;
5155 if (check_stale_header(node, header, &locktype, lock, &search,
5156 &header_prev))
5157 {
5158 /* Do nothing. */
5159 } else if (EXISTS(header) && !ANCIENT(header)) {
5160 /*
5161 * We now know that there is at least one active
5162 * non-stale rdataset at this node.
5163 */
5164 empty_node = false;
5165
5166 /*
5167 * If we found a type we were looking for, remember
5168 * it.
5169 */
5170 if (header->type == type ||
5171 (type == dns_rdatatype_any &&
5172 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
5173 (cname_ok && header->type == dns_rdatatype_cname))
5174 {
5175 /*
5176 * We've found the answer.
5177 */
5178 found = header;
5179 if (header->type == dns_rdatatype_cname &&
5180 cname_ok && cnamesig != NULL)
5181 {
5182 /*
5183 * If we've already got the
5184 * CNAME RRSIG, use it.
5185 */
5186 foundsig = cnamesig;
5187 }
5188 } else if (header->type == sigtype) {
5189 /*
5190 * We've found the RRSIG rdataset for our
5191 * target type. Remember it.
5192 */
5193 foundsig = header;
5194 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5195 header->type == negtype)
5196 {
5197 /*
5198 * We've found a negative cache entry.
5199 */
5200 found = header;
5201 } else if (header->type == dns_rdatatype_ns) {
5202 /*
5203 * Remember a NS rdataset even if we're
5204 * not specifically looking for it, because
5205 * we might need it later.
5206 */
5207 nsheader = header;
5208 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5209 /*
5210 * If we need the NS rdataset, we'll also
5211 * need its signature.
5212 */
5213 nssig = header;
5214 } else if (header->type == dns_rdatatype_nsec) {
5215 nsecheader = header;
5216 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
5217 nsecsig = header;
5218 } else if (cname_ok &&
5219 header->type == RBTDB_RDATATYPE_SIGCNAME)
5220 {
5221 /*
5222 * If we get a CNAME match, we'll also need
5223 * its signature.
5224 */
5225 cnamesig = header;
5226 }
5227 header_prev = header;
5228 } else {
5229 header_prev = header;
5230 }
5231 }
5232
5233 if (empty_node) {
5234 /*
5235 * We have an exact match for the name, but there are no
5236 * extant rdatasets. That means that this node doesn't
5237 * meaningfully exist, and that we really have a partial match.
5238 */
5239 NODE_UNLOCK(lock, locktype);
5240 goto find_ns;
5241 }
5242
5243 /*
5244 * If we didn't find what we were looking for...
5245 */
5246 if (found == NULL ||
5247 (DNS_TRUST_ADDITIONAL(found->trust) &&
5248 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
5249 (found->trust == dns_trust_glue &&
5250 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
5251 (DNS_TRUST_PENDING(found->trust) &&
5252 ((options & DNS_DBFIND_PENDINGOK) == 0)))
5253 {
5254 /*
5255 * Return covering NODATA NSEC record.
5256 */
5257 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
5258 nsecheader != NULL)
5259 {
5260 if (nodep != NULL) {
5261 new_reference(search.rbtdb, node, locktype);
5262 *nodep = node;
5263 }
5264 bind_rdataset(search.rbtdb, node, nsecheader,
5265 search.now, locktype, rdataset);
5266 if (need_headerupdate(nsecheader, search.now)) {
5267 update = nsecheader;
5268 }
5269 if (nsecsig != NULL) {
5270 bind_rdataset(search.rbtdb, node, nsecsig,
5271 search.now, locktype,
5272 sigrdataset);
5273 if (need_headerupdate(nsecsig, search.now)) {
5274 updatesig = nsecsig;
5275 }
5276 }
5277 result = DNS_R_COVERINGNSEC;
5278 goto node_exit;
5279 }
5280
5281 /*
5282 * If there is an NS rdataset at this node, then this is the
5283 * deepest zone cut.
5284 */
5285 if (nsheader != NULL) {
5286 if (nodep != NULL) {
5287 new_reference(search.rbtdb, node, locktype);
5288 *nodep = node;
5289 }
5290 bind_rdataset(search.rbtdb, node, nsheader, search.now,
5291 locktype, rdataset);
5292 if (need_headerupdate(nsheader, search.now)) {
5293 update = nsheader;
5294 }
5295 if (nssig != NULL) {
5296 bind_rdataset(search.rbtdb, node, nssig,
5297 search.now, locktype,
5298 sigrdataset);
5299 if (need_headerupdate(nssig, search.now)) {
5300 updatesig = nssig;
5301 }
5302 }
5303 result = DNS_R_DELEGATION;
5304 goto node_exit;
5305 }
5306
5307 /*
5308 * Go find the deepest zone cut.
5309 */
5310 NODE_UNLOCK(lock, locktype);
5311 goto find_ns;
5312 }
5313
5314 /*
5315 * We found what we were looking for, or we found a CNAME.
5316 */
5317
5318 if (nodep != NULL) {
5319 new_reference(search.rbtdb, node, locktype);
5320 *nodep = node;
5321 }
5322
5323 if (NEGATIVE(found)) {
5324 /*
5325 * We found a negative cache entry.
5326 */
5327 if (NXDOMAIN(found)) {
5328 result = DNS_R_NCACHENXDOMAIN;
5329 } else {
5330 result = DNS_R_NCACHENXRRSET;
5331 }
5332 } else if (type != found->type && type != dns_rdatatype_any &&
5333 found->type == dns_rdatatype_cname)
5334 {
5335 /*
5336 * We weren't doing an ANY query and we found a CNAME instead
5337 * of the type we were looking for, so we need to indicate
5338 * that result to the caller.
5339 */
5340 result = DNS_R_CNAME;
5341 } else {
5342 /*
5343 * An ordinary successful query!
5344 */
5345 result = ISC_R_SUCCESS;
5346 }
5347
5348 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5349 result == DNS_R_NCACHENXRRSET)
5350 {
5351 bind_rdataset(search.rbtdb, node, found, search.now, locktype,
5352 rdataset);
5353 if (need_headerupdate(found, search.now)) {
5354 update = found;
5355 }
5356 if (!NEGATIVE(found) && foundsig != NULL) {
5357 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5358 locktype, sigrdataset);
5359 if (need_headerupdate(foundsig, search.now)) {
5360 updatesig = foundsig;
5361 }
5362 }
5363 }
5364
5365 node_exit:
5366 if ((update != NULL || updatesig != NULL) &&
5367 locktype != isc_rwlocktype_write)
5368 {
5369 NODE_UNLOCK(lock, locktype);
5370 NODE_LOCK(lock, isc_rwlocktype_write);
5371 locktype = isc_rwlocktype_write;
5372 POST(locktype);
5373 }
5374 if (update != NULL && need_headerupdate(update, search.now)) {
5375 update_header(search.rbtdb, update, search.now);
5376 }
5377 if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
5378 update_header(search.rbtdb, updatesig, search.now);
5379 }
5380
5381 NODE_UNLOCK(lock, locktype);
5382
5383 tree_exit:
5384 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5385
5386 /*
5387 * If we found a zonecut but aren't going to use it, we have to
5388 * let go of it.
5389 */
5390 if (search.need_cleanup) {
5391 node = search.zonecut;
5392 INSIST(node != NULL);
5393 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5394
5395 NODE_LOCK(lock, isc_rwlocktype_read);
5396 decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
5397 isc_rwlocktype_none, false);
5398 NODE_UNLOCK(lock, isc_rwlocktype_read);
5399 }
5400
5401 dns_rbtnodechain_reset(&search.chain);
5402
5403 update_cachestats(search.rbtdb, result);
5404 return (result);
5405 }
5406
/*
 * Find the deepest known zone cut (NS rdataset) at or above 'name' in the
 * cache database 'db'.
 *
 * 'now' == 0 means "use the current time".  If 'dcname' is NULL, 'foundname'
 * doubles as the name buffer handed to dns_rbt_findnode().  When 'nodep' is
 * non-NULL and an NS rdataset is found at the matched node, a new reference
 * to that node is stored in '*nodep'.  The NS rdataset is bound to 'rdataset'
 * and, if a signature header was seen, its RRSIG to 'sigrdataset'.
 *
 * The tree lock is held for reading for the whole lookup.  DNS_R_DELEGATION
 * is translated to ISC_R_SUCCESS before returning; any other result is
 * returned as-is.
 */
static isc_result_t
cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
		  isc_stdtime_t now, dns_dbnode_t **nodep,
		  dns_name_t *foundname, dns_name_t *dcname,
		  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
	dns_rbtnode_t *node = NULL;
	nodelock_t *lock;
	isc_result_t result;
	rbtdb_search_t search;
	rdatasetheader_t *header, *header_prev, *header_next;
	rdatasetheader_t *found, *foundsig;
	unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
	isc_rwlocktype_t locktype;
	/* Remember whether the caller supplied 'dcname' before we alias it. */
	bool dcnull = (dcname == NULL);

	search.rbtdb = (dns_rbtdb_t *)db;

	REQUIRE(VALID_RBTDB(search.rbtdb));

	if (now == 0) {
		isc_stdtime_get(&now);
	}

	/* Set up the search context; no version is used for this lookup. */
	search.rbtversion = NULL;
	search.serial = 1;
	search.options = options;
	search.copy_name = false;
	search.need_cleanup = false;
	search.wild = false;
	search.zonecut = NULL;
	dns_fixedname_init(&search.zonecut_name);
	dns_rbtnodechain_init(&search.chain);
	search.now = now;

	/*
	 * Without a caller-supplied 'dcname', let the tree search write the
	 * matched name straight into 'foundname'.
	 */
	if (dcnull) {
		dcname = foundname;
	}

	if ((options & DNS_DBFIND_NOEXACT) != 0) {
		rbtoptions |= DNS_RBTFIND_NOEXACT;
	}

	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);

	/*
	 * Search down from the root of the tree.
	 */
	result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node,
				  &search.chain, rbtoptions, NULL, &search);

	if (result == DNS_R_PARTIALMATCH) {
		/* Only an ancestor matched: walk up looking for NS data. */
		result = find_deepest_zonecut(&search, node, nodep, foundname,
					      rdataset, sigrdataset);
		goto tree_exit;
	} else if (result != ISC_R_SUCCESS) {
		goto tree_exit;
	} else if (!dcnull) {
		/* Exact match: mirror the matched name into 'foundname'. */
		dns_name_copynf(dcname, foundname);
	}

	/*
	 * We now go looking for an NS rdataset at the node.
	 */

	lock = &(search.rbtdb->node_locks[node->locknum].lock);
	locktype = isc_rwlocktype_read;
	NODE_LOCK(lock, locktype);

	found = NULL;
	foundsig = NULL;
	header_prev = NULL;
	/*
	 * 'header_next' is saved before each header is examined;
	 * check_stale_header() receives '&locktype' and '&header_prev', so it
	 * may presumably change the lock mode and the list -- TODO confirm
	 * against its definition.
	 */
	for (header = node->data; header != NULL; header = header_next) {
		header_next = header->next;
		if (check_stale_header(node, header, &locktype, lock, &search,
				       &header_prev))
		{
			/*
			 * The function dns_rbt_findnode found us a matching
			 * node for 'name' and stored the result in 'dcname'.
			 * This is the deepest known zonecut in our database.
			 * However, this node may be stale and if serve-stale
			 * is not enabled (in other words 'stale-answer-enable'
			 * is set to no), this node may not be used as a
			 * zonecut we know about. If so, find the deepest
			 * zonecut from this node up and return that instead.
			 */
			NODE_UNLOCK(lock, locktype);
			result = find_deepest_zonecut(&search, node, nodep,
						      foundname, rdataset,
						      sigrdataset);
			/* Keep 'dcname' in step with the name actually found. */
			dns_name_copynf(foundname, dcname);
			goto tree_exit;
		} else if (EXISTS(header) && !ANCIENT(header)) {
			/*
			 * If we found a type we were looking for, remember
			 * it.
			 */
			if (header->type == dns_rdatatype_ns) {
				/*
				 * Remember a NS rdataset even if we're
				 * not specifically looking for it, because
				 * we might need it later.
				 */
				found = header;
			} else if (header->type == RBTDB_RDATATYPE_SIGNS) {
				/*
				 * If we need the NS rdataset, we'll also
				 * need its signature.
				 */
				foundsig = header;
			}
			header_prev = header;
		} else {
			header_prev = header;
		}
	}

	if (found == NULL) {
		/*
		 * No NS records here.
		 */
		NODE_UNLOCK(lock, locktype);
		result = find_deepest_zonecut(&search, node, nodep, foundname,
					      rdataset, sigrdataset);
		goto tree_exit;
	}

	/* Hand the node back to the caller, with its own reference. */
	if (nodep != NULL) {
		new_reference(search.rbtdb, node, locktype);
		*nodep = node;
	}

	bind_rdataset(search.rbtdb, node, found, search.now, locktype,
		      rdataset);
	if (foundsig != NULL) {
		bind_rdataset(search.rbtdb, node, foundsig, search.now,
			      locktype, sigrdataset);
	}

	if (need_headerupdate(found, search.now) ||
	    (foundsig != NULL && need_headerupdate(foundsig, search.now)))
	{
		/*
		 * update_header() is only called with the node write lock
		 * held.  The lock is dropped and re-acquired rather than
		 * upgraded, so need_headerupdate() is re-checked below once
		 * the write lock is held.
		 */
		if (locktype != isc_rwlocktype_write) {
			NODE_UNLOCK(lock, locktype);
			NODE_LOCK(lock, isc_rwlocktype_write);
			locktype = isc_rwlocktype_write;
			POST(locktype);
		}
		if (need_headerupdate(found, search.now)) {
			update_header(search.rbtdb, found, search.now);
		}
		if (foundsig != NULL && need_headerupdate(foundsig, search.now))
		{
			update_header(search.rbtdb, foundsig, search.now);
		}
	}

	NODE_UNLOCK(lock, locktype);

tree_exit:
	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);

	/* Unlike cache_find(), no zonecut reference is expected to be left. */
	INSIST(!search.need_cleanup);

	dns_rbtnodechain_reset(&search.chain);

	/* Callers of findzonecut see a found delegation as plain success. */
	if (result == DNS_R_DELEGATION) {
		result = ISC_R_SUCCESS;
	}

	return (result);
}
5579
5580 static void
attachnode(dns_db_t * db,dns_dbnode_t * source,dns_dbnode_t ** targetp)5581 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5582 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5583 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5584
5585 REQUIRE(VALID_RBTDB(rbtdb));
5586 REQUIRE(targetp != NULL && *targetp == NULL);
5587
5588 isc_refcount_increment(&node->references);
5589
5590 *targetp = source;
5591 }
5592
5593 static void
detachnode(dns_db_t * db,dns_dbnode_t ** targetp)5594 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5595 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5596 dns_rbtnode_t *node;
5597 bool want_free = false;
5598 bool inactive = false;
5599 rbtdb_nodelock_t *nodelock;
5600
5601 REQUIRE(VALID_RBTDB(rbtdb));
5602 REQUIRE(targetp != NULL && *targetp != NULL);
5603
5604 node = (dns_rbtnode_t *)(*targetp);
5605 nodelock = &rbtdb->node_locks[node->locknum];
5606
5607 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5608
5609 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5610 isc_rwlocktype_none, false))
5611 {
5612 if (isc_refcount_current(&nodelock->references) == 0 &&
5613 nodelock->exiting)
5614 {
5615 inactive = true;
5616 }
5617 }
5618
5619 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5620
5621 *targetp = NULL;
5622
5623 if (inactive) {
5624 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5625 rbtdb->active--;
5626 if (rbtdb->active == 0) {
5627 want_free = true;
5628 }
5629 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5630 if (want_free) {
5631 char buf[DNS_NAME_FORMATSIZE];
5632 if (dns_name_dynamic(&rbtdb->common.origin)) {
5633 dns_name_format(&rbtdb->common.origin, buf,
5634 sizeof(buf));
5635 } else {
5636 strlcpy(buf, "<UNKNOWN>", sizeof(buf));
5637 }
5638 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5639 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5640 "calling free_rbtdb(%s)", buf);
5641 free_rbtdb(rbtdb, true, NULL);
5642 }
5643 }
5644 }
5645
5646 static isc_result_t
expirenode(dns_db_t * db,dns_dbnode_t * node,isc_stdtime_t now)5647 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5648 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5649 dns_rbtnode_t *rbtnode = node;
5650 rdatasetheader_t *header;
5651 bool force_expire = false;
5652 /*
5653 * These are the category and module used by the cache cleaner.
5654 */
5655 bool log = false;
5656 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5657 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5658 int level = ISC_LOG_DEBUG(2);
5659 char printname[DNS_NAME_FORMATSIZE];
5660
5661 REQUIRE(VALID_RBTDB(rbtdb));
5662
5663 /*
5664 * Caller must hold a tree lock.
5665 */
5666
5667 if (now == 0) {
5668 isc_stdtime_get(&now);
5669 }
5670
5671 if (isc_mem_isovermem(rbtdb->common.mctx)) {
5672 /*
5673 * Force expire with 25% probability.
5674 * XXXDCL Could stand to have a better policy, like LRU.
5675 */
5676 force_expire = (rbtnode->down == NULL &&
5677 (isc_random32() % 4) == 0);
5678
5679 /*
5680 * Note that 'log' can be true IFF overmem is also true.
5681 * overmem can currently only be true for cache
5682 * databases -- hence all of the "overmem cache" log strings.
5683 */
5684 log = isc_log_wouldlog(dns_lctx, level);
5685 if (log) {
5686 isc_log_write(
5687 dns_lctx, category, module, level,
5688 "overmem cache: %s %s",
5689 force_expire ? "FORCE" : "check",
5690 dns_rbt_formatnodename(rbtnode, printname,
5691 sizeof(printname)));
5692 }
5693 }
5694
5695 /*
5696 * We may not need write access, but this code path is not performance
5697 * sensitive, so it should be okay to always lock as a writer.
5698 */
5699 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5700 isc_rwlocktype_write);
5701
5702 for (header = rbtnode->data; header != NULL; header = header->next) {
5703 if (header->rdh_ttl + rbtdb->serve_stale_ttl <=
5704 now - RBTDB_VIRTUAL)
5705 {
5706 /*
5707 * We don't check if refcurrent(rbtnode) == 0 and try
5708 * to free like we do in cache_find(), because
5709 * refcurrent(rbtnode) must be non-zero. This is so
5710 * because 'node' is an argument to the function.
5711 */
5712 mark_header_ancient(rbtdb, header);
5713 if (log) {
5714 isc_log_write(dns_lctx, category, module, level,
5715 "overmem cache: ancient %s",
5716 printname);
5717 }
5718 } else if (force_expire) {
5719 if (!RETAIN(header)) {
5720 set_ttl(rbtdb, header, 0);
5721 mark_header_ancient(rbtdb, header);
5722 } else if (log) {
5723 isc_log_write(dns_lctx, category, module, level,
5724 "overmem cache: "
5725 "reprieve by RETAIN() %s",
5726 printname);
5727 }
5728 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log) {
5729 isc_log_write(dns_lctx, category, module, level,
5730 "overmem cache: saved %s", printname);
5731 }
5732 }
5733
5734 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5735 isc_rwlocktype_write);
5736
5737 return (ISC_R_SUCCESS);
5738 }
5739
5740 static void
overmem(dns_db_t * db,bool over)5741 overmem(dns_db_t *db, bool over) {
5742 /* This is an empty callback. See adb.c:water() */
5743
5744 UNUSED(db);
5745 UNUSED(over);
5746
5747 return;
5748 }
5749
5750 static void
printnode(dns_db_t * db,dns_dbnode_t * node,FILE * out)5751 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5752 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5753 dns_rbtnode_t *rbtnode = node;
5754 bool first;
5755 uint32_t refs;
5756
5757 REQUIRE(VALID_RBTDB(rbtdb));
5758
5759 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5760 isc_rwlocktype_read);
5761
5762 refs = isc_refcount_current(&rbtnode->references);
5763 fprintf(out, "node %p, %" PRIu32 " references, locknum = %u\n", rbtnode,
5764 refs, rbtnode->locknum);
5765 if (rbtnode->data != NULL) {
5766 rdatasetheader_t *current, *top_next;
5767
5768 for (current = rbtnode->data; current != NULL;
5769 current = top_next)
5770 {
5771 top_next = current->next;
5772 first = true;
5773 fprintf(out, "\ttype %u", current->type);
5774 do {
5775 uint_least16_t attributes = atomic_load_acquire(
5776 ¤t->attributes);
5777 if (!first) {
5778 fprintf(out, "\t");
5779 }
5780 first = false;
5781 fprintf(out,
5782 "\tserial = %lu, ttl = %u, "
5783 "trust = %u, attributes = %" PRIuLEAST16
5784 ", "
5785 "resign = %u\n",
5786 (unsigned long)current->serial,
5787 current->rdh_ttl, current->trust,
5788 attributes,
5789 (current->resign << 1) |
5790 current->resign_lsb);
5791 current = current->down;
5792 } while (current != NULL);
5793 }
5794 } else {
5795 fprintf(out, "(empty)\n");
5796 }
5797
5798 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5799 isc_rwlocktype_read);
5800 }
5801
5802 static isc_result_t
createiterator(dns_db_t * db,unsigned int options,dns_dbiterator_t ** iteratorp)5803 createiterator(dns_db_t *db, unsigned int options,
5804 dns_dbiterator_t **iteratorp) {
5805 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5806 rbtdb_dbiterator_t *rbtdbiter;
5807
5808 REQUIRE(VALID_RBTDB(rbtdb));
5809
5810 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5811
5812 rbtdbiter->common.methods = &dbiterator_methods;
5813 rbtdbiter->common.db = NULL;
5814 dns_db_attach(db, &rbtdbiter->common.db);
5815 rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) !=
5816 0);
5817 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5818 rbtdbiter->common.cleaning = false;
5819 rbtdbiter->paused = true;
5820 rbtdbiter->tree_locked = isc_rwlocktype_none;
5821 rbtdbiter->result = ISC_R_SUCCESS;
5822 dns_fixedname_init(&rbtdbiter->name);
5823 dns_fixedname_init(&rbtdbiter->origin);
5824 rbtdbiter->node = NULL;
5825 rbtdbiter->delcnt = 0;
5826 rbtdbiter->nsec3only = ((options & DNS_DB_NSEC3ONLY) != 0);
5827 rbtdbiter->nonsec3 = ((options & DNS_DB_NONSEC3) != 0);
5828 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5829 dns_rbtnodechain_init(&rbtdbiter->chain);
5830 dns_rbtnodechain_init(&rbtdbiter->nsec3chain);
5831 if (rbtdbiter->nsec3only) {
5832 rbtdbiter->current = &rbtdbiter->nsec3chain;
5833 } else {
5834 rbtdbiter->current = &rbtdbiter->chain;
5835 }
5836
5837 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5838
5839 return (ISC_R_SUCCESS);
5840 }
5841
5842 static isc_result_t
zone_findrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers,isc_stdtime_t now,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5843 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5844 dns_rdatatype_t type, dns_rdatatype_t covers,
5845 isc_stdtime_t now, dns_rdataset_t *rdataset,
5846 dns_rdataset_t *sigrdataset) {
5847 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5848 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5849 rdatasetheader_t *header, *header_next, *found, *foundsig;
5850 rbtdb_serial_t serial;
5851 rbtdb_version_t *rbtversion = version;
5852 bool close_version = false;
5853 rbtdb_rdatatype_t matchtype, sigmatchtype;
5854
5855 REQUIRE(VALID_RBTDB(rbtdb));
5856 REQUIRE(type != dns_rdatatype_any);
5857 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5858
5859 if (rbtversion == NULL) {
5860 currentversion(db, (dns_dbversion_t **)(void *)(&rbtversion));
5861 close_version = true;
5862 }
5863 serial = rbtversion->serial;
5864 now = 0;
5865
5866 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5867 isc_rwlocktype_read);
5868
5869 found = NULL;
5870 foundsig = NULL;
5871 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5872 if (covers == 0) {
5873 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5874 } else {
5875 sigmatchtype = 0;
5876 }
5877
5878 for (header = rbtnode->data; header != NULL; header = header_next) {
5879 header_next = header->next;
5880 do {
5881 if (header->serial <= serial && !IGNORE(header)) {
5882 /*
5883 * Is this a "this rdataset doesn't
5884 * exist" record?
5885 */
5886 if (NONEXISTENT(header)) {
5887 header = NULL;
5888 }
5889 break;
5890 } else {
5891 header = header->down;
5892 }
5893 } while (header != NULL);
5894 if (header != NULL) {
5895 /*
5896 * We have an active, extant rdataset. If it's a
5897 * type we're looking for, remember it.
5898 */
5899 if (header->type == matchtype) {
5900 found = header;
5901 if (foundsig != NULL) {
5902 break;
5903 }
5904 } else if (header->type == sigmatchtype) {
5905 foundsig = header;
5906 if (found != NULL) {
5907 break;
5908 }
5909 }
5910 }
5911 }
5912 if (found != NULL) {
5913 bind_rdataset(rbtdb, rbtnode, found, now, isc_rwlocktype_read,
5914 rdataset);
5915 if (foundsig != NULL) {
5916 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5917 isc_rwlocktype_read, sigrdataset);
5918 }
5919 }
5920
5921 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5922 isc_rwlocktype_read);
5923
5924 if (close_version) {
5925 closeversion(db, (dns_dbversion_t **)(void *)(&rbtversion),
5926 false);
5927 }
5928
5929 if (found == NULL) {
5930 return (ISC_R_NOTFOUND);
5931 }
5932
5933 return (ISC_R_SUCCESS);
5934 }
5935
5936 static isc_result_t
cache_findrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers,isc_stdtime_t now,dns_rdataset_t * rdataset,dns_rdataset_t * sigrdataset)5937 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5938 dns_rdatatype_t type, dns_rdatatype_t covers,
5939 isc_stdtime_t now, dns_rdataset_t *rdataset,
5940 dns_rdataset_t *sigrdataset) {
5941 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5942 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5943 rdatasetheader_t *header, *header_next, *found, *foundsig;
5944 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5945 isc_result_t result;
5946 nodelock_t *lock;
5947 isc_rwlocktype_t locktype;
5948
5949 REQUIRE(VALID_RBTDB(rbtdb));
5950 REQUIRE(type != dns_rdatatype_any);
5951
5952 UNUSED(version);
5953
5954 result = ISC_R_SUCCESS;
5955
5956 if (now == 0) {
5957 isc_stdtime_get(&now);
5958 }
5959
5960 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5961 locktype = isc_rwlocktype_read;
5962 NODE_LOCK(lock, locktype);
5963
5964 found = NULL;
5965 foundsig = NULL;
5966 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5967 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5968 if (covers == 0) {
5969 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5970 } else {
5971 sigmatchtype = 0;
5972 }
5973
5974 for (header = rbtnode->data; header != NULL; header = header_next) {
5975 header_next = header->next;
5976 if (!ACTIVE(header, now)) {
5977 if ((header->rdh_ttl + rbtdb->serve_stale_ttl <
5978 now - RBTDB_VIRTUAL) &&
5979 (locktype == isc_rwlocktype_write ||
5980 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS))
5981 {
5982 /*
5983 * We update the node's status only when we
5984 * can get write access.
5985 */
5986 locktype = isc_rwlocktype_write;
5987
5988 /*
5989 * We don't check if refcurrent(rbtnode) == 0
5990 * and try to free like we do in cache_find(),
5991 * because refcurrent(rbtnode) must be
5992 * non-zero. This is so because 'node' is an
5993 * argument to the function.
5994 */
5995 mark_header_ancient(rbtdb, header);
5996 }
5997 } else if (EXISTS(header) && !ANCIENT(header)) {
5998 if (header->type == matchtype) {
5999 found = header;
6000 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
6001 header->type == negtype)
6002 {
6003 found = header;
6004 } else if (header->type == sigmatchtype) {
6005 foundsig = header;
6006 }
6007 }
6008 }
6009 if (found != NULL) {
6010 bind_rdataset(rbtdb, rbtnode, found, now, locktype, rdataset);
6011 if (!NEGATIVE(found) && foundsig != NULL) {
6012 bind_rdataset(rbtdb, rbtnode, foundsig, now, locktype,
6013 sigrdataset);
6014 }
6015 }
6016
6017 NODE_UNLOCK(lock, locktype);
6018
6019 if (found == NULL) {
6020 return (ISC_R_NOTFOUND);
6021 }
6022
6023 if (NEGATIVE(found)) {
6024 /*
6025 * We found a negative cache entry.
6026 */
6027 if (NXDOMAIN(found)) {
6028 result = DNS_R_NCACHENXDOMAIN;
6029 } else {
6030 result = DNS_R_NCACHENXRRSET;
6031 }
6032 }
6033
6034 update_cachestats(rbtdb, result);
6035
6036 return (result);
6037 }
6038
6039 static isc_result_t
allrdatasets(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,unsigned int options,isc_stdtime_t now,dns_rdatasetiter_t ** iteratorp)6040 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6041 unsigned int options, isc_stdtime_t now,
6042 dns_rdatasetiter_t **iteratorp) {
6043 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6044 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6045 rbtdb_version_t *rbtversion = version;
6046 rbtdb_rdatasetiter_t *iterator;
6047
6048 REQUIRE(VALID_RBTDB(rbtdb));
6049
6050 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
6051
6052 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
6053 now = 0;
6054 if (rbtversion == NULL) {
6055 currentversion(
6056 db, (dns_dbversion_t **)(void *)(&rbtversion));
6057 } else {
6058 INSIST(rbtversion->rbtdb == rbtdb);
6059
6060 (void)isc_refcount_increment(&rbtversion->references);
6061 }
6062 } else {
6063 if (now == 0) {
6064 isc_stdtime_get(&now);
6065 }
6066 rbtversion = NULL;
6067 }
6068
6069 iterator->common.magic = DNS_RDATASETITER_MAGIC;
6070 iterator->common.methods = &rdatasetiter_methods;
6071 iterator->common.db = db;
6072 iterator->common.node = node;
6073 iterator->common.version = (dns_dbversion_t *)rbtversion;
6074 iterator->common.options = options;
6075 iterator->common.now = now;
6076
6077 isc_refcount_increment(&rbtnode->references);
6078
6079 iterator->current = NULL;
6080
6081 *iteratorp = (dns_rdatasetiter_t *)iterator;
6082
6083 return (ISC_R_SUCCESS);
6084 }
6085
6086 static bool
cname_and_other_data(dns_rbtnode_t * node,rbtdb_serial_t serial)6087 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
6088 rdatasetheader_t *header, *header_next;
6089 bool cname, other_data;
6090 dns_rdatatype_t rdtype;
6091
6092 /*
6093 * The caller must hold the node lock.
6094 */
6095
6096 /*
6097 * Look for CNAME and "other data" rdatasets active in our version.
6098 */
6099 cname = false;
6100 other_data = false;
6101 for (header = node->data; header != NULL; header = header_next) {
6102 header_next = header->next;
6103 if (header->type == dns_rdatatype_cname) {
6104 /*
6105 * Look for an active extant CNAME.
6106 */
6107 do {
6108 if (header->serial <= serial && !IGNORE(header))
6109 {
6110 /*
6111 * Is this a "this rdataset doesn't
6112 * exist" record?
6113 */
6114 if (NONEXISTENT(header)) {
6115 header = NULL;
6116 }
6117 break;
6118 } else {
6119 header = header->down;
6120 }
6121 } while (header != NULL);
6122 if (header != NULL) {
6123 cname = true;
6124 }
6125 } else {
6126 /*
6127 * Look for active extant "other data".
6128 *
6129 * "Other data" is any rdataset whose type is not
6130 * KEY, NSEC, SIG or RRSIG.
6131 */
6132 rdtype = RBTDB_RDATATYPE_BASE(header->type);
6133 if (rdtype != dns_rdatatype_key &&
6134 rdtype != dns_rdatatype_sig &&
6135 rdtype != dns_rdatatype_nsec &&
6136 rdtype != dns_rdatatype_rrsig)
6137 {
6138 /*
6139 * Is it active and extant?
6140 */
6141 do {
6142 if (header->serial <= serial &&
6143 !IGNORE(header))
6144 {
6145 /*
6146 * Is this a "this rdataset
6147 * doesn't exist" record?
6148 */
6149 if (NONEXISTENT(header)) {
6150 header = NULL;
6151 }
6152 break;
6153 } else {
6154 header = header->down;
6155 }
6156 } while (header != NULL);
6157 if (header != NULL) {
6158 other_data = true;
6159 }
6160 }
6161 }
6162 }
6163
6164 if (cname && other_data) {
6165 return (true);
6166 }
6167
6168 return (false);
6169 }
6170
6171 static void
resign_insert(dns_rbtdb_t * rbtdb,int idx,rdatasetheader_t * newheader)6172 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
6173 INSIST(!IS_CACHE(rbtdb));
6174 INSIST(newheader->heap_index == 0);
6175 INSIST(!ISC_LINK_LINKED(newheader, link));
6176
6177 isc_heap_insert(rbtdb->heaps[idx], newheader);
6178 }
6179
6180 /*
6181 * node write lock must be held.
6182 */
6183 static void
resign_delete(dns_rbtdb_t * rbtdb,rbtdb_version_t * version,rdatasetheader_t * header)6184 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
6185 rdatasetheader_t *header) {
6186 /*
6187 * Remove the old header from the heap
6188 */
6189 if (header != NULL && header->heap_index != 0) {
6190 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6191 header->heap_index);
6192 header->heap_index = 0;
6193 if (version != NULL) {
6194 new_reference(rbtdb, header->node,
6195 isc_rwlocktype_write);
6196 ISC_LIST_APPEND(version->resigned_list, header, link);
6197 }
6198 }
6199 }
6200
6201 static uint64_t
recordsize(rdatasetheader_t * header,unsigned int namelen)6202 recordsize(rdatasetheader_t *header, unsigned int namelen) {
6203 return (dns_rdataslab_rdatasize((unsigned char *)header,
6204 sizeof(*header)) +
6205 sizeof(dns_ttl_t) + sizeof(dns_rdatatype_t) +
6206 sizeof(dns_rdataclass_t) + namelen);
6207 }
6208
6209 static void
update_recordsandxfrsize(bool add,rbtdb_version_t * rbtversion,rdatasetheader_t * header,unsigned int namelen)6210 update_recordsandxfrsize(bool add, rbtdb_version_t *rbtversion,
6211 rdatasetheader_t *header, unsigned int namelen) {
6212 unsigned char *hdr = (unsigned char *)header;
6213 size_t hdrsize = sizeof(*header);
6214
6215 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_write);
6216 if (add) {
6217 rbtversion->records += dns_rdataslab_count(hdr, hdrsize);
6218 rbtversion->xfrsize += recordsize(header, namelen);
6219 } else {
6220 rbtversion->records -= dns_rdataslab_count(hdr, hdrsize);
6221 rbtversion->xfrsize -= recordsize(header, namelen);
6222 }
6223 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write);
6224 }
6225
6226 /*
6227 * write lock on rbtnode must be held.
6228 */
6229 static isc_result_t
add32(dns_rbtdb_t * rbtdb,dns_rbtnode_t * rbtnode,const dns_name_t * nodename,rbtdb_version_t * rbtversion,rdatasetheader_t * newheader,unsigned int options,bool loading,dns_rdataset_t * addedrdataset,isc_stdtime_t now)6230 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename,
6231 rbtdb_version_t *rbtversion, rdatasetheader_t *newheader,
6232 unsigned int options, bool loading, dns_rdataset_t *addedrdataset,
6233 isc_stdtime_t now) {
6234 rbtdb_changed_t *changed = NULL;
6235 rdatasetheader_t *topheader = NULL, *topheader_prev = NULL;
6236 rdatasetheader_t *header = NULL, *sigheader = NULL;
6237 unsigned char *merged = NULL;
6238 isc_result_t result;
6239 bool header_nx;
6240 bool newheader_nx;
6241 bool merge;
6242 dns_rdatatype_t rdtype, covers;
6243 rbtdb_rdatatype_t negtype, sigtype;
6244 dns_trust_t trust;
6245 int idx;
6246
6247 /*
6248 * Add an rdatasetheader_t to a node.
6249 */
6250
6251 /*
6252 * Caller must be holding the node lock.
6253 */
6254
6255 if ((options & DNS_DBADD_MERGE) != 0) {
6256 REQUIRE(rbtversion != NULL);
6257 merge = true;
6258 } else {
6259 merge = false;
6260 }
6261
6262 if ((options & DNS_DBADD_FORCE) != 0) {
6263 trust = dns_trust_ultimate;
6264 } else {
6265 trust = newheader->trust;
6266 }
6267
6268 if (rbtversion != NULL && !loading) {
6269 /*
6270 * We always add a changed record, even if no changes end up
6271 * being made to this node, because it's harmless and
6272 * simplifies the code.
6273 */
6274 changed = add_changed(rbtdb, rbtversion, rbtnode);
6275 if (changed == NULL) {
6276 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6277 return (ISC_R_NOMEMORY);
6278 }
6279 }
6280
6281 newheader_nx = NONEXISTENT(newheader) ? true : false;
6282 topheader_prev = NULL;
6283 sigheader = NULL;
6284 negtype = 0;
6285 if (rbtversion == NULL && !newheader_nx) {
6286 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
6287 covers = RBTDB_RDATATYPE_EXT(newheader->type);
6288 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
6289 if (NEGATIVE(newheader)) {
6290 /*
6291 * We're adding a negative cache entry.
6292 */
6293 if (covers == dns_rdatatype_any) {
6294 /*
6295 * If we're adding an negative cache entry
6296 * which covers all types (NXDOMAIN,
6297 * NODATA(QTYPE=ANY)),
6298 *
6299 * We make all other data ancient so that the
6300 * only rdataset that can be found at this
6301 * node is the negative cache entry.
6302 */
6303 for (topheader = rbtnode->data;
6304 topheader != NULL;
6305 topheader = topheader->next)
6306 {
6307 set_ttl(rbtdb, topheader, 0);
6308 mark_header_ancient(rbtdb, topheader);
6309 }
6310 goto find_header;
6311 }
6312 /*
6313 * Otherwise look for any RRSIGs of the given
6314 * type so they can be marked ancient later.
6315 */
6316 for (topheader = rbtnode->data; topheader != NULL;
6317 topheader = topheader->next)
6318 {
6319 if (topheader->type == sigtype) {
6320 sigheader = topheader;
6321 }
6322 }
6323 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
6324 } else {
6325 /*
6326 * We're adding something that isn't a
6327 * negative cache entry. Look for an extant
6328 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative
6329 * cache entry. If we're adding an RRSIG, also
6330 * check for an extant non-ancient NODATA ncache
6331 * entry which covers the same type as the RRSIG.
6332 */
6333 for (topheader = rbtnode->data; topheader != NULL;
6334 topheader = topheader->next)
6335 {
6336 if ((topheader->type ==
6337 RBTDB_RDATATYPE_NCACHEANY) ||
6338 (newheader->type == sigtype &&
6339 topheader->type ==
6340 RBTDB_RDATATYPE_VALUE(0, covers)))
6341 {
6342 break;
6343 }
6344 }
6345 if (topheader != NULL && EXISTS(topheader) &&
6346 ACTIVE(topheader, now))
6347 {
6348 /*
6349 * Found one.
6350 */
6351 if (trust < topheader->trust) {
6352 /*
6353 * The NXDOMAIN/NODATA(QTYPE=ANY)
6354 * is more trusted.
6355 */
6356 free_rdataset(rbtdb, rbtdb->common.mctx,
6357 newheader);
6358 if (addedrdataset != NULL) {
6359 bind_rdataset(
6360 rbtdb, rbtnode,
6361 topheader, now,
6362 isc_rwlocktype_write,
6363 addedrdataset);
6364 }
6365 return (DNS_R_UNCHANGED);
6366 }
6367 /*
6368 * The new rdataset is better. Expire the
6369 * ncache entry.
6370 */
6371 set_ttl(rbtdb, topheader, 0);
6372 mark_header_ancient(rbtdb, topheader);
6373 topheader = NULL;
6374 goto find_header;
6375 }
6376 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6377 }
6378 }
6379
6380 for (topheader = rbtnode->data; topheader != NULL;
6381 topheader = topheader->next)
6382 {
6383 if (topheader->type == newheader->type ||
6384 topheader->type == negtype)
6385 {
6386 break;
6387 }
6388 topheader_prev = topheader;
6389 }
6390
6391 find_header:
6392 /*
6393 * If header isn't NULL, we've found the right type. There may be
6394 * IGNORE rdatasets between the top of the chain and the first real
6395 * data. We skip over them.
6396 */
6397 header = topheader;
6398 while (header != NULL && IGNORE(header)) {
6399 header = header->down;
6400 }
6401 if (header != NULL) {
6402 header_nx = NONEXISTENT(header) ? true : false;
6403
6404 /*
6405 * Deleting an already non-existent rdataset has no effect.
6406 */
6407 if (header_nx && newheader_nx) {
6408 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6409 return (DNS_R_UNCHANGED);
6410 }
6411
6412 /*
6413 * Trying to add an rdataset with lower trust to a cache
6414 * DB has no effect, provided that the cache data isn't
6415 * stale. If the cache data is stale, new lower trust
6416 * data will supersede it below. Unclear what the best
6417 * policy is here.
6418 */
6419 if (rbtversion == NULL && trust < header->trust &&
6420 (ACTIVE(header, now) || header_nx))
6421 {
6422 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6423 if (addedrdataset != NULL) {
6424 bind_rdataset(rbtdb, rbtnode, header, now,
6425 isc_rwlocktype_write,
6426 addedrdataset);
6427 }
6428 return (DNS_R_UNCHANGED);
6429 }
6430
6431 /*
6432 * Don't merge if a nonexistent rdataset is involved.
6433 */
6434 if (merge && (header_nx || newheader_nx)) {
6435 merge = false;
6436 }
6437
6438 /*
6439 * If 'merge' is true, we'll try to create a new rdataset
6440 * that is the union of 'newheader' and 'header'.
6441 */
6442 if (merge) {
6443 unsigned int flags = 0;
6444 INSIST(rbtversion->serial >= header->serial);
6445 merged = NULL;
6446 result = ISC_R_SUCCESS;
6447
6448 if ((options & DNS_DBADD_EXACT) != 0) {
6449 flags |= DNS_RDATASLAB_EXACT;
6450 }
6451 /*
6452 * TTL use here is irrelevant to the cache;
6453 * merge is only done with zonedbs.
6454 */
6455 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6456 newheader->rdh_ttl != header->rdh_ttl)
6457 {
6458 result = DNS_R_NOTEXACT;
6459 } else if (newheader->rdh_ttl != header->rdh_ttl) {
6460 flags |= DNS_RDATASLAB_FORCE;
6461 }
6462 if (result == ISC_R_SUCCESS) {
6463 result = dns_rdataslab_merge(
6464 (unsigned char *)header,
6465 (unsigned char *)newheader,
6466 (unsigned int)(sizeof(*newheader)),
6467 rbtdb->common.mctx,
6468 rbtdb->common.rdclass,
6469 (dns_rdatatype_t)header->type, flags,
6470 &merged);
6471 }
6472 if (result == ISC_R_SUCCESS) {
6473 /*
6474 * If 'header' has the same serial number as
6475 * we do, we could clean it up now if we knew
6476 * that our caller had no references to it.
6477 * We don't know this, however, so we leave it
6478 * alone. It will get cleaned up when
6479 * clean_zone_node() runs.
6480 */
6481 free_rdataset(rbtdb, rbtdb->common.mctx,
6482 newheader);
6483 newheader = (rdatasetheader_t *)merged;
6484 init_rdataset(rbtdb, newheader);
6485 update_newheader(newheader, header);
6486 if (loading && RESIGN(newheader) &&
6487 RESIGN(header) &&
6488 resign_sooner(header, newheader))
6489 {
6490 newheader->resign = header->resign;
6491 newheader->resign_lsb =
6492 header->resign_lsb;
6493 }
6494 } else {
6495 free_rdataset(rbtdb, rbtdb->common.mctx,
6496 newheader);
6497 return (result);
6498 }
6499 }
6500 /*
6501 * Don't replace existing NS, A and AAAA RRsets in the
6502 * cache if they already exist. This prevents named
6503 * being locked to old servers. Don't lower trust of
6504 * existing record if the update is forced. Nothing
6505 * special to be done w.r.t stale data; it gets replaced
6506 * normally further down.
6507 */
6508 if (IS_CACHE(rbtdb) && ACTIVE(header, now) &&
6509 header->type == dns_rdatatype_ns && !header_nx &&
6510 !newheader_nx && header->trust >= newheader->trust &&
6511 dns_rdataslab_equalx((unsigned char *)header,
6512 (unsigned char *)newheader,
6513 (unsigned int)(sizeof(*newheader)),
6514 rbtdb->common.rdclass,
6515 (dns_rdatatype_t)header->type))
6516 {
6517 /*
6518 * Honour the new ttl if it is less than the
6519 * older one.
6520 */
6521 if (header->rdh_ttl > newheader->rdh_ttl) {
6522 set_ttl(rbtdb, header, newheader->rdh_ttl);
6523 }
6524 if (header->noqname == NULL &&
6525 newheader->noqname != NULL)
6526 {
6527 header->noqname = newheader->noqname;
6528 newheader->noqname = NULL;
6529 }
6530 if (header->closest == NULL &&
6531 newheader->closest != NULL)
6532 {
6533 header->closest = newheader->closest;
6534 newheader->closest = NULL;
6535 }
6536 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6537 if (addedrdataset != NULL) {
6538 bind_rdataset(rbtdb, rbtnode, header, now,
6539 isc_rwlocktype_write,
6540 addedrdataset);
6541 }
6542 return (ISC_R_SUCCESS);
6543 }
6544 /*
6545 * If we will be replacing an NS RRset, force its TTL
6546 * to be no more than the current NS RRset's TTL. This
6547 * ensures the delegations that are withdrawn are honoured.
6548 */
6549 if (IS_CACHE(rbtdb) && ACTIVE(header, now) &&
6550 header->type == dns_rdatatype_ns && !header_nx &&
6551 !newheader_nx && header->trust <= newheader->trust)
6552 {
6553 if (newheader->rdh_ttl > header->rdh_ttl) {
6554 newheader->rdh_ttl = header->rdh_ttl;
6555 }
6556 }
6557 if (IS_CACHE(rbtdb) && ACTIVE(header, now) &&
6558 (options & DNS_DBADD_PREFETCH) == 0 &&
6559 (header->type == dns_rdatatype_a ||
6560 header->type == dns_rdatatype_aaaa ||
6561 header->type == dns_rdatatype_ds ||
6562 header->type == RBTDB_RDATATYPE_SIGDS) &&
6563 !header_nx && !newheader_nx &&
6564 header->trust >= newheader->trust &&
6565 dns_rdataslab_equal((unsigned char *)header,
6566 (unsigned char *)newheader,
6567 (unsigned int)(sizeof(*newheader))))
6568 {
6569 /*
6570 * Honour the new ttl if it is less than the
6571 * older one.
6572 */
6573 if (header->rdh_ttl > newheader->rdh_ttl) {
6574 set_ttl(rbtdb, header, newheader->rdh_ttl);
6575 }
6576 if (header->noqname == NULL &&
6577 newheader->noqname != NULL)
6578 {
6579 header->noqname = newheader->noqname;
6580 newheader->noqname = NULL;
6581 }
6582 if (header->closest == NULL &&
6583 newheader->closest != NULL)
6584 {
6585 header->closest = newheader->closest;
6586 newheader->closest = NULL;
6587 }
6588 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6589 if (addedrdataset != NULL) {
6590 bind_rdataset(rbtdb, rbtnode, header, now,
6591 isc_rwlocktype_write,
6592 addedrdataset);
6593 }
6594 return (ISC_R_SUCCESS);
6595 }
6596 INSIST(rbtversion == NULL ||
6597 rbtversion->serial >= topheader->serial);
6598 if (loading) {
6599 newheader->down = NULL;
6600 idx = newheader->node->locknum;
6601 if (IS_CACHE(rbtdb)) {
6602 if (ZEROTTL(newheader)) {
6603 ISC_LIST_APPEND(rbtdb->rdatasets[idx],
6604 newheader, link);
6605 } else {
6606 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6607 newheader, link);
6608 }
6609 INSIST(rbtdb->heaps != NULL);
6610 isc_heap_insert(rbtdb->heaps[idx], newheader);
6611 } else if (RESIGN(newheader)) {
6612 resign_insert(rbtdb, idx, newheader);
6613 /*
6614 * Don't call resign_delete as we don't need
6615 * to reverse the delete. The free_rdataset
6616 * call below will clean up the heap entry.
6617 */
6618 }
6619
6620 /*
6621 * There are no other references to 'header' when
6622 * loading, so we MAY clean up 'header' now.
6623 * Since we don't generate changed records when
6624 * loading, we MUST clean up 'header' now.
6625 */
6626 if (topheader_prev != NULL) {
6627 topheader_prev->next = newheader;
6628 } else {
6629 rbtnode->data = newheader;
6630 }
6631 newheader->next = topheader->next;
6632 if (rbtversion != NULL && !header_nx) {
6633 update_recordsandxfrsize(false, rbtversion,
6634 header,
6635 nodename->length);
6636 }
6637 free_rdataset(rbtdb, rbtdb->common.mctx, header);
6638 } else {
6639 idx = newheader->node->locknum;
6640 if (IS_CACHE(rbtdb)) {
6641 INSIST(rbtdb->heaps != NULL);
6642 isc_heap_insert(rbtdb->heaps[idx], newheader);
6643 if (ZEROTTL(newheader)) {
6644 ISC_LIST_APPEND(rbtdb->rdatasets[idx],
6645 newheader, link);
6646 } else {
6647 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6648 newheader, link);
6649 }
6650 } else if (RESIGN(newheader)) {
6651 resign_insert(rbtdb, idx, newheader);
6652 resign_delete(rbtdb, rbtversion, header);
6653 }
6654 if (topheader_prev != NULL) {
6655 topheader_prev->next = newheader;
6656 } else {
6657 rbtnode->data = newheader;
6658 }
6659 newheader->next = topheader->next;
6660 newheader->down = topheader;
6661 topheader->next = newheader;
6662 rbtnode->dirty = 1;
6663 if (changed != NULL) {
6664 changed->dirty = true;
6665 }
6666 if (rbtversion == NULL) {
6667 set_ttl(rbtdb, header, 0);
6668 mark_header_ancient(rbtdb, header);
6669 if (sigheader != NULL) {
6670 set_ttl(rbtdb, sigheader, 0);
6671 mark_header_ancient(rbtdb, sigheader);
6672 }
6673 }
6674 if (rbtversion != NULL && !header_nx) {
6675 update_recordsandxfrsize(false, rbtversion,
6676 header,
6677 nodename->length);
6678 }
6679 }
6680 } else {
6681 /*
6682 * No non-IGNORED rdatasets of the given type exist at
6683 * this node.
6684 */
6685
6686 /*
6687 * If we're trying to delete the type, don't bother.
6688 */
6689 if (newheader_nx) {
6690 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6691 return (DNS_R_UNCHANGED);
6692 }
6693
6694 idx = newheader->node->locknum;
6695 if (IS_CACHE(rbtdb)) {
6696 isc_heap_insert(rbtdb->heaps[idx], newheader);
6697 if (ZEROTTL(newheader)) {
6698 ISC_LIST_APPEND(rbtdb->rdatasets[idx],
6699 newheader, link);
6700 } else {
6701 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6702 newheader, link);
6703 }
6704 } else if (RESIGN(newheader)) {
6705 resign_insert(rbtdb, idx, newheader);
6706 resign_delete(rbtdb, rbtversion, header);
6707 }
6708
6709 if (topheader != NULL) {
6710 /*
6711 * We have a list of rdatasets of the given type,
6712 * but they're all marked IGNORE. We simply insert
6713 * the new rdataset at the head of the list.
6714 *
6715 * Ignored rdatasets cannot occur during loading, so
6716 * we INSIST on it.
6717 */
6718 INSIST(!loading);
6719 INSIST(rbtversion == NULL ||
6720 rbtversion->serial >= topheader->serial);
6721 if (topheader_prev != NULL) {
6722 topheader_prev->next = newheader;
6723 } else {
6724 rbtnode->data = newheader;
6725 }
6726 newheader->next = topheader->next;
6727 newheader->down = topheader;
6728 topheader->next = newheader;
6729 rbtnode->dirty = 1;
6730 if (changed != NULL) {
6731 changed->dirty = true;
6732 }
6733 } else {
6734 /*
6735 * No rdatasets of the given type exist at the node.
6736 */
6737 newheader->next = rbtnode->data;
6738 newheader->down = NULL;
6739 rbtnode->data = newheader;
6740 }
6741 }
6742
6743 if (rbtversion != NULL && !newheader_nx) {
6744 update_recordsandxfrsize(true, rbtversion, newheader,
6745 nodename->length);
6746 }
6747
6748 /*
6749 * Check if the node now contains CNAME and other data.
6750 */
6751 if (rbtversion != NULL &&
6752 cname_and_other_data(rbtnode, rbtversion->serial))
6753 {
6754 return (DNS_R_CNAMEANDOTHER);
6755 }
6756
6757 if (addedrdataset != NULL) {
6758 bind_rdataset(rbtdb, rbtnode, newheader, now,
6759 isc_rwlocktype_write, addedrdataset);
6760 }
6761
6762 return (ISC_R_SUCCESS);
6763 }
6764
6765 static bool
delegating_type(dns_rbtdb_t * rbtdb,dns_rbtnode_t * node,rbtdb_rdatatype_t type)6766 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6767 rbtdb_rdatatype_t type) {
6768 if (IS_CACHE(rbtdb)) {
6769 if (type == dns_rdatatype_dname) {
6770 return (true);
6771 } else {
6772 return (false);
6773 }
6774 } else if (type == dns_rdatatype_dname ||
6775 (type == dns_rdatatype_ns &&
6776 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6777 {
6778 return (true);
6779 }
6780 return (false);
6781 }
6782
6783 static isc_result_t
addnoqname(dns_rbtdb_t * rbtdb,rdatasetheader_t * newheader,dns_rdataset_t * rdataset)6784 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6785 dns_rdataset_t *rdataset) {
6786 struct noqname *noqname;
6787 isc_mem_t *mctx = rbtdb->common.mctx;
6788 dns_name_t name;
6789 dns_rdataset_t neg, negsig;
6790 isc_result_t result;
6791 isc_region_t r;
6792
6793 dns_name_init(&name, NULL);
6794 dns_rdataset_init(&neg);
6795 dns_rdataset_init(&negsig);
6796
6797 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6798 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6799
6800 noqname = isc_mem_get(mctx, sizeof(*noqname));
6801 dns_name_init(&noqname->name, NULL);
6802 noqname->neg = NULL;
6803 noqname->negsig = NULL;
6804 noqname->type = neg.type;
6805 dns_name_dup(&name, mctx, &noqname->name);
6806 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6807 if (result != ISC_R_SUCCESS) {
6808 goto cleanup;
6809 }
6810 noqname->neg = r.base;
6811 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6812 if (result != ISC_R_SUCCESS) {
6813 goto cleanup;
6814 }
6815 noqname->negsig = r.base;
6816 dns_rdataset_disassociate(&neg);
6817 dns_rdataset_disassociate(&negsig);
6818 newheader->noqname = noqname;
6819 return (ISC_R_SUCCESS);
6820
6821 cleanup:
6822 dns_rdataset_disassociate(&neg);
6823 dns_rdataset_disassociate(&negsig);
6824 free_noqname(mctx, &noqname);
6825 return (result);
6826 }
6827
6828 static isc_result_t
addclosest(dns_rbtdb_t * rbtdb,rdatasetheader_t * newheader,dns_rdataset_t * rdataset)6829 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6830 dns_rdataset_t *rdataset) {
6831 struct noqname *closest;
6832 isc_mem_t *mctx = rbtdb->common.mctx;
6833 dns_name_t name;
6834 dns_rdataset_t neg, negsig;
6835 isc_result_t result;
6836 isc_region_t r;
6837
6838 dns_name_init(&name, NULL);
6839 dns_rdataset_init(&neg);
6840 dns_rdataset_init(&negsig);
6841
6842 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6843 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6844
6845 closest = isc_mem_get(mctx, sizeof(*closest));
6846 dns_name_init(&closest->name, NULL);
6847 closest->neg = NULL;
6848 closest->negsig = NULL;
6849 closest->type = neg.type;
6850 dns_name_dup(&name, mctx, &closest->name);
6851 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6852 if (result != ISC_R_SUCCESS) {
6853 goto cleanup;
6854 }
6855 closest->neg = r.base;
6856 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6857 if (result != ISC_R_SUCCESS) {
6858 goto cleanup;
6859 }
6860 closest->negsig = r.base;
6861 dns_rdataset_disassociate(&neg);
6862 dns_rdataset_disassociate(&negsig);
6863 newheader->closest = closest;
6864 return (ISC_R_SUCCESS);
6865
6866 cleanup:
6867 dns_rdataset_disassociate(&neg);
6868 dns_rdataset_disassociate(&negsig);
6869 free_noqname(mctx, &closest);
6870 return (result);
6871 }
6872
/*
 * Method table declared ahead of the functions below so that addrdataset()
 * and subtractrdataset() can compare rbtdb->common.methods against its
 * address before applying their zone-only checks.  Its initializer is not
 * visible in this part of the file.
 */
6873 static dns_dbmethods_t zone_methods;
6874
6875 static size_t
rdataset_size(rdatasetheader_t * header)6876 rdataset_size(rdatasetheader_t *header) {
6877 if (!NONEXISTENT(header)) {
6878 return (dns_rdataslab_size((unsigned char *)header,
6879 sizeof(*header)));
6880 }
6881
6882 return (sizeof(*header));
6883 }
6884
6885 static isc_result_t
addrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,isc_stdtime_t now,dns_rdataset_t * rdataset,unsigned int options,dns_rdataset_t * addedrdataset)6886 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6887 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6888 dns_rdataset_t *addedrdataset) {
6889 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6890 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6891 rbtdb_version_t *rbtversion = version;
6892 isc_region_t region;
6893 rdatasetheader_t *newheader;
6894 rdatasetheader_t *header;
6895 isc_result_t result;
6896 bool delegating;
6897 bool newnsec;
6898 bool tree_locked = false;
6899 bool cache_is_overmem = false;
6900 dns_fixedname_t fixed;
6901 dns_name_t *name;
6902
6903 REQUIRE(VALID_RBTDB(rbtdb));
6904 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6905
6906 if (rbtdb->common.methods == &zone_methods) {
6907 /*
6908 * SOA records are only allowed at top of zone.
6909 */
6910 if (rdataset->type == dns_rdatatype_soa &&
6911 node != rbtdb->origin_node)
6912 {
6913 return (DNS_R_NOTZONETOP);
6914 }
6915 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6916 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6917 (rdataset->type == dns_rdatatype_nsec3 ||
6918 rdataset->covers == dns_rdatatype_nsec3)) ||
6919 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6920 rdataset->type != dns_rdatatype_nsec3 &&
6921 rdataset->covers != dns_rdatatype_nsec3)));
6922 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6923 }
6924
6925 if (rbtversion == NULL) {
6926 if (now == 0) {
6927 isc_stdtime_get(&now);
6928 }
6929 } else {
6930 now = 0;
6931 }
6932
6933 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6934 ®ion, sizeof(rdatasetheader_t));
6935 if (result != ISC_R_SUCCESS) {
6936 return (result);
6937 }
6938
6939 name = dns_fixedname_initname(&fixed);
6940 nodefullname(db, node, name);
6941 dns_rdataset_getownercase(rdataset, name);
6942
6943 newheader = (rdatasetheader_t *)region.base;
6944 init_rdataset(rbtdb, newheader);
6945 setownercase(newheader, name);
6946 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6947 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6948 rdataset->covers);
6949 atomic_init(&newheader->attributes, 0);
6950 if (rdataset->ttl == 0U) {
6951 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_ZEROTTL);
6952 }
6953 newheader->noqname = NULL;
6954 newheader->closest = NULL;
6955 atomic_init(&newheader->count,
6956 atomic_fetch_add_relaxed(&init_count, 1));
6957 newheader->trust = rdataset->trust;
6958 newheader->last_used = now;
6959 newheader->node = rbtnode;
6960 if (rbtversion != NULL) {
6961 newheader->serial = rbtversion->serial;
6962 now = 0;
6963
6964 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6965 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
6966 newheader->resign =
6967 (isc_stdtime_t)(dns_time64_from32(
6968 rdataset->resign) >>
6969 1);
6970 newheader->resign_lsb = rdataset->resign & 0x1;
6971 } else {
6972 newheader->resign = 0;
6973 newheader->resign_lsb = 0;
6974 }
6975 } else {
6976 newheader->serial = 1;
6977 newheader->resign = 0;
6978 newheader->resign_lsb = 0;
6979 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) {
6980 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_PREFETCH);
6981 }
6982 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) {
6983 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NEGATIVE);
6984 }
6985 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) {
6986 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NXDOMAIN);
6987 }
6988 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) {
6989 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_OPTOUT);
6990 }
6991 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6992 result = addnoqname(rbtdb, newheader, rdataset);
6993 if (result != ISC_R_SUCCESS) {
6994 free_rdataset(rbtdb, rbtdb->common.mctx,
6995 newheader);
6996 return (result);
6997 }
6998 }
6999 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
7000 result = addclosest(rbtdb, newheader, rdataset);
7001 if (result != ISC_R_SUCCESS) {
7002 free_rdataset(rbtdb, rbtdb->common.mctx,
7003 newheader);
7004 return (result);
7005 }
7006 }
7007 }
7008
7009 /*
7010 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
7011 * just DNAME for the cache), then we need to set the callback bit
7012 * on the node.
7013 */
7014 if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
7015 delegating = true;
7016 } else {
7017 delegating = false;
7018 }
7019
7020 /*
7021 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
7022 */
7023 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7024 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
7025 rdataset->type == dns_rdatatype_nsec)
7026 {
7027 newnsec = true;
7028 } else {
7029 newnsec = false;
7030 }
7031 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7032
7033 /*
7034 * If we're adding a delegation type, adding to the auxiliary NSEC
7035 * tree, or the DB is a cache in an overmem state, hold an
7036 * exclusive lock on the tree. In the latter case the lock does
7037 * not necessarily have to be acquired but it will help purge
7038 * ancient entries more effectively.
7039 */
7040 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx)) {
7041 cache_is_overmem = true;
7042 }
7043 if (delegating || newnsec || cache_is_overmem) {
7044 tree_locked = true;
7045 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7046 }
7047
7048 if (cache_is_overmem) {
7049 overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader),
7050 tree_locked);
7051 }
7052
7053 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7054 isc_rwlocktype_write);
7055
7056 if (rbtdb->rrsetstats != NULL) {
7057 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_STATCOUNT);
7058 update_rrsetstats(rbtdb, newheader->type,
7059 atomic_load_acquire(&newheader->attributes),
7060 true);
7061 }
7062
7063 if (IS_CACHE(rbtdb)) {
7064 if (tree_locked) {
7065 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
7066 }
7067
7068 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
7069 if (header != NULL) {
7070 dns_ttl_t rdh_ttl = header->rdh_ttl;
7071
7072 /* Only account for stale TTL if cache is not overmem */
7073 if (!cache_is_overmem) {
7074 rdh_ttl += rbtdb->serve_stale_ttl;
7075 }
7076
7077 if (rdh_ttl < now - RBTDB_VIRTUAL) {
7078 expire_header(rbtdb, header, tree_locked,
7079 expire_ttl);
7080 }
7081 }
7082
7083 /*
7084 * If we've been holding a write lock on the tree just for
7085 * cleaning, we can release it now. However, we still need the
7086 * node lock.
7087 */
7088 if (tree_locked && !delegating && !newnsec) {
7089 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7090 tree_locked = false;
7091 }
7092 }
7093
7094 result = ISC_R_SUCCESS;
7095 if (newnsec) {
7096 dns_rbtnode_t *nsecnode;
7097
7098 nsecnode = NULL;
7099 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
7100 if (result == ISC_R_SUCCESS) {
7101 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
7102 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
7103 } else if (result == ISC_R_EXISTS) {
7104 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
7105 result = ISC_R_SUCCESS;
7106 }
7107 }
7108
7109 if (result == ISC_R_SUCCESS) {
7110 result = add32(rbtdb, rbtnode, name, rbtversion, newheader,
7111 options, false, addedrdataset, now);
7112 }
7113 if (result == ISC_R_SUCCESS && delegating) {
7114 rbtnode->find_callback = 1;
7115 }
7116
7117 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7118 isc_rwlocktype_write);
7119
7120 if (tree_locked) {
7121 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7122 }
7123
7124 /*
7125 * Update the zone's secure status. If version is non-NULL
7126 * this is deferred until closeversion() is called.
7127 */
7128 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) {
7129 iszonesecure(db, version, rbtdb->origin_node);
7130 }
7131
7132 return (result);
7133 }
7134
7135 static isc_result_t
subtractrdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdataset_t * rdataset,unsigned int options,dns_rdataset_t * newrdataset)7136 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
7137 dns_rdataset_t *rdataset, unsigned int options,
7138 dns_rdataset_t *newrdataset) {
7139 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7140 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
7141 rbtdb_version_t *rbtversion = version;
7142 dns_fixedname_t fname;
7143 dns_name_t *nodename = dns_fixedname_initname(&fname);
7144 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
7145 unsigned char *subresult;
7146 isc_region_t region;
7147 isc_result_t result;
7148 rbtdb_changed_t *changed;
7149
7150 REQUIRE(VALID_RBTDB(rbtdb));
7151 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
7152
7153 if (rbtdb->common.methods == &zone_methods) {
7154 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7155 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
7156 (rdataset->type == dns_rdatatype_nsec3 ||
7157 rdataset->covers == dns_rdatatype_nsec3)) ||
7158 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
7159 rdataset->type != dns_rdatatype_nsec3 &&
7160 rdataset->covers != dns_rdatatype_nsec3)));
7161 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7162 }
7163
7164 nodefullname(db, node, nodename);
7165
7166 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
7167 ®ion, sizeof(rdatasetheader_t));
7168 if (result != ISC_R_SUCCESS) {
7169 return (result);
7170 }
7171 newheader = (rdatasetheader_t *)region.base;
7172 init_rdataset(rbtdb, newheader);
7173 set_ttl(rbtdb, newheader, rdataset->ttl);
7174 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
7175 rdataset->covers);
7176 atomic_init(&newheader->attributes, 0);
7177 newheader->serial = rbtversion->serial;
7178 newheader->trust = 0;
7179 newheader->noqname = NULL;
7180 newheader->closest = NULL;
7181 atomic_init(&newheader->count,
7182 atomic_fetch_add_relaxed(&init_count, 1));
7183 newheader->last_used = 0;
7184 newheader->node = rbtnode;
7185 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7186 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
7187 newheader->resign =
7188 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >>
7189 1);
7190 newheader->resign_lsb = rdataset->resign & 0x1;
7191 } else {
7192 newheader->resign = 0;
7193 newheader->resign_lsb = 0;
7194 }
7195
7196 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7197 isc_rwlocktype_write);
7198
7199 changed = add_changed(rbtdb, rbtversion, rbtnode);
7200 if (changed == NULL) {
7201 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
7202 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7203 isc_rwlocktype_write);
7204 return (ISC_R_NOMEMORY);
7205 }
7206
7207 topheader_prev = NULL;
7208 for (topheader = rbtnode->data; topheader != NULL;
7209 topheader = topheader->next)
7210 {
7211 if (topheader->type == newheader->type) {
7212 break;
7213 }
7214 topheader_prev = topheader;
7215 }
7216 /*
7217 * If header isn't NULL, we've found the right type. There may be
7218 * IGNORE rdatasets between the top of the chain and the first real
7219 * data. We skip over them.
7220 */
7221 header = topheader;
7222 while (header != NULL && IGNORE(header)) {
7223 header = header->down;
7224 }
7225 if (header != NULL && EXISTS(header)) {
7226 unsigned int flags = 0;
7227 subresult = NULL;
7228 result = ISC_R_SUCCESS;
7229 if ((options & DNS_DBSUB_EXACT) != 0) {
7230 flags |= DNS_RDATASLAB_EXACT;
7231 if (newheader->rdh_ttl != header->rdh_ttl) {
7232 result = DNS_R_NOTEXACT;
7233 }
7234 }
7235 if (result == ISC_R_SUCCESS) {
7236 result = dns_rdataslab_subtract(
7237 (unsigned char *)header,
7238 (unsigned char *)newheader,
7239 (unsigned int)(sizeof(*newheader)),
7240 rbtdb->common.mctx, rbtdb->common.rdclass,
7241 (dns_rdatatype_t)header->type, flags,
7242 &subresult);
7243 }
7244 if (result == ISC_R_SUCCESS) {
7245 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
7246 newheader = (rdatasetheader_t *)subresult;
7247 init_rdataset(rbtdb, newheader);
7248 update_newheader(newheader, header);
7249 if (RESIGN(header)) {
7250 RDATASET_ATTR_SET(newheader,
7251 RDATASET_ATTR_RESIGN);
7252 newheader->resign = header->resign;
7253 newheader->resign_lsb = header->resign_lsb;
7254 resign_insert(rbtdb, rbtnode->locknum,
7255 newheader);
7256 }
7257 /*
7258 * We have to set the serial since the rdataslab
7259 * subtraction routine copies the reserved portion of
7260 * header, not newheader.
7261 */
7262 newheader->serial = rbtversion->serial;
7263 /*
7264 * XXXJT: dns_rdataslab_subtract() copied the pointers
7265 * to additional info. We need to clear these fields
7266 * to avoid having duplicated references.
7267 */
7268 update_recordsandxfrsize(true, rbtversion, newheader,
7269 nodename->length);
7270 } else if (result == DNS_R_NXRRSET) {
7271 /*
7272 * This subtraction would remove all of the rdata;
7273 * add a nonexistent header instead.
7274 */
7275 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
7276 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
7277 if (newheader == NULL) {
7278 result = ISC_R_NOMEMORY;
7279 goto unlock;
7280 }
7281 init_rdataset(rbtdb, newheader);
7282 set_ttl(rbtdb, newheader, 0);
7283 newheader->type = topheader->type;
7284 atomic_init(&newheader->attributes,
7285 RDATASET_ATTR_NONEXISTENT);
7286 newheader->trust = 0;
7287 newheader->serial = rbtversion->serial;
7288 newheader->noqname = NULL;
7289 newheader->closest = NULL;
7290 atomic_init(&newheader->count, 0);
7291 newheader->node = rbtnode;
7292 newheader->resign = 0;
7293 newheader->resign_lsb = 0;
7294 newheader->last_used = 0;
7295 } else {
7296 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
7297 goto unlock;
7298 }
7299
7300 /*
7301 * If we're here, we want to link newheader in front of
7302 * topheader.
7303 */
7304 INSIST(rbtversion->serial >= topheader->serial);
7305 update_recordsandxfrsize(false, rbtversion, header,
7306 nodename->length);
7307 if (topheader_prev != NULL) {
7308 topheader_prev->next = newheader;
7309 } else {
7310 rbtnode->data = newheader;
7311 }
7312 newheader->next = topheader->next;
7313 newheader->down = topheader;
7314 topheader->next = newheader;
7315 rbtnode->dirty = 1;
7316 changed->dirty = true;
7317 resign_delete(rbtdb, rbtversion, header);
7318 } else {
7319 /*
7320 * The rdataset doesn't exist, so we don't need to do anything
7321 * to satisfy the deletion request.
7322 */
7323 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
7324 if ((options & DNS_DBSUB_EXACT) != 0) {
7325 result = DNS_R_NOTEXACT;
7326 } else {
7327 result = DNS_R_UNCHANGED;
7328 }
7329 }
7330
7331 if (result == ISC_R_SUCCESS && newrdataset != NULL) {
7332 bind_rdataset(rbtdb, rbtnode, newheader, 0,
7333 isc_rwlocktype_write, newrdataset);
7334 }
7335
7336 if (result == DNS_R_NXRRSET && newrdataset != NULL &&
7337 (options & DNS_DBSUB_WANTOLD) != 0)
7338 {
7339 bind_rdataset(rbtdb, rbtnode, header, 0, isc_rwlocktype_write,
7340 newrdataset);
7341 }
7342
7343 unlock:
7344 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7345 isc_rwlocktype_write);
7346
7347 /*
7348 * Update the zone's secure status. If version is non-NULL
7349 * this is deferred until closeversion() is called.
7350 */
7351 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) {
7352 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
7353 version = rbtdb->current_version;
7354 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
7355 iszonesecure(db, version, rbtdb->origin_node);
7356 }
7357
7358 return (result);
7359 }
7360
7361 static isc_result_t
deleterdataset(dns_db_t * db,dns_dbnode_t * node,dns_dbversion_t * version,dns_rdatatype_t type,dns_rdatatype_t covers)7362 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
7363 dns_rdatatype_t type, dns_rdatatype_t covers) {
7364 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7365 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
7366 rbtdb_version_t *rbtversion = version;
7367 dns_fixedname_t fname;
7368 dns_name_t *nodename = dns_fixedname_initname(&fname);
7369 isc_result_t result;
7370 rdatasetheader_t *newheader;
7371
7372 REQUIRE(VALID_RBTDB(rbtdb));
7373 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7374
7375 if (type == dns_rdatatype_any) {
7376 return (ISC_R_NOTIMPLEMENTED);
7377 }
7378 if (type == dns_rdatatype_rrsig && covers == 0) {
7379 return (ISC_R_NOTIMPLEMENTED);
7380 }
7381
7382 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
7383 if (newheader == NULL) {
7384 return (ISC_R_NOMEMORY);
7385 }
7386 init_rdataset(rbtdb, newheader);
7387 set_ttl(rbtdb, newheader, 0);
7388 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
7389 atomic_init(&newheader->attributes, RDATASET_ATTR_NONEXISTENT);
7390 newheader->trust = 0;
7391 newheader->noqname = NULL;
7392 newheader->closest = NULL;
7393 if (rbtversion != NULL) {
7394 newheader->serial = rbtversion->serial;
7395 } else {
7396 newheader->serial = 0;
7397 }
7398 atomic_init(&newheader->count, 0);
7399 newheader->last_used = 0;
7400 newheader->node = rbtnode;
7401
7402 nodefullname(db, node, nodename);
7403
7404 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7405 isc_rwlocktype_write);
7406 result = add32(rbtdb, rbtnode, nodename, rbtversion, newheader,
7407 DNS_DBADD_FORCE, false, NULL, 0);
7408 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7409 isc_rwlocktype_write);
7410
7411 /*
7412 * Update the zone's secure status. If version is non-NULL
7413 * this is deferred until closeversion() is called.
7414 */
7415 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) {
7416 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
7417 version = rbtdb->current_version;
7418 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
7419 iszonesecure(db, version, rbtdb->origin_node);
7420 }
7421
7422 return (result);
7423 }
7424
7425 /*
7426 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
7427 */
7428 static isc_result_t
loadnode(dns_rbtdb_t * rbtdb,const dns_name_t * name,dns_rbtnode_t ** nodep,bool hasnsec)7429 loadnode(dns_rbtdb_t *rbtdb, const dns_name_t *name, dns_rbtnode_t **nodep,
7430 bool hasnsec) {
7431 isc_result_t noderesult, nsecresult, tmpresult;
7432 dns_rbtnode_t *nsecnode = NULL, *node = NULL;
7433
7434 noderesult = dns_rbt_addnode(rbtdb->tree, name, &node);
7435 if (!hasnsec) {
7436 goto done;
7437 }
7438 if (noderesult == ISC_R_EXISTS) {
7439 /*
7440 * Add a node to the auxiliary NSEC tree for an old node
7441 * just now getting an NSEC record.
7442 */
7443 if (node->nsec == DNS_RBT_NSEC_HAS_NSEC) {
7444 goto done;
7445 }
7446 } else if (noderesult != ISC_R_SUCCESS) {
7447 goto done;
7448 }
7449
7450 /*
7451 * Build the auxiliary tree for NSECs as we go.
7452 * This tree speeds searches for closest NSECs that would otherwise
7453 * need to examine many irrelevant nodes in large TLDs.
7454 *
7455 * Add nodes to the auxiliary tree after corresponding nodes have
7456 * been added to the main tree.
7457 */
7458 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
7459 if (nsecresult == ISC_R_SUCCESS) {
7460 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
7461 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
7462 goto done;
7463 }
7464
7465 if (nsecresult == ISC_R_EXISTS) {
7466 #if 1 /* 0 */
7467 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7468 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
7469 "addnode: NSEC node already exists");
7470 #endif /* if 1 */
7471 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
7472 goto done;
7473 }
7474
7475 if (noderesult == ISC_R_SUCCESS) {
7476 /*
7477 * Remove the node we just added above.
7478 */
7479 tmpresult = dns_rbt_deletenode(rbtdb->tree, node, false);
7480 if (tmpresult != ISC_R_SUCCESS) {
7481 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7482 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
7483 "loading_addrdataset: "
7484 "dns_rbt_deletenode: %s after "
7485 "dns_rbt_addnode(NSEC): %s",
7486 isc_result_totext(tmpresult),
7487 isc_result_totext(noderesult));
7488 }
7489 }
7490
7491 /*
7492 * Set the error condition to be returned.
7493 */
7494 noderesult = nsecresult;
7495
7496 done:
7497 if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS) {
7498 *nodep = node;
7499 }
7500
7501 return (noderesult);
7502 }
7503
7504 static isc_result_t
loading_addrdataset(void * arg,const dns_name_t * name,dns_rdataset_t * rdataset)7505 loading_addrdataset(void *arg, const dns_name_t *name,
7506 dns_rdataset_t *rdataset) {
7507 rbtdb_load_t *loadctx = arg;
7508 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
7509 dns_rbtnode_t *node;
7510 isc_result_t result;
7511 isc_region_t region;
7512 rdatasetheader_t *newheader;
7513
7514 REQUIRE(rdataset->rdclass == rbtdb->common.rdclass);
7515
7516 /*
7517 * SOA records are only allowed at top of zone.
7518 */
7519 if (rdataset->type == dns_rdatatype_soa && !IS_CACHE(rbtdb) &&
7520 !dns_name_equal(name, &rbtdb->common.origin))
7521 {
7522 return (DNS_R_NOTZONETOP);
7523 }
7524
7525 if (rdataset->type != dns_rdatatype_nsec3 &&
7526 rdataset->covers != dns_rdatatype_nsec3)
7527 {
7528 add_empty_wildcards(rbtdb, name, false);
7529 }
7530
7531 if (dns_name_iswildcard(name)) {
7532 /*
7533 * NS record owners cannot legally be wild cards.
7534 */
7535 if (rdataset->type == dns_rdatatype_ns) {
7536 return (DNS_R_INVALIDNS);
7537 }
7538 /*
7539 * NSEC3 record owners cannot legally be wild cards.
7540 */
7541 if (rdataset->type == dns_rdatatype_nsec3) {
7542 return (DNS_R_INVALIDNSEC3);
7543 }
7544 result = add_wildcard_magic(rbtdb, name, false);
7545 if (result != ISC_R_SUCCESS) {
7546 return (result);
7547 }
7548 }
7549
7550 node = NULL;
7551 if (rdataset->type == dns_rdatatype_nsec3 ||
7552 rdataset->covers == dns_rdatatype_nsec3)
7553 {
7554 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
7555 if (result == ISC_R_SUCCESS) {
7556 node->nsec = DNS_RBT_NSEC_NSEC3;
7557 }
7558 } else if (rdataset->type == dns_rdatatype_nsec) {
7559 result = loadnode(rbtdb, name, &node, true);
7560 } else {
7561 result = loadnode(rbtdb, name, &node, false);
7562 }
7563 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
7564 return (result);
7565 }
7566 if (result == ISC_R_SUCCESS) {
7567 node->locknum = node->hashval % rbtdb->node_lock_count;
7568 }
7569
7570 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
7571 ®ion, sizeof(rdatasetheader_t));
7572 if (result != ISC_R_SUCCESS) {
7573 return (result);
7574 }
7575 newheader = (rdatasetheader_t *)region.base;
7576 init_rdataset(rbtdb, newheader);
7577 set_ttl(rbtdb, newheader, rdataset->ttl + loadctx->now); /* XXX overflow
7578 * check */
7579 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
7580 rdataset->covers);
7581 atomic_init(&newheader->attributes, 0);
7582 newheader->trust = rdataset->trust;
7583 newheader->serial = 1;
7584 newheader->noqname = NULL;
7585 newheader->closest = NULL;
7586 atomic_init(&newheader->count,
7587 atomic_fetch_add_relaxed(&init_count, 1));
7588 newheader->last_used = 0;
7589 newheader->node = node;
7590 setownercase(newheader, name);
7591
7592 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7593 RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
7594 newheader->resign =
7595 (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >>
7596 1);
7597 newheader->resign_lsb = rdataset->resign & 0x1;
7598 } else {
7599 newheader->resign = 0;
7600 newheader->resign_lsb = 0;
7601 }
7602
7603 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write);
7604 result = add32(rbtdb, node, name, rbtdb->current_version, newheader,
7605 DNS_DBADD_MERGE, true, NULL, 0);
7606 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7607 isc_rwlocktype_write);
7608
7609 if (result == ISC_R_SUCCESS &&
7610 delegating_type(rbtdb, node, rdataset->type))
7611 {
7612 node->find_callback = 1;
7613 } else if (result == DNS_R_UNCHANGED) {
7614 result = ISC_R_SUCCESS;
7615 }
7616
7617 return (result);
7618 }
7619
7620 static isc_result_t
rbt_datafixer(dns_rbtnode_t * rbtnode,void * base,size_t filesize,void * arg,uint64_t * crc)7621 rbt_datafixer(dns_rbtnode_t *rbtnode, void *base, size_t filesize, void *arg,
7622 uint64_t *crc) {
7623 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg;
7624 rdatasetheader_t *header;
7625 unsigned char *limit = ((unsigned char *)base) + filesize;
7626
7627 REQUIRE(rbtnode != NULL);
7628 REQUIRE(VALID_RBTDB(rbtdb));
7629
7630 for (header = rbtnode->data; header != NULL; header = header->next) {
7631 unsigned char *p = (unsigned char *)header;
7632 size_t size = dns_rdataslab_size(p, sizeof(*header));
7633 isc_crc64_update(crc, p, size);
7634 #ifdef DEBUG
7635 hexdump("hashing header", p, sizeof(rdatasetheader_t));
7636 hexdump("hashing slab", p + sizeof(rdatasetheader_t),
7637 size - sizeof(rdatasetheader_t));
7638 #endif /* ifdef DEBUG */
7639 header->serial = 1;
7640 header->is_mmapped = 1;
7641 header->node = rbtnode;
7642 header->node_is_relative = 0;
7643
7644 if (RESIGN(header) &&
7645 (header->resign != 0 || header->resign_lsb != 0))
7646 {
7647 int idx = header->node->locknum;
7648 isc_heap_insert(rbtdb->heaps[idx], header);
7649 }
7650
7651 if (header->next != NULL) {
7652 size_t cooked = dns_rbt_serialize_align(size);
7653 if ((uintptr_t)header->next !=
7654 (p - (unsigned char *)base) + cooked)
7655 {
7656 return (ISC_R_INVALIDFILE);
7657 }
7658 header->next = (rdatasetheader_t *)(p + cooked);
7659 header->next_is_relative = 0;
7660 if ((header->next < (rdatasetheader_t *)base) ||
7661 (header->next > (rdatasetheader_t *)limit))
7662 {
7663 return (ISC_R_INVALIDFILE);
7664 }
7665 }
7666
7667 update_recordsandxfrsize(true, rbtdb->current_version, header,
7668 rbtnode->fullnamelen);
7669 }
7670
7671 /* We're done deserializing; clear fullnamelen */
7672 rbtnode->fullnamelen = 0;
7673
7674 return (ISC_R_SUCCESS);
7675 }
7676
7677 /*
7678 * Load the RBT database from the image in 'f'
7679 */
7680 static isc_result_t
deserialize(void * arg,FILE * f,off_t offset)7681 deserialize(void *arg, FILE *f, off_t offset) {
7682 isc_result_t result;
7683 rbtdb_load_t *loadctx = arg;
7684 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
7685 rbtdb_file_header_t *header;
7686 int fd;
7687 off_t filesize = 0;
7688 char *base;
7689 dns_rbt_t *tree = NULL, *nsec = NULL, *nsec3 = NULL;
7690 int protect, flags;
7691 dns_rbtnode_t *origin_node = NULL;
7692
7693 REQUIRE(VALID_RBTDB(rbtdb));
7694
7695 /*
7696 * TODO CKB: since this is read-write (had to be to add nodes later)
7697 * we will need to lock the file or the nodes in it before modifying
7698 * the nodes in the file.
7699 */
7700
7701 /* Map in the whole file in one go */
7702 fd = fileno(f);
7703 isc_file_getsizefd(fd, &filesize);
7704 protect = PROT_READ | PROT_WRITE;
7705 flags = MAP_PRIVATE;
7706 #ifdef MAP_FILE
7707 flags |= MAP_FILE;
7708 #endif /* ifdef MAP_FILE */
7709
7710 base = isc_file_mmap(NULL, filesize, protect, flags, fd, 0);
7711 if (base == NULL || base == MAP_FAILED) {
7712 return (ISC_R_FAILURE);
7713 }
7714
7715 header = (rbtdb_file_header_t *)(base + offset);
7716 if (!match_header_version(header)) {
7717 result = ISC_R_INVALIDFILE;
7718 goto cleanup;
7719 }
7720
7721 if (header->tree != 0) {
7722 result = dns_rbt_deserialize_tree(
7723 base, filesize, (off_t)header->tree, rbtdb->common.mctx,
7724 delete_callback, rbtdb, rbt_datafixer, rbtdb, NULL,
7725 &tree);
7726 if (result != ISC_R_SUCCESS) {
7727 goto cleanup;
7728 }
7729
7730 result = dns_rbt_findnode(tree, &rbtdb->common.origin, NULL,
7731 &origin_node, NULL,
7732 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7733 if (result != ISC_R_SUCCESS) {
7734 goto cleanup;
7735 }
7736 }
7737
7738 if (header->nsec != 0) {
7739 result = dns_rbt_deserialize_tree(
7740 base, filesize, (off_t)header->nsec, rbtdb->common.mctx,
7741 delete_callback, rbtdb, rbt_datafixer, rbtdb, NULL,
7742 &nsec);
7743 if (result != ISC_R_SUCCESS) {
7744 goto cleanup;
7745 }
7746 }
7747
7748 if (header->nsec3 != 0) {
7749 result = dns_rbt_deserialize_tree(
7750 base, filesize, (off_t)header->nsec3,
7751 rbtdb->common.mctx, delete_callback, rbtdb,
7752 rbt_datafixer, rbtdb, NULL, &nsec3);
7753 if (result != ISC_R_SUCCESS) {
7754 goto cleanup;
7755 }
7756 }
7757
7758 /*
7759 * We have a successfully loaded all the rbt trees now update
7760 * rbtdb to use them.
7761 */
7762
7763 rbtdb->mmap_location = base;
7764 rbtdb->mmap_size = (size_t)filesize;
7765
7766 if (tree != NULL) {
7767 dns_rbt_destroy(&rbtdb->tree);
7768 rbtdb->tree = tree;
7769 rbtdb->origin_node = origin_node;
7770 }
7771
7772 if (nsec != NULL) {
7773 dns_rbt_destroy(&rbtdb->nsec);
7774 rbtdb->nsec = nsec;
7775 }
7776
7777 if (nsec3 != NULL) {
7778 dns_rbt_destroy(&rbtdb->nsec3);
7779 rbtdb->nsec3 = nsec3;
7780 }
7781
7782 return (ISC_R_SUCCESS);
7783
7784 cleanup:
7785 if (tree != NULL) {
7786 dns_rbt_destroy(&tree);
7787 }
7788 if (nsec != NULL) {
7789 dns_rbt_destroy(&nsec);
7790 }
7791 if (nsec3 != NULL) {
7792 dns_rbt_destroy(&nsec3);
7793 }
7794 isc_file_munmap(base, (size_t)filesize);
7795 return (result);
7796 }
7797
7798 static isc_result_t
beginload(dns_db_t * db,dns_rdatacallbacks_t * callbacks)7799 beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
7800 rbtdb_load_t *loadctx;
7801 dns_rbtdb_t *rbtdb;
7802 rbtdb = (dns_rbtdb_t *)db;
7803
7804 REQUIRE(DNS_CALLBACK_VALID(callbacks));
7805 REQUIRE(VALID_RBTDB(rbtdb));
7806
7807 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7808
7809 loadctx->rbtdb = rbtdb;
7810 if (IS_CACHE(rbtdb)) {
7811 isc_stdtime_get(&loadctx->now);
7812 } else {
7813 loadctx->now = 0;
7814 }
7815
7816 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7817
7818 REQUIRE((rbtdb->attributes &
7819 (RBTDB_ATTR_LOADED | RBTDB_ATTR_LOADING)) == 0);
7820 rbtdb->attributes |= RBTDB_ATTR_LOADING;
7821
7822 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7823
7824 callbacks->add = loading_addrdataset;
7825 callbacks->add_private = loadctx;
7826 callbacks->deserialize = deserialize;
7827 callbacks->deserialize_private = loadctx;
7828
7829 return (ISC_R_SUCCESS);
7830 }
7831
7832 static isc_result_t
endload(dns_db_t * db,dns_rdatacallbacks_t * callbacks)7833 endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
7834 rbtdb_load_t *loadctx;
7835 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7836
7837 REQUIRE(VALID_RBTDB(rbtdb));
7838 REQUIRE(DNS_CALLBACK_VALID(callbacks));
7839 loadctx = callbacks->add_private;
7840 REQUIRE(loadctx != NULL);
7841 REQUIRE(loadctx->rbtdb == rbtdb);
7842
7843 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7844
7845 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7846 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7847
7848 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7849 rbtdb->attributes |= RBTDB_ATTR_LOADED;
7850
7851 /*
7852 * If there's a KEY rdataset at the zone origin containing a
7853 * zone key, we consider the zone secure.
7854 */
7855 if (!IS_CACHE(rbtdb) && rbtdb->origin_node != NULL) {
7856 dns_dbversion_t *version = rbtdb->current_version;
7857 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7858 iszonesecure(db, version, rbtdb->origin_node);
7859 } else {
7860 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7861 }
7862
7863 callbacks->add = NULL;
7864 callbacks->add_private = NULL;
7865 callbacks->deserialize = NULL;
7866 callbacks->deserialize_private = NULL;
7867
7868 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7869
7870 return (ISC_R_SUCCESS);
7871 }
7872
7873 /*
7874 * helper function to handle writing out the rdataset data pointed to
7875 * by the void *data pointer in the dns_rbtnode
7876 */
7877 static isc_result_t
rbt_datawriter(FILE * rbtfile,unsigned char * data,void * arg,uint64_t * crc)7878 rbt_datawriter(FILE *rbtfile, unsigned char *data, void *arg, uint64_t *crc) {
7879 rbtdb_version_t *version = (rbtdb_version_t *)arg;
7880 rbtdb_serial_t serial;
7881 rdatasetheader_t newheader;
7882 rdatasetheader_t *header = (rdatasetheader_t *)data, *next;
7883 off_t where;
7884 size_t cooked, size;
7885 unsigned char *p;
7886 isc_result_t result = ISC_R_SUCCESS;
7887 char pad[sizeof(char *)];
7888 uintptr_t off;
7889
7890 REQUIRE(rbtfile != NULL);
7891 REQUIRE(data != NULL);
7892 REQUIRE(version != NULL);
7893
7894 serial = version->serial;
7895
7896 for (; header != NULL; header = next) {
7897 next = header->next;
7898 do {
7899 if (header->serial <= serial && !IGNORE(header)) {
7900 if (NONEXISTENT(header)) {
7901 header = NULL;
7902 }
7903 break;
7904 } else {
7905 header = header->down;
7906 }
7907 } while (header != NULL);
7908
7909 if (header == NULL) {
7910 continue;
7911 }
7912
7913 CHECK(isc_stdio_tell(rbtfile, &where));
7914 size = dns_rdataslab_size((unsigned char *)header,
7915 sizeof(rdatasetheader_t));
7916
7917 p = (unsigned char *)header;
7918 memmove(&newheader, p, sizeof(rdatasetheader_t));
7919 newheader.down = NULL;
7920 newheader.next = NULL;
7921 off = where;
7922 if ((off_t)off != where) {
7923 return (ISC_R_RANGE);
7924 }
7925 newheader.node = (dns_rbtnode_t *)off;
7926 newheader.node_is_relative = 1;
7927 newheader.serial = 1;
7928
7929 /*
7930 * Round size up to the next pointer sized offset so it
7931 * will be properly aligned when read back in.
7932 */
7933 cooked = dns_rbt_serialize_align(size);
7934 if (next != NULL) {
7935 newheader.next = (rdatasetheader_t *)(off + cooked);
7936 newheader.next_is_relative = 1;
7937 }
7938
7939 #ifdef DEBUG
7940 hexdump("writing header", (unsigned char *)&newheader,
7941 sizeof(rdatasetheader_t));
7942 hexdump("writing slab", p + sizeof(rdatasetheader_t),
7943 size - sizeof(rdatasetheader_t));
7944 #endif /* ifdef DEBUG */
7945 isc_crc64_update(crc, (unsigned char *)&newheader,
7946 sizeof(rdatasetheader_t));
7947 CHECK(isc_stdio_write(&newheader, sizeof(rdatasetheader_t), 1,
7948 rbtfile, NULL));
7949
7950 isc_crc64_update(crc, p + sizeof(rdatasetheader_t),
7951 size - sizeof(rdatasetheader_t));
7952 CHECK(isc_stdio_write(p + sizeof(rdatasetheader_t),
7953 size - sizeof(rdatasetheader_t), 1,
7954 rbtfile, NULL));
7955 /*
7956 * Pad to force alignment.
7957 */
7958 if (size != (size_t)cooked) {
7959 memset(pad, 0, sizeof(pad));
7960 CHECK(isc_stdio_write(pad, cooked - size, 1, rbtfile,
7961 NULL));
7962 }
7963 }
7964
7965 failure:
7966 return (result);
7967 }
7968
7969 /*
7970 * Write out a zeroed header as a placeholder. Doing this ensures
7971 * that the file will not read while it is partially written, should
7972 * writing fail or be interrupted.
7973 */
7974 static isc_result_t
rbtdb_zero_header(FILE * rbtfile)7975 rbtdb_zero_header(FILE *rbtfile) {
7976 char buffer[RBTDB_HEADER_LENGTH];
7977 isc_result_t result;
7978
7979 memset(buffer, 0, RBTDB_HEADER_LENGTH);
7980 result = isc_stdio_write(buffer, 1, RBTDB_HEADER_LENGTH, rbtfile, NULL);
7981 fflush(rbtfile);
7982
7983 return (result);
7984 }
7985
7986 static isc_once_t once = ISC_ONCE_INIT;
7987
7988 static void
init_file_version(void)7989 init_file_version(void) {
7990 int n;
7991
7992 memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
7993 n = snprintf(FILE_VERSION, sizeof(FILE_VERSION), "RBTDB Image %s %s",
7994 dns_major, dns_mapapi);
7995 INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
7996 }
7997
7998 /*
7999 * Write the file header out, recording the locations of the three
8000 * RBT's used in the rbtdb: tree, nsec, and nsec3, and including NodeDump
8001 * version information and any information stored in the rbtdb object
8002 * itself that should be stored here.
8003 */
8004 static isc_result_t
rbtdb_write_header(FILE * rbtfile,off_t tree_location,off_t nsec_location,off_t nsec3_location)8005 rbtdb_write_header(FILE *rbtfile, off_t tree_location, off_t nsec_location,
8006 off_t nsec3_location) {
8007 rbtdb_file_header_t header;
8008 isc_result_t result;
8009
8010 RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
8011
8012 memset(&header, 0, sizeof(rbtdb_file_header_t));
8013 memmove(header.version1, FILE_VERSION, sizeof(header.version1));
8014 memmove(header.version2, FILE_VERSION, sizeof(header.version2));
8015 header.ptrsize = (uint32_t)sizeof(void *);
8016 header.bigendian = (1 == htonl(1)) ? 1 : 0;
8017 header.tree = (uint64_t)tree_location;
8018 header.nsec = (uint64_t)nsec_location;
8019 header.nsec3 = (uint64_t)nsec3_location;
8020 result = isc_stdio_write(&header, 1, sizeof(rbtdb_file_header_t),
8021 rbtfile, NULL);
8022 fflush(rbtfile);
8023
8024 return (result);
8025 }
8026
8027 static bool
match_header_version(rbtdb_file_header_t * header)8028 match_header_version(rbtdb_file_header_t *header) {
8029 RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
8030
8031 if (memcmp(header->version1, FILE_VERSION, sizeof(header->version1)) !=
8032 0 ||
8033 memcmp(header->version2, FILE_VERSION, sizeof(header->version1)) !=
8034 0)
8035 {
8036 return (false);
8037 }
8038
8039 return (true);
8040 }
8041
8042 static isc_result_t
serialize(dns_db_t * db,dns_dbversion_t * ver,FILE * rbtfile)8043 serialize(dns_db_t *db, dns_dbversion_t *ver, FILE *rbtfile) {
8044 rbtdb_version_t *version = (rbtdb_version_t *)ver;
8045 dns_rbtdb_t *rbtdb;
8046 isc_result_t result;
8047 off_t tree_location, nsec_location, nsec3_location, header_location;
8048
8049 rbtdb = (dns_rbtdb_t *)db;
8050
8051 REQUIRE(VALID_RBTDB(rbtdb));
8052 REQUIRE(rbtfile != NULL);
8053
8054 /* Ensure we're writing to a plain file */
8055 CHECK(isc_file_isplainfilefd(fileno(rbtfile)));
8056
8057 /*
8058 * first, write out a zeroed header to store rbtdb information
8059 *
8060 * then for each of the three trees, store the current position
8061 * in the file and call dns_rbt_serialize_tree
8062 *
8063 * finally, write out the rbtdb header, storing the locations of the
8064 * rbtheaders
8065 *
8066 * NOTE: need to do something better with the return codes, &= will
8067 * not work.
8068 */
8069 CHECK(isc_stdio_tell(rbtfile, &header_location));
8070 CHECK(rbtdb_zero_header(rbtfile));
8071 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->tree, rbt_datawriter,
8072 version, &tree_location));
8073 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec, rbt_datawriter,
8074 version, &nsec_location));
8075 CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec3, rbt_datawriter,
8076 version, &nsec3_location));
8077
8078 CHECK(isc_stdio_seek(rbtfile, header_location, SEEK_SET));
8079 CHECK(rbtdb_write_header(rbtfile, tree_location, nsec_location,
8080 nsec3_location));
8081 failure:
8082 return (result);
8083 }
8084
8085 static isc_result_t
dump(dns_db_t * db,dns_dbversion_t * version,const char * filename,dns_masterformat_t masterformat)8086 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
8087 dns_masterformat_t masterformat) {
8088 dns_rbtdb_t *rbtdb;
8089 rbtdb_version_t *rbtversion = version;
8090
8091 rbtdb = (dns_rbtdb_t *)db;
8092
8093 REQUIRE(VALID_RBTDB(rbtdb));
8094 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
8095
8096 return (dns_master_dump(rbtdb->common.mctx, db, version,
8097 &dns_master_style_default, filename,
8098 masterformat, NULL));
8099 }
8100
8101 static void
delete_callback(void * data,void * arg)8102 delete_callback(void *data, void *arg) {
8103 dns_rbtdb_t *rbtdb = arg;
8104 rdatasetheader_t *current, *next;
8105 unsigned int locknum;
8106
8107 current = data;
8108 locknum = current->node->locknum;
8109 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
8110 while (current != NULL) {
8111 next = current->next;
8112 free_rdataset(rbtdb, rbtdb->common.mctx, current);
8113 current = next;
8114 }
8115 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
8116 }
8117
8118 static bool
issecure(dns_db_t * db)8119 issecure(dns_db_t *db) {
8120 dns_rbtdb_t *rbtdb;
8121 bool secure;
8122
8123 rbtdb = (dns_rbtdb_t *)db;
8124
8125 REQUIRE(VALID_RBTDB(rbtdb));
8126
8127 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
8128 secure = (rbtdb->current_version->secure == dns_db_secure);
8129 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
8130
8131 return (secure);
8132 }
8133
8134 static bool
isdnssec(dns_db_t * db)8135 isdnssec(dns_db_t *db) {
8136 dns_rbtdb_t *rbtdb;
8137 bool dnssec;
8138
8139 rbtdb = (dns_rbtdb_t *)db;
8140
8141 REQUIRE(VALID_RBTDB(rbtdb));
8142
8143 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
8144 dnssec = (rbtdb->current_version->secure != dns_db_insecure);
8145 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
8146
8147 return (dnssec);
8148 }
8149
8150 static unsigned int
nodecount(dns_db_t * db)8151 nodecount(dns_db_t *db) {
8152 dns_rbtdb_t *rbtdb;
8153 unsigned int count;
8154
8155 rbtdb = (dns_rbtdb_t *)db;
8156
8157 REQUIRE(VALID_RBTDB(rbtdb));
8158
8159 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8160 count = dns_rbt_nodecount(rbtdb->tree);
8161 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8162
8163 return (count);
8164 }
8165
8166 static size_t
hashsize(dns_db_t * db)8167 hashsize(dns_db_t *db) {
8168 dns_rbtdb_t *rbtdb;
8169 size_t size;
8170
8171 rbtdb = (dns_rbtdb_t *)db;
8172
8173 REQUIRE(VALID_RBTDB(rbtdb));
8174
8175 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8176 size = dns_rbt_hashsize(rbtdb->tree);
8177 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8178
8179 return (size);
8180 }
8181
8182 static isc_result_t
adjusthashsize(dns_db_t * db,size_t size)8183 adjusthashsize(dns_db_t *db, size_t size) {
8184 isc_result_t result;
8185 dns_rbtdb_t *rbtdb;
8186
8187 rbtdb = (dns_rbtdb_t *)db;
8188
8189 REQUIRE(VALID_RBTDB(rbtdb));
8190
8191 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8192 result = dns_rbt_adjusthashsize(rbtdb->tree, size);
8193 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8194
8195 return (result);
8196 }
8197
8198 static void
settask(dns_db_t * db,isc_task_t * task)8199 settask(dns_db_t *db, isc_task_t *task) {
8200 dns_rbtdb_t *rbtdb;
8201
8202 rbtdb = (dns_rbtdb_t *)db;
8203
8204 REQUIRE(VALID_RBTDB(rbtdb));
8205
8206 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
8207 if (rbtdb->task != NULL) {
8208 isc_task_detach(&rbtdb->task);
8209 }
8210 if (task != NULL) {
8211 isc_task_attach(task, &rbtdb->task);
8212 }
8213 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
8214 }
8215
8216 static bool
ispersistent(dns_db_t * db)8217 ispersistent(dns_db_t *db) {
8218 UNUSED(db);
8219 return (false);
8220 }
8221
8222 static isc_result_t
getoriginnode(dns_db_t * db,dns_dbnode_t ** nodep)8223 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
8224 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8225 dns_rbtnode_t *onode;
8226 isc_result_t result = ISC_R_SUCCESS;
8227
8228 REQUIRE(VALID_RBTDB(rbtdb));
8229 REQUIRE(nodep != NULL && *nodep == NULL);
8230
8231 /* Note that the access to origin_node doesn't require a DB lock */
8232 onode = (dns_rbtnode_t *)rbtdb->origin_node;
8233 if (onode != NULL) {
8234 new_reference(rbtdb, onode, isc_rwlocktype_none);
8235 *nodep = rbtdb->origin_node;
8236 } else {
8237 INSIST(IS_CACHE(rbtdb));
8238 result = ISC_R_NOTFOUND;
8239 }
8240
8241 return (result);
8242 }
8243
8244 static isc_result_t
getnsec3parameters(dns_db_t * db,dns_dbversion_t * version,dns_hash_t * hash,uint8_t * flags,uint16_t * iterations,unsigned char * salt,size_t * salt_length)8245 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
8246 uint8_t *flags, uint16_t *iterations, unsigned char *salt,
8247 size_t *salt_length) {
8248 dns_rbtdb_t *rbtdb;
8249 isc_result_t result = ISC_R_NOTFOUND;
8250 rbtdb_version_t *rbtversion = version;
8251
8252 rbtdb = (dns_rbtdb_t *)db;
8253
8254 REQUIRE(VALID_RBTDB(rbtdb));
8255 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
8256
8257 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
8258 if (rbtversion == NULL) {
8259 rbtversion = rbtdb->current_version;
8260 }
8261
8262 if (rbtversion->havensec3) {
8263 if (hash != NULL) {
8264 *hash = rbtversion->hash;
8265 }
8266 if (salt != NULL && salt_length != NULL) {
8267 REQUIRE(*salt_length >= rbtversion->salt_length);
8268 memmove(salt, rbtversion->salt,
8269 rbtversion->salt_length);
8270 }
8271 if (salt_length != NULL) {
8272 *salt_length = rbtversion->salt_length;
8273 }
8274 if (iterations != NULL) {
8275 *iterations = rbtversion->iterations;
8276 }
8277 if (flags != NULL) {
8278 *flags = rbtversion->flags;
8279 }
8280 result = ISC_R_SUCCESS;
8281 }
8282 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
8283
8284 return (result);
8285 }
8286
8287 static isc_result_t
getsize(dns_db_t * db,dns_dbversion_t * version,uint64_t * records,uint64_t * xfrsize)8288 getsize(dns_db_t *db, dns_dbversion_t *version, uint64_t *records,
8289 uint64_t *xfrsize) {
8290 dns_rbtdb_t *rbtdb;
8291 isc_result_t result = ISC_R_SUCCESS;
8292 rbtdb_version_t *rbtversion = version;
8293
8294 rbtdb = (dns_rbtdb_t *)db;
8295
8296 REQUIRE(VALID_RBTDB(rbtdb));
8297 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
8298
8299 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
8300 if (rbtversion == NULL) {
8301 rbtversion = rbtdb->current_version;
8302 }
8303
8304 RWLOCK(&rbtversion->rwlock, isc_rwlocktype_read);
8305 if (records != NULL) {
8306 *records = rbtversion->records;
8307 }
8308
8309 if (xfrsize != NULL) {
8310 *xfrsize = rbtversion->xfrsize;
8311 }
8312 RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_read);
8313 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
8314
8315 return (result);
8316 }
8317
8318 static isc_result_t
setsigningtime(dns_db_t * db,dns_rdataset_t * rdataset,isc_stdtime_t resign)8319 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
8320 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8321 rdatasetheader_t *header, oldheader;
8322
8323 REQUIRE(VALID_RBTDB(rbtdb));
8324 REQUIRE(!IS_CACHE(rbtdb));
8325 REQUIRE(rdataset != NULL);
8326
8327 header = rdataset->private3;
8328 header--;
8329
8330 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
8331 isc_rwlocktype_write);
8332
8333 oldheader = *header;
8334 /*
8335 * Only break the heap invariant (by adjusting resign and resign_lsb)
8336 * if we are going to be restoring it by calling isc_heap_increased
8337 * or isc_heap_decreased.
8338 */
8339 if (resign != 0) {
8340 header->resign = (isc_stdtime_t)(dns_time64_from32(resign) >>
8341 1);
8342 header->resign_lsb = resign & 0x1;
8343 }
8344 if (header->heap_index != 0) {
8345 INSIST(RESIGN(header));
8346 if (resign == 0) {
8347 isc_heap_delete(rbtdb->heaps[header->node->locknum],
8348 header->heap_index);
8349 header->heap_index = 0;
8350 } else if (resign_sooner(header, &oldheader)) {
8351 isc_heap_increased(rbtdb->heaps[header->node->locknum],
8352 header->heap_index);
8353 } else if (resign_sooner(&oldheader, header)) {
8354 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
8355 header->heap_index);
8356 }
8357 } else if (resign != 0) {
8358 RDATASET_ATTR_SET(header, RDATASET_ATTR_RESIGN);
8359 resign_insert(rbtdb, header->node->locknum, header);
8360 }
8361 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
8362 isc_rwlocktype_write);
8363 return (ISC_R_SUCCESS);
8364 }
8365
8366 static isc_result_t
getsigningtime(dns_db_t * db,dns_rdataset_t * rdataset,dns_name_t * foundname)8367 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, dns_name_t *foundname) {
8368 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8369 rdatasetheader_t *header = NULL, *this;
8370 unsigned int i;
8371 isc_result_t result = ISC_R_NOTFOUND;
8372 unsigned int locknum = 0;
8373
8374 REQUIRE(VALID_RBTDB(rbtdb));
8375
8376 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8377
8378 for (i = 0; i < rbtdb->node_lock_count; i++) {
8379 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
8380
8381 /*
8382 * Find for the earliest signing time among all of the
8383 * heaps, each of which is covered by a different bucket
8384 * lock.
8385 */
8386 this = isc_heap_element(rbtdb->heaps[i], 1);
8387 if (this == NULL) {
8388 /* Nothing found; unlock and try the next heap. */
8389 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
8390 isc_rwlocktype_read);
8391 continue;
8392 }
8393
8394 if (header == NULL) {
8395 /*
8396 * Found a signing time: retain the bucket lock and
8397 * preserve the lock number so we can unlock it
8398 * later.
8399 */
8400 header = this;
8401 locknum = i;
8402 } else if (resign_sooner(this, header)) {
8403 /*
8404 * Found an earlier signing time; release the
8405 * previous bucket lock and retain this one instead.
8406 */
8407 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8408 isc_rwlocktype_read);
8409 header = this;
8410 locknum = i;
8411 } else {
8412 /*
8413 * Earliest signing time in this heap isn't
8414 * an improvement; unlock and try the next heap.
8415 */
8416 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
8417 isc_rwlocktype_read);
8418 }
8419 }
8420
8421 if (header != NULL) {
8422 /*
8423 * Found something; pass back the answer and unlock
8424 * the bucket.
8425 */
8426 bind_rdataset(rbtdb, header->node, header, 0,
8427 isc_rwlocktype_read, rdataset);
8428
8429 if (foundname != NULL) {
8430 dns_rbt_fullnamefromnode(header->node, foundname);
8431 }
8432
8433 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8434 isc_rwlocktype_read);
8435
8436 result = ISC_R_SUCCESS;
8437 }
8438
8439 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8440
8441 return (result);
8442 }
8443
8444 static void
resigned(dns_db_t * db,dns_rdataset_t * rdataset,dns_dbversion_t * version)8445 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version) {
8446 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
8447 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8448 dns_rbtnode_t *node;
8449 rdatasetheader_t *header;
8450
8451 REQUIRE(VALID_RBTDB(rbtdb));
8452 REQUIRE(rdataset != NULL);
8453 REQUIRE(rdataset->methods == &rdataset_methods);
8454 REQUIRE(rbtdb->future_version == rbtversion);
8455 REQUIRE(rbtversion != NULL);
8456 REQUIRE(rbtversion->writer);
8457 REQUIRE(rbtversion->rbtdb == rbtdb);
8458
8459 node = rdataset->private2;
8460 INSIST(node != NULL);
8461 header = rdataset->private3;
8462 INSIST(header != NULL);
8463 header--;
8464
8465 if (header->heap_index == 0) {
8466 return;
8467 }
8468
8469 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8470 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write);
8471 /*
8472 * Delete from heap and save to re-signed list so that it can
8473 * be restored if we backout of this change.
8474 */
8475 resign_delete(rbtdb, rbtversion, header);
8476 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
8477 isc_rwlocktype_write);
8478 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8479 }
8480
8481 static isc_result_t
setcachestats(dns_db_t * db,isc_stats_t * stats)8482 setcachestats(dns_db_t *db, isc_stats_t *stats) {
8483 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8484
8485 REQUIRE(VALID_RBTDB(rbtdb));
8486 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
8487 REQUIRE(stats != NULL);
8488
8489 isc_stats_attach(stats, &rbtdb->cachestats);
8490 return (ISC_R_SUCCESS);
8491 }
8492
8493 static isc_result_t
setgluecachestats(dns_db_t * db,isc_stats_t * stats)8494 setgluecachestats(dns_db_t *db, isc_stats_t *stats) {
8495 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8496
8497 REQUIRE(VALID_RBTDB(rbtdb));
8498 REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb));
8499 REQUIRE(stats != NULL);
8500
8501 isc_stats_attach(stats, &rbtdb->gluecachestats);
8502 return (ISC_R_SUCCESS);
8503 }
8504
8505 static dns_stats_t *
getrrsetstats(dns_db_t * db)8506 getrrsetstats(dns_db_t *db) {
8507 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8508
8509 REQUIRE(VALID_RBTDB(rbtdb));
8510 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
8511
8512 return (rbtdb->rrsetstats);
8513 }
8514
8515 static isc_result_t
nodefullname(dns_db_t * db,dns_dbnode_t * node,dns_name_t * name)8516 nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
8517 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8518 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
8519 isc_result_t result;
8520
8521 REQUIRE(VALID_RBTDB(rbtdb));
8522 REQUIRE(node != NULL);
8523 REQUIRE(name != NULL);
8524
8525 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8526 result = dns_rbt_fullnamefromnode(rbtnode, name);
8527 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8528
8529 return (result);
8530 }
8531
8532 static isc_result_t
setservestalettl(dns_db_t * db,dns_ttl_t ttl)8533 setservestalettl(dns_db_t *db, dns_ttl_t ttl) {
8534 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8535
8536 REQUIRE(VALID_RBTDB(rbtdb));
8537 REQUIRE(IS_CACHE(rbtdb));
8538
8539 /* currently no bounds checking. 0 means disable. */
8540 rbtdb->serve_stale_ttl = ttl;
8541 return (ISC_R_SUCCESS);
8542 }
8543
8544 static isc_result_t
getservestalettl(dns_db_t * db,dns_ttl_t * ttl)8545 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
8546 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8547
8548 REQUIRE(VALID_RBTDB(rbtdb));
8549 REQUIRE(IS_CACHE(rbtdb));
8550
8551 *ttl = rbtdb->serve_stale_ttl;
8552 return (ISC_R_SUCCESS);
8553 }
8554
8555 static isc_result_t
setservestalerefresh(dns_db_t * db,uint32_t interval)8556 setservestalerefresh(dns_db_t *db, uint32_t interval) {
8557 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8558
8559 REQUIRE(VALID_RBTDB(rbtdb));
8560 REQUIRE(IS_CACHE(rbtdb));
8561
8562 /* currently no bounds checking. 0 means disable. */
8563 rbtdb->serve_stale_refresh = interval;
8564 return (ISC_R_SUCCESS);
8565 }
8566
8567 static isc_result_t
getservestalerefresh(dns_db_t * db,uint32_t * interval)8568 getservestalerefresh(dns_db_t *db, uint32_t *interval) {
8569 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
8570
8571 REQUIRE(VALID_RBTDB(rbtdb));
8572 REQUIRE(IS_CACHE(rbtdb));
8573
8574 *interval = rbtdb->serve_stale_refresh;
8575 return (ISC_R_SUCCESS);
8576 }
8577
8578 static dns_dbmethods_t zone_methods = { attach,
8579 detach,
8580 beginload,
8581 endload,
8582 serialize,
8583 dump,
8584 currentversion,
8585 newversion,
8586 attachversion,
8587 closeversion,
8588 findnode,
8589 zone_find,
8590 zone_findzonecut,
8591 attachnode,
8592 detachnode,
8593 expirenode,
8594 printnode,
8595 createiterator,
8596 zone_findrdataset,
8597 allrdatasets,
8598 addrdataset,
8599 subtractrdataset,
8600 deleterdataset,
8601 issecure,
8602 nodecount,
8603 ispersistent,
8604 overmem,
8605 settask,
8606 getoriginnode,
8607 NULL, /* transfernode */
8608 getnsec3parameters,
8609 findnsec3node,
8610 setsigningtime,
8611 getsigningtime,
8612 resigned,
8613 isdnssec,
8614 NULL, /* getrrsetstats */
8615 NULL, /* rpz_attach */
8616 NULL, /* rpz_ready */
8617 NULL, /* findnodeext */
8618 NULL, /* findext */
8619 NULL, /* setcachestats */
8620 hashsize,
8621 nodefullname,
8622 getsize,
8623 NULL, /* setservestalettl */
8624 NULL, /* getservestalettl */
8625 NULL, /* setservestalerefresh */
8626 NULL, /* getservestalerefresh */
8627 setgluecachestats,
8628 adjusthashsize };
8629
8630 static dns_dbmethods_t cache_methods = { attach,
8631 detach,
8632 beginload,
8633 endload,
8634 NULL, /* serialize */
8635 dump,
8636 currentversion,
8637 newversion,
8638 attachversion,
8639 closeversion,
8640 findnode,
8641 cache_find,
8642 cache_findzonecut,
8643 attachnode,
8644 detachnode,
8645 expirenode,
8646 printnode,
8647 createiterator,
8648 cache_findrdataset,
8649 allrdatasets,
8650 addrdataset,
8651 subtractrdataset,
8652 deleterdataset,
8653 issecure,
8654 nodecount,
8655 ispersistent,
8656 overmem,
8657 settask,
8658 getoriginnode,
8659 NULL, /* transfernode */
8660 NULL, /* getnsec3parameters */
8661 NULL, /* findnsec3node */
8662 NULL, /* setsigningtime */
8663 NULL, /* getsigningtime */
8664 NULL, /* resigned */
8665 isdnssec,
8666 getrrsetstats,
8667 NULL, /* rpz_attach */
8668 NULL, /* rpz_ready */
8669 NULL, /* findnodeext */
8670 NULL, /* findext */
8671 setcachestats,
8672 hashsize,
8673 nodefullname,
8674 NULL, /* getsize */
8675 setservestalettl,
8676 getservestalettl,
8677 setservestalerefresh,
8678 getservestalerefresh,
8679 NULL,
8680 adjusthashsize };
8681
8682 isc_result_t
dns_rbtdb_create(isc_mem_t * mctx,const dns_name_t * origin,dns_dbtype_t type,dns_rdataclass_t rdclass,unsigned int argc,char * argv[],void * driverarg,dns_db_t ** dbp)8683 dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
8684 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
8685 void *driverarg, dns_db_t **dbp) {
8686 dns_rbtdb_t *rbtdb;
8687 isc_result_t result;
8688 int i;
8689 dns_name_t name;
8690 bool (*sooner)(void *, void *);
8691 isc_mem_t *hmctx = mctx;
8692
8693 /* Keep the compiler happy. */
8694 UNUSED(driverarg);
8695
8696 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
8697
8698 /*
8699 * If argv[0] exists, it points to a memory context to use for heap
8700 */
8701 if (argc != 0) {
8702 hmctx = (isc_mem_t *)argv[0];
8703 }
8704
8705 memset(rbtdb, '\0', sizeof(*rbtdb));
8706 dns_name_init(&rbtdb->common.origin, NULL);
8707 rbtdb->common.attributes = 0;
8708 if (type == dns_dbtype_cache) {
8709 rbtdb->common.methods = &cache_methods;
8710 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
8711 } else if (type == dns_dbtype_stub) {
8712 rbtdb->common.methods = &zone_methods;
8713 rbtdb->common.attributes |= DNS_DBATTR_STUB;
8714 } else {
8715 rbtdb->common.methods = &zone_methods;
8716 }
8717 rbtdb->common.rdclass = rdclass;
8718 rbtdb->common.mctx = NULL;
8719
8720 ISC_LIST_INIT(rbtdb->common.update_listeners);
8721
8722 RBTDB_INITLOCK(&rbtdb->lock);
8723
8724 isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
8725
8726 /*
8727 * Initialize node_lock_count in a generic way to support future
8728 * extension which allows the user to specify this value on creation.
8729 * Note that when specified for a cache DB it must be larger than 1
8730 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
8731 */
8732 if (rbtdb->node_lock_count == 0) {
8733 if (IS_CACHE(rbtdb)) {
8734 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
8735 } else {
8736 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
8737 }
8738 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
8739 result = ISC_R_RANGE;
8740 goto cleanup_tree_lock;
8741 }
8742 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
8743 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
8744 sizeof(rbtdb_nodelock_t));
8745
8746 rbtdb->cachestats = NULL;
8747 rbtdb->gluecachestats = NULL;
8748
8749 rbtdb->rrsetstats = NULL;
8750 if (IS_CACHE(rbtdb)) {
8751 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
8752 if (result != ISC_R_SUCCESS) {
8753 goto cleanup_node_locks;
8754 }
8755 rbtdb->rdatasets = isc_mem_get(
8756 mctx,
8757 rbtdb->node_lock_count * sizeof(rdatasetheaderlist_t));
8758 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
8759 ISC_LIST_INIT(rbtdb->rdatasets[i]);
8760 }
8761 } else {
8762 rbtdb->rdatasets = NULL;
8763 }
8764
8765 /*
8766 * Create the heaps.
8767 */
8768 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
8769 sizeof(isc_heap_t *));
8770 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
8771 rbtdb->heaps[i] = NULL;
8772 }
8773 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
8774 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
8775 isc_heap_create(hmctx, sooner, set_index, 0, &rbtdb->heaps[i]);
8776 }
8777
8778 /*
8779 * Create deadnode lists.
8780 */
8781 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
8782 sizeof(rbtnodelist_t));
8783 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
8784 ISC_LIST_INIT(rbtdb->deadnodes[i]);
8785 }
8786
8787 ISC_LIST_INIT(rbtdb->prunenodes);
8788
8789 rbtdb->active = rbtdb->node_lock_count;
8790
8791 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
8792 NODE_INITLOCK(&rbtdb->node_locks[i].lock);
8793 isc_refcount_init(&rbtdb->node_locks[i].references, 0);
8794 rbtdb->node_locks[i].exiting = false;
8795 }
8796
8797 /*
8798 * Attach to the mctx. The database will persist so long as there
8799 * are references to it, and attaching to the mctx ensures that our
8800 * mctx won't disappear out from under us.
8801 */
8802 isc_mem_attach(mctx, &rbtdb->common.mctx);
8803 isc_mem_attach(hmctx, &rbtdb->hmctx);
8804
8805 /*
8806 * Make a copy of the origin name.
8807 */
8808 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
8809 if (result != ISC_R_SUCCESS) {
8810 free_rbtdb(rbtdb, false, NULL);
8811 return (result);
8812 }
8813
8814 /*
8815 * Make the Red-Black Trees.
8816 */
8817 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
8818 if (result != ISC_R_SUCCESS) {
8819 free_rbtdb(rbtdb, false, NULL);
8820 return (result);
8821 }
8822
8823 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
8824 if (result != ISC_R_SUCCESS) {
8825 free_rbtdb(rbtdb, false, NULL);
8826 return (result);
8827 }
8828
8829 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
8830 if (result != ISC_R_SUCCESS) {
8831 free_rbtdb(rbtdb, false, NULL);
8832 return (result);
8833 }
8834
8835 /*
8836 * In order to set the node callback bit correctly in zone databases,
8837 * we need to know if the node has the origin name of the zone.
8838 * In loading_addrdataset() we could simply compare the new name
8839 * to the origin name, but this is expensive. Also, we don't know the
8840 * node name in addrdataset(), so we need another way of knowing the
8841 * zone's top.
8842 *
8843 * We now explicitly create a node for the zone's origin, and then
8844 * we simply remember the node's address. This is safe, because
8845 * the top-of-zone node can never be deleted, nor can its address
8846 * change.
8847 */
8848 if (!IS_CACHE(rbtdb)) {
8849 rbtdb->origin_node = NULL;
8850 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
8851 &rbtdb->origin_node);
8852 if (result != ISC_R_SUCCESS) {
8853 INSIST(result != ISC_R_EXISTS);
8854 free_rbtdb(rbtdb, false, NULL);
8855 return (result);
8856 }
8857 INSIST(rbtdb->origin_node != NULL);
8858 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
8859 /*
8860 * We need to give the origin node the right locknum.
8861 */
8862 dns_name_init(&name, NULL);
8863 dns_rbt_namefromnode(rbtdb->origin_node, &name);
8864 rbtdb->origin_node->locknum = rbtdb->origin_node->hashval %
8865 rbtdb->node_lock_count;
8866 /*
8867 * Add an apex node to the NSEC3 tree so that NSEC3 searches
8868 * return partial matches when there is only a single NSEC3
8869 * record in the tree.
8870 */
8871 rbtdb->nsec3_origin_node = NULL;
8872 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
8873 &rbtdb->nsec3_origin_node);
8874 if (result != ISC_R_SUCCESS) {
8875 INSIST(result != ISC_R_EXISTS);
8876 free_rbtdb(rbtdb, false, NULL);
8877 return (result);
8878 }
8879 rbtdb->nsec3_origin_node->nsec = DNS_RBT_NSEC_NSEC3;
8880 /*
8881 * We need to give the nsec3 origin node the right locknum.
8882 */
8883 dns_name_init(&name, NULL);
8884 dns_rbt_namefromnode(rbtdb->nsec3_origin_node, &name);
8885 rbtdb->nsec3_origin_node->locknum =
8886 rbtdb->nsec3_origin_node->hashval %
8887 rbtdb->node_lock_count;
8888 }
8889
8890 /*
8891 * Misc. Initialization.
8892 */
8893 isc_refcount_init(&rbtdb->references, 1);
8894 rbtdb->attributes = 0;
8895 rbtdb->task = NULL;
8896 rbtdb->serve_stale_ttl = 0;
8897
8898 /*
8899 * Version Initialization.
8900 */
8901 rbtdb->current_serial = 1;
8902 rbtdb->least_serial = 1;
8903 rbtdb->next_serial = 2;
8904 rbtdb->current_version = allocate_version(mctx, 1, 1, false);
8905 rbtdb->current_version->rbtdb = rbtdb;
8906 rbtdb->current_version->secure = dns_db_insecure;
8907 rbtdb->current_version->havensec3 = false;
8908 rbtdb->current_version->flags = 0;
8909 rbtdb->current_version->iterations = 0;
8910 rbtdb->current_version->hash = 0;
8911 rbtdb->current_version->salt_length = 0;
8912 memset(rbtdb->current_version->salt, 0,
8913 sizeof(rbtdb->current_version->salt));
8914 isc_rwlock_init(&rbtdb->current_version->rwlock, 0, 0);
8915 rbtdb->current_version->records = 0;
8916 rbtdb->current_version->xfrsize = 0;
8917 rbtdb->future_version = NULL;
8918 ISC_LIST_INIT(rbtdb->open_versions);
8919 /*
8920 * Keep the current version in the open list so that list operation
8921 * won't happen in normal lookup operations.
8922 */
8923 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
8924
8925 rbtdb->common.magic = DNS_DB_MAGIC;
8926 rbtdb->common.impmagic = RBTDB_MAGIC;
8927
8928 *dbp = (dns_db_t *)rbtdb;
8929
8930 return (ISC_R_SUCCESS);
8931
8932 cleanup_node_locks:
8933 isc_mem_put(mctx, rbtdb->node_locks,
8934 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
8935
8936 cleanup_tree_lock:
8937 isc_rwlock_destroy(&rbtdb->tree_lock);
8938 RBTDB_DESTROYLOCK(&rbtdb->lock);
8939 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
8940 return (result);
8941 }
8942
8943 /*
8944 * Slabbed Rdataset Methods
8945 */
8946
8947 static void
rdataset_disassociate(dns_rdataset_t * rdataset)8948 rdataset_disassociate(dns_rdataset_t *rdataset) {
8949 dns_db_t *db = rdataset->private1;
8950 dns_dbnode_t *node = rdataset->private2;
8951
8952 detachnode(db, &node);
8953 }
8954
8955 static isc_result_t
rdataset_first(dns_rdataset_t * rdataset)8956 rdataset_first(dns_rdataset_t *rdataset) {
8957 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8958 unsigned int count;
8959
8960 count = raw[0] * 256 + raw[1];
8961 if (count == 0) {
8962 rdataset->private5 = NULL;
8963 return (ISC_R_NOMORE);
8964 }
8965
8966 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
8967 raw += DNS_RDATASET_COUNT;
8968 }
8969
8970 raw += DNS_RDATASET_LENGTH;
8971
8972 /*
8973 * The privateuint4 field is the number of rdata beyond the
8974 * cursor position, so we decrement the total count by one
8975 * before storing it.
8976 *
8977 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
8978 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
8979 * to the first entry in the offset table.
8980 */
8981 count--;
8982 rdataset->privateuint4 = count;
8983 rdataset->private5 = raw;
8984
8985 return (ISC_R_SUCCESS);
8986 }
8987
8988 static isc_result_t
rdataset_next(dns_rdataset_t * rdataset)8989 rdataset_next(dns_rdataset_t *rdataset) {
8990 unsigned int count;
8991 unsigned int length;
8992 unsigned char *raw; /* RDATASLAB */
8993
8994 count = rdataset->privateuint4;
8995 if (count == 0) {
8996 return (ISC_R_NOMORE);
8997 }
8998 count--;
8999 rdataset->privateuint4 = count;
9000
9001 /*
9002 * Skip forward one record (length + 4) or one offset (4).
9003 */
9004 raw = rdataset->private5;
9005 #if DNS_RDATASET_FIXED
9006 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
9007 #endif /* DNS_RDATASET_FIXED */
9008 {
9009 length = raw[0] * 256 + raw[1];
9010 raw += length;
9011 }
9012
9013 rdataset->private5 = raw + DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
9014
9015 return (ISC_R_SUCCESS);
9016 }
9017
9018 static void
rdataset_current(dns_rdataset_t * rdataset,dns_rdata_t * rdata)9019 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
9020 unsigned char *raw = rdataset->private5; /* RDATASLAB */
9021 unsigned int length;
9022 isc_region_t r;
9023 unsigned int flags = 0;
9024
9025 REQUIRE(raw != NULL);
9026
9027 /*
9028 * Find the start of the record if not already in private5
9029 * then skip the length and order fields.
9030 */
9031 #if DNS_RDATASET_FIXED
9032 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
9033 unsigned int offset;
9034 offset = ((unsigned int)raw[0] << 24) +
9035 ((unsigned int)raw[1] << 16) +
9036 ((unsigned int)raw[2] << 8) + (unsigned int)raw[3];
9037 raw = rdataset->private3;
9038 raw += offset;
9039 }
9040 #endif /* if DNS_RDATASET_FIXED */
9041
9042 length = raw[0] * 256 + raw[1];
9043
9044 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
9045
9046 if (rdataset->type == dns_rdatatype_rrsig) {
9047 if (*raw & DNS_RDATASLAB_OFFLINE) {
9048 flags |= DNS_RDATA_OFFLINE;
9049 }
9050 length--;
9051 raw++;
9052 }
9053 r.length = length;
9054 r.base = raw;
9055 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
9056 rdata->flags |= flags;
9057 }
9058
9059 static void
rdataset_clone(dns_rdataset_t * source,dns_rdataset_t * target)9060 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
9061 dns_db_t *db = source->private1;
9062 dns_dbnode_t *node = source->private2;
9063 dns_dbnode_t *cloned_node = NULL;
9064
9065 attachnode(db, node, &cloned_node);
9066 INSIST(!ISC_LINK_LINKED(target, link));
9067 *target = *source;
9068 ISC_LINK_INIT(target, link);
9069
9070 /*
9071 * Reset iterator state.
9072 */
9073 target->privateuint4 = 0;
9074 target->private5 = NULL;
9075 }
9076
9077 static unsigned int
rdataset_count(dns_rdataset_t * rdataset)9078 rdataset_count(dns_rdataset_t *rdataset) {
9079 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9080 unsigned int count;
9081
9082 count = raw[0] * 256 + raw[1];
9083
9084 return (count);
9085 }
9086
9087 static isc_result_t
rdataset_getnoqname(dns_rdataset_t * rdataset,dns_name_t * name,dns_rdataset_t * nsec,dns_rdataset_t * nsecsig)9088 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
9089 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) {
9090 dns_db_t *db = rdataset->private1;
9091 dns_dbnode_t *node = rdataset->private2;
9092 dns_dbnode_t *cloned_node;
9093 const struct noqname *noqname = rdataset->private6;
9094
9095 cloned_node = NULL;
9096 attachnode(db, node, &cloned_node);
9097 nsec->methods = &slab_methods;
9098 nsec->rdclass = db->rdclass;
9099 nsec->type = noqname->type;
9100 nsec->covers = 0;
9101 nsec->ttl = rdataset->ttl;
9102 nsec->trust = rdataset->trust;
9103 nsec->private1 = rdataset->private1;
9104 nsec->private2 = rdataset->private2;
9105 nsec->private3 = noqname->neg;
9106 nsec->privateuint4 = 0;
9107 nsec->private5 = NULL;
9108 nsec->private6 = NULL;
9109 nsec->private7 = NULL;
9110
9111 cloned_node = NULL;
9112 attachnode(db, node, &cloned_node);
9113 nsecsig->methods = &slab_methods;
9114 nsecsig->rdclass = db->rdclass;
9115 nsecsig->type = dns_rdatatype_rrsig;
9116 nsecsig->covers = noqname->type;
9117 nsecsig->ttl = rdataset->ttl;
9118 nsecsig->trust = rdataset->trust;
9119 nsecsig->private1 = rdataset->private1;
9120 nsecsig->private2 = rdataset->private2;
9121 nsecsig->private3 = noqname->negsig;
9122 nsecsig->privateuint4 = 0;
9123 nsecsig->private5 = NULL;
9124 nsec->private6 = NULL;
9125 nsec->private7 = NULL;
9126
9127 dns_name_clone(&noqname->name, name);
9128
9129 return (ISC_R_SUCCESS);
9130 }
9131
9132 static isc_result_t
rdataset_getclosest(dns_rdataset_t * rdataset,dns_name_t * name,dns_rdataset_t * nsec,dns_rdataset_t * nsecsig)9133 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
9134 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) {
9135 dns_db_t *db = rdataset->private1;
9136 dns_dbnode_t *node = rdataset->private2;
9137 dns_dbnode_t *cloned_node;
9138 const struct noqname *closest = rdataset->private7;
9139
9140 cloned_node = NULL;
9141 attachnode(db, node, &cloned_node);
9142 nsec->methods = &slab_methods;
9143 nsec->rdclass = db->rdclass;
9144 nsec->type = closest->type;
9145 nsec->covers = 0;
9146 nsec->ttl = rdataset->ttl;
9147 nsec->trust = rdataset->trust;
9148 nsec->private1 = rdataset->private1;
9149 nsec->private2 = rdataset->private2;
9150 nsec->private3 = closest->neg;
9151 nsec->privateuint4 = 0;
9152 nsec->private5 = NULL;
9153 nsec->private6 = NULL;
9154 nsec->private7 = NULL;
9155
9156 cloned_node = NULL;
9157 attachnode(db, node, &cloned_node);
9158 nsecsig->methods = &slab_methods;
9159 nsecsig->rdclass = db->rdclass;
9160 nsecsig->type = dns_rdatatype_rrsig;
9161 nsecsig->covers = closest->type;
9162 nsecsig->ttl = rdataset->ttl;
9163 nsecsig->trust = rdataset->trust;
9164 nsecsig->private1 = rdataset->private1;
9165 nsecsig->private2 = rdataset->private2;
9166 nsecsig->private3 = closest->negsig;
9167 nsecsig->privateuint4 = 0;
9168 nsecsig->private5 = NULL;
9169 nsec->private6 = NULL;
9170 nsec->private7 = NULL;
9171
9172 dns_name_clone(&closest->name, name);
9173
9174 return (ISC_R_SUCCESS);
9175 }
9176
9177 static void
rdataset_settrust(dns_rdataset_t * rdataset,dns_trust_t trust)9178 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
9179 dns_rbtdb_t *rbtdb = rdataset->private1;
9180 dns_rbtnode_t *rbtnode = rdataset->private2;
9181 rdatasetheader_t *header = rdataset->private3;
9182
9183 header--;
9184 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9185 isc_rwlocktype_write);
9186 header->trust = rdataset->trust = trust;
9187 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9188 isc_rwlocktype_write);
9189 }
9190
9191 static void
rdataset_expire(dns_rdataset_t * rdataset)9192 rdataset_expire(dns_rdataset_t *rdataset) {
9193 dns_rbtdb_t *rbtdb = rdataset->private1;
9194 dns_rbtnode_t *rbtnode = rdataset->private2;
9195 rdatasetheader_t *header = rdataset->private3;
9196
9197 header--;
9198 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9199 isc_rwlocktype_write);
9200 expire_header(rbtdb, header, false, expire_flush);
9201 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9202 isc_rwlocktype_write);
9203 }
9204
9205 static void
rdataset_clearprefetch(dns_rdataset_t * rdataset)9206 rdataset_clearprefetch(dns_rdataset_t *rdataset) {
9207 dns_rbtdb_t *rbtdb = rdataset->private1;
9208 dns_rbtnode_t *rbtnode = rdataset->private2;
9209 rdatasetheader_t *header = rdataset->private3;
9210
9211 header--;
9212 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9213 isc_rwlocktype_write);
9214 RDATASET_ATTR_CLR(header, RDATASET_ATTR_PREFETCH);
9215 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9216 isc_rwlocktype_write);
9217 }
9218
9219 /*
9220 * Rdataset Iterator Methods
9221 */
9222
9223 static void
rdatasetiter_destroy(dns_rdatasetiter_t ** iteratorp)9224 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
9225 rbtdb_rdatasetiter_t *rbtiterator;
9226
9227 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
9228
9229 if (rbtiterator->common.version != NULL) {
9230 closeversion(rbtiterator->common.db,
9231 &rbtiterator->common.version, false);
9232 }
9233 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
9234 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
9235 sizeof(*rbtiterator));
9236
9237 *iteratorp = NULL;
9238 }
9239
9240 static bool
iterator_active(dns_rbtdb_t * rbtdb,rbtdb_rdatasetiter_t * rbtiterator,rdatasetheader_t * header)9241 iterator_active(dns_rbtdb_t *rbtdb, rbtdb_rdatasetiter_t *rbtiterator,
9242 rdatasetheader_t *header) {
9243 dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl;
9244
9245 /*
9246 * Is this a "this rdataset doesn't exist" record?
9247 */
9248 if (NONEXISTENT(header)) {
9249 return (false);
9250 }
9251
9252 /*
9253 * If this is a zone or this header still active then return it.
9254 */
9255 if (!IS_CACHE(rbtdb) || ACTIVE(header, rbtiterator->common.now)) {
9256 return (true);
9257 }
9258
9259 /*
9260 * If we are not returning stale records or the rdataset is
9261 * too old don't return it.
9262 */
9263 if (!STALEOK(rbtiterator) || (rbtiterator->common.now > stale_ttl)) {
9264 return (false);
9265 }
9266 return (true);
9267 }
9268
9269 static isc_result_t
rdatasetiter_first(dns_rdatasetiter_t * iterator)9270 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
9271 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
9272 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
9273 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
9274 rbtdb_version_t *rbtversion = rbtiterator->common.version;
9275 rdatasetheader_t *header, *top_next;
9276 rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial;
9277
9278 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9279 isc_rwlocktype_read);
9280
9281 for (header = rbtnode->data; header != NULL; header = top_next) {
9282 top_next = header->next;
9283 do {
9284 if (EXPIREDOK(rbtiterator)) {
9285 if (!NONEXISTENT(header)) {
9286 break;
9287 }
9288 header = header->down;
9289 } else if (header->serial <= serial && !IGNORE(header))
9290 {
9291 if (!iterator_active(rbtdb, rbtiterator,
9292 header))
9293 {
9294 header = NULL;
9295 }
9296 break;
9297 } else {
9298 header = header->down;
9299 }
9300 } while (header != NULL);
9301 if (header != NULL) {
9302 break;
9303 }
9304 }
9305
9306 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9307 isc_rwlocktype_read);
9308
9309 rbtiterator->current = header;
9310
9311 if (header == NULL) {
9312 return (ISC_R_NOMORE);
9313 }
9314
9315 return (ISC_R_SUCCESS);
9316 }
9317
9318 static isc_result_t
rdatasetiter_next(dns_rdatasetiter_t * iterator)9319 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
9320 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
9321 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
9322 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
9323 rbtdb_version_t *rbtversion = rbtiterator->common.version;
9324 rdatasetheader_t *header, *top_next;
9325 rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial;
9326 rbtdb_rdatatype_t type, negtype;
9327 dns_rdatatype_t rdtype, covers;
9328 bool expiredok = EXPIREDOK(rbtiterator);
9329
9330 header = rbtiterator->current;
9331 if (header == NULL) {
9332 return (ISC_R_NOMORE);
9333 }
9334
9335 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9336 isc_rwlocktype_read);
9337
9338 type = header->type;
9339 rdtype = RBTDB_RDATATYPE_BASE(header->type);
9340 if (NEGATIVE(header)) {
9341 covers = RBTDB_RDATATYPE_EXT(header->type);
9342 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
9343 } else {
9344 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
9345 }
9346
9347 /*
9348 * Find the start of the header chain for the next type
9349 * by walking back up the list.
9350 */
9351 top_next = header->next;
9352 while (top_next != NULL &&
9353 (top_next->type == type || top_next->type == negtype))
9354 {
9355 top_next = top_next->next;
9356 }
9357 if (expiredok) {
9358 /*
9359 * Keep walking down the list if possible or
9360 * start the next type.
9361 */
9362 header = header->down != NULL ? header->down : top_next;
9363 } else {
9364 header = top_next;
9365 }
9366 for (; header != NULL; header = top_next) {
9367 top_next = header->next;
9368 do {
9369 if (expiredok) {
9370 if (!NONEXISTENT(header)) {
9371 break;
9372 }
9373 header = header->down;
9374 } else if (header->serial <= serial && !IGNORE(header))
9375 {
9376 if (!iterator_active(rbtdb, rbtiterator,
9377 header))
9378 {
9379 header = NULL;
9380 }
9381 break;
9382 } else {
9383 header = header->down;
9384 }
9385 } while (header != NULL);
9386 if (header != NULL) {
9387 break;
9388 }
9389 /*
9390 * Find the start of the header chain for the next type
9391 * by walking back up the list.
9392 */
9393 while (top_next != NULL &&
9394 (top_next->type == type || top_next->type == negtype))
9395 {
9396 top_next = top_next->next;
9397 }
9398 }
9399
9400 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9401 isc_rwlocktype_read);
9402
9403 rbtiterator->current = header;
9404
9405 if (header == NULL) {
9406 return (ISC_R_NOMORE);
9407 }
9408
9409 return (ISC_R_SUCCESS);
9410 }
9411
9412 static void
rdatasetiter_current(dns_rdatasetiter_t * iterator,dns_rdataset_t * rdataset)9413 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
9414 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
9415 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
9416 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
9417 rdatasetheader_t *header;
9418
9419 header = rbtiterator->current;
9420 REQUIRE(header != NULL);
9421
9422 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9423 isc_rwlocktype_read);
9424
9425 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
9426 isc_rwlocktype_read, rdataset);
9427
9428 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9429 isc_rwlocktype_read);
9430 }
9431
9432 /*
9433 * Database Iterator Methods
9434 */
9435
9436 static void
reference_iter_node(rbtdb_dbiterator_t * rbtdbiter)9437 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
9438 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9439 dns_rbtnode_t *node = rbtdbiter->node;
9440
9441 if (node == NULL) {
9442 return;
9443 }
9444
9445 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
9446 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
9447 }
9448
9449 static void
dereference_iter_node(rbtdb_dbiterator_t * rbtdbiter)9450 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
9451 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9452 dns_rbtnode_t *node = rbtdbiter->node;
9453 nodelock_t *lock;
9454
9455 if (node == NULL) {
9456 return;
9457 }
9458
9459 lock = &rbtdb->node_locks[node->locknum].lock;
9460 NODE_LOCK(lock, isc_rwlocktype_read);
9461 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
9462 rbtdbiter->tree_locked, false);
9463 NODE_UNLOCK(lock, isc_rwlocktype_read);
9464
9465 rbtdbiter->node = NULL;
9466 }
9467
9468 static void
flush_deletions(rbtdb_dbiterator_t * rbtdbiter)9469 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
9470 dns_rbtnode_t *node;
9471 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9472 bool was_read_locked = false;
9473 nodelock_t *lock;
9474 int i;
9475
9476 if (rbtdbiter->delcnt != 0) {
9477 /*
9478 * Note that "%d node of %d in tree" can report things like
9479 * "flush_deletions: 59 nodes of 41 in tree". This means
9480 * That some nodes appear on the deletions list more than
9481 * once. Only the last occurrence will actually be deleted.
9482 */
9483 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
9484 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
9485 "flush_deletions: %d nodes of %d in tree",
9486 rbtdbiter->delcnt,
9487 dns_rbt_nodecount(rbtdb->tree));
9488
9489 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
9490 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9491 was_read_locked = true;
9492 }
9493 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
9494 rbtdbiter->tree_locked = isc_rwlocktype_write;
9495
9496 for (i = 0; i < rbtdbiter->delcnt; i++) {
9497 node = rbtdbiter->deletions[i];
9498 lock = &rbtdb->node_locks[node->locknum].lock;
9499
9500 NODE_LOCK(lock, isc_rwlocktype_read);
9501 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
9502 rbtdbiter->tree_locked, false);
9503 NODE_UNLOCK(lock, isc_rwlocktype_read);
9504 }
9505
9506 rbtdbiter->delcnt = 0;
9507
9508 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
9509 if (was_read_locked) {
9510 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9511 rbtdbiter->tree_locked = isc_rwlocktype_read;
9512 } else {
9513 rbtdbiter->tree_locked = isc_rwlocktype_none;
9514 }
9515 }
9516 }
9517
9518 static void
resume_iteration(rbtdb_dbiterator_t * rbtdbiter)9519 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
9520 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9521
9522 REQUIRE(rbtdbiter->paused);
9523 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
9524
9525 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9526 rbtdbiter->tree_locked = isc_rwlocktype_read;
9527
9528 rbtdbiter->paused = false;
9529 }
9530
9531 static void
dbiterator_destroy(dns_dbiterator_t ** iteratorp)9532 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
9533 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
9534 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
9535 dns_db_t *db = NULL;
9536
9537 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
9538 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9539 rbtdbiter->tree_locked = isc_rwlocktype_none;
9540 } else {
9541 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
9542 }
9543
9544 dereference_iter_node(rbtdbiter);
9545
9546 flush_deletions(rbtdbiter);
9547
9548 dns_db_attach(rbtdbiter->common.db, &db);
9549 dns_db_detach(&rbtdbiter->common.db);
9550
9551 dns_rbtnodechain_reset(&rbtdbiter->chain);
9552 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9553 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
9554 dns_db_detach(&db);
9555
9556 *iteratorp = NULL;
9557 }
9558
9559 static isc_result_t
dbiterator_first(dns_dbiterator_t * iterator)9560 dbiterator_first(dns_dbiterator_t *iterator) {
9561 isc_result_t result;
9562 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9563 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9564 dns_name_t *name, *origin;
9565
9566 if (rbtdbiter->result != ISC_R_SUCCESS &&
9567 rbtdbiter->result != ISC_R_NOTFOUND &&
9568 rbtdbiter->result != DNS_R_PARTIALMATCH &&
9569 rbtdbiter->result != ISC_R_NOMORE)
9570 {
9571 return (rbtdbiter->result);
9572 }
9573
9574 if (rbtdbiter->paused) {
9575 resume_iteration(rbtdbiter);
9576 }
9577
9578 dereference_iter_node(rbtdbiter);
9579
9580 name = dns_fixedname_name(&rbtdbiter->name);
9581 origin = dns_fixedname_name(&rbtdbiter->origin);
9582 dns_rbtnodechain_reset(&rbtdbiter->chain);
9583 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9584
9585 if (rbtdbiter->nsec3only) {
9586 rbtdbiter->current = &rbtdbiter->nsec3chain;
9587 result = dns_rbtnodechain_first(rbtdbiter->current,
9588 rbtdb->nsec3, name, origin);
9589 } else {
9590 rbtdbiter->current = &rbtdbiter->chain;
9591 result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree,
9592 name, origin);
9593 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
9594 rbtdbiter->current = &rbtdbiter->nsec3chain;
9595 result = dns_rbtnodechain_first(
9596 rbtdbiter->current, rbtdb->nsec3, name, origin);
9597 }
9598 }
9599 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
9600 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9601 NULL, &rbtdbiter->node);
9602 if (result == ISC_R_SUCCESS) {
9603 rbtdbiter->new_origin = true;
9604 reference_iter_node(rbtdbiter);
9605 }
9606 } else {
9607 INSIST(result == ISC_R_NOTFOUND);
9608 result = ISC_R_NOMORE; /* The tree is empty. */
9609 }
9610
9611 rbtdbiter->result = result;
9612
9613 if (result != ISC_R_SUCCESS) {
9614 ENSURE(!rbtdbiter->paused);
9615 }
9616
9617 return (result);
9618 }
9619
9620 static isc_result_t
dbiterator_last(dns_dbiterator_t * iterator)9621 dbiterator_last(dns_dbiterator_t *iterator) {
9622 isc_result_t result;
9623 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9624 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9625 dns_name_t *name, *origin;
9626
9627 if (rbtdbiter->result != ISC_R_SUCCESS &&
9628 rbtdbiter->result != ISC_R_NOTFOUND &&
9629 rbtdbiter->result != DNS_R_PARTIALMATCH &&
9630 rbtdbiter->result != ISC_R_NOMORE)
9631 {
9632 return (rbtdbiter->result);
9633 }
9634
9635 if (rbtdbiter->paused) {
9636 resume_iteration(rbtdbiter);
9637 }
9638
9639 dereference_iter_node(rbtdbiter);
9640
9641 name = dns_fixedname_name(&rbtdbiter->name);
9642 origin = dns_fixedname_name(&rbtdbiter->origin);
9643 dns_rbtnodechain_reset(&rbtdbiter->chain);
9644 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9645
9646 result = ISC_R_NOTFOUND;
9647 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
9648 rbtdbiter->current = &rbtdbiter->nsec3chain;
9649 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3,
9650 name, origin);
9651 }
9652 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
9653 rbtdbiter->current = &rbtdbiter->chain;
9654 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
9655 name, origin);
9656 }
9657 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
9658 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9659 NULL, &rbtdbiter->node);
9660 if (result == ISC_R_SUCCESS) {
9661 rbtdbiter->new_origin = true;
9662 reference_iter_node(rbtdbiter);
9663 }
9664 } else {
9665 INSIST(result == ISC_R_NOTFOUND);
9666 result = ISC_R_NOMORE; /* The tree is empty. */
9667 }
9668
9669 rbtdbiter->result = result;
9670
9671 return (result);
9672 }
9673
9674 static isc_result_t
dbiterator_seek(dns_dbiterator_t * iterator,const dns_name_t * name)9675 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name) {
9676 isc_result_t result, tresult;
9677 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9678 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9679 dns_name_t *iname, *origin;
9680
9681 if (rbtdbiter->result != ISC_R_SUCCESS &&
9682 rbtdbiter->result != ISC_R_NOTFOUND &&
9683 rbtdbiter->result != DNS_R_PARTIALMATCH &&
9684 rbtdbiter->result != ISC_R_NOMORE)
9685 {
9686 return (rbtdbiter->result);
9687 }
9688
9689 if (rbtdbiter->paused) {
9690 resume_iteration(rbtdbiter);
9691 }
9692
9693 dereference_iter_node(rbtdbiter);
9694
9695 iname = dns_fixedname_name(&rbtdbiter->name);
9696 origin = dns_fixedname_name(&rbtdbiter->origin);
9697 dns_rbtnodechain_reset(&rbtdbiter->chain);
9698 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
9699
9700 if (rbtdbiter->nsec3only) {
9701 rbtdbiter->current = &rbtdbiter->nsec3chain;
9702 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
9703 &rbtdbiter->node, rbtdbiter->current,
9704 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9705 } else if (rbtdbiter->nonsec3) {
9706 rbtdbiter->current = &rbtdbiter->chain;
9707 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
9708 &rbtdbiter->node, rbtdbiter->current,
9709 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9710 } else {
9711 /*
9712 * Stay on main chain if not found on either chain.
9713 */
9714 rbtdbiter->current = &rbtdbiter->chain;
9715 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
9716 &rbtdbiter->node, rbtdbiter->current,
9717 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
9718 if (result == DNS_R_PARTIALMATCH) {
9719 dns_rbtnode_t *node = NULL;
9720 tresult = dns_rbt_findnode(
9721 rbtdb->nsec3, name, NULL, &node,
9722 &rbtdbiter->nsec3chain, DNS_RBTFIND_EMPTYDATA,
9723 NULL, NULL);
9724 if (tresult == ISC_R_SUCCESS) {
9725 rbtdbiter->node = node;
9726 rbtdbiter->current = &rbtdbiter->nsec3chain;
9727 result = tresult;
9728 }
9729 }
9730 }
9731
9732 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
9733 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
9734 origin, NULL);
9735 if (tresult == ISC_R_SUCCESS) {
9736 rbtdbiter->new_origin = true;
9737 reference_iter_node(rbtdbiter);
9738 } else {
9739 result = tresult;
9740 rbtdbiter->node = NULL;
9741 }
9742 } else {
9743 rbtdbiter->node = NULL;
9744 }
9745
9746 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS
9747 : result;
9748
9749 return (result);
9750 }
9751
9752 static isc_result_t
dbiterator_prev(dns_dbiterator_t * iterator)9753 dbiterator_prev(dns_dbiterator_t *iterator) {
9754 isc_result_t result;
9755 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9756 dns_name_t *name, *origin;
9757 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9758
9759 REQUIRE(rbtdbiter->node != NULL);
9760
9761 if (rbtdbiter->result != ISC_R_SUCCESS) {
9762 return (rbtdbiter->result);
9763 }
9764
9765 if (rbtdbiter->paused) {
9766 resume_iteration(rbtdbiter);
9767 }
9768
9769 name = dns_fixedname_name(&rbtdbiter->name);
9770 origin = dns_fixedname_name(&rbtdbiter->origin);
9771 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
9772 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
9773 !rbtdbiter->nonsec3 && &rbtdbiter->nsec3chain == rbtdbiter->current)
9774 {
9775 rbtdbiter->current = &rbtdbiter->chain;
9776 dns_rbtnodechain_reset(rbtdbiter->current);
9777 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
9778 name, origin);
9779 if (result == ISC_R_NOTFOUND) {
9780 result = ISC_R_NOMORE;
9781 }
9782 }
9783
9784 dereference_iter_node(rbtdbiter);
9785
9786 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
9787 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN);
9788 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9789 NULL, &rbtdbiter->node);
9790 }
9791
9792 if (result == ISC_R_SUCCESS) {
9793 reference_iter_node(rbtdbiter);
9794 }
9795
9796 rbtdbiter->result = result;
9797
9798 return (result);
9799 }
9800
9801 static isc_result_t
dbiterator_next(dns_dbiterator_t * iterator)9802 dbiterator_next(dns_dbiterator_t *iterator) {
9803 isc_result_t result;
9804 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9805 dns_name_t *name, *origin;
9806 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9807
9808 REQUIRE(rbtdbiter->node != NULL);
9809
9810 if (rbtdbiter->result != ISC_R_SUCCESS) {
9811 return (rbtdbiter->result);
9812 }
9813
9814 if (rbtdbiter->paused) {
9815 resume_iteration(rbtdbiter);
9816 }
9817
9818 name = dns_fixedname_name(&rbtdbiter->name);
9819 origin = dns_fixedname_name(&rbtdbiter->origin);
9820 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
9821 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
9822 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current)
9823 {
9824 rbtdbiter->current = &rbtdbiter->nsec3chain;
9825 dns_rbtnodechain_reset(rbtdbiter->current);
9826 result = dns_rbtnodechain_first(rbtdbiter->current,
9827 rbtdb->nsec3, name, origin);
9828 if (result == ISC_R_NOTFOUND) {
9829 result = ISC_R_NOMORE;
9830 }
9831 }
9832
9833 dereference_iter_node(rbtdbiter);
9834
9835 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
9836 rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN);
9837 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
9838 NULL, &rbtdbiter->node);
9839 }
9840 if (result == ISC_R_SUCCESS) {
9841 reference_iter_node(rbtdbiter);
9842 }
9843
9844 rbtdbiter->result = result;
9845
9846 return (result);
9847 }
9848
9849 static isc_result_t
dbiterator_current(dns_dbiterator_t * iterator,dns_dbnode_t ** nodep,dns_name_t * name)9850 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
9851 dns_name_t *name) {
9852 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9853 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9854 dns_rbtnode_t *node = rbtdbiter->node;
9855 isc_result_t result;
9856 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
9857 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
9858
9859 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
9860 REQUIRE(rbtdbiter->node != NULL);
9861
9862 if (rbtdbiter->paused) {
9863 resume_iteration(rbtdbiter);
9864 }
9865
9866 if (name != NULL) {
9867 if (rbtdbiter->common.relative_names) {
9868 origin = NULL;
9869 }
9870 result = dns_name_concatenate(nodename, origin, name, NULL);
9871 if (result != ISC_R_SUCCESS) {
9872 return (result);
9873 }
9874 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin) {
9875 result = DNS_R_NEWORIGIN;
9876 }
9877 } else {
9878 result = ISC_R_SUCCESS;
9879 }
9880
9881 new_reference(rbtdb, node, isc_rwlocktype_none);
9882
9883 *nodep = rbtdbiter->node;
9884
9885 if (iterator->cleaning && result == ISC_R_SUCCESS) {
9886 isc_result_t expire_result;
9887
9888 /*
9889 * If the deletion array is full, flush it before trying
9890 * to expire the current node. The current node can't
9891 * fully deleted while the iteration cursor is still on it.
9892 */
9893 if (rbtdbiter->delcnt == DELETION_BATCH_MAX) {
9894 flush_deletions(rbtdbiter);
9895 }
9896
9897 expire_result = expirenode(iterator->db, *nodep, 0);
9898
9899 /*
9900 * expirenode() currently always returns success.
9901 */
9902 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
9903 rbtdbiter->deletions[rbtdbiter->delcnt++] = node;
9904 isc_refcount_increment(&node->references);
9905 }
9906 }
9907
9908 return (result);
9909 }
9910
9911 static isc_result_t
dbiterator_pause(dns_dbiterator_t * iterator)9912 dbiterator_pause(dns_dbiterator_t *iterator) {
9913 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
9914 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9915
9916 if (rbtdbiter->result != ISC_R_SUCCESS &&
9917 rbtdbiter->result != ISC_R_NOTFOUND &&
9918 rbtdbiter->result != DNS_R_PARTIALMATCH &&
9919 rbtdbiter->result != ISC_R_NOMORE)
9920 {
9921 return (rbtdbiter->result);
9922 }
9923
9924 if (rbtdbiter->paused) {
9925 return (ISC_R_SUCCESS);
9926 }
9927
9928 rbtdbiter->paused = true;
9929
9930 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
9931 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
9932 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
9933 rbtdbiter->tree_locked = isc_rwlocktype_none;
9934 }
9935
9936 flush_deletions(rbtdbiter);
9937
9938 return (ISC_R_SUCCESS);
9939 }
9940
9941 static isc_result_t
dbiterator_origin(dns_dbiterator_t * iterator,dns_name_t * name)9942 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
9943 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
9944 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
9945
9946 if (rbtdbiter->result != ISC_R_SUCCESS) {
9947 return (rbtdbiter->result);
9948 }
9949
9950 dns_name_copynf(origin, name);
9951 return (ISC_R_SUCCESS);
9952 }
9953
9954 static void
setownercase(rdatasetheader_t * header,const dns_name_t * name)9955 setownercase(rdatasetheader_t *header, const dns_name_t *name) {
9956 unsigned int i;
9957 bool fully_lower;
9958
9959 /*
9960 * We do not need to worry about label lengths as they are all
9961 * less than or equal to 63.
9962 */
9963 memset(header->upper, 0, sizeof(header->upper));
9964 fully_lower = true;
9965 for (i = 0; i < name->length; i++) {
9966 if (isupper(name->ndata[i])) {
9967 header->upper[i / 8] |= 1 << (i % 8);
9968 fully_lower = false;
9969 }
9970 }
9971 RDATASET_ATTR_SET(header, RDATASET_ATTR_CASESET);
9972 if (ISC_LIKELY(fully_lower)) {
9973 RDATASET_ATTR_SET(header, RDATASET_ATTR_CASEFULLYLOWER);
9974 }
9975 }
9976
9977 static void
rdataset_setownercase(dns_rdataset_t * rdataset,const dns_name_t * name)9978 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name) {
9979 dns_rbtdb_t *rbtdb = rdataset->private1;
9980 dns_rbtnode_t *rbtnode = rdataset->private2;
9981 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9982 rdatasetheader_t *header;
9983
9984 header = (struct rdatasetheader *)(raw - sizeof(*header));
9985
9986 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9987 isc_rwlocktype_write);
9988 setownercase(header, name);
9989 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
9990 isc_rwlocktype_write);
9991 }
9992
9993 static void
rdataset_getownercase(const dns_rdataset_t * rdataset,dns_name_t * name)9994 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name) {
9995 dns_rbtdb_t *rbtdb = rdataset->private1;
9996 dns_rbtnode_t *rbtnode = rdataset->private2;
9997 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9998 rdatasetheader_t *header = NULL;
9999 uint8_t mask = (1 << 7);
10000 uint8_t bits = 0;
10001
10002 header = (struct rdatasetheader *)(raw - sizeof(*header));
10003
10004 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
10005 isc_rwlocktype_read);
10006
10007 if (!CASESET(header)) {
10008 goto unlock;
10009 }
10010
10011 if (ISC_LIKELY(CASEFULLYLOWER(header))) {
10012 for (size_t i = 0; i < name->length; i++) {
10013 name->ndata[i] = tolower(name->ndata[i]);
10014 }
10015 } else {
10016 for (size_t i = 0; i < name->length; i++) {
10017 if (mask == (1 << 7)) {
10018 bits = header->upper[i / 8];
10019 mask = 1;
10020 } else {
10021 mask <<= 1;
10022 }
10023
10024 name->ndata[i] = ((bits & mask) != 0)
10025 ? toupper(name->ndata[i])
10026 : tolower(name->ndata[i]);
10027 }
10028 }
10029
10030 unlock:
10031 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
10032 isc_rwlocktype_read);
10033 }
10034
10035 struct rbtdb_glue {
10036 struct rbtdb_glue *next;
10037 dns_fixedname_t fixedname;
10038 dns_rdataset_t rdataset_a;
10039 dns_rdataset_t sigrdataset_a;
10040 dns_rdataset_t rdataset_aaaa;
10041 dns_rdataset_t sigrdataset_aaaa;
10042 };
10043
10044 typedef struct {
10045 rbtdb_glue_t *glue_list;
10046 dns_rbtdb_t *rbtdb;
10047 rbtdb_version_t *rbtversion;
10048 } rbtdb_glue_additionaldata_ctx_t;
10049
10050 static void
free_gluelist(rbtdb_glue_t * glue_list,dns_rbtdb_t * rbtdb)10051 free_gluelist(rbtdb_glue_t *glue_list, dns_rbtdb_t *rbtdb) {
10052 rbtdb_glue_t *cur, *cur_next;
10053
10054 if (glue_list == (void *)-1) {
10055 return;
10056 }
10057
10058 cur = glue_list;
10059 while (cur != NULL) {
10060 cur_next = cur->next;
10061
10062 if (dns_rdataset_isassociated(&cur->rdataset_a)) {
10063 dns_rdataset_disassociate(&cur->rdataset_a);
10064 }
10065 if (dns_rdataset_isassociated(&cur->sigrdataset_a)) {
10066 dns_rdataset_disassociate(&cur->sigrdataset_a);
10067 }
10068
10069 if (dns_rdataset_isassociated(&cur->rdataset_aaaa)) {
10070 dns_rdataset_disassociate(&cur->rdataset_aaaa);
10071 }
10072 if (dns_rdataset_isassociated(&cur->sigrdataset_aaaa)) {
10073 dns_rdataset_disassociate(&cur->sigrdataset_aaaa);
10074 }
10075
10076 dns_rdataset_invalidate(&cur->rdataset_a);
10077 dns_rdataset_invalidate(&cur->sigrdataset_a);
10078 dns_rdataset_invalidate(&cur->rdataset_aaaa);
10079 dns_rdataset_invalidate(&cur->sigrdataset_aaaa);
10080
10081 isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur));
10082 cur = cur_next;
10083 }
10084 }
10085
10086 static void
free_gluetable(rbtdb_version_t * version)10087 free_gluetable(rbtdb_version_t *version) {
10088 dns_rbtdb_t *rbtdb;
10089 size_t size, i;
10090
10091 RWLOCK(&version->glue_rwlock, isc_rwlocktype_write);
10092
10093 rbtdb = version->rbtdb;
10094
10095 for (i = 0; i < HASHSIZE(version->glue_table_bits); i++) {
10096 rbtdb_glue_table_node_t *cur, *cur_next;
10097
10098 cur = version->glue_table[i];
10099 while (cur != NULL) {
10100 cur_next = cur->next;
10101 /* isc_refcount_decrement(&cur->node->references); */
10102 cur->node = NULL;
10103 free_gluelist(cur->glue_list, rbtdb);
10104 cur->glue_list = NULL;
10105 isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur));
10106 cur = cur_next;
10107 }
10108 version->glue_table[i] = NULL;
10109 }
10110
10111 size = HASHSIZE(version->glue_table_bits) *
10112 sizeof(*version->glue_table);
10113 isc_mem_put(rbtdb->common.mctx, version->glue_table, size);
10114
10115 RWUNLOCK(&version->glue_rwlock, isc_rwlocktype_write);
10116 }
10117
10118 static uint32_t
rehash_bits(rbtdb_version_t * version,size_t newcount)10119 rehash_bits(rbtdb_version_t *version, size_t newcount) {
10120 uint32_t oldbits = version->glue_table_bits;
10121 uint32_t newbits = oldbits;
10122
10123 while (newcount >= HASHSIZE(newbits) &&
10124 newbits <= RBTDB_GLUE_TABLE_MAX_BITS)
10125 {
10126 newbits += 1;
10127 }
10128
10129 return (newbits);
10130 }
10131
10132 /*%
10133 * Write lock (version->glue_rwlock) must be held.
10134 */
10135 static void
rehash_gluetable(rbtdb_version_t * version)10136 rehash_gluetable(rbtdb_version_t *version) {
10137 uint32_t oldbits, newbits;
10138 size_t newsize, oldcount, i;
10139 rbtdb_glue_table_node_t **oldtable;
10140
10141 oldbits = version->glue_table_bits;
10142 oldcount = HASHSIZE(oldbits);
10143 oldtable = version->glue_table;
10144
10145 newbits = rehash_bits(version, version->glue_table_nodecount);
10146 newsize = HASHSIZE(newbits) * sizeof(version->glue_table[0]);
10147
10148 version->glue_table = isc_mem_get(version->rbtdb->common.mctx, newsize);
10149 version->glue_table_bits = newbits;
10150 memset(version->glue_table, 0, newsize);
10151
10152 for (i = 0; i < oldcount; i++) {
10153 rbtdb_glue_table_node_t *gluenode;
10154 rbtdb_glue_table_node_t *nextgluenode;
10155 for (gluenode = oldtable[i]; gluenode != NULL;
10156 gluenode = nextgluenode)
10157 {
10158 uint32_t hash = isc_hash32(
10159 &gluenode->node, sizeof(gluenode->node), true);
10160 uint32_t idx = hash_32(hash, newbits);
10161 nextgluenode = gluenode->next;
10162 gluenode->next = version->glue_table[idx];
10163 version->glue_table[idx] = gluenode;
10164 }
10165 }
10166
10167 isc_mem_put(version->rbtdb->common.mctx, oldtable,
10168 oldcount * sizeof(*version->glue_table));
10169
10170 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_ZONE,
10171 ISC_LOG_DEBUG(3),
10172 "rehash_gluetable(): "
10173 "resized glue table from %zu to "
10174 "%zu",
10175 oldcount, newsize / sizeof(version->glue_table[0]));
10176 }
10177
10178 static void
maybe_rehash_gluetable(rbtdb_version_t * version)10179 maybe_rehash_gluetable(rbtdb_version_t *version) {
10180 size_t overcommit = HASHSIZE(version->glue_table_bits) *
10181 RBTDB_GLUE_TABLE_OVERCOMMIT;
10182 if (ISC_LIKELY(version->glue_table_nodecount < overcommit)) {
10183 return;
10184 }
10185
10186 rehash_gluetable(version);
10187 }
10188
10189 static isc_result_t
glue_nsdname_cb(void * arg,const dns_name_t * name,dns_rdatatype_t qtype)10190 glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype) {
10191 rbtdb_glue_additionaldata_ctx_t *ctx;
10192 isc_result_t result;
10193 dns_fixedname_t fixedname_a;
10194 dns_name_t *name_a = NULL;
10195 dns_rdataset_t rdataset_a, sigrdataset_a;
10196 dns_rbtnode_t *node_a = NULL;
10197 dns_fixedname_t fixedname_aaaa;
10198 dns_name_t *name_aaaa = NULL;
10199 dns_rdataset_t rdataset_aaaa, sigrdataset_aaaa;
10200 dns_rbtnode_t *node_aaaa = NULL;
10201 rbtdb_glue_t *glue = NULL;
10202 dns_name_t *gluename = NULL;
10203
10204 /*
10205 * NS records want addresses in additional records.
10206 */
10207 INSIST(qtype == dns_rdatatype_a);
10208
10209 ctx = (rbtdb_glue_additionaldata_ctx_t *)arg;
10210
10211 name_a = dns_fixedname_initname(&fixedname_a);
10212 dns_rdataset_init(&rdataset_a);
10213 dns_rdataset_init(&sigrdataset_a);
10214
10215 name_aaaa = dns_fixedname_initname(&fixedname_aaaa);
10216 dns_rdataset_init(&rdataset_aaaa);
10217 dns_rdataset_init(&sigrdataset_aaaa);
10218
10219 result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion,
10220 dns_rdatatype_a, DNS_DBFIND_GLUEOK, 0,
10221 (dns_dbnode_t **)&node_a, name_a, &rdataset_a,
10222 &sigrdataset_a);
10223 if (result == DNS_R_GLUE) {
10224 glue = isc_mem_get(ctx->rbtdb->common.mctx, sizeof(*glue));
10225
10226 gluename = dns_fixedname_initname(&glue->fixedname);
10227 dns_name_copynf(name_a, gluename);
10228
10229 dns_rdataset_init(&glue->rdataset_a);
10230 dns_rdataset_init(&glue->sigrdataset_a);
10231 dns_rdataset_init(&glue->rdataset_aaaa);
10232 dns_rdataset_init(&glue->sigrdataset_aaaa);
10233
10234 dns_rdataset_clone(&rdataset_a, &glue->rdataset_a);
10235 if (dns_rdataset_isassociated(&sigrdataset_a)) {
10236 dns_rdataset_clone(&sigrdataset_a,
10237 &glue->sigrdataset_a);
10238 }
10239 }
10240
10241 result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion,
10242 dns_rdatatype_aaaa, DNS_DBFIND_GLUEOK, 0,
10243 (dns_dbnode_t **)&node_aaaa, name_aaaa,
10244 &rdataset_aaaa, &sigrdataset_aaaa);
10245 if (result == DNS_R_GLUE) {
10246 if (glue == NULL) {
10247 glue = isc_mem_get(ctx->rbtdb->common.mctx,
10248 sizeof(*glue));
10249
10250 gluename = dns_fixedname_initname(&glue->fixedname);
10251 dns_name_copynf(name_aaaa, gluename);
10252
10253 dns_rdataset_init(&glue->rdataset_a);
10254 dns_rdataset_init(&glue->sigrdataset_a);
10255 dns_rdataset_init(&glue->rdataset_aaaa);
10256 dns_rdataset_init(&glue->sigrdataset_aaaa);
10257 } else {
10258 INSIST(node_a == node_aaaa);
10259 INSIST(dns_name_equal(name_a, name_aaaa));
10260 }
10261
10262 dns_rdataset_clone(&rdataset_aaaa, &glue->rdataset_aaaa);
10263 if (dns_rdataset_isassociated(&sigrdataset_aaaa)) {
10264 dns_rdataset_clone(&sigrdataset_aaaa,
10265 &glue->sigrdataset_aaaa);
10266 }
10267 }
10268
10269 if (glue != NULL) {
10270 glue->next = ctx->glue_list;
10271 ctx->glue_list = glue;
10272 }
10273
10274 result = ISC_R_SUCCESS;
10275
10276 if (dns_rdataset_isassociated(&rdataset_a)) {
10277 rdataset_disassociate(&rdataset_a);
10278 }
10279 if (dns_rdataset_isassociated(&sigrdataset_a)) {
10280 rdataset_disassociate(&sigrdataset_a);
10281 }
10282
10283 if (dns_rdataset_isassociated(&rdataset_aaaa)) {
10284 rdataset_disassociate(&rdataset_aaaa);
10285 }
10286 if (dns_rdataset_isassociated(&sigrdataset_aaaa)) {
10287 rdataset_disassociate(&sigrdataset_aaaa);
10288 }
10289
10290 if (node_a != NULL) {
10291 detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_a);
10292 }
10293 if (node_aaaa != NULL) {
10294 detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_aaaa);
10295 }
10296
10297 return (result);
10298 }
10299
10300 static isc_result_t
rdataset_addglue(dns_rdataset_t * rdataset,dns_dbversion_t * version,dns_message_t * msg)10301 rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version,
10302 dns_message_t *msg) {
10303 dns_rbtdb_t *rbtdb = rdataset->private1;
10304 dns_rbtnode_t *node = rdataset->private2;
10305 rbtdb_version_t *rbtversion = version;
10306 uint32_t idx;
10307 rbtdb_glue_table_node_t *cur;
10308 bool found = false;
10309 bool restarted = false;
10310 rbtdb_glue_t *ge;
10311 rbtdb_glue_additionaldata_ctx_t ctx;
10312 isc_result_t result;
10313 uint64_t hash;
10314
10315 REQUIRE(rdataset->type == dns_rdatatype_ns);
10316 REQUIRE(rbtdb == rbtversion->rbtdb);
10317 REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb));
10318
10319 /*
10320 * The glue table cache that forms a part of the DB version
10321 * structure is not explicitly bounded and there's no cache
10322 * cleaning. The zone data size itself is an implicit bound.
10323 *
10324 * The key into the glue hashtable is the node pointer. This is
10325 * because the glue hashtable is a property of the DB version,
10326 * and the glue is keyed for the ownername/NS tuple. We don't
10327 * bother with using an expensive dns_name_t comparison here as
10328 * the node pointer is a fixed value that won't change for a DB
10329 * version and can be compared directly.
10330 */
10331 hash = isc_hash_function(&node, sizeof(node), true);
10332
10333 restart:
10334 /*
10335 * First, check if we have the additional entries already cached
10336 * in the glue table.
10337 */
10338 RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read);
10339
10340 idx = hash_32(hash, rbtversion->glue_table_bits);
10341
10342 for (cur = rbtversion->glue_table[idx]; cur != NULL; cur = cur->next) {
10343 if (cur->node == node) {
10344 break;
10345 }
10346 }
10347
10348 if (cur == NULL) {
10349 goto no_glue;
10350 }
10351 /*
10352 * We found a cached result. Add it to the message and
10353 * return.
10354 */
10355 found = true;
10356 ge = cur->glue_list;
10357
10358 /*
10359 * (void *) -1 is a special value that means no glue is
10360 * present in the zone.
10361 */
10362 if (ge == (void *)-1) {
10363 if (!restarted && (rbtdb->gluecachestats != NULL)) {
10364 isc_stats_increment(
10365 rbtdb->gluecachestats,
10366 dns_gluecachestatscounter_hits_absent);
10367 }
10368 goto no_glue;
10369 } else {
10370 if (!restarted && (rbtdb->gluecachestats != NULL)) {
10371 isc_stats_increment(
10372 rbtdb->gluecachestats,
10373 dns_gluecachestatscounter_hits_present);
10374 }
10375 }
10376
10377 for (; ge != NULL; ge = ge->next) {
10378 dns_name_t *name = NULL;
10379 dns_rdataset_t *rdataset_a = NULL;
10380 dns_rdataset_t *sigrdataset_a = NULL;
10381 dns_rdataset_t *rdataset_aaaa = NULL;
10382 dns_rdataset_t *sigrdataset_aaaa = NULL;
10383 dns_name_t *gluename = dns_fixedname_name(&ge->fixedname);
10384
10385 result = dns_message_gettempname(msg, &name);
10386 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
10387 goto no_glue;
10388 }
10389
10390 dns_name_copynf(gluename, name);
10391
10392 if (dns_rdataset_isassociated(&ge->rdataset_a)) {
10393 result = dns_message_gettemprdataset(msg, &rdataset_a);
10394 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
10395 dns_message_puttempname(msg, &name);
10396 goto no_glue;
10397 }
10398 }
10399
10400 if (dns_rdataset_isassociated(&ge->sigrdataset_a)) {
10401 result = dns_message_gettemprdataset(msg,
10402 &sigrdataset_a);
10403 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
10404 if (rdataset_a != NULL) {
10405 dns_message_puttemprdataset(
10406 msg, &rdataset_a);
10407 }
10408 dns_message_puttempname(msg, &name);
10409 goto no_glue;
10410 }
10411 }
10412
10413 if (dns_rdataset_isassociated(&ge->rdataset_aaaa)) {
10414 result = dns_message_gettemprdataset(msg,
10415 &rdataset_aaaa);
10416 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
10417 dns_message_puttempname(msg, &name);
10418 if (rdataset_a != NULL) {
10419 dns_message_puttemprdataset(
10420 msg, &rdataset_a);
10421 }
10422 if (sigrdataset_a != NULL) {
10423 dns_message_puttemprdataset(
10424 msg, &sigrdataset_a);
10425 }
10426 goto no_glue;
10427 }
10428 }
10429
10430 if (dns_rdataset_isassociated(&ge->sigrdataset_aaaa)) {
10431 result = dns_message_gettemprdataset(msg,
10432 &sigrdataset_aaaa);
10433 if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
10434 dns_message_puttempname(msg, &name);
10435 if (rdataset_a != NULL) {
10436 dns_message_puttemprdataset(
10437 msg, &rdataset_a);
10438 }
10439 if (sigrdataset_a != NULL) {
10440 dns_message_puttemprdataset(
10441 msg, &sigrdataset_a);
10442 }
10443 if (rdataset_aaaa != NULL) {
10444 dns_message_puttemprdataset(
10445 msg, &rdataset_aaaa);
10446 }
10447 goto no_glue;
10448 }
10449 }
10450
10451 if (ISC_LIKELY(rdataset_a != NULL)) {
10452 dns_rdataset_clone(&ge->rdataset_a, rdataset_a);
10453 ISC_LIST_APPEND(name->list, rdataset_a, link);
10454 }
10455
10456 if (sigrdataset_a != NULL) {
10457 dns_rdataset_clone(&ge->sigrdataset_a, sigrdataset_a);
10458 ISC_LIST_APPEND(name->list, sigrdataset_a, link);
10459 }
10460
10461 if (rdataset_aaaa != NULL) {
10462 dns_rdataset_clone(&ge->rdataset_aaaa, rdataset_aaaa);
10463 ISC_LIST_APPEND(name->list, rdataset_aaaa, link);
10464 }
10465 if (sigrdataset_aaaa != NULL) {
10466 dns_rdataset_clone(&ge->sigrdataset_aaaa,
10467 sigrdataset_aaaa);
10468 ISC_LIST_APPEND(name->list, sigrdataset_aaaa, link);
10469 }
10470
10471 dns_message_addname(msg, name, DNS_SECTION_ADDITIONAL);
10472 }
10473
10474 no_glue:
10475 RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read);
10476
10477 if (found) {
10478 return (ISC_R_SUCCESS);
10479 }
10480
10481 if (restarted) {
10482 return (ISC_R_FAILURE);
10483 }
10484
10485 /*
10486 * No cached glue was found in the table. Cache it and restart
10487 * this function.
10488 *
10489 * Due to the gap between the read lock and the write lock, it's
10490 * possible that we may cache a duplicate glue table entry, but
10491 * we don't care.
10492 */
10493
10494 ctx.glue_list = NULL;
10495 ctx.rbtdb = rbtdb;
10496 ctx.rbtversion = rbtversion;
10497
10498 RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write);
10499
10500 maybe_rehash_gluetable(rbtversion);
10501 idx = hash_32(hash, rbtversion->glue_table_bits);
10502
10503 (void)dns_rdataset_additionaldata(rdataset, glue_nsdname_cb, &ctx);
10504
10505 cur = isc_mem_get(rbtdb->common.mctx, sizeof(*cur));
10506
10507 /*
10508 * XXXMUKS: it looks like the dns_dbversion is not destroyed
10509 * when named is terminated by a keyboard break. This doesn't
10510 * cleanup the node reference and keeps the process dangling.
10511 */
10512 /* isc_refcount_increment0(&node->references); */
10513 cur->node = node;
10514
10515 if (ctx.glue_list == NULL) {
10516 /*
10517 * No glue was found. Cache it so.
10518 */
10519 cur->glue_list = (void *)-1;
10520 if (rbtdb->gluecachestats != NULL) {
10521 isc_stats_increment(
10522 rbtdb->gluecachestats,
10523 dns_gluecachestatscounter_inserts_absent);
10524 }
10525 } else {
10526 cur->glue_list = ctx.glue_list;
10527 if (rbtdb->gluecachestats != NULL) {
10528 isc_stats_increment(
10529 rbtdb->gluecachestats,
10530 dns_gluecachestatscounter_inserts_present);
10531 }
10532 }
10533
10534 cur->next = rbtversion->glue_table[idx];
10535 rbtversion->glue_table[idx] = cur;
10536 rbtversion->glue_table_nodecount++;
10537
10538 RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write);
10539
10540 restarted = true;
10541 goto restart;
10542
10543 /* UNREACHABLE */
10544 }
10545
10546 /*%
10547 * Routines for LRU-based cache management.
10548 */
10549
10550 /*%
10551 * See if a given cache entry that is being reused needs to be updated
10552 * in the LRU-list. From the LRU management point of view, this function is
10553 * expected to return true for almost all cases. When used with threads,
10554 * however, this may cause a non-negligible performance penalty because a
10555 * writer lock will have to be acquired before updating the list.
10556 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
10557 * function returns true if the entry has not been updated for some period of
10558 * time. We differentiate the NS or glue address case and the others since
10559 * experiments have shown that the former tends to be accessed relatively
10560 * infrequently and the cost of cache miss is higher (e.g., a missing NS records
10561 * may cause external queries at a higher level zone, involving more
10562 * transactions).
10563 *
10564 * Caller must hold the node (read or write) lock.
10565 */
10566 static bool
need_headerupdate(rdatasetheader_t * header,isc_stdtime_t now)10567 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
10568 if (RDATASET_ATTR_GET(header, (RDATASET_ATTR_NONEXISTENT |
10569 RDATASET_ATTR_ANCIENT |
10570 RDATASET_ATTR_ZEROTTL)) != 0)
10571 {
10572 return (false);
10573 }
10574
10575 #if DNS_RBTDB_LIMITLRUUPDATE
10576 if (header->type == dns_rdatatype_ns ||
10577 (header->trust == dns_trust_glue &&
10578 (header->type == dns_rdatatype_a ||
10579 header->type == dns_rdatatype_aaaa)))
10580 {
10581 /*
10582 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE
10583 * seconds have passed since the previous update time.
10584 */
10585 return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now);
10586 }
10587
10588 /*
10589 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds
10590 * have passed.
10591 */
10592 return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now);
10593 #else
10594 UNUSED(now);
10595
10596 return (true);
10597 #endif /* if DNS_RBTDB_LIMITLRUUPDATE */
10598 }
10599
10600 /*%
10601 * Update the timestamp of a given cache entry and move it to the head
10602 * of the corresponding LRU list.
10603 *
10604 * Caller must hold the node (write) lock.
10605 *
10606 * Note that the we do NOT touch the heap here, as the TTL has not changed.
10607 */
10608 static void
update_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,isc_stdtime_t now)10609 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) {
10610 INSIST(IS_CACHE(rbtdb));
10611
10612 /* To be checked: can we really assume this? XXXMLG */
10613 INSIST(ISC_LINK_LINKED(header, link));
10614
10615 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
10616 header->last_used = now;
10617 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
10618 }
10619
10620 static size_t
expire_lru_headers(dns_rbtdb_t * rbtdb,unsigned int locknum,size_t purgesize,bool tree_locked)10621 expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize,
10622 bool tree_locked) {
10623 rdatasetheader_t *header, *header_prev;
10624 size_t purged = 0;
10625
10626 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
10627 header != NULL && purged <= purgesize; header = header_prev)
10628 {
10629 header_prev = ISC_LIST_PREV(header, link);
10630 /*
10631 * Unlink the entry at this point to avoid checking it
10632 * again even if it's currently used someone else and
10633 * cannot be purged at this moment. This entry won't be
10634 * referenced any more (so unlinking is safe) since the
10635 * TTL was reset to 0.
10636 */
10637 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link);
10638 size_t header_size = rdataset_size(header);
10639 expire_header(rbtdb, header, tree_locked, expire_lru);
10640 purged += header_size;
10641 }
10642
10643 return (purged);
10644 }
10645
10646 /*%
10647 * Purge some stale (i.e. unused for some period - LRU based cleaning) cache
10648 * entries under the overmem condition. To recover from this condition quickly,
10649 * we cleanup entries up to the size of newly added rdata (passed as purgesize).
10650 *
10651 * This process is triggered while adding a new entry, and we specifically avoid
10652 * purging entries in the same LRU bucket as the one to which the new entry will
10653 * belong. Otherwise, we might purge entries of the same name of different RR
10654 * types while adding RRsets from a single response (consider the case where
10655 * we're adding A and AAAA glue records of the same NS name).
10656 */
10657 static void
overmem_purge(dns_rbtdb_t * rbtdb,unsigned int locknum_start,size_t purgesize,bool tree_locked)10658 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
10659 bool tree_locked) {
10660 unsigned int locknum;
10661 size_t purged = 0;
10662
10663 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
10664 locknum != locknum_start && purged <= purgesize;
10665 locknum = (locknum + 1) % rbtdb->node_lock_count)
10666 {
10667 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
10668 isc_rwlocktype_write);
10669
10670 purged += expire_lru_headers(rbtdb, locknum, purgesize - purged,
10671 tree_locked);
10672
10673 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
10674 isc_rwlocktype_write);
10675 }
10676 }
10677
10678 static void
expire_header(dns_rbtdb_t * rbtdb,rdatasetheader_t * header,bool tree_locked,expire_t reason)10679 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
10680 expire_t reason) {
10681 set_ttl(rbtdb, header, 0);
10682 mark_header_ancient(rbtdb, header);
10683
10684 /*
10685 * Caller must hold the node (write) lock.
10686 */
10687
10688 if (isc_refcount_current(&header->node->references) == 0) {
10689 /*
10690 * If no one else is using the node, we can clean it up now.
10691 * We first need to gain a new reference to the node to meet a
10692 * requirement of decrement_reference().
10693 */
10694 new_reference(rbtdb, header->node, isc_rwlocktype_write);
10695 decrement_reference(rbtdb, header->node, 0,
10696 isc_rwlocktype_write,
10697 tree_locked ? isc_rwlocktype_write
10698 : isc_rwlocktype_none,
10699 false);
10700
10701 if (rbtdb->cachestats == NULL) {
10702 return;
10703 }
10704
10705 switch (reason) {
10706 case expire_ttl:
10707 isc_stats_increment(rbtdb->cachestats,
10708 dns_cachestatscounter_deletettl);
10709 break;
10710 case expire_lru:
10711 isc_stats_increment(rbtdb->cachestats,
10712 dns_cachestatscounter_deletelru);
10713 break;
10714 default:
10715 break;
10716 }
10717 }
10718 }
10719