xref: /netbsd-src/external/mpl/bind/dist/lib/dns/qp_p.h (revision bcda20f65a8566e103791ec395f7f499ef322704)
1*bcda20f6Schristos /*	$NetBSD: qp_p.h,v 1.2 2025/01/26 16:25:24 christos Exp $	*/
29689912eSchristos 
39689912eSchristos /*
49689912eSchristos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
59689912eSchristos  *
69689912eSchristos  * SPDX-License-Identifier: MPL-2.0
79689912eSchristos  *
89689912eSchristos  * This Source Code Form is subject to the terms of the Mozilla Public
99689912eSchristos  * License, v. 2.0. If a copy of the MPL was not distributed with this
109689912eSchristos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
119689912eSchristos  *
129689912eSchristos  * See the COPYRIGHT file distributed with this work for additional
139689912eSchristos  * information regarding copyright ownership.
149689912eSchristos  */
159689912eSchristos 
169689912eSchristos /*
179689912eSchristos  * For an overview, see doc/design/qp-trie.md
189689912eSchristos  *
199689912eSchristos  * This private header defines the internal data structures,
209689912eSchristos  */
219689912eSchristos 
229689912eSchristos #pragma once
239689912eSchristos 
249689912eSchristos /***********************************************************************
259689912eSchristos  *
269689912eSchristos  *  interior node basics
279689912eSchristos  */
289689912eSchristos 
299689912eSchristos /*
309689912eSchristos  * A qp-trie node is almost always one of two types: branch or leaf.
319689912eSchristos  * (A third type is used only to anchor the root of a trie; see below.)
329689912eSchristos  *
339689912eSchristos  * A node contains a 64-bit word and a 32-bit word. In order to avoid
349689912eSchristos  * unwanted padding, they are declared as three 32-bit words; this keeps
359689912eSchristos  * the size down to 12 bytes. They are in native endian order, so getting
369689912eSchristos  * the 64-bit part should compile down to an unaligned load.
379689912eSchristos  *
389689912eSchristos  * The node type is identified by the least significant bits of the 64-bit
399689912eSchristos  * word.
409689912eSchristos  *
419689912eSchristos  * In a leaf node:
429689912eSchristos  * - The 64-bit word is used to store a pointer value. (Pointers must be
439689912eSchristos  *   word-aligned so the least significant bits are zero; those bits can
449689912eSchristos  *   then act as a node tag to indicate that this is a leaf. This
459689912eSchristos  *   requirement is enforced by the make_leaf() constructor.)
469689912eSchristos  * - The 32-bit word is used to store an integer value.  Both the
479689912eSchristos  *   pointer and integer values can be retrieved when looking up a key.
489689912eSchristos  *
499689912eSchristos  * In a branch node:
509689912eSchristos  * - The 64-bit word is subdivided into three portions: the least
519689912eSchristos  *   significant bits are the node type (for a branch, 0x1); the
529689912eSchristos  *   most sigificant 15 bits are an offset value into the key, and
539689912eSchristos  *   the 47 bits in the middle are a bitmap; see the documentation
549689912eSchristos  *   for the SHIFT_* enum below.
559689912eSchristos  * - The 32-bit word is a reference (dns_qpref_t) to the packed sparse
569689912eSchristos  *   vector of "twigs", i.e. child nodes. A branch node has at least
579689912eSchristos  *   two and at most 47 twigs. (The qp-trie update functions ensure that
589689912eSchristos  *   branches actually branch, i.e. a branch cannot have only one child.)
599689912eSchristos  *
609689912eSchristos  * A third node type, reader nodes, anchors the root of a trie.
619689912eSchristos  * A pair of reader nodes together contain a packed `dns_qpreader_t`.
629689912eSchristos  * See the section on "packed reader nodes" for details.
639689912eSchristos  */
649689912eSchristos struct dns_qpnode {
659689912eSchristos #if WORDS_BIGENDIAN
669689912eSchristos 	uint32_t bighi, biglo, small;
679689912eSchristos #else
689689912eSchristos 	uint32_t biglo, bighi, small;
699689912eSchristos #endif
709689912eSchristos };
719689912eSchristos 
729689912eSchristos /*
739689912eSchristos  * The possible values of the node type tag. Type tags must fit in two bits
749689912eSchristos  * for compatibility with 4-byte pointer alignment on 32-bit systems.
759689912eSchristos  */
769689912eSchristos enum {
779689912eSchristos 	LEAF_TAG = 0,	/* leaf node */
789689912eSchristos 	BRANCH_TAG = 1, /* branch node */
799689912eSchristos 	READER_TAG = 2, /* reader node */
809689912eSchristos 	TAG_MASK = 3,	/* mask covering tag bits */
819689912eSchristos };
829689912eSchristos 
839689912eSchristos /*
849689912eSchristos  * This code does not work on CPUs with large pointers, e.g. CHERI capability
859689912eSchristos  * architectures. When porting to that kind of machine, a `dns_qpnode` should
869689912eSchristos  * be just a `uintptr_t`; a leaf node will contain a single pointer, and a
879689912eSchristos  * branch node will fit in the same space with room to spare.
889689912eSchristos  */
899689912eSchristos STATIC_ASSERT(sizeof(void *) <= sizeof(uint64_t),
909689912eSchristos 	      "pointers must fit in 64 bits");
919689912eSchristos 
929689912eSchristos /*
939689912eSchristos  * The 64-bit word in a branch node is comprised of a node type tag, a
949689912eSchristos  * bitmap, and an offset into the key. It is called an "index word" because
959689912eSchristos  * it describes how to access the twigs vector (think "database index").
969689912eSchristos  * The following enum sets up the bit positions of these parts.
979689912eSchristos  *
989689912eSchristos  * The bitmap is just above the type tag. The `dns_qp_bits_for_byte[]` table
999689912eSchristos  * is used to fill in a key so that bit tests can work directly against the
1009689912eSchristos  * index word without superfluous masking or shifting; we don't need to
1019689912eSchristos  * mask out the bitmap before testing a bit, but we do need to mask the
1029689912eSchristos  * bitmap before calling popcount.
1039689912eSchristos  *
1049689912eSchristos  * The byte offset into the key is at the top of the word, so that it
1059689912eSchristos  * can be extracted with just a shift, with no masking needed.
1069689912eSchristos  *
1079689912eSchristos  * The names are SHIFT_thing because they are dns_qpshift_t values. (See
1089689912eSchristos  * below for the various `qp_*` type declarations.)
1099689912eSchristos  *
1109689912eSchristos  * These values are relatively fixed in practice: SHIFT_NOBYTE needs
1119689912eSchristos  * to leave space for the type tag, and the implementation of
1129689912eSchristos  * `dns_qpkey_fromname()` depends on the bitmap being large enough.
1139689912eSchristos  * The symbolic names avoid mystery numbers in the code.
1149689912eSchristos  */
1159689912eSchristos enum {
1169689912eSchristos 	SHIFT_NOBYTE = 2,  /* label separator has no byte value */
1179689912eSchristos 	SHIFT_BITMAP,	   /* many bits here */
1189689912eSchristos 	SHIFT_OFFSET = 49, /* offset of byte in key */
1199689912eSchristos };
1209689912eSchristos 
1219689912eSchristos /***********************************************************************
1229689912eSchristos  *
1239689912eSchristos  *  garbage collector tuning parameters
1249689912eSchristos  */
1259689912eSchristos 
1269689912eSchristos /*
1279689912eSchristos  * A "cell" is a location that can contain a `dns_qpnode_t`, and a "chunk"
1289689912eSchristos  * is a moderately large array of cells. A big trie can occupy
1299689912eSchristos  * multiple chunks. (Unlike other nodes, a trie's root node lives in
1309689912eSchristos  * its `struct dns_qp` instead of being allocated in a cell.)
1319689912eSchristos  *
1329689912eSchristos  * The qp-trie allocator hands out space for twigs vectors. Allocations are
1339689912eSchristos  * made sequentially from one of the chunks; this kind of "sequential
1349689912eSchristos  * allocator" is also known as a "bump allocator", so in `struct dns_qp`
1359689912eSchristos  * (see below) the allocation chunk is called `bump`.
1369689912eSchristos  */
1379689912eSchristos 
1389689912eSchristos /*
1399689912eSchristos  * Number of cells in a chunk is a power of 2, which must have space for
1409689912eSchristos  * a full twigs vector (48 wide). When testing, use a much smaller chunk
1419689912eSchristos  * size to make the allocator work harder.
1429689912eSchristos  */
1439689912eSchristos #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1449689912eSchristos #define QP_CHUNK_LOG 7
1459689912eSchristos #else
1469689912eSchristos #define QP_CHUNK_LOG 10
1479689912eSchristos #endif
1489689912eSchristos 
1499689912eSchristos STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20,
1509689912eSchristos 	      "qp-trie chunk size is unreasonable");
1519689912eSchristos 
1529689912eSchristos #define QP_CHUNK_SIZE  (1U << QP_CHUNK_LOG)
1539689912eSchristos #define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t))
1549689912eSchristos 
1559689912eSchristos /*
1569689912eSchristos  * We need a bitfield this big to count how much of a chunk is in use:
1579689912eSchristos  * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`.
1589689912eSchristos  */
1599689912eSchristos #define QP_USAGE_BITS (QP_CHUNK_LOG + 1)
1609689912eSchristos 
1619689912eSchristos /*
1629689912eSchristos  * A chunk needs to be compacted if it is less full than this threshold.
1639689912eSchristos  * (12% overhead seems reasonable)
1649689912eSchristos  */
1659689912eSchristos #define QP_MAX_FREE (QP_CHUNK_SIZE / 8)
1669689912eSchristos #define QP_MIN_USED (QP_CHUNK_SIZE - QP_MAX_FREE)
1679689912eSchristos 
1689689912eSchristos /*
1699689912eSchristos  * Compact automatically when we pass this threshold: when there is a lot
1709689912eSchristos  * of free space in absolute terms, and when we have freed more than half
1719689912eSchristos  * of the space we allocated.
1729689912eSchristos  *
1739689912eSchristos  * The current compaction algorithm scans the whole trie, so it is important
1749689912eSchristos  * to scale the threshold based on the size of the trie to avoid quadratic
1759689912eSchristos  * behaviour. XXXFANF find an algorithm that scans less of the trie!
1769689912eSchristos  *
1779689912eSchristos  * During a modification transaction, when we copy-on-write some twigs we
1789689912eSchristos  * count the old copy as "free", because they will be when the transaction
1799689912eSchristos  * commits. But they cannot be recovered immediately so they are also
1809689912eSchristos  * counted as on hold, and discounted when we decide whether to compact.
1819689912eSchristos  */
1829689912eSchristos #define QP_GC_HEURISTIC(qp, free) \
1839689912eSchristos 	((free) > QP_CHUNK_SIZE * 4 && (free) > (qp)->used_count / 2)
1849689912eSchristos 
1859689912eSchristos #define QP_NEEDGC(qp) QP_GC_HEURISTIC(qp, (qp)->free_count)
1869689912eSchristos #define QP_AUTOGC(qp) QP_GC_HEURISTIC(qp, (qp)->free_count - (qp)->hold_count)
1879689912eSchristos 
1889689912eSchristos /*
1899689912eSchristos  * The chunk base and usage arrays are resized geometically and start off
1909689912eSchristos  * with two entries.
1919689912eSchristos  */
1929689912eSchristos #define GROWTH_FACTOR(size) ((size) + (size) / 2 + 2)
1939689912eSchristos 
1949689912eSchristos /*
1959689912eSchristos  * Constructors and accessors for dns_qpref_t values, defined here to show
1969689912eSchristos  * how the dns_qpref_t, dns_qpchunk_t, dns_qpcell_t types relate to each other
1979689912eSchristos  */
1989689912eSchristos 
1999689912eSchristos static inline dns_qpref_t
2009689912eSchristos make_ref(dns_qpchunk_t chunk, dns_qpcell_t cell) {
2019689912eSchristos 	return QP_CHUNK_SIZE * chunk + cell;
2029689912eSchristos }
2039689912eSchristos 
2049689912eSchristos static inline dns_qpchunk_t
2059689912eSchristos ref_chunk(dns_qpref_t ref) {
2069689912eSchristos 	return ref / QP_CHUNK_SIZE;
2079689912eSchristos }
2089689912eSchristos 
2099689912eSchristos static inline dns_qpcell_t
2109689912eSchristos ref_cell(dns_qpref_t ref) {
2119689912eSchristos 	return ref % QP_CHUNK_SIZE;
2129689912eSchristos }
2139689912eSchristos 
2149689912eSchristos /*
2159689912eSchristos  * We should not use the `root_ref` in an empty trie, so we set it
2169689912eSchristos  * to a value that should trigger an obvious bug. See qp_init()
2179689912eSchristos  * and get_root() below.
2189689912eSchristos  */
2199689912eSchristos #define INVALID_REF ((dns_qpref_t)~0UL)
2209689912eSchristos 
2219689912eSchristos /***********************************************************************
2229689912eSchristos  *
2239689912eSchristos  *  chunk arrays
2249689912eSchristos  */
2259689912eSchristos 
2269689912eSchristos /*
2279689912eSchristos  * A `dns_qp_t` contains two arrays holding information about each chunk.
2289689912eSchristos  *
2299689912eSchristos  * The `base` array holds pointers to the base of each chunk.
2309689912eSchristos  * The `usage` array hold the allocator's state for each chunk.
2319689912eSchristos  *
2329689912eSchristos  * The `base` array is used by the hot qp-trie traversal paths. It can
2339689912eSchristos  * be shared by multiple versions of a trie, which are tracked with a
2349689912eSchristos  * refcount. Old versions of the trie can retain old versions of the
2359689912eSchristos  * `base` array.
2369689912eSchristos  *
2379689912eSchristos  * In multithreaded code, the `usage` array is only used when the
2389689912eSchristos  * `dns_qpmulti_t` mutex is held, and there is only one version of
2399689912eSchristos  * it in active use (maybe with a snapshot for rollback support).
2409689912eSchristos  *
2419689912eSchristos  * The two arrays are separate because they have rather different
2429689912eSchristos  * access patterns, different lifetimes, and different element sizes.
2439689912eSchristos  */
2449689912eSchristos 
2459689912eSchristos /*
2469689912eSchristos  * For most purposes we don't need to know exactly which cells are
2479689912eSchristos  * in use in a chunk, we only need to know how many of them there are.
2489689912eSchristos  *
2499689912eSchristos  * After we have finished allocating from a chunk, the `used` counter
2509689912eSchristos  * is the size we need to know for shrinking the chunk and for
2519689912eSchristos  * scanning it to detach leaf values before the chunk is free()d. The
2529689912eSchristos  * `free` counter tells us when the chunk needs compacting and when it
2539689912eSchristos  * has become empty.
2549689912eSchristos  *
2559689912eSchristos  * The `exists` flag allows the chunk scanning loops to look at the
2569689912eSchristos  * usage array only.
2579689912eSchristos  *
2589689912eSchristos  * In multithreaded code, we mark chunks as `immutable` when a modify
2599689912eSchristos  * transaction is opened. (We don't mark them immutable on commit,
2609689912eSchristos  * because the old bump chunk must remain mutable between write
2619689912eSchristos  * transactions, but it must become immutable when an update
2629689912eSchristos  * transaction is opened.)
2639689912eSchristos  *
2649689912eSchristos  * There are a few flags used to mark which chunks are still needed by
2659689912eSchristos  * snapshots after the chunks have passed their normal reclamation
2669689912eSchristos  * phase.
2679689912eSchristos  */
2689689912eSchristos typedef struct qp_usage {
2699689912eSchristos 	/*% the allocation point, increases monotonically */
2709689912eSchristos 	dns_qpcell_t used : QP_USAGE_BITS;
2719689912eSchristos 	/*% count of nodes no longer needed, also monotonic */
2729689912eSchristos 	dns_qpcell_t free : QP_USAGE_BITS;
2739689912eSchristos 	/*% qp->base->ptr[chunk] != NULL */
2749689912eSchristos 	bool exists : 1;
2759689912eSchristos 	/*% is this chunk shared? [MT] */
2769689912eSchristos 	bool immutable : 1;
2779689912eSchristos 	/*% already subtracted from multi->*_count [MT] */
2789689912eSchristos 	bool discounted : 1;
2799689912eSchristos 	/*% is a snapshot using this chunk? [MT] */
2809689912eSchristos 	bool snapshot : 1;
2819689912eSchristos 	/*% tried to free it but a snapshot needs it [MT] */
2829689912eSchristos 	bool snapfree : 1;
2839689912eSchristos 	/*% for mark/sweep snapshot flag updates [MT] */
2849689912eSchristos 	bool snapmark : 1;
2859689912eSchristos } qp_usage_t;
2869689912eSchristos 
2879689912eSchristos /*
2889689912eSchristos  * The chunks are owned by the current version of the `base` array.
2899689912eSchristos  * When the array is resized, the old version might still be in use by
2909689912eSchristos  * concurrent readers, in which case it is free()d later when its
2919689912eSchristos  * refcount drops to zero.
2929689912eSchristos  *
2939689912eSchristos  * A `dns_qpbase_t` counts references from `dns_qp_t` objects and
2949689912eSchristos  * from packed readers, but not from `dns_qpread_t` nor from
2959689912eSchristos  * `dns_qpsnap_t` objects. Refcount adjustments for `dns_qpread_t`
2969689912eSchristos  * would wreck multicore scalability; instead we rely on RCU.
2979689912eSchristos  *
2989689912eSchristos  * The `usage` array determines when a chunk is no longer needed: old
2999689912eSchristos  * chunk pointers in old `base` arrays are ignored. (They can become
3009689912eSchristos  * dangling pointers to free memory, but they will never be
3019689912eSchristos  * dereferenced.)
3029689912eSchristos  *
3039689912eSchristos  * We ensure that individual chunk base pointers remain immutable
3049689912eSchristos  * after assignment, and they are not cleared until the chunk is
3059689912eSchristos  * free()d, after all readers have departed. Slots can be reused, and
3069689912eSchristos  * we allow transactions to fill or re-fill empty slots adjacent to
3079689912eSchristos  * busy slots that are in use by readers.
3089689912eSchristos  */
3099689912eSchristos struct dns_qpbase {
3109689912eSchristos 	unsigned int magic;
3119689912eSchristos 	isc_refcount_t refcount;
3129689912eSchristos 	dns_qpnode_t *ptr[];
3139689912eSchristos };
3149689912eSchristos 
3159689912eSchristos /*
3169689912eSchristos  * Chunks that may be in use by readers are reclaimed asynchronously.
3179689912eSchristos  * When a transaction commits, immutable chunks that are now empty are
3189689912eSchristos  * listed in a `qp_rcuctx_t` structure and passed to `call_rcu()`.
3199689912eSchristos  */
3209689912eSchristos typedef struct qp_rcuctx {
3219689912eSchristos 	unsigned int magic;
3229689912eSchristos 	struct rcu_head rcu_head;
3239689912eSchristos 	isc_mem_t *mctx;
3249689912eSchristos 	dns_qpmulti_t *multi;
3259689912eSchristos 	dns_qpchunk_t count;
3269689912eSchristos 	dns_qpchunk_t chunk[];
3279689912eSchristos } qp_rcuctx_t;
3289689912eSchristos 
3299689912eSchristos /*
3309689912eSchristos  * Returns true when the base array can be free()d.
3319689912eSchristos  */
3329689912eSchristos static inline bool
3339689912eSchristos qpbase_unref(dns_qpreadable_t qpr) {
3349689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
3359689912eSchristos 	return qp->base != NULL &&
3369689912eSchristos 	       isc_refcount_decrement(&qp->base->refcount) == 1;
3379689912eSchristos }
3389689912eSchristos 
3399689912eSchristos /*
3409689912eSchristos  * Now we know about `dns_qpreader_t` and `dns_qpbase_t`,
3419689912eSchristos  * here's how we convert a twig reference into a pointer.
3429689912eSchristos  */
3439689912eSchristos static inline dns_qpnode_t *
3449689912eSchristos ref_ptr(dns_qpreadable_t qpr, dns_qpref_t ref) {
3459689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
3469689912eSchristos 	return qp->base->ptr[ref_chunk(ref)] + ref_cell(ref);
3479689912eSchristos }
3489689912eSchristos 
3499689912eSchristos /***********************************************************************
3509689912eSchristos  *
3519689912eSchristos  *  main qp-trie structures
3529689912eSchristos  */
3539689912eSchristos 
3549689912eSchristos #define QP_MAGIC       ISC_MAGIC('t', 'r', 'i', 'e')
3559689912eSchristos #define QPITER_MAGIC   ISC_MAGIC('q', 'p', 'i', 't')
3569689912eSchristos #define QPCHAIN_MAGIC  ISC_MAGIC('q', 'p', 'c', 'h')
3579689912eSchristos #define QPMULTI_MAGIC  ISC_MAGIC('q', 'p', 'm', 'v')
3589689912eSchristos #define QPREADER_MAGIC ISC_MAGIC('q', 'p', 'r', 'x')
3599689912eSchristos #define QPBASE_MAGIC   ISC_MAGIC('q', 'p', 'b', 'p')
3609689912eSchristos #define QPRCU_MAGIC    ISC_MAGIC('q', 'p', 'c', 'b')
3619689912eSchristos 
3629689912eSchristos #define QP_VALID(qp)	  ISC_MAGIC_VALID(qp, QP_MAGIC)
3639689912eSchristos #define QPITER_VALID(qp)  ISC_MAGIC_VALID(qp, QPITER_MAGIC)
3649689912eSchristos #define QPCHAIN_VALID(qp) ISC_MAGIC_VALID(qp, QPCHAIN_MAGIC)
3659689912eSchristos #define QPMULTI_VALID(qp) ISC_MAGIC_VALID(qp, QPMULTI_MAGIC)
3669689912eSchristos #define QPBASE_VALID(qp)  ISC_MAGIC_VALID(qp, QPBASE_MAGIC)
3679689912eSchristos #define QPRCU_VALID(qp)	  ISC_MAGIC_VALID(qp, QPRCU_MAGIC)
3689689912eSchristos 
3699689912eSchristos /*
3709689912eSchristos  * Polymorphic initialization of the `dns_qpreader_t` prefix.
3719689912eSchristos  *
3729689912eSchristos  * The location of the root node is actually a dns_qpref_t, but is
3739689912eSchristos  * declared in DNS_QPREADER_FIELDS as uint32_t to avoid leaking too
3749689912eSchristos  * many internal details into the public API.
3759689912eSchristos  *
3769689912eSchristos  * The `uctx` and `methods` support callbacks into the user's code.
3779689912eSchristos  * They are constant after initialization.
3789689912eSchristos  */
3799689912eSchristos #define QP_INIT(qp, m, x)                 \
3809689912eSchristos 	(*(qp) = (typeof(*(qp))){         \
3819689912eSchristos 		 .magic = QP_MAGIC,       \
3829689912eSchristos 		 .root_ref = INVALID_REF, \
3839689912eSchristos 		 .uctx = x,               \
3849689912eSchristos 		 .methods = m,            \
3859689912eSchristos 	 })
3869689912eSchristos 
3879689912eSchristos /*
3889689912eSchristos  * Snapshots have some extra cleanup machinery.
3899689912eSchristos  *
3909689912eSchristos  * Originally, a snapshot was basically just a `dns_qpread_t`
3919689912eSchristos  * allocated on the heap, with the extra behaviour that memory
3929689912eSchristos  * reclamation is suppressed for a particular trie while it has any
3939689912eSchristos  * snapshots. However that design gets into trouble for a zone with
3949689912eSchristos  * frequent updates and many zone transfers.
3959689912eSchristos  *
3969689912eSchristos  * Instead, each snapshot records which chunks it needs. When a
3979689912eSchristos  * snapshot is created, it makes a copy of the `base` array, except
3989689912eSchristos  * for chunks that are empty and waiting to be reclaimed. When a
3999689912eSchristos  * snapshot is destroyed, we can traverse the list of snapshots to
4009689912eSchristos  * accurately mark which chunks are still needed.
4019689912eSchristos  *
4029689912eSchristos  * A snapshot's `whence` pointer helps ensure that a `dns_qpsnap_t`is
4039689912eSchristos  * not muddled up with the wrong `dns_qpmulti_t`.
4049689912eSchristos  *
4059689912eSchristos  * A trie's `base` array might have grown after the snapshot was
4069689912eSchristos  * created, so it records its own `chunk_max`.
4079689912eSchristos  */
4089689912eSchristos struct dns_qpsnap {
4099689912eSchristos 	DNS_QPREADER_FIELDS;
4109689912eSchristos 	dns_qpmulti_t *whence;
4119689912eSchristos 	uint32_t chunk_max;
4129689912eSchristos 	ISC_LINK(struct dns_qpsnap) link;
4139689912eSchristos };
4149689912eSchristos 
4159689912eSchristos /*
4169689912eSchristos  * Read-write access to a qp-trie requires extra fields to support the
4179689912eSchristos  * allocator and garbage collector.
4189689912eSchristos  *
4199689912eSchristos  * Bare instances of a `struct dns_qp` are used for stand-alone
4209689912eSchristos  * single-threaded tries. For multithreaded access, a `dns_qpmulti_t`
4219689912eSchristos  * wraps a `dns_qp_t` with a mutex and other fields that are only needed
4229689912eSchristos  * at the start or end of a transaction.
4239689912eSchristos  *
4249689912eSchristos  * Allocations are made sequentially in the `bump` chunk. A sequence
4259689912eSchristos  * of lightweight write transactions can use the same `bump` chunk, so
4269689912eSchristos  * its prefix before `fender` is immutable, and the rest is mutable.
4279689912eSchristos  *
4289689912eSchristos  * To decide when to compact and reclaim space, QP_MAX_GARBAGE() examines
4299689912eSchristos  * the values of `used_count`, `free_count`, and `hold_count`. The
4309689912eSchristos  * `hold_count` tracks nodes that need to be retained while readers are
4319689912eSchristos  * using them; they are free but cannot be reclaimed until the transaction
4329689912eSchristos  * has committed, so the `hold_count` is discounted from QP_MAX_GARBAGE()
4339689912eSchristos  * during a transaction.
4349689912eSchristos  *
4359689912eSchristos  * There are some flags that alter the behaviour of write transactions.
4369689912eSchristos  *
4379689912eSchristos  *  - The `transaction_mode` indicates whether the current transaction is a
4389689912eSchristos  *    light write or a heavy update, or (between transactions) the previous
4399689912eSchristos  *    transaction's mode, because the setup for the next transaction
4409689912eSchristos  *    depends on how the previous one committed. The mode is set at the
4419689912eSchristos  *    start of each transaction. It is QP_NONE in a single-threaded qp-trie
4429689912eSchristos  *    to detect if part of a `dns_qpmulti_t` is passed to dns_qp_destroy().
4439689912eSchristos  *
4449689912eSchristos  *  - The `compact_all` flag is used when every node in the trie should be
4459689912eSchristos  *    copied. (Usually compation aims to avoid moving nodes out of
4469689912eSchristos  *    unfragmented chunks.) It is used when compaction is explicitly
4479689912eSchristos  *    requested via `dns_qp_compact()`, and as an emergency mechanism if
4489689912eSchristos  *    normal compaction failed to clear the QP_MAX_GARBAGE() condition.
4499689912eSchristos  *    (This emergency is a bug even tho we have a rescue mechanism.)
4509689912eSchristos  *
4519689912eSchristos  *  - When a qp-trie is destroyed while it has pending cleanup work, its
4529689912eSchristos  *    `destroy` flag is set so that it is destroyed by the reclaim worker.
4539689912eSchristos  *    (Because items cannot be removed from the middle of the cleanup list.)
4549689912eSchristos  *
4559689912eSchristos  *  - When built with fuzzing support, we can use mprotect() and munmap()
4569689912eSchristos  *    to ensure that incorrect memory accesses cause fatal errors. The
4579689912eSchristos  *    `write_protect` flag must be set straight after the `dns_qpmulti_t`
4589689912eSchristos  *    is created, then left unchanged.
4599689912eSchristos  *
4609689912eSchristos  * Some of the dns_qp_t fields are only needed for multithreaded transactions
4619689912eSchristos  * (marked [MT] below) but the same code paths are also used for single-
4629689912eSchristos  * threaded writes.
4639689912eSchristos  */
4649689912eSchristos struct dns_qp {
4659689912eSchristos 	DNS_QPREADER_FIELDS;
4669689912eSchristos 	/*% memory context (const) */
4679689912eSchristos 	isc_mem_t *mctx;
4689689912eSchristos 	/*% array of per-chunk allocation counters */
4699689912eSchristos 	qp_usage_t *usage;
4709689912eSchristos 	/*% number of slots in `chunk` and `usage` arrays */
4719689912eSchristos 	dns_qpchunk_t chunk_max;
4729689912eSchristos 	/*% which chunk is used for allocations */
4739689912eSchristos 	dns_qpchunk_t bump;
4749689912eSchristos 	/*% nodes in the `bump` chunk below `fender` are read only [MT] */
4759689912eSchristos 	dns_qpcell_t fender;
4769689912eSchristos 	/*% number of leaf nodes */
4779689912eSchristos 	dns_qpcell_t leaf_count;
4789689912eSchristos 	/*% total of all usage[] counters */
4799689912eSchristos 	dns_qpcell_t used_count, free_count;
4809689912eSchristos 	/*% free cells that cannot be recovered right now */
4819689912eSchristos 	dns_qpcell_t hold_count;
4829689912eSchristos 	/*% what kind of transaction was most recently started [MT] */
4839689912eSchristos 	enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2;
4849689912eSchristos 	/*% compact the entire trie [MT] */
4859689912eSchristos 	bool compact_all : 1;
4869689912eSchristos 	/*% optionally when compiled with fuzzing support [MT] */
4879689912eSchristos 	bool write_protect : 1;
4889689912eSchristos };
4899689912eSchristos 
4909689912eSchristos /*
4919689912eSchristos  * Concurrent access to a qp-trie.
4929689912eSchristos  *
4939689912eSchristos  * The `reader` pointer provides wait-free access to the current version
4949689912eSchristos  * of the trie. See the "packed reader nodes" section below for a
4959689912eSchristos  * description of what it points to.
4969689912eSchristos  *
4979689912eSchristos  * The main object under the protection of the mutex is the `writer`
4989689912eSchristos  * containing all the allocator state. There can be a backup copy when
4999689912eSchristos  * we want to be able to rollback an update transaction.
5009689912eSchristos  *
5019689912eSchristos  * There is a `reader_ref` which corresponds to the `reader` pointer
5029689912eSchristos  * (`ref_ptr(multi->reader_ref) == multi->reader`). The `reader_ref` is
5039689912eSchristos  * necessary when freeing the space used by the reader, because there
5049689912eSchristos  * isn't a good way to recover a dns_qpref_t from a dns_qpnode_t pointer.
5059689912eSchristos  *
5069689912eSchristos  * There is a per-trie list of snapshots that is used for reclaiming
5079689912eSchristos  * memory when a snapshot is destroyed.
5089689912eSchristos  *
5099689912eSchristos  * Finally, we maintain a global list of `dns_qpmulti_t` objects that
5109689912eSchristos  * need asynchronous safe memory recovery.
5119689912eSchristos  */
5129689912eSchristos struct dns_qpmulti {
5139689912eSchristos 	uint32_t magic;
5149689912eSchristos 	/*% RCU-protected pointer to current packed reader */
5159689912eSchristos 	dns_qpnode_t *reader;
5169689912eSchristos 	/*% the mutex protects the rest of this structure */
5179689912eSchristos 	isc_mutex_t mutex;
5189689912eSchristos 	/*% ref_ptr(writer, reader_ref) == reader */
5199689912eSchristos 	dns_qpref_t reader_ref;
5209689912eSchristos 	/*% the main working structure */
5219689912eSchristos 	dns_qp_t writer;
5229689912eSchristos 	/*% saved allocator state to support rollback */
5239689912eSchristos 	dns_qp_t *rollback;
5249689912eSchristos 	/*% all snapshots of this trie */
5259689912eSchristos 	ISC_LIST(dns_qpsnap_t) snapshots;
5269689912eSchristos };
5279689912eSchristos 
5289689912eSchristos /***********************************************************************
5299689912eSchristos  *
5309689912eSchristos  *  interior node constructors and accessors
5319689912eSchristos  */
5329689912eSchristos 
5339689912eSchristos /*
5349689912eSchristos  * See the comments under "interior node basics" above, which explain
5359689912eSchristos  * the layout of nodes as implemented by the following functions.
5369689912eSchristos  *
5379689912eSchristos  * These functions are (mostly) constructors and getters. Imagine how
5389689912eSchristos  * much less code there would be if C had sum types with control over
5399689912eSchristos  * the layout...
5409689912eSchristos  */
5419689912eSchristos 
5429689912eSchristos /*
5439689912eSchristos  * Get the 64-bit word of a node.
5449689912eSchristos  */
5459689912eSchristos static inline uint64_t
5469689912eSchristos node64(dns_qpnode_t *n) {
5479689912eSchristos 	uint64_t lo = n->biglo;
5489689912eSchristos 	uint64_t hi = n->bighi;
5499689912eSchristos 	return lo | (hi << 32);
5509689912eSchristos }
5519689912eSchristos 
5529689912eSchristos /*
5539689912eSchristos  * Get the 32-bit word of a node.
5549689912eSchristos  */
5559689912eSchristos static inline uint32_t
5569689912eSchristos node32(dns_qpnode_t *n) {
5579689912eSchristos 	return n->small;
5589689912eSchristos }
5599689912eSchristos 
5609689912eSchristos /*
5619689912eSchristos  * Create a node from its parts
5629689912eSchristos  */
5639689912eSchristos static inline dns_qpnode_t
5649689912eSchristos make_node(uint64_t big, uint32_t small) {
5659689912eSchristos 	return (dns_qpnode_t){
5669689912eSchristos 		.biglo = (uint32_t)(big),
5679689912eSchristos 		.bighi = (uint32_t)(big >> 32),
5689689912eSchristos 		.small = small,
5699689912eSchristos 	};
5709689912eSchristos }
5719689912eSchristos 
5729689912eSchristos /*
5739689912eSchristos  * Extract a pointer from a node's 64 bit word. The double cast is to avoid
5749689912eSchristos  * a warning about mismatched pointer/integer sizes on 32 bit systems.
5759689912eSchristos  */
5769689912eSchristos static inline void *
5779689912eSchristos node_pointer(dns_qpnode_t *n) {
5789689912eSchristos 	return (void *)(uintptr_t)(node64(n) & ~TAG_MASK);
5799689912eSchristos }
5809689912eSchristos 
5819689912eSchristos /*
5829689912eSchristos  * Examine a node's tag bits
5839689912eSchristos  */
5849689912eSchristos static inline uint32_t
5859689912eSchristos node_tag(dns_qpnode_t *n) {
5869689912eSchristos 	return n->biglo & TAG_MASK;
5879689912eSchristos }
5889689912eSchristos 
5899689912eSchristos /*
5909689912eSchristos  * simplified for the hot path
5919689912eSchristos  */
5929689912eSchristos static inline bool
5939689912eSchristos is_branch(dns_qpnode_t *n) {
5949689912eSchristos 	return n->biglo & BRANCH_TAG;
5959689912eSchristos }
5969689912eSchristos 
5979689912eSchristos /* leaf nodes *********************************************************/
5989689912eSchristos 
5999689912eSchristos /*
6009689912eSchristos  * Get a leaf's pointer value.
6019689912eSchristos  */
6029689912eSchristos static inline void *
6039689912eSchristos leaf_pval(dns_qpnode_t *n) {
6049689912eSchristos 	return node_pointer(n);
6059689912eSchristos }
6069689912eSchristos 
6079689912eSchristos /*
6089689912eSchristos  * Get a leaf's integer value
6099689912eSchristos  */
6109689912eSchristos static inline uint32_t
6119689912eSchristos leaf_ival(dns_qpnode_t *n) {
6129689912eSchristos 	return node32(n);
6139689912eSchristos }
6149689912eSchristos 
6159689912eSchristos /*
6169689912eSchristos  * Create a leaf node from its parts
6179689912eSchristos  */
6189689912eSchristos static inline dns_qpnode_t
6199689912eSchristos make_leaf(const void *pval, uint32_t ival) {
6209689912eSchristos 	dns_qpnode_t leaf = make_node((uintptr_t)pval, ival);
6219689912eSchristos 	REQUIRE(node_tag(&leaf) == LEAF_TAG);
6229689912eSchristos 	return leaf;
6239689912eSchristos }
6249689912eSchristos 
6259689912eSchristos /* branch nodes *******************************************************/
6269689912eSchristos 
6279689912eSchristos /*
6289689912eSchristos  * The following function names use plural `twigs` when they work on a
6299689912eSchristos  * branch's twigs vector as a whole, and singular `twig` when they work on
6309689912eSchristos  * a particular twig.
6319689912eSchristos  */
6329689912eSchristos 
6339689912eSchristos /*
6349689912eSchristos  * Get a branch node's index word
6359689912eSchristos  */
6369689912eSchristos static inline uint64_t
6379689912eSchristos branch_index(dns_qpnode_t *n) {
6389689912eSchristos 	return node64(n);
6399689912eSchristos }
6409689912eSchristos 
6419689912eSchristos /*
6429689912eSchristos  * Get a reference to a branch node's child twigs.
6439689912eSchristos  */
6449689912eSchristos static inline dns_qpref_t
6459689912eSchristos branch_twigs_ref(dns_qpnode_t *n) {
6469689912eSchristos 	return node32(n);
6479689912eSchristos }
6489689912eSchristos 
6499689912eSchristos /*
6509689912eSchristos  * Bit positions in the bitmap come directly from the key. DNS names are
6519689912eSchristos  * converted to keys using the tables declared at the end of this file.
6529689912eSchristos  */
6539689912eSchristos static inline dns_qpshift_t
6549689912eSchristos qpkey_bit(const dns_qpkey_t key, size_t len, size_t offset) {
6559689912eSchristos 	if (offset < len) {
6569689912eSchristos 		return key[offset];
6579689912eSchristos 	} else {
6589689912eSchristos 		return SHIFT_NOBYTE;
6599689912eSchristos 	}
6609689912eSchristos }
6619689912eSchristos 
6629689912eSchristos /*
6639689912eSchristos  * Extract a branch node's offset field, used to index the key.
6649689912eSchristos  */
6659689912eSchristos static inline size_t
6669689912eSchristos branch_key_offset(dns_qpnode_t *n) {
6679689912eSchristos 	return (size_t)(branch_index(n) >> SHIFT_OFFSET);
6689689912eSchristos }
6699689912eSchristos 
6709689912eSchristos /*
6719689912eSchristos  * Which bit identifies the twig of this node for this key?
6729689912eSchristos  */
6739689912eSchristos static inline dns_qpshift_t
6749689912eSchristos branch_keybit(dns_qpnode_t *n, const dns_qpkey_t key, size_t len) {
6759689912eSchristos 	return qpkey_bit(key, len, branch_key_offset(n));
6769689912eSchristos }
6779689912eSchristos 
6789689912eSchristos /*
6799689912eSchristos  * Get a pointer to a the first twig of a branch (this also functions
6809689912eSchristos  * as a pointer to the entire twig vector).
6819689912eSchristos  */
6829689912eSchristos static inline dns_qpnode_t *
6839689912eSchristos branch_twigs(dns_qpreadable_t qpr, dns_qpnode_t *n) {
6849689912eSchristos 	return ref_ptr(qpr, branch_twigs_ref(n));
6859689912eSchristos }
6869689912eSchristos 
6879689912eSchristos /*
6889689912eSchristos  * Warm up the cache while calculating which twig we want.
6899689912eSchristos  */
6909689912eSchristos static inline void
6919689912eSchristos prefetch_twigs(dns_qpreadable_t qpr, dns_qpnode_t *n) {
6929689912eSchristos 	__builtin_prefetch(ref_ptr(qpr, branch_twigs_ref(n)));
6939689912eSchristos }
6949689912eSchristos 
6959689912eSchristos /* root node **********************************************************/
6969689912eSchristos 
6979689912eSchristos /*
6989689912eSchristos  * Get a pointer to the root node, checking if the trie is empty.
6999689912eSchristos  */
7009689912eSchristos static inline dns_qpnode_t *
7019689912eSchristos get_root(dns_qpreadable_t qpr) {
7029689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
7039689912eSchristos 	if (qp->root_ref == INVALID_REF) {
7049689912eSchristos 		return NULL;
7059689912eSchristos 	} else {
7069689912eSchristos 		return ref_ptr(qp, qp->root_ref);
7079689912eSchristos 	}
7089689912eSchristos }
7099689912eSchristos 
7109689912eSchristos /*
7119689912eSchristos  * When we need to move the root node, we avoid repeating allocation
7129689912eSchristos  * logistics by making a temporary fake branch node that has
7139689912eSchristos  *	`branch_twigs_size() == 1 && branch_twigs_ref() == root_ref`
7149689912eSchristos  * just enough to treat the root node as a vector of one twig.
7159689912eSchristos  */
7169689912eSchristos #define MOVABLE_ROOT(qp)                                   \
7179689912eSchristos 	(&(dns_qpnode_t){                                  \
7189689912eSchristos 		.biglo = BRANCH_TAG | (1 << SHIFT_NOBYTE), \
7199689912eSchristos 		.small = qp->root_ref,                     \
7209689912eSchristos 	})
7219689912eSchristos 
7229689912eSchristos /***********************************************************************
7239689912eSchristos  *
7249689912eSchristos  *  bitmap popcount shenanigans
7259689912eSchristos  */
7269689912eSchristos 
7279689912eSchristos /*
7289689912eSchristos  * How many twigs appear in the vector before the one corresponding to the
7299689912eSchristos  * given bit? Calculated using popcount of part of the branch's bitmap.
7309689912eSchristos  *
7319689912eSchristos  * To calculate a mask that covers the lesser bits in the bitmap,
7329689912eSchristos  * we subtract 1 to set all lesser bits, and subtract the tag mask
7339689912eSchristos  * because the type tag is not part of the bitmap.
7349689912eSchristos  */
7359689912eSchristos static inline dns_qpweight_t
7369689912eSchristos branch_count_bitmap_before(dns_qpnode_t *n, dns_qpshift_t bit) {
7379689912eSchristos 	uint64_t mask = (1ULL << bit) - 1 - TAG_MASK;
7389689912eSchristos 	uint64_t bitmap = branch_index(n) & mask;
7399689912eSchristos 	return (dns_qpweight_t)__builtin_popcountll(bitmap);
7409689912eSchristos }
7419689912eSchristos 
7429689912eSchristos /*
7439689912eSchristos  * How many twigs does this branch have?
7449689912eSchristos  *
7459689912eSchristos  * The offset is directly after the bitmap so the offset's lesser bits
7469689912eSchristos  * covers the whole bitmap, and the bitmap's weight is the number of twigs.
7479689912eSchristos  */
7489689912eSchristos static inline dns_qpweight_t
7499689912eSchristos branch_twigs_size(dns_qpnode_t *n) {
7509689912eSchristos 	return branch_count_bitmap_before(n, SHIFT_OFFSET);
7519689912eSchristos }
7529689912eSchristos 
7539689912eSchristos /*
7549689912eSchristos  * Position of a twig within the packed sparse vector.
7559689912eSchristos  */
7569689912eSchristos static inline dns_qpweight_t
7579689912eSchristos branch_twig_pos(dns_qpnode_t *n, dns_qpshift_t bit) {
7589689912eSchristos 	return branch_count_bitmap_before(n, bit);
7599689912eSchristos }
7609689912eSchristos 
7619689912eSchristos /*
7629689912eSchristos  * Get a pointer to the twig for a given bit number.
7639689912eSchristos  */
7649689912eSchristos static inline dns_qpnode_t *
7659689912eSchristos branch_twig_ptr(dns_qpreadable_t qpr, dns_qpnode_t *n, dns_qpshift_t bit) {
7669689912eSchristos 	return ref_ptr(qpr, branch_twigs_ref(n) + branch_twig_pos(n, bit));
7679689912eSchristos }
7689689912eSchristos 
7699689912eSchristos /*
7709689912eSchristos  * Is the twig identified by this bit present?
7719689912eSchristos  */
7729689912eSchristos static inline bool
7739689912eSchristos branch_has_twig(dns_qpnode_t *n, dns_qpshift_t bit) {
7749689912eSchristos 	return branch_index(n) & (1ULL << bit);
7759689912eSchristos }
7769689912eSchristos 
7779689912eSchristos /* twig logistics *****************************************************/
7789689912eSchristos 
7799689912eSchristos static inline void
7809689912eSchristos move_twigs(dns_qpnode_t *to, dns_qpnode_t *from, dns_qpweight_t size) {
7819689912eSchristos 	memmove(to, from, size * sizeof(dns_qpnode_t));
7829689912eSchristos }
7839689912eSchristos 
7849689912eSchristos static inline void
7859689912eSchristos zero_twigs(dns_qpnode_t *twigs, dns_qpweight_t size) {
7869689912eSchristos 	memset(twigs, 0, size * sizeof(dns_qpnode_t));
7879689912eSchristos }
7889689912eSchristos 
7899689912eSchristos /***********************************************************************
7909689912eSchristos  *
7919689912eSchristos  *  packed reader nodes
7929689912eSchristos  */
7939689912eSchristos 
7949689912eSchristos /*
7959689912eSchristos  * The purpose of these packed reader nodes is to simplify safe memory
7969689912eSchristos  * reclamation for a multithreaded qp-trie.
7979689912eSchristos  *
7989689912eSchristos  * After the `reader` pointer in a qpmulti is replaced, we need to wait
7999689912eSchristos  * for a grace period before we can reclaim the memory that is no longer
8009689912eSchristos  * needed by the trie. So we need some kind of structure to hold
8019689912eSchristos  * pointers to the (logically) detached memory until it is safe to free.
8029689912eSchristos  * This memory includes the chunks and the `base` arrays.
8039689912eSchristos  *
8049689912eSchristos  * Packed reader nodes save us from having to track `dns_qpread_t`
8059689912eSchristos  * objects as distinct allocations: the packed reader nodes get
8069689912eSchristos  * reclaimed when the the chunk containing their cells is reclaimed.
8079689912eSchristos  * When a real `dns_qpread_t` object is needed, it is allocated on the
8089689912eSchristos  * stack (it must not live longer than a isc_loop callback) and the
8099689912eSchristos  * packed reader is unpacked into it.
8109689912eSchristos  *
8119689912eSchristos  * Chunks are owned by the current `base` array, so unused chunks are
8129689912eSchristos  * held there until they are free()d. Old `base` arrays are attached
8139689912eSchristos  * to packed reader nodes with a refcount. When a chunk is reclaimed,
8149689912eSchristos  * it is scanned so that `chunk_free()` can call `detach_leaf()` on
8159689912eSchristos  * any remaining references to leaf objects. Similarly, it calls
8169689912eSchristos  * `qpbase_unref()` to reclaim old `base` arrays.
8179689912eSchristos  */
8189689912eSchristos 
8199689912eSchristos /*
8209689912eSchristos  * Two nodes is just enough space for the information needed by
8219689912eSchristos  * readers and for deferred memory reclamation.
8229689912eSchristos  */
8239689912eSchristos #define READER_SIZE 2
8249689912eSchristos 
8259689912eSchristos /*
8269689912eSchristos  * Create a packed reader; space for the reader should have been
8279689912eSchristos  * allocated using `alloc_twigs(&multi->writer, READER_SIZE)`.
8289689912eSchristos  */
8299689912eSchristos static inline void
8309689912eSchristos make_reader(dns_qpnode_t *reader, dns_qpmulti_t *multi) {
8319689912eSchristos 	dns_qp_t *qp = &multi->writer;
8329689912eSchristos 	reader[0] = make_node(READER_TAG | (uintptr_t)multi, QPREADER_MAGIC);
8339689912eSchristos 	reader[1] = make_node(READER_TAG | (uintptr_t)qp->base, qp->root_ref);
8349689912eSchristos }
8359689912eSchristos 
8369689912eSchristos static inline bool
8379689912eSchristos reader_valid(dns_qpnode_t *reader) {
8389689912eSchristos 	return reader != NULL && //
8399689912eSchristos 	       node_tag(&reader[0]) == READER_TAG &&
8409689912eSchristos 	       node_tag(&reader[1]) == READER_TAG &&
8419689912eSchristos 	       node32(&reader[0]) == QPREADER_MAGIC;
8429689912eSchristos }
8439689912eSchristos 
8449689912eSchristos /*
8459689912eSchristos  * Verify and unpack a reader. We return the `multi` pointer to use in
8469689912eSchristos  * consistency checks.
8479689912eSchristos  */
8489689912eSchristos static inline dns_qpmulti_t *
8499689912eSchristos unpack_reader(dns_qpreader_t *qp, dns_qpnode_t *reader) {
8509689912eSchristos 	INSIST(reader_valid(reader));
8519689912eSchristos 	dns_qpmulti_t *multi = node_pointer(&reader[0]);
8529689912eSchristos 	dns_qpbase_t *base = node_pointer(&reader[1]);
8539689912eSchristos 	INSIST(QPMULTI_VALID(multi));
8549689912eSchristos 	INSIST(QPBASE_VALID(base));
8559689912eSchristos 	*qp = (dns_qpreader_t){
8569689912eSchristos 		.magic = QP_MAGIC,
8579689912eSchristos 		.uctx = multi->writer.uctx,
8589689912eSchristos 		.methods = multi->writer.methods,
8599689912eSchristos 		.root_ref = node32(&reader[1]),
8609689912eSchristos 		.base = base,
8619689912eSchristos 	};
8629689912eSchristos 	return multi;
8639689912eSchristos }
8649689912eSchristos 
8659689912eSchristos /***********************************************************************
8669689912eSchristos  *
8679689912eSchristos  *  method invocation helpers
8689689912eSchristos  */
8699689912eSchristos 
8709689912eSchristos static inline void
8719689912eSchristos attach_leaf(dns_qpreadable_t qpr, dns_qpnode_t *n) {
8729689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
8739689912eSchristos 	qp->methods->attach(qp->uctx, leaf_pval(n), leaf_ival(n));
8749689912eSchristos }
8759689912eSchristos 
8769689912eSchristos static inline void
8779689912eSchristos detach_leaf(dns_qpreadable_t qpr, dns_qpnode_t *n) {
8789689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
8799689912eSchristos 	qp->methods->detach(qp->uctx, leaf_pval(n), leaf_ival(n));
8809689912eSchristos }
8819689912eSchristos 
8829689912eSchristos static inline size_t
8839689912eSchristos leaf_qpkey(dns_qpreadable_t qpr, dns_qpnode_t *n, dns_qpkey_t key) {
8849689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
8859689912eSchristos 	size_t len = qp->methods->makekey(key, qp->uctx, leaf_pval(n),
8869689912eSchristos 					  leaf_ival(n));
8879689912eSchristos 	INSIST(len < sizeof(dns_qpkey_t));
8889689912eSchristos 	return len;
8899689912eSchristos }
8909689912eSchristos 
8919689912eSchristos static inline char *
8929689912eSchristos triename(dns_qpreadable_t qpr, char *buf, size_t size) {
8939689912eSchristos 	dns_qpreader_t *qp = dns_qpreader(qpr);
8949689912eSchristos 	qp->methods->triename(qp->uctx, buf, size);
8959689912eSchristos 	return buf;
8969689912eSchristos }
8979689912eSchristos 
8989689912eSchristos #define TRIENAME(qp) \
8999689912eSchristos 	triename(qp, (char[DNS_QP_TRIENAME_MAX]){}, DNS_QP_TRIENAME_MAX)
9009689912eSchristos 
9019689912eSchristos /***********************************************************************
9029689912eSchristos  *
9039689912eSchristos  *  converting DNS names to trie keys
9049689912eSchristos  */
9059689912eSchristos 
9069689912eSchristos /*
9079689912eSchristos  * This is a deliberate simplification of the hostname characters,
9089689912eSchristos  * because it doesn't matter much if we treat a few extra characters
9099689912eSchristos  * favourably: there is plenty of space in the index word for a
9109689912eSchristos  * slightly larger bitmap.
9119689912eSchristos  */
9129689912eSchristos static inline bool
9139689912eSchristos qp_common_character(uint8_t byte) {
9149689912eSchristos 	return ('-' <= byte && byte <= '9') || ('_' <= byte && byte <= 'z');
9159689912eSchristos }
9169689912eSchristos 
9179689912eSchristos /*
9189689912eSchristos  * Lookup table mapping bytes in DNS names to bit positions, used
9199689912eSchristos  * by dns_qpkey_fromname() to convert DNS names to qp-trie keys.
9209689912eSchristos  */
9219689912eSchristos extern uint16_t dns_qp_bits_for_byte[];
9229689912eSchristos 
9239689912eSchristos /*
9249689912eSchristos  * And the reverse, mapping bit positions to characters, so the tests
9259689912eSchristos  * can print diagnostics involving qp-trie keys.
9269689912eSchristos  */
9279689912eSchristos extern uint8_t dns_qp_byte_for_bit[];
9289689912eSchristos 
9299689912eSchristos /**********************************************************************/
930