#define JEMALLOC_RTREE_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/mutex.h"

/*
 * Only the most significant bits of keys passed to rtree_{read,write}() are
 * used.
 */
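/*
 * (Explanatory note: the per-level subkeys together cover only the address
 * bits between LG_VADDR and LG_PAGE, so the low page-offset bits of a key
 * never influence a lookup.)
 */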
bool
rtree_new(rtree_t *rtree, bool zeroed) {
#ifdef JEMALLOC_JET
	if (!zeroed) {
		memset(rtree, 0, sizeof(rtree_t)); /* Clear root. */
	}
#else
	assert(zeroed);
#endif

	if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}

	return false;
}
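/*
 * Minimal usage sketch (hypothetical caller; demo_rtree is illustrative
 * only).  An rtree embedded in zero-initialized storage passes zeroed=true;
 * a true return indicates failure, mirroring malloc_mutex_init():
 *
 *	static rtree_t demo_rtree;	(static storage is zero-initialized)
 *	if (rtree_new(&demo_rtree, true)) {
 *		(mutex initialization failed; propagate the error)
 *	}
 */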

static rtree_node_elm_t *
rtree_node_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
	return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms *
	    sizeof(rtree_node_elm_t), CACHELINE);
}
rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc = rtree_node_alloc_impl;
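/*
 * The JET_MUTABLE indirection here (and for the dalloc/leaf hooks below)
 * lets test builds (JEMALLOC_JET) swap in instrumented implementations;
 * in production builds JET_MUTABLE expands to const, so the pointer is
 * fixed at its default.
 */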

static JEMALLOC_NORETURN void
rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) {
	/* Nodes are never deleted during normal operation. */
	not_reached();
}
UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc =
    rtree_node_dalloc_impl;

static rtree_leaf_elm_t *
rtree_leaf_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
	return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms *
	    sizeof(rtree_leaf_elm_t), CACHELINE);
}
rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc = rtree_leaf_alloc_impl;

static JEMALLOC_NORETURN void
rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) {
	/* Leaves are never deleted during normal operation. */
	not_reached();
}
UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc =
    rtree_leaf_dalloc_impl;

#ifdef JEMALLOC_JET
#  if RTREE_HEIGHT > 1
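/*
 * Recursive teardown for test builds.  Children of interior levels are
 * rtree_node_elm_t arrays, while children of the last interior level are
 * rtree_leaf_elm_t arrays, hence the level + 2 < RTREE_HEIGHT split below.
 * The root array is embedded in rtree_t itself, so it is deliberately not
 * freed.
 */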
static void
rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree,
    unsigned level) {
	size_t nchildren = ZU(1) << rtree_levels[level].bits;
	if (level + 2 < RTREE_HEIGHT) {
		for (size_t i = 0; i < nchildren; i++) {
			rtree_node_elm_t *node =
			    (rtree_node_elm_t *)atomic_load_p(&subtree[i].child,
			    ATOMIC_RELAXED);
			if (node != NULL) {
				rtree_delete_subtree(tsdn, rtree, node, level +
				    1);
			}
		}
	} else {
		for (size_t i = 0; i < nchildren; i++) {
			rtree_leaf_elm_t *leaf =
			    (rtree_leaf_elm_t *)atomic_load_p(&subtree[i].child,
			    ATOMIC_RELAXED);
			if (leaf != NULL) {
				rtree_leaf_dalloc(tsdn, rtree, leaf);
			}
		}
	}

	if (subtree != rtree->root) {
		rtree_node_dalloc(tsdn, rtree, subtree);
	}
}
#  endif

void
rtree_delete(tsdn_t *tsdn, rtree_t *rtree) {
#  if RTREE_HEIGHT > 1
	rtree_delete_subtree(tsdn, rtree, rtree->root, 0);
#  endif
}
#endif

static rtree_node_elm_t *
rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
    atomic_p_t *elmp) {
	malloc_mutex_lock(tsdn, &rtree->init_lock);
	/*
	 * If *elmp is non-null, then it was initialized with the init lock
	 * held, so we can get by with 'relaxed' here.
	 */
	rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED);
	if (node == NULL) {
		node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
		    rtree_levels[level].bits);
		if (node == NULL) {
			malloc_mutex_unlock(tsdn, &rtree->init_lock);
			return NULL;
		}
		/*
		 * Even though we hold the lock, a later reader might not; we
		 * need release semantics.
		 */
		atomic_store_p(elmp, node, ATOMIC_RELEASE);
	}
	malloc_mutex_unlock(tsdn, &rtree->init_lock);

	return node;
}

static rtree_leaf_elm_t *
rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) {
	malloc_mutex_lock(tsdn, &rtree->init_lock);
	/*
	 * If *elmp is non-null, then it was initialized with the init lock
	 * held, so we can get by with 'relaxed' here.
	 */
	rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED);
	if (leaf == NULL) {
		leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) <<
		    rtree_levels[RTREE_HEIGHT-1].bits);
		if (leaf == NULL) {
			malloc_mutex_unlock(tsdn, &rtree->init_lock);
			return NULL;
		}
		/*
		 * Even though we hold the lock, a later reader might not; we
		 * need release semantics.
		 */
		atomic_store_p(elmp, leaf, ATOMIC_RELEASE);
	}
	malloc_mutex_unlock(tsdn, &rtree->init_lock);

	return leaf;
}

static bool
rtree_node_valid(rtree_node_elm_t *node) {
	return ((uintptr_t)node != (uintptr_t)0);
}

static bool
rtree_leaf_valid(rtree_leaf_elm_t *leaf) {
	return ((uintptr_t)leaf != (uintptr_t)0);
}

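/*
 * The 'dependent' flag in the readers below encodes the caller's knowledge
 * that the element must already exist (the lookup is on behalf of a pointer
 * to a live allocation, whose rtree entry was published before the pointer
 * came into existence).  Such reads can be relaxed; otherwise an acquire
 * load is needed to pair with the release store in rtree_{node,leaf}_init().
 */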
static rtree_node_elm_t *
rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) {
	rtree_node_elm_t *node;

	if (dependent) {
		node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
		    ATOMIC_RELAXED);
	} else {
		node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
		    ATOMIC_ACQUIRE);
	}

	assert(!dependent || node != NULL);
	return node;
}

static rtree_node_elm_t *
rtree_child_node_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm,
    unsigned level, bool dependent) {
	rtree_node_elm_t *node;

	node = rtree_child_node_tryread(elm, dependent);
	if (!dependent && unlikely(!rtree_node_valid(node))) {
		node = rtree_node_init(tsdn, rtree, level + 1, &elm->child);
	}
	assert(!dependent || node != NULL);
	return node;
}

static rtree_leaf_elm_t *
rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) {
	rtree_leaf_elm_t *leaf;

	if (dependent) {
		leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
		    ATOMIC_RELAXED);
	} else {
		leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
		    ATOMIC_ACQUIRE);
	}

	assert(!dependent || leaf != NULL);
	return leaf;
}

static rtree_leaf_elm_t *
rtree_child_leaf_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm,
    unsigned level, bool dependent) {
	rtree_leaf_elm_t *leaf;

	leaf = rtree_child_leaf_tryread(elm, dependent);
	if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {
		leaf = rtree_leaf_init(tsdn, rtree, &elm->child);
	}
	assert(!dependent || leaf != NULL);
	return leaf;
}

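/*
 * Slow-path lookup, reached from the inline fast path on an L1/L2 rtree_ctx
 * cache miss (the config_debug asserts below verify that the key is indeed
 * absent from both cache levels).  Walks from the root one level per
 * RTREE_GET_CHILD expansion, optionally creating missing interior
 * nodes/leaves (init_missing), and finishes by refreshing the caches in
 * RTREE_GET_LEAF.
 */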
rtree_leaf_elm_t *
rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, bool dependent, bool init_missing) {
	rtree_node_elm_t *node;
	rtree_leaf_elm_t *leaf;
#if RTREE_HEIGHT > 1
	node = rtree->root;
#else
	leaf = rtree->root;
#endif

	if (config_debug) {
		uintptr_t leafkey = rtree_leafkey(key);
		for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) {
			assert(rtree_ctx->cache[i].leafkey != leafkey);
		}
		for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) {
			assert(rtree_ctx->l2_cache[i].leafkey != leafkey);
		}
	}

#define RTREE_GET_CHILD(level) {					\
		assert(level < RTREE_HEIGHT-1);				\
		if (level != 0 && !dependent &&				\
		    unlikely(!rtree_node_valid(node))) {		\
			return NULL;					\
		}							\
		uintptr_t subkey = rtree_subkey(key, level);		\
		if (level + 2 < RTREE_HEIGHT) {				\
			node = init_missing ?				\
			    rtree_child_node_read(tsdn, rtree,		\
			    &node[subkey], level, dependent) :		\
			    rtree_child_node_tryread(&node[subkey],	\
			    dependent);					\
		} else {						\
			leaf = init_missing ?				\
			    rtree_child_leaf_read(tsdn, rtree,		\
			    &node[subkey], level, dependent) :		\
			    rtree_child_leaf_tryread(&node[subkey],	\
			    dependent);					\
		}							\
	}
	/*
	 * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss):
	 * (1) evict the last entry in the L2 cache; (2) move the collision
	 * slot from the L1 cache down to L2; and (3) fill L1.
	 */
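	/*
	 * For example (hypothetical size): with RTREE_CTX_NCACHE_L2 == 8,
	 * the memmove below shifts l2_cache[0..6] into l2_cache[1..7],
	 * dropping the old l2_cache[7]; the direct-mapped L1 slot for this
	 * key then moves into l2_cache[0], and the new leaf takes the L1
	 * slot.
	 */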
#define RTREE_GET_LEAF(level) {						\
		assert(level == RTREE_HEIGHT-1);			\
		if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {	\
			return NULL;					\
		}							\
		if (RTREE_CTX_NCACHE_L2 > 1) {				\
			memmove(&rtree_ctx->l2_cache[1],		\
			    &rtree_ctx->l2_cache[0],			\
			    sizeof(rtree_ctx_cache_elm_t) *		\
			    (RTREE_CTX_NCACHE_L2 - 1));			\
		}							\
		size_t slot = rtree_cache_direct_map(key);		\
		rtree_ctx->l2_cache[0].leafkey =			\
		    rtree_ctx->cache[slot].leafkey;			\
		rtree_ctx->l2_cache[0].leaf =				\
		    rtree_ctx->cache[slot].leaf;			\
		uintptr_t leafkey = rtree_leafkey(key);			\
		rtree_ctx->cache[slot].leafkey = leafkey;		\
		rtree_ctx->cache[slot].leaf = leaf;			\
		uintptr_t subkey = rtree_subkey(key, level);		\
		return &leaf[subkey];					\
	}
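	/*
	 * RTREE_HEIGHT is a compile-time constant, so the guards below fold
	 * away: levels 0 and 1 are unrolled explicitly, any deeper interior
	 * levels fall to the loop, and RTREE_GET_LEAF always handles the
	 * final level.
	 */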
	if (RTREE_HEIGHT > 1) {
		RTREE_GET_CHILD(0)
	}
	if (RTREE_HEIGHT > 2) {
		RTREE_GET_CHILD(1)
	}
	if (RTREE_HEIGHT > 3) {
		for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) {
			RTREE_GET_CHILD(i)
		}
	}
	RTREE_GET_LEAF(RTREE_HEIGHT-1)
#undef RTREE_GET_CHILD
#undef RTREE_GET_LEAF
	not_reached();
}

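/*
 * Reset both cache levels to an empty state.  RTREE_LEAFKEY_INVALID is a
 * sentinel that cannot equal any real leafkey (real leafkeys have their low
 * bits masked off by rtree_leafkey()), so every slot initially misses.
 */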
void
rtree_ctx_data_init(rtree_ctx_t *ctx) {
	for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) {
		rtree_ctx_cache_elm_t *cache = &ctx->cache[i];
		cache->leafkey = RTREE_LEAFKEY_INVALID;
		cache->leaf = NULL;
	}
	for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) {
		rtree_ctx_cache_elm_t *cache = &ctx->l2_cache[i];
		cache->leafkey = RTREE_LEAFKEY_INVALID;
		cache->leaf = NULL;
	}
}
320*8e33eff8Schristos }
321