xref: /netbsd-src/external/bsd/jemalloc/include/jemalloc/internal/emap.h (revision 32d1c65c71fbdb65a012e8392a62a757dd6853e9)
1 #ifndef JEMALLOC_INTERNAL_EMAP_H
2 #define JEMALLOC_INTERNAL_EMAP_H
3 
4 #include "jemalloc/internal/base.h"
5 #include "jemalloc/internal/rtree.h"
6 
7 /*
8  * Note: Ends without a semicolon, so that writing
9  *     EMAP_DECLARE_RTREE_CTX;
10  * at use sites does not trigger empty-statement warnings.
11  */
12 #define EMAP_DECLARE_RTREE_CTX						\
13     rtree_ctx_t rtree_ctx_fallback;					\
14     rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback)
15 
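/*
 * A minimal usage sketch (hypothetical function, not part of this header): the
 * macro is dropped at the top of a function body and the resulting rtree_ctx
 * is then handed to the rtree calls.
 *
 *	static void
 *	emap_example_probe(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
 *		EMAP_DECLARE_RTREE_CTX;
 *		rtree_contents_t contents = rtree_read(tsdn, &emap->rtree,
 *		    rtree_ctx, (uintptr_t)ptr);
 *		(void)contents;
 *	}
 */
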
16 typedef struct emap_s emap_t;
17 struct emap_s {
18 	rtree_t rtree;
19 };
20 
21 /* Used to pass rtree lookup context down the path. */
22 typedef struct emap_alloc_ctx_t emap_alloc_ctx_t;
23 struct emap_alloc_ctx_t {
24 	szind_t szind;
25 	bool slab;
26 };
27 
28 typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t;
29 struct emap_full_alloc_ctx_s {
30 	szind_t szind;
31 	bool slab;
32 	edata_t *edata;
33 };
34 
35 bool emap_init(emap_t *emap, base_t *base, bool zeroed);
36 
37 void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
38     bool slab);
39 
40 void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
41     extent_state_t state);
42 
43 /*
44  * The two acquire functions below allow accessing neighbor edatas, if it's safe
45  * and valid to do so (e.g. they are from the same arena and in the same state).
46  * This is necessary because the ecache locks are state-based, and only protect
47  * edatas with the same state.  Therefore the neighbor edata's state needs to be
48  * verified first, before chasing the edata pointer.  The returned edata will be
49  * in an acquired state, meaning other threads will be prevented from accessing
50  * it, even though technically the edata can still be discovered from the rtree.
51  *
52  * This means that, at any moment while holding pointers to edatas, either one
53  * of the state-based locks is held (and the edatas are all in the protected
54  * state), or the edatas are in an acquired state (e.g. the active or merging
55  * state).  The acquire operation itself (changing the edata to an acquired
56  * state) is done under the state locks.
57  */
58 edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap,
59     edata_t *edata, extent_pai_t pai, extent_state_t expected_state,
60     bool forward);
61 edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap,
62     edata_t *edata, extent_pai_t pai, extent_state_t expected_state);
63 void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
64     extent_state_t new_state);
65 
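/*
 * A sketch of the acquire/release pairing described above (variable names and
 * the surrounding logic are hypothetical; expected_state would normally match
 * the ecache being operated on):
 *
 *	edata_t *neighbor = emap_try_acquire_edata_neighbor(tsdn, emap, edata,
 *	    pai, expected_state, forward);
 *	if (neighbor != NULL) {
 *		// neighbor is now in an acquired state; other threads that
 *		// discover it through the rtree will not touch it until it is
 *		// released below.
 *		...inspect or coalesce with neighbor...
 *		emap_release_edata(tsdn, emap, neighbor, new_state);
 *	}
 */
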
66 /*
67  * Associate the given edata with its beginning and end address, setting the
68  * szind and slab info appropriately.
69  * Returns true on error (i.e. resource exhaustion).
70  */
71 bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
72     szind_t szind, bool slab);
73 
74 /*
75  * Does the same thing, but with the interior of the range, for slab
76  * allocations.
77  *
78  * You might wonder why we don't just have a single emap_register function that
79  * does both depending on the value of 'slab'.  The answer is twofold:
80  * - As a practical matter, in places like the extract->split->commit pathway,
81  *   we defer the interior operation until we're sure that the commit won't fail
82  *   (but we have to register the split boundaries there).
83  * - In general, we're trying to move to a world where the page-specific
84  *   allocator doesn't know as much about how the pages it allocates will be
85  *   used, and passing a 'slab' parameter everywhere makes that more
86  *   complicated.
87  *
88  * Unlike the boundary version, this function can't fail; this is because slabs
89  * can't get big enough to touch a new page that neither of the boundaries
90  * touched, so no allocation is necessary to fill the interior once the boundary
91  * has been touched.
92  */
93 void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
94     szind_t szind);
95 
96 void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
97 void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
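
/*
 * A sketch of the registration lifecycle for a slab extent, under the caveats
 * above (names are hypothetical and error handling is elided beyond the one
 * failure case; the final argument to emap_register_boundary is 'slab'):
 *
 *	if (emap_register_boundary(tsdn, emap, edata, szind, true)) {
 *		return NULL;	// rtree leaf allocation failed
 *	}
 *	emap_register_interior(tsdn, emap, edata, szind);
 *	...the slab is now fully discoverable from the emap...
 *	emap_deregister_interior(tsdn, emap, edata);
 *	emap_deregister_boundary(tsdn, emap, edata);
 */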
98 
99 typedef struct emap_prepare_s emap_prepare_t;
100 struct emap_prepare_s {
101 	rtree_leaf_elm_t *lead_elm_a;
102 	rtree_leaf_elm_t *lead_elm_b;
103 	rtree_leaf_elm_t *trail_elm_a;
104 	rtree_leaf_elm_t *trail_elm_b;
105 };
106 
107 /**
108  * These functions perform the emap metadata management for merging, splitting,
109  * and reusing extents.  In particular, they set the boundary mappings from
110  * addresses to edatas.  If the result is going to be used as a slab, you
111  * still need to call emap_register_interior on it, though.
112  *
113  * Remap simply changes the szind and slab status of an extent's boundary
114  * mappings.  If the extent is not a slab, it doesn't bother with updating the
115  * end mapping (since lookups only occur in the interior of an extent for
116  * slabs).  Since the szind and slab status only make sense for active extents,
117  * this should only be called while activating or deactivating an extent.
118  *
119  * Split and merge have a "prepare" and a "commit" portion.  The prepare portion
120  * does the operations that can be done without exclusive access to the extent
121  * in question, while the commit variant requires exclusive access to maintain
122  * the emap invariants.  The only function that can fail is emap_split_prepare,
123  * and it returns true on failure (at which point the caller shouldn't commit).
124  *
125  * In all cases, "lead" refers to the lower-addressed extent and "trail" to the
126  * higher-addressed one.  It's the caller's responsibility to set the edata
127  * state appropriately.
128  */
129 bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
130     edata_t *edata, size_t size_a, edata_t *trail, size_t size_b);
131 void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
132     edata_t *lead, size_t size_a, edata_t *trail, size_t size_b);
133 void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
134     edata_t *lead, edata_t *trail);
135 void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
136     edata_t *lead, edata_t *trail);
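
/*
 * A sketch of a split using the prepare/commit pair (locking and the edata
 * bookkeeping are elided; all variable names are hypothetical):
 *
 *	emap_prepare_t prepare;
 *	if (emap_split_prepare(tsdn, emap, &prepare, lead, size_a, trail,
 *	    size_b)) {
 *		// Could not reserve the rtree leaves; roll back instead of
 *		// committing.
 *		return true;
 *	}
 *	// With exclusive access to the extent, publish the new boundaries.
 *	emap_split_commit(tsdn, emap, &prepare, lead, size_a, trail, size_b);
 *
 * Merging is analogous, with a prepare step that cannot fail:
 *
 *	emap_merge_prepare(tsdn, emap, &prepare, lead, trail);
 *	emap_merge_commit(tsdn, emap, &prepare, lead, trail);
 */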
137 
138 /* Assert that the emap's view of the given edata matches the edata's view. */
139 void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
140 static inline void
141 emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
142 	if (config_debug) {
143 		emap_do_assert_mapped(tsdn, emap, edata);
144 	}
145 }
146 
147 /* Assert that the given edata isn't in the map. */
148 void emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
149 static inline void
150 emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
151 	if (config_debug) {
152 		emap_do_assert_not_mapped(tsdn, emap, edata);
153 	}
154 }
155 
156 JEMALLOC_ALWAYS_INLINE bool
157 emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
158 	assert(config_debug);
159 	emap_assert_mapped(tsdn, emap, edata);
160 
161 	EMAP_DECLARE_RTREE_CTX;
162 	rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
163 	    (uintptr_t)edata_base_get(edata));
164 
165 	return edata_state_in_transition(contents.metadata.state);
166 }
167 
168 JEMALLOC_ALWAYS_INLINE bool
169 emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
170 	if (!config_debug) {
171 		/* For assertions only. */
172 		return false;
173 	}
174 
175 	/*
176 	 * The edata is considered acquired if no other threads will attempt to
177 	 * read / write any fields from it.  This includes a few cases:
178 	 *
179 	 * 1) edata not hooked into emap yet -- This implies the edata just got
180 	 * allocated or initialized.
181 	 *
182 	 * 2) in an active or transition state -- In both cases, the edata can
183 	 * be discovered from the emap; however, the state tracked in the rtree
184 	 * will prevent other threads from accessing the actual edata.
185 	 */
186 	EMAP_DECLARE_RTREE_CTX;
187 	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree,
188 	    rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true,
189 	    /* init_missing */ false);
190 	if (elm == NULL) {
191 		return true;
192 	}
193 	rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm,
194 	    /* dependent */ true);
195 	if (contents.edata == NULL ||
196 	    contents.metadata.state == extent_state_active ||
197 	    edata_state_in_transition(contents.metadata.state)) {
198 		return true;
199 	}
200 
201 	return false;
202 }
203 
204 JEMALLOC_ALWAYS_INLINE void
205 extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) {
206 	assert(edata_arena_ind_get(inner) == edata_arena_ind_get(outer));
207 	assert(edata_pai_get(inner) == edata_pai_get(outer));
208 	assert(edata_committed_get(inner) == edata_committed_get(outer));
209 	assert(edata_state_get(inner) == extent_state_active);
210 	assert(edata_state_get(outer) == extent_state_merging);
211 	assert(!edata_guarded_get(inner) && !edata_guarded_get(outer));
212 	assert(edata_base_get(inner) == edata_past_get(outer) ||
213 	    edata_base_get(outer) == edata_past_get(inner));
214 }
215 
216 JEMALLOC_ALWAYS_INLINE void
217 extent_assert_can_expand(const edata_t *original, const edata_t *expand) {
218 	assert(edata_arena_ind_get(original) == edata_arena_ind_get(expand));
219 	assert(edata_pai_get(original) == edata_pai_get(expand));
220 	assert(edata_state_get(original) == extent_state_active);
221 	assert(edata_state_get(expand) == extent_state_merging);
222 	assert(edata_past_get(original) == edata_base_get(expand));
223 }
224 
225 JEMALLOC_ALWAYS_INLINE edata_t *
226 emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
227 	EMAP_DECLARE_RTREE_CTX;
228 
229 	return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata;
230 }
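
/*
 * A sketch of the basic lookup (ptr must be mapped; names are hypothetical):
 *
 *	edata_t *edata = emap_edata_lookup(tsdn, emap, ptr);
 *	size_t extent_size = edata_size_get(edata);
 */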
231 
232 /* Fills in alloc_ctx with the info in the map. */
233 JEMALLOC_ALWAYS_INLINE void
234 emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
235     emap_alloc_ctx_t *alloc_ctx) {
236 	EMAP_DECLARE_RTREE_CTX;
237 
238 	rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree,
239 	    rtree_ctx, (uintptr_t)ptr);
240 	alloc_ctx->szind = metadata.szind;
241 	alloc_ctx->slab = metadata.slab;
242 }
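
/*
 * A sketch of how a deallocation path might consume the result (hypothetical
 * caller; the routing details are elided):
 *
 *	emap_alloc_ctx_t alloc_ctx;
 *	emap_alloc_ctx_lookup(tsdn, emap, ptr, &alloc_ctx);
 *	if (alloc_ctx.slab) {
 *		...small allocation: free into the bin for alloc_ctx.szind...
 *	} else {
 *		...large allocation path...
 *	}
 */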
243 
244 /* The pointer must be mapped. */
245 JEMALLOC_ALWAYS_INLINE void
246 emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
247     emap_full_alloc_ctx_t *full_alloc_ctx) {
248 	EMAP_DECLARE_RTREE_CTX;
249 
250 	rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
251 	    (uintptr_t)ptr);
252 	full_alloc_ctx->edata = contents.edata;
253 	full_alloc_ctx->szind = contents.metadata.szind;
254 	full_alloc_ctx->slab = contents.metadata.slab;
255 }
256 
257 /*
258  * The pointer is allowed to not be mapped.
259  *
260  * Returns true when the pointer is not present.
261  */
262 JEMALLOC_ALWAYS_INLINE bool
263 emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
264     emap_full_alloc_ctx_t *full_alloc_ctx) {
265 	EMAP_DECLARE_RTREE_CTX;
266 
267 	rtree_contents_t contents;
268 	bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx,
269 	    (uintptr_t)ptr, &contents);
270 	if (err) {
271 		return true;
272 	}
273 	full_alloc_ctx->edata = contents.edata;
274 	full_alloc_ctx->szind = contents.metadata.szind;
275 	full_alloc_ctx->slab = contents.metadata.slab;
276 	return false;
277 }
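
/*
 * A sketch of handling the not-present case (hypothetical caller):
 *
 *	emap_full_alloc_ctx_t full_ctx;
 *	if (emap_full_alloc_ctx_try_lookup(tsdn, emap, ptr, &full_ctx)) {
 *		return false;	// ptr is not tracked by this emap
 *	}
 *	// Otherwise full_ctx.edata / szind / slab describe the allocation.
 */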
278 
279 /*
280  * Only used on the fast path of free.  Returns true when the lookup cannot be
281  * fulfilled by the fast path, e.g. when the metadata key is not cached.
282  */
283 JEMALLOC_ALWAYS_INLINE bool
284 emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr,
285     emap_alloc_ctx_t *alloc_ctx) {
286 	/* Use the unsafe getter since this may get called during exit. */
287 	rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd);
288 
289 	rtree_metadata_t metadata;
290 	bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree,
291 	    rtree_ctx, (uintptr_t)ptr, &metadata);
292 	if (err) {
293 		return true;
294 	}
295 	alloc_ctx->szind = metadata.szind;
296 	alloc_ctx->slab = metadata.slab;
297 	return false;
298 }
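
/*
 * A sketch of the intended fast-path pattern (hypothetical caller; on a cache
 * miss the caller would fall back to the regular lookup above):
 *
 *	emap_alloc_ctx_t alloc_ctx;
 *	if (emap_alloc_ctx_try_lookup_fast(tsd, emap, ptr, &alloc_ctx)) {
 *		return false;	// not cached: take the slow path
 *	}
 *	...proceed with the fast free using alloc_ctx.szind / alloc_ctx.slab...
 */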
299 
300 /*
301  * We want to do batch lookups out of the cache bins, which use
302  * cache_bin_ptr_array_get to access the i'th element of the bin (since they
303  * invert the usual ordering in deciding what to flush).  This lets the emap
304  * avoid caring about its caller's ordering.
305  */
306 typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind);
307 /*
308  * This allows size-checking assertions, which we can only do while we're in the
309  * process of edata lookups.
310  */
311 typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx);
312 
313 typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t;
314 union emap_batch_lookup_result_u {
315 	edata_t *edata;
316 	rtree_leaf_elm_t *rtree_leaf;
317 };
318 
319 JEMALLOC_ALWAYS_INLINE void
320 emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs,
321     emap_ptr_getter ptr_getter, void *ptr_getter_ctx,
322     emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx,
323     emap_batch_lookup_result_t *result) {
324 	/* Avoids null-checking tsdn in the loop below. */
325 	util_assume(tsd != NULL);
326 	rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get(tsd);
327 
328 	for (size_t i = 0; i < nptrs; i++) {
329 		const void *ptr = ptr_getter(ptr_getter_ctx, i);
330 		/*
331 		 * Reuse the edatas array as a temp buffer, lying a little about
332 		 * the types.
333 		 */
334 		result[i].rtree_leaf = rtree_leaf_elm_lookup(tsd_tsdn(tsd),
335 		    &emap->rtree, rtree_ctx, (uintptr_t)ptr,
336 		    /* dependent */ true, /* init_missing */ false);
337 	}
338 
339 	for (size_t i = 0; i < nptrs; i++) {
340 		rtree_leaf_elm_t *elm = result[i].rtree_leaf;
341 		rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd),
342 		    &emap->rtree, elm, /* dependent */ true);
343 		result[i].edata = contents.edata;
344 		emap_full_alloc_ctx_t alloc_ctx;
345 		/*
346 		 * Not all of these fields are read in practice by the metadata
347 		 * visitor.  But the compiler can easily optimize away the ones
348 		 * that are not, so there is no sense in being incomplete.
349 		 */
350 		alloc_ctx.szind = contents.metadata.szind;
351 		alloc_ctx.slab = contents.metadata.slab;
352 		alloc_ctx.edata = contents.edata;
353 		metadata_visitor(metadata_visitor_ctx, &alloc_ctx);
354 	}
355 }
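
/*
 * A sketch of supplying the two callbacks (the names below are hypothetical;
 * in-tree callers wrap cache-bin pointer arrays rather than a plain array):
 *
 *	static const void *
 *	example_ptr_getter(void *ctx, size_t ind) {
 *		void **ptrs = (void **)ctx;
 *		return ptrs[ind];
 *	}
 *
 *	static void
 *	example_metadata_visitor(void *ctx, emap_full_alloc_ctx_t *alloc_ctx) {
 *		// E.g. assert that every flushed pointer is a slab allocation.
 *		assert(alloc_ctx->slab);
 *	}
 *
 *	emap_batch_lookup_result_t result[NPTRS];
 *	emap_edata_lookup_batch(tsd, emap, nptrs, &example_ptr_getter, ptrs,
 *	    &example_metadata_visitor, NULL, result);
 *	// result[i].edata now holds the edata for the i'th pointer.
 */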
356 
357 #endif /* JEMALLOC_INTERNAL_EMAP_H */
358