#ifndef JEMALLOC_INTERNAL_PH_H
#define JEMALLOC_INTERNAL_PH_H

/*
 * A Pairing Heap implementation.
 *
 * "The Pairing Heap: A New Form of Self-Adjusting Heap"
 * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
 *
 * With an auxiliary twopass list, described in a follow-on paper.
 *
 * "Pairing Heaps: Experiments and Analysis"
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
 *
 *******************************************************************************
 *
 * We include a non-obvious optimization:
 * - First, we introduce a new pop-and-link operation; pop the two most
 *   recently-inserted items off the aux-list, link them, and push the resulting
 *   heap.
 * - We maintain a count of the number of insertions since the last time we
 *   merged the aux-list (i.e. via first() or remove_first()).  After N inserts,
 *   we do ffs(N) pop-and-link operations.
 *
 * One way to think of this is that we're progressively building up a tree in
 * the aux-list, rather than a linked-list (think of the series of merges that
 * will be performed as the aux-count grows).
 *
 * There are a couple of reasons we benefit from this:
 * - Ordinarily, after N insertions, the aux-list is of size N.  With our
 *   strategy, it's of size O(log(N)).  So we decrease the worst-case time of
 *   first() calls, and reduce the average cost of remove_first() calls.  Since
 *   these almost always occur while holding a lock, we practically reduce the
 *   frequency of unusually long hold times.
 * - This moves the bulk of the work of merging the aux-list onto the threads
 *   that are inserting into the heap.  In some common scenarios, insertions
 *   happen in bulk, from a single thread (think tcache flushing; we potentially
 *   move many slabs from slabs_full to slabs_nonfull).  All the nodes in this
 *   case are in the inserting thread's cache, and linking them is very cheap
 *   (cache misses dominate linking cost).  Without this optimization, linking
 *   happens on the next call to remove_first().  Since that remove_first() call
 *   likely happens on a different thread (or at least, after the cache has
 *   gotten cold if done on the same thread), deferring linking trades cheap
 *   link operations now for expensive ones later.
 *
 * The ffs trick keeps amortized insert cost at constant time.  Similar
 * strategies based on periodically sorting the list after a batch of operations
 * perform worse than this in practice, even with various fancy tricks; they
 * all take the amortized complexity of an insert from O(1) to O(log(n)).
 */
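
/*
 * A sketch of the resulting merge schedule (following the ffs(N) description
 * above; ph_insert() below actually computes ffs_zu(auxcount - 1), so the
 * exact bookkeeping is off by one from this idealized picture):
 *
 *	N (inserts since last aux merge):  1  2  3  4  5  6  7  8
 *	pop-and-link operations (ffs(N)):  1  2  1  3  1  2  1  4
 *
 * This is the usual binary-counter amortization: summing ffs(N) over
 * N = 1..2^k gives roughly 2^(k+1), so the average number of pop-and-link
 * operations per insert stays below 2, while the aux-list length stays
 * O(log(N)).
 */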

typedef int (*ph_cmp_t)(void *, void *);

/* Node structure. */
typedef struct phn_link_s phn_link_t;
struct phn_link_s {
	void *prev;
	void *next;
	void *lchild;
};

typedef struct ph_s ph_t;
struct ph_s {
	void *root;
	/*
	 * Inserts done since the last aux-list merge.  This is not necessarily
	 * the size of the aux-list, since it's possible that removals have
	 * happened since, and we don't track whether or not those removals are
	 * from the aux list.
	 */
	size_t auxcount;
};

JEMALLOC_ALWAYS_INLINE phn_link_t *
phn_link_get(void *phn, size_t offset) {
	return (phn_link_t *)(((uintptr_t)phn) + offset);
}

JEMALLOC_ALWAYS_INLINE void
phn_link_init(void *phn, size_t offset) {
	phn_link_get(phn, offset)->prev = NULL;
	phn_link_get(phn, offset)->next = NULL;
	phn_link_get(phn, offset)->lchild = NULL;
}
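
/*
 * An illustrative sketch of the intended calling convention (my_node_t and
 * heap_link are hypothetical names): callers embed a phn_link_t in their own
 * node type and pass offsetof() down, so the helpers can recover the link
 * fields from an untyped node pointer:
 *
 *	typedef struct my_node_s {
 *		uint64_t key;
 *		phn_link_t heap_link;
 *	} my_node_t;
 *
 *	my_node_t node;
 *	phn_link_init(&node, offsetof(my_node_t, heap_link));
 *	assert(phn_link_get(&node, offsetof(my_node_t, heap_link))
 *	    == &node.heap_link);
 */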

/* Internal utility helpers. */
JEMALLOC_ALWAYS_INLINE void *
phn_lchild_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->lchild;
}

JEMALLOC_ALWAYS_INLINE void
phn_lchild_set(void *phn, void *lchild, size_t offset) {
	phn_link_get(phn, offset)->lchild = lchild;
}

JEMALLOC_ALWAYS_INLINE void *
phn_next_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->next;
}

JEMALLOC_ALWAYS_INLINE void
phn_next_set(void *phn, void *next, size_t offset) {
	phn_link_get(phn, offset)->next = next;
}

JEMALLOC_ALWAYS_INLINE void *
phn_prev_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->prev;
}

JEMALLOC_ALWAYS_INLINE void
phn_prev_set(void *phn, void *prev, size_t offset) {
	phn_link_get(phn, offset)->prev = prev;
}

JEMALLOC_ALWAYS_INLINE void
phn_merge_ordered(void *phn0, void *phn1, size_t offset,
    ph_cmp_t cmp) {
	void *phn0child;

	assert(phn0 != NULL);
	assert(phn1 != NULL);
	assert(cmp(phn0, phn1) <= 0);

	phn_prev_set(phn1, phn0, offset);
	phn0child = phn_lchild_get(phn0, offset);
	phn_next_set(phn1, phn0child, offset);
	if (phn0child != NULL) {
		phn_prev_set(phn0child, phn1, offset);
	}
	phn_lchild_set(phn0, phn1, offset);
}
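
/*
 * Pointer-rewiring sketch for phn_merge_ordered(), assuming
 * cmp(phn0, phn1) <= 0 and phn0 already has a leftmost child c:
 *
 *	before:	phn0		after:	phn0
 *		 |			 |
 *		 c - ...		 phn1 - c - ...
 *
 * phn1 becomes phn0's new leftmost child; phn0's old child list is hung off
 * phn1's next pointer, with prev pointers updated to match.
 */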

JEMALLOC_ALWAYS_INLINE void *
phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
	void *result;
	if (phn0 == NULL) {
		result = phn1;
	} else if (phn1 == NULL) {
		result = phn0;
	} else if (cmp(phn0, phn1) < 0) {
		phn_merge_ordered(phn0, phn1, offset, cmp);
		result = phn0;
	} else {
		phn_merge_ordered(phn1, phn0, offset, cmp);
		result = phn1;
	}
	return result;
}

JEMALLOC_ALWAYS_INLINE void *
phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
	void *head = NULL;
	void *tail = NULL;
	void *phn0 = phn;
	void *phn1 = phn_next_get(phn0, offset);

	/*
	 * Multipass merge, wherein the first two elements of a FIFO
	 * are repeatedly merged, and each result is appended to the
	 * singly linked FIFO, until the FIFO contains only a single
	 * element.  We start with a sibling list but no reference to
	 * its tail, so we do a single pass over the sibling list to
	 * populate the FIFO.
	 */
	if (phn1 != NULL) {
		void *phnrest = phn_next_get(phn1, offset);
		if (phnrest != NULL) {
			phn_prev_set(phnrest, NULL, offset);
		}
		phn_prev_set(phn0, NULL, offset);
		phn_next_set(phn0, NULL, offset);
		phn_prev_set(phn1, NULL, offset);
		phn_next_set(phn1, NULL, offset);
		phn0 = phn_merge(phn0, phn1, offset, cmp);
		head = tail = phn0;
		phn0 = phnrest;
		while (phn0 != NULL) {
			phn1 = phn_next_get(phn0, offset);
			if (phn1 != NULL) {
				phnrest = phn_next_get(phn1, offset);
				if (phnrest != NULL) {
					phn_prev_set(phnrest, NULL, offset);
				}
				phn_prev_set(phn0, NULL, offset);
				phn_next_set(phn0, NULL, offset);
				phn_prev_set(phn1, NULL, offset);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = phnrest;
			} else {
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = NULL;
			}
		}
		phn0 = head;
		phn1 = phn_next_get(phn0, offset);
		if (phn1 != NULL) {
			while (true) {
				head = phn_next_get(phn1, offset);
				assert(phn_prev_get(phn0, offset) == NULL);
				phn_next_set(phn0, NULL, offset);
				assert(phn_prev_get(phn1, offset) == NULL);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				if (head == NULL) {
					break;
				}
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = head;
				phn1 = phn_next_get(phn0, offset);
			}
		}
	}
	return phn0;
}
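
/*
 * Worked trace (a sketch): given the sibling list [a b c d e], the first
 * pass pairs adjacent elements and appends each merge result to the FIFO,
 * yielding [ab cd e] (xy denotes merge(x, y)).  The loop at the end then
 * keeps merging the two head elements and appending the result:
 *
 *	[a b c d e] -> [ab cd e] -> [e abcd] -> [eabcd]
 *
 * Appending results to the tail (rather than merging right-to-left as in
 * the classic twopass scheme) is what makes this the multipass variant.
 */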

JEMALLOC_ALWAYS_INLINE void
ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	ph->auxcount = 0;
	void *phn = phn_next_get(ph->root, offset);
	if (phn != NULL) {
		phn_prev_set(ph->root, NULL, offset);
		phn_next_set(ph->root, NULL, offset);
		phn_prev_set(phn, NULL, offset);
		phn = phn_merge_siblings(phn, offset, cmp);
		assert(phn_next_get(phn, offset) == NULL);
		ph->root = phn_merge(ph->root, phn, offset, cmp);
	}
}

JEMALLOC_ALWAYS_INLINE void *
ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) {
	void *result;
	void *lchild = phn_lchild_get(phn, offset);
	if (lchild == NULL) {
		result = NULL;
	} else {
		result = phn_merge_siblings(lchild, offset, cmp);
	}
	return result;
}

JEMALLOC_ALWAYS_INLINE void
ph_new(ph_t *ph) {
	ph->root = NULL;
	ph->auxcount = 0;
}

JEMALLOC_ALWAYS_INLINE bool
ph_empty(ph_t *ph) {
	return ph->root == NULL;
}

JEMALLOC_ALWAYS_INLINE void *
ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	if (ph->root == NULL) {
		return NULL;
	}
	ph_merge_aux(ph, offset, cmp);
	return ph->root;
}

JEMALLOC_ALWAYS_INLINE void *
ph_any(ph_t *ph, size_t offset) {
	if (ph->root == NULL) {
		return NULL;
	}
	void *aux = phn_next_get(ph->root, offset);
	if (aux != NULL) {
		return aux;
	}
	return ph->root;
}

/* Returns true if we should stop trying to merge. */
JEMALLOC_ALWAYS_INLINE bool
ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	assert(ph->root != NULL);
	void *phn0 = phn_next_get(ph->root, offset);
	if (phn0 == NULL) {
		return true;
	}
	void *phn1 = phn_next_get(phn0, offset);
	if (phn1 == NULL) {
		return true;
	}
	void *next_phn1 = phn_next_get(phn1, offset);
	phn_next_set(phn0, NULL, offset);
	phn_prev_set(phn0, NULL, offset);
	phn_next_set(phn1, NULL, offset);
	phn_prev_set(phn1, NULL, offset);
	phn0 = phn_merge(phn0, phn1, offset, cmp);
	phn_next_set(phn0, next_phn1, offset);
	if (next_phn1 != NULL) {
		phn_prev_set(next_phn1, phn0, offset);
	}
	phn_next_set(ph->root, phn0, offset);
	phn_prev_set(phn0, ph->root, offset);
	return next_phn1 == NULL;
}
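
/*
 * Sketch of one pop-and-link: with the aux list root -> x -> y -> rest
 * (x and y being the two most recent inserts), the code above detaches x
 * and y, links them, and splices the winner back in at the head:
 *
 *	before:	root -> x -> y -> rest
 *	after:	root -> merge(x, y) -> rest
 *
 * The true return value (fewer than two aux entries, or nothing after y)
 * tells ph_insert() that further pop-and-link attempts this round would
 * accomplish nothing.
 */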

JEMALLOC_ALWAYS_INLINE void
ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	phn_link_init(phn, offset);

	/*
	 * Treat the root as an aux list during insertion, and lazily merge
	 * during a_prefix##remove_first().  For elements that are inserted,
	 * then removed via a_prefix##remove() before the aux list is ever
	 * processed, this makes insert/remove constant-time, whereas eager
	 * merging would make insert O(log n).
	 */
	if (ph->root == NULL) {
		ph->root = phn;
	} else {
		/*
		 * As a special case, check to see if we can replace the root.
		 * This is common in practice in some important cases, and lets
		 * us defer some insertions (hopefully, until the point where
		 * some of the items in the aux list have been removed, saving
		 * us from linking them at all).
		 */
		if (cmp(phn, ph->root) < 0) {
			phn_lchild_set(phn, ph->root, offset);
			phn_prev_set(ph->root, phn, offset);
			ph->root = phn;
			ph->auxcount = 0;
			return;
		}
		ph->auxcount++;
		phn_next_set(phn, phn_next_get(ph->root, offset), offset);
		if (phn_next_get(ph->root, offset) != NULL) {
			phn_prev_set(phn_next_get(ph->root, offset), phn,
			    offset);
		}
		phn_prev_set(phn, ph->root, offset);
		phn_next_set(ph->root, phn, offset);
	}
	if (ph->auxcount > 1) {
		unsigned nmerges = ffs_zu(ph->auxcount - 1);
		bool done = false;
		for (unsigned i = 0; i < nmerges && !done; i++) {
			done = ph_try_aux_merge_pair(ph, offset, cmp);
		}
	}
}

JEMALLOC_ALWAYS_INLINE void *
ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	void *ret;

	if (ph->root == NULL) {
		return NULL;
	}
	ph_merge_aux(ph, offset, cmp);
	ret = ph->root;
	ph->root = ph_merge_children(ph->root, offset, cmp);

	return ret;
}

JEMALLOC_ALWAYS_INLINE void
ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	void *replace;
	void *parent;

	if (ph->root == phn) {
		/*
		 * We can delete from the aux list without merging it, but we
		 * need to merge if we are dealing with the root node and it
		 * has children.
		 */
		if (phn_lchild_get(phn, offset) == NULL) {
			ph->root = phn_next_get(phn, offset);
			if (ph->root != NULL) {
				phn_prev_set(ph->root, NULL, offset);
			}
			return;
		}
		ph_merge_aux(ph, offset, cmp);
		if (ph->root == phn) {
			ph->root = ph_merge_children(ph->root, offset, cmp);
			return;
		}
	}

	/* Get the parent (if phn is a leftmost child) before mutating. */
	if ((parent = phn_prev_get(phn, offset)) != NULL) {
		if (phn_lchild_get(parent, offset) != phn) {
			parent = NULL;
		}
	}
	/* Find a possible replacement node, and link it to the parent. */
	replace = ph_merge_children(phn, offset, cmp);
	/* Set next/prev for the sibling linked list. */
	if (replace != NULL) {
		if (parent != NULL) {
			phn_prev_set(replace, parent, offset);
			phn_lchild_set(parent, replace, offset);
		} else {
			phn_prev_set(replace, phn_prev_get(phn, offset),
			    offset);
			if (phn_prev_get(phn, offset) != NULL) {
				phn_next_set(phn_prev_get(phn, offset), replace,
				    offset);
			}
		}
		phn_next_set(replace, phn_next_get(phn, offset), offset);
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(phn_next_get(phn, offset), replace,
			    offset);
		}
	} else {
		if (parent != NULL) {
			void *next = phn_next_get(phn, offset);
			phn_lchild_set(parent, next, offset);
			if (next != NULL) {
				phn_prev_set(next, parent, offset);
			}
		} else {
			assert(phn_prev_get(phn, offset) != NULL);
			phn_next_set(
			    phn_prev_get(phn, offset),
			    phn_next_get(phn, offset), offset);
		}
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(
			    phn_next_get(phn, offset),
			    phn_prev_get(phn, offset), offset);
		}
	}
}

#define ph_structs(a_prefix, a_type)					\
typedef struct {							\
	phn_link_t link;						\
} a_prefix##_link_t;							\
									\
typedef struct {							\
	ph_t ph;							\
} a_prefix##_t

/*
 * The ph_proto() macro generates function prototypes that correspond to the
 * functions generated by an equivalently parameterized call to ph_gen().
 */
#define ph_proto(a_attr, a_prefix, a_type)				\
									\
a_attr void a_prefix##_new(a_prefix##_t *ph);				\
a_attr bool a_prefix##_empty(a_prefix##_t *ph);				\
a_attr a_type *a_prefix##_first(a_prefix##_t *ph);			\
a_attr a_type *a_prefix##_any(a_prefix##_t *ph);			\
a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);		\
a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph)

/* The ph_gen() macro generates a type-specific pairing heap implementation. */
#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)		\
JEMALLOC_ALWAYS_INLINE int						\
a_prefix##_ph_cmp(void *a, void *b) {					\
	return a_cmp((a_type *)a, (a_type *)b);				\
}									\
									\
a_attr void								\
a_prefix##_new(a_prefix##_t *ph) {					\
	ph_new(&ph->ph);						\
}									\
									\
a_attr bool								\
a_prefix##_empty(a_prefix##_t *ph) {					\
	return ph_empty(&ph->ph);					\
}									\
									\
a_attr a_type *								\
a_prefix##_first(a_prefix##_t *ph) {					\
	return ph_first(&ph->ph, offsetof(a_type, a_field),		\
	    &a_prefix##_ph_cmp);					\
}									\
									\
a_attr a_type *								\
a_prefix##_any(a_prefix##_t *ph) {					\
	return ph_any(&ph->ph, offsetof(a_type, a_field));		\
}									\
									\
a_attr void								\
a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {			\
	ph_insert(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_first(a_prefix##_t *ph) {				\
	return ph_remove_first(&ph->ph, offsetof(a_type, a_field),	\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr void								\
a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {			\
	ph_remove(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_any(a_prefix##_t *ph) {				\
	a_type *ret = a_prefix##_any(ph);				\
	if (ret != NULL) {						\
		a_prefix##_remove(ph, ret);				\
	}								\
	return ret;							\
}

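/*
 * End-to-end usage sketch under hypothetical names (widget_t, widget_heap,
 * key); real consumers in jemalloc follow the same pattern:
 *
 *	typedef struct widget_s widget_t;
 *	ph_structs(widget_heap, widget_t);
 *	struct widget_s {
 *		uint64_t key;
 *		widget_heap_link_t link;
 *	};
 *
 *	static int
 *	widget_cmp(widget_t *a, widget_t *b) {
 *		return (a->key > b->key) - (a->key < b->key);
 *	}
 *
 *	ph_proto(static, widget_heap, widget_t);
 *	ph_gen(static, widget_heap, widget_t, link, widget_cmp)
 *
 *	widget_heap_t heap;
 *	widget_heap_new(&heap);
 *	widget_heap_insert(&heap, &some_widget);
 *	widget_t *min = widget_heap_remove_first(&heap);
 */
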
#endif /* JEMALLOC_INTERNAL_PH_H */