#ifndef JEMALLOC_INTERNAL_PH_H
#define JEMALLOC_INTERNAL_PH_H

/*
 * A Pairing Heap implementation.
 *
 * "The Pairing Heap: A New Form of Self-Adjusting Heap"
 * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
 *
 * With an auxiliary twopass list, as described in a follow-on paper.
 *
 * "Pairing Heaps: Experiments and Analysis"
 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
 *
 *******************************************************************************
 *
 * We include a non-obvious optimization:
 * - First, we introduce a new pop-and-link operation: pop the two most
 * recently-inserted items off the aux-list, link them, and push the resulting
 * heap back onto the aux-list.
 * - We maintain a count of the number of insertions since the last time we
 * merged the aux-list (i.e. via first() or remove_first()). After N inserts,
 * we do ffs(N) pop-and-link operations.
 *
 * One way to think of this is that we're progressively building up a tree in
 * the aux-list, rather than a linked-list (think of the series of merges that
 * will be performed as the aux-count grows).
 *
 * There are a couple of reasons we benefit from this:
 * - Ordinarily, after N insertions, the aux-list is of size N. With our
 * strategy, it's of size O(log(N)). So we decrease the worst-case time of
 * first() calls, and reduce the average cost of remove_first() calls. Since
 * these almost always occur while holding a lock, in practice we reduce the
 * frequency of unusually long hold times.
 * - This moves the bulk of the work of merging the aux-list onto the threads
 * that are inserting into the heap. In some common scenarios, insertions
 * happen in bulk, from a single thread (think tcache flushing; we potentially
 * move many slabs from slabs_full to slabs_nonfull). All the nodes in this
 * case are in the inserting thread's cache, and linking them is very cheap
 * (cache misses dominate linking cost). Without this optimization, linking
 * happens on the next call to remove_first. Since that remove_first call
 * likely happens on a different thread (or at least, after the cache has
 * gotten cold if done on the same thread), deferring linking trades cheap
 * link operations now for expensive ones later.
 *
 * The ffs trick keeps the amortized insert cost at constant time: ffs(N)
 * averages out to a small constant over any run of inserts, so the total
 * link work stays linear in the number of insertions. Similar strategies
 * based on periodically sorting the list after a batch of operations perform
 * worse than this in practice, even with various fancy tricks; they all take
 * the amortized complexity of an insert from O(1) to O(log(n)).
 */

typedef int (*ph_cmp_t)(void *, void *);

/* Node structure. */
typedef struct phn_link_s phn_link_t;
struct phn_link_s {
	void *prev;
	void *next;
	void *lchild;
};

typedef struct ph_s ph_t;
struct ph_s {
	void *root;
	/*
	 * Inserts done since the last aux-list merge. This is not necessarily
	 * the size of the aux-list, since it's possible that removals have
	 * happened since, and we don't track whether or not those removals are
	 * from the aux list.
	 */
	size_t auxcount;
};
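/*
 * The heap is intrusive: rather than allocating its own nodes, it threads
 * links through client-provided structs. A minimal sketch of the layout
 * (node_t and key are hypothetical, not part of this file): the raw
 * phn_*()/ph_*() functions below locate the link via its byte offset, which
 * the ph_gen() macro at the end of this file supplies as
 * offsetof(a_type, a_field).
 *
 *	typedef struct node_s {
 *		uint64_t key;
 *		phn_link_t link;
 *	} node_t;
 *
 *	// e.g. phn_link_get(node, offsetof(node_t, link)) recovers the link.
 */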
JEMALLOC_ALWAYS_INLINE phn_link_t *
phn_link_get(void *phn, size_t offset) {
	return (phn_link_t *)(((uintptr_t)phn) + offset);
}

JEMALLOC_ALWAYS_INLINE void
phn_link_init(void *phn, size_t offset) {
	phn_link_get(phn, offset)->prev = NULL;
	phn_link_get(phn, offset)->next = NULL;
	phn_link_get(phn, offset)->lchild = NULL;
}

/* Internal utility helpers. */
JEMALLOC_ALWAYS_INLINE void *
phn_lchild_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->lchild;
}

JEMALLOC_ALWAYS_INLINE void
phn_lchild_set(void *phn, void *lchild, size_t offset) {
	phn_link_get(phn, offset)->lchild = lchild;
}

JEMALLOC_ALWAYS_INLINE void *
phn_next_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->next;
}

JEMALLOC_ALWAYS_INLINE void
phn_next_set(void *phn, void *next, size_t offset) {
	phn_link_get(phn, offset)->next = next;
}

JEMALLOC_ALWAYS_INLINE void *
phn_prev_get(void *phn, size_t offset) {
	return phn_link_get(phn, offset)->prev;
}

JEMALLOC_ALWAYS_INLINE void
phn_prev_set(void *phn, void *prev, size_t offset) {
	phn_link_get(phn, offset)->prev = prev;
}

JEMALLOC_ALWAYS_INLINE void
phn_merge_ordered(void *phn0, void *phn1, size_t offset,
    ph_cmp_t cmp) {
	void *phn0child;

	assert(phn0 != NULL);
	assert(phn1 != NULL);
	assert(cmp(phn0, phn1) <= 0);

	phn_prev_set(phn1, phn0, offset);
	phn0child = phn_lchild_get(phn0, offset);
	phn_next_set(phn1, phn0child, offset);
	if (phn0child != NULL) {
		phn_prev_set(phn0child, phn1, offset);
	}
	phn_lchild_set(phn0, phn1, offset);
}

JEMALLOC_ALWAYS_INLINE void *
phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
	void *result;
	if (phn0 == NULL) {
		result = phn1;
	} else if (phn1 == NULL) {
		result = phn0;
	} else if (cmp(phn0, phn1) < 0) {
		phn_merge_ordered(phn0, phn1, offset, cmp);
		result = phn0;
	} else {
		phn_merge_ordered(phn1, phn0, offset, cmp);
		result = phn1;
	}
	return result;
}
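/*
 * An illustration (not from the original source) of phn_merge_ordered():
 * assuming cmp(a, b) <= 0 and that a already has leftmost child c, b is
 * pushed onto the front of a's child list. With | denoting an lchild link
 * and - a next (sibling) link:
 *
 *	 a              a
 *	 |       =>     |
 *	 c              b - c
 *
 * Only O(1) pointer updates are needed; b simply inherits a's old leftmost
 * child c as its next sibling.
 */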
JEMALLOC_ALWAYS_INLINE void *
phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
	void *head = NULL;
	void *tail = NULL;
	void *phn0 = phn;
	void *phn1 = phn_next_get(phn0, offset);

	/*
	 * Multipass merge, wherein the first two elements of a FIFO
	 * are repeatedly merged, and each result is appended to the
	 * singly linked FIFO, until the FIFO contains only a single
	 * element. We start with a sibling list but no reference to
	 * its tail, so we do a single pass over the sibling list to
	 * populate the FIFO.
	 */
	if (phn1 != NULL) {
		void *phnrest = phn_next_get(phn1, offset);
		if (phnrest != NULL) {
			phn_prev_set(phnrest, NULL, offset);
		}
		phn_prev_set(phn0, NULL, offset);
		phn_next_set(phn0, NULL, offset);
		phn_prev_set(phn1, NULL, offset);
		phn_next_set(phn1, NULL, offset);
		phn0 = phn_merge(phn0, phn1, offset, cmp);
		head = tail = phn0;
		phn0 = phnrest;
		while (phn0 != NULL) {
			phn1 = phn_next_get(phn0, offset);
			if (phn1 != NULL) {
				phnrest = phn_next_get(phn1, offset);
				if (phnrest != NULL) {
					phn_prev_set(phnrest, NULL, offset);
				}
				phn_prev_set(phn0, NULL, offset);
				phn_next_set(phn0, NULL, offset);
				phn_prev_set(phn1, NULL, offset);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = phnrest;
			} else {
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = NULL;
			}
		}
		phn0 = head;
		phn1 = phn_next_get(phn0, offset);
		if (phn1 != NULL) {
			while (true) {
				head = phn_next_get(phn1, offset);
				assert(phn_prev_get(phn0, offset) == NULL);
				phn_next_set(phn0, NULL, offset);
				assert(phn_prev_get(phn1, offset) == NULL);
				phn_next_set(phn1, NULL, offset);
				phn0 = phn_merge(phn0, phn1, offset, cmp);
				if (head == NULL) {
					break;
				}
				phn_next_set(tail, phn0, offset);
				tail = phn0;
				phn0 = head;
				phn1 = phn_next_get(phn0, offset);
			}
		}
	}
	return phn0;
}
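/*
 * A worked example (illustrative, not from the original source) of the
 * multipass merge above, on the sibling list a b c d e, writing [xy] for
 * the heap produced by merging x and y:
 *
 *	a b c d e          initial sibling list
 *	[ab] [cd] e        first pass pairs up the list into a FIFO
 *	e [[ab][cd]]       merge the first two, append the result
 *	[e [ab][cd]]       one element left; this becomes the new root
 */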
JEMALLOC_ALWAYS_INLINE void
ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	ph->auxcount = 0;
	void *phn = phn_next_get(ph->root, offset);
	if (phn != NULL) {
		phn_prev_set(ph->root, NULL, offset);
		phn_next_set(ph->root, NULL, offset);
		phn_prev_set(phn, NULL, offset);
		phn = phn_merge_siblings(phn, offset, cmp);
		assert(phn_next_get(phn, offset) == NULL);
		ph->root = phn_merge(ph->root, phn, offset, cmp);
	}
}

JEMALLOC_ALWAYS_INLINE void *
ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) {
	void *result;
	void *lchild = phn_lchild_get(phn, offset);
	if (lchild == NULL) {
		result = NULL;
	} else {
		result = phn_merge_siblings(lchild, offset, cmp);
	}
	return result;
}

JEMALLOC_ALWAYS_INLINE void
ph_new(ph_t *ph) {
	ph->root = NULL;
	ph->auxcount = 0;
}

JEMALLOC_ALWAYS_INLINE bool
ph_empty(ph_t *ph) {
	return ph->root == NULL;
}

JEMALLOC_ALWAYS_INLINE void *
ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	if (ph->root == NULL) {
		return NULL;
	}
	ph_merge_aux(ph, offset, cmp);
	return ph->root;
}

JEMALLOC_ALWAYS_INLINE void *
ph_any(ph_t *ph, size_t offset) {
	if (ph->root == NULL) {
		return NULL;
	}
	void *aux = phn_next_get(ph->root, offset);
	if (aux != NULL) {
		return aux;
	}
	return ph->root;
}

/* Returns true if we should stop trying to merge. */
JEMALLOC_ALWAYS_INLINE bool
ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	assert(ph->root != NULL);
	void *phn0 = phn_next_get(ph->root, offset);
	if (phn0 == NULL) {
		return true;
	}
	void *phn1 = phn_next_get(phn0, offset);
	if (phn1 == NULL) {
		return true;
	}
	void *next_phn1 = phn_next_get(phn1, offset);
	phn_next_set(phn0, NULL, offset);
	phn_prev_set(phn0, NULL, offset);
	phn_next_set(phn1, NULL, offset);
	phn_prev_set(phn1, NULL, offset);
	phn0 = phn_merge(phn0, phn1, offset, cmp);
	phn_next_set(phn0, next_phn1, offset);
	if (next_phn1 != NULL) {
		phn_prev_set(next_phn1, phn0, offset);
	}
	phn_next_set(ph->root, phn0, offset);
	phn_prev_set(phn0, ph->root, offset);
	return next_phn1 == NULL;
}

JEMALLOC_ALWAYS_INLINE void
ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	phn_link_init(phn, offset);

	/*
	 * Treat the root as an aux list during insertion, and lazily merge
	 * during a_prefix##remove_first(). For elements that are inserted,
	 * then removed via a_prefix##remove() before the aux list is ever
	 * processed, this makes insert/remove constant-time, whereas eager
	 * merging would make insert O(log n).
	 */
	if (ph->root == NULL) {
		ph->root = phn;
	} else {
		/*
		 * As a special case, check to see if we can replace the root.
		 * This case is common in practice in some important workloads,
		 * and lets us defer some insertions (hopefully until after
		 * some of the items in the aux list have been removed, saving
		 * us from linking them at all).
		 */
		if (cmp(phn, ph->root) < 0) {
			phn_lchild_set(phn, ph->root, offset);
			phn_prev_set(ph->root, phn, offset);
			ph->root = phn;
			ph->auxcount = 0;
			return;
		}
		ph->auxcount++;
		phn_next_set(phn, phn_next_get(ph->root, offset), offset);
		if (phn_next_get(ph->root, offset) != NULL) {
			phn_prev_set(phn_next_get(ph->root, offset), phn,
			    offset);
		}
		phn_prev_set(phn, ph->root, offset);
		phn_next_set(ph->root, phn, offset);
	}
	if (ph->auxcount > 1) {
		unsigned nmerges = ffs_zu(ph->auxcount - 1);
		bool done = false;
		for (unsigned i = 0; i < nmerges && !done; i++) {
			done = ph_try_aux_merge_pair(ph, offset, cmp);
		}
	}
}
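/*
 * An illustration (not from the original source) of the pop-and-link
 * schedule above, taking ffs to be 1-based as in the comment at the top of
 * this file (ffs_zu()'s exact bit-index convention is defined elsewhere in
 * jemalloc):
 *
 *	inserts since last merge (N):  1  2  3  4  5  6  7  8
 *	ffs(N):                        1  2  1  3  1  2  1  4
 *
 * Summed over any run of N inserts, the counts total roughly 2N, which is
 * why the amortized insert cost stays O(1) while the aux-list length stays
 * O(log(N)).
 */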
JEMALLOC_ALWAYS_INLINE void *
ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
	void *ret;

	if (ph->root == NULL) {
		return NULL;
	}
	ph_merge_aux(ph, offset, cmp);
	ret = ph->root;
	ph->root = ph_merge_children(ph->root, offset, cmp);

	return ret;
}

JEMALLOC_ALWAYS_INLINE void
ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
	void *replace;
	void *parent;

	if (ph->root == phn) {
		/*
		 * We can delete from the aux list without merging it, but we
		 * need to merge if we are dealing with the root node and it
		 * has children.
		 */
		if (phn_lchild_get(phn, offset) == NULL) {
			ph->root = phn_next_get(phn, offset);
			if (ph->root != NULL) {
				phn_prev_set(ph->root, NULL, offset);
			}
			return;
		}
		ph_merge_aux(ph, offset, cmp);
		if (ph->root == phn) {
			ph->root = ph_merge_children(ph->root, offset, cmp);
			return;
		}
	}

	/* Get parent (if phn is leftmost child) before mutating. */
	if ((parent = phn_prev_get(phn, offset)) != NULL) {
		if (phn_lchild_get(parent, offset) != phn) {
			parent = NULL;
		}
	}
	/* Find a possible replacement node, and link to parent. */
	replace = ph_merge_children(phn, offset, cmp);
	/* Set next/prev for sibling linked list. */
	if (replace != NULL) {
		if (parent != NULL) {
			phn_prev_set(replace, parent, offset);
			phn_lchild_set(parent, replace, offset);
		} else {
			phn_prev_set(replace, phn_prev_get(phn, offset),
			    offset);
			if (phn_prev_get(phn, offset) != NULL) {
				phn_next_set(phn_prev_get(phn, offset), replace,
				    offset);
			}
		}
		phn_next_set(replace, phn_next_get(phn, offset), offset);
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(phn_next_get(phn, offset), replace,
			    offset);
		}
	} else {
		if (parent != NULL) {
			void *next = phn_next_get(phn, offset);
			phn_lchild_set(parent, next, offset);
			if (next != NULL) {
				phn_prev_set(next, parent, offset);
			}
		} else {
			assert(phn_prev_get(phn, offset) != NULL);
			phn_next_set(
			    phn_prev_get(phn, offset),
			    phn_next_get(phn, offset), offset);
		}
		if (phn_next_get(phn, offset) != NULL) {
			phn_prev_set(
			    phn_next_get(phn, offset),
			    phn_prev_get(phn, offset), offset);
		}
	}
}

#define ph_structs(a_prefix, a_type)					\
typedef struct {							\
	phn_link_t link;						\
} a_prefix##_link_t;							\
									\
typedef struct {							\
	ph_t ph;							\
} a_prefix##_t

/*
 * The ph_proto() macro generates function prototypes that correspond to the
 * functions generated by an equivalently parameterized call to ph_gen().
 */
#define ph_proto(a_attr, a_prefix, a_type)				\
									\
a_attr void a_prefix##_new(a_prefix##_t *ph);				\
a_attr bool a_prefix##_empty(a_prefix##_t *ph);				\
a_attr a_type *a_prefix##_first(a_prefix##_t *ph);			\
a_attr a_type *a_prefix##_any(a_prefix##_t *ph);			\
a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph);		\
a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn);		\
a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph)
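/*
 * A usage sketch (illustrative only: node_t, node_cmp, and the node_heap
 * prefix are hypothetical, not part of jemalloc). Clients declare the
 * wrapper structs, then generate prototypes and, via the ph_gen() macro
 * below, the implementation:
 *
 *	typedef struct node_s node_t;
 *	ph_structs(node_heap, node_t);
 *	struct node_s {
 *		uint64_t key;
 *		node_heap_link_t link;
 *	};
 *
 *	static int
 *	node_cmp(node_t *a, node_t *b) {
 *		return (a->key > b->key) - (a->key < b->key);
 *	}
 *
 *	ph_proto(static, node_heap, node_t);
 *	ph_gen(static, node_heap, node_t, link, node_cmp)
 *
 * After which, for example:
 *
 *	node_heap_t heap;
 *	node_heap_new(&heap);
 *	node_heap_insert(&heap, node);
 *	node_t *min = node_heap_remove_first(&heap);
 */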
/* The ph_gen() macro generates a type-specific pairing heap implementation. */
#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp)		\
JEMALLOC_ALWAYS_INLINE int						\
a_prefix##_ph_cmp(void *a, void *b) {					\
	return a_cmp((a_type *)a, (a_type *)b);				\
}									\
									\
a_attr void								\
a_prefix##_new(a_prefix##_t *ph) {					\
	ph_new(&ph->ph);						\
}									\
									\
a_attr bool								\
a_prefix##_empty(a_prefix##_t *ph) {					\
	return ph_empty(&ph->ph);					\
}									\
									\
a_attr a_type *								\
a_prefix##_first(a_prefix##_t *ph) {					\
	return ph_first(&ph->ph, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_any(a_prefix##_t *ph) {					\
	return ph_any(&ph->ph, offsetof(a_type, a_field));		\
}									\
									\
a_attr void								\
a_prefix##_insert(a_prefix##_t *ph, a_type *phn) {			\
	ph_insert(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_first(a_prefix##_t *ph) {				\
	return ph_remove_first(&ph->ph, offsetof(a_type, a_field),	\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr void								\
a_prefix##_remove(a_prefix##_t *ph, a_type *phn) {			\
	ph_remove(&ph->ph, phn, offsetof(a_type, a_field),		\
	    a_prefix##_ph_cmp);						\
}									\
									\
a_attr a_type *								\
a_prefix##_remove_any(a_prefix##_t *ph) {				\
	a_type *ret = a_prefix##_any(ph);				\
	if (ret != NULL) {						\
		a_prefix##_remove(ph, ret);				\
	}								\
	return ret;							\
}

#endif /* JEMALLOC_INTERNAL_PH_H */