/*	$OpenBSD: uvm_addr.c,v 1.22 2016/09/16 02:50:54 dlg Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* #define DEBUG */

#include <sys/param.h>
#include <uvm/uvm.h>
#include <uvm/uvm_addr.h>
#include <sys/pool.h>

/* Max gap between hint allocations. */
#define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
/* Number of pivots in pivot allocator. */
#define NUM_PIVOTS		16
/*
 * Max number (inclusive) of pages the pivot allocator
 * will place between allocations.
 *
 * The uaddr_pivot_random() function attempts to bias towards
 * small space between allocations, so putting a large number here is fine.
 */
#define PIVOT_RND		8
/*
 * Number of allocations that a pivot can supply before expiring.
 * When a pivot expires, a new pivot has to be found.
 *
 * Must be at least 1.
 */
#define PIVOT_EXPIRE		1024


/* Pool with uvm_addr_state structures. */
struct pool uaddr_pool;
struct pool uaddr_hint_pool;
struct pool uaddr_bestfit_pool;
struct pool uaddr_pivot_pool;
struct pool uaddr_rnd_pool;

/* uvm_addr state for hint based selector. */
struct uaddr_hint_state {
	struct uvm_addr_state		 uaddr;
	vsize_t				 max_dist;
};

/* uvm_addr state for bestfit selector. */
struct uaddr_bestfit_state {
	struct uvm_addr_state		 ubf_uaddr;
	struct uaddr_free_rbtree	 ubf_free;
};

/* uvm_addr state for rnd selector. */
struct uaddr_rnd_state {
	struct uvm_addr_state		 ur_uaddr;
#if 0
	TAILQ_HEAD(, vm_map_entry)	 ur_free;
#endif
};

/* Definition of a pivot in pivot selector. */
struct uaddr_pivot {
	vaddr_t				 addr;	/* End of prev. allocation. */
	int				 expire;/* Best before date. */
	int				 dir;	/* Direction. */
	struct vm_map_entry		*entry;	/* Will contain next alloc. */
};
/* uvm_addr state for pivot selector. */
struct uaddr_pivot_state {
	struct uvm_addr_state		 up_uaddr;

	/* Free space tree, for fast pivot selection. */
	struct uaddr_free_rbtree	 up_free;

	/* List of pivots. The pointers point to after the last allocation. */
	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
};

/*
 * Free space comparison.
 * Compares smaller free-space before larger free-space.
 */
static inline int
uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
    const struct vm_map_entry *e2)
{
	if (e1->fspace != e2->fspace)
		return (e1->fspace < e2->fspace ? -1 : 1);
	return (e1->start < e2->start ? -1 : e1->start > e2->start);
}
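
/*
 * For example (hypothetical entries): under the ordering above, a 2-page
 * gap starting at 0x1000 sorts before a 2-page gap starting at 0x5000,
 * and both sort before an 8-page gap starting at 0x3000.  Ties on free
 * space fall back to the start address, so the ordering is total.
 */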

/* Forward declaration (see below). */
extern const struct uvm_addr_functions uaddr_kernel_functions;
struct uvm_addr_state uaddr_kbootstrap;

/* Support functions. */
#ifndef SMALL_KERNEL
struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
			    vsize_t);
#endif /* !SMALL_KERNEL */
void			 uaddr_kinsert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_kremove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_kbootstrapdestroy(struct uvm_addr_state *);

void			 uaddr_destroy(struct uvm_addr_state *);
void			 uaddr_hint_destroy(struct uvm_addr_state *);
void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
void			 uaddr_rnd_destroy(struct uvm_addr_state *);
void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
void			 uaddr_pivot_destroy(struct uvm_addr_state *);

#if 0
int			 uaddr_lin_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif
int			 uaddr_kbootstrap_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_rnd_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_hint_select(struct vm_map *,
			    struct uvm_addr_state*, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_bestfit_select(struct vm_map *,
			    struct uvm_addr_state*, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#ifndef SMALL_KERNEL
int			 uaddr_pivot_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_stack_brk_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif /* !SMALL_KERNEL */

void			 uaddr_rnd_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_rnd_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);

#ifndef SMALL_KERNEL
vsize_t			 uaddr_pivot_random(void);
int			 uaddr_pivot_newpivot(struct vm_map *,
			    struct uaddr_pivot_state *, struct uaddr_pivot *,
			    struct vm_map_entry **, vaddr_t *,
			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
#endif /* !SMALL_KERNEL */

#if defined(DEBUG) || defined(DDB)
void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#if 0
void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#endif
#endif /* DEBUG || DDB */


#ifndef SMALL_KERNEL
/*
 * Find smallest entry in tree that will fit sz bytes.
 */
struct vm_map_entry *
uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
{
	struct vm_map_entry	*tmp, *res;

	tmp = RBT_ROOT(uaddr_free_rbtree, free);
	res = NULL;
	while (tmp) {
		if (tmp->fspace >= sz) {
			res = tmp;
			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
		} else if (tmp->fspace < sz)
			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
	}
	return res;
}
#endif /* !SMALL_KERNEL */

static __inline vaddr_t
uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted < addr ? adjusted + align : adjusted);
}

static __inline vaddr_t
uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted > addr ? adjusted - align : adjusted);
}

/*
 * Try to fit the requested space into the entry.
 */
int
uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
    vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
    vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	vaddr_t	tmp;
	vsize_t	fspace;

	if (low_addr > high_addr)
		return ENOMEM;
	fspace = high_addr - low_addr;
	if (fspace < before_gap + after_gap)
		return ENOMEM;
	if (fspace - before_gap - after_gap < sz)
		return ENOMEM;

	/* Calculate lowest address. */
	low_addr += before_gap;
	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
	if (low_addr < tmp) /* Overflow during alignment. */
		return ENOMEM;
	if (high_addr - after_gap - sz < low_addr)
		return ENOMEM;

	/* Calculate highest address. */
	high_addr -= after_gap + sz;
	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
	if (high_addr > tmp) /* Overflow during alignment. */
		return ENOMEM;
	if (low_addr > high_addr)
		return ENOMEM;

	*min_result = low_addr;
	*max_result = high_addr;
	return 0;
}
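
/*
 * Worked example (hypothetical numbers): low_addr = 0x1000,
 * high_addr = 0x20000, sz = 0x3000, align = 0x4000, offset = 0 and
 * before_gap = after_gap = 0x1000.  The lowest candidate is
 * 0x1000 + 0x1000 rounded up to an alignment boundary, i.e. 0x4000;
 * the highest is 0x20000 - 0x1000 - 0x3000 = 0x1c000, which is already
 * aligned.  Any 0x4000-aligned address in [0x4000, 0x1c000] satisfies
 * the request; the caller picks whichever end suits its policy.
 */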

/*
 * Initialize uvm_addr.
 */
void
uvm_addr_init(void)
{
	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
	    IPL_VM, PR_WAITOK, "uaddr", NULL);
	pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrhint", NULL);
	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);

	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
}

/*
 * Invoke destructor function of uaddr.
 */
void
uvm_addr_destroy(struct uvm_addr_state *uaddr)
{
	if (uaddr)
		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
}

/*
 * Move address forward to satisfy align, offset.
 */
vaddr_t
uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result < addr)
		result += align;
	return result;
}

/*
 * Move address backwards to satisfy align, offset.
 */
vaddr_t
uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result > addr)
		result -= align;
	return result;
}
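
/*
 * Example (hypothetical numbers): uvm_addr_align(0x12345000, 0x10000,
 * 0x2000) masks the address down to 0x12340000 and adds the offset,
 * giving 0x12342000; because that is below the input it moves up one
 * alignment step to 0x12352000.  uvm_addr_align_back() mirrors this and
 * returns 0x12342000 for the same arguments.
 */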

/*
 * Directional first fit.
 *
 * Do a linear search for free space, starting at addr in entry.
 * direction == 1: search forward
 * direction == -1: search backward
 *
 * Output: low <= addr <= high and entry will contain addr.
 * 0 will be returned if no space is available.
 *
 * before_gap and after_gap describe the space that must be kept free
 * between the allocation and the preceding and following entries.
 */
int
uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
    int direction, vaddr_t low, vaddr_t high,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry;
	vaddr_t			 low_addr, high_addr;

	KASSERT(entry_out != NULL && addr_out != NULL);
	KASSERT(direction == -1 || direction == 1);
	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
	    (low & PAGE_MASK) == 0 &&
	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
	KASSERT(high + sz > high); /* Check for overflow. */

	/* Hint magic. */
	if (hint == 0)
		hint = (direction == 1 ? low : high);
	else if (hint > high) {
		if (direction != -1)
			return ENOMEM;
		hint = high;
	} else if (hint < low) {
		if (direction != 1)
			return ENOMEM;
		hint = low;
	}

	for (entry = uvm_map_entrybyaddr(&map->addr,
	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
	    entry = (direction == 1 ?
	    RBT_NEXT(uvm_map_addr, entry) :
	    RBT_PREV(uvm_map_addr, entry))) {
		if (VMMAP_FREE_START(entry) > high ||
		    VMMAP_FREE_END(entry) < low) {
			break;
		}

		if (uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(low, VMMAP_FREE_START(entry)),
		    MIN(high, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr) {
				*addr_out = hint;
			} else {
				*addr_out = (direction == 1 ?
				    low_addr : high_addr);
			}
			return 0;
		}
	}

	return ENOMEM;
}

/*
 * Invoke address selector of uaddr.
 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
 *
 * Will invoke uvm_addr_isavail to fill in last_out.
 */
int
uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
    vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	int error;

	if (uaddr == NULL)
		return ENOMEM;

	hint &= ~((vaddr_t)PAGE_MASK);
	if (hint != 0 &&
	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
		return ENOMEM;

	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);

	if (error == 0) {
		KASSERT(*entry_out != NULL);
		*last_out = NULL;
		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
		    *addr_out, sz)) {
			panic("uvm_addr_invoke: address selector %p "
			    "(%s 0x%lx-0x%lx) "
			    "returned unavailable address 0x%lx sz 0x%lx",
			    uaddr, uaddr->uaddr_functions->uaddr_name,
			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
			    *addr_out, sz);
		}
	}

	return error;
}

#if defined(DEBUG) || defined(DDB)
void
uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
    int (*pr)(const char *, ...))
{
	if (uaddr == NULL) {
		(*pr)("- uvm_addr %s: NULL\n", slot);
		return;
	}

	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
	    uaddr->uaddr_functions->uaddr_name,
	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
	if (uaddr->uaddr_functions->uaddr_print == NULL)
		return;

	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
}
#endif /* DEBUG || DDB */

/*
 * Destroy a uvm_addr_state structure.
 * The uaddr must have been previously allocated from uaddr_state_pool.
 */
void
uaddr_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pool, uaddr);
}


#if 0
/*
 * Linear allocator.
 * This allocator uses a first-fit algorithm.
 *
 * If hint is set, search will start at the hint position.
 * Only searches forward.
 */
const struct uvm_addr_functions uaddr_lin_functions = {
	.uaddr_select = &uaddr_lin_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_lin"
};

struct uvm_addr_state *
uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_lin_functions;
	return uaddr;
}

int
uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t guard_sz;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
		return ENOMEM;
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
	    0, guard_sz);
}
#endif

/*
 * Randomized allocator.
 * This allocator uses uvm_map_hint to acquire a random address and searches
 * from there.
 */
533 */ 534 535 const struct uvm_addr_functions uaddr_rnd_functions = { 536 .uaddr_select = &uaddr_rnd_select, 537 .uaddr_free_insert = &uaddr_rnd_insert, 538 .uaddr_free_remove = &uaddr_rnd_remove, 539 .uaddr_destroy = &uaddr_rnd_destroy, 540 #if defined(DEBUG) || defined(DDB) 541 #if 0 542 .uaddr_print = &uaddr_rnd_print, 543 #endif 544 #endif /* DEBUG || DDB */ 545 .uaddr_name = "uaddr_rnd" 546 }; 547 548 struct uvm_addr_state * 549 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr) 550 { 551 struct uaddr_rnd_state *uaddr; 552 553 uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK); 554 uaddr->ur_uaddr.uaddr_minaddr = minaddr; 555 uaddr->ur_uaddr.uaddr_maxaddr = maxaddr; 556 uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions; 557 #if 0 558 TAILQ_INIT(&uaddr->ur_free); 559 #endif 560 return &uaddr->ur_uaddr; 561 } 562 563 int 564 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr, 565 struct vm_map_entry **entry_out, vaddr_t *addr_out, 566 vsize_t sz, vaddr_t align, vaddr_t offset, 567 vm_prot_t prot, vaddr_t hint) 568 { 569 struct vmspace *vm; 570 vaddr_t minaddr, maxaddr; 571 vaddr_t guard_sz; 572 vaddr_t low_addr, high_addr; 573 struct vm_map_entry *entry, *next; 574 vsize_t before_gap, after_gap; 575 vaddr_t tmp; 576 577 KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0); 578 vm = (struct vmspace *)map; 579 580 /* Deal with guardpages: search for space with one extra page. */ 581 guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE); 582 583 if (uaddr->uaddr_maxaddr - guard_sz < sz) 584 return ENOMEM; 585 minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset); 586 maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz, 587 align, offset); 588 589 /* Quick fail if the allocation won't fit. */ 590 if (minaddr >= maxaddr) 591 return ENOMEM; 592 593 /* Select a hint. */ 594 if (hint == 0) 595 hint = uvm_map_hint(vm, prot, minaddr, maxaddr); 596 /* Clamp hint to uaddr range. */ 597 hint = MIN(MAX(hint, minaddr), maxaddr); 598 599 /* Align hint to align,offset parameters. */ 600 tmp = hint; 601 hint = uvm_addr_align_forward(tmp, align, offset); 602 /* Check for overflow during alignment. */ 603 if (hint < tmp || hint > maxaddr) 604 return ENOMEM; /* Compatibility mode: never look backwards. */ 605 606 before_gap = 0; 607 after_gap = guard_sz; 608 hint -= MIN(hint, before_gap); 609 610 /* 611 * Use the augmented address tree to look up the first entry 612 * at or after hint with sufficient space. 613 * 614 * This code is the original optimized code, but will fail if the 615 * subtree it looks at does have sufficient space, but fails to meet 616 * the align constraint. 617 * 618 * Guard: subtree is not exhausted and max(fspace) >= required. 619 */ 620 entry = uvm_map_entrybyaddr(&map->addr, hint); 621 622 /* Walk up the tree, until there is at least sufficient space. */ 623 while (entry != NULL && 624 entry->fspace_augment < before_gap + after_gap + sz) 625 entry = RBT_PARENT(uvm_map_addr, entry); 626 627 while (entry != NULL) { 628 /* Test if this fits. 
	while (entry != NULL) {
		/* Test if this fits. */
		if (VMMAP_FREE_END(entry) > hint &&
		    uvm_map_uaddr_e(map, entry) == uaddr &&
		    uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr)
				*addr_out = hint;
			else
				*addr_out = low_addr;
			return 0;
		}

		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
		next = RBT_RIGHT(uvm_map_addr, entry);
		if (next != NULL &&
		    next->fspace_augment >= before_gap + after_gap + sz) {
			entry = next;
			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
			    NULL)
				entry = next;
		} else {
do_parent:
			next = RBT_PARENT(uvm_map_addr, entry);
			if (next == NULL)
				entry = NULL;
			else if (RBT_LEFT(uvm_map_addr, next) == entry)
				entry = next;
			else {
				entry = next;
				goto do_parent;
			}
		}
	}

	/* Lookup failed. */
	return ENOMEM;
}

/*
 * Destroy a uaddr_rnd_state structure.
 */
void
uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_rnd_pool, uaddr);
}

/*
 * Add entry to tailq.
 */
void
uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

/*
 * Remove entry from tailq.
 */
void
uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

#if 0
#if defined(DEBUG) || defined(DDB)
void
uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char*, ...))
{
	struct vm_map_entry	*entry;
	struct uaddr_rnd_state	*uaddr;
	vaddr_t			 addr;
	size_t			 count;
	vsize_t			 space;

	uaddr = (struct uaddr_rnd_state *)uaddr_p;
	addr = 0;
	count = 0;
	space = 0;
	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
		count++;
		space += entry->fspace;

		if (full) {
			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
			    entry, entry->start, entry->end,
			    entry->guard, entry->fspace);
			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
		}
		if (entry->start < addr) {
			if (!full)
				(*pr)("\tentry %p: 0x%lx-0x%lx "
				    "G=0x%lx F=0x%lx\n",
				    entry, entry->start, entry->end,
				    entry->guard, entry->fspace);
			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
			    entry->start, addr);
		}

		addr = VMMAP_FREE_END(entry);
	}
	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
}
#endif /* DEBUG || DDB */
#endif

/*
 * An allocator that selects an address within distance of the hint.
 *
 * If no hint is given, the allocator refuses to allocate.
 */
const struct uvm_addr_functions uaddr_hint_functions = {
	.uaddr_select = &uaddr_hint_select,
	.uaddr_destroy = &uaddr_hint_destroy,
	.uaddr_name = "uaddr_hint"
};

/*
 * Create uaddr_hint state.
 */
755 */ 756 struct uvm_addr_state * 757 uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist) 758 { 759 struct uaddr_hint_state *ua_hint; 760 761 KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint)); 762 763 ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK); 764 ua_hint->uaddr.uaddr_minaddr = minaddr; 765 ua_hint->uaddr.uaddr_maxaddr = maxaddr; 766 ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions; 767 ua_hint->max_dist = max_dist; 768 return &ua_hint->uaddr; 769 } 770 771 /* 772 * Destroy uaddr_hint state. 773 */ 774 void 775 uaddr_hint_destroy(struct uvm_addr_state *uaddr) 776 { 777 pool_put(&uaddr_hint_pool, uaddr); 778 } 779 780 /* 781 * Hint selector. 782 * 783 * Attempts to find an address that is within max_dist of the hint. 784 */ 785 int 786 uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param, 787 struct vm_map_entry **entry_out, vaddr_t *addr_out, 788 vsize_t sz, vaddr_t align, vaddr_t offset, 789 vm_prot_t prot, vaddr_t hint) 790 { 791 struct uaddr_hint_state *uaddr = 792 (struct uaddr_hint_state *)uaddr_param; 793 vsize_t before_gap, after_gap; 794 vaddr_t low, high; 795 int dir; 796 797 if (hint == 0) 798 return ENOMEM; 799 800 /* Calculate upper and lower bound for selected address. */ 801 high = hint + uaddr->max_dist; 802 if (high < hint) /* overflow */ 803 high = map->max_offset; 804 high = MIN(high, uaddr->uaddr.uaddr_maxaddr); 805 if (high < sz) 806 return ENOMEM; /* Protect against underflow. */ 807 high -= sz; 808 809 /* Calculate lower bound for selected address. */ 810 low = hint - uaddr->max_dist; 811 if (low > hint) /* underflow */ 812 low = map->min_offset; 813 low = MAX(low, uaddr->uaddr.uaddr_minaddr); 814 815 /* Search strategy setup. */ 816 before_gap = PAGE_SIZE + 817 (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK); 818 after_gap = PAGE_SIZE + 819 (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK); 820 dir = (arc4random() & 0x01) ? 1 : -1; 821 822 /* 823 * Try to search: 824 * - forward, with gap 825 * - backward, with gap 826 * - forward, without gap 827 * - backward, without gap 828 * (Where forward is in the direction specified by dir and 829 * backward is in the direction specified by -dir). 830 */ 831 if (uvm_addr_linsearch(map, uaddr_param, 832 entry_out, addr_out, hint, sz, align, offset, 833 dir, low, high, before_gap, after_gap) == 0) 834 return 0; 835 if (uvm_addr_linsearch(map, uaddr_param, 836 entry_out, addr_out, hint, sz, align, offset, 837 -dir, low, high, before_gap, after_gap) == 0) 838 return 0; 839 840 if (uvm_addr_linsearch(map, uaddr_param, 841 entry_out, addr_out, hint, sz, align, offset, 842 dir, low, high, 0, 0) == 0) 843 return 0; 844 if (uvm_addr_linsearch(map, uaddr_param, 845 entry_out, addr_out, hint, sz, align, offset, 846 -dir, low, high, 0, 0) == 0) 847 return 0; 848 849 return ENOMEM; 850 } 851 852 /* 853 * Kernel allocation bootstrap logic. 854 */ 855 const struct uvm_addr_functions uaddr_kernel_functions = { 856 .uaddr_select = &uaddr_kbootstrap_select, 857 .uaddr_destroy = &uaddr_kbootstrap_destroy, 858 .uaddr_name = "uaddr_kbootstrap" 859 }; 860 861 /* 862 * Select an address from the map. 863 * 864 * This function ignores the uaddr spec and instead uses the map directly. 865 * Because of that property, the uaddr algorithm can be shared across all 866 * kernel maps. 
867 */ 868 int 869 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr, 870 struct vm_map_entry **entry_out, vaddr_t *addr_out, 871 vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint) 872 { 873 vaddr_t tmp; 874 875 RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) { 876 if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr && 877 uvm_addr_fitspace(addr_out, &tmp, 878 VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out), 879 sz, align, offset, 0, 0) == 0) 880 return 0; 881 } 882 883 return ENOMEM; 884 } 885 886 /* 887 * Don't destroy the kernel bootstrap allocator. 888 */ 889 void 890 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr) 891 { 892 KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap); 893 } 894 895 #ifndef SMALL_KERNEL 896 /* 897 * Best fit algorithm. 898 */ 899 900 const struct uvm_addr_functions uaddr_bestfit_functions = { 901 .uaddr_select = &uaddr_bestfit_select, 902 .uaddr_free_insert = &uaddr_bestfit_insert, 903 .uaddr_free_remove = &uaddr_bestfit_remove, 904 .uaddr_destroy = &uaddr_bestfit_destroy, 905 .uaddr_name = "uaddr_bestfit" 906 }; 907 908 struct uvm_addr_state * 909 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr) 910 { 911 struct uaddr_bestfit_state *uaddr; 912 913 uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK); 914 uaddr->ubf_uaddr.uaddr_minaddr = minaddr; 915 uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr; 916 uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions; 917 RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free); 918 return &uaddr->ubf_uaddr; 919 } 920 921 void 922 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr) 923 { 924 pool_put(&uaddr_bestfit_pool, uaddr); 925 } 926 927 void 928 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, 929 struct vm_map_entry *entry) 930 { 931 struct uaddr_bestfit_state *uaddr; 932 struct vm_map_entry *rb_rv; 933 934 uaddr = (struct uaddr_bestfit_state *)uaddr_p; 935 if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) != 936 NULL) { 937 panic("%s: duplicate insertion: state %p " 938 "interting %p, colliding with %p", __func__, 939 uaddr, entry, rb_rv); 940 } 941 } 942 943 void 944 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, 945 struct vm_map_entry *entry) 946 { 947 struct uaddr_bestfit_state *uaddr; 948 949 uaddr = (struct uaddr_bestfit_state *)uaddr_p; 950 if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry) 951 panic("%s: entry was not in tree", __func__); 952 } 953 954 int 955 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p, 956 struct vm_map_entry **entry_out, vaddr_t *addr_out, 957 vsize_t sz, vaddr_t align, vaddr_t offset, 958 vm_prot_t prot, vaddr_t hint) 959 { 960 vaddr_t min, max; 961 struct uaddr_bestfit_state *uaddr; 962 struct vm_map_entry *entry; 963 vsize_t guardsz; 964 965 uaddr = (struct uaddr_bestfit_state *)uaddr_p; 966 guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0); 967 if (sz + guardsz < sz) 968 return ENOMEM; 969 970 /* 971 * Find smallest item on freelist capable of holding item. 972 * Deal with guardpages: search for space with one extra page. 973 */ 974 entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz); 975 if (entry == NULL) 976 return ENOMEM; 977 978 /* Walk the tree until we find an entry that fits. 
int
uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t				 min, max;
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*entry;
	vsize_t				 guardsz;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
	if (sz + guardsz < sz)
		return ENOMEM;

	/*
	 * Find smallest item on freelist capable of holding item.
	 * Deal with guardpages: search for space with one extra page.
	 */
	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
	if (entry == NULL)
		return ENOMEM;

	/* Walk the tree until we find an entry that fits. */
	while (uvm_addr_fitspace(&min, &max,
	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
	    sz, align, offset, 0, guardsz) != 0) {
		entry = RBT_NEXT(uaddr_free_rbtree, entry);
		if (entry == NULL)
			return ENOMEM;
	}

	/* Return the address that generates the least fragmentation. */
	*entry_out = entry;
	*addr_out = (min - VMMAP_FREE_START(entry) <=
	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
	    min : max);
	return 0;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
/*
 * A userspace allocator based on pivots.
 */

const struct uvm_addr_functions uaddr_pivot_functions = {
	.uaddr_select = &uaddr_pivot_select,
	.uaddr_free_insert = &uaddr_pivot_insert,
	.uaddr_free_remove = &uaddr_pivot_remove,
	.uaddr_destroy = &uaddr_pivot_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_pivot_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_pivot"
};

/*
 * A special random function for pivots.
 *
 * This function will return:
 * - a random number
 * - a multiple of PAGE_SIZE
 * - at least PAGE_SIZE
 *
 * The random function has a slightly higher chance to return a small number.
 */
vsize_t
uaddr_pivot_random(void)
{
	int	r;

	/*
	 * The sum of two six-sided dice will have a normal distribution.
	 * We map the highest probable number to 1, by folding the curve
	 * (think of a graph on a piece of paper, that you fold).
	 *
	 * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1
	 * have the same and highest probability of happening.
	 */
	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
	    (PIVOT_RND - 1);
	if (r < 0)
		r = -r;

	/*
	 * Make the returned value at least PAGE_SIZE and a multiple of
	 * PAGE_SIZE.
	 */
	return (vaddr_t)(1 + r) << PAGE_SHIFT;
}
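
/*
 * With PIVOT_RND == 8, the sum of the two draws above lies in [0, 14],
 * so r falls in [-7, 7] before the fold and in [0, 7] after it; the
 * returned gap is therefore between 1 and PIVOT_RND pages, biased toward
 * the small end.
 */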

/*
 * Select a new pivot.
 *
 * A pivot must:
 * - be chosen randomly
 * - have a randomly chosen gap before it, where the uaddr_state starts
 * - have a randomly chosen gap after it, before the uaddr_state ends
 *
 * Furthermore, the pivot must provide sufficient space for the allocation.
 * The addr will be set to the selected address.
 *
 * Returns ENOMEM on failure.
 */
int
uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
    struct uaddr_pivot *pivot,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry, *found;
	vaddr_t			 minaddr, maxaddr;
	vsize_t			 dist;
	vaddr_t			 found_minaddr, found_maxaddr;
	vaddr_t			 min, max;
	vsize_t			 arc4_arg;
	int			 fit_error;
	u_int32_t		 path;

	minaddr = uaddr->up_uaddr.uaddr_minaddr;
	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
	KASSERT(minaddr < maxaddr);
#ifdef DIAGNOSTIC
	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
		panic("uaddr_pivot_newpivot: cannot grant random pivot "
		    "in area less than 2 pages (size = 0x%lx)",
		    maxaddr - minaddr);
	}
#endif /* DIAGNOSTIC */

	/*
	 * Gap calculation: 1/32 of the size of the managed area.
	 *
	 * At most: sufficient to not get truncated at arc4random.
	 * At least: 2 PAGE_SIZE
	 *
	 * minaddr and maxaddr will be changed according to arc4random.
	 */
	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
	if (dist >> PAGE_SHIFT > 0xffffffff) {
		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
	} else {
		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
	}

	/*
	 * A very fast way to find an entry that will be large enough
	 * to hold the allocation, but still is found more or less
	 * randomly: the tree path selector has a 50% chance to go for
	 * a bigger or smaller entry.
	 *
	 * Note that the memory may actually be available,
	 * but the fragmentation may be so bad and the gaps chosen
	 * so unfortunately, that the allocation will not succeed.
	 * Or the alignment can only be satisfied by an entry that
	 * is not visited in the randomly selected path.
	 *
	 * This code finds an entry with sufficient space in O(log n) time.
	 */
	path = arc4random();
	found = NULL;
	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
	while (entry != NULL) {
		fit_error = uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), minaddr),
		    MIN(VMMAP_FREE_END(entry), maxaddr),
		    sz, align, offset, before_gap, after_gap);

		/* It fits, save this entry. */
		if (fit_error == 0) {
			found = entry;
			found_minaddr = min;
			found_maxaddr = max;
		}

		/* Next. */
		if (fit_error != 0)
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		else if ((path & 0x1) == 0) {
			path >>= 1;
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		} else {
			path >>= 1;
			entry = RBT_LEFT(uaddr_free_rbtree, entry);
		}
	}
	if (found == NULL)
		return ENOMEM; /* Did not find a large enough region. */

	/*
	 * Calculate a random address within found.
	 *
	 * found_minaddr and found_maxaddr are already aligned, so be sure
	 * to select a multiple of align as the offset in the entry.
	 * Preferably, arc4random_uniform is used to provide no bias within
	 * the entry.
	 * However if the size of the entry exceeds arc4random_uniform's
	 * argument limit, we simply use arc4random (thus limiting ourselves
	 * to 4G * PAGE_SIZE bytes offset).
	 */
	if (found_maxaddr == found_minaddr)
		*addr_out = found_minaddr;
	else {
		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
		arc4_arg = found_maxaddr - found_minaddr;
		if (arc4_arg > 0xffffffff) {
			*addr_out = found_minaddr +
			    (arc4random() & ~(align - 1));
		} else {
			*addr_out = found_minaddr +
			    (arc4random_uniform(arc4_arg) & ~(align - 1));
		}
	}
	/* Address was found in this entry. */
	*entry_out = found;

	/*
	 * Set up new pivot and return selected address.
	 *
	 * Depending on the direction of the pivot, the pivot must be placed
	 * at the bottom or the top of the allocation:
	 * - if the pivot moves upwards, place the pivot at the top of the
	 *   allocation,
	 * - if the pivot moves downwards, place the pivot at the bottom
	 *   of the allocation.
	 */
	pivot->entry = found;
	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
	if (pivot->dir > 0)
		pivot->addr = *addr_out + sz;
	else
		pivot->addr = *addr_out;
	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
	return 0;
}
1200 * 1201 * Each time the selector is invoked, it will select a random pivot, which 1202 * it will use to select memory with. The memory will be placed at the pivot, 1203 * with a randomly sized gap between the allocation and the pivot. 1204 * The pivot will then move so it will never revisit this address. 1205 * 1206 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes 1207 * expired, it will be replaced with a newly created pivot. Pivots also 1208 * automatically expire if they fail to provide memory for an allocation. 1209 * 1210 * Expired pivots are replaced using the uaddr_pivot_newpivot() function, 1211 * which will ensure the pivot points at memory in such a way that the 1212 * allocation will succeed. 1213 * As an added bonus, the uaddr_pivot_newpivot() function will perform the 1214 * allocation immediately and move the pivot as appropriate. 1215 * 1216 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the 1217 * allocation to succeed, it will not create a new pivot and the allocation 1218 * will fail. 1219 * 1220 * A pivot running into used memory will automatically expire (because it will 1221 * fail to allocate). 1222 * 1223 * Characteristics of the allocator: 1224 * - best case, an allocation is O(log N) 1225 * (it would be O(1), if it werent for the need to check if the memory is 1226 * free; although that can be avoided...) 1227 * - worst case, an allocation is O(log N) 1228 * (the uaddr_pivot_newpivot() function has that complexity) 1229 * - failed allocations always take O(log N) 1230 * (the uaddr_pivot_newpivot() function will walk that deep into the tree). 1231 */ 1232 int 1233 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p, 1234 struct vm_map_entry **entry_out, vaddr_t *addr_out, 1235 vsize_t sz, vaddr_t align, vaddr_t offset, 1236 vm_prot_t prot, vaddr_t hint) 1237 { 1238 struct uaddr_pivot_state *uaddr; 1239 struct vm_map_entry *entry; 1240 struct uaddr_pivot *pivot; 1241 vaddr_t min, max; 1242 vsize_t before_gap, after_gap; 1243 int err; 1244 1245 /* Hint must be handled by dedicated hint allocator. */ 1246 if (hint != 0) 1247 return EINVAL; 1248 1249 /* 1250 * Select a random pivot and a random gap sizes around the allocation. 1251 */ 1252 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1253 pivot = &uaddr->up_pivots[ 1254 arc4random_uniform(nitems(uaddr->up_pivots))]; 1255 before_gap = uaddr_pivot_random(); 1256 after_gap = uaddr_pivot_random(); 1257 if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0) 1258 goto expired; /* Pivot is invalid (null or expired). */ 1259 1260 /* Attempt to use the pivot to map the entry. */ 1261 entry = pivot->entry; 1262 if (pivot->dir > 0) { 1263 if (uvm_addr_fitspace(&min, &max, 1264 MAX(VMMAP_FREE_START(entry), pivot->addr), 1265 VMMAP_FREE_END(entry), sz, align, offset, 1266 before_gap, after_gap) == 0) { 1267 *addr_out = min; 1268 *entry_out = entry; 1269 pivot->addr = min + sz; 1270 pivot->expire--; 1271 return 0; 1272 } 1273 } else { 1274 if (uvm_addr_fitspace(&min, &max, 1275 VMMAP_FREE_START(entry), 1276 MIN(VMMAP_FREE_END(entry), pivot->addr), 1277 sz, align, offset, before_gap, after_gap) == 0) { 1278 *addr_out = max; 1279 *entry_out = entry; 1280 pivot->addr = max; 1281 pivot->expire--; 1282 return 0; 1283 } 1284 } 1285 1286 expired: 1287 /* 1288 * Pivot expired or allocation failed. 1289 * Use pivot selector to do the allocation and find a new pivot. 
1290 */ 1291 err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out, 1292 sz, align, offset, before_gap, after_gap); 1293 return err; 1294 } 1295 1296 /* 1297 * Free the pivot. 1298 */ 1299 void 1300 uaddr_pivot_destroy(struct uvm_addr_state *uaddr) 1301 { 1302 pool_put(&uaddr_pivot_pool, uaddr); 1303 } 1304 1305 /* 1306 * Insert an entry with free space in the space tree. 1307 */ 1308 void 1309 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, 1310 struct vm_map_entry *entry) 1311 { 1312 struct uaddr_pivot_state *uaddr; 1313 struct vm_map_entry *rb_rv; 1314 struct uaddr_pivot *p; 1315 vaddr_t check_addr; 1316 vaddr_t start, end; 1317 1318 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1319 if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) != 1320 NULL) { 1321 panic("%s: duplicate insertion: state %p " 1322 "inserting entry %p which collides with %p", __func__, 1323 uaddr, entry, rb_rv); 1324 } 1325 1326 start = VMMAP_FREE_START(entry); 1327 end = VMMAP_FREE_END(entry); 1328 1329 /* 1330 * Update all pivots that are contained in this entry. 1331 */ 1332 for (p = &uaddr->up_pivots[0]; 1333 p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) { 1334 check_addr = p->addr; 1335 if (check_addr == 0) 1336 continue; 1337 if (p->dir < 0) 1338 check_addr--; 1339 1340 if (start <= check_addr && 1341 check_addr < end) { 1342 KASSERT(p->entry == NULL); 1343 p->entry = entry; 1344 } 1345 } 1346 } 1347 1348 /* 1349 * Remove an entry with free space from the space tree. 1350 */ 1351 void 1352 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, 1353 struct vm_map_entry *entry) 1354 { 1355 struct uaddr_pivot_state *uaddr; 1356 struct uaddr_pivot *p; 1357 1358 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1359 if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry) 1360 panic("%s: entry was not in tree", __func__); 1361 1362 /* 1363 * Inform any pivot with this entry that the entry is gone. 1364 * Note that this does not automatically invalidate the pivot. 1365 */ 1366 for (p = &uaddr->up_pivots[0]; 1367 p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) { 1368 if (p->entry == entry) 1369 p->entry = NULL; 1370 } 1371 } 1372 1373 /* 1374 * Create a new pivot selector. 1375 * 1376 * Initially, all pivots are in the expired state. 1377 * Two reasons for this: 1378 * - it means this allocator will not take a huge amount of time 1379 * - pivots select better on demand, because the pivot selection will be 1380 * affected by preceding allocations: 1381 * the next pivots will likely end up in different segments of free memory, 1382 * that was segmented by an earlier allocation; better spread. 1383 */ 1384 struct uvm_addr_state * 1385 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr) 1386 { 1387 struct uaddr_pivot_state *uaddr; 1388 1389 uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK); 1390 uaddr->up_uaddr.uaddr_minaddr = minaddr; 1391 uaddr->up_uaddr.uaddr_maxaddr = maxaddr; 1392 uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions; 1393 RBT_INIT(uaddr_free_rbtree, &uaddr->up_free); 1394 memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots)); 1395 1396 return &uaddr->up_uaddr; 1397 } 1398 1399 #if defined(DEBUG) || defined(DDB) 1400 /* 1401 * Print the uaddr_pivot_state. 1402 * 1403 * If full, a listing of all entries in the state will be provided. 
1404 */ 1405 void 1406 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full, 1407 int (*pr)(const char *, ...)) 1408 { 1409 struct uaddr_pivot_state *uaddr; 1410 struct uaddr_pivot *pivot; 1411 struct vm_map_entry *entry; 1412 int i; 1413 vaddr_t check_addr; 1414 1415 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1416 1417 for (i = 0; i < NUM_PIVOTS; i++) { 1418 pivot = &uaddr->up_pivots[i]; 1419 1420 (*pr)("\tpivot 0x%lx, epires in %d, direction %d\n", 1421 pivot->addr, pivot->expire, pivot->dir); 1422 } 1423 if (!full) 1424 return; 1425 1426 if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free)) 1427 (*pr)("\tempty\n"); 1428 /* Print list of free space. */ 1429 RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) { 1430 (*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n", 1431 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry), 1432 VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry)); 1433 1434 for (i = 0; i < NUM_PIVOTS; i++) { 1435 pivot = &uaddr->up_pivots[i]; 1436 check_addr = pivot->addr; 1437 if (check_addr == 0) 1438 continue; 1439 if (pivot->dir < 0) 1440 check_addr--; 1441 1442 if (VMMAP_FREE_START(entry) <= check_addr && 1443 check_addr < VMMAP_FREE_END(entry)) { 1444 (*pr)("\t\tcontains pivot %d (0x%lx)\n", 1445 i, pivot->addr); 1446 } 1447 } 1448 } 1449 } 1450 #endif /* DEBUG || DDB */ 1451 #endif /* !SMALL_KERNEL */ 1452 1453 #ifndef SMALL_KERNEL 1454 /* 1455 * Strategy for uaddr_stack_brk_select. 1456 */ 1457 struct uaddr_bs_strat { 1458 vaddr_t start; /* Start of area. */ 1459 vaddr_t end; /* End of area. */ 1460 int dir; /* Search direction. */ 1461 }; 1462 1463 /* 1464 * Stack/break allocator. 1465 * 1466 * Stack area is grown into in the opposite direction of the stack growth, 1467 * brk area is grown downward (because sbrk() grows upward). 1468 * 1469 * Both areas are grown into proportially: a weighted chance is used to 1470 * select which one (stack or brk area) to try. If the allocation fails, 1471 * the other one is tested. 1472 */ 1473 const struct uvm_addr_functions uaddr_stack_brk_functions = { 1474 .uaddr_select = &uaddr_stack_brk_select, 1475 .uaddr_destroy = &uaddr_destroy, 1476 .uaddr_name = "uaddr_stckbrk" 1477 }; 1478 1479 /* 1480 * Stack/brk address selector. 1481 */ 1482 int 1483 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr, 1484 struct vm_map_entry **entry_out, vaddr_t *addr_out, 1485 vsize_t sz, vaddr_t align, vaddr_t offset, 1486 vm_prot_t prot, vaddr_t hint) 1487 { 1488 vsize_t before_gap, after_gap; 1489 int stack_idx, brk_idx; 1490 struct uaddr_bs_strat strat[2], *s; 1491 vsize_t sb_size; 1492 1493 /* 1494 * Choose gap size and if the stack is searched before or after the 1495 * brk area. 1496 */ 1497 before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT; 1498 after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT; 1499 1500 sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start); 1501 sb_size >>= PAGE_SHIFT; 1502 if (arc4random_uniform(MAX(sb_size, 0xffffffff)) > 1503 map->b_end - map->b_start) { 1504 brk_idx = 1; 1505 stack_idx = 0; 1506 } else { 1507 brk_idx = 0; 1508 stack_idx = 1; 1509 } 1510 1511 /* Set up stack search strategy. */ 1512 s = &strat[stack_idx]; 1513 s->start = MAX(map->s_start, uaddr->uaddr_minaddr); 1514 s->end = MIN(map->s_end, uaddr->uaddr_maxaddr); 1515 #ifdef MACHINE_STACK_GROWS_UP 1516 s->dir = -1; 1517 #else 1518 s->dir = 1; 1519 #endif 1520 1521 /* Set up brk search strategy. 
	s = &strat[brk_idx];
	s->start = MAX(map->b_start, uaddr->uaddr_minaddr);
	s->end = MIN(map->b_end, uaddr->uaddr_maxaddr);
	s->dir = -1;	/* Opposite of brk() growth. */

	/* Linear search for space. */
	for (s = &strat[0]; s < &strat[nitems(strat)]; s++) {
		if (s->end - s->start < sz)
			continue;
		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
		    0, sz, align, offset, s->dir, s->start, s->end - sz,
		    before_gap, after_gap) == 0)
			return 0;
	}

	return ENOMEM;
}

struct uvm_addr_state *
uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state* uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
	return uaddr;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
    uvm_mapent_fspace_cmp);
#endif /* !SMALL_KERNEL */