/*	$OpenBSD: uvm_addr.c,v 1.4 2012/06/06 04:54:36 matthew Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* #define DEBUG */

#include <sys/param.h>
#include <uvm/uvm.h>
#include <uvm/uvm_addr.h>
#include <sys/pool.h>
#include <dev/rndvar.h>

/* Max gap between hint allocations. */
#define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
/* Number of pivots in pivot allocator. */
#define NUM_PIVOTS		16
/*
 * Max number (inclusive) of pages the pivot allocator
 * will place between allocations.
 *
 * The uaddr_pivot_random() function attempts to bias towards
 * small space between allocations, so putting a large number here is fine.
 */
#define PIVOT_RND		8
/*
 * Number of allocations that a pivot can supply before expiring.
 * When a pivot expires, a new pivot has to be found.
 *
 * Must be at least 1.
 */
#define PIVOT_EXPIRE		1024


/* Pool with uvm_addr_state structures. */
struct pool uaddr_pool;
struct pool uaddr_hint_pool;
struct pool uaddr_bestfit_pool;
struct pool uaddr_pivot_pool;
struct pool uaddr_rnd_pool;

/* uvm_addr state for hint based selector. */
struct uaddr_hint_state {
	struct uvm_addr_state		 uaddr;
	vsize_t				 max_dist;
};

/* uvm_addr state for bestfit selector. */
struct uaddr_bestfit_state {
	struct uvm_addr_state		 ubf_uaddr;
	struct uaddr_free_rbtree	 ubf_free;
};

/* uvm_addr state for rnd selector. */
struct uaddr_rnd_state {
	struct uvm_addr_state		 ur_uaddr;
	TAILQ_HEAD(, vm_map_entry)	 ur_free;
};

/*
 * Definition of a pivot in pivot selector.
 */
struct uaddr_pivot {
	vaddr_t				 addr;	/* End of prev. allocation. */
	int				 expire;/* Best before date. */
	int				 dir;	/* Direction. */
	struct vm_map_entry		*entry; /* Will contain next alloc. */
};
/* uvm_addr state for pivot selector. */
struct uaddr_pivot_state {
	struct uvm_addr_state		 up_uaddr;

	/* Free space tree, for fast pivot selection. */
	struct uaddr_free_rbtree	 up_free;

	/* List of pivots. The pointers point to after the last allocation. */
	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
};

/*
 * Free space comparison.
 * Compares smaller free-space before larger free-space.
 */
static __inline int
uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
{
	if (e1->fspace != e2->fspace)
		return (e1->fspace < e2->fspace ? -1 : 1);
	return (e1->start < e2->start ? -1 : e1->start > e2->start);
}

/* Forward declaration (see below). */
extern const struct uvm_addr_functions uaddr_kernel_functions;
struct uvm_addr_state uaddr_kbootstrap;
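
/*
 * Each of the selector states above embeds struct uvm_addr_state as its
 * first member, so the generic uvm_addr_state pointer that is passed
 * around can be cast directly to the specific state type (as done in the
 * selector implementations below).
 */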

/*
 * Support functions.
 */

#ifndef SMALL_KERNEL
struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
			    vsize_t);
#endif /* !SMALL_KERNEL */
void			 uaddr_kinsert(struct vm_map*, struct uvm_addr_state*,
			    struct vm_map_entry*);
void			 uaddr_kremove(struct vm_map*, struct uvm_addr_state*,
			    struct vm_map_entry*);
void			 uaddr_kbootstrapdestroy(struct uvm_addr_state*);

void			 uaddr_destroy(struct uvm_addr_state*);
void			 uaddr_hint_destroy(struct uvm_addr_state*);
void			 uaddr_kbootstrap_destroy(struct uvm_addr_state*);
void			 uaddr_rnd_destroy(struct uvm_addr_state*);
void			 uaddr_bestfit_destroy(struct uvm_addr_state*);
void			 uaddr_pivot_destroy(struct uvm_addr_state*);

int			 uaddr_lin_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_kbootstrap_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_rnd_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_hint_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_bestfit_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#ifndef SMALL_KERNEL
int			 uaddr_pivot_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_stack_brk_select(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif /* !SMALL_KERNEL */

void			 uaddr_rnd_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uaddr_rnd_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uaddr_bestfit_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uaddr_bestfit_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uaddr_pivot_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uaddr_pivot_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);

#ifndef SMALL_KERNEL
vsize_t			 uaddr_pivot_random(void);
int			 uaddr_pivot_newpivot(struct vm_map*,
			    struct uaddr_pivot_state*, struct uaddr_pivot*,
			    struct vm_map_entry**, vaddr_t*,
			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
#endif /* !SMALL_KERNEL */

#if defined(DEBUG) || defined(DDB)
void			 uaddr_pivot_print(struct uvm_addr_state*, boolean_t,
			    int (*)(const char*, ...));
void			 uaddr_rnd_print(struct uvm_addr_state*, boolean_t,
			    int (*)(const char*, ...));
#endif /* DEBUG || DDB */


#ifndef SMALL_KERNEL
/*
 * Find smallest entry in tree that will fit sz bytes.
 */
struct vm_map_entry*
uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
{
	struct vm_map_entry	*tmp, *res;

	tmp = RB_ROOT(free);
	res = NULL;
	while (tmp) {
		if (tmp->fspace >= sz) {
			res = tmp;
			tmp = RB_LEFT(tmp, dfree.rbtree);
		} else if (tmp->fspace < sz)
			tmp = RB_RIGHT(tmp, dfree.rbtree);
	}
	return res;
}
#endif /* !SMALL_KERNEL */

static __inline vaddr_t
uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted < addr ? adjusted + align : adjusted);
}

static __inline vaddr_t
uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted > addr ? adjusted - align : adjusted);
}

/*
 * Try to fit the requested space into the entry.
 */
int
uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
    vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
    vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	vaddr_t	tmp;
	vsize_t	fspace;

	if (low_addr > high_addr)
		return ENOMEM;
	fspace = high_addr - low_addr;
	if (fspace < sz + before_gap + after_gap)
		return ENOMEM;

	/*
	 * Calculate lowest address.
	 */
	low_addr += before_gap;
	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
	if (low_addr < tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (high_addr - after_gap - sz < low_addr)
		return ENOMEM;

	/*
	 * Calculate highest address.
	 */
	high_addr -= after_gap + sz;
	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
	if (high_addr > tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (low_addr > high_addr)
		return ENOMEM;

	*min_result = low_addr;
	*max_result = high_addr;
	return 0;
}


/*
 * Initialize uvm_addr.
 */
void
uvm_addr_init()
{
	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state),
	    0, 0, 0, "uaddr", &pool_allocator_nointr);
	pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state),
	    0, 0, 0, "uaddrhint", &pool_allocator_nointr);
	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state),
	    0, 0, 0, "uaddrbestfit", &pool_allocator_nointr);
	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state),
	    0, 0, 0, "uaddrpivot", &pool_allocator_nointr);
	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state),
	    0, 0, 0, "uaddrrnd", &pool_allocator_nointr);

	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
}

/*
 * Invoke destructor function of uaddr.
 */
void
uvm_addr_destroy(struct uvm_addr_state *uaddr)
{
	if (uaddr)
		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
}
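
/*
 * Typical life cycle of an address selector (illustrative sketch only;
 * the real setup and locking is handled by the uvm_map code, and map is
 * assumed to be an already initialized vm_map):
 *
 *	struct uvm_addr_state *ua;
 *
 *	ua = uaddr_bestfit_create(minaddr, maxaddr);
 *	...
 *	error = uvm_addr_invoke(map, ua, &entry, &last, &addr,
 *	    sz, align, offset, prot, hint);
 *	...
 *	uvm_addr_destroy(ua);
 */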

/*
 * Move address forward to satisfy align, offset.
 */
vaddr_t
uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result < addr)
		result += align;
	return result;
}

/*
 * Move address backwards to satisfy align, offset.
 */
vaddr_t
uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result > addr)
		result -= align;
	return result;
}

/*
 * Directional first fit.
 *
 * Do a linear search for free space, starting at addr in entry.
 * direction ==  1: search forward
 * direction == -1: search backward
 *
 * Output: low <= addr <= high and entry will contain addr.
 * 0 will be returned if no space is available.
 *
 * before_gap and after_gap describe the space that must be kept free
 * before and after the allocation.
 */
int
uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
    int direction, vaddr_t low, vaddr_t high,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry;
	vaddr_t			 low_addr, high_addr;

	KASSERT(entry_out != NULL && addr_out != NULL);
	KASSERT(direction == -1 || direction == 1);
	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
	    (low & PAGE_MASK) == 0 &&
	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
	KASSERT(high + sz > high); /* Check for overflow. */

	/*
	 * Hint magic.
	 */
	if (hint == 0)
		hint = (direction == 1 ? low : high);
	else if (hint > high) {
		if (direction != -1)
			return ENOMEM;
		hint = high;
	} else if (hint < low) {
		if (direction != 1)
			return ENOMEM;
		hint = low;
	}

	for (entry = uvm_map_entrybyaddr(&map->addr,
	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
	    entry = (direction == 1 ?
	    RB_NEXT(uvm_map_addr, &map->addr, entry) :
	    RB_PREV(uvm_map_addr, &map->addr, entry))) {
		if (VMMAP_FREE_START(entry) > high ||
		    VMMAP_FREE_END(entry) < low) {
			break;
		}

		if (uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(low, VMMAP_FREE_START(entry)),
		    MIN(high, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr) {
				*addr_out = hint;
			} else {
				*addr_out = (direction == 1 ?
				    low_addr : high_addr);
			}
			return 0;
		}
	}

	return ENOMEM;
}
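
/*
 * Example of the align/offset convention used throughout this file
 * (illustrative values): an address addr satisfies the constraint when
 * (addr & (align - 1)) == offset.  With align = 0x10000 and
 * offset = 0x1000, uvm_addr_align(0x234000, ...) yields 0x241000 and
 * uvm_addr_align_back(0x234000, ...) yields 0x231000.
 */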

/*
 * Invoke address selector of uaddr.
 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
 *
 * Will invoke uvm_addr_isavail to fill in last_out.
 */
int
uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry**entry_out, struct vm_map_entry**last_out,
    vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	int error;

	if (uaddr == NULL)
		return ENOMEM;

	hint &= ~((vaddr_t)PAGE_MASK);
	if (hint != 0 &&
	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
		return ENOMEM;

	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);

	if (error == 0) {
		KASSERT(*entry_out != NULL);
		*last_out = NULL;
		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
		    *addr_out, sz)) {
			panic("uvm_addr_invoke: address selector %p "
			    "(%s 0x%lx-0x%lx) "
			    "returned unavailable address 0x%lx",
			    uaddr, uaddr->uaddr_functions->uaddr_name,
			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
			    *addr_out);
		}
	}

	return error;
}

#if defined(DEBUG) || defined(DDB)
void
uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
    int (*pr)(const char*, ...))
{
	if (uaddr == NULL) {
		(*pr)("- uvm_addr %s: NULL\n", slot);
		return;
	}

	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
	    uaddr->uaddr_functions->uaddr_name,
	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
	if (uaddr->uaddr_functions->uaddr_print == NULL)
		return;

	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
}
#endif /* DEBUG || DDB */

/*
 * Destroy a uvm_addr_state structure.
 * The uaddr must have been previously allocated from uaddr_state_pool.
 */
void
uaddr_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pool, uaddr);
}


/*
 * Linear allocator.
 * This allocator uses a first-fit algorithm.
 *
 * If hint is set, search will start at the hint position.
 * Only searches forward.
 */

const struct uvm_addr_functions uaddr_lin_functions = {
	.uaddr_select = &uaddr_lin_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_lin"
};

struct uvm_addr_state*
uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state* uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_lin_functions;
	return uaddr;
}

int
uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t guard_sz;

	/*
	 * Deal with guardpages: search for space with one extra page.
	 */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
		return ENOMEM;
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
	    0, guard_sz);
}
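
/*
 * Guard page handling above: on maps with VM_MAP_GUARDPAGES set, the
 * search is done for sz + PAGE_SIZE bytes, with the extra page passed
 * as after_gap, so a one-page gap is left above the selected address.
 */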


/*
 * Randomized allocator.
 * This allocator uses uvm_map_hint to acquire a random address and searches
 * from there.
 */

const struct uvm_addr_functions uaddr_rnd_functions = {
	.uaddr_select = &uaddr_rnd_select,
	.uaddr_free_insert = &uaddr_rnd_insert,
	.uaddr_free_remove = &uaddr_rnd_remove,
	.uaddr_destroy = &uaddr_rnd_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_rnd_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_rnd"
};

struct uvm_addr_state*
uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_rnd_state* uaddr;

	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
	TAILQ_INIT(&uaddr->ur_free);
	return &uaddr->ur_uaddr;
}

int
uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct vmspace		*vm;
	vaddr_t			 guard_sz;
	vaddr_t			 low_addr, high_addr;
	struct vm_map_entry	*entry, *next;
	vsize_t			 before_gap, after_gap;
	vaddr_t			 tmp;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
	vm = (struct vmspace*)map;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	/* Quick fail if the allocation won't fit. */
	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
		return ENOMEM;

	/* Select a hint. */
	if (hint == 0)
		hint = uvm_map_hint(vm, prot);
	/* Clamp hint to uaddr range. */
	hint = MIN(MAX(hint, uaddr->uaddr_minaddr),
	    uaddr->uaddr_maxaddr - sz - guard_sz);

	/* Align hint to align,offset parameters. */
	tmp = hint;
	hint = uvm_addr_align_forward(tmp, align, offset);
	/* Check for overflow during alignment. */
	if (hint < tmp || hint > uaddr->uaddr_maxaddr - sz - guard_sz)
		return ENOMEM; /* Compatibility mode: never look backwards. */

	before_gap = 0;
	after_gap = guard_sz;
	hint -= MIN(hint, before_gap);

	/*
	 * Use the augmented address tree to look up the first entry
	 * at or after hint with sufficient space.
	 *
	 * This code is the original optimized code, but it can fail when the
	 * subtree it looks at has sufficient space that nevertheless cannot
	 * meet the align constraint.
	 *
	 * Guard: subtree is not exhausted and max(fspace) >= required.
	 */
	entry = uvm_map_entrybyaddr(&map->addr, hint);

	/* Walk up the tree, until there is at least sufficient space. */
	while (entry != NULL &&
	    entry->fspace_augment < before_gap + after_gap + sz)
		entry = RB_PARENT(entry, daddrs.addr_entry);

	while (entry != NULL) {
		/* Test if this fits. */
		if (VMMAP_FREE_END(entry) > hint &&
		    uvm_map_uaddr_e(map, entry) == uaddr &&
		    uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr)
				*addr_out = hint;
			else
				*addr_out = low_addr;
			return 0;
		}

		/* RB_NEXT, but skip subtrees that cannot possibly fit. */
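		/*
		 * This is an in-order successor walk with pruning: the
		 * right subtree is only descended into when its
		 * fspace_augment shows it can hold the request; otherwise
		 * climb until the current node is reached from its left
		 * child, which is the next entry by address.
		 */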
		next = RB_RIGHT(entry, daddrs.addr_entry);
		if (next != NULL &&
		    next->fspace_augment >= before_gap + after_gap + sz) {
			entry = next;
			while ((next = RB_LEFT(entry, daddrs.addr_entry)) !=
			    NULL)
				entry = next;
		} else {
do_parent:
			next = RB_PARENT(entry, daddrs.addr_entry);
			if (next == NULL)
				entry = NULL;
			else if (RB_LEFT(next, daddrs.addr_entry) == entry)
				entry = next;
			else {
				entry = next;
				goto do_parent;
			}
		}
	}

	/* Lookup failed. */
	return ENOMEM;
}

/*
 * Destroy a uaddr_rnd_state structure.
 */
void
uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_rnd_pool, uaddr);
}

/*
 * Add entry to tailq.
 */
void
uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

/*
 * Remove entry from tailq.
 */
void
uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

#if defined(DEBUG) || defined(DDB)
void
uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char*, ...))
{
	struct vm_map_entry	*entry;
	struct uaddr_rnd_state	*uaddr;
	vaddr_t			 addr;
	size_t			 count;
	vsize_t			 space;

	uaddr = (struct uaddr_rnd_state*)uaddr_p;
	addr = 0;
	count = 0;
	space = 0;
	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
		count++;
		space += entry->fspace;

		if (full) {
			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
			    entry, entry->start, entry->end,
			    entry->guard, entry->fspace);
			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
		}
		if (entry->start < addr) {
			if (!full)
				(*pr)("\tentry %p: 0x%lx-0x%lx "
				    "G=0x%lx F=0x%lx\n",
				    entry, entry->start, entry->end,
				    entry->guard, entry->fspace);
			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
			    entry->start, addr);
		}

		addr = VMMAP_FREE_END(entry);
	}
	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
}
#endif /* DEBUG || DDB */


/*
 * An allocator that selects an address within distance of the hint.
 *
 * If no hint is given, the allocator refuses to allocate.
 */

const struct uvm_addr_functions uaddr_hint_functions = {
	.uaddr_select = &uaddr_hint_select,
	.uaddr_destroy = &uaddr_hint_destroy,
	.uaddr_name = "uaddr_hint"
};

/*
 * Create uaddr_hint state.
 */
struct uvm_addr_state*
uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist)
{
	struct uaddr_hint_state* ua_hint;

	KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint));

	ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK);
	ua_hint->uaddr.uaddr_minaddr = minaddr;
	ua_hint->uaddr.uaddr_maxaddr = maxaddr;
	ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions;
	ua_hint->max_dist = max_dist;
	return &ua_hint->uaddr;
}

/*
 * Destroy uaddr_hint state.
 */
void
uaddr_hint_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_hint_pool, uaddr);
}
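
/*
 * For example, with max_dist = 16MB and hint = 0x20000000, the hint
 * selector below only considers addresses in [0x1f000000, 0x21000000]
 * (further clamped to the selector's own min/max range), and first tries
 * to leave randomly chosen page-aligned gaps of up to UADDR_HINT_MAXGAP
 * around the new allocation.
 */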

/*
 * Hint selector.
 *
 * Attempts to find an address that is within max_dist of the hint.
 */
int
uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_hint_state	*uaddr = (struct uaddr_hint_state*)uaddr_param;
	vsize_t			 before_gap, after_gap;
	vaddr_t			 low, high;
	int			 dir;

	if (hint == 0)
		return ENOMEM;

	/*
	 * Calculate upper and lower bound for selected address.
	 */
	high = hint + uaddr->max_dist;
	if (high < hint)		/* overflow */
		high = map->max_offset;
	high = MIN(high, uaddr->uaddr.uaddr_maxaddr);
	if (high < sz)
		return ENOMEM;		/* Protect against underflow. */
	high -= sz;

	/* Calculate lower bound for selected address. */
	low = hint - uaddr->max_dist;
	if (low > hint)			/* underflow */
		low = map->min_offset;
	low = MAX(low, uaddr->uaddr.uaddr_minaddr);

	/* Search strategy setup. */
	before_gap = PAGE_SIZE +
	    (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
	after_gap = PAGE_SIZE +
	    (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
	dir = (arc4random() & 0x01) ? 1 : -1;

	/*
	 * Try to search:
	 * - forward, with gap
	 * - backward, with gap
	 * - forward, without gap
	 * - backward, without gap
	 * (Where forward is in the direction specified by dir and
	 * backward is in the direction specified by -dir).
	 */
	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    dir, low, high, before_gap, after_gap) == 0)
		return 0;
	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    -dir, low, high, before_gap, after_gap) == 0)
		return 0;

	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    dir, low, high, 0, 0) == 0)
		return 0;
	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    -dir, low, high, 0, 0) == 0)
		return 0;

	return ENOMEM;
}

/*
 * Kernel allocation bootstrap logic.
 */

const struct uvm_addr_functions uaddr_kernel_functions = {
	.uaddr_select = &uaddr_kbootstrap_select,
	.uaddr_destroy = &uaddr_kbootstrap_destroy,
	.uaddr_name = "uaddr_kbootstrap"
};

/*
 * Select an address from the map.
 *
 * This function ignores the uaddr spec and instead uses the map directly.
 * Because of that property, the uaddr algorithm can be shared across all
 * kernel maps.
 */
int
uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	vaddr_t tmp;

	RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
		    uvm_addr_fitspace(addr_out, &tmp,
		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
		    sz, align, offset, 0, 0) == 0)
			return 0;
	}

	return ENOMEM;
}

/*
 * Don't destroy the kernel bootstrap allocator.
 */
void
uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
{
	KASSERT(uaddr == (struct uvm_addr_state*)&uaddr_kbootstrap);
}
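
/*
 * uaddr_kbootstrap_select() walks the entire address tree and only
 * accepts free space that ends at or below uvm_maxkaddr, so it is O(n)
 * in the number of map entries; cheap enough for the bootstrap phase it
 * serves.
 */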

#ifndef SMALL_KERNEL
/*
 * Best fit algorithm.
 */

const struct uvm_addr_functions uaddr_bestfit_functions = {
	.uaddr_select = &uaddr_bestfit_select,
	.uaddr_free_insert = &uaddr_bestfit_insert,
	.uaddr_free_remove = &uaddr_bestfit_remove,
	.uaddr_destroy = &uaddr_bestfit_destroy,
	.uaddr_name = "uaddr_bestfit"
};

struct uvm_addr_state*
uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
	RB_INIT(&uaddr->ubf_free);
	return &uaddr->ubf_uaddr;
}

void
uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_bestfit_pool, uaddr);
}

void
uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*rb_rv;

	uaddr = (struct uaddr_bestfit_state*)uaddr_p;
	if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting %p, colliding with %p", __func__,
		    uaddr, entry, rb_rv);
	}
}

void
uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = (struct uaddr_bestfit_state*)uaddr_p;
	if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);
}

int
uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t				 min, max;
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*entry;
	vsize_t				 guardsz;

	uaddr = (struct uaddr_bestfit_state*)uaddr_p;
	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);

	/*
	 * Find smallest item on freelist capable of holding item.
	 * Deal with guardpages: search for space with one extra page.
	 */
	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
	if (entry == NULL)
		return ENOMEM;

	/*
	 * Walk the tree until we find an entry that fits.
	 */
	while (uvm_addr_fitspace(&min, &max,
	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
	    sz, align, offset, 0, guardsz) != 0) {
		entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry);
		if (entry == NULL)
			return ENOMEM;
	}

	/*
	 * Return the address that generates the least fragmentation.
	 */
	*entry_out = entry;
	*addr_out = (min - VMMAP_FREE_START(entry) <=
	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
	    min : max);
	return 0;
}
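
/*
 * The "least fragmentation" choice above: placing at min leaves
 * min - VMMAP_FREE_START(entry) bytes unused below the allocation, while
 * placing at max leaves VMMAP_FREE_END(entry) - guardsz - sz - max bytes
 * unused above it; the end with the smaller leftover is picked.  E.g. if
 * the low placement strands 4KB and the high placement strands 16KB, the
 * low placement wins.
 */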
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
/*
 * A userspace allocator based on pivots.
 */

const struct uvm_addr_functions uaddr_pivot_functions = {
	.uaddr_select = &uaddr_pivot_select,
	.uaddr_free_insert = &uaddr_pivot_insert,
	.uaddr_free_remove = &uaddr_pivot_remove,
	.uaddr_destroy = &uaddr_pivot_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_pivot_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_pivot"
};

/*
 * A special random function for pivots.
 *
 * This function will return:
 * - a random number
 * - a multiple of PAGE_SIZE
 * - at least PAGE_SIZE
 *
 * The random function has a slightly higher chance to return a small number.
 */
vsize_t
uaddr_pivot_random()
{
	int	r;

	/*
	 * The sum of two six-sided dice will have a normal distribution.
	 * We map the highest probable number to 1, by folding the curve
	 * (think of a graph on a piece of paper, that you fold).
	 *
	 * Because the fold happens at PIVOT_RND - 1, small numbers are
	 * the most likely outcomes.
	 */
	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
	    (PIVOT_RND - 1);
	if (r < 0)
		r = -r;

	/*
	 * Make the returned value at least PAGE_SIZE and a multiple of
	 * PAGE_SIZE.
	 */
	return (vaddr_t)(1 + r) << PAGE_SHIFT;
}

/*
 * Select a new pivot.
 *
 * A pivot must:
 * - be chosen randomly
 * - have a randomly chosen gap before it, where the uaddr_state starts
 * - have a randomly chosen gap after it, before the uaddr_state ends
 *
 * Furthermore, the pivot must provide sufficient space for the allocation.
 * The addr will be set to the selected address.
 *
 * Returns ENOMEM on failure.
 */
int
uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
    struct uaddr_pivot *pivot,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry, *found;
	vaddr_t			 minaddr, maxaddr;
	vsize_t			 dist;
	vaddr_t			 found_minaddr, found_maxaddr;
	vaddr_t			 min, max;
	vsize_t			 arc4_arg;
	int			 fit_error;
	u_int32_t		 path;

	minaddr = uaddr->up_uaddr.uaddr_minaddr;
	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
	KASSERT(minaddr < maxaddr);
#ifdef DIAGNOSTIC
	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
		panic("uaddr_pivot_newpivot: cannot grant random pivot "
		    "in area less than 2 pages (size = 0x%lx)",
		    maxaddr - minaddr);
	}
#endif /* DIAGNOSTIC */

	/*
	 * Gap calculation: 1/32 of the size of the managed area.
	 *
	 * At most: sufficient to not get truncated at arc4random.
	 * At least: 2 PAGE_SIZE
	 *
	 * minaddr and maxaddr will be changed according to arc4random.
	 */
	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
	if (dist >> PAGE_SHIFT > 0xffffffff) {
		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
	} else {
		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
	}

	/*
	 * A very fast way to find an entry that will be large enough
	 * to hold the allocation, but still is found more or less
	 * randomly: the tree path selector has a 50% chance to go for
	 * a bigger or smaller entry.
	 *
	 * Note that the memory may actually be available,
	 * but the fragmentation may be so bad and the gaps chosen
	 * so unfortunately, that the allocation will not succeed.
	 * Or the alignment can only be satisfied by an entry that
	 * is not visited in the randomly selected path.
	 *
	 * This code finds an entry with sufficient space in O(log n) time.
	 */
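	/*
	 * path supplies up to 32 random branch decisions for the descent.
	 * Whenever the current entry cannot hold the request, the walk
	 * always continues to the right, toward entries with more free
	 * space; the last entry that did fit is kept in found.
	 */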
	path = arc4random();
	found = NULL;
	entry = RB_ROOT(&uaddr->up_free);
	while (entry != NULL) {
		fit_error = uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), minaddr),
		    MIN(VMMAP_FREE_END(entry), maxaddr),
		    sz, align, offset, before_gap, after_gap);

		/* It fits, save this entry. */
		if (fit_error == 0) {
			found = entry;
			found_minaddr = min;
			found_maxaddr = max;
		}

		/* Next. */
		if (fit_error != 0)
			entry = RB_RIGHT(entry, dfree.rbtree);
		else if ((path & 0x1) == 0) {
			path >>= 1;
			entry = RB_RIGHT(entry, dfree.rbtree);
		} else {
			path >>= 1;
			entry = RB_LEFT(entry, dfree.rbtree);
		}
	}
	if (found == NULL)
		return ENOMEM;	/* No large enough region found. */

	/*
	 * Calculate a random address within found.
	 *
	 * found_minaddr and found_maxaddr are already aligned, so be sure
	 * to select a multiple of align as the offset in the entry.
	 * Preferably, arc4random_uniform is used to provide no bias within
	 * the entry.
	 * However if the size of the entry exceeds arc4random_uniform's
	 * argument limit, we simply use arc4random (thus limiting ourselves
	 * to 4G * PAGE_SIZE bytes offset).
	 */
	if (found_maxaddr == found_minaddr)
		*addr_out = found_minaddr;
	else {
		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
		arc4_arg = found_maxaddr - found_minaddr;
		if (arc4_arg > 0xffffffff) {
			*addr_out = found_minaddr +
			    (arc4random() & ~(align - 1));
		} else {
			*addr_out = found_minaddr +
			    (arc4random_uniform(arc4_arg) & ~(align - 1));
		}
	}
	/* Address was found in this entry. */
	*entry_out = found;

	/*
	 * Set up new pivot and return selected address.
	 *
	 * Depending on the direction of the pivot, the pivot must be placed
	 * at the bottom or the top of the allocation:
	 * - if the pivot moves upwards, place the pivot at the top of the
	 *   allocation,
	 * - if the pivot moves downwards, place the pivot at the bottom
	 *   of the allocation.
	 */
	pivot->entry = found;
	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
	if (pivot->dir > 0)
		pivot->addr = *addr_out + sz;
	else
		pivot->addr = *addr_out;
	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
	return 0;
}

/*
 * Pivot selector.
 *
 * Each time the selector is invoked, it will select a random pivot, which
 * it will use to select memory with. The memory will be placed at the pivot,
 * with a randomly sized gap between the allocation and the pivot.
 * The pivot will then move so it will never revisit this address.
 *
 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
 * expired, it will be replaced with a newly created pivot. Pivots also
 * automatically expire if they fail to provide memory for an allocation.
 *
 * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
 * which will ensure the pivot points at memory in such a way that the
 * allocation will succeed.
 * As an added bonus, the uaddr_pivot_newpivot() function will perform the
 * allocation immediately and move the pivot as appropriate.
 *
 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
 * allocation to succeed, it will not create a new pivot and the allocation
 * will fail.
 *
 * A pivot running into used memory will automatically expire (because it will
 * fail to allocate).
 *
 * Characteristics of the allocator:
 * - best case, an allocation is O(log N)
 *   (it would be O(1), if it weren't for the need to check if the memory is
 *   free; although that can be avoided...)
 * - worst case, an allocation is O(log N)
 *   (the uaddr_pivot_newpivot() function has that complexity)
 * - failed allocations always take O(log N)
 *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
 */
int
uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*entry;
	struct uaddr_pivot		*pivot;
	vaddr_t				 min, max;
	vsize_t				 before_gap, after_gap;
	int				 err;

	/* Hint must be handled by dedicated hint allocator. */
	if (hint != 0)
		return EINVAL;

	/*
	 * Select a random pivot and random gap sizes around the allocation.
	 */
	uaddr = (struct uaddr_pivot_state*)uaddr_p;
	pivot = &uaddr->up_pivots[
	    arc4random_uniform(nitems(uaddr->up_pivots))];
	before_gap = uaddr_pivot_random();
	after_gap = uaddr_pivot_random();
	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
		goto expired;	/* Pivot is invalid (null or expired). */

	/*
	 * Attempt to use the pivot to map the entry.
	 */
	entry = pivot->entry;
	if (pivot->dir > 0) {
		if (uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), pivot->addr),
		    VMMAP_FREE_END(entry), sz, align, offset,
		    before_gap, after_gap) == 0) {
			*addr_out = min;
			*entry_out = entry;
			pivot->addr = min + sz;
			pivot->expire--;
			return 0;
		}
	} else {
		if (uvm_addr_fitspace(&min, &max,
		    VMMAP_FREE_START(entry),
		    MIN(VMMAP_FREE_END(entry), pivot->addr),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*addr_out = max;
			*entry_out = entry;
			pivot->addr = max;
			pivot->expire--;
			return 0;
		}
	}

expired:
	/*
	 * Pivot expired or allocation failed.
	 * Use pivot selector to do the allocation and find a new pivot.
	 */
	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
	    sz, align, offset, before_gap, after_gap);
	return err;
}

/*
 * Free the pivot.
 */
void
uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pivot_pool, uaddr);
}

/*
 * Insert an entry with free space in the space tree.
 */
void
uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*rb_rv;
	struct uaddr_pivot		*p;
	vaddr_t				 check_addr;
	vaddr_t				 start, end;

	uaddr = (struct uaddr_pivot_state*)uaddr_p;
	if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting entry %p which collides with %p", __func__,
		    uaddr, entry, rb_rv);
	}

	start = VMMAP_FREE_START(entry);
	end = VMMAP_FREE_END(entry);

	/*
	 * Update all pivots that are contained in this entry.
	 */
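	/*
	 * A pivot moving downwards hands out space below p->addr, so the
	 * byte just below it is tested for containment; a pivot moving
	 * upwards hands out space at or above p->addr, so p->addr itself
	 * is tested.
	 */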
	for (p = &uaddr->up_pivots[0];
	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
		check_addr = p->addr;
		if (check_addr == 0)
			continue;
		if (p->dir < 0)
			check_addr--;

		if (start <= check_addr &&
		    check_addr < end) {
			KASSERT(p->entry == NULL);
			p->entry = entry;
		}
	}
}

/*
 * Remove an entry with free space from the space tree.
 */
void
uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_pivot_state	*uaddr;
	struct uaddr_pivot		*p;

	uaddr = (struct uaddr_pivot_state*)uaddr_p;
	if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);

	/*
	 * Inform any pivot with this entry that the entry is gone.
	 * Note that this does not automatically invalidate the pivot.
	 */
	for (p = &uaddr->up_pivots[0];
	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
		if (p->entry == entry)
			p->entry = NULL;
	}
}

/*
 * Create a new pivot selector.
 *
 * Initially, all pivots are in the expired state.
 * Two reasons for this:
 * - creating the allocator does not take a huge amount of time
 * - pivots select better on demand, because the pivot selection will be
 *   affected by preceding allocations:
 *   the next pivots will likely end up in different segments of free memory
 *   that were created by earlier allocations, giving a better spread.
 */
struct uvm_addr_state*
uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_pivot_state *uaddr;

	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
	uaddr->up_uaddr.uaddr_minaddr = minaddr;
	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
	RB_INIT(&uaddr->up_free);
	bzero(uaddr->up_pivots, sizeof(uaddr->up_pivots));

	return &uaddr->up_uaddr;
}

#if defined(DEBUG) || defined(DDB)
/*
 * Print the uaddr_pivot_state.
 *
 * If full, a listing of all entries in the state will be provided.
 */
void
uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char*, ...))
{
	struct uaddr_pivot_state	*uaddr;
	struct uaddr_pivot		*pivot;
	struct vm_map_entry		*entry;
	int				 i;
	vaddr_t				 check_addr;

	uaddr = (struct uaddr_pivot_state*)uaddr_p;

	for (i = 0; i < NUM_PIVOTS; i++) {
		pivot = &uaddr->up_pivots[i];

		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
		    pivot->addr, pivot->expire, pivot->dir);
	}
	if (!full)
		return;

	if (RB_EMPTY(&uaddr->up_free))
		(*pr)("\tempty\n");
	/* Print list of free space. */
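	/*
	 * The free tree is ordered by uvm_mapent_fspace_cmp(), so the
	 * ranges below are printed from the smallest amount of free space
	 * to the largest.
	 */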
	RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));

		for (i = 0; i < NUM_PIVOTS; i++) {
			pivot = &uaddr->up_pivots[i];
			check_addr = pivot->addr;
			if (check_addr == 0)
				continue;
			if (pivot->dir < 0)
				check_addr--;

			if (VMMAP_FREE_START(entry) <= check_addr &&
			    check_addr < VMMAP_FREE_END(entry)) {
				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
				    i, pivot->addr);
			}
		}
	}
}
#endif /* DEBUG || DDB */
#endif /* !SMALL_KERNEL */

#ifndef SMALL_KERNEL
/*
 * Strategy for uaddr_stack_brk_select.
 */
struct uaddr_bs_strat {
	vaddr_t	start;	/* Start of area. */
	vaddr_t	end;	/* End of area. */
	int	dir;	/* Search direction. */
};

/*
 * Stack/break allocator.
 *
 * The stack area is filled in the direction opposite to stack growth,
 * the brk area is filled downward (because sbrk() grows upward).
 *
 * Both areas are grown into proportionally: a weighted chance is used to
 * select which one (stack or brk area) to try. If the allocation fails,
 * the other one is tested.
 */

const struct uvm_addr_functions uaddr_stack_brk_functions = {
	.uaddr_select = &uaddr_stack_brk_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_stckbrk"
};

/*
 * Stack/brk address selector.
 */
int
uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry**entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vsize_t			 before_gap, after_gap;
	int			 stack_idx, brk_idx;
	struct uaddr_bs_strat	 strat[2], *s;
	vsize_t			 sb_size;

	/*
	 * Choose gap size and whether the stack is searched before or after
	 * the brk area.
	 */
	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;

	sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start);
	sb_size >>= PAGE_SHIFT;
	if (arc4random_uniform(MAX(sb_size, 0xffffffff)) >
	    map->b_end - map->b_start) {
		brk_idx = 1;
		stack_idx = 0;
	} else {
		brk_idx = 0;
		stack_idx = 1;
	}

	/*
	 * Set up stack search strategy.
	 */
	s = &strat[stack_idx];
	s->start = MAX(map->s_start, uaddr->uaddr_minaddr);
	s->end = MIN(map->s_end, uaddr->uaddr_maxaddr);
#ifdef MACHINE_STACK_GROWS_UP
	s->dir = -1;
#else
	s->dir = 1;
#endif

	/*
	 * Set up brk search strategy.
	 */
	s = &strat[brk_idx];
	s->start = MAX(map->b_start, uaddr->uaddr_minaddr);
	s->end = MIN(map->b_end, uaddr->uaddr_maxaddr);
	s->dir = -1;	/* Opposite of brk() growth. */

	/*
	 * Linear search for space.
	 */
	for (s = &strat[0]; s < &strat[nitems(strat)]; s++) {
		if (s->end - s->start < sz)
			continue;
		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
		    0, sz, align, offset, s->dir, s->start, s->end - sz,
		    before_gap, after_gap) == 0)
			return 0;
	}

	return ENOMEM;
}

struct uvm_addr_state*
uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state* uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
	return uaddr;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
    uvm_mapent_fspace_cmp);
#endif /* !SMALL_KERNEL */