/*	$OpenBSD: uvm_addr.c,v 1.8 2014/06/10 20:08:48 kettenis Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* #define DEBUG */

#include <sys/param.h>
#include <uvm/uvm.h>
#include <uvm/uvm_addr.h>
#include <sys/pool.h>
#include <dev/rndvar.h>

/* Max gap between hint allocations. */
#define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
/* Number of pivots in pivot allocator. */
#define NUM_PIVOTS		16
/*
 * Max number (inclusive) of pages the pivot allocator
 * will place between allocations.
 *
 * The uaddr_pivot_random() function attempts to bias towards
 * small space between allocations, so putting a large number here is fine.
 */
#define PIVOT_RND		8
/*
 * Number of allocations that a pivot can supply before expiring.
 * When a pivot expires, a new pivot has to be found.
 *
 * Must be at least 1.
 */
#define PIVOT_EXPIRE		1024


/* Pool with uvm_addr_state structures. */
struct pool uaddr_pool;
struct pool uaddr_hint_pool;
struct pool uaddr_bestfit_pool;
struct pool uaddr_pivot_pool;
struct pool uaddr_rnd_pool;

/* uvm_addr state for hint based selector. */
struct uaddr_hint_state {
	struct uvm_addr_state	uaddr;
	vsize_t			max_dist;
};

/* uvm_addr state for bestfit selector. */
struct uaddr_bestfit_state {
	struct uvm_addr_state		ubf_uaddr;
	struct uaddr_free_rbtree	ubf_free;
};

/* uvm_addr state for rnd selector. */
struct uaddr_rnd_state {
	struct uvm_addr_state		ur_uaddr;
	TAILQ_HEAD(, vm_map_entry)	ur_free;
};

/* Definition of a pivot in pivot selector. */
struct uaddr_pivot {
	vaddr_t			addr;	/* End of prev. allocation. */
	int			expire;	/* Best before date. */
	int			dir;	/* Direction. */
	struct vm_map_entry	*entry;	/* Will contain next alloc. */
};
/* uvm_addr state for pivot selector. */
struct uaddr_pivot_state {
	struct uvm_addr_state		up_uaddr;

	/* Free space tree, for fast pivot selection. */
	struct uaddr_free_rbtree	up_free;

	/* List of pivots. The pointers point to after the last allocation. */
	struct uaddr_pivot		up_pivots[NUM_PIVOTS];
};

/*
 * Free space comparison.
 * Compares smaller free-space before larger free-space.
 */
static __inline int
uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
{
	if (e1->fspace != e2->fspace)
		return (e1->fspace < e2->fspace ? -1 : 1);
	return (e1->start < e2->start ? -1 : e1->start > e2->start);
}

/* Forward declaration (see below). */
extern const struct uvm_addr_functions uaddr_kernel_functions;
struct uvm_addr_state uaddr_kbootstrap;

/* Support functions. */
#ifndef SMALL_KERNEL
struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree *,
			    vsize_t);
#endif /* !SMALL_KERNEL */
void			 uaddr_kinsert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_kremove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_kbootstrapdestroy(struct uvm_addr_state *);

void			 uaddr_destroy(struct uvm_addr_state *);
void			 uaddr_hint_destroy(struct uvm_addr_state *);
void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
void			 uaddr_rnd_destroy(struct uvm_addr_state *);
void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
void			 uaddr_pivot_destroy(struct uvm_addr_state *);

int			 uaddr_lin_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_kbootstrap_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_rnd_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_hint_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_bestfit_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#ifndef SMALL_KERNEL
int			 uaddr_pivot_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_stack_brk_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif /* !SMALL_KERNEL */

void			 uaddr_rnd_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_rnd_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);

#ifndef SMALL_KERNEL
vsize_t			 uaddr_pivot_random(void);
int			 uaddr_pivot_newpivot(struct vm_map *,
			    struct uaddr_pivot_state *, struct uaddr_pivot *,
			    struct vm_map_entry **, vaddr_t *,
			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
#endif /* !SMALL_KERNEL */

#if defined(DEBUG) || defined(DDB)
void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#endif /* DEBUG || DDB */


#ifndef SMALL_KERNEL
/*
 * Find smallest entry in tree that will fit sz bytes.
 */
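/*
 * This is a lower-bound search: whenever a node is large enough, remember
 * it and descend left to look for a smaller fit, otherwise descend right.
 * The last remembered node is the smallest free range that still fits,
 * found in O(log n).
 */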
struct vm_map_entry *
uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
{
	struct vm_map_entry	*tmp, *res;

	tmp = RB_ROOT(free);
	res = NULL;
	while (tmp) {
		if (tmp->fspace >= sz) {
			res = tmp;
			tmp = RB_LEFT(tmp, dfree.rbtree);
		} else if (tmp->fspace < sz)
			tmp = RB_RIGHT(tmp, dfree.rbtree);
	}
	return res;
}
#endif /* !SMALL_KERNEL */

static __inline vaddr_t
uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted < addr ? adjusted + align : adjusted);
}

static __inline vaddr_t
uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted > addr ? adjusted - align : adjusted);
}

/*
 * Try to fit the requested space into the entry.
 */
int
uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
    vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
    vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	vaddr_t	tmp;
	vsize_t	fspace;

	if (low_addr > high_addr)
		return ENOMEM;
	fspace = high_addr - low_addr;
	if (fspace < sz + before_gap + after_gap)
		return ENOMEM;

	/* Calculate lowest address. */
	low_addr += before_gap;
	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
	if (low_addr < tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (high_addr - after_gap - sz < low_addr)
		return ENOMEM;

	/* Calculate highest address. */
	high_addr -= after_gap + sz;
	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
	if (high_addr > tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (low_addr > high_addr)
		return ENOMEM;

	*min_result = low_addr;
	*max_result = high_addr;
	return 0;
}
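/*
 * Illustrative example of the computation above (assuming 4K pages):
 * with low_addr 0x1000, high_addr 0x9000, sz 0x2000, align 0x2000,
 * offset 0, before_gap 0x1000 and after_gap 0x1000, the usable window
 * becomes [0x2000, 0x6000]: any 0x2000-aligned address in that range
 * leaves room for both gaps and the allocation itself.
 */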

/*
 * Initialize uvm_addr.
 */
void
uvm_addr_init(void)
{
	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state),
	    0, 0, 0, "uaddr", &pool_allocator_nointr);
	pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state),
	    0, 0, 0, "uaddrhint", &pool_allocator_nointr);
	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state),
	    0, 0, 0, "uaddrbest", &pool_allocator_nointr);
	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state),
	    0, 0, 0, "uaddrpivot", &pool_allocator_nointr);
	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state),
	    0, 0, 0, "uaddrrnd", &pool_allocator_nointr);

	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
}

/*
 * Invoke destructor function of uaddr.
 */
void
uvm_addr_destroy(struct uvm_addr_state *uaddr)
{
	if (uaddr)
		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
}

/*
 * Move address forward to satisfy align, offset.
 */
vaddr_t
uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result < addr)
		result += align;
	return result;
}

/*
 * Move address backwards to satisfy align, offset.
 */
vaddr_t
uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t result = (addr & ~(align - 1)) + offset;
	if (result > addr)
		result -= align;
	return result;
}

/*
 * Directional first fit.
 *
 * Do a linear search for free space, starting at addr in entry.
 * direction ==  1: search forward
 * direction == -1: search backward
 *
 * Output: low <= addr <= high and entry will contain addr.
 * ENOMEM will be returned if no space is available.
 *
 * before_gap and after_gap describe the space that must be kept free
 * between the allocation and the neighbouring entries.
 */
int
uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
    int direction, vaddr_t low, vaddr_t high,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry;
	vaddr_t			 low_addr, high_addr;

	KASSERT(entry_out != NULL && addr_out != NULL);
	KASSERT(direction == -1 || direction == 1);
	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
	    (low & PAGE_MASK) == 0 &&
	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
	KASSERT(high + sz > high); /* Check for overflow. */

	/* Hint magic. */
	if (hint == 0)
		hint = (direction == 1 ? low : high);
	else if (hint > high) {
		if (direction != -1)
			return ENOMEM;
		hint = high;
	} else if (hint < low) {
		if (direction != 1)
			return ENOMEM;
		hint = low;
	}

	for (entry = uvm_map_entrybyaddr(&map->addr,
	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
	    entry = (direction == 1 ?
	    RB_NEXT(uvm_map_addr, &map->addr, entry) :
	    RB_PREV(uvm_map_addr, &map->addr, entry))) {
		if (VMMAP_FREE_START(entry) > high ||
		    VMMAP_FREE_END(entry) < low) {
			break;
		}

		if (uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(low, VMMAP_FREE_START(entry)),
		    MIN(high, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr) {
				*addr_out = hint;
			} else {
				*addr_out = (direction == 1 ?
				    low_addr : high_addr);
			}
			return 0;
		}
	}

	return ENOMEM;
}

/*
 * Invoke address selector of uaddr.
 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
 *
 * Will invoke uvm_map_isavail to fill in last_out.
 */
int
uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
    vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	int error;

	if (uaddr == NULL)
		return ENOMEM;

	hint &= ~((vaddr_t)PAGE_MASK);
	if (hint != 0 &&
	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
		return ENOMEM;

	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);

	if (error == 0) {
		KASSERT(*entry_out != NULL);
		*last_out = NULL;
		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
		    *addr_out, sz)) {
			panic("uvm_addr_invoke: address selector %p "
			    "(%s 0x%lx-0x%lx) "
			    "returned unavailable address 0x%lx",
			    uaddr, uaddr->uaddr_functions->uaddr_name,
			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
			    *addr_out);
		}
	}

	return error;
}

#if defined(DEBUG) || defined(DDB)
void
uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
    int (*pr)(const char *, ...))
{
	if (uaddr == NULL) {
		(*pr)("- uvm_addr %s: NULL\n", slot);
		return;
	}

	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
	    uaddr->uaddr_functions->uaddr_name,
	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
	if (uaddr->uaddr_functions->uaddr_print == NULL)
		return;

	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
}
#endif /* DEBUG || DDB */

/*
 * Destroy a uvm_addr_state structure.
 * The uaddr must have been previously allocated from uaddr_state_pool.
 */
void
uaddr_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pool, uaddr);
}


/*
 * Linear allocator.
 * This allocator uses a first-fit algorithm.
 *
 * If hint is set, search will start at the hint position.
 * Only searches forward.
 */
const struct uvm_addr_functions uaddr_lin_functions = {
	.uaddr_select = &uaddr_lin_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_lin"
};

struct uvm_addr_state *
uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_lin_functions;
	return uaddr;
}

int
uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t guard_sz;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
		return ENOMEM;
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
	    0, guard_sz);
}


/*
 * Randomized allocator.
 * This allocator uses uvm_map_hint to acquire a random address and searches
 * from there.
 */

const struct uvm_addr_functions uaddr_rnd_functions = {
	.uaddr_select = &uaddr_rnd_select,
	.uaddr_free_insert = &uaddr_rnd_insert,
	.uaddr_free_remove = &uaddr_rnd_remove,
	.uaddr_destroy = &uaddr_rnd_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_rnd_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_rnd"
};

struct uvm_addr_state *
uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_rnd_state *uaddr;

	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
	TAILQ_INIT(&uaddr->ur_free);
	return &uaddr->ur_uaddr;
}

int
uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct vmspace		*vm;
	vaddr_t			 guard_sz;
	vaddr_t			 low_addr, high_addr;
	struct vm_map_entry	*entry, *next;
	vsize_t			 before_gap, after_gap;
	vaddr_t			 tmp;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
	vm = (struct vmspace *)map;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	/* Quick fail if the allocation won't fit. */
	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
		return ENOMEM;

	/* Select a hint. */
	if (hint == 0)
		hint = uvm_map_hint(vm, prot);
	/* Clamp hint to uaddr range. */
	hint = MIN(MAX(hint, uaddr->uaddr_minaddr),
	    uaddr->uaddr_maxaddr - sz - guard_sz);

	/* Align hint to align,offset parameters. */
	tmp = hint;
	hint = uvm_addr_align_forward(tmp, align, offset);
	/* Check for overflow during alignment. */
	if (hint < tmp || hint > uaddr->uaddr_maxaddr - sz - guard_sz)
		return ENOMEM; /* Compatibility mode: never look backwards. */

	before_gap = 0;
	after_gap = guard_sz;
	hint -= MIN(hint, before_gap);

	/*
	 * Use the augmented address tree to look up the first entry
	 * at or after hint with sufficient space.
	 *
	 * This code is the original optimized code, but will fail if the
	 * subtree it looks at has sufficient space that nonetheless fails
	 * to meet the align constraint.
	 *
	 * Guard: subtree is not exhausted and max(fspace) >= required.
	 */
	entry = uvm_map_entrybyaddr(&map->addr, hint);

	/* Walk up the tree, until there is at least sufficient space. */
	while (entry != NULL &&
	    entry->fspace_augment < before_gap + after_gap + sz)
		entry = RB_PARENT(entry, daddrs.addr_entry);

	while (entry != NULL) {
		/* Test if this fits. */
		if (VMMAP_FREE_END(entry) > hint &&
		    uvm_map_uaddr_e(map, entry) == uaddr &&
		    uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr)
				*addr_out = hint;
			else
				*addr_out = low_addr;
			return 0;
		}

		/* RB_NEXT, but skip subtrees that cannot possibly fit. */
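		/*
		 * If there is no right subtree, or it cannot fit the
		 * request, climb via the parent pointers until we arrive
		 * from a left child; that parent is the in-order successor.
		 */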
		next = RB_RIGHT(entry, daddrs.addr_entry);
		if (next != NULL &&
		    next->fspace_augment >= before_gap + after_gap + sz) {
			entry = next;
			while ((next = RB_LEFT(entry, daddrs.addr_entry)) !=
			    NULL)
				entry = next;
		} else {
do_parent:
			next = RB_PARENT(entry, daddrs.addr_entry);
			if (next == NULL)
				entry = NULL;
			else if (RB_LEFT(next, daddrs.addr_entry) == entry)
				entry = next;
			else {
				entry = next;
				goto do_parent;
			}
		}
	}

	/* Lookup failed. */
	return ENOMEM;
}

/*
 * Destroy a uaddr_rnd_state structure.
 */
void
uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_rnd_pool, uaddr);
}

/*
 * Add entry to tailq.
 */
void
uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

/*
 * Remove entry from tailq.
 */
void
uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

#if defined(DEBUG) || defined(DDB)
void
uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char*, ...))
{
	struct vm_map_entry	*entry;
	struct uaddr_rnd_state	*uaddr;
	vaddr_t			 addr;
	size_t			 count;
	vsize_t			 space;

	uaddr = (struct uaddr_rnd_state *)uaddr_p;
	addr = 0;
	count = 0;
	space = 0;
	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
		count++;
		space += entry->fspace;

		if (full) {
			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
			    entry, entry->start, entry->end,
			    entry->guard, entry->fspace);
			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
		}
		if (entry->start < addr) {
			if (!full)
				(*pr)("\tentry %p: 0x%lx-0x%lx "
				    "G=0x%lx F=0x%lx\n",
				    entry, entry->start, entry->end,
				    entry->guard, entry->fspace);
			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
			    entry->start, addr);
		}

		addr = VMMAP_FREE_END(entry);
	}
	(*pr)("\t%lu entries, 0x%lx free bytes\n", count, space);
}
#endif /* DEBUG || DDB */


/*
 * An allocator that selects an address within distance of the hint.
 *
 * If no hint is given, the allocator refuses to allocate.
 */
const struct uvm_addr_functions uaddr_hint_functions = {
	.uaddr_select = &uaddr_hint_select,
	.uaddr_destroy = &uaddr_hint_destroy,
	.uaddr_name = "uaddr_hint"
};

/*
 * Create uaddr_hint state.
 */
struct uvm_addr_state *
uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist)
{
	struct uaddr_hint_state *ua_hint;

	KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint));

	ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK);
	ua_hint->uaddr.uaddr_minaddr = minaddr;
	ua_hint->uaddr.uaddr_maxaddr = maxaddr;
	ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions;
	ua_hint->max_dist = max_dist;
	return &ua_hint->uaddr;
}

/*
 * Destroy uaddr_hint state.
 */
void
uaddr_hint_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_hint_pool, uaddr);
}

/*
 * Hint selector.
 *
 * Attempts to find an address that is within max_dist of the hint.
 */
int
uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_hint_state	*uaddr =
	    (struct uaddr_hint_state *)uaddr_param;
	vsize_t			 before_gap, after_gap;
	vaddr_t			 low, high;
	int			 dir;

	if (hint == 0)
		return ENOMEM;

	/* Calculate upper and lower bound for selected address. */
	high = hint + uaddr->max_dist;
	if (high < hint)	/* overflow */
		high = map->max_offset;
	high = MIN(high, uaddr->uaddr.uaddr_maxaddr);
	if (high < sz)
		return ENOMEM;	/* Protect against underflow. */
	high -= sz;

	/* Calculate lower bound for selected address. */
	low = hint - uaddr->max_dist;
	if (low > hint)		/* underflow */
		low = map->min_offset;
	low = MAX(low, uaddr->uaddr.uaddr_minaddr);

	/* Search strategy setup. */
	before_gap = PAGE_SIZE +
	    (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
	after_gap = PAGE_SIZE +
	    (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
	dir = (arc4random() & 0x01) ? 1 : -1;

	/*
	 * Try to search:
	 * - forward,  with gap
	 * - backward, with gap
	 * - forward,  without gap
	 * - backward, without gap
	 * (Where forward is in the direction specified by dir and
	 * backward is in the direction specified by -dir).
	 */
	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    dir, low, high, before_gap, after_gap) == 0)
		return 0;
	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    -dir, low, high, before_gap, after_gap) == 0)
		return 0;

	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    dir, low, high, 0, 0) == 0)
		return 0;
	if (uvm_addr_linsearch(map, uaddr_param,
	    entry_out, addr_out, hint, sz, align, offset,
	    -dir, low, high, 0, 0) == 0)
		return 0;

	return ENOMEM;
}

/*
 * Kernel allocation bootstrap logic.
 */
const struct uvm_addr_functions uaddr_kernel_functions = {
	.uaddr_select = &uaddr_kbootstrap_select,
	.uaddr_destroy = &uaddr_kbootstrap_destroy,
	.uaddr_name = "uaddr_kbootstrap"
};

/*
 * Select an address from the map.
 *
 * This function ignores the uaddr spec and instead uses the map directly.
 * Because of that property, the uaddr algorithm can be shared across all
 * kernel maps.
 */
int
uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	vaddr_t tmp;

	RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
		    uvm_addr_fitspace(addr_out, &tmp,
		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
		    sz, align, offset, 0, 0) == 0)
			return 0;
	}

	return ENOMEM;
}

/*
 * Don't destroy the kernel bootstrap allocator.
 */
void
uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
{
	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
}

#ifndef SMALL_KERNEL
/*
 * Best fit algorithm.
 */

const struct uvm_addr_functions uaddr_bestfit_functions = {
	.uaddr_select = &uaddr_bestfit_select,
	.uaddr_free_insert = &uaddr_bestfit_insert,
	.uaddr_free_remove = &uaddr_bestfit_remove,
	.uaddr_destroy = &uaddr_bestfit_destroy,
	.uaddr_name = "uaddr_bestfit"
};

struct uvm_addr_state *
uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
	RB_INIT(&uaddr->ubf_free);
	return &uaddr->ubf_uaddr;
}

void
uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_bestfit_pool, uaddr);
}

void
uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*rb_rv;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting %p, colliding with %p", __func__,
		    uaddr, entry, rb_rv);
	}
}

void
uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);
}

int
uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t				 min, max;
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*entry;
	vsize_t				 guardsz;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);

	/*
	 * Find smallest item on freelist capable of holding item.
	 * Deal with guardpages: search for space with one extra page.
	 */
	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
	if (entry == NULL)
		return ENOMEM;

	/* Walk the tree until we find an entry that fits. */
	while (uvm_addr_fitspace(&min, &max,
	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
	    sz, align, offset, 0, guardsz) != 0) {
		entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry);
		if (entry == NULL)
			return ENOMEM;
	}

	/* Return the address that generates the least fragmentation. */
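	/*
	 * That is, place the allocation against whichever end of the free
	 * range it can sit closest to, so that the leftover on that side
	 * is as small as possible.
	 */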
	*entry_out = entry;
	*addr_out = (min - VMMAP_FREE_START(entry) <=
	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
	    min : max);
	return 0;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
/*
 * A userspace allocator based on pivots.
 */

const struct uvm_addr_functions uaddr_pivot_functions = {
	.uaddr_select = &uaddr_pivot_select,
	.uaddr_free_insert = &uaddr_pivot_insert,
	.uaddr_free_remove = &uaddr_pivot_remove,
	.uaddr_destroy = &uaddr_pivot_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_pivot_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_pivot"
};

/*
 * A special random function for pivots.
 *
 * This function will return:
 * - a random number
 * - a multiple of PAGE_SIZE
 * - at least PAGE_SIZE
 *
 * The random function has a slightly higher chance of returning a small
 * number.
 */
vsize_t
uaddr_pivot_random()
{
	int	r;

	/*
	 * The sum of two PIVOT_RND-sided dice has a triangular distribution
	 * that peaks in the middle.  Folding that curve at PIVOT_RND - 1
	 * (think of a graph on a piece of paper, that you fold) maps the
	 * most probable sums onto the smallest results.
	 */
	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
	    (PIVOT_RND - 1);
	if (r < 0)
		r = -r;

	/*
	 * Make the returned value at least PAGE_SIZE and a multiple of
	 * PAGE_SIZE.
	 */
	return (vaddr_t)(1 + r) << PAGE_SHIFT;
}
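/*
 * With PIVOT_RND == 8, gaps of one or two pages together account for
 * roughly a third of the outcomes, while the maximum gap of eight pages
 * is drawn only a few percent of the time: the expected gap stays small
 * without being predictable.
 */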

/*
 * Select a new pivot.
 *
 * A pivot must:
 * - be chosen at random
 * - have a randomly chosen gap before it, where the uaddr_state starts
 * - have a randomly chosen gap after it, before the uaddr_state ends
 *
 * Furthermore, the pivot must provide sufficient space for the allocation.
 * The addr will be set to the selected address.
 *
 * Returns ENOMEM on failure.
 */
int
uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
    struct uaddr_pivot *pivot,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry, *found;
	vaddr_t			 minaddr, maxaddr;
	vsize_t			 dist;
	vaddr_t			 found_minaddr, found_maxaddr;
	vaddr_t			 min, max;
	vsize_t			 arc4_arg;
	int			 fit_error;
	u_int32_t		 path;

	minaddr = uaddr->up_uaddr.uaddr_minaddr;
	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
	KASSERT(minaddr < maxaddr);
#ifdef DIAGNOSTIC
	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
		panic("uaddr_pivot_newpivot: cannot grant random pivot "
		    "in area less than 2 pages (size = 0x%lx)",
		    maxaddr - minaddr);
	}
#endif /* DIAGNOSTIC */

	/*
	 * Gap calculation: 1/32 of the size of the managed area.
	 *
	 * At most: sufficient to not get truncated at arc4random.
	 * At least: 2 PAGE_SIZE
	 *
	 * minaddr and maxaddr will be changed according to arc4random.
	 */
	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
	if (dist >> PAGE_SHIFT > 0xffffffff) {
		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
	} else {
		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
	}

	/*
	 * A very fast way to find an entry that will be large enough
	 * to hold the allocation, but still is found more or less
	 * randomly: the tree path selector has a 50% chance to go for
	 * a bigger or smaller entry.
	 *
	 * Note that the memory may actually be available,
	 * but the fragmentation may be so bad and the gaps chosen
	 * so unfortunately, that the allocation will not succeed.
	 * Or the alignment can only be satisfied by an entry that
	 * is not visited in the randomly selected path.
	 *
	 * This code finds an entry with sufficient space in O(log n) time.
	 */
	path = arc4random();
	found = NULL;
	entry = RB_ROOT(&uaddr->up_free);
	while (entry != NULL) {
		fit_error = uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), minaddr),
		    MIN(VMMAP_FREE_END(entry), maxaddr),
		    sz, align, offset, before_gap, after_gap);

		/* It fits, save this entry. */
		if (fit_error == 0) {
			found = entry;
			found_minaddr = min;
			found_maxaddr = max;
		}

		/* Next. */
		if (fit_error != 0)
			entry = RB_RIGHT(entry, dfree.rbtree);
		else if ((path & 0x1) == 0) {
			path >>= 1;
			entry = RB_RIGHT(entry, dfree.rbtree);
		} else {
			path >>= 1;
			entry = RB_LEFT(entry, dfree.rbtree);
		}
	}
	if (found == NULL)
		return ENOMEM;	/* No large enough region found. */

	/*
	 * Calculate a random address within found.
	 *
	 * found_minaddr and found_maxaddr are already aligned, so be sure
	 * to select a multiple of align as the offset in the entry.
	 * Preferably, arc4random_uniform is used to provide no bias within
	 * the entry.
	 * However, if the size of the entry exceeds arc4random_uniform's
	 * argument limit, we simply use arc4random (thus limiting ourselves
	 * to 4G * PAGE_SIZE bytes offset).
	 */
	if (found_maxaddr == found_minaddr)
		*addr_out = found_minaddr;
	else {
		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
		arc4_arg = found_maxaddr - found_minaddr;
		if (arc4_arg > 0xffffffff) {
			*addr_out = found_minaddr +
			    (arc4random() & ~(align - 1));
		} else {
			*addr_out = found_minaddr +
			    (arc4random_uniform(arc4_arg) & ~(align - 1));
		}
	}
	/* Address was found in this entry. */
	*entry_out = found;

	/*
	 * Set up new pivot and return selected address.
	 *
	 * Depending on the direction of the pivot, the pivot must be placed
	 * at the bottom or the top of the allocation:
	 * - if the pivot moves upwards, place the pivot at the top of the
	 *   allocation,
	 * - if the pivot moves downwards, place the pivot at the bottom
	 *   of the allocation.
	 */
	pivot->entry = found;
	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
	if (pivot->dir > 0)
		pivot->addr = *addr_out + sz;
	else
		pivot->addr = *addr_out;
	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
	return 0;
}

/*
 * Pivot selector.
 *
 * Each time the selector is invoked, it will select a random pivot, which
 * it will use to select memory with. The memory will be placed at the pivot,
 * with a randomly sized gap between the allocation and the pivot.
 * The pivot will then move so it will never revisit this address.
 *
 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
 * expired, it will be replaced with a newly created pivot. Pivots also
 * automatically expire if they fail to provide memory for an allocation.
 *
 * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
 * which will ensure the pivot points at memory in such a way that the
 * allocation will succeed.
 * As an added bonus, the uaddr_pivot_newpivot() function will perform the
 * allocation immediately and move the pivot as appropriate.
 *
 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
 * allocation to succeed, it will not create a new pivot and the allocation
 * will fail.
 *
 * A pivot running into used memory will automatically expire (because it
 * will fail to allocate).
 *
 * Characteristics of the allocator:
 * - best case, an allocation is O(log N)
 *   (it would be O(1), if it weren't for the need to check if the memory is
 *   free; although that can be avoided...)
 * - worst case, an allocation is O(log N)
 *   (the uaddr_pivot_newpivot() function has that complexity)
 * - failed allocations always take O(log N)
 *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
 */
int
uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*entry;
	struct uaddr_pivot		*pivot;
	vaddr_t				 min, max;
	vsize_t				 before_gap, after_gap;
	int				 err;

	/* Hint must be handled by dedicated hint allocator. */
	if (hint != 0)
		return EINVAL;

	/*
	 * Select a random pivot and random gap sizes around the allocation.
	 */
	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	pivot = &uaddr->up_pivots[
	    arc4random_uniform(nitems(uaddr->up_pivots))];
	before_gap = uaddr_pivot_random();
	after_gap = uaddr_pivot_random();
	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
		goto expired;	/* Pivot is invalid (null or expired). */

	/* Attempt to use the pivot to map the entry. */
	entry = pivot->entry;
	if (pivot->dir > 0) {
		if (uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), pivot->addr),
		    VMMAP_FREE_END(entry), sz, align, offset,
		    before_gap, after_gap) == 0) {
			*addr_out = min;
			*entry_out = entry;
			pivot->addr = min + sz;
			pivot->expire--;
			return 0;
		}
	} else {
		if (uvm_addr_fitspace(&min, &max,
		    VMMAP_FREE_START(entry),
		    MIN(VMMAP_FREE_END(entry), pivot->addr),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*addr_out = max;
			*entry_out = entry;
			pivot->addr = max;
			pivot->expire--;
			return 0;
		}
	}

expired:
	/*
	 * Pivot expired or allocation failed.
	 * Use pivot selector to do the allocation and find a new pivot.
	 */
	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
	    sz, align, offset, before_gap, after_gap);
	return err;
}

/*
 * Free the pivot.
 */
void
uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pivot_pool, uaddr);
}

/*
 * Insert an entry with free space in the space tree.
 */
void
uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*rb_rv;
	struct uaddr_pivot		*p;
	vaddr_t				 check_addr;
	vaddr_t				 start, end;

	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting entry %p which collides with %p", __func__,
		    uaddr, entry, rb_rv);
	}

	start = VMMAP_FREE_START(entry);
	end = VMMAP_FREE_END(entry);

	/*
	 * Update all pivots that are contained in this entry.
	 */
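	/*
	 * For a downward-moving pivot, p->addr is the bottom of its last
	 * allocation, so the byte just below it is what must lie inside
	 * the inserted free range.
	 */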
	for (p = &uaddr->up_pivots[0];
	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
		check_addr = p->addr;
		if (check_addr == 0)
			continue;
		if (p->dir < 0)
			check_addr--;

		if (start <= check_addr &&
		    check_addr < end) {
			KASSERT(p->entry == NULL);
			p->entry = entry;
		}
	}
}

/*
 * Remove an entry with free space from the space tree.
 */
void
uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_pivot_state	*uaddr;
	struct uaddr_pivot		*p;

	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);

	/*
	 * Inform any pivot with this entry that the entry is gone.
	 * Note that this does not automatically invalidate the pivot.
	 */
	for (p = &uaddr->up_pivots[0];
	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
		if (p->entry == entry)
			p->entry = NULL;
	}
}

/*
 * Create a new pivot selector.
 *
 * Initially, all pivots are in the expired state.
 * Two reasons for this:
 * - it means this allocator will not take a huge amount of time
 * - pivots select better on demand, because the pivot selection will be
 *   affected by preceding allocations:
 *   the next pivots will likely end up in different segments of free memory
 *   that was segmented by an earlier allocation; better spread.
 */
struct uvm_addr_state *
uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_pivot_state *uaddr;

	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
	uaddr->up_uaddr.uaddr_minaddr = minaddr;
	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
	RB_INIT(&uaddr->up_free);
	bzero(uaddr->up_pivots, sizeof(uaddr->up_pivots));

	return &uaddr->up_uaddr;
}

#if defined(DEBUG) || defined(DDB)
/*
 * Print the uaddr_pivot_state.
 *
 * If full, a listing of all entries in the state will be provided.
 */
void
uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char *, ...))
{
	struct uaddr_pivot_state	*uaddr;
	struct uaddr_pivot		*pivot;
	struct vm_map_entry		*entry;
	int				 i;
	vaddr_t				 check_addr;

	uaddr = (struct uaddr_pivot_state *)uaddr_p;

	for (i = 0; i < NUM_PIVOTS; i++) {
		pivot = &uaddr->up_pivots[i];

		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
		    pivot->addr, pivot->expire, pivot->dir);
	}
	if (!full)
		return;

	if (RB_EMPTY(&uaddr->up_free))
		(*pr)("\tempty\n");
	/* Print list of free space. */
	RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));

		for (i = 0; i < NUM_PIVOTS; i++) {
			pivot = &uaddr->up_pivots[i];
			check_addr = pivot->addr;
			if (check_addr == 0)
				continue;
			if (pivot->dir < 0)
				check_addr--;

			if (VMMAP_FREE_START(entry) <= check_addr &&
			    check_addr < VMMAP_FREE_END(entry)) {
				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
				    i, pivot->addr);
			}
		}
	}
}
#endif /* DEBUG || DDB */
#endif /* !SMALL_KERNEL */

#ifndef SMALL_KERNEL
/*
 * Strategy for uaddr_stack_brk_select.
 */
struct uaddr_bs_strat {
	vaddr_t	start;	/* Start of area. */
	vaddr_t	end;	/* End of area. */
	int	dir;	/* Search direction. */
};

/*
 * Stack/break allocator.
 *
 * The stack area is grown into in the opposite direction of stack growth;
 * the brk area is grown downward (because sbrk() grows upward).
 *
 * Both areas are grown into proportionally: a weighted chance is used to
 * select which one (stack or brk area) to try first. If the allocation
 * fails, the other one is tested.
 */
const struct uvm_addr_functions uaddr_stack_brk_functions = {
	.uaddr_select = &uaddr_stack_brk_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_stckbrk"
};

/*
 * Stack/brk address selector.
 */
int
uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vsize_t			 before_gap, after_gap;
	int			 stack_idx, brk_idx;
	struct uaddr_bs_strat	 strat[2], *s;
	vsize_t			 sb_size;

	/*
	 * Choose gap size and if the stack is searched before or after the
	 * brk area.
	 */
	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;

	sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start);
	sb_size >>= PAGE_SHIFT;
	if (arc4random_uniform(MAX(sb_size, 0xffffffff)) >
	    map->b_end - map->b_start) {
		brk_idx = 1;
		stack_idx = 0;
	} else {
		brk_idx = 0;
		stack_idx = 1;
	}

	/* Set up stack search strategy. */
	s = &strat[stack_idx];
	s->start = MAX(map->s_start, uaddr->uaddr_minaddr);
	s->end = MIN(map->s_end, uaddr->uaddr_maxaddr);
#ifdef MACHINE_STACK_GROWS_UP
	s->dir = -1;
#else
	s->dir = 1;
#endif

	/* Set up brk search strategy. */
	s = &strat[brk_idx];
	s->start = MAX(map->b_start, uaddr->uaddr_minaddr);
	s->end = MIN(map->b_end, uaddr->uaddr_maxaddr);
	s->dir = -1;	/* Opposite of brk() growth. */

	/* Linear search for space. */
	for (s = &strat[0]; s < &strat[nitems(strat)]; s++) {
		if (s->end - s->start < sz)
			continue;
		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
		    0, sz, align, offset, s->dir, s->start, s->end - sz,
		    before_gap, after_gap) == 0)
			return 0;
	}

	return ENOMEM;
}

struct uvm_addr_state *
uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
	return uaddr;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
    uvm_mapent_fspace_cmp);
#endif /* !SMALL_KERNEL */