1 /* $OpenBSD: uvm_addr.c,v 1.16 2016/06/02 18:48:01 tedu Exp $ */ 2 3 /* 4 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* #define DEBUG */ 20 21 #include <sys/param.h> 22 #include <uvm/uvm.h> 23 #include <uvm/uvm_addr.h> 24 #include <sys/pool.h> 25 26 /* Max gap between hint allocations. */ 27 #define UADDR_HINT_MAXGAP (4 * PAGE_SIZE) 28 /* Number of pivots in pivot allocator. */ 29 #define NUM_PIVOTS 16 30 /* 31 * Max number (inclusive) of pages the pivot allocator 32 * will place between allocations. 33 * 34 * The uaddr_pivot_random() function attempts to bias towards 35 * small space between allocations, so putting a large number here is fine. 36 */ 37 #define PIVOT_RND 8 38 /* 39 * Number of allocations that a pivot can supply before expiring. 40 * When a pivot expires, a new pivot has to be found. 41 * 42 * Must be at least 1. 43 */ 44 #define PIVOT_EXPIRE 1024 45 46 47 /* Pool with uvm_addr_state structures. */ 48 struct pool uaddr_pool; 49 struct pool uaddr_hint_pool; 50 struct pool uaddr_bestfit_pool; 51 struct pool uaddr_pivot_pool; 52 struct pool uaddr_rnd_pool; 53 54 /* uvm_addr state for hint based selector. */ 55 struct uaddr_hint_state { 56 struct uvm_addr_state uaddr; 57 vsize_t max_dist; 58 }; 59 60 /* uvm_addr state for bestfit selector. */ 61 struct uaddr_bestfit_state { 62 struct uvm_addr_state ubf_uaddr; 63 struct uaddr_free_rbtree ubf_free; 64 }; 65 66 /* uvm_addr state for rnd selector. */ 67 struct uaddr_rnd_state { 68 struct uvm_addr_state ur_uaddr; 69 #if 0 70 TAILQ_HEAD(, vm_map_entry) ur_free; 71 #endif 72 }; 73 74 /* Definition of a pivot in pivot selector. */ 75 struct uaddr_pivot { 76 vaddr_t addr; /* End of prev. allocation. */ 77 int expire;/* Best before date. */ 78 int dir; /* Direction. */ 79 struct vm_map_entry *entry; /* Will contain next alloc. */ 80 }; 81 /* uvm_addr state for pivot selector. */ 82 struct uaddr_pivot_state { 83 struct uvm_addr_state up_uaddr; 84 85 /* Free space tree, for fast pivot selection. */ 86 struct uaddr_free_rbtree up_free; 87 88 /* List of pivots. The pointers point to after the last allocation. */ 89 struct uaddr_pivot up_pivots[NUM_PIVOTS]; 90 }; 91 92 /* 93 * Free space comparison. 94 * Compares smaller free-space before larger free-space. 95 */ 96 static __inline int 97 uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2) 98 { 99 if (e1->fspace != e2->fspace) 100 return (e1->fspace < e2->fspace ? -1 : 1); 101 return (e1->start < e2->start ? -1 : e1->start > e2->start); 102 } 103 104 /* Forward declaration (see below). */ 105 extern const struct uvm_addr_functions uaddr_kernel_functions; 106 struct uvm_addr_state uaddr_kbootstrap; 107 108 /* Support functions. */ 109 #ifndef SMALL_KERNEL 110 struct vm_map_entry *uvm_addr_entrybyspace(struct uaddr_free_rbtree*, 111 vsize_t); 112 #endif /* !SMALL_KERNEL */ 113 void uaddr_kinsert(struct vm_map *, 114 struct uvm_addr_state *, struct vm_map_entry *); 115 void uaddr_kremove(struct vm_map *, 116 struct uvm_addr_state *, struct vm_map_entry *); 117 void uaddr_kbootstrapdestroy(struct uvm_addr_state *); 118 119 void uaddr_destroy(struct uvm_addr_state *); 120 void uaddr_hint_destroy(struct uvm_addr_state *); 121 void uaddr_kbootstrap_destroy(struct uvm_addr_state *); 122 void uaddr_rnd_destroy(struct uvm_addr_state *); 123 void uaddr_bestfit_destroy(struct uvm_addr_state *); 124 void uaddr_pivot_destroy(struct uvm_addr_state *); 125 126 #if 0 127 int uaddr_lin_select(struct vm_map *, 128 struct uvm_addr_state *, struct vm_map_entry **, 129 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 130 vaddr_t); 131 #endif 132 int uaddr_kbootstrap_select(struct vm_map *, 133 struct uvm_addr_state *, struct vm_map_entry **, 134 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 135 vaddr_t); 136 int uaddr_rnd_select(struct vm_map *, 137 struct uvm_addr_state *, struct vm_map_entry **, 138 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 139 vaddr_t); 140 int uaddr_hint_select(struct vm_map *, 141 struct uvm_addr_state*, struct vm_map_entry **, 142 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 143 vaddr_t); 144 int uaddr_bestfit_select(struct vm_map *, 145 struct uvm_addr_state*, struct vm_map_entry **, 146 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 147 vaddr_t); 148 #ifndef SMALL_KERNEL 149 int uaddr_pivot_select(struct vm_map *, 150 struct uvm_addr_state *, struct vm_map_entry **, 151 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 152 vaddr_t); 153 int uaddr_stack_brk_select(struct vm_map *, 154 struct uvm_addr_state *, struct vm_map_entry **, 155 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 156 vaddr_t); 157 #endif /* !SMALL_KERNEL */ 158 159 void uaddr_rnd_insert(struct vm_map *, 160 struct uvm_addr_state *, struct vm_map_entry *); 161 void uaddr_rnd_remove(struct vm_map *, 162 struct uvm_addr_state *, struct vm_map_entry *); 163 void uaddr_bestfit_insert(struct vm_map *, 164 struct uvm_addr_state *, struct vm_map_entry *); 165 void uaddr_bestfit_remove(struct vm_map *, 166 struct uvm_addr_state *, struct vm_map_entry *); 167 void uaddr_pivot_insert(struct vm_map *, 168 struct uvm_addr_state *, struct vm_map_entry *); 169 void uaddr_pivot_remove(struct vm_map *, 170 struct uvm_addr_state *, struct vm_map_entry *); 171 172 #ifndef SMALL_KERNEL 173 vsize_t uaddr_pivot_random(void); 174 int uaddr_pivot_newpivot(struct vm_map *, 175 struct uaddr_pivot_state *, struct uaddr_pivot *, 176 struct vm_map_entry **, vaddr_t *, 177 vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t); 178 #endif /* !SMALL_KERNEL */ 179 180 #if defined(DEBUG) || defined(DDB) 181 void uaddr_pivot_print(struct uvm_addr_state *, boolean_t, 182 int (*)(const char *, ...)); 183 #if 0 184 void uaddr_rnd_print(struct uvm_addr_state *, boolean_t, 185 int (*)(const char *, ...)); 186 #endif 187 #endif /* DEBUG || DDB */ 188 189 190 #ifndef SMALL_KERNEL 191 /* 192 * Find smallest entry in tree that will fit sz bytes. 193 */ 194 struct vm_map_entry * 195 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz) 196 { 197 struct vm_map_entry *tmp, *res; 198 199 tmp = RB_ROOT(free); 200 res = NULL; 201 while (tmp) { 202 if (tmp->fspace >= sz) { 203 res = tmp; 204 tmp = RB_LEFT(tmp, dfree.rbtree); 205 } else if (tmp->fspace < sz) 206 tmp = RB_RIGHT(tmp, dfree.rbtree); 207 } 208 return res; 209 } 210 #endif /* !SMALL_KERNEL */ 211 212 static __inline vaddr_t 213 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset) 214 { 215 vaddr_t adjusted; 216 217 KASSERT(offset < align || (align == 0 && offset == 0)); 218 KASSERT((align & (align - 1)) == 0); 219 KASSERT((offset & PAGE_MASK) == 0); 220 221 align = MAX(align, PAGE_SIZE); 222 adjusted = addr & ~(align - 1); 223 adjusted += offset; 224 return (adjusted < addr ? adjusted + align : adjusted); 225 } 226 227 static __inline vaddr_t 228 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset) 229 { 230 vaddr_t adjusted; 231 232 KASSERT(offset < align || (align == 0 && offset == 0)); 233 KASSERT((align & (align - 1)) == 0); 234 KASSERT((offset & PAGE_MASK) == 0); 235 236 align = MAX(align, PAGE_SIZE); 237 adjusted = addr & ~(align - 1); 238 adjusted += offset; 239 return (adjusted > addr ? adjusted - align : adjusted); 240 } 241 242 /* 243 * Try to fit the requested space into the entry. 244 */ 245 int 246 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result, 247 vaddr_t low_addr, vaddr_t high_addr, vsize_t sz, 248 vaddr_t align, vaddr_t offset, 249 vsize_t before_gap, vsize_t after_gap) 250 { 251 vaddr_t tmp; 252 vsize_t fspace; 253 254 if (low_addr > high_addr) 255 return ENOMEM; 256 fspace = high_addr - low_addr; 257 if (fspace < sz + before_gap + after_gap) 258 return ENOMEM; 259 260 /* Calculate lowest address. */ 261 low_addr += before_gap; 262 low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset); 263 if (low_addr < tmp) /* Overflow during alignment. */ 264 return ENOMEM; 265 if (high_addr - after_gap - sz < low_addr) 266 return ENOMEM; 267 268 /* Calculate highest address. */ 269 high_addr -= after_gap + sz; 270 high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset); 271 if (high_addr > tmp) /* Overflow during alignment. */ 272 return ENOMEM; 273 if (low_addr > high_addr) 274 return ENOMEM; 275 276 *min_result = low_addr; 277 *max_result = high_addr; 278 return 0; 279 } 280 281 282 /* 283 * Initialize uvm_addr. 284 */ 285 void 286 uvm_addr_init(void) 287 { 288 pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 289 0, 0, PR_WAITOK, "uaddr", NULL); 290 pool_setipl(&uaddr_pool, IPL_VM); 291 pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state), 292 0, 0, PR_WAITOK, "uaddrhint", NULL); 293 pool_setipl(&uaddr_hint_pool, IPL_VM); 294 pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 295 0, 0, PR_WAITOK, "uaddrbest", NULL); 296 pool_setipl(&uaddr_bestfit_pool, IPL_VM); 297 pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 298 0, 0, PR_WAITOK, "uaddrpivot", NULL); 299 pool_setipl(&uaddr_pivot_pool, IPL_VM); 300 pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 301 0, 0, PR_WAITOK, "uaddrrnd", NULL); 302 pool_setipl(&uaddr_rnd_pool, IPL_VM); 303 304 uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE; 305 uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE; 306 uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions; 307 } 308 309 /* 310 * Invoke destructor function of uaddr. 311 */ 312 void 313 uvm_addr_destroy(struct uvm_addr_state *uaddr) 314 { 315 if (uaddr) 316 (*uaddr->uaddr_functions->uaddr_destroy)(uaddr); 317 } 318 319 /* 320 * Move address forward to satisfy align, offset. 321 */ 322 vaddr_t 323 uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset) 324 { 325 vaddr_t result = (addr & ~(align - 1)) + offset; 326 if (result < addr) 327 result += align; 328 return result; 329 } 330 331 /* 332 * Move address backwards to satisfy align, offset. 333 */ 334 vaddr_t 335 uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset) 336 { 337 vaddr_t result = (addr & ~(align - 1)) + offset; 338 if (result > addr) 339 result -= align; 340 return result; 341 } 342 343 /* 344 * Directional first fit. 345 * 346 * Do a linear search for free space, starting at addr in entry. 347 * direction == 1: search forward 348 * direction == -1: search backward 349 * 350 * Output: low <= addr <= high and entry will contain addr. 351 * 0 will be returned if no space is available. 352 * 353 * gap describes the space that must appear between the preceding entry. 354 */ 355 int 356 uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr, 357 struct vm_map_entry **entry_out, vaddr_t *addr_out, 358 vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset, 359 int direction, vaddr_t low, vaddr_t high, 360 vsize_t before_gap, vsize_t after_gap) 361 { 362 struct vm_map_entry *entry; 363 vaddr_t low_addr, high_addr; 364 365 KASSERT(entry_out != NULL && addr_out != NULL); 366 KASSERT(direction == -1 || direction == 1); 367 KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 && 368 (low & PAGE_MASK) == 0 && 369 (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0); 370 KASSERT(high + sz > high); /* Check for overflow. */ 371 372 /* Hint magic. */ 373 if (hint == 0) 374 hint = (direction == 1 ? low : high); 375 else if (hint > high) { 376 if (direction != -1) 377 return ENOMEM; 378 hint = high; 379 } else if (hint < low) { 380 if (direction != 1) 381 return ENOMEM; 382 hint = low; 383 } 384 385 for (entry = uvm_map_entrybyaddr(&map->addr, 386 hint - (direction == -1 ? 1 : 0)); entry != NULL; 387 entry = (direction == 1 ? 388 RB_NEXT(uvm_map_addr, &map->addr, entry) : 389 RB_PREV(uvm_map_addr, &map->addr, entry))) { 390 if (VMMAP_FREE_START(entry) > high || 391 VMMAP_FREE_END(entry) < low) { 392 break; 393 } 394 395 if (uvm_addr_fitspace(&low_addr, &high_addr, 396 MAX(low, VMMAP_FREE_START(entry)), 397 MIN(high, VMMAP_FREE_END(entry)), 398 sz, align, offset, before_gap, after_gap) == 0) { 399 *entry_out = entry; 400 if (hint >= low_addr && hint <= high_addr) { 401 *addr_out = hint; 402 } else { 403 *addr_out = (direction == 1 ? 404 low_addr : high_addr); 405 } 406 return 0; 407 } 408 } 409 410 return ENOMEM; 411 } 412 413 /* 414 * Invoke address selector of uaddr. 415 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM. 416 * 417 * Will invoke uvm_addr_isavail to fill in last_out. 418 */ 419 int 420 uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr, 421 struct vm_map_entry **entry_out, struct vm_map_entry **last_out, 422 vaddr_t *addr_out, 423 vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint) 424 { 425 int error; 426 427 if (uaddr == NULL) 428 return ENOMEM; 429 430 hint &= ~((vaddr_t)PAGE_MASK); 431 if (hint != 0 && 432 !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr)) 433 return ENOMEM; 434 435 error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr, 436 entry_out, addr_out, sz, align, offset, prot, hint); 437 438 if (error == 0) { 439 KASSERT(*entry_out != NULL); 440 *last_out = NULL; 441 if (!uvm_map_isavail(map, uaddr, entry_out, last_out, 442 *addr_out, sz)) { 443 panic("uvm_addr_invoke: address selector %p " 444 "(%s 0x%lx-0x%lx) " 445 "returned unavailable address 0x%lx sz 0x%lx", 446 uaddr, uaddr->uaddr_functions->uaddr_name, 447 uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr, 448 *addr_out, sz); 449 } 450 } 451 452 return error; 453 } 454 455 #if defined(DEBUG) || defined(DDB) 456 void 457 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full, 458 int (*pr)(const char *, ...)) 459 { 460 if (uaddr == NULL) { 461 (*pr)("- uvm_addr %s: NULL\n", slot); 462 return; 463 } 464 465 (*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr, 466 uaddr->uaddr_functions->uaddr_name, 467 uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr); 468 if (uaddr->uaddr_functions->uaddr_print == NULL) 469 return; 470 471 (*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr); 472 } 473 #endif /* DEBUG || DDB */ 474 475 /* 476 * Destroy a uvm_addr_state structure. 477 * The uaddr must have been previously allocated from uaddr_state_pool. 478 */ 479 void 480 uaddr_destroy(struct uvm_addr_state *uaddr) 481 { 482 pool_put(&uaddr_pool, uaddr); 483 } 484 485 486 #if 0 487 /* 488 * Linear allocator. 489 * This allocator uses a first-fit algorithm. 490 * 491 * If hint is set, search will start at the hint position. 492 * Only searches forward. 493 */ 494 const struct uvm_addr_functions uaddr_lin_functions = { 495 .uaddr_select = &uaddr_lin_select, 496 .uaddr_destroy = &uaddr_destroy, 497 .uaddr_name = "uaddr_lin" 498 }; 499 500 struct uvm_addr_state * 501 uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr) 502 { 503 struct uvm_addr_state *uaddr; 504 505 uaddr = pool_get(&uaddr_pool, PR_WAITOK); 506 uaddr->uaddr_minaddr = minaddr; 507 uaddr->uaddr_maxaddr = maxaddr; 508 uaddr->uaddr_functions = &uaddr_lin_functions; 509 return uaddr; 510 } 511 512 int 513 uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr, 514 struct vm_map_entry **entry_out, vaddr_t *addr_out, 515 vsize_t sz, vaddr_t align, vaddr_t offset, 516 vm_prot_t prot, vaddr_t hint) 517 { 518 vaddr_t guard_sz; 519 520 /* Deal with guardpages: search for space with one extra page. */ 521 guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE); 522 523 if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz) 524 return ENOMEM; 525 return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz, 526 align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz, 527 0, guard_sz); 528 } 529 #endif 530 531 /* 532 * Randomized allocator. 533 * This allocator use uvm_map_hint to acquire a random address and searches 534 * from there. 535 */ 536 537 const struct uvm_addr_functions uaddr_rnd_functions = { 538 .uaddr_select = &uaddr_rnd_select, 539 .uaddr_free_insert = &uaddr_rnd_insert, 540 .uaddr_free_remove = &uaddr_rnd_remove, 541 .uaddr_destroy = &uaddr_rnd_destroy, 542 #if defined(DEBUG) || defined(DDB) 543 #if 0 544 .uaddr_print = &uaddr_rnd_print, 545 #endif 546 #endif /* DEBUG || DDB */ 547 .uaddr_name = "uaddr_rnd" 548 }; 549 550 struct uvm_addr_state * 551 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr) 552 { 553 struct uaddr_rnd_state *uaddr; 554 555 uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK); 556 uaddr->ur_uaddr.uaddr_minaddr = minaddr; 557 uaddr->ur_uaddr.uaddr_maxaddr = maxaddr; 558 uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions; 559 #if 0 560 TAILQ_INIT(&uaddr->ur_free); 561 #endif 562 return &uaddr->ur_uaddr; 563 } 564 565 int 566 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr, 567 struct vm_map_entry **entry_out, vaddr_t *addr_out, 568 vsize_t sz, vaddr_t align, vaddr_t offset, 569 vm_prot_t prot, vaddr_t hint) 570 { 571 struct vmspace *vm; 572 vaddr_t minaddr, maxaddr; 573 vaddr_t guard_sz; 574 vaddr_t low_addr, high_addr; 575 struct vm_map_entry *entry, *next; 576 vsize_t before_gap, after_gap; 577 vaddr_t tmp; 578 579 KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0); 580 vm = (struct vmspace *)map; 581 582 /* Deal with guardpages: search for space with one extra page. */ 583 guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE); 584 585 minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset); 586 maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz, 587 align, offset); 588 589 /* Quick fail if the allocation won't fit. */ 590 if (minaddr >= maxaddr) 591 return ENOMEM; 592 593 /* Select a hint. */ 594 if (hint == 0) 595 hint = uvm_map_hint(vm, prot, minaddr, maxaddr); 596 /* Clamp hint to uaddr range. */ 597 hint = MIN(MAX(hint, minaddr), maxaddr); 598 599 /* Align hint to align,offset parameters. */ 600 tmp = hint; 601 hint = uvm_addr_align_forward(tmp, align, offset); 602 /* Check for overflow during alignment. */ 603 if (hint < tmp || hint > maxaddr) 604 return ENOMEM; /* Compatibility mode: never look backwards. */ 605 606 before_gap = 0; 607 after_gap = guard_sz; 608 hint -= MIN(hint, before_gap); 609 610 /* 611 * Use the augmented address tree to look up the first entry 612 * at or after hint with sufficient space. 613 * 614 * This code is the original optimized code, but will fail if the 615 * subtree it looks at does have sufficient space, but fails to meet 616 * the align constraint. 617 * 618 * Guard: subtree is not exhausted and max(fspace) >= required. 619 */ 620 entry = uvm_map_entrybyaddr(&map->addr, hint); 621 622 /* Walk up the tree, until there is at least sufficient space. */ 623 while (entry != NULL && 624 entry->fspace_augment < before_gap + after_gap + sz) 625 entry = RB_PARENT(entry, daddrs.addr_entry); 626 627 while (entry != NULL) { 628 /* Test if this fits. */ 629 if (VMMAP_FREE_END(entry) > hint && 630 uvm_map_uaddr_e(map, entry) == uaddr && 631 uvm_addr_fitspace(&low_addr, &high_addr, 632 MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)), 633 MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)), 634 sz, align, offset, before_gap, after_gap) == 0) { 635 *entry_out = entry; 636 if (hint >= low_addr && hint <= high_addr) 637 *addr_out = hint; 638 else 639 *addr_out = low_addr; 640 return 0; 641 } 642 643 /* RB_NEXT, but skip subtrees that cannot possible fit. */ 644 next = RB_RIGHT(entry, daddrs.addr_entry); 645 if (next != NULL && 646 next->fspace_augment >= before_gap + after_gap + sz) { 647 entry = next; 648 while ((next = RB_LEFT(entry, daddrs.addr_entry)) != 649 NULL) 650 entry = next; 651 } else { 652 do_parent: 653 next = RB_PARENT(entry, daddrs.addr_entry); 654 if (next == NULL) 655 entry = NULL; 656 else if (RB_LEFT(next, daddrs.addr_entry) == entry) 657 entry = next; 658 else { 659 entry = next; 660 goto do_parent; 661 } 662 } 663 } 664 665 /* Lookup failed. */ 666 return ENOMEM; 667 } 668 669 /* 670 * Destroy a uaddr_rnd_state structure. 671 */ 672 void 673 uaddr_rnd_destroy(struct uvm_addr_state *uaddr) 674 { 675 pool_put(&uaddr_rnd_pool, uaddr); 676 } 677 678 /* 679 * Add entry to tailq. 680 */ 681 void 682 uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, 683 struct vm_map_entry *entry) 684 { 685 return; 686 } 687 688 /* 689 * Remove entry from tailq. 690 */ 691 void 692 uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, 693 struct vm_map_entry *entry) 694 { 695 return; 696 } 697 698 #if 0 699 #if defined(DEBUG) || defined(DDB) 700 void 701 uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full, 702 int (*pr)(const char*, ...)) 703 { 704 struct vm_map_entry *entry; 705 struct uaddr_rnd_state *uaddr; 706 vaddr_t addr; 707 size_t count; 708 vsize_t space; 709 710 uaddr = (struct uaddr_rnd_state *)uaddr_p; 711 addr = 0; 712 count = 0; 713 space = 0; 714 TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) { 715 count++; 716 space += entry->fspace; 717 718 if (full) { 719 (*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n", 720 entry, entry->start, entry->end, 721 entry->guard, entry->fspace); 722 (*pr)("\t\tfree: 0x%lx-0x%lx\n", 723 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 724 } 725 if (entry->start < addr) { 726 if (!full) 727 (*pr)("\tentry %p: 0x%lx-0x%lx " 728 "G=0x%lx F=0x%lx\n", 729 entry, entry->start, entry->end, 730 entry->guard, entry->fspace); 731 (*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n", 732 entry->start, addr); 733 } 734 735 addr = VMMAP_FREE_END(entry); 736 } 737 (*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space); 738 } 739 #endif /* DEBUG || DDB */ 740 #endif 741 742 /* 743 * An allocator that selects an address within distance of the hint. 744 * 745 * If no hint is given, the allocator refuses to allocate. 746 */ 747 const struct uvm_addr_functions uaddr_hint_functions = { 748 .uaddr_select = &uaddr_hint_select, 749 .uaddr_destroy = &uaddr_hint_destroy, 750 .uaddr_name = "uaddr_hint" 751 }; 752 753 /* 754 * Create uaddr_hint state. 755 */ 756 struct uvm_addr_state * 757 uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist) 758 { 759 struct uaddr_hint_state *ua_hint; 760 761 KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint)); 762 763 ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK); 764 ua_hint->uaddr.uaddr_minaddr = minaddr; 765 ua_hint->uaddr.uaddr_maxaddr = maxaddr; 766 ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions; 767 ua_hint->max_dist = max_dist; 768 return &ua_hint->uaddr; 769 } 770 771 /* 772 * Destroy uaddr_hint state. 773 */ 774 void 775 uaddr_hint_destroy(struct uvm_addr_state *uaddr) 776 { 777 pool_put(&uaddr_hint_pool, uaddr); 778 } 779 780 /* 781 * Hint selector. 782 * 783 * Attempts to find an address that is within max_dist of the hint. 784 */ 785 int 786 uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param, 787 struct vm_map_entry **entry_out, vaddr_t *addr_out, 788 vsize_t sz, vaddr_t align, vaddr_t offset, 789 vm_prot_t prot, vaddr_t hint) 790 { 791 struct uaddr_hint_state *uaddr = 792 (struct uaddr_hint_state *)uaddr_param; 793 vsize_t before_gap, after_gap; 794 vaddr_t low, high; 795 int dir; 796 797 if (hint == 0) 798 return ENOMEM; 799 800 /* Calculate upper and lower bound for selected address. */ 801 high = hint + uaddr->max_dist; 802 if (high < hint) /* overflow */ 803 high = map->max_offset; 804 high = MIN(high, uaddr->uaddr.uaddr_maxaddr); 805 if (high < sz) 806 return ENOMEM; /* Protect against underflow. */ 807 high -= sz; 808 809 /* Calculate lower bound for selected address. */ 810 low = hint - uaddr->max_dist; 811 if (low > hint) /* underflow */ 812 low = map->min_offset; 813 low = MAX(low, uaddr->uaddr.uaddr_minaddr); 814 815 /* Search strategy setup. */ 816 before_gap = PAGE_SIZE + 817 (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK); 818 after_gap = PAGE_SIZE + 819 (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK); 820 dir = (arc4random() & 0x01) ? 1 : -1; 821 822 /* 823 * Try to search: 824 * - forward, with gap 825 * - backward, with gap 826 * - forward, without gap 827 * - backward, without gap 828 * (Where forward is in the direction specified by dir and 829 * backward is in the direction specified by -dir). 830 */ 831 if (uvm_addr_linsearch(map, uaddr_param, 832 entry_out, addr_out, hint, sz, align, offset, 833 dir, low, high, before_gap, after_gap) == 0) 834 return 0; 835 if (uvm_addr_linsearch(map, uaddr_param, 836 entry_out, addr_out, hint, sz, align, offset, 837 -dir, low, high, before_gap, after_gap) == 0) 838 return 0; 839 840 if (uvm_addr_linsearch(map, uaddr_param, 841 entry_out, addr_out, hint, sz, align, offset, 842 dir, low, high, 0, 0) == 0) 843 return 0; 844 if (uvm_addr_linsearch(map, uaddr_param, 845 entry_out, addr_out, hint, sz, align, offset, 846 -dir, low, high, 0, 0) == 0) 847 return 0; 848 849 return ENOMEM; 850 } 851 852 /* 853 * Kernel allocation bootstrap logic. 854 */ 855 const struct uvm_addr_functions uaddr_kernel_functions = { 856 .uaddr_select = &uaddr_kbootstrap_select, 857 .uaddr_destroy = &uaddr_kbootstrap_destroy, 858 .uaddr_name = "uaddr_kbootstrap" 859 }; 860 861 /* 862 * Select an address from the map. 863 * 864 * This function ignores the uaddr spec and instead uses the map directly. 865 * Because of that property, the uaddr algorithm can be shared across all 866 * kernel maps. 867 */ 868 int 869 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr, 870 struct vm_map_entry **entry_out, vaddr_t *addr_out, 871 vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint) 872 { 873 vaddr_t tmp; 874 875 RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) { 876 if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr && 877 uvm_addr_fitspace(addr_out, &tmp, 878 VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out), 879 sz, align, offset, 0, 0) == 0) 880 return 0; 881 } 882 883 return ENOMEM; 884 } 885 886 /* 887 * Don't destroy the kernel bootstrap allocator. 888 */ 889 void 890 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr) 891 { 892 KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap); 893 } 894 895 #ifndef SMALL_KERNEL 896 /* 897 * Best fit algorithm. 898 */ 899 900 const struct uvm_addr_functions uaddr_bestfit_functions = { 901 .uaddr_select = &uaddr_bestfit_select, 902 .uaddr_free_insert = &uaddr_bestfit_insert, 903 .uaddr_free_remove = &uaddr_bestfit_remove, 904 .uaddr_destroy = &uaddr_bestfit_destroy, 905 .uaddr_name = "uaddr_bestfit" 906 }; 907 908 struct uvm_addr_state * 909 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr) 910 { 911 struct uaddr_bestfit_state *uaddr; 912 913 uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK); 914 uaddr->ubf_uaddr.uaddr_minaddr = minaddr; 915 uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr; 916 uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions; 917 RB_INIT(&uaddr->ubf_free); 918 return &uaddr->ubf_uaddr; 919 } 920 921 void 922 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr) 923 { 924 pool_put(&uaddr_bestfit_pool, uaddr); 925 } 926 927 void 928 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, 929 struct vm_map_entry *entry) 930 { 931 struct uaddr_bestfit_state *uaddr; 932 struct vm_map_entry *rb_rv; 933 934 uaddr = (struct uaddr_bestfit_state *)uaddr_p; 935 if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) != 936 NULL) { 937 panic("%s: duplicate insertion: state %p " 938 "interting %p, colliding with %p", __func__, 939 uaddr, entry, rb_rv); 940 } 941 } 942 943 void 944 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, 945 struct vm_map_entry *entry) 946 { 947 struct uaddr_bestfit_state *uaddr; 948 949 uaddr = (struct uaddr_bestfit_state *)uaddr_p; 950 if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry) 951 panic("%s: entry was not in tree", __func__); 952 } 953 954 int 955 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p, 956 struct vm_map_entry **entry_out, vaddr_t *addr_out, 957 vsize_t sz, vaddr_t align, vaddr_t offset, 958 vm_prot_t prot, vaddr_t hint) 959 { 960 vaddr_t min, max; 961 struct uaddr_bestfit_state *uaddr; 962 struct vm_map_entry *entry; 963 vsize_t guardsz; 964 965 uaddr = (struct uaddr_bestfit_state *)uaddr_p; 966 guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0); 967 968 /* 969 * Find smallest item on freelist capable of holding item. 970 * Deal with guardpages: search for space with one extra page. 971 */ 972 entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz); 973 if (entry == NULL) 974 return ENOMEM; 975 976 /* Walk the tree until we find an entry that fits. */ 977 while (uvm_addr_fitspace(&min, &max, 978 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry), 979 sz, align, offset, 0, guardsz) != 0) { 980 entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry); 981 if (entry == NULL) 982 return ENOMEM; 983 } 984 985 /* Return the address that generates the least fragmentation. */ 986 *entry_out = entry; 987 *addr_out = (min - VMMAP_FREE_START(entry) <= 988 VMMAP_FREE_END(entry) - guardsz - sz - max ? 989 min : max); 990 return 0; 991 } 992 #endif /* !SMALL_KERNEL */ 993 994 995 #ifndef SMALL_KERNEL 996 /* 997 * A userspace allocator based on pivots. 998 */ 999 1000 const struct uvm_addr_functions uaddr_pivot_functions = { 1001 .uaddr_select = &uaddr_pivot_select, 1002 .uaddr_free_insert = &uaddr_pivot_insert, 1003 .uaddr_free_remove = &uaddr_pivot_remove, 1004 .uaddr_destroy = &uaddr_pivot_destroy, 1005 #if defined(DEBUG) || defined(DDB) 1006 .uaddr_print = &uaddr_pivot_print, 1007 #endif /* DEBUG || DDB */ 1008 .uaddr_name = "uaddr_pivot" 1009 }; 1010 1011 /* 1012 * A special random function for pivots. 1013 * 1014 * This function will return: 1015 * - a random number 1016 * - a multiple of PAGE_SIZE 1017 * - at least PAGE_SIZE 1018 * 1019 * The random function has a slightly higher change to return a small number. 1020 */ 1021 vsize_t 1022 uaddr_pivot_random(void) 1023 { 1024 int r; 1025 1026 /* 1027 * The sum of two six-sided dice will have a normal distribution. 1028 * We map the highest probable number to 1, by folding the curve 1029 * (think of a graph on a piece of paper, that you fold). 1030 * 1031 * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1 1032 * have the same and highest probability of happening. 1033 */ 1034 r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) - 1035 (PIVOT_RND - 1); 1036 if (r < 0) 1037 r = -r; 1038 1039 /* 1040 * Make the returned value at least PAGE_SIZE and a multiple of 1041 * PAGE_SIZE. 1042 */ 1043 return (vaddr_t)(1 + r) << PAGE_SHIFT; 1044 } 1045 1046 /* 1047 * Select a new pivot. 1048 * 1049 * A pivot must: 1050 * - be chosen random 1051 * - have a randomly chosen gap before it, where the uaddr_state starts 1052 * - have a randomly chosen gap after it, before the uaddr_state ends 1053 * 1054 * Furthermore, the pivot must provide sufficient space for the allocation. 1055 * The addr will be set to the selected address. 1056 * 1057 * Returns ENOMEM on failure. 1058 */ 1059 int 1060 uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr, 1061 struct uaddr_pivot *pivot, 1062 struct vm_map_entry **entry_out, vaddr_t *addr_out, 1063 vsize_t sz, vaddr_t align, vaddr_t offset, 1064 vsize_t before_gap, vsize_t after_gap) 1065 { 1066 struct vm_map_entry *entry, *found; 1067 vaddr_t minaddr, maxaddr; 1068 vsize_t dist; 1069 vaddr_t found_minaddr, found_maxaddr; 1070 vaddr_t min, max; 1071 vsize_t arc4_arg; 1072 int fit_error; 1073 u_int32_t path; 1074 1075 minaddr = uaddr->up_uaddr.uaddr_minaddr; 1076 maxaddr = uaddr->up_uaddr.uaddr_maxaddr; 1077 KASSERT(minaddr < maxaddr); 1078 #ifdef DIAGNOSTIC 1079 if (minaddr + 2 * PAGE_SIZE > maxaddr) { 1080 panic("uaddr_pivot_newpivot: cannot grant random pivot " 1081 "in area less than 2 pages (size = 0x%lx)", 1082 maxaddr - minaddr); 1083 } 1084 #endif /* DIAGNOSTIC */ 1085 1086 /* 1087 * Gap calculation: 1/32 of the size of the managed area. 1088 * 1089 * At most: sufficient to not get truncated at arc4random. 1090 * At least: 2 PAGE_SIZE 1091 * 1092 * minaddr and maxaddr will be changed according to arc4random. 1093 */ 1094 dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE); 1095 if (dist >> PAGE_SHIFT > 0xffffffff) { 1096 minaddr += (vsize_t)arc4random() << PAGE_SHIFT; 1097 maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT; 1098 } else { 1099 minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) << 1100 PAGE_SHIFT; 1101 maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) << 1102 PAGE_SHIFT; 1103 } 1104 1105 /* 1106 * A very fast way to find an entry that will be large enough 1107 * to hold the allocation, but still is found more or less 1108 * randomly: the tree path selector has a 50% chance to go for 1109 * a bigger or smaller entry. 1110 * 1111 * Note that the memory may actually be available, 1112 * but the fragmentation may be so bad and the gaps chosen 1113 * so unfortunately, that the allocation will not succeed. 1114 * Or the alignment can only be satisfied by an entry that 1115 * is not visited in the randomly selected path. 1116 * 1117 * This code finds an entry with sufficient space in O(log n) time. 1118 */ 1119 path = arc4random(); 1120 found = NULL; 1121 entry = RB_ROOT(&uaddr->up_free); 1122 while (entry != NULL) { 1123 fit_error = uvm_addr_fitspace(&min, &max, 1124 MAX(VMMAP_FREE_START(entry), minaddr), 1125 MIN(VMMAP_FREE_END(entry), maxaddr), 1126 sz, align, offset, before_gap, after_gap); 1127 1128 /* It fits, save this entry. */ 1129 if (fit_error == 0) { 1130 found = entry; 1131 found_minaddr = min; 1132 found_maxaddr = max; 1133 } 1134 1135 /* Next. */ 1136 if (fit_error != 0) 1137 entry = RB_RIGHT(entry, dfree.rbtree); 1138 else if ((path & 0x1) == 0) { 1139 path >>= 1; 1140 entry = RB_RIGHT(entry, dfree.rbtree); 1141 } else { 1142 path >>= 1; 1143 entry = RB_LEFT(entry, dfree.rbtree); 1144 } 1145 } 1146 if (found == NULL) 1147 return ENOMEM; /* Not found a large enough region. */ 1148 1149 /* 1150 * Calculate a random address within found. 1151 * 1152 * found_minaddr and found_maxaddr are already aligned, so be sure 1153 * to select a multiple of align as the offset in the entry. 1154 * Preferably, arc4random_uniform is used to provide no bias within 1155 * the entry. 1156 * However if the size of the entry exceeds arc4random_uniforms 1157 * argument limit, we simply use arc4random (thus limiting ourselves 1158 * to 4G * PAGE_SIZE bytes offset). 1159 */ 1160 if (found_maxaddr == found_minaddr) 1161 *addr_out = found_minaddr; 1162 else { 1163 KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0); 1164 arc4_arg = found_maxaddr - found_minaddr; 1165 if (arc4_arg > 0xffffffff) { 1166 *addr_out = found_minaddr + 1167 (arc4random() & (align - 1)); 1168 } else { 1169 *addr_out = found_minaddr + 1170 (arc4random_uniform(arc4_arg) & (align - 1)); 1171 } 1172 } 1173 /* Address was found in this entry. */ 1174 *entry_out = found; 1175 1176 /* 1177 * Set up new pivot and return selected address. 1178 * 1179 * Depending on the direction of the pivot, the pivot must be placed 1180 * at the bottom or the top of the allocation: 1181 * - if the pivot moves upwards, place the pivot at the top of the 1182 * allocation, 1183 * - if the pivot moves downwards, place the pivot at the bottom 1184 * of the allocation. 1185 */ 1186 pivot->entry = found; 1187 pivot->dir = (arc4random() & 0x1 ? 1 : -1); 1188 if (pivot->dir > 0) 1189 pivot->addr = *addr_out + sz; 1190 else 1191 pivot->addr = *addr_out; 1192 pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */ 1193 return 0; 1194 } 1195 1196 /* 1197 * Pivot selector. 1198 * 1199 * Each time the selector is invoked, it will select a random pivot, which 1200 * it will use to select memory with. The memory will be placed at the pivot, 1201 * with a randomly sized gap between the allocation and the pivot. 1202 * The pivot will then move so it will never revisit this address. 1203 * 1204 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes 1205 * expired, it will be replaced with a newly created pivot. Pivots also 1206 * automatically expire if they fail to provide memory for an allocation. 1207 * 1208 * Expired pivots are replaced using the uaddr_pivot_newpivot() function, 1209 * which will ensure the pivot points at memory in such a way that the 1210 * allocation will succeed. 1211 * As an added bonus, the uaddr_pivot_newpivot() function will perform the 1212 * allocation immediately and move the pivot as appropriate. 1213 * 1214 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the 1215 * allocation to succeed, it will not create a new pivot and the allocation 1216 * will fail. 1217 * 1218 * A pivot running into used memory will automatically expire (because it will 1219 * fail to allocate). 1220 * 1221 * Characteristics of the allocator: 1222 * - best case, an allocation is O(log N) 1223 * (it would be O(1), if it werent for the need to check if the memory is 1224 * free; although that can be avoided...) 1225 * - worst case, an allocation is O(log N) 1226 * (the uaddr_pivot_newpivot() function has that complexity) 1227 * - failed allocations always take O(log N) 1228 * (the uaddr_pivot_newpivot() function will walk that deep into the tree). 1229 */ 1230 int 1231 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p, 1232 struct vm_map_entry **entry_out, vaddr_t *addr_out, 1233 vsize_t sz, vaddr_t align, vaddr_t offset, 1234 vm_prot_t prot, vaddr_t hint) 1235 { 1236 struct uaddr_pivot_state *uaddr; 1237 struct vm_map_entry *entry; 1238 struct uaddr_pivot *pivot; 1239 vaddr_t min, max; 1240 vsize_t before_gap, after_gap; 1241 int err; 1242 1243 /* Hint must be handled by dedicated hint allocator. */ 1244 if (hint != 0) 1245 return EINVAL; 1246 1247 /* 1248 * Select a random pivot and a random gap sizes around the allocation. 1249 */ 1250 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1251 pivot = &uaddr->up_pivots[ 1252 arc4random_uniform(nitems(uaddr->up_pivots))]; 1253 before_gap = uaddr_pivot_random(); 1254 after_gap = uaddr_pivot_random(); 1255 if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0) 1256 goto expired; /* Pivot is invalid (null or expired). */ 1257 1258 /* Attempt to use the pivot to map the entry. */ 1259 entry = pivot->entry; 1260 if (pivot->dir > 0) { 1261 if (uvm_addr_fitspace(&min, &max, 1262 MAX(VMMAP_FREE_START(entry), pivot->addr), 1263 VMMAP_FREE_END(entry), sz, align, offset, 1264 before_gap, after_gap) == 0) { 1265 *addr_out = min; 1266 *entry_out = entry; 1267 pivot->addr = min + sz; 1268 pivot->expire--; 1269 return 0; 1270 } 1271 } else { 1272 if (uvm_addr_fitspace(&min, &max, 1273 VMMAP_FREE_START(entry), 1274 MIN(VMMAP_FREE_END(entry), pivot->addr), 1275 sz, align, offset, before_gap, after_gap) == 0) { 1276 *addr_out = max; 1277 *entry_out = entry; 1278 pivot->addr = max; 1279 pivot->expire--; 1280 return 0; 1281 } 1282 } 1283 1284 expired: 1285 /* 1286 * Pivot expired or allocation failed. 1287 * Use pivot selector to do the allocation and find a new pivot. 1288 */ 1289 err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out, 1290 sz, align, offset, before_gap, after_gap); 1291 return err; 1292 } 1293 1294 /* 1295 * Free the pivot. 1296 */ 1297 void 1298 uaddr_pivot_destroy(struct uvm_addr_state *uaddr) 1299 { 1300 pool_put(&uaddr_pivot_pool, uaddr); 1301 } 1302 1303 /* 1304 * Insert an entry with free space in the space tree. 1305 */ 1306 void 1307 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p, 1308 struct vm_map_entry *entry) 1309 { 1310 struct uaddr_pivot_state *uaddr; 1311 struct vm_map_entry *rb_rv; 1312 struct uaddr_pivot *p; 1313 vaddr_t check_addr; 1314 vaddr_t start, end; 1315 1316 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1317 if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) != 1318 NULL) { 1319 panic("%s: duplicate insertion: state %p " 1320 "inserting entry %p which collides with %p", __func__, 1321 uaddr, entry, rb_rv); 1322 } 1323 1324 start = VMMAP_FREE_START(entry); 1325 end = VMMAP_FREE_END(entry); 1326 1327 /* 1328 * Update all pivots that are contained in this entry. 1329 */ 1330 for (p = &uaddr->up_pivots[0]; 1331 p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) { 1332 check_addr = p->addr; 1333 if (check_addr == 0) 1334 continue; 1335 if (p->dir < 0) 1336 check_addr--; 1337 1338 if (start <= check_addr && 1339 check_addr < end) { 1340 KASSERT(p->entry == NULL); 1341 p->entry = entry; 1342 } 1343 } 1344 } 1345 1346 /* 1347 * Remove an entry with free space from the space tree. 1348 */ 1349 void 1350 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p, 1351 struct vm_map_entry *entry) 1352 { 1353 struct uaddr_pivot_state *uaddr; 1354 struct uaddr_pivot *p; 1355 1356 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1357 if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry) 1358 panic("%s: entry was not in tree", __func__); 1359 1360 /* 1361 * Inform any pivot with this entry that the entry is gone. 1362 * Note that this does not automatically invalidate the pivot. 1363 */ 1364 for (p = &uaddr->up_pivots[0]; 1365 p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) { 1366 if (p->entry == entry) 1367 p->entry = NULL; 1368 } 1369 } 1370 1371 /* 1372 * Create a new pivot selector. 1373 * 1374 * Initially, all pivots are in the expired state. 1375 * Two reasons for this: 1376 * - it means this allocator will not take a huge amount of time 1377 * - pivots select better on demand, because the pivot selection will be 1378 * affected by preceding allocations: 1379 * the next pivots will likely end up in different segments of free memory, 1380 * that was segmented by an earlier allocation; better spread. 1381 */ 1382 struct uvm_addr_state * 1383 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr) 1384 { 1385 struct uaddr_pivot_state *uaddr; 1386 1387 uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK); 1388 uaddr->up_uaddr.uaddr_minaddr = minaddr; 1389 uaddr->up_uaddr.uaddr_maxaddr = maxaddr; 1390 uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions; 1391 RB_INIT(&uaddr->up_free); 1392 memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots)); 1393 1394 return &uaddr->up_uaddr; 1395 } 1396 1397 #if defined(DEBUG) || defined(DDB) 1398 /* 1399 * Print the uaddr_pivot_state. 1400 * 1401 * If full, a listing of all entries in the state will be provided. 1402 */ 1403 void 1404 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full, 1405 int (*pr)(const char *, ...)) 1406 { 1407 struct uaddr_pivot_state *uaddr; 1408 struct uaddr_pivot *pivot; 1409 struct vm_map_entry *entry; 1410 int i; 1411 vaddr_t check_addr; 1412 1413 uaddr = (struct uaddr_pivot_state *)uaddr_p; 1414 1415 for (i = 0; i < NUM_PIVOTS; i++) { 1416 pivot = &uaddr->up_pivots[i]; 1417 1418 (*pr)("\tpivot 0x%lx, epires in %d, direction %d\n", 1419 pivot->addr, pivot->expire, pivot->dir); 1420 } 1421 if (!full) 1422 return; 1423 1424 if (RB_EMPTY(&uaddr->up_free)) 1425 (*pr)("\tempty\n"); 1426 /* Print list of free space. */ 1427 RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) { 1428 (*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n", 1429 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry), 1430 VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry)); 1431 1432 for (i = 0; i < NUM_PIVOTS; i++) { 1433 pivot = &uaddr->up_pivots[i]; 1434 check_addr = pivot->addr; 1435 if (check_addr == 0) 1436 continue; 1437 if (pivot->dir < 0) 1438 check_addr--; 1439 1440 if (VMMAP_FREE_START(entry) <= check_addr && 1441 check_addr < VMMAP_FREE_END(entry)) { 1442 (*pr)("\t\tcontains pivot %d (0x%lx)\n", 1443 i, pivot->addr); 1444 } 1445 } 1446 } 1447 } 1448 #endif /* DEBUG || DDB */ 1449 #endif /* !SMALL_KERNEL */ 1450 1451 #ifndef SMALL_KERNEL 1452 /* 1453 * Strategy for uaddr_stack_brk_select. 1454 */ 1455 struct uaddr_bs_strat { 1456 vaddr_t start; /* Start of area. */ 1457 vaddr_t end; /* End of area. */ 1458 int dir; /* Search direction. */ 1459 }; 1460 1461 /* 1462 * Stack/break allocator. 1463 * 1464 * Stack area is grown into in the opposite direction of the stack growth, 1465 * brk area is grown downward (because sbrk() grows upward). 1466 * 1467 * Both areas are grown into proportially: a weighted chance is used to 1468 * select which one (stack or brk area) to try. If the allocation fails, 1469 * the other one is tested. 1470 */ 1471 const struct uvm_addr_functions uaddr_stack_brk_functions = { 1472 .uaddr_select = &uaddr_stack_brk_select, 1473 .uaddr_destroy = &uaddr_destroy, 1474 .uaddr_name = "uaddr_stckbrk" 1475 }; 1476 1477 /* 1478 * Stack/brk address selector. 1479 */ 1480 int 1481 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr, 1482 struct vm_map_entry **entry_out, vaddr_t *addr_out, 1483 vsize_t sz, vaddr_t align, vaddr_t offset, 1484 vm_prot_t prot, vaddr_t hint) 1485 { 1486 vsize_t before_gap, after_gap; 1487 int stack_idx, brk_idx; 1488 struct uaddr_bs_strat strat[2], *s; 1489 vsize_t sb_size; 1490 1491 /* 1492 * Choose gap size and if the stack is searched before or after the 1493 * brk area. 1494 */ 1495 before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT; 1496 after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT; 1497 1498 sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start); 1499 sb_size >>= PAGE_SHIFT; 1500 if (arc4random_uniform(MAX(sb_size, 0xffffffff)) > 1501 map->b_end - map->b_start) { 1502 brk_idx = 1; 1503 stack_idx = 0; 1504 } else { 1505 brk_idx = 0; 1506 stack_idx = 1; 1507 } 1508 1509 /* Set up stack search strategy. */ 1510 s = &strat[stack_idx]; 1511 s->start = MAX(map->s_start, uaddr->uaddr_minaddr); 1512 s->end = MIN(map->s_end, uaddr->uaddr_maxaddr); 1513 #ifdef MACHINE_STACK_GROWS_UP 1514 s->dir = -1; 1515 #else 1516 s->dir = 1; 1517 #endif 1518 1519 /* Set up brk search strategy. */ 1520 s = &strat[brk_idx]; 1521 s->start = MAX(map->b_start, uaddr->uaddr_minaddr); 1522 s->end = MIN(map->b_end, uaddr->uaddr_maxaddr); 1523 s->dir = -1; /* Opposite of brk() growth. */ 1524 1525 /* Linear search for space. */ 1526 for (s = &strat[0]; s < &strat[nitems(strat)]; s++) { 1527 if (s->end - s->start < sz) 1528 continue; 1529 if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 1530 0, sz, align, offset, s->dir, s->start, s->end - sz, 1531 before_gap, after_gap) == 0) 1532 return 0; 1533 } 1534 1535 return ENOMEM; 1536 } 1537 1538 struct uvm_addr_state * 1539 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr) 1540 { 1541 struct uvm_addr_state* uaddr; 1542 1543 uaddr = pool_get(&uaddr_pool, PR_WAITOK); 1544 uaddr->uaddr_minaddr = minaddr; 1545 uaddr->uaddr_maxaddr = maxaddr; 1546 uaddr->uaddr_functions = &uaddr_stack_brk_functions; 1547 return uaddr; 1548 } 1549 #endif /* !SMALL_KERNEL */ 1550 1551 1552 #ifndef SMALL_KERNEL 1553 RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree, 1554 uvm_mapent_fspace_cmp); 1555 #endif /* !SMALL_KERNEL */ 1556