1 /* $OpenBSD: uvm_map.c,v 1.157 2012/06/14 15:54:36 ariane Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. All advertising materials mentioning features or use of this software 37 * must display the following acknowledgement: 38 * This product includes software developed by Charles D. Cranor, 39 * Washington University, the University of California, Berkeley and 40 * its contributors. 41 * 4. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 * 57 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 58 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 59 * 60 * 61 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 62 * All rights reserved. 
63 * 64 * Permission to use, copy, modify and distribute this software and 65 * its documentation is hereby granted, provided that both the copyright 66 * notice and this permission notice appear in all copies of the 67 * software, derivative works or modified versions, and any portions 68 * thereof, and that both notices appear in supporting documentation. 69 * 70 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 71 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 72 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 73 * 74 * Carnegie Mellon requests users of this software to return to 75 * 76 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 77 * School of Computer Science 78 * Carnegie Mellon University 79 * Pittsburgh PA 15213-3890 80 * 81 * any improvements or extensions that they make and grant Carnegie the 82 * rights to redistribute these changes. 83 */ 84 85 /* 86 * uvm_map.c: uvm map operations 87 */ 88 89 /* #define DEBUG */ 90 /* #define VMMAP_DEBUG */ 91 92 #include <sys/param.h> 93 #include <sys/systm.h> 94 #include <sys/mman.h> 95 #include <sys/proc.h> 96 #include <sys/malloc.h> 97 #include <sys/pool.h> 98 #include <sys/kernel.h> 99 100 #include <dev/rndvar.h> 101 102 #ifdef SYSVSHM 103 #include <sys/shm.h> 104 #endif 105 106 #include <uvm/uvm.h> 107 108 #ifdef DDB 109 #include <uvm/uvm_ddb.h> 110 #endif 111 112 #include <uvm/uvm_addr.h> 113 114 115 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 116 int uvm_mapent_isjoinable(struct vm_map*, 117 struct vm_map_entry*, struct vm_map_entry*); 118 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 119 struct vm_map_entry*, struct uvm_map_deadq*); 120 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 121 struct vm_map_entry*, struct uvm_map_deadq*); 122 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 123 struct vm_map_entry*, vaddr_t, vsize_t, int, 124 struct uvm_map_deadq*); 125 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 126 void uvm_mapent_free(struct vm_map_entry*); 127 void uvm_unmap_kill_entry(struct vm_map*, 128 struct vm_map_entry*); 129 void uvm_mapent_mkfree(struct vm_map*, 130 struct vm_map_entry*, struct vm_map_entry**, 131 struct uvm_map_deadq*, boolean_t); 132 void uvm_map_pageable_pgon(struct vm_map*, 133 struct vm_map_entry*, struct vm_map_entry*, 134 vaddr_t, vaddr_t); 135 int uvm_map_pageable_wire(struct vm_map*, 136 struct vm_map_entry*, struct vm_map_entry*, 137 vaddr_t, vaddr_t, int); 138 void uvm_map_setup_entries(struct vm_map*); 139 void uvm_map_setup_md(struct vm_map*); 140 void uvm_map_teardown(struct vm_map*); 141 void uvm_map_vmspace_update(struct vm_map*, 142 struct uvm_map_deadq*, int); 143 void uvm_map_kmem_grow(struct vm_map*, 144 struct uvm_map_deadq*, vsize_t, int); 145 void uvm_map_freelist_update_clear(struct vm_map*, 146 struct uvm_map_deadq*); 147 void uvm_map_freelist_update_refill(struct vm_map *, int); 148 void uvm_map_freelist_update(struct vm_map*, 149 struct uvm_map_deadq*, vaddr_t, vaddr_t, 150 vaddr_t, vaddr_t, int); 151 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 152 vaddr_t, vaddr_t, int); 153 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 154 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 155 int); 156 int uvm_map_findspace(struct vm_map*, 157 struct vm_map_entry**, struct vm_map_entry**, 158 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 159 vaddr_t); 160 vsize_t 
uvm_map_addr_augment_get(struct vm_map_entry*); 161 void uvm_map_addr_augment(struct vm_map_entry*); 162 163 /* 164 * Tree management functions. 165 */ 166 167 static __inline void uvm_mapent_copy(struct vm_map_entry*, 168 struct vm_map_entry*); 169 static int uvm_mapentry_addrcmp(struct vm_map_entry*, 170 struct vm_map_entry*); 171 static int uvm_mapentry_freecmp(struct vm_map_entry*, 172 struct vm_map_entry*); 173 void uvm_mapent_free_insert(struct vm_map*, 174 struct uvm_addr_state*, struct vm_map_entry*); 175 void uvm_mapent_free_remove(struct vm_map*, 176 struct uvm_addr_state*, struct vm_map_entry*); 177 void uvm_mapent_addr_insert(struct vm_map*, 178 struct vm_map_entry*); 179 void uvm_mapent_addr_remove(struct vm_map*, 180 struct vm_map_entry*); 181 void uvm_map_splitentry(struct vm_map*, 182 struct vm_map_entry*, struct vm_map_entry*, 183 vaddr_t); 184 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 185 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 186 187 /* 188 * uvm_vmspace_fork helper functions. 189 */ 190 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 191 vsize_t, struct vm_map_entry*, 192 struct uvm_map_deadq*, int, int); 193 void uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 194 struct vm_map*, struct vm_map_entry*, 195 struct uvm_map_deadq*); 196 void uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 197 struct vm_map*, struct vm_map_entry*, 198 struct uvm_map_deadq*); 199 200 /* 201 * Tree validation. 202 */ 203 204 #ifdef VMMAP_DEBUG 205 void uvm_tree_assert(struct vm_map*, int, char*, 206 char*, int); 207 #define UVM_ASSERT(map, cond, file, line) \ 208 uvm_tree_assert((map), (cond), #cond, (file), (line)) 209 void uvm_tree_sanity(struct vm_map*, char*, int); 210 void uvm_tree_size_chk(struct vm_map*, char*, int); 211 void vmspace_validate(struct vm_map*); 212 #else 213 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 214 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 215 #define vmspace_validate(_map) do {} while (0) 216 #endif 217 218 /* 219 * All architectures will have pmap_prefer. 220 */ 221 #ifndef PMAP_PREFER 222 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 223 #define PMAP_PREFER_OFFSET(off) 0 224 #define PMAP_PREFER(addr, off) (addr) 225 #endif 226 227 228 /* 229 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 230 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 231 * 232 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 233 * each time. 234 */ 235 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 236 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 237 #define VM_MAP_KSIZE_ALLOCMUL 4 238 /* 239 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 240 * ahead. 241 */ 242 #define FSPACE_DELTA 8 243 /* 244 * Put allocations adjacent to previous allocations when the free-space tree 245 * is larger than FSPACE_COMPACT entries. 246 * 247 * Alignment and PMAP_PREFER may still cause the entry to not be fully 248 * adjacent. Note that this strategy reduces memory fragmentation (by leaving 249 * a large space before or after the allocation). 250 */ 251 #define FSPACE_COMPACT 128 252 /* 253 * Make the address selection skip at most this many bytes from the start of 254 * the free space in which the allocation takes place. 255 * 256 * The main idea behind a randomized address space is that an attacker cannot 257 * know where to target his attack.
Therefore, the location of objects must be 258 * as random as possible. However, the goal is not to create the most sparse 259 * map that is possible. 260 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 261 * sizes, thereby reducing the sparseness. The biggest randomization comes 262 * from fragmentation, i.e. FSPACE_COMPACT. 263 */ 264 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 265 /* 266 * Allow for small gaps in the overflow areas. 267 * Gap size is in bytes and does not have to be a multiple of page-size. 268 */ 269 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 270 271 /* auto-allocate address lower bound */ 272 #define VMMAP_MIN_ADDR PAGE_SIZE 273 274 275 #ifdef DEADBEEF0 276 #define UVMMAP_DEADBEEF ((void*)DEADBEEF0) 277 #else 278 #define UVMMAP_DEADBEEF ((void*)0xdeadd0d0) 279 #endif 280 281 #ifdef DEBUG 282 int uvm_map_printlocks = 0; 283 284 #define LPRINTF(_args) \ 285 do { \ 286 if (uvm_map_printlocks) \ 287 printf _args; \ 288 } while (0) 289 #else 290 #define LPRINTF(_args) do {} while (0) 291 #endif 292 293 static struct timeval uvm_kmapent_last_warn_time; 294 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 295 296 const char vmmapbsy[] = "vmmapbsy"; 297 298 /* 299 * pool for vmspace structures. 300 */ 301 struct pool uvm_vmspace_pool; 302 303 /* 304 * pool for dynamically-allocated map entries. 305 */ 306 struct pool uvm_map_entry_pool; 307 struct pool uvm_map_entry_kmem_pool; 308 309 /* 310 * This global represents the end of the kernel virtual address 311 * space. If we want to exceed this, we must grow the kernel 312 * virtual address space dynamically. 313 * 314 * Note, this variable is locked by kernel_map's lock. 315 */ 316 vaddr_t uvm_maxkaddr; 317 318 /* 319 * Locking predicate. 320 */ 321 #define UVM_MAP_REQ_WRITE(_map) \ 322 do { \ 323 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 324 rw_assert_wrlock(&(_map)->lock); \ 325 } while (0) 326 327 /* 328 * Tree describing entries by address. 329 * 330 * Addresses are unique. 331 * Entries with start == end may only exist if they are the first entry 332 * (sorted by address) within a free-memory tree. 333 */ 334 335 static __inline int 336 uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2) 337 { 338 return e1->start < e2->start ? -1 : e1->start > e2->start; 339 } 340 341 /* 342 * Tree describing free memory. 343 * 344 * Free memory is indexed (so we can use array semantics in O(log N). 345 * Free memory is ordered by size (so we can reduce fragmentation). 346 * 347 * The address range in the tree can be limited, having part of the 348 * free memory not in the free-memory tree. Only free memory in the 349 * tree will be considered during 'any address' allocations. 350 */ 351 352 static __inline int 353 uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2) 354 { 355 int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace; 356 return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2); 357 } 358 359 /* 360 * Copy mapentry. 
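 * Only the range between the uvm_map_entry_start_copy and uvm_map_entry_stop_copy markers is copied; fields outside that range (notably the tree linkage) keep their old values in the destination.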
361 */ 362 static __inline void 363 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 364 { 365 caddr_t csrc, cdst; 366 size_t sz; 367 368 csrc = (caddr_t)src; 369 cdst = (caddr_t)dst; 370 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 371 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 372 373 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 374 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 375 memcpy(cdst, csrc, sz); 376 } 377 378 /* 379 * Handle free-list insertion. 380 */ 381 void 382 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 383 struct vm_map_entry *entry) 384 { 385 const struct uvm_addr_functions *fun; 386 #ifdef VMMAP_DEBUG 387 vaddr_t min, max, bound; 388 #endif 389 390 #ifdef VMMAP_DEBUG 391 /* 392 * Boundary check. 393 * Boundaries are folded if they go on the same free list. 394 */ 395 min = VMMAP_FREE_START(entry); 396 max = VMMAP_FREE_END(entry); 397 398 while (min < max) { 399 bound = uvm_map_boundary(map, min, max); 400 KASSERT(uvm_map_uaddr(map, min) == uaddr); 401 min = bound; 402 } 403 #endif 404 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 405 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 406 407 UVM_MAP_REQ_WRITE(map); 408 409 /* Actual insert: forward to uaddr pointer. */ 410 if (uaddr != NULL) { 411 fun = uaddr->uaddr_functions; 412 KDASSERT(fun != NULL); 413 if (fun->uaddr_free_insert != NULL) 414 (*fun->uaddr_free_insert)(map, uaddr, entry); 415 entry->etype |= UVM_ET_FREEMAPPED; 416 } 417 418 /* Update fspace augmentation. */ 419 uvm_map_addr_augment(entry); 420 } 421 422 /* 423 * Handle free-list removal. 424 */ 425 void 426 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 427 struct vm_map_entry *entry) 428 { 429 const struct uvm_addr_functions *fun; 430 431 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 432 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 433 UVM_MAP_REQ_WRITE(map); 434 435 if (uaddr != NULL) { 436 fun = uaddr->uaddr_functions; 437 if (fun->uaddr_free_remove != NULL) 438 (*fun->uaddr_free_remove)(map, uaddr, entry); 439 entry->etype &= ~UVM_ET_FREEMAPPED; 440 } 441 } 442 443 /* 444 * Handle address tree insertion. 445 */ 446 void 447 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 448 { 449 struct vm_map_entry *res; 450 451 if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || 452 RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || 453 RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF) 454 panic("uvm_mapent_addr_insert: entry still in addr list"); 455 KDASSERT(entry->start <= entry->end); 456 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 457 (entry->end & (vaddr_t)PAGE_MASK) == 0); 458 459 UVM_MAP_REQ_WRITE(map); 460 res = RB_INSERT(uvm_map_addr, &map->addr, entry); 461 if (res != NULL) { 462 panic("uvm_mapent_addr_insert: map %p entry %p " 463 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 464 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 465 map, entry, 466 entry->start, entry->end, entry->guard, entry->fspace, 467 res, res->start, res->end, res->guard, res->fspace); 468 } 469 } 470 471 /* 472 * Handle address tree removal. 
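 * The tree pointers of the removed entry are poisoned with UVMMAP_DEADBEEF, so uvm_mapent_addr_insert can detect an entry that is still linked.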
473 */ 474 void 475 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 476 { 477 struct vm_map_entry *res; 478 479 UVM_MAP_REQ_WRITE(map); 480 res = RB_REMOVE(uvm_map_addr, &map->addr, entry); 481 if (res != entry) 482 panic("uvm_mapent_addr_remove"); 483 RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) = 484 RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF; 485 } 486 487 /* 488 * uvm_map_reference: add reference to a map 489 * 490 * XXX check map reference counter lock 491 */ 492 #define uvm_map_reference(_map) \ 493 do { \ 494 simple_lock(&map->ref_lock); \ 495 map->ref_count++; \ 496 simple_unlock(&map->ref_lock); \ 497 } while (0) 498 499 /* 500 * Calculate the dused delta. 501 */ 502 vsize_t 503 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 504 { 505 struct vmspace *vm; 506 vsize_t sz; 507 vaddr_t lmax; 508 vaddr_t stack_begin, stack_end; /* Position of stack. */ 509 510 KASSERT(map->flags & VM_MAP_ISVMSPACE); 511 vm = (struct vmspace *)map; 512 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 513 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 514 515 sz = 0; 516 while (min != max) { 517 lmax = max; 518 if (min < stack_begin && lmax > stack_begin) 519 lmax = stack_begin; 520 else if (min < stack_end && lmax > stack_end) 521 lmax = stack_end; 522 523 if (min >= stack_begin && min < stack_end) { 524 /* nothing */ 525 } else 526 sz += lmax - min; 527 min = lmax; 528 } 529 530 return sz >> PAGE_SHIFT; 531 } 532 533 /* 534 * Find the entry describing the given address. 535 */ 536 struct vm_map_entry* 537 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 538 { 539 struct vm_map_entry *iter; 540 541 iter = RB_ROOT(atree); 542 while (iter != NULL) { 543 if (iter->start > addr) 544 iter = RB_LEFT(iter, daddrs.addr_entry); 545 else if (VMMAP_FREE_END(iter) <= addr) 546 iter = RB_RIGHT(iter, daddrs.addr_entry); 547 else 548 return iter; 549 } 550 return NULL; 551 } 552 553 /* 554 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 555 * 556 * Push dead entries into a linked list. 557 * Since the linked list abuses the address tree for storage, the entry 558 * may not be linked in a map. 559 * 560 * *head must be initialized to NULL before the first call to this macro. 561 * uvm_unmap_detach(*head, 0) will remove dead entries. 562 */ 563 static __inline void 564 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 565 { 566 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 567 } 568 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 569 dead_entry_push((_headptr), (_entry)) 570 571 /* 572 * Helper function for uvm_map_findspace_tree. 573 * 574 * Given allocation constraints and pmap constraints, finds the 575 * lowest and highest address in a range that can be used for the 576 * allocation. 577 * 578 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 579 * 580 * 581 * Big chunk of math with a seasoning of dragons. 582 */ 583 int 584 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 585 struct vm_map_entry *sel, vaddr_t align, 586 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 587 { 588 vaddr_t sel_min, sel_max; 589 #ifdef PMAP_PREFER 590 vaddr_t pmap_min, pmap_max; 591 #endif /* PMAP_PREFER */ 592 #ifdef DIAGNOSTIC 593 int bad; 594 #endif /* DIAGNOSTIC */ 595 596 sel_min = VMMAP_FREE_START(sel); 597 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? 
PAGE_SIZE : 0); 598 599 #ifdef PMAP_PREFER 600 601 /* 602 * There are two special cases, in which we can satisfy the align 603 * requirement and the pmap_prefer requirement. 604 * - when pmap_off == 0, we always select the largest of the two 605 * - when pmap_off % align == 0 and pmap_align > align, we simply 606 * satisfy the pmap_align requirement and automatically 607 * satisfy the align requirement. 608 */ 609 if (align > PAGE_SIZE && 610 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 611 /* 612 * Simple case: only use align. 613 */ 614 sel_min = roundup(sel_min, align); 615 sel_max &= ~(align - 1); 616 617 if (sel_min > sel_max) 618 return ENOMEM; 619 620 /* 621 * Correct for bias. 622 */ 623 if (sel_max - sel_min > FSPACE_BIASGAP) { 624 if (bias > 0) { 625 sel_min = sel_max - FSPACE_BIASGAP; 626 sel_min = roundup(sel_min, align); 627 } else if (bias < 0) { 628 sel_max = sel_min + FSPACE_BIASGAP; 629 sel_max &= ~(align - 1); 630 } 631 } 632 } else if (pmap_align != 0) { 633 /* 634 * Special case: satisfy both pmap_prefer and 635 * align argument. 636 */ 637 pmap_max = sel_max & ~(pmap_align - 1); 638 pmap_min = sel_min; 639 if (pmap_max < sel_min) 640 return ENOMEM; 641 642 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 643 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 644 pmap_min = pmap_max - FSPACE_BIASGAP; 645 /* Align pmap_min. */ 646 pmap_min &= ~(pmap_align - 1); 647 if (pmap_min < sel_min) 648 pmap_min += pmap_align; 649 if (pmap_min > pmap_max) 650 return ENOMEM; 651 652 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 653 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 654 pmap_max = (pmap_min + FSPACE_BIASGAP) & 655 ~(pmap_align - 1); 656 } 657 if (pmap_min > pmap_max) 658 return ENOMEM; 659 660 /* Apply pmap prefer offset. */ 661 pmap_max |= pmap_off; 662 if (pmap_max > sel_max) 663 pmap_max -= pmap_align; 664 pmap_min |= pmap_off; 665 if (pmap_min < sel_min) 666 pmap_min += pmap_align; 667 668 /* 669 * Fixup: it's possible that pmap_min and pmap_max 670 * cross eachother. In this case, try to find one 671 * address that is allowed. 672 * (This usually happens in biased case.) 673 */ 674 if (pmap_min > pmap_max) { 675 if (pmap_min < sel_max) 676 pmap_max = pmap_min; 677 else if (pmap_max > sel_min) 678 pmap_min = pmap_max; 679 else 680 return ENOMEM; 681 } 682 683 /* Internal validation. */ 684 KDASSERT(pmap_min <= pmap_max); 685 686 sel_min = pmap_min; 687 sel_max = pmap_max; 688 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 689 sel_min = sel_max - FSPACE_BIASGAP; 690 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 691 sel_max = sel_min + FSPACE_BIASGAP; 692 693 #else 694 695 if (align > PAGE_SIZE) { 696 sel_min = roundup(sel_min, align); 697 sel_max &= ~(align - 1); 698 if (sel_min > sel_max) 699 return ENOMEM; 700 701 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 702 if (bias > 0) { 703 sel_min = roundup(sel_max - FSPACE_BIASGAP, 704 align); 705 } else { 706 sel_max = (sel_min + FSPACE_BIASGAP) & 707 ~(align - 1); 708 } 709 } 710 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 711 sel_min = sel_max - FSPACE_BIASGAP; 712 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 713 sel_max = sel_min + FSPACE_BIASGAP; 714 715 #endif 716 717 if (sel_min > sel_max) 718 return ENOMEM; 719 720 #ifdef DIAGNOSTIC 721 bad = 0; 722 /* Lower boundary check. 
*/ 723 if (sel_min < VMMAP_FREE_START(sel)) { 724 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 725 sel_min, VMMAP_FREE_START(sel)); 726 bad++; 727 } 728 /* Upper boundary check. */ 729 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 730 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 731 sel_max, 732 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 733 bad++; 734 } 735 /* Lower boundary alignment. */ 736 if (align != 0 && (sel_min & (align - 1)) != 0) { 737 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 738 sel_min, align); 739 bad++; 740 } 741 /* Upper boundary alignment. */ 742 if (align != 0 && (sel_max & (align - 1)) != 0) { 743 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 744 sel_max, align); 745 bad++; 746 } 747 /* Lower boundary PMAP_PREFER check. */ 748 if (pmap_align != 0 && align == 0 && 749 (sel_min & (pmap_align - 1)) != pmap_off) { 750 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 751 sel_min, sel_min & (pmap_align - 1), pmap_off); 752 bad++; 753 } 754 /* Upper boundary PMAP_PREFER check. */ 755 if (pmap_align != 0 && align == 0 && 756 (sel_max & (pmap_align - 1)) != pmap_off) { 757 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 758 sel_max, sel_max & (pmap_align - 1), pmap_off); 759 bad++; 760 } 761 762 if (bad) { 763 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 764 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 765 "bias = %d, " 766 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 767 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 768 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 769 } 770 #endif /* DIAGNOSTIC */ 771 772 *min = sel_min; 773 *max = sel_max; 774 return 0; 775 } 776 777 /* 778 * Test if memory starting at addr with sz bytes is free. 779 * 780 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 781 * the space. 782 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 783 */ 784 int 785 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 786 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 787 vaddr_t addr, vsize_t sz) 788 { 789 struct uvm_addr_state *free; 790 struct uvm_map_addr *atree; 791 struct vm_map_entry *i, *i_end; 792 793 /* 794 * Kernel memory above uvm_maxkaddr is considered unavailable. 795 */ 796 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 797 if (addr + sz > uvm_maxkaddr) 798 return 0; 799 } 800 801 atree = &map->addr; 802 803 /* 804 * Fill in first, last, so they point at the entries containing the 805 * first and last address of the range. 806 * Note that if they are not NULL, we don't perform the lookup. 807 */ 808 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 809 if (*start_ptr == NULL) { 810 *start_ptr = uvm_map_entrybyaddr(atree, addr); 811 if (*start_ptr == NULL) 812 return 0; 813 } else 814 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 815 if (*end_ptr == NULL) { 816 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 817 *end_ptr = *start_ptr; 818 else { 819 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 820 if (*end_ptr == NULL) 821 return 0; 822 } 823 } else 824 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 825 826 /* 827 * Validation. 
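 * The *start_ptr entry must contain addr and the *end_ptr entry must contain addr + sz - 1, counting their trailing free space.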
828 */ 829 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 830 KDASSERT((*start_ptr)->start <= addr && 831 VMMAP_FREE_END(*start_ptr) > addr && 832 (*end_ptr)->start < addr + sz && 833 VMMAP_FREE_END(*end_ptr) >= addr + sz); 834 835 /* 836 * Check that none of the entries intersects with <addr, addr+sz>. 837 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is 838 * considered unavailable unless called by those allocators. 839 */ 840 i = *start_ptr; 841 i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr); 842 for (; i != i_end; 843 i = RB_NEXT(uvm_map_addr, atree, i)) { 844 if (i->start != i->end && i->end > addr) 845 return 0; 846 847 /* 848 * uaddr_exe and uaddr_brk_stack may only be used 849 * by these allocators and the NULL uaddr (i.e. no 850 * uaddr). 851 * Reject if this requirement is not met. 852 */ 853 if (uaddr != NULL) { 854 free = uvm_map_uaddr_e(map, i); 855 856 if (uaddr != free && free != NULL && 857 (free == map->uaddr_exe || 858 free == map->uaddr_brk_stack)) 859 return 0; 860 } 861 } 862 863 return -1; 864 } 865 866 /* 867 * Invoke each address selector until an address is found. 868 * Will not invoke uaddr_exe. 869 */ 870 int 871 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 872 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 873 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 874 { 875 struct uvm_addr_state *uaddr; 876 int i; 877 878 /* 879 * Allocation for sz bytes at any address, 880 * using the addr selectors in order. 881 */ 882 for (i = 0; i < nitems(map->uaddr_any); i++) { 883 uaddr = map->uaddr_any[i]; 884 885 if (uvm_addr_invoke(map, uaddr, first, last, 886 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 887 return 0; 888 } 889 890 /* 891 * Fall back to brk() and stack() address selectors. 892 */ 893 uaddr = map->uaddr_brk_stack; 894 if (uvm_addr_invoke(map, uaddr, first, last, 895 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 896 return 0; 897 898 return ENOMEM; 899 } 900 901 /* Calculate entry augmentation value. */ 902 vsize_t 903 uvm_map_addr_augment_get(struct vm_map_entry *entry) 904 { 905 vsize_t augment; 906 struct vm_map_entry *left, *right; 907 908 augment = entry->fspace; 909 if ((left = RB_LEFT(entry, daddrs.addr_entry)) != NULL) 910 augment = MAX(augment, left->fspace_augment); 911 if ((right = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) 912 augment = MAX(augment, right->fspace_augment); 913 return augment; 914 } 915 916 /* 917 * Update augmentation data in entry. 918 */ 919 void 920 uvm_map_addr_augment(struct vm_map_entry *entry) 921 { 922 vsize_t augment; 923 924 while (entry != NULL) { 925 /* Calculate value for augmentation. */ 926 augment = uvm_map_addr_augment_get(entry); 927 928 /* 929 * Descend update. 930 * Once we find an entry that already has the correct value, 931 * stop, since it means all its parents will use the correct 932 * value too. 933 */ 934 if (entry->fspace_augment == augment) 935 return; 936 entry->fspace_augment = augment; 937 entry = RB_PARENT(entry, daddrs.addr_entry); 938 } 939 } 940 941 /* 942 * uvm_map: establish a valid mapping in map 943 * 944 * => *addr and sz must be a multiple of PAGE_SIZE. 945 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 946 * => map must be unlocked.
947 * => <uobj,uoffset> value meanings (4 cases): 948 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 949 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 950 * [3] <uobj,uoffset> == normal mapping 951 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 952 * 953 * case [4] is for kernel mappings where we don't know the offset until 954 * we've found a virtual address. note that kernel object offsets are 955 * always relative to vm_map_min(kernel_map). 956 * 957 * => align: align vaddr, must be a power-of-2. 958 * Align is only a hint and will be ignored if the alignment fails. 959 */ 960 int 961 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 962 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) 963 { 964 struct vm_map_entry *first, *last, *entry; 965 struct uvm_map_deadq dead; 966 vm_prot_t prot; 967 vm_prot_t maxprot; 968 vm_inherit_t inherit; 969 int advice; 970 int error; 971 vaddr_t pmap_align, pmap_offset; 972 vaddr_t hint; 973 974 if ((map->flags & VM_MAP_INTRSAFE) == 0) 975 splassert(IPL_NONE); 976 else 977 splassert(IPL_VM); 978 979 /* 980 * We use pmap_align and pmap_offset as alignment and offset variables. 981 * 982 * Because the align parameter takes precedence over pmap prefer, 983 * the pmap_align will need to be set to align, with pmap_offset = 0, 984 * if pmap_prefer will not align. 985 */ 986 if (uoffset == UVM_UNKNOWN_OFFSET) { 987 pmap_align = MAX(align, PAGE_SIZE); 988 pmap_offset = 0; 989 } else { 990 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 991 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 992 993 if (align == 0 || 994 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 995 /* 996 * pmap_offset satisfies align, no change. 997 */ 998 } else { 999 /* 1000 * Align takes precedence over pmap prefer. 1001 */ 1002 pmap_align = align; 1003 pmap_offset = 0; 1004 } 1005 } 1006 1007 /* 1008 * Decode parameters. 1009 */ 1010 prot = UVM_PROTECTION(flags); 1011 maxprot = UVM_MAXPROTECTION(flags); 1012 advice = UVM_ADVICE(flags); 1013 inherit = UVM_INHERIT(flags); 1014 error = 0; 1015 hint = trunc_page(*addr); 1016 TAILQ_INIT(&dead); 1017 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1018 KASSERT((align & (align - 1)) == 0); 1019 1020 /* 1021 * Holes are incompatible with other types of mappings. 1022 */ 1023 if (flags & UVM_FLAG_HOLE) { 1024 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1025 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1026 } 1027 1028 /* 1029 * Unset hint for kernel_map non-fixed allocations. 1030 */ 1031 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1032 hint = 0; 1033 1034 /* 1035 * Check protection. 1036 */ 1037 if ((prot & maxprot) != prot) 1038 return EACCES; 1039 1040 if (flags & UVM_FLAG_TRYLOCK) { 1041 if (vm_map_lock_try(map) == FALSE) 1042 return EFAULT; 1043 } else 1044 vm_map_lock(map); 1045 1046 first = last = NULL; 1047 if (flags & UVM_FLAG_FIXED) { 1048 /* 1049 * Fixed location. 1050 * 1051 * Note: we ignore align, pmap_prefer. 1052 * Fill in first, last and *addr. 1053 */ 1054 KASSERT((*addr & PAGE_MASK) == 0); 1055 1056 /* 1057 * Grow pmap to include allocated address. 1058 * If the growth fails, the allocation will fail too. 1059 */ 1060 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1061 uvm_maxkaddr < (*addr + sz)) { 1062 uvm_map_kmem_grow(map, &dead, 1063 *addr + sz - uvm_maxkaddr, flags); 1064 } 1065 1066 /* 1067 * Check that the space is available. 
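 * A fixed mapping requires the entire requested range to be free; otherwise the allocation fails with ENOMEM.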
1068 */ 1069 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1070 error = ENOMEM; 1071 goto unlock; 1072 } 1073 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1074 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1075 (align == 0 || (*addr & (align - 1)) == 0) && 1076 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1077 /* 1078 * Address used as hint. 1079 * 1080 * Note: we enforce the alignment restriction, 1081 * but ignore pmap_prefer. 1082 */ 1083 } else if ((maxprot & VM_PROT_EXECUTE) != 0 && 1084 map->uaddr_exe != NULL) { 1085 /* 1086 * Run selection algorithm for executables. 1087 */ 1088 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1089 addr, sz, pmap_align, pmap_offset, prot, hint); 1090 1091 /* 1092 * Grow kernel memory and try again. 1093 */ 1094 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1095 uvm_map_kmem_grow(map, &dead, sz, flags); 1096 1097 error = uvm_addr_invoke(map, map->uaddr_exe, 1098 &first, &last, addr, sz, 1099 pmap_align, pmap_offset, prot, hint); 1100 } 1101 1102 if (error != 0) 1103 goto unlock; 1104 } else { 1105 /* 1106 * Update freelists from vmspace. 1107 */ 1108 if (map->flags & VM_MAP_ISVMSPACE) 1109 uvm_map_vmspace_update(map, &dead, flags); 1110 1111 error = uvm_map_findspace(map, &first, &last, addr, sz, 1112 pmap_align, pmap_offset, prot, hint); 1113 1114 /* 1115 * Grow kernel memory and try again. 1116 */ 1117 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1118 uvm_map_kmem_grow(map, &dead, sz, flags); 1119 1120 error = uvm_map_findspace(map, &first, &last, addr, sz, 1121 pmap_align, pmap_offset, prot, hint); 1122 } 1123 1124 if (error != 0) 1125 goto unlock; 1126 } 1127 1128 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1129 uvm_maxkaddr >= *addr + sz); 1130 1131 /* 1132 * If we only want a query, return now. 1133 */ 1134 if (flags & UVM_FLAG_QUERY) { 1135 error = 0; 1136 goto unlock; 1137 } 1138 1139 if (uobj == NULL) 1140 uoffset = 0; 1141 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1142 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1143 uoffset = *addr - vm_map_min(kernel_map); 1144 } 1145 1146 /* 1147 * Create new entry. 1148 * first and last may be invalidated after this call. 1149 */ 1150 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead); 1151 if (entry == NULL) { 1152 error = ENOMEM; 1153 goto unlock; 1154 } 1155 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1156 entry->object.uvm_obj = uobj; 1157 entry->offset = uoffset; 1158 entry->protection = prot; 1159 entry->max_protection = maxprot; 1160 entry->inheritance = inherit; 1161 entry->wired_count = 0; 1162 entry->advice = advice; 1163 if (uobj) 1164 entry->etype |= UVM_ET_OBJ; 1165 else if (flags & UVM_FLAG_HOLE) 1166 entry->etype |= UVM_ET_HOLE; 1167 if (flags & UVM_FLAG_COPYONW) { 1168 entry->etype |= UVM_ET_COPYONWRITE; 1169 if ((flags & UVM_FLAG_OVERLAY) == 0) 1170 entry->etype |= UVM_ET_NEEDSCOPY; 1171 } 1172 if (flags & UVM_FLAG_OVERLAY) { 1173 entry->aref.ar_pageoff = 0; 1174 entry->aref.ar_amap = amap_alloc(sz, 1175 ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0), 1176 M_WAITOK); 1177 } 1178 1179 /* 1180 * Update map and process statistics. 1181 */ 1182 if (!(flags & UVM_FLAG_HOLE)) { 1183 map->size += sz; 1184 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1185 ((struct vmspace *)map)->vm_dused += 1186 uvmspace_dused(map, *addr, *addr + sz); 1187 } 1188 } 1189 1190 /* 1191 * Try to merge entry. 
1192 * 1193 * Userland allocations are kept separated most of the time. 1194 * Forego the effort of merging what most of the time can't be merged 1195 * and only try the merge if it concerns a kernel entry. 1196 */ 1197 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1198 (map->flags & VM_MAP_ISVMSPACE) == 0) 1199 uvm_mapent_tryjoin(map, entry, &dead); 1200 1201 unlock: 1202 vm_map_unlock(map); 1203 1204 /* 1205 * Remove dead entries. 1206 * 1207 * Dead entries may be the result of merging. 1208 * uvm_map_mkentry may also create dead entries, when it attempts to 1209 * destroy free-space entries. 1210 */ 1211 uvm_unmap_detach(&dead, 0); 1212 return error; 1213 } 1214 1215 /* 1216 * True iff e1 and e2 can be joined together. 1217 */ 1218 int 1219 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1220 struct vm_map_entry *e2) 1221 { 1222 KDASSERT(e1 != NULL && e2 != NULL); 1223 1224 /* 1225 * Must be the same entry type and not have free memory between. 1226 */ 1227 if (e1->etype != e2->etype || e1->end != e2->start) 1228 return 0; 1229 1230 /* 1231 * Submaps are never joined. 1232 */ 1233 if (UVM_ET_ISSUBMAP(e1)) 1234 return 0; 1235 1236 /* 1237 * Never merge wired memory. 1238 */ 1239 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1240 return 0; 1241 1242 /* 1243 * Protection, inheritance and advice must be equal. 1244 */ 1245 if (e1->protection != e2->protection || 1246 e1->max_protection != e2->max_protection || 1247 e1->inheritance != e2->inheritance || 1248 e1->advice != e2->advice) 1249 return 0; 1250 1251 /* 1252 * If uvm_object: objects itself and offsets within object must match. 1253 */ 1254 if (UVM_ET_ISOBJ(e1)) { 1255 if (e1->object.uvm_obj != e2->object.uvm_obj) 1256 return 0; 1257 if (e1->offset + (e1->end - e1->start) != e2->offset) 1258 return 0; 1259 } 1260 1261 /* 1262 * Cannot join shared amaps. 1263 * Note: no need to lock amap to look at refs, since we don't care 1264 * about its exact value. 1265 * If it is 1 (i.e. we have the only reference) it will stay there. 1266 */ 1267 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1268 return 0; 1269 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1270 return 0; 1271 1272 /* 1273 * Apprently, e1 and e2 match. 1274 */ 1275 return 1; 1276 } 1277 1278 /* 1279 * Join support function. 1280 * 1281 * Returns the merged entry on succes. 1282 * Returns NULL if the merge failed. 1283 */ 1284 struct vm_map_entry* 1285 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1286 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1287 { 1288 struct uvm_addr_state *free; 1289 1290 /* 1291 * Amap of e1 must be extended to include e2. 1292 * e2 contains no real information in its amap, 1293 * so it can be erased immediately. 1294 */ 1295 if (e1->aref.ar_amap) { 1296 if (amap_extend(e1, e2->end - e2->start)) 1297 return NULL; 1298 } 1299 1300 /* 1301 * Don't drop obj reference: 1302 * uvm_unmap_detach will do this for us. 1303 */ 1304 1305 free = uvm_map_uaddr_e(map, e1); 1306 uvm_mapent_free_remove(map, free, e1); 1307 1308 free = uvm_map_uaddr_e(map, e2); 1309 uvm_mapent_free_remove(map, free, e2); 1310 uvm_mapent_addr_remove(map, e2); 1311 e1->end = e2->end; 1312 e1->guard = e2->guard; 1313 e1->fspace = e2->fspace; 1314 uvm_mapent_free_insert(map, free, e1); 1315 1316 DEAD_ENTRY_PUSH(dead, e2); 1317 return e1; 1318 } 1319 1320 /* 1321 * Attempt forward and backward joining of entry. 1322 * 1323 * Returns entry after joins. 
1324 * We are guaranteed that the amap of entry is either non-existant or 1325 * has never been used. 1326 */ 1327 struct vm_map_entry* 1328 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1329 struct uvm_map_deadq *dead) 1330 { 1331 struct vm_map_entry *other; 1332 struct vm_map_entry *merged; 1333 1334 /* 1335 * Merge with previous entry. 1336 */ 1337 other = RB_PREV(uvm_map_addr, &map->addr, entry); 1338 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1339 merged = uvm_mapent_merge(map, other, entry, dead); 1340 if (merged) 1341 entry = merged; 1342 } 1343 1344 /* 1345 * Merge with next entry. 1346 * 1347 * Because amap can only extend forward and the next entry 1348 * probably contains sensible info, only perform forward merging 1349 * in the absence of an amap. 1350 */ 1351 other = RB_NEXT(uvm_map_addr, &map->addr, entry); 1352 if (other && entry->aref.ar_amap == NULL && 1353 other->aref.ar_amap == NULL && 1354 uvm_mapent_isjoinable(map, entry, other)) { 1355 merged = uvm_mapent_merge(map, entry, other, dead); 1356 if (merged) 1357 entry = merged; 1358 } 1359 1360 return entry; 1361 } 1362 1363 /* 1364 * Kill entries that are no longer in a map. 1365 */ 1366 void 1367 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1368 { 1369 struct vm_map_entry *entry; 1370 1371 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1372 /* 1373 * Drop reference to amap, if we've got one. 1374 */ 1375 if (entry->aref.ar_amap) 1376 amap_unref(entry->aref.ar_amap, 1377 entry->aref.ar_pageoff, 1378 atop(entry->end - entry->start), 1379 flags); 1380 1381 /* 1382 * Drop reference to our backing object, if we've got one. 1383 */ 1384 if (UVM_ET_ISSUBMAP(entry)) { 1385 /* ... unlikely to happen, but play it safe */ 1386 uvm_map_deallocate(entry->object.sub_map); 1387 } else if (UVM_ET_ISOBJ(entry) && 1388 entry->object.uvm_obj->pgops->pgo_detach) { 1389 entry->object.uvm_obj->pgops->pgo_detach( 1390 entry->object.uvm_obj); 1391 } 1392 1393 /* 1394 * Step to next. 1395 */ 1396 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1397 uvm_mapent_free(entry); 1398 } 1399 } 1400 1401 /* 1402 * Create and insert new entry. 1403 * 1404 * Returned entry contains new addresses and is inserted properly in the tree. 1405 * first and last are (probably) no longer valid. 1406 */ 1407 struct vm_map_entry* 1408 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1409 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1410 struct uvm_map_deadq *dead) 1411 { 1412 struct vm_map_entry *entry, *prev; 1413 struct uvm_addr_state *free; 1414 vaddr_t min, max; /* free space boundaries for new entry */ 1415 1416 KDASSERT(map != NULL); 1417 KDASSERT(first != NULL); 1418 KDASSERT(last != NULL); 1419 KDASSERT(dead != NULL); 1420 KDASSERT(sz > 0); 1421 KDASSERT(addr + sz > addr); 1422 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1423 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1424 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1425 uvm_tree_sanity(map, __FILE__, __LINE__); 1426 1427 min = addr + sz; 1428 max = VMMAP_FREE_END(last); 1429 1430 /* 1431 * Initialize new entry. 
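 * The entry spans [addr, addr + sz); guard and fspace start out as zero and are set up later by uvm_map_fix_space.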
1432 */ 1433 entry = uvm_mapent_alloc(map, flags); 1434 if (entry == NULL) 1435 return NULL; 1436 entry->offset = 0; 1437 entry->etype = 0; 1438 entry->wired_count = 0; 1439 entry->aref.ar_pageoff = 0; 1440 entry->aref.ar_amap = NULL; 1441 1442 entry->start = addr; 1443 entry->end = min; 1444 entry->guard = 0; 1445 entry->fspace = 0; 1446 1447 /* 1448 * Reset free space in first. 1449 */ 1450 free = uvm_map_uaddr_e(map, first); 1451 uvm_mapent_free_remove(map, free, first); 1452 first->guard = 0; 1453 first->fspace = 0; 1454 1455 /* 1456 * Remove all entries that are fully replaced. 1457 * We are iterating using last in reverse order. 1458 */ 1459 for (; first != last; last = prev) { 1460 prev = RB_PREV(uvm_map_addr, &map->addr, last); 1461 1462 KDASSERT(last->start == last->end); 1463 free = uvm_map_uaddr_e(map, last); 1464 uvm_mapent_free_remove(map, free, last); 1465 uvm_mapent_addr_remove(map, last); 1466 DEAD_ENTRY_PUSH(dead, last); 1467 } 1468 /* 1469 * Remove first if it is entirely inside <addr, addr+sz>. 1470 */ 1471 if (first->start == addr) { 1472 uvm_mapent_addr_remove(map, first); 1473 DEAD_ENTRY_PUSH(dead, first); 1474 } else { 1475 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1476 addr, flags); 1477 } 1478 1479 /* 1480 * Finally, link in entry. 1481 */ 1482 uvm_mapent_addr_insert(map, entry); 1483 uvm_map_fix_space(map, entry, min, max, flags); 1484 1485 uvm_tree_sanity(map, __FILE__, __LINE__); 1486 return entry; 1487 } 1488 1489 /* 1490 * uvm_mapent_alloc: allocate a map entry 1491 */ 1492 struct vm_map_entry * 1493 uvm_mapent_alloc(struct vm_map *map, int flags) 1494 { 1495 struct vm_map_entry *me, *ne; 1496 int s, i; 1497 int pool_flags; 1498 1499 pool_flags = PR_WAITOK; 1500 if (flags & UVM_FLAG_TRYLOCK) 1501 pool_flags = PR_NOWAIT; 1502 1503 if (map->flags & VM_MAP_INTRSAFE || cold) { 1504 s = splvm(); 1505 simple_lock(&uvm.kentry_lock); 1506 me = uvm.kentry_free; 1507 if (me == NULL) { 1508 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1509 &kd_nowait); 1510 if (ne == NULL) 1511 panic("uvm_mapent_alloc: cannot allocate map " 1512 "entry"); 1513 for (i = 0; 1514 i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; 1515 i++) 1516 RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1]; 1517 RB_LEFT(&ne[i], daddrs.addr_entry) = NULL; 1518 me = ne; 1519 if (ratecheck(&uvm_kmapent_last_warn_time, 1520 &uvm_kmapent_warn_rate)) 1521 printf("uvm_mapent_alloc: out of static " 1522 "map entries\n"); 1523 } 1524 uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry); 1525 uvmexp.kmapent++; 1526 simple_unlock(&uvm.kentry_lock); 1527 splx(s); 1528 me->flags = UVM_MAP_STATIC; 1529 } else if (map == kernel_map) { 1530 splassert(IPL_NONE); 1531 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1532 if (me == NULL) 1533 goto out; 1534 me->flags = UVM_MAP_KMEM; 1535 } else { 1536 splassert(IPL_NONE); 1537 me = pool_get(&uvm_map_entry_pool, pool_flags); 1538 if (me == NULL) 1539 goto out; 1540 me->flags = 0; 1541 } 1542 1543 if (me != NULL) { 1544 RB_LEFT(me, daddrs.addr_entry) = 1545 RB_RIGHT(me, daddrs.addr_entry) = 1546 RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF; 1547 } 1548 1549 out: 1550 return(me); 1551 } 1552 1553 /* 1554 * uvm_mapent_free: free map entry 1555 * 1556 * => XXX: static pool for kernel map? 
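 * => entries are returned to wherever they were allocated from, based on the UVM_MAP_STATIC and UVM_MAP_KMEM flags.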
1557 */ 1558 void 1559 uvm_mapent_free(struct vm_map_entry *me) 1560 { 1561 int s; 1562 1563 if (me->flags & UVM_MAP_STATIC) { 1564 s = splvm(); 1565 simple_lock(&uvm.kentry_lock); 1566 RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free; 1567 uvm.kentry_free = me; 1568 uvmexp.kmapent--; 1569 simple_unlock(&uvm.kentry_lock); 1570 splx(s); 1571 } else if (me->flags & UVM_MAP_KMEM) { 1572 splassert(IPL_NONE); 1573 pool_put(&uvm_map_entry_kmem_pool, me); 1574 } else { 1575 splassert(IPL_NONE); 1576 pool_put(&uvm_map_entry_pool, me); 1577 } 1578 } 1579 1580 /* 1581 * uvm_map_lookup_entry: find map entry at or before an address. 1582 * 1583 * => map must at least be read-locked by caller 1584 * => entry is returned in "entry" 1585 * => return value is true if address is in the returned entry 1586 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1587 * returned for those mappings. 1588 */ 1589 boolean_t 1590 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1591 struct vm_map_entry **entry) 1592 { 1593 *entry = uvm_map_entrybyaddr(&map->addr, address); 1594 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1595 (*entry)->start <= address && (*entry)->end > address; 1596 } 1597 1598 /* 1599 * uvm_map_pie: return a random load address for a PIE executable 1600 * properly aligned. 1601 */ 1602 #ifndef VM_PIE_MAX_ADDR 1603 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1604 #endif 1605 1606 #ifndef VM_PIE_MIN_ADDR 1607 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1608 #endif 1609 1610 #ifndef VM_PIE_MIN_ALIGN 1611 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1612 #endif 1613 1614 vaddr_t 1615 uvm_map_pie(vaddr_t align) 1616 { 1617 vaddr_t addr, space, min; 1618 1619 align = MAX(align, VM_PIE_MIN_ALIGN); 1620 1621 /* round up to next alignment */ 1622 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1623 1624 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1625 return (align); 1626 1627 space = (VM_PIE_MAX_ADDR - min) / align; 1628 space = MIN(space, (u_int32_t)-1); 1629 1630 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1631 addr += min; 1632 1633 return (addr); 1634 } 1635 1636 void 1637 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1638 { 1639 struct uvm_map_deadq dead; 1640 1641 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1642 (end & (vaddr_t)PAGE_MASK) == 0); 1643 TAILQ_INIT(&dead); 1644 vm_map_lock(map); 1645 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1646 vm_map_unlock(map); 1647 1648 uvm_unmap_detach(&dead, 0); 1649 } 1650 1651 /* 1652 * Mark entry as free. 1653 * 1654 * entry will be put on the dead list. 1655 * The free space will be merged into the previous or a new entry, 1656 * unless markfree is false. 1657 */ 1658 void 1659 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1660 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1661 boolean_t markfree) 1662 { 1663 struct uvm_addr_state *free; 1664 struct vm_map_entry *prev; 1665 vaddr_t addr; /* Start of freed range. */ 1666 vaddr_t end; /* End of freed range. */ 1667 1668 prev = *prev_ptr; 1669 if (prev == entry) 1670 *prev_ptr = prev = NULL; 1671 1672 if (prev == NULL || 1673 VMMAP_FREE_END(prev) != entry->start) 1674 prev = RB_PREV(uvm_map_addr, &map->addr, entry); 1675 /* 1676 * Entry is describing only free memory and has nothing to drain into. 
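 * Keep it and let it act as the new prev hint.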
1677 */ 1678 if (prev == NULL && entry->start == entry->end && markfree) { 1679 *prev_ptr = entry; 1680 return; 1681 } 1682 1683 addr = entry->start; 1684 end = VMMAP_FREE_END(entry); 1685 free = uvm_map_uaddr_e(map, entry); 1686 uvm_mapent_free_remove(map, free, entry); 1687 uvm_mapent_addr_remove(map, entry); 1688 DEAD_ENTRY_PUSH(dead, entry); 1689 1690 if (markfree) { 1691 if (prev) { 1692 free = uvm_map_uaddr_e(map, prev); 1693 uvm_mapent_free_remove(map, free, prev); 1694 } 1695 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1696 } 1697 } 1698 1699 /* 1700 * Unwire and release referenced amap and object from map entry. 1701 */ 1702 void 1703 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1704 { 1705 /* 1706 * Unwire removed map entry. 1707 */ 1708 if (VM_MAPENT_ISWIRED(entry)) { 1709 entry->wired_count = 0; 1710 uvm_fault_unwire_locked(map, entry->start, entry->end); 1711 } 1712 1713 /* 1714 * Entry-type specific code. 1715 */ 1716 if (UVM_ET_ISHOLE(entry)) { 1717 /* 1718 * Nothing to be done for holes. 1719 */ 1720 } else if (map->flags & VM_MAP_INTRSAFE) { 1721 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1722 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1723 pmap_kremove(entry->start, entry->end - entry->start); 1724 } else if (UVM_ET_ISOBJ(entry) && 1725 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1726 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1727 1728 /* 1729 * Note: kernel object mappings are currently used in 1730 * two ways: 1731 * [1] "normal" mappings of pages in the kernel object 1732 * [2] uvm_km_valloc'd allocations in which we 1733 * pmap_enter in some non-kernel-object page 1734 * (e.g. vmapbuf). 1735 * 1736 * for case [1], we need to remove the mapping from 1737 * the pmap and then remove the page from the kernel 1738 * object (because, once pages in a kernel object are 1739 * unmapped they are no longer needed, unlike, say, 1740 * a vnode where you might want the data to persist 1741 * until flushed out of a queue). 1742 * 1743 * for case [2], we need to remove the mapping from 1744 * the pmap. there shouldn't be any pages at the 1745 * specified offset in the kernel object [but it 1746 * doesn't hurt to call uvm_km_pgremove just to be 1747 * safe?] 1748 * 1749 * uvm_km_pgremove currently does the following: 1750 * for pages in the kernel object range: 1751 * - drops the swap slot 1752 * - uvm_pagefree the page 1753 * 1754 * note there is version of uvm_km_pgremove() that 1755 * is used for "intrsafe" objects. 1756 */ 1757 1758 /* 1759 * remove mappings from pmap and drop the pages 1760 * from the object. offsets are always relative 1761 * to vm_map_min(kernel_map). 1762 */ 1763 pmap_remove(pmap_kernel(), entry->start, entry->end); 1764 uvm_km_pgremove(entry->object.uvm_obj, 1765 entry->start - vm_map_min(kernel_map), 1766 entry->end - vm_map_min(kernel_map)); 1767 1768 /* 1769 * null out kernel_object reference, we've just 1770 * dropped it 1771 */ 1772 entry->etype &= ~UVM_ET_OBJ; 1773 entry->object.uvm_obj = NULL; /* to be safe */ 1774 } else { 1775 /* 1776 * remove mappings the standard way. 1777 */ 1778 pmap_remove(map->pmap, entry->start, entry->end); 1779 } 1780 } 1781 1782 /* 1783 * Remove all entries from start to end. 1784 * 1785 * If remove_holes, then remove ET_HOLE entries as well. 1786 * If markfree, entry will be properly marked free, otherwise, no replacement 1787 * entry will be put in the tree (corrupting the tree). 
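 * The start and end addresses are clipped to the map boundaries before any entry is touched.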
1788 */ 1789 void 1790 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1791 struct uvm_map_deadq *dead, boolean_t remove_holes, 1792 boolean_t markfree) 1793 { 1794 struct vm_map_entry *prev_hint, *next, *entry; 1795 1796 start = MAX(start, map->min_offset); 1797 end = MIN(end, map->max_offset); 1798 if (start >= end) 1799 return; 1800 1801 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1802 splassert(IPL_NONE); 1803 else 1804 splassert(IPL_VM); 1805 1806 /* 1807 * Find first affected entry. 1808 */ 1809 entry = uvm_map_entrybyaddr(&map->addr, start); 1810 KDASSERT(entry != NULL && entry->start <= start); 1811 if (entry->end <= start && markfree) 1812 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 1813 else 1814 UVM_MAP_CLIP_START(map, entry, start); 1815 1816 /* 1817 * Iterate entries until we reach end address. 1818 * prev_hint hints where the freed space can be appended to. 1819 */ 1820 prev_hint = NULL; 1821 for (; entry != NULL && entry->start < end; entry = next) { 1822 KDASSERT(entry->start >= start); 1823 if (entry->end > end || !markfree) 1824 UVM_MAP_CLIP_END(map, entry, end); 1825 KDASSERT(entry->start >= start && entry->end <= end); 1826 next = RB_NEXT(uvm_map_addr, &map->addr, entry); 1827 1828 /* Don't remove holes unless asked to do so. */ 1829 if (UVM_ET_ISHOLE(entry)) { 1830 if (!remove_holes) { 1831 prev_hint = entry; 1832 continue; 1833 } 1834 } 1835 1836 /* Kill entry. */ 1837 uvm_unmap_kill_entry(map, entry); 1838 1839 /* 1840 * Update space usage. 1841 */ 1842 if ((map->flags & VM_MAP_ISVMSPACE) && 1843 entry->object.uvm_obj == NULL && 1844 !UVM_ET_ISHOLE(entry)) { 1845 ((struct vmspace *)map)->vm_dused -= 1846 uvmspace_dused(map, entry->start, entry->end); 1847 } 1848 if (!UVM_ET_ISHOLE(entry)) 1849 map->size -= entry->end - entry->start; 1850 1851 /* 1852 * Actual removal of entry. 1853 */ 1854 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 1855 } 1856 1857 pmap_update(vm_map_pmap(map)); 1858 1859 #ifdef VMMAP_DEBUG 1860 if (markfree) { 1861 for (entry = uvm_map_entrybyaddr(&map->addr, start); 1862 entry != NULL && entry->start < end; 1863 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 1864 KDASSERT(entry->end <= start || 1865 entry->start == entry->end || 1866 UVM_ET_ISHOLE(entry)); 1867 } 1868 } else { 1869 vaddr_t a; 1870 for (a = start; a < end; a += PAGE_SIZE) 1871 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 1872 } 1873 #endif 1874 } 1875 1876 /* 1877 * Mark all entries from first until end (exclusive) as pageable. 1878 * 1879 * Lock must be exclusive on entry and will not be touched. 1880 */ 1881 void 1882 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 1883 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 1884 { 1885 struct vm_map_entry *iter; 1886 1887 for (iter = first; iter != end; 1888 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1889 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 1890 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 1891 continue; 1892 1893 iter->wired_count = 0; 1894 uvm_fault_unwire_locked(map, iter->start, iter->end); 1895 } 1896 } 1897 1898 /* 1899 * Mark all entries from first until end (exclusive) as wired. 1900 * 1901 * Lockflags determines the lock state on return from this function. 1902 * Lock must be exclusive on entry. 
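 * On failure the wiring counts are rolled back and the map is unlocked unless UVM_LK_EXIT is set.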
1903 */ 1904 int 1905 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 1906 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 1907 int lockflags) 1908 { 1909 struct vm_map_entry *iter; 1910 #ifdef DIAGNOSTIC 1911 unsigned int timestamp_save; 1912 #endif 1913 int error; 1914 1915 /* 1916 * Wire pages in two passes: 1917 * 1918 * 1: holding the write lock, we create any anonymous maps that need 1919 * to be created. then we clip each map entry to the region to 1920 * be wired and increment its wiring count. 1921 * 1922 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 1923 * in the pages for any newly wired area (wired_count == 1). 1924 * 1925 * downgrading to a read lock for uvm_fault_wire avoids a possible 1926 * deadlock with another thread that may have faulted on one of 1927 * the pages to be wired (it would mark the page busy, blocking 1928 * us, then in turn block on the map lock that we hold). 1929 * because we keep the read lock on the map, the copy-on-write 1930 * status of the entries we modify here cannot change. 1931 */ 1932 for (iter = first; iter != end; 1933 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1934 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 1935 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) 1936 continue; 1937 1938 /* 1939 * Perform actions of vm_map_lookup that need the write lock. 1940 * - create an anonymous map for copy-on-write 1941 * - anonymous map for zero-fill 1942 * Skip submaps. 1943 */ 1944 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 1945 UVM_ET_ISNEEDSCOPY(iter) && 1946 ((iter->protection & VM_PROT_WRITE) || 1947 iter->object.uvm_obj == NULL)) { 1948 amap_copy(map, iter, M_WAITOK, TRUE, 1949 iter->start, iter->end); 1950 } 1951 iter->wired_count++; 1952 } 1953 1954 /* 1955 * Pass 2. 1956 */ 1957 #ifdef DIAGNOSTIC 1958 timestamp_save = map->timestamp; 1959 #endif 1960 vm_map_busy(map); 1961 vm_map_downgrade(map); 1962 1963 error = 0; 1964 for (iter = first; error == 0 && iter != end; 1965 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1966 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) 1967 continue; 1968 1969 error = uvm_fault_wire(map, iter->start, iter->end, 1970 iter->protection); 1971 } 1972 1973 if (error) { 1974 /* 1975 * uvm_fault_wire failure 1976 * 1977 * Reacquire lock and undo our work. 1978 */ 1979 vm_map_upgrade(map); 1980 vm_map_unbusy(map); 1981 #ifdef DIAGNOSTIC 1982 if (timestamp_save != map->timestamp) 1983 panic("uvm_map_pageable_wire: stale map"); 1984 #endif 1985 1986 /* 1987 * first is no longer needed to restart loops. 1988 * Use it as iterator to unwire the successfully wired mappings. 1989 */ 1990 for (; first != iter; 1991 first = RB_NEXT(uvm_map_addr, &map->addr, first)) { 1992 if (UVM_ET_ISHOLE(first) || first->start == first->end) 1993 continue; 1994 1995 first->wired_count--; 1996 if (!VM_MAPENT_ISWIRED(first)) { 1997 uvm_fault_unwire_locked(map, 1998 first->start, first->end); 1999 } 2000 } 2001 2002 /* 2003 * decrease counter in the rest of the entries 2004 */ 2005 for (; iter != end; 2006 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2007 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end) 2008 continue; 2009 2010 iter->wired_count--; 2011 } 2012 2013 if ((lockflags & UVM_LK_EXIT) == 0) 2014 vm_map_unlock(map); 2015 return error; 2016 } 2017 2018 /* 2019 * We are currently holding a read lock.
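	 * Depending on UVM_LK_EXIT we either drop it now, or upgrade back
	 * to an exclusive lock so the map is still locked when we return
	 * (illustrative summary of the code below):
	 *
	 *	!UVM_LK_EXIT: vm_map_unbusy() + vm_map_unlock_read()
	 *	 UVM_LK_EXIT: vm_map_upgrade() + vm_map_unbusy()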
2020 */ 2021 if ((lockflags & UVM_LK_EXIT) == 0) { 2022 vm_map_unbusy(map); 2023 vm_map_unlock_read(map); 2024 } else { 2025 vm_map_upgrade(map); 2026 vm_map_unbusy(map); 2027 #ifdef DIAGNOSTIC 2028 if (timestamp_save != map->timestamp) 2029 panic("uvm_map_pageable_wire: stale map"); 2030 #endif 2031 } 2032 return 0; 2033 } 2034 2035 /* 2036 * uvm_map_pageable: set pageability of a range in a map. 2037 * 2038 * Flags: 2039 * UVM_LK_ENTER: map is already locked by caller 2040 * UVM_LK_EXIT: don't unlock map on exit 2041 * 2042 * The full range must be in use (entries may not have fspace != 0). 2043 * UVM_ET_HOLE counts as unmapped. 2044 */ 2045 int 2046 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2047 boolean_t new_pageable, int lockflags) 2048 { 2049 struct vm_map_entry *first, *last, *tmp; 2050 int error; 2051 2052 start = trunc_page(start); 2053 end = round_page(end); 2054 2055 if (start > end) 2056 return EINVAL; 2057 if (start == end) 2058 return 0; /* nothing to do */ 2059 if (start < map->min_offset) 2060 return EFAULT; /* why? see first XXX below */ 2061 if (end > map->max_offset) 2062 return EINVAL; /* why? see second XXX below */ 2063 2064 KASSERT(map->flags & VM_MAP_PAGEABLE); 2065 if ((lockflags & UVM_LK_ENTER) == 0) 2066 vm_map_lock(map); 2067 2068 /* 2069 * Find first entry. 2070 * 2071 * Initial test on start is different, because of the different 2072 * error returned. Rest is tested further down. 2073 */ 2074 first = uvm_map_entrybyaddr(&map->addr, start); 2075 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2076 /* 2077 * XXX if the first address is not mapped, it is EFAULT? 2078 */ 2079 error = EFAULT; 2080 goto out; 2081 } 2082 2083 /* 2084 * Check that the range has no holes. 2085 */ 2086 for (last = first; last != NULL && last->start < end; 2087 last = RB_NEXT(uvm_map_addr, &map->addr, last)) { 2088 if (UVM_ET_ISHOLE(last) || 2089 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2090 /* 2091 * XXX unmapped memory in range, why is it EINVAL 2092 * instead of EFAULT? 2093 */ 2094 error = EINVAL; 2095 goto out; 2096 } 2097 } 2098 2099 /* 2100 * Last ended at the first entry after the range. 2101 * Move back one step. 2102 * 2103 * Note that last may be NULL. 2104 */ 2105 if (last == NULL) { 2106 last = RB_MAX(uvm_map_addr, &map->addr); 2107 if (last->end < end) { 2108 error = EINVAL; 2109 goto out; 2110 } 2111 } else { 2112 KASSERT(last != first); 2113 last = RB_PREV(uvm_map_addr, &map->addr, last); 2114 } 2115 2116 /* 2117 * Wire/unwire pages here. 2118 */ 2119 if (new_pageable) { 2120 /* 2121 * Mark pageable. 2122 * entries that are not wired are untouched. 2123 */ 2124 if (VM_MAPENT_ISWIRED(first)) 2125 UVM_MAP_CLIP_START(map, first, start); 2126 /* 2127 * Split last at end. 2128 * Make tmp be the first entry after what is to be touched. 2129 * If last is not wired, don't touch it. 2130 */ 2131 if (VM_MAPENT_ISWIRED(last)) { 2132 UVM_MAP_CLIP_END(map, last, end); 2133 tmp = RB_NEXT(uvm_map_addr, &map->addr, last); 2134 } else 2135 tmp = last; 2136 2137 uvm_map_pageable_pgon(map, first, tmp, start, end); 2138 error = 0; 2139 2140 out: 2141 if ((lockflags & UVM_LK_EXIT) == 0) 2142 vm_map_unlock(map); 2143 return error; 2144 } else { 2145 /* 2146 * Mark entries wired. 2147 * entries are always touched (because recovery needs this). 2148 */ 2149 if (!VM_MAPENT_ISWIRED(first)) 2150 UVM_MAP_CLIP_START(map, first, start); 2151 /* 2152 * Split last at end. 2153 * Make tmp be the first entry after what is to be touched. 
2154 * If last is not wired, don't touch it. 2155 */ 2156 if (!VM_MAPENT_ISWIRED(last)) { 2157 UVM_MAP_CLIP_END(map, last, end); 2158 tmp = RB_NEXT(uvm_map_addr, &map->addr, last); 2159 } else 2160 tmp = last; 2161 2162 return uvm_map_pageable_wire(map, first, tmp, start, end, 2163 lockflags); 2164 } 2165 } 2166 2167 /* 2168 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2169 * all mapped regions. 2170 * 2171 * Map must not be locked. 2172 * If no flags are specified, all regions are unwired. 2173 */ 2174 int 2175 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2176 { 2177 vsize_t size; 2178 struct vm_map_entry *iter; 2179 2180 KASSERT(map->flags & VM_MAP_PAGEABLE); 2181 vm_map_lock(map); 2182 2183 if (flags == 0) { 2184 uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr), 2185 NULL, map->min_offset, map->max_offset); 2186 2187 atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE); 2188 vm_map_unlock(map); 2189 return 0; 2190 } 2191 2192 if (flags & MCL_FUTURE) 2193 atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE); 2194 if (!(flags & MCL_CURRENT)) { 2195 vm_map_unlock(map); 2196 return 0; 2197 } 2198 2199 /* 2200 * Count number of pages in all non-wired entries. 2201 * If the number exceeds the limit, abort. 2202 */ 2203 size = 0; 2204 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2205 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2206 continue; 2207 2208 size += iter->end - iter->start; 2209 } 2210 2211 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2212 vm_map_unlock(map); 2213 return ENOMEM; 2214 } 2215 2216 /* XXX non-pmap_wired_count case must be handled by caller */ 2217 #ifdef pmap_wired_count 2218 if (limit != 0 && 2219 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2220 vm_map_unlock(map); 2221 return ENOMEM; 2222 } 2223 #endif 2224 2225 /* 2226 * uvm_map_pageable_wire will release the lock 2227 */ 2228 return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr), 2229 NULL, map->min_offset, map->max_offset, 0); 2230 } 2231 2232 /* 2233 * Initialize map. 2234 * 2235 * Allocates sufficient entries to describe the free memory in the map. 2236 */ 2237 void 2238 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2239 { 2240 int i; 2241 2242 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2243 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2244 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2245 2246 /* 2247 * Update parameters. 2248 * 2249 * This code handles (vaddr_t)-1 and other page mask ending addresses 2250 * properly. 2251 * We lose the top page if the full virtual address space is used. 2252 */ 2253 if (max & (vaddr_t)PAGE_MASK) { 2254 max += 1; 2255 if (max == 0) /* overflow */ 2256 max -= PAGE_SIZE; 2257 } 2258 2259 RB_INIT(&map->addr); 2260 map->uaddr_exe = NULL; 2261 for (i = 0; i < nitems(map->uaddr_any); ++i) 2262 map->uaddr_any[i] = NULL; 2263 map->uaddr_brk_stack = NULL; 2264 2265 map->size = 0; 2266 map->ref_count = 1; 2267 map->min_offset = min; 2268 map->max_offset = max; 2269 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2270 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2271 map->flags = flags; 2272 map->timestamp = 0; 2273 rw_init(&map->lock, "vmmaplk"); 2274 simple_lock_init(&map->ref_lock); 2275 2276 /* 2277 * Configure the allocators. 2278 */ 2279 if (flags & VM_MAP_ISVMSPACE) 2280 uvm_map_setup_md(map); 2281 else 2282 map->uaddr_any[3] = &uaddr_kbootstrap; 2283 2284 /* 2285 * Fill map entries.
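	 * (The net effect, illustratively, is a single free-space entry
	 * spanning the whole map, as produced by uvm_map_setup_entries()
	 * below via
	 *
	 *	uvm_map_fix_space(map, NULL, map->min_offset,
	 *	    map->max_offset, 0);
	 *
	 * so a fresh map starts out entirely free.)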
2286 * This requires a write-locked map (because of diagnostic assertions 2287 * in insert code). 2288 */ 2289 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 2290 if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0) 2291 panic("uvm_map_setup: rw_enter failed on new map"); 2292 } 2293 uvm_map_setup_entries(map); 2294 uvm_tree_sanity(map, __FILE__, __LINE__); 2295 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2296 rw_exit(&map->lock); 2297 } 2298 2299 /* 2300 * Destroy the map. 2301 * 2302 * This is the inverse operation to uvm_map_setup. 2303 */ 2304 void 2305 uvm_map_teardown(struct vm_map *map) 2306 { 2307 struct uvm_map_deadq dead_entries; 2308 int i; 2309 struct vm_map_entry *entry, *tmp; 2310 #ifdef VMMAP_DEBUG 2311 size_t numq, numt; 2312 #endif 2313 2314 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 2315 if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0) 2316 panic("uvm_map_teardown: rw_enter failed on free map"); 2317 } 2318 2319 /* 2320 * Remove address selectors. 2321 */ 2322 uvm_addr_destroy(map->uaddr_exe); 2323 map->uaddr_exe = NULL; 2324 for (i = 0; i < nitems(map->uaddr_any); i++) { 2325 uvm_addr_destroy(map->uaddr_any[i]); 2326 map->uaddr_any[i] = NULL; 2327 } 2328 uvm_addr_destroy(map->uaddr_brk_stack); 2329 map->uaddr_brk_stack = NULL; 2330 2331 /* 2332 * Remove entries. 2333 * 2334 * The following is based on graph breadth-first search. 2335 * 2336 * In color terms: 2337 * - the dead_entries set contains all nodes that are reachable 2338 * (i.e. both the black and the grey nodes) 2339 * - any entry not in dead_entries is white 2340 * - any entry that appears in dead_entries before entry, 2341 * is black, the rest is grey. 2342 * The set [entry, end] is also referred to as the wavefront. 2343 * 2344 * Since the tree is always a fully connected graph, the breadth-first 2345 * search guarantees that each vmmap_entry is visited exactly once. 2346 * The vm_map is broken down in linear time. 2347 */ 2348 TAILQ_INIT(&dead_entries); 2349 if ((entry = RB_ROOT(&map->addr)) != NULL) 2350 DEAD_ENTRY_PUSH(&dead_entries, entry); 2351 while (entry != NULL) { 2352 uvm_unmap_kill_entry(map, entry); 2353 if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL) 2354 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2355 if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) 2356 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2357 /* Update wave-front. */ 2358 entry = TAILQ_NEXT(entry, dfree.deadq); 2359 } 2360 2361 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2362 rw_exit(&map->lock); 2363 2364 #ifdef VMMAP_DEBUG 2365 numt = numq = 0; 2366 RB_FOREACH(entry, uvm_map_addr, &map->addr) 2367 numt++; 2368 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2369 numq++; 2370 KASSERT(numt == numq); 2371 #endif 2372 uvm_unmap_detach(&dead_entries, 0); 2373 pmap_destroy(map->pmap); 2374 map->pmap = NULL; 2375 } 2376 2377 /* 2378 * Populate map with free-memory entries. 2379 * 2380 * Map must be initialized and empty. 2381 */ 2382 void 2383 uvm_map_setup_entries(struct vm_map *map) 2384 { 2385 KDASSERT(RB_EMPTY(&map->addr)); 2386 2387 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2388 } 2389 2390 /* 2391 * Split entry at given address. 2392 * 2393 * orig: entry that is to be split. 2394 * next: a newly allocated map entry that is not linked. 2395 * split: address at which the split is done. 
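 *
 * Illustrative sketch (assuming split falls inside orig's mapped range
 * rather than in its trailing free space):
 *
 *	before:  orig covers [orig->start ........ orig->end) + free space
 *	after:   orig covers [orig->start .. split)
 *	         next covers [split ......... orig->end) + free space
 *
 * next's offset/aref are advanced by (split - orig->start); in the other
 * case (split at or beyond orig->end) next becomes a pure free-space
 * entry starting at split.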
2396 */ 2397 void 2398 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2399 struct vm_map_entry *next, vaddr_t split) 2400 { 2401 struct uvm_addr_state *free, *free_before; 2402 vsize_t adj; 2403 2404 if ((split & PAGE_MASK) != 0) { 2405 panic("uvm_map_splitentry: split address 0x%lx " 2406 "not on page boundary!", split); 2407 } 2408 KDASSERT(map != NULL && orig != NULL && next != NULL); 2409 uvm_tree_sanity(map, __FILE__, __LINE__); 2410 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2411 2412 #ifdef VMMAP_DEBUG 2413 KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig); 2414 KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next); 2415 #endif /* VMMAP_DEBUG */ 2416 2417 /* 2418 * Free space will change, unlink from free space tree. 2419 */ 2420 free = uvm_map_uaddr_e(map, orig); 2421 uvm_mapent_free_remove(map, free, orig); 2422 2423 adj = split - orig->start; 2424 2425 uvm_mapent_copy(orig, next); 2426 if (split >= orig->end) { 2427 next->etype = 0; 2428 next->offset = 0; 2429 next->wired_count = 0; 2430 next->start = next->end = split; 2431 next->guard = 0; 2432 next->fspace = VMMAP_FREE_END(orig) - split; 2433 next->aref.ar_amap = NULL; 2434 next->aref.ar_pageoff = 0; 2435 orig->guard = MIN(orig->guard, split - orig->end); 2436 orig->fspace = split - VMMAP_FREE_START(orig); 2437 } else { 2438 orig->fspace = 0; 2439 orig->guard = 0; 2440 orig->end = next->start = split; 2441 2442 if (next->aref.ar_amap) 2443 amap_splitref(&orig->aref, &next->aref, adj); 2444 if (UVM_ET_ISSUBMAP(orig)) { 2445 uvm_map_reference(next->object.sub_map); 2446 next->offset += adj; 2447 } else if (UVM_ET_ISOBJ(orig)) { 2448 if (next->object.uvm_obj->pgops && 2449 next->object.uvm_obj->pgops->pgo_reference) { 2450 next->object.uvm_obj->pgops->pgo_reference( 2451 next->object.uvm_obj); 2452 } 2453 next->offset += adj; 2454 } 2455 } 2456 2457 /* 2458 * Link next into address tree. 2459 * Link orig and next into free-space tree. 2460 * 2461 * Don't insert 'next' into the addr tree until orig has been linked, 2462 * in case the free-list looks at adjacent entries in the addr tree 2463 * for its decisions. 2464 */ 2465 if (orig->fspace > 0) 2466 free_before = free; 2467 else 2468 free_before = uvm_map_uaddr_e(map, orig); 2469 uvm_mapent_free_insert(map, free_before, orig); 2470 uvm_mapent_addr_insert(map, next); 2471 uvm_mapent_free_insert(map, free, next); 2472 2473 uvm_tree_sanity(map, __FILE__, __LINE__); 2474 } 2475 2476 2477 #ifdef VMMAP_DEBUG 2478 2479 void 2480 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2481 char *file, int line) 2482 { 2483 char* map_special; 2484 2485 if (test) 2486 return; 2487 2488 if (map == kernel_map) 2489 map_special = " (kernel_map)"; 2490 else if (map == kmem_map) 2491 map_special = " (kmem_map)"; 2492 else 2493 map_special = ""; 2494 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2495 line, test_str); 2496 } 2497 2498 /* 2499 * Check that map is sane. 2500 */ 2501 void 2502 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2503 { 2504 struct vm_map_entry *iter; 2505 vaddr_t addr; 2506 vaddr_t min, max, bound; /* Bounds checker. */ 2507 struct uvm_addr_state *free; 2508 2509 addr = vm_map_min(map); 2510 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2511 /* 2512 * Valid start, end. 2513 * Catch overflow for end+fspace. 2514 */ 2515 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2516 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2517 /* 2518 * May not be empty.
2519 */ 2520 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2521 file, line); 2522 2523 /* 2524 * Addresses for entry must lie within map boundaries. 2525 */ 2526 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2527 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2528 2529 /* 2530 * Tree may not have gaps. 2531 */ 2532 UVM_ASSERT(map, iter->start == addr, file, line); 2533 addr = VMMAP_FREE_END(iter); 2534 2535 /* 2536 * Free space may not cross boundaries, unless the same 2537 * free list is used on both sides of the border. 2538 */ 2539 min = VMMAP_FREE_START(iter); 2540 max = VMMAP_FREE_END(iter); 2541 2542 while (min < max && 2543 (bound = uvm_map_boundary(map, min, max)) != max) { 2544 UVM_ASSERT(map, 2545 uvm_map_uaddr(map, bound - 1) == 2546 uvm_map_uaddr(map, bound), 2547 file, line); 2548 min = bound; 2549 } 2550 2551 free = uvm_map_uaddr_e(map, iter); 2552 if (free) { 2553 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2554 file, line); 2555 } else { 2556 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2557 file, line); 2558 } 2559 } 2560 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2561 } 2562 2563 void 2564 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2565 { 2566 struct vm_map_entry *iter; 2567 vsize_t size; 2568 2569 size = 0; 2570 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2571 if (!UVM_ET_ISHOLE(iter)) 2572 size += iter->end - iter->start; 2573 } 2574 2575 if (map->size != size) 2576 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2577 UVM_ASSERT(map, map->size == size, file, line); 2578 2579 vmspace_validate(map); 2580 } 2581 2582 /* 2583 * This function validates the statistics on vmspace. 2584 */ 2585 void 2586 vmspace_validate(struct vm_map *map) 2587 { 2588 struct vmspace *vm; 2589 struct vm_map_entry *iter; 2590 vaddr_t imin, imax; 2591 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2592 vsize_t stack, heap; /* Measured sizes. */ 2593 2594 if (!(map->flags & VM_MAP_ISVMSPACE)) 2595 return; 2596 2597 vm = (struct vmspace *)map; 2598 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2599 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2600 2601 stack = heap = 0; 2602 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2603 imin = imax = iter->start; 2604 2605 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2606 continue; 2607 2608 /* 2609 * Update stack, heap. 2610 * Keep in mind that (theoretically) the entries of 2611 * userspace and stack may be joined. 2612 */ 2613 while (imin != iter->end) { 2614 /* 2615 * Set imax to the first boundary crossed between 2616 * imin and stack addresses. 2617 */ 2618 imax = iter->end; 2619 if (imin < stack_begin && imax > stack_begin) 2620 imax = stack_begin; 2621 else if (imin < stack_end && imax > stack_end) 2622 imax = stack_end; 2623 2624 if (imin >= stack_begin && imin < stack_end) 2625 stack += imax - imin; 2626 else 2627 heap += imax - imin; 2628 imin = imax; 2629 } 2630 } 2631 2632 heap >>= PAGE_SHIFT; 2633 if (heap != vm->vm_dused) { 2634 printf("vmspace stack range: 0x%lx-0x%lx\n", 2635 stack_begin, stack_end); 2636 panic("vmspace_validate: vmspace.vm_dused invalid, " 2637 "expected %ld pgs, got %ld pgs in map %p", 2638 heap, vm->vm_dused, 2639 map); 2640 } 2641 } 2642 2643 #endif /* VMMAP_DEBUG */ 2644 2645 /* 2646 * uvm_map_init: init mapping system at boot time. note that we allocate 2647 * and init the static pool of structs vm_map_entry for the kernel here. 
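 *
 * Illustrative: the static entries are threaded into a simple free list
 * through their left tree pointer, i.e. the loop below amounts to
 *
 *	RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
 *	    uvm.kentry_free;
 *	uvm.kentry_free = &kernel_map_entry[lcv];
 *
 * repeated for each of the MAX_KMAPENT static entries.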
2648 */ 2649 void 2650 uvm_map_init(void) 2651 { 2652 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2653 int lcv; 2654 2655 /* 2656 * now set up static pool of kernel map entries ... 2657 */ 2658 2659 simple_lock_init(&uvm.kentry_lock); 2660 uvm.kentry_free = NULL; 2661 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2662 RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) = 2663 uvm.kentry_free; 2664 uvm.kentry_free = &kernel_map_entry[lcv]; 2665 } 2666 2667 /* 2668 * initialize the map-related pools. 2669 */ 2670 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 2671 0, 0, 0, "vmsppl", &pool_allocator_nointr); 2672 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 2673 0, 0, 0, "vmmpepl", &pool_allocator_nointr); 2674 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 2675 0, 0, 0, "vmmpekpl", NULL); 2676 pool_sethiwat(&uvm_map_entry_pool, 8192); 2677 2678 uvm_addr_init(); 2679 } 2680 2681 #if defined(DDB) 2682 2683 /* 2684 * DDB hooks 2685 */ 2686 2687 /* 2688 * uvm_map_printit: actually prints the map 2689 */ 2690 void 2691 uvm_map_printit(struct vm_map *map, boolean_t full, 2692 int (*pr)(const char *, ...)) 2693 { 2694 struct vmspace *vm; 2695 struct vm_map_entry *entry; 2696 struct uvm_addr_state *free; 2697 int in_free, i; 2698 char buf[8]; 2699 2700 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2701 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2702 map->b_start, map->b_end); 2703 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2704 map->s_start, map->s_end); 2705 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2706 map->size, map->ref_count, map->timestamp, 2707 map->flags); 2708 #ifdef pmap_resident_count 2709 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2710 pmap_resident_count(map->pmap)); 2711 #else 2712 /* XXXCDC: this should be required ... */ 2713 (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); 2714 #endif 2715 2716 /* 2717 * struct vmspace handling. 2718 */ 2719 if (map->flags & VM_MAP_ISVMSPACE) { 2720 vm = (struct vmspace *)map; 2721 2722 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2723 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2724 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2725 vm->vm_tsize, vm->vm_dsize); 2726 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2727 vm->vm_taddr, vm->vm_daddr); 2728 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2729 vm->vm_maxsaddr, vm->vm_minsaddr); 2730 } 2731 2732 if (!full) 2733 goto print_uaddr; 2734 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 2735 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2736 entry, entry->start, entry->end, entry->object.uvm_obj, 2737 (long long)entry->offset, entry->aref.ar_amap, 2738 entry->aref.ar_pageoff); 2739 (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 2740 "wc=%d, adv=%d\n", 2741 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2742 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 2743 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2744 entry->protection, entry->max_protection, 2745 entry->inheritance, entry->wired_count, entry->advice); 2746 2747 free = uvm_map_uaddr_e(map, entry); 2748 in_free = (free != NULL); 2749 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2750 "free=0x%lx-0x%lx\n", 2751 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2752 in_free ? 'T' : 'F', 2753 entry->guard, 2754 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2755 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2756 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2757 (entry->etype & UVM_ET_FREEMAPPED) ? 
'T' : 'F', free); 2758 if (free) { 2759 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2760 free->uaddr_minaddr, free->uaddr_maxaddr, 2761 free->uaddr_functions->uaddr_name); 2762 } 2763 } 2764 2765 print_uaddr: 2766 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2767 for (i = 0; i < nitems(map->uaddr_any); i++) { 2768 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2769 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2770 } 2771 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2772 } 2773 2774 /* 2775 * uvm_object_printit: actually prints the object 2776 */ 2777 void 2778 uvm_object_printit(uobj, full, pr) 2779 struct uvm_object *uobj; 2780 boolean_t full; 2781 int (*pr)(const char *, ...); 2782 { 2783 struct vm_page *pg; 2784 int cnt = 0; 2785 2786 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 2787 uobj, uobj->pgops, uobj->uo_npages); 2788 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 2789 (*pr)("refs=<SYSTEM>\n"); 2790 else 2791 (*pr)("refs=%d\n", uobj->uo_refs); 2792 2793 if (!full) { 2794 return; 2795 } 2796 (*pr)(" PAGES <pg,offset>:\n "); 2797 RB_FOREACH(pg, uvm_objtree, &uobj->memt) { 2798 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 2799 if ((cnt % 3) == 2) { 2800 (*pr)("\n "); 2801 } 2802 cnt++; 2803 } 2804 if ((cnt % 3) != 2) { 2805 (*pr)("\n"); 2806 } 2807 } 2808 2809 /* 2810 * uvm_page_printit: actually print the page 2811 */ 2812 static const char page_flagbits[] = 2813 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 2814 "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" 2815 "\31PMAP1\32PMAP2\33PMAP3"; 2816 2817 void 2818 uvm_page_printit(pg, full, pr) 2819 struct vm_page *pg; 2820 boolean_t full; 2821 int (*pr)(const char *, ...); 2822 { 2823 struct vm_page *tpg; 2824 struct uvm_object *uobj; 2825 struct pglist *pgl; 2826 2827 (*pr)("PAGE %p:\n", pg); 2828 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 2829 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 2830 (long long)pg->phys_addr); 2831 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 2832 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 2833 #if defined(UVM_PAGE_TRKOWN) 2834 if (pg->pg_flags & PG_BUSY) 2835 (*pr)(" owning process = %d, tag=%s", 2836 pg->owner, pg->owner_tag); 2837 else 2838 (*pr)(" page not busy, no owner"); 2839 #else 2840 (*pr)(" [page ownership tracking disabled]"); 2841 #endif 2842 #ifdef __HAVE_VM_PAGE_MD 2843 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 2844 #else 2845 (*pr)("\n"); 2846 #endif 2847 2848 if (!full) 2849 return; 2850 2851 /* cross-verify object/anon */ 2852 if ((pg->pg_flags & PQ_FREE) == 0) { 2853 if (pg->pg_flags & PQ_ANON) { 2854 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2855 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2856 (pg->uanon) ? pg->uanon->an_page : NULL); 2857 else 2858 (*pr)(" anon backpointer is OK\n"); 2859 } else { 2860 uobj = pg->uobject; 2861 if (uobj) { 2862 (*pr)(" checking object list\n"); 2863 RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { 2864 if (tpg == pg) { 2865 break; 2866 } 2867 } 2868 if (tpg) 2869 (*pr)(" page found on object list\n"); 2870 else 2871 (*pr)(" >>> PAGE NOT FOUND " 2872 "ON OBJECT LIST! <<<\n"); 2873 } 2874 } 2875 } 2876 2877 /* cross-verify page queue */ 2878 if (pg->pg_flags & PQ_FREE) { 2879 if (uvm_pmr_isfree(pg)) 2880 (*pr)(" page found in uvm_pmemrange\n"); 2881 else 2882 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 2883 pgl = NULL; 2884 } else if (pg->pg_flags & PQ_INACTIVE) { 2885 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
2886 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 2887 } else if (pg->pg_flags & PQ_ACTIVE) { 2888 pgl = &uvm.page_active; 2889 } else { 2890 pgl = NULL; 2891 } 2892 2893 if (pgl) { 2894 (*pr)(" checking pageq list\n"); 2895 TAILQ_FOREACH(tpg, pgl, pageq) { 2896 if (tpg == pg) { 2897 break; 2898 } 2899 } 2900 if (tpg) 2901 (*pr)(" page found on pageq list\n"); 2902 else 2903 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 2904 } 2905 } 2906 #endif 2907 2908 /* 2909 * uvm_map_protect: change map protection 2910 * 2911 * => set_max means set max_protection. 2912 * => map must be unlocked. 2913 */ 2914 int 2915 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2916 vm_prot_t new_prot, boolean_t set_max) 2917 { 2918 struct vm_map_entry *first, *iter; 2919 vm_prot_t old_prot; 2920 vm_prot_t mask; 2921 int error; 2922 2923 if (start > end) 2924 return EINVAL; 2925 start = MAX(start, map->min_offset); 2926 end = MIN(end, map->max_offset); 2927 if (start >= end) 2928 return 0; 2929 2930 error = 0; 2931 vm_map_lock(map); 2932 2933 /* 2934 * Set up first and last. 2935 * - first will contain first entry at or after start. 2936 */ 2937 first = uvm_map_entrybyaddr(&map->addr, start); 2938 KDASSERT(first != NULL); 2939 if (first->end < start) 2940 first = RB_NEXT(uvm_map_addr, &map->addr, first); 2941 2942 /* 2943 * First, check for protection violations. 2944 */ 2945 for (iter = first; iter != NULL && iter->start < end; 2946 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2947 /* Treat memory holes as free space. */ 2948 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 2949 continue; 2950 2951 if (UVM_ET_ISSUBMAP(iter)) { 2952 error = EINVAL; 2953 goto out; 2954 } 2955 if ((new_prot & iter->max_protection) != new_prot) { 2956 error = EACCES; 2957 goto out; 2958 } 2959 } 2960 2961 /* 2962 * Fix protections. 2963 */ 2964 for (iter = first; iter != NULL && iter->start < end; 2965 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2966 /* Treat memory holes as free space. */ 2967 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 2968 continue; 2969 2970 old_prot = iter->protection; 2971 2972 /* 2973 * Skip adapting protection iff old and new protection 2974 * are equal. 2975 */ 2976 if (set_max) { 2977 if (old_prot == (new_prot & old_prot) && 2978 iter->max_protection == new_prot) 2979 continue; 2980 } else { 2981 if (old_prot == new_prot) 2982 continue; 2983 } 2984 2985 UVM_MAP_CLIP_START(map, iter, start); 2986 UVM_MAP_CLIP_END(map, iter, end); 2987 2988 if (set_max) { 2989 iter->max_protection = new_prot; 2990 iter->protection &= new_prot; 2991 } else 2992 iter->protection = new_prot; 2993 2994 /* 2995 * update physical map if necessary. worry about copy-on-write 2996 * here -- CHECK THIS XXX 2997 */ 2998 if (iter->protection != old_prot) { 2999 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3000 ~VM_PROT_WRITE : VM_PROT_ALL; 3001 3002 /* update pmap */ 3003 if ((iter->protection & mask) == PROT_NONE && 3004 VM_MAPENT_ISWIRED(iter)) { 3005 /* 3006 * TODO(ariane) this is stupid. wired_count 3007 * is 0 if not wired, otherwise anything 3008 * larger than 0 (incremented once each time 3009 * wire is called). 3010 * Mostly to be able to undo the damage on 3011 * failure. Not the actually be a wired 3012 * refcounter... 3013 * Originally: iter->wired_count--; 3014 * (don't we have to unwire this in the pmap 3015 * as well?) 
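				 * As used throughout this file, the
				 * convention is simply:
				 *	wired_count == 0 -> not wired
				 *	    (VM_MAPENT_ISWIRED() is false)
				 *	wired_count  > 0 -> wired, bumped
				 *	    once per wiring request
				 * so forcing it to 0 below only drops the
				 * wiring state recorded in the map entry;
				 * the pmap-level unwire question above
				 * remains open.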
3016 */ 3017 iter->wired_count = 0; 3018 } 3019 pmap_protect(map->pmap, iter->start, iter->end, 3020 iter->protection & mask); 3021 } 3022 3023 /* 3024 * If the map is configured to lock any future mappings, 3025 * wire this entry now if the old protection was VM_PROT_NONE 3026 * and the new protection is not VM_PROT_NONE. 3027 */ 3028 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3029 VM_MAPENT_ISWIRED(iter) == 0 && 3030 old_prot == VM_PROT_NONE && 3031 new_prot != VM_PROT_NONE) { 3032 if (uvm_map_pageable(map, iter->start, iter->end, 3033 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3034 /* 3035 * If locking the entry fails, remember the 3036 * error if it's the first one. Note we 3037 * still continue setting the protection in 3038 * the map, but it will return the resource 3039 * storage condition regardless. 3040 * 3041 * XXX Ignore what the actual error is, 3042 * XXX just call it a resource shortage 3043 * XXX so that it doesn't get confused 3044 * XXX what uvm_map_protect() itself would 3045 * XXX normally return. 3046 */ 3047 error = ENOMEM; 3048 } 3049 } 3050 } 3051 pmap_update(map->pmap); 3052 3053 out: 3054 vm_map_unlock(map); 3055 return error; 3056 } 3057 3058 /* 3059 * uvmspace_alloc: allocate a vmspace structure. 3060 * 3061 * - structure includes vm_map and pmap 3062 * - XXX: no locking on this structure 3063 * - refcnt set to 1, rest must be init'd by caller 3064 */ 3065 struct vmspace * 3066 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3067 boolean_t remove_holes) 3068 { 3069 struct vmspace *vm; 3070 3071 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3072 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3073 return (vm); 3074 } 3075 3076 /* 3077 * uvmspace_init: initialize a vmspace structure. 3078 * 3079 * - XXX: no locking on this structure 3080 * - refcnt set to 1, rest must be init'd by caller 3081 */ 3082 void 3083 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3084 boolean_t pageable, boolean_t remove_holes) 3085 { 3086 if (pmap) 3087 pmap_reference(pmap); 3088 else 3089 pmap = pmap_create(); 3090 vm->vm_map.pmap = pmap; 3091 3092 uvm_map_setup(&vm->vm_map, min, max, 3093 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3094 3095 vm->vm_refcnt = 1; 3096 3097 if (remove_holes) 3098 pmap_remove_holes(&vm->vm_map); 3099 } 3100 3101 /* 3102 * uvmspace_share: share a vmspace between two processes 3103 * 3104 * - XXX: no locking on vmspace 3105 * - used for vfork and threads 3106 */ 3107 3108 void 3109 uvmspace_share(p1, p2) 3110 struct proc *p1, *p2; 3111 { 3112 p2->p_vmspace = p1->p_vmspace; 3113 p1->p_vmspace->vm_refcnt++; 3114 } 3115 3116 /* 3117 * uvmspace_exec: the process wants to exec a new program 3118 * 3119 * - XXX: no locking on vmspace 3120 */ 3121 3122 void 3123 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3124 { 3125 struct vmspace *nvm, *ovm = p->p_vmspace; 3126 struct vm_map *map = &ovm->vm_map; 3127 struct uvm_map_deadq dead_entries; 3128 3129 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3130 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3131 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3132 3133 pmap_unuse_final(p); /* before stack addresses go away */ 3134 TAILQ_INIT(&dead_entries); 3135 3136 /* 3137 * see if more than one process is using this vmspace... 3138 */ 3139 3140 if (ovm->vm_refcnt == 1) { 3141 /* 3142 * if p is the only process using its vmspace then we can safely 3143 * recycle that vmspace for the program that is being exec'd. 
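		 * (Sketch of the two branches handled below:
		 *	ovm->vm_refcnt == 1 -> wipe all entries, reset the
		 *	    boundaries to [start, end) and keep the vmspace;
		 *	ovm->vm_refcnt  > 1 -> uvmspace_alloc() a fresh
		 *	    vmspace, switch p over to it and drop our old
		 *	    reference.)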
3144 */ 3145 3146 #ifdef SYSVSHM 3147 /* 3148 * SYSV SHM semantics require us to kill all segments on an exec 3149 */ 3150 if (ovm->vm_shm) 3151 shmexit(ovm); 3152 #endif 3153 3154 /* 3155 * POSIX 1003.1b -- "lock future mappings" is revoked 3156 * when a process execs another program image. 3157 */ 3158 vm_map_lock(map); 3159 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3160 3161 /* 3162 * now unmap the old program 3163 * 3164 * Instead of attempting to keep the map valid, we simply 3165 * nuke all entries and ask uvm_map_setup to reinitialize 3166 * the map to the new boundaries. 3167 * 3168 * uvm_unmap_remove will actually nuke all entries for us 3169 * (as in, not replace them with free-memory entries). 3170 */ 3171 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3172 &dead_entries, TRUE, FALSE); 3173 3174 KDASSERT(RB_EMPTY(&map->addr)); 3175 3176 /* 3177 * Nuke statistics and boundaries. 3178 */ 3179 bzero(&ovm->vm_startcopy, 3180 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3181 3182 3183 if (end & (vaddr_t)PAGE_MASK) { 3184 end += 1; 3185 if (end == 0) /* overflow */ 3186 end -= PAGE_SIZE; 3187 } 3188 3189 /* 3190 * Setup new boundaries and populate map with entries. 3191 */ 3192 map->min_offset = start; 3193 map->max_offset = end; 3194 uvm_map_setup_entries(map); 3195 vm_map_unlock(map); 3196 3197 /* 3198 * but keep MMU holes unavailable 3199 */ 3200 pmap_remove_holes(map); 3201 3202 } else { 3203 3204 /* 3205 * p's vmspace is being shared, so we can't reuse it for p since 3206 * it is still being used for others. allocate a new vmspace 3207 * for p 3208 */ 3209 nvm = uvmspace_alloc(start, end, 3210 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3211 3212 /* 3213 * install new vmspace and drop our ref to the old one. 3214 */ 3215 3216 pmap_deactivate(p); 3217 p->p_vmspace = nvm; 3218 pmap_activate(p); 3219 3220 uvmspace_free(ovm); 3221 } 3222 3223 /* 3224 * Release dead entries 3225 */ 3226 uvm_unmap_detach(&dead_entries, 0); 3227 } 3228 3229 /* 3230 * uvmspace_free: free a vmspace data structure 3231 * 3232 * - XXX: no locking on vmspace 3233 */ 3234 3235 void 3236 uvmspace_free(struct vmspace *vm) 3237 { 3238 if (--vm->vm_refcnt == 0) { 3239 /* 3240 * lock the map, to wait out all other references to it. delete 3241 * all of the mappings and pages they hold, then call the pmap 3242 * module to reclaim anything left. 3243 */ 3244 #ifdef SYSVSHM 3245 /* Get rid of any SYSV shared memory segments. */ 3246 if (vm->vm_shm != NULL) 3247 shmexit(vm); 3248 #endif 3249 3250 uvm_map_teardown(&vm->vm_map); 3251 pool_put(&uvm_vmspace_pool, vm); 3252 } 3253 } 3254 3255 /* 3256 * Clone map entry into other map. 3257 * 3258 * Mapping will be placed at dstaddr, for the same length. 3259 * Space must be available. 3260 * Reference counters are incremented. 3261 */ 3262 struct vm_map_entry* 3263 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3264 vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3265 int mapent_flags, int amap_share_flags) 3266 { 3267 struct vm_map_entry *new_entry, *first, *last; 3268 3269 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3270 3271 /* 3272 * Create new entry (linked in on creation). 3273 * Fill in first, last. 
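	 * A typical call, for illustration, clones a whole entry in place
	 * (cf. uvm_mapent_forkshared below):
	 *
	 *	new_entry = uvm_mapent_clone(new_map, old_entry->start,
	 *	    old_entry->end - old_entry->start, 0, old_entry,
	 *	    dead, 0, AMAP_SHARED);
	 *
	 * first/last describe the free space in dstmap that will hold the
	 * copy.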
3274 */ 3275 first = last = NULL; 3276 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3277 panic("uvmspace_fork: no space in map for " 3278 "entry in empty map"); 3279 } 3280 new_entry = uvm_map_mkentry(dstmap, first, last, 3281 dstaddr, dstlen, mapent_flags, dead); 3282 if (new_entry == NULL) 3283 return NULL; 3284 /* old_entry -> new_entry */ 3285 new_entry->object = old_entry->object; 3286 new_entry->offset = old_entry->offset; 3287 new_entry->aref = old_entry->aref; 3288 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3289 new_entry->protection = old_entry->protection; 3290 new_entry->max_protection = old_entry->max_protection; 3291 new_entry->inheritance = old_entry->inheritance; 3292 new_entry->advice = old_entry->advice; 3293 3294 /* 3295 * gain reference to object backing the map (can't 3296 * be a submap). 3297 */ 3298 if (new_entry->aref.ar_amap) { 3299 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3300 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3301 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3302 amap_share_flags); 3303 } 3304 3305 if (UVM_ET_ISOBJ(new_entry) && 3306 new_entry->object.uvm_obj->pgops->pgo_reference) { 3307 new_entry->offset += off; 3308 new_entry->object.uvm_obj->pgops->pgo_reference 3309 (new_entry->object.uvm_obj); 3310 } 3311 3312 return new_entry; 3313 } 3314 3315 /* 3316 * share the mapping: this means we want the old and 3317 * new entries to share amaps and backing objects. 3318 */ 3319 void 3320 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3321 struct vm_map *old_map, 3322 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3323 { 3324 struct vm_map_entry *new_entry; 3325 3326 /* 3327 * if the old_entry needs a new amap (due to prev fork) 3328 * then we need to allocate it now so that we have 3329 * something we own to share with the new_entry. [in 3330 * other words, we need to clear needs_copy] 3331 */ 3332 3333 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3334 /* get our own amap, clears needs_copy */ 3335 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3336 0, 0); 3337 /* XXXCDC: WAITOK??? */ 3338 } 3339 3340 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3341 old_entry->end - old_entry->start, 0, old_entry, 3342 dead, 0, AMAP_SHARED); 3343 3344 /* 3345 * pmap_copy the mappings: this routine is optional 3346 * but if it is there it will reduce the number of 3347 * page faults in the new proc. 3348 */ 3349 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3350 (new_entry->end - new_entry->start), new_entry->start); 3351 3352 /* 3353 * Update process statistics. 3354 */ 3355 if (!UVM_ET_ISHOLE(new_entry)) 3356 new_map->size += new_entry->end - new_entry->start; 3357 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3358 new_vm->vm_dused += 3359 uvmspace_dused(new_map, new_entry->start, new_entry->end); 3360 } 3361 } 3362 3363 /* 3364 * copy-on-write the mapping (using mmap's 3365 * MAP_PRIVATE semantics) 3366 * 3367 * allocate new_entry, adjust reference counts. 3368 * (note that new references are read-only). 
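 *
 * Condensed decision table for the amap handling below (illustrative):
 *
 *	old amap shared, or old entry wired -> amap_copy() right away
 *	otherwise                           -> defer via UVM_ET_NEEDSCOPY
 *	old entry wired (and has an amap)   -> amap_cow_now() resolves the
 *	                                       copy-on-write faults up front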
3369 */ 3370 void 3371 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3372 struct vm_map *old_map, 3373 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3374 { 3375 struct vm_map_entry *new_entry; 3376 boolean_t protect_child; 3377 3378 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3379 old_entry->end - old_entry->start, 0, old_entry, 3380 dead, 0, 0); 3381 3382 new_entry->etype |= 3383 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3384 3385 /* 3386 * the new entry will need an amap. it will either 3387 * need to be copied from the old entry or created 3388 * from scratch (if the old entry does not have an 3389 * amap). can we defer this process until later 3390 * (by setting "needs_copy") or do we need to copy 3391 * the amap now? 3392 * 3393 * we must copy the amap now if any of the following 3394 * conditions hold: 3395 * 1. the old entry has an amap and that amap is 3396 * being shared. this means that the old (parent) 3397 * process is sharing the amap with another 3398 * process. if we do not clear needs_copy here 3399 * we will end up in a situation where both the 3400 * parent and child process are referring to the 3401 * same amap with "needs_copy" set. if the 3402 * parent write-faults, the fault routine will 3403 * clear "needs_copy" in the parent by allocating 3404 * a new amap. this is wrong because the 3405 * parent is supposed to be sharing the old amap 3406 * and the new amap will break that. 3407 * 3408 * 2. if the old entry has an amap and a non-zero 3409 * wire count then we are going to have to call 3410 * amap_cow_now to avoid page faults in the 3411 * parent process. since amap_cow_now requires 3412 * "needs_copy" to be clear we might as well 3413 * clear it here as well. 3414 * 3415 */ 3416 3417 if (old_entry->aref.ar_amap != NULL && 3418 ((amap_flags(old_entry->aref.ar_amap) & 3419 AMAP_SHARED) != 0 || 3420 VM_MAPENT_ISWIRED(old_entry))) { 3421 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3422 0, 0); 3423 /* XXXCDC: M_WAITOK ... ok? */ 3424 } 3425 3426 /* 3427 * if the parent's entry is wired down, then the 3428 * parent process does not want page faults on 3429 * access to that memory. this means that we 3430 * cannot do copy-on-write because we can't write 3431 * protect the old entry. in this case we 3432 * resolve all copy-on-write faults now, using 3433 * amap_cow_now. note that we have already 3434 * allocated any needed amap (above). 3435 */ 3436 3437 if (VM_MAPENT_ISWIRED(old_entry)) { 3438 3439 /* 3440 * resolve all copy-on-write faults now 3441 * (note that there is nothing to do if 3442 * the old mapping does not have an amap). 3443 * XXX: is it worthwhile to bother with 3444 * pmap_copy in this case? 3445 */ 3446 if (old_entry->aref.ar_amap) 3447 amap_cow_now(new_map, new_entry); 3448 3449 } else { 3450 if (old_entry->aref.ar_amap) { 3451 3452 /* 3453 * setup mappings to trigger copy-on-write faults 3454 * we must write-protect the parent if it has 3455 * an amap and it is not already "needs_copy"... 3456 * if it is already "needs_copy" then the parent 3457 * has already been write-protected by a previous 3458 * fork operation. 3459 * 3460 * if we do not write-protect the parent, then 3461 * we must be sure to write-protect the child 3462 * after the pmap_copy() operation. 3463 * 3464 * XXX: pmap_copy should have some way of telling 3465 * us that it didn't do anything so we can avoid 3466 * calling pmap_protect needlessly. 
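		 *
		 * Condensed, for illustration, the write-protect decision
		 * below is:
		 *	parent has amap, !needs_copy -> write-protect the
		 *	    parent now
		 *	parent has amap,  needs_copy -> a previous fork
		 *	    already protected it
		 *	parent has no amap           -> protect the child iff
		 *	    the parent could write (max_protection)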
3467 */ 3468 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3469 if (old_entry->max_protection & 3470 VM_PROT_WRITE) { 3471 pmap_protect(old_map->pmap, 3472 old_entry->start, 3473 old_entry->end, 3474 old_entry->protection & 3475 ~VM_PROT_WRITE); 3476 pmap_update(old_map->pmap); 3477 } 3478 old_entry->etype |= UVM_ET_NEEDSCOPY; 3479 } 3480 3481 /* 3482 * parent must now be write-protected 3483 */ 3484 protect_child = FALSE; 3485 } else { 3486 3487 /* 3488 * we only need to protect the child if the 3489 * parent has write access. 3490 */ 3491 if (old_entry->max_protection & VM_PROT_WRITE) 3492 protect_child = TRUE; 3493 else 3494 protect_child = FALSE; 3495 3496 } 3497 3498 /* 3499 * copy the mappings 3500 * XXX: need a way to tell if this does anything 3501 */ 3502 3503 pmap_copy(new_map->pmap, old_map->pmap, 3504 new_entry->start, 3505 (old_entry->end - old_entry->start), 3506 old_entry->start); 3507 3508 /* 3509 * protect the child's mappings if necessary 3510 */ 3511 if (protect_child) { 3512 pmap_protect(new_map->pmap, new_entry->start, 3513 new_entry->end, 3514 new_entry->protection & 3515 ~VM_PROT_WRITE); 3516 } 3517 } 3518 3519 /* 3520 * Update process statistics. 3521 */ 3522 if (!UVM_ET_ISHOLE(new_entry)) 3523 new_map->size += new_entry->end - new_entry->start; 3524 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3525 new_vm->vm_dused += 3526 uvmspace_dused(new_map, new_entry->start, new_entry->end); 3527 } 3528 } 3529 3530 /* 3531 * uvmspace_fork: fork a process' main map 3532 * 3533 * => create a new vmspace for child process from parent. 3534 * => parent's map must not be locked. 3535 */ 3536 struct vmspace * 3537 uvmspace_fork(struct vmspace *vm1) 3538 { 3539 struct vmspace *vm2; 3540 struct vm_map *old_map = &vm1->vm_map; 3541 struct vm_map *new_map; 3542 struct vm_map_entry *old_entry; 3543 struct uvm_map_deadq dead; 3544 3545 vm_map_lock(old_map); 3546 3547 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3548 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3549 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3550 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3551 vm2->vm_dused = 0; /* Statistic managed by us. */ 3552 new_map = &vm2->vm_map; 3553 vm_map_lock(new_map); 3554 3555 /* 3556 * go entry-by-entry 3557 */ 3558 3559 TAILQ_INIT(&dead); 3560 RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3561 if (old_entry->start == old_entry->end) 3562 continue; 3563 3564 /* 3565 * first, some sanity checks on the old entry 3566 */ 3567 if (UVM_ET_ISSUBMAP(old_entry)) { 3568 panic("fork: encountered a submap during fork " 3569 "(illegal)"); 3570 } 3571 3572 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3573 UVM_ET_ISNEEDSCOPY(old_entry)) { 3574 panic("fork: non-copy_on_write map entry marked " 3575 "needs_copy (illegal)"); 3576 } 3577 3578 /* 3579 * Apply inheritance. 3580 */ 3581 if (old_entry->inheritance == MAP_INHERIT_SHARE) { 3582 uvm_mapent_forkshared(vm2, new_map, 3583 old_map, old_entry, &dead); 3584 } 3585 if (old_entry->inheritance == MAP_INHERIT_COPY) { 3586 uvm_mapent_forkcopy(vm2, new_map, 3587 old_map, old_entry, &dead); 3588 } 3589 } 3590 3591 vm_map_unlock(old_map); 3592 vm_map_unlock(new_map); 3593 3594 /* 3595 * This can actually happen, if multiple entries described a 3596 * space in which an entry was inherited. 
3597 */ 3598 uvm_unmap_detach(&dead, 0); 3599 3600 #ifdef SYSVSHM 3601 if (vm1->vm_shm) 3602 shmfork(vm1, vm2); 3603 #endif 3604 3605 #ifdef PMAP_FORK 3606 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 3607 #endif 3608 3609 return vm2; 3610 } 3611 3612 /* 3613 * uvm_map_hint: return the beginning of the best area suitable for 3614 * creating a new mapping with "prot" protection. 3615 */ 3616 vaddr_t 3617 uvm_map_hint(struct vmspace *vm, vm_prot_t prot) 3618 { 3619 vaddr_t addr; 3620 3621 #ifdef __i386__ 3622 /* 3623 * If executable skip first two pages, otherwise start 3624 * after data + heap region. 3625 */ 3626 if ((prot & VM_PROT_EXECUTE) != 0 && 3627 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3628 addr = (PAGE_SIZE*2) + 3629 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3630 return (round_page(addr)); 3631 } 3632 #endif 3633 /* start malloc/mmap after the brk */ 3634 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 3635 #if !defined(__vax__) 3636 addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); 3637 #endif 3638 return (round_page(addr)); 3639 } 3640 3641 /* 3642 * uvm_map_submap: punch down part of a map into a submap 3643 * 3644 * => only the kernel_map is allowed to be submapped 3645 * => the purpose of submapping is to break up the locking granularity 3646 * of a larger map 3647 * => the range specified must have been mapped previously with a uvm_map() 3648 * call [with uobj==NULL] to create a blank map entry in the main map. 3649 * [And it had better still be blank!] 3650 * => maps which contain submaps should never be copied or forked. 3651 * => to remove a submap, use uvm_unmap() on the main map 3652 * and then uvm_map_deallocate() the submap. 3653 * => main map must be unlocked. 3654 * => submap must have been init'd and have a zero reference count. 3655 * [need not be locked as we don't actually reference it] 3656 */ 3657 int 3658 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3659 struct vm_map *submap) 3660 { 3661 struct vm_map_entry *entry; 3662 int result; 3663 3664 if (start > map->max_offset || end > map->max_offset || 3665 start < map->min_offset || end < map->min_offset) 3666 return EINVAL; 3667 3668 vm_map_lock(map); 3669 3670 if (uvm_map_lookup_entry(map, start, &entry)) { 3671 UVM_MAP_CLIP_START(map, entry, start); 3672 UVM_MAP_CLIP_END(map, entry, end); 3673 } else 3674 entry = NULL; 3675 3676 if (entry != NULL && 3677 entry->start == start && entry->end == end && 3678 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 3679 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 3680 entry->etype |= UVM_ET_SUBMAP; 3681 entry->object.sub_map = submap; 3682 entry->offset = 0; 3683 uvm_map_reference(submap); 3684 result = 0; 3685 } else 3686 result = EINVAL; 3687 3688 vm_map_unlock(map); 3689 return(result); 3690 } 3691 3692 /* 3693 * uvm_map_checkprot: check protection in map 3694 * 3695 * => must allow specific protection in a fully allocated region. 3696 * => map must be read or write locked by caller. 3697 */ 3698 boolean_t 3699 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3700 vm_prot_t protection) 3701 { 3702 struct vm_map_entry *entry; 3703 3704 if (start < map->min_offset || end > map->max_offset || start > end) 3705 return FALSE; 3706 if (start == end) 3707 return TRUE; 3708 3709 /* 3710 * Iterate entries.
3711 */ 3712 for (entry = uvm_map_entrybyaddr(&map->addr, start); 3713 entry != NULL && entry->start < end; 3714 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3715 /* 3716 * Fail if a hole is found. 3717 */ 3718 if (UVM_ET_ISHOLE(entry) || 3719 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 3720 return FALSE; 3721 3722 /* 3723 * Check protection. 3724 */ 3725 if ((entry->protection & protection) != protection) 3726 return FALSE; 3727 } 3728 return TRUE; 3729 } 3730 3731 /* 3732 * uvm_map_create: create map 3733 */ 3734 vm_map_t 3735 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 3736 { 3737 vm_map_t result; 3738 3739 result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); 3740 result->pmap = pmap; 3741 uvm_map_setup(result, min, max, flags); 3742 return(result); 3743 } 3744 3745 /* 3746 * uvm_map_deallocate: drop reference to a map 3747 * 3748 * => caller must not lock map 3749 * => we will zap map if ref count goes to zero 3750 */ 3751 void 3752 uvm_map_deallocate(vm_map_t map) 3753 { 3754 int c; 3755 struct uvm_map_deadq dead; 3756 3757 simple_lock(&map->ref_lock); 3758 c = --map->ref_count; 3759 simple_unlock(&map->ref_lock); 3760 if (c > 0) { 3761 return; 3762 } 3763 3764 /* 3765 * all references gone. unmap and free. 3766 * 3767 * No lock required: we are only one to access this map. 3768 */ 3769 3770 TAILQ_INIT(&dead); 3771 uvm_tree_sanity(map, __FILE__, __LINE__); 3772 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 3773 TRUE, FALSE); 3774 pmap_destroy(map->pmap); 3775 KASSERT(RB_EMPTY(&map->addr)); 3776 free(map, M_VMMAP); 3777 3778 uvm_unmap_detach(&dead, 0); 3779 } 3780 3781 /* 3782 * uvm_map_inherit: set inheritance code for range of addrs in map. 3783 * 3784 * => map must be unlocked 3785 * => note that the inherit code is used during a "fork". see fork 3786 * code for details. 3787 */ 3788 int 3789 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3790 vm_inherit_t new_inheritance) 3791 { 3792 struct vm_map_entry *entry; 3793 3794 switch (new_inheritance) { 3795 case MAP_INHERIT_NONE: 3796 case MAP_INHERIT_COPY: 3797 case MAP_INHERIT_SHARE: 3798 break; 3799 default: 3800 return (EINVAL); 3801 } 3802 3803 if (start > end) 3804 return EINVAL; 3805 start = MAX(start, map->min_offset); 3806 end = MIN(end, map->max_offset); 3807 if (start >= end) 3808 return 0; 3809 3810 vm_map_lock(map); 3811 3812 entry = uvm_map_entrybyaddr(&map->addr, start); 3813 if (entry->end > start) 3814 UVM_MAP_CLIP_START(map, entry, start); 3815 else 3816 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3817 3818 while (entry != NULL && entry->start < end) { 3819 UVM_MAP_CLIP_END(map, entry, end); 3820 entry->inheritance = new_inheritance; 3821 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3822 } 3823 3824 vm_map_unlock(map); 3825 return (0); 3826 } 3827 3828 /* 3829 * uvm_map_advice: set advice code for range of addrs in map. 
3830 * 3831 * => map must be unlocked 3832 */ 3833 int 3834 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3835 { 3836 struct vm_map_entry *entry; 3837 3838 switch (new_advice) { 3839 case MADV_NORMAL: 3840 case MADV_RANDOM: 3841 case MADV_SEQUENTIAL: 3842 break; 3843 default: 3844 return (EINVAL); 3845 } 3846 3847 if (start > end) 3848 return EINVAL; 3849 start = MAX(start, map->min_offset); 3850 end = MIN(end, map->max_offset); 3851 if (start >= end) 3852 return 0; 3853 3854 vm_map_lock(map); 3855 3856 entry = uvm_map_entrybyaddr(&map->addr, start); 3857 if (entry != NULL && entry->end > start) 3858 UVM_MAP_CLIP_START(map, entry, start); 3859 else if (entry!= NULL) 3860 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3861 3862 /* 3863 * XXXJRT: disallow holes? 3864 */ 3865 3866 while (entry != NULL && entry->start < end) { 3867 UVM_MAP_CLIP_END(map, entry, end); 3868 entry->advice = new_advice; 3869 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3870 } 3871 3872 vm_map_unlock(map); 3873 return (0); 3874 } 3875 3876 /* 3877 * uvm_map_extract: extract a mapping from a map and put it somewhere 3878 * in the kernel_map, setting protection to max_prot. 3879 * 3880 * => map should be unlocked (we will write lock it and kernel_map) 3881 * => returns 0 on success, error code otherwise 3882 * => start must be page aligned 3883 * => len must be page sized 3884 * => flags: 3885 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 3886 * Mappings are QREF's. 3887 */ 3888 int 3889 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 3890 vaddr_t *dstaddrp, int flags) 3891 { 3892 struct uvm_map_deadq dead; 3893 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 3894 vaddr_t dstaddr; 3895 vaddr_t end; 3896 vaddr_t cp_start; 3897 vsize_t cp_len, cp_off; 3898 int error; 3899 3900 TAILQ_INIT(&dead); 3901 end = start + len; 3902 3903 /* 3904 * Sanity check on the parameters. 3905 * Also, since the mapping may not contain gaps, error out if the 3906 * mapped area is not in source map. 3907 */ 3908 3909 if ((start & (vaddr_t)PAGE_MASK) != 0 || 3910 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 3911 return EINVAL; 3912 if (start < srcmap->min_offset || end > srcmap->max_offset) 3913 return EINVAL; 3914 3915 /* 3916 * Initialize dead entries. 3917 * Handle len == 0 case. 3918 */ 3919 3920 if (len == 0) 3921 return 0; 3922 3923 /* 3924 * Acquire lock on srcmap. 3925 */ 3926 vm_map_lock(srcmap); 3927 3928 /* 3929 * Lock srcmap, lookup first and last entry in <start,len>. 3930 */ 3931 first = uvm_map_entrybyaddr(&srcmap->addr, start); 3932 3933 /* 3934 * Check that the range is contiguous. 3935 */ 3936 for (entry = first; entry != NULL && entry->end < end; 3937 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3938 if (VMMAP_FREE_END(entry) != entry->end || 3939 UVM_ET_ISHOLE(entry)) { 3940 error = EINVAL; 3941 goto fail; 3942 } 3943 } 3944 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 3945 error = EINVAL; 3946 goto fail; 3947 } 3948 3949 /* 3950 * Handle need-copy flag. 3951 * This may invalidate last, hence the re-initialization during the 3952 * loop. 3953 * 3954 * Also, perform clipping of last if not UVM_EXTRACT_QREF. 
3955 */ 3956 for (entry = first; entry != NULL && entry->start < end; 3957 entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) { 3958 if (UVM_ET_ISNEEDSCOPY(entry)) 3959 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); 3960 if (UVM_ET_ISNEEDSCOPY(entry)) { 3961 /* 3962 * amap_copy failure 3963 */ 3964 error = ENOMEM; 3965 goto fail; 3966 } 3967 } 3968 3969 /* 3970 * Lock destination map (kernel_map). 3971 */ 3972 vm_map_lock(kernel_map); 3973 3974 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 3975 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 3976 VM_PROT_NONE, 0) != 0) { 3977 error = ENOMEM; 3978 goto fail2; 3979 } 3980 *dstaddrp = dstaddr; 3981 3982 /* 3983 * We now have srcmap and kernel_map locked. 3984 * dstaddr contains the destination offset in dstmap. 3985 */ 3986 3987 /* 3988 * step 1: start looping through map entries, performing extraction. 3989 */ 3990 for (entry = first; entry != NULL && entry->start < end; 3991 entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) { 3992 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 3993 if (UVM_ET_ISHOLE(entry)) 3994 continue; 3995 3996 /* 3997 * Calculate uvm_mapent_clone parameters. 3998 */ 3999 cp_start = entry->start; 4000 if (cp_start < start) { 4001 cp_off = start - cp_start; 4002 cp_start = start; 4003 } else 4004 cp_off = 0; 4005 cp_len = MIN(entry->end, end) - cp_start; 4006 4007 newentry = uvm_mapent_clone(kernel_map, 4008 cp_start - start + dstaddr, cp_len, cp_off, 4009 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4010 if (newentry == NULL) { 4011 error = ENOMEM; 4012 goto fail2_unmap; 4013 } 4014 kernel_map->size += cp_len; 4015 if (flags & UVM_EXTRACT_FIXPROT) 4016 newentry->protection = newentry->max_protection; 4017 4018 /* 4019 * Step 2: perform pmap copy. 4020 * (Doing this in the loop saves one RB traversal.) 4021 */ 4022 pmap_copy(kernel_map->pmap, srcmap->pmap, 4023 cp_start - start + dstaddr, cp_len, cp_start); 4024 } 4025 pmap_update(kernel_map->pmap); 4026 4027 error = 0; 4028 4029 /* 4030 * Unmap copied entries on failure. 4031 */ 4032 fail2_unmap: 4033 if (error) { 4034 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4035 FALSE, TRUE); 4036 } 4037 4038 /* 4039 * Release maps, release dead entries. 4040 */ 4041 fail2: 4042 vm_map_unlock(kernel_map); 4043 4044 fail: 4045 vm_map_unlock(srcmap); 4046 4047 uvm_unmap_detach(&dead, 0); 4048 4049 return error; 4050 } 4051 4052 /* 4053 * uvm_map_clean: clean out a map range 4054 * 4055 * => valid flags: 4056 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4057 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4058 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4059 * if (flags & PGO_FREE): any cached pages are freed after clean 4060 * => returns an error if any part of the specified range isn't mapped 4061 * => never a need to flush amap layer since the anonymous memory has 4062 * no permanent home, but may deactivate pages there 4063 * => called from sys_msync() and sys_madvise() 4064 * => caller must not write-lock map (read OK). 4065 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4066 */ 4067 4068 int amap_clean_works = 1; /* XXX for now, just in case...
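   (Clearing this runtime knob makes uvm_map_clean() skip the per-anon
   amap pass below and go straight to flushing the backing object.)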
*/ 4069 4070 int 4071 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4072 { 4073 struct vm_map_entry *first, *entry; 4074 struct vm_amap *amap; 4075 struct vm_anon *anon; 4076 struct vm_page *pg; 4077 struct uvm_object *uobj; 4078 vaddr_t cp_start, cp_end; 4079 int refs; 4080 int error; 4081 boolean_t rv; 4082 4083 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4084 (PGO_FREE|PGO_DEACTIVATE)); 4085 4086 if (start > end || start < map->min_offset || end > map->max_offset) 4087 return EINVAL; 4088 4089 vm_map_lock_read(map); 4090 first = uvm_map_entrybyaddr(&map->addr, start); 4091 4092 /* 4093 * Make a first pass to check for holes. 4094 */ 4095 for (entry = first; entry->start < end; 4096 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4097 if (UVM_ET_ISSUBMAP(entry)) { 4098 vm_map_unlock_read(map); 4099 return EINVAL; 4100 } 4101 if (UVM_ET_ISSUBMAP(entry) || 4102 UVM_ET_ISHOLE(entry) || 4103 (entry->end < end && 4104 VMMAP_FREE_END(entry) != entry->end)) { 4105 vm_map_unlock_read(map); 4106 return EFAULT; 4107 } 4108 } 4109 4110 error = 0; 4111 for (entry = first; entry != NULL && entry->start < end; 4112 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4113 amap = entry->aref.ar_amap; /* top layer */ 4114 if (UVM_ET_ISOBJ(entry)) 4115 uobj = entry->object.uvm_obj; 4116 else 4117 uobj = NULL; 4118 4119 /* 4120 * No amap cleaning necessary if: 4121 * - there's no amap 4122 * - we're not deactivating or freeing pages. 4123 */ 4124 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4125 goto flush_object; 4126 if (!amap_clean_works) 4127 goto flush_object; 4128 4129 cp_start = MAX(entry->start, start); 4130 cp_end = MIN(entry->end, end); 4131 4132 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4133 anon = amap_lookup(&entry->aref, 4134 cp_start - entry->start); 4135 if (anon == NULL) 4136 continue; 4137 4138 simple_lock(&anon->an_lock); /* XXX */ 4139 4140 pg = anon->an_page; 4141 if (pg == NULL) { 4142 simple_unlock(&anon->an_lock); 4143 continue; 4144 } 4145 4146 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4147 /* 4148 * XXX In these first 3 cases, we always just 4149 * XXX deactivate the page. We may want to 4150 * XXX handle the different cases more 4151 * XXX specifically, in the future. 4152 */ 4153 case PGO_CLEANIT|PGO_FREE: 4154 case PGO_CLEANIT|PGO_DEACTIVATE: 4155 case PGO_DEACTIVATE: 4156 deactivate_it: 4157 /* skip the page if it's loaned or wired */ 4158 if (pg->loan_count != 0 || 4159 pg->wire_count != 0) { 4160 simple_unlock(&anon->an_lock); 4161 break; 4162 } 4163 4164 uvm_lock_pageq(); 4165 4166 /* 4167 * skip the page if it's not actually owned 4168 * by the anon (may simply be loaned to the 4169 * anon). 4170 */ 4171 if ((pg->pg_flags & PQ_ANON) == 0) { 4172 KASSERT(pg->uobject == NULL); 4173 uvm_unlock_pageq(); 4174 simple_unlock(&anon->an_lock); 4175 break; 4176 } 4177 KASSERT(pg->uanon == anon); 4178 4179 /* zap all mappings for the page. */ 4180 pmap_page_protect(pg, VM_PROT_NONE); 4181 4182 /* ...and deactivate the page. */ 4183 uvm_pagedeactivate(pg); 4184 4185 uvm_unlock_pageq(); 4186 simple_unlock(&anon->an_lock); 4187 break; 4188 4189 case PGO_FREE: 4190 4191 /* 4192 * If there are mutliple references to 4193 * the amap, just deactivate the page. 
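 * Freeing the anon here would discard a page that other references
 * to the shared amap may still rely on, so the shared case falls
 * back to the deactivate path above instead.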
4194 */ 4195 if (amap_refs(amap) > 1) 4196 goto deactivate_it; 4197 4198 /* XXX skip the page if it's wired */ 4199 if (pg->wire_count != 0) { 4200 simple_unlock(&anon->an_lock); 4201 break; 4202 } 4203 amap_unadd(&entry->aref, 4204 cp_start - entry->start); 4205 refs = --anon->an_ref; 4206 simple_unlock(&anon->an_lock); 4207 if (refs == 0) 4208 uvm_anfree(anon); 4209 break; 4210 4211 default: 4212 panic("uvm_map_clean: weird flags"); 4213 } 4214 } 4215 4216 flush_object: 4217 cp_start = MAX(entry->start, start); 4218 cp_end = MIN(entry->end, end); 4219 4220 /* 4221 * flush pages if we've got a valid backing object. 4222 * 4223 * Don't PGO_FREE if we don't have write permission 4224 * and don't flush if this is a copy-on-write object 4225 * since we can't know our permissions on it. 4226 */ 4227 if (uobj != NULL && 4228 ((flags & PGO_FREE) == 0 || 4229 ((entry->max_protection & VM_PROT_WRITE) != 0 && 4230 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4231 simple_lock(&uobj->vmobjlock); 4232 rv = uobj->pgops->pgo_flush(uobj, 4233 cp_start - entry->start + entry->offset, 4234 cp_end - entry->start + entry->offset, flags); 4235 simple_unlock(&uobj->vmobjlock); 4236 4237 if (rv == FALSE) 4238 error = EFAULT; 4239 } 4240 } 4241 4242 vm_map_unlock_read(map); 4243 return error; 4244 } 4245 4246 /* 4247 * UVM_MAP_CLIP_END implementation 4248 */ 4249 void 4250 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4251 { 4252 struct vm_map_entry *tmp; 4253 4254 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4255 tmp = uvm_mapent_alloc(map, 0); 4256 4257 /* 4258 * Invoke splitentry. 4259 */ 4260 uvm_map_splitentry(map, entry, tmp, addr); 4261 } 4262 4263 /* 4264 * UVM_MAP_CLIP_START implementation 4265 * 4266 * Clippers are required to not change the pointers to the entry they are 4267 * clipping on. 4268 * Since uvm_map_splitentry turns the original entry into the lowest 4269 * entry (address wise) we do a swap between the new entry and the original 4270 * entry, prior to calling uvm_map_splitentry. 4271 */ 4272 void 4273 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4274 { 4275 struct vm_map_entry *tmp; 4276 struct uvm_addr_state *free; 4277 4278 /* Unlink original. */ 4279 free = uvm_map_uaddr_e(map, entry); 4280 uvm_mapent_free_remove(map, free, entry); 4281 uvm_mapent_addr_remove(map, entry); 4282 4283 /* Copy entry. */ 4284 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4285 tmp = uvm_mapent_alloc(map, 0); 4286 uvm_mapent_copy(entry, tmp); 4287 4288 /* Put new entry in place of original entry. */ 4289 uvm_mapent_addr_insert(map, tmp); 4290 uvm_mapent_free_insert(map, free, tmp); 4291 4292 /* Invoke splitentry. */ 4293 uvm_map_splitentry(map, tmp, entry, addr); 4294 } 4295 4296 /* 4297 * Boundary fixer. 4298 */ 4299 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4300 static __inline vaddr_t 4301 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4302 { 4303 return (min < bound && max > bound) ? bound : max; 4304 } 4305 4306 /* 4307 * Choose free list based on address at start of free space. 4308 * 4309 * The uvm_addr_state returned contains addr and is the first of: 4310 * - uaddr_exe 4311 * - uaddr_brk_stack 4312 * - uaddr_any 4313 */ 4314 struct uvm_addr_state* 4315 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4316 { 4317 struct uvm_addr_state *uaddr; 4318 int i; 4319 4320 /* Special case the first page, to prevent mmap from returning 0. 
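   Space that no selector claims is not handed out by the allocators,
   so the first page of the address space stays unmapped.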
*/ 4321 if (addr < VMMAP_MIN_ADDR) 4322 return NULL; 4323 4324 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4325 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4326 if (addr >= uvm_maxkaddr) 4327 return NULL; 4328 } 4329 4330 /* Is the address inside the exe-only map? */ 4331 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4332 addr < map->uaddr_exe->uaddr_maxaddr) 4333 return map->uaddr_exe; 4334 4335 /* Check if the space falls inside brk/stack area. */ 4336 if ((addr >= map->b_start && addr < map->b_end) || 4337 (addr >= map->s_start && addr < map->s_end)) { 4338 if (map->uaddr_brk_stack != NULL && 4339 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4340 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4341 return map->uaddr_brk_stack; 4342 } else 4343 return NULL; 4344 } 4345 4346 /* 4347 * Check the other selectors. 4348 * 4349 * These selectors are only marked as the owner, if they have insert 4350 * functions. 4351 */ 4352 for (i = 0; i < nitems(map->uaddr_any); i++) { 4353 uaddr = map->uaddr_any[i]; 4354 if (uaddr == NULL) 4355 continue; 4356 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4357 continue; 4358 4359 if (addr >= uaddr->uaddr_minaddr && 4360 addr < uaddr->uaddr_maxaddr) 4361 return uaddr; 4362 } 4363 4364 return NULL; 4365 } 4366 4367 /* 4368 * Choose free list based on address at start of free space. 4369 * 4370 * The uvm_addr_state returned contains addr and is the first of: 4371 * - uaddr_exe 4372 * - uaddr_brk_stack 4373 * - uaddr_any 4374 */ 4375 struct uvm_addr_state* 4376 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4377 { 4378 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4379 } 4380 4381 /* 4382 * Returns the first free-memory boundary that is crossed by [min-max]. 4383 */ 4384 vsize_t 4385 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4386 { 4387 struct uvm_addr_state *uaddr; 4388 int i; 4389 4390 /* Never return first page. */ 4391 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4392 4393 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4394 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4395 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4396 4397 /* Check for exe-only boundaries. */ 4398 if (map->uaddr_exe != NULL) { 4399 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4400 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4401 } 4402 4403 /* Check for exe-only boundaries. */ 4404 if (map->uaddr_brk_stack != NULL) { 4405 max = uvm_map_boundfix(min, max, 4406 map->uaddr_brk_stack->uaddr_minaddr); 4407 max = uvm_map_boundfix(min, max, 4408 map->uaddr_brk_stack->uaddr_maxaddr); 4409 } 4410 4411 /* Check other boundaries. */ 4412 for (i = 0; i < nitems(map->uaddr_any); i++) { 4413 uaddr = map->uaddr_any[i]; 4414 if (uaddr != NULL) { 4415 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4416 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4417 } 4418 } 4419 4420 /* Boundaries at stack and brk() area. */ 4421 max = uvm_map_boundfix(min, max, map->s_start); 4422 max = uvm_map_boundfix(min, max, map->s_end); 4423 max = uvm_map_boundfix(min, max, map->b_start); 4424 max = uvm_map_boundfix(min, max, map->b_end); 4425 4426 return max; 4427 } 4428 4429 /* 4430 * Update map allocation start and end addresses from proc vmspace. 
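 * The brk() range is derived as vm_daddr .. vm_daddr + BRKSIZ and the
 * stack range as the ordered pair of vm_minsaddr/vm_maxsaddr; the
 * free lists are only rebuilt when these bounds actually changed.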
4431 */ 4432 void 4433 uvm_map_vmspace_update(struct vm_map *map, 4434 struct uvm_map_deadq *dead, int flags) 4435 { 4436 struct vmspace *vm; 4437 vaddr_t b_start, b_end, s_start, s_end; 4438 4439 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4440 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4441 4442 /* 4443 * Derive actual allocation boundaries from vmspace. 4444 */ 4445 vm = (struct vmspace *)map; 4446 b_start = (vaddr_t)vm->vm_daddr; 4447 b_end = b_start + BRKSIZ; 4448 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4449 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4450 #ifdef DIAGNOSTIC 4451 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4452 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4453 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4454 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4455 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4456 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4457 vm, b_start, b_end, s_start, s_end); 4458 } 4459 #endif 4460 4461 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4462 map->s_start == s_start && map->s_end == s_end)) 4463 return; 4464 4465 uvm_map_freelist_update(map, dead, b_start, b_end, 4466 s_start, s_end, flags); 4467 } 4468 4469 /* 4470 * Grow kernel memory. 4471 * 4472 * This function is only called for kernel maps when an allocation fails. 4473 * 4474 * If the map has a gap that is large enough to accomodate alloc_sz, this 4475 * function will make sure map->free will include it. 4476 */ 4477 void 4478 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4479 vsize_t alloc_sz, int flags) 4480 { 4481 vsize_t sz; 4482 vaddr_t end; 4483 struct vm_map_entry *entry; 4484 4485 /* Kernel memory only. */ 4486 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4487 /* Destroy free list. */ 4488 uvm_map_freelist_update_clear(map, dead); 4489 4490 /* 4491 * Include the guard page in the hard minimum requirement of alloc_sz. 4492 */ 4493 if (map->flags & VM_MAP_GUARDPAGES) 4494 alloc_sz += PAGE_SIZE; 4495 4496 /* 4497 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4498 * 4499 * Don't handle the case where the multiplication overflows: 4500 * if that happens, the allocation is probably too big anyway. 4501 */ 4502 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4503 4504 /* 4505 * Walk forward until a gap large enough for alloc_sz shows up. 4506 * 4507 * We assume the kernel map has no boundaries. 4508 * uvm_maxkaddr may be zero. 4509 */ 4510 end = MAX(uvm_maxkaddr, map->min_offset); 4511 entry = uvm_map_entrybyaddr(&map->addr, end); 4512 while (entry && entry->fspace < alloc_sz) 4513 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 4514 if (entry) { 4515 end = MAX(VMMAP_FREE_START(entry), end); 4516 end += MIN(sz, map->max_offset - end); 4517 } else 4518 end = map->max_offset; 4519 4520 /* Reserve pmap entries. */ 4521 #ifdef PMAP_GROWKERNEL 4522 uvm_maxkaddr = pmap_growkernel(end); 4523 #else 4524 uvm_maxkaddr = end; 4525 #endif 4526 4527 /* Rebuild free list. */ 4528 uvm_map_freelist_update_refill(map, flags); 4529 } 4530 4531 /* 4532 * Freelist update subfunction: unlink all entries from freelists. 
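 * Entries that map nothing (start == end) exist purely to describe
 * free space; their free range is folded into the previous entry and
 * the entry itself is unlinked and pushed onto the dead queue.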
4533 */ 4534 void 4535 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4536 { 4537 struct uvm_addr_state *free; 4538 struct vm_map_entry *entry, *prev, *next; 4539 4540 prev = NULL; 4541 for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL; 4542 entry = next) { 4543 next = RB_NEXT(uvm_map_addr, &map->addr, entry); 4544 4545 free = uvm_map_uaddr_e(map, entry); 4546 uvm_mapent_free_remove(map, free, entry); 4547 4548 if (prev != NULL && entry->start == entry->end) { 4549 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4550 uvm_mapent_addr_remove(map, entry); 4551 DEAD_ENTRY_PUSH(dead, entry); 4552 } else 4553 prev = entry; 4554 } 4555 } 4556 4557 /* 4558 * Freelist update subfunction: refill the freelists with entries. 4559 */ 4560 void 4561 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4562 { 4563 struct vm_map_entry *entry; 4564 vaddr_t min, max; 4565 4566 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 4567 min = VMMAP_FREE_START(entry); 4568 max = VMMAP_FREE_END(entry); 4569 entry->fspace = 0; 4570 4571 entry = uvm_map_fix_space(map, entry, min, max, flags); 4572 } 4573 4574 uvm_tree_sanity(map, __FILE__, __LINE__); 4575 } 4576 4577 /* 4578 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4579 */ 4580 void 4581 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4582 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4583 { 4584 KDASSERT(b_end >= b_start && s_end >= s_start); 4585 4586 /* Clear all free lists. */ 4587 uvm_map_freelist_update_clear(map, dead); 4588 4589 /* Apply new bounds. */ 4590 map->b_start = b_start; 4591 map->b_end = b_end; 4592 map->s_start = s_start; 4593 map->s_end = s_end; 4594 4595 /* Refill free lists. */ 4596 uvm_map_freelist_update_refill(map, flags); 4597 } 4598 4599 /* 4600 * Assign a uvm_addr_state to the specified pointer in vm_map. 4601 * 4602 * May sleep. 4603 */ 4604 void 4605 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4606 struct uvm_addr_state *newval) 4607 { 4608 struct uvm_map_deadq dead; 4609 4610 /* Pointer which must be in this map. */ 4611 KASSERT(which != NULL); 4612 KASSERT((void*)map <= (void*)(which) && 4613 (void*)(which) < (void*)(map + 1)); 4614 4615 vm_map_lock(map); 4616 TAILQ_INIT(&dead); 4617 uvm_map_freelist_update_clear(map, &dead); 4618 4619 uvm_addr_destroy(*which); 4620 *which = newval; 4621 4622 uvm_map_freelist_update_refill(map, 0); 4623 vm_map_unlock(map); 4624 uvm_unmap_detach(&dead, 0); 4625 } 4626 4627 /* 4628 * Correct space insert. 4629 * 4630 * Entry must not be on any freelist. 4631 */ 4632 struct vm_map_entry* 4633 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4634 vaddr_t min, vaddr_t max, int flags) 4635 { 4636 struct uvm_addr_state *free, *entfree; 4637 vaddr_t lmax; 4638 4639 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4640 KDASSERT(min <= max); 4641 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4642 min == map->min_offset); 4643 4644 /* 4645 * During the function, entfree will always point at the uaddr state 4646 * for entry. 4647 */ 4648 entfree = (entry == NULL ? NULL : 4649 uvm_map_uaddr_e(map, entry)); 4650 4651 while (min != max) { 4652 /* 4653 * Claim guard page for entry. 
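 * In a VM_MAP_GUARDPAGES map, each mapped entry donates one page of
 * the free space that follows it as a guard; a gap of exactly two
 * pages is consumed entirely, since the single page left over could
 * never satisfy an allocation (which needs its own guard) anyway.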
4654 */ 4655 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4656 VMMAP_FREE_END(entry) == entry->end && 4657 entry->start != entry->end) { 4658 if (max - min == 2 * PAGE_SIZE) { 4659 /* 4660 * If the free-space gap is exactly 2 pages, 4661 * we make the guard 2 pages instead of 1. 4662 * Because in a guarded map, an area needs 4663 * at least 2 pages to allocate from: 4664 * one page for the allocation and one for 4665 * the guard. 4666 */ 4667 entry->guard = 2 * PAGE_SIZE; 4668 min = max; 4669 } else { 4670 entry->guard = PAGE_SIZE; 4671 min += PAGE_SIZE; 4672 } 4673 continue; 4674 } 4675 4676 /* 4677 * Handle the case where entry has a 2-page guard, but the 4678 * space after entry is freed. 4679 */ 4680 if (entry != NULL && entry->fspace == 0 && 4681 entry->guard > PAGE_SIZE) { 4682 entry->guard = PAGE_SIZE; 4683 min = VMMAP_FREE_START(entry); 4684 } 4685 4686 lmax = uvm_map_boundary(map, min, max); 4687 free = uvm_map_uaddr(map, min); 4688 4689 /* 4690 * Entries are merged if they point at the same uvm_free(). 4691 * Exception to that rule: if min == uvm_maxkaddr, a new 4692 * entry is started regardless (otherwise the allocators 4693 * will get confused). 4694 */ 4695 if (entry != NULL && free == entfree && 4696 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 4697 min == uvm_maxkaddr)) { 4698 KDASSERT(VMMAP_FREE_END(entry) == min); 4699 entry->fspace += lmax - min; 4700 } else { 4701 /* 4702 * Commit entry to free list: it'll not be added to 4703 * anymore. 4704 * We'll start a new entry and add to that entry 4705 * instead. 4706 */ 4707 if (entry != NULL) 4708 uvm_mapent_free_insert(map, entfree, entry); 4709 4710 /* New entry for new uaddr. */ 4711 entry = uvm_mapent_alloc(map, flags); 4712 KDASSERT(entry != NULL); 4713 entry->end = entry->start = min; 4714 entry->guard = 0; 4715 entry->fspace = lmax - min; 4716 entry->object.uvm_obj = NULL; 4717 entry->offset = 0; 4718 entry->etype = 0; 4719 entry->protection = entry->max_protection = 0; 4720 entry->inheritance = 0; 4721 entry->wired_count = 0; 4722 entry->advice = 0; 4723 entry->aref.ar_pageoff = 0; 4724 entry->aref.ar_amap = NULL; 4725 uvm_mapent_addr_insert(map, entry); 4726 4727 entfree = free; 4728 } 4729 4730 min = lmax; 4731 } 4732 /* Finally put entry on the uaddr state. */ 4733 if (entry != NULL) 4734 uvm_mapent_free_insert(map, entfree, entry); 4735 4736 return entry; 4737 } 4738 4739 /* 4740 * MQuery style of allocation. 4741 * 4742 * This allocator searches forward until sufficient space is found to map 4743 * the given size. 4744 * 4745 * XXX: factor in offset (via pmap_prefer) and protection? 4746 */ 4747 int 4748 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 4749 int flags) 4750 { 4751 struct vm_map_entry *entry, *last; 4752 vaddr_t addr; 4753 vaddr_t tmp, pmap_align, pmap_offset; 4754 int error; 4755 4756 addr = *addr_p; 4757 vm_map_lock_read(map); 4758 4759 /* 4760 * Configure pmap prefer. 4761 */ 4762 if (offset != UVM_UNKNOWN_OFFSET) { 4763 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 4764 pmap_offset = PMAP_PREFER_OFFSET(offset); 4765 } else { 4766 pmap_align = PAGE_SIZE; 4767 pmap_offset = 0; 4768 } 4769 4770 /* 4771 * Align address to pmap_prefer unless FLAG_FIXED is set. 4772 */ 4773 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 4774 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4775 if (tmp < addr) 4776 tmp += pmap_align; 4777 addr = tmp; 4778 } 4779 4780 /* 4781 * First, check if the requested range is fully available. 
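 * If uvm_map_isavail() reports <addr, addr + sz> free, the hint is
 * returned unchanged; with UVM_FLAG_FIXED set, failure of this check
 * yields EINVAL instead of a forward search for the next usable gap.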
4782 */ 4783 entry = uvm_map_entrybyaddr(&map->addr, addr); 4784 last = NULL; 4785 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4786 error = 0; 4787 goto out; 4788 } 4789 if (flags & UVM_FLAG_FIXED) { 4790 error = EINVAL; 4791 goto out; 4792 } 4793 4794 error = ENOMEM; /* Default error from here. */ 4795 4796 /* 4797 * At this point, the memory at <addr, sz> is not available. 4798 * The reasons are: 4799 * [1] it's outside the map, 4800 * [2] it starts in used memory (and therefore needs to move 4801 * toward the first free page in entry), 4802 * [3] it starts in free memory but bumps into used memory. 4803 * 4804 * Note that for case [2], the forward moving is handled by the 4805 * for loop below. 4806 */ 4807 4808 if (entry == NULL) { 4809 /* [1] Outside the map. */ 4810 if (addr >= map->max_offset) 4811 goto out; 4812 else 4813 entry = RB_MIN(uvm_map_addr, &map->addr); 4814 } else if (VMMAP_FREE_START(entry) <= addr) { 4815 /* [3] Bumped into used memory. */ 4816 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 4817 } 4818 4819 /* 4820 * Test if the next entry is sufficient for the allocation. 4821 */ 4822 for (; entry != NULL; 4823 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4824 if (entry->fspace == 0) 4825 continue; 4826 addr = VMMAP_FREE_START(entry); 4827 4828 restart: /* Restart address checks on address change. */ 4829 4830 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4831 if (tmp < addr) 4832 tmp += pmap_align; 4833 addr = tmp; 4834 if (addr >= VMMAP_FREE_END(entry)) 4835 continue; 4836 4837 /* 4838 * Skip brk() allocation addresses. 4839 */ 4840 if (addr + sz > map->b_start && addr < map->b_end) { 4841 if (VMMAP_FREE_END(entry) > map->b_end) { 4842 addr = map->b_end; 4843 goto restart; 4844 } else 4845 continue; 4846 } 4847 /* 4848 * Skip stack allocation addresses. 4849 */ 4850 if (addr + sz > map->s_start && addr < map->s_end) { 4851 if (VMMAP_FREE_END(entry) > map->s_end) { 4852 addr = map->s_end; 4853 goto restart; 4854 } else 4855 continue; 4856 } 4857 4858 last = NULL; 4859 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4860 error = 0; 4861 goto out; 4862 } 4863 } 4864 4865 out: 4866 vm_map_unlock_read(map); 4867 if (error == 0) 4868 *addr_p = addr; 4869 return error; 4870 } 4871 4872 /* 4873 * Determine allocation bias. 4874 * 4875 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 4876 * addresses, or 0 for no bias. 4877 * The bias mechanism is intended to avoid clashing with brk() and stack 4878 * areas. 4879 */ 4880 int 4881 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 4882 { 4883 vaddr_t start, end; 4884 4885 start = VMMAP_FREE_START(entry); 4886 end = VMMAP_FREE_END(entry); 4887 4888 /* 4889 * Stay at the top of brk() area. 4890 */ 4891 if (end >= map->b_start && start < map->b_end) 4892 return 1; 4893 /* 4894 * Stay at the far end of the stack area. 4895 */ 4896 if (end >= map->s_start && start < map->s_end) { 4897 #ifdef MACHINE_STACK_GROWS_UP 4898 return 1; 4899 #else 4900 return -1; 4901 #endif 4902 } 4903 4904 /* 4905 * No bias, this area is meant for us. 
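 * Free space that overlaps neither the brk() region nor the stack
 * region needs no bias; the allocator may place the mapping anywhere
 * within it.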
4906 */ 4907 return 0; 4908 } 4909 4910 4911 boolean_t 4912 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 4913 { 4914 boolean_t rv; 4915 4916 if (map->flags & VM_MAP_INTRSAFE) { 4917 rv = TRUE; 4918 } else { 4919 if (map->flags & VM_MAP_BUSY) { 4920 return (FALSE); 4921 } 4922 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 4923 } 4924 4925 if (rv) { 4926 map->timestamp++; 4927 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4928 uvm_tree_sanity(map, file, line); 4929 uvm_tree_size_chk(map, file, line); 4930 } 4931 4932 return (rv); 4933 } 4934 4935 void 4936 vm_map_lock_ln(struct vm_map *map, char *file, int line) 4937 { 4938 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 4939 do { 4940 while (map->flags & VM_MAP_BUSY) { 4941 map->flags |= VM_MAP_WANTLOCK; 4942 tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); 4943 } 4944 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 4945 } 4946 4947 map->timestamp++; 4948 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4949 uvm_tree_sanity(map, file, line); 4950 uvm_tree_size_chk(map, file, line); 4951 } 4952 4953 void 4954 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 4955 { 4956 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4957 rw_enter_read(&map->lock); 4958 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4959 uvm_tree_sanity(map, file, line); 4960 uvm_tree_size_chk(map, file, line); 4961 } 4962 4963 void 4964 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 4965 { 4966 uvm_tree_sanity(map, file, line); 4967 uvm_tree_size_chk(map, file, line); 4968 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4969 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4970 rw_exit(&map->lock); 4971 } 4972 4973 void 4974 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 4975 { 4976 /* XXX: RO */ uvm_tree_sanity(map, file, line); 4977 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 4978 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4979 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4980 rw_exit_read(&map->lock); 4981 } 4982 4983 void 4984 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 4985 { 4986 uvm_tree_sanity(map, file, line); 4987 uvm_tree_size_chk(map, file, line); 4988 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4989 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4990 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4991 rw_enter(&map->lock, RW_DOWNGRADE); 4992 } 4993 4994 void 4995 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 4996 { 4997 /* XXX: RO */ uvm_tree_sanity(map, file, line); 4998 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 4999 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5000 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5001 rw_exit_read(&map->lock); 5002 rw_enter_write(&map->lock); 5003 } 5004 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5005 uvm_tree_sanity(map, file, line); 5006 } 5007 5008 void 5009 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5010 { 5011 map->flags |= VM_MAP_BUSY; 5012 } 5013 5014 void 5015 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5016 { 5017 int oflags; 5018 5019 oflags = map->flags; 5020 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5021 if (oflags & VM_MAP_WANTLOCK) 5022 wakeup(&map->flags); 5023 } 5024 5025 5026 #undef RB_AUGMENT 5027 #define RB_AUGMENT(x) uvm_map_addr_augment((x)) 5028 RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5029 uvm_mapentry_addrcmp); 5030 #undef RB_AUGMENT 5031 5032 5033 /* 5034 * MD 
code: vmspace allocator setup. 5035 */ 5036 5037 5038 #ifdef __i386__ 5039 void 5040 uvm_map_setup_md(struct vm_map *map) 5041 { 5042 vaddr_t min, max; 5043 5044 min = map->min_offset; 5045 max = map->max_offset; 5046 5047 /* 5048 * Ensure the selectors will not try to manage page 0; 5049 * it's too special. 5050 */ 5051 if (min < VMMAP_MIN_ADDR) 5052 min = VMMAP_MIN_ADDR; 5053 5054 #if 0 /* Cool stuff, not yet */ 5055 /* Hinted allocations. */ 5056 map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, 5057 1024 * 1024 * 1024); 5058 5059 /* Executable code is special. */ 5060 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5061 /* Place normal allocations beyond executable mappings. */ 5062 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5063 #else /* Crappy stuff, for now */ 5064 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5065 #endif 5066 5067 #ifndef SMALL_KERNEL 5068 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5069 #endif /* !SMALL_KERNEL */ 5070 } 5071 #elif __LP64__ 5072 void 5073 uvm_map_setup_md(struct vm_map *map) 5074 { 5075 vaddr_t min, max; 5076 5077 min = map->min_offset; 5078 max = map->max_offset; 5079 5080 /* 5081 * Ensure the selectors will not try to manage page 0; 5082 * it's too special. 5083 */ 5084 if (min < VMMAP_MIN_ADDR) 5085 min = VMMAP_MIN_ADDR; 5086 5087 #if 0 /* Cool stuff, not yet */ 5088 /* Hinted allocations above 4GB */ 5089 map->uaddr_any[0] = 5090 uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024); 5091 /* Hinted allocations below 4GB */ 5092 map->uaddr_any[1] = 5093 uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL, 5094 1024 * 1024 * 1024); 5095 /* Normal allocations, always above 4GB */ 5096 map->uaddr_any[3] = 5097 uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5098 #else /* Crappy stuff, for now */ 5099 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5100 #endif 5101 5102 #ifndef SMALL_KERNEL 5103 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5104 #endif /* !SMALL_KERNEL */ 5105 } 5106 #else /* non-i386, 32 bit */ 5107 void 5108 uvm_map_setup_md(struct vm_map *map) 5109 { 5110 vaddr_t min, max; 5111 5112 min = map->min_offset; 5113 max = map->max_offset; 5114 5115 /* 5116 * Ensure the selectors will not try to manage page 0; 5117 * it's too special. 5118 */ 5119 if (min < VMMAP_MIN_ADDR) 5120 min = VMMAP_MIN_ADDR; 5121 5122 #if 0 /* Cool stuff, not yet */ 5123 /* Hinted allocations. */ 5124 map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, 5125 1024 * 1024 * 1024); 5126 /* Normal allocations. */ 5127 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5128 #else /* Crappy stuff, for now */ 5129 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5130 #endif 5131 5132 #ifndef SMALL_KERNEL 5133 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5134 #endif /* !SMALL_KERNEL */ 5135 } 5136 #endif 5137