1 /* $OpenBSD: uvm_map.c,v 1.145 2011/07/05 03:10:29 dhill Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * Copyright (c) 1991, 1993, The Regents of the University of California. 7 * 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by Charles D. Cranor, 24 * Washington University, the University of California, Berkeley and 25 * its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 43 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 44 * 45 * 46 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 47 * All rights reserved. 48 * 49 * Permission to use, copy, modify and distribute this software and 50 * its documentation is hereby granted, provided that both the copyright 51 * notice and this permission notice appear in all copies of the 52 * software, derivative works or modified versions, and any portions 53 * thereof, and that both notices appear in supporting documentation. 54 * 55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58 * 59 * Carnegie Mellon requests users of this software to return to 60 * 61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62 * School of Computer Science 63 * Carnegie Mellon University 64 * Pittsburgh PA 15213-3890 65 * 66 * any improvements or extensions that they make and grant Carnegie the 67 * rights to redistribute these changes. 
68 */ 69 70 /* 71 * uvm_map.c: uvm map operations 72 */ 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/mman.h> 77 #include <sys/proc.h> 78 #include <sys/malloc.h> 79 #include <sys/pool.h> 80 #include <sys/kernel.h> 81 82 #include <dev/rndvar.h> 83 84 #ifdef SYSVSHM 85 #include <sys/shm.h> 86 #endif 87 88 #include <uvm/uvm.h> 89 #undef RB_AUGMENT 90 #define RB_AUGMENT(x) uvm_rb_augment(x) 91 92 #ifdef DDB 93 #include <uvm/uvm_ddb.h> 94 #endif 95 96 static struct timeval uvm_kmapent_last_warn_time; 97 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 98 99 const char vmmapbsy[] = "vmmapbsy"; 100 101 /* 102 * pool for vmspace structures. 103 */ 104 105 struct pool uvm_vmspace_pool; 106 107 /* 108 * pool for dynamically-allocated map entries. 109 */ 110 111 struct pool uvm_map_entry_pool; 112 struct pool uvm_map_entry_kmem_pool; 113 114 #ifdef PMAP_GROWKERNEL 115 /* 116 * This global represents the end of the kernel virtual address 117 * space. If we want to exceed this, we must grow the kernel 118 * virtual address space dynamically. 119 * 120 * Note, this variable is locked by kernel_map's lock. 121 */ 122 vaddr_t uvm_maxkaddr; 123 #endif 124 125 /* 126 * macros 127 */ 128 129 /* 130 * uvm_map_entry_link: insert entry into a map 131 * 132 * => map must be locked 133 */ 134 #define uvm_map_entry_link(map, after_where, entry) do { \ 135 (map)->nentries++; \ 136 (entry)->prev = (after_where); \ 137 (entry)->next = (after_where)->next; \ 138 (entry)->prev->next = (entry); \ 139 (entry)->next->prev = (entry); \ 140 uvm_rb_insert(map, entry); \ 141 } while (0) 142 143 /* 144 * uvm_map_entry_unlink: remove entry from a map 145 * 146 * => map must be locked 147 */ 148 #define uvm_map_entry_unlink(map, entry) do { \ 149 (map)->nentries--; \ 150 (entry)->next->prev = (entry)->prev; \ 151 (entry)->prev->next = (entry)->next; \ 152 uvm_rb_remove(map, entry); \ 153 } while (0) 154 155 /* 156 * SAVE_HINT: saves the specified entry as the hint for future lookups. 157 * 158 * => map need not be locked (protected by hint_lock). 159 */ 160 #define SAVE_HINT(map,check,value) do { \ 161 simple_lock(&(map)->hint_lock); \ 162 if ((map)->hint == (check)) \ 163 (map)->hint = (value); \ 164 simple_unlock(&(map)->hint_lock); \ 165 } while (0) 166 167 /* 168 * VM_MAP_RANGE_CHECK: check and correct range 169 * 170 * => map must at least be read locked 171 */ 172 173 #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 174 if (start < vm_map_min(map)) \ 175 start = vm_map_min(map); \ 176 if (end > vm_map_max(map)) \ 177 end = vm_map_max(map); \ 178 if (start > end) \ 179 start = end; \ 180 } while (0) 181 182 /* 183 * local prototypes 184 */ 185 186 void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 187 void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 188 void uvm_map_reference_amap(struct vm_map_entry *, int); 189 void uvm_map_unreference_amap(struct vm_map_entry *, int); 190 int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t, 191 struct vm_map_entry *, voff_t, vsize_t); 192 193 struct vm_map_entry *uvm_mapent_alloc(struct vm_map *, int); 194 void uvm_mapent_free(struct vm_map_entry *); 195 196 #ifdef KVA_GUARDPAGES 197 /* 198 * Number of kva guardpages in use. 199 */ 200 int kva_guardpages; 201 #endif 202 203 204 /* 205 * Tree manipulation. 
206 */ 207 void uvm_rb_insert(struct vm_map *, struct vm_map_entry *); 208 void uvm_rb_remove(struct vm_map *, struct vm_map_entry *); 209 vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *); 210 211 #ifdef DEBUG 212 int _uvm_tree_sanity(struct vm_map *map, const char *name); 213 #endif 214 vsize_t uvm_rb_subtree_space(struct vm_map_entry *); 215 void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *); 216 217 static __inline int 218 uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b) 219 { 220 if (a->start < b->start) 221 return (-1); 222 else if (a->start > b->start) 223 return (1); 224 225 return (0); 226 } 227 228 229 static __inline void 230 uvm_rb_augment(struct vm_map_entry *entry) 231 { 232 entry->space = uvm_rb_subtree_space(entry); 233 } 234 235 RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); 236 237 RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); 238 239 vsize_t 240 uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry) 241 { 242 struct vm_map_entry *next; 243 vaddr_t space; 244 245 if ((next = entry->next) == &map->header) 246 space = map->max_offset - entry->end; 247 else { 248 KASSERT(next); 249 space = next->start - entry->end; 250 } 251 return (space); 252 } 253 254 vsize_t 255 uvm_rb_subtree_space(struct vm_map_entry *entry) 256 { 257 vaddr_t space, tmp; 258 259 space = entry->ownspace; 260 if (RB_LEFT(entry, rb_entry)) { 261 tmp = RB_LEFT(entry, rb_entry)->space; 262 if (tmp > space) 263 space = tmp; 264 } 265 266 if (RB_RIGHT(entry, rb_entry)) { 267 tmp = RB_RIGHT(entry, rb_entry)->space; 268 if (tmp > space) 269 space = tmp; 270 } 271 272 return (space); 273 } 274 275 void 276 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) 277 { 278 /* We need to traverse to the very top */ 279 do { 280 entry->ownspace = uvm_rb_space(map, entry); 281 entry->space = uvm_rb_subtree_space(entry); 282 } while ((entry = RB_PARENT(entry, rb_entry)) != NULL); 283 } 284 285 void 286 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) 287 { 288 vaddr_t space = uvm_rb_space(map, entry); 289 struct vm_map_entry *tmp; 290 291 entry->ownspace = entry->space = space; 292 tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry); 293 #ifdef DIAGNOSTIC 294 if (tmp != NULL) 295 panic("uvm_rb_insert: duplicate entry?"); 296 #endif 297 uvm_rb_fixup(map, entry); 298 if (entry->prev != &map->header) 299 uvm_rb_fixup(map, entry->prev); 300 } 301 302 void 303 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 304 { 305 struct vm_map_entry *parent; 306 307 parent = RB_PARENT(entry, rb_entry); 308 RB_REMOVE(uvm_tree, &(map)->rbhead, entry); 309 if (entry->prev != &map->header) 310 uvm_rb_fixup(map, entry->prev); 311 if (parent) 312 uvm_rb_fixup(map, parent); 313 } 314 315 #ifdef DEBUG 316 #define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y) 317 #else 318 #define uvm_tree_sanity(x,y) 319 #endif 320 321 #ifdef DEBUG 322 int 323 _uvm_tree_sanity(struct vm_map *map, const char *name) 324 { 325 struct vm_map_entry *tmp, *trtmp; 326 int n = 0, i = 1; 327 328 RB_FOREACH(tmp, uvm_tree, &map->rbhead) { 329 if (tmp->ownspace != uvm_rb_space(map, tmp)) { 330 printf("%s: %d/%d ownspace %x != %x %s\n", 331 name, n + 1, map->nentries, 332 tmp->ownspace, uvm_rb_space(map, tmp), 333 tmp->next == &map->header ? 
"(last)" : ""); 334 goto error; 335 } 336 } 337 trtmp = NULL; 338 RB_FOREACH(tmp, uvm_tree, &map->rbhead) { 339 if (tmp->space != uvm_rb_subtree_space(tmp)) { 340 printf("%s: space %d != %d\n", 341 name, tmp->space, uvm_rb_subtree_space(tmp)); 342 goto error; 343 } 344 if (trtmp != NULL && trtmp->start >= tmp->start) { 345 printf("%s: corrupt: 0x%lx >= 0x%lx\n", 346 name, trtmp->start, tmp->start); 347 goto error; 348 } 349 n++; 350 351 trtmp = tmp; 352 } 353 354 if (n != map->nentries) { 355 printf("%s: nentries: %d vs %d\n", 356 name, n, map->nentries); 357 goto error; 358 } 359 360 for (tmp = map->header.next; tmp && tmp != &map->header; 361 tmp = tmp->next, i++) { 362 trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp); 363 if (trtmp != tmp) { 364 printf("%s: lookup: %d: %p - %p: %p\n", 365 name, i, tmp, trtmp, 366 RB_PARENT(tmp, rb_entry)); 367 goto error; 368 } 369 } 370 371 return (0); 372 error: 373 #ifdef DDB 374 /* handy breakpoint location for error case */ 375 __asm(".globl treesanity_label\ntreesanity_label:"); 376 #endif 377 return (-1); 378 } 379 #endif 380 381 /* 382 * uvm_mapent_alloc: allocate a map entry 383 */ 384 385 struct vm_map_entry * 386 uvm_mapent_alloc(struct vm_map *map, int flags) 387 { 388 struct vm_map_entry *me, *ne; 389 int s, i; 390 int pool_flags; 391 392 pool_flags = PR_WAITOK; 393 if (flags & UVM_FLAG_TRYLOCK) 394 pool_flags = PR_NOWAIT; 395 396 if (map->flags & VM_MAP_INTRSAFE || cold) { 397 s = splvm(); 398 simple_lock(&uvm.kentry_lock); 399 me = uvm.kentry_free; 400 if (me == NULL) { 401 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 402 &kd_nowait); 403 if (ne == NULL) 404 panic("uvm_mapent_alloc: cannot allocate map " 405 "entry"); 406 for (i = 0; 407 i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; 408 i++) 409 ne[i].next = &ne[i + 1]; 410 ne[i].next = NULL; 411 me = ne; 412 if (ratecheck(&uvm_kmapent_last_warn_time, 413 &uvm_kmapent_warn_rate)) 414 printf("uvm_mapent_alloc: out of static " 415 "map entries\n"); 416 } 417 uvm.kentry_free = me->next; 418 uvmexp.kmapent++; 419 simple_unlock(&uvm.kentry_lock); 420 splx(s); 421 me->flags = UVM_MAP_STATIC; 422 } else if (map == kernel_map) { 423 splassert(IPL_NONE); 424 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 425 if (me == NULL) 426 goto out; 427 me->flags = UVM_MAP_KMEM; 428 } else { 429 splassert(IPL_NONE); 430 me = pool_get(&uvm_map_entry_pool, pool_flags); 431 if (me == NULL) 432 goto out; 433 me->flags = 0; 434 } 435 436 out: 437 return(me); 438 } 439 440 /* 441 * uvm_mapent_free: free map entry 442 * 443 * => XXX: static pool for kernel map? 
444 */ 445 446 void 447 uvm_mapent_free(struct vm_map_entry *me) 448 { 449 int s; 450 451 if (me->flags & UVM_MAP_STATIC) { 452 s = splvm(); 453 simple_lock(&uvm.kentry_lock); 454 me->next = uvm.kentry_free; 455 uvm.kentry_free = me; 456 uvmexp.kmapent--; 457 simple_unlock(&uvm.kentry_lock); 458 splx(s); 459 } else if (me->flags & UVM_MAP_KMEM) { 460 splassert(IPL_NONE); 461 pool_put(&uvm_map_entry_kmem_pool, me); 462 } else { 463 splassert(IPL_NONE); 464 pool_put(&uvm_map_entry_pool, me); 465 } 466 } 467 468 /* 469 * uvm_mapent_copy: copy a map entry, preserving flags 470 */ 471 472 void 473 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 474 { 475 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 476 ((char *)src)); 477 } 478 479 /* 480 * uvm_map_entry_unwire: unwire a map entry 481 * 482 * => map should be locked by caller 483 */ 484 void 485 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) 486 { 487 488 entry->wired_count = 0; 489 uvm_fault_unwire_locked(map, entry->start, entry->end); 490 } 491 492 493 /* 494 * wrapper for calling amap_ref() 495 */ 496 void 497 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 498 { 499 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 500 (entry->end - entry->start) >> PAGE_SHIFT, flags); 501 } 502 503 504 /* 505 * wrapper for calling amap_unref() 506 */ 507 void 508 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 509 { 510 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 511 (entry->end - entry->start) >> PAGE_SHIFT, flags); 512 } 513 514 515 /* 516 * uvm_map_init: init mapping system at boot time. note that we allocate 517 * and init the static pool of structs vm_map_entry for the kernel here. 518 */ 519 520 void 521 uvm_map_init(void) 522 { 523 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 524 int lcv; 525 526 /* 527 * set up static pool of kernel map entries ... 528 */ 529 530 simple_lock_init(&uvm.kentry_lock); 531 uvm.kentry_free = NULL; 532 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 533 kernel_map_entry[lcv].next = uvm.kentry_free; 534 uvm.kentry_free = &kernel_map_entry[lcv]; 535 } 536 537 /* 538 * initialize the map-related pools. 539 */ 540 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 541 0, 0, 0, "vmsppl", &pool_allocator_nointr); 542 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 543 0, 0, 0, "vmmpepl", &pool_allocator_nointr); 544 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 545 0, 0, 0, "vmmpekpl", NULL); 546 pool_sethiwat(&uvm_map_entry_pool, 8192); 547 } 548 549 /* 550 * clippers 551 */ 552 553 /* 554 * uvm_map_clip_start: ensure that the entry begins at or after 555 * the starting address, if it doesn't we split the entry. 556 * 557 * => caller should use UVM_MAP_CLIP_START macro rather than calling 558 * this directly 559 * => map must be locked by caller 560 */ 561 562 void 563 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, 564 vaddr_t start) 565 { 566 struct vm_map_entry *new_entry; 567 vaddr_t new_adj; 568 569 /* uvm_map_simplify_entry(map, entry); */ /* XXX */ 570 571 uvm_tree_sanity(map, "clip_start entry"); 572 573 /* 574 * Split off the front portion. note that we must insert the new 575 * entry BEFORE this one, so that this entry has the specified 576 * starting address. 
577 */ 578 579 new_entry = uvm_mapent_alloc(map, 0); 580 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 581 582 new_entry->end = start; 583 new_adj = start - new_entry->start; 584 if (entry->object.uvm_obj) 585 entry->offset += new_adj; /* shift start over */ 586 587 /* Does not change order for the RB tree */ 588 entry->start = start; 589 590 if (new_entry->aref.ar_amap) { 591 amap_splitref(&new_entry->aref, &entry->aref, new_adj); 592 } 593 594 uvm_map_entry_link(map, entry->prev, new_entry); 595 596 if (UVM_ET_ISSUBMAP(entry)) { 597 /* ... unlikely to happen, but play it safe */ 598 uvm_map_reference(new_entry->object.sub_map); 599 } else { 600 if (UVM_ET_ISOBJ(entry) && 601 entry->object.uvm_obj->pgops && 602 entry->object.uvm_obj->pgops->pgo_reference) 603 entry->object.uvm_obj->pgops->pgo_reference( 604 entry->object.uvm_obj); 605 } 606 607 uvm_tree_sanity(map, "clip_start leave"); 608 } 609 610 /* 611 * uvm_map_clip_end: ensure that the entry ends at or before 612 * the ending address, if it doesn't we split the reference 613 * 614 * => caller should use UVM_MAP_CLIP_END macro rather than calling 615 * this directly 616 * => map must be locked by caller 617 */ 618 619 void 620 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) 621 { 622 struct vm_map_entry *new_entry; 623 vaddr_t new_adj; /* #bytes we move start forward */ 624 625 uvm_tree_sanity(map, "clip_end entry"); 626 /* 627 * Create a new entry and insert it 628 * AFTER the specified entry 629 */ 630 631 new_entry = uvm_mapent_alloc(map, 0); 632 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 633 634 new_entry->start = entry->end = end; 635 new_adj = end - entry->start; 636 if (new_entry->object.uvm_obj) 637 new_entry->offset += new_adj; 638 639 if (entry->aref.ar_amap) 640 amap_splitref(&entry->aref, &new_entry->aref, new_adj); 641 642 uvm_rb_fixup(map, entry); 643 644 uvm_map_entry_link(map, entry, new_entry); 645 646 if (UVM_ET_ISSUBMAP(entry)) { 647 /* ... unlikely to happen, but play it safe */ 648 uvm_map_reference(new_entry->object.sub_map); 649 } else { 650 if (UVM_ET_ISOBJ(entry) && 651 entry->object.uvm_obj->pgops && 652 entry->object.uvm_obj->pgops->pgo_reference) 653 entry->object.uvm_obj->pgops->pgo_reference( 654 entry->object.uvm_obj); 655 } 656 uvm_tree_sanity(map, "clip_end leave"); 657 } 658 659 660 /* 661 * M A P - m a i n e n t r y p o i n t 662 */ 663 /* 664 * uvm_map: establish a valid mapping in a map 665 * 666 * => assume startp is page aligned. 667 * => assume size is a multiple of PAGE_SIZE. 668 * => assume sys_mmap provides enough of a "hint" to have us skip 669 * over text/data/bss area. 670 * => map must be unlocked (we will lock it) 671 * => <uobj,uoffset> value meanings (4 cases): 672 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 673 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 674 * [3] <uobj,uoffset> == normal mapping 675 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 676 * 677 * case [4] is for kernel mappings where we don't know the offset until 678 * we've found a virtual address. note that kernel object offsets are 679 * always relative to vm_map_min(kernel_map). 680 * 681 * => if `align' is non-zero, we try to align the virtual address to 682 * the specified alignment. this is only a hint; if we can't 683 * do it, the address will be unaligned. this is provided as 684 * a mechanism for large pages. 685 * 686 * => XXXCDC: need way to map in external amap? 
687 */ 688 689 int 690 uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size, 691 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 692 struct proc *p) 693 { 694 struct vm_map_entry *prev_entry, *new_entry; 695 #ifdef KVA_GUARDPAGES 696 struct vm_map_entry *guard_entry; 697 #endif 698 vm_prot_t prot = UVM_PROTECTION(flags), maxprot = 699 UVM_MAXPROTECTION(flags); 700 vm_inherit_t inherit = UVM_INHERIT(flags); 701 int advice = UVM_ADVICE(flags); 702 int error; 703 704 /* 705 * Holes are incompatible with other types of mappings. 706 */ 707 if (flags & UVM_FLAG_HOLE) { 708 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) != 0 && 709 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 710 } 711 712 #ifdef KVA_GUARDPAGES 713 if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { 714 /* 715 * kva_guardstart is initialized to the start of the kernelmap 716 * and cycles through the kva space. 717 * This way we should have a long time between re-use of kva. 718 */ 719 static vaddr_t kva_guardstart = 0; 720 if (kva_guardstart == 0) { 721 kva_guardstart = vm_map_min(map); 722 printf("uvm_map: kva guard pages enabled: %p\n", 723 kva_guardstart); 724 } 725 size += PAGE_SIZE; /* Add guard page at the end. */ 726 /* 727 * Try to fully exhaust kva prior to wrap-around. 728 * (This may eat your ram!) 729 */ 730 if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) { 731 static int wrap_counter = 0; 732 printf("uvm_map: kva guard page wrap-around %d\n", 733 ++wrap_counter); 734 kva_guardstart = vm_map_min(map); 735 } 736 *startp = kva_guardstart; 737 /* 738 * Prepare for next round. 739 */ 740 kva_guardstart += size; 741 } 742 #endif 743 744 uvm_tree_sanity(map, "map entry"); 745 746 if ((map->flags & VM_MAP_INTRSAFE) == 0) 747 splassert(IPL_NONE); 748 else 749 splassert(IPL_VM); 750 751 /* 752 * step 0: sanity check of protection code 753 */ 754 755 if ((prot & maxprot) != prot) { 756 return (EACCES); 757 } 758 759 /* 760 * step 1: figure out where to put new VM range 761 */ 762 763 if (vm_map_lock_try(map) == FALSE) { 764 if (flags & UVM_FLAG_TRYLOCK) 765 return (EFAULT); 766 vm_map_lock(map); /* could sleep here */ 767 } 768 if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, 769 uobj, uoffset, align, flags)) == NULL) { 770 vm_map_unlock(map); 771 return (ENOMEM); 772 } 773 774 #ifdef PMAP_GROWKERNEL 775 { 776 /* 777 * If the kernel pmap can't map the requested space, 778 * then allocate more resources for it. 779 */ 780 if (map == kernel_map && !(flags & UVM_FLAG_FIXED) && 781 uvm_maxkaddr < (*startp + size)) 782 uvm_maxkaddr = pmap_growkernel(*startp + size); 783 } 784 #endif 785 786 /* 787 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 788 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 789 * either case we want to zero it before storing it in the map entry 790 * (because it looks strange and confusing when debugging...) 791 * 792 * if uobj is not null 793 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 794 * and we do not need to change uoffset. 795 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 796 * now (based on the starting address of the map). this case is 797 * for kernel object mappings where we don't know the offset until 798 * the virtual address is found (with uvm_map_findspace). the 799 * offset is the distance we are from the start of the map. 
800 */ 801 802 if (uobj == NULL) { 803 uoffset = 0; 804 } else { 805 if (uoffset == UVM_UNKNOWN_OFFSET) { 806 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 807 uoffset = *startp - vm_map_min(kernel_map); 808 } 809 } 810 811 /* 812 * step 2: try and insert in map by extending previous entry, if 813 * possible 814 * XXX: we don't try and pull back the next entry. might be useful 815 * for a stack, but we are currently allocating our stack in advance. 816 */ 817 818 if ((flags & UVM_FLAG_NOMERGE) == 0 && 819 prev_entry->end == *startp && prev_entry != &map->header && 820 prev_entry->object.uvm_obj == uobj) { 821 822 if (uobj && prev_entry->offset + 823 (prev_entry->end - prev_entry->start) != uoffset) 824 goto step3; 825 826 if (UVM_ET_ISSUBMAP(prev_entry)) 827 goto step3; 828 829 if (prev_entry->protection != prot || 830 prev_entry->max_protection != maxprot) 831 goto step3; 832 833 if (prev_entry->inheritance != inherit || 834 prev_entry->advice != advice) 835 goto step3; 836 837 /* wiring status must match (new area is unwired) */ 838 if (VM_MAPENT_ISWIRED(prev_entry)) 839 goto step3; 840 841 /* 842 * can't extend a shared amap. note: no need to lock amap to 843 * look at refs since we don't care about its exact value. 844 * if it is one (i.e. we have only reference) it will stay there 845 */ 846 847 if (prev_entry->aref.ar_amap && 848 amap_refs(prev_entry->aref.ar_amap) != 1) { 849 goto step3; 850 } 851 852 /* 853 * Only merge kernel mappings, but keep track 854 * of how much we skipped. 855 */ 856 if (map != kernel_map && map != kmem_map) { 857 goto step3; 858 } 859 860 if (prev_entry->aref.ar_amap) { 861 error = amap_extend(prev_entry, size); 862 if (error) 863 goto step3; 864 } 865 866 /* 867 * drop our reference to uobj since we are extending a reference 868 * that we already have (the ref count can not drop to zero). 869 */ 870 871 if (uobj && uobj->pgops->pgo_detach) 872 uobj->pgops->pgo_detach(uobj); 873 874 prev_entry->end += size; 875 uvm_rb_fixup(map, prev_entry); 876 map->size += size; 877 if (p && uobj == NULL) 878 p->p_vmspace->vm_dused += atop(size); 879 880 uvm_tree_sanity(map, "map leave 2"); 881 882 vm_map_unlock(map); 883 return (0); 884 885 } 886 step3: 887 888 /* 889 * step 3: allocate new entry and link it in 890 */ 891 892 #ifdef KVA_GUARDPAGES 893 if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) 894 size -= PAGE_SIZE; 895 #endif 896 897 new_entry = uvm_mapent_alloc(map, flags); 898 if (new_entry == NULL) { 899 vm_map_unlock(map); 900 return (ENOMEM); 901 } 902 new_entry->start = *startp; 903 new_entry->end = new_entry->start + size; 904 new_entry->object.uvm_obj = uobj; 905 new_entry->offset = uoffset; 906 907 if (uobj) 908 new_entry->etype = UVM_ET_OBJ; 909 else 910 new_entry->etype = 0; 911 912 if (flags & UVM_FLAG_COPYONW) { 913 new_entry->etype |= UVM_ET_COPYONWRITE; 914 if ((flags & UVM_FLAG_OVERLAY) == 0) 915 new_entry->etype |= UVM_ET_NEEDSCOPY; 916 } 917 if (flags & UVM_FLAG_HOLE) 918 new_entry->etype |= UVM_ET_HOLE; 919 920 new_entry->protection = prot; 921 new_entry->max_protection = maxprot; 922 new_entry->inheritance = inherit; 923 new_entry->wired_count = 0; 924 new_entry->advice = advice; 925 if (flags & UVM_FLAG_OVERLAY) { 926 /* 927 * to_add: for BSS we overallocate a little since we 928 * are likely to extend 929 */ 930 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 
931 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 932 struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); 933 new_entry->aref.ar_pageoff = 0; 934 new_entry->aref.ar_amap = amap; 935 } else { 936 new_entry->aref.ar_pageoff = 0; 937 new_entry->aref.ar_amap = NULL; 938 } 939 940 uvm_map_entry_link(map, prev_entry, new_entry); 941 942 map->size += size; 943 if (p && uobj == NULL) 944 p->p_vmspace->vm_dused += atop(size); 945 946 947 /* 948 * Update the free space hint 949 */ 950 951 if ((map->first_free == prev_entry) && 952 (prev_entry->end >= new_entry->start)) 953 map->first_free = new_entry; 954 955 #ifdef KVA_GUARDPAGES 956 /* 957 * Create the guard entry. 958 */ 959 if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { 960 guard_entry = uvm_mapent_alloc(map, flags); 961 if (guard_entry != NULL) { 962 guard_entry->start = new_entry->end; 963 guard_entry->end = guard_entry->start + PAGE_SIZE; 964 guard_entry->object.uvm_obj = uobj; 965 guard_entry->offset = uoffset; 966 guard_entry->etype = MAP_ET_KVAGUARD; 967 guard_entry->protection = prot; 968 guard_entry->max_protection = maxprot; 969 guard_entry->inheritance = inherit; 970 guard_entry->wired_count = 0; 971 guard_entry->advice = advice; 972 guard_entry->aref.ar_pageoff = 0; 973 guard_entry->aref.ar_amap = NULL; 974 uvm_map_entry_link(map, new_entry, guard_entry); 975 map->size += PAGE_SIZE; 976 kva_guardpages++; 977 } 978 } 979 #endif 980 981 uvm_tree_sanity(map, "map leave"); 982 983 vm_map_unlock(map); 984 return (0); 985 } 986 987 /* 988 * uvm_map_lookup_entry: find map entry at or before an address 989 * 990 * => map must at least be read-locked by caller 991 * => entry is returned in "entry" 992 * => return value is true if address is in the returned entry 993 */ 994 995 boolean_t 996 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 997 struct vm_map_entry **entry) 998 { 999 struct vm_map_entry *cur; 1000 struct vm_map_entry *last; 1001 int use_tree = 0; 1002 1003 /* 1004 * start looking either from the head of the 1005 * list, or from the hint. 1006 */ 1007 1008 simple_lock(&map->hint_lock); 1009 cur = map->hint; 1010 simple_unlock(&map->hint_lock); 1011 1012 if (cur == &map->header) 1013 cur = cur->next; 1014 1015 if (address >= cur->start) { 1016 /* 1017 * go from hint to end of list. 1018 * 1019 * but first, make a quick check to see if 1020 * we are already looking at the entry we 1021 * want (which is usually the case). 1022 * note also that we don't need to save the hint 1023 * here... it is the same hint (unless we are 1024 * at the header, in which case the hint didn't 1025 * buy us anything anyway). 1026 */ 1027 last = &map->header; 1028 if ((cur != last) && (cur->end > address)) { 1029 *entry = cur; 1030 return (TRUE); 1031 } 1032 1033 if (map->nentries > 30) 1034 use_tree = 1; 1035 } else { 1036 /* 1037 * go from start to hint, *inclusively* 1038 */ 1039 last = cur->next; 1040 cur = map->header.next; 1041 use_tree = 1; 1042 } 1043 1044 uvm_tree_sanity(map, __func__); 1045 1046 if (use_tree) { 1047 struct vm_map_entry *prev = &map->header; 1048 cur = RB_ROOT(&map->rbhead); 1049 1050 /* 1051 * Simple lookup in the tree. Happens when the hint is 1052 * invalid, or nentries reach a threshold. 
1053 */ 1054 while (cur) { 1055 if (address >= cur->start) { 1056 if (address < cur->end) { 1057 *entry = cur; 1058 SAVE_HINT(map, map->hint, cur); 1059 return (TRUE); 1060 } 1061 prev = cur; 1062 cur = RB_RIGHT(cur, rb_entry); 1063 } else 1064 cur = RB_LEFT(cur, rb_entry); 1065 } 1066 *entry = prev; 1067 return (FALSE); 1068 } 1069 1070 /* 1071 * search linearly 1072 */ 1073 1074 while (cur != last) { 1075 if (cur->end > address) { 1076 if (address >= cur->start) { 1077 /* 1078 * save this lookup for future 1079 * hints, and return 1080 */ 1081 1082 *entry = cur; 1083 SAVE_HINT(map, map->hint, cur); 1084 return (TRUE); 1085 } 1086 break; 1087 } 1088 cur = cur->next; 1089 } 1090 1091 *entry = cur->prev; 1092 SAVE_HINT(map, map->hint, *entry); 1093 return (FALSE); 1094 } 1095 1096 /* 1097 * Checks if address pointed to by phint fits into the empty 1098 * space before the vm_map_entry after. Takes alignment and 1099 * offset into consideration. 1100 */ 1101 1102 int 1103 uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length, 1104 struct vm_map_entry *after, voff_t uoffset, vsize_t align) 1105 { 1106 vaddr_t hint = *phint; 1107 vaddr_t end; 1108 1109 #ifdef PMAP_PREFER 1110 /* 1111 * push hint forward as needed to avoid VAC alias problems. 1112 * we only do this if a valid offset is specified. 1113 */ 1114 if (uoffset != UVM_UNKNOWN_OFFSET) 1115 hint = PMAP_PREFER(uoffset, hint); 1116 #endif 1117 if (align != 0) 1118 if ((hint & (align - 1)) != 0) 1119 hint = roundup(hint, align); 1120 *phint = hint; 1121 1122 end = hint + length; 1123 if (end > map->max_offset || end < hint) 1124 return (FALSE); 1125 if (after != NULL && after != &map->header && after->start < end) 1126 return (FALSE); 1127 1128 return (TRUE); 1129 } 1130 1131 /* 1132 * uvm_map_pie: return a random load address for a PIE executable 1133 * properly aligned. 1134 */ 1135 1136 #ifndef VM_PIE_MAX_ADDR 1137 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1138 #endif 1139 1140 #ifndef VM_PIE_MIN_ADDR 1141 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1142 #endif 1143 1144 #ifndef VM_PIE_MIN_ALIGN 1145 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1146 #endif 1147 1148 vaddr_t 1149 uvm_map_pie(vaddr_t align) 1150 { 1151 vaddr_t addr, space, min; 1152 1153 align = MAX(align, VM_PIE_MIN_ALIGN); 1154 1155 /* round up to next alignment */ 1156 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1157 1158 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1159 return (align); 1160 1161 space = (VM_PIE_MAX_ADDR - min) / align; 1162 space = MIN(space, (u_int32_t)-1); 1163 1164 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1165 addr += min; 1166 1167 return (addr); 1168 } 1169 1170 /* 1171 * uvm_map_hint: return the beginning of the best area suitable for 1172 * creating a new mapping with "prot" protection. 1173 */ 1174 vaddr_t 1175 uvm_map_hint1(struct proc *p, vm_prot_t prot, int skipheap) 1176 { 1177 vaddr_t addr; 1178 1179 #ifdef __i386__ 1180 /* 1181 * If executable skip first two pages, otherwise start 1182 * after data + heap region. 
1183 */ 1184 if ((prot & VM_PROT_EXECUTE) && 1185 ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { 1186 addr = (PAGE_SIZE*2) + 1187 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 1188 return (round_page(addr)); 1189 } 1190 #endif 1191 /* start malloc/mmap after the brk */ 1192 addr = (vaddr_t)p->p_vmspace->vm_daddr; 1193 if (skipheap) 1194 addr += BRKSIZ; 1195 #if !defined(__vax__) 1196 addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1); 1197 #endif 1198 return (round_page(addr)); 1199 } 1200 1201 /* 1202 * uvm_map_findspace: find "length" sized space in "map". 1203 * 1204 * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is 1205 * set (in which case we insist on using "hint"). 1206 * => "result" is VA returned 1207 * => uobj/uoffset are to be used to handle VAC alignment, if required 1208 * => if `align' is non-zero, we attempt to align to that value. 1209 * => caller must at least have read-locked map 1210 * => returns NULL on failure, or pointer to prev. map entry if success 1211 * => note this is a cross between the old vm_map_findspace and vm_map_find 1212 */ 1213 1214 struct vm_map_entry * 1215 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1216 vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align, 1217 int flags) 1218 { 1219 struct vm_map_entry *entry, *next, *tmp; 1220 struct vm_map_entry *child, *prev = NULL; 1221 vaddr_t end, orig_hint; 1222 1223 KASSERT((align & (align - 1)) == 0); 1224 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1225 1226 uvm_tree_sanity(map, "map_findspace entry"); 1227 1228 /* 1229 * remember the original hint. if we are aligning, then we 1230 * may have to try again with no alignment constraint if 1231 * we fail the first time. 1232 */ 1233 1234 orig_hint = hint; 1235 if (hint < map->min_offset) { /* check ranges ... */ 1236 if (flags & UVM_FLAG_FIXED) { 1237 return(NULL); 1238 } 1239 hint = map->min_offset; 1240 } 1241 if (hint > map->max_offset) { 1242 return(NULL); 1243 } 1244 1245 /* 1246 * Look for the first possible address; if there's already 1247 * something at this address, we have to start after it. 1248 */ 1249 1250 if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { 1251 if ((entry = map->first_free) != &map->header) 1252 hint = entry->end; 1253 } else { 1254 if (uvm_map_lookup_entry(map, hint, &tmp)) { 1255 /* "hint" address already in use ... */ 1256 if (flags & UVM_FLAG_FIXED) { 1257 return(NULL); 1258 } 1259 hint = tmp->end; 1260 } 1261 entry = tmp; 1262 } 1263 1264 if (flags & UVM_FLAG_FIXED) { 1265 end = hint + length; 1266 if (end > map->max_offset || end < hint) { 1267 goto error; 1268 } 1269 next = entry->next; 1270 if (next == &map->header || next->start >= end) 1271 goto found; 1272 return(NULL); /* only one shot at it ... 
*/ 1273 } 1274 1275 /* Try to find the space in the red-black tree */ 1276 1277 /* Check slot before any entry */ 1278 if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align)) 1279 goto found; 1280 1281 /* If there is not enough space in the whole tree, we fail */ 1282 tmp = RB_ROOT(&map->rbhead); 1283 if (tmp == NULL || tmp->space < length) 1284 goto error; 1285 1286 /* Find an entry close to hint that has enough space */ 1287 for (; tmp;) { 1288 if (tmp->end >= hint && 1289 (prev == NULL || tmp->end < prev->end)) { 1290 if (tmp->ownspace >= length) 1291 prev = tmp; 1292 else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL && 1293 child->space >= length) 1294 prev = tmp; 1295 } 1296 if (tmp->end < hint) 1297 child = RB_RIGHT(tmp, rb_entry); 1298 else if (tmp->end > hint) 1299 child = RB_LEFT(tmp, rb_entry); 1300 else { 1301 if (tmp->ownspace >= length) 1302 break; 1303 child = RB_RIGHT(tmp, rb_entry); 1304 } 1305 if (child == NULL || child->space < length) 1306 break; 1307 tmp = child; 1308 } 1309 1310 if (tmp != NULL && hint < tmp->end + tmp->ownspace) { 1311 /* 1312 * Check if the entry that we found satifies the 1313 * space requirement 1314 */ 1315 if (hint < tmp->end) 1316 hint = tmp->end; 1317 if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, 1318 align)) { 1319 entry = tmp; 1320 goto found; 1321 } else if (tmp->ownspace >= length) 1322 goto listsearch; 1323 } 1324 if (prev == NULL) 1325 goto error; 1326 1327 hint = prev->end; 1328 if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset, 1329 align)) { 1330 entry = prev; 1331 goto found; 1332 } else if (prev->ownspace >= length) 1333 goto listsearch; 1334 1335 tmp = RB_RIGHT(prev, rb_entry); 1336 for (;;) { 1337 KASSERT(tmp && tmp->space >= length); 1338 child = RB_LEFT(tmp, rb_entry); 1339 if (child && child->space >= length) { 1340 tmp = child; 1341 continue; 1342 } 1343 if (tmp->ownspace >= length) 1344 break; 1345 tmp = RB_RIGHT(tmp, rb_entry); 1346 } 1347 1348 hint = tmp->end; 1349 if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) { 1350 entry = tmp; 1351 goto found; 1352 } 1353 1354 /* 1355 * The tree fails to find an entry because of offset or alignment 1356 * restrictions. Search the list instead. 1357 */ 1358 listsearch: 1359 /* 1360 * Look through the rest of the map, trying to fit a new region in 1361 * the gap between existing regions, or after the very last region. 1362 * note: entry->end = base VA of current gap, 1363 * next->start = VA of end of current gap 1364 */ 1365 for (;; hint = (entry = next)->end) { 1366 /* 1367 * Find the end of the proposed new region. Be sure we didn't 1368 * go beyond the end of the map, or wrap around the address; 1369 * if so, we lose. Otherwise, if this is the last entry, or 1370 * if the proposed new region fits before the next entry, we 1371 * win. 1372 */ 1373 1374 #ifdef PMAP_PREFER 1375 /* 1376 * push hint forward as needed to avoid VAC alias problems. 1377 * we only do this if a valid offset is specified. 1378 */ 1379 if (uoffset != UVM_UNKNOWN_OFFSET) 1380 hint = PMAP_PREFER(uoffset, hint); 1381 #endif 1382 if (align != 0) { 1383 if ((hint & (align - 1)) != 0) 1384 hint = roundup(hint, align); 1385 /* 1386 * XXX Should we PMAP_PREFER() here again? 
1387 */ 1388 } 1389 end = hint + length; 1390 if (end > map->max_offset || end < hint) { 1391 goto error; 1392 } 1393 next = entry->next; 1394 if (next == &map->header || next->start >= end) 1395 break; 1396 } 1397 found: 1398 SAVE_HINT(map, map->hint, entry); 1399 *result = hint; 1400 return (entry); 1401 1402 error: 1403 if (align != 0) { 1404 return (uvm_map_findspace(map, orig_hint, 1405 length, result, uobj, uoffset, 0, flags)); 1406 } 1407 return (NULL); 1408 } 1409 1410 /* 1411 * U N M A P - m a i n e n t r y p o i n t 1412 */ 1413 1414 /* 1415 * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") 1416 * 1417 * => caller must check alignment and size 1418 * => map must be unlocked (we will lock it) 1419 */ 1420 void 1421 uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p) 1422 { 1423 vm_map_entry_t dead_entries; 1424 1425 /* 1426 * work now done by helper functions. wipe the pmap's and then 1427 * detach from the dead entries... 1428 */ 1429 vm_map_lock(map); 1430 uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE); 1431 vm_map_unlock(map); 1432 1433 if (dead_entries != NULL) 1434 uvm_unmap_detach(dead_entries, 0); 1435 1436 } 1437 1438 1439 /* 1440 * U N M A P - m a i n h e l p e r f u n c t i o n s 1441 */ 1442 1443 /* 1444 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 1445 * 1446 * => caller must check alignment and size 1447 * => map must be locked by caller 1448 * => we return a list of map entries that we've remove from the map 1449 * in "entry_list" 1450 */ 1451 1452 void 1453 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1454 struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes) 1455 { 1456 struct vm_map_entry *entry, *first_entry, *next; 1457 vaddr_t len; 1458 1459 VM_MAP_RANGE_CHECK(map, start, end); 1460 1461 uvm_tree_sanity(map, "unmap_remove entry"); 1462 1463 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1464 splassert(IPL_NONE); 1465 else 1466 splassert(IPL_VM); 1467 1468 /* 1469 * find first entry 1470 */ 1471 if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { 1472 /* clip and go... */ 1473 entry = first_entry; 1474 UVM_MAP_CLIP_START(map, entry, start); 1475 /* critical! prevents stale hint */ 1476 SAVE_HINT(map, entry, entry->prev); 1477 1478 } else { 1479 entry = first_entry->next; 1480 } 1481 1482 /* 1483 * Save the free space hint 1484 */ 1485 1486 if (map->first_free->start >= start) 1487 map->first_free = entry->prev; 1488 1489 /* 1490 * note: we now re-use first_entry for a different task. we remove 1491 * a number of map entries from the map and save them in a linked 1492 * list headed by "first_entry". once we remove them from the map 1493 * the caller should unlock the map and drop the references to the 1494 * backing objects [c.f. uvm_unmap_detach]. the object is to 1495 * separate unmapping from reference dropping. why? 1496 * [1] the map has to be locked for unmapping 1497 * [2] the map need not be locked for reference dropping 1498 * [3] dropping references may trigger pager I/O, and if we hit 1499 * a pager that does synchronous I/O we may have to wait for it. 1500 * [4] we would like all waiting for I/O to occur with maps unlocked 1501 * so that we don't block other threads. 1502 */ 1503 first_entry = NULL; 1504 *entry_list = NULL; /* to be safe */ 1505 1506 /* 1507 * break up the area into map entry sized regions and unmap. 
note 1508 * that all mappings have to be removed before we can even consider 1509 * dropping references to amaps or VM objects (otherwise we could end 1510 * up with a mapping to a page on the free list which would be very bad) 1511 */ 1512 1513 while ((entry != &map->header) && (entry->start < end)) { 1514 1515 UVM_MAP_CLIP_END(map, entry, end); 1516 next = entry->next; 1517 len = entry->end - entry->start; 1518 if (p && entry->object.uvm_obj == NULL) 1519 p->p_vmspace->vm_dused -= atop(len); 1520 1521 /* 1522 * unwire before removing addresses from the pmap; otherwise 1523 * unwiring will put the entries back into the pmap (XXX). 1524 */ 1525 1526 if (VM_MAPENT_ISWIRED(entry)) 1527 uvm_map_entry_unwire(map, entry); 1528 1529 /* 1530 * special case: handle mappings to anonymous kernel objects. 1531 * we want to free these pages right away... 1532 */ 1533 #ifdef KVA_GUARDPAGES 1534 if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) { 1535 entry->etype &= ~MAP_ET_KVAGUARD; 1536 kva_guardpages--; 1537 } else /* (code continues across line-break) */ 1538 #endif 1539 if (UVM_ET_ISHOLE(entry)) { 1540 if (!remove_holes) { 1541 entry = next; 1542 continue; 1543 } 1544 } else if (map->flags & VM_MAP_INTRSAFE) { 1545 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1546 pmap_kremove(entry->start, len); 1547 } else if (UVM_ET_ISOBJ(entry) && 1548 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1549 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1550 1551 /* 1552 * note: kernel object mappings are currently used in 1553 * two ways: 1554 * [1] "normal" mappings of pages in the kernel object 1555 * [2] uvm_km_valloc'd allocations in which we 1556 * pmap_enter in some non-kernel-object page 1557 * (e.g. vmapbuf). 1558 * 1559 * for case [1], we need to remove the mapping from 1560 * the pmap and then remove the page from the kernel 1561 * object (because, once pages in a kernel object are 1562 * unmapped they are no longer needed, unlike, say, 1563 * a vnode where you might want the data to persist 1564 * until flushed out of a queue). 1565 * 1566 * for case [2], we need to remove the mapping from 1567 * the pmap. there shouldn't be any pages at the 1568 * specified offset in the kernel object [but it 1569 * doesn't hurt to call uvm_km_pgremove just to be 1570 * safe?] 1571 * 1572 * uvm_km_pgremove currently does the following: 1573 * for pages in the kernel object in range: 1574 * - drops the swap slot 1575 * - uvm_pagefree the page 1576 * 1577 * note there is version of uvm_km_pgremove() that 1578 * is used for "intrsafe" objects. 1579 */ 1580 1581 /* 1582 * remove mappings from pmap and drop the pages 1583 * from the object. offsets are always relative 1584 * to vm_map_min(kernel_map). 1585 */ 1586 pmap_remove(pmap_kernel(), entry->start, entry->end); 1587 uvm_km_pgremove(entry->object.uvm_obj, 1588 entry->start - vm_map_min(kernel_map), 1589 entry->end - vm_map_min(kernel_map)); 1590 1591 /* 1592 * null out kernel_object reference, we've just 1593 * dropped it 1594 */ 1595 entry->etype &= ~UVM_ET_OBJ; 1596 entry->object.uvm_obj = NULL; /* to be safe */ 1597 1598 } else { 1599 /* 1600 * remove mappings the standard way. 1601 */ 1602 pmap_remove(map->pmap, entry->start, entry->end); 1603 } 1604 1605 /* 1606 * remove entry from map and put it on our list of entries 1607 * that we've nuked. then go do next entry. 1608 */ 1609 /* critical! 
prevents stale hint */ 1610 SAVE_HINT(map, entry, entry->prev); 1611 1612 uvm_map_entry_unlink(map, entry); 1613 map->size -= len; 1614 entry->next = first_entry; 1615 first_entry = entry; 1616 entry = next; /* next entry, please */ 1617 } 1618 #ifdef KVA_GUARDPAGES 1619 /* 1620 * entry points at the map-entry after the last-removed map-entry. 1621 */ 1622 if (map == kernel_map && entry != &map->header && 1623 entry->etype & MAP_ET_KVAGUARD && entry->start == end) { 1624 /* 1625 * Removed range is followed by guard page; 1626 * remove that guard page now (or it will stay forever). 1627 */ 1628 entry->etype &= ~MAP_ET_KVAGUARD; 1629 kva_guardpages--; 1630 1631 uvm_map_entry_unlink(map, entry); 1632 map->size -= len; 1633 entry->next = first_entry; 1634 first_entry = entry; 1635 entry = next; /* next entry, please */ 1636 } 1637 #endif 1638 /* if ((map->flags & VM_MAP_DYING) == 0) { */ 1639 pmap_update(vm_map_pmap(map)); 1640 /* } */ 1641 1642 1643 uvm_tree_sanity(map, "unmap_remove leave"); 1644 1645 /* 1646 * now we've cleaned up the map and are ready for the caller to drop 1647 * references to the mapped objects. 1648 */ 1649 1650 *entry_list = first_entry; 1651 } 1652 1653 /* 1654 * uvm_unmap_detach: drop references in a chain of map entries 1655 * 1656 * => we will free the map entries as we traverse the list. 1657 */ 1658 1659 void 1660 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 1661 { 1662 struct vm_map_entry *next_entry; 1663 1664 while (first_entry) { 1665 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 1666 1667 /* 1668 * drop reference to amap, if we've got one 1669 */ 1670 1671 if (first_entry->aref.ar_amap) 1672 uvm_map_unreference_amap(first_entry, flags); 1673 1674 /* 1675 * drop reference to our backing object, if we've got one 1676 */ 1677 1678 if (UVM_ET_ISSUBMAP(first_entry)) { 1679 /* ... unlikely to happen, but play it safe */ 1680 uvm_map_deallocate(first_entry->object.sub_map); 1681 } else { 1682 if (UVM_ET_ISOBJ(first_entry) && 1683 first_entry->object.uvm_obj->pgops->pgo_detach) 1684 first_entry->object.uvm_obj->pgops-> 1685 pgo_detach(first_entry->object.uvm_obj); 1686 } 1687 1688 next_entry = first_entry->next; 1689 uvm_mapent_free(first_entry); 1690 first_entry = next_entry; 1691 } 1692 } 1693 1694 /* 1695 * E X T R A C T I O N F U N C T I O N S 1696 */ 1697 1698 /* 1699 * uvm_map_reserve: reserve space in a vm_map for future use. 1700 * 1701 * => we reserve space in a map by putting a dummy map entry in the 1702 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 1703 * => map should be unlocked (we will write lock it) 1704 * => we return true if we were able to reserve space 1705 * => XXXCDC: should be inline? 1706 */ 1707 1708 int 1709 uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset, 1710 vsize_t align, vaddr_t *raddr) 1711 { 1712 1713 size = round_page(size); 1714 if (*raddr < vm_map_min(map)) 1715 *raddr = vm_map_min(map); /* hint */ 1716 1717 /* 1718 * reserve some virtual space. 1719 */ 1720 1721 if (uvm_map(map, raddr, size, NULL, offset, 0, 1722 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 1723 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { 1724 return (FALSE); 1725 } 1726 1727 return (TRUE); 1728 } 1729 1730 /* 1731 * uvm_map_replace: replace a reserved (blank) area of memory with 1732 * real mappings. 
1733 * 1734 * => caller must WRITE-LOCK the map 1735 * => we return TRUE if replacement was a success 1736 * => we expect the newents chain to have nnewents entries on it and 1737 * we expect newents->prev to point to the last entry on the list 1738 * => note newents is allowed to be NULL 1739 */ 1740 1741 int 1742 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 1743 struct vm_map_entry *newents, int nnewents) 1744 { 1745 struct vm_map_entry *oldent, *last; 1746 1747 uvm_tree_sanity(map, "map_replace entry"); 1748 1749 /* 1750 * first find the blank map entry at the specified address 1751 */ 1752 1753 if (!uvm_map_lookup_entry(map, start, &oldent)) { 1754 return(FALSE); 1755 } 1756 1757 /* 1758 * check to make sure we have a proper blank entry 1759 */ 1760 1761 if (oldent->start != start || oldent->end != end || 1762 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 1763 return (FALSE); 1764 } 1765 1766 #ifdef DIAGNOSTIC 1767 /* 1768 * sanity check the newents chain 1769 */ 1770 { 1771 struct vm_map_entry *tmpent = newents; 1772 int nent = 0; 1773 vaddr_t cur = start; 1774 1775 while (tmpent) { 1776 nent++; 1777 if (tmpent->start < cur) 1778 panic("uvm_map_replace1"); 1779 if (tmpent->start > tmpent->end || tmpent->end > end) { 1780 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", 1781 tmpent->start, tmpent->end, end); 1782 panic("uvm_map_replace2"); 1783 } 1784 cur = tmpent->end; 1785 if (tmpent->next) { 1786 if (tmpent->next->prev != tmpent) 1787 panic("uvm_map_replace3"); 1788 } else { 1789 if (newents->prev != tmpent) 1790 panic("uvm_map_replace4"); 1791 } 1792 tmpent = tmpent->next; 1793 } 1794 if (nent != nnewents) 1795 panic("uvm_map_replace5"); 1796 } 1797 #endif 1798 1799 /* 1800 * map entry is a valid blank! replace it. (this does all the 1801 * work of map entry link/unlink...). 1802 */ 1803 1804 if (newents) { 1805 last = newents->prev; /* we expect this */ 1806 1807 /* critical: flush stale hints out of map */ 1808 SAVE_HINT(map, map->hint, newents); 1809 if (map->first_free == oldent) 1810 map->first_free = last; 1811 1812 last->next = oldent->next; 1813 last->next->prev = last; 1814 1815 /* Fix RB tree */ 1816 uvm_rb_remove(map, oldent); 1817 1818 newents->prev = oldent->prev; 1819 newents->prev->next = newents; 1820 map->nentries = map->nentries + (nnewents - 1); 1821 1822 /* Fixup the RB tree */ 1823 { 1824 int i; 1825 struct vm_map_entry *tmp; 1826 1827 tmp = newents; 1828 for (i = 0; i < nnewents && tmp; i++) { 1829 uvm_rb_insert(map, tmp); 1830 tmp = tmp->next; 1831 } 1832 } 1833 } else { 1834 1835 /* critical: flush stale hints out of map */ 1836 SAVE_HINT(map, map->hint, oldent->prev); 1837 if (map->first_free == oldent) 1838 map->first_free = oldent->prev; 1839 1840 /* NULL list of new entries: just remove the old one */ 1841 uvm_map_entry_unlink(map, oldent); 1842 } 1843 1844 1845 uvm_tree_sanity(map, "map_replace leave"); 1846 1847 /* 1848 * now we can free the old blank entry, unlock the map and return. 
1849 */ 1850 1851 uvm_mapent_free(oldent); 1852 return(TRUE); 1853 } 1854 1855 /* 1856 * uvm_map_extract: extract a mapping from a map and put it somewhere 1857 * (maybe removing the old mapping) 1858 * 1859 * => maps should be unlocked (we will write lock them) 1860 * => returns 0 on success, error code otherwise 1861 * => start must be page aligned 1862 * => len must be page sized 1863 * => flags: 1864 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 1865 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 1866 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 1867 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 1868 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 1869 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 1870 * be used from within the kernel in a kernel level map <<< 1871 */ 1872 1873 int 1874 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 1875 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 1876 { 1877 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, 1878 oldstart; 1879 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry; 1880 struct vm_map_entry *deadentry, *oldentry; 1881 vsize_t elen; 1882 int nchain, error, copy_ok; 1883 1884 uvm_tree_sanity(srcmap, "map_extract src enter"); 1885 uvm_tree_sanity(dstmap, "map_extract dst enter"); 1886 1887 /* 1888 * step 0: sanity check: start must be on a page boundary, length 1889 * must be page sized. can't ask for CONTIG/QREF if you asked for 1890 * REMOVE. 1891 */ 1892 1893 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 1894 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 1895 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 1896 1897 /* 1898 * step 1: reserve space in the target map for the extracted area 1899 */ 1900 1901 dstaddr = vm_map_min(dstmap); 1902 if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE) 1903 return(ENOMEM); 1904 *dstaddrp = dstaddr; /* pass address back to caller */ 1905 1906 /* 1907 * step 2: setup for the extraction process loop by init'ing the 1908 * map entry chain, locking src map, and looking up the first useful 1909 * entry in the map. 1910 */ 1911 1912 end = start + len; 1913 newend = dstaddr + len; 1914 chain = endchain = NULL; 1915 nchain = 0; 1916 vm_map_lock(srcmap); 1917 1918 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 1919 1920 /* "start" is within an entry */ 1921 if (flags & UVM_EXTRACT_QREF) { 1922 1923 /* 1924 * for quick references we don't clip the entry, so 1925 * the entry may map space "before" the starting 1926 * virtual address... this is the "fudge" factor 1927 * (which can be non-zero only the first time 1928 * through the "while" loop in step 3). 1929 */ 1930 1931 fudge = start - entry->start; 1932 } else { 1933 1934 /* 1935 * normal reference: we clip the map to fit (thus 1936 * fudge is zero) 1937 */ 1938 1939 UVM_MAP_CLIP_START(srcmap, entry, start); 1940 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 1941 fudge = 0; 1942 } 1943 } else { 1944 1945 /* "start" is not within an entry ... skip to next entry */ 1946 if (flags & UVM_EXTRACT_CONTIG) { 1947 error = EINVAL; 1948 goto bad; /* definite hole here ... */ 1949 } 1950 1951 entry = entry->next; 1952 fudge = 0; 1953 } 1954 1955 /* save values from srcmap for step 6 */ 1956 orig_entry = entry; 1957 orig_fudge = fudge; 1958 1959 /* 1960 * step 3: now start looping through the map entries, extracting 1961 * as we go. 
1962 */ 1963 1964 while (entry->start < end && entry != &srcmap->header) { 1965 1966 /* if we are not doing a quick reference, clip it */ 1967 if ((flags & UVM_EXTRACT_QREF) == 0) 1968 UVM_MAP_CLIP_END(srcmap, entry, end); 1969 1970 /* clear needs_copy (allow chunking) */ 1971 if (UVM_ET_ISNEEDSCOPY(entry)) { 1972 if (fudge) 1973 oldstart = entry->start; 1974 else 1975 oldstart = 0; /* XXX: gcc */ 1976 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); 1977 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 1978 error = ENOMEM; 1979 goto bad; 1980 } 1981 1982 /* amap_copy could clip (during chunk)! update fudge */ 1983 if (fudge) { 1984 fudge = fudge - (entry->start - oldstart); 1985 orig_fudge = fudge; 1986 } 1987 } 1988 1989 /* calculate the offset of this from "start" */ 1990 oldoffset = (entry->start + fudge) - start; 1991 1992 /* allocate a new map entry */ 1993 newentry = uvm_mapent_alloc(dstmap, flags); 1994 if (newentry == NULL) { 1995 error = ENOMEM; 1996 goto bad; 1997 } 1998 1999 /* set up new map entry */ 2000 newentry->next = NULL; 2001 newentry->prev = endchain; 2002 newentry->start = dstaddr + oldoffset; 2003 newentry->end = 2004 newentry->start + (entry->end - (entry->start + fudge)); 2005 if (newentry->end > newend || newentry->end < newentry->start) 2006 newentry->end = newend; 2007 newentry->object.uvm_obj = entry->object.uvm_obj; 2008 if (newentry->object.uvm_obj) { 2009 if (newentry->object.uvm_obj->pgops->pgo_reference) 2010 newentry->object.uvm_obj->pgops-> 2011 pgo_reference(newentry->object.uvm_obj); 2012 newentry->offset = entry->offset + fudge; 2013 } else { 2014 newentry->offset = 0; 2015 } 2016 newentry->etype = entry->etype; 2017 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2018 entry->max_protection : entry->protection; 2019 newentry->max_protection = entry->max_protection; 2020 newentry->inheritance = entry->inheritance; 2021 newentry->wired_count = 0; 2022 newentry->aref.ar_amap = entry->aref.ar_amap; 2023 if (newentry->aref.ar_amap) { 2024 newentry->aref.ar_pageoff = 2025 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2026 uvm_map_reference_amap(newentry, AMAP_SHARED | 2027 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2028 } else { 2029 newentry->aref.ar_pageoff = 0; 2030 } 2031 newentry->advice = entry->advice; 2032 2033 /* now link it on the chain */ 2034 nchain++; 2035 if (endchain == NULL) { 2036 chain = endchain = newentry; 2037 } else { 2038 endchain->next = newentry; 2039 endchain = newentry; 2040 } 2041 2042 /* end of 'while' loop! */ 2043 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2044 (entry->next == &srcmap->header || 2045 entry->next->start != entry->end)) { 2046 error = EINVAL; 2047 goto bad; 2048 } 2049 entry = entry->next; 2050 fudge = 0; 2051 } 2052 2053 /* 2054 * step 4: close off chain (in format expected by uvm_map_replace) 2055 */ 2056 2057 if (chain) 2058 chain->prev = endchain; 2059 2060 /* 2061 * step 5: attempt to lock the dest map so we can pmap_copy. 
	 * note usage of copy_ok:
	 *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
	 *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
	 */

	if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
		copy_ok = 1;
		if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
		    nchain)) {
			if (srcmap != dstmap)
				vm_map_unlock(dstmap);
			error = EIO;
			goto bad;
		}
	} else {
		copy_ok = 0;
		/* replace deferred until step 7 */
	}

	/*
	 * step 6: traverse the srcmap a second time to do the following:
	 *  - if we got a lock on the dstmap do pmap_copy
	 *  - if UVM_EXTRACT_REMOVE remove the entries
	 * we make use of orig_entry and orig_fudge (saved in step 2)
	 */

	if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {

		/* purge possible stale hints from srcmap */
		if (flags & UVM_EXTRACT_REMOVE) {
			SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
			if (srcmap->first_free->start >= start)
				srcmap->first_free = orig_entry->prev;
		}

		entry = orig_entry;
		fudge = orig_fudge;
		deadentry = NULL;	/* for UVM_EXTRACT_REMOVE */

		while (entry->start < end && entry != &srcmap->header) {
			if (copy_ok) {
				oldoffset = (entry->start + fudge) - start;
				elen = MIN(end, entry->end) -
				    (entry->start + fudge);
				pmap_copy(dstmap->pmap, srcmap->pmap,
				    dstaddr + oldoffset, elen,
				    entry->start + fudge);
			}

			/* we advance "entry" in the following if statement */
			if (flags & UVM_EXTRACT_REMOVE) {
				pmap_remove(srcmap->pmap, entry->start,
				    entry->end);
				oldentry = entry;	/* save entry */
				entry = entry->next;	/* advance */
				uvm_map_entry_unlink(srcmap, oldentry);
				/* add to dead list */
				oldentry->next = deadentry;
				deadentry = oldentry;
			} else {
				entry = entry->next;	/* advance */
			}

			/* end of 'while' loop */
			fudge = 0;
		}
		pmap_update(srcmap->pmap);

		/*
		 * unlock dstmap.  we will dispose of deadentry in
		 * step 7 if needed
		 */

		if (copy_ok && srcmap != dstmap)
			vm_map_unlock(dstmap);

	}
	else
		deadentry = NULL;	/* XXX: gcc */

	/*
	 * step 7: we are done with the source map, unlock.  if copy_ok
	 * is 0 then we have not replaced the dummy mapping in dstmap yet
	 * and we need to do so now.
	 */

	vm_map_unlock(srcmap);
	if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
		uvm_unmap_detach(deadentry, 0);	/* dispose of old entries */

	/* now do the replacement if we didn't do it in step 5 */
	if (copy_ok == 0) {
		vm_map_lock(dstmap);
		error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
		    nchain);
		vm_map_unlock(dstmap);

		if (error == FALSE) {
			error = EIO;
			goto bad2;
		}
	}

	uvm_tree_sanity(srcmap, "map_extract src leave");
	uvm_tree_sanity(dstmap, "map_extract dst leave");

	return(0);

	/*
	 * bad: failure recovery
	 */
bad:
	vm_map_unlock(srcmap);
bad2:			/* src already unlocked */
	if (chain)
		uvm_unmap_detach(chain,
		    (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);

	uvm_tree_sanity(srcmap, "map_extract src err leave");
	uvm_tree_sanity(dstmap, "map_extract dst err leave");

	uvm_unmap(dstmap, dstaddr, dstaddr+len);	/* ???
*/ 2184 return(error); 2185 } 2186 2187 /* end of extraction functions */ 2188 2189 /* 2190 * uvm_map_submap: punch down part of a map into a submap 2191 * 2192 * => only the kernel_map is allowed to be submapped 2193 * => the purpose of submapping is to break up the locking granularity 2194 * of a larger map 2195 * => the range specified must have been mapped previously with a uvm_map() 2196 * call [with uobj==NULL] to create a blank map entry in the main map. 2197 * [And it had better still be blank!] 2198 * => maps which contain submaps should never be copied or forked. 2199 * => to remove a submap, use uvm_unmap() on the main map 2200 * and then uvm_map_deallocate() the submap. 2201 * => main map must be unlocked. 2202 * => submap must have been init'd and have a zero reference count. 2203 * [need not be locked as we don't actually reference it] 2204 */ 2205 2206 int 2207 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2208 struct vm_map *submap) 2209 { 2210 struct vm_map_entry *entry; 2211 int result; 2212 2213 vm_map_lock(map); 2214 2215 VM_MAP_RANGE_CHECK(map, start, end); 2216 2217 if (uvm_map_lookup_entry(map, start, &entry)) { 2218 UVM_MAP_CLIP_START(map, entry, start); 2219 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 2220 } else { 2221 entry = NULL; 2222 } 2223 2224 if (entry != NULL && 2225 entry->start == start && entry->end == end && 2226 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2227 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2228 entry->etype |= UVM_ET_SUBMAP; 2229 entry->object.sub_map = submap; 2230 entry->offset = 0; 2231 uvm_map_reference(submap); 2232 result = 0; 2233 } else { 2234 result = EINVAL; 2235 } 2236 vm_map_unlock(map); 2237 return(result); 2238 } 2239 2240 2241 /* 2242 * uvm_map_protect: change map protection 2243 * 2244 * => set_max means set max_protection. 2245 * => map must be unlocked. 2246 */ 2247 2248 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2249 ~VM_PROT_WRITE : VM_PROT_ALL) 2250 #define max(a,b) ((a) > (b) ? (a) : (b)) 2251 2252 int 2253 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2254 vm_prot_t new_prot, boolean_t set_max) 2255 { 2256 struct vm_map_entry *current, *entry; 2257 int error = 0; 2258 2259 vm_map_lock(map); 2260 2261 VM_MAP_RANGE_CHECK(map, start, end); 2262 2263 if (uvm_map_lookup_entry(map, start, &entry)) { 2264 UVM_MAP_CLIP_START(map, entry, start); 2265 } else { 2266 entry = entry->next; 2267 } 2268 2269 /* 2270 * make a first pass to check for protection violations. 2271 */ 2272 2273 current = entry; 2274 while ((current != &map->header) && (current->start < end)) { 2275 if (UVM_ET_ISSUBMAP(current)) { 2276 error = EINVAL; 2277 goto out; 2278 } 2279 if ((new_prot & current->max_protection) != new_prot) { 2280 error = EACCES; 2281 goto out; 2282 } 2283 current = current->next; 2284 } 2285 2286 /* go back and fix up protections (no need to clip this time). */ 2287 2288 current = entry; 2289 2290 while ((current != &map->header) && (current->start < end)) { 2291 vm_prot_t old_prot; 2292 2293 UVM_MAP_CLIP_END(map, current, end); 2294 2295 old_prot = current->protection; 2296 if (set_max) 2297 current->protection = 2298 (current->max_protection = new_prot) & old_prot; 2299 else 2300 current->protection = new_prot; 2301 2302 /* 2303 * update physical map if necessary. worry about copy-on-write 2304 * here -- CHECK THIS XXX 2305 */ 2306 2307 if (current->protection != old_prot) { 2308 /* update pmap! 
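		 * MASK() strips VM_PROT_WRITE from copy-on-write entries,
		 * so the pmap never gets a writable mapping before the
		 * copy-on-write fault has been resolved.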
*/ 2309 if ((current->protection & MASK(entry)) == PROT_NONE && 2310 VM_MAPENT_ISWIRED(entry)) 2311 current->wired_count--; 2312 pmap_protect(map->pmap, current->start, current->end, 2313 current->protection & MASK(entry)); 2314 } 2315 2316 /* 2317 * If the map is configured to lock any future mappings, 2318 * wire this entry now if the old protection was VM_PROT_NONE 2319 * and the new protection is not VM_PROT_NONE. 2320 */ 2321 2322 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 2323 VM_MAPENT_ISWIRED(entry) == 0 && 2324 old_prot == VM_PROT_NONE && 2325 new_prot != VM_PROT_NONE) { 2326 if (uvm_map_pageable(map, entry->start, entry->end, 2327 FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 2328 /* 2329 * If locking the entry fails, remember the 2330 * error if it's the first one. Note we 2331 * still continue setting the protection in 2332 * the map, but will return the resource 2333 * shortage condition regardless. 2334 * 2335 * XXX Ignore what the actual error is, 2336 * XXX just call it a resource shortage 2337 * XXX so that it doesn't get confused 2338 * XXX what uvm_map_protect() itself would 2339 * XXX normally return. 2340 */ 2341 error = ENOMEM; 2342 } 2343 } 2344 2345 current = current->next; 2346 } 2347 pmap_update(map->pmap); 2348 2349 out: 2350 vm_map_unlock(map); 2351 return (error); 2352 } 2353 2354 #undef max 2355 #undef MASK 2356 2357 /* 2358 * uvm_map_inherit: set inheritance code for range of addrs in map. 2359 * 2360 * => map must be unlocked 2361 * => note that the inherit code is used during a "fork". see fork 2362 * code for details. 2363 */ 2364 2365 int 2366 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 2367 vm_inherit_t new_inheritance) 2368 { 2369 struct vm_map_entry *entry; 2370 2371 switch (new_inheritance) { 2372 case MAP_INHERIT_NONE: 2373 case MAP_INHERIT_COPY: 2374 case MAP_INHERIT_SHARE: 2375 break; 2376 default: 2377 return (EINVAL); 2378 } 2379 2380 vm_map_lock(map); 2381 2382 VM_MAP_RANGE_CHECK(map, start, end); 2383 2384 if (uvm_map_lookup_entry(map, start, &entry)) { 2385 UVM_MAP_CLIP_START(map, entry, start); 2386 } else { 2387 entry = entry->next; 2388 } 2389 2390 while ((entry != &map->header) && (entry->start < end)) { 2391 UVM_MAP_CLIP_END(map, entry, end); 2392 entry->inheritance = new_inheritance; 2393 entry = entry->next; 2394 } 2395 2396 vm_map_unlock(map); 2397 return (0); 2398 } 2399 2400 /* 2401 * uvm_map_advice: set advice code for range of addrs in map. 2402 * 2403 * => map must be unlocked 2404 */ 2405 2406 int 2407 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 2408 { 2409 struct vm_map_entry *entry; 2410 2411 switch (new_advice) { 2412 case MADV_NORMAL: 2413 case MADV_RANDOM: 2414 case MADV_SEQUENTIAL: 2415 /* nothing special here */ 2416 break; 2417 2418 default: 2419 return (EINVAL); 2420 } 2421 vm_map_lock(map); 2422 VM_MAP_RANGE_CHECK(map, start, end); 2423 if (uvm_map_lookup_entry(map, start, &entry)) { 2424 UVM_MAP_CLIP_START(map, entry, start); 2425 } else { 2426 entry = entry->next; 2427 } 2428 2429 /* 2430 * XXXJRT: disallow holes? 2431 */ 2432 2433 while ((entry != &map->header) && (entry->start < end)) { 2434 UVM_MAP_CLIP_END(map, entry, end); 2435 2436 entry->advice = new_advice; 2437 entry = entry->next; 2438 } 2439 2440 vm_map_unlock(map); 2441 return (0); 2442 } 2443 2444 /* 2445 * uvm_map_pageable: sets the pageability of a range in a map. 2446 * 2447 * => wires map entries. should not be used for transient page locking. 
 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
 * => regions specified as not pageable require lock-down (wired) memory
 *	and page tables.
 * => map must never be read-locked
 * => if UVM_LK_ENTER is set in lockflags, the map is already write-locked
 * => we always unlock the map, since we must downgrade to a read-lock
 *	to call uvm_fault_wire()
 * => XXXCDC: check this and try and clean it up.
 */

int
uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
    boolean_t new_pageable, int lockflags)
{
	struct vm_map_entry *entry, *start_entry, *failed_entry;
	int rv;
#ifdef DIAGNOSTIC
	u_int timestamp_save;
#endif
	KASSERT(map->flags & VM_MAP_PAGEABLE);

	if ((lockflags & UVM_LK_ENTER) == 0)
		vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	/*
	 * only one pageability change may take place at one time, since
	 * uvm_fault_wire assumes it will be called only once for each
	 * wiring/unwiring.  therefore, we have to make sure we're actually
	 * changing the pageability for the entire region.  we do so before
	 * making any changes.
	 */

	if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
		if ((lockflags & UVM_LK_EXIT) == 0)
			vm_map_unlock(map);

		return (EFAULT);
	}
	entry = start_entry;

	/*
	 * handle wiring and unwiring separately.
	 */

	if (new_pageable) {		/* unwire */
		UVM_MAP_CLIP_START(map, entry, start);

		/*
		 * unwiring.  first ensure that the range to be unwired is
		 * really wired down and that there are no holes.
		 */

		while ((entry != &map->header) && (entry->start < end)) {
			if (entry->wired_count == 0 ||
			    (entry->end < end &&
			     (entry->next == &map->header ||
			      entry->next->start > entry->end))) {
				if ((lockflags & UVM_LK_EXIT) == 0)
					vm_map_unlock(map);
				return (EINVAL);
			}
			entry = entry->next;
		}

		/*
		 * POSIX 1003.1b - a single munlock call unlocks a region,
		 * regardless of the number of mlock calls made on that
		 * region.
		 */

		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			UVM_MAP_CLIP_END(map, entry, end);
			if (VM_MAPENT_ISWIRED(entry))
				uvm_map_entry_unwire(map, entry);
			entry = entry->next;
		}
		if ((lockflags & UVM_LK_EXIT) == 0)
			vm_map_unlock(map);
		return (0);
	}

	/*
	 * wire case: in two passes [XXXCDC: ugly block of code here]
	 *
	 * 1: holding the write lock, we create any anonymous maps that need
	 *    to be created.  then we clip each map entry to the region to
	 *    be wired and increment its wiring count.
	 *
	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
	 *    in the pages for any newly wired area (wired_count == 1).
	 *
	 * downgrading to a read lock for uvm_fault_wire avoids a possible
	 * deadlock with another thread that may have faulted on one of
	 * the pages to be wired (it would mark the page busy, blocking
	 * us, then in turn block on the map lock that we hold).  because
	 * of problems in the recursive lock package, we cannot upgrade
	 * to a write lock in vm_map_lookup.  thus, any actions that
	 * require the write lock must be done beforehand.  because we
	 * keep the read lock on the map, the copy-on-write status of the
	 * entries we modify here cannot change.
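	 *
	 * for reference, a wiring caller in the style of sys_mlock()
	 * would look roughly like the sketch below, where FALSE selects
	 * the wire (not unwire) case; the rounding helpers and argument
	 * names are illustrative rather than taken from this file:
	 *
	 *	error = uvm_map_pageable(&p->p_vmspace->vm_map,
	 *	    trunc_page(addr), round_page(addr + size), FALSE, 0);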
2551 */ 2552 2553 while ((entry != &map->header) && (entry->start < end)) { 2554 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2555 2556 /* 2557 * perform actions of vm_map_lookup that need the 2558 * write lock on the map: create an anonymous map 2559 * for a copy-on-write region, or an anonymous map 2560 * for a zero-fill region. (XXXCDC: submap case 2561 * ok?) 2562 */ 2563 2564 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 2565 if (UVM_ET_ISNEEDSCOPY(entry) && 2566 ((entry->protection & VM_PROT_WRITE) || 2567 (entry->object.uvm_obj == NULL))) { 2568 amap_copy(map, entry, M_WAITOK, TRUE, 2569 start, end); 2570 /* XXXCDC: wait OK? */ 2571 } 2572 } 2573 } 2574 UVM_MAP_CLIP_START(map, entry, start); 2575 UVM_MAP_CLIP_END(map, entry, end); 2576 entry->wired_count++; 2577 2578 /* 2579 * Check for holes 2580 */ 2581 2582 if (entry->protection == VM_PROT_NONE || 2583 (entry->end < end && 2584 (entry->next == &map->header || 2585 entry->next->start > entry->end))) { 2586 2587 /* 2588 * found one. amap creation actions do not need to 2589 * be undone, but the wired counts need to be restored. 2590 */ 2591 2592 while (entry != &map->header && entry->end > start) { 2593 entry->wired_count--; 2594 entry = entry->prev; 2595 } 2596 if ((lockflags & UVM_LK_EXIT) == 0) 2597 vm_map_unlock(map); 2598 return (EINVAL); 2599 } 2600 entry = entry->next; 2601 } 2602 2603 /* 2604 * Pass 2. 2605 */ 2606 2607 #ifdef DIAGNOSTIC 2608 timestamp_save = map->timestamp; 2609 #endif 2610 vm_map_busy(map); 2611 vm_map_downgrade(map); 2612 2613 rv = 0; 2614 entry = start_entry; 2615 while (entry != &map->header && entry->start < end) { 2616 if (entry->wired_count == 1) { 2617 rv = uvm_fault_wire(map, entry->start, entry->end, 2618 entry->protection); 2619 if (rv) { 2620 /* 2621 * wiring failed. break out of the loop. 2622 * we'll clean up the map below, once we 2623 * have a write lock again. 2624 */ 2625 break; 2626 } 2627 } 2628 entry = entry->next; 2629 } 2630 2631 if (rv) { /* failed? */ 2632 2633 /* 2634 * Get back to an exclusive (write) lock. 2635 */ 2636 2637 vm_map_upgrade(map); 2638 vm_map_unbusy(map); 2639 2640 #ifdef DIAGNOSTIC 2641 if (timestamp_save != map->timestamp) 2642 panic("uvm_map_pageable: stale map"); 2643 #endif 2644 2645 /* 2646 * first drop the wiring count on all the entries 2647 * which haven't actually been wired yet. 2648 */ 2649 2650 failed_entry = entry; 2651 while (entry != &map->header && entry->start < end) { 2652 entry->wired_count--; 2653 entry = entry->next; 2654 } 2655 2656 /* 2657 * now, unwire all the entries that were successfully 2658 * wired above. 2659 */ 2660 2661 entry = start_entry; 2662 while (entry != failed_entry) { 2663 entry->wired_count--; 2664 if (VM_MAPENT_ISWIRED(entry) == 0) 2665 uvm_map_entry_unwire(map, entry); 2666 entry = entry->next; 2667 } 2668 if ((lockflags & UVM_LK_EXIT) == 0) 2669 vm_map_unlock(map); 2670 return(rv); 2671 } 2672 2673 /* We are holding a read lock here. */ 2674 if ((lockflags & UVM_LK_EXIT) == 0) { 2675 vm_map_unbusy(map); 2676 vm_map_unlock_read(map); 2677 } else { 2678 2679 /* 2680 * Get back to an exclusive (write) lock. 2681 */ 2682 2683 vm_map_upgrade(map); 2684 vm_map_unbusy(map); 2685 } 2686 2687 return (0); 2688 } 2689 2690 /* 2691 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2692 * all mapped regions. 2693 * 2694 * => map must not be locked. 2695 * => if no flags are specified, all regions are unwired. 2696 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 
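 * => e.g. an mlockall(2)-style caller would do roughly
 *
 *	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
 *	    MCL_CURRENT | MCL_FUTURE, lockmax);
 *
 *    where "lockmax" stands in for the caller's wired-memory limit,
 *    while the munlockall() path passes flags == 0 to unwire everything.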
2697 */ 2698 2699 int 2700 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2701 { 2702 struct vm_map_entry *entry, *failed_entry; 2703 vsize_t size; 2704 int error; 2705 #ifdef DIAGNOSTIC 2706 u_int timestamp_save; 2707 #endif 2708 2709 KASSERT(map->flags & VM_MAP_PAGEABLE); 2710 2711 vm_map_lock(map); 2712 2713 /* 2714 * handle wiring and unwiring separately. 2715 */ 2716 2717 if (flags == 0) { /* unwire */ 2718 /* 2719 * POSIX 1003.1b -- munlockall unlocks all regions, 2720 * regardless of how many times mlockall has been called. 2721 */ 2722 for (entry = map->header.next; entry != &map->header; 2723 entry = entry->next) { 2724 if (VM_MAPENT_ISWIRED(entry)) 2725 uvm_map_entry_unwire(map, entry); 2726 } 2727 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2728 vm_map_unlock(map); 2729 return (0); 2730 2731 /* 2732 * end of unwire case! 2733 */ 2734 } 2735 2736 if (flags & MCL_FUTURE) { 2737 /* 2738 * must wire all future mappings; remember this. 2739 */ 2740 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2741 } 2742 2743 if ((flags & MCL_CURRENT) == 0) { 2744 /* 2745 * no more work to do! 2746 */ 2747 vm_map_unlock(map); 2748 return (0); 2749 } 2750 2751 /* 2752 * wire case: in three passes [XXXCDC: ugly block of code here] 2753 * 2754 * 1: holding the write lock, count all pages mapped by non-wired 2755 * entries. if this would cause us to go over our limit, we fail. 2756 * 2757 * 2: still holding the write lock, we create any anonymous maps that 2758 * need to be created. then we increment its wiring count. 2759 * 2760 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 2761 * in the pages for any newly wired area (wired_count == 1). 2762 * 2763 * downgrading to a read lock for uvm_fault_wire avoids a possible 2764 * deadlock with another thread that may have faulted on one of 2765 * the pages to be wired (it would mark the page busy, blocking 2766 * us, then in turn block on the map lock that we hold). because 2767 * of problems in the recursive lock package, we cannot upgrade 2768 * to a write lock in vm_map_lookup. thus, any actions that 2769 * require the write lock must be done beforehand. because we 2770 * keep the read lock on the map, the copy-on-write status of the 2771 * entries we modify here cannot change. 2772 */ 2773 2774 for (size = 0, entry = map->header.next; entry != &map->header; 2775 entry = entry->next) { 2776 if (entry->protection != VM_PROT_NONE && 2777 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2778 size += entry->end - entry->start; 2779 } 2780 } 2781 2782 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2783 vm_map_unlock(map); 2784 return (ENOMEM); /* XXX overloaded */ 2785 } 2786 2787 /* XXX non-pmap_wired_count case must be handled by caller */ 2788 #ifdef pmap_wired_count 2789 if (limit != 0 && 2790 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 2791 vm_map_unlock(map); 2792 return (ENOMEM); /* XXX overloaded */ 2793 } 2794 #endif 2795 2796 /* 2797 * Pass 2. 2798 */ 2799 2800 for (entry = map->header.next; entry != &map->header; 2801 entry = entry->next) { 2802 if (entry->protection == VM_PROT_NONE) 2803 continue; 2804 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2805 /* 2806 * perform actions of vm_map_lookup that need the 2807 * write lock on the map: create an anonymous map 2808 * for a copy-on-write region, or an anonymous map 2809 * for a zero-fill region. (XXXCDC: submap case 2810 * ok?) 
2811 */ 2812 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 2813 if (UVM_ET_ISNEEDSCOPY(entry) && 2814 ((entry->protection & VM_PROT_WRITE) || 2815 (entry->object.uvm_obj == NULL))) { 2816 amap_copy(map, entry, M_WAITOK, TRUE, 2817 entry->start, entry->end); 2818 /* XXXCDC: wait OK? */ 2819 } 2820 } 2821 } 2822 entry->wired_count++; 2823 } 2824 2825 /* 2826 * Pass 3. 2827 */ 2828 2829 #ifdef DIAGNOSTIC 2830 timestamp_save = map->timestamp; 2831 #endif 2832 vm_map_busy(map); 2833 vm_map_downgrade(map); 2834 2835 for (error = 0, entry = map->header.next; 2836 entry != &map->header && error == 0; 2837 entry = entry->next) { 2838 if (entry->wired_count == 1) { 2839 error = uvm_fault_wire(map, entry->start, entry->end, 2840 entry->protection); 2841 } 2842 } 2843 2844 if (error) { /* failed? */ 2845 /* 2846 * Get back an exclusive (write) lock. 2847 */ 2848 vm_map_upgrade(map); 2849 vm_map_unbusy(map); 2850 2851 #ifdef DIAGNOSTIC 2852 if (timestamp_save != map->timestamp) 2853 panic("uvm_map_pageable_all: stale map"); 2854 #endif 2855 2856 /* 2857 * first drop the wiring count on all the entries 2858 * which haven't actually been wired yet. 2859 * 2860 * Skip VM_PROT_NONE entries like we did above. 2861 */ 2862 failed_entry = entry; 2863 for (/* nothing */; entry != &map->header; 2864 entry = entry->next) { 2865 if (entry->protection == VM_PROT_NONE) 2866 continue; 2867 entry->wired_count--; 2868 } 2869 2870 /* 2871 * now, unwire all the entries that were successfully 2872 * wired above. 2873 * 2874 * Skip VM_PROT_NONE entries like we did above. 2875 */ 2876 for (entry = map->header.next; entry != failed_entry; 2877 entry = entry->next) { 2878 if (entry->protection == VM_PROT_NONE) 2879 continue; 2880 entry->wired_count--; 2881 if (VM_MAPENT_ISWIRED(entry)) 2882 uvm_map_entry_unwire(map, entry); 2883 } 2884 vm_map_unlock(map); 2885 return (error); 2886 } 2887 2888 /* We are holding a read lock here. */ 2889 vm_map_unbusy(map); 2890 vm_map_unlock_read(map); 2891 2892 return (0); 2893 } 2894 2895 /* 2896 * uvm_map_clean: clean out a map range 2897 * 2898 * => valid flags: 2899 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 2900 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 2901 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 2902 * if (flags & PGO_FREE): any cached pages are freed after clean 2903 * => returns an error if any part of the specified range isn't mapped 2904 * => never a need to flush amap layer since the anonymous memory has 2905 * no permanent home, but may deactivate pages there 2906 * => called from sys_msync() and sys_madvise() 2907 * => caller must not write-lock map (read OK). 2908 * => we may sleep while cleaning if SYNCIO [with map read-locked] 2909 */ 2910 2911 int amap_clean_works = 1; /* XXX for now, just in case... */ 2912 2913 int 2914 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 2915 { 2916 struct vm_map_entry *current, *entry; 2917 struct uvm_object *uobj; 2918 struct vm_amap *amap; 2919 struct vm_anon *anon; 2920 struct vm_page *pg; 2921 vaddr_t offset; 2922 vsize_t size; 2923 int rv, error, refs; 2924 2925 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 2926 (PGO_FREE|PGO_DEACTIVATE)); 2927 2928 vm_map_lock_read(map); 2929 VM_MAP_RANGE_CHECK(map, start, end); 2930 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { 2931 vm_map_unlock_read(map); 2932 return (EFAULT); 2933 } 2934 2935 /* 2936 * Make a first pass to check for holes. 
	 */

	for (current = entry; current->start < end; current = current->next) {
		if (UVM_ET_ISSUBMAP(current)) {
			vm_map_unlock_read(map);
			return (EINVAL);
		}
		if (end > current->end && (current->next == &map->header ||
		    current->end != current->next->start)) {
			vm_map_unlock_read(map);
			return (EFAULT);
		}
	}

	error = 0;

	for (current = entry; current->start < end; current = current->next) {
		amap = current->aref.ar_amap;	/* top layer */
		uobj = current->object.uvm_obj;	/* bottom layer */
		KASSERT(start >= current->start);

		/*
		 * No amap cleaning necessary if:
		 *
		 *	(1) There's no amap.
		 *
		 *	(2) We're not deactivating or freeing pages.
		 */

		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
			goto flush_object;

		/* XXX for now, just in case... */
		if (amap_clean_works == 0)
			goto flush_object;

		offset = start - current->start;
		size = MIN(end, current->end) - start;
		for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
			anon = amap_lookup(&current->aref, offset);
			if (anon == NULL)
				continue;

			simple_lock(&anon->an_lock);

			pg = anon->an_page;
			if (pg == NULL) {
				simple_unlock(&anon->an_lock);
				continue;
			}

			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {

			/*
			 * XXX In these first 3 cases, we always just
			 * XXX deactivate the page.  We may want to
			 * XXX handle the different cases more
			 * XXX specifically, in the future.
			 */

			case PGO_CLEANIT|PGO_FREE:
			case PGO_CLEANIT|PGO_DEACTIVATE:
			case PGO_DEACTIVATE:
 deactivate_it:
				/* skip the page if it's loaned or wired */
				if (pg->loan_count != 0 ||
				    pg->wire_count != 0) {
					simple_unlock(&anon->an_lock);
					continue;
				}

				uvm_lock_pageq();

				/*
				 * skip the page if it's not actually owned
				 * by the anon (may simply be loaned to the
				 * anon).
				 */

				if ((pg->pg_flags & PQ_ANON) == 0) {
					KASSERT(pg->uobject == NULL);
					uvm_unlock_pageq();
					simple_unlock(&anon->an_lock);
					continue;
				}
				KASSERT(pg->uanon == anon);

#ifdef UBC
				/* ...and deactivate the page. */
				pmap_clear_reference(pg);
#else
				/* zap all mappings for the page. */
				pmap_page_protect(pg, VM_PROT_NONE);

				/* ...and deactivate the page. */
#endif
				uvm_pagedeactivate(pg);

				uvm_unlock_pageq();
				simple_unlock(&anon->an_lock);
				continue;

			case PGO_FREE:

				/*
				 * If there are multiple references to
				 * the amap, just deactivate the page.
				 */

				if (amap_refs(amap) > 1)
					goto deactivate_it;

				/* XXX skip the page if it's wired */
				if (pg->wire_count != 0) {
					simple_unlock(&anon->an_lock);
					continue;
				}
				amap_unadd(&current->aref, offset);
				refs = --anon->an_ref;
				simple_unlock(&anon->an_lock);
				if (refs == 0)
					uvm_anfree(anon);
				continue;

			default:
				panic("uvm_map_clean: weird flags");
			}
		}

 flush_object:
		/*
		 * flush pages if we've got a valid backing object.
		 *
		 * Don't PGO_FREE if we don't have write permission
		 * and don't flush if this is a copy-on-write object
		 * since we can't know our permissions on it.
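		 * i.e. if PGO_FREE was requested but the entry lacks
		 * write permission or is copy-on-write, skip pgo_flush
		 * for this entry entirely rather than free pages we
		 * might not own.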
		 */

		offset = current->offset + (start - current->start);
		size = MIN(end, current->end) - start;
		if (uobj != NULL &&
		    ((flags & PGO_FREE) == 0 ||
		     ((entry->max_protection & VM_PROT_WRITE) != 0 &&
		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
			simple_lock(&uobj->vmobjlock);
			rv = uobj->pgops->pgo_flush(uobj, offset,
			    offset + size, flags);
			simple_unlock(&uobj->vmobjlock);

			if (rv == FALSE)
				error = EFAULT;
		}
		start += size;
	}
	vm_map_unlock_read(map);
	return (error);
}


/*
 * uvm_map_checkprot: check protection in map
 *
 * => must allow specified protection in a fully allocated region.
 * => map must be read or write locked by caller.
 */

boolean_t
uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_prot_t protection)
{
	struct vm_map_entry *entry;
	struct vm_map_entry *tmp_entry;

	if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
		return(FALSE);
	}
	entry = tmp_entry;
	while (start < end) {
		if (entry == &map->header) {
			return(FALSE);
		}

		/*
		 * no holes allowed
		 */

		if (start < entry->start) {
			return(FALSE);
		}

		/*
		 * check protection associated with entry
		 */

		if ((entry->protection & protection) != protection) {
			return(FALSE);
		}

		/* go to next entry */

		start = entry->end;
		entry = entry->next;
	}
	return(TRUE);
}

/*
 * uvmspace_alloc: allocate a vmspace structure.
 *
 * - structure includes vm_map and pmap
 * - XXX: no locking on this structure
 * - refcnt set to 1, rest must be init'd by caller
 */
struct vmspace *
uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
    boolean_t remove_holes)
{
	struct vmspace *vm;

	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
	return (vm);
}

/*
 * uvmspace_init: initialize a vmspace structure.
 *
 * - XXX: no locking on this structure
 * - refcnt set to 1, rest must be init'd by caller
 */
void
uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
    boolean_t pageable, boolean_t remove_holes)
{

	uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);

	if (pmap)
		pmap_reference(pmap);
	else
		pmap = pmap_create();
	vm->vm_map.pmap = pmap;

	vm->vm_refcnt = 1;

	if (remove_holes)
		pmap_remove_holes(&vm->vm_map);
}

/*
 * uvmspace_share: share a vmspace between two processes
 *
 * - XXX: no locking on vmspace
 * - used for vfork, threads(?)
 */

void
uvmspace_share(struct proc *p1, struct proc *p2)
{
	p2->p_vmspace = p1->p_vmspace;
	p1->p_vmspace->vm_refcnt++;
}

/*
 * uvmspace_exec: the process wants to exec a new program
 *
 * - XXX: no locking on vmspace
 */

void
uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
{
	struct vmspace *nvm, *ovm = p->p_vmspace;
	struct vm_map *map = &ovm->vm_map;

	pmap_unuse_final(p);		/* before stack addresses go away */

	/*
	 * see if more than one process is using this vmspace...
3216 */ 3217 3218 if (ovm->vm_refcnt == 1) { 3219 3220 /* 3221 * if p is the only process using its vmspace then we can safely 3222 * recycle that vmspace for the program that is being exec'd. 3223 */ 3224 3225 #ifdef SYSVSHM 3226 /* 3227 * SYSV SHM semantics require us to kill all segments on an exec 3228 */ 3229 if (ovm->vm_shm) 3230 shmexit(ovm); 3231 #endif 3232 3233 /* 3234 * POSIX 1003.1b -- "lock future mappings" is revoked 3235 * when a process execs another program image. 3236 */ 3237 vm_map_lock(map); 3238 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3239 vm_map_unlock(map); 3240 3241 /* 3242 * now unmap the old program 3243 */ 3244 uvm_unmap(map, map->min_offset, map->max_offset); 3245 3246 /* 3247 * but keep MMU holes unavailable 3248 */ 3249 pmap_remove_holes(map); 3250 3251 /* 3252 * resize the map 3253 */ 3254 vm_map_lock(map); 3255 map->min_offset = start; 3256 uvm_tree_sanity(map, "resize enter"); 3257 map->max_offset = end; 3258 if (map->header.prev != &map->header) 3259 uvm_rb_fixup(map, map->header.prev); 3260 uvm_tree_sanity(map, "resize leave"); 3261 vm_map_unlock(map); 3262 3263 3264 } else { 3265 3266 /* 3267 * p's vmspace is being shared, so we can't reuse it for p since 3268 * it is still being used for others. allocate a new vmspace 3269 * for p 3270 */ 3271 nvm = uvmspace_alloc(start, end, 3272 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3273 3274 /* 3275 * install new vmspace and drop our ref to the old one. 3276 */ 3277 3278 pmap_deactivate(p); 3279 p->p_vmspace = nvm; 3280 pmap_activate(p); 3281 3282 uvmspace_free(ovm); 3283 } 3284 } 3285 3286 /* 3287 * uvmspace_free: free a vmspace data structure 3288 * 3289 * - XXX: no locking on vmspace 3290 */ 3291 3292 void 3293 uvmspace_free(struct vmspace *vm) 3294 { 3295 struct vm_map_entry *dead_entries; 3296 3297 if (--vm->vm_refcnt == 0) { 3298 /* 3299 * lock the map, to wait out all other references to it. delete 3300 * all of the mappings and pages they hold, then call the pmap 3301 * module to reclaim anything left. 3302 */ 3303 #ifdef SYSVSHM 3304 /* Get rid of any SYSV shared memory segments. */ 3305 if (vm->vm_shm != NULL) 3306 shmexit(vm); 3307 #endif 3308 vm_map_lock(&vm->vm_map); 3309 if (vm->vm_map.nentries) { 3310 uvm_unmap_remove(&vm->vm_map, 3311 vm->vm_map.min_offset, vm->vm_map.max_offset, 3312 &dead_entries, NULL, TRUE); 3313 if (dead_entries != NULL) 3314 uvm_unmap_detach(dead_entries, 0); 3315 } 3316 pmap_destroy(vm->vm_map.pmap); 3317 vm->vm_map.pmap = NULL; 3318 pool_put(&uvm_vmspace_pool, vm); 3319 } 3320 } 3321 3322 /* 3323 * uvm_map_create: create map 3324 */ 3325 vm_map_t 3326 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 3327 { 3328 vm_map_t result; 3329 3330 result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); 3331 uvm_map_setup(result, min, max, flags); 3332 result->pmap = pmap; 3333 return(result); 3334 } 3335 3336 /* 3337 * uvm_map_setup: init map 3338 * 3339 * => map must not be in service yet. 
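 * => min/max give the map's virtual address range; flags becomes the
 *    initial map->flags value (e.g. VM_MAP_PAGEABLE for a pageable
 *    map, as passed by uvmspace_init() above).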
3340 */ 3341 void 3342 uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags) 3343 { 3344 3345 RB_INIT(&map->rbhead); 3346 map->header.next = map->header.prev = &map->header; 3347 map->nentries = 0; 3348 map->size = 0; 3349 map->ref_count = 1; 3350 map->min_offset = min; 3351 map->max_offset = max; 3352 map->flags = flags; 3353 map->first_free = &map->header; 3354 map->hint = &map->header; 3355 map->timestamp = 0; 3356 rw_init(&map->lock, "vmmaplk"); 3357 simple_lock_init(&map->ref_lock); 3358 simple_lock_init(&map->hint_lock); 3359 } 3360 3361 3362 3363 /* 3364 * uvm_map_reference: add reference to a map 3365 * 3366 * => map need not be locked (we use ref_lock). 3367 */ 3368 void 3369 uvm_map_reference(vm_map_t map) 3370 { 3371 simple_lock(&map->ref_lock); 3372 map->ref_count++; 3373 simple_unlock(&map->ref_lock); 3374 } 3375 3376 /* 3377 * uvm_map_deallocate: drop reference to a map 3378 * 3379 * => caller must not lock map 3380 * => we will zap map if ref count goes to zero 3381 */ 3382 void 3383 uvm_map_deallocate(vm_map_t map) 3384 { 3385 int c; 3386 3387 simple_lock(&map->ref_lock); 3388 c = --map->ref_count; 3389 simple_unlock(&map->ref_lock); 3390 if (c > 0) { 3391 return; 3392 } 3393 3394 /* 3395 * all references gone. unmap and free. 3396 */ 3397 3398 uvm_unmap(map, map->min_offset, map->max_offset); 3399 pmap_destroy(map->pmap); 3400 free(map, M_VMMAP); 3401 } 3402 3403 /* 3404 * F O R K - m a i n e n t r y p o i n t 3405 */ 3406 /* 3407 * uvmspace_fork: fork a process' main map 3408 * 3409 * => create a new vmspace for child process from parent. 3410 * => parent's map must not be locked. 3411 */ 3412 3413 struct vmspace * 3414 uvmspace_fork(struct vmspace *vm1) 3415 { 3416 struct vmspace *vm2; 3417 struct vm_map *old_map = &vm1->vm_map; 3418 struct vm_map *new_map; 3419 struct vm_map_entry *old_entry; 3420 struct vm_map_entry *new_entry; 3421 pmap_t new_pmap; 3422 boolean_t protect_child; 3423 3424 vm_map_lock(old_map); 3425 3426 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3427 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3428 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3429 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3430 new_map = &vm2->vm_map; /* XXX */ 3431 new_pmap = new_map->pmap; 3432 3433 old_entry = old_map->header.next; 3434 3435 /* 3436 * go entry-by-entry 3437 */ 3438 3439 while (old_entry != &old_map->header) { 3440 3441 /* 3442 * first, some sanity checks on the old entry 3443 */ 3444 if (UVM_ET_ISSUBMAP(old_entry)) 3445 panic("fork: encountered a submap during fork (illegal)"); 3446 3447 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3448 UVM_ET_ISNEEDSCOPY(old_entry)) 3449 panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); 3450 3451 3452 switch (old_entry->inheritance) { 3453 case MAP_INHERIT_NONE: 3454 /* 3455 * drop the mapping 3456 */ 3457 break; 3458 3459 case MAP_INHERIT_SHARE: 3460 /* 3461 * share the mapping: this means we want the old and 3462 * new entries to share amaps and backing objects. 3463 */ 3464 3465 /* 3466 * if the old_entry needs a new amap (due to prev fork) 3467 * then we need to allocate it now so that we have 3468 * something we own to share with the new_entry. [in 3469 * other words, we need to clear needs_copy] 3470 */ 3471 3472 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3473 /* get our own amap, clears needs_copy */ 3474 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3475 0, 0); 3476 /* XXXCDC: WAITOK??? 
*/ 3477 } 3478 3479 new_entry = uvm_mapent_alloc(new_map, 0); 3480 /* old_entry -> new_entry */ 3481 uvm_mapent_copy(old_entry, new_entry); 3482 3483 /* new pmap has nothing wired in it */ 3484 new_entry->wired_count = 0; 3485 3486 /* 3487 * gain reference to object backing the map (can't 3488 * be a submap, already checked this case). 3489 */ 3490 if (new_entry->aref.ar_amap) 3491 /* share reference */ 3492 uvm_map_reference_amap(new_entry, AMAP_SHARED); 3493 3494 if (new_entry->object.uvm_obj && 3495 new_entry->object.uvm_obj->pgops->pgo_reference) 3496 new_entry->object.uvm_obj-> 3497 pgops->pgo_reference( 3498 new_entry->object.uvm_obj); 3499 3500 /* insert entry at end of new_map's entry list */ 3501 uvm_map_entry_link(new_map, new_map->header.prev, 3502 new_entry); 3503 3504 /* 3505 * pmap_copy the mappings: this routine is optional 3506 * but if it is there it will reduce the number of 3507 * page faults in the new proc. 3508 */ 3509 3510 pmap_copy(new_pmap, old_map->pmap, new_entry->start, 3511 (old_entry->end - old_entry->start), 3512 old_entry->start); 3513 3514 break; 3515 3516 case MAP_INHERIT_COPY: 3517 3518 /* 3519 * copy-on-write the mapping (using mmap's 3520 * MAP_PRIVATE semantics) 3521 * 3522 * allocate new_entry, adjust reference counts. 3523 * (note that new references are read-only). 3524 */ 3525 3526 new_entry = uvm_mapent_alloc(new_map, 0); 3527 /* old_entry -> new_entry */ 3528 uvm_mapent_copy(old_entry, new_entry); 3529 3530 if (new_entry->aref.ar_amap) 3531 uvm_map_reference_amap(new_entry, 0); 3532 3533 if (new_entry->object.uvm_obj && 3534 new_entry->object.uvm_obj->pgops->pgo_reference) 3535 new_entry->object.uvm_obj->pgops->pgo_reference 3536 (new_entry->object.uvm_obj); 3537 3538 /* new pmap has nothing wired in it */ 3539 new_entry->wired_count = 0; 3540 3541 new_entry->etype |= 3542 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3543 uvm_map_entry_link(new_map, new_map->header.prev, 3544 new_entry); 3545 3546 /* 3547 * the new entry will need an amap. it will either 3548 * need to be copied from the old entry or created 3549 * from scratch (if the old entry does not have an 3550 * amap). can we defer this process until later 3551 * (by setting "needs_copy") or do we need to copy 3552 * the amap now? 3553 * 3554 * we must copy the amap now if any of the following 3555 * conditions hold: 3556 * 1. the old entry has an amap and that amap is 3557 * being shared. this means that the old (parent) 3558 * process is sharing the amap with another 3559 * process. if we do not clear needs_copy here 3560 * we will end up in a situation where both the 3561 * parent and child process are referring to the 3562 * same amap with "needs_copy" set. if the 3563 * parent write-faults, the fault routine will 3564 * clear "needs_copy" in the parent by allocating 3565 * a new amap. this is wrong because the 3566 * parent is supposed to be sharing the old amap 3567 * and the new amap will break that. 3568 * 3569 * 2. if the old entry has an amap and a non-zero 3570 * wire count then we are going to have to call 3571 * amap_cow_now to avoid page faults in the 3572 * parent process. since amap_cow_now requires 3573 * "needs_copy" to be clear we might as well 3574 * clear it here as well. 3575 * 3576 */ 3577 3578 if (old_entry->aref.ar_amap != NULL) { 3579 3580 if ((amap_flags(old_entry->aref.ar_amap) & 3581 AMAP_SHARED) != 0 || 3582 VM_MAPENT_ISWIRED(old_entry)) { 3583 3584 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3585 0, 0); 3586 /* XXXCDC: M_WAITOK ... ok? 
*/ 3587 } 3588 } 3589 3590 /* 3591 * if the parent's entry is wired down, then the 3592 * parent process does not want page faults on 3593 * access to that memory. this means that we 3594 * cannot do copy-on-write because we can't write 3595 * protect the old entry. in this case we 3596 * resolve all copy-on-write faults now, using 3597 * amap_cow_now. note that we have already 3598 * allocated any needed amap (above). 3599 */ 3600 3601 if (VM_MAPENT_ISWIRED(old_entry)) { 3602 3603 /* 3604 * resolve all copy-on-write faults now 3605 * (note that there is nothing to do if 3606 * the old mapping does not have an amap). 3607 * XXX: is it worthwhile to bother with pmap_copy 3608 * in this case? 3609 */ 3610 if (old_entry->aref.ar_amap) 3611 amap_cow_now(new_map, new_entry); 3612 3613 } else { 3614 3615 /* 3616 * setup mappings to trigger copy-on-write faults 3617 * we must write-protect the parent if it has 3618 * an amap and it is not already "needs_copy"... 3619 * if it is already "needs_copy" then the parent 3620 * has already been write-protected by a previous 3621 * fork operation. 3622 * 3623 * if we do not write-protect the parent, then 3624 * we must be sure to write-protect the child 3625 * after the pmap_copy() operation. 3626 * 3627 * XXX: pmap_copy should have some way of telling 3628 * us that it didn't do anything so we can avoid 3629 * calling pmap_protect needlessly. 3630 */ 3631 3632 if (old_entry->aref.ar_amap) { 3633 3634 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3635 if (old_entry->max_protection & VM_PROT_WRITE) { 3636 pmap_protect(old_map->pmap, 3637 old_entry->start, 3638 old_entry->end, 3639 old_entry->protection & 3640 ~VM_PROT_WRITE); 3641 pmap_update(old_map->pmap); 3642 3643 } 3644 old_entry->etype |= UVM_ET_NEEDSCOPY; 3645 } 3646 3647 /* 3648 * parent must now be write-protected 3649 */ 3650 protect_child = FALSE; 3651 } else { 3652 3653 /* 3654 * we only need to protect the child if the 3655 * parent has write access. 
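				 * (if the parent's max_protection lacks
				 * write, the mappings pmap_copy() duplicates
				 * below are already read-only, so there is
				 * nothing for the child to write-protect.)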
3656 */ 3657 if (old_entry->max_protection & VM_PROT_WRITE) 3658 protect_child = TRUE; 3659 else 3660 protect_child = FALSE; 3661 3662 } 3663 3664 /* 3665 * copy the mappings 3666 * XXX: need a way to tell if this does anything 3667 */ 3668 3669 pmap_copy(new_pmap, old_map->pmap, 3670 new_entry->start, 3671 (old_entry->end - old_entry->start), 3672 old_entry->start); 3673 3674 /* 3675 * protect the child's mappings if necessary 3676 */ 3677 if (protect_child) { 3678 pmap_protect(new_pmap, new_entry->start, 3679 new_entry->end, 3680 new_entry->protection & 3681 ~VM_PROT_WRITE); 3682 } 3683 3684 } 3685 break; 3686 } /* end of switch statement */ 3687 old_entry = old_entry->next; 3688 } 3689 3690 new_map->size = old_map->size; 3691 vm_map_unlock(old_map); 3692 3693 #ifdef SYSVSHM 3694 if (vm1->vm_shm) 3695 shmfork(vm1, vm2); 3696 #endif 3697 3698 #ifdef PMAP_FORK 3699 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 3700 #endif 3701 3702 return(vm2); 3703 } 3704 3705 #if defined(DDB) 3706 3707 /* 3708 * DDB hooks 3709 */ 3710 3711 /* 3712 * uvm_map_printit: actually prints the map 3713 */ 3714 3715 void 3716 uvm_map_printit(struct vm_map *map, boolean_t full, 3717 int (*pr)(const char *, ...)) 3718 { 3719 struct vm_map_entry *entry; 3720 3721 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3722 (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n", 3723 map->nentries, map->size, map->ref_count, map->timestamp, 3724 map->flags); 3725 #ifdef pmap_resident_count 3726 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3727 pmap_resident_count(map->pmap)); 3728 #else 3729 /* XXXCDC: this should be required ... */ 3730 (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); 3731 #endif 3732 if (!full) 3733 return; 3734 for (entry = map->header.next; entry != &map->header; 3735 entry = entry->next) { 3736 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3737 entry, entry->start, entry->end, entry->object.uvm_obj, 3738 (long long)entry->offset, entry->aref.ar_amap, 3739 entry->aref.ar_pageoff); 3740 (*pr)( 3741 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 3742 "wc=%d, adv=%d\n", 3743 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3744 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3745 (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', 3746 entry->protection, entry->max_protection, 3747 entry->inheritance, entry->wired_count, entry->advice); 3748 } 3749 } 3750 3751 /* 3752 * uvm_object_printit: actually prints the object 3753 */ 3754 3755 void 3756 uvm_object_printit(struct uvm_object *uobj, boolean_t full, 3757 int (*pr)(const char *, ...)) 3758 { 3759 struct vm_page *pg; 3760 int cnt = 0; 3761 3762 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3763 uobj, uobj->pgops, uobj->uo_npages); 3764 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3765 (*pr)("refs=<SYSTEM>\n"); 3766 else 3767 (*pr)("refs=%d\n", uobj->uo_refs); 3768 3769 if (!full) { 3770 return; 3771 } 3772 (*pr)(" PAGES <pg,offset>:\n "); 3773 RB_FOREACH(pg, uvm_objtree, &uobj->memt) { 3774 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3775 if ((cnt % 3) == 2) { 3776 (*pr)("\n "); 3777 } 3778 cnt++; 3779 } 3780 if ((cnt % 3) != 2) { 3781 (*pr)("\n"); 3782 } 3783 } 3784 3785 /* 3786 * uvm_page_printit: actually print the page 3787 */ 3788 3789 static const char page_flagbits[] = 3790 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3791 "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" 3792 "\31PMAP1\32PMAP2\33PMAP3"; 3793 3794 void 3795 uvm_page_printit(struct vm_page *pg, boolean_t full, 3796 int (*pr)(const char *, ...)) 3797 { 3798 struct vm_page *tpg; 3799 struct uvm_object *uobj; 3800 struct pglist *pgl; 3801 3802 (*pr)("PAGE %p:\n", pg); 3803 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3804 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3805 (long long)pg->phys_addr); 3806 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 3807 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 3808 #if defined(UVM_PAGE_TRKOWN) 3809 if (pg->pg_flags & PG_BUSY) 3810 (*pr)(" owning process = %d, tag=%s\n", 3811 pg->owner, pg->owner_tag); 3812 else 3813 (*pr)(" page not busy, no owner\n"); 3814 #else 3815 (*pr)(" [page ownership tracking disabled]\n"); 3816 #endif 3817 3818 if (!full) 3819 return; 3820 3821 /* cross-verify object/anon */ 3822 if ((pg->pg_flags & PQ_FREE) == 0) { 3823 if (pg->pg_flags & PQ_ANON) { 3824 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3825 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3826 (pg->uanon) ? pg->uanon->an_page : NULL); 3827 else 3828 (*pr)(" anon backpointer is OK\n"); 3829 } else { 3830 uobj = pg->uobject; 3831 if (uobj) { 3832 (*pr)(" checking object list\n"); 3833 RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3834 if (tpg == pg) { 3835 break; 3836 } 3837 } 3838 if (tpg) 3839 (*pr)(" page found on object list\n"); 3840 else 3841 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 3842 } 3843 } 3844 } 3845 3846 /* cross-verify page queue */ 3847 if (pg->pg_flags & PQ_FREE) { 3848 if (uvm_pmr_isfree(pg)) 3849 printf(" page found in uvm_pmemrange\n"); 3850 else 3851 printf(" >>> page not found in uvm_pmemrange <<<\n"); 3852 pgl = NULL; 3853 } else if (pg->pg_flags & PQ_INACTIVE) { 3854 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3855 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3856 } else if (pg->pg_flags & PQ_ACTIVE) { 3857 pgl = &uvm.page_active; 3858 } else { 3859 pgl = NULL; 3860 } 3861 3862 if (pgl) { 3863 (*pr)(" checking pageq list\n"); 3864 TAILQ_FOREACH(tpg, pgl, pageq) { 3865 if (tpg == pg) { 3866 break; 3867 } 3868 } 3869 if (tpg) 3870 (*pr)(" page found on pageq list\n"); 3871 else 3872 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3873 } 3874 } 3875 #endif 3876
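
/*
 * The print routines above are normally reached from ddb(4); on a
 * typical kernel something along the lines of
 *
 *	ddb> show map/f <map address>
 *
 * dumps a map and, with the "f" modifier, each of its entries (the
 * command syntax here is ddb's, shown only as a usage hint).
 */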