/*	$OpenBSD: uvm_map.c,v 1.104 2008/09/23 13:25:46 art Exp $	*/
/*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_map.c: uvm map operations
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>

#include <dev/rndvar.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#define UVM_MAP
#include <uvm/uvm.h>
#undef RB_AUGMENT
#define RB_AUGMENT(x) uvm_rb_augment(x)

#ifdef DDB
#include <uvm/uvm_ddb.h>
#endif

static struct timeval uvm_kmapent_last_warn_time;
static struct timeval uvm_kmapent_warn_rate = { 10, 0 };

struct uvm_cnt uvm_map_call, map_backmerge, map_forwmerge;
struct uvm_cnt map_nousermerge;
struct uvm_cnt uvm_mlk_call, uvm_mlk_hint;
const char vmmapbsy[] = "vmmapbsy";

/*
 * Da history books
 */
UVMHIST_DECL(maphist);
UVMHIST_DECL(pdhist);

/*
 * pool for vmspace structures.
 */

struct pool uvm_vmspace_pool;

/*
 * pool for dynamically-allocated map entries.
 */

struct pool uvm_map_entry_pool;
struct pool uvm_map_entry_kmem_pool;

#ifdef PMAP_GROWKERNEL
/*
 * This global represents the end of the kernel virtual address
 * space.  If we want to exceed this, we must grow the kernel
 * virtual address space dynamically.
 *
 * Note, this variable is locked by kernel_map's lock.
 */
vaddr_t uvm_maxkaddr;
#endif

/*
 * macros
 */

/*
 * uvm_map_entry_link: insert entry into a map
 *
 * => map must be locked
 */
#define uvm_map_entry_link(map, after_where, entry) do {	\
	(map)->nentries++;					\
	(entry)->prev = (after_where);				\
	(entry)->next = (after_where)->next;			\
	(entry)->prev->next = (entry);				\
	(entry)->next->prev = (entry);				\
	uvm_rb_insert(map, entry);				\
} while (0)

/*
 * uvm_map_entry_unlink: remove entry from a map
 *
 * => map must be locked
 */
#define uvm_map_entry_unlink(map, entry) do {			\
	(map)->nentries--;					\
	(entry)->next->prev = (entry)->prev;			\
	(entry)->prev->next = (entry)->next;			\
	uvm_rb_remove(map, entry);				\
} while (0)

/*
 * SAVE_HINT: saves the specified entry as the hint for future lookups.
 *
 * => map need not be locked (protected by hint_lock).
 */
#define SAVE_HINT(map,check,value) do {				\
	simple_lock(&(map)->hint_lock);				\
	if ((map)->hint == (check))				\
		(map)->hint = (value);				\
	simple_unlock(&(map)->hint_lock);			\
} while (0)

/*
 * VM_MAP_RANGE_CHECK: check and correct range
 *
 * => map must at least be read locked
 */

#define VM_MAP_RANGE_CHECK(map, start, end) do {	\
	if (start < vm_map_min(map))			\
		start = vm_map_min(map);		\
	if (end > vm_map_max(map))			\
		end = vm_map_max(map);			\
	if (start > end)				\
		start = end;				\
} while (0)

/*
 * local prototypes
 */

void	uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
void	uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
void	uvm_map_reference_amap(struct vm_map_entry *, int);
void	uvm_map_unreference_amap(struct vm_map_entry *, int);
int	uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t,
    struct vm_map_entry *, voff_t, vsize_t);

struct vm_map_entry	*uvm_mapent_alloc(struct vm_map *);
void			 uvm_mapent_free(struct vm_map_entry *);


/*
 * Tree manipulation.
 */
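/*
 * The tree is augmented with per-entry free-space information:
 * "ownspace" is the gap between an entry's end and the start of the
 * next entry (or max_offset for the last entry), and "space" caches
 * the largest ownspace anywhere in the entry's subtree.
 *
 * Illustrative sketch (hypothetical numbers): with entries
 * A = [0x1000,0x2000), B = [0x5000,0x6000), C = [0x9000,0xa000) and
 * max_offset = 0xc000, uvm_rb_space() yields ownspace values of
 * 0x3000, 0x3000 and 0x2000 respectively; the cached subtree maxima
 * let uvm_map_findspace() skip whole subtrees that cannot hold a
 * requested length.
 */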
void uvm_rb_insert(struct vm_map *, struct vm_map_entry *);
void uvm_rb_remove(struct vm_map *, struct vm_map_entry *);
vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *);

#ifdef DEBUG
int _uvm_tree_sanity(struct vm_map *map, const char *name);
#endif
vsize_t uvm_rb_subtree_space(struct vm_map_entry *);
void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *);

static __inline int
uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b)
{
	if (a->start < b->start)
		return (-1);
	else if (a->start > b->start)
		return (1);

	return (0);
}


static __inline void
uvm_rb_augment(struct vm_map_entry *entry)
{
	entry->space = uvm_rb_subtree_space(entry);
}

RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);

RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);

vsize_t
uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *next;
	vaddr_t space;

	if ((next = entry->next) == &map->header)
		space = map->max_offset - entry->end;
	else {
		KASSERT(next);
		space = next->start - entry->end;
	}
	return (space);
}

vsize_t
uvm_rb_subtree_space(struct vm_map_entry *entry)
{
	vaddr_t space, tmp;

	space = entry->ownspace;
	if (RB_LEFT(entry, rb_entry)) {
		tmp = RB_LEFT(entry, rb_entry)->space;
		if (tmp > space)
			space = tmp;
	}

	if (RB_RIGHT(entry, rb_entry)) {
		tmp = RB_RIGHT(entry, rb_entry)->space;
		if (tmp > space)
			space = tmp;
	}

	return (space);
}

void
uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
{
	/* We need to traverse to the very top */
	do {
		entry->ownspace = uvm_rb_space(map, entry);
		entry->space = uvm_rb_subtree_space(entry);
	} while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
}

void
uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	vaddr_t space = uvm_rb_space(map, entry);
	struct vm_map_entry *tmp;

	entry->ownspace = entry->space = space;
	tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
#ifdef DIAGNOSTIC
	if (tmp != NULL)
		panic("uvm_rb_insert: duplicate entry?");
#endif
	uvm_rb_fixup(map, entry);
	if (entry->prev != &map->header)
		uvm_rb_fixup(map, entry->prev);
}

void
uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *parent;

	parent = RB_PARENT(entry, rb_entry);
	RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
	if (entry->prev != &map->header)
		uvm_rb_fixup(map, entry->prev);
	if (parent)
		uvm_rb_fixup(map, parent);
}

#ifdef DEBUG
#define uvm_tree_sanity(x,y)	_uvm_tree_sanity(x,y)
#else
#define uvm_tree_sanity(x,y)
#endif

#ifdef DEBUG
int
_uvm_tree_sanity(struct vm_map *map, const char *name)
{
	struct vm_map_entry *tmp, *trtmp;
	int n = 0, i = 1;

	RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
		if (tmp->ownspace != uvm_rb_space(map, tmp)) {
			printf("%s: %d/%d ownspace %x != %x %s\n",
			    name, n + 1, map->nentries,
			    tmp->ownspace, uvm_rb_space(map, tmp),
			    tmp->next == &map->header ? "(last)" : "");
			goto error;
		}
	}
	trtmp = NULL;
	RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
		if (tmp->space != uvm_rb_subtree_space(tmp)) {
			printf("%s: space %d != %d\n",
			    name, tmp->space, uvm_rb_subtree_space(tmp));
			goto error;
		}
		if (trtmp != NULL && trtmp->start >= tmp->start) {
			printf("%s: corrupt: 0x%lx >= 0x%lx\n",
			    name, trtmp->start, tmp->start);
			goto error;
		}
		n++;

		trtmp = tmp;
	}

	if (n != map->nentries) {
		printf("%s: nentries: %d vs %d\n",
		    name, n, map->nentries);
		goto error;
	}

	for (tmp = map->header.next; tmp && tmp != &map->header;
	    tmp = tmp->next, i++) {
		trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
		if (trtmp != tmp) {
			printf("%s: lookup: %d: %p - %p: %p\n",
			    name, i, tmp, trtmp,
			    RB_PARENT(tmp, rb_entry));
			goto error;
		}
	}

	return (0);
error:
#ifdef DDB
	/* handy breakpoint location for error case */
	__asm(".globl treesanity_label\ntreesanity_label:");
#endif
	return (-1);
}
#endif

/*
 * uvm_mapent_alloc: allocate a map entry
 */

struct vm_map_entry *
uvm_mapent_alloc(struct vm_map *map)
{
	struct vm_map_entry *me, *ne;
	int s, i;
	UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);

	if (map->flags & VM_MAP_INTRSAFE || cold) {
		s = splvm();
		simple_lock(&uvm.kentry_lock);
		me = uvm.kentry_free;
		if (me == NULL) {
			ne = uvm_km_getpage(0);
			if (ne == NULL)
				panic("uvm_mapent_alloc: cannot allocate map "
				    "entry");
			for (i = 0;
			    i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
			    i++)
				ne[i].next = &ne[i + 1];
			ne[i].next = NULL;
			me = ne;
			if (ratecheck(&uvm_kmapent_last_warn_time,
			    &uvm_kmapent_warn_rate))
				printf("uvm_mapent_alloc: out of static "
				    "map entries\n");
		}
		uvm.kentry_free = me->next;
		uvmexp.kmapent++;
		simple_unlock(&uvm.kentry_lock);
		splx(s);
		me->flags = UVM_MAP_STATIC;
	} else if (map == kernel_map) {
		splassert(IPL_NONE);
		me = pool_get(&uvm_map_entry_kmem_pool, PR_WAITOK);
		me->flags = UVM_MAP_KMEM;
	} else {
		splassert(IPL_NONE);
		me = pool_get(&uvm_map_entry_pool, PR_WAITOK);
		me->flags = 0;
	}

	UVMHIST_LOG(maphist, "<- new entry=%p [kentry=%ld]", me,
	    ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0);
	return(me);
}

/*
 * uvm_mapent_free: free map entry
 *
 * => XXX: static pool for kernel map?
 */

void
uvm_mapent_free(struct vm_map_entry *me)
{
	int s;
	UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"<- freeing map entry=%p [flags=%ld]",
	    me, me->flags, 0, 0);
	if (me->flags & UVM_MAP_STATIC) {
		s = splvm();
		simple_lock(&uvm.kentry_lock);
		me->next = uvm.kentry_free;
		uvm.kentry_free = me;
		uvmexp.kmapent--;
		simple_unlock(&uvm.kentry_lock);
		splx(s);
	} else if (me->flags & UVM_MAP_KMEM) {
		splassert(IPL_NONE);
		pool_put(&uvm_map_entry_kmem_pool, me);
	} else {
		splassert(IPL_NONE);
		pool_put(&uvm_map_entry_pool, me);
	}
}

/*
 * uvm_mapent_copy: copy a map entry, preserving flags
 */

void
uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
{
	memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
	    ((char *)src));
}

/*
 * uvm_map_entry_unwire: unwire a map entry
 *
 * => map should be locked by caller
 */
void
uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
{

	entry->wired_count = 0;
	uvm_fault_unwire_locked(map, entry->start, entry->end);
}


/*
 * wrapper for calling amap_ref()
 */
void
uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
{
	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
}


/*
 * wrapper for calling amap_unref()
 */
void
uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
{
	amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
}


/*
 * uvm_map_init: init mapping system at boot time.   note that we allocate
 * and init the static pool of structs vm_map_entry for the kernel here.
 */

void
uvm_map_init(void)
{
	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
#if defined(UVMHIST)
	static struct uvm_history_ent maphistbuf[100];
	static struct uvm_history_ent pdhistbuf[100];
#endif
	int lcv;

	/*
	 * first, init logging system.
	 */

	UVMHIST_FUNC("uvm_map_init");
	UVMHIST_INIT_STATIC(maphist, maphistbuf);
	UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
	UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
	UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0,
	    "# uvm_map() successful calls", 0);
	UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0);
	UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward",
	    0);
	UVMCNT_INIT(map_nousermerge, UVMCNT_CNT, 0, "# back merges skipped", 0);
	UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0);
	UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0);

	/*
	 * now set up static pool of kernel map entries ...
	 */

	simple_lock_init(&uvm.kentry_lock);
	uvm.kentry_free = NULL;
	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
		kernel_map_entry[lcv].next = uvm.kentry_free;
		uvm.kentry_free = &kernel_map_entry[lcv];
	}

	/*
	 * initialize the map-related pools.
	 */
	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
	    0, 0, 0, "vmsppl", &pool_allocator_nointr);
	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
	    0, 0, 0, "vmmpepl", &pool_allocator_nointr);
	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
	    0, 0, 0, "vmmpekpl", NULL);
	pool_sethiwat(&uvm_map_entry_pool, 8192);
}

/*
 * clippers
 */

/*
 * uvm_map_clip_start: ensure that the entry begins at or after
 *	the starting address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_START macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
    vaddr_t start)
{
	struct vm_map_entry *new_entry;
	vaddr_t new_adj;

	/* uvm_map_simplify_entry(map, entry); */ /* XXX */

	uvm_tree_sanity(map, "clip_start entry");

	/*
	 * Split off the front portion.  note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */

	new_entry = uvm_mapent_alloc(map);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */

	new_entry->end = start;
	new_adj = start - new_entry->start;
	if (entry->object.uvm_obj)
		entry->offset += new_adj;	/* shift start over */

	/* Does not change order for the RB tree */
	entry->start = start;

	if (new_entry->aref.ar_amap) {
		amap_splitref(&new_entry->aref, &entry->aref, new_adj);
	}

	uvm_map_entry_link(map, entry->prev, new_entry);

	if (UVM_ET_ISSUBMAP(entry)) {
		/* ... unlikely to happen, but play it safe */
		uvm_map_reference(new_entry->object.sub_map);
	} else {
		if (UVM_ET_ISOBJ(entry) &&
		    entry->object.uvm_obj->pgops &&
		    entry->object.uvm_obj->pgops->pgo_reference)
			entry->object.uvm_obj->pgops->pgo_reference(
			    entry->object.uvm_obj);
	}

	uvm_tree_sanity(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 *	the ending address, if it doesn't we split the reference
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
{
	struct vm_map_entry *new_entry;
	vaddr_t new_adj; /* #bytes we move start forward */

	uvm_tree_sanity(map, "clip_end entry");
	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */

	new_entry = uvm_mapent_alloc(map);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */

	new_entry->start = entry->end = end;
	new_adj = end - entry->start;
	if (new_entry->object.uvm_obj)
		new_entry->offset += new_adj;

	if (entry->aref.ar_amap)
		amap_splitref(&entry->aref, &new_entry->aref, new_adj);

	uvm_rb_fixup(map, entry);

	uvm_map_entry_link(map, entry, new_entry);

	if (UVM_ET_ISSUBMAP(entry)) {
		/* ... unlikely to happen, but play it safe */
		uvm_map_reference(new_entry->object.sub_map);
	} else {
		if (UVM_ET_ISOBJ(entry) &&
		    entry->object.uvm_obj->pgops &&
		    entry->object.uvm_obj->pgops->pgo_reference)
			entry->object.uvm_obj->pgops->pgo_reference(
			    entry->object.uvm_obj);
	}
	uvm_tree_sanity(map, "clip_end leave");
}


/*
 *   M A P   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	 [1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	 [3] <uobj,uoffset>		== normal mapping
 *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *    case [4] is for kernel mappings where we don't know the offset until
 *    we've found a virtual address.   note that kernel object offsets are
 *    always relative to vm_map_min(kernel_map).
 *
 * => if `align' is non-zero, we try to align the virtual address to
 *	the specified alignment.  this is only a hint; if we can't
 *	do it, the address will be unaligned.  this is provided as
 *	a mechanism for large pages.
 *
 * => XXXCDC: need way to map in external amap?
 */

int
uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
    struct proc *p)
{
	struct vm_map_entry *prev_entry, *new_entry;
	vm_prot_t prot = UVM_PROTECTION(flags), maxprot =
	    UVM_MAXPROTECTION(flags);
	vm_inherit_t inherit = UVM_INHERIT(flags);
	int advice = UVM_ADVICE(flags);
	int error;
	UVMHIST_FUNC("uvm_map");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, *startp=0x%lx, size=%ld, flags=0x%lx)",
	    map, *startp, size, flags);
	UVMHIST_LOG(maphist, "  uobj/offset %p/%ld", uobj, (u_long)uoffset,0,0);

	uvm_tree_sanity(map, "map entry");

	if ((map->flags & VM_MAP_INTRSAFE) == 0)
		splassert(IPL_NONE);
	else
		splassert(IPL_VM);

	/*
	 * step 0: sanity check of protection code
	 */

	if ((prot & maxprot) != prot) {
		UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%lx, max=0x%lx",
		    prot, maxprot,0,0);
		return (EACCES);
	}

	/*
	 * step 1: figure out where to put new VM range
	 */

	if (vm_map_lock_try(map) == FALSE) {
		if (flags & UVM_FLAG_TRYLOCK)
			return (EFAULT);
		vm_map_lock(map); /* could sleep here */
	}
	if ((prev_entry = uvm_map_findspace(map, *startp, size, startp,
	    uobj, uoffset, align, flags)) == NULL) {
		UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0);
		vm_map_unlock(map);
		return (ENOMEM);
	}

#ifdef PMAP_GROWKERNEL
	{
		/*
		 * If the kernel pmap can't map the requested space,
		 * then allocate more resources for it.
		 */
		if (map == kernel_map && uvm_maxkaddr < (*startp + size))
			uvm_maxkaddr = pmap_growkernel(*startp + size);
	}
#endif

	UVMCNT_INCR(uvm_map_call);

	/*
	 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
	 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
	 * either case we want to zero it before storing it in the map entry
	 * (because it looks strange and confusing when debugging...)
	 *
	 * if uobj is not null
	 *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
	 *      and we do not need to change uoffset.
	 *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
	 *      now (based on the starting address of the map).   this case is
	 *      for kernel object mappings where we don't know the offset until
	 *      the virtual address is found (with uvm_map_findspace).   the
	 *      offset is the distance we are from the start of the map.
	 */

	if (uobj == NULL) {
		uoffset = 0;
	} else {
		if (uoffset == UVM_UNKNOWN_OFFSET) {
			KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
			uoffset = *startp - vm_map_min(kernel_map);
		}
	}

	/*
	 * step 2: try and insert in map by extending previous entry, if
	 * possible
	 * XXX: we don't try and pull back the next entry.   might be useful
	 * for a stack, but we are currently allocating our stack in advance.
	 */

	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
	    prev_entry->end == *startp && prev_entry != &map->header &&
	    prev_entry->object.uvm_obj == uobj) {

		if (uobj && prev_entry->offset +
		    (prev_entry->end - prev_entry->start) != uoffset)
			goto step3;

		if (UVM_ET_ISSUBMAP(prev_entry))
			goto step3;

		if (prev_entry->protection != prot ||
		    prev_entry->max_protection != maxprot)
			goto step3;

		if (prev_entry->inheritance != inherit ||
		    prev_entry->advice != advice)
			goto step3;

		/* wiring status must match (new area is unwired) */
		if (VM_MAPENT_ISWIRED(prev_entry))
			goto step3;

		/*
		 * can't extend a shared amap.  note: no need to lock amap to
		 * look at refs since we don't care about its exact value.
		 * if it is one (i.e. we have only reference) it will stay there
		 */

		if (prev_entry->aref.ar_amap &&
		    amap_refs(prev_entry->aref.ar_amap) != 1) {
			goto step3;
		}

		/*
		 * Only merge kernel mappings, but keep track
		 * of how much we skipped.
		 */
		if (map != kernel_map && map != kmem_map) {
			UVMCNT_INCR(map_nousermerge);
			goto step3;
		}

		if (prev_entry->aref.ar_amap) {
			error = amap_extend(prev_entry, size);
			if (error) {
				vm_map_unlock(map);
				return (error);
			}
		}

		UVMCNT_INCR(map_backmerge);
		UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);

		/*
		 * drop our reference to uobj since we are extending a reference
		 * that we already have (the ref count can not drop to zero).
		 */

		if (uobj && uobj->pgops->pgo_detach)
			uobj->pgops->pgo_detach(uobj);

		prev_entry->end += size;
		uvm_rb_fixup(map, prev_entry);
		map->size += size;
		if (p && uobj == NULL)
			p->p_vmspace->vm_dused += atop(size);

		uvm_tree_sanity(map, "map leave 2");

		UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
		vm_map_unlock(map);
		return (0);

	}
step3:
	UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);

	/*
	 * check for possible forward merge (which we don't do) and count
	 * the number of times we missed a *possible* chance to merge more
	 */

	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
	    prev_entry->next != &map->header &&
	    prev_entry->next->start == (*startp + size))
		UVMCNT_INCR(map_forwmerge);

	/*
	 * step 3: allocate new entry and link it in
	 */

	new_entry = uvm_mapent_alloc(map);
	new_entry->start = *startp;
	new_entry->end = new_entry->start + size;
	new_entry->object.uvm_obj = uobj;
	new_entry->offset = uoffset;

	if (uobj)
		new_entry->etype = UVM_ET_OBJ;
	else
		new_entry->etype = 0;

	if (flags & UVM_FLAG_COPYONW) {
		new_entry->etype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			new_entry->etype |= UVM_ET_NEEDSCOPY;
	}
	if (flags & UVM_FLAG_HOLE)
		new_entry->etype |= UVM_ET_HOLE;

	new_entry->protection = prot;
	new_entry->max_protection = maxprot;
	new_entry->inheritance = inherit;
	new_entry->wired_count = 0;
	new_entry->advice = advice;
	if (flags & UVM_FLAG_OVERLAY) {
		/*
		 * to_add: for BSS we overallocate a little since we
		 * are likely to extend
		 */
		vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
			UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
		struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK);
		new_entry->aref.ar_pageoff = 0;
		new_entry->aref.ar_amap = amap;
	} else {
		new_entry->aref.ar_pageoff = 0;
		new_entry->aref.ar_amap = NULL;
	}

	uvm_map_entry_link(map, prev_entry, new_entry);

	map->size += size;
	if (p && uobj == NULL)
		p->p_vmspace->vm_dused += atop(size);


	/*
	 * Update the free space hint
	 */

	if ((map->first_free == prev_entry) &&
	    (prev_entry->end >= new_entry->start))
		map->first_free = new_entry;

	uvm_tree_sanity(map, "map leave");

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
	vm_map_unlock(map);
	return (0);
}

/*
 * uvm_map_lookup_entry: find map entry at or before an address
 *
 * => map must at least be read-locked by caller
 * => entry is returned in "entry"
 * => return value is true if address is in the returned entry
 */

boolean_t
uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry)
{
	struct vm_map_entry *cur;
	struct vm_map_entry *last;
	int use_tree = 0;
	UVMHIST_FUNC("uvm_map_lookup_entry");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=%p,addr=0x%lx,ent=%p)",
	    map, address, entry, 0);

	/*
	 * start looking either from the head of the
	 * list, or from the hint.
	 */

	simple_lock(&map->hint_lock);
	cur = map->hint;
	simple_unlock(&map->hint_lock);

	if (cur == &map->header)
		cur = cur->next;

	UVMCNT_INCR(uvm_mlk_call);
	if (address >= cur->start) {
		/*
		 * go from hint to end of list.
		 *
		 * but first, make a quick check to see if
		 * we are already looking at the entry we
		 * want (which is usually the case).
		 * note also that we don't need to save the hint
		 * here... it is the same hint (unless we are
		 * at the header, in which case the hint didn't
		 * buy us anything anyway).
		 */
		last = &map->header;
		if ((cur != last) && (cur->end > address)) {
			UVMCNT_INCR(uvm_mlk_hint);
			*entry = cur;
			UVMHIST_LOG(maphist,"<- got it via hint (%p)",
			    cur, 0, 0, 0);
			return (TRUE);
		}

		if (map->nentries > 30)
			use_tree = 1;
	} else {
		/*
		 * go from start to hint, *inclusively*
		 */
		last = cur->next;
		cur = map->header.next;
		use_tree = 1;
	}

	uvm_tree_sanity(map, __func__);

	if (use_tree) {
		struct vm_map_entry *prev = &map->header;
		cur = RB_ROOT(&map->rbhead);

		/*
		 * Simple lookup in the tree.  Happens when the hint is
		 * invalid, or nentries reach a threshold.
		 */
		while (cur) {
			if (address >= cur->start) {
				if (address < cur->end) {
					*entry = cur;
					SAVE_HINT(map, map->hint, cur);
					return (TRUE);
				}
				prev = cur;
				cur = RB_RIGHT(cur, rb_entry);
			} else
				cur = RB_LEFT(cur, rb_entry);
		}
		*entry = prev;
		UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
		return (FALSE);
	}

	/*
	 * search linearly
	 */

	while (cur != last) {
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * save this lookup for future
				 * hints, and return
				 */

				*entry = cur;
				SAVE_HINT(map, map->hint, cur);
				UVMHIST_LOG(maphist,"<- search got it (%p)",
				    cur, 0, 0, 0);
				return (TRUE);
			}
			break;
		}
		cur = cur->next;
	}

	*entry = cur->prev;
	SAVE_HINT(map, map->hint, *entry);
	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
	return (FALSE);
}

/*
 * Checks if the address pointed to by phint fits into the empty
 * space before the vm_map_entry after.  Takes alignment and
 * offset into consideration.
 */

int
uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length,
    struct vm_map_entry *after, voff_t uoffset, vsize_t align)
{
	vaddr_t hint = *phint;
	vaddr_t end;

#ifdef PMAP_PREFER
	/*
	 * push hint forward as needed to avoid VAC alias problems.
	 * we only do this if a valid offset is specified.
	 */
	if (uoffset != UVM_UNKNOWN_OFFSET)
		PMAP_PREFER(uoffset, &hint);
#endif
	if (align != 0)
		if ((hint & (align - 1)) != 0)
			hint = roundup(hint, align);
	*phint = hint;

	end = hint + length;
	if (end > map->max_offset || end < hint)
		return (FALSE);
	if (after != NULL && after != &map->header && after->start < end)
		return (FALSE);

	return (TRUE);
}

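/*
 * Worked example for the alignment handling above (hypothetical values):
 * with align = 0x10000 and an incoming hint of 0x12345, the test
 * (hint & (align - 1)) != 0 is true, so the hint is rounded up to
 * 0x20000 before the [hint, hint + length) range is checked against
 * map->max_offset and the start of the following entry.
 */
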
/*
 * uvm_map_pie: return a random load address for a PIE executable
 * properly aligned.
 */

#ifndef VM_PIE_MAX_ADDR
#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
#endif

#ifndef VM_PIE_MIN_ADDR
#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
#endif

#ifndef VM_PIE_MIN_ALIGN
#define VM_PIE_MIN_ALIGN PAGE_SIZE
#endif

vaddr_t
uvm_map_pie(vaddr_t align)
{
	vaddr_t addr, space, min;

	align = MAX(align, VM_PIE_MIN_ALIGN);

	/* round up to next alignment */
	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);

	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
		return (align);

	space = (VM_PIE_MAX_ADDR - min) / align;
	space = MIN(space, (u_int32_t)-1);

	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
	addr += min;

	return (addr);
}

/*
 * uvm_map_hint: return the beginning of the best area suitable for
 * creating a new mapping with "prot" protection.
 */
vaddr_t
uvm_map_hint(struct proc *p, vm_prot_t prot)
{
	vaddr_t addr;

#ifdef __i386__
	/*
	 * If executable skip first two pages, otherwise start
	 * after data + heap region.
	 */
	if ((prot & VM_PROT_EXECUTE) &&
	    ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) {
		addr = (PAGE_SIZE*2) +
		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
		return (round_page(addr));
	}
#endif
	addr = (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ;
#if !defined(__vax__)
	addr += arc4random() & (MIN((256 * 1024 * 1024), MAXDSIZ) - 1);
#else
	/* start malloc/mmap after the brk */
	addr = (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ;
#endif
	return (round_page(addr));
}

/*
 * uvm_map_findspace: find "length" sized space in "map".
 *
 * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is
 *	set (in which case we insist on using "hint").
 * => "result" is VA returned
 * => uobj/uoffset are to be used to handle VAC alignment, if required
 * => if `align' is non-zero, we attempt to align to that value.
 * => caller must at least have read-locked map
 * => returns NULL on failure, or pointer to prev. map entry if success
 * => note this is a cross between the old vm_map_findspace and vm_map_find
 */

struct vm_map_entry *
uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
    vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align,
    int flags)
{
	struct vm_map_entry *entry, *next, *tmp;
	struct vm_map_entry *child, *prev = NULL;

	vaddr_t end, orig_hint;
	UVMHIST_FUNC("uvm_map_findspace");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, hint=0x%lx, len=%ld, flags=0x%lx)",
	    map, hint, length, flags);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);

	uvm_tree_sanity(map, "map_findspace entry");

	/*
	 * remember the original hint.  if we are aligning, then we
	 * may have to try again with no alignment constraint if
	 * we fail the first time.
	 */

	orig_hint = hint;
	if (hint < map->min_offset) {	/* check ranges ... */
		if (flags & UVM_FLAG_FIXED) {
			UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
			return(NULL);
		}
		hint = map->min_offset;
	}
	if (hint > map->max_offset) {
		UVMHIST_LOG(maphist,"<- VA 0x%lx > range [0x%lx->0x%lx]",
		    hint, map->min_offset, map->max_offset, 0);
		return(NULL);
	}

	/*
	 * Look for the first possible address; if there's already
	 * something at this address, we have to start after it.
	 */

	if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) {
		if ((entry = map->first_free) != &map->header)
			hint = entry->end;
	} else {
		if (uvm_map_lookup_entry(map, hint, &tmp)) {
			/* "hint" address already in use ... */
			if (flags & UVM_FLAG_FIXED) {
				UVMHIST_LOG(maphist,"<- fixed & VA in use",
				    0, 0, 0, 0);
				return(NULL);
			}
			hint = tmp->end;
		}
		entry = tmp;
	}

	if (flags & UVM_FLAG_FIXED) {
		end = hint + length;
		if (end > map->max_offset || end < hint) {
			UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0);
			goto error;
		}
		next = entry->next;
		if (next == &map->header || next->start >= end)
			goto found;
		UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0);
		return(NULL); /* only one shot at it ... */
	}

	/* Try to find the space in the red-black tree */

	/* Check slot before any entry */
	if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align))
		goto found;

	/* If there is not enough space in the whole tree, we fail */
	tmp = RB_ROOT(&map->rbhead);
	if (tmp == NULL || tmp->space < length)
		goto error;

	/* Find an entry close to hint that has enough space */
	for (; tmp;) {
		if (tmp->end >= hint &&
		    (prev == NULL || tmp->end < prev->end)) {
			if (tmp->ownspace >= length)
				prev = tmp;
			else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL &&
			    child->space >= length)
				prev = tmp;
		}
		if (tmp->end < hint)
			child = RB_RIGHT(tmp, rb_entry);
		else if (tmp->end > hint)
			child = RB_LEFT(tmp, rb_entry);
		else {
			if (tmp->ownspace >= length)
				break;
			child = RB_RIGHT(tmp, rb_entry);
		}
		if (child == NULL || child->space < length)
			break;
		tmp = child;
	}

	if (tmp != NULL && hint < tmp->end + tmp->ownspace) {
		/*
		 * Check if the entry that we found satisfies the
		 * space requirement
		 */
		if (hint < tmp->end)
			hint = tmp->end;
		if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset,
		    align)) {
			entry = tmp;
			goto found;
		} else if (tmp->ownspace >= length)
			goto listsearch;
	}
	if (prev == NULL)
		goto error;

	hint = prev->end;
	if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset,
	    align)) {
		entry = prev;
		goto found;
	} else if (prev->ownspace >= length)
		goto listsearch;

	tmp = RB_RIGHT(prev, rb_entry);
	for (;;) {
		KASSERT(tmp && tmp->space >= length);
		child = RB_LEFT(tmp, rb_entry);
		if (child && child->space >= length) {
			tmp = child;
			continue;
		}
		if (tmp->ownspace >= length)
			break;
		tmp = RB_RIGHT(tmp, rb_entry);
	}

	hint = tmp->end;
	if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) {
		entry = tmp;
		goto found;
	}

	/*
	 * The tree fails to find an entry because of offset or alignment
	 * restrictions.  Search the list instead.
	 */
listsearch:
	/*
	 * Look through the rest of the map, trying to fit a new region in
	 * the gap between existing regions, or after the very last region.
	 * note: entry->end   = base VA of current gap,
	 *	 next->start  = VA of end of current gap
	 */
	for (;; hint = (entry = next)->end) {
		/*
		 * Find the end of the proposed new region.  Be sure we didn't
		 * go beyond the end of the map, or wrap around the address;
		 * if so, we lose.  Otherwise, if this is the last entry, or
		 * if the proposed new region fits before the next entry, we
		 * win.
		 */

#ifdef PMAP_PREFER
		/*
		 * push hint forward as needed to avoid VAC alias problems.
		 * we only do this if a valid offset is specified.
		 */
		if (uoffset != UVM_UNKNOWN_OFFSET)
			PMAP_PREFER(uoffset, &hint);
#endif
		if (align != 0) {
			if ((hint & (align - 1)) != 0)
				hint = roundup(hint, align);
			/*
			 * XXX Should we PMAP_PREFER() here again?
			 */
		}
		end = hint + length;
		if (end > map->max_offset || end < hint) {
			UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0);
			goto error;
		}
		next = entry->next;
		if (next == &map->header || next->start >= end)
			break;
	}
found:
	SAVE_HINT(map, map->hint, entry);
	*result = hint;
	UVMHIST_LOG(maphist,"<- got it!  (result=0x%lx)", hint, 0,0,0);
	return (entry);

error:
	if (align != 0) {
		UVMHIST_LOG(maphist,
		    "calling recursively, no align",
		    0,0,0,0);
		return (uvm_map_findspace(map, orig_hint,
		    length, result, uobj, uoffset, 0, flags));
	}
	return (NULL);
}

/*
 *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
 */

/*
 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
 *
 * => caller must check alignment and size
 * => map must be locked by caller
 * => we return a list of map entries that we've removed from the map
 *    in "entry_list"
 */

void
uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map_entry **entry_list, struct proc *p)
{
	struct vm_map_entry *entry, *first_entry, *next;
	vaddr_t len;
	UVMHIST_FUNC("uvm_unmap_remove");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=%p, start=0x%lx, end=0x%lx)",
	    map, start, end, 0);

	VM_MAP_RANGE_CHECK(map, start, end);

	uvm_tree_sanity(map, "unmap_remove entry");

	if ((map->flags & VM_MAP_INTRSAFE) == 0)
		splassert(IPL_NONE);
	else
		splassert(IPL_VM);

	/*
	 * find first entry
	 */
	if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
		/* clip and go... */
		entry = first_entry;
		UVM_MAP_CLIP_START(map, entry, start);
		/* critical!  prevents stale hint */
		SAVE_HINT(map, entry, entry->prev);

	} else {
		entry = first_entry->next;
	}

	/*
	 * Save the free space hint
	 */

	if (map->first_free->start >= start)
		map->first_free = entry->prev;

	/*
	 * note: we now re-use first_entry for a different task.  we remove
	 * a number of map entries from the map and save them in a linked
	 * list headed by "first_entry".  once we remove them from the map
	 * the caller should unlock the map and drop the references to the
	 * backing objects [c.f. uvm_unmap_detach].  the object is to
	 * separate unmapping from reference dropping.  why?
	 *   [1] the map has to be locked for unmapping
	 *   [2] the map need not be locked for reference dropping
	 *   [3] dropping references may trigger pager I/O, and if we hit
	 *       a pager that does synchronous I/O we may have to wait for it.
	 *   [4] we would like all waiting for I/O to occur with maps unlocked
	 *       so that we don't block other threads.
	 */
	first_entry = NULL;
	*entry_list = NULL;		/* to be safe */

	/*
	 * break up the area into map entry sized regions and unmap.  note
	 * that all mappings have to be removed before we can even consider
	 * dropping references to amaps or VM objects (otherwise we could end
	 * up with a mapping to a page on the free list which would be very bad)
	 */

	while ((entry != &map->header) && (entry->start < end)) {

		UVM_MAP_CLIP_END(map, entry, end);
		next = entry->next;
		len = entry->end - entry->start;
		if (p && entry->object.uvm_obj == NULL)
			p->p_vmspace->vm_dused -= atop(len);

		/*
		 * unwire before removing addresses from the pmap; otherwise
		 * unwiring will put the entries back into the pmap (XXX).
		 */

		if (VM_MAPENT_ISWIRED(entry))
			uvm_map_entry_unwire(map, entry);

		/*
		 * special case: handle mappings to anonymous kernel objects.
		 * we want to free these pages right away...
		 */
		if (UVM_ET_ISHOLE(entry)) {
			/* nothing to do! */
		} else if (map->flags & VM_MAP_INTRSAFE) {
			uvm_km_pgremove_intrsafe(entry->start, entry->end);
			pmap_kremove(entry->start, len);
		} else if (UVM_ET_ISOBJ(entry) &&
		    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
			KASSERT(vm_map_pmap(map) == pmap_kernel());

			/*
			 * note: kernel object mappings are currently used in
			 * two ways:
			 *  [1] "normal" mappings of pages in the kernel object
			 *  [2] uvm_km_valloc'd allocations in which we
			 *      pmap_enter in some non-kernel-object page
			 *      (e.g. vmapbuf).
			 *
			 * for case [1], we need to remove the mapping from
			 * the pmap and then remove the page from the kernel
			 * object (because, once pages in a kernel object are
			 * unmapped they are no longer needed, unlike, say,
			 * a vnode where you might want the data to persist
			 * until flushed out of a queue).
			 *
			 * for case [2], we need to remove the mapping from
			 * the pmap.  there shouldn't be any pages at the
			 * specified offset in the kernel object [but it
			 * doesn't hurt to call uvm_km_pgremove just to be
			 * safe?]
			 *
			 * uvm_km_pgremove currently does the following:
			 *   for pages in the kernel object in range:
			 *     - drops the swap slot
			 *     - uvm_pagefree the page
			 *
			 * note there is a version of uvm_km_pgremove() that
			 * is used for "intrsafe" objects.
			 */

			/*
			 * remove mappings from pmap and drop the pages
			 * from the object.  offsets are always relative
			 * to vm_map_min(kernel_map).
			 */
			pmap_remove(pmap_kernel(), entry->start, entry->end);
			uvm_km_pgremove(entry->object.uvm_obj,
			    entry->start - vm_map_min(kernel_map),
			    entry->end - vm_map_min(kernel_map));

			/*
			 * null out kernel_object reference, we've just
			 * dropped it
			 */
			entry->etype &= ~UVM_ET_OBJ;
			entry->object.uvm_obj = NULL;	/* to be safe */

		} else {
			/*
			 * remove mappings the standard way.
			 */
			pmap_remove(map->pmap, entry->start, entry->end);
		}

		/*
		 * remove entry from map and put it on our list of entries
		 * that we've nuked.  then go do next entry.
		 */
		UVMHIST_LOG(maphist, "  removed map entry %p", entry, 0, 0,0);

		/* critical!  prevents stale hint */
		SAVE_HINT(map, entry, entry->prev);

		uvm_map_entry_unlink(map, entry);
		map->size -= len;
		entry->next = first_entry;
		first_entry = entry;
		entry = next;		/* next entry, please */
	}
	/* if ((map->flags & VM_MAP_DYING) == 0) { */
		pmap_update(vm_map_pmap(map));
	/* } */


	uvm_tree_sanity(map, "unmap_remove leave");

	/*
	 * now we've cleaned up the map and are ready for the caller to drop
	 * references to the mapped objects.
	 */

	*entry_list = first_entry;
	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
}

/*
 * uvm_unmap_detach: drop references in a chain of map entries
 *
 * => we will free the map entries as we traverse the list.
 */

void
uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
{
	struct vm_map_entry *next_entry;
	UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);

	while (first_entry) {
		KASSERT(!VM_MAPENT_ISWIRED(first_entry));
		UVMHIST_LOG(maphist,
		    "  detach 0x%lx: amap=%p, obj=%p, submap?=%ld",
		    first_entry, first_entry->aref.ar_amap,
		    first_entry->object.uvm_obj,
		    UVM_ET_ISSUBMAP(first_entry));

		/*
		 * drop reference to amap, if we've got one
		 */

		if (first_entry->aref.ar_amap)
			uvm_map_unreference_amap(first_entry, flags);

		/*
		 * drop reference to our backing object, if we've got one
		 */

		if (UVM_ET_ISSUBMAP(first_entry)) {
			/* ... unlikely to happen, but play it safe */
			uvm_map_deallocate(first_entry->object.sub_map);
		} else {
			if (UVM_ET_ISOBJ(first_entry) &&
			    first_entry->object.uvm_obj->pgops->pgo_detach)
				first_entry->object.uvm_obj->pgops->
				    pgo_detach(first_entry->object.uvm_obj);
		}

		next_entry = first_entry->next;
		uvm_mapent_free(first_entry);
		first_entry = next_entry;
	}
	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}

/*
 *   E X T R A C T I O N   F U N C T I O N S
 */

/*
 * uvm_map_reserve: reserve space in a vm_map for future use.
 *
 * => we reserve space in a map by putting a dummy map entry in the
 *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
 * => map should be unlocked (we will write lock it)
 * => we return true if we were able to reserve space
 * => XXXCDC: should be inline?
 */

int
uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset,
    vsize_t align, vaddr_t *raddr)
{
	UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=%p, size=0x%lx, offset=0x%lx,addr=0x%lx)",
	    map,size,offset,raddr);

	size = round_page(size);
	if (*raddr < vm_map_min(map))
		*raddr = vm_map_min(map);	/* hint */

	/*
	 * reserve some virtual space.
	 */

	if (uvm_map(map, raddr, size, NULL, offset, 0,
	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
		UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
		return (FALSE);
	}

	UVMHIST_LOG(maphist, "<- done (*raddr=0x%lx)", *raddr,0,0,0);
	return (TRUE);
}

/*
 * uvm_map_replace: replace a reserved (blank) area of memory with
 * real mappings.
 *
 * => caller must WRITE-LOCK the map
 * => we return TRUE if replacement was a success
 * => we expect the newents chain to have nnewents entries on it and
 *    we expect newents->prev to point to the last entry on the list
 * => note newents is allowed to be NULL
 */

int
uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map_entry *newents, int nnewents)
{
	struct vm_map_entry *oldent, *last;

	uvm_tree_sanity(map, "map_replace entry");

	/*
	 * first find the blank map entry at the specified address
	 */

	if (!uvm_map_lookup_entry(map, start, &oldent)) {
		return(FALSE);
	}

	/*
	 * check to make sure we have a proper blank entry
	 */

	if (oldent->start != start || oldent->end != end ||
	    oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
		return (FALSE);
	}

#ifdef DIAGNOSTIC
	/*
	 * sanity check the newents chain
	 */
	{
		struct vm_map_entry *tmpent = newents;
		int nent = 0;
		vaddr_t cur = start;

		while (tmpent) {
			nent++;
			if (tmpent->start < cur)
				panic("uvm_map_replace1");
			if (tmpent->start > tmpent->end || tmpent->end > end) {
				printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
				    tmpent->start, tmpent->end, end);
				panic("uvm_map_replace2");
			}
			cur = tmpent->end;
			if (tmpent->next) {
				if (tmpent->next->prev != tmpent)
					panic("uvm_map_replace3");
			} else {
				if (newents->prev != tmpent)
					panic("uvm_map_replace4");
			}
			tmpent = tmpent->next;
		}
		if (nent != nnewents)
			panic("uvm_map_replace5");
	}
#endif

	/*
	 * map entry is a valid blank!   replace it.   (this does all the
	 * work of map entry link/unlink...).
	 */

	if (newents) {
		last = newents->prev;		/* we expect this */

		/* critical: flush stale hints out of map */
		SAVE_HINT(map, map->hint, newents);
		if (map->first_free == oldent)
			map->first_free = last;

		last->next = oldent->next;
		last->next->prev = last;

		/* Fix RB tree */
		uvm_rb_remove(map, oldent);

		newents->prev = oldent->prev;
		newents->prev->next = newents;
		map->nentries = map->nentries + (nnewents - 1);

		/* Fixup the RB tree */
		{
			int i;
			struct vm_map_entry *tmp;

			tmp = newents;
			for (i = 0; i < nnewents && tmp; i++) {
				uvm_rb_insert(map, tmp);
				tmp = tmp->next;
			}
		}
	} else {

		/* critical: flush stale hints out of map */
		SAVE_HINT(map, map->hint, oldent->prev);
		if (map->first_free == oldent)
			map->first_free = oldent->prev;

		/* NULL list of new entries: just remove the old one */
		uvm_map_entry_unlink(map, oldent);
	}


	uvm_tree_sanity(map, "map_replace leave");

	/*
	 * now we can free the old blank entry, unlock the map and return.
	 */

	uvm_mapent_free(oldent);
	return(TRUE);
}

/*
 * uvm_map_extract: extract a mapping from a map and put it somewhere
 * (maybe removing the old mapping)
 *
 * => maps should be unlocked (we will write lock them)
 * => returns 0 on success, error code otherwise
 * => start must be page aligned
 * => len must be page sized
 * => flags:
 *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
 *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
 *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
 *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
 *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
 *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
 *             be used from within the kernel in a kernel level map <<<
 */

int
uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
    struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
{
	vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge,
	    oldstart;
	struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry;
	struct vm_map_entry *deadentry, *oldentry;
	vsize_t elen;
	int nchain, error, copy_ok;
	UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(srcmap=%p,start=0x%lx, len=0x%lx", srcmap, start,
	    len,0);
	UVMHIST_LOG(maphist," ...,dstmap=%p, flags=0x%lx)", dstmap,flags,0,0);

	uvm_tree_sanity(srcmap, "map_extract src enter");
	uvm_tree_sanity(dstmap, "map_extract dst enter");

	/*
	 * step 0: sanity check: start must be on a page boundary, length
	 * must be page sized.  can't ask for CONTIG/QREF if you asked for
	 * REMOVE.
	 */

	KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
	KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
	    (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);

	/*
	 * step 1: reserve space in the target map for the extracted area
	 */

	dstaddr = vm_map_min(dstmap);
	if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
		return(ENOMEM);
	*dstaddrp = dstaddr;	/* pass address back to caller */
	UVMHIST_LOG(maphist, "  dstaddr=0x%lx", dstaddr,0,0,0);

	/*
	 * step 2: setup for the extraction process loop by init'ing the
	 * map entry chain, locking src map, and looking up the first useful
	 * entry in the map.
	 */

	end = start + len;
	newend = dstaddr + len;
	chain = endchain = NULL;
	nchain = 0;
	vm_map_lock(srcmap);

	if (uvm_map_lookup_entry(srcmap, start, &entry)) {

		/* "start" is within an entry */
		if (flags & UVM_EXTRACT_QREF) {

			/*
			 * for quick references we don't clip the entry, so
			 * the entry may map space "before" the starting
			 * virtual address... this is the "fudge" factor
			 * (which can be non-zero only the first time
			 * through the "while" loop in step 3).
			 */

			fudge = start - entry->start;
		} else {

			/*
			 * normal reference: we clip the map to fit (thus
			 * fudge is zero)
			 */

			UVM_MAP_CLIP_START(srcmap, entry, start);
			SAVE_HINT(srcmap, srcmap->hint, entry->prev);
			fudge = 0;
		}
	} else {

		/* "start" is not within an entry ... skip to next entry */
		if (flags & UVM_EXTRACT_CONTIG) {
			error = EINVAL;
			goto bad;	/* definite hole here ... */
		}

		entry = entry->next;
		fudge = 0;
	}

	/* save values from srcmap for step 6 */
	orig_entry = entry;
	orig_fudge = fudge;

	/*
	 * step 3: now start looping through the map entries, extracting
	 * as we go.
	 */

	while (entry->start < end && entry != &srcmap->header) {

		/* if we are not doing a quick reference, clip it */
		if ((flags & UVM_EXTRACT_QREF) == 0)
			UVM_MAP_CLIP_END(srcmap, entry, end);

		/* clear needs_copy (allow chunking) */
		if (UVM_ET_ISNEEDSCOPY(entry)) {
			if (fudge)
				oldstart = entry->start;
			else
				oldstart = 0;	/* XXX: gcc */
			amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
			if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
				error = ENOMEM;
				goto bad;
			}

			/* amap_copy could clip (during chunk)!  update fudge */
			if (fudge) {
				fudge = fudge - (entry->start - oldstart);
				orig_fudge = fudge;
			}
		}

		/* calculate the offset of this from "start" */
		oldoffset = (entry->start + fudge) - start;

		/* allocate a new map entry */
		newentry = uvm_mapent_alloc(dstmap);
		if (newentry == NULL) {
			error = ENOMEM;
			goto bad;
		}

		/* set up new map entry */
		newentry->next = NULL;
		newentry->prev = endchain;
		newentry->start = dstaddr + oldoffset;
		newentry->end =
		    newentry->start + (entry->end - (entry->start + fudge));
		if (newentry->end > newend || newentry->end < newentry->start)
			newentry->end = newend;
		newentry->object.uvm_obj = entry->object.uvm_obj;
		if (newentry->object.uvm_obj) {
			if (newentry->object.uvm_obj->pgops->pgo_reference)
				newentry->object.uvm_obj->pgops->
				    pgo_reference(newentry->object.uvm_obj);
			newentry->offset = entry->offset + fudge;
		} else {
			newentry->offset = 0;
		}
		newentry->etype = entry->etype;
		newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
		    entry->max_protection : entry->protection;
		newentry->max_protection = entry->max_protection;
		newentry->inheritance = entry->inheritance;
		newentry->wired_count = 0;
		newentry->aref.ar_amap = entry->aref.ar_amap;
		if (newentry->aref.ar_amap) {
			newentry->aref.ar_pageoff =
			    entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
			uvm_map_reference_amap(newentry, AMAP_SHARED |
			    ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
		} else {
			newentry->aref.ar_pageoff = 0;
		}
		newentry->advice = entry->advice;

		/* now link it on the chain */
		nchain++;
		if (endchain == NULL) {
			chain = endchain = newentry;
		} else {
			endchain->next = newentry;
			endchain = newentry;
		}

		/* end of 'while' loop! */
		if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
		    (entry->next == &srcmap->header ||
		    entry->next->start != entry->end)) {
			error = EINVAL;
			goto bad;
		}
		entry = entry->next;
		fudge = 0;
	}

	/*
	 * step 4: close off chain (in format expected by uvm_map_replace)
	 */

	if (chain)
		chain->prev = endchain;

	/*
	 * step 5: attempt to lock the dest map so we can pmap_copy.
2032 * note usage of copy_ok: 2033 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2034 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2035 */ 2036 2037 if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) { 2038 copy_ok = 1; 2039 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2040 nchain)) { 2041 if (srcmap != dstmap) 2042 vm_map_unlock(dstmap); 2043 error = EIO; 2044 goto bad; 2045 } 2046 } else { 2047 copy_ok = 0; 2048 /* replace defered until step 7 */ 2049 } 2050 2051 /* 2052 * step 6: traverse the srcmap a second time to do the following: 2053 * - if we got a lock on the dstmap do pmap_copy 2054 * - if UVM_EXTRACT_REMOVE remove the entries 2055 * we make use of orig_entry and orig_fudge (saved in step 2) 2056 */ 2057 2058 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2059 2060 /* purge possible stale hints from srcmap */ 2061 if (flags & UVM_EXTRACT_REMOVE) { 2062 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2063 if (srcmap->first_free->start >= start) 2064 srcmap->first_free = orig_entry->prev; 2065 } 2066 2067 entry = orig_entry; 2068 fudge = orig_fudge; 2069 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2070 2071 while (entry->start < end && entry != &srcmap->header) { 2072 if (copy_ok) { 2073 oldoffset = (entry->start + fudge) - start; 2074 elen = MIN(end, entry->end) - 2075 (entry->start + fudge); 2076 pmap_copy(dstmap->pmap, srcmap->pmap, 2077 dstaddr + oldoffset, elen, 2078 entry->start + fudge); 2079 } 2080 2081 /* we advance "entry" in the following if statement */ 2082 if (flags & UVM_EXTRACT_REMOVE) { 2083 pmap_remove(srcmap->pmap, entry->start, 2084 entry->end); 2085 oldentry = entry; /* save entry */ 2086 entry = entry->next; /* advance */ 2087 uvm_map_entry_unlink(srcmap, oldentry); 2088 /* add to dead list */ 2089 oldentry->next = deadentry; 2090 deadentry = oldentry; 2091 } else { 2092 entry = entry->next; /* advance */ 2093 } 2094 2095 /* end of 'while' loop */ 2096 fudge = 0; 2097 } 2098 pmap_update(srcmap->pmap); 2099 2100 /* 2101 * unlock dstmap. we will dispose of deadentry in 2102 * step 7 if needed 2103 */ 2104 2105 if (copy_ok && srcmap != dstmap) 2106 vm_map_unlock(dstmap); 2107 2108 } 2109 else 2110 deadentry = NULL; /* XXX: gcc */ 2111 2112 /* 2113 * step 7: we are done with the source map, unlock. if copy_ok 2114 * is 0 then we have not replaced the dummy mapping in dstmap yet 2115 * and we need to do so now. 2116 */ 2117 2118 vm_map_unlock(srcmap); 2119 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2120 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2121 2122 /* now do the replacement if we didn't do it in step 5 */ 2123 if (copy_ok == 0) { 2124 vm_map_lock(dstmap); 2125 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2126 nchain); 2127 vm_map_unlock(dstmap); 2128 2129 if (error == FALSE) { 2130 error = EIO; 2131 goto bad2; 2132 } 2133 } 2134 2135 uvm_tree_sanity(srcmap, "map_extract src leave"); 2136 uvm_tree_sanity(dstmap, "map_extract dst leave"); 2137 2138 return(0); 2139 2140 /* 2141 * bad: failure recovery 2142 */ 2143 bad: 2144 vm_map_unlock(srcmap); 2145 bad2: /* src already unlocked */ 2146 if (chain) 2147 uvm_unmap_detach(chain, 2148 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); 2149 2150 uvm_tree_sanity(srcmap, "map_extract src err leave"); 2151 uvm_tree_sanity(dstmap, "map_extract dst err leave"); 2152 2153 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? 
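 * presumably: tear down the space we reserved in
 * dstmap back in step 1, so a failed extract does
 * not leave the placeholder mapping behind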
*/ 2154 return(error); 2155 } 2156 2157 /* end of extraction functions */ 2158 2159 /* 2160 * uvm_map_submap: punch down part of a map into a submap 2161 * 2162 * => only the kernel_map is allowed to be submapped 2163 * => the purpose of submapping is to break up the locking granularity 2164 * of a larger map 2165 * => the range specified must have been mapped previously with a uvm_map() 2166 * call [with uobj==NULL] to create a blank map entry in the main map. 2167 * [And it had better still be blank!] 2168 * => maps which contain submaps should never be copied or forked. 2169 * => to remove a submap, use uvm_unmap() on the main map 2170 * and then uvm_map_deallocate() the submap. 2171 * => main map must be unlocked. 2172 * => submap must have been init'd and have a zero reference count. 2173 * [need not be locked as we don't actually reference it] 2174 */ 2175 2176 int 2177 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2178 struct vm_map *submap) 2179 { 2180 struct vm_map_entry *entry; 2181 int result; 2182 2183 vm_map_lock(map); 2184 2185 VM_MAP_RANGE_CHECK(map, start, end); 2186 2187 if (uvm_map_lookup_entry(map, start, &entry)) { 2188 UVM_MAP_CLIP_START(map, entry, start); 2189 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 2190 } else { 2191 entry = NULL; 2192 } 2193 2194 if (entry != NULL && 2195 entry->start == start && entry->end == end && 2196 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2197 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2198 entry->etype |= UVM_ET_SUBMAP; 2199 entry->object.sub_map = submap; 2200 entry->offset = 0; 2201 uvm_map_reference(submap); 2202 result = 0; 2203 } else { 2204 result = EINVAL; 2205 } 2206 vm_map_unlock(map); 2207 return(result); 2208 } 2209 2210 2211 /* 2212 * uvm_map_protect: change map protection 2213 * 2214 * => set_max means set max_protection. 2215 * => map must be unlocked. 2216 */ 2217 2218 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2219 ~VM_PROT_WRITE : VM_PROT_ALL) 2220 #define max(a,b) ((a) > (b) ? (a) : (b)) 2221 2222 int 2223 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2224 vm_prot_t new_prot, boolean_t set_max) 2225 { 2226 struct vm_map_entry *current, *entry; 2227 int error = 0; 2228 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 2229 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_prot=0x%lx)", 2230 map, start, end, new_prot); 2231 2232 vm_map_lock(map); 2233 2234 VM_MAP_RANGE_CHECK(map, start, end); 2235 2236 if (uvm_map_lookup_entry(map, start, &entry)) { 2237 UVM_MAP_CLIP_START(map, entry, start); 2238 } else { 2239 entry = entry->next; 2240 } 2241 2242 /* 2243 * make a first pass to check for protection violations. 2244 */ 2245 2246 current = entry; 2247 while ((current != &map->header) && (current->start < end)) { 2248 if (UVM_ET_ISSUBMAP(current)) { 2249 error = EINVAL; 2250 goto out; 2251 } 2252 if ((new_prot & current->max_protection) != new_prot) { 2253 error = EACCES; 2254 goto out; 2255 } 2256 current = current->next; 2257 } 2258 2259 /* go back and fix up protections (no need to clip this time). */ 2260 2261 current = entry; 2262 2263 while ((current != &map->header) && (current->start < end)) { 2264 vm_prot_t old_prot; 2265 2266 UVM_MAP_CLIP_END(map, current, end); 2267 2268 old_prot = current->protection; 2269 if (set_max) 2270 current->protection = 2271 (current->max_protection = new_prot) & old_prot; 2272 else 2273 current->protection = new_prot; 2274 2275 /* 2276 * update physical map if necessary. 
worry about copy-on-write 2277 * here -- CHECK THIS XXX 2278 */ 2279 2280 if (current->protection != old_prot) { 2281 /* update pmap! */ 2282 if ((current->protection & MASK(entry)) == PROT_NONE && 2283 VM_MAPENT_ISWIRED(entry)) 2284 current->wired_count--; 2285 pmap_protect(map->pmap, current->start, current->end, 2286 current->protection & MASK(entry)); 2287 } 2288 2289 /* 2290 * If the map is configured to lock any future mappings, 2291 * wire this entry now if the old protection was VM_PROT_NONE 2292 * and the new protection is not VM_PROT_NONE. 2293 */ 2294 2295 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 2296 VM_MAPENT_ISWIRED(entry) == 0 && 2297 old_prot == VM_PROT_NONE && 2298 new_prot != VM_PROT_NONE) { 2299 if (uvm_map_pageable(map, entry->start, entry->end, 2300 FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 2301 /* 2302 * If locking the entry fails, remember the 2303 * error if it's the first one. Note we 2304 * still continue setting the protection in 2305 * the map, but will return the resource 2306 * shortage condition regardless. 2307 * 2308 * XXX Ignore what the actual error is, 2309 * XXX just call it a resource shortage 2310 * XXX so that it doesn't get confused 2311 * XXX what uvm_map_protect() itself would 2312 * XXX normally return. 2313 */ 2314 error = ENOMEM; 2315 } 2316 } 2317 2318 current = current->next; 2319 } 2320 pmap_update(map->pmap); 2321 2322 out: 2323 vm_map_unlock(map); 2324 UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0); 2325 return (error); 2326 } 2327 2328 #undef max 2329 #undef MASK 2330 2331 /* 2332 * uvm_map_inherit: set inheritance code for range of addrs in map. 2333 * 2334 * => map must be unlocked 2335 * => note that the inherit code is used during a "fork". see fork 2336 * code for details. 2337 */ 2338 2339 int 2340 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 2341 vm_inherit_t new_inheritance) 2342 { 2343 struct vm_map_entry *entry, *temp_entry; 2344 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 2345 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_inh=0x%lx)", 2346 map, start, end, new_inheritance); 2347 2348 switch (new_inheritance) { 2349 case MAP_INHERIT_NONE: 2350 case MAP_INHERIT_COPY: 2351 case MAP_INHERIT_SHARE: 2352 break; 2353 default: 2354 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 2355 return (EINVAL); 2356 } 2357 2358 vm_map_lock(map); 2359 2360 VM_MAP_RANGE_CHECK(map, start, end); 2361 2362 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 2363 entry = temp_entry; 2364 UVM_MAP_CLIP_START(map, entry, start); 2365 } else { 2366 entry = temp_entry->next; 2367 } 2368 2369 while ((entry != &map->header) && (entry->start < end)) { 2370 UVM_MAP_CLIP_END(map, entry, end); 2371 entry->inheritance = new_inheritance; 2372 entry = entry->next; 2373 } 2374 2375 vm_map_unlock(map); 2376 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 2377 return (0); 2378 } 2379 2380 /* 2381 * uvm_map_advice: set advice code for range of addrs in map. 
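 *
 * a sketch of a typical (hypothetical) caller -- e.g. the madvise(2)
 * path -- handing the whole request range through:
 *
 *	error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
 *	    addr + len, MADV_SEQUENTIAL);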
2382 * 2383 * => map must be unlocked 2384 */ 2385 2386 int 2387 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 2388 { 2389 struct vm_map_entry *entry, *temp_entry; 2390 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 2391 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_adv=0x%lx)", 2392 map, start, end, new_advice); 2393 2394 vm_map_lock(map); 2395 VM_MAP_RANGE_CHECK(map, start, end); 2396 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 2397 entry = temp_entry; 2398 UVM_MAP_CLIP_START(map, entry, start); 2399 } else { 2400 entry = temp_entry->next; 2401 } 2402 2403 /* 2404 * XXXJRT: disallow holes? 2405 */ 2406 2407 while ((entry != &map->header) && (entry->start < end)) { 2408 UVM_MAP_CLIP_END(map, entry, end); 2409 2410 switch (new_advice) { 2411 case MADV_NORMAL: 2412 case MADV_RANDOM: 2413 case MADV_SEQUENTIAL: 2414 /* nothing special here */ 2415 break; 2416 2417 default: 2418 vm_map_unlock(map); 2419 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 2420 return (EINVAL); 2421 } 2422 entry->advice = new_advice; 2423 entry = entry->next; 2424 } 2425 2426 vm_map_unlock(map); 2427 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 2428 return (0); 2429 } 2430 2431 /* 2432 * uvm_map_pageable: sets the pageability of a range in a map. 2433 * 2434 * => wires map entries. should not be used for transient page locking. 2435 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 2436 * => regions sepcified as not pageable require lock-down (wired) memory 2437 * and page tables. 2438 * => map must never be read-locked 2439 * => if islocked is TRUE, map is already write-locked 2440 * => we always unlock the map, since we must downgrade to a read-lock 2441 * to call uvm_fault_wire() 2442 * => XXXCDC: check this and try and clean it up. 2443 */ 2444 2445 int 2446 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2447 boolean_t new_pageable, int lockflags) 2448 { 2449 struct vm_map_entry *entry, *start_entry, *failed_entry; 2450 int rv; 2451 #ifdef DIAGNOSTIC 2452 u_int timestamp_save; 2453 #endif 2454 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 2455 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_pageable=0x%lx)", 2456 map, start, end, new_pageable); 2457 KASSERT(map->flags & VM_MAP_PAGEABLE); 2458 2459 if ((lockflags & UVM_LK_ENTER) == 0) 2460 vm_map_lock(map); 2461 2462 VM_MAP_RANGE_CHECK(map, start, end); 2463 2464 /* 2465 * only one pageability change may take place at one time, since 2466 * uvm_fault_wire assumes it will be called only once for each 2467 * wiring/unwiring. therefore, we have to make sure we're actually 2468 * changing the pageability for the entire region. we do so before 2469 * making any changes. 2470 */ 2471 2472 if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { 2473 if ((lockflags & UVM_LK_EXIT) == 0) 2474 vm_map_unlock(map); 2475 2476 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 2477 return (EFAULT); 2478 } 2479 entry = start_entry; 2480 2481 /* 2482 * handle wiring and unwiring separately. 2483 */ 2484 2485 if (new_pageable) { /* unwire */ 2486 UVM_MAP_CLIP_START(map, entry, start); 2487 2488 /* 2489 * unwiring. first ensure that the range to be unwired is 2490 * really wired down and that there are no holes. 
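 * in the loop below, wired_count == 0 flags a piece that was
 * never wired, and a hole shows up as an entry that ends short
 * of "end" whose successor is either the header or starts past
 * entry->end; both cases return EINVAL before any wiring state
 * is touched.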
2491 */ 2492 2493 while ((entry != &map->header) && (entry->start < end)) { 2494 if (entry->wired_count == 0 || 2495 (entry->end < end && 2496 (entry->next == &map->header || 2497 entry->next->start > entry->end))) { 2498 if ((lockflags & UVM_LK_EXIT) == 0) 2499 vm_map_unlock(map); 2500 UVMHIST_LOG(maphist, 2501 "<- done (INVALID UNWIRE ARG)",0,0,0,0); 2502 return (EINVAL); 2503 } 2504 entry = entry->next; 2505 } 2506 2507 /* 2508 * POSIX 1003.1b - a single munlock call unlocks a region, 2509 * regardless of the number of mlock calls made on that 2510 * region. 2511 */ 2512 2513 entry = start_entry; 2514 while ((entry != &map->header) && (entry->start < end)) { 2515 UVM_MAP_CLIP_END(map, entry, end); 2516 if (VM_MAPENT_ISWIRED(entry)) 2517 uvm_map_entry_unwire(map, entry); 2518 entry = entry->next; 2519 } 2520 if ((lockflags & UVM_LK_EXIT) == 0) 2521 vm_map_unlock(map); 2522 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 2523 return (0); 2524 } 2525 2526 /* 2527 * wire case: in two passes [XXXCDC: ugly block of code here] 2528 * 2529 * 1: holding the write lock, we create any anonymous maps that need 2530 * to be created. then we clip each map entry to the region to 2531 * be wired and increment its wiring count. 2532 * 2533 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2534 * in the pages for any newly wired area (wired_count == 1). 2535 * 2536 * downgrading to a read lock for uvm_fault_wire avoids a possible 2537 * deadlock with another thread that may have faulted on one of 2538 * the pages to be wired (it would mark the page busy, blocking 2539 * us, then in turn block on the map lock that we hold). because 2540 * of problems in the recursive lock package, we cannot upgrade 2541 * to a write lock in vm_map_lookup. thus, any actions that 2542 * require the write lock must be done beforehand. because we 2543 * keep the read lock on the map, the copy-on-write status of the 2544 * entries we modify here cannot change. 2545 */ 2546 2547 while ((entry != &map->header) && (entry->start < end)) { 2548 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2549 2550 /* 2551 * perform actions of vm_map_lookup that need the 2552 * write lock on the map: create an anonymous map 2553 * for a copy-on-write region, or an anonymous map 2554 * for a zero-fill region. (XXXCDC: submap case 2555 * ok?) 2556 */ 2557 2558 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 2559 if (UVM_ET_ISNEEDSCOPY(entry) && 2560 ((entry->protection & VM_PROT_WRITE) || 2561 (entry->object.uvm_obj == NULL))) { 2562 amap_copy(map, entry, M_WAITOK, TRUE, 2563 start, end); 2564 /* XXXCDC: wait OK? */ 2565 } 2566 } 2567 } 2568 UVM_MAP_CLIP_START(map, entry, start); 2569 UVM_MAP_CLIP_END(map, entry, end); 2570 entry->wired_count++; 2571 2572 /* 2573 * Check for holes 2574 */ 2575 2576 if (entry->protection == VM_PROT_NONE || 2577 (entry->end < end && 2578 (entry->next == &map->header || 2579 entry->next->start > entry->end))) { 2580 2581 /* 2582 * found one. amap creation actions do not need to 2583 * be undone, but the wired counts need to be restored. 2584 */ 2585 2586 while (entry != &map->header && entry->end > start) { 2587 entry->wired_count--; 2588 entry = entry->prev; 2589 } 2590 if ((lockflags & UVM_LK_EXIT) == 0) 2591 vm_map_unlock(map); 2592 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 2593 return (EINVAL); 2594 } 2595 entry = entry->next; 2596 } 2597 2598 /* 2599 * Pass 2. 
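 * now downgrade to a read lock (marking the map busy) and call
 * uvm_fault_wire() on every entry whose wired_count just became 1;
 * if any wiring fails we re-upgrade below and back out the
 * wiring counts.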
2600 */ 2601 2602 #ifdef DIAGNOSTIC 2603 timestamp_save = map->timestamp; 2604 #endif 2605 vm_map_busy(map); 2606 vm_map_downgrade(map); 2607 2608 rv = 0; 2609 entry = start_entry; 2610 while (entry != &map->header && entry->start < end) { 2611 if (entry->wired_count == 1) { 2612 rv = uvm_fault_wire(map, entry->start, entry->end, 2613 entry->protection); 2614 if (rv) { 2615 /* 2616 * wiring failed. break out of the loop. 2617 * we'll clean up the map below, once we 2618 * have a write lock again. 2619 */ 2620 break; 2621 } 2622 } 2623 entry = entry->next; 2624 } 2625 2626 if (rv) { /* failed? */ 2627 2628 /* 2629 * Get back to an exclusive (write) lock. 2630 */ 2631 2632 vm_map_upgrade(map); 2633 vm_map_unbusy(map); 2634 2635 #ifdef DIAGNOSTIC 2636 if (timestamp_save != map->timestamp) 2637 panic("uvm_map_pageable: stale map"); 2638 #endif 2639 2640 /* 2641 * first drop the wiring count on all the entries 2642 * which haven't actually been wired yet. 2643 */ 2644 2645 failed_entry = entry; 2646 while (entry != &map->header && entry->start < end) { 2647 entry->wired_count--; 2648 entry = entry->next; 2649 } 2650 2651 /* 2652 * now, unwire all the entries that were successfully 2653 * wired above. 2654 */ 2655 2656 entry = start_entry; 2657 while (entry != failed_entry) { 2658 entry->wired_count--; 2659 if (VM_MAPENT_ISWIRED(entry) == 0) 2660 uvm_map_entry_unwire(map, entry); 2661 entry = entry->next; 2662 } 2663 if ((lockflags & UVM_LK_EXIT) == 0) 2664 vm_map_unlock(map); 2665 UVMHIST_LOG(maphist, "<- done (RV=%ld)", rv,0,0,0); 2666 return(rv); 2667 } 2668 2669 /* We are holding a read lock here. */ 2670 if ((lockflags & UVM_LK_EXIT) == 0) { 2671 vm_map_unbusy(map); 2672 vm_map_unlock_read(map); 2673 } else { 2674 2675 /* 2676 * Get back to an exclusive (write) lock. 2677 */ 2678 2679 vm_map_upgrade(map); 2680 vm_map_unbusy(map); 2681 } 2682 2683 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 2684 return (0); 2685 } 2686 2687 /* 2688 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2689 * all mapped regions. 2690 * 2691 * => map must not be locked. 2692 * => if no flags are specified, all regions are unwired. 2693 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 2694 */ 2695 2696 int 2697 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2698 { 2699 struct vm_map_entry *entry, *failed_entry; 2700 vsize_t size; 2701 int error; 2702 #ifdef DIAGNOSTIC 2703 u_int timestamp_save; 2704 #endif 2705 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 2706 UVMHIST_LOG(maphist,"(map=%p,flags=0x%lx)", map, flags, 0, 0); 2707 2708 KASSERT(map->flags & VM_MAP_PAGEABLE); 2709 2710 vm_map_lock(map); 2711 2712 /* 2713 * handle wiring and unwiring separately. 2714 */ 2715 2716 if (flags == 0) { /* unwire */ 2717 /* 2718 * POSIX 1003.1b -- munlockall unlocks all regions, 2719 * regardless of how many times mlockall has been called. 2720 */ 2721 for (entry = map->header.next; entry != &map->header; 2722 entry = entry->next) { 2723 if (VM_MAPENT_ISWIRED(entry)) 2724 uvm_map_entry_unwire(map, entry); 2725 } 2726 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2727 vm_map_unlock(map); 2728 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 2729 return (0); 2730 2731 /* 2732 * end of unwire case! 2733 */ 2734 } 2735 2736 if (flags & MCL_FUTURE) { 2737 /* 2738 * must wire all future mappings; remember this. 
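 * VM_MAP_WIREFUTURE is consulted elsewhere, e.g. by
 * uvm_map_protect() when an entry goes from VM_PROT_NONE to an
 * accessible protection, and is revoked again on exec (see
 * uvmspace_exec()).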
2739 */ 2740 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2741 } 2742 2743 if ((flags & MCL_CURRENT) == 0) { 2744 /* 2745 * no more work to do! 2746 */ 2747 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 2748 vm_map_unlock(map); 2749 return (0); 2750 } 2751 2752 /* 2753 * wire case: in three passes [XXXCDC: ugly block of code here] 2754 * 2755 * 1: holding the write lock, count all pages mapped by non-wired 2756 * entries. if this would cause us to go over our limit, we fail. 2757 * 2758 * 2: still holding the write lock, we create any anonymous maps that 2759 * need to be created. then we increment its wiring count. 2760 * 2761 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 2762 * in the pages for any newly wired area (wired_count == 1). 2763 * 2764 * downgrading to a read lock for uvm_fault_wire avoids a possible 2765 * deadlock with another thread that may have faulted on one of 2766 * the pages to be wired (it would mark the page busy, blocking 2767 * us, then in turn block on the map lock that we hold). because 2768 * of problems in the recursive lock package, we cannot upgrade 2769 * to a write lock in vm_map_lookup. thus, any actions that 2770 * require the write lock must be done beforehand. because we 2771 * keep the read lock on the map, the copy-on-write status of the 2772 * entries we modify here cannot change. 2773 */ 2774 2775 for (size = 0, entry = map->header.next; entry != &map->header; 2776 entry = entry->next) { 2777 if (entry->protection != VM_PROT_NONE && 2778 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2779 size += entry->end - entry->start; 2780 } 2781 } 2782 2783 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2784 vm_map_unlock(map); 2785 return (ENOMEM); /* XXX overloaded */ 2786 } 2787 2788 /* XXX non-pmap_wired_count case must be handled by caller */ 2789 #ifdef pmap_wired_count 2790 if (limit != 0 && 2791 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 2792 vm_map_unlock(map); 2793 return (ENOMEM); /* XXX overloaded */ 2794 } 2795 #endif 2796 2797 /* 2798 * Pass 2. 2799 */ 2800 2801 for (entry = map->header.next; entry != &map->header; 2802 entry = entry->next) { 2803 if (entry->protection == VM_PROT_NONE) 2804 continue; 2805 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2806 /* 2807 * perform actions of vm_map_lookup that need the 2808 * write lock on the map: create an anonymous map 2809 * for a copy-on-write region, or an anonymous map 2810 * for a zero-fill region. (XXXCDC: submap case 2811 * ok?) 2812 */ 2813 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 2814 if (UVM_ET_ISNEEDSCOPY(entry) && 2815 ((entry->protection & VM_PROT_WRITE) || 2816 (entry->object.uvm_obj == NULL))) { 2817 amap_copy(map, entry, M_WAITOK, TRUE, 2818 entry->start, entry->end); 2819 /* XXXCDC: wait OK? */ 2820 } 2821 } 2822 } 2823 entry->wired_count++; 2824 } 2825 2826 /* 2827 * Pass 3. 2828 */ 2829 2830 #ifdef DIAGNOSTIC 2831 timestamp_save = map->timestamp; 2832 #endif 2833 vm_map_busy(map); 2834 vm_map_downgrade(map); 2835 2836 for (error = 0, entry = map->header.next; 2837 entry != &map->header && error == 0; 2838 entry = entry->next) { 2839 if (entry->wired_count == 1) { 2840 error = uvm_fault_wire(map, entry->start, entry->end, 2841 entry->protection); 2842 } 2843 } 2844 2845 if (error) { /* failed? */ 2846 /* 2847 * Get back an exclusive (write) lock. 
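 * (and, under DIAGNOSTIC, make sure nobody changed the map
 * while we only held it read-locked)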
2848 */ 2849 vm_map_upgrade(map); 2850 vm_map_unbusy(map); 2851 2852 #ifdef DIAGNOSTIC 2853 if (timestamp_save != map->timestamp) 2854 panic("uvm_map_pageable_all: stale map"); 2855 #endif 2856 2857 /* 2858 * first drop the wiring count on all the entries 2859 * which haven't actually been wired yet. 2860 * 2861 * Skip VM_PROT_NONE entries like we did above. 2862 */ 2863 failed_entry = entry; 2864 for (/* nothing */; entry != &map->header; 2865 entry = entry->next) { 2866 if (entry->protection == VM_PROT_NONE) 2867 continue; 2868 entry->wired_count--; 2869 } 2870 2871 /* 2872 * now, unwire all the entries that were successfully 2873 * wired above. 2874 * 2875 * Skip VM_PROT_NONE entries like we did above. 2876 */ 2877 for (entry = map->header.next; entry != failed_entry; 2878 entry = entry->next) { 2879 if (entry->protection == VM_PROT_NONE) 2880 continue; 2881 entry->wired_count--; 2882 if (VM_MAPENT_ISWIRED(entry)) 2883 uvm_map_entry_unwire(map, entry); 2884 } 2885 vm_map_unlock(map); 2886 UVMHIST_LOG(maphist,"<- done (RV=%ld)", error,0,0,0); 2887 return (error); 2888 } 2889 2890 /* We are holding a read lock here. */ 2891 vm_map_unbusy(map); 2892 vm_map_unlock_read(map); 2893 2894 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 2895 return (0); 2896 } 2897 2898 /* 2899 * uvm_map_clean: clean out a map range 2900 * 2901 * => valid flags: 2902 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 2903 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 2904 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 2905 * if (flags & PGO_FREE): any cached pages are freed after clean 2906 * => returns an error if any part of the specified range isn't mapped 2907 * => never a need to flush amap layer since the anonymous memory has 2908 * no permanent home, but may deactivate pages there 2909 * => called from sys_msync() and sys_madvise() 2910 * => caller must not write-lock map (read OK). 2911 * => we may sleep while cleaning if SYNCIO [with map read-locked] 2912 */ 2913 2914 int amap_clean_works = 1; /* XXX for now, just in case... */ 2915 2916 int 2917 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 2918 { 2919 struct vm_map_entry *current, *entry; 2920 struct uvm_object *uobj; 2921 struct vm_amap *amap; 2922 struct vm_anon *anon; 2923 struct vm_page *pg; 2924 vaddr_t offset; 2925 vsize_t size; 2926 int rv, error, refs; 2927 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 2928 2929 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,flags=0x%lx)", 2930 map, start, end, flags); 2931 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 2932 (PGO_FREE|PGO_DEACTIVATE)); 2933 2934 vm_map_lock_read(map); 2935 VM_MAP_RANGE_CHECK(map, start, end); 2936 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { 2937 vm_map_unlock_read(map); 2938 return (EFAULT); 2939 } 2940 2941 /* 2942 * Make a first pass to check for holes. 
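 * submapped ranges are rejected with EINVAL, and any gap within
 * [start, end) yields EFAULT; nothing is flushed until the whole
 * range has passed this check.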
2943 */ 2944 2945 for (current = entry; current->start < end; current = current->next) { 2946 if (UVM_ET_ISSUBMAP(current)) { 2947 vm_map_unlock_read(map); 2948 return (EINVAL); 2949 } 2950 if (end > current->end && (current->next == &map->header || 2951 current->end != current->next->start)) { 2952 vm_map_unlock_read(map); 2953 return (EFAULT); 2954 } 2955 } 2956 2957 error = 0; 2958 2959 for (current = entry; current->start < end; current = current->next) { 2960 amap = current->aref.ar_amap; /* top layer */ 2961 uobj = current->object.uvm_obj; /* bottom layer */ 2962 KASSERT(start >= current->start); 2963 2964 /* 2965 * No amap cleaning necessary if: 2966 * 2967 * (1) There's no amap. 2968 * 2969 * (2) We're not deactivating or freeing pages. 2970 */ 2971 2972 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 2973 goto flush_object; 2974 2975 /* XXX for now, just in case... */ 2976 if (amap_clean_works == 0) 2977 goto flush_object; 2978 2979 offset = start - current->start; 2980 size = MIN(end, current->end) - start; 2981 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { 2982 anon = amap_lookup(¤t->aref, offset); 2983 if (anon == NULL) 2984 continue; 2985 2986 simple_lock(&anon->an_lock); 2987 2988 pg = anon->an_page; 2989 if (pg == NULL) { 2990 simple_unlock(&anon->an_lock); 2991 continue; 2992 } 2993 2994 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 2995 2996 /* 2997 * XXX In these first 3 cases, we always just 2998 * XXX deactivate the page. We may want to 2999 * XXX handle the different cases more 3000 * XXX specifically, in the future. 3001 */ 3002 3003 case PGO_CLEANIT|PGO_FREE: 3004 case PGO_CLEANIT|PGO_DEACTIVATE: 3005 case PGO_DEACTIVATE: 3006 deactivate_it: 3007 /* skip the page if it's loaned or wired */ 3008 if (pg->loan_count != 0 || 3009 pg->wire_count != 0) { 3010 simple_unlock(&anon->an_lock); 3011 continue; 3012 } 3013 3014 uvm_lock_pageq(); 3015 3016 /* 3017 * skip the page if it's not actually owned 3018 * by the anon (may simply be loaned to the 3019 * anon). 3020 */ 3021 3022 if ((pg->pg_flags & PQ_ANON) == 0) { 3023 KASSERT(pg->uobject == NULL); 3024 uvm_unlock_pageq(); 3025 simple_unlock(&anon->an_lock); 3026 continue; 3027 } 3028 KASSERT(pg->uanon == anon); 3029 3030 #ifdef UBC 3031 /* ...and deactivate the page. */ 3032 pmap_clear_reference(pg); 3033 #else 3034 /* zap all mappings for the page. */ 3035 pmap_page_protect(pg, VM_PROT_NONE); 3036 3037 /* ...and deactivate the page. */ 3038 #endif 3039 uvm_pagedeactivate(pg); 3040 3041 uvm_unlock_pageq(); 3042 simple_unlock(&anon->an_lock); 3043 continue; 3044 3045 case PGO_FREE: 3046 3047 /* 3048 * If there are multiple references to 3049 * the amap, just deactivate the page. 3050 */ 3051 3052 if (amap_refs(amap) > 1) 3053 goto deactivate_it; 3054 3055 /* XXX skip the page if it's wired */ 3056 if (pg->wire_count != 0) { 3057 simple_unlock(&anon->an_lock); 3058 continue; 3059 } 3060 amap_unadd(¤t->aref, offset); 3061 refs = --anon->an_ref; 3062 simple_unlock(&anon->an_lock); 3063 if (refs == 0) 3064 uvm_anfree(anon); 3065 continue; 3066 3067 default: 3068 panic("uvm_map_clean: weird flags"); 3069 } 3070 } 3071 3072 flush_object: 3073 /* 3074 * flush pages if we've got a valid backing object. 3075 * 3076 * Don't PGO_FREE if we don't have write permission 3077 * and don't flush if this is a copy-on-write object 3078 * since we can't know our permissions on it. 
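 * in other words: pgo_flush() is only called when a backing
 * object exists and either PGO_FREE was not requested, or the
 * entry's max_protection allows write and the entry is not
 * copy-on-write.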
3079 */ 3080 3081 offset = current->offset + (start - current->start); 3082 size = MIN(end, current->end) - start; 3083 if (uobj != NULL && 3084 ((flags & PGO_FREE) == 0 || 3085 ((entry->max_protection & VM_PROT_WRITE) != 0 && 3086 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 3087 simple_lock(&uobj->vmobjlock); 3088 rv = uobj->pgops->pgo_flush(uobj, offset, 3089 offset + size, flags); 3090 simple_unlock(&uobj->vmobjlock); 3091 3092 if (rv == FALSE) 3093 error = EFAULT; 3094 } 3095 start += size; 3096 } 3097 vm_map_unlock_read(map); 3098 return (error); 3099 } 3100 3101 3102 /* 3103 * uvm_map_checkprot: check protection in map 3104 * 3105 * => must allow specified protection in a fully allocated region. 3106 * => map must be read or write locked by caller. 3107 */ 3108 3109 boolean_t 3110 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3111 vm_prot_t protection) 3112 { 3113 struct vm_map_entry *entry; 3114 struct vm_map_entry *tmp_entry; 3115 3116 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { 3117 return(FALSE); 3118 } 3119 entry = tmp_entry; 3120 while (start < end) { 3121 if (entry == &map->header) { 3122 return(FALSE); 3123 } 3124 3125 /* 3126 * no holes allowed 3127 */ 3128 3129 if (start < entry->start) { 3130 return(FALSE); 3131 } 3132 3133 /* 3134 * check protection associated with entry 3135 */ 3136 3137 if ((entry->protection & protection) != protection) { 3138 return(FALSE); 3139 } 3140 3141 /* go to next entry */ 3142 3143 start = entry->end; 3144 entry = entry->next; 3145 } 3146 return(TRUE); 3147 } 3148 3149 /* 3150 * uvmspace_alloc: allocate a vmspace structure. 3151 * 3152 * - structure includes vm_map and pmap 3153 * - XXX: no locking on this structure 3154 * - refcnt set to 1, rest must be init'd by caller 3155 */ 3156 struct vmspace * 3157 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3158 boolean_t remove_holes) 3159 { 3160 struct vmspace *vm; 3161 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 3162 3163 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); 3164 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3165 UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0); 3166 return (vm); 3167 } 3168 3169 /* 3170 * uvmspace_init: initialize a vmspace structure. 3171 * 3172 * - XXX: no locking on this structure 3173 * - refcnt set to 1, rest must be init'd by caller 3174 */ 3175 void 3176 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3177 boolean_t pageable, boolean_t remove_holes) 3178 { 3179 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 3180 3181 memset(vm, 0, sizeof(*vm)); 3182 3183 uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); 3184 3185 if (pmap) 3186 pmap_reference(pmap); 3187 else 3188 pmap = pmap_create(); 3189 vm->vm_map.pmap = pmap; 3190 3191 vm->vm_refcnt = 1; 3192 3193 if (remove_holes) 3194 pmap_remove_holes(&vm->vm_map); 3195 3196 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3197 } 3198 3199 /* 3200 * uvmspace_share: share a vmspace between two proceses 3201 * 3202 * - XXX: no locking on vmspace 3203 * - used for vfork, threads(?) 
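 *
 * a minimal usage sketch (the vfork-style case), with "p1" the
 * parent and "p2" the freshly created child:
 *
 *	uvmspace_share(p1, p2);
 *
 * after which p2 runs on p1's vmspace; the bumped vm_refcnt keeps
 * the vmspace alive until uvmspace_free() drops the last reference.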
3204 */ 3205 3206 void 3207 uvmspace_share(p1, p2) 3208 struct proc *p1, *p2; 3209 { 3210 p2->p_vmspace = p1->p_vmspace; 3211 p1->p_vmspace->vm_refcnt++; 3212 } 3213 3214 /* 3215 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 3216 * 3217 * - XXX: no locking on vmspace 3218 */ 3219 3220 void 3221 uvmspace_unshare(p) 3222 struct proc *p; 3223 { 3224 struct vmspace *nvm, *ovm = p->p_vmspace; 3225 3226 if (ovm->vm_refcnt == 1) 3227 /* nothing to do: vmspace isn't shared in the first place */ 3228 return; 3229 3230 /* make a new vmspace, still holding old one */ 3231 nvm = uvmspace_fork(ovm); 3232 3233 pmap_deactivate(p); /* unbind old vmspace */ 3234 p->p_vmspace = nvm; 3235 pmap_activate(p); /* switch to new vmspace */ 3236 3237 uvmspace_free(ovm); /* drop reference to old vmspace */ 3238 } 3239 3240 /* 3241 * uvmspace_exec: the process wants to exec a new program 3242 * 3243 * - XXX: no locking on vmspace 3244 */ 3245 3246 void 3247 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3248 { 3249 struct vmspace *nvm, *ovm = p->p_vmspace; 3250 struct vm_map *map = &ovm->vm_map; 3251 3252 pmap_unuse_final(p); /* before stack addresses go away */ 3253 3254 /* 3255 * see if more than one process is using this vmspace... 3256 */ 3257 3258 if (ovm->vm_refcnt == 1) { 3259 3260 /* 3261 * if p is the only process using its vmspace then we can safely 3262 * recycle that vmspace for the program that is being exec'd. 3263 */ 3264 3265 #ifdef SYSVSHM 3266 /* 3267 * SYSV SHM semantics require us to kill all segments on an exec 3268 */ 3269 if (ovm->vm_shm) 3270 shmexit(ovm); 3271 #endif 3272 3273 /* 3274 * POSIX 1003.1b -- "lock future mappings" is revoked 3275 * when a process execs another program image. 3276 */ 3277 vm_map_lock(map); 3278 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3279 vm_map_unlock(map); 3280 3281 /* 3282 * now unmap the old program 3283 */ 3284 uvm_unmap(map, map->min_offset, map->max_offset); 3285 3286 /* 3287 * but keep MMU holes unavailable 3288 */ 3289 pmap_remove_holes(map); 3290 3291 /* 3292 * resize the map 3293 */ 3294 vm_map_lock(map); 3295 map->min_offset = start; 3296 uvm_tree_sanity(map, "resize enter"); 3297 map->max_offset = end; 3298 if (map->header.prev != &map->header) 3299 uvm_rb_fixup(map, map->header.prev); 3300 uvm_tree_sanity(map, "resize leave"); 3301 vm_map_unlock(map); 3302 3303 3304 } else { 3305 3306 /* 3307 * p's vmspace is being shared, so we can't reuse it for p since 3308 * it is still being used for others. allocate a new vmspace 3309 * for p 3310 */ 3311 nvm = uvmspace_alloc(start, end, 3312 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3313 3314 /* 3315 * install new vmspace and drop our ref to the old one. 3316 */ 3317 3318 pmap_deactivate(p); 3319 p->p_vmspace = nvm; 3320 pmap_activate(p); 3321 3322 uvmspace_free(ovm); 3323 } 3324 } 3325 3326 /* 3327 * uvmspace_free: free a vmspace data structure 3328 * 3329 * - XXX: no locking on vmspace 3330 */ 3331 3332 void 3333 uvmspace_free(struct vmspace *vm) 3334 { 3335 struct vm_map_entry *dead_entries; 3336 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); 3337 3338 UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0); 3339 if (--vm->vm_refcnt == 0) { 3340 /* 3341 * lock the map, to wait out all other references to it. delete 3342 * all of the mappings and pages they hold, then call the pmap 3343 * module to reclaim anything left. 3344 */ 3345 #ifdef SYSVSHM 3346 /* Get rid of any SYSV shared memory segments. 
*/ 3347 if (vm->vm_shm != NULL) 3348 shmexit(vm); 3349 #endif 3350 vm_map_lock(&vm->vm_map); 3351 if (vm->vm_map.nentries) { 3352 uvm_unmap_remove(&vm->vm_map, 3353 vm->vm_map.min_offset, vm->vm_map.max_offset, 3354 &dead_entries, NULL); 3355 if (dead_entries != NULL) 3356 uvm_unmap_detach(dead_entries, 0); 3357 } 3358 pmap_destroy(vm->vm_map.pmap); 3359 vm->vm_map.pmap = NULL; 3360 pool_put(&uvm_vmspace_pool, vm); 3361 } 3362 UVMHIST_LOG(maphist,"<- done", 0,0,0,0); 3363 } 3364 3365 /* 3366 * F O R K - m a i n e n t r y p o i n t 3367 */ 3368 /* 3369 * uvmspace_fork: fork a process' main map 3370 * 3371 * => create a new vmspace for child process from parent. 3372 * => parent's map must not be locked. 3373 */ 3374 3375 struct vmspace * 3376 uvmspace_fork(struct vmspace *vm1) 3377 { 3378 struct vmspace *vm2; 3379 struct vm_map *old_map = &vm1->vm_map; 3380 struct vm_map *new_map; 3381 struct vm_map_entry *old_entry; 3382 struct vm_map_entry *new_entry; 3383 pmap_t new_pmap; 3384 boolean_t protect_child; 3385 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); 3386 3387 vm_map_lock(old_map); 3388 3389 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3390 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3391 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3392 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3393 new_map = &vm2->vm_map; /* XXX */ 3394 new_pmap = new_map->pmap; 3395 3396 old_entry = old_map->header.next; 3397 3398 /* 3399 * go entry-by-entry 3400 */ 3401 3402 while (old_entry != &old_map->header) { 3403 3404 /* 3405 * first, some sanity checks on the old entry 3406 */ 3407 if (UVM_ET_ISSUBMAP(old_entry)) 3408 panic("fork: encountered a submap during fork (illegal)"); 3409 3410 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3411 UVM_ET_ISNEEDSCOPY(old_entry)) 3412 panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); 3413 3414 3415 switch (old_entry->inheritance) { 3416 case MAP_INHERIT_NONE: 3417 /* 3418 * drop the mapping 3419 */ 3420 break; 3421 3422 case MAP_INHERIT_SHARE: 3423 /* 3424 * share the mapping: this means we want the old and 3425 * new entries to share amaps and backing objects. 3426 */ 3427 3428 /* 3429 * if the old_entry needs a new amap (due to prev fork) 3430 * then we need to allocate it now so that we have 3431 * something we own to share with the new_entry. [in 3432 * other words, we need to clear needs_copy] 3433 */ 3434 3435 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3436 /* get our own amap, clears needs_copy */ 3437 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3438 0, 0); 3439 /* XXXCDC: WAITOK??? */ 3440 } 3441 3442 new_entry = uvm_mapent_alloc(new_map); 3443 /* old_entry -> new_entry */ 3444 uvm_mapent_copy(old_entry, new_entry); 3445 3446 /* new pmap has nothing wired in it */ 3447 new_entry->wired_count = 0; 3448 3449 /* 3450 * gain reference to object backing the map (can't 3451 * be a submap, already checked this case). 
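 * both layers are shared: the amap by taking a reference with
 * AMAP_SHARED, and the uvm_object (if any) through its
 * pgo_reference hook.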
3452 */ 3453 if (new_entry->aref.ar_amap) 3454 /* share reference */ 3455 uvm_map_reference_amap(new_entry, AMAP_SHARED); 3456 3457 if (new_entry->object.uvm_obj && 3458 new_entry->object.uvm_obj->pgops->pgo_reference) 3459 new_entry->object.uvm_obj-> 3460 pgops->pgo_reference( 3461 new_entry->object.uvm_obj); 3462 3463 /* insert entry at end of new_map's entry list */ 3464 uvm_map_entry_link(new_map, new_map->header.prev, 3465 new_entry); 3466 3467 /* 3468 * pmap_copy the mappings: this routine is optional 3469 * but if it is there it will reduce the number of 3470 * page faults in the new proc. 3471 */ 3472 3473 pmap_copy(new_pmap, old_map->pmap, new_entry->start, 3474 (old_entry->end - old_entry->start), 3475 old_entry->start); 3476 3477 break; 3478 3479 case MAP_INHERIT_COPY: 3480 3481 /* 3482 * copy-on-write the mapping (using mmap's 3483 * MAP_PRIVATE semantics) 3484 * 3485 * allocate new_entry, adjust reference counts. 3486 * (note that new references are read-only). 3487 */ 3488 3489 new_entry = uvm_mapent_alloc(new_map); 3490 /* old_entry -> new_entry */ 3491 uvm_mapent_copy(old_entry, new_entry); 3492 3493 if (new_entry->aref.ar_amap) 3494 uvm_map_reference_amap(new_entry, 0); 3495 3496 if (new_entry->object.uvm_obj && 3497 new_entry->object.uvm_obj->pgops->pgo_reference) 3498 new_entry->object.uvm_obj->pgops->pgo_reference 3499 (new_entry->object.uvm_obj); 3500 3501 /* new pmap has nothing wired in it */ 3502 new_entry->wired_count = 0; 3503 3504 new_entry->etype |= 3505 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3506 uvm_map_entry_link(new_map, new_map->header.prev, 3507 new_entry); 3508 3509 /* 3510 * the new entry will need an amap. it will either 3511 * need to be copied from the old entry or created 3512 * from scratch (if the old entry does not have an 3513 * amap). can we defer this process until later 3514 * (by setting "needs_copy") or do we need to copy 3515 * the amap now? 3516 * 3517 * we must copy the amap now if any of the following 3518 * conditions hold: 3519 * 1. the old entry has an amap and that amap is 3520 * being shared. this means that the old (parent) 3521 * process is sharing the amap with another 3522 * process. if we do not clear needs_copy here 3523 * we will end up in a situation where both the 3524 * parent and child process are referring to the 3525 * same amap with "needs_copy" set. if the 3526 * parent write-faults, the fault routine will 3527 * clear "needs_copy" in the parent by allocating 3528 * a new amap. this is wrong because the 3529 * parent is supposed to be sharing the old amap 3530 * and the new amap will break that. 3531 * 3532 * 2. if the old entry has an amap and a non-zero 3533 * wire count then we are going to have to call 3534 * amap_cow_now to avoid page faults in the 3535 * parent process. since amap_cow_now requires 3536 * "needs_copy" to be clear we might as well 3537 * clear it here as well. 3538 * 3539 */ 3540 3541 if (old_entry->aref.ar_amap != NULL) { 3542 3543 if ((amap_flags(old_entry->aref.ar_amap) & 3544 AMAP_SHARED) != 0 || 3545 VM_MAPENT_ISWIRED(old_entry)) { 3546 3547 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3548 0, 0); 3549 /* XXXCDC: M_WAITOK ... ok? */ 3550 } 3551 } 3552 3553 /* 3554 * if the parent's entry is wired down, then the 3555 * parent process does not want page faults on 3556 * access to that memory. this means that we 3557 * cannot do copy-on-write because we can't write 3558 * protect the old entry. in this case we 3559 * resolve all copy-on-write faults now, using 3560 * amap_cow_now. 
note that we have already 3561 * allocated any needed amap (above). 3562 */ 3563 3564 if (VM_MAPENT_ISWIRED(old_entry)) { 3565 3566 /* 3567 * resolve all copy-on-write faults now 3568 * (note that there is nothing to do if 3569 * the old mapping does not have an amap). 3570 * XXX: is it worthwhile to bother with pmap_copy 3571 * in this case? 3572 */ 3573 if (old_entry->aref.ar_amap) 3574 amap_cow_now(new_map, new_entry); 3575 3576 } else { 3577 3578 /* 3579 * setup mappings to trigger copy-on-write faults 3580 * we must write-protect the parent if it has 3581 * an amap and it is not already "needs_copy"... 3582 * if it is already "needs_copy" then the parent 3583 * has already been write-protected by a previous 3584 * fork operation. 3585 * 3586 * if we do not write-protect the parent, then 3587 * we must be sure to write-protect the child 3588 * after the pmap_copy() operation. 3589 * 3590 * XXX: pmap_copy should have some way of telling 3591 * us that it didn't do anything so we can avoid 3592 * calling pmap_protect needlessly. 3593 */ 3594 3595 if (old_entry->aref.ar_amap) { 3596 3597 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3598 if (old_entry->max_protection & VM_PROT_WRITE) { 3599 pmap_protect(old_map->pmap, 3600 old_entry->start, 3601 old_entry->end, 3602 old_entry->protection & 3603 ~VM_PROT_WRITE); 3604 pmap_update(old_map->pmap); 3605 3606 } 3607 old_entry->etype |= UVM_ET_NEEDSCOPY; 3608 } 3609 3610 /* 3611 * parent must now be write-protected 3612 */ 3613 protect_child = FALSE; 3614 } else { 3615 3616 /* 3617 * we only need to protect the child if the 3618 * parent has write access. 3619 */ 3620 if (old_entry->max_protection & VM_PROT_WRITE) 3621 protect_child = TRUE; 3622 else 3623 protect_child = FALSE; 3624 3625 } 3626 3627 /* 3628 * copy the mappings 3629 * XXX: need a way to tell if this does anything 3630 */ 3631 3632 pmap_copy(new_pmap, old_map->pmap, 3633 new_entry->start, 3634 (old_entry->end - old_entry->start), 3635 old_entry->start); 3636 3637 /* 3638 * protect the child's mappings if necessary 3639 */ 3640 if (protect_child) { 3641 pmap_protect(new_pmap, new_entry->start, 3642 new_entry->end, 3643 new_entry->protection & 3644 ~VM_PROT_WRITE); 3645 } 3646 3647 } 3648 break; 3649 } /* end of switch statement */ 3650 old_entry = old_entry->next; 3651 } 3652 3653 new_map->size = old_map->size; 3654 vm_map_unlock(old_map); 3655 3656 #ifdef SYSVSHM 3657 if (vm1->vm_shm) 3658 shmfork(vm1, vm2); 3659 #endif 3660 3661 #ifdef PMAP_FORK 3662 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 3663 #endif 3664 3665 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3666 return(vm2); 3667 } 3668 3669 #if defined(DDB) 3670 3671 /* 3672 * DDB hooks 3673 */ 3674 3675 /* 3676 * uvm_map_printit: actually prints the map 3677 */ 3678 3679 void 3680 uvm_map_printit(struct vm_map *map, boolean_t full, 3681 int (*pr)(const char *, ...)) 3682 { 3683 struct vm_map_entry *entry; 3684 3685 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3686 (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n", 3687 map->nentries, map->size, map->ref_count, map->timestamp, 3688 map->flags); 3689 #ifdef pmap_resident_count 3690 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3691 pmap_resident_count(map->pmap)); 3692 #else 3693 /* XXXCDC: this should be required ... 
*/ 3694 (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); 3695 #endif 3696 if (!full) 3697 return; 3698 for (entry = map->header.next; entry != &map->header; 3699 entry = entry->next) { 3700 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3701 entry, entry->start, entry->end, entry->object.uvm_obj, 3702 (long long)entry->offset, entry->aref.ar_amap, 3703 entry->aref.ar_pageoff); 3704 (*pr)( 3705 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 3706 "wc=%d, adv=%d\n", 3707 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3708 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3709 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3710 entry->protection, entry->max_protection, 3711 entry->inheritance, entry->wired_count, entry->advice); 3712 } 3713 } 3714 3715 /* 3716 * uvm_object_printit: actually prints the object 3717 */ 3718 3719 void 3720 uvm_object_printit(uobj, full, pr) 3721 struct uvm_object *uobj; 3722 boolean_t full; 3723 int (*pr)(const char *, ...); 3724 { 3725 struct vm_page *pg; 3726 int cnt = 0; 3727 3728 (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ", 3729 uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages); 3730 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3731 (*pr)("refs=<SYSTEM>\n"); 3732 else 3733 (*pr)("refs=%d\n", uobj->uo_refs); 3734 3735 if (!full) { 3736 return; 3737 } 3738 (*pr)(" PAGES <pg,offset>:\n "); 3739 for (pg = TAILQ_FIRST(&uobj->memq); 3740 pg != NULL; 3741 pg = TAILQ_NEXT(pg, listq), cnt++) { 3742 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3743 if ((cnt % 3) == 2) { 3744 (*pr)("\n "); 3745 } 3746 } 3747 if ((cnt % 3) != 2) { 3748 (*pr)("\n"); 3749 } 3750 } 3751 3752 /* 3753 * uvm_page_printit: actually print the page 3754 */ 3755 3756 static const char page_flagbits[] = 3757 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3758 "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" 3759 "\31PMAP1\32PMAP2\33PMAP3"; 3760 3761 void 3762 uvm_page_printit(pg, full, pr) 3763 struct vm_page *pg; 3764 boolean_t full; 3765 int (*pr)(const char *, ...); 3766 { 3767 struct vm_page *tpg; 3768 struct uvm_object *uobj; 3769 struct pglist *pgl; 3770 3771 (*pr)("PAGE %p:\n", pg); 3772 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3773 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3774 (long long)pg->phys_addr); 3775 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 3776 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 3777 #if defined(UVM_PAGE_TRKOWN) 3778 if (pg->pg_flags & PG_BUSY) 3779 (*pr)(" owning process = %d, tag=%s\n", 3780 pg->owner, pg->owner_tag); 3781 else 3782 (*pr)(" page not busy, no owner\n"); 3783 #else 3784 (*pr)(" [page ownership tracking disabled]\n"); 3785 #endif 3786 3787 if (!full) 3788 return; 3789 3790 /* cross-verify object/anon */ 3791 if ((pg->pg_flags & PQ_FREE) == 0) { 3792 if (pg->pg_flags & PQ_ANON) { 3793 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3794 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3795 (pg->uanon) ? pg->uanon->an_page : NULL); 3796 else 3797 (*pr)(" anon backpointer is OK\n"); 3798 } else { 3799 uobj = pg->uobject; 3800 if (uobj) { 3801 (*pr)(" checking object list\n"); 3802 TAILQ_FOREACH(tpg, &uobj->memq, listq) { 3803 if (tpg == pg) { 3804 break; 3805 } 3806 } 3807 if (tpg) 3808 (*pr)(" page found on object list\n"); 3809 else 3810 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! 
<<<\n"); 3811 } 3812 } 3813 } 3814 3815 /* cross-verify page queue */ 3816 if (pg->pg_flags & PQ_FREE) { 3817 int fl = uvm_page_lookup_freelist(pg); 3818 pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ? 3819 PGFL_ZEROS : PGFL_UNKNOWN]; 3820 } else if (pg->pg_flags & PQ_INACTIVE) { 3821 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3822 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3823 } else if (pg->pg_flags & PQ_ACTIVE) { 3824 pgl = &uvm.page_active; 3825 } else { 3826 pgl = NULL; 3827 } 3828 3829 if (pgl) { 3830 (*pr)(" checking pageq list\n"); 3831 TAILQ_FOREACH(tpg, pgl, pageq) { 3832 if (tpg == pg) { 3833 break; 3834 } 3835 } 3836 if (tpg) 3837 (*pr)(" page found on pageq list\n"); 3838 else 3839 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3840 } 3841 } 3842 #endif 3843