/*	$OpenBSD: uvm_map.c,v 1.123 2009/08/28 00:40:03 ariane Exp $	*/
/*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
68 */ 69 70 /* 71 * uvm_map.c: uvm map operations 72 */ 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/mman.h> 77 #include <sys/proc.h> 78 #include <sys/malloc.h> 79 #include <sys/pool.h> 80 #include <sys/kernel.h> 81 82 #include <dev/rndvar.h> 83 84 #ifdef SYSVSHM 85 #include <sys/shm.h> 86 #endif 87 88 #include <uvm/uvm.h> 89 #undef RB_AUGMENT 90 #define RB_AUGMENT(x) uvm_rb_augment(x) 91 92 #ifdef DDB 93 #include <uvm/uvm_ddb.h> 94 #endif 95 96 static struct timeval uvm_kmapent_last_warn_time; 97 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 98 99 struct uvm_cnt uvm_map_call, map_backmerge, map_forwmerge; 100 struct uvm_cnt map_nousermerge; 101 struct uvm_cnt uvm_mlk_call, uvm_mlk_hint; 102 const char vmmapbsy[] = "vmmapbsy"; 103 104 /* 105 * Da history books 106 */ 107 UVMHIST_DECL(maphist); 108 UVMHIST_DECL(pdhist); 109 110 /* 111 * pool for vmspace structures. 112 */ 113 114 struct pool uvm_vmspace_pool; 115 116 /* 117 * pool for dynamically-allocated map entries. 118 */ 119 120 struct pool uvm_map_entry_pool; 121 struct pool uvm_map_entry_kmem_pool; 122 123 #ifdef PMAP_GROWKERNEL 124 /* 125 * This global represents the end of the kernel virtual address 126 * space. If we want to exceed this, we must grow the kernel 127 * virtual address space dynamically. 128 * 129 * Note, this variable is locked by kernel_map's lock. 130 */ 131 vaddr_t uvm_maxkaddr; 132 #endif 133 134 /* 135 * macros 136 */ 137 138 /* 139 * uvm_map_entry_link: insert entry into a map 140 * 141 * => map must be locked 142 */ 143 #define uvm_map_entry_link(map, after_where, entry) do { \ 144 (map)->nentries++; \ 145 (entry)->prev = (after_where); \ 146 (entry)->next = (after_where)->next; \ 147 (entry)->prev->next = (entry); \ 148 (entry)->next->prev = (entry); \ 149 uvm_rb_insert(map, entry); \ 150 } while (0) 151 152 /* 153 * uvm_map_entry_unlink: remove entry from a map 154 * 155 * => map must be locked 156 */ 157 #define uvm_map_entry_unlink(map, entry) do { \ 158 (map)->nentries--; \ 159 (entry)->next->prev = (entry)->prev; \ 160 (entry)->prev->next = (entry)->next; \ 161 uvm_rb_remove(map, entry); \ 162 } while (0) 163 164 /* 165 * SAVE_HINT: saves the specified entry as the hint for future lookups. 166 * 167 * => map need not be locked (protected by hint_lock). 168 */ 169 #define SAVE_HINT(map,check,value) do { \ 170 simple_lock(&(map)->hint_lock); \ 171 if ((map)->hint == (check)) \ 172 (map)->hint = (value); \ 173 simple_unlock(&(map)->hint_lock); \ 174 } while (0) 175 176 /* 177 * VM_MAP_RANGE_CHECK: check and correct range 178 * 179 * => map must at least be read locked 180 */ 181 182 #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 183 if (start < vm_map_min(map)) \ 184 start = vm_map_min(map); \ 185 if (end > vm_map_max(map)) \ 186 end = vm_map_max(map); \ 187 if (start > end) \ 188 start = end; \ 189 } while (0) 190 191 /* 192 * local prototypes 193 */ 194 195 void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 196 void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 197 void uvm_map_reference_amap(struct vm_map_entry *, int); 198 void uvm_map_unreference_amap(struct vm_map_entry *, int); 199 int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t, 200 struct vm_map_entry *, voff_t, vsize_t); 201 202 struct vm_map_entry *uvm_mapent_alloc(struct vm_map *); 203 void uvm_mapent_free(struct vm_map_entry *); 204 205 #ifdef KVA_GUARDPAGES 206 /* 207 * Number of kva guardpages in use. 
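/*
 * A simplified, stand-alone sketch of what the uvm_map_entry_link and
 * uvm_map_entry_unlink macros above do to the entry list (the RB-tree
 * half is left out).  The entries of a map form a circular doubly-linked
 * list rooted at a sentinel header; "struct ent", "map_init", "map_link"
 * and "map_unlink" are invented names used only in this sketch, not the
 * kernel's own types.
 */
struct ent {
	struct ent	*prev, *next;
	unsigned long	 start, end;
};

/* An empty map: the sentinel points at itself in both directions. */
static void
map_init(struct ent *header)
{
	header->prev = header->next = header;
}

/* Mirror of uvm_map_entry_link(): splice "e" in right after "after". */
static void
map_link(struct ent *after, struct ent *e)
{
	e->prev = after;
	e->next = after->next;
	e->prev->next = e;
	e->next->prev = e;
}

/* Mirror of uvm_map_entry_unlink(): unhook "e" from its neighbours. */
static void
map_unlink(struct ent *e)
{
	e->next->prev = e->prev;
	e->prev->next = e->next;
}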
208 */ 209 int kva_guardpages; 210 #endif 211 212 213 /* 214 * Tree manipulation. 215 */ 216 void uvm_rb_insert(struct vm_map *, struct vm_map_entry *); 217 void uvm_rb_remove(struct vm_map *, struct vm_map_entry *); 218 vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *); 219 220 #ifdef DEBUG 221 int _uvm_tree_sanity(struct vm_map *map, const char *name); 222 #endif 223 vsize_t uvm_rb_subtree_space(struct vm_map_entry *); 224 void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *); 225 226 static __inline int 227 uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b) 228 { 229 if (a->start < b->start) 230 return (-1); 231 else if (a->start > b->start) 232 return (1); 233 234 return (0); 235 } 236 237 238 static __inline void 239 uvm_rb_augment(struct vm_map_entry *entry) 240 { 241 entry->space = uvm_rb_subtree_space(entry); 242 } 243 244 RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); 245 246 RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare); 247 248 vsize_t 249 uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry) 250 { 251 struct vm_map_entry *next; 252 vaddr_t space; 253 254 if ((next = entry->next) == &map->header) 255 space = map->max_offset - entry->end; 256 else { 257 KASSERT(next); 258 space = next->start - entry->end; 259 } 260 return (space); 261 } 262 263 vsize_t 264 uvm_rb_subtree_space(struct vm_map_entry *entry) 265 { 266 vaddr_t space, tmp; 267 268 space = entry->ownspace; 269 if (RB_LEFT(entry, rb_entry)) { 270 tmp = RB_LEFT(entry, rb_entry)->space; 271 if (tmp > space) 272 space = tmp; 273 } 274 275 if (RB_RIGHT(entry, rb_entry)) { 276 tmp = RB_RIGHT(entry, rb_entry)->space; 277 if (tmp > space) 278 space = tmp; 279 } 280 281 return (space); 282 } 283 284 void 285 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) 286 { 287 /* We need to traverse to the very top */ 288 do { 289 entry->ownspace = uvm_rb_space(map, entry); 290 entry->space = uvm_rb_subtree_space(entry); 291 } while ((entry = RB_PARENT(entry, rb_entry)) != NULL); 292 } 293 294 void 295 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) 296 { 297 vaddr_t space = uvm_rb_space(map, entry); 298 struct vm_map_entry *tmp; 299 300 entry->ownspace = entry->space = space; 301 tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry); 302 #ifdef DIAGNOSTIC 303 if (tmp != NULL) 304 panic("uvm_rb_insert: duplicate entry?"); 305 #endif 306 uvm_rb_fixup(map, entry); 307 if (entry->prev != &map->header) 308 uvm_rb_fixup(map, entry->prev); 309 } 310 311 void 312 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 313 { 314 struct vm_map_entry *parent; 315 316 parent = RB_PARENT(entry, rb_entry); 317 RB_REMOVE(uvm_tree, &(map)->rbhead, entry); 318 if (entry->prev != &map->header) 319 uvm_rb_fixup(map, entry->prev); 320 if (parent) 321 uvm_rb_fixup(map, parent); 322 } 323 324 #ifdef DEBUG 325 #define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y) 326 #else 327 #define uvm_tree_sanity(x,y) 328 #endif 329 330 #ifdef DEBUG 331 int 332 _uvm_tree_sanity(struct vm_map *map, const char *name) 333 { 334 struct vm_map_entry *tmp, *trtmp; 335 int n = 0, i = 1; 336 337 RB_FOREACH(tmp, uvm_tree, &map->rbhead) { 338 if (tmp->ownspace != uvm_rb_space(map, tmp)) { 339 printf("%s: %d/%d ownspace %x != %x %s\n", 340 name, n + 1, map->nentries, 341 tmp->ownspace, uvm_rb_space(map, tmp), 342 tmp->next == &map->header ? 
"(last)" : ""); 343 goto error; 344 } 345 } 346 trtmp = NULL; 347 RB_FOREACH(tmp, uvm_tree, &map->rbhead) { 348 if (tmp->space != uvm_rb_subtree_space(tmp)) { 349 printf("%s: space %d != %d\n", 350 name, tmp->space, uvm_rb_subtree_space(tmp)); 351 goto error; 352 } 353 if (trtmp != NULL && trtmp->start >= tmp->start) { 354 printf("%s: corrupt: 0x%lx >= 0x%lx\n", 355 name, trtmp->start, tmp->start); 356 goto error; 357 } 358 n++; 359 360 trtmp = tmp; 361 } 362 363 if (n != map->nentries) { 364 printf("%s: nentries: %d vs %d\n", 365 name, n, map->nentries); 366 goto error; 367 } 368 369 for (tmp = map->header.next; tmp && tmp != &map->header; 370 tmp = tmp->next, i++) { 371 trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp); 372 if (trtmp != tmp) { 373 printf("%s: lookup: %d: %p - %p: %p\n", 374 name, i, tmp, trtmp, 375 RB_PARENT(tmp, rb_entry)); 376 goto error; 377 } 378 } 379 380 return (0); 381 error: 382 #ifdef DDB 383 /* handy breakpoint location for error case */ 384 __asm(".globl treesanity_label\ntreesanity_label:"); 385 #endif 386 return (-1); 387 } 388 #endif 389 390 /* 391 * uvm_mapent_alloc: allocate a map entry 392 */ 393 394 struct vm_map_entry * 395 uvm_mapent_alloc(struct vm_map *map) 396 { 397 struct vm_map_entry *me, *ne; 398 int s, i; 399 int slowdown; 400 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); 401 402 if (map->flags & VM_MAP_INTRSAFE || cold) { 403 s = splvm(); 404 simple_lock(&uvm.kentry_lock); 405 me = uvm.kentry_free; 406 if (me == NULL) { 407 ne = uvm_km_getpage(0, &slowdown); 408 if (ne == NULL) 409 panic("uvm_mapent_alloc: cannot allocate map " 410 "entry"); 411 for (i = 0; 412 i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; 413 i++) 414 ne[i].next = &ne[i + 1]; 415 ne[i].next = NULL; 416 me = ne; 417 if (ratecheck(&uvm_kmapent_last_warn_time, 418 &uvm_kmapent_warn_rate)) 419 printf("uvm_mapent_alloc: out of static " 420 "map entries\n"); 421 } 422 uvm.kentry_free = me->next; 423 uvmexp.kmapent++; 424 simple_unlock(&uvm.kentry_lock); 425 splx(s); 426 me->flags = UVM_MAP_STATIC; 427 } else if (map == kernel_map) { 428 splassert(IPL_NONE); 429 me = pool_get(&uvm_map_entry_kmem_pool, PR_WAITOK); 430 me->flags = UVM_MAP_KMEM; 431 } else { 432 splassert(IPL_NONE); 433 me = pool_get(&uvm_map_entry_pool, PR_WAITOK); 434 me->flags = 0; 435 } 436 437 UVMHIST_LOG(maphist, "<- new entry=%p [kentry=%ld]", me, 438 ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); 439 return(me); 440 } 441 442 /* 443 * uvm_mapent_free: free map entry 444 * 445 * => XXX: static pool for kernel map? 
446 */ 447 448 void 449 uvm_mapent_free(struct vm_map_entry *me) 450 { 451 int s; 452 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); 453 454 UVMHIST_LOG(maphist,"<- freeing map entry=%p [flags=%ld]", 455 me, me->flags, 0, 0); 456 if (me->flags & UVM_MAP_STATIC) { 457 s = splvm(); 458 simple_lock(&uvm.kentry_lock); 459 me->next = uvm.kentry_free; 460 uvm.kentry_free = me; 461 uvmexp.kmapent--; 462 simple_unlock(&uvm.kentry_lock); 463 splx(s); 464 } else if (me->flags & UVM_MAP_KMEM) { 465 splassert(IPL_NONE); 466 pool_put(&uvm_map_entry_kmem_pool, me); 467 } else { 468 splassert(IPL_NONE); 469 pool_put(&uvm_map_entry_pool, me); 470 } 471 } 472 473 /* 474 * uvm_mapent_copy: copy a map entry, preserving flags 475 */ 476 477 void 478 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 479 { 480 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 481 ((char *)src)); 482 } 483 484 /* 485 * uvm_map_entry_unwire: unwire a map entry 486 * 487 * => map should be locked by caller 488 */ 489 void 490 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) 491 { 492 493 entry->wired_count = 0; 494 uvm_fault_unwire_locked(map, entry->start, entry->end); 495 } 496 497 498 /* 499 * wrapper for calling amap_ref() 500 */ 501 void 502 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 503 { 504 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 505 (entry->end - entry->start) >> PAGE_SHIFT, flags); 506 } 507 508 509 /* 510 * wrapper for calling amap_unref() 511 */ 512 void 513 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 514 { 515 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 516 (entry->end - entry->start) >> PAGE_SHIFT, flags); 517 } 518 519 520 /* 521 * uvm_map_init: init mapping system at boot time. note that we allocate 522 * and init the static pool of structs vm_map_entry for the kernel here. 523 */ 524 525 void 526 uvm_map_init(void) 527 { 528 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 529 #if defined(UVMHIST) 530 static struct uvm_history_ent maphistbuf[100]; 531 static struct uvm_history_ent pdhistbuf[100]; 532 #endif 533 int lcv; 534 535 /* 536 * first, init logging system. 537 */ 538 539 UVMHIST_FUNC("uvm_map_init"); 540 UVMHIST_INIT_STATIC(maphist, maphistbuf); 541 UVMHIST_INIT_STATIC(pdhist, pdhistbuf); 542 UVMHIST_CALLED(maphist); 543 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 544 UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0, 545 "# uvm_map() successful calls", 0); 546 UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0); 547 UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward", 548 0); 549 UVMCNT_INIT(map_nousermerge, UVMCNT_CNT, 0, "# back merges skipped", 0); 550 UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0); 551 UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0); 552 553 /* 554 * now set up static pool of kernel map entries ... 555 */ 556 557 simple_lock_init(&uvm.kentry_lock); 558 uvm.kentry_free = NULL; 559 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 560 kernel_map_entry[lcv].next = uvm.kentry_free; 561 uvm.kentry_free = &kernel_map_entry[lcv]; 562 } 563 564 /* 565 * initialize the map-related pools. 
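/*
 * A stand-alone illustration of the trick used by uvm_mapent_copy()
 * above: only the members that precede a marker field
 * (uvm_map_entry_stop_copy in the real structure) are copied, so the
 * destination keeps its own trailing members -- in particular the
 * allocator flags that record which pool the entry came from.  The
 * struct and function below are invented for this sketch.
 */
#include <stddef.h>	/* offsetof */
#include <string.h>	/* memcpy */

struct demo_entry {
	unsigned long	start, end;	/* copied */
	int		protection;	/* copied */
	int		inheritance;	/* copied */
	int		alloc_flags;	/* NOT copied: identifies the pool */
};

static void
demo_entry_copy(const struct demo_entry *src, struct demo_entry *dst)
{
	/* copy everything that precedes alloc_flags, nothing after it */
	memcpy(dst, src, offsetof(struct demo_entry, alloc_flags));
}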
566 */ 567 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 568 0, 0, 0, "vmsppl", &pool_allocator_nointr); 569 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 570 0, 0, 0, "vmmpepl", &pool_allocator_nointr); 571 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 572 0, 0, 0, "vmmpekpl", NULL); 573 pool_sethiwat(&uvm_map_entry_pool, 8192); 574 } 575 576 /* 577 * clippers 578 */ 579 580 /* 581 * uvm_map_clip_start: ensure that the entry begins at or after 582 * the starting address, if it doesn't we split the entry. 583 * 584 * => caller should use UVM_MAP_CLIP_START macro rather than calling 585 * this directly 586 * => map must be locked by caller 587 */ 588 589 void 590 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, 591 vaddr_t start) 592 { 593 struct vm_map_entry *new_entry; 594 vaddr_t new_adj; 595 596 /* uvm_map_simplify_entry(map, entry); */ /* XXX */ 597 598 uvm_tree_sanity(map, "clip_start entry"); 599 600 /* 601 * Split off the front portion. note that we must insert the new 602 * entry BEFORE this one, so that this entry has the specified 603 * starting address. 604 */ 605 606 new_entry = uvm_mapent_alloc(map); 607 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 608 609 new_entry->end = start; 610 new_adj = start - new_entry->start; 611 if (entry->object.uvm_obj) 612 entry->offset += new_adj; /* shift start over */ 613 614 /* Does not change order for the RB tree */ 615 entry->start = start; 616 617 if (new_entry->aref.ar_amap) { 618 amap_splitref(&new_entry->aref, &entry->aref, new_adj); 619 } 620 621 uvm_map_entry_link(map, entry->prev, new_entry); 622 623 if (UVM_ET_ISSUBMAP(entry)) { 624 /* ... unlikely to happen, but play it safe */ 625 uvm_map_reference(new_entry->object.sub_map); 626 } else { 627 if (UVM_ET_ISOBJ(entry) && 628 entry->object.uvm_obj->pgops && 629 entry->object.uvm_obj->pgops->pgo_reference) 630 entry->object.uvm_obj->pgops->pgo_reference( 631 entry->object.uvm_obj); 632 } 633 634 uvm_tree_sanity(map, "clip_start leave"); 635 } 636 637 /* 638 * uvm_map_clip_end: ensure that the entry ends at or before 639 * the ending address, if it doesn't we split the reference 640 * 641 * => caller should use UVM_MAP_CLIP_END macro rather than calling 642 * this directly 643 * => map must be locked by caller 644 */ 645 646 void 647 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) 648 { 649 struct vm_map_entry *new_entry; 650 vaddr_t new_adj; /* #bytes we move start forward */ 651 652 uvm_tree_sanity(map, "clip_end entry"); 653 /* 654 * Create a new entry and insert it 655 * AFTER the specified entry 656 */ 657 658 new_entry = uvm_mapent_alloc(map); 659 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 660 661 new_entry->start = entry->end = end; 662 new_adj = end - entry->start; 663 if (new_entry->object.uvm_obj) 664 new_entry->offset += new_adj; 665 666 if (entry->aref.ar_amap) 667 amap_splitref(&entry->aref, &new_entry->aref, new_adj); 668 669 uvm_rb_fixup(map, entry); 670 671 uvm_map_entry_link(map, entry, new_entry); 672 673 if (UVM_ET_ISSUBMAP(entry)) { 674 /* ... 
unlikely to happen, but play it safe */ 675 uvm_map_reference(new_entry->object.sub_map); 676 } else { 677 if (UVM_ET_ISOBJ(entry) && 678 entry->object.uvm_obj->pgops && 679 entry->object.uvm_obj->pgops->pgo_reference) 680 entry->object.uvm_obj->pgops->pgo_reference( 681 entry->object.uvm_obj); 682 } 683 uvm_tree_sanity(map, "clip_end leave"); 684 } 685 686 687 /* 688 * M A P - m a i n e n t r y p o i n t 689 */ 690 /* 691 * uvm_map: establish a valid mapping in a map 692 * 693 * => assume startp is page aligned. 694 * => assume size is a multiple of PAGE_SIZE. 695 * => assume sys_mmap provides enough of a "hint" to have us skip 696 * over text/data/bss area. 697 * => map must be unlocked (we will lock it) 698 * => <uobj,uoffset> value meanings (4 cases): 699 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 700 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 701 * [3] <uobj,uoffset> == normal mapping 702 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 703 * 704 * case [4] is for kernel mappings where we don't know the offset until 705 * we've found a virtual address. note that kernel object offsets are 706 * always relative to vm_map_min(kernel_map). 707 * 708 * => if `align' is non-zero, we try to align the virtual address to 709 * the specified alignment. this is only a hint; if we can't 710 * do it, the address will be unaligned. this is provided as 711 * a mechanism for large pages. 712 * 713 * => XXXCDC: need way to map in external amap? 714 */ 715 716 int 717 uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size, 718 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 719 struct proc *p) 720 { 721 struct vm_map_entry *prev_entry, *new_entry; 722 #ifdef KVA_GUARDPAGES 723 struct vm_map_entry *guard_entry; 724 #endif 725 vm_prot_t prot = UVM_PROTECTION(flags), maxprot = 726 UVM_MAXPROTECTION(flags); 727 vm_inherit_t inherit = UVM_INHERIT(flags); 728 int advice = UVM_ADVICE(flags); 729 int error; 730 UVMHIST_FUNC("uvm_map"); 731 UVMHIST_CALLED(maphist); 732 733 UVMHIST_LOG(maphist, "(map=%p, *startp=0x%lx, size=%ld, flags=0x%lx)", 734 map, *startp, size, flags); 735 UVMHIST_LOG(maphist, " uobj/offset %p/%ld", uobj, (u_long)uoffset,0,0); 736 737 #ifdef KVA_GUARDPAGES 738 if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { 739 /* 740 * kva_guardstart is initialized to the start of the kernelmap 741 * and cycles through the kva space. 742 * This way we should have a long time between re-use of kva. 743 */ 744 static vaddr_t kva_guardstart = 0; 745 if (kva_guardstart == 0) { 746 kva_guardstart = vm_map_min(map); 747 printf("uvm_map: kva guard pages enabled: %p\n", 748 kva_guardstart); 749 } 750 size += PAGE_SIZE; /* Add guard page at the end. */ 751 /* 752 * Try to fully exhaust kva prior to wrap-around. 753 * (This may eat your ram!) 754 */ 755 if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) { 756 static int wrap_counter = 0; 757 printf("uvm_map: kva guard page wrap-around %d\n", 758 ++wrap_counter); 759 kva_guardstart = vm_map_min(map); 760 } 761 *startp = kva_guardstart; 762 /* 763 * Prepare for next round. 764 */ 765 kva_guardstart += size; 766 } 767 #endif 768 769 uvm_tree_sanity(map, "map entry"); 770 771 if ((map->flags & VM_MAP_INTRSAFE) == 0) 772 splassert(IPL_NONE); 773 else 774 splassert(IPL_VM); 775 776 /* 777 * step 0: sanity check of protection code 778 */ 779 780 if ((prot & maxprot) != prot) { 781 UVMHIST_LOG(maphist, "<- prot. 
failure: prot=0x%lx, max=0x%lx", 782 prot, maxprot,0,0); 783 return (EACCES); 784 } 785 786 /* 787 * step 1: figure out where to put new VM range 788 */ 789 790 if (vm_map_lock_try(map) == FALSE) { 791 if (flags & UVM_FLAG_TRYLOCK) 792 return (EFAULT); 793 vm_map_lock(map); /* could sleep here */ 794 } 795 if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, 796 uobj, uoffset, align, flags)) == NULL) { 797 UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0); 798 vm_map_unlock(map); 799 return (ENOMEM); 800 } 801 802 #ifdef PMAP_GROWKERNEL 803 { 804 /* 805 * If the kernel pmap can't map the requested space, 806 * then allocate more resources for it. 807 */ 808 if (map == kernel_map && !(flags & UVM_FLAG_FIXED) && 809 uvm_maxkaddr < (*startp + size)) 810 uvm_maxkaddr = pmap_growkernel(*startp + size); 811 } 812 #endif 813 814 UVMCNT_INCR(uvm_map_call); 815 816 /* 817 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 818 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 819 * either case we want to zero it before storing it in the map entry 820 * (because it looks strange and confusing when debugging...) 821 * 822 * if uobj is not null 823 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 824 * and we do not need to change uoffset. 825 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 826 * now (based on the starting address of the map). this case is 827 * for kernel object mappings where we don't know the offset until 828 * the virtual address is found (with uvm_map_findspace). the 829 * offset is the distance we are from the start of the map. 830 */ 831 832 if (uobj == NULL) { 833 uoffset = 0; 834 } else { 835 if (uoffset == UVM_UNKNOWN_OFFSET) { 836 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 837 uoffset = *startp - vm_map_min(kernel_map); 838 } 839 } 840 841 /* 842 * step 2: try and insert in map by extending previous entry, if 843 * possible 844 * XXX: we don't try and pull back the next entry. might be useful 845 * for a stack, but we are currently allocating our stack in advance. 846 */ 847 848 if ((flags & UVM_FLAG_NOMERGE) == 0 && 849 prev_entry->end == *startp && prev_entry != &map->header && 850 prev_entry->object.uvm_obj == uobj) { 851 852 if (uobj && prev_entry->offset + 853 (prev_entry->end - prev_entry->start) != uoffset) 854 goto step3; 855 856 if (UVM_ET_ISSUBMAP(prev_entry)) 857 goto step3; 858 859 if (prev_entry->protection != prot || 860 prev_entry->max_protection != maxprot) 861 goto step3; 862 863 if (prev_entry->inheritance != inherit || 864 prev_entry->advice != advice) 865 goto step3; 866 867 /* wiring status must match (new area is unwired) */ 868 if (VM_MAPENT_ISWIRED(prev_entry)) 869 goto step3; 870 871 /* 872 * can't extend a shared amap. note: no need to lock amap to 873 * look at refs since we don't care about its exact value. 874 * if it is one (i.e. we have only reference) it will stay there 875 */ 876 877 if (prev_entry->aref.ar_amap && 878 amap_refs(prev_entry->aref.ar_amap) != 1) { 879 goto step3; 880 } 881 882 /* 883 * Only merge kernel mappings, but keep track 884 * of how much we skipped. 
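/*
 * A condensed, stand-alone restatement of the back-merge test above: a
 * new, unwired mapping can be folded into the previous entry only if it
 * is virtually contiguous with it, every attribute stored in the map
 * entry matches, and the previous entry's amap is not shared.  The
 * uobj/uoffset contiguity test and the kernel-map-only policy are left
 * out here; the struct and function names are invented for the sketch.
 */
struct prev_attrs {
	unsigned long	end;		/* VA where the previous entry stops */
	int		is_submap, wired;
	int		prot, maxprot;
	int		inherit, advice;
	int		amap_refs;	/* 0 if the entry has no amap */
};

static int
can_backmerge(const struct prev_attrs *prev, unsigned long newstart,
    int prot, int maxprot, int inherit, int advice)
{
	if (prev->end != newstart)		/* must be VA-contiguous */
		return (0);
	if (prev->is_submap || prev->wired)
		return (0);
	if (prev->prot != prot || prev->maxprot != maxprot)
		return (0);
	if (prev->inherit != inherit || prev->advice != advice)
		return (0);
	if (prev->amap_refs > 1)		/* can't extend a shared amap */
		return (0);
	return (1);
}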
885 */ 886 if (map != kernel_map && map != kmem_map) { 887 UVMCNT_INCR(map_nousermerge); 888 goto step3; 889 } 890 891 if (prev_entry->aref.ar_amap) { 892 error = amap_extend(prev_entry, size); 893 if (error) 894 goto step3; 895 } 896 897 UVMCNT_INCR(map_backmerge); 898 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 899 900 /* 901 * drop our reference to uobj since we are extending a reference 902 * that we already have (the ref count can not drop to zero). 903 */ 904 905 if (uobj && uobj->pgops->pgo_detach) 906 uobj->pgops->pgo_detach(uobj); 907 908 prev_entry->end += size; 909 uvm_rb_fixup(map, prev_entry); 910 map->size += size; 911 if (p && uobj == NULL) 912 p->p_vmspace->vm_dused += atop(size); 913 914 uvm_tree_sanity(map, "map leave 2"); 915 916 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 917 vm_map_unlock(map); 918 return (0); 919 920 } 921 step3: 922 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 923 924 /* 925 * check for possible forward merge (which we don't do) and count 926 * the number of times we missed a *possible* chance to merge more 927 */ 928 929 if ((flags & UVM_FLAG_NOMERGE) == 0 && 930 prev_entry->next != &map->header && 931 prev_entry->next->start == (*startp + size)) 932 UVMCNT_INCR(map_forwmerge); 933 934 /* 935 * step 3: allocate new entry and link it in 936 */ 937 938 #ifdef KVA_GUARDPAGES 939 if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) 940 size -= PAGE_SIZE; 941 #endif 942 943 new_entry = uvm_mapent_alloc(map); 944 new_entry->start = *startp; 945 new_entry->end = new_entry->start + size; 946 new_entry->object.uvm_obj = uobj; 947 new_entry->offset = uoffset; 948 949 if (uobj) 950 new_entry->etype = UVM_ET_OBJ; 951 else 952 new_entry->etype = 0; 953 954 if (flags & UVM_FLAG_COPYONW) { 955 new_entry->etype |= UVM_ET_COPYONWRITE; 956 if ((flags & UVM_FLAG_OVERLAY) == 0) 957 new_entry->etype |= UVM_ET_NEEDSCOPY; 958 } 959 if (flags & UVM_FLAG_HOLE) 960 new_entry->etype |= UVM_ET_HOLE; 961 962 new_entry->protection = prot; 963 new_entry->max_protection = maxprot; 964 new_entry->inheritance = inherit; 965 new_entry->wired_count = 0; 966 new_entry->advice = advice; 967 if (flags & UVM_FLAG_OVERLAY) { 968 /* 969 * to_add: for BSS we overallocate a little since we 970 * are likely to extend 971 */ 972 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 973 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 974 struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); 975 new_entry->aref.ar_pageoff = 0; 976 new_entry->aref.ar_amap = amap; 977 } else { 978 new_entry->aref.ar_pageoff = 0; 979 new_entry->aref.ar_amap = NULL; 980 } 981 982 uvm_map_entry_link(map, prev_entry, new_entry); 983 984 map->size += size; 985 if (p && uobj == NULL) 986 p->p_vmspace->vm_dused += atop(size); 987 988 989 /* 990 * Update the free space hint 991 */ 992 993 if ((map->first_free == prev_entry) && 994 (prev_entry->end >= new_entry->start)) 995 map->first_free = new_entry; 996 997 #ifdef KVA_GUARDPAGES 998 /* 999 * Create the guard entry. 
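/*
 * A small stand-alone restatement of how the entry type bits are chosen
 * above: a copy-on-write mapping also gets "needs copy" unless an amap
 * (overlay) is allocated up front, and holes are marked separately.
 * The flag values and function name are invented for this sketch; the
 * real bits are UVM_ET_COPYONWRITE, UVM_ET_NEEDSCOPY and UVM_ET_HOLE.
 */
#define X_COPYONWRITE	0x1
#define X_NEEDSCOPY	0x2
#define X_HOLE		0x4

static int
entry_type_bits(int copy_on_write, int overlay, int hole)
{
	int etype = 0;

	if (copy_on_write) {
		etype |= X_COPYONWRITE;
		if (!overlay)		/* no amap yet: defer the copy */
			etype |= X_NEEDSCOPY;
	}
	if (hole)
		etype |= X_HOLE;
	return (etype);
}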
1000 */ 1001 if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) { 1002 guard_entry = uvm_mapent_alloc(map); 1003 guard_entry->start = new_entry->end; 1004 guard_entry->end = guard_entry->start + PAGE_SIZE; 1005 guard_entry->object.uvm_obj = uobj; 1006 guard_entry->offset = uoffset; 1007 guard_entry->etype = MAP_ET_KVAGUARD; 1008 guard_entry->protection = prot; 1009 guard_entry->max_protection = maxprot; 1010 guard_entry->inheritance = inherit; 1011 guard_entry->wired_count = 0; 1012 guard_entry->advice = advice; 1013 guard_entry->aref.ar_pageoff = 0; 1014 guard_entry->aref.ar_amap = NULL; 1015 uvm_map_entry_link(map, new_entry, guard_entry); 1016 map->size += PAGE_SIZE; 1017 kva_guardpages++; 1018 } 1019 #endif 1020 1021 uvm_tree_sanity(map, "map leave"); 1022 1023 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1024 vm_map_unlock(map); 1025 return (0); 1026 } 1027 1028 /* 1029 * uvm_map_lookup_entry: find map entry at or before an address 1030 * 1031 * => map must at least be read-locked by caller 1032 * => entry is returned in "entry" 1033 * => return value is true if address is in the returned entry 1034 */ 1035 1036 boolean_t 1037 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1038 struct vm_map_entry **entry) 1039 { 1040 struct vm_map_entry *cur; 1041 struct vm_map_entry *last; 1042 int use_tree = 0; 1043 UVMHIST_FUNC("uvm_map_lookup_entry"); 1044 UVMHIST_CALLED(maphist); 1045 1046 UVMHIST_LOG(maphist,"(map=%p,addr=0x%lx,ent=%p)", 1047 map, address, entry, 0); 1048 1049 /* 1050 * start looking either from the head of the 1051 * list, or from the hint. 1052 */ 1053 1054 simple_lock(&map->hint_lock); 1055 cur = map->hint; 1056 simple_unlock(&map->hint_lock); 1057 1058 if (cur == &map->header) 1059 cur = cur->next; 1060 1061 UVMCNT_INCR(uvm_mlk_call); 1062 if (address >= cur->start) { 1063 /* 1064 * go from hint to end of list. 1065 * 1066 * but first, make a quick check to see if 1067 * we are already looking at the entry we 1068 * want (which is usually the case). 1069 * note also that we don't need to save the hint 1070 * here... it is the same hint (unless we are 1071 * at the header, in which case the hint didn't 1072 * buy us anything anyway). 1073 */ 1074 last = &map->header; 1075 if ((cur != last) && (cur->end > address)) { 1076 UVMCNT_INCR(uvm_mlk_hint); 1077 *entry = cur; 1078 UVMHIST_LOG(maphist,"<- got it via hint (%p)", 1079 cur, 0, 0, 0); 1080 return (TRUE); 1081 } 1082 1083 if (map->nentries > 30) 1084 use_tree = 1; 1085 } else { 1086 /* 1087 * go from start to hint, *inclusively* 1088 */ 1089 last = cur->next; 1090 cur = map->header.next; 1091 use_tree = 1; 1092 } 1093 1094 uvm_tree_sanity(map, __func__); 1095 1096 if (use_tree) { 1097 struct vm_map_entry *prev = &map->header; 1098 cur = RB_ROOT(&map->rbhead); 1099 1100 /* 1101 * Simple lookup in the tree. Happens when the hint is 1102 * invalid, or nentries reach a threshold. 
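/*
 * A simplified model of the lookup strategy above: check the cached
 * hint first, since consecutive lookups tend to land in the same entry,
 * and only fall back to a full search (linear here; list or RB-tree in
 * the real code) when the hint misses.  Types and names are invented
 * stand-ins, not the kernel's structures.
 */
#include <stddef.h>

struct hint_ent {
	unsigned long	 start, end;	/* [start, end) covered by entry */
	struct hint_ent	*next;
};

static int
lookup_with_hint(struct hint_ent *head, struct hint_ent **hintp,
    unsigned long addr, struct hint_ent **out)
{
	struct hint_ent *cur = *hintp;

	/* fast path: the hint from the previous lookup still matches */
	if (cur != NULL && addr >= cur->start && addr < cur->end) {
		*out = cur;
		return (1);
	}

	/* slow path: search, then remember the result as the new hint */
	for (cur = head; cur != NULL; cur = cur->next) {
		if (addr >= cur->start && addr < cur->end) {
			*hintp = *out = cur;
			return (1);
		}
	}
	return (0);
}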
1103 */ 1104 while (cur) { 1105 if (address >= cur->start) { 1106 if (address < cur->end) { 1107 *entry = cur; 1108 SAVE_HINT(map, map->hint, cur); 1109 return (TRUE); 1110 } 1111 prev = cur; 1112 cur = RB_RIGHT(cur, rb_entry); 1113 } else 1114 cur = RB_LEFT(cur, rb_entry); 1115 } 1116 *entry = prev; 1117 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); 1118 return (FALSE); 1119 } 1120 1121 /* 1122 * search linearly 1123 */ 1124 1125 while (cur != last) { 1126 if (cur->end > address) { 1127 if (address >= cur->start) { 1128 /* 1129 * save this lookup for future 1130 * hints, and return 1131 */ 1132 1133 *entry = cur; 1134 SAVE_HINT(map, map->hint, cur); 1135 UVMHIST_LOG(maphist,"<- search got it (%p)", 1136 cur, 0, 0, 0); 1137 return (TRUE); 1138 } 1139 break; 1140 } 1141 cur = cur->next; 1142 } 1143 1144 *entry = cur->prev; 1145 SAVE_HINT(map, map->hint, *entry); 1146 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); 1147 return (FALSE); 1148 } 1149 1150 /* 1151 * Checks if address pointed to by phint fits into the empty 1152 * space before the vm_map_entry after. Takes aligment and 1153 * offset into consideration. 1154 */ 1155 1156 int 1157 uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length, 1158 struct vm_map_entry *after, voff_t uoffset, vsize_t align) 1159 { 1160 vaddr_t hint = *phint; 1161 vaddr_t end; 1162 1163 #ifdef PMAP_PREFER 1164 /* 1165 * push hint forward as needed to avoid VAC alias problems. 1166 * we only do this if a valid offset is specified. 1167 */ 1168 if (uoffset != UVM_UNKNOWN_OFFSET) 1169 PMAP_PREFER(uoffset, &hint); 1170 #endif 1171 if (align != 0) 1172 if ((hint & (align - 1)) != 0) 1173 hint = roundup(hint, align); 1174 *phint = hint; 1175 1176 end = hint + length; 1177 if (end > map->max_offset || end < hint) 1178 return (FALSE); 1179 if (after != NULL && after != &map->header && after->start < end) 1180 return (FALSE); 1181 1182 return (TRUE); 1183 } 1184 1185 /* 1186 * uvm_map_pie: return a random load address for a PIE executable 1187 * properly aligned. 1188 */ 1189 1190 #ifndef VM_PIE_MAX_ADDR 1191 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1192 #endif 1193 1194 #ifndef VM_PIE_MIN_ADDR 1195 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1196 #endif 1197 1198 #ifndef VM_PIE_MIN_ALIGN 1199 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1200 #endif 1201 1202 vaddr_t 1203 uvm_map_pie(vaddr_t align) 1204 { 1205 vaddr_t addr, space, min; 1206 1207 align = MAX(align, VM_PIE_MIN_ALIGN); 1208 1209 /* round up to next alignment */ 1210 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1211 1212 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1213 return (align); 1214 1215 space = (VM_PIE_MAX_ADDR - min) / align; 1216 space = MIN(space, (u_int32_t)-1); 1217 1218 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1219 addr += min; 1220 1221 return (addr); 1222 } 1223 1224 /* 1225 * uvm_map_hint: return the beginning of the best area suitable for 1226 * creating a new mapping with "prot" protection. 1227 */ 1228 vaddr_t 1229 uvm_map_hint(struct proc *p, vm_prot_t prot) 1230 { 1231 vaddr_t addr; 1232 1233 #ifdef __i386__ 1234 /* 1235 * If executable skip first two pages, otherwise start 1236 * after data + heap region. 
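/*
 * A worked, stand-alone version of the calculation in uvm_map_pie()
 * above, with example constants standing in for VM_PIE_MIN_ADDR and
 * VM_PIE_MAX_ADDR and a caller-supplied random value standing in for
 * arc4random_uniform().  "align" is assumed to be a power of two, as it
 * is after the MAX() against VM_PIE_MIN_ALIGN.
 */
#define X_PIE_MIN	0x00001000UL	/* example value only */
#define X_PIE_MAX	0x20000000UL	/* example value only */

static unsigned long
pie_address(unsigned long align, unsigned long random_value)
{
	unsigned long min, slots;

	/* round the minimum address up to the requested alignment */
	min = (X_PIE_MIN + align - 1) & ~(align - 1);
	if (align >= X_PIE_MAX || min >= X_PIE_MAX)
		return (align);

	/* number of aligned slots between min and max; pick one of them */
	slots = (X_PIE_MAX - min) / align;
	if (slots == 0)			/* defensive; not in the original */
		return (min);
	return (min + (random_value % slots) * align);
}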
1237 */ 1238 if ((prot & VM_PROT_EXECUTE) && 1239 ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) { 1240 addr = (PAGE_SIZE*2) + 1241 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 1242 return (round_page(addr)); 1243 } 1244 #endif 1245 addr = (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ; 1246 #if !defined(__vax__) 1247 addr += arc4random() & (MIN((256 * 1024 * 1024), MAXDSIZ) - 1); 1248 #else 1249 /* start malloc/mmap after the brk */ 1250 addr = (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ; 1251 #endif 1252 return (round_page(addr)); 1253 } 1254 1255 /* 1256 * uvm_map_findspace: find "length" sized space in "map". 1257 * 1258 * => "hint" is a hint about where we want it, unless FINDSPACE_FIXED is 1259 * set (in which case we insist on using "hint"). 1260 * => "result" is VA returned 1261 * => uobj/uoffset are to be used to handle VAC alignment, if required 1262 * => if `align' is non-zero, we attempt to align to that value. 1263 * => caller must at least have read-locked map 1264 * => returns NULL on failure, or pointer to prev. map entry if success 1265 * => note this is a cross between the old vm_map_findspace and vm_map_find 1266 */ 1267 1268 struct vm_map_entry * 1269 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1270 vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align, 1271 int flags) 1272 { 1273 struct vm_map_entry *entry, *next, *tmp; 1274 struct vm_map_entry *child, *prev = NULL; 1275 1276 vaddr_t end, orig_hint; 1277 UVMHIST_FUNC("uvm_map_findspace"); 1278 UVMHIST_CALLED(maphist); 1279 1280 UVMHIST_LOG(maphist, "(map=%p, hint=0x%lx, len=%ld, flags=0x%lx)", 1281 map, hint, length, flags); 1282 KASSERT((align & (align - 1)) == 0); 1283 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1284 1285 uvm_tree_sanity(map, "map_findspace entry"); 1286 1287 /* 1288 * remember the original hint. if we are aligning, then we 1289 * may have to try again with no alignment constraint if 1290 * we fail the first time. 1291 */ 1292 1293 orig_hint = hint; 1294 if (hint < map->min_offset) { /* check ranges ... */ 1295 if (flags & UVM_FLAG_FIXED) { 1296 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1297 return(NULL); 1298 } 1299 hint = map->min_offset; 1300 } 1301 if (hint > map->max_offset) { 1302 UVMHIST_LOG(maphist,"<- VA 0x%lx > range [0x%lx->0x%lx]", 1303 hint, map->min_offset, map->max_offset, 0); 1304 return(NULL); 1305 } 1306 1307 /* 1308 * Look for the first possible address; if there's already 1309 * something at this address, we have to start after it. 1310 */ 1311 1312 if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) { 1313 if ((entry = map->first_free) != &map->header) 1314 hint = entry->end; 1315 } else { 1316 if (uvm_map_lookup_entry(map, hint, &tmp)) { 1317 /* "hint" address already in use ... */ 1318 if (flags & UVM_FLAG_FIXED) { 1319 UVMHIST_LOG(maphist,"<- fixed & VA in use", 1320 0, 0, 0, 0); 1321 return(NULL); 1322 } 1323 hint = tmp->end; 1324 } 1325 entry = tmp; 1326 } 1327 1328 if (flags & UVM_FLAG_FIXED) { 1329 end = hint + length; 1330 if (end > map->max_offset || end < hint) { 1331 UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); 1332 goto error; 1333 } 1334 next = entry->next; 1335 if (next == &map->header || next->start >= end) 1336 goto found; 1337 UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0); 1338 return(NULL); /* only one shot at it ... 
*/ 1339 } 1340 1341 /* Try to find the space in the red-black tree */ 1342 1343 /* Check slot before any entry */ 1344 if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align)) 1345 goto found; 1346 1347 /* If there is not enough space in the whole tree, we fail */ 1348 tmp = RB_ROOT(&map->rbhead); 1349 if (tmp == NULL || tmp->space < length) 1350 goto error; 1351 1352 /* Find an entry close to hint that has enough space */ 1353 for (; tmp;) { 1354 if (tmp->end >= hint && 1355 (prev == NULL || tmp->end < prev->end)) { 1356 if (tmp->ownspace >= length) 1357 prev = tmp; 1358 else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL && 1359 child->space >= length) 1360 prev = tmp; 1361 } 1362 if (tmp->end < hint) 1363 child = RB_RIGHT(tmp, rb_entry); 1364 else if (tmp->end > hint) 1365 child = RB_LEFT(tmp, rb_entry); 1366 else { 1367 if (tmp->ownspace >= length) 1368 break; 1369 child = RB_RIGHT(tmp, rb_entry); 1370 } 1371 if (child == NULL || child->space < length) 1372 break; 1373 tmp = child; 1374 } 1375 1376 if (tmp != NULL && hint < tmp->end + tmp->ownspace) { 1377 /* 1378 * Check if the entry that we found satifies the 1379 * space requirement 1380 */ 1381 if (hint < tmp->end) 1382 hint = tmp->end; 1383 if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, 1384 align)) { 1385 entry = tmp; 1386 goto found; 1387 } else if (tmp->ownspace >= length) 1388 goto listsearch; 1389 } 1390 if (prev == NULL) 1391 goto error; 1392 1393 hint = prev->end; 1394 if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset, 1395 align)) { 1396 entry = prev; 1397 goto found; 1398 } else if (prev->ownspace >= length) 1399 goto listsearch; 1400 1401 tmp = RB_RIGHT(prev, rb_entry); 1402 for (;;) { 1403 KASSERT(tmp && tmp->space >= length); 1404 child = RB_LEFT(tmp, rb_entry); 1405 if (child && child->space >= length) { 1406 tmp = child; 1407 continue; 1408 } 1409 if (tmp->ownspace >= length) 1410 break; 1411 tmp = RB_RIGHT(tmp, rb_entry); 1412 } 1413 1414 hint = tmp->end; 1415 if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) { 1416 entry = tmp; 1417 goto found; 1418 } 1419 1420 /* 1421 * The tree fails to find an entry because of offset or alignment 1422 * restrictions. Search the list instead. 1423 */ 1424 listsearch: 1425 /* 1426 * Look through the rest of the map, trying to fit a new region in 1427 * the gap between existing regions, or after the very last region. 1428 * note: entry->end = base VA of current gap, 1429 * next->start = VA of end of current gap 1430 */ 1431 for (;; hint = (entry = next)->end) { 1432 /* 1433 * Find the end of the proposed new region. Be sure we didn't 1434 * go beyond the end of the map, or wrap around the address; 1435 * if so, we lose. Otherwise, if this is the last entry, or 1436 * if the proposed new region fits before the next entry, we 1437 * win. 1438 */ 1439 1440 #ifdef PMAP_PREFER 1441 /* 1442 * push hint forward as needed to avoid VAC alias problems. 1443 * we only do this if a valid offset is specified. 1444 */ 1445 if (uoffset != UVM_UNKNOWN_OFFSET) 1446 PMAP_PREFER(uoffset, &hint); 1447 #endif 1448 if (align != 0) { 1449 if ((hint & (align - 1)) != 0) 1450 hint = roundup(hint, align); 1451 /* 1452 * XXX Should we PMAP_PREFER() here again? 
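/*
 * A condensed restatement of the per-gap test used by both the tree
 * search (via uvm_map_spacefits() above) and the list search here:
 * round the candidate address up to the alignment, then reject the gap
 * if the region would wrap, run past the end of the map, or overlap the
 * next entry.  PMAP_PREFER is left out; "align" is assumed to be a
 * power of two; all names are invented for the sketch.
 */
static int
gap_fits(unsigned long *hintp, unsigned long length, unsigned long align,
    unsigned long map_max, unsigned long next_start)
{
	unsigned long hint = *hintp, end;

	if (align != 0 && (hint & (align - 1)) != 0)
		hint = (hint + align - 1) & ~(align - 1);	/* round up */

	end = hint + length;
	if (end > map_max || end < hint)	/* off the end, or wrapped */
		return (0);
	if (next_start < end)			/* collides with next entry */
		return (0);

	*hintp = hint;				/* pass back the aligned VA */
	return (1);
}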
1453 */ 1454 } 1455 end = hint + length; 1456 if (end > map->max_offset || end < hint) { 1457 UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); 1458 goto error; 1459 } 1460 next = entry->next; 1461 if (next == &map->header || next->start >= end) 1462 break; 1463 } 1464 found: 1465 SAVE_HINT(map, map->hint, entry); 1466 *result = hint; 1467 UVMHIST_LOG(maphist,"<- got it! (result=0x%lx)", hint, 0,0,0); 1468 return (entry); 1469 1470 error: 1471 if (align != 0) { 1472 UVMHIST_LOG(maphist, 1473 "calling recursively, no align", 1474 0,0,0,0); 1475 return (uvm_map_findspace(map, orig_hint, 1476 length, result, uobj, uoffset, 0, flags)); 1477 } 1478 return (NULL); 1479 } 1480 1481 /* 1482 * U N M A P - m a i n e n t r y p o i n t 1483 */ 1484 1485 /* 1486 * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") 1487 * 1488 * => caller must check alignment and size 1489 * => map must be unlocked (we will lock it) 1490 */ 1491 void 1492 uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p) 1493 { 1494 vm_map_entry_t dead_entries; 1495 UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); 1496 1497 UVMHIST_LOG(maphist, " (map=%p, start=0x%lx, end=0x%lx)", 1498 map, start, end, 0); 1499 1500 /* 1501 * work now done by helper functions. wipe the pmap's and then 1502 * detach from the dead entries... 1503 */ 1504 vm_map_lock(map); 1505 uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE); 1506 vm_map_unlock(map); 1507 1508 if (dead_entries != NULL) 1509 uvm_unmap_detach(dead_entries, 0); 1510 1511 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 1512 } 1513 1514 1515 /* 1516 * U N M A P - m a i n h e l p e r f u n c t i o n s 1517 */ 1518 1519 /* 1520 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 1521 * 1522 * => caller must check alignment and size 1523 * => map must be locked by caller 1524 * => we return a list of map entries that we've remove from the map 1525 * in "entry_list" 1526 */ 1527 1528 void 1529 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1530 struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes) 1531 { 1532 struct vm_map_entry *entry, *first_entry, *next; 1533 vaddr_t len; 1534 UVMHIST_FUNC("uvm_unmap_remove"); 1535 UVMHIST_CALLED(maphist); 1536 1537 UVMHIST_LOG(maphist,"(map=%p, start=0x%lx, end=0x%lx)", 1538 map, start, end, 0); 1539 1540 VM_MAP_RANGE_CHECK(map, start, end); 1541 1542 uvm_tree_sanity(map, "unmap_remove entry"); 1543 1544 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1545 splassert(IPL_NONE); 1546 else 1547 splassert(IPL_VM); 1548 1549 /* 1550 * find first entry 1551 */ 1552 if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { 1553 /* clip and go... */ 1554 entry = first_entry; 1555 UVM_MAP_CLIP_START(map, entry, start); 1556 /* critical! prevents stale hint */ 1557 SAVE_HINT(map, entry, entry->prev); 1558 1559 } else { 1560 entry = first_entry->next; 1561 } 1562 1563 /* 1564 * Save the free space hint 1565 */ 1566 1567 if (map->first_free->start >= start) 1568 map->first_free = entry->prev; 1569 1570 /* 1571 * note: we now re-use first_entry for a different task. we remove 1572 * a number of map entries from the map and save them in a linked 1573 * list headed by "first_entry". once we remove them from the map 1574 * the caller should unlock the map and drop the references to the 1575 * backing objects [c.f. uvm_unmap_detach]. the object is to 1576 * separate unmapping from reference dropping. why? 
1577 * [1] the map has to be locked for unmapping 1578 * [2] the map need not be locked for reference dropping 1579 * [3] dropping references may trigger pager I/O, and if we hit 1580 * a pager that does synchronous I/O we may have to wait for it. 1581 * [4] we would like all waiting for I/O to occur with maps unlocked 1582 * so that we don't block other threads. 1583 */ 1584 first_entry = NULL; 1585 *entry_list = NULL; /* to be safe */ 1586 1587 /* 1588 * break up the area into map entry sized regions and unmap. note 1589 * that all mappings have to be removed before we can even consider 1590 * dropping references to amaps or VM objects (otherwise we could end 1591 * up with a mapping to a page on the free list which would be very bad) 1592 */ 1593 1594 while ((entry != &map->header) && (entry->start < end)) { 1595 1596 UVM_MAP_CLIP_END(map, entry, end); 1597 next = entry->next; 1598 len = entry->end - entry->start; 1599 if (p && entry->object.uvm_obj == NULL) 1600 p->p_vmspace->vm_dused -= atop(len); 1601 1602 /* 1603 * unwire before removing addresses from the pmap; otherwise 1604 * unwiring will put the entries back into the pmap (XXX). 1605 */ 1606 1607 if (VM_MAPENT_ISWIRED(entry)) 1608 uvm_map_entry_unwire(map, entry); 1609 1610 /* 1611 * special case: handle mappings to anonymous kernel objects. 1612 * we want to free these pages right away... 1613 */ 1614 #ifdef KVA_GUARDPAGES 1615 if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) { 1616 entry->etype &= ~MAP_ET_KVAGUARD; 1617 kva_guardpages--; 1618 } else /* (code continues across line-break) */ 1619 #endif 1620 if (UVM_ET_ISHOLE(entry)) { 1621 if (!remove_holes) { 1622 entry = next; 1623 continue; 1624 } 1625 } else if (map->flags & VM_MAP_INTRSAFE) { 1626 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1627 pmap_kremove(entry->start, len); 1628 } else if (UVM_ET_ISOBJ(entry) && 1629 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1630 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1631 1632 /* 1633 * note: kernel object mappings are currently used in 1634 * two ways: 1635 * [1] "normal" mappings of pages in the kernel object 1636 * [2] uvm_km_valloc'd allocations in which we 1637 * pmap_enter in some non-kernel-object page 1638 * (e.g. vmapbuf). 1639 * 1640 * for case [1], we need to remove the mapping from 1641 * the pmap and then remove the page from the kernel 1642 * object (because, once pages in a kernel object are 1643 * unmapped they are no longer needed, unlike, say, 1644 * a vnode where you might want the data to persist 1645 * until flushed out of a queue). 1646 * 1647 * for case [2], we need to remove the mapping from 1648 * the pmap. there shouldn't be any pages at the 1649 * specified offset in the kernel object [but it 1650 * doesn't hurt to call uvm_km_pgremove just to be 1651 * safe?] 1652 * 1653 * uvm_km_pgremove currently does the following: 1654 * for pages in the kernel object in range: 1655 * - drops the swap slot 1656 * - uvm_pagefree the page 1657 * 1658 * note there is version of uvm_km_pgremove() that 1659 * is used for "intrsafe" objects. 1660 */ 1661 1662 /* 1663 * remove mappings from pmap and drop the pages 1664 * from the object. offsets are always relative 1665 * to vm_map_min(kernel_map). 
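/*
 * A small stand-alone illustration of the offset convention spelled out
 * above: pages of the kernel object are indexed by their distance from
 * vm_map_min(kernel_map), so an unmapped VA range [start, end) becomes
 * the offset range [start - kmap_min, end - kmap_min) inside the
 * object.  The names and the example page shift are invented here.
 */
#define X_PAGE_SHIFT	12	/* example 4 KiB pages; PAGE_SHIFT in the kernel */

static void
kva_range_to_object_pages(unsigned long start, unsigned long end,
    unsigned long kmap_min, unsigned long *first_pg, unsigned long *end_pg)
{
	*first_pg = (start - kmap_min) >> X_PAGE_SHIFT;
	*end_pg = (end - kmap_min) >> X_PAGE_SHIFT;	/* exclusive bound */
}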
1666 */ 1667 pmap_remove(pmap_kernel(), entry->start, entry->end); 1668 uvm_km_pgremove(entry->object.uvm_obj, 1669 entry->start - vm_map_min(kernel_map), 1670 entry->end - vm_map_min(kernel_map)); 1671 1672 /* 1673 * null out kernel_object reference, we've just 1674 * dropped it 1675 */ 1676 entry->etype &= ~UVM_ET_OBJ; 1677 entry->object.uvm_obj = NULL; /* to be safe */ 1678 1679 } else { 1680 /* 1681 * remove mappings the standard way. 1682 */ 1683 pmap_remove(map->pmap, entry->start, entry->end); 1684 } 1685 1686 /* 1687 * remove entry from map and put it on our list of entries 1688 * that we've nuked. then go do next entry. 1689 */ 1690 UVMHIST_LOG(maphist, " removed map entry %p", entry, 0, 0,0); 1691 1692 /* critical! prevents stale hint */ 1693 SAVE_HINT(map, entry, entry->prev); 1694 1695 uvm_map_entry_unlink(map, entry); 1696 map->size -= len; 1697 entry->next = first_entry; 1698 first_entry = entry; 1699 entry = next; /* next entry, please */ 1700 } 1701 #ifdef KVA_GUARDPAGES 1702 /* 1703 * entry points at the map-entry after the last-removed map-entry. 1704 */ 1705 if (map == kernel_map && entry != &map->header && 1706 entry->etype & MAP_ET_KVAGUARD && entry->start == end) { 1707 /* 1708 * Removed range is followed by guard page; 1709 * remove that guard page now (or it will stay forever). 1710 */ 1711 entry->etype &= ~MAP_ET_KVAGUARD; 1712 kva_guardpages--; 1713 1714 uvm_map_entry_unlink(map, entry); 1715 map->size -= len; 1716 entry->next = first_entry; 1717 first_entry = entry; 1718 entry = next; /* next entry, please */ 1719 } 1720 #endif 1721 /* if ((map->flags & VM_MAP_DYING) == 0) { */ 1722 pmap_update(vm_map_pmap(map)); 1723 /* } */ 1724 1725 1726 uvm_tree_sanity(map, "unmap_remove leave"); 1727 1728 /* 1729 * now we've cleaned up the map and are ready for the caller to drop 1730 * references to the mapped objects. 1731 */ 1732 1733 *entry_list = first_entry; 1734 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1735 } 1736 1737 /* 1738 * uvm_unmap_detach: drop references in a chain of map entries 1739 * 1740 * => we will free the map entries as we traverse the list. 1741 */ 1742 1743 void 1744 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 1745 { 1746 struct vm_map_entry *next_entry; 1747 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 1748 1749 while (first_entry) { 1750 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 1751 UVMHIST_LOG(maphist, 1752 " detach 0x%lx: amap=%p, obj=%p, submap?=%ld", 1753 first_entry, first_entry->aref.ar_amap, 1754 first_entry->object.uvm_obj, 1755 UVM_ET_ISSUBMAP(first_entry)); 1756 1757 /* 1758 * drop reference to amap, if we've got one 1759 */ 1760 1761 if (first_entry->aref.ar_amap) 1762 uvm_map_unreference_amap(first_entry, flags); 1763 1764 /* 1765 * drop reference to our backing object, if we've got one 1766 */ 1767 1768 if (UVM_ET_ISSUBMAP(first_entry)) { 1769 /* ... unlikely to happen, but play it safe */ 1770 uvm_map_deallocate(first_entry->object.sub_map); 1771 } else { 1772 if (UVM_ET_ISOBJ(first_entry) && 1773 first_entry->object.uvm_obj->pgops->pgo_detach) 1774 first_entry->object.uvm_obj->pgops-> 1775 pgo_detach(first_entry->object.uvm_obj); 1776 } 1777 1778 next_entry = first_entry->next; 1779 uvm_mapent_free(first_entry); 1780 first_entry = next_entry; 1781 } 1782 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 1783 } 1784 1785 /* 1786 * E X T R A C T I O N F U N C T I O N S 1787 */ 1788 1789 /* 1790 * uvm_map_reserve: reserve space in a vm_map for future use. 
1791 * 1792 * => we reserve space in a map by putting a dummy map entry in the 1793 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 1794 * => map should be unlocked (we will write lock it) 1795 * => we return true if we were able to reserve space 1796 * => XXXCDC: should be inline? 1797 */ 1798 1799 int 1800 uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset, 1801 vsize_t align, vaddr_t *raddr) 1802 { 1803 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 1804 1805 UVMHIST_LOG(maphist, "(map=%p, size=0x%lx, offset=0x%lx,addr=0x%lx)", 1806 map,size,offset,raddr); 1807 1808 size = round_page(size); 1809 if (*raddr < vm_map_min(map)) 1810 *raddr = vm_map_min(map); /* hint */ 1811 1812 /* 1813 * reserve some virtual space. 1814 */ 1815 1816 if (uvm_map(map, raddr, size, NULL, offset, 0, 1817 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 1818 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { 1819 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 1820 return (FALSE); 1821 } 1822 1823 UVMHIST_LOG(maphist, "<- done (*raddr=0x%lx)", *raddr,0,0,0); 1824 return (TRUE); 1825 } 1826 1827 /* 1828 * uvm_map_replace: replace a reserved (blank) area of memory with 1829 * real mappings. 1830 * 1831 * => caller must WRITE-LOCK the map 1832 * => we return TRUE if replacement was a success 1833 * => we expect the newents chain to have nnewents entries on it and 1834 * we expect newents->prev to point to the last entry on the list 1835 * => note newents is allowed to be NULL 1836 */ 1837 1838 int 1839 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 1840 struct vm_map_entry *newents, int nnewents) 1841 { 1842 struct vm_map_entry *oldent, *last; 1843 1844 uvm_tree_sanity(map, "map_replace entry"); 1845 1846 /* 1847 * first find the blank map entry at the specified address 1848 */ 1849 1850 if (!uvm_map_lookup_entry(map, start, &oldent)) { 1851 return(FALSE); 1852 } 1853 1854 /* 1855 * check to make sure we have a proper blank entry 1856 */ 1857 1858 if (oldent->start != start || oldent->end != end || 1859 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 1860 return (FALSE); 1861 } 1862 1863 #ifdef DIAGNOSTIC 1864 /* 1865 * sanity check the newents chain 1866 */ 1867 { 1868 struct vm_map_entry *tmpent = newents; 1869 int nent = 0; 1870 vaddr_t cur = start; 1871 1872 while (tmpent) { 1873 nent++; 1874 if (tmpent->start < cur) 1875 panic("uvm_map_replace1"); 1876 if (tmpent->start > tmpent->end || tmpent->end > end) { 1877 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", 1878 tmpent->start, tmpent->end, end); 1879 panic("uvm_map_replace2"); 1880 } 1881 cur = tmpent->end; 1882 if (tmpent->next) { 1883 if (tmpent->next->prev != tmpent) 1884 panic("uvm_map_replace3"); 1885 } else { 1886 if (newents->prev != tmpent) 1887 panic("uvm_map_replace4"); 1888 } 1889 tmpent = tmpent->next; 1890 } 1891 if (nent != nnewents) 1892 panic("uvm_map_replace5"); 1893 } 1894 #endif 1895 1896 /* 1897 * map entry is a valid blank! replace it. (this does all the 1898 * work of map entry link/unlink...). 
1899 */ 1900 1901 if (newents) { 1902 last = newents->prev; /* we expect this */ 1903 1904 /* critical: flush stale hints out of map */ 1905 SAVE_HINT(map, map->hint, newents); 1906 if (map->first_free == oldent) 1907 map->first_free = last; 1908 1909 last->next = oldent->next; 1910 last->next->prev = last; 1911 1912 /* Fix RB tree */ 1913 uvm_rb_remove(map, oldent); 1914 1915 newents->prev = oldent->prev; 1916 newents->prev->next = newents; 1917 map->nentries = map->nentries + (nnewents - 1); 1918 1919 /* Fixup the RB tree */ 1920 { 1921 int i; 1922 struct vm_map_entry *tmp; 1923 1924 tmp = newents; 1925 for (i = 0; i < nnewents && tmp; i++) { 1926 uvm_rb_insert(map, tmp); 1927 tmp = tmp->next; 1928 } 1929 } 1930 } else { 1931 1932 /* critical: flush stale hints out of map */ 1933 SAVE_HINT(map, map->hint, oldent->prev); 1934 if (map->first_free == oldent) 1935 map->first_free = oldent->prev; 1936 1937 /* NULL list of new entries: just remove the old one */ 1938 uvm_map_entry_unlink(map, oldent); 1939 } 1940 1941 1942 uvm_tree_sanity(map, "map_replace leave"); 1943 1944 /* 1945 * now we can free the old blank entry, unlock the map and return. 1946 */ 1947 1948 uvm_mapent_free(oldent); 1949 return(TRUE); 1950 } 1951 1952 /* 1953 * uvm_map_extract: extract a mapping from a map and put it somewhere 1954 * (maybe removing the old mapping) 1955 * 1956 * => maps should be unlocked (we will write lock them) 1957 * => returns 0 on success, error code otherwise 1958 * => start must be page aligned 1959 * => len must be page sized 1960 * => flags: 1961 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 1962 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 1963 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 1964 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 1965 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 1966 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 1967 * be used from within the kernel in a kernel level map <<< 1968 */ 1969 1970 int 1971 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 1972 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 1973 { 1974 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, 1975 oldstart; 1976 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry; 1977 struct vm_map_entry *deadentry, *oldentry; 1978 vsize_t elen; 1979 int nchain, error, copy_ok; 1980 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 1981 1982 UVMHIST_LOG(maphist,"(srcmap=%p,start=0x%lx, len=0x%lx", srcmap, start, 1983 len,0); 1984 UVMHIST_LOG(maphist," ...,dstmap=%p, flags=0x%lx)", dstmap,flags,0,0); 1985 1986 uvm_tree_sanity(srcmap, "map_extract src enter"); 1987 uvm_tree_sanity(dstmap, "map_extract dst enter"); 1988 1989 /* 1990 * step 0: sanity check: start must be on a page boundary, length 1991 * must be page sized. can't ask for CONTIG/QREF if you asked for 1992 * REMOVE. 
1993 */ 1994 1995 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 1996 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 1997 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 1998 1999 /* 2000 * step 1: reserve space in the target map for the extracted area 2001 */ 2002 2003 dstaddr = vm_map_min(dstmap); 2004 if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE) 2005 return(ENOMEM); 2006 *dstaddrp = dstaddr; /* pass address back to caller */ 2007 UVMHIST_LOG(maphist, " dstaddr=0x%lx", dstaddr,0,0,0); 2008 2009 /* 2010 * step 2: setup for the extraction process loop by init'ing the 2011 * map entry chain, locking src map, and looking up the first useful 2012 * entry in the map. 2013 */ 2014 2015 end = start + len; 2016 newend = dstaddr + len; 2017 chain = endchain = NULL; 2018 nchain = 0; 2019 vm_map_lock(srcmap); 2020 2021 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2022 2023 /* "start" is within an entry */ 2024 if (flags & UVM_EXTRACT_QREF) { 2025 2026 /* 2027 * for quick references we don't clip the entry, so 2028 * the entry may map space "before" the starting 2029 * virtual address... this is the "fudge" factor 2030 * (which can be non-zero only the first time 2031 * through the "while" loop in step 3). 2032 */ 2033 2034 fudge = start - entry->start; 2035 } else { 2036 2037 /* 2038 * normal reference: we clip the map to fit (thus 2039 * fudge is zero) 2040 */ 2041 2042 UVM_MAP_CLIP_START(srcmap, entry, start); 2043 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2044 fudge = 0; 2045 } 2046 } else { 2047 2048 /* "start" is not within an entry ... skip to next entry */ 2049 if (flags & UVM_EXTRACT_CONTIG) { 2050 error = EINVAL; 2051 goto bad; /* definite hole here ... */ 2052 } 2053 2054 entry = entry->next; 2055 fudge = 0; 2056 } 2057 2058 /* save values from srcmap for step 6 */ 2059 orig_entry = entry; 2060 orig_fudge = fudge; 2061 2062 /* 2063 * step 3: now start looping through the map entries, extracting 2064 * as we go. 2065 */ 2066 2067 while (entry->start < end && entry != &srcmap->header) { 2068 2069 /* if we are not doing a quick reference, clip it */ 2070 if ((flags & UVM_EXTRACT_QREF) == 0) 2071 UVM_MAP_CLIP_END(srcmap, entry, end); 2072 2073 /* clear needs_copy (allow chunking) */ 2074 if (UVM_ET_ISNEEDSCOPY(entry)) { 2075 if (fudge) 2076 oldstart = entry->start; 2077 else 2078 oldstart = 0; /* XXX: gcc */ 2079 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); 2080 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2081 error = ENOMEM; 2082 goto bad; 2083 } 2084 2085 /* amap_copy could clip (during chunk)! 
update fudge */ 2086 if (fudge) { 2087 fudge = fudge - (entry->start - oldstart); 2088 orig_fudge = fudge; 2089 } 2090 } 2091 2092 /* calculate the offset of this from "start" */ 2093 oldoffset = (entry->start + fudge) - start; 2094 2095 /* allocate a new map entry */ 2096 newentry = uvm_mapent_alloc(dstmap); 2097 if (newentry == NULL) { 2098 error = ENOMEM; 2099 goto bad; 2100 } 2101 2102 /* set up new map entry */ 2103 newentry->next = NULL; 2104 newentry->prev = endchain; 2105 newentry->start = dstaddr + oldoffset; 2106 newentry->end = 2107 newentry->start + (entry->end - (entry->start + fudge)); 2108 if (newentry->end > newend || newentry->end < newentry->start) 2109 newentry->end = newend; 2110 newentry->object.uvm_obj = entry->object.uvm_obj; 2111 if (newentry->object.uvm_obj) { 2112 if (newentry->object.uvm_obj->pgops->pgo_reference) 2113 newentry->object.uvm_obj->pgops-> 2114 pgo_reference(newentry->object.uvm_obj); 2115 newentry->offset = entry->offset + fudge; 2116 } else { 2117 newentry->offset = 0; 2118 } 2119 newentry->etype = entry->etype; 2120 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2121 entry->max_protection : entry->protection; 2122 newentry->max_protection = entry->max_protection; 2123 newentry->inheritance = entry->inheritance; 2124 newentry->wired_count = 0; 2125 newentry->aref.ar_amap = entry->aref.ar_amap; 2126 if (newentry->aref.ar_amap) { 2127 newentry->aref.ar_pageoff = 2128 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2129 uvm_map_reference_amap(newentry, AMAP_SHARED | 2130 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2131 } else { 2132 newentry->aref.ar_pageoff = 0; 2133 } 2134 newentry->advice = entry->advice; 2135 2136 /* now link it on the chain */ 2137 nchain++; 2138 if (endchain == NULL) { 2139 chain = endchain = newentry; 2140 } else { 2141 endchain->next = newentry; 2142 endchain = newentry; 2143 } 2144 2145 /* end of 'while' loop! */ 2146 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2147 (entry->next == &srcmap->header || 2148 entry->next->start != entry->end)) { 2149 error = EINVAL; 2150 goto bad; 2151 } 2152 entry = entry->next; 2153 fudge = 0; 2154 } 2155 2156 /* 2157 * step 4: close off chain (in format expected by uvm_map_replace) 2158 */ 2159 2160 if (chain) 2161 chain->prev = endchain; 2162 2163 /* 2164 * step 5: attempt to lock the dest map so we can pmap_copy. 
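 * (we only try-lock dstmap here: srcmap is already locked, and
 * blocking on dstmap while holding it could deadlock; if the try
 * fails the replace is simply deferred to step 7.)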
2165 * note usage of copy_ok:
2166 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
2167 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
2168 */
2169
2170 if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
2171 copy_ok = 1;
2172 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2173 nchain)) {
2174 if (srcmap != dstmap)
2175 vm_map_unlock(dstmap);
2176 error = EIO;
2177 goto bad;
2178 }
2179 } else {
2180 copy_ok = 0;
2181 /* replace deferred until step 7 */
2182 }
2183
2184 /*
2185 * step 6: traverse the srcmap a second time to do the following:
2186 * - if we got a lock on the dstmap do pmap_copy
2187 * - if UVM_EXTRACT_REMOVE remove the entries
2188 * we make use of orig_entry and orig_fudge (saved in step 2)
2189 */
2190
2191 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
2192
2193 /* purge possible stale hints from srcmap */
2194 if (flags & UVM_EXTRACT_REMOVE) {
2195 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
2196 if (srcmap->first_free->start >= start)
2197 srcmap->first_free = orig_entry->prev;
2198 }
2199
2200 entry = orig_entry;
2201 fudge = orig_fudge;
2202 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */
2203
2204 while (entry->start < end && entry != &srcmap->header) {
2205 if (copy_ok) {
2206 oldoffset = (entry->start + fudge) - start;
2207 elen = MIN(end, entry->end) -
2208 (entry->start + fudge);
2209 pmap_copy(dstmap->pmap, srcmap->pmap,
2210 dstaddr + oldoffset, elen,
2211 entry->start + fudge);
2212 }
2213
2214 /* we advance "entry" in the following if statement */
2215 if (flags & UVM_EXTRACT_REMOVE) {
2216 pmap_remove(srcmap->pmap, entry->start,
2217 entry->end);
2218 oldentry = entry; /* save entry */
2219 entry = entry->next; /* advance */
2220 uvm_map_entry_unlink(srcmap, oldentry);
2221 /* add to dead list */
2222 oldentry->next = deadentry;
2223 deadentry = oldentry;
2224 } else {
2225 entry = entry->next; /* advance */
2226 }
2227
2228 /* end of 'while' loop */
2229 fudge = 0;
2230 }
2231 pmap_update(srcmap->pmap);
2232
2233 /*
2234 * unlock dstmap. we will dispose of deadentry in
2235 * step 7 if needed
2236 */
2237
2238 if (copy_ok && srcmap != dstmap)
2239 vm_map_unlock(dstmap);
2240
2241 }
2242 else
2243 deadentry = NULL; /* XXX: gcc */
2244
2245 /*
2246 * step 7: we are done with the source map, unlock. if copy_ok
2247 * is 0 then we have not replaced the dummy mapping in dstmap yet
2248 * and we need to do so now.
2249 */
2250
2251 vm_map_unlock(srcmap);
2252 if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
2253 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */
2254
2255 /* now do the replacement if we didn't do it in step 5 */
2256 if (copy_ok == 0) {
2257 vm_map_lock(dstmap);
2258 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2259 nchain);
2260 vm_map_unlock(dstmap);
2261
2262 if (error == FALSE) {
2263 error = EIO;
2264 goto bad2;
2265 }
2266 }
2267
2268 uvm_tree_sanity(srcmap, "map_extract src leave");
2269 uvm_tree_sanity(dstmap, "map_extract dst leave");
2270
2271 return(0);
2272
2273 /*
2274 * bad: failure recovery
2275 */
2276 bad:
2277 vm_map_unlock(srcmap);
2278 bad2: /* src already unlocked */
2279 if (chain)
2280 uvm_unmap_detach(chain,
2281 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
2282
2283 uvm_tree_sanity(srcmap, "map_extract src err leave");
2284 uvm_tree_sanity(dstmap, "map_extract dst err leave");
2285
2286 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ???
*/ 2287 return(error); 2288 } 2289 2290 /* end of extraction functions */ 2291 2292 /* 2293 * uvm_map_submap: punch down part of a map into a submap 2294 * 2295 * => only the kernel_map is allowed to be submapped 2296 * => the purpose of submapping is to break up the locking granularity 2297 * of a larger map 2298 * => the range specified must have been mapped previously with a uvm_map() 2299 * call [with uobj==NULL] to create a blank map entry in the main map. 2300 * [And it had better still be blank!] 2301 * => maps which contain submaps should never be copied or forked. 2302 * => to remove a submap, use uvm_unmap() on the main map 2303 * and then uvm_map_deallocate() the submap. 2304 * => main map must be unlocked. 2305 * => submap must have been init'd and have a zero reference count. 2306 * [need not be locked as we don't actually reference it] 2307 */ 2308 2309 int 2310 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2311 struct vm_map *submap) 2312 { 2313 struct vm_map_entry *entry; 2314 int result; 2315 2316 vm_map_lock(map); 2317 2318 VM_MAP_RANGE_CHECK(map, start, end); 2319 2320 if (uvm_map_lookup_entry(map, start, &entry)) { 2321 UVM_MAP_CLIP_START(map, entry, start); 2322 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 2323 } else { 2324 entry = NULL; 2325 } 2326 2327 if (entry != NULL && 2328 entry->start == start && entry->end == end && 2329 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2330 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2331 entry->etype |= UVM_ET_SUBMAP; 2332 entry->object.sub_map = submap; 2333 entry->offset = 0; 2334 uvm_map_reference(submap); 2335 result = 0; 2336 } else { 2337 result = EINVAL; 2338 } 2339 vm_map_unlock(map); 2340 return(result); 2341 } 2342 2343 2344 /* 2345 * uvm_map_protect: change map protection 2346 * 2347 * => set_max means set max_protection. 2348 * => map must be unlocked. 2349 */ 2350 2351 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2352 ~VM_PROT_WRITE : VM_PROT_ALL) 2353 #define max(a,b) ((a) > (b) ? (a) : (b)) 2354 2355 int 2356 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2357 vm_prot_t new_prot, boolean_t set_max) 2358 { 2359 struct vm_map_entry *current, *entry; 2360 int error = 0; 2361 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 2362 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_prot=0x%lx)", 2363 map, start, end, new_prot); 2364 2365 vm_map_lock(map); 2366 2367 VM_MAP_RANGE_CHECK(map, start, end); 2368 2369 if (uvm_map_lookup_entry(map, start, &entry)) { 2370 UVM_MAP_CLIP_START(map, entry, start); 2371 } else { 2372 entry = entry->next; 2373 } 2374 2375 /* 2376 * make a first pass to check for protection violations. 2377 */ 2378 2379 current = entry; 2380 while ((current != &map->header) && (current->start < end)) { 2381 if (UVM_ET_ISSUBMAP(current)) { 2382 error = EINVAL; 2383 goto out; 2384 } 2385 if ((new_prot & current->max_protection) != new_prot) { 2386 error = EACCES; 2387 goto out; 2388 } 2389 current = current->next; 2390 } 2391 2392 /* go back and fix up protections (no need to clip this time). */ 2393 2394 current = entry; 2395 2396 while ((current != &map->header) && (current->start < end)) { 2397 vm_prot_t old_prot; 2398 2399 UVM_MAP_CLIP_END(map, current, end); 2400 2401 old_prot = current->protection; 2402 if (set_max) 2403 current->protection = 2404 (current->max_protection = new_prot) & old_prot; 2405 else 2406 current->protection = new_prot; 2407 2408 /* 2409 * update physical map if necessary. 
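 * (MASK() clears VM_PROT_WRITE for copy-on-write entries, so the
 * pmap is never given a writable mapping before the fault routine
 * has resolved the copy.)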
worry about copy-on-write 2410 * here -- CHECK THIS XXX 2411 */ 2412 2413 if (current->protection != old_prot) { 2414 /* update pmap! */ 2415 if ((current->protection & MASK(entry)) == PROT_NONE && 2416 VM_MAPENT_ISWIRED(entry)) 2417 current->wired_count--; 2418 pmap_protect(map->pmap, current->start, current->end, 2419 current->protection & MASK(entry)); 2420 } 2421 2422 /* 2423 * If the map is configured to lock any future mappings, 2424 * wire this entry now if the old protection was VM_PROT_NONE 2425 * and the new protection is not VM_PROT_NONE. 2426 */ 2427 2428 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 2429 VM_MAPENT_ISWIRED(entry) == 0 && 2430 old_prot == VM_PROT_NONE && 2431 new_prot != VM_PROT_NONE) { 2432 if (uvm_map_pageable(map, entry->start, entry->end, 2433 FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 2434 /* 2435 * If locking the entry fails, remember the 2436 * error if it's the first one. Note we 2437 * still continue setting the protection in 2438 * the map, but will return the resource 2439 * shortage condition regardless. 2440 * 2441 * XXX Ignore what the actual error is, 2442 * XXX just call it a resource shortage 2443 * XXX so that it doesn't get confused 2444 * XXX what uvm_map_protect() itself would 2445 * XXX normally return. 2446 */ 2447 error = ENOMEM; 2448 } 2449 } 2450 2451 current = current->next; 2452 } 2453 pmap_update(map->pmap); 2454 2455 out: 2456 vm_map_unlock(map); 2457 UVMHIST_LOG(maphist, "<- done, rv=%ld",error,0,0,0); 2458 return (error); 2459 } 2460 2461 #undef max 2462 #undef MASK 2463 2464 /* 2465 * uvm_map_inherit: set inheritance code for range of addrs in map. 2466 * 2467 * => map must be unlocked 2468 * => note that the inherit code is used during a "fork". see fork 2469 * code for details. 2470 */ 2471 2472 int 2473 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 2474 vm_inherit_t new_inheritance) 2475 { 2476 struct vm_map_entry *entry, *temp_entry; 2477 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 2478 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_inh=0x%lx)", 2479 map, start, end, new_inheritance); 2480 2481 switch (new_inheritance) { 2482 case MAP_INHERIT_NONE: 2483 case MAP_INHERIT_COPY: 2484 case MAP_INHERIT_SHARE: 2485 break; 2486 default: 2487 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 2488 return (EINVAL); 2489 } 2490 2491 vm_map_lock(map); 2492 2493 VM_MAP_RANGE_CHECK(map, start, end); 2494 2495 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 2496 entry = temp_entry; 2497 UVM_MAP_CLIP_START(map, entry, start); 2498 } else { 2499 entry = temp_entry->next; 2500 } 2501 2502 while ((entry != &map->header) && (entry->start < end)) { 2503 UVM_MAP_CLIP_END(map, entry, end); 2504 entry->inheritance = new_inheritance; 2505 entry = entry->next; 2506 } 2507 2508 vm_map_unlock(map); 2509 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 2510 return (0); 2511 } 2512 2513 /* 2514 * uvm_map_advice: set advice code for range of addrs in map. 
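 * => illustrative sketch of a typical madvise(2)-style caller (the
 *    process pointer "p" here is just an example):
 *	uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
 *	    MADV_SEQUENTIAL);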
2515 *
2516 * => map must be unlocked
2517 */
2518
2519 int
2520 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
2521 {
2522 struct vm_map_entry *entry, *temp_entry;
2523 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
2524 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_adv=0x%lx)",
2525 map, start, end, new_advice);
2526
2527 vm_map_lock(map);
2528 VM_MAP_RANGE_CHECK(map, start, end);
2529 if (uvm_map_lookup_entry(map, start, &temp_entry)) {
2530 entry = temp_entry;
2531 UVM_MAP_CLIP_START(map, entry, start);
2532 } else {
2533 entry = temp_entry->next;
2534 }
2535
2536 /*
2537 * XXXJRT: disallow holes?
2538 */
2539
2540 while ((entry != &map->header) && (entry->start < end)) {
2541 UVM_MAP_CLIP_END(map, entry, end);
2542
2543 switch (new_advice) {
2544 case MADV_NORMAL:
2545 case MADV_RANDOM:
2546 case MADV_SEQUENTIAL:
2547 /* nothing special here */
2548 break;
2549
2550 default:
2551 vm_map_unlock(map);
2552 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2553 return (EINVAL);
2554 }
2555 entry->advice = new_advice;
2556 entry = entry->next;
2557 }
2558
2559 vm_map_unlock(map);
2560 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
2561 return (0);
2562 }
2563
2564 /*
2565 * uvm_map_pageable: sets the pageability of a range in a map.
2566 *
2567 * => wires map entries. should not be used for transient page locking.
2568 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
2569 * => regions specified as not pageable require lock-down (wired) memory
2570 * and page tables.
2571 * => map must never be read-locked
2572 * => if UVM_LK_ENTER is set in lockflags, map is already write-locked
2573 * => we always unlock the map, since we must downgrade to a read-lock
2574 * to call uvm_fault_wire()
2575 * => XXXCDC: check this and try and clean it up.
2576 */
2577
2578 int
2579 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2580 boolean_t new_pageable, int lockflags)
2581 {
2582 struct vm_map_entry *entry, *start_entry, *failed_entry;
2583 int rv;
2584 #ifdef DIAGNOSTIC
2585 u_int timestamp_save;
2586 #endif
2587 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
2588 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,new_pageable=0x%lx)",
2589 map, start, end, new_pageable);
2590 KASSERT(map->flags & VM_MAP_PAGEABLE);
2591
2592 if ((lockflags & UVM_LK_ENTER) == 0)
2593 vm_map_lock(map);
2594
2595 VM_MAP_RANGE_CHECK(map, start, end);
2596
2597 /*
2598 * only one pageability change may take place at one time, since
2599 * uvm_fault_wire assumes it will be called only once for each
2600 * wiring/unwiring. therefore, we have to make sure we're actually
2601 * changing the pageability for the entire region. we do so before
2602 * making any changes.
2603 */
2604
2605 if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
2606 if ((lockflags & UVM_LK_EXIT) == 0)
2607 vm_map_unlock(map);
2608
2609 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
2610 return (EFAULT);
2611 }
2612 entry = start_entry;
2613
2614 /*
2615 * handle wiring and unwiring separately.
2616 */
2617
2618 if (new_pageable) { /* unwire */
2619 UVM_MAP_CLIP_START(map, entry, start);
2620
2621 /*
2622 * unwiring. first ensure that the range to be unwired is
2623 * really wired down and that there are no holes.
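 * (otherwise we could decrement wired counts on entries that were
 * never wired, or silently skip an unmapped gap in the range.)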
2624 */ 2625 2626 while ((entry != &map->header) && (entry->start < end)) { 2627 if (entry->wired_count == 0 || 2628 (entry->end < end && 2629 (entry->next == &map->header || 2630 entry->next->start > entry->end))) { 2631 if ((lockflags & UVM_LK_EXIT) == 0) 2632 vm_map_unlock(map); 2633 UVMHIST_LOG(maphist, 2634 "<- done (INVALID UNWIRE ARG)",0,0,0,0); 2635 return (EINVAL); 2636 } 2637 entry = entry->next; 2638 } 2639 2640 /* 2641 * POSIX 1003.1b - a single munlock call unlocks a region, 2642 * regardless of the number of mlock calls made on that 2643 * region. 2644 */ 2645 2646 entry = start_entry; 2647 while ((entry != &map->header) && (entry->start < end)) { 2648 UVM_MAP_CLIP_END(map, entry, end); 2649 if (VM_MAPENT_ISWIRED(entry)) 2650 uvm_map_entry_unwire(map, entry); 2651 entry = entry->next; 2652 } 2653 if ((lockflags & UVM_LK_EXIT) == 0) 2654 vm_map_unlock(map); 2655 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 2656 return (0); 2657 } 2658 2659 /* 2660 * wire case: in two passes [XXXCDC: ugly block of code here] 2661 * 2662 * 1: holding the write lock, we create any anonymous maps that need 2663 * to be created. then we clip each map entry to the region to 2664 * be wired and increment its wiring count. 2665 * 2666 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2667 * in the pages for any newly wired area (wired_count == 1). 2668 * 2669 * downgrading to a read lock for uvm_fault_wire avoids a possible 2670 * deadlock with another thread that may have faulted on one of 2671 * the pages to be wired (it would mark the page busy, blocking 2672 * us, then in turn block on the map lock that we hold). because 2673 * of problems in the recursive lock package, we cannot upgrade 2674 * to a write lock in vm_map_lookup. thus, any actions that 2675 * require the write lock must be done beforehand. because we 2676 * keep the read lock on the map, the copy-on-write status of the 2677 * entries we modify here cannot change. 2678 */ 2679 2680 while ((entry != &map->header) && (entry->start < end)) { 2681 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2682 2683 /* 2684 * perform actions of vm_map_lookup that need the 2685 * write lock on the map: create an anonymous map 2686 * for a copy-on-write region, or an anonymous map 2687 * for a zero-fill region. (XXXCDC: submap case 2688 * ok?) 2689 */ 2690 2691 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 2692 if (UVM_ET_ISNEEDSCOPY(entry) && 2693 ((entry->protection & VM_PROT_WRITE) || 2694 (entry->object.uvm_obj == NULL))) { 2695 amap_copy(map, entry, M_WAITOK, TRUE, 2696 start, end); 2697 /* XXXCDC: wait OK? */ 2698 } 2699 } 2700 } 2701 UVM_MAP_CLIP_START(map, entry, start); 2702 UVM_MAP_CLIP_END(map, entry, end); 2703 entry->wired_count++; 2704 2705 /* 2706 * Check for holes 2707 */ 2708 2709 if (entry->protection == VM_PROT_NONE || 2710 (entry->end < end && 2711 (entry->next == &map->header || 2712 entry->next->start > entry->end))) { 2713 2714 /* 2715 * found one. amap creation actions do not need to 2716 * be undone, but the wired counts need to be restored. 2717 */ 2718 2719 while (entry != &map->header && entry->end > start) { 2720 entry->wired_count--; 2721 entry = entry->prev; 2722 } 2723 if ((lockflags & UVM_LK_EXIT) == 0) 2724 vm_map_unlock(map); 2725 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 2726 return (EINVAL); 2727 } 2728 entry = entry->next; 2729 } 2730 2731 /* 2732 * Pass 2. 
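 * (with the map busied and downgraded to a read lock, fault in the
 * pages of every entry whose wired_count just went to 1.)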
2733 */ 2734 2735 #ifdef DIAGNOSTIC 2736 timestamp_save = map->timestamp; 2737 #endif 2738 vm_map_busy(map); 2739 vm_map_downgrade(map); 2740 2741 rv = 0; 2742 entry = start_entry; 2743 while (entry != &map->header && entry->start < end) { 2744 if (entry->wired_count == 1) { 2745 rv = uvm_fault_wire(map, entry->start, entry->end, 2746 entry->protection); 2747 if (rv) { 2748 /* 2749 * wiring failed. break out of the loop. 2750 * we'll clean up the map below, once we 2751 * have a write lock again. 2752 */ 2753 break; 2754 } 2755 } 2756 entry = entry->next; 2757 } 2758 2759 if (rv) { /* failed? */ 2760 2761 /* 2762 * Get back to an exclusive (write) lock. 2763 */ 2764 2765 vm_map_upgrade(map); 2766 vm_map_unbusy(map); 2767 2768 #ifdef DIAGNOSTIC 2769 if (timestamp_save != map->timestamp) 2770 panic("uvm_map_pageable: stale map"); 2771 #endif 2772 2773 /* 2774 * first drop the wiring count on all the entries 2775 * which haven't actually been wired yet. 2776 */ 2777 2778 failed_entry = entry; 2779 while (entry != &map->header && entry->start < end) { 2780 entry->wired_count--; 2781 entry = entry->next; 2782 } 2783 2784 /* 2785 * now, unwire all the entries that were successfully 2786 * wired above. 2787 */ 2788 2789 entry = start_entry; 2790 while (entry != failed_entry) { 2791 entry->wired_count--; 2792 if (VM_MAPENT_ISWIRED(entry) == 0) 2793 uvm_map_entry_unwire(map, entry); 2794 entry = entry->next; 2795 } 2796 if ((lockflags & UVM_LK_EXIT) == 0) 2797 vm_map_unlock(map); 2798 UVMHIST_LOG(maphist, "<- done (RV=%ld)", rv,0,0,0); 2799 return(rv); 2800 } 2801 2802 /* We are holding a read lock here. */ 2803 if ((lockflags & UVM_LK_EXIT) == 0) { 2804 vm_map_unbusy(map); 2805 vm_map_unlock_read(map); 2806 } else { 2807 2808 /* 2809 * Get back to an exclusive (write) lock. 2810 */ 2811 2812 vm_map_upgrade(map); 2813 vm_map_unbusy(map); 2814 } 2815 2816 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 2817 return (0); 2818 } 2819 2820 /* 2821 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2822 * all mapped regions. 2823 * 2824 * => map must not be locked. 2825 * => if no flags are specified, all regions are unwired. 2826 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 2827 */ 2828 2829 int 2830 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2831 { 2832 struct vm_map_entry *entry, *failed_entry; 2833 vsize_t size; 2834 int error; 2835 #ifdef DIAGNOSTIC 2836 u_int timestamp_save; 2837 #endif 2838 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 2839 UVMHIST_LOG(maphist,"(map=%p,flags=0x%lx)", map, flags, 0, 0); 2840 2841 KASSERT(map->flags & VM_MAP_PAGEABLE); 2842 2843 vm_map_lock(map); 2844 2845 /* 2846 * handle wiring and unwiring separately. 2847 */ 2848 2849 if (flags == 0) { /* unwire */ 2850 /* 2851 * POSIX 1003.1b -- munlockall unlocks all regions, 2852 * regardless of how many times mlockall has been called. 2853 */ 2854 for (entry = map->header.next; entry != &map->header; 2855 entry = entry->next) { 2856 if (VM_MAPENT_ISWIRED(entry)) 2857 uvm_map_entry_unwire(map, entry); 2858 } 2859 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2860 vm_map_unlock(map); 2861 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 2862 return (0); 2863 2864 /* 2865 * end of unwire case! 2866 */ 2867 } 2868 2869 if (flags & MCL_FUTURE) { 2870 /* 2871 * must wire all future mappings; remember this. 
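 * (VM_MAP_WIREFUTURE is checked later, e.g. by uvm_map_protect()
 * above, when a mapping first becomes accessible.)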
2872 */ 2873 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2874 } 2875 2876 if ((flags & MCL_CURRENT) == 0) { 2877 /* 2878 * no more work to do! 2879 */ 2880 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 2881 vm_map_unlock(map); 2882 return (0); 2883 } 2884 2885 /* 2886 * wire case: in three passes [XXXCDC: ugly block of code here] 2887 * 2888 * 1: holding the write lock, count all pages mapped by non-wired 2889 * entries. if this would cause us to go over our limit, we fail. 2890 * 2891 * 2: still holding the write lock, we create any anonymous maps that 2892 * need to be created. then we increment its wiring count. 2893 * 2894 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 2895 * in the pages for any newly wired area (wired_count == 1). 2896 * 2897 * downgrading to a read lock for uvm_fault_wire avoids a possible 2898 * deadlock with another thread that may have faulted on one of 2899 * the pages to be wired (it would mark the page busy, blocking 2900 * us, then in turn block on the map lock that we hold). because 2901 * of problems in the recursive lock package, we cannot upgrade 2902 * to a write lock in vm_map_lookup. thus, any actions that 2903 * require the write lock must be done beforehand. because we 2904 * keep the read lock on the map, the copy-on-write status of the 2905 * entries we modify here cannot change. 2906 */ 2907 2908 for (size = 0, entry = map->header.next; entry != &map->header; 2909 entry = entry->next) { 2910 if (entry->protection != VM_PROT_NONE && 2911 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2912 size += entry->end - entry->start; 2913 } 2914 } 2915 2916 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2917 vm_map_unlock(map); 2918 return (ENOMEM); /* XXX overloaded */ 2919 } 2920 2921 /* XXX non-pmap_wired_count case must be handled by caller */ 2922 #ifdef pmap_wired_count 2923 if (limit != 0 && 2924 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 2925 vm_map_unlock(map); 2926 return (ENOMEM); /* XXX overloaded */ 2927 } 2928 #endif 2929 2930 /* 2931 * Pass 2. 2932 */ 2933 2934 for (entry = map->header.next; entry != &map->header; 2935 entry = entry->next) { 2936 if (entry->protection == VM_PROT_NONE) 2937 continue; 2938 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 2939 /* 2940 * perform actions of vm_map_lookup that need the 2941 * write lock on the map: create an anonymous map 2942 * for a copy-on-write region, or an anonymous map 2943 * for a zero-fill region. (XXXCDC: submap case 2944 * ok?) 2945 */ 2946 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 2947 if (UVM_ET_ISNEEDSCOPY(entry) && 2948 ((entry->protection & VM_PROT_WRITE) || 2949 (entry->object.uvm_obj == NULL))) { 2950 amap_copy(map, entry, M_WAITOK, TRUE, 2951 entry->start, entry->end); 2952 /* XXXCDC: wait OK? */ 2953 } 2954 } 2955 } 2956 entry->wired_count++; 2957 } 2958 2959 /* 2960 * Pass 3. 2961 */ 2962 2963 #ifdef DIAGNOSTIC 2964 timestamp_save = map->timestamp; 2965 #endif 2966 vm_map_busy(map); 2967 vm_map_downgrade(map); 2968 2969 for (error = 0, entry = map->header.next; 2970 entry != &map->header && error == 0; 2971 entry = entry->next) { 2972 if (entry->wired_count == 1) { 2973 error = uvm_fault_wire(map, entry->start, entry->end, 2974 entry->protection); 2975 } 2976 } 2977 2978 if (error) { /* failed? */ 2979 /* 2980 * Get back an exclusive (write) lock. 
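 * (we need the write lock again before rolling back the wired
 * counts below.)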
2981 */ 2982 vm_map_upgrade(map); 2983 vm_map_unbusy(map); 2984 2985 #ifdef DIAGNOSTIC 2986 if (timestamp_save != map->timestamp) 2987 panic("uvm_map_pageable_all: stale map"); 2988 #endif 2989 2990 /* 2991 * first drop the wiring count on all the entries 2992 * which haven't actually been wired yet. 2993 * 2994 * Skip VM_PROT_NONE entries like we did above. 2995 */ 2996 failed_entry = entry; 2997 for (/* nothing */; entry != &map->header; 2998 entry = entry->next) { 2999 if (entry->protection == VM_PROT_NONE) 3000 continue; 3001 entry->wired_count--; 3002 } 3003 3004 /* 3005 * now, unwire all the entries that were successfully 3006 * wired above. 3007 * 3008 * Skip VM_PROT_NONE entries like we did above. 3009 */ 3010 for (entry = map->header.next; entry != failed_entry; 3011 entry = entry->next) { 3012 if (entry->protection == VM_PROT_NONE) 3013 continue; 3014 entry->wired_count--; 3015 if (VM_MAPENT_ISWIRED(entry)) 3016 uvm_map_entry_unwire(map, entry); 3017 } 3018 vm_map_unlock(map); 3019 UVMHIST_LOG(maphist,"<- done (RV=%ld)", error,0,0,0); 3020 return (error); 3021 } 3022 3023 /* We are holding a read lock here. */ 3024 vm_map_unbusy(map); 3025 vm_map_unlock_read(map); 3026 3027 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3028 return (0); 3029 } 3030 3031 /* 3032 * uvm_map_clean: clean out a map range 3033 * 3034 * => valid flags: 3035 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3036 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3037 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3038 * if (flags & PGO_FREE): any cached pages are freed after clean 3039 * => returns an error if any part of the specified range isn't mapped 3040 * => never a need to flush amap layer since the anonymous memory has 3041 * no permanent home, but may deactivate pages there 3042 * => called from sys_msync() and sys_madvise() 3043 * => caller must not write-lock map (read OK). 3044 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3045 */ 3046 3047 int amap_clean_works = 1; /* XXX for now, just in case... */ 3048 3049 int 3050 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3051 { 3052 struct vm_map_entry *current, *entry; 3053 struct uvm_object *uobj; 3054 struct vm_amap *amap; 3055 struct vm_anon *anon; 3056 struct vm_page *pg; 3057 vaddr_t offset; 3058 vsize_t size; 3059 int rv, error, refs; 3060 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3061 3062 UVMHIST_LOG(maphist,"(map=%p,start=0x%lx,end=0x%lx,flags=0x%lx)", 3063 map, start, end, flags); 3064 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3065 (PGO_FREE|PGO_DEACTIVATE)); 3066 3067 vm_map_lock_read(map); 3068 VM_MAP_RANGE_CHECK(map, start, end); 3069 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { 3070 vm_map_unlock_read(map); 3071 return (EFAULT); 3072 } 3073 3074 /* 3075 * Make a first pass to check for holes. 
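 * (a submap or an unmapped gap anywhere in the range makes the whole
 * request fail before any flushing is done.)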
3076 */
3077
3078 for (current = entry; current->start < end; current = current->next) {
3079 if (UVM_ET_ISSUBMAP(current)) {
3080 vm_map_unlock_read(map);
3081 return (EINVAL);
3082 }
3083 if (end > current->end && (current->next == &map->header ||
3084 current->end != current->next->start)) {
3085 vm_map_unlock_read(map);
3086 return (EFAULT);
3087 }
3088 }
3089
3090 error = 0;
3091
3092 for (current = entry; current->start < end; current = current->next) {
3093 amap = current->aref.ar_amap; /* top layer */
3094 uobj = current->object.uvm_obj; /* bottom layer */
3095 KASSERT(start >= current->start);
3096
3097 /*
3098 * No amap cleaning necessary if:
3099 *
3100 * (1) There's no amap.
3101 *
3102 * (2) We're not deactivating or freeing pages.
3103 */
3104
3105 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
3106 goto flush_object;
3107
3108 /* XXX for now, just in case... */
3109 if (amap_clean_works == 0)
3110 goto flush_object;
3111
3112 offset = start - current->start;
3113 size = MIN(end, current->end) - start;
3114 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
3115 anon = amap_lookup(&current->aref, offset);
3116 if (anon == NULL)
3117 continue;
3118
3119 simple_lock(&anon->an_lock);
3120
3121 pg = anon->an_page;
3122 if (pg == NULL) {
3123 simple_unlock(&anon->an_lock);
3124 continue;
3125 }
3126
3127 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
3128
3129 /*
3130 * XXX In these first 3 cases, we always just
3131 * XXX deactivate the page. We may want to
3132 * XXX handle the different cases more
3133 * XXX specifically, in the future.
3134 */
3135
3136 case PGO_CLEANIT|PGO_FREE:
3137 case PGO_CLEANIT|PGO_DEACTIVATE:
3138 case PGO_DEACTIVATE:
3139 deactivate_it:
3140 /* skip the page if it's loaned or wired */
3141 if (pg->loan_count != 0 ||
3142 pg->wire_count != 0) {
3143 simple_unlock(&anon->an_lock);
3144 continue;
3145 }
3146
3147 uvm_lock_pageq();
3148
3149 /*
3150 * skip the page if it's not actually owned
3151 * by the anon (may simply be loaned to the
3152 * anon).
3153 */
3154
3155 if ((pg->pg_flags & PQ_ANON) == 0) {
3156 KASSERT(pg->uobject == NULL);
3157 uvm_unlock_pageq();
3158 simple_unlock(&anon->an_lock);
3159 continue;
3160 }
3161 KASSERT(pg->uanon == anon);
3162
3163 #ifdef UBC
3164 /* ...and deactivate the page. */
3165 pmap_clear_reference(pg);
3166 #else
3167 /* zap all mappings for the page. */
3168 pmap_page_protect(pg, VM_PROT_NONE);
3169
3170 /* ...and deactivate the page. */
3171 #endif
3172 uvm_pagedeactivate(pg);
3173
3174 uvm_unlock_pageq();
3175 simple_unlock(&anon->an_lock);
3176 continue;
3177
3178 case PGO_FREE:
3179
3180 /*
3181 * If there are multiple references to
3182 * the amap, just deactivate the page.
3183 */
3184
3185 if (amap_refs(amap) > 1)
3186 goto deactivate_it;
3187
3188 /* XXX skip the page if it's wired */
3189 if (pg->wire_count != 0) {
3190 simple_unlock(&anon->an_lock);
3191 continue;
3192 }
3193 amap_unadd(&current->aref, offset);
3194 refs = --anon->an_ref;
3195 simple_unlock(&anon->an_lock);
3196 if (refs == 0)
3197 uvm_anfree(anon);
3198 continue;
3199
3200 default:
3201 panic("uvm_map_clean: weird flags");
3202 }
3203 }
3204
3205 flush_object:
3206 /*
3207 * flush pages if we've got a valid backing object.
3208 *
3209 * Don't PGO_FREE if we don't have write permission
3210 * and don't flush if this is a copy-on-write object
3211 * since we can't know our permissions on it.
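 * (the flush itself is done by the object's pgo_flush pager op,
 * invoked below with the range translated into object offsets.)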
3212 */
3213
3214 offset = current->offset + (start - current->start);
3215 size = MIN(end, current->end) - start;
3216 if (uobj != NULL &&
3217 ((flags & PGO_FREE) == 0 ||
3218 ((entry->max_protection & VM_PROT_WRITE) != 0 &&
3219 (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
3220 simple_lock(&uobj->vmobjlock);
3221 rv = uobj->pgops->pgo_flush(uobj, offset,
3222 offset + size, flags);
3223 simple_unlock(&uobj->vmobjlock);
3224
3225 if (rv == FALSE)
3226 error = EFAULT;
3227 }
3228 start += size;
3229 }
3230 vm_map_unlock_read(map);
3231 return (error);
3232 }
3233
3234
3235 /*
3236 * uvm_map_checkprot: check protection in map
3237 *
3238 * => must allow specified protection in a fully allocated region.
3239 * => map must be read or write locked by caller.
3240 */
3241
3242 boolean_t
3243 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3244 vm_prot_t protection)
3245 {
3246 struct vm_map_entry *entry;
3247 struct vm_map_entry *tmp_entry;
3248
3249 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3250 return(FALSE);
3251 }
3252 entry = tmp_entry;
3253 while (start < end) {
3254 if (entry == &map->header) {
3255 return(FALSE);
3256 }
3257
3258 /*
3259 * no holes allowed
3260 */
3261
3262 if (start < entry->start) {
3263 return(FALSE);
3264 }
3265
3266 /*
3267 * check protection associated with entry
3268 */
3269
3270 if ((entry->protection & protection) != protection) {
3271 return(FALSE);
3272 }
3273
3274 /* go to next entry */
3275
3276 start = entry->end;
3277 entry = entry->next;
3278 }
3279 return(TRUE);
3280 }
3281
3282 /*
3283 * uvmspace_alloc: allocate a vmspace structure.
3284 *
3285 * - structure includes vm_map and pmap
3286 * - XXX: no locking on this structure
3287 * - refcnt set to 1, rest must be init'd by caller
3288 */
3289 struct vmspace *
3290 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3291 boolean_t remove_holes)
3292 {
3293 struct vmspace *vm;
3294 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
3295
3296 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3297 uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3298 UVMHIST_LOG(maphist,"<- done (vm=%p)", vm,0,0,0);
3299 return (vm);
3300 }
3301
3302 /*
3303 * uvmspace_init: initialize a vmspace structure.
3304 *
3305 * - XXX: no locking on this structure
3306 * - refcnt set to 1, rest must be init'd by caller
3307 */
3308 void
3309 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3310 boolean_t pageable, boolean_t remove_holes)
3311 {
3312 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
3313
3314 uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);
3315
3316 if (pmap)
3317 pmap_reference(pmap);
3318 else
3319 pmap = pmap_create();
3320 vm->vm_map.pmap = pmap;
3321
3322 vm->vm_refcnt = 1;
3323
3324 if (remove_holes)
3325 pmap_remove_holes(&vm->vm_map);
3326
3327 UVMHIST_LOG(maphist,"<- done",0,0,0,0);
3328 }
3329
3330 /*
3331 * uvmspace_share: share a vmspace between two processes
3332 *
3333 * - XXX: no locking on vmspace
3334 * - used for vfork, threads(?)
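 * - the second process just points at the first one's vmspace and
 *   the reference count is bumped; nothing is copied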
3335 */ 3336 3337 void 3338 uvmspace_share(p1, p2) 3339 struct proc *p1, *p2; 3340 { 3341 p2->p_vmspace = p1->p_vmspace; 3342 p1->p_vmspace->vm_refcnt++; 3343 } 3344 3345 /* 3346 * uvmspace_exec: the process wants to exec a new program 3347 * 3348 * - XXX: no locking on vmspace 3349 */ 3350 3351 void 3352 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3353 { 3354 struct vmspace *nvm, *ovm = p->p_vmspace; 3355 struct vm_map *map = &ovm->vm_map; 3356 3357 pmap_unuse_final(p); /* before stack addresses go away */ 3358 3359 /* 3360 * see if more than one process is using this vmspace... 3361 */ 3362 3363 if (ovm->vm_refcnt == 1) { 3364 3365 /* 3366 * if p is the only process using its vmspace then we can safely 3367 * recycle that vmspace for the program that is being exec'd. 3368 */ 3369 3370 #ifdef SYSVSHM 3371 /* 3372 * SYSV SHM semantics require us to kill all segments on an exec 3373 */ 3374 if (ovm->vm_shm) 3375 shmexit(ovm); 3376 #endif 3377 3378 /* 3379 * POSIX 1003.1b -- "lock future mappings" is revoked 3380 * when a process execs another program image. 3381 */ 3382 vm_map_lock(map); 3383 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3384 vm_map_unlock(map); 3385 3386 /* 3387 * now unmap the old program 3388 */ 3389 uvm_unmap(map, map->min_offset, map->max_offset); 3390 3391 /* 3392 * but keep MMU holes unavailable 3393 */ 3394 pmap_remove_holes(map); 3395 3396 /* 3397 * resize the map 3398 */ 3399 vm_map_lock(map); 3400 map->min_offset = start; 3401 uvm_tree_sanity(map, "resize enter"); 3402 map->max_offset = end; 3403 if (map->header.prev != &map->header) 3404 uvm_rb_fixup(map, map->header.prev); 3405 uvm_tree_sanity(map, "resize leave"); 3406 vm_map_unlock(map); 3407 3408 3409 } else { 3410 3411 /* 3412 * p's vmspace is being shared, so we can't reuse it for p since 3413 * it is still being used for others. allocate a new vmspace 3414 * for p 3415 */ 3416 nvm = uvmspace_alloc(start, end, 3417 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3418 3419 /* 3420 * install new vmspace and drop our ref to the old one. 3421 */ 3422 3423 pmap_deactivate(p); 3424 p->p_vmspace = nvm; 3425 pmap_activate(p); 3426 3427 uvmspace_free(ovm); 3428 } 3429 } 3430 3431 /* 3432 * uvmspace_free: free a vmspace data structure 3433 * 3434 * - XXX: no locking on vmspace 3435 */ 3436 3437 void 3438 uvmspace_free(struct vmspace *vm) 3439 { 3440 struct vm_map_entry *dead_entries; 3441 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); 3442 3443 UVMHIST_LOG(maphist,"(vm=%p) ref=%ld", vm, vm->vm_refcnt,0,0); 3444 if (--vm->vm_refcnt == 0) { 3445 /* 3446 * lock the map, to wait out all other references to it. delete 3447 * all of the mappings and pages they hold, then call the pmap 3448 * module to reclaim anything left. 3449 */ 3450 #ifdef SYSVSHM 3451 /* Get rid of any SYSV shared memory segments. 
*/ 3452 if (vm->vm_shm != NULL) 3453 shmexit(vm); 3454 #endif 3455 vm_map_lock(&vm->vm_map); 3456 if (vm->vm_map.nentries) { 3457 uvm_unmap_remove(&vm->vm_map, 3458 vm->vm_map.min_offset, vm->vm_map.max_offset, 3459 &dead_entries, NULL, TRUE); 3460 if (dead_entries != NULL) 3461 uvm_unmap_detach(dead_entries, 0); 3462 } 3463 pmap_destroy(vm->vm_map.pmap); 3464 vm->vm_map.pmap = NULL; 3465 pool_put(&uvm_vmspace_pool, vm); 3466 } 3467 UVMHIST_LOG(maphist,"<- done", 0,0,0,0); 3468 } 3469 3470 /* 3471 * uvm_map_create: create map 3472 */ 3473 vm_map_t 3474 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 3475 { 3476 vm_map_t result; 3477 3478 result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); 3479 uvm_map_setup(result, min, max, flags); 3480 result->pmap = pmap; 3481 return(result); 3482 } 3483 3484 /* 3485 * uvm_map_setup: init map 3486 * 3487 * => map must not be in service yet. 3488 */ 3489 void 3490 uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags) 3491 { 3492 3493 RB_INIT(&map->rbhead); 3494 map->header.next = map->header.prev = &map->header; 3495 map->nentries = 0; 3496 map->size = 0; 3497 map->ref_count = 1; 3498 map->min_offset = min; 3499 map->max_offset = max; 3500 map->flags = flags; 3501 map->first_free = &map->header; 3502 map->hint = &map->header; 3503 map->timestamp = 0; 3504 rw_init(&map->lock, "vmmaplk"); 3505 simple_lock_init(&map->ref_lock); 3506 simple_lock_init(&map->hint_lock); 3507 } 3508 3509 3510 3511 /* 3512 * uvm_map_reference: add reference to a map 3513 * 3514 * => map need not be locked (we use ref_lock). 3515 */ 3516 void 3517 uvm_map_reference(vm_map_t map) 3518 { 3519 simple_lock(&map->ref_lock); 3520 map->ref_count++; 3521 simple_unlock(&map->ref_lock); 3522 } 3523 3524 /* 3525 * uvm_map_deallocate: drop reference to a map 3526 * 3527 * => caller must not lock map 3528 * => we will zap map if ref count goes to zero 3529 */ 3530 void 3531 uvm_map_deallocate(vm_map_t map) 3532 { 3533 int c; 3534 3535 simple_lock(&map->ref_lock); 3536 c = --map->ref_count; 3537 simple_unlock(&map->ref_lock); 3538 if (c > 0) { 3539 return; 3540 } 3541 3542 /* 3543 * all references gone. unmap and free. 3544 */ 3545 3546 uvm_unmap(map, map->min_offset, map->max_offset); 3547 pmap_destroy(map->pmap); 3548 free(map, M_VMMAP); 3549 } 3550 3551 /* 3552 * F O R K - m a i n e n t r y p o i n t 3553 */ 3554 /* 3555 * uvmspace_fork: fork a process' main map 3556 * 3557 * => create a new vmspace for child process from parent. 3558 * => parent's map must not be locked. 3559 */ 3560 3561 struct vmspace * 3562 uvmspace_fork(struct vmspace *vm1) 3563 { 3564 struct vmspace *vm2; 3565 struct vm_map *old_map = &vm1->vm_map; 3566 struct vm_map *new_map; 3567 struct vm_map_entry *old_entry; 3568 struct vm_map_entry *new_entry; 3569 pmap_t new_pmap; 3570 boolean_t protect_child; 3571 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); 3572 3573 vm_map_lock(old_map); 3574 3575 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3576 (old_map->flags & VM_MAP_PAGEABLE) ? 
TRUE : FALSE, FALSE); 3577 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3578 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3579 new_map = &vm2->vm_map; /* XXX */ 3580 new_pmap = new_map->pmap; 3581 3582 old_entry = old_map->header.next; 3583 3584 /* 3585 * go entry-by-entry 3586 */ 3587 3588 while (old_entry != &old_map->header) { 3589 3590 /* 3591 * first, some sanity checks on the old entry 3592 */ 3593 if (UVM_ET_ISSUBMAP(old_entry)) 3594 panic("fork: encountered a submap during fork (illegal)"); 3595 3596 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3597 UVM_ET_ISNEEDSCOPY(old_entry)) 3598 panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); 3599 3600 3601 switch (old_entry->inheritance) { 3602 case MAP_INHERIT_NONE: 3603 /* 3604 * drop the mapping 3605 */ 3606 break; 3607 3608 case MAP_INHERIT_SHARE: 3609 /* 3610 * share the mapping: this means we want the old and 3611 * new entries to share amaps and backing objects. 3612 */ 3613 3614 /* 3615 * if the old_entry needs a new amap (due to prev fork) 3616 * then we need to allocate it now so that we have 3617 * something we own to share with the new_entry. [in 3618 * other words, we need to clear needs_copy] 3619 */ 3620 3621 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3622 /* get our own amap, clears needs_copy */ 3623 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3624 0, 0); 3625 /* XXXCDC: WAITOK??? */ 3626 } 3627 3628 new_entry = uvm_mapent_alloc(new_map); 3629 /* old_entry -> new_entry */ 3630 uvm_mapent_copy(old_entry, new_entry); 3631 3632 /* new pmap has nothing wired in it */ 3633 new_entry->wired_count = 0; 3634 3635 /* 3636 * gain reference to object backing the map (can't 3637 * be a submap, already checked this case). 3638 */ 3639 if (new_entry->aref.ar_amap) 3640 /* share reference */ 3641 uvm_map_reference_amap(new_entry, AMAP_SHARED); 3642 3643 if (new_entry->object.uvm_obj && 3644 new_entry->object.uvm_obj->pgops->pgo_reference) 3645 new_entry->object.uvm_obj-> 3646 pgops->pgo_reference( 3647 new_entry->object.uvm_obj); 3648 3649 /* insert entry at end of new_map's entry list */ 3650 uvm_map_entry_link(new_map, new_map->header.prev, 3651 new_entry); 3652 3653 /* 3654 * pmap_copy the mappings: this routine is optional 3655 * but if it is there it will reduce the number of 3656 * page faults in the new proc. 3657 */ 3658 3659 pmap_copy(new_pmap, old_map->pmap, new_entry->start, 3660 (old_entry->end - old_entry->start), 3661 old_entry->start); 3662 3663 break; 3664 3665 case MAP_INHERIT_COPY: 3666 3667 /* 3668 * copy-on-write the mapping (using mmap's 3669 * MAP_PRIVATE semantics) 3670 * 3671 * allocate new_entry, adjust reference counts. 3672 * (note that new references are read-only). 3673 */ 3674 3675 new_entry = uvm_mapent_alloc(new_map); 3676 /* old_entry -> new_entry */ 3677 uvm_mapent_copy(old_entry, new_entry); 3678 3679 if (new_entry->aref.ar_amap) 3680 uvm_map_reference_amap(new_entry, 0); 3681 3682 if (new_entry->object.uvm_obj && 3683 new_entry->object.uvm_obj->pgops->pgo_reference) 3684 new_entry->object.uvm_obj->pgops->pgo_reference 3685 (new_entry->object.uvm_obj); 3686 3687 /* new pmap has nothing wired in it */ 3688 new_entry->wired_count = 0; 3689 3690 new_entry->etype |= 3691 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3692 uvm_map_entry_link(new_map, new_map->header.prev, 3693 new_entry); 3694 3695 /* 3696 * the new entry will need an amap. it will either 3697 * need to be copied from the old entry or created 3698 * from scratch (if the old entry does not have an 3699 * amap). 
can we defer this process until later 3700 * (by setting "needs_copy") or do we need to copy 3701 * the amap now? 3702 * 3703 * we must copy the amap now if any of the following 3704 * conditions hold: 3705 * 1. the old entry has an amap and that amap is 3706 * being shared. this means that the old (parent) 3707 * process is sharing the amap with another 3708 * process. if we do not clear needs_copy here 3709 * we will end up in a situation where both the 3710 * parent and child process are referring to the 3711 * same amap with "needs_copy" set. if the 3712 * parent write-faults, the fault routine will 3713 * clear "needs_copy" in the parent by allocating 3714 * a new amap. this is wrong because the 3715 * parent is supposed to be sharing the old amap 3716 * and the new amap will break that. 3717 * 3718 * 2. if the old entry has an amap and a non-zero 3719 * wire count then we are going to have to call 3720 * amap_cow_now to avoid page faults in the 3721 * parent process. since amap_cow_now requires 3722 * "needs_copy" to be clear we might as well 3723 * clear it here as well. 3724 * 3725 */ 3726 3727 if (old_entry->aref.ar_amap != NULL) { 3728 3729 if ((amap_flags(old_entry->aref.ar_amap) & 3730 AMAP_SHARED) != 0 || 3731 VM_MAPENT_ISWIRED(old_entry)) { 3732 3733 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3734 0, 0); 3735 /* XXXCDC: M_WAITOK ... ok? */ 3736 } 3737 } 3738 3739 /* 3740 * if the parent's entry is wired down, then the 3741 * parent process does not want page faults on 3742 * access to that memory. this means that we 3743 * cannot do copy-on-write because we can't write 3744 * protect the old entry. in this case we 3745 * resolve all copy-on-write faults now, using 3746 * amap_cow_now. note that we have already 3747 * allocated any needed amap (above). 3748 */ 3749 3750 if (VM_MAPENT_ISWIRED(old_entry)) { 3751 3752 /* 3753 * resolve all copy-on-write faults now 3754 * (note that there is nothing to do if 3755 * the old mapping does not have an amap). 3756 * XXX: is it worthwhile to bother with pmap_copy 3757 * in this case? 3758 */ 3759 if (old_entry->aref.ar_amap) 3760 amap_cow_now(new_map, new_entry); 3761 3762 } else { 3763 3764 /* 3765 * setup mappings to trigger copy-on-write faults 3766 * we must write-protect the parent if it has 3767 * an amap and it is not already "needs_copy"... 3768 * if it is already "needs_copy" then the parent 3769 * has already been write-protected by a previous 3770 * fork operation. 3771 * 3772 * if we do not write-protect the parent, then 3773 * we must be sure to write-protect the child 3774 * after the pmap_copy() operation. 3775 * 3776 * XXX: pmap_copy should have some way of telling 3777 * us that it didn't do anything so we can avoid 3778 * calling pmap_protect needlessly. 3779 */ 3780 3781 if (old_entry->aref.ar_amap) { 3782 3783 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3784 if (old_entry->max_protection & VM_PROT_WRITE) { 3785 pmap_protect(old_map->pmap, 3786 old_entry->start, 3787 old_entry->end, 3788 old_entry->protection & 3789 ~VM_PROT_WRITE); 3790 pmap_update(old_map->pmap); 3791 3792 } 3793 old_entry->etype |= UVM_ET_NEEDSCOPY; 3794 } 3795 3796 /* 3797 * parent must now be write-protected 3798 */ 3799 protect_child = FALSE; 3800 } else { 3801 3802 /* 3803 * we only need to protect the child if the 3804 * parent has write access. 
3805 */ 3806 if (old_entry->max_protection & VM_PROT_WRITE) 3807 protect_child = TRUE; 3808 else 3809 protect_child = FALSE; 3810 3811 } 3812 3813 /* 3814 * copy the mappings 3815 * XXX: need a way to tell if this does anything 3816 */ 3817 3818 pmap_copy(new_pmap, old_map->pmap, 3819 new_entry->start, 3820 (old_entry->end - old_entry->start), 3821 old_entry->start); 3822 3823 /* 3824 * protect the child's mappings if necessary 3825 */ 3826 if (protect_child) { 3827 pmap_protect(new_pmap, new_entry->start, 3828 new_entry->end, 3829 new_entry->protection & 3830 ~VM_PROT_WRITE); 3831 } 3832 3833 } 3834 break; 3835 } /* end of switch statement */ 3836 old_entry = old_entry->next; 3837 } 3838 3839 new_map->size = old_map->size; 3840 vm_map_unlock(old_map); 3841 3842 #ifdef SYSVSHM 3843 if (vm1->vm_shm) 3844 shmfork(vm1, vm2); 3845 #endif 3846 3847 #ifdef PMAP_FORK 3848 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 3849 #endif 3850 3851 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3852 return(vm2); 3853 } 3854 3855 #if defined(DDB) 3856 3857 /* 3858 * DDB hooks 3859 */ 3860 3861 /* 3862 * uvm_map_printit: actually prints the map 3863 */ 3864 3865 void 3866 uvm_map_printit(struct vm_map *map, boolean_t full, 3867 int (*pr)(const char *, ...)) 3868 { 3869 struct vm_map_entry *entry; 3870 3871 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3872 (*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n", 3873 map->nentries, map->size, map->ref_count, map->timestamp, 3874 map->flags); 3875 #ifdef pmap_resident_count 3876 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3877 pmap_resident_count(map->pmap)); 3878 #else 3879 /* XXXCDC: this should be required ... */ 3880 (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); 3881 #endif 3882 if (!full) 3883 return; 3884 for (entry = map->header.next; entry != &map->header; 3885 entry = entry->next) { 3886 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3887 entry, entry->start, entry->end, entry->object.uvm_obj, 3888 (long long)entry->offset, entry->aref.ar_amap, 3889 entry->aref.ar_pageoff); 3890 (*pr)( 3891 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 3892 "wc=%d, adv=%d\n", 3893 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3894 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3895 (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', 3896 entry->protection, entry->max_protection, 3897 entry->inheritance, entry->wired_count, entry->advice); 3898 } 3899 } 3900 3901 /* 3902 * uvm_object_printit: actually prints the object 3903 */ 3904 3905 void 3906 uvm_object_printit(uobj, full, pr) 3907 struct uvm_object *uobj; 3908 boolean_t full; 3909 int (*pr)(const char *, ...); 3910 { 3911 struct vm_page *pg; 3912 int cnt = 0; 3913 3914 (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ", 3915 uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages); 3916 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3917 (*pr)("refs=<SYSTEM>\n"); 3918 else 3919 (*pr)("refs=%d\n", uobj->uo_refs); 3920 3921 if (!full) { 3922 return; 3923 } 3924 (*pr)(" PAGES <pg,offset>:\n "); 3925 RB_FOREACH(pg, uvm_objtree, &uobj->memt) { 3926 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3927 if ((cnt % 3) == 2) { 3928 (*pr)("\n "); 3929 } 3930 cnt++; 3931 } 3932 if ((cnt % 3) != 2) { 3933 (*pr)("\n"); 3934 } 3935 } 3936 3937 /* 3938 * uvm_page_printit: actually print the page 3939 */ 3940 3941 static const char page_flagbits[] = 3942 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3943 "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" 3944 "\31PMAP1\32PMAP2\33PMAP3"; 3945 3946 void 3947 uvm_page_printit(pg, full, pr) 3948 struct vm_page *pg; 3949 boolean_t full; 3950 int (*pr)(const char *, ...); 3951 { 3952 struct vm_page *tpg; 3953 struct uvm_object *uobj; 3954 struct pglist *pgl; 3955 3956 (*pr)("PAGE %p:\n", pg); 3957 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3958 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3959 (long long)pg->phys_addr); 3960 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 3961 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 3962 #if defined(UVM_PAGE_TRKOWN) 3963 if (pg->pg_flags & PG_BUSY) 3964 (*pr)(" owning process = %d, tag=%s\n", 3965 pg->owner, pg->owner_tag); 3966 else 3967 (*pr)(" page not busy, no owner\n"); 3968 #else 3969 (*pr)(" [page ownership tracking disabled]\n"); 3970 #endif 3971 3972 if (!full) 3973 return; 3974 3975 /* cross-verify object/anon */ 3976 if ((pg->pg_flags & PQ_FREE) == 0) { 3977 if (pg->pg_flags & PQ_ANON) { 3978 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3979 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3980 (pg->uanon) ? pg->uanon->an_page : NULL); 3981 else 3982 (*pr)(" anon backpointer is OK\n"); 3983 } else { 3984 uobj = pg->uobject; 3985 if (uobj) { 3986 (*pr)(" checking object list\n"); 3987 RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3988 if (tpg == pg) { 3989 break; 3990 } 3991 } 3992 if (tpg) 3993 (*pr)(" page found on object list\n"); 3994 else 3995 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 3996 } 3997 } 3998 } 3999 4000 /* cross-verify page queue */ 4001 if (pg->pg_flags & PQ_FREE) { 4002 int fl = uvm_page_lookup_freelist(pg); 4003 pgl = &uvm.page_free[fl].pgfl_queues[((pg)->pg_flags & PG_ZERO) ? 4004 PGFL_ZEROS : PGFL_UNKNOWN]; 4005 } else if (pg->pg_flags & PQ_INACTIVE) { 4006 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 4007 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 4008 } else if (pg->pg_flags & PQ_ACTIVE) { 4009 pgl = &uvm.page_active; 4010 } else { 4011 pgl = NULL; 4012 } 4013 4014 if (pgl) { 4015 (*pr)(" checking pageq list\n"); 4016 TAILQ_FOREACH(tpg, pgl, pageq) { 4017 if (tpg == pg) { 4018 break; 4019 } 4020 } 4021 if (tpg) 4022 (*pr)(" page found on pageq list\n"); 4023 else 4024 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! 
<<<\n"); 4025 } 4026 } 4027 #endif 4028