/*	$NetBSD: uvm_map.c,v 1.229 2006/09/16 07:14:38 yamt Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_map.c: uvm map operations
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.229 2006/09/16 07:14:38 yamt Exp $");

#include "opt_ddb.h"
#include "opt_uvmhist.h"
#include "opt_uvm.h"
#include "opt_sysv.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <uvm/uvm.h>
#undef RB_AUGMENT
#define	RB_AUGMENT(x)	uvm_rb_augment(x)

#ifdef DDB
#include <uvm/uvm_ddb.h>
#endif

#if defined(UVMMAP_NOCOUNTERS)

#define	UVMMAP_EVCNT_DEFINE(name)	/* nothing */
#define	UVMMAP_EVCNT_INCR(ev)		/* nothing */
#define	UVMMAP_EVCNT_DECR(ev)		/* nothing */

#else /* defined(UVMMAP_NOCOUNTERS) */

#include <sys/evcnt.h>
#define	UVMMAP_EVCNT_DEFINE(name) \
struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "uvmmap", #name); \
EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name);
#define	UVMMAP_EVCNT_INCR(ev)		uvmmap_evcnt_##ev.ev_count++
#define	UVMMAP_EVCNT_DECR(ev)		uvmmap_evcnt_##ev.ev_count--

#endif /* defined(UVMMAP_NOCOUNTERS) */

UVMMAP_EVCNT_DEFINE(ubackmerge)
UVMMAP_EVCNT_DEFINE(uforwmerge)
UVMMAP_EVCNT_DEFINE(ubimerge)
UVMMAP_EVCNT_DEFINE(unomerge)
UVMMAP_EVCNT_DEFINE(kbackmerge)
UVMMAP_EVCNT_DEFINE(kforwmerge)
UVMMAP_EVCNT_DEFINE(kbimerge)
UVMMAP_EVCNT_DEFINE(knomerge)
UVMMAP_EVCNT_DEFINE(map_call)
UVMMAP_EVCNT_DEFINE(mlk_call)
UVMMAP_EVCNT_DEFINE(mlk_hint)

UVMMAP_EVCNT_DEFINE(uke_alloc)
UVMMAP_EVCNT_DEFINE(uke_free)
UVMMAP_EVCNT_DEFINE(ukh_alloc)
UVMMAP_EVCNT_DEFINE(ukh_free)

const char vmmapbsy[] = "vmmapbsy";

/*
 * pool for vmspace structures.
 */

POOL_INIT(uvm_vmspace_pool, sizeof(struct vmspace), 0, 0, 0, "vmsppl",
    &pool_allocator_nointr);

/*
 * pool for dynamically-allocated map entries.
 */

POOL_INIT(uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 0, 0, "vmmpepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures");
MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap");

#ifdef PMAP_GROWKERNEL
/*
 * This global represents the end of the kernel virtual address
 * space.  If we want to exceed this, we must grow the kernel
 * virtual address space dynamically.
 *
 * Note, this variable is locked by kernel_map's lock.
 */
vaddr_t uvm_maxkaddr;
#endif

/*
 * macros
 */

/*
 * VM_MAP_USE_KMAPENT: determine if uvm_kmapent_alloc/free is used
 * for the vm_map.
 */
extern struct vm_map *pager_map; /* XXX */
#define	VM_MAP_USE_KMAPENT_FLAGS(flags) \
	(((flags) & VM_MAP_INTRSAFE) != 0)
#define	VM_MAP_USE_KMAPENT(map) \
	(VM_MAP_USE_KMAPENT_FLAGS((map)->flags) || (map) == kernel_map)

/*
 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging
 */

#define	UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \
    prot, maxprot, inh, adv, wire) \
	((ent)->etype == (type) && \
	(((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE | UVM_MAP_QUANTUM)) \
	== 0 && \
	(ent)->object.uvm_obj == (uobj) && \
	(ent)->protection == (prot) && \
	(ent)->max_protection == (maxprot) && \
	(ent)->inheritance == (inh) && \
	(ent)->advice == (adv) && \
	(ent)->wired_count == (wire))

/*
 * uvm_map_entry_link: insert entry into a map
 *
 * => map must be locked
 */
#define	uvm_map_entry_link(map, after_where, entry) do { \
	uvm_mapent_check(entry); \
	(map)->nentries++; \
	(entry)->prev = (after_where); \
	(entry)->next = (after_where)->next; \
	(entry)->prev->next = (entry); \
	(entry)->next->prev = (entry); \
	uvm_rb_insert((map), (entry)); \
} while (/*CONSTCOND*/ 0)

/*
 * uvm_map_entry_unlink: remove entry from a map
 *
 * => map must be locked
 */
#define	uvm_map_entry_unlink(map, entry) do { \
	KASSERT((entry) != (map)->first_free); \
	KASSERT((entry) != (map)->hint); \
	uvm_mapent_check(entry); \
	(map)->nentries--; \
	(entry)->next->prev = (entry)->prev; \
	(entry)->prev->next = (entry)->next; \
	uvm_rb_remove((map), (entry)); \
} while (/*CONSTCOND*/ 0)

/*
 * SAVE_HINT: saves the specified entry as the hint for future lookups.
 *
 * => map need not be locked (protected by hint_lock).
 */
#define	SAVE_HINT(map,check,value) do { \
	simple_lock(&(map)->hint_lock); \
	if ((map)->hint == (check)) \
		(map)->hint = (value); \
	simple_unlock(&(map)->hint_lock); \
} while (/*CONSTCOND*/ 0)

/*
 * clear_hints: ensure that hints don't point to the entry.
 *
 * => map must be write-locked.
 */
static void
clear_hints(struct vm_map *map, struct vm_map_entry *ent)
{

	SAVE_HINT(map, ent, ent->prev);
	if (map->first_free == ent) {
		map->first_free = ent->prev;
	}
}

/*
 * VM_MAP_RANGE_CHECK: check and correct range
 *
 * => map must at least be read locked
 */

#define	VM_MAP_RANGE_CHECK(map, start, end) do { \
	if (start < vm_map_min(map))		\
		start = vm_map_min(map);	\
	if (end > vm_map_max(map))		\
		end = vm_map_max(map);		\
	if (start > end)			\
		start = end;			\
} while (/*CONSTCOND*/ 0)

/*
 * local prototypes
 */

static struct vm_map_entry *
		uvm_mapent_alloc(struct vm_map *, int);
static struct vm_map_entry *
		uvm_mapent_alloc_split(struct vm_map *,
		    const struct vm_map_entry *, int,
		    struct uvm_mapent_reservation *);
static void	uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
static void	uvm_mapent_free(struct vm_map_entry *);
#if defined(DEBUG)
static void	_uvm_mapent_check(const struct vm_map_entry *, const char *,
		    int);
#define	uvm_mapent_check(map)	_uvm_mapent_check(map, __FILE__, __LINE__)
#else /* defined(DEBUG) */
#define	uvm_mapent_check(e)	/* nothing */
#endif /* defined(DEBUG) */
static struct vm_map_entry *
		uvm_kmapent_alloc(struct vm_map *, int);
static void	uvm_kmapent_free(struct vm_map_entry *);
static vsize_t	uvm_kmapent_overhead(vsize_t);

static void	uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
static void	uvm_map_reference_amap(struct vm_map_entry *, int);
static int	uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
		    struct vm_map_entry *);
static void	uvm_map_unreference_amap(struct vm_map_entry *, int);

int _uvm_map_sanity(struct vm_map *);
int _uvm_tree_sanity(struct vm_map *);
static vsize_t uvm_rb_subtree_space(const struct vm_map_entry *);

static inline int
uvm_compare(const struct vm_map_entry *a, const struct vm_map_entry *b)
{

	if (a->start < b->start)
		return (-1);
	else if (a->start > b->start)
		return (1);

	return (0);
}

static inline void
uvm_rb_augment(struct vm_map_entry *entry)
{

	entry->space = uvm_rb_subtree_space(entry);
}

RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);

RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);

static inline vsize_t
uvm_rb_space(const struct vm_map *map, const struct vm_map_entry *entry)
{
	/* XXX map is not used */

	KASSERT(entry->next != NULL);
	return entry->next->start - entry->end;
}

static vsize_t
uvm_rb_subtree_space(const struct vm_map_entry *entry)
{
	vaddr_t space, tmp;

	space = entry->ownspace;
	if (RB_LEFT(entry, rb_entry)) {
		tmp = RB_LEFT(entry, rb_entry)->space;
		if (tmp > space)
			space = tmp;
	}

	if (RB_RIGHT(entry, rb_entry)) {
		tmp = RB_RIGHT(entry, rb_entry)->space;
		if (tmp > space)
			space = tmp;
	}

	return (space);
}

static inline void
uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
{
	/* We need to traverse to the very top */
	do {
		entry->ownspace = uvm_rb_space(map, entry);
		entry->space = uvm_rb_subtree_space(entry);
	} while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
}
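/*
 * Illustrative note (not part of the original source): the two fields
 * maintained above implement a "maximum gap" augmentation of the tree.
 * ownspace is the free gap between an entry and its successor, and space
 * is the largest such gap anywhere in the subtree rooted at the entry.
 * For example, if three entries end at 0x2000, 0x5000 and 0x9000 and their
 * successors start at 0x3000, 0x5000 and 0xc000, their ownspace values are
 * 0x1000, 0 and 0x3000; the root's space is then 0x3000, so a request for
 * 0x4000 bytes can be rejected without descending into the tree (see the
 * "tmp->space < length" check in uvm_map_findspace below).
 */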
static void
uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	vaddr_t space = uvm_rb_space(map, entry);
	struct vm_map_entry *tmp;

	entry->ownspace = entry->space = space;
	tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
#ifdef DIAGNOSTIC
	if (tmp != NULL)
		panic("uvm_rb_insert: duplicate entry?");
#endif
	uvm_rb_fixup(map, entry);
	if (entry->prev != &map->header)
		uvm_rb_fixup(map, entry->prev);
}

static void
uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *parent;

	parent = RB_PARENT(entry, rb_entry);
	RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
	if (entry->prev != &map->header)
		uvm_rb_fixup(map, entry->prev);
	if (parent)
		uvm_rb_fixup(map, parent);
}

#if defined(DEBUG)
int uvm_debug_check_map = 0;
int uvm_debug_check_rbtree = 0;
#define uvm_map_check(map, name) \
	_uvm_map_check((map), (name), __FILE__, __LINE__)
static void
_uvm_map_check(struct vm_map *map, const char *name,
    const char *file, int line)
{

	if ((uvm_debug_check_map && _uvm_map_sanity(map)) ||
	    (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) {
		panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)",
		    name, map, file, line);
	}
}
#else /* defined(DEBUG) */
#define uvm_map_check(map, name)	/* nothing */
#endif /* defined(DEBUG) */

#if defined(DEBUG) || defined(DDB)
int
_uvm_map_sanity(struct vm_map *map)
{
	boolean_t first_free_found = FALSE;
	boolean_t hint_found = FALSE;
	const struct vm_map_entry *e;

	e = &map->header;
	for (;;) {
		if (map->first_free == e) {
			first_free_found = TRUE;
		} else if (!first_free_found && e->next->start > e->end) {
			printf("first_free %p should be %p\n",
			    map->first_free, e);
			return -1;
		}
		if (map->hint == e) {
			hint_found = TRUE;
		}

		e = e->next;
		if (e == &map->header) {
			break;
		}
	}
	if (!first_free_found) {
		printf("stale first_free\n");
		return -1;
	}
	if (!hint_found) {
		printf("stale hint\n");
		return -1;
	}
	return 0;
}

int
_uvm_tree_sanity(struct vm_map *map)
{
	struct vm_map_entry *tmp, *trtmp;
	int n = 0, i = 1;

	RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
		if (tmp->ownspace != uvm_rb_space(map, tmp)) {
			printf("%d/%d ownspace %lx != %lx %s\n",
			    n + 1, map->nentries,
			    (ulong)tmp->ownspace, (ulong)uvm_rb_space(map, tmp),
			    tmp->next == &map->header ? "(last)" : "");
			goto error;
		}
	}
	trtmp = NULL;
	RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
		if (tmp->space != uvm_rb_subtree_space(tmp)) {
			printf("space %lx != %lx\n",
			    (ulong)tmp->space,
			    (ulong)uvm_rb_subtree_space(tmp));
			goto error;
		}
		if (trtmp != NULL && trtmp->start >= tmp->start) {
			printf("corrupt: 0x%lx >= 0x%lx\n",
			    trtmp->start, tmp->start);
			goto error;
		}
		n++;

		trtmp = tmp;
	}

	if (n != map->nentries) {
		printf("nentries: %d vs %d\n", n, map->nentries);
		goto error;
	}

	for (tmp = map->header.next; tmp && tmp != &map->header;
	    tmp = tmp->next, i++) {
		trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
		if (trtmp != tmp) {
			printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp,
			    RB_PARENT(tmp, rb_entry));
			goto error;
		}
	}

	return (0);
error:
	return (-1);
}
#endif /* defined(DEBUG) || defined(DDB) */

#ifdef DIAGNOSTIC
static struct vm_map *uvm_kmapent_map(struct vm_map_entry *);
#endif

/*
 * uvm_mapent_alloc: allocate a map entry
 */

static struct vm_map_entry *
uvm_mapent_alloc(struct vm_map *map, int flags)
{
	struct vm_map_entry *me;
	int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
	UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);

	if (VM_MAP_USE_KMAPENT(map)) {
		me = uvm_kmapent_alloc(map, flags);
	} else {
		me = pool_get(&uvm_map_entry_pool, pflags);
		if (__predict_false(me == NULL))
			return NULL;
		me->flags = 0;
	}

	UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me,
	    ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0);
	return (me);
}

/*
 * uvm_mapent_alloc_split: allocate a map entry for clipping.
 */

static struct vm_map_entry *
uvm_mapent_alloc_split(struct vm_map *map,
    const struct vm_map_entry *old_entry, int flags,
    struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *me;

	KASSERT(!VM_MAP_USE_KMAPENT(map) ||
	    (old_entry->flags & UVM_MAP_QUANTUM) || !UMR_EMPTY(umr));

	if (old_entry->flags & UVM_MAP_QUANTUM) {
		int s;
		struct vm_map_kernel *vmk = vm_map_to_kernel(map);

		s = splvm();
		simple_lock(&uvm.kentry_lock);
		me = vmk->vmk_merged_entries;
		KASSERT(me);
		vmk->vmk_merged_entries = me->next;
		simple_unlock(&uvm.kentry_lock);
		splx(s);
		KASSERT(me->flags & UVM_MAP_QUANTUM);
	} else {
		me = uvm_mapent_alloc(map, flags);
	}

	return me;
}

/*
 * uvm_mapent_free: free map entry
 */

static void
uvm_mapent_free(struct vm_map_entry *me)
{
	UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]",
	    me, me->flags, 0, 0);
	if (me->flags & UVM_MAP_KERNEL) {
		uvm_kmapent_free(me);
	} else {
		pool_put(&uvm_map_entry_pool, me);
	}
}

/*
 * uvm_mapent_free_merged: free merged map entry
 *
 * => keep the entry if needed.
 * => caller shouldn't hold map locked if VM_MAP_USE_KMAPENT(map) is true.
 */

static void
uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me)
{

	KASSERT(!(me->flags & UVM_MAP_KERNEL) || uvm_kmapent_map(me) == map);

	if (me->flags & UVM_MAP_QUANTUM) {
		/*
		 * keep this entry for later splitting.
		 */
		struct vm_map_kernel *vmk;
		int s;

		KASSERT(VM_MAP_IS_KERNEL(map));
		KASSERT(!VM_MAP_USE_KMAPENT(map) ||
		    (me->flags & UVM_MAP_KERNEL));

		vmk = vm_map_to_kernel(map);
		s = splvm();
		simple_lock(&uvm.kentry_lock);
		me->next = vmk->vmk_merged_entries;
		vmk->vmk_merged_entries = me;
		simple_unlock(&uvm.kentry_lock);
		splx(s);
	} else {
		uvm_mapent_free(me);
	}
}

/*
 * uvm_mapent_copy: copy a map entry, preserving flags
 */

static inline void
uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
{

	memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
	    ((char *)src));
}

/*
 * uvm_mapent_overhead: calculate maximum kva overhead necessary for
 * map entries.
 *
 * => size and flags are the same as uvm_km_suballoc's.
 */

vsize_t
uvm_mapent_overhead(vsize_t size, int flags)
{

	if (VM_MAP_USE_KMAPENT_FLAGS(flags)) {
		return uvm_kmapent_overhead(size);
	}
	return 0;
}

#if defined(DEBUG)
static void
_uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line)
{

	if (entry->start >= entry->end) {
		goto bad;
	}
	if (UVM_ET_ISOBJ(entry)) {
		if (entry->object.uvm_obj == NULL) {
			goto bad;
		}
	} else if (UVM_ET_ISSUBMAP(entry)) {
		if (entry->object.sub_map == NULL) {
			goto bad;
		}
	} else {
		if (entry->object.uvm_obj != NULL ||
		    entry->object.sub_map != NULL) {
			goto bad;
		}
	}
	if (!UVM_ET_ISOBJ(entry)) {
		if (entry->offset != 0) {
			goto bad;
		}
	}

	return;

bad:
	panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line);
}
#endif /* defined(DEBUG) */

/*
 * uvm_map_entry_unwire: unwire a map entry
 *
 * => map should be locked by caller
 */

static inline void
uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
{

	entry->wired_count = 0;
	uvm_fault_unwire_locked(map, entry->start, entry->end);
}


/*
 * wrapper for calling amap_ref()
 */
static inline void
uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
{

	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
}


/*
 * wrapper for calling amap_unref()
 */
static inline void
uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
{

	amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
}


/*
 * uvm_map_init: init mapping system at boot time.  note that we allocate
 * and init the static pool of struct vm_map_entry *'s for the kernel here.
 */

void
uvm_map_init(void)
{
#if defined(UVMHIST)
	static struct uvm_history_ent maphistbuf[100];
	static struct uvm_history_ent pdhistbuf[100];
#endif

	/*
	 * first, init logging system.
	 */

	UVMHIST_FUNC("uvm_map_init");
	UVMHIST_INIT_STATIC(maphist, maphistbuf);
	UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
	UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);

	/*
	 * initialize the global lock for kernel map entry.
	 *
	 * XXX is it worth having a per-map lock instead?
	 */

	simple_lock_init(&uvm.kentry_lock);
}

/*
 * clippers
 */

/*
 * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy.
 */

static void
uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2,
    vaddr_t splitat)
{
	vaddr_t adj;

	KASSERT(entry1->start < splitat);
	KASSERT(splitat < entry1->end);

	adj = splitat - entry1->start;
	entry1->end = entry2->start = splitat;

	if (entry1->aref.ar_amap) {
		amap_splitref(&entry1->aref, &entry2->aref, adj);
	}
	if (UVM_ET_ISSUBMAP(entry1)) {
		/* ... unlikely to happen, but play it safe */
		uvm_map_reference(entry1->object.sub_map);
	} else if (UVM_ET_ISOBJ(entry1)) {
		KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
		entry2->offset += adj;
		if (entry1->object.uvm_obj->pgops &&
		    entry1->object.uvm_obj->pgops->pgo_reference)
			entry1->object.uvm_obj->pgops->pgo_reference(
			    entry1->object.uvm_obj);
	}
}

/*
 * uvm_map_clip_start: ensure that the entry begins at or after
 * the starting address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_START macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
    vaddr_t start, struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *new_entry;

	/* uvm_map_simplify_entry(map, entry); */ /* XXX */

	uvm_map_check(map, "clip_start entry");
	uvm_mapent_check(entry);

	/*
	 * Split off the front portion.  note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */
	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(new_entry, entry, start);
	uvm_map_entry_link(map, entry->prev, new_entry);

	uvm_map_check(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 * the ending address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end,
    struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *new_entry;

	uvm_map_check(map, "clip_end entry");
	uvm_mapent_check(entry);

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */
	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(entry, new_entry, end);
	uvm_map_entry_link(map, entry, new_entry);

	uvm_map_check(map, "clip_end leave");
}

static void
vm_map_drain(struct vm_map *map, uvm_flag_t flags)
{

	if (!VM_MAP_IS_KERNEL(map)) {
		return;
	}

	uvm_km_va_drain(map, flags);
}

/*
 *   M A P   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	 [1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	 [3] <uobj,uoffset>		== normal mapping
 *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *    case [4] is for kernel mappings where we don't know the offset until
 *    we've found a virtual address.   note that kernel object offsets are
 *    always relative to vm_map_min(kernel_map).
 *
 * => if `align' is non-zero, we align the virtual address to the specified
 *    alignment.
 *    this is provided as a mechanism for large pages.
 *
 * => XXXCDC: need way to map in external amap?
 */
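/*
 * Illustrative sketch (not part of the original source): a typical
 * anonymous, copy-on-write mapping request, with uvm_map() choosing the
 * virtual address, might look like
 *
 *	vaddr_t va = 0;
 *	int error = uvm_map(map, &va, size, NULL, UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_ALL, UVM_INH_COPY,
 *	    UVM_ADV_RANDOM, UVM_FLAG_COPYONW));
 *
 * i.e. case [2] above: no backing uvm_object and no PMAP_PREFER hint.
 */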
int
uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
{
	struct uvm_map_args args;
	struct vm_map_entry *new_entry;
	int error;

	KASSERT((flags & UVM_FLAG_QUANTUM) == 0 || VM_MAP_IS_KERNEL(map));
	KASSERT((size & PAGE_MASK) == 0);

	/*
	 * for pager_map, allocate the new entry first to avoid sleeping
	 * for memory while we have the map locked.
	 *
	 * besides, because we allocate entries for in-kernel maps
	 * a bit differently (cf. uvm_kmapent_alloc/free), we need to
	 * allocate them before locking the map.
	 */

	new_entry = NULL;
	if (VM_MAP_USE_KMAPENT(map) || (flags & UVM_FLAG_QUANTUM) ||
	    map == pager_map) {
		new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
		if (__predict_false(new_entry == NULL))
			return ENOMEM;
		if (flags & UVM_FLAG_QUANTUM)
			new_entry->flags |= UVM_MAP_QUANTUM;
	}
	if (map == pager_map)
		flags |= UVM_FLAG_NOMERGE;

	error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
	    flags, &args);
	if (!error) {
		error = uvm_map_enter(map, &args, new_entry);
		*startp = args.uma_start;
	} else if (new_entry) {
		uvm_mapent_free(new_entry);
	}

#if defined(DEBUG)
	if (!error && VM_MAP_IS_KERNEL(map)) {
		uvm_km_check_empty(*startp, *startp + size,
		    (map->flags & VM_MAP_INTRSAFE) != 0);
	}
#endif /* defined(DEBUG) */

	return error;
}

int
uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
    struct uvm_map_args *args)
{
	struct vm_map_entry *prev_entry;
	vm_prot_t prot = UVM_PROTECTION(flags);
	vm_prot_t maxprot = UVM_MAXPROTECTION(flags);

	UVMHIST_FUNC("uvm_map_prepare");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
	    map, start, size, flags);
	UVMHIST_LOG(maphist, "  uobj/offset 0x%x/%d", uobj, uoffset,0,0);

	/*
	 * detect a popular device driver bug.
	 */

	KASSERT(doing_shutdown || curlwp != NULL ||
	    (map->flags & VM_MAP_INTRSAFE));

	/*
	 * zero-sized mapping doesn't make any sense.
	 */
	KASSERT(size > 0);

	KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0);

	uvm_map_check(map, "map entry");

	/*
	 * check sanity of protection code
	 */

	if ((prot & maxprot) != prot) {
		UVMHIST_LOG(maphist, "<- prot. failure:  prot=0x%x, max=0x%x",
		    prot, maxprot,0,0);
		return EACCES;
	}

	/*
	 * figure out where to put new VM range
	 */

retry:
	if (vm_map_lock_try(map) == FALSE) {
		if (flags & UVM_FLAG_TRYLOCK) {
			return EAGAIN;
		}
		vm_map_lock(map); /* could sleep here */
	}
	prev_entry = uvm_map_findspace(map, start, size, &start,
	    uobj, uoffset, align, flags);
	if (prev_entry == NULL) {
		unsigned int timestamp;

		timestamp = map->timestamp;
		UVMHIST_LOG(maphist,"waiting va timestamp=0x%x",
		    timestamp,0,0,0);
		simple_lock(&map->flags_lock);
		map->flags |= VM_MAP_WANTVA;
		simple_unlock(&map->flags_lock);
		vm_map_unlock(map);

		/*
		 * try to reclaim kva and wait until someone does unmap.
		 * XXX fragile locking
		 */

		vm_map_drain(map, flags);

		simple_lock(&map->flags_lock);
		while ((map->flags & VM_MAP_WANTVA) != 0 &&
		    map->timestamp == timestamp) {
			if ((flags & UVM_FLAG_WAITVA) == 0) {
				simple_unlock(&map->flags_lock);
				UVMHIST_LOG(maphist,
				    "<- uvm_map_findspace failed!", 0,0,0,0);
				return ENOMEM;
			} else {
				ltsleep(&map->header, PVM, "vmmapva", 0,
				    &map->flags_lock);
			}
		}
		simple_unlock(&map->flags_lock);
		goto retry;
	}

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (map == kernel_map && uvm_maxkaddr < (start + size))
		uvm_maxkaddr = pmap_growkernel(start + size);
#endif

	UVMMAP_EVCNT_INCR(map_call);

	/*
	 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
	 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
	 * either case we want to zero it before storing it in the map entry
	 * (because it looks strange and confusing when debugging...)
	 *
	 * if uobj is not null
	 *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
	 *      and we do not need to change uoffset.
	 *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
	 *      now (based on the starting address of the map).   this case is
	 *      for kernel object mappings where we don't know the offset until
	 *      the virtual address is found (with uvm_map_findspace).   the
	 *      offset is the distance we are from the start of the map.
	 */

	if (uobj == NULL) {
		uoffset = 0;
	} else {
		if (uoffset == UVM_UNKNOWN_OFFSET) {
			KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
			uoffset = start - vm_map_min(kernel_map);
		}
	}

	args->uma_flags = flags;
	args->uma_prev = prev_entry;
	args->uma_start = start;
	args->uma_size = size;
	args->uma_uobj = uobj;
	args->uma_uoffset = uoffset;

	return 0;
}

int
uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
    struct vm_map_entry *new_entry)
{
	struct vm_map_entry *prev_entry = args->uma_prev;
	struct vm_map_entry *dead = NULL;

	const uvm_flag_t flags = args->uma_flags;
	const vm_prot_t prot = UVM_PROTECTION(flags);
	const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
	const vm_inherit_t inherit = UVM_INHERIT(flags);
	const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
	    AMAP_EXTEND_NOWAIT : 0;
	const int advice = UVM_ADVICE(flags);
	const int meflagval = (flags & UVM_FLAG_QUANTUM) ?
	    UVM_MAP_QUANTUM : 0;

	vaddr_t start = args->uma_start;
	vsize_t size = args->uma_size;
	struct uvm_object *uobj = args->uma_uobj;
	voff_t uoffset = args->uma_uoffset;

	const int kmap = (vm_map_pmap(map) == pmap_kernel());
	int merged = 0;
	int error;
	int newetype;

	UVMHIST_FUNC("uvm_map_enter");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
	    map, start, size, flags);
	UVMHIST_LOG(maphist, "  uobj/offset 0x%x/%d", uobj, uoffset,0,0);

	KASSERT(map->hint == prev_entry); /* bimerge case assumes this */

	if (flags & UVM_FLAG_QUANTUM) {
		KASSERT(new_entry);
		KASSERT(new_entry->flags & UVM_MAP_QUANTUM);
	}

	if (uobj)
		newetype = UVM_ET_OBJ;
	else
		newetype = 0;

	if (flags & UVM_FLAG_COPYONW) {
		newetype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			newetype |= UVM_ET_NEEDSCOPY;
	}

	/*
	 * try and insert in map by extending previous entry, if possible.
	 * XXX: we don't try and pull back the next entry.   might be useful
	 * for a stack, but we are currently allocating our stack in advance.
	 */

	if (flags & UVM_FLAG_NOMERGE)
		goto nomerge;

	if (prev_entry->end == start &&
	    prev_entry != &map->header &&
	    UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, meflagval,
	    prot, maxprot, inherit, advice, 0)) {

		if (uobj && prev_entry->offset +
		    (prev_entry->end - prev_entry->start) != uoffset)
			goto forwardmerge;

		/*
		 * can't extend a shared amap.  note: no need to lock amap to
		 * look at refs since we don't care about its exact value.
		 * if it is one (i.e. we have the only reference) it will
		 * stay there.
		 */

		if (prev_entry->aref.ar_amap &&
		    amap_refs(prev_entry->aref.ar_amap) != 1) {
			goto forwardmerge;
		}

		if (prev_entry->aref.ar_amap) {
			error = amap_extend(prev_entry, size,
			    amapwaitflag | AMAP_EXTEND_FORWARDS);
			if (error)
				goto nomerge;
		}

		if (kmap)
			UVMMAP_EVCNT_INCR(kbackmerge);
		else
			UVMMAP_EVCNT_INCR(ubackmerge);
		UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);

		/*
		 * drop our reference to uobj since we are extending a reference
		 * that we already have (the ref count can not drop to zero).
		 */

		if (uobj && uobj->pgops->pgo_detach)
			uobj->pgops->pgo_detach(uobj);

		prev_entry->end += size;
		uvm_rb_fixup(map, prev_entry);

		uvm_map_check(map, "map backmerged");

		UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
		merged++;
	}

forwardmerge:
	if (prev_entry->next->start == (start + size) &&
	    prev_entry->next != &map->header &&
	    UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, meflagval,
	    prot, maxprot, inherit, advice, 0)) {

		if (uobj && prev_entry->next->offset != uoffset + size)
			goto nomerge;

		/*
		 * can't extend a shared amap.  note: no need to lock amap to
		 * look at refs since we don't care about its exact value.
		 * if it is one (i.e. we have the only reference) it will
		 * stay there.
		 *
		 * note that we also can't merge two amaps, so if we
		 * merged with the previous entry which has an amap,
		 * and the next entry also has an amap, we give up.
		 *
		 * Interesting cases:
		 * amap, new, amap -> give up second merge (single fwd extend)
		 * amap, new, none -> double forward extend (extend again here)
		 * none, new, amap -> double backward extend (done here)
		 * uobj, new, amap -> single backward extend (done here)
		 *
		 * XXX should we attempt to deal with someone refilling
		 * the deallocated region between two entries that are
		 * backed by the same amap (ie, arefs is 2, "prev" and
		 * "next" refer to it, and adding this allocation will
		 * close the hole, thus restoring arefs to 1 and
		 * deallocating the "next" vm_map_entry)?  -- @@@
		 */

		if (prev_entry->next->aref.ar_amap &&
		    (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
		    (merged && prev_entry->aref.ar_amap))) {
			goto nomerge;
		}

		if (merged) {
			/*
			 * Try to extend the amap of the previous entry to
			 * cover the next entry as well.  If it doesn't work
			 * just skip on, don't actually give up, since we've
			 * already completed the back merge.
			 */
			if (prev_entry->aref.ar_amap) {
				if (amap_extend(prev_entry,
				    prev_entry->next->end -
				    prev_entry->next->start,
				    amapwaitflag | AMAP_EXTEND_FORWARDS))
					goto nomerge;
			}

			/*
			 * Try to extend the amap of the *next* entry
			 * back to cover the new allocation *and* the
			 * previous entry as well (the previous merge
			 * didn't have an amap already otherwise we
			 * wouldn't be checking here for an amap).  If
			 * it doesn't work just skip on, again, don't
			 * actually give up, since we've already
			 * completed the back merge.
			 */
			else if (prev_entry->next->aref.ar_amap) {
				if (amap_extend(prev_entry->next,
				    prev_entry->end -
				    prev_entry->start,
				    amapwaitflag | AMAP_EXTEND_BACKWARDS))
					goto nomerge;
			}
		} else {
			/*
			 * Pull the next entry's amap backwards to cover this
			 * new allocation.
			 */
			if (prev_entry->next->aref.ar_amap) {
				error = amap_extend(prev_entry->next, size,
				    amapwaitflag | AMAP_EXTEND_BACKWARDS);
				if (error)
					goto nomerge;
			}
		}

		if (merged) {
			if (kmap) {
				UVMMAP_EVCNT_DECR(kbackmerge);
				UVMMAP_EVCNT_INCR(kbimerge);
			} else {
				UVMMAP_EVCNT_DECR(ubackmerge);
				UVMMAP_EVCNT_INCR(ubimerge);
			}
		} else {
			if (kmap)
				UVMMAP_EVCNT_INCR(kforwmerge);
			else
				UVMMAP_EVCNT_INCR(uforwmerge);
		}
		UVMHIST_LOG(maphist,"  starting forward merge", 0, 0, 0, 0);

		/*
		 * drop our reference to uobj since we are extending a reference
		 * that we already have (the ref count can not drop to zero).
		 * (if merged, we've already detached)
		 */
		if (uobj && uobj->pgops->pgo_detach && !merged)
			uobj->pgops->pgo_detach(uobj);

		if (merged) {
			dead = prev_entry->next;
			prev_entry->end = dead->end;
			uvm_map_entry_unlink(map, dead);
			if (dead->aref.ar_amap != NULL) {
				prev_entry->aref = dead->aref;
				dead->aref.ar_amap = NULL;
			}
		} else {
			prev_entry->next->start -= size;
			if (prev_entry != &map->header)
				uvm_rb_fixup(map, prev_entry);
			if (uobj)
				prev_entry->next->offset = uoffset;
		}

		uvm_map_check(map, "map forwardmerged");

		UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
		merged++;
	}

nomerge:
	if (!merged) {
		UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);
		if (kmap)
			UVMMAP_EVCNT_INCR(knomerge);
		else
			UVMMAP_EVCNT_INCR(unomerge);

		/*
		 * allocate new entry and link it in.
		 */

		if (new_entry == NULL) {
			new_entry = uvm_mapent_alloc(map,
			    (flags & UVM_FLAG_NOWAIT));
			if (__predict_false(new_entry == NULL)) {
				error = ENOMEM;
				goto done;
			}
		}
		new_entry->start = start;
		new_entry->end = new_entry->start + size;
		new_entry->object.uvm_obj = uobj;
		new_entry->offset = uoffset;

		new_entry->etype = newetype;

		if (flags & UVM_FLAG_NOMERGE) {
			new_entry->flags |= UVM_MAP_NOMERGE;
		}

		new_entry->protection = prot;
		new_entry->max_protection = maxprot;
		new_entry->inheritance = inherit;
		new_entry->wired_count = 0;
		new_entry->advice = advice;
		if (flags & UVM_FLAG_OVERLAY) {

			/*
			 * to_add: for BSS we overallocate a little since we
			 * are likely to extend
			 */

			vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
			    UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
			struct vm_amap *amap = amap_alloc(size, to_add,
			    (flags & UVM_FLAG_NOWAIT));
			if (__predict_false(amap == NULL)) {
				error = ENOMEM;
				goto done;
			}
			new_entry->aref.ar_pageoff = 0;
			new_entry->aref.ar_amap = amap;
		} else {
			new_entry->aref.ar_pageoff = 0;
			new_entry->aref.ar_amap = NULL;
		}
		uvm_map_entry_link(map, prev_entry, new_entry);

		/*
		 * Update the free space hint
		 */

		if ((map->first_free == prev_entry) &&
		    (prev_entry->end >= new_entry->start))
			map->first_free = new_entry;

		new_entry = NULL;
	}

	map->size += size;

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);

	error = 0;
done:
	vm_map_unlock(map);
	if (new_entry) {
		if (error == 0) {
			KDASSERT(merged);
			uvm_mapent_free_merged(map, new_entry);
		} else {
			uvm_mapent_free(new_entry);
		}
	}
	if (dead) {
		KDASSERT(merged);
		uvm_mapent_free_merged(map, dead);
	}
	return error;
}

/*
 * uvm_map_lookup_entry: find map entry at or before an address
 *
 * => map must at least be read-locked by caller
 * => entry is returned in "entry"
 * => return value is true if address is in the returned entry
 */

boolean_t
uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry /* OUT */)
{
	struct vm_map_entry *cur;
	boolean_t use_tree = FALSE;
	UVMHIST_FUNC("uvm_map_lookup_entry");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)",
	    map, address, entry, 0);

	/*
	 * start looking either from the head of the
	 * list, or from the hint.
	 */

	simple_lock(&map->hint_lock);
	cur = map->hint;
	simple_unlock(&map->hint_lock);

	if (cur == &map->header)
		cur = cur->next;

	UVMMAP_EVCNT_INCR(mlk_call);
	if (address >= cur->start) {

		/*
		 * go from hint to end of list.
		 *
		 * but first, make a quick check to see if
		 * we are already looking at the entry we
		 * want (which is usually the case).
		 * note also that we don't need to save the hint
		 * here... it is the same hint (unless we are
		 * at the header, in which case the hint didn't
		 * buy us anything anyway).
		 */

		if (cur != &map->header && cur->end > address) {
			UVMMAP_EVCNT_INCR(mlk_hint);
			*entry = cur;
			UVMHIST_LOG(maphist,"<- got it via hint (0x%x)",
			    cur, 0, 0, 0);
			uvm_mapent_check(*entry);
			return (TRUE);
		}

		if (map->nentries > 30)
			use_tree = TRUE;
	} else {

		/*
		 * invalid hint.  use tree.
		 */
		use_tree = TRUE;
	}

	uvm_map_check(map, __func__);

	if (use_tree) {
		struct vm_map_entry *prev = &map->header;
		cur = RB_ROOT(&map->rbhead);

		/*
		 * Simple lookup in the tree.  Happens when the hint is
		 * invalid, or nentries reach a threshold.
		 */
		while (cur) {
			if (address >= cur->start) {
				if (address < cur->end) {
					*entry = cur;
					goto got;
				}
				prev = cur;
				cur = RB_RIGHT(cur, rb_entry);
			} else
				cur = RB_LEFT(cur, rb_entry);
		}
		*entry = prev;
		goto failed;
	}

	/*
	 * search linearly
	 */

	while (cur != &map->header) {
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * save this lookup for future
				 * hints, and return
				 */

				*entry = cur;
got:
				SAVE_HINT(map, map->hint, *entry);
				UVMHIST_LOG(maphist,"<- search got it (0x%x)",
				    cur, 0, 0, 0);
				KDASSERT((*entry)->start <= address);
				KDASSERT(address < (*entry)->end);
				uvm_mapent_check(*entry);
				return (TRUE);
			}
			break;
		}
		cur = cur->next;
	}
	*entry = cur->prev;
failed:
	SAVE_HINT(map, map->hint, *entry);
	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
	KDASSERT((*entry)->next == &map->header ||
	    address < (*entry)->next->start);
	return (FALSE);
}
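/*
 * Illustrative sketch (not part of the original source): a caller holding
 * at least a read lock typically uses the boolean result to distinguish
 * "address is mapped" from "address falls in the gap after *entry":
 *
 *	struct vm_map_entry *entry;
 *
 *	vm_map_lock_read(map);
 *	if (uvm_map_lookup_entry(map, va, &entry)) {
 *		// va lies within [entry->start, entry->end)
 *	} else {
 *		// va is unmapped; entry precedes the gap containing va
 *	}
 *	vm_map_unlock_read(map);
 */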
/*
 * See if the range between start and start + length fits in the gap
 * between entry->end and entry->next->start.  Returns 1 if it fits, 0 if
 * it doesn't fit, and -1 if the address wraps around.
 */
static int
uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
    vsize_t align, int topdown, struct vm_map_entry *entry)
{
	vaddr_t end;

#ifdef PMAP_PREFER
	/*
	 * push start address forward as needed to avoid VAC alias problems.
	 * we only do this if a valid offset is specified.
	 */

	if (uoffset != UVM_UNKNOWN_OFFSET)
		PMAP_PREFER(uoffset, start, length, topdown);
#endif
	if (align != 0) {
		if ((*start & (align - 1)) != 0) {
			if (topdown)
				*start &= ~(align - 1);
			else
				*start = roundup(*start, align);
		}

		/*
		 * XXX Should we PMAP_PREFER() here again?
		 * eh...i think we're okay
		 */
	}

	/*
	 * Find the end of the proposed new region.  Be sure we didn't
	 * wrap around the address; if so, we lose.  Otherwise, if the
	 * proposed new region fits before the next entry, we win.
	 */

	end = *start + length;
	if (end < *start)
		return (-1);

	if (entry->next->start >= end && *start >= entry->end)
		return (1);

	return (0);
}

/*
 * uvm_map_findspace: find "length" sized space in "map".
 *
 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
 *	set in "flags" (in which case we insist on using "hint").
 * => "result" is VA returned
 * => uobj/uoffset are to be used to handle VAC alignment, if required
 * => if "align" is non-zero, we attempt to align to that value.
 * => caller must at least have read-locked map
 * => returns NULL on failure, or pointer to prev. map entry if success
 * => note this is a cross between the old vm_map_findspace and vm_map_find
 */

struct vm_map_entry *
uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
    vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset,
    vsize_t align, int flags)
{
	struct vm_map_entry *entry;
	struct vm_map_entry *child, *prev, *tmp;
	vaddr_t orig_hint;
	const int topdown = map->flags & VM_MAP_TOPDOWN;
	UVMHIST_FUNC("uvm_map_findspace");
	UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)",
	    map, hint, length, flags);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);

	uvm_map_check(map, "map_findspace entry");

	/*
	 * remember the original hint.  if we are aligning, then we
	 * may have to try again with no alignment constraint if
	 * we fail the first time.
	 */

	orig_hint = hint;
	if (hint < vm_map_min(map)) {	/* check ranges ... */
		if (flags & UVM_FLAG_FIXED) {
			UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
			return (NULL);
		}
		hint = vm_map_min(map);
	}
	if (hint > vm_map_max(map)) {
		UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]",
		    hint, vm_map_min(map), vm_map_max(map), 0);
		return (NULL);
	}

	/*
	 * Look for the first possible address; if there's already
	 * something at this address, we have to start after it.
	 */

	/*
	 * @@@: there are four, no, eight cases to consider.
	 *
	 * 0: found,     fixed,     bottom up -> fail
	 * 1: found,     fixed,     top down  -> fail
	 * 2: found,     not fixed, bottom up -> start after entry->end,
	 *	loop up
	 * 3: found,     not fixed, top down  -> start before entry->start,
	 *	loop down
	 * 4: not found, fixed,     bottom up -> check entry->next->start, fail
	 * 5: not found, fixed,     top down  -> check entry->next->start, fail
	 * 6: not found, not fixed, bottom up -> check entry->next->start,
	 *	loop up
	 * 7: not found, not fixed, top down  -> check entry->next->start,
	 *	loop down
	 *
	 * as you can see, it reduces to roughly five cases, and that
	 * adding top down mapping only adds one unique case (without
	 * it, there would be four cases).
	 */

	if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) {
		entry = map->first_free;
	} else {
		if (uvm_map_lookup_entry(map, hint, &entry)) {
			/* "hint" address already in use ... */
			if (flags & UVM_FLAG_FIXED) {
				UVMHIST_LOG(maphist, "<- fixed & VA in use",
				    0, 0, 0, 0);
				return (NULL);
			}
			if (topdown)
				/* Start from lower gap. */
				entry = entry->prev;
		} else if (flags & UVM_FLAG_FIXED) {
			if (entry->next->start >= hint + length &&
			    hint + length > hint)
				goto found;

			/* "hint" address is gap but too small */
			UVMHIST_LOG(maphist, "<- fixed mapping failed",
			    0, 0, 0, 0);
			return (NULL); /* only one shot at it ... */
		} else {
			/*
			 * See if given hint fits in this gap.
			 */
			switch (uvm_map_space_avail(&hint, length,
			    uoffset, align, topdown, entry)) {
			case 1:
				goto found;
			case -1:
				goto wraparound;
			}

			if (topdown) {
				/*
				 * Still there is a chance to fit
				 * if hint > entry->end.
				 */
			} else {
				/* Start from higher gap. */
				entry = entry->next;
				if (entry == &map->header)
					goto notfound;
				goto nextgap;
			}
		}
	}

	/*
	 * Note that the UVM_FLAG_FIXED case has already been handled.
	 */
	KDASSERT((flags & UVM_FLAG_FIXED) == 0);

	/* Try to find the space in the red-black tree */

	/* Check slot before any entry */
	hint = topdown ? entry->next->start - length : entry->end;
	switch (uvm_map_space_avail(&hint, length, uoffset, align,
	    topdown, entry)) {
	case 1:
		goto found;
	case -1:
		goto wraparound;
	}

nextgap:
	KDASSERT((flags & UVM_FLAG_FIXED) == 0);
	/* If there is not enough space in the whole tree, we fail */
	tmp = RB_ROOT(&map->rbhead);
	if (tmp == NULL || tmp->space < length)
		goto notfound;

	prev = NULL; /* previous candidate */

	/* Find an entry close to hint that has enough space */
	for (; tmp;) {
		KASSERT(tmp->next->start == tmp->end + tmp->ownspace);
		if (topdown) {
			if (tmp->next->start < hint + length &&
			    (prev == NULL || tmp->end > prev->end)) {
				if (tmp->ownspace >= length)
					prev = tmp;
				else if ((child = RB_LEFT(tmp, rb_entry))
				    != NULL && child->space >= length)
					prev = tmp;
			}
		} else {
			if (tmp->end >= hint &&
			    (prev == NULL || tmp->end < prev->end)) {
				if (tmp->ownspace >= length)
					prev = tmp;
				else if ((child = RB_RIGHT(tmp, rb_entry))
				    != NULL && child->space >= length)
					prev = tmp;
			}
		}
		if (tmp->next->start < hint + length)
			child = RB_RIGHT(tmp, rb_entry);
		else if (tmp->end > hint)
			child = RB_LEFT(tmp, rb_entry);
		else {
			if (tmp->ownspace >= length)
				break;
			if (topdown)
				child = RB_LEFT(tmp, rb_entry);
			else
				child = RB_RIGHT(tmp, rb_entry);
		}
		if (child == NULL || child->space < length)
			break;
		tmp = child;
	}

	if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
		/*
		 * Check if the entry that we found satisfies the
		 * space requirement
		 */
		if (topdown) {
			if (hint > tmp->next->start - length)
				hint = tmp->next->start - length;
		} else {
			if (hint < tmp->end)
				hint = tmp->end;
		}
		switch (uvm_map_space_avail(&hint, length, uoffset, align,
		    topdown, tmp)) {
		case 1:
			entry = tmp;
			goto found;
		case -1:
			goto wraparound;
		}
		if (tmp->ownspace >= length)
			goto listsearch;
	}
	if (prev == NULL)
		goto notfound;

	if (topdown) {
		KASSERT(orig_hint >= prev->next->start - length ||
		    prev->next->start - length > prev->next->start);
		hint = prev->next->start - length;
	} else {
		KASSERT(orig_hint <= prev->end);
		hint = prev->end;
	}
	switch (uvm_map_space_avail(&hint, length, uoffset, align,
	    topdown, prev)) {
	case 1:
		entry = prev;
		goto found;
	case -1:
		goto wraparound;
	}
	if (prev->ownspace >= length)
		goto listsearch;

	if (topdown)
		tmp = RB_LEFT(prev, rb_entry);
	else
		tmp = RB_RIGHT(prev, rb_entry);
	for (;;) {
		KASSERT(tmp && tmp->space >= length);
		if (topdown)
			child = RB_RIGHT(tmp, rb_entry);
		else
			child = RB_LEFT(tmp, rb_entry);
		if (child && child->space >= length) {
			tmp = child;
			continue;
		}
		if (tmp->ownspace >= length)
			break;
		if (topdown)
			tmp = RB_LEFT(tmp, rb_entry);
		else
			tmp = RB_RIGHT(tmp, rb_entry);
	}

	if (topdown) {
		KASSERT(orig_hint >= tmp->next->start - length ||
		    tmp->next->start - length > tmp->next->start);
		hint = tmp->next->start - length;
	} else {
		KASSERT(orig_hint <= tmp->end);
		hint = tmp->end;
	}
	switch (uvm_map_space_avail(&hint, length, uoffset, align,
	    topdown, tmp)) {
	case 1:
		entry = tmp;
		goto found;
	case -1:
		goto wraparound;
	}

	/*
	 * The tree fails to find an entry because of offset or alignment
	 * restrictions.  Search the list instead.
	 */
listsearch:
	/*
	 * Look through the rest of the map, trying to fit a new region in
	 * the gap between existing regions, or after the very last region.
	 * note: entry->end = base VA of current gap,
	 *	 entry->next->start = VA of end of current gap
	 */

	for (;;) {
		/* Update hint for current gap. */
		hint = topdown ? entry->next->start - length : entry->end;

		/* See if it fits. */
		switch (uvm_map_space_avail(&hint, length, uoffset, align,
		    topdown, entry)) {
		case 1:
			goto found;
		case -1:
			goto wraparound;
		}

		/* Advance to next/previous gap */
		if (topdown) {
			if (entry == &map->header) {
				UVMHIST_LOG(maphist, "<- failed (off start)",
				    0,0,0,0);
				goto notfound;
			}
			entry = entry->prev;
		} else {
			entry = entry->next;
			if (entry == &map->header) {
				UVMHIST_LOG(maphist, "<- failed (off end)",
				    0,0,0,0);
				goto notfound;
			}
		}
	}

found:
	SAVE_HINT(map, map->hint, entry);
	*result = hint;
	UVMHIST_LOG(maphist,"<- got it!  (result=0x%x)", hint, 0,0,0);
	KASSERT( topdown || hint >= orig_hint);
	KASSERT(!topdown || hint <= orig_hint);
	KASSERT(entry->end <= hint);
	KASSERT(hint + length <= entry->next->start);
	return (entry);

wraparound:
	UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);

	return (NULL);

notfound:
	UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);

	return (NULL);
}

/*
 *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
 */

/*
 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
 *
 * => caller must check alignment and size
 * => map must be locked by caller
 * => we return a list of map entries that we've removed from the map
 *    in "entry_list"
 */
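/*
 * Illustrative sketch (not part of the original source): the usual calling
 * pattern keeps unmapping (map locked) separate from reference dropping
 * (map unlocked), for the reasons spelled out in the comments inside
 * uvm_unmap_remove() below:
 *
 *	struct vm_map_entry *dead_entries;
 *
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead_entries, NULL, 0);
 *	vm_map_unlock(map);
 *	if (dead_entries != NULL)
 *		uvm_unmap_detach(dead_entries, 0);
 */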
void
uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map_entry **entry_list /* OUT */,
    struct uvm_mapent_reservation *umr, int flags)
{
	struct vm_map_entry *entry, *first_entry, *next;
	vaddr_t len;
	UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)",
	    map, start, end, 0);
	VM_MAP_RANGE_CHECK(map, start, end);

	uvm_map_check(map, "unmap_remove entry");

	/*
	 * find first entry
	 */

	if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
		/* clip and go... */
		entry = first_entry;
		UVM_MAP_CLIP_START(map, entry, start, umr);
		/* critical!  prevents stale hint */
		SAVE_HINT(map, entry, entry->prev);
	} else {
		entry = first_entry->next;
	}

	/*
	 * Save the free space hint
	 */

	if (map->first_free != &map->header && map->first_free->start >= start)
		map->first_free = entry->prev;

	/*
	 * note: we now re-use first_entry for a different task.  we remove
	 * a number of map entries from the map and save them in a linked
	 * list headed by "first_entry".  once we remove them from the map
	 * the caller should unlock the map and drop the references to the
	 * backing objects [c.f. uvm_unmap_detach].  the object is to
	 * separate unmapping from reference dropping.  why?
	 *   [1] the map has to be locked for unmapping
	 *   [2] the map need not be locked for reference dropping
	 *   [3] dropping references may trigger pager I/O, and if we hit
	 *       a pager that does synchronous I/O we may have to wait for it.
	 *   [4] we would like all waiting for I/O to occur with maps unlocked
	 *       so that we don't block other threads.
	 */

	first_entry = NULL;
	*entry_list = NULL;

	/*
	 * break up the area into map entry sized regions and unmap.  note
	 * that all mappings have to be removed before we can even consider
	 * dropping references to amaps or VM objects (otherwise we could end
	 * up with a mapping to a page on the free list which would be very bad)
	 */

	while ((entry != &map->header) && (entry->start < end)) {
		KASSERT((entry->flags & UVM_MAP_FIRST) == 0);

		UVM_MAP_CLIP_END(map, entry, end, umr);
		next = entry->next;
		len = entry->end - entry->start;

		/*
		 * unwire before removing addresses from the pmap; otherwise
		 * unwiring will put the entries back into the pmap (XXX).
		 */

		if (VM_MAPENT_ISWIRED(entry)) {
			uvm_map_entry_unwire(map, entry);
		}
		if (flags & UVM_FLAG_VAONLY) {

			/* nothing */

		} else if ((map->flags & VM_MAP_PAGEABLE) == 0) {

			/*
			 * if the map is non-pageable, any pages mapped there
			 * must be wired and entered with pmap_kenter_pa(),
			 * and we should free any such pages immediately.
			 * this is mostly used for kmem_map and mb_map.
			 */

			if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
				uvm_km_pgremove_intrsafe(entry->start,
				    entry->end);
				pmap_kremove(entry->start, len);
			}
		} else if (UVM_ET_ISOBJ(entry) &&
		    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
			KASSERT(vm_map_pmap(map) == pmap_kernel());

			/*
			 * note: kernel object mappings are currently used in
			 * two ways:
			 *  [1] "normal" mappings of pages in the kernel object
			 *  [2] uvm_km_valloc'd allocations in which we
			 *      pmap_enter in some non-kernel-object page
			 *      (e.g. vmapbuf).
			 *
			 * for case [1], we need to remove the mapping from
			 * the pmap and then remove the page from the kernel
			 * object (because, once pages in a kernel object are
			 * unmapped they are no longer needed, unlike, say,
			 * a vnode where you might want the data to persist
			 * until flushed out of a queue).
			 *
			 * for case [2], we need to remove the mapping from
			 * the pmap.  there shouldn't be any pages at the
			 * specified offset in the kernel object [but it
			 * doesn't hurt to call uvm_km_pgremove just to be
			 * safe?]
2063 * 2064 * uvm_km_pgremove currently does the following: 2065 * for pages in the kernel object in range: 2066 * - drops the swap slot 2067 * - uvm_pagefree the page 2068 */ 2069 2070 /* 2071 * remove mappings from pmap and drop the pages 2072 * from the object. offsets are always relative 2073 * to vm_map_min(kernel_map). 2074 */ 2075 2076 pmap_remove(pmap_kernel(), entry->start, 2077 entry->start + len); 2078 uvm_km_pgremove(entry->start, entry->end); 2079 2080 /* 2081 * null out kernel_object reference, we've just 2082 * dropped it 2083 */ 2084 2085 entry->etype &= ~UVM_ET_OBJ; 2086 entry->object.uvm_obj = NULL; 2087 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2088 2089 /* 2090 * remove mappings the standard way. 2091 */ 2092 2093 pmap_remove(map->pmap, entry->start, entry->end); 2094 } 2095 2096 #if defined(DEBUG) 2097 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2098 2099 /* 2100 * check if there's remaining mapping, 2101 * which is a bug in caller. 2102 */ 2103 2104 vaddr_t va; 2105 for (va = entry->start; va < entry->end; 2106 va += PAGE_SIZE) { 2107 if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2108 panic("uvm_unmap_remove: has mapping"); 2109 } 2110 } 2111 2112 if (VM_MAP_IS_KERNEL(map)) { 2113 uvm_km_check_empty(entry->start, entry->end, 2114 (map->flags & VM_MAP_INTRSAFE) != 0); 2115 } 2116 } 2117 #endif /* defined(DEBUG) */ 2118 2119 /* 2120 * remove entry from map and put it on our list of entries 2121 * that we've nuked. then go to next entry. 2122 */ 2123 2124 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); 2125 2126 /* critical! prevents stale hint */ 2127 SAVE_HINT(map, entry, entry->prev); 2128 2129 uvm_map_entry_unlink(map, entry); 2130 KASSERT(map->size >= len); 2131 map->size -= len; 2132 entry->prev = NULL; 2133 entry->next = first_entry; 2134 first_entry = entry; 2135 entry = next; 2136 } 2137 if ((map->flags & VM_MAP_DYING) == 0) { 2138 pmap_update(vm_map_pmap(map)); 2139 } 2140 2141 uvm_map_check(map, "unmap_remove leave"); 2142 2143 /* 2144 * now we've cleaned up the map and are ready for the caller to drop 2145 * references to the mapped objects. 2146 */ 2147 2148 *entry_list = first_entry; 2149 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2150 2151 simple_lock(&map->flags_lock); 2152 if (map->flags & VM_MAP_WANTVA) { 2153 map->flags &= ~VM_MAP_WANTVA; 2154 wakeup(&map->header); 2155 } 2156 simple_unlock(&map->flags_lock); 2157 } 2158 2159 /* 2160 * uvm_unmap_detach: drop references in a chain of map entries 2161 * 2162 * => we will free the map entries as we traverse the list. 
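 * => illustrative sketch of the usual caller pattern (compare
 *    uvmspace_free() below); the names are placeholders only:
 *
 *      vm_map_lock(map);
 *      uvm_unmap_remove(map, start, end, &dead_entries, NULL, 0);
 *      vm_map_unlock(map);
 *      if (dead_entries != NULL)
 *              uvm_unmap_detach(dead_entries, 0);
 *
 *    i.e. entries are unlinked while the map is locked, and the
 *    (possibly sleeping) reference drops happen only after the map
 *    has been unlocked.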
2163 */ 2164 2165 void 2166 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2167 { 2168 struct vm_map_entry *next_entry; 2169 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 2170 2171 while (first_entry) { 2172 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2173 UVMHIST_LOG(maphist, 2174 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", 2175 first_entry, first_entry->aref.ar_amap, 2176 first_entry->object.uvm_obj, 2177 UVM_ET_ISSUBMAP(first_entry)); 2178 2179 /* 2180 * drop reference to amap, if we've got one 2181 */ 2182 2183 if (first_entry->aref.ar_amap) 2184 uvm_map_unreference_amap(first_entry, flags); 2185 2186 /* 2187 * drop reference to our backing object, if we've got one 2188 */ 2189 2190 KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2191 if (UVM_ET_ISOBJ(first_entry) && 2192 first_entry->object.uvm_obj->pgops->pgo_detach) { 2193 (*first_entry->object.uvm_obj->pgops->pgo_detach) 2194 (first_entry->object.uvm_obj); 2195 } 2196 next_entry = first_entry->next; 2197 uvm_mapent_free(first_entry); 2198 first_entry = next_entry; 2199 } 2200 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2201 } 2202 2203 /* 2204 * E X T R A C T I O N F U N C T I O N S 2205 */ 2206 2207 /* 2208 * uvm_map_reserve: reserve space in a vm_map for future use. 2209 * 2210 * => we reserve space in a map by putting a dummy map entry in the 2211 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2212 * => map should be unlocked (we will write lock it) 2213 * => we return true if we were able to reserve space 2214 * => XXXCDC: should be inline? 2215 */ 2216 2217 int 2218 uvm_map_reserve(struct vm_map *map, vsize_t size, 2219 vaddr_t offset /* hint for pmap_prefer */, 2220 vsize_t align /* alignment hint */, 2221 vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2222 uvm_flag_t flags /* UVM_FLAG_FIXED or 0 */) 2223 { 2224 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 2225 2226 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", 2227 map,size,offset,raddr); 2228 2229 size = round_page(size); 2230 2231 /* 2232 * reserve some virtual space. 2233 */ 2234 2235 if (uvm_map(map, raddr, size, NULL, offset, 0, 2236 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 2237 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) { 2238 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 2239 return (FALSE); 2240 } 2241 2242 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); 2243 return (TRUE); 2244 } 2245 2246 /* 2247 * uvm_map_replace: replace a reserved (blank) area of memory with 2248 * real mappings. 
2249 * 2250 * => caller must WRITE-LOCK the map 2251 * => we return TRUE if replacement was a success 2252 * => we expect the newents chain to have nnewents entrys on it and 2253 * we expect newents->prev to point to the last entry on the list 2254 * => note newents is allowed to be NULL 2255 */ 2256 2257 int 2258 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 2259 struct vm_map_entry *newents, int nnewents) 2260 { 2261 struct vm_map_entry *oldent, *last; 2262 2263 uvm_map_check(map, "map_replace entry"); 2264 2265 /* 2266 * first find the blank map entry at the specified address 2267 */ 2268 2269 if (!uvm_map_lookup_entry(map, start, &oldent)) { 2270 return (FALSE); 2271 } 2272 2273 /* 2274 * check to make sure we have a proper blank entry 2275 */ 2276 2277 if (end < oldent->end && !VM_MAP_USE_KMAPENT(map)) { 2278 UVM_MAP_CLIP_END(map, oldent, end, NULL); 2279 } 2280 if (oldent->start != start || oldent->end != end || 2281 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 2282 return (FALSE); 2283 } 2284 2285 #ifdef DIAGNOSTIC 2286 2287 /* 2288 * sanity check the newents chain 2289 */ 2290 2291 { 2292 struct vm_map_entry *tmpent = newents; 2293 int nent = 0; 2294 vaddr_t cur = start; 2295 2296 while (tmpent) { 2297 nent++; 2298 if (tmpent->start < cur) 2299 panic("uvm_map_replace1"); 2300 if (tmpent->start > tmpent->end || tmpent->end > end) { 2301 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", 2302 tmpent->start, tmpent->end, end); 2303 panic("uvm_map_replace2"); 2304 } 2305 cur = tmpent->end; 2306 if (tmpent->next) { 2307 if (tmpent->next->prev != tmpent) 2308 panic("uvm_map_replace3"); 2309 } else { 2310 if (newents->prev != tmpent) 2311 panic("uvm_map_replace4"); 2312 } 2313 tmpent = tmpent->next; 2314 } 2315 if (nent != nnewents) 2316 panic("uvm_map_replace5"); 2317 } 2318 #endif 2319 2320 /* 2321 * map entry is a valid blank! replace it. (this does all the 2322 * work of map entry link/unlink...). 2323 */ 2324 2325 if (newents) { 2326 last = newents->prev; 2327 2328 /* critical: flush stale hints out of map */ 2329 SAVE_HINT(map, map->hint, newents); 2330 if (map->first_free == oldent) 2331 map->first_free = last; 2332 2333 last->next = oldent->next; 2334 last->next->prev = last; 2335 2336 /* Fix RB tree */ 2337 uvm_rb_remove(map, oldent); 2338 2339 newents->prev = oldent->prev; 2340 newents->prev->next = newents; 2341 map->nentries = map->nentries + (nnewents - 1); 2342 2343 /* Fixup the RB tree */ 2344 { 2345 int i; 2346 struct vm_map_entry *tmp; 2347 2348 tmp = newents; 2349 for (i = 0; i < nnewents && tmp; i++) { 2350 uvm_rb_insert(map, tmp); 2351 tmp = tmp->next; 2352 } 2353 } 2354 } else { 2355 /* NULL list of new entries: just remove the old one */ 2356 clear_hints(map, oldent); 2357 uvm_map_entry_unlink(map, oldent); 2358 } 2359 2360 uvm_map_check(map, "map_replace leave"); 2361 2362 /* 2363 * now we can free the old blank entry and return. 
2364 */ 2365 2366 uvm_mapent_free(oldent); 2367 return (TRUE); 2368 } 2369 2370 /* 2371 * uvm_map_extract: extract a mapping from a map and put it somewhere 2372 * (maybe removing the old mapping) 2373 * 2374 * => maps should be unlocked (we will write lock them) 2375 * => returns 0 on success, error code otherwise 2376 * => start must be page aligned 2377 * => len must be page sized 2378 * => flags: 2379 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2380 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2381 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2382 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2383 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2384 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2385 * be used from within the kernel in a kernel level map <<< 2386 */ 2387 2388 int 2389 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2390 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2391 { 2392 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2393 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2394 *deadentry, *oldentry; 2395 vsize_t elen; 2396 int nchain, error, copy_ok; 2397 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 2398 2399 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, 2400 len,0); 2401 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); 2402 2403 uvm_map_check(srcmap, "map_extract src enter"); 2404 uvm_map_check(dstmap, "map_extract dst enter"); 2405 2406 /* 2407 * step 0: sanity check: start must be on a page boundary, length 2408 * must be page sized. can't ask for CONTIG/QREF if you asked for 2409 * REMOVE. 2410 */ 2411 2412 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 2413 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2414 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2415 2416 /* 2417 * step 1: reserve space in the target map for the extracted area 2418 */ 2419 2420 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2421 dstaddr = vm_map_min(dstmap); 2422 if (!uvm_map_reserve(dstmap, len, start, 0, &dstaddr, 0)) 2423 return (ENOMEM); 2424 *dstaddrp = dstaddr; /* pass address back to caller */ 2425 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); 2426 } else { 2427 dstaddr = *dstaddrp; 2428 } 2429 2430 /* 2431 * step 2: setup for the extraction process loop by init'ing the 2432 * map entry chain, locking src map, and looking up the first useful 2433 * entry in the map. 2434 */ 2435 2436 end = start + len; 2437 newend = dstaddr + len; 2438 chain = endchain = NULL; 2439 nchain = 0; 2440 vm_map_lock(srcmap); 2441 2442 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2443 2444 /* "start" is within an entry */ 2445 if (flags & UVM_EXTRACT_QREF) { 2446 2447 /* 2448 * for quick references we don't clip the entry, so 2449 * the entry may map space "before" the starting 2450 * virtual address... this is the "fudge" factor 2451 * (which can be non-zero only the first time 2452 * through the "while" loop in step 3). 2453 */ 2454 2455 fudge = start - entry->start; 2456 } else { 2457 2458 /* 2459 * normal reference: we clip the map to fit (thus 2460 * fudge is zero) 2461 */ 2462 2463 UVM_MAP_CLIP_START(srcmap, entry, start, NULL); 2464 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2465 fudge = 0; 2466 } 2467 } else { 2468 2469 /* "start" is not within an entry ... 
skip to next entry */ 2470 if (flags & UVM_EXTRACT_CONTIG) { 2471 error = EINVAL; 2472 goto bad; /* definite hole here ... */ 2473 } 2474 2475 entry = entry->next; 2476 fudge = 0; 2477 } 2478 2479 /* save values from srcmap for step 6 */ 2480 orig_entry = entry; 2481 orig_fudge = fudge; 2482 2483 /* 2484 * step 3: now start looping through the map entries, extracting 2485 * as we go. 2486 */ 2487 2488 while (entry->start < end && entry != &srcmap->header) { 2489 2490 /* if we are not doing a quick reference, clip it */ 2491 if ((flags & UVM_EXTRACT_QREF) == 0) 2492 UVM_MAP_CLIP_END(srcmap, entry, end, NULL); 2493 2494 /* clear needs_copy (allow chunking) */ 2495 if (UVM_ET_ISNEEDSCOPY(entry)) { 2496 amap_copy(srcmap, entry, 2497 AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2498 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2499 error = ENOMEM; 2500 goto bad; 2501 } 2502 2503 /* amap_copy could clip (during chunk)! update fudge */ 2504 if (fudge) { 2505 fudge = start - entry->start; 2506 orig_fudge = fudge; 2507 } 2508 } 2509 2510 /* calculate the offset of this from "start" */ 2511 oldoffset = (entry->start + fudge) - start; 2512 2513 /* allocate a new map entry */ 2514 newentry = uvm_mapent_alloc(dstmap, 0); 2515 if (newentry == NULL) { 2516 error = ENOMEM; 2517 goto bad; 2518 } 2519 2520 /* set up new map entry */ 2521 newentry->next = NULL; 2522 newentry->prev = endchain; 2523 newentry->start = dstaddr + oldoffset; 2524 newentry->end = 2525 newentry->start + (entry->end - (entry->start + fudge)); 2526 if (newentry->end > newend || newentry->end < newentry->start) 2527 newentry->end = newend; 2528 newentry->object.uvm_obj = entry->object.uvm_obj; 2529 if (newentry->object.uvm_obj) { 2530 if (newentry->object.uvm_obj->pgops->pgo_reference) 2531 newentry->object.uvm_obj->pgops-> 2532 pgo_reference(newentry->object.uvm_obj); 2533 newentry->offset = entry->offset + fudge; 2534 } else { 2535 newentry->offset = 0; 2536 } 2537 newentry->etype = entry->etype; 2538 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2539 entry->max_protection : entry->protection; 2540 newentry->max_protection = entry->max_protection; 2541 newentry->inheritance = entry->inheritance; 2542 newentry->wired_count = 0; 2543 newentry->aref.ar_amap = entry->aref.ar_amap; 2544 if (newentry->aref.ar_amap) { 2545 newentry->aref.ar_pageoff = 2546 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2547 uvm_map_reference_amap(newentry, AMAP_SHARED | 2548 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2549 } else { 2550 newentry->aref.ar_pageoff = 0; 2551 } 2552 newentry->advice = entry->advice; 2553 2554 /* now link it on the chain */ 2555 nchain++; 2556 if (endchain == NULL) { 2557 chain = endchain = newentry; 2558 } else { 2559 endchain->next = newentry; 2560 endchain = newentry; 2561 } 2562 2563 /* end of 'while' loop! */ 2564 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2565 (entry->next == &srcmap->header || 2566 entry->next->start != entry->end)) { 2567 error = EINVAL; 2568 goto bad; 2569 } 2570 entry = entry->next; 2571 fudge = 0; 2572 } 2573 2574 /* 2575 * step 4: close off chain (in format expected by uvm_map_replace) 2576 */ 2577 2578 if (chain) 2579 chain->prev = endchain; 2580 2581 /* 2582 * step 5: attempt to lock the dest map so we can pmap_copy. 
2583 * note usage of copy_ok: 2584 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2585 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2586 */ 2587 2588 if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) { 2589 copy_ok = 1; 2590 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2591 nchain)) { 2592 if (srcmap != dstmap) 2593 vm_map_unlock(dstmap); 2594 error = EIO; 2595 goto bad; 2596 } 2597 } else { 2598 copy_ok = 0; 2599 /* replace defered until step 7 */ 2600 } 2601 2602 /* 2603 * step 6: traverse the srcmap a second time to do the following: 2604 * - if we got a lock on the dstmap do pmap_copy 2605 * - if UVM_EXTRACT_REMOVE remove the entries 2606 * we make use of orig_entry and orig_fudge (saved in step 2) 2607 */ 2608 2609 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2610 2611 /* purge possible stale hints from srcmap */ 2612 if (flags & UVM_EXTRACT_REMOVE) { 2613 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2614 if (srcmap->first_free != &srcmap->header && 2615 srcmap->first_free->start >= start) 2616 srcmap->first_free = orig_entry->prev; 2617 } 2618 2619 entry = orig_entry; 2620 fudge = orig_fudge; 2621 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2622 2623 while (entry->start < end && entry != &srcmap->header) { 2624 if (copy_ok) { 2625 oldoffset = (entry->start + fudge) - start; 2626 elen = MIN(end, entry->end) - 2627 (entry->start + fudge); 2628 pmap_copy(dstmap->pmap, srcmap->pmap, 2629 dstaddr + oldoffset, elen, 2630 entry->start + fudge); 2631 } 2632 2633 /* we advance "entry" in the following if statement */ 2634 if (flags & UVM_EXTRACT_REMOVE) { 2635 pmap_remove(srcmap->pmap, entry->start, 2636 entry->end); 2637 oldentry = entry; /* save entry */ 2638 entry = entry->next; /* advance */ 2639 uvm_map_entry_unlink(srcmap, oldentry); 2640 /* add to dead list */ 2641 oldentry->next = deadentry; 2642 deadentry = oldentry; 2643 } else { 2644 entry = entry->next; /* advance */ 2645 } 2646 2647 /* end of 'while' loop */ 2648 fudge = 0; 2649 } 2650 pmap_update(srcmap->pmap); 2651 2652 /* 2653 * unlock dstmap. we will dispose of deadentry in 2654 * step 7 if needed 2655 */ 2656 2657 if (copy_ok && srcmap != dstmap) 2658 vm_map_unlock(dstmap); 2659 2660 } else { 2661 deadentry = NULL; 2662 } 2663 2664 /* 2665 * step 7: we are done with the source map, unlock. if copy_ok 2666 * is 0 then we have not replaced the dummy mapping in dstmap yet 2667 * and we need to do so now. 2668 */ 2669 2670 vm_map_unlock(srcmap); 2671 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2672 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2673 2674 /* now do the replacement if we didn't do it in step 5 */ 2675 if (copy_ok == 0) { 2676 vm_map_lock(dstmap); 2677 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2678 nchain); 2679 vm_map_unlock(dstmap); 2680 2681 if (error == FALSE) { 2682 error = EIO; 2683 goto bad2; 2684 } 2685 } 2686 2687 uvm_map_check(srcmap, "map_extract src leave"); 2688 uvm_map_check(dstmap, "map_extract dst leave"); 2689 2690 return (0); 2691 2692 /* 2693 * bad: failure recovery 2694 */ 2695 bad: 2696 vm_map_unlock(srcmap); 2697 bad2: /* src already unlocked */ 2698 if (chain) 2699 uvm_unmap_detach(chain, 2700 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); 2701 2702 uvm_map_check(srcmap, "map_extract src err leave"); 2703 uvm_map_check(dstmap, "map_extract dst err leave"); 2704 2705 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2706 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? 
*/ 2707 } 2708 return (error); 2709 } 2710 2711 /* end of extraction functions */ 2712 2713 /* 2714 * uvm_map_submap: punch down part of a map into a submap 2715 * 2716 * => only the kernel_map is allowed to be submapped 2717 * => the purpose of submapping is to break up the locking granularity 2718 * of a larger map 2719 * => the range specified must have been mapped previously with a uvm_map() 2720 * call [with uobj==NULL] to create a blank map entry in the main map. 2721 * [And it had better still be blank!] 2722 * => maps which contain submaps should never be copied or forked. 2723 * => to remove a submap, use uvm_unmap() on the main map 2724 * and then uvm_map_deallocate() the submap. 2725 * => main map must be unlocked. 2726 * => submap must have been init'd and have a zero reference count. 2727 * [need not be locked as we don't actually reference it] 2728 */ 2729 2730 int 2731 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2732 struct vm_map *submap) 2733 { 2734 struct vm_map_entry *entry; 2735 struct uvm_mapent_reservation umr; 2736 int error; 2737 2738 uvm_mapent_reserve(map, &umr, 2, 0); 2739 2740 vm_map_lock(map); 2741 VM_MAP_RANGE_CHECK(map, start, end); 2742 2743 if (uvm_map_lookup_entry(map, start, &entry)) { 2744 UVM_MAP_CLIP_START(map, entry, start, &umr); 2745 UVM_MAP_CLIP_END(map, entry, end, &umr); /* to be safe */ 2746 } else { 2747 entry = NULL; 2748 } 2749 2750 if (entry != NULL && 2751 entry->start == start && entry->end == end && 2752 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2753 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2754 entry->etype |= UVM_ET_SUBMAP; 2755 entry->object.sub_map = submap; 2756 entry->offset = 0; 2757 uvm_map_reference(submap); 2758 error = 0; 2759 } else { 2760 error = EINVAL; 2761 } 2762 vm_map_unlock(map); 2763 2764 uvm_mapent_unreserve(map, &umr); 2765 2766 return error; 2767 } 2768 2769 /* 2770 * uvm_map_setup_kernel: init in-kernel map 2771 * 2772 * => map must not be in service yet. 2773 */ 2774 2775 void 2776 uvm_map_setup_kernel(struct vm_map_kernel *map, 2777 vaddr_t vmin, vaddr_t vmax, int flags) 2778 { 2779 2780 uvm_map_setup(&map->vmk_map, vmin, vmax, flags); 2781 2782 callback_head_init(&map->vmk_reclaim_callback); 2783 LIST_INIT(&map->vmk_kentry_free); 2784 map->vmk_merged_entries = NULL; 2785 } 2786 2787 2788 /* 2789 * uvm_map_protect: change map protection 2790 * 2791 * => set_max means set max_protection. 2792 * => map must be unlocked. 2793 */ 2794 2795 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2796 ~VM_PROT_WRITE : VM_PROT_ALL) 2797 2798 int 2799 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2800 vm_prot_t new_prot, boolean_t set_max) 2801 { 2802 struct vm_map_entry *current, *entry; 2803 int error = 0; 2804 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 2805 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", 2806 map, start, end, new_prot); 2807 2808 vm_map_lock(map); 2809 VM_MAP_RANGE_CHECK(map, start, end); 2810 if (uvm_map_lookup_entry(map, start, &entry)) { 2811 UVM_MAP_CLIP_START(map, entry, start, NULL); 2812 } else { 2813 entry = entry->next; 2814 } 2815 2816 /* 2817 * make a first pass to check for protection violations. 
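 * (pass 1 only validates and modifies nothing: submap entries fail with
 * EINVAL, requests beyond an entry's max_protection fail with EACCES,
 * and VM_PROT_EXECUTE on a vnode from a MNT_NOEXEC file system also
 * fails with EACCES.  protections are only changed in pass 2 below.)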
2818 */ 2819 2820 current = entry; 2821 while ((current != &map->header) && (current->start < end)) { 2822 if (UVM_ET_ISSUBMAP(current)) { 2823 error = EINVAL; 2824 goto out; 2825 } 2826 if ((new_prot & current->max_protection) != new_prot) { 2827 error = EACCES; 2828 goto out; 2829 } 2830 /* 2831 * Don't allow VM_PROT_EXECUTE to be set on entries that 2832 * point to vnodes that are associated with a NOEXEC file 2833 * system. 2834 */ 2835 if (UVM_ET_ISOBJ(current) && 2836 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 2837 struct vnode *vp = 2838 (struct vnode *) current->object.uvm_obj; 2839 2840 if ((new_prot & VM_PROT_EXECUTE) != 0 && 2841 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 2842 error = EACCES; 2843 goto out; 2844 } 2845 } 2846 2847 current = current->next; 2848 } 2849 2850 /* go back and fix up protections (no need to clip this time). */ 2851 2852 current = entry; 2853 while ((current != &map->header) && (current->start < end)) { 2854 vm_prot_t old_prot; 2855 2856 UVM_MAP_CLIP_END(map, current, end, NULL); 2857 old_prot = current->protection; 2858 if (set_max) 2859 current->protection = 2860 (current->max_protection = new_prot) & old_prot; 2861 else 2862 current->protection = new_prot; 2863 2864 /* 2865 * update physical map if necessary. worry about copy-on-write 2866 * here -- CHECK THIS XXX 2867 */ 2868 2869 if (current->protection != old_prot) { 2870 /* update pmap! */ 2871 pmap_protect(map->pmap, current->start, current->end, 2872 current->protection & MASK(entry)); 2873 2874 /* 2875 * If this entry points at a vnode, and the 2876 * protection includes VM_PROT_EXECUTE, mark 2877 * the vnode as VEXECMAP. 2878 */ 2879 if (UVM_ET_ISOBJ(current)) { 2880 struct uvm_object *uobj = 2881 current->object.uvm_obj; 2882 2883 if (UVM_OBJ_IS_VNODE(uobj) && 2884 (current->protection & VM_PROT_EXECUTE)) 2885 vn_markexec((struct vnode *) uobj); 2886 } 2887 } 2888 2889 /* 2890 * If the map is configured to lock any future mappings, 2891 * wire this entry now if the old protection was VM_PROT_NONE 2892 * and the new protection is not VM_PROT_NONE. 2893 */ 2894 2895 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 2896 VM_MAPENT_ISWIRED(entry) == 0 && 2897 old_prot == VM_PROT_NONE && 2898 new_prot != VM_PROT_NONE) { 2899 if (uvm_map_pageable(map, entry->start, 2900 entry->end, FALSE, 2901 UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 2902 2903 /* 2904 * If locking the entry fails, remember the 2905 * error if it's the first one. Note we 2906 * still continue setting the protection in 2907 * the map, but will return the error 2908 * condition regardless. 2909 * 2910 * XXX Ignore what the actual error is, 2911 * XXX just call it a resource shortage 2912 * XXX so that it doesn't get confused 2913 * XXX what uvm_map_protect() itself would 2914 * XXX normally return. 2915 */ 2916 2917 error = ENOMEM; 2918 } 2919 } 2920 current = current->next; 2921 } 2922 pmap_update(map->pmap); 2923 2924 out: 2925 vm_map_unlock(map); 2926 2927 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); 2928 return error; 2929 } 2930 2931 #undef MASK 2932 2933 /* 2934 * uvm_map_inherit: set inheritance code for range of addrs in map. 2935 * 2936 * => map must be unlocked 2937 * => note that the inherit code is used during a "fork". see fork 2938 * code for details. 
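 * => valid codes are MAP_INHERIT_NONE, MAP_INHERIT_COPY and
 *    MAP_INHERIT_SHARE; anything else gets EINVAL.  an illustrative
 *    sketch of a caller that wants a range to stay shared with its
 *    children across fork (placeholder names):
 *
 *      error = uvm_map_inherit(map, start, end, MAP_INHERIT_SHARE);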
2939 */ 2940 2941 int 2942 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 2943 vm_inherit_t new_inheritance) 2944 { 2945 struct vm_map_entry *entry, *temp_entry; 2946 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 2947 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", 2948 map, start, end, new_inheritance); 2949 2950 switch (new_inheritance) { 2951 case MAP_INHERIT_NONE: 2952 case MAP_INHERIT_COPY: 2953 case MAP_INHERIT_SHARE: 2954 break; 2955 default: 2956 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 2957 return EINVAL; 2958 } 2959 2960 vm_map_lock(map); 2961 VM_MAP_RANGE_CHECK(map, start, end); 2962 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 2963 entry = temp_entry; 2964 UVM_MAP_CLIP_START(map, entry, start, NULL); 2965 } else { 2966 entry = temp_entry->next; 2967 } 2968 while ((entry != &map->header) && (entry->start < end)) { 2969 UVM_MAP_CLIP_END(map, entry, end, NULL); 2970 entry->inheritance = new_inheritance; 2971 entry = entry->next; 2972 } 2973 vm_map_unlock(map); 2974 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 2975 return 0; 2976 } 2977 2978 /* 2979 * uvm_map_advice: set advice code for range of addrs in map. 2980 * 2981 * => map must be unlocked 2982 */ 2983 2984 int 2985 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 2986 { 2987 struct vm_map_entry *entry, *temp_entry; 2988 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 2989 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", 2990 map, start, end, new_advice); 2991 2992 vm_map_lock(map); 2993 VM_MAP_RANGE_CHECK(map, start, end); 2994 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 2995 entry = temp_entry; 2996 UVM_MAP_CLIP_START(map, entry, start, NULL); 2997 } else { 2998 entry = temp_entry->next; 2999 } 3000 3001 /* 3002 * XXXJRT: disallow holes? 3003 */ 3004 3005 while ((entry != &map->header) && (entry->start < end)) { 3006 UVM_MAP_CLIP_END(map, entry, end, NULL); 3007 3008 switch (new_advice) { 3009 case MADV_NORMAL: 3010 case MADV_RANDOM: 3011 case MADV_SEQUENTIAL: 3012 /* nothing special here */ 3013 break; 3014 3015 default: 3016 vm_map_unlock(map); 3017 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3018 return EINVAL; 3019 } 3020 entry->advice = new_advice; 3021 entry = entry->next; 3022 } 3023 3024 vm_map_unlock(map); 3025 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3026 return 0; 3027 } 3028 3029 /* 3030 * uvm_map_pageable: sets the pageability of a range in a map. 3031 * 3032 * => wires map entries. should not be used for transient page locking. 3033 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3034 * => regions specified as not pageable require lock-down (wired) memory 3035 * and page tables. 3036 * => map must never be read-locked 3037 * => if islocked is TRUE, map is already write-locked 3038 * => we always unlock the map, since we must downgrade to a read-lock 3039 * to call uvm_fault_wire() 3040 * => XXXCDC: check this and try and clean it up. 
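 * => "new_pageable" == TRUE unwires the range (makes it pageable again);
 *    FALSE wires it down.  an illustrative sketch of a caller that does
 *    not yet hold the map lock (placeholder names):
 *
 *      error = uvm_map_pageable(map, start, end, FALSE, 0);
 *
 *    a caller that already holds the write lock and wants to keep it
 *    passes UVM_LK_ENTER|UVM_LK_EXIT for "lockflags" instead of 0, as
 *    uvm_map_protect() above does for VM_MAP_WIREFUTURE maps.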
3041 */ 3042 3043 int 3044 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3045 boolean_t new_pageable, int lockflags) 3046 { 3047 struct vm_map_entry *entry, *start_entry, *failed_entry; 3048 int rv; 3049 #ifdef DIAGNOSTIC 3050 u_int timestamp_save; 3051 #endif 3052 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 3053 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", 3054 map, start, end, new_pageable); 3055 KASSERT(map->flags & VM_MAP_PAGEABLE); 3056 3057 if ((lockflags & UVM_LK_ENTER) == 0) 3058 vm_map_lock(map); 3059 VM_MAP_RANGE_CHECK(map, start, end); 3060 3061 /* 3062 * only one pageability change may take place at one time, since 3063 * uvm_fault_wire assumes it will be called only once for each 3064 * wiring/unwiring. therefore, we have to make sure we're actually 3065 * changing the pageability for the entire region. we do so before 3066 * making any changes. 3067 */ 3068 3069 if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { 3070 if ((lockflags & UVM_LK_EXIT) == 0) 3071 vm_map_unlock(map); 3072 3073 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3074 return EFAULT; 3075 } 3076 entry = start_entry; 3077 3078 /* 3079 * handle wiring and unwiring separately. 3080 */ 3081 3082 if (new_pageable) { /* unwire */ 3083 UVM_MAP_CLIP_START(map, entry, start, NULL); 3084 3085 /* 3086 * unwiring. first ensure that the range to be unwired is 3087 * really wired down and that there are no holes. 3088 */ 3089 3090 while ((entry != &map->header) && (entry->start < end)) { 3091 if (entry->wired_count == 0 || 3092 (entry->end < end && 3093 (entry->next == &map->header || 3094 entry->next->start > entry->end))) { 3095 if ((lockflags & UVM_LK_EXIT) == 0) 3096 vm_map_unlock(map); 3097 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3098 return EINVAL; 3099 } 3100 entry = entry->next; 3101 } 3102 3103 /* 3104 * POSIX 1003.1b - a single munlock call unlocks a region, 3105 * regardless of the number of mlock calls made on that 3106 * region. 3107 */ 3108 3109 entry = start_entry; 3110 while ((entry != &map->header) && (entry->start < end)) { 3111 UVM_MAP_CLIP_END(map, entry, end, NULL); 3112 if (VM_MAPENT_ISWIRED(entry)) 3113 uvm_map_entry_unwire(map, entry); 3114 entry = entry->next; 3115 } 3116 if ((lockflags & UVM_LK_EXIT) == 0) 3117 vm_map_unlock(map); 3118 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3119 return 0; 3120 } 3121 3122 /* 3123 * wire case: in two passes [XXXCDC: ugly block of code here] 3124 * 3125 * 1: holding the write lock, we create any anonymous maps that need 3126 * to be created. then we clip each map entry to the region to 3127 * be wired and increment its wiring count. 3128 * 3129 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3130 * in the pages for any newly wired area (wired_count == 1). 3131 * 3132 * downgrading to a read lock for uvm_fault_wire avoids a possible 3133 * deadlock with another thread that may have faulted on one of 3134 * the pages to be wired (it would mark the page busy, blocking 3135 * us, then in turn block on the map lock that we hold). because 3136 * of problems in the recursive lock package, we cannot upgrade 3137 * to a write lock in vm_map_lookup. thus, any actions that 3138 * require the write lock must be done beforehand. because we 3139 * keep the read lock on the map, the copy-on-write status of the 3140 * entries we modify here cannot change. 
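 *
 * in outline (illustrative summary of the code below):
 *
 *      pass 1 (write lock): amap_copy() where needed, clip the entries,
 *              and bump wired_count on each of them
 *      pass 2 (read lock):  uvm_fault_wire() each entry whose
 *              wired_count just became 1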
3141 */ 3142 3143 while ((entry != &map->header) && (entry->start < end)) { 3144 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3145 3146 /* 3147 * perform actions of vm_map_lookup that need the 3148 * write lock on the map: create an anonymous map 3149 * for a copy-on-write region, or an anonymous map 3150 * for a zero-fill region. (XXXCDC: submap case 3151 * ok?) 3152 */ 3153 3154 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3155 if (UVM_ET_ISNEEDSCOPY(entry) && 3156 ((entry->max_protection & VM_PROT_WRITE) || 3157 (entry->object.uvm_obj == NULL))) { 3158 amap_copy(map, entry, 0, start, end); 3159 /* XXXCDC: wait OK? */ 3160 } 3161 } 3162 } 3163 UVM_MAP_CLIP_START(map, entry, start, NULL); 3164 UVM_MAP_CLIP_END(map, entry, end, NULL); 3165 entry->wired_count++; 3166 3167 /* 3168 * Check for holes 3169 */ 3170 3171 if (entry->protection == VM_PROT_NONE || 3172 (entry->end < end && 3173 (entry->next == &map->header || 3174 entry->next->start > entry->end))) { 3175 3176 /* 3177 * found one. amap creation actions do not need to 3178 * be undone, but the wired counts need to be restored. 3179 */ 3180 3181 while (entry != &map->header && entry->end > start) { 3182 entry->wired_count--; 3183 entry = entry->prev; 3184 } 3185 if ((lockflags & UVM_LK_EXIT) == 0) 3186 vm_map_unlock(map); 3187 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3188 return EINVAL; 3189 } 3190 entry = entry->next; 3191 } 3192 3193 /* 3194 * Pass 2. 3195 */ 3196 3197 #ifdef DIAGNOSTIC 3198 timestamp_save = map->timestamp; 3199 #endif 3200 vm_map_busy(map); 3201 vm_map_downgrade(map); 3202 3203 rv = 0; 3204 entry = start_entry; 3205 while (entry != &map->header && entry->start < end) { 3206 if (entry->wired_count == 1) { 3207 rv = uvm_fault_wire(map, entry->start, entry->end, 3208 entry->max_protection, 1); 3209 if (rv) { 3210 3211 /* 3212 * wiring failed. break out of the loop. 3213 * we'll clean up the map below, once we 3214 * have a write lock again. 3215 */ 3216 3217 break; 3218 } 3219 } 3220 entry = entry->next; 3221 } 3222 3223 if (rv) { /* failed? */ 3224 3225 /* 3226 * Get back to an exclusive (write) lock. 3227 */ 3228 3229 vm_map_upgrade(map); 3230 vm_map_unbusy(map); 3231 3232 #ifdef DIAGNOSTIC 3233 if (timestamp_save != map->timestamp) 3234 panic("uvm_map_pageable: stale map"); 3235 #endif 3236 3237 /* 3238 * first drop the wiring count on all the entries 3239 * which haven't actually been wired yet. 3240 */ 3241 3242 failed_entry = entry; 3243 while (entry != &map->header && entry->start < end) { 3244 entry->wired_count--; 3245 entry = entry->next; 3246 } 3247 3248 /* 3249 * now, unwire all the entries that were successfully 3250 * wired above. 3251 */ 3252 3253 entry = start_entry; 3254 while (entry != failed_entry) { 3255 entry->wired_count--; 3256 if (VM_MAPENT_ISWIRED(entry) == 0) 3257 uvm_map_entry_unwire(map, entry); 3258 entry = entry->next; 3259 } 3260 if ((lockflags & UVM_LK_EXIT) == 0) 3261 vm_map_unlock(map); 3262 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); 3263 return (rv); 3264 } 3265 3266 /* We are holding a read lock here. */ 3267 if ((lockflags & UVM_LK_EXIT) == 0) { 3268 vm_map_unbusy(map); 3269 vm_map_unlock_read(map); 3270 } else { 3271 3272 /* 3273 * Get back to an exclusive (write) lock. 3274 */ 3275 3276 vm_map_upgrade(map); 3277 vm_map_unbusy(map); 3278 } 3279 3280 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3281 return 0; 3282 } 3283 3284 /* 3285 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3286 * all mapped regions. 
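 *
 *    this is the mlockall(2)/munlockall(2) path: MCL_CURRENT wires
 *    everything currently mapped, MCL_FUTURE arranges for future
 *    mappings to be wired, and flags == 0 unwires everything.  an
 *    illustrative sketch, with "limit" capping the amount of memory
 *    allowed to end up wired:
 *
 *      error = uvm_map_pageable_all(map, MCL_CURRENT | MCL_FUTURE, limit);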
3287 * 3288 * => map must not be locked. 3289 * => if no flags are specified, all regions are unwired. 3290 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3291 */ 3292 3293 int 3294 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3295 { 3296 struct vm_map_entry *entry, *failed_entry; 3297 vsize_t size; 3298 int rv; 3299 #ifdef DIAGNOSTIC 3300 u_int timestamp_save; 3301 #endif 3302 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 3303 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0); 3304 3305 KASSERT(map->flags & VM_MAP_PAGEABLE); 3306 3307 vm_map_lock(map); 3308 3309 /* 3310 * handle wiring and unwiring separately. 3311 */ 3312 3313 if (flags == 0) { /* unwire */ 3314 3315 /* 3316 * POSIX 1003.1b -- munlockall unlocks all regions, 3317 * regardless of how many times mlockall has been called. 3318 */ 3319 3320 for (entry = map->header.next; entry != &map->header; 3321 entry = entry->next) { 3322 if (VM_MAPENT_ISWIRED(entry)) 3323 uvm_map_entry_unwire(map, entry); 3324 } 3325 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3326 vm_map_unlock(map); 3327 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3328 return 0; 3329 } 3330 3331 if (flags & MCL_FUTURE) { 3332 3333 /* 3334 * must wire all future mappings; remember this. 3335 */ 3336 3337 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 3338 } 3339 3340 if ((flags & MCL_CURRENT) == 0) { 3341 3342 /* 3343 * no more work to do! 3344 */ 3345 3346 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3347 vm_map_unlock(map); 3348 return 0; 3349 } 3350 3351 /* 3352 * wire case: in three passes [XXXCDC: ugly block of code here] 3353 * 3354 * 1: holding the write lock, count all pages mapped by non-wired 3355 * entries. if this would cause us to go over our limit, we fail. 3356 * 3357 * 2: still holding the write lock, we create any anonymous maps that 3358 * need to be created. then we increment its wiring count. 3359 * 3360 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3361 * in the pages for any newly wired area (wired_count == 1). 3362 * 3363 * downgrading to a read lock for uvm_fault_wire avoids a possible 3364 * deadlock with another thread that may have faulted on one of 3365 * the pages to be wired (it would mark the page busy, blocking 3366 * us, then in turn block on the map lock that we hold). because 3367 * of problems in the recursive lock package, we cannot upgrade 3368 * to a write lock in vm_map_lookup. thus, any actions that 3369 * require the write lock must be done beforehand. because we 3370 * keep the read lock on the map, the copy-on-write status of the 3371 * entries we modify here cannot change. 3372 */ 3373 3374 for (size = 0, entry = map->header.next; entry != &map->header; 3375 entry = entry->next) { 3376 if (entry->protection != VM_PROT_NONE && 3377 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3378 size += entry->end - entry->start; 3379 } 3380 } 3381 3382 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3383 vm_map_unlock(map); 3384 return ENOMEM; 3385 } 3386 3387 if (limit != 0 && 3388 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3389 vm_map_unlock(map); 3390 return ENOMEM; 3391 } 3392 3393 /* 3394 * Pass 2. 3395 */ 3396 3397 for (entry = map->header.next; entry != &map->header; 3398 entry = entry->next) { 3399 if (entry->protection == VM_PROT_NONE) 3400 continue; 3401 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? 
*/ 3402 3403 /* 3404 * perform actions of vm_map_lookup that need the 3405 * write lock on the map: create an anonymous map 3406 * for a copy-on-write region, or an anonymous map 3407 * for a zero-fill region. (XXXCDC: submap case 3408 * ok?) 3409 */ 3410 3411 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3412 if (UVM_ET_ISNEEDSCOPY(entry) && 3413 ((entry->max_protection & VM_PROT_WRITE) || 3414 (entry->object.uvm_obj == NULL))) { 3415 amap_copy(map, entry, 0, entry->start, 3416 entry->end); 3417 /* XXXCDC: wait OK? */ 3418 } 3419 } 3420 } 3421 entry->wired_count++; 3422 } 3423 3424 /* 3425 * Pass 3. 3426 */ 3427 3428 #ifdef DIAGNOSTIC 3429 timestamp_save = map->timestamp; 3430 #endif 3431 vm_map_busy(map); 3432 vm_map_downgrade(map); 3433 3434 rv = 0; 3435 for (entry = map->header.next; entry != &map->header; 3436 entry = entry->next) { 3437 if (entry->wired_count == 1) { 3438 rv = uvm_fault_wire(map, entry->start, entry->end, 3439 entry->max_protection, 1); 3440 if (rv) { 3441 3442 /* 3443 * wiring failed. break out of the loop. 3444 * we'll clean up the map below, once we 3445 * have a write lock again. 3446 */ 3447 3448 break; 3449 } 3450 } 3451 } 3452 3453 if (rv) { 3454 3455 /* 3456 * Get back an exclusive (write) lock. 3457 */ 3458 3459 vm_map_upgrade(map); 3460 vm_map_unbusy(map); 3461 3462 #ifdef DIAGNOSTIC 3463 if (timestamp_save != map->timestamp) 3464 panic("uvm_map_pageable_all: stale map"); 3465 #endif 3466 3467 /* 3468 * first drop the wiring count on all the entries 3469 * which haven't actually been wired yet. 3470 * 3471 * Skip VM_PROT_NONE entries like we did above. 3472 */ 3473 3474 failed_entry = entry; 3475 for (/* nothing */; entry != &map->header; 3476 entry = entry->next) { 3477 if (entry->protection == VM_PROT_NONE) 3478 continue; 3479 entry->wired_count--; 3480 } 3481 3482 /* 3483 * now, unwire all the entries that were successfully 3484 * wired above. 3485 * 3486 * Skip VM_PROT_NONE entries like we did above. 3487 */ 3488 3489 for (entry = map->header.next; entry != failed_entry; 3490 entry = entry->next) { 3491 if (entry->protection == VM_PROT_NONE) 3492 continue; 3493 entry->wired_count--; 3494 if (VM_MAPENT_ISWIRED(entry)) 3495 uvm_map_entry_unwire(map, entry); 3496 } 3497 vm_map_unlock(map); 3498 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0); 3499 return (rv); 3500 } 3501 3502 /* We are holding a read lock here. */ 3503 vm_map_unbusy(map); 3504 vm_map_unlock_read(map); 3505 3506 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3507 return 0; 3508 } 3509 3510 /* 3511 * uvm_map_clean: clean out a map range 3512 * 3513 * => valid flags: 3514 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3515 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3516 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3517 * if (flags & PGO_FREE): any cached pages are freed after clean 3518 * => returns an error if any part of the specified range isn't mapped 3519 * => never a need to flush amap layer since the anonymous memory has 3520 * no permanent home, but may deactivate pages there 3521 * => called from sys_msync() and sys_madvise() 3522 * => caller must not write-lock map (read OK). 
3523 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3524 */ 3525 3526 int 3527 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3528 { 3529 struct vm_map_entry *current, *entry; 3530 struct uvm_object *uobj; 3531 struct vm_amap *amap; 3532 struct vm_anon *anon; 3533 struct vm_page *pg; 3534 vaddr_t offset; 3535 vsize_t size; 3536 voff_t uoff; 3537 int error, refs; 3538 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3539 3540 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", 3541 map, start, end, flags); 3542 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3543 (PGO_FREE|PGO_DEACTIVATE)); 3544 3545 vm_map_lock_read(map); 3546 VM_MAP_RANGE_CHECK(map, start, end); 3547 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) { 3548 vm_map_unlock_read(map); 3549 return EFAULT; 3550 } 3551 3552 /* 3553 * Make a first pass to check for holes and wiring problems. 3554 */ 3555 3556 for (current = entry; current->start < end; current = current->next) { 3557 if (UVM_ET_ISSUBMAP(current)) { 3558 vm_map_unlock_read(map); 3559 return EINVAL; 3560 } 3561 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3562 vm_map_unlock_read(map); 3563 return EBUSY; 3564 } 3565 if (end <= current->end) { 3566 break; 3567 } 3568 if (current->end != current->next->start) { 3569 vm_map_unlock_read(map); 3570 return EFAULT; 3571 } 3572 } 3573 3574 error = 0; 3575 for (current = entry; start < end; current = current->next) { 3576 amap = current->aref.ar_amap; /* top layer */ 3577 uobj = current->object.uvm_obj; /* bottom layer */ 3578 KASSERT(start >= current->start); 3579 3580 /* 3581 * No amap cleaning necessary if: 3582 * 3583 * (1) There's no amap. 3584 * 3585 * (2) We're not deactivating or freeing pages. 3586 */ 3587 3588 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 3589 goto flush_object; 3590 3591 amap_lock(amap); 3592 offset = start - current->start; 3593 size = MIN(end, current->end) - start; 3594 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { 3595 anon = amap_lookup(&current->aref, offset); 3596 if (anon == NULL) 3597 continue; 3598 3599 simple_lock(&anon->an_lock); 3600 pg = anon->an_page; 3601 if (pg == NULL) { 3602 simple_unlock(&anon->an_lock); 3603 continue; 3604 } 3605 3606 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 3607 3608 /* 3609 * In these first 3 cases, we just deactivate the page. 3610 */ 3611 3612 case PGO_CLEANIT|PGO_FREE: 3613 case PGO_CLEANIT|PGO_DEACTIVATE: 3614 case PGO_DEACTIVATE: 3615 deactivate_it: 3616 /* 3617 * skip the page if it's loaned or wired, 3618 * since it shouldn't be on a paging queue 3619 * at all in these cases. 3620 */ 3621 3622 uvm_lock_pageq(); 3623 if (pg->loan_count != 0 || 3624 pg->wire_count != 0) { 3625 uvm_unlock_pageq(); 3626 simple_unlock(&anon->an_lock); 3627 continue; 3628 } 3629 KASSERT(pg->uanon == anon); 3630 pmap_clear_reference(pg); 3631 uvm_pagedeactivate(pg); 3632 uvm_unlock_pageq(); 3633 simple_unlock(&anon->an_lock); 3634 continue; 3635 3636 case PGO_FREE: 3637 3638 /* 3639 * If there are multiple references to 3640 * the amap, just deactivate the page.
3641 */ 3642 3643 if (amap_refs(amap) > 1) 3644 goto deactivate_it; 3645 3646 /* skip the page if it's wired */ 3647 if (pg->wire_count != 0) { 3648 simple_unlock(&anon->an_lock); 3649 continue; 3650 } 3651 amap_unadd(&current->aref, offset); 3652 refs = --anon->an_ref; 3653 simple_unlock(&anon->an_lock); 3654 if (refs == 0) 3655 uvm_anfree(anon); 3656 continue; 3657 } 3658 } 3659 amap_unlock(amap); 3660 3661 flush_object: 3662 /* 3663 * flush pages if we've got a valid backing object. 3664 * note that we must always clean object pages before 3665 * freeing them since otherwise we could reveal stale 3666 * data from files. 3667 */ 3668 3669 uoff = current->offset + (start - current->start); 3670 size = MIN(end, current->end) - start; 3671 if (uobj != NULL) { 3672 simple_lock(&uobj->vmobjlock); 3673 if (uobj->pgops->pgo_put != NULL) 3674 error = (uobj->pgops->pgo_put)(uobj, uoff, 3675 uoff + size, flags | PGO_CLEANIT); 3676 else 3677 error = 0; 3678 } 3679 start += size; 3680 } 3681 vm_map_unlock_read(map); 3682 return (error); 3683 } 3684 3685 3686 /* 3687 * uvm_map_checkprot: check protection in map 3688 * 3689 * => must allow specified protection in a fully allocated region. 3690 * => map must be read or write locked by caller. 3691 */ 3692 3693 boolean_t 3694 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3695 vm_prot_t protection) 3696 { 3697 struct vm_map_entry *entry; 3698 struct vm_map_entry *tmp_entry; 3699 3700 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { 3701 return (FALSE); 3702 } 3703 entry = tmp_entry; 3704 while (start < end) { 3705 if (entry == &map->header) { 3706 return (FALSE); 3707 } 3708 3709 /* 3710 * no holes allowed 3711 */ 3712 3713 if (start < entry->start) { 3714 return (FALSE); 3715 } 3716 3717 /* 3718 * check protection associated with entry 3719 */ 3720 3721 if ((entry->protection & protection) != protection) { 3722 return (FALSE); 3723 } 3724 start = entry->end; 3725 entry = entry->next; 3726 } 3727 return (TRUE); 3728 } 3729 3730 /* 3731 * uvmspace_alloc: allocate a vmspace structure. 3732 * 3733 * - structure includes vm_map and pmap 3734 * - XXX: no locking on this structure 3735 * - refcnt set to 1, rest must be init'd by caller 3736 */ 3737 struct vmspace * 3738 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) 3739 { 3740 struct vmspace *vm; 3741 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 3742 3743 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); 3744 uvmspace_init(vm, NULL, vmin, vmax); 3745 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); 3746 return (vm); 3747 } 3748 3749 /* 3750 * uvmspace_init: initialize a vmspace structure. 3751 * 3752 * - XXX: no locking on this structure 3753 * - refcnt set to 1, rest must be init'd by caller 3754 */ 3755 void 3756 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) 3757 { 3758 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 3759 3760 memset(vm, 0, sizeof(*vm)); 3761 uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 3762 #ifdef __USING_TOPDOWN_VM 3763 | VM_MAP_TOPDOWN 3764 #endif 3765 ); 3766 if (pmap) 3767 pmap_reference(pmap); 3768 else 3769 pmap = pmap_create(); 3770 vm->vm_map.pmap = pmap; 3771 vm->vm_refcnt = 1; 3772 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3773 } 3774 3775 /* 3776 * uvmspace_share: share a vmspace between two processes 3777 * 3778 * - used for vfork, threads(?)
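 * - the reference taken here must eventually be balanced by a
 *   uvmspace_free() of the shared vmspace; an illustrative sketch of
 *   the hand-off (placeholder names):
 *
 *      uvmspace_share(parent, child);
 *      ...
 *      uvmspace_free(child->p_vmspace);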
3779 */ 3780 3781 void 3782 uvmspace_share(struct proc *p1, struct proc *p2) 3783 { 3784 3785 uvmspace_addref(p1->p_vmspace); 3786 p2->p_vmspace = p1->p_vmspace; 3787 } 3788 3789 /* 3790 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 3791 * 3792 * - XXX: no locking on vmspace 3793 */ 3794 3795 void 3796 uvmspace_unshare(struct lwp *l) 3797 { 3798 struct proc *p = l->l_proc; 3799 struct vmspace *nvm, *ovm = p->p_vmspace; 3800 3801 if (ovm->vm_refcnt == 1) 3802 /* nothing to do: vmspace isn't shared in the first place */ 3803 return; 3804 3805 /* make a new vmspace, still holding old one */ 3806 nvm = uvmspace_fork(ovm); 3807 3808 pmap_deactivate(l); /* unbind old vmspace */ 3809 p->p_vmspace = nvm; 3810 pmap_activate(l); /* switch to new vmspace */ 3811 3812 uvmspace_free(ovm); /* drop reference to old vmspace */ 3813 } 3814 3815 /* 3816 * uvmspace_exec: the process wants to exec a new program 3817 */ 3818 3819 void 3820 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end) 3821 { 3822 struct proc *p = l->l_proc; 3823 struct vmspace *nvm, *ovm = p->p_vmspace; 3824 struct vm_map *map = &ovm->vm_map; 3825 3826 #ifdef __sparc__ 3827 /* XXX cgd 960926: the sparc #ifdef should be a MD hook */ 3828 kill_user_windows(l); /* before stack addresses go away */ 3829 #endif 3830 3831 /* 3832 * see if more than one process is using this vmspace... 3833 */ 3834 3835 if (ovm->vm_refcnt == 1) { 3836 3837 /* 3838 * if p is the only process using its vmspace then we can safely 3839 * recycle that vmspace for the program that is being exec'd. 3840 */ 3841 3842 #ifdef SYSVSHM 3843 /* 3844 * SYSV SHM semantics require us to kill all segments on an exec 3845 */ 3846 3847 if (ovm->vm_shm) 3848 shmexit(ovm); 3849 #endif 3850 3851 /* 3852 * POSIX 1003.1b -- "lock future mappings" is revoked 3853 * when a process execs another program image. 3854 */ 3855 3856 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3857 3858 /* 3859 * now unmap the old program 3860 */ 3861 3862 pmap_remove_all(map->pmap); 3863 uvm_unmap(map, vm_map_min(map), vm_map_max(map)); 3864 KASSERT(map->header.prev == &map->header); 3865 KASSERT(map->nentries == 0); 3866 3867 /* 3868 * resize the map 3869 */ 3870 3871 vm_map_setmin(map, start); 3872 vm_map_setmax(map, end); 3873 } else { 3874 3875 /* 3876 * p's vmspace is being shared, so we can't reuse it for p since 3877 * it is still being used for others. allocate a new vmspace 3878 * for p 3879 */ 3880 3881 nvm = uvmspace_alloc(start, end); 3882 3883 /* 3884 * install new vmspace and drop our ref to the old one. 3885 */ 3886 3887 pmap_deactivate(l); 3888 p->p_vmspace = nvm; 3889 pmap_activate(l); 3890 3891 uvmspace_free(ovm); 3892 } 3893 } 3894 3895 /* 3896 * uvmspace_addref: add a referece to a vmspace. 
3897 */ 3898 3899 void 3900 uvmspace_addref(struct vmspace *vm) 3901 { 3902 struct vm_map *map = &vm->vm_map; 3903 3904 KASSERT((map->flags & VM_MAP_DYING) == 0); 3905 3906 simple_lock(&map->ref_lock); 3907 KASSERT(vm->vm_refcnt > 0); 3908 vm->vm_refcnt++; 3909 simple_unlock(&map->ref_lock); 3910 } 3911 3912 /* 3913 * uvmspace_free: free a vmspace data structure 3914 */ 3915 3916 void 3917 uvmspace_free(struct vmspace *vm) 3918 { 3919 struct vm_map_entry *dead_entries; 3920 struct vm_map *map = &vm->vm_map; 3921 int n; 3922 3923 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); 3924 3925 UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); 3926 simple_lock(&map->ref_lock); 3927 n = --vm->vm_refcnt; 3928 simple_unlock(&map->ref_lock); 3929 if (n > 0) 3930 return; 3931 3932 /* 3933 * at this point, there should be no other references to the map. 3934 * delete all of the mappings, then destroy the pmap. 3935 */ 3936 3937 map->flags |= VM_MAP_DYING; 3938 pmap_remove_all(map->pmap); 3939 #ifdef SYSVSHM 3940 /* Get rid of any SYSV shared memory segments. */ 3941 if (vm->vm_shm != NULL) 3942 shmexit(vm); 3943 #endif 3944 if (map->nentries) { 3945 uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map), 3946 &dead_entries, NULL, 0); 3947 if (dead_entries != NULL) 3948 uvm_unmap_detach(dead_entries, 0); 3949 } 3950 KASSERT(map->nentries == 0); 3951 KASSERT(map->size == 0); 3952 pmap_destroy(map->pmap); 3953 pool_put(&uvm_vmspace_pool, vm); 3954 } 3955 3956 /* 3957 * F O R K - m a i n e n t r y p o i n t 3958 */ 3959 /* 3960 * uvmspace_fork: fork a process' main map 3961 * 3962 * => create a new vmspace for child process from parent. 3963 * => parent's map must not be locked. 3964 */ 3965 3966 struct vmspace * 3967 uvmspace_fork(struct vmspace *vm1) 3968 { 3969 struct vmspace *vm2; 3970 struct vm_map *old_map = &vm1->vm_map; 3971 struct vm_map *new_map; 3972 struct vm_map_entry *old_entry; 3973 struct vm_map_entry *new_entry; 3974 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); 3975 3976 vm_map_lock(old_map); 3977 3978 vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map)); 3979 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3980 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3981 new_map = &vm2->vm_map; /* XXX */ 3982 3983 old_entry = old_map->header.next; 3984 new_map->size = old_map->size; 3985 3986 /* 3987 * go entry-by-entry 3988 */ 3989 3990 while (old_entry != &old_map->header) { 3991 3992 /* 3993 * first, some sanity checks on the old entry 3994 */ 3995 3996 KASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3997 KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || 3998 !UVM_ET_ISNEEDSCOPY(old_entry)); 3999 4000 switch (old_entry->inheritance) { 4001 case MAP_INHERIT_NONE: 4002 4003 /* 4004 * drop the mapping, modify size 4005 */ 4006 new_map->size -= old_entry->end - old_entry->start; 4007 break; 4008 4009 case MAP_INHERIT_SHARE: 4010 4011 /* 4012 * share the mapping: this means we want the old and 4013 * new entries to share amaps and backing objects. 4014 */ 4015 /* 4016 * if the old_entry needs a new amap (due to prev fork) 4017 * then we need to allocate it now so that we have 4018 * something we own to share with the new_entry. [in 4019 * other words, we need to clear needs_copy] 4020 */ 4021 4022 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 4023 /* get our own amap, clears needs_copy */ 4024 amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK, 4025 0, 0); 4026 /* XXXCDC: WAITOK??? 
*/ 4027 } 4028 4029 new_entry = uvm_mapent_alloc(new_map, 0); 4030 /* old_entry -> new_entry */ 4031 uvm_mapent_copy(old_entry, new_entry); 4032 4033 /* new pmap has nothing wired in it */ 4034 new_entry->wired_count = 0; 4035 4036 /* 4037 * gain reference to object backing the map (can't 4038 * be a submap, already checked this case). 4039 */ 4040 4041 if (new_entry->aref.ar_amap) 4042 uvm_map_reference_amap(new_entry, AMAP_SHARED); 4043 4044 if (new_entry->object.uvm_obj && 4045 new_entry->object.uvm_obj->pgops->pgo_reference) 4046 new_entry->object.uvm_obj-> 4047 pgops->pgo_reference( 4048 new_entry->object.uvm_obj); 4049 4050 /* insert entry at end of new_map's entry list */ 4051 uvm_map_entry_link(new_map, new_map->header.prev, 4052 new_entry); 4053 4054 break; 4055 4056 case MAP_INHERIT_COPY: 4057 4058 /* 4059 * copy-on-write the mapping (using mmap's 4060 * MAP_PRIVATE semantics) 4061 * 4062 * allocate new_entry, adjust reference counts. 4063 * (note that new references are read-only). 4064 */ 4065 4066 new_entry = uvm_mapent_alloc(new_map, 0); 4067 /* old_entry -> new_entry */ 4068 uvm_mapent_copy(old_entry, new_entry); 4069 4070 if (new_entry->aref.ar_amap) 4071 uvm_map_reference_amap(new_entry, 0); 4072 4073 if (new_entry->object.uvm_obj && 4074 new_entry->object.uvm_obj->pgops->pgo_reference) 4075 new_entry->object.uvm_obj->pgops->pgo_reference 4076 (new_entry->object.uvm_obj); 4077 4078 /* new pmap has nothing wired in it */ 4079 new_entry->wired_count = 0; 4080 4081 new_entry->etype |= 4082 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4083 uvm_map_entry_link(new_map, new_map->header.prev, 4084 new_entry); 4085 4086 /* 4087 * the new entry will need an amap. it will either 4088 * need to be copied from the old entry or created 4089 * from scratch (if the old entry does not have an 4090 * amap). can we defer this process until later 4091 * (by setting "needs_copy") or do we need to copy 4092 * the amap now? 4093 * 4094 * we must copy the amap now if any of the following 4095 * conditions hold: 4096 * 1. the old entry has an amap and that amap is 4097 * being shared. this means that the old (parent) 4098 * process is sharing the amap with another 4099 * process. if we do not clear needs_copy here 4100 * we will end up in a situation where both the 4101 * parent and child process are refering to the 4102 * same amap with "needs_copy" set. if the 4103 * parent write-faults, the fault routine will 4104 * clear "needs_copy" in the parent by allocating 4105 * a new amap. this is wrong because the 4106 * parent is supposed to be sharing the old amap 4107 * and the new amap will break that. 4108 * 4109 * 2. if the old entry has an amap and a non-zero 4110 * wire count then we are going to have to call 4111 * amap_cow_now to avoid page faults in the 4112 * parent process. since amap_cow_now requires 4113 * "needs_copy" to be clear we might as well 4114 * clear it here as well. 4115 * 4116 */ 4117 4118 if (old_entry->aref.ar_amap != NULL) { 4119 if ((amap_flags(old_entry->aref.ar_amap) & 4120 AMAP_SHARED) != 0 || 4121 VM_MAPENT_ISWIRED(old_entry)) { 4122 4123 amap_copy(new_map, new_entry, 4124 AMAP_COPY_NOCHUNK, 0, 0); 4125 /* XXXCDC: M_WAITOK ... ok? */ 4126 } 4127 } 4128 4129 /* 4130 * if the parent's entry is wired down, then the 4131 * parent process does not want page faults on 4132 * access to that memory. this means that we 4133 * cannot do copy-on-write because we can't write 4134 * protect the old entry. 

			/*
			 * if the parent's entry is wired down, then the
			 * parent process does not want page faults on
			 * access to that memory.  this means that we
			 * cannot do copy-on-write because we can't write
			 * protect the old entry.  in this case we
			 * resolve all copy-on-write faults now, using
			 * amap_cow_now.  note that we have already
			 * allocated any needed amap (above).
			 */

			if (VM_MAPENT_ISWIRED(old_entry)) {

				/*
				 * resolve all copy-on-write faults now
				 * (note that there is nothing to do if
				 * the old mapping does not have an amap).
				 */
				if (old_entry->aref.ar_amap)
					amap_cow_now(new_map, new_entry);

			} else {

				/*
				 * setup mappings to trigger copy-on-write faults
				 * we must write-protect the parent if it has
				 * an amap and it is not already "needs_copy"...
				 * if it is already "needs_copy" then the parent
				 * has already been write-protected by a previous
				 * fork operation.
				 */

				if (old_entry->aref.ar_amap &&
				    !UVM_ET_ISNEEDSCOPY(old_entry)) {
					if (old_entry->max_protection & VM_PROT_WRITE) {
						pmap_protect(old_map->pmap,
						    old_entry->start,
						    old_entry->end,
						    old_entry->protection &
						    ~VM_PROT_WRITE);
						pmap_update(old_map->pmap);
					}
					old_entry->etype |= UVM_ET_NEEDSCOPY;
				}
			}
			break;
		}  /* end of switch statement */
		old_entry = old_entry->next;
	}

	vm_map_unlock(old_map);

#ifdef SYSVSHM
	if (vm1->vm_shm)
		shmfork(vm1, vm2);
#endif

#ifdef PMAP_FORK
	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
#endif

	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
	return (vm2);
}


/*
 * in-kernel map entry allocation.
 */

struct uvm_kmapent_hdr {
	LIST_ENTRY(uvm_kmapent_hdr) ukh_listq;
	int ukh_nused;
	struct vm_map_entry *ukh_freelist;
	struct vm_map *ukh_map;
	struct vm_map_entry ukh_entries[0];
};

#define	UVM_KMAPENT_CHUNK				\
	((PAGE_SIZE - sizeof(struct uvm_kmapent_hdr))	\
	/ sizeof(struct vm_map_entry))

#define	UVM_KHDR_FIND(entry)	\
	((struct uvm_kmapent_hdr *)(((vaddr_t)entry) & ~PAGE_MASK))


#ifdef DIAGNOSTIC
static struct vm_map *
uvm_kmapent_map(struct vm_map_entry *entry)
{
	const struct uvm_kmapent_hdr *ukh;

	ukh = UVM_KHDR_FIND(entry);
	return ukh->ukh_map;
}
#endif

static inline struct vm_map_entry *
uvm_kmapent_get(struct uvm_kmapent_hdr *ukh)
{
	struct vm_map_entry *entry;

	KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
	KASSERT(ukh->ukh_nused >= 0);

	entry = ukh->ukh_freelist;
	if (entry) {
		KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
		    == UVM_MAP_KERNEL);
		ukh->ukh_freelist = entry->next;
		ukh->ukh_nused++;
		KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
	} else {
		KASSERT(ukh->ukh_nused == UVM_KMAPENT_CHUNK);
	}

	return entry;
}

static inline void
uvm_kmapent_put(struct uvm_kmapent_hdr *ukh, struct vm_map_entry *entry)
{

	KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
	    == UVM_MAP_KERNEL);
	KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
	KASSERT(ukh->ukh_nused > 0);
	KASSERT(ukh->ukh_freelist != NULL ||
	    ukh->ukh_nused == UVM_KMAPENT_CHUNK);
	KASSERT(ukh->ukh_freelist == NULL ||
	    ukh->ukh_nused < UVM_KMAPENT_CHUNK);

	ukh->ukh_nused--;
	entry->next = ukh->ukh_freelist;
	ukh->ukh_freelist = entry;
}
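
/*
 * note on the chunk layout: each chunk is a single page.  the page begins
 * with the uvm_kmapent_hdr and the rest of it is carved into
 * UVM_KMAPENT_CHUNK vm_map_entry slots (ukh_entries[]).  because every
 * entry of a chunk lives inside that one page, UVM_KHDR_FIND() can recover
 * the owning header from any entry pointer by masking off the page offset;
 * that is, for any chunk entry "e":
 *
 *	UVM_KHDR_FIND(e)->ukh_entries <= e <
 *	    UVM_KHDR_FIND(e)->ukh_entries + UVM_KMAPENT_CHUNK
 */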

/*
 * uvm_kmapent_alloc: allocate a map entry for in-kernel map
 */

static struct vm_map_entry *
uvm_kmapent_alloc(struct vm_map *map, int flags)
{
	struct vm_page *pg;
	struct uvm_map_args args;
	struct uvm_kmapent_hdr *ukh;
	struct vm_map_entry *entry;
	uvm_flag_t mapflags = UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, flags | UVM_FLAG_NOMERGE);
	vaddr_t va;
	int error;
	int i;
	int s;

	KDASSERT(UVM_KMAPENT_CHUNK > 2);
	KDASSERT(kernel_map != NULL);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	UVMMAP_EVCNT_INCR(uke_alloc);
	entry = NULL;
again:
	/*
	 * try to grab an entry from freelist.
	 */
	s = splvm();
	simple_lock(&uvm.kentry_lock);
	ukh = LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free);
	if (ukh) {
		entry = uvm_kmapent_get(ukh);
		if (ukh->ukh_nused == UVM_KMAPENT_CHUNK)
			LIST_REMOVE(ukh, ukh_listq);
	}
	simple_unlock(&uvm.kentry_lock);
	splx(s);

	if (entry)
		return entry;

	/*
	 * there's no free entry for this vm_map.
	 * now we need to allocate some vm_map_entry.
	 * for simplicity, always allocate one page chunk of them at once.
	 */

	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (__predict_false(pg == NULL)) {
		if (flags & UVM_FLAG_NOWAIT)
			return NULL;
		uvm_wait("kme_alloc");
		goto again;
	}

	error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, 0, 0, mapflags, &args);
	if (error) {
		uvm_pagefree(pg);
		return NULL;
	}

	va = args.uma_start;

	pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
	pmap_update(vm_map_pmap(map));

	ukh = (void *)va;

	/*
	 * use the first entry for ukh itself.
	 */

	entry = &ukh->ukh_entries[0];
	entry->flags = UVM_MAP_KERNEL | UVM_MAP_KMAPENT;
	error = uvm_map_enter(map, &args, entry);
	KASSERT(error == 0);

	ukh->ukh_nused = UVM_KMAPENT_CHUNK;
	ukh->ukh_map = map;
	ukh->ukh_freelist = NULL;
	for (i = UVM_KMAPENT_CHUNK - 1; i >= 2; i--) {
		struct vm_map_entry *xentry = &ukh->ukh_entries[i];

		xentry->flags = UVM_MAP_KERNEL;
		uvm_kmapent_put(ukh, xentry);
	}
	KASSERT(ukh->ukh_nused == 2);

	s = splvm();
	simple_lock(&uvm.kentry_lock);
	LIST_INSERT_HEAD(&vm_map_to_kernel(map)->vmk_kentry_free,
	    ukh, ukh_listq);
	simple_unlock(&uvm.kentry_lock);
	splx(s);

	/*
	 * return second entry.
	 */

	entry = &ukh->ukh_entries[1];
	entry->flags = UVM_MAP_KERNEL;
	UVMMAP_EVCNT_INCR(ukh_alloc);
	return entry;
}
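
/*
 * note on chunk lifetime: ukh_entries[0] is consumed by the mapping of the
 * chunk page itself and ukh_entries[1] is handed to the caller, which is
 * why a freshly allocated chunk ends up with ukh_nused == 2.  entry 0 is
 * only given back when the whole chunk is torn down in uvm_kmapent_free()
 * below.
 */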

/*
 * uvm_kmapent_free: free map entry for in-kernel map
 */

static void
uvm_kmapent_free(struct vm_map_entry *entry)
{
	struct uvm_kmapent_hdr *ukh;
	struct vm_page *pg;
	struct vm_map *map;
	struct pmap *pmap;
	vaddr_t va;
	paddr_t pa;
	struct vm_map_entry *deadentry;
	int s;

	UVMMAP_EVCNT_INCR(uke_free);
	ukh = UVM_KHDR_FIND(entry);
	map = ukh->ukh_map;

	s = splvm();
	simple_lock(&uvm.kentry_lock);
	uvm_kmapent_put(ukh, entry);
	if (ukh->ukh_nused > 1) {
		if (ukh->ukh_nused == UVM_KMAPENT_CHUNK - 1)
			LIST_INSERT_HEAD(
			    &vm_map_to_kernel(map)->vmk_kentry_free,
			    ukh, ukh_listq);
		simple_unlock(&uvm.kentry_lock);
		splx(s);
		return;
	}

	/*
	 * now we can free this ukh.
	 *
	 * however, keep an empty ukh to avoid ping-pong.
	 */

	if (LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free) == ukh &&
	    LIST_NEXT(ukh, ukh_listq) == NULL) {
		simple_unlock(&uvm.kentry_lock);
		splx(s);
		return;
	}
	LIST_REMOVE(ukh, ukh_listq);
	simple_unlock(&uvm.kentry_lock);
	splx(s);

	KASSERT(ukh->ukh_nused == 1);

	/*
	 * remove map entry for ukh itself.
	 */

	va = (vaddr_t)ukh;
	KASSERT((va & PAGE_MASK) == 0);
	vm_map_lock(map);
	uvm_unmap_remove(map, va, va + PAGE_SIZE, &deadentry, NULL, 0);
	KASSERT(deadentry->flags & UVM_MAP_KERNEL);
	KASSERT(deadentry->flags & UVM_MAP_KMAPENT);
	KASSERT(deadentry->next == NULL);
	KASSERT(deadentry == &ukh->ukh_entries[0]);

	/*
	 * unmap the page from pmap and free it.
	 */

	pmap = vm_map_pmap(map);
	KASSERT(pmap == pmap_kernel());
	if (!pmap_extract(pmap, va, &pa))
		panic("%s: no mapping", __func__);
	pmap_kremove(va, PAGE_SIZE);
	vm_map_unlock(map);
	pg = PHYS_TO_VM_PAGE(pa);
	uvm_pagefree(pg);
	UVMMAP_EVCNT_INCR(ukh_free);
}

static vsize_t
uvm_kmapent_overhead(vsize_t size)
{

	/*
	 * - the max number of unmerged entries is howmany(size, PAGE_SIZE)
	 *   as the min allocation unit is PAGE_SIZE.
	 * - UVM_KMAPENT_CHUNK "kmapent"s are allocated from a page.
	 *   one of them is used to map the page itself.
	 */

	return howmany(howmany(size, PAGE_SIZE), (UVM_KMAPENT_CHUNK - 1)) *
	    PAGE_SIZE;
}

/*
 * map entry reservation
 */

/*
 * uvm_mapent_reserve: reserve map entries for clipping before locking map.
 *
 * => needed when unmapping entries allocated without UVM_FLAG_QUANTUM.
 * => caller shouldn't hold map locked.
 */
int
uvm_mapent_reserve(struct vm_map *map, struct uvm_mapent_reservation *umr,
    int nentries, int flags)
{

	umr->umr_nentries = 0;

	if ((flags & UVM_FLAG_QUANTUM) != 0)
		return 0;

	if (!VM_MAP_USE_KMAPENT(map))
		return 0;

	while (nentries--) {
		struct vm_map_entry *ent;
		ent = uvm_kmapent_alloc(map, flags);
		if (!ent) {
			uvm_mapent_unreserve(map, umr);
			return ENOMEM;
		}
		UMR_PUTENTRY(umr, ent);
	}

	return 0;
}

/*
 * uvm_mapent_unreserve:
 *
 * => caller shouldn't hold map locked.
 * => never fails or sleeps.
 */
void
uvm_mapent_unreserve(struct vm_map *map, struct uvm_mapent_reservation *umr)
{

	while (!UMR_EMPTY(umr))
		uvm_kmapent_free(UMR_GETENTRY(umr));
}
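
/*
 * illustrative sketch of the reservation protocol (a minimal example only;
 * the function name is made up and "map"/"start"/"end" stand for the
 * caller's values).  this mirrors what uvm_unmap1() does later in this
 * file: entries are reserved while the map is unlocked, the locked section
 * draws on the reservation for any clipping it has to do, and leftovers
 * are returned afterwards.
 */
#if 0
static void
example_unmap_with_reservation(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct uvm_mapent_reservation umr;
	struct vm_map_entry *dead_entries;

	/* may allocate and sleep; the map must not be locked yet */
	uvm_mapent_reserve(map, &umr, 2, 0);

	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead_entries, &umr, 0);
	vm_map_unlock(map);

	/* hand back whatever the locked section did not use */
	uvm_mapent_unreserve(map, &umr);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
}
#endif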

/*
 * uvm_mapent_trymerge: try to merge an entry with its neighbors.
 *
 * => called with map locked.
 * => return non-zero if successfully merged.
 */

int
uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
{
	struct uvm_object *uobj;
	struct vm_map_entry *next;
	struct vm_map_entry *prev;
	vsize_t size;
	int merged = 0;
	boolean_t copying;
	int newetype;

	if (VM_MAP_USE_KMAPENT(map)) {
		return 0;
	}
	if (entry->aref.ar_amap != NULL) {
		return 0;
	}
	if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
		return 0;
	}

	uobj = entry->object.uvm_obj;
	size = entry->end - entry->start;
	copying = (flags & UVM_MERGE_COPYING) != 0;
	newetype = copying ?
	    (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;

	next = entry->next;
	if (next != &map->header &&
	    next->start == entry->end &&
	    ((copying && next->aref.ar_amap != NULL &&
	    amap_refs(next->aref.ar_amap) == 1) ||
	    (!copying && next->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(next, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL || entry->offset + size == next->offset)) {
		int error;

		if (copying) {
			error = amap_extend(next, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
			}

			entry->end = next->end;
			clear_hints(map, next);
			uvm_map_entry_unlink(map, next);
			if (copying) {
				entry->aref = next->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge forwardmerge");
			uvm_mapent_free_merged(map, next);
			merged++;
		}
	}
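
	/*
	 * now try to absorb the previous entry.  note the "!merged" test
	 * below: in the copying case a backward merge would take over
	 * prev's amap, which must not happen if the forward merge above
	 * already took over next's amap.
	 */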

	prev = entry->prev;
	if (prev != &map->header &&
	    prev->end == entry->start &&
	    ((copying && !merged && prev->aref.ar_amap != NULL &&
	    amap_refs(prev->aref.ar_amap) == 1) ||
	    (!copying && prev->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(prev, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL ||
	    prev->offset + prev->end - prev->start == entry->offset)) {
		int error;

		if (copying) {
			error = amap_extend(prev, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
				entry->offset = prev->offset;
			}

			entry->start = prev->start;
			clear_hints(map, prev);
			uvm_map_entry_unlink(map, prev);
			if (copying) {
				entry->aref = prev->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge backmerge");
			uvm_mapent_free_merged(map, prev);
			merged++;
		}
	}

	return merged;
}

#if defined(DDB)

/*
 * DDB hooks
 */

/*
 * uvm_map_printit: actually prints the map
 */

void
uvm_map_printit(struct vm_map *map, boolean_t full,
    void (*pr)(const char *, ...))
{
	struct vm_map_entry *entry;

	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map),
	    vm_map_max(map));
	(*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
	    map->nentries, map->size, map->ref_count, map->timestamp,
	    map->flags);
	(*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
	    pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
	if (!full)
		return;
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
		    entry, entry->start, entry->end, entry->object.uvm_obj,
		    (long long)entry->offset, entry->aref.ar_amap,
		    entry->aref.ar_pageoff);
		(*pr)(
		    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
		    "wc=%d, adv=%d\n",
		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
		    entry->protection, entry->max_protection,
		    entry->inheritance, entry->wired_count, entry->advice);
	}
}

/*
 * uvm_object_printit: actually prints the object
 */

void
uvm_object_printit(struct uvm_object *uobj, boolean_t full,
    void (*pr)(const char *, ...))
{
	struct vm_page *pg;
	int cnt = 0;

	(*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ",
	    uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages);
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		(*pr)("refs=<SYSTEM>\n");
	else
		(*pr)("refs=%d\n", uobj->uo_refs);

	if (!full) {
		return;
	}
	(*pr)(" PAGES <pg,offset>:\n ");
	TAILQ_FOREACH(pg, &uobj->memq, listq) {
		cnt++;
		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
		if ((cnt % 3) == 0) {
			(*pr)("\n ");
		}
	}
	if ((cnt % 3) != 0) {
		(*pr)("\n");
	}
}

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

void
uvm_page_printit(struct vm_page *pg, boolean_t full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pglist *pgl;
	char pgbuf[128];
	char pqbuf[128];

	(*pr)("PAGE %p:\n", pg);
	bitmask_snprintf(pg->flags, page_flagbits, pgbuf, sizeof(pgbuf));
	bitmask_snprintf(pg->pqflags, page_pqflagbits, pqbuf, sizeof(pqbuf));
	(*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n",
	    pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
	(*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)(" owning process = %d, tag=%s\n",
		    pg->owner, pg->owner_tag);
	else
		(*pr)(" page not busy, no owner\n");
#else
	(*pr)(" [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->pqflags & PQ_FREE) == 0) {
		if (pg->pqflags & PQ_ANON) {
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
				(*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				    (pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)(" anon backpointer is OK\n");
		} else {
			uobj = pg->uobject;
			if (uobj) {
				(*pr)(" checking object list\n");
				TAILQ_FOREACH(tpg, &uobj->memq, listq) {
					if (tpg == pg) {
						break;
					}
				}
				if (tpg)
					(*pr)(" page found on object list\n");
				else
					(*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->pqflags & PQ_FREE) {
		int fl = uvm_page_lookup_freelist(pg);
		int color = VM_PGCOLOR_BUCKET(pg);
		pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
		    ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];
	} else {
		pgl = NULL;
	}

	if (pgl) {
		(*pr)(" checking pageq list\n");
		TAILQ_FOREACH(tpg, pgl, pageq) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)(" page found on pageq list\n");
		else
			(*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
<<<\n"); 4791 } 4792 } 4793 4794 /* 4795 * uvm_pages_printthem - print a summary of all managed pages 4796 */ 4797 4798 void 4799 uvm_page_printall(void (*pr)(const char *, ...)) 4800 { 4801 unsigned i; 4802 struct vm_page *pg; 4803 4804 (*pr)("%18s %4s %2s %18s %18s" 4805 #ifdef UVM_PAGE_TRKOWN 4806 " OWNER" 4807 #endif 4808 "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON"); 4809 for (i = 0; i < vm_nphysseg; i++) { 4810 for (pg = vm_physmem[i].pgs; pg <= vm_physmem[i].lastpg; pg++) { 4811 (*pr)("%18p %04x %02x %18p %18p", 4812 pg, pg->flags, pg->pqflags, pg->uobject, 4813 pg->uanon); 4814 #ifdef UVM_PAGE_TRKOWN 4815 if (pg->flags & PG_BUSY) 4816 (*pr)(" %d [%s]", pg->owner, pg->owner_tag); 4817 #endif 4818 (*pr)("\n"); 4819 } 4820 } 4821 } 4822 4823 #endif 4824 4825 /* 4826 * uvm_map_create: create map 4827 */ 4828 4829 struct vm_map * 4830 uvm_map_create(pmap_t pmap, vaddr_t vmin, vaddr_t vmax, int flags) 4831 { 4832 struct vm_map *result; 4833 4834 MALLOC(result, struct vm_map *, sizeof(struct vm_map), 4835 M_VMMAP, M_WAITOK); 4836 uvm_map_setup(result, vmin, vmax, flags); 4837 result->pmap = pmap; 4838 return(result); 4839 } 4840 4841 /* 4842 * uvm_map_setup: init map 4843 * 4844 * => map must not be in service yet. 4845 */ 4846 4847 void 4848 uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) 4849 { 4850 4851 RB_INIT(&map->rbhead); 4852 map->header.next = map->header.prev = &map->header; 4853 map->nentries = 0; 4854 map->size = 0; 4855 map->ref_count = 1; 4856 vm_map_setmin(map, vmin); 4857 vm_map_setmax(map, vmax); 4858 map->flags = flags; 4859 map->first_free = &map->header; 4860 map->hint = &map->header; 4861 map->timestamp = 0; 4862 lockinit(&map->lock, PVM, "vmmaplk", 0, 0); 4863 simple_lock_init(&map->ref_lock); 4864 simple_lock_init(&map->hint_lock); 4865 simple_lock_init(&map->flags_lock); 4866 } 4867 4868 4869 /* 4870 * U N M A P - m a i n e n t r y p o i n t 4871 */ 4872 4873 /* 4874 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop") 4875 * 4876 * => caller must check alignment and size 4877 * => map must be unlocked (we will lock it) 4878 * => flags is UVM_FLAG_QUANTUM or 0. 4879 */ 4880 4881 void 4882 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4883 { 4884 struct vm_map_entry *dead_entries; 4885 struct uvm_mapent_reservation umr; 4886 UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); 4887 4888 UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", 4889 map, start, end, 0); 4890 /* 4891 * work now done by helper functions. wipe the pmap's and then 4892 * detach from the dead entries... 4893 */ 4894 uvm_mapent_reserve(map, &umr, 2, flags); 4895 vm_map_lock(map); 4896 uvm_unmap_remove(map, start, end, &dead_entries, &umr, flags); 4897 vm_map_unlock(map); 4898 uvm_mapent_unreserve(map, &umr); 4899 4900 if (dead_entries != NULL) 4901 uvm_unmap_detach(dead_entries, 0); 4902 4903 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 4904 } 4905 4906 4907 /* 4908 * uvm_map_reference: add reference to a map 4909 * 4910 * => map need not be locked (we use ref_lock). 


/*
 *   U N M A P   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop")
 *
 * => caller must check alignment and size
 * => map must be unlocked (we will lock it)
 * => flags is UVM_FLAG_QUANTUM or 0.
 */

void
uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
{
	struct vm_map_entry *dead_entries;
	struct uvm_mapent_reservation umr;
	UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)",
	    map, start, end, 0);
	/*
	 * work now done by helper functions.  wipe the pmap and then
	 * detach from the dead entries...
	 */
	uvm_mapent_reserve(map, &umr, 2, flags);
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead_entries, &umr, flags);
	vm_map_unlock(map);
	uvm_mapent_unreserve(map, &umr);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}


/*
 * uvm_map_reference: add reference to a map
 *
 * => map need not be locked (we use ref_lock).
 */

void
uvm_map_reference(struct vm_map *map)
{
	simple_lock(&map->ref_lock);
	map->ref_count++;
	simple_unlock(&map->ref_lock);
}

struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{

	KASSERT(VM_MAP_IS_KERNEL(map));

	return (struct vm_map_kernel *)map;
}

boolean_t
vm_map_starved_p(struct vm_map *map)
{

	if ((map->flags & VM_MAP_WANTVA) != 0) {
		return TRUE;
	}
	/*
	 * XXX: consider the map starved once more than 15/16 of its
	 * address range is in use.
	 */
	if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) {
		return TRUE;
	}
	return FALSE;
}