1 /* $NetBSD: uvm_map.c,v 1.268 2008/12/20 11:33:38 ad Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993, The Regents of the University of California. 6 * 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * The Mach Operating System project at Carnegie-Mellon University. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by Charles D. Cranor, 23 * Washington University, the University of California, Berkeley and 24 * its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 42 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 43 * 44 * 45 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 46 * All rights reserved. 47 * 48 * Permission to use, copy, modify and distribute this software and 49 * its documentation is hereby granted, provided that both the copyright 50 * notice and this permission notice appear in all copies of the 51 * software, derivative works or modified versions, and any portions 52 * thereof, and that both notices appear in supporting documentation. 53 * 54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 57 * 58 * Carnegie Mellon requests users of this software to return to 59 * 60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 61 * School of Computer Science 62 * Carnegie Mellon University 63 * Pittsburgh PA 15213-3890 64 * 65 * any improvements or extensions that they make and grant Carnegie the 66 * rights to redistribute these changes. 
67 */ 68 69 /* 70 * uvm_map.c: uvm map operations 71 */ 72 73 #include <sys/cdefs.h> 74 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.268 2008/12/20 11:33:38 ad Exp $"); 75 76 #include "opt_ddb.h" 77 #include "opt_uvmhist.h" 78 #include "opt_uvm.h" 79 #include "opt_sysv.h" 80 81 #include <sys/param.h> 82 #include <sys/systm.h> 83 #include <sys/mman.h> 84 #include <sys/proc.h> 85 #include <sys/malloc.h> 86 #include <sys/pool.h> 87 #include <sys/kernel.h> 88 #include <sys/mount.h> 89 #include <sys/vnode.h> 90 #include <sys/lockdebug.h> 91 #include <sys/atomic.h> 92 93 #ifdef SYSVSHM 94 #include <sys/shm.h> 95 #endif 96 97 #include <uvm/uvm.h> 98 99 #ifdef DDB 100 #include <uvm/uvm_ddb.h> 101 #endif 102 103 #if !defined(UVMMAP_COUNTERS) 104 105 #define UVMMAP_EVCNT_DEFINE(name) /* nothing */ 106 #define UVMMAP_EVCNT_INCR(ev) /* nothing */ 107 #define UVMMAP_EVCNT_DECR(ev) /* nothing */ 108 109 #else /* defined(UVMMAP_NOCOUNTERS) */ 110 111 #include <sys/evcnt.h> 112 #define UVMMAP_EVCNT_DEFINE(name) \ 113 struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 114 "uvmmap", #name); \ 115 EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name); 116 #define UVMMAP_EVCNT_INCR(ev) uvmmap_evcnt_##ev.ev_count++ 117 #define UVMMAP_EVCNT_DECR(ev) uvmmap_evcnt_##ev.ev_count-- 118 119 #endif /* defined(UVMMAP_NOCOUNTERS) */ 120 121 UVMMAP_EVCNT_DEFINE(ubackmerge) 122 UVMMAP_EVCNT_DEFINE(uforwmerge) 123 UVMMAP_EVCNT_DEFINE(ubimerge) 124 UVMMAP_EVCNT_DEFINE(unomerge) 125 UVMMAP_EVCNT_DEFINE(kbackmerge) 126 UVMMAP_EVCNT_DEFINE(kforwmerge) 127 UVMMAP_EVCNT_DEFINE(kbimerge) 128 UVMMAP_EVCNT_DEFINE(knomerge) 129 UVMMAP_EVCNT_DEFINE(map_call) 130 UVMMAP_EVCNT_DEFINE(mlk_call) 131 UVMMAP_EVCNT_DEFINE(mlk_hint) 132 UVMMAP_EVCNT_DEFINE(mlk_list) 133 UVMMAP_EVCNT_DEFINE(mlk_tree) 134 UVMMAP_EVCNT_DEFINE(mlk_treeloop) 135 UVMMAP_EVCNT_DEFINE(mlk_listloop) 136 137 UVMMAP_EVCNT_DEFINE(uke_alloc) 138 UVMMAP_EVCNT_DEFINE(uke_free) 139 UVMMAP_EVCNT_DEFINE(ukh_alloc) 140 UVMMAP_EVCNT_DEFINE(ukh_free) 141 142 const char vmmapbsy[] = "vmmapbsy"; 143 144 /* 145 * cache for vmspace structures. 146 */ 147 148 static struct pool_cache uvm_vmspace_cache; 149 150 /* 151 * cache for dynamically-allocated map entries. 152 */ 153 154 static struct pool_cache uvm_map_entry_cache; 155 156 MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures"); 157 MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap"); 158 159 #ifdef PMAP_GROWKERNEL 160 /* 161 * This global represents the end of the kernel virtual address 162 * space. If we want to exceed this, we must grow the kernel 163 * virtual address space dynamically. 164 * 165 * Note, this variable is locked by kernel_map's lock. 166 */ 167 vaddr_t uvm_maxkaddr; 168 #endif 169 170 /* 171 * macros 172 */ 173 174 /* 175 * VM_MAP_USE_KMAPENT: determine if uvm_kmapent_alloc/free is used 176 * for the vm_map. 
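 *
 * (Added illustration, not part of the original comment: entries for
 * intrsafe maps and for kernel_map itself cannot safely come from the
 * general pool cache, since that allocation may itself need to touch
 * the very map being changed.  uvm_mapent_alloc() below dispatches
 * roughly as in this sketch:
 *
 *	if (VM_MAP_USE_KMAPENT(map))
 *		me = uvm_kmapent_alloc(map, flags);	-- per-map kernel pages
 *	else
 *		me = pool_cache_get(&uvm_map_entry_cache, pflags);
 * )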
177 */ 178 extern struct vm_map *pager_map; /* XXX */ 179 #define VM_MAP_USE_KMAPENT_FLAGS(flags) \ 180 (((flags) & VM_MAP_INTRSAFE) != 0) 181 #define VM_MAP_USE_KMAPENT(map) \ 182 (VM_MAP_USE_KMAPENT_FLAGS((map)->flags) || (map) == kernel_map) 183 184 /* 185 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging 186 */ 187 188 #define UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \ 189 prot, maxprot, inh, adv, wire) \ 190 ((ent)->etype == (type) && \ 191 (((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE | UVM_MAP_QUANTUM)) \ 192 == 0 && \ 193 (ent)->object.uvm_obj == (uobj) && \ 194 (ent)->protection == (prot) && \ 195 (ent)->max_protection == (maxprot) && \ 196 (ent)->inheritance == (inh) && \ 197 (ent)->advice == (adv) && \ 198 (ent)->wired_count == (wire)) 199 200 /* 201 * uvm_map_entry_link: insert entry into a map 202 * 203 * => map must be locked 204 */ 205 #define uvm_map_entry_link(map, after_where, entry) do { \ 206 uvm_mapent_check(entry); \ 207 (map)->nentries++; \ 208 (entry)->prev = (after_where); \ 209 (entry)->next = (after_where)->next; \ 210 (entry)->prev->next = (entry); \ 211 (entry)->next->prev = (entry); \ 212 uvm_rb_insert((map), (entry)); \ 213 } while (/*CONSTCOND*/ 0) 214 215 /* 216 * uvm_map_entry_unlink: remove entry from a map 217 * 218 * => map must be locked 219 */ 220 #define uvm_map_entry_unlink(map, entry) do { \ 221 KASSERT((entry) != (map)->first_free); \ 222 KASSERT((entry) != (map)->hint); \ 223 uvm_mapent_check(entry); \ 224 (map)->nentries--; \ 225 (entry)->next->prev = (entry)->prev; \ 226 (entry)->prev->next = (entry)->next; \ 227 uvm_rb_remove((map), (entry)); \ 228 } while (/*CONSTCOND*/ 0) 229 230 /* 231 * SAVE_HINT: saves the specified entry as the hint for future lookups. 232 * 233 * => map need not be locked. 234 */ 235 #define SAVE_HINT(map, check, value) do { \ 236 if ((map)->hint == (check)) \ 237 (map)->hint = (value); \ 238 } while (/*CONSTCOND*/ 0) 239 240 /* 241 * clear_hints: ensure that hints don't point to the entry. 242 * 243 * => map must be write-locked. 
244 */ 245 static void 246 clear_hints(struct vm_map *map, struct vm_map_entry *ent) 247 { 248 249 SAVE_HINT(map, ent, ent->prev); 250 if (map->first_free == ent) { 251 map->first_free = ent->prev; 252 } 253 } 254 255 /* 256 * VM_MAP_RANGE_CHECK: check and correct range 257 * 258 * => map must at least be read locked 259 */ 260 261 #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 262 if (start < vm_map_min(map)) \ 263 start = vm_map_min(map); \ 264 if (end > vm_map_max(map)) \ 265 end = vm_map_max(map); \ 266 if (start > end) \ 267 start = end; \ 268 } while (/*CONSTCOND*/ 0) 269 270 /* 271 * local prototypes 272 */ 273 274 static struct vm_map_entry * 275 uvm_mapent_alloc(struct vm_map *, int); 276 static struct vm_map_entry * 277 uvm_mapent_alloc_split(struct vm_map *, 278 const struct vm_map_entry *, int, 279 struct uvm_mapent_reservation *); 280 static void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 281 static void uvm_mapent_free(struct vm_map_entry *); 282 #if defined(DEBUG) 283 static void _uvm_mapent_check(const struct vm_map_entry *, const char *, 284 int); 285 #define uvm_mapent_check(map) _uvm_mapent_check(map, __FILE__, __LINE__) 286 #else /* defined(DEBUG) */ 287 #define uvm_mapent_check(e) /* nothing */ 288 #endif /* defined(DEBUG) */ 289 static struct vm_map_entry * 290 uvm_kmapent_alloc(struct vm_map *, int); 291 static void uvm_kmapent_free(struct vm_map_entry *); 292 static vsize_t uvm_kmapent_overhead(vsize_t); 293 294 static void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 295 static void uvm_map_reference_amap(struct vm_map_entry *, int); 296 static int uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int, 297 struct vm_map_entry *); 298 static void uvm_map_unreference_amap(struct vm_map_entry *, int); 299 300 int _uvm_map_sanity(struct vm_map *); 301 int _uvm_tree_sanity(struct vm_map *); 302 static vsize_t uvm_rb_maxgap(const struct vm_map_entry *); 303 304 CTASSERT(offsetof(struct vm_map_entry, rb_node) == 0); 305 #define ROOT_ENTRY(map) ((struct vm_map_entry *)(map)->rb_tree.rbt_root) 306 #define LEFT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_left) 307 #define RIGHT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_right) 308 #define PARENT_ENTRY(map, entry) \ 309 (ROOT_ENTRY(map) == (entry) \ 310 ? 
NULL \
	    : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))

static int
uvm_map_compare_nodes(const struct rb_node *nparent,
    const struct rb_node *nkey)
{
	const struct vm_map_entry *eparent = (const void *) nparent;
	const struct vm_map_entry *ekey = (const void *) nkey;

	KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
	KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);

	if (ekey->start < eparent->start)
		return -1;
	if (ekey->start >= eparent->end)
		return 1;
	return 0;
}

static int
uvm_map_compare_key(const struct rb_node *nparent, const void *vkey)
{
	const struct vm_map_entry *eparent = (const void *) nparent;
	const vaddr_t va = *(const vaddr_t *) vkey;

	if (va < eparent->start)
		return -1;
	if (va >= eparent->end)
		return 1;
	return 0;
}

static const struct rb_tree_ops uvm_map_tree_ops = {
	.rbto_compare_nodes = uvm_map_compare_nodes,
	.rbto_compare_key = uvm_map_compare_key,
};

static inline vsize_t
uvm_rb_gap(const struct vm_map_entry *entry)
{
	KASSERT(entry->next != NULL);
	return entry->next->start - entry->end;
}

static vsize_t
uvm_rb_maxgap(const struct vm_map_entry *entry)
{
	struct vm_map_entry *child;
	vsize_t maxgap = entry->gap;

	/*
	 * We need maxgap to be the largest gap of us or any of our
	 * descendants.  Since each of our children's maxgap is the
	 * cached value of their largest gap of themselves or their
	 * descendants, we can just use that value and avoid recursing
	 * down the tree to calculate it.
	 */
	if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
		maxgap = child->maxgap;

	if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
		maxgap = child->maxgap;

	return maxgap;
}

static void
uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *parent;

	KASSERT(entry->gap == uvm_rb_gap(entry));
	entry->maxgap = uvm_rb_maxgap(entry);

	while ((parent = PARENT_ENTRY(map, entry)) != NULL) {
		struct vm_map_entry *brother;
		vsize_t maxgap = parent->gap;

		KDASSERT(parent->gap == uvm_rb_gap(parent));
		if (maxgap < entry->maxgap)
			maxgap = entry->maxgap;
		/*
		 * Since we work our way towards the root, we know entry's
		 * maxgap value is ok, but its brother's may now be
		 * out-of-date due to rebalancing.  So refresh it.
		 */
		brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER];
		if (brother != NULL) {
			KDASSERT(brother->gap == uvm_rb_gap(brother));
			brother->maxgap = uvm_rb_maxgap(brother);
			if (maxgap < brother->maxgap)
				maxgap = brother->maxgap;
		}

		parent->maxgap = maxgap;
		entry = parent;
	}
}

static void
uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	entry->gap = entry->maxgap = uvm_rb_gap(entry);
	if (entry->prev != &map->header)
		entry->prev->gap = uvm_rb_gap(entry->prev);

	if (!rb_tree_insert_node(&map->rb_tree, &entry->rb_node))
		panic("uvm_rb_insert: map %p: duplicate entry?", map);

	/*
	 * If the previous entry is not our immediate left child, then it's an
	 * ancestor and will be fixed up on the way to the root.  We don't
	 * have to check entry->prev against &map->header since &map->header
	 * will never be in the tree.
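	 *
	 * Illustration of the gap/maxgap bookkeeping that the fixup
	 * maintains (example addresses only, not from the original):
	 *
	 *	A = [0x1000, 0x2000)	gap(A) = B->start - A->end = 0x1000
	 *	B = [0x3000, 0x4000)	gap(B) = C->start - B->end = 0x4000
	 *	C = [0x8000, 0x9000)
	 *
	 * If B is the tree parent of A and C, then maxgap(B) is
	 * max(gap(B), maxgap(A), maxgap(C)) = 0x4000, which lets
	 * uvm_map_findspace() prune any subtree whose maxgap is smaller
	 * than the length being sought.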
425 */ 426 uvm_rb_fixup(map, 427 LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry); 428 } 429 430 static void 431 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 432 { 433 struct vm_map_entry *prev_parent = NULL, *next_parent = NULL; 434 435 /* 436 * If we are removing an interior node, then an adjacent node will 437 * be used to replace its position in the tree. Therefore we will 438 * need to fixup the tree starting at the parent of the replacement 439 * node. So record their parents for later use. 440 */ 441 if (entry->prev != &map->header) 442 prev_parent = PARENT_ENTRY(map, entry->prev); 443 if (entry->next != &map->header) 444 next_parent = PARENT_ENTRY(map, entry->next); 445 446 rb_tree_remove_node(&map->rb_tree, &entry->rb_node); 447 448 /* 449 * If the previous node has a new parent, fixup the tree starting 450 * at the previous node's old parent. 451 */ 452 if (entry->prev != &map->header) { 453 /* 454 * Update the previous entry's gap due to our absence. 455 */ 456 entry->prev->gap = uvm_rb_gap(entry->prev); 457 uvm_rb_fixup(map, entry->prev); 458 if (prev_parent != NULL 459 && prev_parent != entry 460 && prev_parent != PARENT_ENTRY(map, entry->prev)) 461 uvm_rb_fixup(map, prev_parent); 462 } 463 464 /* 465 * If the next node has a new parent, fixup the tree starting 466 * at the next node's old parent. 467 */ 468 if (entry->next != &map->header) { 469 uvm_rb_fixup(map, entry->next); 470 if (next_parent != NULL 471 && next_parent != entry 472 && next_parent != PARENT_ENTRY(map, entry->next)) 473 uvm_rb_fixup(map, next_parent); 474 } 475 } 476 477 #if defined(DEBUG) 478 int uvm_debug_check_map = 0; 479 int uvm_debug_check_rbtree = 0; 480 #define uvm_map_check(map, name) \ 481 _uvm_map_check((map), (name), __FILE__, __LINE__) 482 static void 483 _uvm_map_check(struct vm_map *map, const char *name, 484 const char *file, int line) 485 { 486 487 if ((uvm_debug_check_map && _uvm_map_sanity(map)) || 488 (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) { 489 panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)", 490 name, map, file, line); 491 } 492 } 493 #else /* defined(DEBUG) */ 494 #define uvm_map_check(map, name) /* nothing */ 495 #endif /* defined(DEBUG) */ 496 497 #if defined(DEBUG) || defined(DDB) 498 int 499 _uvm_map_sanity(struct vm_map *map) 500 { 501 bool first_free_found = false; 502 bool hint_found = false; 503 const struct vm_map_entry *e; 504 505 e = &map->header; 506 for (;;) { 507 if (map->first_free == e) { 508 first_free_found = true; 509 } else if (!first_free_found && e->next->start > e->end) { 510 printf("first_free %p should be %p\n", 511 map->first_free, e); 512 return -1; 513 } 514 if (map->hint == e) { 515 hint_found = true; 516 } 517 518 e = e->next; 519 if (e == &map->header) { 520 break; 521 } 522 } 523 if (!first_free_found) { 524 printf("stale first_free\n"); 525 return -1; 526 } 527 if (!hint_found) { 528 printf("stale hint\n"); 529 return -1; 530 } 531 return 0; 532 } 533 534 int 535 _uvm_tree_sanity(struct vm_map *map) 536 { 537 struct vm_map_entry *tmp, *trtmp; 538 int n = 0, i = 1; 539 540 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 541 if (tmp->gap != uvm_rb_gap(tmp)) { 542 printf("%d/%d gap %lx != %lx %s\n", 543 n + 1, map->nentries, 544 (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp), 545 tmp->next == &map->header ? "(last)" : ""); 546 goto error; 547 } 548 /* 549 * If any entries are out of order, tmp->gap will be unsigned 550 * and will likely exceed the size of the map. 
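		 * (Added clarification: gap is an unsigned vsize_t, so if
		 * tmp->end ever exceeded tmp->next->start, the subtraction
		 * in uvm_rb_gap() would wrap around to a huge value, which
		 * the KASSERT just below catches.)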
551 */ 552 KASSERT(tmp->gap < map->size); 553 n++; 554 } 555 556 if (n != map->nentries) { 557 printf("nentries: %d vs %d\n", n, map->nentries); 558 goto error; 559 } 560 561 trtmp = NULL; 562 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 563 if (tmp->maxgap != uvm_rb_maxgap(tmp)) { 564 printf("maxgap %lx != %lx\n", 565 (ulong)tmp->maxgap, 566 (ulong)uvm_rb_maxgap(tmp)); 567 goto error; 568 } 569 if (trtmp != NULL && trtmp->start >= tmp->start) { 570 printf("corrupt: 0x%lx >= 0x%lx\n", 571 trtmp->start, tmp->start); 572 goto error; 573 } 574 575 trtmp = tmp; 576 } 577 578 for (tmp = map->header.next; tmp != &map->header; 579 tmp = tmp->next, i++) { 580 trtmp = (void *) rb_tree_iterate(&map->rb_tree, &tmp->rb_node, 581 RB_DIR_LEFT); 582 if (trtmp == NULL) 583 trtmp = &map->header; 584 if (tmp->prev != trtmp) { 585 printf("lookup: %d: %p->prev=%p: %p\n", 586 i, tmp, tmp->prev, trtmp); 587 goto error; 588 } 589 trtmp = (void *) rb_tree_iterate(&map->rb_tree, &tmp->rb_node, 590 RB_DIR_RIGHT); 591 if (trtmp == NULL) 592 trtmp = &map->header; 593 if (tmp->next != trtmp) { 594 printf("lookup: %d: %p->next=%p: %p\n", 595 i, tmp, tmp->next, trtmp); 596 goto error; 597 } 598 trtmp = (void *)rb_tree_find_node(&map->rb_tree, &tmp->start); 599 if (trtmp != tmp) { 600 printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, 601 PARENT_ENTRY(map, tmp)); 602 goto error; 603 } 604 } 605 606 return (0); 607 error: 608 return (-1); 609 } 610 #endif /* defined(DEBUG) || defined(DDB) */ 611 612 #ifdef DIAGNOSTIC 613 static struct vm_map *uvm_kmapent_map(struct vm_map_entry *); 614 #endif 615 616 /* 617 * vm_map_lock: acquire an exclusive (write) lock on a map. 618 * 619 * => Note that "intrsafe" maps use only exclusive, spin locks. 620 * 621 * => The locking protocol provides for guaranteed upgrade from shared -> 622 * exclusive by whichever thread currently has the map marked busy. 623 * See "LOCKING PROTOCOL NOTES" in uvm_map.h. This is horrible; among 624 * other problems, it defeats any fairness guarantees provided by RW 625 * locks. 626 */ 627 628 void 629 vm_map_lock(struct vm_map *map) 630 { 631 632 if ((map->flags & VM_MAP_INTRSAFE) != 0) { 633 mutex_spin_enter(&map->mutex); 634 return; 635 } 636 637 for (;;) { 638 rw_enter(&map->lock, RW_WRITER); 639 if (map->busy == NULL) 640 break; 641 if (map->busy == curlwp) 642 break; 643 mutex_enter(&map->misc_lock); 644 rw_exit(&map->lock); 645 if (map->busy != NULL) 646 cv_wait(&map->cv, &map->misc_lock); 647 mutex_exit(&map->misc_lock); 648 } 649 650 map->timestamp++; 651 } 652 653 /* 654 * vm_map_lock_try: try to lock a map, failing if it is already locked. 655 */ 656 657 bool 658 vm_map_lock_try(struct vm_map *map) 659 { 660 661 if ((map->flags & VM_MAP_INTRSAFE) != 0) 662 return mutex_tryenter(&map->mutex); 663 if (!rw_tryenter(&map->lock, RW_WRITER)) 664 return false; 665 if (map->busy != NULL) { 666 rw_exit(&map->lock); 667 return false; 668 } 669 670 map->timestamp++; 671 return true; 672 } 673 674 /* 675 * vm_map_unlock: release an exclusive lock on a map. 676 */ 677 678 void 679 vm_map_unlock(struct vm_map *map) 680 { 681 682 if ((map->flags & VM_MAP_INTRSAFE) != 0) 683 mutex_spin_exit(&map->mutex); 684 else { 685 KASSERT(rw_write_held(&map->lock)); 686 KASSERT(map->busy == NULL || map->busy == curlwp); 687 rw_exit(&map->lock); 688 } 689 } 690 691 /* 692 * vm_map_unbusy: mark the map as unbusy, and wake any waiters that 693 * want an exclusive lock. 
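 *
 * A typical busy/unbusy sequence looks roughly like this (a sketch
 * assuming the protocol described in uvm_map.h, not a verbatim caller):
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);	-- mark busy while holding the write lock
 *	vm_map_unlock(map);	-- drop the lock before blocking work
 *	... long operation that must keep other updaters out ...
 *	vm_map_lock(map);	-- the busy owner is not blocked by "busy"
 *	vm_map_unbusy(map);
 *	vm_map_unlock(map);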
694 */ 695 696 void 697 vm_map_unbusy(struct vm_map *map) 698 { 699 700 KASSERT(map->busy == curlwp); 701 702 /* 703 * Safe to clear 'busy' and 'waiters' with only a read lock held: 704 * 705 * o they can only be set with a write lock held 706 * o writers are blocked out with a read or write hold 707 * o at any time, only one thread owns the set of values 708 */ 709 mutex_enter(&map->misc_lock); 710 map->busy = NULL; 711 cv_broadcast(&map->cv); 712 mutex_exit(&map->misc_lock); 713 } 714 715 /* 716 * vm_map_lock_read: acquire a shared (read) lock on a map. 717 */ 718 719 void 720 vm_map_lock_read(struct vm_map *map) 721 { 722 723 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 724 725 rw_enter(&map->lock, RW_READER); 726 } 727 728 /* 729 * vm_map_unlock_read: release a shared lock on a map. 730 */ 731 732 void 733 vm_map_unlock_read(struct vm_map *map) 734 { 735 736 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 737 738 rw_exit(&map->lock); 739 } 740 741 /* 742 * vm_map_busy: mark a map as busy. 743 * 744 * => the caller must hold the map write locked 745 */ 746 747 void 748 vm_map_busy(struct vm_map *map) 749 { 750 751 KASSERT(rw_write_held(&map->lock)); 752 KASSERT(map->busy == NULL); 753 754 map->busy = curlwp; 755 } 756 757 /* 758 * vm_map_locked_p: return true if the map is write locked. 759 */ 760 761 bool 762 vm_map_locked_p(struct vm_map *map) 763 { 764 765 if ((map->flags & VM_MAP_INTRSAFE) != 0) { 766 return mutex_owned(&map->mutex); 767 } else { 768 return rw_write_held(&map->lock); 769 } 770 } 771 772 /* 773 * uvm_mapent_alloc: allocate a map entry 774 */ 775 776 static struct vm_map_entry * 777 uvm_mapent_alloc(struct vm_map *map, int flags) 778 { 779 struct vm_map_entry *me; 780 int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK; 781 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); 782 783 if (VM_MAP_USE_KMAPENT(map)) { 784 me = uvm_kmapent_alloc(map, flags); 785 } else { 786 me = pool_cache_get(&uvm_map_entry_cache, pflags); 787 if (__predict_false(me == NULL)) 788 return NULL; 789 me->flags = 0; 790 } 791 792 UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, 793 ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); 794 return (me); 795 } 796 797 /* 798 * uvm_mapent_alloc_split: allocate a map entry for clipping. 799 * 800 * => map must be locked by caller if UVM_MAP_QUANTUM is set. 
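 * => a caller that must not sleep while clipping typically reserves
 *    entries up front, roughly as follows (a sketch assuming the
 *    uvm_mapent_reserve()/uvm_mapent_unreserve() helpers and the
 *    UVM_MAP_CLIP_* macros declared in uvm_map.h):
 *
 *	struct uvm_mapent_reservation umr;
 *
 *	uvm_mapent_reserve(map, &umr, 2, flags);
 *	vm_map_lock(map);
 *	UVM_MAP_CLIP_START(map, entry, va, &umr);
 *	...
 *	vm_map_unlock(map);
 *	uvm_mapent_unreserve(map, &umr);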
801 */ 802 803 static struct vm_map_entry * 804 uvm_mapent_alloc_split(struct vm_map *map, 805 const struct vm_map_entry *old_entry, int flags, 806 struct uvm_mapent_reservation *umr) 807 { 808 struct vm_map_entry *me; 809 810 KASSERT(!VM_MAP_USE_KMAPENT(map) || 811 (old_entry->flags & UVM_MAP_QUANTUM) || !UMR_EMPTY(umr)); 812 813 if (old_entry->flags & UVM_MAP_QUANTUM) { 814 struct vm_map_kernel *vmk = vm_map_to_kernel(map); 815 816 KASSERT(vm_map_locked_p(map)); 817 me = vmk->vmk_merged_entries; 818 KASSERT(me); 819 vmk->vmk_merged_entries = me->next; 820 KASSERT(me->flags & UVM_MAP_QUANTUM); 821 } else { 822 me = uvm_mapent_alloc(map, flags); 823 } 824 825 return me; 826 } 827 828 /* 829 * uvm_mapent_free: free map entry 830 */ 831 832 static void 833 uvm_mapent_free(struct vm_map_entry *me) 834 { 835 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); 836 837 UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", 838 me, me->flags, 0, 0); 839 if (me->flags & UVM_MAP_KERNEL) { 840 uvm_kmapent_free(me); 841 } else { 842 pool_cache_put(&uvm_map_entry_cache, me); 843 } 844 } 845 846 /* 847 * uvm_mapent_free_merged: free merged map entry 848 * 849 * => keep the entry if needed. 850 * => caller shouldn't hold map locked if VM_MAP_USE_KMAPENT(map) is true. 851 * => map should be locked if UVM_MAP_QUANTUM is set. 852 */ 853 854 static void 855 uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me) 856 { 857 858 KASSERT(!(me->flags & UVM_MAP_KERNEL) || uvm_kmapent_map(me) == map); 859 860 if (me->flags & UVM_MAP_QUANTUM) { 861 /* 862 * keep this entry for later splitting. 863 */ 864 struct vm_map_kernel *vmk; 865 866 KASSERT(vm_map_locked_p(map)); 867 KASSERT(VM_MAP_IS_KERNEL(map)); 868 KASSERT(!VM_MAP_USE_KMAPENT(map) || 869 (me->flags & UVM_MAP_KERNEL)); 870 871 vmk = vm_map_to_kernel(map); 872 me->next = vmk->vmk_merged_entries; 873 vmk->vmk_merged_entries = me; 874 } else { 875 uvm_mapent_free(me); 876 } 877 } 878 879 /* 880 * uvm_mapent_copy: copy a map entry, preserving flags 881 */ 882 883 static inline void 884 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 885 { 886 887 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 888 ((char *)src)); 889 } 890 891 /* 892 * uvm_mapent_overhead: calculate maximum kva overhead necessary for 893 * map entries. 894 * 895 * => size and flags are the same as uvm_km_suballoc's ones. 
896 */ 897 898 vsize_t 899 uvm_mapent_overhead(vsize_t size, int flags) 900 { 901 902 if (VM_MAP_USE_KMAPENT_FLAGS(flags)) { 903 return uvm_kmapent_overhead(size); 904 } 905 return 0; 906 } 907 908 #if defined(DEBUG) 909 static void 910 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line) 911 { 912 913 if (entry->start >= entry->end) { 914 goto bad; 915 } 916 if (UVM_ET_ISOBJ(entry)) { 917 if (entry->object.uvm_obj == NULL) { 918 goto bad; 919 } 920 } else if (UVM_ET_ISSUBMAP(entry)) { 921 if (entry->object.sub_map == NULL) { 922 goto bad; 923 } 924 } else { 925 if (entry->object.uvm_obj != NULL || 926 entry->object.sub_map != NULL) { 927 goto bad; 928 } 929 } 930 if (!UVM_ET_ISOBJ(entry)) { 931 if (entry->offset != 0) { 932 goto bad; 933 } 934 } 935 936 return; 937 938 bad: 939 panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line); 940 } 941 #endif /* defined(DEBUG) */ 942 943 /* 944 * uvm_map_entry_unwire: unwire a map entry 945 * 946 * => map should be locked by caller 947 */ 948 949 static inline void 950 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) 951 { 952 953 entry->wired_count = 0; 954 uvm_fault_unwire_locked(map, entry->start, entry->end); 955 } 956 957 958 /* 959 * wrapper for calling amap_ref() 960 */ 961 static inline void 962 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 963 { 964 965 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 966 (entry->end - entry->start) >> PAGE_SHIFT, flags); 967 } 968 969 970 /* 971 * wrapper for calling amap_unref() 972 */ 973 static inline void 974 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 975 { 976 977 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 978 (entry->end - entry->start) >> PAGE_SHIFT, flags); 979 } 980 981 982 /* 983 * uvm_map_init: init mapping system at boot time. 984 */ 985 986 void 987 uvm_map_init(void) 988 { 989 #if defined(UVMHIST) 990 static struct uvm_history_ent maphistbuf[100]; 991 static struct uvm_history_ent pdhistbuf[100]; 992 #endif 993 994 /* 995 * first, init logging system. 996 */ 997 998 UVMHIST_FUNC("uvm_map_init"); 999 UVMHIST_INIT_STATIC(maphist, maphistbuf); 1000 UVMHIST_INIT_STATIC(pdhist, pdhistbuf); 1001 UVMHIST_CALLED(maphist); 1002 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 1003 1004 /* 1005 * initialize the global lock for kernel map entry. 1006 */ 1007 1008 mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); 1009 1010 /* 1011 * initialize caches. 1012 */ 1013 1014 pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), 1015 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); 1016 pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), 1017 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); 1018 } 1019 1020 /* 1021 * clippers 1022 */ 1023 1024 /* 1025 * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy. 1026 */ 1027 1028 static void 1029 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2, 1030 vaddr_t splitat) 1031 { 1032 vaddr_t adj; 1033 1034 KASSERT(entry1->start < splitat); 1035 KASSERT(splitat < entry1->end); 1036 1037 adj = splitat - entry1->start; 1038 entry1->end = entry2->start = splitat; 1039 1040 if (entry1->aref.ar_amap) { 1041 amap_splitref(&entry1->aref, &entry2->aref, adj); 1042 } 1043 if (UVM_ET_ISSUBMAP(entry1)) { 1044 /* ... 
unlikely to happen, but play it safe */
		uvm_map_reference(entry1->object.sub_map);
	} else if (UVM_ET_ISOBJ(entry1)) {
		KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
		entry2->offset += adj;
		if (entry1->object.uvm_obj->pgops &&
		    entry1->object.uvm_obj->pgops->pgo_reference)
			entry1->object.uvm_obj->pgops->pgo_reference(
			    entry1->object.uvm_obj);
	}
}

/*
 * uvm_map_clip_start: ensure that the entry begins at or after
 *	the starting address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_START macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
    vaddr_t start, struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *new_entry;

	/* uvm_map_simplify_entry(map, entry); */ /* XXX */

	uvm_map_check(map, "clip_start entry");
	uvm_mapent_check(entry);

	/*
	 * Split off the front portion.  note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */
	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(new_entry, entry, start);
	uvm_map_entry_link(map, entry->prev, new_entry);

	uvm_map_check(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 *	the ending address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end,
    struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *new_entry;

	uvm_map_check(map, "clip_end entry");
	uvm_mapent_check(entry);

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry.
	 */
	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(entry, new_entry, end);
	uvm_map_entry_link(map, entry, new_entry);

	uvm_map_check(map, "clip_end leave");
}

static void
vm_map_drain(struct vm_map *map, uvm_flag_t flags)
{

	if (!VM_MAP_IS_KERNEL(map)) {
		return;
	}

	uvm_km_va_drain(map, flags);
}

/*
 *   M A P   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	[3] <uobj,uoffset>		== normal mapping
 *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *   case [4] is for kernel mappings where we don't know the offset until
 *   we've found a virtual address.  note that kernel object offsets are
 *   always relative to vm_map_min(kernel_map).
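 *
 *   an illustrative call for case [2] (no uobj, no PMAP_PREFER hint) is
 *   sketched below; UVM_MAPFLAG and the UVM_PROT_* / UVM_INH_* /
 *   UVM_ADV_* constants come from uvm_extern.h:
 *
 *	vaddr_t va = vm_map_min(kernel_map);
 *	error = uvm_map(kernel_map, &va, PAGE_SIZE, NULL,
 *	    UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
 *	    UVM_ADV_RANDOM, 0));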
1150 * 1151 * => if `align' is non-zero, we align the virtual address to the specified 1152 * alignment. 1153 * this is provided as a mechanism for large pages. 1154 * 1155 * => XXXCDC: need way to map in external amap? 1156 */ 1157 1158 int 1159 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size, 1160 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) 1161 { 1162 struct uvm_map_args args; 1163 struct vm_map_entry *new_entry; 1164 int error; 1165 1166 KASSERT((flags & UVM_FLAG_QUANTUM) == 0 || VM_MAP_IS_KERNEL(map)); 1167 KASSERT((size & PAGE_MASK) == 0); 1168 1169 /* 1170 * for pager_map, allocate the new entry first to avoid sleeping 1171 * for memory while we have the map locked. 1172 * 1173 * Also, because we allocate entries for in-kernel maps 1174 * a bit differently (cf. uvm_kmapent_alloc/free), we need to 1175 * allocate them before locking the map. 1176 */ 1177 1178 new_entry = NULL; 1179 if (VM_MAP_USE_KMAPENT(map) || (flags & UVM_FLAG_QUANTUM) || 1180 map == pager_map) { 1181 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT)); 1182 if (__predict_false(new_entry == NULL)) 1183 return ENOMEM; 1184 if (flags & UVM_FLAG_QUANTUM) 1185 new_entry->flags |= UVM_MAP_QUANTUM; 1186 } 1187 if (map == pager_map) 1188 flags |= UVM_FLAG_NOMERGE; 1189 1190 error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align, 1191 flags, &args); 1192 if (!error) { 1193 error = uvm_map_enter(map, &args, new_entry); 1194 *startp = args.uma_start; 1195 } else if (new_entry) { 1196 uvm_mapent_free(new_entry); 1197 } 1198 1199 #if defined(DEBUG) 1200 if (!error && VM_MAP_IS_KERNEL(map)) { 1201 uvm_km_check_empty(map, *startp, *startp + size); 1202 } 1203 #endif /* defined(DEBUG) */ 1204 1205 return error; 1206 } 1207 1208 int 1209 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, 1210 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 1211 struct uvm_map_args *args) 1212 { 1213 struct vm_map_entry *prev_entry; 1214 vm_prot_t prot = UVM_PROTECTION(flags); 1215 vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1216 1217 UVMHIST_FUNC("uvm_map_prepare"); 1218 UVMHIST_CALLED(maphist); 1219 1220 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1221 map, start, size, flags); 1222 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1223 1224 /* 1225 * detect a popular device driver bug. 1226 */ 1227 1228 KASSERT(doing_shutdown || curlwp != NULL || 1229 (map->flags & VM_MAP_INTRSAFE)); 1230 1231 /* 1232 * zero-sized mapping doesn't make any sense. 1233 */ 1234 KASSERT(size > 0); 1235 1236 KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0); 1237 1238 uvm_map_check(map, "map entry"); 1239 1240 /* 1241 * check sanity of protection code 1242 */ 1243 1244 if ((prot & maxprot) != prot) { 1245 UVMHIST_LOG(maphist, "<- prot. 
failure: prot=0x%x, max=0x%x", 1246 prot, maxprot,0,0); 1247 return EACCES; 1248 } 1249 1250 /* 1251 * figure out where to put new VM range 1252 */ 1253 1254 retry: 1255 if (vm_map_lock_try(map) == false) { 1256 if ((flags & UVM_FLAG_TRYLOCK) != 0 && 1257 (map->flags & VM_MAP_INTRSAFE) == 0) { 1258 return EAGAIN; 1259 } 1260 vm_map_lock(map); /* could sleep here */ 1261 } 1262 prev_entry = uvm_map_findspace(map, start, size, &start, 1263 uobj, uoffset, align, flags); 1264 if (prev_entry == NULL) { 1265 unsigned int timestamp; 1266 1267 timestamp = map->timestamp; 1268 UVMHIST_LOG(maphist,"waiting va timestamp=0x%x", 1269 timestamp,0,0,0); 1270 map->flags |= VM_MAP_WANTVA; 1271 vm_map_unlock(map); 1272 1273 /* 1274 * try to reclaim kva and wait until someone does unmap. 1275 * fragile locking here, so we awaken every second to 1276 * recheck the condition. 1277 */ 1278 1279 vm_map_drain(map, flags); 1280 1281 mutex_enter(&map->misc_lock); 1282 while ((map->flags & VM_MAP_WANTVA) != 0 && 1283 map->timestamp == timestamp) { 1284 if ((flags & UVM_FLAG_WAITVA) == 0) { 1285 mutex_exit(&map->misc_lock); 1286 UVMHIST_LOG(maphist, 1287 "<- uvm_map_findspace failed!", 0,0,0,0); 1288 return ENOMEM; 1289 } else { 1290 cv_timedwait(&map->cv, &map->misc_lock, hz); 1291 } 1292 } 1293 mutex_exit(&map->misc_lock); 1294 goto retry; 1295 } 1296 1297 #ifdef PMAP_GROWKERNEL 1298 /* 1299 * If the kernel pmap can't map the requested space, 1300 * then allocate more resources for it. 1301 */ 1302 if (map == kernel_map && uvm_maxkaddr < (start + size)) 1303 uvm_maxkaddr = pmap_growkernel(start + size); 1304 #endif 1305 1306 UVMMAP_EVCNT_INCR(map_call); 1307 1308 /* 1309 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 1310 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 1311 * either case we want to zero it before storing it in the map entry 1312 * (because it looks strange and confusing when debugging...) 1313 * 1314 * if uobj is not null 1315 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 1316 * and we do not need to change uoffset. 1317 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 1318 * now (based on the starting address of the map). this case is 1319 * for kernel object mappings where we don't know the offset until 1320 * the virtual address is found (with uvm_map_findspace). the 1321 * offset is the distance we are from the start of the map. 1322 */ 1323 1324 if (uobj == NULL) { 1325 uoffset = 0; 1326 } else { 1327 if (uoffset == UVM_UNKNOWN_OFFSET) { 1328 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1329 uoffset = start - vm_map_min(kernel_map); 1330 } 1331 } 1332 1333 args->uma_flags = flags; 1334 args->uma_prev = prev_entry; 1335 args->uma_start = start; 1336 args->uma_size = size; 1337 args->uma_uobj = uobj; 1338 args->uma_uoffset = uoffset; 1339 1340 return 0; 1341 } 1342 1343 int 1344 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args, 1345 struct vm_map_entry *new_entry) 1346 { 1347 struct vm_map_entry *prev_entry = args->uma_prev; 1348 struct vm_map_entry *dead = NULL; 1349 1350 const uvm_flag_t flags = args->uma_flags; 1351 const vm_prot_t prot = UVM_PROTECTION(flags); 1352 const vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1353 const vm_inherit_t inherit = UVM_INHERIT(flags); 1354 const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ? 1355 AMAP_EXTEND_NOWAIT : 0; 1356 const int advice = UVM_ADVICE(flags); 1357 const int meflagval = (flags & UVM_FLAG_QUANTUM) ? 
1358 UVM_MAP_QUANTUM : 0; 1359 1360 vaddr_t start = args->uma_start; 1361 vsize_t size = args->uma_size; 1362 struct uvm_object *uobj = args->uma_uobj; 1363 voff_t uoffset = args->uma_uoffset; 1364 1365 const int kmap = (vm_map_pmap(map) == pmap_kernel()); 1366 int merged = 0; 1367 int error; 1368 int newetype; 1369 1370 UVMHIST_FUNC("uvm_map_enter"); 1371 UVMHIST_CALLED(maphist); 1372 1373 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1374 map, start, size, flags); 1375 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1376 1377 KASSERT(map->hint == prev_entry); /* bimerge case assumes this */ 1378 1379 if (flags & UVM_FLAG_QUANTUM) { 1380 KASSERT(new_entry); 1381 KASSERT(new_entry->flags & UVM_MAP_QUANTUM); 1382 } 1383 1384 if (uobj) 1385 newetype = UVM_ET_OBJ; 1386 else 1387 newetype = 0; 1388 1389 if (flags & UVM_FLAG_COPYONW) { 1390 newetype |= UVM_ET_COPYONWRITE; 1391 if ((flags & UVM_FLAG_OVERLAY) == 0) 1392 newetype |= UVM_ET_NEEDSCOPY; 1393 } 1394 1395 /* 1396 * try and insert in map by extending previous entry, if possible. 1397 * XXX: we don't try and pull back the next entry. might be useful 1398 * for a stack, but we are currently allocating our stack in advance. 1399 */ 1400 1401 if (flags & UVM_FLAG_NOMERGE) 1402 goto nomerge; 1403 1404 if (prev_entry->end == start && 1405 prev_entry != &map->header && 1406 UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, meflagval, 1407 prot, maxprot, inherit, advice, 0)) { 1408 1409 if (uobj && prev_entry->offset + 1410 (prev_entry->end - prev_entry->start) != uoffset) 1411 goto forwardmerge; 1412 1413 /* 1414 * can't extend a shared amap. note: no need to lock amap to 1415 * look at refs since we don't care about its exact value. 1416 * if it is one (i.e. we have only reference) it will stay there 1417 */ 1418 1419 if (prev_entry->aref.ar_amap && 1420 amap_refs(prev_entry->aref.ar_amap) != 1) { 1421 goto forwardmerge; 1422 } 1423 1424 if (prev_entry->aref.ar_amap) { 1425 error = amap_extend(prev_entry, size, 1426 amapwaitflag | AMAP_EXTEND_FORWARDS); 1427 if (error) 1428 goto nomerge; 1429 } 1430 1431 if (kmap) { 1432 UVMMAP_EVCNT_INCR(kbackmerge); 1433 } else { 1434 UVMMAP_EVCNT_INCR(ubackmerge); 1435 } 1436 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 1437 1438 /* 1439 * drop our reference to uobj since we are extending a reference 1440 * that we already have (the ref count can not drop to zero). 1441 */ 1442 1443 if (uobj && uobj->pgops->pgo_detach) 1444 uobj->pgops->pgo_detach(uobj); 1445 1446 /* 1447 * Now that we've merged the entries, note that we've grown 1448 * and our gap has shrunk. Then fix the tree. 1449 */ 1450 prev_entry->end += size; 1451 prev_entry->gap -= size; 1452 uvm_rb_fixup(map, prev_entry); 1453 1454 uvm_map_check(map, "map backmerged"); 1455 1456 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 1457 merged++; 1458 } 1459 1460 forwardmerge: 1461 if (prev_entry->next->start == (start + size) && 1462 prev_entry->next != &map->header && 1463 UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, meflagval, 1464 prot, maxprot, inherit, advice, 0)) { 1465 1466 if (uobj && prev_entry->next->offset != uoffset + size) 1467 goto nomerge; 1468 1469 /* 1470 * can't extend a shared amap. note: no need to lock amap to 1471 * look at refs since we don't care about its exact value. 1472 * if it is one (i.e. we have only reference) it will stay there. 
1473 * 1474 * note that we also can't merge two amaps, so if we 1475 * merged with the previous entry which has an amap, 1476 * and the next entry also has an amap, we give up. 1477 * 1478 * Interesting cases: 1479 * amap, new, amap -> give up second merge (single fwd extend) 1480 * amap, new, none -> double forward extend (extend again here) 1481 * none, new, amap -> double backward extend (done here) 1482 * uobj, new, amap -> single backward extend (done here) 1483 * 1484 * XXX should we attempt to deal with someone refilling 1485 * the deallocated region between two entries that are 1486 * backed by the same amap (ie, arefs is 2, "prev" and 1487 * "next" refer to it, and adding this allocation will 1488 * close the hole, thus restoring arefs to 1 and 1489 * deallocating the "next" vm_map_entry)? -- @@@ 1490 */ 1491 1492 if (prev_entry->next->aref.ar_amap && 1493 (amap_refs(prev_entry->next->aref.ar_amap) != 1 || 1494 (merged && prev_entry->aref.ar_amap))) { 1495 goto nomerge; 1496 } 1497 1498 if (merged) { 1499 /* 1500 * Try to extend the amap of the previous entry to 1501 * cover the next entry as well. If it doesn't work 1502 * just skip on, don't actually give up, since we've 1503 * already completed the back merge. 1504 */ 1505 if (prev_entry->aref.ar_amap) { 1506 if (amap_extend(prev_entry, 1507 prev_entry->next->end - 1508 prev_entry->next->start, 1509 amapwaitflag | AMAP_EXTEND_FORWARDS)) 1510 goto nomerge; 1511 } 1512 1513 /* 1514 * Try to extend the amap of the *next* entry 1515 * back to cover the new allocation *and* the 1516 * previous entry as well (the previous merge 1517 * didn't have an amap already otherwise we 1518 * wouldn't be checking here for an amap). If 1519 * it doesn't work just skip on, again, don't 1520 * actually give up, since we've already 1521 * completed the back merge. 1522 */ 1523 else if (prev_entry->next->aref.ar_amap) { 1524 if (amap_extend(prev_entry->next, 1525 prev_entry->end - 1526 prev_entry->start, 1527 amapwaitflag | AMAP_EXTEND_BACKWARDS)) 1528 goto nomerge; 1529 } 1530 } else { 1531 /* 1532 * Pull the next entry's amap backwards to cover this 1533 * new allocation. 1534 */ 1535 if (prev_entry->next->aref.ar_amap) { 1536 error = amap_extend(prev_entry->next, size, 1537 amapwaitflag | AMAP_EXTEND_BACKWARDS); 1538 if (error) 1539 goto nomerge; 1540 } 1541 } 1542 1543 if (merged) { 1544 if (kmap) { 1545 UVMMAP_EVCNT_DECR(kbackmerge); 1546 UVMMAP_EVCNT_INCR(kbimerge); 1547 } else { 1548 UVMMAP_EVCNT_DECR(ubackmerge); 1549 UVMMAP_EVCNT_INCR(ubimerge); 1550 } 1551 } else { 1552 if (kmap) { 1553 UVMMAP_EVCNT_INCR(kforwmerge); 1554 } else { 1555 UVMMAP_EVCNT_INCR(uforwmerge); 1556 } 1557 } 1558 UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0); 1559 1560 /* 1561 * drop our reference to uobj since we are extending a reference 1562 * that we already have (the ref count can not drop to zero). 
1563 * (if merged, we've already detached) 1564 */ 1565 if (uobj && uobj->pgops->pgo_detach && !merged) 1566 uobj->pgops->pgo_detach(uobj); 1567 1568 if (merged) { 1569 dead = prev_entry->next; 1570 prev_entry->end = dead->end; 1571 uvm_map_entry_unlink(map, dead); 1572 if (dead->aref.ar_amap != NULL) { 1573 prev_entry->aref = dead->aref; 1574 dead->aref.ar_amap = NULL; 1575 } 1576 } else { 1577 prev_entry->next->start -= size; 1578 if (prev_entry != &map->header) { 1579 prev_entry->gap -= size; 1580 KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry)); 1581 uvm_rb_fixup(map, prev_entry); 1582 } 1583 if (uobj) 1584 prev_entry->next->offset = uoffset; 1585 } 1586 1587 uvm_map_check(map, "map forwardmerged"); 1588 1589 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0); 1590 merged++; 1591 } 1592 1593 nomerge: 1594 if (!merged) { 1595 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 1596 if (kmap) { 1597 UVMMAP_EVCNT_INCR(knomerge); 1598 } else { 1599 UVMMAP_EVCNT_INCR(unomerge); 1600 } 1601 1602 /* 1603 * allocate new entry and link it in. 1604 */ 1605 1606 if (new_entry == NULL) { 1607 new_entry = uvm_mapent_alloc(map, 1608 (flags & UVM_FLAG_NOWAIT)); 1609 if (__predict_false(new_entry == NULL)) { 1610 error = ENOMEM; 1611 goto done; 1612 } 1613 } 1614 new_entry->start = start; 1615 new_entry->end = new_entry->start + size; 1616 new_entry->object.uvm_obj = uobj; 1617 new_entry->offset = uoffset; 1618 1619 new_entry->etype = newetype; 1620 1621 if (flags & UVM_FLAG_NOMERGE) { 1622 new_entry->flags |= UVM_MAP_NOMERGE; 1623 } 1624 1625 new_entry->protection = prot; 1626 new_entry->max_protection = maxprot; 1627 new_entry->inheritance = inherit; 1628 new_entry->wired_count = 0; 1629 new_entry->advice = advice; 1630 if (flags & UVM_FLAG_OVERLAY) { 1631 1632 /* 1633 * to_add: for BSS we overallocate a little since we 1634 * are likely to extend 1635 */ 1636 1637 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 1638 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 1639 struct vm_amap *amap = amap_alloc(size, to_add, 1640 (flags & UVM_FLAG_NOWAIT)); 1641 if (__predict_false(amap == NULL)) { 1642 error = ENOMEM; 1643 goto done; 1644 } 1645 new_entry->aref.ar_pageoff = 0; 1646 new_entry->aref.ar_amap = amap; 1647 } else { 1648 new_entry->aref.ar_pageoff = 0; 1649 new_entry->aref.ar_amap = NULL; 1650 } 1651 uvm_map_entry_link(map, prev_entry, new_entry); 1652 1653 /* 1654 * Update the free space hint 1655 */ 1656 1657 if ((map->first_free == prev_entry) && 1658 (prev_entry->end >= new_entry->start)) 1659 map->first_free = new_entry; 1660 1661 new_entry = NULL; 1662 } 1663 1664 map->size += size; 1665 1666 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1667 1668 error = 0; 1669 done: 1670 if ((flags & UVM_FLAG_QUANTUM) == 0) { 1671 /* 1672 * vmk_merged_entries is locked by the map's lock. 
1673 */ 1674 vm_map_unlock(map); 1675 } 1676 if (new_entry && error == 0) { 1677 KDASSERT(merged); 1678 uvm_mapent_free_merged(map, new_entry); 1679 new_entry = NULL; 1680 } 1681 if (dead) { 1682 KDASSERT(merged); 1683 uvm_mapent_free_merged(map, dead); 1684 } 1685 if ((flags & UVM_FLAG_QUANTUM) != 0) { 1686 vm_map_unlock(map); 1687 } 1688 if (new_entry != NULL) { 1689 uvm_mapent_free(new_entry); 1690 } 1691 return error; 1692 } 1693 1694 /* 1695 * uvm_map_lookup_entry_bytree: lookup an entry in tree 1696 */ 1697 1698 static inline bool 1699 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address, 1700 struct vm_map_entry **entry /* OUT */) 1701 { 1702 struct vm_map_entry *prev = &map->header; 1703 struct vm_map_entry *cur = ROOT_ENTRY(map); 1704 1705 while (cur) { 1706 UVMMAP_EVCNT_INCR(mlk_treeloop); 1707 if (address >= cur->start) { 1708 if (address < cur->end) { 1709 *entry = cur; 1710 return true; 1711 } 1712 prev = cur; 1713 cur = RIGHT_ENTRY(cur); 1714 } else 1715 cur = LEFT_ENTRY(cur); 1716 } 1717 *entry = prev; 1718 return false; 1719 } 1720 1721 /* 1722 * uvm_map_lookup_entry: find map entry at or before an address 1723 * 1724 * => map must at least be read-locked by caller 1725 * => entry is returned in "entry" 1726 * => return value is true if address is in the returned entry 1727 */ 1728 1729 bool 1730 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1731 struct vm_map_entry **entry /* OUT */) 1732 { 1733 struct vm_map_entry *cur; 1734 bool use_tree = false; 1735 UVMHIST_FUNC("uvm_map_lookup_entry"); 1736 UVMHIST_CALLED(maphist); 1737 1738 UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)", 1739 map, address, entry, 0); 1740 1741 /* 1742 * start looking either from the head of the 1743 * list, or from the hint. 1744 */ 1745 1746 cur = map->hint; 1747 1748 if (cur == &map->header) 1749 cur = cur->next; 1750 1751 UVMMAP_EVCNT_INCR(mlk_call); 1752 if (address >= cur->start) { 1753 1754 /* 1755 * go from hint to end of list. 1756 * 1757 * but first, make a quick check to see if 1758 * we are already looking at the entry we 1759 * want (which is usually the case). 1760 * note also that we don't need to save the hint 1761 * here... it is the same hint (unless we are 1762 * at the header, in which case the hint didn't 1763 * buy us anything anyway). 1764 */ 1765 1766 if (cur != &map->header && cur->end > address) { 1767 UVMMAP_EVCNT_INCR(mlk_hint); 1768 *entry = cur; 1769 UVMHIST_LOG(maphist,"<- got it via hint (0x%x)", 1770 cur, 0, 0, 0); 1771 uvm_mapent_check(*entry); 1772 return (true); 1773 } 1774 1775 if (map->nentries > 15) 1776 use_tree = true; 1777 } else { 1778 1779 /* 1780 * invalid hint. use tree. 1781 */ 1782 use_tree = true; 1783 } 1784 1785 uvm_map_check(map, __func__); 1786 1787 if (use_tree) { 1788 /* 1789 * Simple lookup in the tree. Happens when the hint is 1790 * invalid, or nentries reach a threshold. 
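		 * (Added note: whichever strategy is chosen, callers see
		 * the same contract.  For example (sketch):
		 *
		 *	struct vm_map_entry *entry;
		 *
		 *	if (uvm_map_lookup_entry(map, va, &entry)) {
		 *		-- va lies within [entry->start, entry->end)
		 *	} else {
		 *		-- entry is the entry just before va, or
		 *		-- &map->header if no such entry exists
		 *	}
		 *
		 * with the map at least read-locked.)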
		 */
		UVMMAP_EVCNT_INCR(mlk_tree);
		if (uvm_map_lookup_entry_bytree(map, address, entry)) {
			goto got;
		} else {
			goto failed;
		}
	}

	/*
	 * search linearly
	 */

	UVMMAP_EVCNT_INCR(mlk_list);
	while (cur != &map->header) {
		UVMMAP_EVCNT_INCR(mlk_listloop);
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * save this lookup for future
				 * hints, and return
				 */

				*entry = cur;
got:
				SAVE_HINT(map, map->hint, *entry);
				UVMHIST_LOG(maphist,"<- search got it (0x%x)",
				    cur, 0, 0, 0);
				KDASSERT((*entry)->start <= address);
				KDASSERT(address < (*entry)->end);
				uvm_mapent_check(*entry);
				return (true);
			}
			break;
		}
		cur = cur->next;
	}
	*entry = cur->prev;
failed:
	SAVE_HINT(map, map->hint, *entry);
	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
	KDASSERT((*entry)->next == &map->header ||
	    address < (*entry)->next->start);
	return (false);
}

/*
 * See if the range between *start and *start + length fits in the gap
 * between entry->end and entry->next->start.  Returns 1 if it fits,
 * 0 if it doesn't fit, and -1 if the proposed end address wraps around.
 */
static int
uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
    vsize_t align, int topdown, struct vm_map_entry *entry)
{
	vaddr_t end;

#ifdef PMAP_PREFER
	/*
	 * push start address forward as needed to avoid VAC alias problems.
	 * we only do this if a valid offset is specified.
	 */

	if (uoffset != UVM_UNKNOWN_OFFSET)
		PMAP_PREFER(uoffset, start, length, topdown);
#endif
	if (align != 0) {
		if ((*start & (align - 1)) != 0) {
			if (topdown)
				*start &= ~(align - 1);
			else
				*start = roundup(*start, align);
		}
		/*
		 * XXX Should we PMAP_PREFER() here again?
		 * eh...i think we're okay
		 */
	}

	/*
	 * Find the end of the proposed new region.  Be sure we didn't
	 * wrap around the address; if so, we lose.  Otherwise, if the
	 * proposed new region fits before the next entry, we win.
	 */

	end = *start + length;
	if (end < *start)
		return (-1);

	if (entry->next->start >= end && *start >= entry->end)
		return (1);

	return (0);
}

/*
 * uvm_map_findspace: find "length" sized space in "map".
 *
 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
 *	set in "flags" (in which case we insist on using "hint").
 * => "result" is VA returned
 * => uobj/uoffset are to be used to handle VAC alignment, if required
 * => if "align" is non-zero, we attempt to align to that value.
 * => caller must at least have read-locked map
 * => returns NULL on failure, or pointer to prev.
map entry if success 1897 * => note this is a cross between the old vm_map_findspace and vm_map_find 1898 */ 1899 1900 struct vm_map_entry * 1901 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1902 vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, 1903 vsize_t align, int flags) 1904 { 1905 struct vm_map_entry *entry; 1906 struct vm_map_entry *child, *prev, *tmp; 1907 vaddr_t orig_hint; 1908 const int topdown = map->flags & VM_MAP_TOPDOWN; 1909 UVMHIST_FUNC("uvm_map_findspace"); 1910 UVMHIST_CALLED(maphist); 1911 1912 UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", 1913 map, hint, length, flags); 1914 KASSERT((align & (align - 1)) == 0); 1915 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1916 1917 uvm_map_check(map, "map_findspace entry"); 1918 1919 /* 1920 * remember the original hint. if we are aligning, then we 1921 * may have to try again with no alignment constraint if 1922 * we fail the first time. 1923 */ 1924 1925 orig_hint = hint; 1926 if (hint < vm_map_min(map)) { /* check ranges ... */ 1927 if (flags & UVM_FLAG_FIXED) { 1928 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1929 return (NULL); 1930 } 1931 hint = vm_map_min(map); 1932 } 1933 if (hint > vm_map_max(map)) { 1934 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]", 1935 hint, vm_map_min(map), vm_map_max(map), 0); 1936 return (NULL); 1937 } 1938 1939 /* 1940 * Look for the first possible address; if there's already 1941 * something at this address, we have to start after it. 1942 */ 1943 1944 /* 1945 * @@@: there are four, no, eight cases to consider. 1946 * 1947 * 0: found, fixed, bottom up -> fail 1948 * 1: found, fixed, top down -> fail 1949 * 2: found, not fixed, bottom up -> start after entry->end, 1950 * loop up 1951 * 3: found, not fixed, top down -> start before entry->start, 1952 * loop down 1953 * 4: not found, fixed, bottom up -> check entry->next->start, fail 1954 * 5: not found, fixed, top down -> check entry->next->start, fail 1955 * 6: not found, not fixed, bottom up -> check entry->next->start, 1956 * loop up 1957 * 7: not found, not fixed, top down -> check entry->next->start, 1958 * loop down 1959 * 1960 * as you can see, it reduces to roughly five cases, and that 1961 * adding top down mapping only adds one unique case (without 1962 * it, there would be four cases). 1963 */ 1964 1965 if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) { 1966 entry = map->first_free; 1967 } else { 1968 if (uvm_map_lookup_entry(map, hint, &entry)) { 1969 /* "hint" address already in use ... */ 1970 if (flags & UVM_FLAG_FIXED) { 1971 UVMHIST_LOG(maphist, "<- fixed & VA in use", 1972 0, 0, 0, 0); 1973 return (NULL); 1974 } 1975 if (topdown) 1976 /* Start from lower gap. */ 1977 entry = entry->prev; 1978 } else if (flags & UVM_FLAG_FIXED) { 1979 if (entry->next->start >= hint + length && 1980 hint + length > hint) 1981 goto found; 1982 1983 /* "hint" address is gap but too small */ 1984 UVMHIST_LOG(maphist, "<- fixed mapping failed", 1985 0, 0, 0, 0); 1986 return (NULL); /* only one shot at it ... */ 1987 } else { 1988 /* 1989 * See if given hint fits in this gap. 1990 */ 1991 switch (uvm_map_space_avail(&hint, length, 1992 uoffset, align, topdown, entry)) { 1993 case 1: 1994 goto found; 1995 case -1: 1996 goto wraparound; 1997 } 1998 1999 if (topdown) { 2000 /* 2001 * Still there is a chance to fit 2002 * if hint > entry->end. 2003 */ 2004 } else { 2005 /* Start from higher gap. 
*/ 2006 entry = entry->next; 2007 if (entry == &map->header) 2008 goto notfound; 2009 goto nextgap; 2010 } 2011 } 2012 } 2013 2014 /* 2015 * Note that all UVM_FLAGS_FIXED case is already handled. 2016 */ 2017 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 2018 2019 /* Try to find the space in the red-black tree */ 2020 2021 /* Check slot before any entry */ 2022 hint = topdown ? entry->next->start - length : entry->end; 2023 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2024 topdown, entry)) { 2025 case 1: 2026 goto found; 2027 case -1: 2028 goto wraparound; 2029 } 2030 2031 nextgap: 2032 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 2033 /* If there is not enough space in the whole tree, we fail */ 2034 tmp = ROOT_ENTRY(map); 2035 if (tmp == NULL || tmp->maxgap < length) 2036 goto notfound; 2037 2038 prev = NULL; /* previous candidate */ 2039 2040 /* Find an entry close to hint that has enough space */ 2041 for (; tmp;) { 2042 KASSERT(tmp->next->start == tmp->end + tmp->gap); 2043 if (topdown) { 2044 if (tmp->next->start < hint + length && 2045 (prev == NULL || tmp->end > prev->end)) { 2046 if (tmp->gap >= length) 2047 prev = tmp; 2048 else if ((child = LEFT_ENTRY(tmp)) != NULL 2049 && child->maxgap >= length) 2050 prev = tmp; 2051 } 2052 } else { 2053 if (tmp->end >= hint && 2054 (prev == NULL || tmp->end < prev->end)) { 2055 if (tmp->gap >= length) 2056 prev = tmp; 2057 else if ((child = RIGHT_ENTRY(tmp)) != NULL 2058 && child->maxgap >= length) 2059 prev = tmp; 2060 } 2061 } 2062 if (tmp->next->start < hint + length) 2063 child = RIGHT_ENTRY(tmp); 2064 else if (tmp->end > hint) 2065 child = LEFT_ENTRY(tmp); 2066 else { 2067 if (tmp->gap >= length) 2068 break; 2069 if (topdown) 2070 child = LEFT_ENTRY(tmp); 2071 else 2072 child = RIGHT_ENTRY(tmp); 2073 } 2074 if (child == NULL || child->maxgap < length) 2075 break; 2076 tmp = child; 2077 } 2078 2079 if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) { 2080 /* 2081 * Check if the entry that we found satifies the 2082 * space requirement 2083 */ 2084 if (topdown) { 2085 if (hint > tmp->next->start - length) 2086 hint = tmp->next->start - length; 2087 } else { 2088 if (hint < tmp->end) 2089 hint = tmp->end; 2090 } 2091 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2092 topdown, tmp)) { 2093 case 1: 2094 entry = tmp; 2095 goto found; 2096 case -1: 2097 goto wraparound; 2098 } 2099 if (tmp->gap >= length) 2100 goto listsearch; 2101 } 2102 if (prev == NULL) 2103 goto notfound; 2104 2105 if (topdown) { 2106 KASSERT(orig_hint >= prev->next->start - length || 2107 prev->next->start - length > prev->next->start); 2108 hint = prev->next->start - length; 2109 } else { 2110 KASSERT(orig_hint <= prev->end); 2111 hint = prev->end; 2112 } 2113 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2114 topdown, prev)) { 2115 case 1: 2116 entry = prev; 2117 goto found; 2118 case -1: 2119 goto wraparound; 2120 } 2121 if (prev->gap >= length) 2122 goto listsearch; 2123 2124 if (topdown) 2125 tmp = LEFT_ENTRY(prev); 2126 else 2127 tmp = RIGHT_ENTRY(prev); 2128 for (;;) { 2129 KASSERT(tmp && tmp->maxgap >= length); 2130 if (topdown) 2131 child = RIGHT_ENTRY(tmp); 2132 else 2133 child = LEFT_ENTRY(tmp); 2134 if (child && child->maxgap >= length) { 2135 tmp = child; 2136 continue; 2137 } 2138 if (tmp->gap >= length) 2139 break; 2140 if (topdown) 2141 tmp = LEFT_ENTRY(tmp); 2142 else 2143 tmp = RIGHT_ENTRY(tmp); 2144 } 2145 2146 if (topdown) { 2147 KASSERT(orig_hint >= tmp->next->start - length || 2148 tmp->next->start - length 
> tmp->next->start); 2149 hint = tmp->next->start - length; 2150 } else { 2151 KASSERT(orig_hint <= tmp->end); 2152 hint = tmp->end; 2153 } 2154 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2155 topdown, tmp)) { 2156 case 1: 2157 entry = tmp; 2158 goto found; 2159 case -1: 2160 goto wraparound; 2161 } 2162 2163 /* 2164 * The tree fails to find an entry because of offset or alignment 2165 * restrictions. Search the list instead. 2166 */ 2167 listsearch: 2168 /* 2169 * Look through the rest of the map, trying to fit a new region in 2170 * the gap between existing regions, or after the very last region. 2171 * note: entry->end = base VA of current gap, 2172 * entry->next->start = VA of end of current gap 2173 */ 2174 2175 for (;;) { 2176 /* Update hint for current gap. */ 2177 hint = topdown ? entry->next->start - length : entry->end; 2178 2179 /* See if it fits. */ 2180 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2181 topdown, entry)) { 2182 case 1: 2183 goto found; 2184 case -1: 2185 goto wraparound; 2186 } 2187 2188 /* Advance to next/previous gap */ 2189 if (topdown) { 2190 if (entry == &map->header) { 2191 UVMHIST_LOG(maphist, "<- failed (off start)", 2192 0,0,0,0); 2193 goto notfound; 2194 } 2195 entry = entry->prev; 2196 } else { 2197 entry = entry->next; 2198 if (entry == &map->header) { 2199 UVMHIST_LOG(maphist, "<- failed (off end)", 2200 0,0,0,0); 2201 goto notfound; 2202 } 2203 } 2204 } 2205 2206 found: 2207 SAVE_HINT(map, map->hint, entry); 2208 *result = hint; 2209 UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0); 2210 KASSERT( topdown || hint >= orig_hint); 2211 KASSERT(!topdown || hint <= orig_hint); 2212 KASSERT(entry->end <= hint); 2213 KASSERT(hint + length <= entry->next->start); 2214 return (entry); 2215 2216 wraparound: 2217 UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0); 2218 2219 return (NULL); 2220 2221 notfound: 2222 UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0); 2223 2224 return (NULL); 2225 } 2226 2227 /* 2228 * U N M A P - m a i n h e l p e r f u n c t i o n s 2229 */ 2230 2231 /* 2232 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 2233 * 2234 * => caller must check alignment and size 2235 * => map must be locked by caller 2236 * => we return a list of map entries that we've remove from the map 2237 * in "entry_list" 2238 */ 2239 2240 void 2241 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2242 struct vm_map_entry **entry_list /* OUT */, 2243 struct uvm_mapent_reservation *umr, int flags) 2244 { 2245 struct vm_map_entry *entry, *first_entry, *next; 2246 vaddr_t len; 2247 UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist); 2248 2249 UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", 2250 map, start, end, 0); 2251 VM_MAP_RANGE_CHECK(map, start, end); 2252 2253 uvm_map_check(map, "unmap_remove entry"); 2254 2255 /* 2256 * find first entry 2257 */ 2258 2259 if (uvm_map_lookup_entry(map, start, &first_entry) == true) { 2260 /* clip and go... */ 2261 entry = first_entry; 2262 UVM_MAP_CLIP_START(map, entry, start, umr); 2263 /* critical! prevents stale hint */ 2264 SAVE_HINT(map, entry, entry->prev); 2265 } else { 2266 entry = first_entry->next; 2267 } 2268 2269 /* 2270 * Save the free space hint 2271 */ 2272 2273 if (map->first_free != &map->header && map->first_free->start >= start) 2274 map->first_free = entry->prev; 2275 2276 /* 2277 * note: we now re-use first_entry for a different task. 
we remove 2278 * a number of map entries from the map and save them in a linked 2279 * list headed by "first_entry". once we remove them from the map 2280 * the caller should unlock the map and drop the references to the 2281 * backing objects [c.f. uvm_unmap_detach]. the object is to 2282 * separate unmapping from reference dropping. why? 2283 * [1] the map has to be locked for unmapping 2284 * [2] the map need not be locked for reference dropping 2285 * [3] dropping references may trigger pager I/O, and if we hit 2286 * a pager that does synchronous I/O we may have to wait for it. 2287 * [4] we would like all waiting for I/O to occur with maps unlocked 2288 * so that we don't block other threads. 2289 */ 2290 2291 first_entry = NULL; 2292 *entry_list = NULL; 2293 2294 /* 2295 * break up the area into map entry sized regions and unmap. note 2296 * that all mappings have to be removed before we can even consider 2297 * dropping references to amaps or VM objects (otherwise we could end 2298 * up with a mapping to a page on the free list which would be very bad) 2299 */ 2300 2301 while ((entry != &map->header) && (entry->start < end)) { 2302 KASSERT((entry->flags & UVM_MAP_FIRST) == 0); 2303 2304 UVM_MAP_CLIP_END(map, entry, end, umr); 2305 next = entry->next; 2306 len = entry->end - entry->start; 2307 2308 /* 2309 * unwire before removing addresses from the pmap; otherwise 2310 * unwiring will put the entries back into the pmap (XXX). 2311 */ 2312 2313 if (VM_MAPENT_ISWIRED(entry)) { 2314 uvm_map_entry_unwire(map, entry); 2315 } 2316 if (flags & UVM_FLAG_VAONLY) { 2317 2318 /* nothing */ 2319 2320 } else if ((map->flags & VM_MAP_PAGEABLE) == 0) { 2321 2322 /* 2323 * if the map is non-pageable, any pages mapped there 2324 * must be wired and entered with pmap_kenter_pa(), 2325 * and we should free any such pages immediately. 2326 * this is mostly used for kmem_map and mb_map. 2327 */ 2328 2329 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2330 uvm_km_pgremove_intrsafe(map, entry->start, 2331 entry->end); 2332 pmap_kremove(entry->start, len); 2333 } 2334 } else if (UVM_ET_ISOBJ(entry) && 2335 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2336 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2337 2338 /* 2339 * note: kernel object mappings are currently used in 2340 * two ways: 2341 * [1] "normal" mappings of pages in the kernel object 2342 * [2] uvm_km_valloc'd allocations in which we 2343 * pmap_enter in some non-kernel-object page 2344 * (e.g. vmapbuf). 2345 * 2346 * for case [1], we need to remove the mapping from 2347 * the pmap and then remove the page from the kernel 2348 * object (because, once pages in a kernel object are 2349 * unmapped they are no longer needed, unlike, say, 2350 * a vnode where you might want the data to persist 2351 * until flushed out of a queue). 2352 * 2353 * for case [2], we need to remove the mapping from 2354 * the pmap. there shouldn't be any pages at the 2355 * specified offset in the kernel object [but it 2356 * doesn't hurt to call uvm_km_pgremove just to be 2357 * safe?] 2358 * 2359 * uvm_km_pgremove currently does the following: 2360 * for pages in the kernel object in range: 2361 * - drops the swap slot 2362 * - uvm_pagefree the page 2363 */ 2364 2365 /* 2366 * remove mappings from pmap and drop the pages 2367 * from the object. offsets are always relative 2368 * to vm_map_min(kernel_map). 
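 *
 * to make the offset convention above concrete (an illustrative note,
 * not part of the original commentary): for a kernel VA "va" in this
 * range, the corresponding kernel object offset is simply
 *
 *	voff_t off = (voff_t)(va - vm_map_min(kernel_map));
 *
 * the calls below work in VAs and rely on that mapping.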
2369 */ 2370 2371 pmap_remove(pmap_kernel(), entry->start, 2372 entry->start + len); 2373 uvm_km_pgremove(entry->start, entry->end); 2374 2375 /* 2376 * null out kernel_object reference, we've just 2377 * dropped it 2378 */ 2379 2380 entry->etype &= ~UVM_ET_OBJ; 2381 entry->object.uvm_obj = NULL; 2382 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2383 2384 /* 2385 * remove mappings the standard way. 2386 */ 2387 2388 pmap_remove(map->pmap, entry->start, entry->end); 2389 } 2390 2391 #if defined(DEBUG) 2392 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2393 2394 /* 2395 * check if there's remaining mapping, 2396 * which is a bug in caller. 2397 */ 2398 2399 vaddr_t va; 2400 for (va = entry->start; va < entry->end; 2401 va += PAGE_SIZE) { 2402 if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2403 panic("uvm_unmap_remove: has mapping"); 2404 } 2405 } 2406 2407 if (VM_MAP_IS_KERNEL(map)) { 2408 uvm_km_check_empty(map, entry->start, 2409 entry->end); 2410 } 2411 } 2412 #endif /* defined(DEBUG) */ 2413 2414 /* 2415 * remove entry from map and put it on our list of entries 2416 * that we've nuked. then go to next entry. 2417 */ 2418 2419 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); 2420 2421 /* critical! prevents stale hint */ 2422 SAVE_HINT(map, entry, entry->prev); 2423 2424 uvm_map_entry_unlink(map, entry); 2425 KASSERT(map->size >= len); 2426 map->size -= len; 2427 entry->prev = NULL; 2428 entry->next = first_entry; 2429 first_entry = entry; 2430 entry = next; 2431 } 2432 if ((map->flags & VM_MAP_DYING) == 0) { 2433 pmap_update(vm_map_pmap(map)); 2434 } 2435 2436 uvm_map_check(map, "unmap_remove leave"); 2437 2438 /* 2439 * now we've cleaned up the map and are ready for the caller to drop 2440 * references to the mapped objects. 2441 */ 2442 2443 *entry_list = first_entry; 2444 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2445 2446 if (map->flags & VM_MAP_WANTVA) { 2447 mutex_enter(&map->misc_lock); 2448 map->flags &= ~VM_MAP_WANTVA; 2449 cv_broadcast(&map->cv); 2450 mutex_exit(&map->misc_lock); 2451 } 2452 } 2453 2454 /* 2455 * uvm_unmap_detach: drop references in a chain of map entries 2456 * 2457 * => we will free the map entries as we traverse the list. 2458 */ 2459 2460 void 2461 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2462 { 2463 struct vm_map_entry *next_entry; 2464 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 2465 2466 while (first_entry) { 2467 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2468 UVMHIST_LOG(maphist, 2469 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", 2470 first_entry, first_entry->aref.ar_amap, 2471 first_entry->object.uvm_obj, 2472 UVM_ET_ISSUBMAP(first_entry)); 2473 2474 /* 2475 * drop reference to amap, if we've got one 2476 */ 2477 2478 if (first_entry->aref.ar_amap) 2479 uvm_map_unreference_amap(first_entry, flags); 2480 2481 /* 2482 * drop reference to our backing object, if we've got one 2483 */ 2484 2485 KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2486 if (UVM_ET_ISOBJ(first_entry) && 2487 first_entry->object.uvm_obj->pgops->pgo_detach) { 2488 (*first_entry->object.uvm_obj->pgops->pgo_detach) 2489 (first_entry->object.uvm_obj); 2490 } 2491 next_entry = first_entry->next; 2492 uvm_mapent_free(first_entry); 2493 first_entry = next_entry; 2494 } 2495 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2496 } 2497 2498 /* 2499 * E X T R A C T I O N F U N C T I O N S 2500 */ 2501 2502 /* 2503 * uvm_map_reserve: reserve space in a vm_map for future use. 
2504 * 2505 * => we reserve space in a map by putting a dummy map entry in the 2506 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2507 * => map should be unlocked (we will write lock it) 2508 * => we return true if we were able to reserve space 2509 * => XXXCDC: should be inline? 2510 */ 2511 2512 int 2513 uvm_map_reserve(struct vm_map *map, vsize_t size, 2514 vaddr_t offset /* hint for pmap_prefer */, 2515 vsize_t align /* alignment */, 2516 vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2517 uvm_flag_t flags /* UVM_FLAG_FIXED or 0 */) 2518 { 2519 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 2520 2521 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", 2522 map,size,offset,raddr); 2523 2524 size = round_page(size); 2525 2526 /* 2527 * reserve some virtual space. 2528 */ 2529 2530 if (uvm_map(map, raddr, size, NULL, offset, align, 2531 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 2532 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) { 2533 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 2534 return (false); 2535 } 2536 2537 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); 2538 return (true); 2539 } 2540 2541 /* 2542 * uvm_map_replace: replace a reserved (blank) area of memory with 2543 * real mappings. 2544 * 2545 * => caller must WRITE-LOCK the map 2546 * => we return true if replacement was a success 2547 * => we expect the newents chain to have nnewents entrys on it and 2548 * we expect newents->prev to point to the last entry on the list 2549 * => note newents is allowed to be NULL 2550 */ 2551 2552 int 2553 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 2554 struct vm_map_entry *newents, int nnewents, struct vm_map_entry **oldentryp) 2555 { 2556 struct vm_map_entry *oldent, *last; 2557 2558 uvm_map_check(map, "map_replace entry"); 2559 2560 /* 2561 * first find the blank map entry at the specified address 2562 */ 2563 2564 if (!uvm_map_lookup_entry(map, start, &oldent)) { 2565 return (false); 2566 } 2567 2568 /* 2569 * check to make sure we have a proper blank entry 2570 */ 2571 2572 if (end < oldent->end && !VM_MAP_USE_KMAPENT(map)) { 2573 UVM_MAP_CLIP_END(map, oldent, end, NULL); 2574 } 2575 if (oldent->start != start || oldent->end != end || 2576 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 2577 return (false); 2578 } 2579 2580 #ifdef DIAGNOSTIC 2581 2582 /* 2583 * sanity check the newents chain 2584 */ 2585 2586 { 2587 struct vm_map_entry *tmpent = newents; 2588 int nent = 0; 2589 vaddr_t cur = start; 2590 2591 while (tmpent) { 2592 nent++; 2593 if (tmpent->start < cur) 2594 panic("uvm_map_replace1"); 2595 if (tmpent->start > tmpent->end || tmpent->end > end) { 2596 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", 2597 tmpent->start, tmpent->end, end); 2598 panic("uvm_map_replace2"); 2599 } 2600 cur = tmpent->end; 2601 if (tmpent->next) { 2602 if (tmpent->next->prev != tmpent) 2603 panic("uvm_map_replace3"); 2604 } else { 2605 if (newents->prev != tmpent) 2606 panic("uvm_map_replace4"); 2607 } 2608 tmpent = tmpent->next; 2609 } 2610 if (nent != nnewents) 2611 panic("uvm_map_replace5"); 2612 } 2613 #endif 2614 2615 /* 2616 * map entry is a valid blank! replace it. (this does all the 2617 * work of map entry link/unlink...). 
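 *
 * to restate the shape the caller must hand us (this is exactly what
 * the DIAGNOSTIC block above verifies), for three new entries lying
 * inside [start, end):
 *
 *	newents == e1,  e1->next == e2,  e2->next == e3,  e3->next == NULL
 *	newents->prev == e3		(back pointer to the tail)
 *	nnewents == 3
 *
 * the splice code below depends on newents->prev being the tail.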
2618 */ 2619 2620 if (newents) { 2621 last = newents->prev; 2622 2623 /* critical: flush stale hints out of map */ 2624 SAVE_HINT(map, map->hint, newents); 2625 if (map->first_free == oldent) 2626 map->first_free = last; 2627 2628 last->next = oldent->next; 2629 last->next->prev = last; 2630 2631 /* Fix RB tree */ 2632 uvm_rb_remove(map, oldent); 2633 2634 newents->prev = oldent->prev; 2635 newents->prev->next = newents; 2636 map->nentries = map->nentries + (nnewents - 1); 2637 2638 /* Fixup the RB tree */ 2639 { 2640 int i; 2641 struct vm_map_entry *tmp; 2642 2643 tmp = newents; 2644 for (i = 0; i < nnewents && tmp; i++) { 2645 uvm_rb_insert(map, tmp); 2646 tmp = tmp->next; 2647 } 2648 } 2649 } else { 2650 /* NULL list of new entries: just remove the old one */ 2651 clear_hints(map, oldent); 2652 uvm_map_entry_unlink(map, oldent); 2653 } 2654 2655 uvm_map_check(map, "map_replace leave"); 2656 2657 /* 2658 * now we can free the old blank entry and return. 2659 */ 2660 2661 *oldentryp = oldent; 2662 return (true); 2663 } 2664 2665 /* 2666 * uvm_map_extract: extract a mapping from a map and put it somewhere 2667 * (maybe removing the old mapping) 2668 * 2669 * => maps should be unlocked (we will write lock them) 2670 * => returns 0 on success, error code otherwise 2671 * => start must be page aligned 2672 * => len must be page sized 2673 * => flags: 2674 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2675 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2676 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2677 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2678 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2679 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2680 * be used from within the kernel in a kernel level map <<< 2681 */ 2682 2683 int 2684 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2685 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2686 { 2687 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2688 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2689 *deadentry, *oldentry; 2690 struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */ 2691 vsize_t elen; 2692 int nchain, error, copy_ok; 2693 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 2694 2695 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, 2696 len,0); 2697 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); 2698 2699 uvm_map_check(srcmap, "map_extract src enter"); 2700 uvm_map_check(dstmap, "map_extract dst enter"); 2701 2702 /* 2703 * step 0: sanity check: start must be on a page boundary, length 2704 * must be page sized. can't ask for CONTIG/QREF if you asked for 2705 * REMOVE. 
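 *
 * as a purely illustrative example of a legal flag combination (the
 * names "p", "uva" and "len" are stand-ins for a caller's process and
 * a page-aligned range, not code from this file): a caller wanting a
 * temporary kernel-visible window onto part of a user map might do
 *
 *	vaddr_t kva;
 *	error = uvm_map_extract(&p->p_vmspace->vm_map, uva, len,
 *	    kernel_map, &kva,
 *	    UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | UVM_EXTRACT_FIXPROT);
 *
 * adding UVM_EXTRACT_REMOVE to that set would violate the rule above
 * and trip the KASSERT just below.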
2706 */ 2707 2708 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 2709 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2710 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2711 2712 /* 2713 * step 1: reserve space in the target map for the extracted area 2714 */ 2715 2716 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2717 dstaddr = vm_map_min(dstmap); 2718 if (!uvm_map_reserve(dstmap, len, start, 0, &dstaddr, 0)) 2719 return (ENOMEM); 2720 *dstaddrp = dstaddr; /* pass address back to caller */ 2721 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); 2722 } else { 2723 dstaddr = *dstaddrp; 2724 } 2725 2726 /* 2727 * step 2: setup for the extraction process loop by init'ing the 2728 * map entry chain, locking src map, and looking up the first useful 2729 * entry in the map. 2730 */ 2731 2732 end = start + len; 2733 newend = dstaddr + len; 2734 chain = endchain = NULL; 2735 nchain = 0; 2736 vm_map_lock(srcmap); 2737 2738 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2739 2740 /* "start" is within an entry */ 2741 if (flags & UVM_EXTRACT_QREF) { 2742 2743 /* 2744 * for quick references we don't clip the entry, so 2745 * the entry may map space "before" the starting 2746 * virtual address... this is the "fudge" factor 2747 * (which can be non-zero only the first time 2748 * through the "while" loop in step 3). 2749 */ 2750 2751 fudge = start - entry->start; 2752 } else { 2753 2754 /* 2755 * normal reference: we clip the map to fit (thus 2756 * fudge is zero) 2757 */ 2758 2759 UVM_MAP_CLIP_START(srcmap, entry, start, NULL); 2760 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2761 fudge = 0; 2762 } 2763 } else { 2764 2765 /* "start" is not within an entry ... skip to next entry */ 2766 if (flags & UVM_EXTRACT_CONTIG) { 2767 error = EINVAL; 2768 goto bad; /* definite hole here ... */ 2769 } 2770 2771 entry = entry->next; 2772 fudge = 0; 2773 } 2774 2775 /* save values from srcmap for step 6 */ 2776 orig_entry = entry; 2777 orig_fudge = fudge; 2778 2779 /* 2780 * step 3: now start looping through the map entries, extracting 2781 * as we go. 2782 */ 2783 2784 while (entry->start < end && entry != &srcmap->header) { 2785 2786 /* if we are not doing a quick reference, clip it */ 2787 if ((flags & UVM_EXTRACT_QREF) == 0) 2788 UVM_MAP_CLIP_END(srcmap, entry, end, NULL); 2789 2790 /* clear needs_copy (allow chunking) */ 2791 if (UVM_ET_ISNEEDSCOPY(entry)) { 2792 amap_copy(srcmap, entry, 2793 AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2794 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2795 error = ENOMEM; 2796 goto bad; 2797 } 2798 2799 /* amap_copy could clip (during chunk)! 
update fudge */ 2800 if (fudge) { 2801 fudge = start - entry->start; 2802 orig_fudge = fudge; 2803 } 2804 } 2805 2806 /* calculate the offset of this from "start" */ 2807 oldoffset = (entry->start + fudge) - start; 2808 2809 /* allocate a new map entry */ 2810 newentry = uvm_mapent_alloc(dstmap, 0); 2811 if (newentry == NULL) { 2812 error = ENOMEM; 2813 goto bad; 2814 } 2815 2816 /* set up new map entry */ 2817 newentry->next = NULL; 2818 newentry->prev = endchain; 2819 newentry->start = dstaddr + oldoffset; 2820 newentry->end = 2821 newentry->start + (entry->end - (entry->start + fudge)); 2822 if (newentry->end > newend || newentry->end < newentry->start) 2823 newentry->end = newend; 2824 newentry->object.uvm_obj = entry->object.uvm_obj; 2825 if (newentry->object.uvm_obj) { 2826 if (newentry->object.uvm_obj->pgops->pgo_reference) 2827 newentry->object.uvm_obj->pgops-> 2828 pgo_reference(newentry->object.uvm_obj); 2829 newentry->offset = entry->offset + fudge; 2830 } else { 2831 newentry->offset = 0; 2832 } 2833 newentry->etype = entry->etype; 2834 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2835 entry->max_protection : entry->protection; 2836 newentry->max_protection = entry->max_protection; 2837 newentry->inheritance = entry->inheritance; 2838 newentry->wired_count = 0; 2839 newentry->aref.ar_amap = entry->aref.ar_amap; 2840 if (newentry->aref.ar_amap) { 2841 newentry->aref.ar_pageoff = 2842 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2843 uvm_map_reference_amap(newentry, AMAP_SHARED | 2844 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2845 } else { 2846 newentry->aref.ar_pageoff = 0; 2847 } 2848 newentry->advice = entry->advice; 2849 if ((flags & UVM_EXTRACT_QREF) != 0) { 2850 newentry->flags |= UVM_MAP_NOMERGE; 2851 } 2852 2853 /* now link it on the chain */ 2854 nchain++; 2855 if (endchain == NULL) { 2856 chain = endchain = newentry; 2857 } else { 2858 endchain->next = newentry; 2859 endchain = newentry; 2860 } 2861 2862 /* end of 'while' loop! */ 2863 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2864 (entry->next == &srcmap->header || 2865 entry->next->start != entry->end)) { 2866 error = EINVAL; 2867 goto bad; 2868 } 2869 entry = entry->next; 2870 fudge = 0; 2871 } 2872 2873 /* 2874 * step 4: close off chain (in format expected by uvm_map_replace) 2875 */ 2876 2877 if (chain) 2878 chain->prev = endchain; 2879 2880 /* 2881 * step 5: attempt to lock the dest map so we can pmap_copy. 
2882 * note usage of copy_ok: 2883 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2884 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2885 */ 2886 2887 if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) { 2888 copy_ok = 1; 2889 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2890 nchain, &resentry)) { 2891 if (srcmap != dstmap) 2892 vm_map_unlock(dstmap); 2893 error = EIO; 2894 goto bad; 2895 } 2896 } else { 2897 copy_ok = 0; 2898 /* replace defered until step 7 */ 2899 } 2900 2901 /* 2902 * step 6: traverse the srcmap a second time to do the following: 2903 * - if we got a lock on the dstmap do pmap_copy 2904 * - if UVM_EXTRACT_REMOVE remove the entries 2905 * we make use of orig_entry and orig_fudge (saved in step 2) 2906 */ 2907 2908 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2909 2910 /* purge possible stale hints from srcmap */ 2911 if (flags & UVM_EXTRACT_REMOVE) { 2912 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2913 if (srcmap->first_free != &srcmap->header && 2914 srcmap->first_free->start >= start) 2915 srcmap->first_free = orig_entry->prev; 2916 } 2917 2918 entry = orig_entry; 2919 fudge = orig_fudge; 2920 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2921 2922 while (entry->start < end && entry != &srcmap->header) { 2923 if (copy_ok) { 2924 oldoffset = (entry->start + fudge) - start; 2925 elen = MIN(end, entry->end) - 2926 (entry->start + fudge); 2927 pmap_copy(dstmap->pmap, srcmap->pmap, 2928 dstaddr + oldoffset, elen, 2929 entry->start + fudge); 2930 } 2931 2932 /* we advance "entry" in the following if statement */ 2933 if (flags & UVM_EXTRACT_REMOVE) { 2934 pmap_remove(srcmap->pmap, entry->start, 2935 entry->end); 2936 oldentry = entry; /* save entry */ 2937 entry = entry->next; /* advance */ 2938 uvm_map_entry_unlink(srcmap, oldentry); 2939 /* add to dead list */ 2940 oldentry->next = deadentry; 2941 deadentry = oldentry; 2942 } else { 2943 entry = entry->next; /* advance */ 2944 } 2945 2946 /* end of 'while' loop */ 2947 fudge = 0; 2948 } 2949 pmap_update(srcmap->pmap); 2950 2951 /* 2952 * unlock dstmap. we will dispose of deadentry in 2953 * step 7 if needed 2954 */ 2955 2956 if (copy_ok && srcmap != dstmap) 2957 vm_map_unlock(dstmap); 2958 2959 } else { 2960 deadentry = NULL; 2961 } 2962 2963 /* 2964 * step 7: we are done with the source map, unlock. if copy_ok 2965 * is 0 then we have not replaced the dummy mapping in dstmap yet 2966 * and we need to do so now. 2967 */ 2968 2969 vm_map_unlock(srcmap); 2970 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2971 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2972 2973 /* now do the replacement if we didn't do it in step 5 */ 2974 if (copy_ok == 0) { 2975 vm_map_lock(dstmap); 2976 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2977 nchain, &resentry); 2978 vm_map_unlock(dstmap); 2979 2980 if (error == false) { 2981 error = EIO; 2982 goto bad2; 2983 } 2984 } 2985 2986 if (resentry != NULL) 2987 uvm_mapent_free(resentry); 2988 2989 uvm_map_check(srcmap, "map_extract src leave"); 2990 uvm_map_check(dstmap, "map_extract dst leave"); 2991 2992 return (0); 2993 2994 /* 2995 * bad: failure recovery 2996 */ 2997 bad: 2998 vm_map_unlock(srcmap); 2999 bad2: /* src already unlocked */ 3000 if (chain) 3001 uvm_unmap_detach(chain, 3002 (flags & UVM_EXTRACT_QREF) ? 
AMAP_REFALL : 0); 3003 3004 if (resentry != NULL) 3005 uvm_mapent_free(resentry); 3006 3007 uvm_map_check(srcmap, "map_extract src err leave"); 3008 uvm_map_check(dstmap, "map_extract dst err leave"); 3009 3010 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 3011 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ 3012 } 3013 return (error); 3014 } 3015 3016 /* end of extraction functions */ 3017 3018 /* 3019 * uvm_map_submap: punch down part of a map into a submap 3020 * 3021 * => only the kernel_map is allowed to be submapped 3022 * => the purpose of submapping is to break up the locking granularity 3023 * of a larger map 3024 * => the range specified must have been mapped previously with a uvm_map() 3025 * call [with uobj==NULL] to create a blank map entry in the main map. 3026 * [And it had better still be blank!] 3027 * => maps which contain submaps should never be copied or forked. 3028 * => to remove a submap, use uvm_unmap() on the main map 3029 * and then uvm_map_deallocate() the submap. 3030 * => main map must be unlocked. 3031 * => submap must have been init'd and have a zero reference count. 3032 * [need not be locked as we don't actually reference it] 3033 */ 3034 3035 int 3036 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3037 struct vm_map *submap) 3038 { 3039 struct vm_map_entry *entry; 3040 struct uvm_mapent_reservation umr; 3041 int error; 3042 3043 uvm_mapent_reserve(map, &umr, 2, 0); 3044 3045 vm_map_lock(map); 3046 VM_MAP_RANGE_CHECK(map, start, end); 3047 3048 if (uvm_map_lookup_entry(map, start, &entry)) { 3049 UVM_MAP_CLIP_START(map, entry, start, &umr); 3050 UVM_MAP_CLIP_END(map, entry, end, &umr); /* to be safe */ 3051 } else { 3052 entry = NULL; 3053 } 3054 3055 if (entry != NULL && 3056 entry->start == start && entry->end == end && 3057 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 3058 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 3059 entry->etype |= UVM_ET_SUBMAP; 3060 entry->object.sub_map = submap; 3061 entry->offset = 0; 3062 uvm_map_reference(submap); 3063 error = 0; 3064 } else { 3065 error = EINVAL; 3066 } 3067 vm_map_unlock(map); 3068 3069 uvm_mapent_unreserve(map, &umr); 3070 3071 return error; 3072 } 3073 3074 /* 3075 * uvm_map_setup_kernel: init in-kernel map 3076 * 3077 * => map must not be in service yet. 3078 */ 3079 3080 void 3081 uvm_map_setup_kernel(struct vm_map_kernel *map, 3082 vaddr_t vmin, vaddr_t vmax, int flags) 3083 { 3084 3085 uvm_map_setup(&map->vmk_map, vmin, vmax, flags); 3086 callback_head_init(&map->vmk_reclaim_callback, IPL_VM); 3087 LIST_INIT(&map->vmk_kentry_free); 3088 map->vmk_merged_entries = NULL; 3089 } 3090 3091 3092 /* 3093 * uvm_map_protect: change map protection 3094 * 3095 * => set_max means set max_protection. 3096 * => map must be unlocked. 3097 */ 3098 3099 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? 
\ 3100 ~VM_PROT_WRITE : VM_PROT_ALL) 3101 3102 int 3103 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3104 vm_prot_t new_prot, bool set_max) 3105 { 3106 struct vm_map_entry *current, *entry; 3107 int error = 0; 3108 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 3109 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", 3110 map, start, end, new_prot); 3111 3112 vm_map_lock(map); 3113 VM_MAP_RANGE_CHECK(map, start, end); 3114 if (uvm_map_lookup_entry(map, start, &entry)) { 3115 UVM_MAP_CLIP_START(map, entry, start, NULL); 3116 } else { 3117 entry = entry->next; 3118 } 3119 3120 /* 3121 * make a first pass to check for protection violations. 3122 */ 3123 3124 current = entry; 3125 while ((current != &map->header) && (current->start < end)) { 3126 if (UVM_ET_ISSUBMAP(current)) { 3127 error = EINVAL; 3128 goto out; 3129 } 3130 if ((new_prot & current->max_protection) != new_prot) { 3131 error = EACCES; 3132 goto out; 3133 } 3134 /* 3135 * Don't allow VM_PROT_EXECUTE to be set on entries that 3136 * point to vnodes that are associated with a NOEXEC file 3137 * system. 3138 */ 3139 if (UVM_ET_ISOBJ(current) && 3140 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 3141 struct vnode *vp = 3142 (struct vnode *) current->object.uvm_obj; 3143 3144 if ((new_prot & VM_PROT_EXECUTE) != 0 && 3145 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 3146 error = EACCES; 3147 goto out; 3148 } 3149 } 3150 3151 current = current->next; 3152 } 3153 3154 /* go back and fix up protections (no need to clip this time). */ 3155 3156 current = entry; 3157 while ((current != &map->header) && (current->start < end)) { 3158 vm_prot_t old_prot; 3159 3160 UVM_MAP_CLIP_END(map, current, end, NULL); 3161 old_prot = current->protection; 3162 if (set_max) 3163 current->protection = 3164 (current->max_protection = new_prot) & old_prot; 3165 else 3166 current->protection = new_prot; 3167 3168 /* 3169 * update physical map if necessary. worry about copy-on-write 3170 * here -- CHECK THIS XXX 3171 */ 3172 3173 if (current->protection != old_prot) { 3174 /* update pmap! */ 3175 pmap_protect(map->pmap, current->start, current->end, 3176 current->protection & MASK(entry)); 3177 3178 /* 3179 * If this entry points at a vnode, and the 3180 * protection includes VM_PROT_EXECUTE, mark 3181 * the vnode as VEXECMAP. 3182 */ 3183 if (UVM_ET_ISOBJ(current)) { 3184 struct uvm_object *uobj = 3185 current->object.uvm_obj; 3186 3187 if (UVM_OBJ_IS_VNODE(uobj) && 3188 (current->protection & VM_PROT_EXECUTE)) { 3189 vn_markexec((struct vnode *) uobj); 3190 } 3191 } 3192 } 3193 3194 /* 3195 * If the map is configured to lock any future mappings, 3196 * wire this entry now if the old protection was VM_PROT_NONE 3197 * and the new protection is not VM_PROT_NONE. 3198 */ 3199 3200 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3201 VM_MAPENT_ISWIRED(entry) == 0 && 3202 old_prot == VM_PROT_NONE && 3203 new_prot != VM_PROT_NONE) { 3204 if (uvm_map_pageable(map, entry->start, 3205 entry->end, false, 3206 UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 3207 3208 /* 3209 * If locking the entry fails, remember the 3210 * error if it's the first one. Note we 3211 * still continue setting the protection in 3212 * the map, but will return the error 3213 * condition regardless. 3214 * 3215 * XXX Ignore what the actual error is, 3216 * XXX just call it a resource shortage 3217 * XXX so that it doesn't get confused 3218 * XXX what uvm_map_protect() itself would 3219 * XXX normally return. 
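 *
 * (a concrete way to reach this path, for illustration: a process
 * that has done mlockall(MCL_FUTURE), which sets VM_MAP_WIREFUTURE
 * in uvm_map_pageable_all() below, later uses mprotect(2) to give a
 * formerly PROT_NONE region real permissions. the wiring attempt
 * happens here, and if it fails the protection change still sticks
 * but the caller sees ENOMEM.)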
3220 */ 3221 3222 error = ENOMEM; 3223 } 3224 } 3225 current = current->next; 3226 } 3227 pmap_update(map->pmap); 3228 3229 out: 3230 vm_map_unlock(map); 3231 3232 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); 3233 return error; 3234 } 3235 3236 #undef MASK 3237 3238 /* 3239 * uvm_map_inherit: set inheritance code for range of addrs in map. 3240 * 3241 * => map must be unlocked 3242 * => note that the inherit code is used during a "fork". see fork 3243 * code for details. 3244 */ 3245 3246 int 3247 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3248 vm_inherit_t new_inheritance) 3249 { 3250 struct vm_map_entry *entry, *temp_entry; 3251 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 3252 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", 3253 map, start, end, new_inheritance); 3254 3255 switch (new_inheritance) { 3256 case MAP_INHERIT_NONE: 3257 case MAP_INHERIT_COPY: 3258 case MAP_INHERIT_SHARE: 3259 break; 3260 default: 3261 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3262 return EINVAL; 3263 } 3264 3265 vm_map_lock(map); 3266 VM_MAP_RANGE_CHECK(map, start, end); 3267 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3268 entry = temp_entry; 3269 UVM_MAP_CLIP_START(map, entry, start, NULL); 3270 } else { 3271 entry = temp_entry->next; 3272 } 3273 while ((entry != &map->header) && (entry->start < end)) { 3274 UVM_MAP_CLIP_END(map, entry, end, NULL); 3275 entry->inheritance = new_inheritance; 3276 entry = entry->next; 3277 } 3278 vm_map_unlock(map); 3279 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3280 return 0; 3281 } 3282 3283 /* 3284 * uvm_map_advice: set advice code for range of addrs in map. 3285 * 3286 * => map must be unlocked 3287 */ 3288 3289 int 3290 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3291 { 3292 struct vm_map_entry *entry, *temp_entry; 3293 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 3294 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", 3295 map, start, end, new_advice); 3296 3297 vm_map_lock(map); 3298 VM_MAP_RANGE_CHECK(map, start, end); 3299 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3300 entry = temp_entry; 3301 UVM_MAP_CLIP_START(map, entry, start, NULL); 3302 } else { 3303 entry = temp_entry->next; 3304 } 3305 3306 /* 3307 * XXXJRT: disallow holes? 3308 */ 3309 3310 while ((entry != &map->header) && (entry->start < end)) { 3311 UVM_MAP_CLIP_END(map, entry, end, NULL); 3312 3313 switch (new_advice) { 3314 case MADV_NORMAL: 3315 case MADV_RANDOM: 3316 case MADV_SEQUENTIAL: 3317 /* nothing special here */ 3318 break; 3319 3320 default: 3321 vm_map_unlock(map); 3322 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3323 return EINVAL; 3324 } 3325 entry->advice = new_advice; 3326 entry = entry->next; 3327 } 3328 3329 vm_map_unlock(map); 3330 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3331 return 0; 3332 } 3333 3334 /* 3335 * uvm_map_pageable: sets the pageability of a range in a map. 3336 * 3337 * => wires map entries. should not be used for transient page locking. 3338 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3339 * => regions specified as not pageable require lock-down (wired) memory 3340 * and page tables. 3341 * => map must never be read-locked 3342 * => if islocked is true, map is already write-locked 3343 * => we always unlock the map, since we must downgrade to a read-lock 3344 * to call uvm_fault_wire() 3345 * => XXXCDC: check this and try and clean it up. 
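 *
 * a rough usage sketch (not lifted from any particular caller; "p",
 * "addr" and "size" are stand-ins for the caller's process and range):
 *
 *	struct vm_map *m = &p->p_vmspace->vm_map;
 *	vaddr_t start = trunc_page(addr);
 *	vaddr_t end = round_page(addr + size);
 *
 *	error = uvm_map_pageable(m, start, end, false, 0);	wire it
 *	...
 *	error = uvm_map_pageable(m, start, end, true, 0);	unwire it
 *
 * i.e. new_pageable == false wires the range down (mlock style) and
 * new_pageable == true makes it pageable again (munlock style).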
3346 */ 3347 3348 int 3349 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3350 bool new_pageable, int lockflags) 3351 { 3352 struct vm_map_entry *entry, *start_entry, *failed_entry; 3353 int rv; 3354 #ifdef DIAGNOSTIC 3355 u_int timestamp_save; 3356 #endif 3357 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 3358 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", 3359 map, start, end, new_pageable); 3360 KASSERT(map->flags & VM_MAP_PAGEABLE); 3361 3362 if ((lockflags & UVM_LK_ENTER) == 0) 3363 vm_map_lock(map); 3364 VM_MAP_RANGE_CHECK(map, start, end); 3365 3366 /* 3367 * only one pageability change may take place at one time, since 3368 * uvm_fault_wire assumes it will be called only once for each 3369 * wiring/unwiring. therefore, we have to make sure we're actually 3370 * changing the pageability for the entire region. we do so before 3371 * making any changes. 3372 */ 3373 3374 if (uvm_map_lookup_entry(map, start, &start_entry) == false) { 3375 if ((lockflags & UVM_LK_EXIT) == 0) 3376 vm_map_unlock(map); 3377 3378 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3379 return EFAULT; 3380 } 3381 entry = start_entry; 3382 3383 /* 3384 * handle wiring and unwiring separately. 3385 */ 3386 3387 if (new_pageable) { /* unwire */ 3388 UVM_MAP_CLIP_START(map, entry, start, NULL); 3389 3390 /* 3391 * unwiring. first ensure that the range to be unwired is 3392 * really wired down and that there are no holes. 3393 */ 3394 3395 while ((entry != &map->header) && (entry->start < end)) { 3396 if (entry->wired_count == 0 || 3397 (entry->end < end && 3398 (entry->next == &map->header || 3399 entry->next->start > entry->end))) { 3400 if ((lockflags & UVM_LK_EXIT) == 0) 3401 vm_map_unlock(map); 3402 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3403 return EINVAL; 3404 } 3405 entry = entry->next; 3406 } 3407 3408 /* 3409 * POSIX 1003.1b - a single munlock call unlocks a region, 3410 * regardless of the number of mlock calls made on that 3411 * region. 3412 */ 3413 3414 entry = start_entry; 3415 while ((entry != &map->header) && (entry->start < end)) { 3416 UVM_MAP_CLIP_END(map, entry, end, NULL); 3417 if (VM_MAPENT_ISWIRED(entry)) 3418 uvm_map_entry_unwire(map, entry); 3419 entry = entry->next; 3420 } 3421 if ((lockflags & UVM_LK_EXIT) == 0) 3422 vm_map_unlock(map); 3423 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3424 return 0; 3425 } 3426 3427 /* 3428 * wire case: in two passes [XXXCDC: ugly block of code here] 3429 * 3430 * 1: holding the write lock, we create any anonymous maps that need 3431 * to be created. then we clip each map entry to the region to 3432 * be wired and increment its wiring count. 3433 * 3434 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3435 * in the pages for any newly wired area (wired_count == 1). 3436 * 3437 * downgrading to a read lock for uvm_fault_wire avoids a possible 3438 * deadlock with another thread that may have faulted on one of 3439 * the pages to be wired (it would mark the page busy, blocking 3440 * us, then in turn block on the map lock that we hold). because 3441 * of problems in the recursive lock package, we cannot upgrade 3442 * to a write lock in vm_map_lookup. thus, any actions that 3443 * require the write lock must be done beforehand. because we 3444 * keep the read lock on the map, the copy-on-write status of the 3445 * entries we modify here cannot change. 
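 *
 * (the "downgrade to a read lock" above is realized just below as
 * vm_map_busy() followed by vm_map_unlock() around the
 * uvm_fault_wire() loop; on failure we take the write lock back with
 * vm_map_lock() and then vm_map_unbusy() before undoing the wiring.)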
3446 */ 3447 3448 while ((entry != &map->header) && (entry->start < end)) { 3449 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3450 3451 /* 3452 * perform actions of vm_map_lookup that need the 3453 * write lock on the map: create an anonymous map 3454 * for a copy-on-write region, or an anonymous map 3455 * for a zero-fill region. (XXXCDC: submap case 3456 * ok?) 3457 */ 3458 3459 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3460 if (UVM_ET_ISNEEDSCOPY(entry) && 3461 ((entry->max_protection & VM_PROT_WRITE) || 3462 (entry->object.uvm_obj == NULL))) { 3463 amap_copy(map, entry, 0, start, end); 3464 /* XXXCDC: wait OK? */ 3465 } 3466 } 3467 } 3468 UVM_MAP_CLIP_START(map, entry, start, NULL); 3469 UVM_MAP_CLIP_END(map, entry, end, NULL); 3470 entry->wired_count++; 3471 3472 /* 3473 * Check for holes 3474 */ 3475 3476 if (entry->protection == VM_PROT_NONE || 3477 (entry->end < end && 3478 (entry->next == &map->header || 3479 entry->next->start > entry->end))) { 3480 3481 /* 3482 * found one. amap creation actions do not need to 3483 * be undone, but the wired counts need to be restored. 3484 */ 3485 3486 while (entry != &map->header && entry->end > start) { 3487 entry->wired_count--; 3488 entry = entry->prev; 3489 } 3490 if ((lockflags & UVM_LK_EXIT) == 0) 3491 vm_map_unlock(map); 3492 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3493 return EINVAL; 3494 } 3495 entry = entry->next; 3496 } 3497 3498 /* 3499 * Pass 2. 3500 */ 3501 3502 #ifdef DIAGNOSTIC 3503 timestamp_save = map->timestamp; 3504 #endif 3505 vm_map_busy(map); 3506 vm_map_unlock(map); 3507 3508 rv = 0; 3509 entry = start_entry; 3510 while (entry != &map->header && entry->start < end) { 3511 if (entry->wired_count == 1) { 3512 rv = uvm_fault_wire(map, entry->start, entry->end, 3513 entry->max_protection, 1); 3514 if (rv) { 3515 3516 /* 3517 * wiring failed. break out of the loop. 3518 * we'll clean up the map below, once we 3519 * have a write lock again. 3520 */ 3521 3522 break; 3523 } 3524 } 3525 entry = entry->next; 3526 } 3527 3528 if (rv) { /* failed? */ 3529 3530 /* 3531 * Get back to an exclusive (write) lock. 3532 */ 3533 3534 vm_map_lock(map); 3535 vm_map_unbusy(map); 3536 3537 #ifdef DIAGNOSTIC 3538 if (timestamp_save + 1 != map->timestamp) 3539 panic("uvm_map_pageable: stale map"); 3540 #endif 3541 3542 /* 3543 * first drop the wiring count on all the entries 3544 * which haven't actually been wired yet. 3545 */ 3546 3547 failed_entry = entry; 3548 while (entry != &map->header && entry->start < end) { 3549 entry->wired_count--; 3550 entry = entry->next; 3551 } 3552 3553 /* 3554 * now, unwire all the entries that were successfully 3555 * wired above. 3556 */ 3557 3558 entry = start_entry; 3559 while (entry != failed_entry) { 3560 entry->wired_count--; 3561 if (VM_MAPENT_ISWIRED(entry) == 0) 3562 uvm_map_entry_unwire(map, entry); 3563 entry = entry->next; 3564 } 3565 if ((lockflags & UVM_LK_EXIT) == 0) 3566 vm_map_unlock(map); 3567 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); 3568 return (rv); 3569 } 3570 3571 if ((lockflags & UVM_LK_EXIT) == 0) { 3572 vm_map_unbusy(map); 3573 } else { 3574 3575 /* 3576 * Get back to an exclusive (write) lock. 3577 */ 3578 3579 vm_map_lock(map); 3580 vm_map_unbusy(map); 3581 } 3582 3583 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3584 return 0; 3585 } 3586 3587 /* 3588 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3589 * all mapped regions. 3590 * 3591 * => map must not be locked. 
3592 * => if no flags are specified, all regions are unwired. 3593 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3594 */ 3595 3596 int 3597 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3598 { 3599 struct vm_map_entry *entry, *failed_entry; 3600 vsize_t size; 3601 int rv; 3602 #ifdef DIAGNOSTIC 3603 u_int timestamp_save; 3604 #endif 3605 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 3606 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0); 3607 3608 KASSERT(map->flags & VM_MAP_PAGEABLE); 3609 3610 vm_map_lock(map); 3611 3612 /* 3613 * handle wiring and unwiring separately. 3614 */ 3615 3616 if (flags == 0) { /* unwire */ 3617 3618 /* 3619 * POSIX 1003.1b -- munlockall unlocks all regions, 3620 * regardless of how many times mlockall has been called. 3621 */ 3622 3623 for (entry = map->header.next; entry != &map->header; 3624 entry = entry->next) { 3625 if (VM_MAPENT_ISWIRED(entry)) 3626 uvm_map_entry_unwire(map, entry); 3627 } 3628 map->flags &= ~VM_MAP_WIREFUTURE; 3629 vm_map_unlock(map); 3630 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3631 return 0; 3632 } 3633 3634 if (flags & MCL_FUTURE) { 3635 3636 /* 3637 * must wire all future mappings; remember this. 3638 */ 3639 3640 map->flags |= VM_MAP_WIREFUTURE; 3641 } 3642 3643 if ((flags & MCL_CURRENT) == 0) { 3644 3645 /* 3646 * no more work to do! 3647 */ 3648 3649 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3650 vm_map_unlock(map); 3651 return 0; 3652 } 3653 3654 /* 3655 * wire case: in three passes [XXXCDC: ugly block of code here] 3656 * 3657 * 1: holding the write lock, count all pages mapped by non-wired 3658 * entries. if this would cause us to go over our limit, we fail. 3659 * 3660 * 2: still holding the write lock, we create any anonymous maps that 3661 * need to be created. then we increment its wiring count. 3662 * 3663 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3664 * in the pages for any newly wired area (wired_count == 1). 3665 * 3666 * downgrading to a read lock for uvm_fault_wire avoids a possible 3667 * deadlock with another thread that may have faulted on one of 3668 * the pages to be wired (it would mark the page busy, blocking 3669 * us, then in turn block on the map lock that we hold). because 3670 * of problems in the recursive lock package, we cannot upgrade 3671 * to a write lock in vm_map_lookup. thus, any actions that 3672 * require the write lock must be done beforehand. because we 3673 * keep the read lock on the map, the copy-on-write status of the 3674 * entries we modify here cannot change. 3675 */ 3676 3677 for (size = 0, entry = map->header.next; entry != &map->header; 3678 entry = entry->next) { 3679 if (entry->protection != VM_PROT_NONE && 3680 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3681 size += entry->end - entry->start; 3682 } 3683 } 3684 3685 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3686 vm_map_unlock(map); 3687 return ENOMEM; 3688 } 3689 3690 if (limit != 0 && 3691 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3692 vm_map_unlock(map); 3693 return ENOMEM; 3694 } 3695 3696 /* 3697 * Pass 2. 3698 */ 3699 3700 for (entry = map->header.next; entry != &map->header; 3701 entry = entry->next) { 3702 if (entry->protection == VM_PROT_NONE) 3703 continue; 3704 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? 
*/ 3705 3706 /* 3707 * perform actions of vm_map_lookup that need the 3708 * write lock on the map: create an anonymous map 3709 * for a copy-on-write region, or an anonymous map 3710 * for a zero-fill region. (XXXCDC: submap case 3711 * ok?) 3712 */ 3713 3714 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3715 if (UVM_ET_ISNEEDSCOPY(entry) && 3716 ((entry->max_protection & VM_PROT_WRITE) || 3717 (entry->object.uvm_obj == NULL))) { 3718 amap_copy(map, entry, 0, entry->start, 3719 entry->end); 3720 /* XXXCDC: wait OK? */ 3721 } 3722 } 3723 } 3724 entry->wired_count++; 3725 } 3726 3727 /* 3728 * Pass 3. 3729 */ 3730 3731 #ifdef DIAGNOSTIC 3732 timestamp_save = map->timestamp; 3733 #endif 3734 vm_map_busy(map); 3735 vm_map_unlock(map); 3736 3737 rv = 0; 3738 for (entry = map->header.next; entry != &map->header; 3739 entry = entry->next) { 3740 if (entry->wired_count == 1) { 3741 rv = uvm_fault_wire(map, entry->start, entry->end, 3742 entry->max_protection, 1); 3743 if (rv) { 3744 3745 /* 3746 * wiring failed. break out of the loop. 3747 * we'll clean up the map below, once we 3748 * have a write lock again. 3749 */ 3750 3751 break; 3752 } 3753 } 3754 } 3755 3756 if (rv) { 3757 3758 /* 3759 * Get back an exclusive (write) lock. 3760 */ 3761 3762 vm_map_lock(map); 3763 vm_map_unbusy(map); 3764 3765 #ifdef DIAGNOSTIC 3766 if (timestamp_save + 1 != map->timestamp) 3767 panic("uvm_map_pageable_all: stale map"); 3768 #endif 3769 3770 /* 3771 * first drop the wiring count on all the entries 3772 * which haven't actually been wired yet. 3773 * 3774 * Skip VM_PROT_NONE entries like we did above. 3775 */ 3776 3777 failed_entry = entry; 3778 for (/* nothing */; entry != &map->header; 3779 entry = entry->next) { 3780 if (entry->protection == VM_PROT_NONE) 3781 continue; 3782 entry->wired_count--; 3783 } 3784 3785 /* 3786 * now, unwire all the entries that were successfully 3787 * wired above. 3788 * 3789 * Skip VM_PROT_NONE entries like we did above. 3790 */ 3791 3792 for (entry = map->header.next; entry != failed_entry; 3793 entry = entry->next) { 3794 if (entry->protection == VM_PROT_NONE) 3795 continue; 3796 entry->wired_count--; 3797 if (VM_MAPENT_ISWIRED(entry)) 3798 uvm_map_entry_unwire(map, entry); 3799 } 3800 vm_map_unlock(map); 3801 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0); 3802 return (rv); 3803 } 3804 3805 vm_map_unbusy(map); 3806 3807 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3808 return 0; 3809 } 3810 3811 /* 3812 * uvm_map_clean: clean out a map range 3813 * 3814 * => valid flags: 3815 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3816 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3817 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3818 * if (flags & PGO_FREE): any cached pages are freed after clean 3819 * => returns an error if any part of the specified range isn't mapped 3820 * => never a need to flush amap layer since the anonymous memory has 3821 * no permanent home, but may deactivate pages there 3822 * => called from sys_msync() and sys_madvise() 3823 * => caller must not write-lock map (read OK). 
3824 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3825 */ 3826 3827 int 3828 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3829 { 3830 struct vm_map_entry *current, *entry; 3831 struct uvm_object *uobj; 3832 struct vm_amap *amap; 3833 struct vm_anon *anon; 3834 struct vm_page *pg; 3835 vaddr_t offset; 3836 vsize_t size; 3837 voff_t uoff; 3838 int error, refs; 3839 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3840 3841 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", 3842 map, start, end, flags); 3843 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3844 (PGO_FREE|PGO_DEACTIVATE)); 3845 3846 vm_map_lock_read(map); 3847 VM_MAP_RANGE_CHECK(map, start, end); 3848 if (uvm_map_lookup_entry(map, start, &entry) == false) { 3849 vm_map_unlock_read(map); 3850 return EFAULT; 3851 } 3852 3853 /* 3854 * Make a first pass to check for holes and wiring problems. 3855 */ 3856 3857 for (current = entry; current->start < end; current = current->next) { 3858 if (UVM_ET_ISSUBMAP(current)) { 3859 vm_map_unlock_read(map); 3860 return EINVAL; 3861 } 3862 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3863 vm_map_unlock_read(map); 3864 return EBUSY; 3865 } 3866 if (end <= current->end) { 3867 break; 3868 } 3869 if (current->end != current->next->start) { 3870 vm_map_unlock_read(map); 3871 return EFAULT; 3872 } 3873 } 3874 3875 error = 0; 3876 for (current = entry; start < end; current = current->next) { 3877 amap = current->aref.ar_amap; /* top layer */ 3878 uobj = current->object.uvm_obj; /* bottom layer */ 3879 KASSERT(start >= current->start); 3880 3881 /* 3882 * No amap cleaning necessary if: 3883 * 3884 * (1) There's no amap. 3885 * 3886 * (2) We're not deactivating or freeing pages. 3887 */ 3888 3889 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 3890 goto flush_object; 3891 3892 amap_lock(amap); 3893 offset = start - current->start; 3894 size = MIN(end, current->end) - start; 3895 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { 3896 anon = amap_lookup(¤t->aref, offset); 3897 if (anon == NULL) 3898 continue; 3899 3900 mutex_enter(&anon->an_lock); 3901 pg = anon->an_page; 3902 if (pg == NULL) { 3903 mutex_exit(&anon->an_lock); 3904 continue; 3905 } 3906 3907 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 3908 3909 /* 3910 * In these first 3 cases, we just deactivate the page. 3911 */ 3912 3913 case PGO_CLEANIT|PGO_FREE: 3914 case PGO_CLEANIT|PGO_DEACTIVATE: 3915 case PGO_DEACTIVATE: 3916 deactivate_it: 3917 /* 3918 * skip the page if it's loaned or wired, 3919 * since it shouldn't be on a paging queue 3920 * at all in these cases. 3921 */ 3922 3923 mutex_enter(&uvm_pageqlock); 3924 if (pg->loan_count != 0 || 3925 pg->wire_count != 0) { 3926 mutex_exit(&uvm_pageqlock); 3927 mutex_exit(&anon->an_lock); 3928 continue; 3929 } 3930 KASSERT(pg->uanon == anon); 3931 uvm_pagedeactivate(pg); 3932 mutex_exit(&uvm_pageqlock); 3933 mutex_exit(&anon->an_lock); 3934 continue; 3935 3936 case PGO_FREE: 3937 3938 /* 3939 * If there are multiple references to 3940 * the amap, just deactivate the page. 
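 *
 * (the reason: with more than one reference the amap is shared, so
 * the anon is not ours alone to free; deactivating the page is safe
 * for every sharer, while freeing it would not be.)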
3941 */ 3942 3943 if (amap_refs(amap) > 1) 3944 goto deactivate_it; 3945 3946 /* skip the page if it's wired */ 3947 if (pg->wire_count != 0) { 3948 mutex_exit(&anon->an_lock); 3949 continue; 3950 } 3951 amap_unadd(¤t->aref, offset); 3952 refs = --anon->an_ref; 3953 mutex_exit(&anon->an_lock); 3954 if (refs == 0) 3955 uvm_anfree(anon); 3956 continue; 3957 } 3958 } 3959 amap_unlock(amap); 3960 3961 flush_object: 3962 /* 3963 * flush pages if we've got a valid backing object. 3964 * note that we must always clean object pages before 3965 * freeing them since otherwise we could reveal stale 3966 * data from files. 3967 */ 3968 3969 uoff = current->offset + (start - current->start); 3970 size = MIN(end, current->end) - start; 3971 if (uobj != NULL) { 3972 mutex_enter(&uobj->vmobjlock); 3973 if (uobj->pgops->pgo_put != NULL) 3974 error = (uobj->pgops->pgo_put)(uobj, uoff, 3975 uoff + size, flags | PGO_CLEANIT); 3976 else 3977 error = 0; 3978 } 3979 start += size; 3980 } 3981 vm_map_unlock_read(map); 3982 return (error); 3983 } 3984 3985 3986 /* 3987 * uvm_map_checkprot: check protection in map 3988 * 3989 * => must allow specified protection in a fully allocated region. 3990 * => map must be read or write locked by caller. 3991 */ 3992 3993 bool 3994 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3995 vm_prot_t protection) 3996 { 3997 struct vm_map_entry *entry; 3998 struct vm_map_entry *tmp_entry; 3999 4000 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { 4001 return (false); 4002 } 4003 entry = tmp_entry; 4004 while (start < end) { 4005 if (entry == &map->header) { 4006 return (false); 4007 } 4008 4009 /* 4010 * no holes allowed 4011 */ 4012 4013 if (start < entry->start) { 4014 return (false); 4015 } 4016 4017 /* 4018 * check protection associated with entry 4019 */ 4020 4021 if ((entry->protection & protection) != protection) { 4022 return (false); 4023 } 4024 start = entry->end; 4025 entry = entry->next; 4026 } 4027 return (true); 4028 } 4029 4030 /* 4031 * uvmspace_alloc: allocate a vmspace structure. 4032 * 4033 * - structure includes vm_map and pmap 4034 * - XXX: no locking on this structure 4035 * - refcnt set to 1, rest must be init'd by caller 4036 */ 4037 struct vmspace * 4038 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) 4039 { 4040 struct vmspace *vm; 4041 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 4042 4043 vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); 4044 uvmspace_init(vm, NULL, vmin, vmax); 4045 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); 4046 return (vm); 4047 } 4048 4049 /* 4050 * uvmspace_init: initialize a vmspace structure. 4051 * 4052 * - XXX: no locking on this structure 4053 * - refcnt set to 1, rest must be init'd by caller 4054 */ 4055 void 4056 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) 4057 { 4058 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 4059 4060 memset(vm, 0, sizeof(*vm)); 4061 uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 4062 #ifdef __USING_TOPDOWN_VM 4063 | VM_MAP_TOPDOWN 4064 #endif 4065 ); 4066 if (pmap) 4067 pmap_reference(pmap); 4068 else 4069 pmap = pmap_create(); 4070 vm->vm_map.pmap = pmap; 4071 vm->vm_refcnt = 1; 4072 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4073 } 4074 4075 /* 4076 * uvmspace_share: share a vmspace between two processes 4077 * 4078 * - used for vfork, threads(?) 
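 *
 * a minimal usage sketch ("parent" and "child" are stand-ins for the
 * two processes involved, typically on the vfork path):
 *
 *	uvmspace_share(parent, child);
 *
 * afterwards both processes run on the same struct vmspace; the extra
 * reference taken here is dropped again via uvmspace_free() when the
 * sharer goes away.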
4079 */ 4080 4081 void 4082 uvmspace_share(struct proc *p1, struct proc *p2) 4083 { 4084 4085 uvmspace_addref(p1->p_vmspace); 4086 p2->p_vmspace = p1->p_vmspace; 4087 } 4088 4089 /* 4090 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 4091 * 4092 * - XXX: no locking on vmspace 4093 */ 4094 4095 void 4096 uvmspace_unshare(struct lwp *l) 4097 { 4098 struct proc *p = l->l_proc; 4099 struct vmspace *nvm, *ovm = p->p_vmspace; 4100 4101 if (ovm->vm_refcnt == 1) 4102 /* nothing to do: vmspace isn't shared in the first place */ 4103 return; 4104 4105 /* make a new vmspace, still holding old one */ 4106 nvm = uvmspace_fork(ovm); 4107 4108 kpreempt_disable(); 4109 pmap_deactivate(l); /* unbind old vmspace */ 4110 p->p_vmspace = nvm; 4111 pmap_activate(l); /* switch to new vmspace */ 4112 kpreempt_enable(); 4113 4114 uvmspace_free(ovm); /* drop reference to old vmspace */ 4115 } 4116 4117 /* 4118 * uvmspace_exec: the process wants to exec a new program 4119 */ 4120 4121 void 4122 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end) 4123 { 4124 struct proc *p = l->l_proc; 4125 struct vmspace *nvm, *ovm = p->p_vmspace; 4126 struct vm_map *map = &ovm->vm_map; 4127 4128 #ifdef __sparc__ 4129 /* XXX cgd 960926: the sparc #ifdef should be a MD hook */ 4130 kill_user_windows(l); /* before stack addresses go away */ 4131 #endif 4132 4133 /* 4134 * see if more than one process is using this vmspace... 4135 */ 4136 4137 if (ovm->vm_refcnt == 1) { 4138 4139 /* 4140 * if p is the only process using its vmspace then we can safely 4141 * recycle that vmspace for the program that is being exec'd. 4142 */ 4143 4144 #ifdef SYSVSHM 4145 /* 4146 * SYSV SHM semantics require us to kill all segments on an exec 4147 */ 4148 4149 if (ovm->vm_shm) 4150 shmexit(ovm); 4151 #endif 4152 4153 /* 4154 * POSIX 1003.1b -- "lock future mappings" is revoked 4155 * when a process execs another program image. 4156 */ 4157 4158 map->flags &= ~VM_MAP_WIREFUTURE; 4159 4160 /* 4161 * now unmap the old program 4162 */ 4163 4164 pmap_remove_all(map->pmap); 4165 uvm_unmap(map, vm_map_min(map), vm_map_max(map)); 4166 KASSERT(map->header.prev == &map->header); 4167 KASSERT(map->nentries == 0); 4168 4169 /* 4170 * resize the map 4171 */ 4172 4173 vm_map_setmin(map, start); 4174 vm_map_setmax(map, end); 4175 } else { 4176 4177 /* 4178 * p's vmspace is being shared, so we can't reuse it for p since 4179 * it is still being used for others. allocate a new vmspace 4180 * for p 4181 */ 4182 4183 nvm = uvmspace_alloc(start, end); 4184 4185 /* 4186 * install new vmspace and drop our ref to the old one. 4187 */ 4188 4189 kpreempt_disable(); 4190 pmap_deactivate(l); 4191 p->p_vmspace = nvm; 4192 pmap_activate(l); 4193 kpreempt_enable(); 4194 4195 uvmspace_free(ovm); 4196 } 4197 } 4198 4199 /* 4200 * uvmspace_addref: add a referece to a vmspace. 
4201 */ 4202 4203 void 4204 uvmspace_addref(struct vmspace *vm) 4205 { 4206 struct vm_map *map = &vm->vm_map; 4207 4208 KASSERT((map->flags & VM_MAP_DYING) == 0); 4209 4210 mutex_enter(&map->misc_lock); 4211 KASSERT(vm->vm_refcnt > 0); 4212 vm->vm_refcnt++; 4213 mutex_exit(&map->misc_lock); 4214 } 4215 4216 /* 4217 * uvmspace_free: free a vmspace data structure 4218 */ 4219 4220 void 4221 uvmspace_free(struct vmspace *vm) 4222 { 4223 struct vm_map_entry *dead_entries; 4224 struct vm_map *map = &vm->vm_map; 4225 int n; 4226 4227 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); 4228 4229 UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); 4230 mutex_enter(&map->misc_lock); 4231 n = --vm->vm_refcnt; 4232 mutex_exit(&map->misc_lock); 4233 if (n > 0) 4234 return; 4235 4236 /* 4237 * at this point, there should be no other references to the map. 4238 * delete all of the mappings, then destroy the pmap. 4239 */ 4240 4241 map->flags |= VM_MAP_DYING; 4242 pmap_remove_all(map->pmap); 4243 #ifdef SYSVSHM 4244 /* Get rid of any SYSV shared memory segments. */ 4245 if (vm->vm_shm != NULL) 4246 shmexit(vm); 4247 #endif 4248 if (map->nentries) { 4249 uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map), 4250 &dead_entries, NULL, 0); 4251 if (dead_entries != NULL) 4252 uvm_unmap_detach(dead_entries, 0); 4253 } 4254 KASSERT(map->nentries == 0); 4255 KASSERT(map->size == 0); 4256 mutex_destroy(&map->misc_lock); 4257 mutex_destroy(&map->mutex); 4258 rw_destroy(&map->lock); 4259 cv_destroy(&map->cv); 4260 pmap_destroy(map->pmap); 4261 pool_cache_put(&uvm_vmspace_cache, vm); 4262 } 4263 4264 /* 4265 * F O R K - m a i n e n t r y p o i n t 4266 */ 4267 /* 4268 * uvmspace_fork: fork a process' main map 4269 * 4270 * => create a new vmspace for child process from parent. 4271 * => parent's map must not be locked. 4272 */ 4273 4274 struct vmspace * 4275 uvmspace_fork(struct vmspace *vm1) 4276 { 4277 struct vmspace *vm2; 4278 struct vm_map *old_map = &vm1->vm_map; 4279 struct vm_map *new_map; 4280 struct vm_map_entry *old_entry; 4281 struct vm_map_entry *new_entry; 4282 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); 4283 4284 vm_map_lock(old_map); 4285 4286 vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map)); 4287 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4288 (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy); 4289 new_map = &vm2->vm_map; /* XXX */ 4290 4291 old_entry = old_map->header.next; 4292 new_map->size = old_map->size; 4293 4294 /* 4295 * go entry-by-entry 4296 */ 4297 4298 while (old_entry != &old_map->header) { 4299 4300 /* 4301 * first, some sanity checks on the old entry 4302 */ 4303 4304 KASSERT(!UVM_ET_ISSUBMAP(old_entry)); 4305 KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || 4306 !UVM_ET_ISNEEDSCOPY(old_entry)); 4307 4308 switch (old_entry->inheritance) { 4309 case MAP_INHERIT_NONE: 4310 4311 /* 4312 * drop the mapping, modify size 4313 */ 4314 new_map->size -= old_entry->end - old_entry->start; 4315 break; 4316 4317 case MAP_INHERIT_SHARE: 4318 4319 /* 4320 * share the mapping: this means we want the old and 4321 * new entries to share amaps and backing objects. 4322 */ 4323 /* 4324 * if the old_entry needs a new amap (due to prev fork) 4325 * then we need to allocate it now so that we have 4326 * something we own to share with the new_entry. 
[in 4327 * other words, we need to clear needs_copy] 4328 */ 4329 4330 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 4331 /* get our own amap, clears needs_copy */ 4332 amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK, 4333 0, 0); 4334 /* XXXCDC: WAITOK??? */ 4335 } 4336 4337 new_entry = uvm_mapent_alloc(new_map, 0); 4338 /* old_entry -> new_entry */ 4339 uvm_mapent_copy(old_entry, new_entry); 4340 4341 /* new pmap has nothing wired in it */ 4342 new_entry->wired_count = 0; 4343 4344 /* 4345 * gain reference to object backing the map (can't 4346 * be a submap, already checked this case). 4347 */ 4348 4349 if (new_entry->aref.ar_amap) 4350 uvm_map_reference_amap(new_entry, AMAP_SHARED); 4351 4352 if (new_entry->object.uvm_obj && 4353 new_entry->object.uvm_obj->pgops->pgo_reference) 4354 new_entry->object.uvm_obj-> 4355 pgops->pgo_reference( 4356 new_entry->object.uvm_obj); 4357 4358 /* insert entry at end of new_map's entry list */ 4359 uvm_map_entry_link(new_map, new_map->header.prev, 4360 new_entry); 4361 4362 break; 4363 4364 case MAP_INHERIT_COPY: 4365 4366 /* 4367 * copy-on-write the mapping (using mmap's 4368 * MAP_PRIVATE semantics) 4369 * 4370 * allocate new_entry, adjust reference counts. 4371 * (note that new references are read-only). 4372 */ 4373 4374 new_entry = uvm_mapent_alloc(new_map, 0); 4375 /* old_entry -> new_entry */ 4376 uvm_mapent_copy(old_entry, new_entry); 4377 4378 if (new_entry->aref.ar_amap) 4379 uvm_map_reference_amap(new_entry, 0); 4380 4381 if (new_entry->object.uvm_obj && 4382 new_entry->object.uvm_obj->pgops->pgo_reference) 4383 new_entry->object.uvm_obj->pgops->pgo_reference 4384 (new_entry->object.uvm_obj); 4385 4386 /* new pmap has nothing wired in it */ 4387 new_entry->wired_count = 0; 4388 4389 new_entry->etype |= 4390 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4391 uvm_map_entry_link(new_map, new_map->header.prev, 4392 new_entry); 4393 4394 /* 4395 * the new entry will need an amap. it will either 4396 * need to be copied from the old entry or created 4397 * from scratch (if the old entry does not have an 4398 * amap). can we defer this process until later 4399 * (by setting "needs_copy") or do we need to copy 4400 * the amap now? 4401 * 4402 * we must copy the amap now if any of the following 4403 * conditions hold: 4404 * 1. the old entry has an amap and that amap is 4405 * being shared. this means that the old (parent) 4406 * process is sharing the amap with another 4407 * process. if we do not clear needs_copy here 4408 * we will end up in a situation where both the 4409 * parent and child process are refering to the 4410 * same amap with "needs_copy" set. if the 4411 * parent write-faults, the fault routine will 4412 * clear "needs_copy" in the parent by allocating 4413 * a new amap. this is wrong because the 4414 * parent is supposed to be sharing the old amap 4415 * and the new amap will break that. 4416 * 4417 * 2. if the old entry has an amap and a non-zero 4418 * wire count then we are going to have to call 4419 * amap_cow_now to avoid page faults in the 4420 * parent process. since amap_cow_now requires 4421 * "needs_copy" to be clear we might as well 4422 * clear it here as well. 4423 * 4424 */ 4425 4426 if (old_entry->aref.ar_amap != NULL) { 4427 if ((amap_flags(old_entry->aref.ar_amap) & 4428 AMAP_SHARED) != 0 || 4429 VM_MAPENT_ISWIRED(old_entry)) { 4430 4431 amap_copy(new_map, new_entry, 4432 AMAP_COPY_NOCHUNK, 0, 0); 4433 /* XXXCDC: M_WAITOK ... ok? 
*/ 4434 } 4435 } 4436 4437 /* 4438 * if the parent's entry is wired down, then the 4439 * parent process does not want page faults on 4440 * access to that memory. this means that we 4441 * cannot do copy-on-write because we can't write 4442 * protect the old entry. in this case we 4443 * resolve all copy-on-write faults now, using 4444 * amap_cow_now. note that we have already 4445 * allocated any needed amap (above). 4446 */ 4447 4448 if (VM_MAPENT_ISWIRED(old_entry)) { 4449 4450 /* 4451 * resolve all copy-on-write faults now 4452 * (note that there is nothing to do if 4453 * the old mapping does not have an amap). 4454 */ 4455 if (old_entry->aref.ar_amap) 4456 amap_cow_now(new_map, new_entry); 4457 4458 } else { 4459 4460 /* 4461 * setup mappings to trigger copy-on-write faults 4462 * we must write-protect the parent if it has 4463 * an amap and it is not already "needs_copy"... 4464 * if it is already "needs_copy" then the parent 4465 * has already been write-protected by a previous 4466 * fork operation. 4467 */ 4468 4469 if (old_entry->aref.ar_amap && 4470 !UVM_ET_ISNEEDSCOPY(old_entry)) { 4471 if (old_entry->max_protection & VM_PROT_WRITE) { 4472 pmap_protect(old_map->pmap, 4473 old_entry->start, 4474 old_entry->end, 4475 old_entry->protection & 4476 ~VM_PROT_WRITE); 4477 } 4478 old_entry->etype |= UVM_ET_NEEDSCOPY; 4479 } 4480 } 4481 break; 4482 } /* end of switch statement */ 4483 old_entry = old_entry->next; 4484 } 4485 4486 pmap_update(old_map->pmap); 4487 vm_map_unlock(old_map); 4488 4489 #ifdef SYSVSHM 4490 if (vm1->vm_shm) 4491 shmfork(vm1, vm2); 4492 #endif 4493 4494 #ifdef PMAP_FORK 4495 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 4496 #endif 4497 4498 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4499 return (vm2); 4500 } 4501 4502 4503 /* 4504 * in-kernel map entry allocation. 
4505 */ 4506 4507 struct uvm_kmapent_hdr { 4508 LIST_ENTRY(uvm_kmapent_hdr) ukh_listq; 4509 int ukh_nused; 4510 struct vm_map_entry *ukh_freelist; 4511 struct vm_map *ukh_map; 4512 struct vm_map_entry ukh_entries[0]; 4513 }; 4514 4515 #define UVM_KMAPENT_CHUNK \ 4516 ((PAGE_SIZE - sizeof(struct uvm_kmapent_hdr)) \ 4517 / sizeof(struct vm_map_entry)) 4518 4519 #define UVM_KHDR_FIND(entry) \ 4520 ((struct uvm_kmapent_hdr *)(((vaddr_t)entry) & ~PAGE_MASK)) 4521 4522 4523 #ifdef DIAGNOSTIC 4524 static struct vm_map * 4525 uvm_kmapent_map(struct vm_map_entry *entry) 4526 { 4527 const struct uvm_kmapent_hdr *ukh; 4528 4529 ukh = UVM_KHDR_FIND(entry); 4530 return ukh->ukh_map; 4531 } 4532 #endif 4533 4534 static inline struct vm_map_entry * 4535 uvm_kmapent_get(struct uvm_kmapent_hdr *ukh) 4536 { 4537 struct vm_map_entry *entry; 4538 4539 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK); 4540 KASSERT(ukh->ukh_nused >= 0); 4541 4542 entry = ukh->ukh_freelist; 4543 if (entry) { 4544 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT)) 4545 == UVM_MAP_KERNEL); 4546 ukh->ukh_freelist = entry->next; 4547 ukh->ukh_nused++; 4548 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK); 4549 } else { 4550 KASSERT(ukh->ukh_nused == UVM_KMAPENT_CHUNK); 4551 } 4552 4553 return entry; 4554 } 4555 4556 static inline void 4557 uvm_kmapent_put(struct uvm_kmapent_hdr *ukh, struct vm_map_entry *entry) 4558 { 4559 4560 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT)) 4561 == UVM_MAP_KERNEL); 4562 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK); 4563 KASSERT(ukh->ukh_nused > 0); 4564 KASSERT(ukh->ukh_freelist != NULL || 4565 ukh->ukh_nused == UVM_KMAPENT_CHUNK); 4566 KASSERT(ukh->ukh_freelist == NULL || 4567 ukh->ukh_nused < UVM_KMAPENT_CHUNK); 4568 4569 ukh->ukh_nused--; 4570 entry->next = ukh->ukh_freelist; 4571 ukh->ukh_freelist = entry; 4572 } 4573 4574 /* 4575 * uvm_kmapent_alloc: allocate a map entry for in-kernel map 4576 */ 4577 4578 static struct vm_map_entry * 4579 uvm_kmapent_alloc(struct vm_map *map, int flags) 4580 { 4581 struct vm_page *pg; 4582 struct uvm_map_args args; 4583 struct uvm_kmapent_hdr *ukh; 4584 struct vm_map_entry *entry; 4585 uvm_flag_t mapflags = UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 4586 UVM_INH_NONE, UVM_ADV_RANDOM, flags | UVM_FLAG_NOMERGE); 4587 vaddr_t va; 4588 int error; 4589 int i; 4590 4591 KDASSERT(UVM_KMAPENT_CHUNK > 2); 4592 KDASSERT(kernel_map != NULL); 4593 KASSERT(vm_map_pmap(map) == pmap_kernel()); 4594 4595 UVMMAP_EVCNT_INCR(uke_alloc); 4596 entry = NULL; 4597 again: 4598 /* 4599 * try to grab an entry from freelist. 4600 */ 4601 mutex_spin_enter(&uvm_kentry_lock); 4602 ukh = LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free); 4603 if (ukh) { 4604 entry = uvm_kmapent_get(ukh); 4605 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK) 4606 LIST_REMOVE(ukh, ukh_listq); 4607 } 4608 mutex_spin_exit(&uvm_kentry_lock); 4609 4610 if (entry) 4611 return entry; 4612 4613 /* 4614 * there's no free entry for this vm_map. 4615 * now we need to allocate some vm_map_entry. 4616 * for simplicity, always allocate one page chunk of them at once. 4617 */ 4618 4619 pg = uvm_pagealloc(NULL, 0, NULL, 4620 (flags & UVM_KMF_NOWAIT) != 0 ? 
UVM_PGA_USERESERVE : 0);
4621 if (__predict_false(pg == NULL)) {
4622 if (flags & UVM_FLAG_NOWAIT)
4623 return NULL;
4624 uvm_wait("kme_alloc");
4625 goto again;
4626 }
4627
4628 error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, UVM_UNKNOWN_OFFSET,
4629 0, mapflags, &args);
4630 if (error) {
4631 uvm_pagefree(pg);
4632 return NULL;
4633 }
4634
4635 va = args.uma_start;
4636
4637 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
4638 VM_PROT_READ|VM_PROT_WRITE|PMAP_KMPAGE);
4639 pmap_update(vm_map_pmap(map));
4640
4641 ukh = (void *)va;
4642
4643 /*
4644 * use the first entry for ukh itself.
4645 */
4646
4647 entry = &ukh->ukh_entries[0];
4648 entry->flags = UVM_MAP_KERNEL | UVM_MAP_KMAPENT;
4649 error = uvm_map_enter(map, &args, entry);
4650 KASSERT(error == 0);
4651
4652 ukh->ukh_nused = UVM_KMAPENT_CHUNK;
4653 ukh->ukh_map = map;
4654 ukh->ukh_freelist = NULL;
4655 for (i = UVM_KMAPENT_CHUNK - 1; i >= 2; i--) {
4656 struct vm_map_entry *xentry = &ukh->ukh_entries[i];
4657
4658 xentry->flags = UVM_MAP_KERNEL;
4659 uvm_kmapent_put(ukh, xentry);
4660 }
4661 KASSERT(ukh->ukh_nused == 2);
4662
4663 mutex_spin_enter(&uvm_kentry_lock);
4664 LIST_INSERT_HEAD(&vm_map_to_kernel(map)->vmk_kentry_free,
4665 ukh, ukh_listq);
4666 mutex_spin_exit(&uvm_kentry_lock);
4667
4668 /*
4669 * return second entry.
4670 */
4671
4672 entry = &ukh->ukh_entries[1];
4673 entry->flags = UVM_MAP_KERNEL;
4674 UVMMAP_EVCNT_INCR(ukh_alloc);
4675 return entry;
4676 }
4677
4678 /*
4679 * uvm_kmapent_free: free map entry for in-kernel map
4680 */
4681
4682 static void
4683 uvm_kmapent_free(struct vm_map_entry *entry)
4684 {
4685 struct uvm_kmapent_hdr *ukh;
4686 struct vm_page *pg;
4687 struct vm_map *map;
4688 struct pmap *pmap;
4689 vaddr_t va;
4690 paddr_t pa;
4691 struct vm_map_entry *deadentry;
4692
4693 UVMMAP_EVCNT_INCR(uke_free);
4694 ukh = UVM_KHDR_FIND(entry);
4695 map = ukh->ukh_map;
4696
4697 mutex_spin_enter(&uvm_kentry_lock);
4698 uvm_kmapent_put(ukh, entry);
4699 if (ukh->ukh_nused > 1) {
4700 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK - 1)
4701 LIST_INSERT_HEAD(
4702 &vm_map_to_kernel(map)->vmk_kentry_free,
4703 ukh, ukh_listq);
4704 mutex_spin_exit(&uvm_kentry_lock);
4705 return;
4706 }
4707
4708 /*
4709 * now we can free this ukh.
4710 *
4711 * however, keep an empty ukh to avoid ping-pong.
4712 */
4713
4714 if (LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free) == ukh &&
4715 LIST_NEXT(ukh, ukh_listq) == NULL) {
4716 mutex_spin_exit(&uvm_kentry_lock);
4717 return;
4718 }
4719 LIST_REMOVE(ukh, ukh_listq);
4720 mutex_spin_exit(&uvm_kentry_lock);
4721
4722 KASSERT(ukh->ukh_nused == 1);
4723
4724 /*
4725 * remove map entry for ukh itself.
4726 */
4727
4728 va = (vaddr_t)ukh;
4729 KASSERT((va & PAGE_MASK) == 0);
4730 vm_map_lock(map);
4731 uvm_unmap_remove(map, va, va + PAGE_SIZE, &deadentry, NULL, 0);
4732 KASSERT(deadentry->flags & UVM_MAP_KERNEL);
4733 KASSERT(deadentry->flags & UVM_MAP_KMAPENT);
4734 KASSERT(deadentry->next == NULL);
4735 KASSERT(deadentry == &ukh->ukh_entries[0]);
4736
4737 /*
4738 * unmap the page from pmap and free it.
4739 */ 4740 4741 pmap = vm_map_pmap(map); 4742 KASSERT(pmap == pmap_kernel()); 4743 if (!pmap_extract(pmap, va, &pa)) 4744 panic("%s: no mapping", __func__); 4745 pmap_kremove(va, PAGE_SIZE); 4746 pmap_update(vm_map_pmap(map)); 4747 vm_map_unlock(map); 4748 pg = PHYS_TO_VM_PAGE(pa); 4749 uvm_pagefree(pg); 4750 UVMMAP_EVCNT_INCR(ukh_free); 4751 } 4752 4753 static vsize_t 4754 uvm_kmapent_overhead(vsize_t size) 4755 { 4756 4757 /* 4758 * - the max number of unmerged entries is howmany(size, PAGE_SIZE) 4759 * as the min allocation unit is PAGE_SIZE. 4760 * - UVM_KMAPENT_CHUNK "kmapent"s are allocated from a page. 4761 * one of them are used to map the page itself. 4762 */ 4763 4764 return howmany(howmany(size, PAGE_SIZE), (UVM_KMAPENT_CHUNK - 1)) * 4765 PAGE_SIZE; 4766 } 4767 4768 /* 4769 * map entry reservation 4770 */ 4771 4772 /* 4773 * uvm_mapent_reserve: reserve map entries for clipping before locking map. 4774 * 4775 * => needed when unmapping entries allocated without UVM_FLAG_QUANTUM. 4776 * => caller shouldn't hold map locked. 4777 */ 4778 int 4779 uvm_mapent_reserve(struct vm_map *map, struct uvm_mapent_reservation *umr, 4780 int nentries, int flags) 4781 { 4782 4783 umr->umr_nentries = 0; 4784 4785 if ((flags & UVM_FLAG_QUANTUM) != 0) 4786 return 0; 4787 4788 if (!VM_MAP_USE_KMAPENT(map)) 4789 return 0; 4790 4791 while (nentries--) { 4792 struct vm_map_entry *ent; 4793 ent = uvm_kmapent_alloc(map, flags); 4794 if (!ent) { 4795 uvm_mapent_unreserve(map, umr); 4796 return ENOMEM; 4797 } 4798 UMR_PUTENTRY(umr, ent); 4799 } 4800 4801 return 0; 4802 } 4803 4804 /* 4805 * uvm_mapent_unreserve: 4806 * 4807 * => caller shouldn't hold map locked. 4808 * => never fail or sleep. 4809 */ 4810 void 4811 uvm_mapent_unreserve(struct vm_map *map, struct uvm_mapent_reservation *umr) 4812 { 4813 4814 while (!UMR_EMPTY(umr)) 4815 uvm_kmapent_free(UMR_GETENTRY(umr)); 4816 } 4817 4818 /* 4819 * uvm_mapent_trymerge: try to merge an entry with its neighbors. 4820 * 4821 * => called with map locked. 4822 * => return non zero if successfully merged. 4823 */ 4824 4825 int 4826 uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags) 4827 { 4828 struct uvm_object *uobj; 4829 struct vm_map_entry *next; 4830 struct vm_map_entry *prev; 4831 vsize_t size; 4832 int merged = 0; 4833 bool copying; 4834 int newetype; 4835 4836 if (VM_MAP_USE_KMAPENT(map)) { 4837 return 0; 4838 } 4839 if (entry->aref.ar_amap != NULL) { 4840 return 0; 4841 } 4842 if ((entry->flags & UVM_MAP_NOMERGE) != 0) { 4843 return 0; 4844 } 4845 4846 uobj = entry->object.uvm_obj; 4847 size = entry->end - entry->start; 4848 copying = (flags & UVM_MERGE_COPYING) != 0; 4849 newetype = copying ? 
(entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype; 4850 4851 next = entry->next; 4852 if (next != &map->header && 4853 next->start == entry->end && 4854 ((copying && next->aref.ar_amap != NULL && 4855 amap_refs(next->aref.ar_amap) == 1) || 4856 (!copying && next->aref.ar_amap == NULL)) && 4857 UVM_ET_ISCOMPATIBLE(next, newetype, 4858 uobj, entry->flags, entry->protection, 4859 entry->max_protection, entry->inheritance, entry->advice, 4860 entry->wired_count) && 4861 (uobj == NULL || entry->offset + size == next->offset)) { 4862 int error; 4863 4864 if (copying) { 4865 error = amap_extend(next, size, 4866 AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS); 4867 } else { 4868 error = 0; 4869 } 4870 if (error == 0) { 4871 if (uobj) { 4872 if (uobj->pgops->pgo_detach) { 4873 uobj->pgops->pgo_detach(uobj); 4874 } 4875 } 4876 4877 entry->end = next->end; 4878 clear_hints(map, next); 4879 uvm_map_entry_unlink(map, next); 4880 if (copying) { 4881 entry->aref = next->aref; 4882 entry->etype &= ~UVM_ET_NEEDSCOPY; 4883 } 4884 uvm_map_check(map, "trymerge forwardmerge"); 4885 uvm_mapent_free_merged(map, next); 4886 merged++; 4887 } 4888 } 4889 4890 prev = entry->prev; 4891 if (prev != &map->header && 4892 prev->end == entry->start && 4893 ((copying && !merged && prev->aref.ar_amap != NULL && 4894 amap_refs(prev->aref.ar_amap) == 1) || 4895 (!copying && prev->aref.ar_amap == NULL)) && 4896 UVM_ET_ISCOMPATIBLE(prev, newetype, 4897 uobj, entry->flags, entry->protection, 4898 entry->max_protection, entry->inheritance, entry->advice, 4899 entry->wired_count) && 4900 (uobj == NULL || 4901 prev->offset + prev->end - prev->start == entry->offset)) { 4902 int error; 4903 4904 if (copying) { 4905 error = amap_extend(prev, size, 4906 AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS); 4907 } else { 4908 error = 0; 4909 } 4910 if (error == 0) { 4911 if (uobj) { 4912 if (uobj->pgops->pgo_detach) { 4913 uobj->pgops->pgo_detach(uobj); 4914 } 4915 entry->offset = prev->offset; 4916 } 4917 4918 entry->start = prev->start; 4919 clear_hints(map, prev); 4920 uvm_map_entry_unlink(map, prev); 4921 if (copying) { 4922 entry->aref = prev->aref; 4923 entry->etype &= ~UVM_ET_NEEDSCOPY; 4924 } 4925 uvm_map_check(map, "trymerge backmerge"); 4926 uvm_mapent_free_merged(map, prev); 4927 merged++; 4928 } 4929 } 4930 4931 return merged; 4932 } 4933 4934 #if defined(DDB) 4935 4936 /* 4937 * DDB hooks 4938 */ 4939 4940 /* 4941 * uvm_map_printit: actually prints the map 4942 */ 4943 4944 void 4945 uvm_map_printit(struct vm_map *map, bool full, 4946 void (*pr)(const char *, ...)) 4947 { 4948 struct vm_map_entry *entry; 4949 4950 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map), 4951 vm_map_max(map)); 4952 (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n", 4953 map->nentries, map->size, map->ref_count, map->timestamp, 4954 map->flags); 4955 (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap, 4956 pmap_resident_count(map->pmap), pmap_wired_count(map->pmap)); 4957 if (!full) 4958 return; 4959 for (entry = map->header.next; entry != &map->header; 4960 entry = entry->next) { 4961 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 4962 entry, entry->start, entry->end, entry->object.uvm_obj, 4963 (long long)entry->offset, entry->aref.ar_amap, 4964 entry->aref.ar_pageoff); 4965 (*pr)( 4966 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 4967 "wc=%d, adv=%d\n", 4968 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 4969 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 4970 (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', 4971 entry->protection, entry->max_protection, 4972 entry->inheritance, entry->wired_count, entry->advice); 4973 } 4974 } 4975 4976 /* 4977 * uvm_object_printit: actually prints the object 4978 */ 4979 4980 void 4981 uvm_object_printit(struct uvm_object *uobj, bool full, 4982 void (*pr)(const char *, ...)) 4983 { 4984 struct vm_page *pg; 4985 int cnt = 0; 4986 4987 (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ", 4988 uobj, mutex_owned(&uobj->vmobjlock), uobj->pgops, uobj->uo_npages); 4989 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 4990 (*pr)("refs=<SYSTEM>\n"); 4991 else 4992 (*pr)("refs=%d\n", uobj->uo_refs); 4993 4994 if (!full) { 4995 return; 4996 } 4997 (*pr)(" PAGES <pg,offset>:\n "); 4998 TAILQ_FOREACH(pg, &uobj->memq, listq.queue) { 4999 cnt++; 5000 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 5001 if ((cnt % 3) == 0) { 5002 (*pr)("\n "); 5003 } 5004 } 5005 if ((cnt % 3) != 0) { 5006 (*pr)("\n"); 5007 } 5008 } 5009 5010 /* 5011 * uvm_page_printit: actually print the page 5012 */ 5013 5014 static const char page_flagbits[] = UVM_PGFLAGBITS; 5015 static const char page_pqflagbits[] = UVM_PQFLAGBITS; 5016 5017 void 5018 uvm_page_printit(struct vm_page *pg, bool full, 5019 void (*pr)(const char *, ...)) 5020 { 5021 struct vm_page *tpg; 5022 struct uvm_object *uobj; 5023 struct pgflist *pgl; 5024 char pgbuf[128]; 5025 char pqbuf[128]; 5026 5027 (*pr)("PAGE %p:\n", pg); 5028 snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); 5029 snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags); 5030 (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n", 5031 pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); 5032 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 5033 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 5034 #if defined(UVM_PAGE_TRKOWN) 5035 if (pg->flags & PG_BUSY) 5036 (*pr)(" owning process = %d, tag=%s\n", 5037 pg->owner, pg->owner_tag); 5038 else 5039 (*pr)(" page not busy, no owner\n"); 5040 #else 5041 (*pr)(" [page ownership tracking disabled]\n"); 5042 #endif 5043 5044 if (!full) 5045 return; 5046 5047 /* cross-verify object/anon */ 5048 if ((pg->pqflags & PQ_FREE) == 0) { 5049 if (pg->pqflags & PQ_ANON) { 5050 if (pg->uanon == NULL || pg->uanon->an_page != pg) 5051 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 5052 (pg->uanon) ? pg->uanon->an_page : NULL); 5053 else 5054 (*pr)(" anon backpointer is OK\n"); 5055 } else { 5056 uobj = pg->uobject; 5057 if (uobj) { 5058 (*pr)(" checking object list\n"); 5059 TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) { 5060 if (tpg == pg) { 5061 break; 5062 } 5063 } 5064 if (tpg) 5065 (*pr)(" page found on object list\n"); 5066 else 5067 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 5068 } 5069 } 5070 } 5071 5072 /* cross-verify page queue */ 5073 if (pg->pqflags & PQ_FREE) { 5074 int fl = uvm_page_lookup_freelist(pg); 5075 int color = VM_PGCOLOR_BUCKET(pg); 5076 pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ 5077 ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN]; 5078 } else { 5079 pgl = NULL; 5080 } 5081 5082 if (pgl) { 5083 (*pr)(" checking pageq list\n"); 5084 LIST_FOREACH(tpg, pgl, pageq.list) { 5085 if (tpg == pg) { 5086 break; 5087 } 5088 } 5089 if (tpg) 5090 (*pr)(" page found on pageq list\n"); 5091 else 5092 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! 
<<<\n"); 5093 } 5094 } 5095 5096 /* 5097 * uvm_pages_printthem - print a summary of all managed pages 5098 */ 5099 5100 void 5101 uvm_page_printall(void (*pr)(const char *, ...)) 5102 { 5103 unsigned i; 5104 struct vm_page *pg; 5105 5106 (*pr)("%18s %4s %4s %18s %18s" 5107 #ifdef UVM_PAGE_TRKOWN 5108 " OWNER" 5109 #endif 5110 "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON"); 5111 for (i = 0; i < vm_nphysseg; i++) { 5112 for (pg = vm_physmem[i].pgs; pg <= vm_physmem[i].lastpg; pg++) { 5113 (*pr)("%18p %04x %04x %18p %18p", 5114 pg, pg->flags, pg->pqflags, pg->uobject, 5115 pg->uanon); 5116 #ifdef UVM_PAGE_TRKOWN 5117 if (pg->flags & PG_BUSY) 5118 (*pr)(" %d [%s]", pg->owner, pg->owner_tag); 5119 #endif 5120 (*pr)("\n"); 5121 } 5122 } 5123 } 5124 5125 #endif 5126 5127 /* 5128 * uvm_map_create: create map 5129 */ 5130 5131 struct vm_map * 5132 uvm_map_create(pmap_t pmap, vaddr_t vmin, vaddr_t vmax, int flags) 5133 { 5134 struct vm_map *result; 5135 5136 result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); 5137 uvm_map_setup(result, vmin, vmax, flags); 5138 result->pmap = pmap; 5139 return(result); 5140 } 5141 5142 /* 5143 * uvm_map_setup: init map 5144 * 5145 * => map must not be in service yet. 5146 */ 5147 5148 void 5149 uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) 5150 { 5151 int ipl; 5152 5153 rb_tree_init(&map->rb_tree, &uvm_map_tree_ops); 5154 map->header.next = map->header.prev = &map->header; 5155 map->nentries = 0; 5156 map->size = 0; 5157 map->ref_count = 1; 5158 vm_map_setmin(map, vmin); 5159 vm_map_setmax(map, vmax); 5160 map->flags = flags; 5161 map->first_free = &map->header; 5162 map->hint = &map->header; 5163 map->timestamp = 0; 5164 map->busy = NULL; 5165 5166 if ((flags & VM_MAP_INTRSAFE) != 0) { 5167 ipl = IPL_VM; 5168 } else { 5169 ipl = IPL_NONE; 5170 } 5171 5172 rw_init(&map->lock); 5173 cv_init(&map->cv, "vm_map"); 5174 mutex_init(&map->misc_lock, MUTEX_DRIVER, ipl); 5175 mutex_init(&map->mutex, MUTEX_DRIVER, ipl); 5176 } 5177 5178 5179 /* 5180 * U N M A P - m a i n e n t r y p o i n t 5181 */ 5182 5183 /* 5184 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop") 5185 * 5186 * => caller must check alignment and size 5187 * => map must be unlocked (we will lock it) 5188 * => flags is UVM_FLAG_QUANTUM or 0. 5189 */ 5190 5191 void 5192 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 5193 { 5194 struct vm_map_entry *dead_entries; 5195 struct uvm_mapent_reservation umr; 5196 UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); 5197 5198 UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", 5199 map, start, end, 0); 5200 if (map == kernel_map) { 5201 LOCKDEBUG_MEM_CHECK((void *)start, end - start); 5202 } 5203 /* 5204 * work now done by helper functions. wipe the pmap's and then 5205 * detach from the dead entries... 5206 */ 5207 uvm_mapent_reserve(map, &umr, 2, flags); 5208 vm_map_lock(map); 5209 uvm_unmap_remove(map, start, end, &dead_entries, &umr, flags); 5210 vm_map_unlock(map); 5211 uvm_mapent_unreserve(map, &umr); 5212 5213 if (dead_entries != NULL) 5214 uvm_unmap_detach(dead_entries, 0); 5215 5216 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 5217 } 5218 5219 5220 /* 5221 * uvm_map_reference: add reference to a map 5222 * 5223 * => map need not be locked (we use misc_lock). 
5224 */ 5225 5226 void 5227 uvm_map_reference(struct vm_map *map) 5228 { 5229 mutex_enter(&map->misc_lock); 5230 map->ref_count++; 5231 mutex_exit(&map->misc_lock); 5232 } 5233 5234 struct vm_map_kernel * 5235 vm_map_to_kernel(struct vm_map *map) 5236 { 5237 5238 KASSERT(VM_MAP_IS_KERNEL(map)); 5239 5240 return (struct vm_map_kernel *)map; 5241 } 5242 5243 bool 5244 vm_map_starved_p(struct vm_map *map) 5245 { 5246 5247 if ((map->flags & VM_MAP_WANTVA) != 0) { 5248 return true; 5249 } 5250 /* XXX */ 5251 if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) { 5252 return true; 5253 } 5254 return false; 5255 } 5256 5257 #if defined(DDB) 5258 void 5259 uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 5260 { 5261 struct vm_map *map; 5262 5263 for (map = kernel_map;;) { 5264 struct vm_map_entry *entry; 5265 5266 if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) { 5267 break; 5268 } 5269 (*pr)("%p is %p+%zu from VMMAP %p\n", 5270 (void *)addr, (void *)entry->start, 5271 (size_t)(addr - (uintptr_t)entry->start), map); 5272 if (!UVM_ET_ISSUBMAP(entry)) { 5273 break; 5274 } 5275 map = entry->object.sub_map; 5276 } 5277 } 5278 #endif /* defined(DDB) */ 5279
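
/*
 * Editor's illustrative sketch (not part of the original file, excluded from
 * the build by #if 0): the arithmetic behind UVM_KMAPENT_CHUNK and
 * uvm_kmapent_overhead() above, redone as a standalone user-space program.
 * PAGE_SIZE and the sizes of struct uvm_kmapent_hdr and struct vm_map_entry
 * are machine-dependent; the EX_* constants below are assumed placeholder
 * values chosen only to make the computation concrete.
 */
#if 0
#include <stdio.h>

#define EX_PAGE_SIZE	4096	/* assumed page size */
#define EX_HDR_SIZE	32	/* assumed sizeof(struct uvm_kmapent_hdr) */
#define EX_ENTRY_SIZE	128	/* assumed sizeof(struct vm_map_entry) */

/* mirrors howmany() from <sys/param.h>: round-up division */
#define EX_HOWMANY(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	/*
	 * entries carved from one page after the header;
	 * cf. UVM_KMAPENT_CHUNK above.
	 */
	unsigned long chunk = (EX_PAGE_SIZE - EX_HDR_SIZE) / EX_ENTRY_SIZE;

	/*
	 * worst-case map entry overhead for a mapping of "size" bytes:
	 * at most one unmerged entry per page of the mapping, with
	 * (chunk - 1) usable entries per entry page, because the first
	 * entry of each chunk maps the entry page itself.
	 * cf. uvm_kmapent_overhead() above.
	 */
	unsigned long size = 1UL << 20;	/* example: a 1MB mapping */
	unsigned long nentries = EX_HOWMANY(size, EX_PAGE_SIZE);
	unsigned long overhead = EX_HOWMANY(nentries, chunk - 1) * EX_PAGE_SIZE;

	printf("%lu entries per page, %lu entries max, overhead %lu bytes\n",
	    chunk, nentries, overhead);
	return 0;
}
#endif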