1 /* $NetBSD: uvm_map.c,v 1.269 2009/01/13 14:04:35 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993, The Regents of the University of California. 6 * 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * The Mach Operating System project at Carnegie-Mellon University. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by Charles D. Cranor, 23 * Washington University, the University of California, Berkeley and 24 * its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 42 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 43 * 44 * 45 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 46 * All rights reserved. 47 * 48 * Permission to use, copy, modify and distribute this software and 49 * its documentation is hereby granted, provided that both the copyright 50 * notice and this permission notice appear in all copies of the 51 * software, derivative works or modified versions, and any portions 52 * thereof, and that both notices appear in supporting documentation. 53 * 54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 57 * 58 * Carnegie Mellon requests users of this software to return to 59 * 60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 61 * School of Computer Science 62 * Carnegie Mellon University 63 * Pittsburgh PA 15213-3890 64 * 65 * any improvements or extensions that they make and grant Carnegie the 66 * rights to redistribute these changes. 
67 */ 68 69 /* 70 * uvm_map.c: uvm map operations 71 */ 72 73 #include <sys/cdefs.h> 74 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.269 2009/01/13 14:04:35 yamt Exp $"); 75 76 #include "opt_ddb.h" 77 #include "opt_uvmhist.h" 78 #include "opt_uvm.h" 79 #include "opt_sysv.h" 80 81 #include <sys/param.h> 82 #include <sys/systm.h> 83 #include <sys/mman.h> 84 #include <sys/proc.h> 85 #include <sys/malloc.h> 86 #include <sys/pool.h> 87 #include <sys/kernel.h> 88 #include <sys/mount.h> 89 #include <sys/vnode.h> 90 #include <sys/lockdebug.h> 91 #include <sys/atomic.h> 92 93 #ifdef SYSVSHM 94 #include <sys/shm.h> 95 #endif 96 97 #include <uvm/uvm.h> 98 99 #ifdef DDB 100 #include <uvm/uvm_ddb.h> 101 #endif 102 103 #if !defined(UVMMAP_COUNTERS) 104 105 #define UVMMAP_EVCNT_DEFINE(name) /* nothing */ 106 #define UVMMAP_EVCNT_INCR(ev) /* nothing */ 107 #define UVMMAP_EVCNT_DECR(ev) /* nothing */ 108 109 #else /* defined(UVMMAP_NOCOUNTERS) */ 110 111 #include <sys/evcnt.h> 112 #define UVMMAP_EVCNT_DEFINE(name) \ 113 struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 114 "uvmmap", #name); \ 115 EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name); 116 #define UVMMAP_EVCNT_INCR(ev) uvmmap_evcnt_##ev.ev_count++ 117 #define UVMMAP_EVCNT_DECR(ev) uvmmap_evcnt_##ev.ev_count-- 118 119 #endif /* defined(UVMMAP_NOCOUNTERS) */ 120 121 UVMMAP_EVCNT_DEFINE(ubackmerge) 122 UVMMAP_EVCNT_DEFINE(uforwmerge) 123 UVMMAP_EVCNT_DEFINE(ubimerge) 124 UVMMAP_EVCNT_DEFINE(unomerge) 125 UVMMAP_EVCNT_DEFINE(kbackmerge) 126 UVMMAP_EVCNT_DEFINE(kforwmerge) 127 UVMMAP_EVCNT_DEFINE(kbimerge) 128 UVMMAP_EVCNT_DEFINE(knomerge) 129 UVMMAP_EVCNT_DEFINE(map_call) 130 UVMMAP_EVCNT_DEFINE(mlk_call) 131 UVMMAP_EVCNT_DEFINE(mlk_hint) 132 UVMMAP_EVCNT_DEFINE(mlk_list) 133 UVMMAP_EVCNT_DEFINE(mlk_tree) 134 UVMMAP_EVCNT_DEFINE(mlk_treeloop) 135 UVMMAP_EVCNT_DEFINE(mlk_listloop) 136 137 UVMMAP_EVCNT_DEFINE(uke_alloc) 138 UVMMAP_EVCNT_DEFINE(uke_free) 139 UVMMAP_EVCNT_DEFINE(ukh_alloc) 140 UVMMAP_EVCNT_DEFINE(ukh_free) 141 142 const char vmmapbsy[] = "vmmapbsy"; 143 144 /* 145 * cache for vmspace structures. 146 */ 147 148 static struct pool_cache uvm_vmspace_cache; 149 150 /* 151 * cache for dynamically-allocated map entries. 152 */ 153 154 static struct pool_cache uvm_map_entry_cache; 155 156 MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures"); 157 MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap"); 158 159 #ifdef PMAP_GROWKERNEL 160 /* 161 * This global represents the end of the kernel virtual address 162 * space. If we want to exceed this, we must grow the kernel 163 * virtual address space dynamically. 164 * 165 * Note, this variable is locked by kernel_map's lock. 166 */ 167 vaddr_t uvm_maxkaddr; 168 #endif 169 170 /* 171 * macros 172 */ 173 174 /* 175 * VM_MAP_USE_KMAPENT: determine if uvm_kmapent_alloc/free is used 176 * for the vm_map. 
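 *
 * Illustrative use, mirroring uvm_mapent_alloc() later in this file
 * (a sketch for orientation, not an additional definition):
 *
 *	if (VM_MAP_USE_KMAPENT(map))
 *		me = uvm_kmapent_alloc(map, flags);
 *	else
 *		me = pool_cache_get(&uvm_map_entry_cache, pflags);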
177 */ 178 extern struct vm_map *pager_map; /* XXX */ 179 #define VM_MAP_USE_KMAPENT_FLAGS(flags) \ 180 (((flags) & VM_MAP_INTRSAFE) != 0) 181 #define VM_MAP_USE_KMAPENT(map) \ 182 (VM_MAP_USE_KMAPENT_FLAGS((map)->flags) || (map) == kernel_map) 183 184 /* 185 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging 186 */ 187 188 #define UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \ 189 prot, maxprot, inh, adv, wire) \ 190 ((ent)->etype == (type) && \ 191 (((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE | UVM_MAP_QUANTUM)) \ 192 == 0 && \ 193 (ent)->object.uvm_obj == (uobj) && \ 194 (ent)->protection == (prot) && \ 195 (ent)->max_protection == (maxprot) && \ 196 (ent)->inheritance == (inh) && \ 197 (ent)->advice == (adv) && \ 198 (ent)->wired_count == (wire)) 199 200 /* 201 * uvm_map_entry_link: insert entry into a map 202 * 203 * => map must be locked 204 */ 205 #define uvm_map_entry_link(map, after_where, entry) do { \ 206 uvm_mapent_check(entry); \ 207 (map)->nentries++; \ 208 (entry)->prev = (after_where); \ 209 (entry)->next = (after_where)->next; \ 210 (entry)->prev->next = (entry); \ 211 (entry)->next->prev = (entry); \ 212 uvm_rb_insert((map), (entry)); \ 213 } while (/*CONSTCOND*/ 0) 214 215 /* 216 * uvm_map_entry_unlink: remove entry from a map 217 * 218 * => map must be locked 219 */ 220 #define uvm_map_entry_unlink(map, entry) do { \ 221 KASSERT((entry) != (map)->first_free); \ 222 KASSERT((entry) != (map)->hint); \ 223 uvm_mapent_check(entry); \ 224 (map)->nentries--; \ 225 (entry)->next->prev = (entry)->prev; \ 226 (entry)->prev->next = (entry)->next; \ 227 uvm_rb_remove((map), (entry)); \ 228 } while (/*CONSTCOND*/ 0) 229 230 /* 231 * SAVE_HINT: saves the specified entry as the hint for future lookups. 232 * 233 * => map need not be locked. 234 */ 235 #define SAVE_HINT(map, check, value) do { \ 236 if ((map)->hint == (check)) \ 237 (map)->hint = (value); \ 238 } while (/*CONSTCOND*/ 0) 239 240 /* 241 * clear_hints: ensure that hints don't point to the entry. 242 * 243 * => map must be write-locked. 
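 *
 * A hedged usage sketch (callers sit outside this excerpt; the ordering
 * is inferred from the KASSERTs in uvm_map_entry_unlink() above):
 *
 *	clear_hints(map, entry);
 *	uvm_map_entry_unlink(map, entry);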
244 */ 245 static void 246 clear_hints(struct vm_map *map, struct vm_map_entry *ent) 247 { 248 249 SAVE_HINT(map, ent, ent->prev); 250 if (map->first_free == ent) { 251 map->first_free = ent->prev; 252 } 253 } 254 255 /* 256 * VM_MAP_RANGE_CHECK: check and correct range 257 * 258 * => map must at least be read locked 259 */ 260 261 #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 262 if (start < vm_map_min(map)) \ 263 start = vm_map_min(map); \ 264 if (end > vm_map_max(map)) \ 265 end = vm_map_max(map); \ 266 if (start > end) \ 267 start = end; \ 268 } while (/*CONSTCOND*/ 0) 269 270 /* 271 * local prototypes 272 */ 273 274 static struct vm_map_entry * 275 uvm_mapent_alloc(struct vm_map *, int); 276 static struct vm_map_entry * 277 uvm_mapent_alloc_split(struct vm_map *, 278 const struct vm_map_entry *, int, 279 struct uvm_mapent_reservation *); 280 static void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 281 static void uvm_mapent_free(struct vm_map_entry *); 282 #if defined(DEBUG) 283 static void _uvm_mapent_check(const struct vm_map_entry *, const char *, 284 int); 285 #define uvm_mapent_check(map) _uvm_mapent_check(map, __FILE__, __LINE__) 286 #else /* defined(DEBUG) */ 287 #define uvm_mapent_check(e) /* nothing */ 288 #endif /* defined(DEBUG) */ 289 static struct vm_map_entry * 290 uvm_kmapent_alloc(struct vm_map *, int); 291 static void uvm_kmapent_free(struct vm_map_entry *); 292 static vsize_t uvm_kmapent_overhead(vsize_t); 293 294 static void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 295 static void uvm_map_reference_amap(struct vm_map_entry *, int); 296 static int uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int, 297 struct vm_map_entry *); 298 static void uvm_map_unreference_amap(struct vm_map_entry *, int); 299 300 int _uvm_map_sanity(struct vm_map *); 301 int _uvm_tree_sanity(struct vm_map *); 302 static vsize_t uvm_rb_maxgap(const struct vm_map_entry *); 303 304 CTASSERT(offsetof(struct vm_map_entry, rb_node) == 0); 305 #define ROOT_ENTRY(map) ((struct vm_map_entry *)(map)->rb_tree.rbt_root) 306 #define LEFT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_left) 307 #define RIGHT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_right) 308 #define PARENT_ENTRY(map, entry) \ 309 (ROOT_ENTRY(map) == (entry) \ 310 ? 
NULL \
	    : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))

static int
uvm_map_compare_nodes(const struct rb_node *nparent,
    const struct rb_node *nkey)
{
	const struct vm_map_entry *eparent = (const void *) nparent;
	const struct vm_map_entry *ekey = (const void *) nkey;

	KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
	KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);

	if (ekey->start < eparent->start)
		return -1;
	if (ekey->start >= eparent->end)
		return 1;
	return 0;
}

static int
uvm_map_compare_key(const struct rb_node *nparent, const void *vkey)
{
	const struct vm_map_entry *eparent = (const void *) nparent;
	const vaddr_t va = *(const vaddr_t *) vkey;

	if (va < eparent->start)
		return -1;
	if (va >= eparent->end)
		return 1;
	return 0;
}

static const struct rb_tree_ops uvm_map_tree_ops = {
	.rbto_compare_nodes = uvm_map_compare_nodes,
	.rbto_compare_key = uvm_map_compare_key,
};

static inline vsize_t
uvm_rb_gap(const struct vm_map_entry *entry)
{
	KASSERT(entry->next != NULL);
	return entry->next->start - entry->end;
}

static vsize_t
uvm_rb_maxgap(const struct vm_map_entry *entry)
{
	struct vm_map_entry *child;
	vsize_t maxgap = entry->gap;

	/*
	 * We need maxgap to be the largest gap of us or any of our
	 * descendents.  Since each of our children's maxgap is the
	 * cached value of their largest gap of themselves or their
	 * descendents, we can just use that value and avoid recursing
	 * down the tree to calculate it.
	 */
	if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
		maxgap = child->maxgap;

	if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
		maxgap = child->maxgap;

	return maxgap;
}

static void
uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *parent;

	KASSERT(entry->gap == uvm_rb_gap(entry));
	entry->maxgap = uvm_rb_maxgap(entry);

	while ((parent = PARENT_ENTRY(map, entry)) != NULL) {
		struct vm_map_entry *brother;
		vsize_t maxgap = parent->gap;

		KDASSERT(parent->gap == uvm_rb_gap(parent));
		if (maxgap < entry->maxgap)
			maxgap = entry->maxgap;
		/*
		 * Since we work our way towards the root, we know entry's
		 * maxgap value is ok, but its brother's maxgap may now be
		 * out-of-date due to rebalancing.  So refresh it.
		 */
		brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER];
		if (brother != NULL) {
			KDASSERT(brother->gap == uvm_rb_gap(brother));
			brother->maxgap = uvm_rb_maxgap(brother);
			if (maxgap < brother->maxgap)
				maxgap = brother->maxgap;
		}

		parent->maxgap = maxgap;
		entry = parent;
	}
}

static void
uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	entry->gap = entry->maxgap = uvm_rb_gap(entry);
	if (entry->prev != &map->header)
		entry->prev->gap = uvm_rb_gap(entry->prev);

	if (!rb_tree_insert_node(&map->rb_tree, &entry->rb_node))
		panic("uvm_rb_insert: map %p: duplicate entry?", map);

	/*
	 * If the previous entry is not our immediate left child, then it's an
	 * ancestor and will be fixed up on the way to the root.  We don't
	 * have to check entry->prev against &map->header since &map->header
	 * will never be in the tree.
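	 *
	 * Illustrative numbers (not from the source): an entry covering
	 * [0x2000, 0x3000) whose successor starts at 0x6000 has
	 * gap == 0x3000, and maxgap caches the largest such gap anywhere
	 * in the entry's subtree, which is what the fixup below maintains.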
425 */ 426 uvm_rb_fixup(map, 427 LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry); 428 } 429 430 static void 431 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 432 { 433 struct vm_map_entry *prev_parent = NULL, *next_parent = NULL; 434 435 /* 436 * If we are removing an interior node, then an adjacent node will 437 * be used to replace its position in the tree. Therefore we will 438 * need to fixup the tree starting at the parent of the replacement 439 * node. So record their parents for later use. 440 */ 441 if (entry->prev != &map->header) 442 prev_parent = PARENT_ENTRY(map, entry->prev); 443 if (entry->next != &map->header) 444 next_parent = PARENT_ENTRY(map, entry->next); 445 446 rb_tree_remove_node(&map->rb_tree, &entry->rb_node); 447 448 /* 449 * If the previous node has a new parent, fixup the tree starting 450 * at the previous node's old parent. 451 */ 452 if (entry->prev != &map->header) { 453 /* 454 * Update the previous entry's gap due to our absence. 455 */ 456 entry->prev->gap = uvm_rb_gap(entry->prev); 457 uvm_rb_fixup(map, entry->prev); 458 if (prev_parent != NULL 459 && prev_parent != entry 460 && prev_parent != PARENT_ENTRY(map, entry->prev)) 461 uvm_rb_fixup(map, prev_parent); 462 } 463 464 /* 465 * If the next node has a new parent, fixup the tree starting 466 * at the next node's old parent. 467 */ 468 if (entry->next != &map->header) { 469 uvm_rb_fixup(map, entry->next); 470 if (next_parent != NULL 471 && next_parent != entry 472 && next_parent != PARENT_ENTRY(map, entry->next)) 473 uvm_rb_fixup(map, next_parent); 474 } 475 } 476 477 #if defined(DEBUG) 478 int uvm_debug_check_map = 0; 479 int uvm_debug_check_rbtree = 0; 480 #define uvm_map_check(map, name) \ 481 _uvm_map_check((map), (name), __FILE__, __LINE__) 482 static void 483 _uvm_map_check(struct vm_map *map, const char *name, 484 const char *file, int line) 485 { 486 487 if ((uvm_debug_check_map && _uvm_map_sanity(map)) || 488 (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) { 489 panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)", 490 name, map, file, line); 491 } 492 } 493 #else /* defined(DEBUG) */ 494 #define uvm_map_check(map, name) /* nothing */ 495 #endif /* defined(DEBUG) */ 496 497 #if defined(DEBUG) || defined(DDB) 498 int 499 _uvm_map_sanity(struct vm_map *map) 500 { 501 bool first_free_found = false; 502 bool hint_found = false; 503 const struct vm_map_entry *e; 504 505 e = &map->header; 506 for (;;) { 507 if (map->first_free == e) { 508 first_free_found = true; 509 } else if (!first_free_found && e->next->start > e->end) { 510 printf("first_free %p should be %p\n", 511 map->first_free, e); 512 return -1; 513 } 514 if (map->hint == e) { 515 hint_found = true; 516 } 517 518 e = e->next; 519 if (e == &map->header) { 520 break; 521 } 522 } 523 if (!first_free_found) { 524 printf("stale first_free\n"); 525 return -1; 526 } 527 if (!hint_found) { 528 printf("stale hint\n"); 529 return -1; 530 } 531 return 0; 532 } 533 534 int 535 _uvm_tree_sanity(struct vm_map *map) 536 { 537 struct vm_map_entry *tmp, *trtmp; 538 int n = 0, i = 1; 539 540 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 541 if (tmp->gap != uvm_rb_gap(tmp)) { 542 printf("%d/%d gap %lx != %lx %s\n", 543 n + 1, map->nentries, 544 (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp), 545 tmp->next == &map->header ? "(last)" : ""); 546 goto error; 547 } 548 /* 549 * If any entries are out of order, tmp->gap will be unsigned 550 * and will likely exceed the size of the map. 
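		 *
		 * For example (illustrative addresses): an entry ending at
		 * 0x5000 whose successor starts at 0x4000 would yield
		 * gap == (vsize_t)-0x1000, which the KASSERT below rejects.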
551 */ 552 KASSERT(tmp->gap < map->size); 553 n++; 554 } 555 556 if (n != map->nentries) { 557 printf("nentries: %d vs %d\n", n, map->nentries); 558 goto error; 559 } 560 561 trtmp = NULL; 562 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 563 if (tmp->maxgap != uvm_rb_maxgap(tmp)) { 564 printf("maxgap %lx != %lx\n", 565 (ulong)tmp->maxgap, 566 (ulong)uvm_rb_maxgap(tmp)); 567 goto error; 568 } 569 if (trtmp != NULL && trtmp->start >= tmp->start) { 570 printf("corrupt: 0x%lx >= 0x%lx\n", 571 trtmp->start, tmp->start); 572 goto error; 573 } 574 575 trtmp = tmp; 576 } 577 578 for (tmp = map->header.next; tmp != &map->header; 579 tmp = tmp->next, i++) { 580 trtmp = (void *) rb_tree_iterate(&map->rb_tree, &tmp->rb_node, 581 RB_DIR_LEFT); 582 if (trtmp == NULL) 583 trtmp = &map->header; 584 if (tmp->prev != trtmp) { 585 printf("lookup: %d: %p->prev=%p: %p\n", 586 i, tmp, tmp->prev, trtmp); 587 goto error; 588 } 589 trtmp = (void *) rb_tree_iterate(&map->rb_tree, &tmp->rb_node, 590 RB_DIR_RIGHT); 591 if (trtmp == NULL) 592 trtmp = &map->header; 593 if (tmp->next != trtmp) { 594 printf("lookup: %d: %p->next=%p: %p\n", 595 i, tmp, tmp->next, trtmp); 596 goto error; 597 } 598 trtmp = (void *)rb_tree_find_node(&map->rb_tree, &tmp->start); 599 if (trtmp != tmp) { 600 printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, 601 PARENT_ENTRY(map, tmp)); 602 goto error; 603 } 604 } 605 606 return (0); 607 error: 608 return (-1); 609 } 610 #endif /* defined(DEBUG) || defined(DDB) */ 611 612 #ifdef DIAGNOSTIC 613 static struct vm_map *uvm_kmapent_map(struct vm_map_entry *); 614 #endif 615 616 /* 617 * vm_map_lock: acquire an exclusive (write) lock on a map. 618 * 619 * => Note that "intrsafe" maps use only exclusive, spin locks. 620 * 621 * => The locking protocol provides for guaranteed upgrade from shared -> 622 * exclusive by whichever thread currently has the map marked busy. 623 * See "LOCKING PROTOCOL NOTES" in uvm_map.h. This is horrible; among 624 * other problems, it defeats any fairness guarantees provided by RW 625 * locks. 626 */ 627 628 void 629 vm_map_lock(struct vm_map *map) 630 { 631 632 if ((map->flags & VM_MAP_INTRSAFE) != 0) { 633 mutex_spin_enter(&map->mutex); 634 return; 635 } 636 637 for (;;) { 638 rw_enter(&map->lock, RW_WRITER); 639 if (map->busy == NULL) 640 break; 641 if (map->busy == curlwp) 642 break; 643 mutex_enter(&map->misc_lock); 644 rw_exit(&map->lock); 645 if (map->busy != NULL) 646 cv_wait(&map->cv, &map->misc_lock); 647 mutex_exit(&map->misc_lock); 648 } 649 650 map->timestamp++; 651 } 652 653 /* 654 * vm_map_lock_try: try to lock a map, failing if it is already locked. 655 */ 656 657 bool 658 vm_map_lock_try(struct vm_map *map) 659 { 660 661 if ((map->flags & VM_MAP_INTRSAFE) != 0) 662 return mutex_tryenter(&map->mutex); 663 if (!rw_tryenter(&map->lock, RW_WRITER)) 664 return false; 665 if (map->busy != NULL) { 666 rw_exit(&map->lock); 667 return false; 668 } 669 670 map->timestamp++; 671 return true; 672 } 673 674 /* 675 * vm_map_unlock: release an exclusive lock on a map. 676 */ 677 678 void 679 vm_map_unlock(struct vm_map *map) 680 { 681 682 if ((map->flags & VM_MAP_INTRSAFE) != 0) 683 mutex_spin_exit(&map->mutex); 684 else { 685 KASSERT(rw_write_held(&map->lock)); 686 KASSERT(map->busy == NULL || map->busy == curlwp); 687 rw_exit(&map->lock); 688 } 689 } 690 691 /* 692 * vm_map_unbusy: mark the map as unbusy, and wake any waiters that 693 * want an exclusive lock. 
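 *
 * A hedged sketch of the busy protocol (callers live outside this
 * excerpt; the pairing follows from vm_map_busy() below):
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);
 *	...			other threads now wait in vm_map_lock()
 *	vm_map_unbusy(map);
 *	vm_map_unlock(map);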
694 */ 695 696 void 697 vm_map_unbusy(struct vm_map *map) 698 { 699 700 KASSERT(map->busy == curlwp); 701 702 /* 703 * Safe to clear 'busy' and 'waiters' with only a read lock held: 704 * 705 * o they can only be set with a write lock held 706 * o writers are blocked out with a read or write hold 707 * o at any time, only one thread owns the set of values 708 */ 709 mutex_enter(&map->misc_lock); 710 map->busy = NULL; 711 cv_broadcast(&map->cv); 712 mutex_exit(&map->misc_lock); 713 } 714 715 /* 716 * vm_map_lock_read: acquire a shared (read) lock on a map. 717 */ 718 719 void 720 vm_map_lock_read(struct vm_map *map) 721 { 722 723 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 724 725 rw_enter(&map->lock, RW_READER); 726 } 727 728 /* 729 * vm_map_unlock_read: release a shared lock on a map. 730 */ 731 732 void 733 vm_map_unlock_read(struct vm_map *map) 734 { 735 736 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 737 738 rw_exit(&map->lock); 739 } 740 741 /* 742 * vm_map_busy: mark a map as busy. 743 * 744 * => the caller must hold the map write locked 745 */ 746 747 void 748 vm_map_busy(struct vm_map *map) 749 { 750 751 KASSERT(rw_write_held(&map->lock)); 752 KASSERT(map->busy == NULL); 753 754 map->busy = curlwp; 755 } 756 757 /* 758 * vm_map_locked_p: return true if the map is write locked. 759 * 760 * => only for debug purposes like KASSERTs. 761 * => should not be used to verify that a map is not locked. 762 */ 763 764 bool 765 vm_map_locked_p(struct vm_map *map) 766 { 767 768 if ((map->flags & VM_MAP_INTRSAFE) != 0) { 769 return mutex_owned(&map->mutex); 770 } else { 771 return rw_write_held(&map->lock); 772 } 773 } 774 775 /* 776 * uvm_mapent_alloc: allocate a map entry 777 */ 778 779 static struct vm_map_entry * 780 uvm_mapent_alloc(struct vm_map *map, int flags) 781 { 782 struct vm_map_entry *me; 783 int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK; 784 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); 785 786 if (VM_MAP_USE_KMAPENT(map)) { 787 me = uvm_kmapent_alloc(map, flags); 788 } else { 789 me = pool_cache_get(&uvm_map_entry_cache, pflags); 790 if (__predict_false(me == NULL)) 791 return NULL; 792 me->flags = 0; 793 } 794 795 UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, 796 ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0); 797 return (me); 798 } 799 800 /* 801 * uvm_mapent_alloc_split: allocate a map entry for clipping. 802 * 803 * => map must be locked by caller if UVM_MAP_QUANTUM is set. 
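 *
 * => called from the clippers (uvm_map_clip_start/uvm_map_clip_end
 *    below); for UVM_MAP_QUANTUM entries it reuses an entry stashed
 *    on vmk_merged_entries rather than allocating a fresh one, so
 *    that path does not sleep (an observation, not a new contract).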
804 */ 805 806 static struct vm_map_entry * 807 uvm_mapent_alloc_split(struct vm_map *map, 808 const struct vm_map_entry *old_entry, int flags, 809 struct uvm_mapent_reservation *umr) 810 { 811 struct vm_map_entry *me; 812 813 KASSERT(!VM_MAP_USE_KMAPENT(map) || 814 (old_entry->flags & UVM_MAP_QUANTUM) || !UMR_EMPTY(umr)); 815 816 if (old_entry->flags & UVM_MAP_QUANTUM) { 817 struct vm_map_kernel *vmk = vm_map_to_kernel(map); 818 819 KASSERT(vm_map_locked_p(map)); 820 me = vmk->vmk_merged_entries; 821 KASSERT(me); 822 vmk->vmk_merged_entries = me->next; 823 KASSERT(me->flags & UVM_MAP_QUANTUM); 824 } else { 825 me = uvm_mapent_alloc(map, flags); 826 } 827 828 return me; 829 } 830 831 /* 832 * uvm_mapent_free: free map entry 833 */ 834 835 static void 836 uvm_mapent_free(struct vm_map_entry *me) 837 { 838 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); 839 840 UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", 841 me, me->flags, 0, 0); 842 if (me->flags & UVM_MAP_KERNEL) { 843 uvm_kmapent_free(me); 844 } else { 845 pool_cache_put(&uvm_map_entry_cache, me); 846 } 847 } 848 849 /* 850 * uvm_mapent_free_merged: free merged map entry 851 * 852 * => keep the entry if needed. 853 * => caller shouldn't hold map locked if VM_MAP_USE_KMAPENT(map) is true. 854 * => map should be locked if UVM_MAP_QUANTUM is set. 855 */ 856 857 static void 858 uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me) 859 { 860 861 KASSERT(!(me->flags & UVM_MAP_KERNEL) || uvm_kmapent_map(me) == map); 862 863 if (me->flags & UVM_MAP_QUANTUM) { 864 /* 865 * keep this entry for later splitting. 866 */ 867 struct vm_map_kernel *vmk; 868 869 KASSERT(vm_map_locked_p(map)); 870 KASSERT(VM_MAP_IS_KERNEL(map)); 871 KASSERT(!VM_MAP_USE_KMAPENT(map) || 872 (me->flags & UVM_MAP_KERNEL)); 873 874 vmk = vm_map_to_kernel(map); 875 me->next = vmk->vmk_merged_entries; 876 vmk->vmk_merged_entries = me; 877 } else { 878 uvm_mapent_free(me); 879 } 880 } 881 882 /* 883 * uvm_mapent_copy: copy a map entry, preserving flags 884 */ 885 886 static inline void 887 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 888 { 889 890 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 891 ((char *)src)); 892 } 893 894 /* 895 * uvm_mapent_overhead: calculate maximum kva overhead necessary for 896 * map entries. 897 * 898 * => size and flags are the same as uvm_km_suballoc's ones. 
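 *
 * A plausible caller-side sketch (the real caller, uvm_km_suballoc(),
 * is outside this file, so treat this as an assumption): the submap
 * size is padded so that entries describing the submap's own
 * allocations still fit:
 *
 *	size += uvm_mapent_overhead(size, flags);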
899 */ 900 901 vsize_t 902 uvm_mapent_overhead(vsize_t size, int flags) 903 { 904 905 if (VM_MAP_USE_KMAPENT_FLAGS(flags)) { 906 return uvm_kmapent_overhead(size); 907 } 908 return 0; 909 } 910 911 #if defined(DEBUG) 912 static void 913 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line) 914 { 915 916 if (entry->start >= entry->end) { 917 goto bad; 918 } 919 if (UVM_ET_ISOBJ(entry)) { 920 if (entry->object.uvm_obj == NULL) { 921 goto bad; 922 } 923 } else if (UVM_ET_ISSUBMAP(entry)) { 924 if (entry->object.sub_map == NULL) { 925 goto bad; 926 } 927 } else { 928 if (entry->object.uvm_obj != NULL || 929 entry->object.sub_map != NULL) { 930 goto bad; 931 } 932 } 933 if (!UVM_ET_ISOBJ(entry)) { 934 if (entry->offset != 0) { 935 goto bad; 936 } 937 } 938 939 return; 940 941 bad: 942 panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line); 943 } 944 #endif /* defined(DEBUG) */ 945 946 /* 947 * uvm_map_entry_unwire: unwire a map entry 948 * 949 * => map should be locked by caller 950 */ 951 952 static inline void 953 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry) 954 { 955 956 entry->wired_count = 0; 957 uvm_fault_unwire_locked(map, entry->start, entry->end); 958 } 959 960 961 /* 962 * wrapper for calling amap_ref() 963 */ 964 static inline void 965 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 966 { 967 968 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 969 (entry->end - entry->start) >> PAGE_SHIFT, flags); 970 } 971 972 973 /* 974 * wrapper for calling amap_unref() 975 */ 976 static inline void 977 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 978 { 979 980 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 981 (entry->end - entry->start) >> PAGE_SHIFT, flags); 982 } 983 984 985 /* 986 * uvm_map_init: init mapping system at boot time. 987 */ 988 989 void 990 uvm_map_init(void) 991 { 992 #if defined(UVMHIST) 993 static struct uvm_history_ent maphistbuf[100]; 994 static struct uvm_history_ent pdhistbuf[100]; 995 #endif 996 997 /* 998 * first, init logging system. 999 */ 1000 1001 UVMHIST_FUNC("uvm_map_init"); 1002 UVMHIST_INIT_STATIC(maphist, maphistbuf); 1003 UVMHIST_INIT_STATIC(pdhist, pdhistbuf); 1004 UVMHIST_CALLED(maphist); 1005 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 1006 1007 /* 1008 * initialize the global lock for kernel map entry. 1009 */ 1010 1011 mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); 1012 1013 /* 1014 * initialize caches. 1015 */ 1016 1017 pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), 1018 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); 1019 pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), 1020 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); 1021 } 1022 1023 /* 1024 * clippers 1025 */ 1026 1027 /* 1028 * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy. 1029 */ 1030 1031 static void 1032 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2, 1033 vaddr_t splitat) 1034 { 1035 vaddr_t adj; 1036 1037 KASSERT(entry1->start < splitat); 1038 KASSERT(splitat < entry1->end); 1039 1040 adj = splitat - entry1->start; 1041 entry1->end = entry2->start = splitat; 1042 1043 if (entry1->aref.ar_amap) { 1044 amap_splitref(&entry1->aref, &entry2->aref, adj); 1045 } 1046 if (UVM_ET_ISSUBMAP(entry1)) { 1047 /* ... 
unlikely to happen, but play it safe */
		uvm_map_reference(entry1->object.sub_map);
	} else if (UVM_ET_ISOBJ(entry1)) {
		KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
		entry2->offset += adj;
		if (entry1->object.uvm_obj->pgops &&
		    entry1->object.uvm_obj->pgops->pgo_reference)
			entry1->object.uvm_obj->pgops->pgo_reference(
			    entry1->object.uvm_obj);
	}
}

/*
 * uvm_map_clip_start: ensure that the entry begins at or after
 *	the starting address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_START macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
    vaddr_t start, struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *new_entry;

	/* uvm_map_simplify_entry(map, entry); */ /* XXX */

	uvm_map_check(map, "clip_start entry");
	uvm_mapent_check(entry);

	/*
	 * Split off the front portion.  note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */
	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(new_entry, entry, start);
	uvm_map_entry_link(map, entry->prev, new_entry);

	uvm_map_check(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 *	the ending address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end,
    struct uvm_mapent_reservation *umr)
{
	struct vm_map_entry *new_entry;

	uvm_map_check(map, "clip_end entry");
	uvm_mapent_check(entry);

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */
	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(entry, new_entry, end);
	uvm_map_entry_link(map, entry, new_entry);

	uvm_map_check(map, "clip_end leave");
}

static void
vm_map_drain(struct vm_map *map, uvm_flag_t flags)
{

	if (!VM_MAP_IS_KERNEL(map)) {
		return;
	}

	uvm_km_va_drain(map, flags);
}

/*
 * M A P - m a i n e n t r y p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	 [1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	 [3] <uobj,uoffset>		== normal mapping
 *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *    case [4] is for kernel mappings where we don't know the offset until
 *    we've found a virtual address.   note that kernel object offsets are
 *    always relative to vm_map_min(kernel_map).
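 *
 *    an illustrative call for case [2] (a sketch only; UVM_MAPFLAG and
 *    friends come from uvm_extern.h and the values here are made up):
 *
 *	error = uvm_map(kernel_map, &va, PAGE_SIZE, NULL,
 *	    UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
 *	    UVM_INH_NONE, UVM_ADV_RANDOM, 0));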
1153 * 1154 * => if `align' is non-zero, we align the virtual address to the specified 1155 * alignment. 1156 * this is provided as a mechanism for large pages. 1157 * 1158 * => XXXCDC: need way to map in external amap? 1159 */ 1160 1161 int 1162 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size, 1163 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) 1164 { 1165 struct uvm_map_args args; 1166 struct vm_map_entry *new_entry; 1167 int error; 1168 1169 KASSERT((flags & UVM_FLAG_QUANTUM) == 0 || VM_MAP_IS_KERNEL(map)); 1170 KASSERT((size & PAGE_MASK) == 0); 1171 1172 /* 1173 * for pager_map, allocate the new entry first to avoid sleeping 1174 * for memory while we have the map locked. 1175 * 1176 * Also, because we allocate entries for in-kernel maps 1177 * a bit differently (cf. uvm_kmapent_alloc/free), we need to 1178 * allocate them before locking the map. 1179 */ 1180 1181 new_entry = NULL; 1182 if (VM_MAP_USE_KMAPENT(map) || (flags & UVM_FLAG_QUANTUM) || 1183 map == pager_map) { 1184 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT)); 1185 if (__predict_false(new_entry == NULL)) 1186 return ENOMEM; 1187 if (flags & UVM_FLAG_QUANTUM) 1188 new_entry->flags |= UVM_MAP_QUANTUM; 1189 } 1190 if (map == pager_map) 1191 flags |= UVM_FLAG_NOMERGE; 1192 1193 error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align, 1194 flags, &args); 1195 if (!error) { 1196 error = uvm_map_enter(map, &args, new_entry); 1197 *startp = args.uma_start; 1198 } else if (new_entry) { 1199 uvm_mapent_free(new_entry); 1200 } 1201 1202 #if defined(DEBUG) 1203 if (!error && VM_MAP_IS_KERNEL(map)) { 1204 uvm_km_check_empty(map, *startp, *startp + size); 1205 } 1206 #endif /* defined(DEBUG) */ 1207 1208 return error; 1209 } 1210 1211 int 1212 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, 1213 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 1214 struct uvm_map_args *args) 1215 { 1216 struct vm_map_entry *prev_entry; 1217 vm_prot_t prot = UVM_PROTECTION(flags); 1218 vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1219 1220 UVMHIST_FUNC("uvm_map_prepare"); 1221 UVMHIST_CALLED(maphist); 1222 1223 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1224 map, start, size, flags); 1225 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1226 1227 /* 1228 * detect a popular device driver bug. 1229 */ 1230 1231 KASSERT(doing_shutdown || curlwp != NULL || 1232 (map->flags & VM_MAP_INTRSAFE)); 1233 1234 /* 1235 * zero-sized mapping doesn't make any sense. 1236 */ 1237 KASSERT(size > 0); 1238 1239 KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0); 1240 1241 uvm_map_check(map, "map entry"); 1242 1243 /* 1244 * check sanity of protection code 1245 */ 1246 1247 if ((prot & maxprot) != prot) { 1248 UVMHIST_LOG(maphist, "<- prot. 
failure: prot=0x%x, max=0x%x", 1249 prot, maxprot,0,0); 1250 return EACCES; 1251 } 1252 1253 /* 1254 * figure out where to put new VM range 1255 */ 1256 1257 retry: 1258 if (vm_map_lock_try(map) == false) { 1259 if ((flags & UVM_FLAG_TRYLOCK) != 0 && 1260 (map->flags & VM_MAP_INTRSAFE) == 0) { 1261 return EAGAIN; 1262 } 1263 vm_map_lock(map); /* could sleep here */ 1264 } 1265 prev_entry = uvm_map_findspace(map, start, size, &start, 1266 uobj, uoffset, align, flags); 1267 if (prev_entry == NULL) { 1268 unsigned int timestamp; 1269 1270 timestamp = map->timestamp; 1271 UVMHIST_LOG(maphist,"waiting va timestamp=0x%x", 1272 timestamp,0,0,0); 1273 map->flags |= VM_MAP_WANTVA; 1274 vm_map_unlock(map); 1275 1276 /* 1277 * try to reclaim kva and wait until someone does unmap. 1278 * fragile locking here, so we awaken every second to 1279 * recheck the condition. 1280 */ 1281 1282 vm_map_drain(map, flags); 1283 1284 mutex_enter(&map->misc_lock); 1285 while ((map->flags & VM_MAP_WANTVA) != 0 && 1286 map->timestamp == timestamp) { 1287 if ((flags & UVM_FLAG_WAITVA) == 0) { 1288 mutex_exit(&map->misc_lock); 1289 UVMHIST_LOG(maphist, 1290 "<- uvm_map_findspace failed!", 0,0,0,0); 1291 return ENOMEM; 1292 } else { 1293 cv_timedwait(&map->cv, &map->misc_lock, hz); 1294 } 1295 } 1296 mutex_exit(&map->misc_lock); 1297 goto retry; 1298 } 1299 1300 #ifdef PMAP_GROWKERNEL 1301 /* 1302 * If the kernel pmap can't map the requested space, 1303 * then allocate more resources for it. 1304 */ 1305 if (map == kernel_map && uvm_maxkaddr < (start + size)) 1306 uvm_maxkaddr = pmap_growkernel(start + size); 1307 #endif 1308 1309 UVMMAP_EVCNT_INCR(map_call); 1310 1311 /* 1312 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 1313 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 1314 * either case we want to zero it before storing it in the map entry 1315 * (because it looks strange and confusing when debugging...) 1316 * 1317 * if uobj is not null 1318 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 1319 * and we do not need to change uoffset. 1320 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 1321 * now (based on the starting address of the map). this case is 1322 * for kernel object mappings where we don't know the offset until 1323 * the virtual address is found (with uvm_map_findspace). the 1324 * offset is the distance we are from the start of the map. 1325 */ 1326 1327 if (uobj == NULL) { 1328 uoffset = 0; 1329 } else { 1330 if (uoffset == UVM_UNKNOWN_OFFSET) { 1331 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1332 uoffset = start - vm_map_min(kernel_map); 1333 } 1334 } 1335 1336 args->uma_flags = flags; 1337 args->uma_prev = prev_entry; 1338 args->uma_start = start; 1339 args->uma_size = size; 1340 args->uma_uobj = uobj; 1341 args->uma_uoffset = uoffset; 1342 1343 return 0; 1344 } 1345 1346 int 1347 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args, 1348 struct vm_map_entry *new_entry) 1349 { 1350 struct vm_map_entry *prev_entry = args->uma_prev; 1351 struct vm_map_entry *dead = NULL; 1352 1353 const uvm_flag_t flags = args->uma_flags; 1354 const vm_prot_t prot = UVM_PROTECTION(flags); 1355 const vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1356 const vm_inherit_t inherit = UVM_INHERIT(flags); 1357 const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ? 1358 AMAP_EXTEND_NOWAIT : 0; 1359 const int advice = UVM_ADVICE(flags); 1360 const int meflagval = (flags & UVM_FLAG_QUANTUM) ? 
1361 UVM_MAP_QUANTUM : 0; 1362 1363 vaddr_t start = args->uma_start; 1364 vsize_t size = args->uma_size; 1365 struct uvm_object *uobj = args->uma_uobj; 1366 voff_t uoffset = args->uma_uoffset; 1367 1368 const int kmap = (vm_map_pmap(map) == pmap_kernel()); 1369 int merged = 0; 1370 int error; 1371 int newetype; 1372 1373 UVMHIST_FUNC("uvm_map_enter"); 1374 UVMHIST_CALLED(maphist); 1375 1376 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1377 map, start, size, flags); 1378 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1379 1380 KASSERT(map->hint == prev_entry); /* bimerge case assumes this */ 1381 1382 if (flags & UVM_FLAG_QUANTUM) { 1383 KASSERT(new_entry); 1384 KASSERT(new_entry->flags & UVM_MAP_QUANTUM); 1385 } 1386 1387 if (uobj) 1388 newetype = UVM_ET_OBJ; 1389 else 1390 newetype = 0; 1391 1392 if (flags & UVM_FLAG_COPYONW) { 1393 newetype |= UVM_ET_COPYONWRITE; 1394 if ((flags & UVM_FLAG_OVERLAY) == 0) 1395 newetype |= UVM_ET_NEEDSCOPY; 1396 } 1397 1398 /* 1399 * try and insert in map by extending previous entry, if possible. 1400 * XXX: we don't try and pull back the next entry. might be useful 1401 * for a stack, but we are currently allocating our stack in advance. 1402 */ 1403 1404 if (flags & UVM_FLAG_NOMERGE) 1405 goto nomerge; 1406 1407 if (prev_entry->end == start && 1408 prev_entry != &map->header && 1409 UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, meflagval, 1410 prot, maxprot, inherit, advice, 0)) { 1411 1412 if (uobj && prev_entry->offset + 1413 (prev_entry->end - prev_entry->start) != uoffset) 1414 goto forwardmerge; 1415 1416 /* 1417 * can't extend a shared amap. note: no need to lock amap to 1418 * look at refs since we don't care about its exact value. 1419 * if it is one (i.e. we have only reference) it will stay there 1420 */ 1421 1422 if (prev_entry->aref.ar_amap && 1423 amap_refs(prev_entry->aref.ar_amap) != 1) { 1424 goto forwardmerge; 1425 } 1426 1427 if (prev_entry->aref.ar_amap) { 1428 error = amap_extend(prev_entry, size, 1429 amapwaitflag | AMAP_EXTEND_FORWARDS); 1430 if (error) 1431 goto nomerge; 1432 } 1433 1434 if (kmap) { 1435 UVMMAP_EVCNT_INCR(kbackmerge); 1436 } else { 1437 UVMMAP_EVCNT_INCR(ubackmerge); 1438 } 1439 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 1440 1441 /* 1442 * drop our reference to uobj since we are extending a reference 1443 * that we already have (the ref count can not drop to zero). 1444 */ 1445 1446 if (uobj && uobj->pgops->pgo_detach) 1447 uobj->pgops->pgo_detach(uobj); 1448 1449 /* 1450 * Now that we've merged the entries, note that we've grown 1451 * and our gap has shrunk. Then fix the tree. 1452 */ 1453 prev_entry->end += size; 1454 prev_entry->gap -= size; 1455 uvm_rb_fixup(map, prev_entry); 1456 1457 uvm_map_check(map, "map backmerged"); 1458 1459 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 1460 merged++; 1461 } 1462 1463 forwardmerge: 1464 if (prev_entry->next->start == (start + size) && 1465 prev_entry->next != &map->header && 1466 UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, meflagval, 1467 prot, maxprot, inherit, advice, 0)) { 1468 1469 if (uobj && prev_entry->next->offset != uoffset + size) 1470 goto nomerge; 1471 1472 /* 1473 * can't extend a shared amap. note: no need to lock amap to 1474 * look at refs since we don't care about its exact value. 1475 * if it is one (i.e. we have only reference) it will stay there. 
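		 *
		 * (concretely: the new range [start, start + size) abuts
		 * prev_entry->next, so a successful merge extends an
		 * existing entry over it instead of linking a new one;
		 * an illustrative restatement, not new behaviour.)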
1476 * 1477 * note that we also can't merge two amaps, so if we 1478 * merged with the previous entry which has an amap, 1479 * and the next entry also has an amap, we give up. 1480 * 1481 * Interesting cases: 1482 * amap, new, amap -> give up second merge (single fwd extend) 1483 * amap, new, none -> double forward extend (extend again here) 1484 * none, new, amap -> double backward extend (done here) 1485 * uobj, new, amap -> single backward extend (done here) 1486 * 1487 * XXX should we attempt to deal with someone refilling 1488 * the deallocated region between two entries that are 1489 * backed by the same amap (ie, arefs is 2, "prev" and 1490 * "next" refer to it, and adding this allocation will 1491 * close the hole, thus restoring arefs to 1 and 1492 * deallocating the "next" vm_map_entry)? -- @@@ 1493 */ 1494 1495 if (prev_entry->next->aref.ar_amap && 1496 (amap_refs(prev_entry->next->aref.ar_amap) != 1 || 1497 (merged && prev_entry->aref.ar_amap))) { 1498 goto nomerge; 1499 } 1500 1501 if (merged) { 1502 /* 1503 * Try to extend the amap of the previous entry to 1504 * cover the next entry as well. If it doesn't work 1505 * just skip on, don't actually give up, since we've 1506 * already completed the back merge. 1507 */ 1508 if (prev_entry->aref.ar_amap) { 1509 if (amap_extend(prev_entry, 1510 prev_entry->next->end - 1511 prev_entry->next->start, 1512 amapwaitflag | AMAP_EXTEND_FORWARDS)) 1513 goto nomerge; 1514 } 1515 1516 /* 1517 * Try to extend the amap of the *next* entry 1518 * back to cover the new allocation *and* the 1519 * previous entry as well (the previous merge 1520 * didn't have an amap already otherwise we 1521 * wouldn't be checking here for an amap). If 1522 * it doesn't work just skip on, again, don't 1523 * actually give up, since we've already 1524 * completed the back merge. 1525 */ 1526 else if (prev_entry->next->aref.ar_amap) { 1527 if (amap_extend(prev_entry->next, 1528 prev_entry->end - 1529 prev_entry->start, 1530 amapwaitflag | AMAP_EXTEND_BACKWARDS)) 1531 goto nomerge; 1532 } 1533 } else { 1534 /* 1535 * Pull the next entry's amap backwards to cover this 1536 * new allocation. 1537 */ 1538 if (prev_entry->next->aref.ar_amap) { 1539 error = amap_extend(prev_entry->next, size, 1540 amapwaitflag | AMAP_EXTEND_BACKWARDS); 1541 if (error) 1542 goto nomerge; 1543 } 1544 } 1545 1546 if (merged) { 1547 if (kmap) { 1548 UVMMAP_EVCNT_DECR(kbackmerge); 1549 UVMMAP_EVCNT_INCR(kbimerge); 1550 } else { 1551 UVMMAP_EVCNT_DECR(ubackmerge); 1552 UVMMAP_EVCNT_INCR(ubimerge); 1553 } 1554 } else { 1555 if (kmap) { 1556 UVMMAP_EVCNT_INCR(kforwmerge); 1557 } else { 1558 UVMMAP_EVCNT_INCR(uforwmerge); 1559 } 1560 } 1561 UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0); 1562 1563 /* 1564 * drop our reference to uobj since we are extending a reference 1565 * that we already have (the ref count can not drop to zero). 
1566 * (if merged, we've already detached) 1567 */ 1568 if (uobj && uobj->pgops->pgo_detach && !merged) 1569 uobj->pgops->pgo_detach(uobj); 1570 1571 if (merged) { 1572 dead = prev_entry->next; 1573 prev_entry->end = dead->end; 1574 uvm_map_entry_unlink(map, dead); 1575 if (dead->aref.ar_amap != NULL) { 1576 prev_entry->aref = dead->aref; 1577 dead->aref.ar_amap = NULL; 1578 } 1579 } else { 1580 prev_entry->next->start -= size; 1581 if (prev_entry != &map->header) { 1582 prev_entry->gap -= size; 1583 KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry)); 1584 uvm_rb_fixup(map, prev_entry); 1585 } 1586 if (uobj) 1587 prev_entry->next->offset = uoffset; 1588 } 1589 1590 uvm_map_check(map, "map forwardmerged"); 1591 1592 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0); 1593 merged++; 1594 } 1595 1596 nomerge: 1597 if (!merged) { 1598 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 1599 if (kmap) { 1600 UVMMAP_EVCNT_INCR(knomerge); 1601 } else { 1602 UVMMAP_EVCNT_INCR(unomerge); 1603 } 1604 1605 /* 1606 * allocate new entry and link it in. 1607 */ 1608 1609 if (new_entry == NULL) { 1610 new_entry = uvm_mapent_alloc(map, 1611 (flags & UVM_FLAG_NOWAIT)); 1612 if (__predict_false(new_entry == NULL)) { 1613 error = ENOMEM; 1614 goto done; 1615 } 1616 } 1617 new_entry->start = start; 1618 new_entry->end = new_entry->start + size; 1619 new_entry->object.uvm_obj = uobj; 1620 new_entry->offset = uoffset; 1621 1622 new_entry->etype = newetype; 1623 1624 if (flags & UVM_FLAG_NOMERGE) { 1625 new_entry->flags |= UVM_MAP_NOMERGE; 1626 } 1627 1628 new_entry->protection = prot; 1629 new_entry->max_protection = maxprot; 1630 new_entry->inheritance = inherit; 1631 new_entry->wired_count = 0; 1632 new_entry->advice = advice; 1633 if (flags & UVM_FLAG_OVERLAY) { 1634 1635 /* 1636 * to_add: for BSS we overallocate a little since we 1637 * are likely to extend 1638 */ 1639 1640 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 1641 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 1642 struct vm_amap *amap = amap_alloc(size, to_add, 1643 (flags & UVM_FLAG_NOWAIT)); 1644 if (__predict_false(amap == NULL)) { 1645 error = ENOMEM; 1646 goto done; 1647 } 1648 new_entry->aref.ar_pageoff = 0; 1649 new_entry->aref.ar_amap = amap; 1650 } else { 1651 new_entry->aref.ar_pageoff = 0; 1652 new_entry->aref.ar_amap = NULL; 1653 } 1654 uvm_map_entry_link(map, prev_entry, new_entry); 1655 1656 /* 1657 * Update the free space hint 1658 */ 1659 1660 if ((map->first_free == prev_entry) && 1661 (prev_entry->end >= new_entry->start)) 1662 map->first_free = new_entry; 1663 1664 new_entry = NULL; 1665 } 1666 1667 map->size += size; 1668 1669 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1670 1671 error = 0; 1672 done: 1673 if ((flags & UVM_FLAG_QUANTUM) == 0) { 1674 /* 1675 * vmk_merged_entries is locked by the map's lock. 
1676 */ 1677 vm_map_unlock(map); 1678 } 1679 if (new_entry && error == 0) { 1680 KDASSERT(merged); 1681 uvm_mapent_free_merged(map, new_entry); 1682 new_entry = NULL; 1683 } 1684 if (dead) { 1685 KDASSERT(merged); 1686 uvm_mapent_free_merged(map, dead); 1687 } 1688 if ((flags & UVM_FLAG_QUANTUM) != 0) { 1689 vm_map_unlock(map); 1690 } 1691 if (new_entry != NULL) { 1692 uvm_mapent_free(new_entry); 1693 } 1694 return error; 1695 } 1696 1697 /* 1698 * uvm_map_lookup_entry_bytree: lookup an entry in tree 1699 */ 1700 1701 static inline bool 1702 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address, 1703 struct vm_map_entry **entry /* OUT */) 1704 { 1705 struct vm_map_entry *prev = &map->header; 1706 struct vm_map_entry *cur = ROOT_ENTRY(map); 1707 1708 while (cur) { 1709 UVMMAP_EVCNT_INCR(mlk_treeloop); 1710 if (address >= cur->start) { 1711 if (address < cur->end) { 1712 *entry = cur; 1713 return true; 1714 } 1715 prev = cur; 1716 cur = RIGHT_ENTRY(cur); 1717 } else 1718 cur = LEFT_ENTRY(cur); 1719 } 1720 *entry = prev; 1721 return false; 1722 } 1723 1724 /* 1725 * uvm_map_lookup_entry: find map entry at or before an address 1726 * 1727 * => map must at least be read-locked by caller 1728 * => entry is returned in "entry" 1729 * => return value is true if address is in the returned entry 1730 */ 1731 1732 bool 1733 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1734 struct vm_map_entry **entry /* OUT */) 1735 { 1736 struct vm_map_entry *cur; 1737 bool use_tree = false; 1738 UVMHIST_FUNC("uvm_map_lookup_entry"); 1739 UVMHIST_CALLED(maphist); 1740 1741 UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)", 1742 map, address, entry, 0); 1743 1744 /* 1745 * start looking either from the head of the 1746 * list, or from the hint. 1747 */ 1748 1749 cur = map->hint; 1750 1751 if (cur == &map->header) 1752 cur = cur->next; 1753 1754 UVMMAP_EVCNT_INCR(mlk_call); 1755 if (address >= cur->start) { 1756 1757 /* 1758 * go from hint to end of list. 1759 * 1760 * but first, make a quick check to see if 1761 * we are already looking at the entry we 1762 * want (which is usually the case). 1763 * note also that we don't need to save the hint 1764 * here... it is the same hint (unless we are 1765 * at the header, in which case the hint didn't 1766 * buy us anything anyway). 1767 */ 1768 1769 if (cur != &map->header && cur->end > address) { 1770 UVMMAP_EVCNT_INCR(mlk_hint); 1771 *entry = cur; 1772 UVMHIST_LOG(maphist,"<- got it via hint (0x%x)", 1773 cur, 0, 0, 0); 1774 uvm_mapent_check(*entry); 1775 return (true); 1776 } 1777 1778 if (map->nentries > 15) 1779 use_tree = true; 1780 } else { 1781 1782 /* 1783 * invalid hint. use tree. 1784 */ 1785 use_tree = true; 1786 } 1787 1788 uvm_map_check(map, __func__); 1789 1790 if (use_tree) { 1791 /* 1792 * Simple lookup in the tree. Happens when the hint is 1793 * invalid, or nentries reach a threshold. 
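	 *
	 * (the threshold is the nentries > 15 check above; below that,
	 * the linear scan from the hint is assumed to be cheaper than
	 * a tree descent.  the constant is from this file, the rationale
	 * is an inference.)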
	 */
		UVMMAP_EVCNT_INCR(mlk_tree);
		if (uvm_map_lookup_entry_bytree(map, address, entry)) {
			goto got;
		} else {
			goto failed;
		}
	}

	/*
	 * search linearly
	 */

	UVMMAP_EVCNT_INCR(mlk_list);
	while (cur != &map->header) {
		UVMMAP_EVCNT_INCR(mlk_listloop);
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * save this lookup for future
				 * hints, and return
				 */

				*entry = cur;
got:
				SAVE_HINT(map, map->hint, *entry);
				UVMHIST_LOG(maphist,"<- search got it (0x%x)",
				    cur, 0, 0, 0);
				KDASSERT((*entry)->start <= address);
				KDASSERT(address < (*entry)->end);
				uvm_mapent_check(*entry);
				return (true);
			}
			break;
		}
		cur = cur->next;
	}
	*entry = cur->prev;
failed:
	SAVE_HINT(map, map->hint, *entry);
	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
	KDASSERT((*entry)->next == &map->header ||
	    address < (*entry)->next->start);
	return (false);
}

/*
 * See if the range between start and start + length fits in the gap
 * between entry->end and entry->next->start.  Returns 1 if it fits,
 * 0 if it doesn't fit, and -1 if the address wraps around.
 */
static int
uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
    vsize_t align, int topdown, struct vm_map_entry *entry)
{
	vaddr_t end;

#ifdef PMAP_PREFER
	/*
	 * push start address forward as needed to avoid VAC alias problems.
	 * we only do this if a valid offset is specified.
	 */

	if (uoffset != UVM_UNKNOWN_OFFSET)
		PMAP_PREFER(uoffset, start, length, topdown);
#endif
	if (align != 0) {
		if ((*start & (align - 1)) != 0) {
			if (topdown)
				*start &= ~(align - 1);
			else
				*start = roundup(*start, align);
		}
		/*
		 * XXX Should we PMAP_PREFER() here again?
		 * eh...i think we're okay
		 */
	}

	/*
	 * Find the end of the proposed new region.  Be sure we didn't
	 * wrap around the address; if so, we lose.  Otherwise, if the
	 * proposed new region fits before the next entry, we win.
	 */

	end = *start + length;
	if (end < *start)
		return (-1);

	if (entry->next->start >= end && *start >= entry->end)
		return (1);

	return (0);
}

/*
 * uvm_map_findspace: find "length" sized space in "map".
 *
 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
 *	set in "flags" (in which case we insist on using "hint").
 * => "result" is VA returned
 * => uobj/uoffset are to be used to handle VAC alignment, if required
 * => if "align" is non-zero, we attempt to align to that value.
 * => caller must at least have read-locked map
 * => returns NULL on failure, or pointer to prev.
map entry if success 1900 * => note this is a cross between the old vm_map_findspace and vm_map_find 1901 */ 1902 1903 struct vm_map_entry * 1904 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1905 vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, 1906 vsize_t align, int flags) 1907 { 1908 struct vm_map_entry *entry; 1909 struct vm_map_entry *child, *prev, *tmp; 1910 vaddr_t orig_hint; 1911 const int topdown = map->flags & VM_MAP_TOPDOWN; 1912 UVMHIST_FUNC("uvm_map_findspace"); 1913 UVMHIST_CALLED(maphist); 1914 1915 UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", 1916 map, hint, length, flags); 1917 KASSERT((align & (align - 1)) == 0); 1918 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1919 1920 uvm_map_check(map, "map_findspace entry"); 1921 1922 /* 1923 * remember the original hint. if we are aligning, then we 1924 * may have to try again with no alignment constraint if 1925 * we fail the first time. 1926 */ 1927 1928 orig_hint = hint; 1929 if (hint < vm_map_min(map)) { /* check ranges ... */ 1930 if (flags & UVM_FLAG_FIXED) { 1931 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1932 return (NULL); 1933 } 1934 hint = vm_map_min(map); 1935 } 1936 if (hint > vm_map_max(map)) { 1937 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]", 1938 hint, vm_map_min(map), vm_map_max(map), 0); 1939 return (NULL); 1940 } 1941 1942 /* 1943 * Look for the first possible address; if there's already 1944 * something at this address, we have to start after it. 1945 */ 1946 1947 /* 1948 * @@@: there are four, no, eight cases to consider. 1949 * 1950 * 0: found, fixed, bottom up -> fail 1951 * 1: found, fixed, top down -> fail 1952 * 2: found, not fixed, bottom up -> start after entry->end, 1953 * loop up 1954 * 3: found, not fixed, top down -> start before entry->start, 1955 * loop down 1956 * 4: not found, fixed, bottom up -> check entry->next->start, fail 1957 * 5: not found, fixed, top down -> check entry->next->start, fail 1958 * 6: not found, not fixed, bottom up -> check entry->next->start, 1959 * loop up 1960 * 7: not found, not fixed, top down -> check entry->next->start, 1961 * loop down 1962 * 1963 * as you can see, it reduces to roughly five cases, and that 1964 * adding top down mapping only adds one unique case (without 1965 * it, there would be four cases). 1966 */ 1967 1968 if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) { 1969 entry = map->first_free; 1970 } else { 1971 if (uvm_map_lookup_entry(map, hint, &entry)) { 1972 /* "hint" address already in use ... */ 1973 if (flags & UVM_FLAG_FIXED) { 1974 UVMHIST_LOG(maphist, "<- fixed & VA in use", 1975 0, 0, 0, 0); 1976 return (NULL); 1977 } 1978 if (topdown) 1979 /* Start from lower gap. */ 1980 entry = entry->prev; 1981 } else if (flags & UVM_FLAG_FIXED) { 1982 if (entry->next->start >= hint + length && 1983 hint + length > hint) 1984 goto found; 1985 1986 /* "hint" address is gap but too small */ 1987 UVMHIST_LOG(maphist, "<- fixed mapping failed", 1988 0, 0, 0, 0); 1989 return (NULL); /* only one shot at it ... */ 1990 } else { 1991 /* 1992 * See if given hint fits in this gap. 1993 */ 1994 switch (uvm_map_space_avail(&hint, length, 1995 uoffset, align, topdown, entry)) { 1996 case 1: 1997 goto found; 1998 case -1: 1999 goto wraparound; 2000 } 2001 2002 if (topdown) { 2003 /* 2004 * Still there is a chance to fit 2005 * if hint > entry->end. 2006 */ 2007 } else { 2008 /* Start from higher gap. 
*/ 2009 entry = entry->next; 2010 if (entry == &map->header) 2011 goto notfound; 2012 goto nextgap; 2013 } 2014 } 2015 } 2016 2017 /* 2018 * Note that all UVM_FLAGS_FIXED case is already handled. 2019 */ 2020 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 2021 2022 /* Try to find the space in the red-black tree */ 2023 2024 /* Check slot before any entry */ 2025 hint = topdown ? entry->next->start - length : entry->end; 2026 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2027 topdown, entry)) { 2028 case 1: 2029 goto found; 2030 case -1: 2031 goto wraparound; 2032 } 2033 2034 nextgap: 2035 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 2036 /* If there is not enough space in the whole tree, we fail */ 2037 tmp = ROOT_ENTRY(map); 2038 if (tmp == NULL || tmp->maxgap < length) 2039 goto notfound; 2040 2041 prev = NULL; /* previous candidate */ 2042 2043 /* Find an entry close to hint that has enough space */ 2044 for (; tmp;) { 2045 KASSERT(tmp->next->start == tmp->end + tmp->gap); 2046 if (topdown) { 2047 if (tmp->next->start < hint + length && 2048 (prev == NULL || tmp->end > prev->end)) { 2049 if (tmp->gap >= length) 2050 prev = tmp; 2051 else if ((child = LEFT_ENTRY(tmp)) != NULL 2052 && child->maxgap >= length) 2053 prev = tmp; 2054 } 2055 } else { 2056 if (tmp->end >= hint && 2057 (prev == NULL || tmp->end < prev->end)) { 2058 if (tmp->gap >= length) 2059 prev = tmp; 2060 else if ((child = RIGHT_ENTRY(tmp)) != NULL 2061 && child->maxgap >= length) 2062 prev = tmp; 2063 } 2064 } 2065 if (tmp->next->start < hint + length) 2066 child = RIGHT_ENTRY(tmp); 2067 else if (tmp->end > hint) 2068 child = LEFT_ENTRY(tmp); 2069 else { 2070 if (tmp->gap >= length) 2071 break; 2072 if (topdown) 2073 child = LEFT_ENTRY(tmp); 2074 else 2075 child = RIGHT_ENTRY(tmp); 2076 } 2077 if (child == NULL || child->maxgap < length) 2078 break; 2079 tmp = child; 2080 } 2081 2082 if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) { 2083 /* 2084 * Check if the entry that we found satifies the 2085 * space requirement 2086 */ 2087 if (topdown) { 2088 if (hint > tmp->next->start - length) 2089 hint = tmp->next->start - length; 2090 } else { 2091 if (hint < tmp->end) 2092 hint = tmp->end; 2093 } 2094 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2095 topdown, tmp)) { 2096 case 1: 2097 entry = tmp; 2098 goto found; 2099 case -1: 2100 goto wraparound; 2101 } 2102 if (tmp->gap >= length) 2103 goto listsearch; 2104 } 2105 if (prev == NULL) 2106 goto notfound; 2107 2108 if (topdown) { 2109 KASSERT(orig_hint >= prev->next->start - length || 2110 prev->next->start - length > prev->next->start); 2111 hint = prev->next->start - length; 2112 } else { 2113 KASSERT(orig_hint <= prev->end); 2114 hint = prev->end; 2115 } 2116 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2117 topdown, prev)) { 2118 case 1: 2119 entry = prev; 2120 goto found; 2121 case -1: 2122 goto wraparound; 2123 } 2124 if (prev->gap >= length) 2125 goto listsearch; 2126 2127 if (topdown) 2128 tmp = LEFT_ENTRY(prev); 2129 else 2130 tmp = RIGHT_ENTRY(prev); 2131 for (;;) { 2132 KASSERT(tmp && tmp->maxgap >= length); 2133 if (topdown) 2134 child = RIGHT_ENTRY(tmp); 2135 else 2136 child = LEFT_ENTRY(tmp); 2137 if (child && child->maxgap >= length) { 2138 tmp = child; 2139 continue; 2140 } 2141 if (tmp->gap >= length) 2142 break; 2143 if (topdown) 2144 tmp = LEFT_ENTRY(tmp); 2145 else 2146 tmp = RIGHT_ENTRY(tmp); 2147 } 2148 2149 if (topdown) { 2150 KASSERT(orig_hint >= tmp->next->start - length || 2151 tmp->next->start - length 
> tmp->next->start); 2152 hint = tmp->next->start - length; 2153 } else { 2154 KASSERT(orig_hint <= tmp->end); 2155 hint = tmp->end; 2156 } 2157 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2158 topdown, tmp)) { 2159 case 1: 2160 entry = tmp; 2161 goto found; 2162 case -1: 2163 goto wraparound; 2164 } 2165 2166 /* 2167 * The tree fails to find an entry because of offset or alignment 2168 * restrictions. Search the list instead. 2169 */ 2170 listsearch: 2171 /* 2172 * Look through the rest of the map, trying to fit a new region in 2173 * the gap between existing regions, or after the very last region. 2174 * note: entry->end = base VA of current gap, 2175 * entry->next->start = VA of end of current gap 2176 */ 2177 2178 for (;;) { 2179 /* Update hint for current gap. */ 2180 hint = topdown ? entry->next->start - length : entry->end; 2181 2182 /* See if it fits. */ 2183 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2184 topdown, entry)) { 2185 case 1: 2186 goto found; 2187 case -1: 2188 goto wraparound; 2189 } 2190 2191 /* Advance to next/previous gap */ 2192 if (topdown) { 2193 if (entry == &map->header) { 2194 UVMHIST_LOG(maphist, "<- failed (off start)", 2195 0,0,0,0); 2196 goto notfound; 2197 } 2198 entry = entry->prev; 2199 } else { 2200 entry = entry->next; 2201 if (entry == &map->header) { 2202 UVMHIST_LOG(maphist, "<- failed (off end)", 2203 0,0,0,0); 2204 goto notfound; 2205 } 2206 } 2207 } 2208 2209 found: 2210 SAVE_HINT(map, map->hint, entry); 2211 *result = hint; 2212 UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0); 2213 KASSERT( topdown || hint >= orig_hint); 2214 KASSERT(!topdown || hint <= orig_hint); 2215 KASSERT(entry->end <= hint); 2216 KASSERT(hint + length <= entry->next->start); 2217 return (entry); 2218 2219 wraparound: 2220 UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0); 2221 2222 return (NULL); 2223 2224 notfound: 2225 UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0); 2226 2227 return (NULL); 2228 } 2229 2230 /* 2231 * U N M A P - m a i n h e l p e r f u n c t i o n s 2232 */ 2233 2234 /* 2235 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 2236 * 2237 * => caller must check alignment and size 2238 * => map must be locked by caller 2239 * => we return a list of map entries that we've remove from the map 2240 * in "entry_list" 2241 */ 2242 2243 void 2244 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2245 struct vm_map_entry **entry_list /* OUT */, 2246 struct uvm_mapent_reservation *umr, int flags) 2247 { 2248 struct vm_map_entry *entry, *first_entry, *next; 2249 vaddr_t len; 2250 UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist); 2251 2252 UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", 2253 map, start, end, 0); 2254 VM_MAP_RANGE_CHECK(map, start, end); 2255 2256 uvm_map_check(map, "unmap_remove entry"); 2257 2258 /* 2259 * find first entry 2260 */ 2261 2262 if (uvm_map_lookup_entry(map, start, &first_entry) == true) { 2263 /* clip and go... */ 2264 entry = first_entry; 2265 UVM_MAP_CLIP_START(map, entry, start, umr); 2266 /* critical! prevents stale hint */ 2267 SAVE_HINT(map, entry, entry->prev); 2268 } else { 2269 entry = first_entry->next; 2270 } 2271 2272 /* 2273 * Save the free space hint 2274 */ 2275 2276 if (map->first_free != &map->header && map->first_free->start >= start) 2277 map->first_free = entry->prev; 2278 2279 /* 2280 * note: we now re-use first_entry for a different task. 
we remove 2281 * a number of map entries from the map and save them in a linked 2282 * list headed by "first_entry". once we remove them from the map 2283 * the caller should unlock the map and drop the references to the 2284 * backing objects [c.f. uvm_unmap_detach]. the object is to 2285 * separate unmapping from reference dropping. why? 2286 * [1] the map has to be locked for unmapping 2287 * [2] the map need not be locked for reference dropping 2288 * [3] dropping references may trigger pager I/O, and if we hit 2289 * a pager that does synchronous I/O we may have to wait for it. 2290 * [4] we would like all waiting for I/O to occur with maps unlocked 2291 * so that we don't block other threads. 2292 */ 2293 2294 first_entry = NULL; 2295 *entry_list = NULL; 2296 2297 /* 2298 * break up the area into map entry sized regions and unmap. note 2299 * that all mappings have to be removed before we can even consider 2300 * dropping references to amaps or VM objects (otherwise we could end 2301 * up with a mapping to a page on the free list which would be very bad) 2302 */ 2303 2304 while ((entry != &map->header) && (entry->start < end)) { 2305 KASSERT((entry->flags & UVM_MAP_FIRST) == 0); 2306 2307 UVM_MAP_CLIP_END(map, entry, end, umr); 2308 next = entry->next; 2309 len = entry->end - entry->start; 2310 2311 /* 2312 * unwire before removing addresses from the pmap; otherwise 2313 * unwiring will put the entries back into the pmap (XXX). 2314 */ 2315 2316 if (VM_MAPENT_ISWIRED(entry)) { 2317 uvm_map_entry_unwire(map, entry); 2318 } 2319 if (flags & UVM_FLAG_VAONLY) { 2320 2321 /* nothing */ 2322 2323 } else if ((map->flags & VM_MAP_PAGEABLE) == 0) { 2324 2325 /* 2326 * if the map is non-pageable, any pages mapped there 2327 * must be wired and entered with pmap_kenter_pa(), 2328 * and we should free any such pages immediately. 2329 * this is mostly used for kmem_map and mb_map. 2330 */ 2331 2332 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2333 uvm_km_pgremove_intrsafe(map, entry->start, 2334 entry->end); 2335 pmap_kremove(entry->start, len); 2336 } 2337 } else if (UVM_ET_ISOBJ(entry) && 2338 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2339 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2340 2341 /* 2342 * note: kernel object mappings are currently used in 2343 * two ways: 2344 * [1] "normal" mappings of pages in the kernel object 2345 * [2] uvm_km_valloc'd allocations in which we 2346 * pmap_enter in some non-kernel-object page 2347 * (e.g. vmapbuf). 2348 * 2349 * for case [1], we need to remove the mapping from 2350 * the pmap and then remove the page from the kernel 2351 * object (because, once pages in a kernel object are 2352 * unmapped they are no longer needed, unlike, say, 2353 * a vnode where you might want the data to persist 2354 * until flushed out of a queue). 2355 * 2356 * for case [2], we need to remove the mapping from 2357 * the pmap. there shouldn't be any pages at the 2358 * specified offset in the kernel object [but it 2359 * doesn't hurt to call uvm_km_pgremove just to be 2360 * safe?] 2361 * 2362 * uvm_km_pgremove currently does the following: 2363 * for pages in the kernel object in range: 2364 * - drops the swap slot 2365 * - uvm_pagefree the page 2366 */ 2367 2368 /* 2369 * remove mappings from pmap and drop the pages 2370 * from the object. offsets are always relative 2371 * to vm_map_min(kernel_map). 
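 *
 * for example (an illustrative sketch only, not code from this
 * file): the kernel-object offset corresponding to the page
 * mapped at kernel virtual address "va" would be
 *
 *	voff_t off = va - vm_map_min(kernel_map);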
2372 */ 2373 2374 pmap_remove(pmap_kernel(), entry->start, 2375 entry->start + len); 2376 uvm_km_pgremove(entry->start, entry->end); 2377 2378 /* 2379 * null out kernel_object reference, we've just 2380 * dropped it 2381 */ 2382 2383 entry->etype &= ~UVM_ET_OBJ; 2384 entry->object.uvm_obj = NULL; 2385 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2386 2387 /* 2388 * remove mappings the standard way. 2389 */ 2390 2391 pmap_remove(map->pmap, entry->start, entry->end); 2392 } 2393 2394 #if defined(DEBUG) 2395 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2396 2397 /* 2398 * check if there's remaining mapping, 2399 * which is a bug in caller. 2400 */ 2401 2402 vaddr_t va; 2403 for (va = entry->start; va < entry->end; 2404 va += PAGE_SIZE) { 2405 if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2406 panic("uvm_unmap_remove: has mapping"); 2407 } 2408 } 2409 2410 if (VM_MAP_IS_KERNEL(map)) { 2411 uvm_km_check_empty(map, entry->start, 2412 entry->end); 2413 } 2414 } 2415 #endif /* defined(DEBUG) */ 2416 2417 /* 2418 * remove entry from map and put it on our list of entries 2419 * that we've nuked. then go to next entry. 2420 */ 2421 2422 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); 2423 2424 /* critical! prevents stale hint */ 2425 SAVE_HINT(map, entry, entry->prev); 2426 2427 uvm_map_entry_unlink(map, entry); 2428 KASSERT(map->size >= len); 2429 map->size -= len; 2430 entry->prev = NULL; 2431 entry->next = first_entry; 2432 first_entry = entry; 2433 entry = next; 2434 } 2435 if ((map->flags & VM_MAP_DYING) == 0) { 2436 pmap_update(vm_map_pmap(map)); 2437 } 2438 2439 uvm_map_check(map, "unmap_remove leave"); 2440 2441 /* 2442 * now we've cleaned up the map and are ready for the caller to drop 2443 * references to the mapped objects. 2444 */ 2445 2446 *entry_list = first_entry; 2447 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2448 2449 if (map->flags & VM_MAP_WANTVA) { 2450 mutex_enter(&map->misc_lock); 2451 map->flags &= ~VM_MAP_WANTVA; 2452 cv_broadcast(&map->cv); 2453 mutex_exit(&map->misc_lock); 2454 } 2455 } 2456 2457 /* 2458 * uvm_unmap_detach: drop references in a chain of map entries 2459 * 2460 * => we will free the map entries as we traverse the list. 2461 */ 2462 2463 void 2464 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2465 { 2466 struct vm_map_entry *next_entry; 2467 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 2468 2469 while (first_entry) { 2470 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2471 UVMHIST_LOG(maphist, 2472 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", 2473 first_entry, first_entry->aref.ar_amap, 2474 first_entry->object.uvm_obj, 2475 UVM_ET_ISSUBMAP(first_entry)); 2476 2477 /* 2478 * drop reference to amap, if we've got one 2479 */ 2480 2481 if (first_entry->aref.ar_amap) 2482 uvm_map_unreference_amap(first_entry, flags); 2483 2484 /* 2485 * drop reference to our backing object, if we've got one 2486 */ 2487 2488 KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2489 if (UVM_ET_ISOBJ(first_entry) && 2490 first_entry->object.uvm_obj->pgops->pgo_detach) { 2491 (*first_entry->object.uvm_obj->pgops->pgo_detach) 2492 (first_entry->object.uvm_obj); 2493 } 2494 next_entry = first_entry->next; 2495 uvm_mapent_free(first_entry); 2496 first_entry = next_entry; 2497 } 2498 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2499 } 2500 2501 /* 2502 * E X T R A C T I O N F U N C T I O N S 2503 */ 2504 2505 /* 2506 * uvm_map_reserve: reserve space in a vm_map for future use. 
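 *
 * a minimal illustrative call (a sketch only, not code from this file;
 * "size" is assumed to be supplied by the caller, and kernel_map is
 * used purely for concreteness):
 *
 *	vaddr_t va = vm_map_min(kernel_map);
 *
 *	if (uvm_map_reserve(kernel_map, size, 0, 0, &va, 0) == false)
 *		return ENOMEM;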
2507 * 2508 * => we reserve space in a map by putting a dummy map entry in the 2509 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2510 * => map should be unlocked (we will write lock it) 2511 * => we return true if we were able to reserve space 2512 * => XXXCDC: should be inline? 2513 */ 2514 2515 int 2516 uvm_map_reserve(struct vm_map *map, vsize_t size, 2517 vaddr_t offset /* hint for pmap_prefer */, 2518 vsize_t align /* alignment */, 2519 vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2520 uvm_flag_t flags /* UVM_FLAG_FIXED or 0 */) 2521 { 2522 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 2523 2524 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", 2525 map,size,offset,raddr); 2526 2527 size = round_page(size); 2528 2529 /* 2530 * reserve some virtual space. 2531 */ 2532 2533 if (uvm_map(map, raddr, size, NULL, offset, align, 2534 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 2535 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) { 2536 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 2537 return (false); 2538 } 2539 2540 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); 2541 return (true); 2542 } 2543 2544 /* 2545 * uvm_map_replace: replace a reserved (blank) area of memory with 2546 * real mappings. 2547 * 2548 * => caller must WRITE-LOCK the map 2549 * => we return true if replacement was a success 2550 * => we expect the newents chain to have nnewents entrys on it and 2551 * we expect newents->prev to point to the last entry on the list 2552 * => note newents is allowed to be NULL 2553 */ 2554 2555 int 2556 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 2557 struct vm_map_entry *newents, int nnewents, struct vm_map_entry **oldentryp) 2558 { 2559 struct vm_map_entry *oldent, *last; 2560 2561 uvm_map_check(map, "map_replace entry"); 2562 2563 /* 2564 * first find the blank map entry at the specified address 2565 */ 2566 2567 if (!uvm_map_lookup_entry(map, start, &oldent)) { 2568 return (false); 2569 } 2570 2571 /* 2572 * check to make sure we have a proper blank entry 2573 */ 2574 2575 if (end < oldent->end && !VM_MAP_USE_KMAPENT(map)) { 2576 UVM_MAP_CLIP_END(map, oldent, end, NULL); 2577 } 2578 if (oldent->start != start || oldent->end != end || 2579 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 2580 return (false); 2581 } 2582 2583 #ifdef DIAGNOSTIC 2584 2585 /* 2586 * sanity check the newents chain 2587 */ 2588 2589 { 2590 struct vm_map_entry *tmpent = newents; 2591 int nent = 0; 2592 vaddr_t cur = start; 2593 2594 while (tmpent) { 2595 nent++; 2596 if (tmpent->start < cur) 2597 panic("uvm_map_replace1"); 2598 if (tmpent->start > tmpent->end || tmpent->end > end) { 2599 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", 2600 tmpent->start, tmpent->end, end); 2601 panic("uvm_map_replace2"); 2602 } 2603 cur = tmpent->end; 2604 if (tmpent->next) { 2605 if (tmpent->next->prev != tmpent) 2606 panic("uvm_map_replace3"); 2607 } else { 2608 if (newents->prev != tmpent) 2609 panic("uvm_map_replace4"); 2610 } 2611 tmpent = tmpent->next; 2612 } 2613 if (nent != nnewents) 2614 panic("uvm_map_replace5"); 2615 } 2616 #endif 2617 2618 /* 2619 * map entry is a valid blank! replace it. (this does all the 2620 * work of map entry link/unlink...). 
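 *
 * conceptually (a sketch of the splice only, not additional code),
 * the single blank entry
 *
 *	... <-> prev <-> oldent <-> next <-> ...
 *
 * is unlinked and the chain headed by "newents" (whose last entry
 * is newents->prev, called "last" below) takes its place:
 *
 *	... <-> prev <-> newents <-> ... <-> last <-> next <-> ...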
2621 */ 2622 2623 if (newents) { 2624 last = newents->prev; 2625 2626 /* critical: flush stale hints out of map */ 2627 SAVE_HINT(map, map->hint, newents); 2628 if (map->first_free == oldent) 2629 map->first_free = last; 2630 2631 last->next = oldent->next; 2632 last->next->prev = last; 2633 2634 /* Fix RB tree */ 2635 uvm_rb_remove(map, oldent); 2636 2637 newents->prev = oldent->prev; 2638 newents->prev->next = newents; 2639 map->nentries = map->nentries + (nnewents - 1); 2640 2641 /* Fixup the RB tree */ 2642 { 2643 int i; 2644 struct vm_map_entry *tmp; 2645 2646 tmp = newents; 2647 for (i = 0; i < nnewents && tmp; i++) { 2648 uvm_rb_insert(map, tmp); 2649 tmp = tmp->next; 2650 } 2651 } 2652 } else { 2653 /* NULL list of new entries: just remove the old one */ 2654 clear_hints(map, oldent); 2655 uvm_map_entry_unlink(map, oldent); 2656 } 2657 2658 uvm_map_check(map, "map_replace leave"); 2659 2660 /* 2661 * now we can free the old blank entry and return. 2662 */ 2663 2664 *oldentryp = oldent; 2665 return (true); 2666 } 2667 2668 /* 2669 * uvm_map_extract: extract a mapping from a map and put it somewhere 2670 * (maybe removing the old mapping) 2671 * 2672 * => maps should be unlocked (we will write lock them) 2673 * => returns 0 on success, error code otherwise 2674 * => start must be page aligned 2675 * => len must be page sized 2676 * => flags: 2677 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2678 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2679 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2680 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2681 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2682 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2683 * be used from within the kernel in a kernel level map <<< 2684 */ 2685 2686 int 2687 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2688 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2689 { 2690 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2691 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2692 *deadentry, *oldentry; 2693 struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */ 2694 vsize_t elen; 2695 int nchain, error, copy_ok; 2696 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 2697 2698 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, 2699 len,0); 2700 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); 2701 2702 uvm_map_check(srcmap, "map_extract src enter"); 2703 uvm_map_check(dstmap, "map_extract dst enter"); 2704 2705 /* 2706 * step 0: sanity check: start must be on a page boundary, length 2707 * must be page sized. can't ask for CONTIG/QREF if you asked for 2708 * REMOVE. 
2709 */ 2710 2711 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 2712 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2713 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2714 2715 /* 2716 * step 1: reserve space in the target map for the extracted area 2717 */ 2718 2719 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2720 dstaddr = vm_map_min(dstmap); 2721 if (!uvm_map_reserve(dstmap, len, start, 0, &dstaddr, 0)) 2722 return (ENOMEM); 2723 *dstaddrp = dstaddr; /* pass address back to caller */ 2724 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); 2725 } else { 2726 dstaddr = *dstaddrp; 2727 } 2728 2729 /* 2730 * step 2: setup for the extraction process loop by init'ing the 2731 * map entry chain, locking src map, and looking up the first useful 2732 * entry in the map. 2733 */ 2734 2735 end = start + len; 2736 newend = dstaddr + len; 2737 chain = endchain = NULL; 2738 nchain = 0; 2739 vm_map_lock(srcmap); 2740 2741 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2742 2743 /* "start" is within an entry */ 2744 if (flags & UVM_EXTRACT_QREF) { 2745 2746 /* 2747 * for quick references we don't clip the entry, so 2748 * the entry may map space "before" the starting 2749 * virtual address... this is the "fudge" factor 2750 * (which can be non-zero only the first time 2751 * through the "while" loop in step 3). 2752 */ 2753 2754 fudge = start - entry->start; 2755 } else { 2756 2757 /* 2758 * normal reference: we clip the map to fit (thus 2759 * fudge is zero) 2760 */ 2761 2762 UVM_MAP_CLIP_START(srcmap, entry, start, NULL); 2763 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2764 fudge = 0; 2765 } 2766 } else { 2767 2768 /* "start" is not within an entry ... skip to next entry */ 2769 if (flags & UVM_EXTRACT_CONTIG) { 2770 error = EINVAL; 2771 goto bad; /* definite hole here ... */ 2772 } 2773 2774 entry = entry->next; 2775 fudge = 0; 2776 } 2777 2778 /* save values from srcmap for step 6 */ 2779 orig_entry = entry; 2780 orig_fudge = fudge; 2781 2782 /* 2783 * step 3: now start looping through the map entries, extracting 2784 * as we go. 2785 */ 2786 2787 while (entry->start < end && entry != &srcmap->header) { 2788 2789 /* if we are not doing a quick reference, clip it */ 2790 if ((flags & UVM_EXTRACT_QREF) == 0) 2791 UVM_MAP_CLIP_END(srcmap, entry, end, NULL); 2792 2793 /* clear needs_copy (allow chunking) */ 2794 if (UVM_ET_ISNEEDSCOPY(entry)) { 2795 amap_copy(srcmap, entry, 2796 AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2797 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2798 error = ENOMEM; 2799 goto bad; 2800 } 2801 2802 /* amap_copy could clip (during chunk)! 
update fudge */ 2803 if (fudge) { 2804 fudge = start - entry->start; 2805 orig_fudge = fudge; 2806 } 2807 } 2808 2809 /* calculate the offset of this from "start" */ 2810 oldoffset = (entry->start + fudge) - start; 2811 2812 /* allocate a new map entry */ 2813 newentry = uvm_mapent_alloc(dstmap, 0); 2814 if (newentry == NULL) { 2815 error = ENOMEM; 2816 goto bad; 2817 } 2818 2819 /* set up new map entry */ 2820 newentry->next = NULL; 2821 newentry->prev = endchain; 2822 newentry->start = dstaddr + oldoffset; 2823 newentry->end = 2824 newentry->start + (entry->end - (entry->start + fudge)); 2825 if (newentry->end > newend || newentry->end < newentry->start) 2826 newentry->end = newend; 2827 newentry->object.uvm_obj = entry->object.uvm_obj; 2828 if (newentry->object.uvm_obj) { 2829 if (newentry->object.uvm_obj->pgops->pgo_reference) 2830 newentry->object.uvm_obj->pgops-> 2831 pgo_reference(newentry->object.uvm_obj); 2832 newentry->offset = entry->offset + fudge; 2833 } else { 2834 newentry->offset = 0; 2835 } 2836 newentry->etype = entry->etype; 2837 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2838 entry->max_protection : entry->protection; 2839 newentry->max_protection = entry->max_protection; 2840 newentry->inheritance = entry->inheritance; 2841 newentry->wired_count = 0; 2842 newentry->aref.ar_amap = entry->aref.ar_amap; 2843 if (newentry->aref.ar_amap) { 2844 newentry->aref.ar_pageoff = 2845 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2846 uvm_map_reference_amap(newentry, AMAP_SHARED | 2847 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2848 } else { 2849 newentry->aref.ar_pageoff = 0; 2850 } 2851 newentry->advice = entry->advice; 2852 if ((flags & UVM_EXTRACT_QREF) != 0) { 2853 newentry->flags |= UVM_MAP_NOMERGE; 2854 } 2855 2856 /* now link it on the chain */ 2857 nchain++; 2858 if (endchain == NULL) { 2859 chain = endchain = newentry; 2860 } else { 2861 endchain->next = newentry; 2862 endchain = newentry; 2863 } 2864 2865 /* end of 'while' loop! */ 2866 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2867 (entry->next == &srcmap->header || 2868 entry->next->start != entry->end)) { 2869 error = EINVAL; 2870 goto bad; 2871 } 2872 entry = entry->next; 2873 fudge = 0; 2874 } 2875 2876 /* 2877 * step 4: close off chain (in format expected by uvm_map_replace) 2878 */ 2879 2880 if (chain) 2881 chain->prev = endchain; 2882 2883 /* 2884 * step 5: attempt to lock the dest map so we can pmap_copy. 
2885 * note usage of copy_ok: 2886 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2887 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2888 */ 2889 2890 if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) { 2891 copy_ok = 1; 2892 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2893 nchain, &resentry)) { 2894 if (srcmap != dstmap) 2895 vm_map_unlock(dstmap); 2896 error = EIO; 2897 goto bad; 2898 } 2899 } else { 2900 copy_ok = 0; 2901 /* replace defered until step 7 */ 2902 } 2903 2904 /* 2905 * step 6: traverse the srcmap a second time to do the following: 2906 * - if we got a lock on the dstmap do pmap_copy 2907 * - if UVM_EXTRACT_REMOVE remove the entries 2908 * we make use of orig_entry and orig_fudge (saved in step 2) 2909 */ 2910 2911 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2912 2913 /* purge possible stale hints from srcmap */ 2914 if (flags & UVM_EXTRACT_REMOVE) { 2915 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2916 if (srcmap->first_free != &srcmap->header && 2917 srcmap->first_free->start >= start) 2918 srcmap->first_free = orig_entry->prev; 2919 } 2920 2921 entry = orig_entry; 2922 fudge = orig_fudge; 2923 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2924 2925 while (entry->start < end && entry != &srcmap->header) { 2926 if (copy_ok) { 2927 oldoffset = (entry->start + fudge) - start; 2928 elen = MIN(end, entry->end) - 2929 (entry->start + fudge); 2930 pmap_copy(dstmap->pmap, srcmap->pmap, 2931 dstaddr + oldoffset, elen, 2932 entry->start + fudge); 2933 } 2934 2935 /* we advance "entry" in the following if statement */ 2936 if (flags & UVM_EXTRACT_REMOVE) { 2937 pmap_remove(srcmap->pmap, entry->start, 2938 entry->end); 2939 oldentry = entry; /* save entry */ 2940 entry = entry->next; /* advance */ 2941 uvm_map_entry_unlink(srcmap, oldentry); 2942 /* add to dead list */ 2943 oldentry->next = deadentry; 2944 deadentry = oldentry; 2945 } else { 2946 entry = entry->next; /* advance */ 2947 } 2948 2949 /* end of 'while' loop */ 2950 fudge = 0; 2951 } 2952 pmap_update(srcmap->pmap); 2953 2954 /* 2955 * unlock dstmap. we will dispose of deadentry in 2956 * step 7 if needed 2957 */ 2958 2959 if (copy_ok && srcmap != dstmap) 2960 vm_map_unlock(dstmap); 2961 2962 } else { 2963 deadentry = NULL; 2964 } 2965 2966 /* 2967 * step 7: we are done with the source map, unlock. if copy_ok 2968 * is 0 then we have not replaced the dummy mapping in dstmap yet 2969 * and we need to do so now. 2970 */ 2971 2972 vm_map_unlock(srcmap); 2973 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2974 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2975 2976 /* now do the replacement if we didn't do it in step 5 */ 2977 if (copy_ok == 0) { 2978 vm_map_lock(dstmap); 2979 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2980 nchain, &resentry); 2981 vm_map_unlock(dstmap); 2982 2983 if (error == false) { 2984 error = EIO; 2985 goto bad2; 2986 } 2987 } 2988 2989 if (resentry != NULL) 2990 uvm_mapent_free(resentry); 2991 2992 uvm_map_check(srcmap, "map_extract src leave"); 2993 uvm_map_check(dstmap, "map_extract dst leave"); 2994 2995 return (0); 2996 2997 /* 2998 * bad: failure recovery 2999 */ 3000 bad: 3001 vm_map_unlock(srcmap); 3002 bad2: /* src already unlocked */ 3003 if (chain) 3004 uvm_unmap_detach(chain, 3005 (flags & UVM_EXTRACT_QREF) ? 
AMAP_REFALL : 0); 3006 3007 if (resentry != NULL) 3008 uvm_mapent_free(resentry); 3009 3010 uvm_map_check(srcmap, "map_extract src err leave"); 3011 uvm_map_check(dstmap, "map_extract dst err leave"); 3012 3013 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 3014 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ 3015 } 3016 return (error); 3017 } 3018 3019 /* end of extraction functions */ 3020 3021 /* 3022 * uvm_map_submap: punch down part of a map into a submap 3023 * 3024 * => only the kernel_map is allowed to be submapped 3025 * => the purpose of submapping is to break up the locking granularity 3026 * of a larger map 3027 * => the range specified must have been mapped previously with a uvm_map() 3028 * call [with uobj==NULL] to create a blank map entry in the main map. 3029 * [And it had better still be blank!] 3030 * => maps which contain submaps should never be copied or forked. 3031 * => to remove a submap, use uvm_unmap() on the main map 3032 * and then uvm_map_deallocate() the submap. 3033 * => main map must be unlocked. 3034 * => submap must have been init'd and have a zero reference count. 3035 * [need not be locked as we don't actually reference it] 3036 */ 3037 3038 int 3039 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3040 struct vm_map *submap) 3041 { 3042 struct vm_map_entry *entry; 3043 struct uvm_mapent_reservation umr; 3044 int error; 3045 3046 uvm_mapent_reserve(map, &umr, 2, 0); 3047 3048 vm_map_lock(map); 3049 VM_MAP_RANGE_CHECK(map, start, end); 3050 3051 if (uvm_map_lookup_entry(map, start, &entry)) { 3052 UVM_MAP_CLIP_START(map, entry, start, &umr); 3053 UVM_MAP_CLIP_END(map, entry, end, &umr); /* to be safe */ 3054 } else { 3055 entry = NULL; 3056 } 3057 3058 if (entry != NULL && 3059 entry->start == start && entry->end == end && 3060 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 3061 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 3062 entry->etype |= UVM_ET_SUBMAP; 3063 entry->object.sub_map = submap; 3064 entry->offset = 0; 3065 uvm_map_reference(submap); 3066 error = 0; 3067 } else { 3068 error = EINVAL; 3069 } 3070 vm_map_unlock(map); 3071 3072 uvm_mapent_unreserve(map, &umr); 3073 3074 return error; 3075 } 3076 3077 /* 3078 * uvm_map_setup_kernel: init in-kernel map 3079 * 3080 * => map must not be in service yet. 3081 */ 3082 3083 void 3084 uvm_map_setup_kernel(struct vm_map_kernel *map, 3085 vaddr_t vmin, vaddr_t vmax, int flags) 3086 { 3087 3088 uvm_map_setup(&map->vmk_map, vmin, vmax, flags); 3089 callback_head_init(&map->vmk_reclaim_callback, IPL_VM); 3090 LIST_INIT(&map->vmk_kentry_free); 3091 map->vmk_merged_entries = NULL; 3092 } 3093 3094 3095 /* 3096 * uvm_map_protect: change map protection 3097 * 3098 * => set_max means set max_protection. 3099 * => map must be unlocked. 3100 */ 3101 3102 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? 
\ 3103 ~VM_PROT_WRITE : VM_PROT_ALL) 3104 3105 int 3106 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3107 vm_prot_t new_prot, bool set_max) 3108 { 3109 struct vm_map_entry *current, *entry; 3110 int error = 0; 3111 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 3112 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", 3113 map, start, end, new_prot); 3114 3115 vm_map_lock(map); 3116 VM_MAP_RANGE_CHECK(map, start, end); 3117 if (uvm_map_lookup_entry(map, start, &entry)) { 3118 UVM_MAP_CLIP_START(map, entry, start, NULL); 3119 } else { 3120 entry = entry->next; 3121 } 3122 3123 /* 3124 * make a first pass to check for protection violations. 3125 */ 3126 3127 current = entry; 3128 while ((current != &map->header) && (current->start < end)) { 3129 if (UVM_ET_ISSUBMAP(current)) { 3130 error = EINVAL; 3131 goto out; 3132 } 3133 if ((new_prot & current->max_protection) != new_prot) { 3134 error = EACCES; 3135 goto out; 3136 } 3137 /* 3138 * Don't allow VM_PROT_EXECUTE to be set on entries that 3139 * point to vnodes that are associated with a NOEXEC file 3140 * system. 3141 */ 3142 if (UVM_ET_ISOBJ(current) && 3143 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 3144 struct vnode *vp = 3145 (struct vnode *) current->object.uvm_obj; 3146 3147 if ((new_prot & VM_PROT_EXECUTE) != 0 && 3148 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 3149 error = EACCES; 3150 goto out; 3151 } 3152 } 3153 3154 current = current->next; 3155 } 3156 3157 /* go back and fix up protections (no need to clip this time). */ 3158 3159 current = entry; 3160 while ((current != &map->header) && (current->start < end)) { 3161 vm_prot_t old_prot; 3162 3163 UVM_MAP_CLIP_END(map, current, end, NULL); 3164 old_prot = current->protection; 3165 if (set_max) 3166 current->protection = 3167 (current->max_protection = new_prot) & old_prot; 3168 else 3169 current->protection = new_prot; 3170 3171 /* 3172 * update physical map if necessary. worry about copy-on-write 3173 * here -- CHECK THIS XXX 3174 */ 3175 3176 if (current->protection != old_prot) { 3177 /* update pmap! */ 3178 pmap_protect(map->pmap, current->start, current->end, 3179 current->protection & MASK(entry)); 3180 3181 /* 3182 * If this entry points at a vnode, and the 3183 * protection includes VM_PROT_EXECUTE, mark 3184 * the vnode as VEXECMAP. 3185 */ 3186 if (UVM_ET_ISOBJ(current)) { 3187 struct uvm_object *uobj = 3188 current->object.uvm_obj; 3189 3190 if (UVM_OBJ_IS_VNODE(uobj) && 3191 (current->protection & VM_PROT_EXECUTE)) { 3192 vn_markexec((struct vnode *) uobj); 3193 } 3194 } 3195 } 3196 3197 /* 3198 * If the map is configured to lock any future mappings, 3199 * wire this entry now if the old protection was VM_PROT_NONE 3200 * and the new protection is not VM_PROT_NONE. 3201 */ 3202 3203 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3204 VM_MAPENT_ISWIRED(entry) == 0 && 3205 old_prot == VM_PROT_NONE && 3206 new_prot != VM_PROT_NONE) { 3207 if (uvm_map_pageable(map, entry->start, 3208 entry->end, false, 3209 UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 3210 3211 /* 3212 * If locking the entry fails, remember the 3213 * error if it's the first one. Note we 3214 * still continue setting the protection in 3215 * the map, but will return the error 3216 * condition regardless. 3217 * 3218 * XXX Ignore what the actual error is, 3219 * XXX just call it a resource shortage 3220 * XXX so that it doesn't get confused 3221 * XXX what uvm_map_protect() itself would 3222 * XXX normally return. 
3223 */ 3224 3225 error = ENOMEM; 3226 } 3227 } 3228 current = current->next; 3229 } 3230 pmap_update(map->pmap); 3231 3232 out: 3233 vm_map_unlock(map); 3234 3235 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); 3236 return error; 3237 } 3238 3239 #undef MASK 3240 3241 /* 3242 * uvm_map_inherit: set inheritance code for range of addrs in map. 3243 * 3244 * => map must be unlocked 3245 * => note that the inherit code is used during a "fork". see fork 3246 * code for details. 3247 */ 3248 3249 int 3250 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3251 vm_inherit_t new_inheritance) 3252 { 3253 struct vm_map_entry *entry, *temp_entry; 3254 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 3255 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", 3256 map, start, end, new_inheritance); 3257 3258 switch (new_inheritance) { 3259 case MAP_INHERIT_NONE: 3260 case MAP_INHERIT_COPY: 3261 case MAP_INHERIT_SHARE: 3262 break; 3263 default: 3264 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3265 return EINVAL; 3266 } 3267 3268 vm_map_lock(map); 3269 VM_MAP_RANGE_CHECK(map, start, end); 3270 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3271 entry = temp_entry; 3272 UVM_MAP_CLIP_START(map, entry, start, NULL); 3273 } else { 3274 entry = temp_entry->next; 3275 } 3276 while ((entry != &map->header) && (entry->start < end)) { 3277 UVM_MAP_CLIP_END(map, entry, end, NULL); 3278 entry->inheritance = new_inheritance; 3279 entry = entry->next; 3280 } 3281 vm_map_unlock(map); 3282 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3283 return 0; 3284 } 3285 3286 /* 3287 * uvm_map_advice: set advice code for range of addrs in map. 3288 * 3289 * => map must be unlocked 3290 */ 3291 3292 int 3293 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3294 { 3295 struct vm_map_entry *entry, *temp_entry; 3296 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 3297 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", 3298 map, start, end, new_advice); 3299 3300 vm_map_lock(map); 3301 VM_MAP_RANGE_CHECK(map, start, end); 3302 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3303 entry = temp_entry; 3304 UVM_MAP_CLIP_START(map, entry, start, NULL); 3305 } else { 3306 entry = temp_entry->next; 3307 } 3308 3309 /* 3310 * XXXJRT: disallow holes? 3311 */ 3312 3313 while ((entry != &map->header) && (entry->start < end)) { 3314 UVM_MAP_CLIP_END(map, entry, end, NULL); 3315 3316 switch (new_advice) { 3317 case MADV_NORMAL: 3318 case MADV_RANDOM: 3319 case MADV_SEQUENTIAL: 3320 /* nothing special here */ 3321 break; 3322 3323 default: 3324 vm_map_unlock(map); 3325 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3326 return EINVAL; 3327 } 3328 entry->advice = new_advice; 3329 entry = entry->next; 3330 } 3331 3332 vm_map_unlock(map); 3333 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3334 return 0; 3335 } 3336 3337 /* 3338 * uvm_map_pageable: sets the pageability of a range in a map. 3339 * 3340 * => wires map entries. should not be used for transient page locking. 3341 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3342 * => regions specified as not pageable require lock-down (wired) memory 3343 * and page tables. 3344 * => map must never be read-locked 3345 * => if islocked is true, map is already write-locked 3346 * => we always unlock the map, since we must downgrade to a read-lock 3347 * to call uvm_fault_wire() 3348 * => XXXCDC: check this and try and clean it up. 
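 * => an illustrative wire request (a sketch only, not code from this
 *    file; "map", "va" and "size" are assumed to come from an
 *    mlock(2)-style caller) would look like
 *
 *	error = uvm_map_pageable(map, trunc_page(va),
 *	    round_page(va + size), false, 0);
 *
 *    and the matching unwire request would pass true for new_pageable.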
3349 */ 3350 3351 int 3352 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3353 bool new_pageable, int lockflags) 3354 { 3355 struct vm_map_entry *entry, *start_entry, *failed_entry; 3356 int rv; 3357 #ifdef DIAGNOSTIC 3358 u_int timestamp_save; 3359 #endif 3360 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 3361 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", 3362 map, start, end, new_pageable); 3363 KASSERT(map->flags & VM_MAP_PAGEABLE); 3364 3365 if ((lockflags & UVM_LK_ENTER) == 0) 3366 vm_map_lock(map); 3367 VM_MAP_RANGE_CHECK(map, start, end); 3368 3369 /* 3370 * only one pageability change may take place at one time, since 3371 * uvm_fault_wire assumes it will be called only once for each 3372 * wiring/unwiring. therefore, we have to make sure we're actually 3373 * changing the pageability for the entire region. we do so before 3374 * making any changes. 3375 */ 3376 3377 if (uvm_map_lookup_entry(map, start, &start_entry) == false) { 3378 if ((lockflags & UVM_LK_EXIT) == 0) 3379 vm_map_unlock(map); 3380 3381 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3382 return EFAULT; 3383 } 3384 entry = start_entry; 3385 3386 /* 3387 * handle wiring and unwiring separately. 3388 */ 3389 3390 if (new_pageable) { /* unwire */ 3391 UVM_MAP_CLIP_START(map, entry, start, NULL); 3392 3393 /* 3394 * unwiring. first ensure that the range to be unwired is 3395 * really wired down and that there are no holes. 3396 */ 3397 3398 while ((entry != &map->header) && (entry->start < end)) { 3399 if (entry->wired_count == 0 || 3400 (entry->end < end && 3401 (entry->next == &map->header || 3402 entry->next->start > entry->end))) { 3403 if ((lockflags & UVM_LK_EXIT) == 0) 3404 vm_map_unlock(map); 3405 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3406 return EINVAL; 3407 } 3408 entry = entry->next; 3409 } 3410 3411 /* 3412 * POSIX 1003.1b - a single munlock call unlocks a region, 3413 * regardless of the number of mlock calls made on that 3414 * region. 3415 */ 3416 3417 entry = start_entry; 3418 while ((entry != &map->header) && (entry->start < end)) { 3419 UVM_MAP_CLIP_END(map, entry, end, NULL); 3420 if (VM_MAPENT_ISWIRED(entry)) 3421 uvm_map_entry_unwire(map, entry); 3422 entry = entry->next; 3423 } 3424 if ((lockflags & UVM_LK_EXIT) == 0) 3425 vm_map_unlock(map); 3426 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3427 return 0; 3428 } 3429 3430 /* 3431 * wire case: in two passes [XXXCDC: ugly block of code here] 3432 * 3433 * 1: holding the write lock, we create any anonymous maps that need 3434 * to be created. then we clip each map entry to the region to 3435 * be wired and increment its wiring count. 3436 * 3437 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3438 * in the pages for any newly wired area (wired_count == 1). 3439 * 3440 * downgrading to a read lock for uvm_fault_wire avoids a possible 3441 * deadlock with another thread that may have faulted on one of 3442 * the pages to be wired (it would mark the page busy, blocking 3443 * us, then in turn block on the map lock that we hold). because 3444 * of problems in the recursive lock package, we cannot upgrade 3445 * to a write lock in vm_map_lookup. thus, any actions that 3446 * require the write lock must be done beforehand. because we 3447 * keep the read lock on the map, the copy-on-write status of the 3448 * entries we modify here cannot change. 
3449 */ 3450 3451 while ((entry != &map->header) && (entry->start < end)) { 3452 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3453 3454 /* 3455 * perform actions of vm_map_lookup that need the 3456 * write lock on the map: create an anonymous map 3457 * for a copy-on-write region, or an anonymous map 3458 * for a zero-fill region. (XXXCDC: submap case 3459 * ok?) 3460 */ 3461 3462 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3463 if (UVM_ET_ISNEEDSCOPY(entry) && 3464 ((entry->max_protection & VM_PROT_WRITE) || 3465 (entry->object.uvm_obj == NULL))) { 3466 amap_copy(map, entry, 0, start, end); 3467 /* XXXCDC: wait OK? */ 3468 } 3469 } 3470 } 3471 UVM_MAP_CLIP_START(map, entry, start, NULL); 3472 UVM_MAP_CLIP_END(map, entry, end, NULL); 3473 entry->wired_count++; 3474 3475 /* 3476 * Check for holes 3477 */ 3478 3479 if (entry->protection == VM_PROT_NONE || 3480 (entry->end < end && 3481 (entry->next == &map->header || 3482 entry->next->start > entry->end))) { 3483 3484 /* 3485 * found one. amap creation actions do not need to 3486 * be undone, but the wired counts need to be restored. 3487 */ 3488 3489 while (entry != &map->header && entry->end > start) { 3490 entry->wired_count--; 3491 entry = entry->prev; 3492 } 3493 if ((lockflags & UVM_LK_EXIT) == 0) 3494 vm_map_unlock(map); 3495 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3496 return EINVAL; 3497 } 3498 entry = entry->next; 3499 } 3500 3501 /* 3502 * Pass 2. 3503 */ 3504 3505 #ifdef DIAGNOSTIC 3506 timestamp_save = map->timestamp; 3507 #endif 3508 vm_map_busy(map); 3509 vm_map_unlock(map); 3510 3511 rv = 0; 3512 entry = start_entry; 3513 while (entry != &map->header && entry->start < end) { 3514 if (entry->wired_count == 1) { 3515 rv = uvm_fault_wire(map, entry->start, entry->end, 3516 entry->max_protection, 1); 3517 if (rv) { 3518 3519 /* 3520 * wiring failed. break out of the loop. 3521 * we'll clean up the map below, once we 3522 * have a write lock again. 3523 */ 3524 3525 break; 3526 } 3527 } 3528 entry = entry->next; 3529 } 3530 3531 if (rv) { /* failed? */ 3532 3533 /* 3534 * Get back to an exclusive (write) lock. 3535 */ 3536 3537 vm_map_lock(map); 3538 vm_map_unbusy(map); 3539 3540 #ifdef DIAGNOSTIC 3541 if (timestamp_save + 1 != map->timestamp) 3542 panic("uvm_map_pageable: stale map"); 3543 #endif 3544 3545 /* 3546 * first drop the wiring count on all the entries 3547 * which haven't actually been wired yet. 3548 */ 3549 3550 failed_entry = entry; 3551 while (entry != &map->header && entry->start < end) { 3552 entry->wired_count--; 3553 entry = entry->next; 3554 } 3555 3556 /* 3557 * now, unwire all the entries that were successfully 3558 * wired above. 3559 */ 3560 3561 entry = start_entry; 3562 while (entry != failed_entry) { 3563 entry->wired_count--; 3564 if (VM_MAPENT_ISWIRED(entry) == 0) 3565 uvm_map_entry_unwire(map, entry); 3566 entry = entry->next; 3567 } 3568 if ((lockflags & UVM_LK_EXIT) == 0) 3569 vm_map_unlock(map); 3570 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); 3571 return (rv); 3572 } 3573 3574 if ((lockflags & UVM_LK_EXIT) == 0) { 3575 vm_map_unbusy(map); 3576 } else { 3577 3578 /* 3579 * Get back to an exclusive (write) lock. 3580 */ 3581 3582 vm_map_lock(map); 3583 vm_map_unbusy(map); 3584 } 3585 3586 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3587 return 0; 3588 } 3589 3590 /* 3591 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3592 * all mapped regions. 3593 * 3594 * => map must not be locked. 
3595 * => if no flags are specified, all regions are unwired. 3596 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3597 */ 3598 3599 int 3600 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3601 { 3602 struct vm_map_entry *entry, *failed_entry; 3603 vsize_t size; 3604 int rv; 3605 #ifdef DIAGNOSTIC 3606 u_int timestamp_save; 3607 #endif 3608 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 3609 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0); 3610 3611 KASSERT(map->flags & VM_MAP_PAGEABLE); 3612 3613 vm_map_lock(map); 3614 3615 /* 3616 * handle wiring and unwiring separately. 3617 */ 3618 3619 if (flags == 0) { /* unwire */ 3620 3621 /* 3622 * POSIX 1003.1b -- munlockall unlocks all regions, 3623 * regardless of how many times mlockall has been called. 3624 */ 3625 3626 for (entry = map->header.next; entry != &map->header; 3627 entry = entry->next) { 3628 if (VM_MAPENT_ISWIRED(entry)) 3629 uvm_map_entry_unwire(map, entry); 3630 } 3631 map->flags &= ~VM_MAP_WIREFUTURE; 3632 vm_map_unlock(map); 3633 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3634 return 0; 3635 } 3636 3637 if (flags & MCL_FUTURE) { 3638 3639 /* 3640 * must wire all future mappings; remember this. 3641 */ 3642 3643 map->flags |= VM_MAP_WIREFUTURE; 3644 } 3645 3646 if ((flags & MCL_CURRENT) == 0) { 3647 3648 /* 3649 * no more work to do! 3650 */ 3651 3652 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3653 vm_map_unlock(map); 3654 return 0; 3655 } 3656 3657 /* 3658 * wire case: in three passes [XXXCDC: ugly block of code here] 3659 * 3660 * 1: holding the write lock, count all pages mapped by non-wired 3661 * entries. if this would cause us to go over our limit, we fail. 3662 * 3663 * 2: still holding the write lock, we create any anonymous maps that 3664 * need to be created. then we increment its wiring count. 3665 * 3666 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3667 * in the pages for any newly wired area (wired_count == 1). 3668 * 3669 * downgrading to a read lock for uvm_fault_wire avoids a possible 3670 * deadlock with another thread that may have faulted on one of 3671 * the pages to be wired (it would mark the page busy, blocking 3672 * us, then in turn block on the map lock that we hold). because 3673 * of problems in the recursive lock package, we cannot upgrade 3674 * to a write lock in vm_map_lookup. thus, any actions that 3675 * require the write lock must be done beforehand. because we 3676 * keep the read lock on the map, the copy-on-write status of the 3677 * entries we modify here cannot change. 3678 */ 3679 3680 for (size = 0, entry = map->header.next; entry != &map->header; 3681 entry = entry->next) { 3682 if (entry->protection != VM_PROT_NONE && 3683 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3684 size += entry->end - entry->start; 3685 } 3686 } 3687 3688 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3689 vm_map_unlock(map); 3690 return ENOMEM; 3691 } 3692 3693 if (limit != 0 && 3694 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3695 vm_map_unlock(map); 3696 return ENOMEM; 3697 } 3698 3699 /* 3700 * Pass 2. 3701 */ 3702 3703 for (entry = map->header.next; entry != &map->header; 3704 entry = entry->next) { 3705 if (entry->protection == VM_PROT_NONE) 3706 continue; 3707 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? 
*/ 3708 3709 /* 3710 * perform actions of vm_map_lookup that need the 3711 * write lock on the map: create an anonymous map 3712 * for a copy-on-write region, or an anonymous map 3713 * for a zero-fill region. (XXXCDC: submap case 3714 * ok?) 3715 */ 3716 3717 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3718 if (UVM_ET_ISNEEDSCOPY(entry) && 3719 ((entry->max_protection & VM_PROT_WRITE) || 3720 (entry->object.uvm_obj == NULL))) { 3721 amap_copy(map, entry, 0, entry->start, 3722 entry->end); 3723 /* XXXCDC: wait OK? */ 3724 } 3725 } 3726 } 3727 entry->wired_count++; 3728 } 3729 3730 /* 3731 * Pass 3. 3732 */ 3733 3734 #ifdef DIAGNOSTIC 3735 timestamp_save = map->timestamp; 3736 #endif 3737 vm_map_busy(map); 3738 vm_map_unlock(map); 3739 3740 rv = 0; 3741 for (entry = map->header.next; entry != &map->header; 3742 entry = entry->next) { 3743 if (entry->wired_count == 1) { 3744 rv = uvm_fault_wire(map, entry->start, entry->end, 3745 entry->max_protection, 1); 3746 if (rv) { 3747 3748 /* 3749 * wiring failed. break out of the loop. 3750 * we'll clean up the map below, once we 3751 * have a write lock again. 3752 */ 3753 3754 break; 3755 } 3756 } 3757 } 3758 3759 if (rv) { 3760 3761 /* 3762 * Get back an exclusive (write) lock. 3763 */ 3764 3765 vm_map_lock(map); 3766 vm_map_unbusy(map); 3767 3768 #ifdef DIAGNOSTIC 3769 if (timestamp_save + 1 != map->timestamp) 3770 panic("uvm_map_pageable_all: stale map"); 3771 #endif 3772 3773 /* 3774 * first drop the wiring count on all the entries 3775 * which haven't actually been wired yet. 3776 * 3777 * Skip VM_PROT_NONE entries like we did above. 3778 */ 3779 3780 failed_entry = entry; 3781 for (/* nothing */; entry != &map->header; 3782 entry = entry->next) { 3783 if (entry->protection == VM_PROT_NONE) 3784 continue; 3785 entry->wired_count--; 3786 } 3787 3788 /* 3789 * now, unwire all the entries that were successfully 3790 * wired above. 3791 * 3792 * Skip VM_PROT_NONE entries like we did above. 3793 */ 3794 3795 for (entry = map->header.next; entry != failed_entry; 3796 entry = entry->next) { 3797 if (entry->protection == VM_PROT_NONE) 3798 continue; 3799 entry->wired_count--; 3800 if (VM_MAPENT_ISWIRED(entry)) 3801 uvm_map_entry_unwire(map, entry); 3802 } 3803 vm_map_unlock(map); 3804 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0); 3805 return (rv); 3806 } 3807 3808 vm_map_unbusy(map); 3809 3810 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3811 return 0; 3812 } 3813 3814 /* 3815 * uvm_map_clean: clean out a map range 3816 * 3817 * => valid flags: 3818 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3819 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3820 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3821 * if (flags & PGO_FREE): any cached pages are freed after clean 3822 * => returns an error if any part of the specified range isn't mapped 3823 * => never a need to flush amap layer since the anonymous memory has 3824 * no permanent home, but may deactivate pages there 3825 * => called from sys_msync() and sys_madvise() 3826 * => caller must not write-lock map (read OK). 
3827 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3828 */ 3829 3830 int 3831 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3832 { 3833 struct vm_map_entry *current, *entry; 3834 struct uvm_object *uobj; 3835 struct vm_amap *amap; 3836 struct vm_anon *anon; 3837 struct vm_page *pg; 3838 vaddr_t offset; 3839 vsize_t size; 3840 voff_t uoff; 3841 int error, refs; 3842 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3843 3844 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", 3845 map, start, end, flags); 3846 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3847 (PGO_FREE|PGO_DEACTIVATE)); 3848 3849 vm_map_lock_read(map); 3850 VM_MAP_RANGE_CHECK(map, start, end); 3851 if (uvm_map_lookup_entry(map, start, &entry) == false) { 3852 vm_map_unlock_read(map); 3853 return EFAULT; 3854 } 3855 3856 /* 3857 * Make a first pass to check for holes and wiring problems. 3858 */ 3859 3860 for (current = entry; current->start < end; current = current->next) { 3861 if (UVM_ET_ISSUBMAP(current)) { 3862 vm_map_unlock_read(map); 3863 return EINVAL; 3864 } 3865 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3866 vm_map_unlock_read(map); 3867 return EBUSY; 3868 } 3869 if (end <= current->end) { 3870 break; 3871 } 3872 if (current->end != current->next->start) { 3873 vm_map_unlock_read(map); 3874 return EFAULT; 3875 } 3876 } 3877 3878 error = 0; 3879 for (current = entry; start < end; current = current->next) { 3880 amap = current->aref.ar_amap; /* top layer */ 3881 uobj = current->object.uvm_obj; /* bottom layer */ 3882 KASSERT(start >= current->start); 3883 3884 /* 3885 * No amap cleaning necessary if: 3886 * 3887 * (1) There's no amap. 3888 * 3889 * (2) We're not deactivating or freeing pages. 3890 */ 3891 3892 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 3893 goto flush_object; 3894 3895 amap_lock(amap); 3896 offset = start - current->start; 3897 size = MIN(end, current->end) - start; 3898 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { 3899 anon = amap_lookup(¤t->aref, offset); 3900 if (anon == NULL) 3901 continue; 3902 3903 mutex_enter(&anon->an_lock); 3904 pg = anon->an_page; 3905 if (pg == NULL) { 3906 mutex_exit(&anon->an_lock); 3907 continue; 3908 } 3909 3910 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 3911 3912 /* 3913 * In these first 3 cases, we just deactivate the page. 3914 */ 3915 3916 case PGO_CLEANIT|PGO_FREE: 3917 case PGO_CLEANIT|PGO_DEACTIVATE: 3918 case PGO_DEACTIVATE: 3919 deactivate_it: 3920 /* 3921 * skip the page if it's loaned or wired, 3922 * since it shouldn't be on a paging queue 3923 * at all in these cases. 3924 */ 3925 3926 mutex_enter(&uvm_pageqlock); 3927 if (pg->loan_count != 0 || 3928 pg->wire_count != 0) { 3929 mutex_exit(&uvm_pageqlock); 3930 mutex_exit(&anon->an_lock); 3931 continue; 3932 } 3933 KASSERT(pg->uanon == anon); 3934 uvm_pagedeactivate(pg); 3935 mutex_exit(&uvm_pageqlock); 3936 mutex_exit(&anon->an_lock); 3937 continue; 3938 3939 case PGO_FREE: 3940 3941 /* 3942 * If there are multiple references to 3943 * the amap, just deactivate the page. 
3944 */ 3945 3946 if (amap_refs(amap) > 1) 3947 goto deactivate_it; 3948 3949 /* skip the page if it's wired */ 3950 if (pg->wire_count != 0) { 3951 mutex_exit(&anon->an_lock); 3952 continue; 3953 } 3954 amap_unadd(¤t->aref, offset); 3955 refs = --anon->an_ref; 3956 mutex_exit(&anon->an_lock); 3957 if (refs == 0) 3958 uvm_anfree(anon); 3959 continue; 3960 } 3961 } 3962 amap_unlock(amap); 3963 3964 flush_object: 3965 /* 3966 * flush pages if we've got a valid backing object. 3967 * note that we must always clean object pages before 3968 * freeing them since otherwise we could reveal stale 3969 * data from files. 3970 */ 3971 3972 uoff = current->offset + (start - current->start); 3973 size = MIN(end, current->end) - start; 3974 if (uobj != NULL) { 3975 mutex_enter(&uobj->vmobjlock); 3976 if (uobj->pgops->pgo_put != NULL) 3977 error = (uobj->pgops->pgo_put)(uobj, uoff, 3978 uoff + size, flags | PGO_CLEANIT); 3979 else 3980 error = 0; 3981 } 3982 start += size; 3983 } 3984 vm_map_unlock_read(map); 3985 return (error); 3986 } 3987 3988 3989 /* 3990 * uvm_map_checkprot: check protection in map 3991 * 3992 * => must allow specified protection in a fully allocated region. 3993 * => map must be read or write locked by caller. 3994 */ 3995 3996 bool 3997 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3998 vm_prot_t protection) 3999 { 4000 struct vm_map_entry *entry; 4001 struct vm_map_entry *tmp_entry; 4002 4003 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { 4004 return (false); 4005 } 4006 entry = tmp_entry; 4007 while (start < end) { 4008 if (entry == &map->header) { 4009 return (false); 4010 } 4011 4012 /* 4013 * no holes allowed 4014 */ 4015 4016 if (start < entry->start) { 4017 return (false); 4018 } 4019 4020 /* 4021 * check protection associated with entry 4022 */ 4023 4024 if ((entry->protection & protection) != protection) { 4025 return (false); 4026 } 4027 start = entry->end; 4028 entry = entry->next; 4029 } 4030 return (true); 4031 } 4032 4033 /* 4034 * uvmspace_alloc: allocate a vmspace structure. 4035 * 4036 * - structure includes vm_map and pmap 4037 * - XXX: no locking on this structure 4038 * - refcnt set to 1, rest must be init'd by caller 4039 */ 4040 struct vmspace * 4041 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) 4042 { 4043 struct vmspace *vm; 4044 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 4045 4046 vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); 4047 uvmspace_init(vm, NULL, vmin, vmax); 4048 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); 4049 return (vm); 4050 } 4051 4052 /* 4053 * uvmspace_init: initialize a vmspace structure. 4054 * 4055 * - XXX: no locking on this structure 4056 * - refcnt set to 1, rest must be init'd by caller 4057 */ 4058 void 4059 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) 4060 { 4061 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 4062 4063 memset(vm, 0, sizeof(*vm)); 4064 uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 4065 #ifdef __USING_TOPDOWN_VM 4066 | VM_MAP_TOPDOWN 4067 #endif 4068 ); 4069 if (pmap) 4070 pmap_reference(pmap); 4071 else 4072 pmap = pmap_create(); 4073 vm->vm_map.pmap = pmap; 4074 vm->vm_refcnt = 1; 4075 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4076 } 4077 4078 /* 4079 * uvmspace_share: share a vmspace between two processes 4080 * 4081 * - used for vfork, threads(?) 
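 * - illustrative use (a sketch of a vfork(2)-style path, not code taken
 *   from this file): the new child simply runs on the parent's vmspace:
 *
 *	uvmspace_share(parent_proc, child_proc);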
4082 */ 4083 4084 void 4085 uvmspace_share(struct proc *p1, struct proc *p2) 4086 { 4087 4088 uvmspace_addref(p1->p_vmspace); 4089 p2->p_vmspace = p1->p_vmspace; 4090 } 4091 4092 /* 4093 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 4094 * 4095 * - XXX: no locking on vmspace 4096 */ 4097 4098 void 4099 uvmspace_unshare(struct lwp *l) 4100 { 4101 struct proc *p = l->l_proc; 4102 struct vmspace *nvm, *ovm = p->p_vmspace; 4103 4104 if (ovm->vm_refcnt == 1) 4105 /* nothing to do: vmspace isn't shared in the first place */ 4106 return; 4107 4108 /* make a new vmspace, still holding old one */ 4109 nvm = uvmspace_fork(ovm); 4110 4111 kpreempt_disable(); 4112 pmap_deactivate(l); /* unbind old vmspace */ 4113 p->p_vmspace = nvm; 4114 pmap_activate(l); /* switch to new vmspace */ 4115 kpreempt_enable(); 4116 4117 uvmspace_free(ovm); /* drop reference to old vmspace */ 4118 } 4119 4120 /* 4121 * uvmspace_exec: the process wants to exec a new program 4122 */ 4123 4124 void 4125 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end) 4126 { 4127 struct proc *p = l->l_proc; 4128 struct vmspace *nvm, *ovm = p->p_vmspace; 4129 struct vm_map *map = &ovm->vm_map; 4130 4131 #ifdef __sparc__ 4132 /* XXX cgd 960926: the sparc #ifdef should be a MD hook */ 4133 kill_user_windows(l); /* before stack addresses go away */ 4134 #endif 4135 4136 /* 4137 * see if more than one process is using this vmspace... 4138 */ 4139 4140 if (ovm->vm_refcnt == 1) { 4141 4142 /* 4143 * if p is the only process using its vmspace then we can safely 4144 * recycle that vmspace for the program that is being exec'd. 4145 */ 4146 4147 #ifdef SYSVSHM 4148 /* 4149 * SYSV SHM semantics require us to kill all segments on an exec 4150 */ 4151 4152 if (ovm->vm_shm) 4153 shmexit(ovm); 4154 #endif 4155 4156 /* 4157 * POSIX 1003.1b -- "lock future mappings" is revoked 4158 * when a process execs another program image. 4159 */ 4160 4161 map->flags &= ~VM_MAP_WIREFUTURE; 4162 4163 /* 4164 * now unmap the old program 4165 */ 4166 4167 pmap_remove_all(map->pmap); 4168 uvm_unmap(map, vm_map_min(map), vm_map_max(map)); 4169 KASSERT(map->header.prev == &map->header); 4170 KASSERT(map->nentries == 0); 4171 4172 /* 4173 * resize the map 4174 */ 4175 4176 vm_map_setmin(map, start); 4177 vm_map_setmax(map, end); 4178 } else { 4179 4180 /* 4181 * p's vmspace is being shared, so we can't reuse it for p since 4182 * it is still being used for others. allocate a new vmspace 4183 * for p 4184 */ 4185 4186 nvm = uvmspace_alloc(start, end); 4187 4188 /* 4189 * install new vmspace and drop our ref to the old one. 4190 */ 4191 4192 kpreempt_disable(); 4193 pmap_deactivate(l); 4194 p->p_vmspace = nvm; 4195 pmap_activate(l); 4196 kpreempt_enable(); 4197 4198 uvmspace_free(ovm); 4199 } 4200 } 4201 4202 /* 4203 * uvmspace_addref: add a referece to a vmspace. 
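 * - the count is protected by the map's misc_lock.
 * - the map must not already be dying (VM_MAP_DYING) and the caller
 *   must already hold a reference that keeps vm_refcnt above zero.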
4204 */ 4205 4206 void 4207 uvmspace_addref(struct vmspace *vm) 4208 { 4209 struct vm_map *map = &vm->vm_map; 4210 4211 KASSERT((map->flags & VM_MAP_DYING) == 0); 4212 4213 mutex_enter(&map->misc_lock); 4214 KASSERT(vm->vm_refcnt > 0); 4215 vm->vm_refcnt++; 4216 mutex_exit(&map->misc_lock); 4217 } 4218 4219 /* 4220 * uvmspace_free: free a vmspace data structure 4221 */ 4222 4223 void 4224 uvmspace_free(struct vmspace *vm) 4225 { 4226 struct vm_map_entry *dead_entries; 4227 struct vm_map *map = &vm->vm_map; 4228 int n; 4229 4230 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); 4231 4232 UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); 4233 mutex_enter(&map->misc_lock); 4234 n = --vm->vm_refcnt; 4235 mutex_exit(&map->misc_lock); 4236 if (n > 0) 4237 return; 4238 4239 /* 4240 * at this point, there should be no other references to the map. 4241 * delete all of the mappings, then destroy the pmap. 4242 */ 4243 4244 map->flags |= VM_MAP_DYING; 4245 pmap_remove_all(map->pmap); 4246 #ifdef SYSVSHM 4247 /* Get rid of any SYSV shared memory segments. */ 4248 if (vm->vm_shm != NULL) 4249 shmexit(vm); 4250 #endif 4251 if (map->nentries) { 4252 uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map), 4253 &dead_entries, NULL, 0); 4254 if (dead_entries != NULL) 4255 uvm_unmap_detach(dead_entries, 0); 4256 } 4257 KASSERT(map->nentries == 0); 4258 KASSERT(map->size == 0); 4259 mutex_destroy(&map->misc_lock); 4260 mutex_destroy(&map->mutex); 4261 rw_destroy(&map->lock); 4262 cv_destroy(&map->cv); 4263 pmap_destroy(map->pmap); 4264 pool_cache_put(&uvm_vmspace_cache, vm); 4265 } 4266 4267 /* 4268 * F O R K - m a i n e n t r y p o i n t 4269 */ 4270 /* 4271 * uvmspace_fork: fork a process' main map 4272 * 4273 * => create a new vmspace for child process from parent. 4274 * => parent's map must not be locked. 4275 */ 4276 4277 struct vmspace * 4278 uvmspace_fork(struct vmspace *vm1) 4279 { 4280 struct vmspace *vm2; 4281 struct vm_map *old_map = &vm1->vm_map; 4282 struct vm_map *new_map; 4283 struct vm_map_entry *old_entry; 4284 struct vm_map_entry *new_entry; 4285 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); 4286 4287 vm_map_lock(old_map); 4288 4289 vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map)); 4290 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4291 (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy); 4292 new_map = &vm2->vm_map; /* XXX */ 4293 4294 old_entry = old_map->header.next; 4295 new_map->size = old_map->size; 4296 4297 /* 4298 * go entry-by-entry 4299 */ 4300 4301 while (old_entry != &old_map->header) { 4302 4303 /* 4304 * first, some sanity checks on the old entry 4305 */ 4306 4307 KASSERT(!UVM_ET_ISSUBMAP(old_entry)); 4308 KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || 4309 !UVM_ET_ISNEEDSCOPY(old_entry)); 4310 4311 switch (old_entry->inheritance) { 4312 case MAP_INHERIT_NONE: 4313 4314 /* 4315 * drop the mapping, modify size 4316 */ 4317 new_map->size -= old_entry->end - old_entry->start; 4318 break; 4319 4320 case MAP_INHERIT_SHARE: 4321 4322 /* 4323 * share the mapping: this means we want the old and 4324 * new entries to share amaps and backing objects. 4325 */ 4326 /* 4327 * if the old_entry needs a new amap (due to prev fork) 4328 * then we need to allocate it now so that we have 4329 * something we own to share with the new_entry. 
[in 4330 * other words, we need to clear needs_copy] 4331 */ 4332 4333 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 4334 /* get our own amap, clears needs_copy */ 4335 amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK, 4336 0, 0); 4337 /* XXXCDC: WAITOK??? */ 4338 } 4339 4340 new_entry = uvm_mapent_alloc(new_map, 0); 4341 /* old_entry -> new_entry */ 4342 uvm_mapent_copy(old_entry, new_entry); 4343 4344 /* new pmap has nothing wired in it */ 4345 new_entry->wired_count = 0; 4346 4347 /* 4348 * gain reference to object backing the map (can't 4349 * be a submap, already checked this case). 4350 */ 4351 4352 if (new_entry->aref.ar_amap) 4353 uvm_map_reference_amap(new_entry, AMAP_SHARED); 4354 4355 if (new_entry->object.uvm_obj && 4356 new_entry->object.uvm_obj->pgops->pgo_reference) 4357 new_entry->object.uvm_obj-> 4358 pgops->pgo_reference( 4359 new_entry->object.uvm_obj); 4360 4361 /* insert entry at end of new_map's entry list */ 4362 uvm_map_entry_link(new_map, new_map->header.prev, 4363 new_entry); 4364 4365 break; 4366 4367 case MAP_INHERIT_COPY: 4368 4369 /* 4370 * copy-on-write the mapping (using mmap's 4371 * MAP_PRIVATE semantics) 4372 * 4373 * allocate new_entry, adjust reference counts. 4374 * (note that new references are read-only). 4375 */ 4376 4377 new_entry = uvm_mapent_alloc(new_map, 0); 4378 /* old_entry -> new_entry */ 4379 uvm_mapent_copy(old_entry, new_entry); 4380 4381 if (new_entry->aref.ar_amap) 4382 uvm_map_reference_amap(new_entry, 0); 4383 4384 if (new_entry->object.uvm_obj && 4385 new_entry->object.uvm_obj->pgops->pgo_reference) 4386 new_entry->object.uvm_obj->pgops->pgo_reference 4387 (new_entry->object.uvm_obj); 4388 4389 /* new pmap has nothing wired in it */ 4390 new_entry->wired_count = 0; 4391 4392 new_entry->etype |= 4393 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4394 uvm_map_entry_link(new_map, new_map->header.prev, 4395 new_entry); 4396 4397 /* 4398 * the new entry will need an amap. it will either 4399 * need to be copied from the old entry or created 4400 * from scratch (if the old entry does not have an 4401 * amap). can we defer this process until later 4402 * (by setting "needs_copy") or do we need to copy 4403 * the amap now? 4404 * 4405 * we must copy the amap now if any of the following 4406 * conditions hold: 4407 * 1. the old entry has an amap and that amap is 4408 * being shared. this means that the old (parent) 4409 * process is sharing the amap with another 4410 * process. if we do not clear needs_copy here 4411 * we will end up in a situation where both the 4412 * parent and child process are refering to the 4413 * same amap with "needs_copy" set. if the 4414 * parent write-faults, the fault routine will 4415 * clear "needs_copy" in the parent by allocating 4416 * a new amap. this is wrong because the 4417 * parent is supposed to be sharing the old amap 4418 * and the new amap will break that. 4419 * 4420 * 2. if the old entry has an amap and a non-zero 4421 * wire count then we are going to have to call 4422 * amap_cow_now to avoid page faults in the 4423 * parent process. since amap_cow_now requires 4424 * "needs_copy" to be clear we might as well 4425 * clear it here as well. 4426 * 4427 */ 4428 4429 if (old_entry->aref.ar_amap != NULL) { 4430 if ((amap_flags(old_entry->aref.ar_amap) & 4431 AMAP_SHARED) != 0 || 4432 VM_MAPENT_ISWIRED(old_entry)) { 4433 4434 amap_copy(new_map, new_entry, 4435 AMAP_COPY_NOCHUNK, 0, 0); 4436 /* XXXCDC: M_WAITOK ... ok? 
*/ 4437 } 4438 } 4439 4440 /* 4441 * if the parent's entry is wired down, then the 4442 * parent process does not want page faults on 4443 * access to that memory. this means that we 4444 * cannot do copy-on-write because we can't write 4445 * protect the old entry. in this case we 4446 * resolve all copy-on-write faults now, using 4447 * amap_cow_now. note that we have already 4448 * allocated any needed amap (above). 4449 */ 4450 4451 if (VM_MAPENT_ISWIRED(old_entry)) { 4452 4453 /* 4454 * resolve all copy-on-write faults now 4455 * (note that there is nothing to do if 4456 * the old mapping does not have an amap). 4457 */ 4458 if (old_entry->aref.ar_amap) 4459 amap_cow_now(new_map, new_entry); 4460 4461 } else { 4462 4463 /* 4464 * setup mappings to trigger copy-on-write faults 4465 * we must write-protect the parent if it has 4466 * an amap and it is not already "needs_copy"... 4467 * if it is already "needs_copy" then the parent 4468 * has already been write-protected by a previous 4469 * fork operation. 4470 */ 4471 4472 if (old_entry->aref.ar_amap && 4473 !UVM_ET_ISNEEDSCOPY(old_entry)) { 4474 if (old_entry->max_protection & VM_PROT_WRITE) { 4475 pmap_protect(old_map->pmap, 4476 old_entry->start, 4477 old_entry->end, 4478 old_entry->protection & 4479 ~VM_PROT_WRITE); 4480 } 4481 old_entry->etype |= UVM_ET_NEEDSCOPY; 4482 } 4483 } 4484 break; 4485 } /* end of switch statement */ 4486 old_entry = old_entry->next; 4487 } 4488 4489 pmap_update(old_map->pmap); 4490 vm_map_unlock(old_map); 4491 4492 #ifdef SYSVSHM 4493 if (vm1->vm_shm) 4494 shmfork(vm1, vm2); 4495 #endif 4496 4497 #ifdef PMAP_FORK 4498 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 4499 #endif 4500 4501 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4502 return (vm2); 4503 } 4504 4505 4506 /* 4507 * in-kernel map entry allocation. 
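 *
 * Map entries for kernel maps are carved out of dedicated kernel
 * pages instead of the general entry cache, so taking an entry for a
 * kernel map need not go through an allocator that could in turn
 * require the very map being changed.  Each such page begins with a
 * struct uvm_kmapent_hdr followed by UVM_KMAPENT_CHUNK vm_map_entry
 * slots; because the page is page-aligned, UVM_KHDR_FIND() recovers
 * the header from any entry by masking the entry's address down to a
 * page boundary.  Slot 0 describes the mapping of the header page
 * itself, slot 1 is returned to the caller whose allocation forced
 * the page to be set up, and the remaining slots are threaded onto
 * the per-header freelist through their "next" pointers.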
4508 */ 4509 4510 struct uvm_kmapent_hdr { 4511 LIST_ENTRY(uvm_kmapent_hdr) ukh_listq; 4512 int ukh_nused; 4513 struct vm_map_entry *ukh_freelist; 4514 struct vm_map *ukh_map; 4515 struct vm_map_entry ukh_entries[0]; 4516 }; 4517 4518 #define UVM_KMAPENT_CHUNK \ 4519 ((PAGE_SIZE - sizeof(struct uvm_kmapent_hdr)) \ 4520 / sizeof(struct vm_map_entry)) 4521 4522 #define UVM_KHDR_FIND(entry) \ 4523 ((struct uvm_kmapent_hdr *)(((vaddr_t)entry) & ~PAGE_MASK)) 4524 4525 4526 #ifdef DIAGNOSTIC 4527 static struct vm_map * 4528 uvm_kmapent_map(struct vm_map_entry *entry) 4529 { 4530 const struct uvm_kmapent_hdr *ukh; 4531 4532 ukh = UVM_KHDR_FIND(entry); 4533 return ukh->ukh_map; 4534 } 4535 #endif 4536 4537 static inline struct vm_map_entry * 4538 uvm_kmapent_get(struct uvm_kmapent_hdr *ukh) 4539 { 4540 struct vm_map_entry *entry; 4541 4542 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK); 4543 KASSERT(ukh->ukh_nused >= 0); 4544 4545 entry = ukh->ukh_freelist; 4546 if (entry) { 4547 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT)) 4548 == UVM_MAP_KERNEL); 4549 ukh->ukh_freelist = entry->next; 4550 ukh->ukh_nused++; 4551 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK); 4552 } else { 4553 KASSERT(ukh->ukh_nused == UVM_KMAPENT_CHUNK); 4554 } 4555 4556 return entry; 4557 } 4558 4559 static inline void 4560 uvm_kmapent_put(struct uvm_kmapent_hdr *ukh, struct vm_map_entry *entry) 4561 { 4562 4563 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT)) 4564 == UVM_MAP_KERNEL); 4565 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK); 4566 KASSERT(ukh->ukh_nused > 0); 4567 KASSERT(ukh->ukh_freelist != NULL || 4568 ukh->ukh_nused == UVM_KMAPENT_CHUNK); 4569 KASSERT(ukh->ukh_freelist == NULL || 4570 ukh->ukh_nused < UVM_KMAPENT_CHUNK); 4571 4572 ukh->ukh_nused--; 4573 entry->next = ukh->ukh_freelist; 4574 ukh->ukh_freelist = entry; 4575 } 4576 4577 /* 4578 * uvm_kmapent_alloc: allocate a map entry for in-kernel map 4579 */ 4580 4581 static struct vm_map_entry * 4582 uvm_kmapent_alloc(struct vm_map *map, int flags) 4583 { 4584 struct vm_page *pg; 4585 struct uvm_map_args args; 4586 struct uvm_kmapent_hdr *ukh; 4587 struct vm_map_entry *entry; 4588 uvm_flag_t mapflags = UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 4589 UVM_INH_NONE, UVM_ADV_RANDOM, flags | UVM_FLAG_NOMERGE); 4590 vaddr_t va; 4591 int error; 4592 int i; 4593 4594 KDASSERT(UVM_KMAPENT_CHUNK > 2); 4595 KDASSERT(kernel_map != NULL); 4596 KASSERT(vm_map_pmap(map) == pmap_kernel()); 4597 4598 UVMMAP_EVCNT_INCR(uke_alloc); 4599 entry = NULL; 4600 again: 4601 /* 4602 * try to grab an entry from freelist. 4603 */ 4604 mutex_spin_enter(&uvm_kentry_lock); 4605 ukh = LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free); 4606 if (ukh) { 4607 entry = uvm_kmapent_get(ukh); 4608 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK) 4609 LIST_REMOVE(ukh, ukh_listq); 4610 } 4611 mutex_spin_exit(&uvm_kentry_lock); 4612 4613 if (entry) 4614 return entry; 4615 4616 /* 4617 * there's no free entry for this vm_map. 4618 * now we need to allocate some vm_map_entry. 4619 * for simplicity, always allocate one page chunk of them at once. 4620 */ 4621 4622 pg = uvm_pagealloc(NULL, 0, NULL, 4623 (flags & UVM_KMF_NOWAIT) != 0 ? 
UVM_PGA_USERESERVE : 0); 4624 if (__predict_false(pg == NULL)) { 4625 if (flags & UVM_FLAG_NOWAIT) 4626 return NULL; 4627 uvm_wait("kme_alloc"); 4628 goto again; 4629 } 4630 4631 error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, UVM_UNKNOWN_OFFSET, 4632 0, mapflags, &args); 4633 if (error) { 4634 uvm_pagefree(pg); 4635 return NULL; 4636 } 4637 4638 va = args.uma_start; 4639 4640 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), 4641 VM_PROT_READ|VM_PROT_WRITE|PMAP_KMPAGE); 4642 pmap_update(vm_map_pmap(map)); 4643 4644 ukh = (void *)va; 4645 4646 /* 4647 * use the first entry for ukh itself. 4648 */ 4649 4650 entry = &ukh->ukh_entries[0]; 4651 entry->flags = UVM_MAP_KERNEL | UVM_MAP_KMAPENT; 4652 error = uvm_map_enter(map, &args, entry); 4653 KASSERT(error == 0); 4654 4655 ukh->ukh_nused = UVM_KMAPENT_CHUNK; 4656 ukh->ukh_map = map; 4657 ukh->ukh_freelist = NULL; 4658 for (i = UVM_KMAPENT_CHUNK - 1; i >= 2; i--) { 4659 struct vm_map_entry *xentry = &ukh->ukh_entries[i]; 4660 4661 xentry->flags = UVM_MAP_KERNEL; 4662 uvm_kmapent_put(ukh, xentry); 4663 } 4664 KASSERT(ukh->ukh_nused == 2); 4665 4666 mutex_spin_enter(&uvm_kentry_lock); 4667 LIST_INSERT_HEAD(&vm_map_to_kernel(map)->vmk_kentry_free, 4668 ukh, ukh_listq); 4669 mutex_spin_exit(&uvm_kentry_lock); 4670 4671 /* 4672 * return second entry. 4673 */ 4674 4675 entry = &ukh->ukh_entries[1]; 4676 entry->flags = UVM_MAP_KERNEL; 4677 UVMMAP_EVCNT_INCR(ukh_alloc); 4678 return entry; 4679 } 4680 4681 /* 4682 * uvm_kmapent_free: free map entry for in-kernel map 4683 */ 4684 4685 static void 4686 uvm_kmapent_free(struct vm_map_entry *entry) 4687 { 4688 struct uvm_kmapent_hdr *ukh; 4689 struct vm_page *pg; 4690 struct vm_map *map; 4691 struct pmap *pmap; 4692 vaddr_t va; 4693 paddr_t pa; 4694 struct vm_map_entry *deadentry; 4695 4696 UVMMAP_EVCNT_INCR(uke_free); 4697 ukh = UVM_KHDR_FIND(entry); 4698 map = ukh->ukh_map; 4699 4700 mutex_spin_enter(&uvm_kentry_lock); 4701 uvm_kmapent_put(ukh, entry); 4702 if (ukh->ukh_nused > 1) { 4703 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK - 1) 4704 LIST_INSERT_HEAD( 4705 &vm_map_to_kernel(map)->vmk_kentry_free, 4706 ukh, ukh_listq); 4707 mutex_spin_exit(&uvm_kentry_lock); 4708 return; 4709 } 4710 4711 /* 4712 * now we can free this ukh. 4713 * 4714 * however, keep an empty ukh to avoid ping-pong. 4715 */ 4716 4717 if (LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free) == ukh && 4718 LIST_NEXT(ukh, ukh_listq) == NULL) { 4719 mutex_spin_exit(&uvm_kentry_lock); 4720 return; 4721 } 4722 LIST_REMOVE(ukh, ukh_listq); 4723 mutex_spin_exit(&uvm_kentry_lock); 4724 4725 KASSERT(ukh->ukh_nused == 1); 4726 4727 /* 4728 * remove map entry for ukh itself. 4729 */ 4730 4731 va = (vaddr_t)ukh; 4732 KASSERT((va & PAGE_MASK) == 0); 4733 vm_map_lock(map); 4734 uvm_unmap_remove(map, va, va + PAGE_SIZE, &deadentry, NULL, 0); 4735 KASSERT(deadentry->flags & UVM_MAP_KERNEL); 4736 KASSERT(deadentry->flags & UVM_MAP_KMAPENT); 4737 KASSERT(deadentry->next == NULL); 4738 KASSERT(deadentry == &ukh->ukh_entries[0]); 4739 4740 /* 4741 * unmap the page from pmap and free it.
4742 */ 4743 4744 pmap = vm_map_pmap(map); 4745 KASSERT(pmap == pmap_kernel()); 4746 if (!pmap_extract(pmap, va, &pa)) 4747 panic("%s: no mapping", __func__); 4748 pmap_kremove(va, PAGE_SIZE); 4749 pmap_update(vm_map_pmap(map)); 4750 vm_map_unlock(map); 4751 pg = PHYS_TO_VM_PAGE(pa); 4752 uvm_pagefree(pg); 4753 UVMMAP_EVCNT_INCR(ukh_free); 4754 } 4755 4756 static vsize_t 4757 uvm_kmapent_overhead(vsize_t size) 4758 { 4759 4760 /* 4761 * - the max number of unmerged entries is howmany(size, PAGE_SIZE) 4762 * as the min allocation unit is PAGE_SIZE. 4763 * - UVM_KMAPENT_CHUNK "kmapent"s are allocated from a page. 4764 * one of them are used to map the page itself. 4765 */ 4766 4767 return howmany(howmany(size, PAGE_SIZE), (UVM_KMAPENT_CHUNK - 1)) * 4768 PAGE_SIZE; 4769 } 4770 4771 /* 4772 * map entry reservation 4773 */ 4774 4775 /* 4776 * uvm_mapent_reserve: reserve map entries for clipping before locking map. 4777 * 4778 * => needed when unmapping entries allocated without UVM_FLAG_QUANTUM. 4779 * => caller shouldn't hold map locked. 4780 */ 4781 int 4782 uvm_mapent_reserve(struct vm_map *map, struct uvm_mapent_reservation *umr, 4783 int nentries, int flags) 4784 { 4785 4786 umr->umr_nentries = 0; 4787 4788 if ((flags & UVM_FLAG_QUANTUM) != 0) 4789 return 0; 4790 4791 if (!VM_MAP_USE_KMAPENT(map)) 4792 return 0; 4793 4794 while (nentries--) { 4795 struct vm_map_entry *ent; 4796 ent = uvm_kmapent_alloc(map, flags); 4797 if (!ent) { 4798 uvm_mapent_unreserve(map, umr); 4799 return ENOMEM; 4800 } 4801 UMR_PUTENTRY(umr, ent); 4802 } 4803 4804 return 0; 4805 } 4806 4807 /* 4808 * uvm_mapent_unreserve: 4809 * 4810 * => caller shouldn't hold map locked. 4811 * => never fail or sleep. 4812 */ 4813 void 4814 uvm_mapent_unreserve(struct vm_map *map, struct uvm_mapent_reservation *umr) 4815 { 4816 4817 while (!UMR_EMPTY(umr)) 4818 uvm_kmapent_free(UMR_GETENTRY(umr)); 4819 } 4820 4821 /* 4822 * uvm_mapent_trymerge: try to merge an entry with its neighbors. 4823 * 4824 * => called with map locked. 4825 * => return non zero if successfully merged. 4826 */ 4827 4828 int 4829 uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags) 4830 { 4831 struct uvm_object *uobj; 4832 struct vm_map_entry *next; 4833 struct vm_map_entry *prev; 4834 vsize_t size; 4835 int merged = 0; 4836 bool copying; 4837 int newetype; 4838 4839 if (VM_MAP_USE_KMAPENT(map)) { 4840 return 0; 4841 } 4842 if (entry->aref.ar_amap != NULL) { 4843 return 0; 4844 } 4845 if ((entry->flags & UVM_MAP_NOMERGE) != 0) { 4846 return 0; 4847 } 4848 4849 uobj = entry->object.uvm_obj; 4850 size = entry->end - entry->start; 4851 copying = (flags & UVM_MERGE_COPYING) != 0; 4852 newetype = copying ? 
(entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype; 4853 4854 next = entry->next; 4855 if (next != &map->header && 4856 next->start == entry->end && 4857 ((copying && next->aref.ar_amap != NULL && 4858 amap_refs(next->aref.ar_amap) == 1) || 4859 (!copying && next->aref.ar_amap == NULL)) && 4860 UVM_ET_ISCOMPATIBLE(next, newetype, 4861 uobj, entry->flags, entry->protection, 4862 entry->max_protection, entry->inheritance, entry->advice, 4863 entry->wired_count) && 4864 (uobj == NULL || entry->offset + size == next->offset)) { 4865 int error; 4866 4867 if (copying) { 4868 error = amap_extend(next, size, 4869 AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS); 4870 } else { 4871 error = 0; 4872 } 4873 if (error == 0) { 4874 if (uobj) { 4875 if (uobj->pgops->pgo_detach) { 4876 uobj->pgops->pgo_detach(uobj); 4877 } 4878 } 4879 4880 entry->end = next->end; 4881 clear_hints(map, next); 4882 uvm_map_entry_unlink(map, next); 4883 if (copying) { 4884 entry->aref = next->aref; 4885 entry->etype &= ~UVM_ET_NEEDSCOPY; 4886 } 4887 uvm_map_check(map, "trymerge forwardmerge"); 4888 uvm_mapent_free_merged(map, next); 4889 merged++; 4890 } 4891 } 4892 4893 prev = entry->prev; 4894 if (prev != &map->header && 4895 prev->end == entry->start && 4896 ((copying && !merged && prev->aref.ar_amap != NULL && 4897 amap_refs(prev->aref.ar_amap) == 1) || 4898 (!copying && prev->aref.ar_amap == NULL)) && 4899 UVM_ET_ISCOMPATIBLE(prev, newetype, 4900 uobj, entry->flags, entry->protection, 4901 entry->max_protection, entry->inheritance, entry->advice, 4902 entry->wired_count) && 4903 (uobj == NULL || 4904 prev->offset + prev->end - prev->start == entry->offset)) { 4905 int error; 4906 4907 if (copying) { 4908 error = amap_extend(prev, size, 4909 AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS); 4910 } else { 4911 error = 0; 4912 } 4913 if (error == 0) { 4914 if (uobj) { 4915 if (uobj->pgops->pgo_detach) { 4916 uobj->pgops->pgo_detach(uobj); 4917 } 4918 entry->offset = prev->offset; 4919 } 4920 4921 entry->start = prev->start; 4922 clear_hints(map, prev); 4923 uvm_map_entry_unlink(map, prev); 4924 if (copying) { 4925 entry->aref = prev->aref; 4926 entry->etype &= ~UVM_ET_NEEDSCOPY; 4927 } 4928 uvm_map_check(map, "trymerge backmerge"); 4929 uvm_mapent_free_merged(map, prev); 4930 merged++; 4931 } 4932 } 4933 4934 return merged; 4935 } 4936 4937 #if defined(DDB) 4938 4939 /* 4940 * DDB hooks 4941 */ 4942 4943 /* 4944 * uvm_map_printit: actually prints the map 4945 */ 4946 4947 void 4948 uvm_map_printit(struct vm_map *map, bool full, 4949 void (*pr)(const char *, ...)) 4950 { 4951 struct vm_map_entry *entry; 4952 4953 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map), 4954 vm_map_max(map)); 4955 (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n", 4956 map->nentries, map->size, map->ref_count, map->timestamp, 4957 map->flags); 4958 (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap, 4959 pmap_resident_count(map->pmap), pmap_wired_count(map->pmap)); 4960 if (!full) 4961 return; 4962 for (entry = map->header.next; entry != &map->header; 4963 entry = entry->next) { 4964 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 4965 entry, entry->start, entry->end, entry->object.uvm_obj, 4966 (long long)entry->offset, entry->aref.ar_amap, 4967 entry->aref.ar_pageoff); 4968 (*pr)( 4969 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 4970 "wc=%d, adv=%d\n", 4971 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 4972 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 4973 (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', 4974 entry->protection, entry->max_protection, 4975 entry->inheritance, entry->wired_count, entry->advice); 4976 } 4977 } 4978 4979 /* 4980 * uvm_object_printit: actually prints the object 4981 */ 4982 4983 void 4984 uvm_object_printit(struct uvm_object *uobj, bool full, 4985 void (*pr)(const char *, ...)) 4986 { 4987 struct vm_page *pg; 4988 int cnt = 0; 4989 4990 (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ", 4991 uobj, mutex_owned(&uobj->vmobjlock), uobj->pgops, uobj->uo_npages); 4992 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 4993 (*pr)("refs=<SYSTEM>\n"); 4994 else 4995 (*pr)("refs=%d\n", uobj->uo_refs); 4996 4997 if (!full) { 4998 return; 4999 } 5000 (*pr)(" PAGES <pg,offset>:\n "); 5001 TAILQ_FOREACH(pg, &uobj->memq, listq.queue) { 5002 cnt++; 5003 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 5004 if ((cnt % 3) == 0) { 5005 (*pr)("\n "); 5006 } 5007 } 5008 if ((cnt % 3) != 0) { 5009 (*pr)("\n"); 5010 } 5011 } 5012 5013 /* 5014 * uvm_page_printit: actually print the page 5015 */ 5016 5017 static const char page_flagbits[] = UVM_PGFLAGBITS; 5018 static const char page_pqflagbits[] = UVM_PQFLAGBITS; 5019 5020 void 5021 uvm_page_printit(struct vm_page *pg, bool full, 5022 void (*pr)(const char *, ...)) 5023 { 5024 struct vm_page *tpg; 5025 struct uvm_object *uobj; 5026 struct pgflist *pgl; 5027 char pgbuf[128]; 5028 char pqbuf[128]; 5029 5030 (*pr)("PAGE %p:\n", pg); 5031 snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); 5032 snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags); 5033 (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n", 5034 pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); 5035 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 5036 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 5037 #if defined(UVM_PAGE_TRKOWN) 5038 if (pg->flags & PG_BUSY) 5039 (*pr)(" owning process = %d, tag=%s\n", 5040 pg->owner, pg->owner_tag); 5041 else 5042 (*pr)(" page not busy, no owner\n"); 5043 #else 5044 (*pr)(" [page ownership tracking disabled]\n"); 5045 #endif 5046 5047 if (!full) 5048 return; 5049 5050 /* cross-verify object/anon */ 5051 if ((pg->pqflags & PQ_FREE) == 0) { 5052 if (pg->pqflags & PQ_ANON) { 5053 if (pg->uanon == NULL || pg->uanon->an_page != pg) 5054 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 5055 (pg->uanon) ? pg->uanon->an_page : NULL); 5056 else 5057 (*pr)(" anon backpointer is OK\n"); 5058 } else { 5059 uobj = pg->uobject; 5060 if (uobj) { 5061 (*pr)(" checking object list\n"); 5062 TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) { 5063 if (tpg == pg) { 5064 break; 5065 } 5066 } 5067 if (tpg) 5068 (*pr)(" page found on object list\n"); 5069 else 5070 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 5071 } 5072 } 5073 } 5074 5075 /* cross-verify page queue */ 5076 if (pg->pqflags & PQ_FREE) { 5077 int fl = uvm_page_lookup_freelist(pg); 5078 int color = VM_PGCOLOR_BUCKET(pg); 5079 pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ 5080 ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN]; 5081 } else { 5082 pgl = NULL; 5083 } 5084 5085 if (pgl) { 5086 (*pr)(" checking pageq list\n"); 5087 LIST_FOREACH(tpg, pgl, pageq.list) { 5088 if (tpg == pg) { 5089 break; 5090 } 5091 } 5092 if (tpg) 5093 (*pr)(" page found on pageq list\n"); 5094 else 5095 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! 
<<<\n"); 5096 } 5097 } 5098 5099 /* 5100 * uvm_pages_printthem - print a summary of all managed pages 5101 */ 5102 5103 void 5104 uvm_page_printall(void (*pr)(const char *, ...)) 5105 { 5106 unsigned i; 5107 struct vm_page *pg; 5108 5109 (*pr)("%18s %4s %4s %18s %18s" 5110 #ifdef UVM_PAGE_TRKOWN 5111 " OWNER" 5112 #endif 5113 "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON"); 5114 for (i = 0; i < vm_nphysseg; i++) { 5115 for (pg = vm_physmem[i].pgs; pg <= vm_physmem[i].lastpg; pg++) { 5116 (*pr)("%18p %04x %04x %18p %18p", 5117 pg, pg->flags, pg->pqflags, pg->uobject, 5118 pg->uanon); 5119 #ifdef UVM_PAGE_TRKOWN 5120 if (pg->flags & PG_BUSY) 5121 (*pr)(" %d [%s]", pg->owner, pg->owner_tag); 5122 #endif 5123 (*pr)("\n"); 5124 } 5125 } 5126 } 5127 5128 #endif 5129 5130 /* 5131 * uvm_map_create: create map 5132 */ 5133 5134 struct vm_map * 5135 uvm_map_create(pmap_t pmap, vaddr_t vmin, vaddr_t vmax, int flags) 5136 { 5137 struct vm_map *result; 5138 5139 result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); 5140 uvm_map_setup(result, vmin, vmax, flags); 5141 result->pmap = pmap; 5142 return(result); 5143 } 5144 5145 /* 5146 * uvm_map_setup: init map 5147 * 5148 * => map must not be in service yet. 5149 */ 5150 5151 void 5152 uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) 5153 { 5154 int ipl; 5155 5156 rb_tree_init(&map->rb_tree, &uvm_map_tree_ops); 5157 map->header.next = map->header.prev = &map->header; 5158 map->nentries = 0; 5159 map->size = 0; 5160 map->ref_count = 1; 5161 vm_map_setmin(map, vmin); 5162 vm_map_setmax(map, vmax); 5163 map->flags = flags; 5164 map->first_free = &map->header; 5165 map->hint = &map->header; 5166 map->timestamp = 0; 5167 map->busy = NULL; 5168 5169 if ((flags & VM_MAP_INTRSAFE) != 0) { 5170 ipl = IPL_VM; 5171 } else { 5172 ipl = IPL_NONE; 5173 } 5174 5175 rw_init(&map->lock); 5176 cv_init(&map->cv, "vm_map"); 5177 mutex_init(&map->misc_lock, MUTEX_DRIVER, ipl); 5178 mutex_init(&map->mutex, MUTEX_DRIVER, ipl); 5179 } 5180 5181 5182 /* 5183 * U N M A P - m a i n e n t r y p o i n t 5184 */ 5185 5186 /* 5187 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop") 5188 * 5189 * => caller must check alignment and size 5190 * => map must be unlocked (we will lock it) 5191 * => flags is UVM_FLAG_QUANTUM or 0. 5192 */ 5193 5194 void 5195 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 5196 { 5197 struct vm_map_entry *dead_entries; 5198 struct uvm_mapent_reservation umr; 5199 UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); 5200 5201 UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", 5202 map, start, end, 0); 5203 if (map == kernel_map) { 5204 LOCKDEBUG_MEM_CHECK((void *)start, end - start); 5205 } 5206 /* 5207 * work now done by helper functions. wipe the pmap's and then 5208 * detach from the dead entries... 5209 */ 5210 uvm_mapent_reserve(map, &umr, 2, flags); 5211 vm_map_lock(map); 5212 uvm_unmap_remove(map, start, end, &dead_entries, &umr, flags); 5213 vm_map_unlock(map); 5214 uvm_mapent_unreserve(map, &umr); 5215 5216 if (dead_entries != NULL) 5217 uvm_unmap_detach(dead_entries, 0); 5218 5219 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 5220 } 5221 5222 5223 /* 5224 * uvm_map_reference: add reference to a map 5225 * 5226 * => map need not be locked (we use misc_lock). 
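 * => this bumps the map's own ref_count; contrast uvmspace_addref(),
 *    which counts references to the enclosing vmspace.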
5227 */ 5228 5229 void 5230 uvm_map_reference(struct vm_map *map) 5231 { 5232 mutex_enter(&map->misc_lock); 5233 map->ref_count++; 5234 mutex_exit(&map->misc_lock); 5235 } 5236 5237 struct vm_map_kernel * 5238 vm_map_to_kernel(struct vm_map *map) 5239 { 5240 5241 KASSERT(VM_MAP_IS_KERNEL(map)); 5242 5243 return (struct vm_map_kernel *)map; 5244 } 5245 5246 bool 5247 vm_map_starved_p(struct vm_map *map) 5248 { 5249 5250 if ((map->flags & VM_MAP_WANTVA) != 0) { 5251 return true; 5252 } 5253 /* XXX */ 5254 if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) { 5255 return true; 5256 } 5257 return false; 5258 } 5259 5260 #if defined(DDB) 5261 void 5262 uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 5263 { 5264 struct vm_map *map; 5265 5266 for (map = kernel_map;;) { 5267 struct vm_map_entry *entry; 5268 5269 if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) { 5270 break; 5271 } 5272 (*pr)("%p is %p+%zu from VMMAP %p\n", 5273 (void *)addr, (void *)entry->start, 5274 (size_t)(addr - (uintptr_t)entry->start), map); 5275 if (!UVM_ET_ISSUBMAP(entry)) { 5276 break; 5277 } 5278 map = entry->object.sub_map; 5279 } 5280 } 5281 #endif /* defined(DDB) */ 5282