/*	$NetBSD: uvm_map.c,v 1.316 2012/03/13 18:41:15 elad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_map.c: uvm map operations
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.316 2012/03/13 18:41:15 elad Exp $");

#include "opt_ddb.h"
#include "opt_uvmhist.h"
#include "opt_uvm.h"
#include "opt_sysv.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/lockdebug.h>
#include <sys/atomic.h>
#ifndef __USER_VA0_IS_SAFE
#include <sys/sysctl.h>
#include <sys/kauth.h>
#include "opt_user_va0_disable_default.h"
#endif

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

#if defined(DDB) || defined(DEBUGPRINT)
#include <uvm/uvm_ddb.h>
#endif

#if !defined(UVMMAP_COUNTERS)

#define	UVMMAP_EVCNT_DEFINE(name)	/* nothing */
#define	UVMMAP_EVCNT_INCR(ev)		/* nothing */
#define	UVMMAP_EVCNT_DECR(ev)		/* nothing */

#else /* defined(UVMMAP_COUNTERS) */

#include <sys/evcnt.h>
#define	UVMMAP_EVCNT_DEFINE(name) \
struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "uvmmap", #name); \
EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name);
#define	UVMMAP_EVCNT_INCR(ev)		uvmmap_evcnt_##ev.ev_count++
#define	UVMMAP_EVCNT_DECR(ev)		uvmmap_evcnt_##ev.ev_count--

#endif /* defined(UVMMAP_COUNTERS) */

UVMMAP_EVCNT_DEFINE(ubackmerge)
UVMMAP_EVCNT_DEFINE(uforwmerge)
UVMMAP_EVCNT_DEFINE(ubimerge)
UVMMAP_EVCNT_DEFINE(unomerge)
UVMMAP_EVCNT_DEFINE(kbackmerge)
UVMMAP_EVCNT_DEFINE(kforwmerge)
UVMMAP_EVCNT_DEFINE(kbimerge)
UVMMAP_EVCNT_DEFINE(knomerge)
UVMMAP_EVCNT_DEFINE(map_call)
UVMMAP_EVCNT_DEFINE(mlk_call)
UVMMAP_EVCNT_DEFINE(mlk_hint)
UVMMAP_EVCNT_DEFINE(mlk_list)
UVMMAP_EVCNT_DEFINE(mlk_tree)
UVMMAP_EVCNT_DEFINE(mlk_treeloop)
UVMMAP_EVCNT_DEFINE(mlk_listloop)

const char vmmapbsy[] = "vmmapbsy";

/*
 * cache for vmspace structures.
 */

static struct pool_cache uvm_vmspace_cache;

/*
 * cache for dynamically-allocated map entries.
 */

static struct pool_cache uvm_map_entry_cache;

#ifdef PMAP_GROWKERNEL
/*
 * This global represents the end of the kernel virtual address
 * space.  If we want to exceed this, we must grow the kernel
 * virtual address space dynamically.
 *
 * Note, this variable is locked by kernel_map's lock.
 */
vaddr_t uvm_maxkaddr;
#endif

#ifndef __USER_VA0_IS_SAFE
#ifndef __USER_VA0_DISABLE_DEFAULT
#define __USER_VA0_DISABLE_DEFAULT 1
#endif
#ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */
#undef __USER_VA0_DISABLE_DEFAULT
#define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT
#endif
static int user_va0_disable = __USER_VA0_DISABLE_DEFAULT;
#endif

/*
 * macros
 */

/*
 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging
 */
extern struct vm_map *pager_map;

#define	UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \
    prot, maxprot, inh, adv, wire) \
	((ent)->etype == (type) && \
	(((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \
	(ent)->object.uvm_obj == (uobj) && \
	(ent)->protection == (prot) && \
	(ent)->max_protection == (maxprot) && \
	(ent)->inheritance == (inh) && \
	(ent)->advice == (adv) && \
	(ent)->wired_count == (wire))

/*
 * uvm_map_entry_link: insert entry into a map
 *
 * => map must be locked
 */
#define	uvm_map_entry_link(map, after_where, entry) do { \
	uvm_mapent_check(entry); \
	(map)->nentries++; \
	(entry)->prev = (after_where); \
	(entry)->next = (after_where)->next; \
	(entry)->prev->next = (entry); \
	(entry)->next->prev = (entry); \
	uvm_rb_insert((map), (entry)); \
} while (/*CONSTCOND*/ 0)

/*
 * uvm_map_entry_unlink: remove entry from a map
 *
 * => map must be locked
 */
#define	uvm_map_entry_unlink(map, entry) do { \
	KASSERT((entry) != (map)->first_free); \
	KASSERT((entry) != (map)->hint); \
	uvm_mapent_check(entry); \
	(map)->nentries--; \
	(entry)->next->prev = (entry)->prev; \
	(entry)->prev->next = (entry)->next; \
	uvm_rb_remove((map), (entry)); \
} while (/*CONSTCOND*/ 0)

/*
 * SAVE_HINT: saves the specified entry as the hint for future lookups.
 *
 * => map need not be locked.
 */
#define	SAVE_HINT(map, check, value) do { \
	if ((map)->hint == (check)) \
		(map)->hint = (value); \
} while (/*CONSTCOND*/ 0)
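
/*
 * Map entries live simultaneously on a circular doubly-linked list,
 * whose sentinel is &map->header, and in the red-black tree used for
 * address lookups; uvm_map_entry_link()/unlink() above maintain both.
 * A minimal standalone sketch of the list half of that scheme (the
 * names demo_entry, demo_link and demo_unlink are invented for
 * illustration and exist nowhere else):
 */
#if 0	/* illustrative sketch only, never compiled */
struct demo_entry {
	struct demo_entry *prev, *next;	/* circular, sentinel-headed */
};

static void
demo_link(struct demo_entry *after_where, struct demo_entry *entry)
{

	/* splice "entry" in immediately after "after_where" */
	entry->prev = after_where;
	entry->next = after_where->next;
	entry->prev->next = entry;
	entry->next->prev = entry;
}

static void
demo_unlink(struct demo_entry *entry)
{

	/* unsplice; the sentinel guarantees prev and next are never NULL */
	entry->next->prev = entry->prev;
	entry->prev->next = entry->next;
}
#endif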
/*
 * clear_hints: ensure that hints don't point to the entry.
 *
 * => map must be write-locked.
 */
static void
clear_hints(struct vm_map *map, struct vm_map_entry *ent)
{

	SAVE_HINT(map, ent, ent->prev);
	if (map->first_free == ent) {
		map->first_free = ent->prev;
	}
}

/*
 * VM_MAP_RANGE_CHECK: check and correct range
 *
 * => map must at least be read locked
 */

#define	VM_MAP_RANGE_CHECK(map, start, end) do { \
	if (start < vm_map_min(map)) \
		start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
		end = vm_map_max(map); \
	if (start > end) \
		start = end; \
} while (/*CONSTCOND*/ 0)

/*
 * local prototypes
 */

static struct vm_map_entry *
		uvm_mapent_alloc(struct vm_map *, int);
static void	uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
static void	uvm_mapent_free(struct vm_map_entry *);
#if defined(DEBUG)
static void	_uvm_mapent_check(const struct vm_map_entry *, const char *,
		    int);
#define	uvm_mapent_check(map)	_uvm_mapent_check(map, __FILE__, __LINE__)
#else /* defined(DEBUG) */
#define	uvm_mapent_check(e)	/* nothing */
#endif /* defined(DEBUG) */

static void	uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
static void	uvm_map_reference_amap(struct vm_map_entry *, int);
static int	uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
		    int, struct vm_map_entry *);
static void	uvm_map_unreference_amap(struct vm_map_entry *, int);

int _uvm_map_sanity(struct vm_map *);
int _uvm_tree_sanity(struct vm_map *);
static vsize_t uvm_rb_maxgap(const struct vm_map_entry *);

#define	ROOT_ENTRY(map)		((struct vm_map_entry *)(map)->rb_tree.rbt_root)
#define	LEFT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_left)
#define	RIGHT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_right)
#define	PARENT_ENTRY(map, entry) \
	(ROOT_ENTRY(map) == (entry) \
	    ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))

static int
uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey)
{
	const struct vm_map_entry *eparent = nparent;
	const struct vm_map_entry *ekey = nkey;

	KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
	KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);

	if (eparent->start < ekey->start)
		return -1;
	if (eparent->end >= ekey->start)
		return 1;
	return 0;
}

static int
uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey)
{
	const struct vm_map_entry *eparent = nparent;
	const vaddr_t va = *(const vaddr_t *) vkey;

	if (eparent->start < va)
		return -1;
	if (eparent->end >= va)
		return 1;
	return 0;
}

static const rb_tree_ops_t uvm_map_tree_ops = {
	.rbto_compare_nodes = uvm_map_compare_nodes,
	.rbto_compare_key = uvm_map_compare_key,
	.rbto_node_offset = offsetof(struct vm_map_entry, rb_node),
	.rbto_context = NULL
};

/*
 * uvm_rb_gap: return the gap size between our entry and next entry.
 */
static inline vsize_t
uvm_rb_gap(const struct vm_map_entry *entry)
{

	KASSERT(entry->next != NULL);
	return entry->next->start - entry->end;
}

static vsize_t
uvm_rb_maxgap(const struct vm_map_entry *entry)
{
	struct vm_map_entry *child;
	vsize_t maxgap = entry->gap;

	/*
	 * We need maxgap to be the largest gap of us or any of our
	 * descendents.
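	 * (A small worked illustration with invented numbers: if this
	 * entry's own gap is 4 pages, its left child's cached maxgap is
	 * 16 and its right child's cached maxgap is 8, then
	 *
	 *	maxgap = MAX(4, 16, 8) = 16
	 *
	 * so uvm_map_findspace() knows a 16-page request can still be
	 * satisfied somewhere below this node, while anything larger
	 * lets it skip the whole subtree.)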
Since each of our children's maxgap is the 352 * cached value of their largest gap of themselves or their 353 * descendents, we can just use that value and avoid recursing 354 * down the tree to calculate it. 355 */ 356 if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 357 maxgap = child->maxgap; 358 359 if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 360 maxgap = child->maxgap; 361 362 return maxgap; 363 } 364 365 static void 366 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) 367 { 368 struct vm_map_entry *parent; 369 370 KASSERT(entry->gap == uvm_rb_gap(entry)); 371 entry->maxgap = uvm_rb_maxgap(entry); 372 373 while ((parent = PARENT_ENTRY(map, entry)) != NULL) { 374 struct vm_map_entry *brother; 375 vsize_t maxgap = parent->gap; 376 unsigned int which; 377 378 KDASSERT(parent->gap == uvm_rb_gap(parent)); 379 if (maxgap < entry->maxgap) 380 maxgap = entry->maxgap; 381 /* 382 * Since we work towards the root, we know entry's maxgap 383 * value is OK, but its brothers may now be out-of-date due 384 * to rebalancing. So refresh it. 385 */ 386 which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER; 387 brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which]; 388 if (brother != NULL) { 389 KDASSERT(brother->gap == uvm_rb_gap(brother)); 390 brother->maxgap = uvm_rb_maxgap(brother); 391 if (maxgap < brother->maxgap) 392 maxgap = brother->maxgap; 393 } 394 395 parent->maxgap = maxgap; 396 entry = parent; 397 } 398 } 399 400 static void 401 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) 402 { 403 struct vm_map_entry *ret; 404 405 entry->gap = entry->maxgap = uvm_rb_gap(entry); 406 if (entry->prev != &map->header) 407 entry->prev->gap = uvm_rb_gap(entry->prev); 408 409 ret = rb_tree_insert_node(&map->rb_tree, entry); 410 KASSERTMSG(ret == entry, 411 "uvm_rb_insert: map %p: duplicate entry %p", map, ret); 412 413 /* 414 * If the previous entry is not our immediate left child, then it's an 415 * ancestor and will be fixed up on the way to the root. We don't 416 * have to check entry->prev against &map->header since &map->header 417 * will never be in the tree. 418 */ 419 uvm_rb_fixup(map, 420 LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry); 421 } 422 423 static void 424 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 425 { 426 struct vm_map_entry *prev_parent = NULL, *next_parent = NULL; 427 428 /* 429 * If we are removing an interior node, then an adjacent node will 430 * be used to replace its position in the tree. Therefore we will 431 * need to fixup the tree starting at the parent of the replacement 432 * node. So record their parents for later use. 433 */ 434 if (entry->prev != &map->header) 435 prev_parent = PARENT_ENTRY(map, entry->prev); 436 if (entry->next != &map->header) 437 next_parent = PARENT_ENTRY(map, entry->next); 438 439 rb_tree_remove_node(&map->rb_tree, entry); 440 441 /* 442 * If the previous node has a new parent, fixup the tree starting 443 * at the previous node's old parent. 444 */ 445 if (entry->prev != &map->header) { 446 /* 447 * Update the previous entry's gap due to our absence. 448 */ 449 entry->prev->gap = uvm_rb_gap(entry->prev); 450 uvm_rb_fixup(map, entry->prev); 451 if (prev_parent != NULL 452 && prev_parent != entry 453 && prev_parent != PARENT_ENTRY(map, entry->prev)) 454 uvm_rb_fixup(map, prev_parent); 455 } 456 457 /* 458 * If the next node has a new parent, fixup the tree starting 459 * at the next node's old parent. 
460 */ 461 if (entry->next != &map->header) { 462 uvm_rb_fixup(map, entry->next); 463 if (next_parent != NULL 464 && next_parent != entry 465 && next_parent != PARENT_ENTRY(map, entry->next)) 466 uvm_rb_fixup(map, next_parent); 467 } 468 } 469 470 #if defined(DEBUG) 471 int uvm_debug_check_map = 0; 472 int uvm_debug_check_rbtree = 0; 473 #define uvm_map_check(map, name) \ 474 _uvm_map_check((map), (name), __FILE__, __LINE__) 475 static void 476 _uvm_map_check(struct vm_map *map, const char *name, 477 const char *file, int line) 478 { 479 480 if ((uvm_debug_check_map && _uvm_map_sanity(map)) || 481 (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) { 482 panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)", 483 name, map, file, line); 484 } 485 } 486 #else /* defined(DEBUG) */ 487 #define uvm_map_check(map, name) /* nothing */ 488 #endif /* defined(DEBUG) */ 489 490 #if defined(DEBUG) || defined(DDB) 491 int 492 _uvm_map_sanity(struct vm_map *map) 493 { 494 bool first_free_found = false; 495 bool hint_found = false; 496 const struct vm_map_entry *e; 497 struct vm_map_entry *hint = map->hint; 498 499 e = &map->header; 500 for (;;) { 501 if (map->first_free == e) { 502 first_free_found = true; 503 } else if (!first_free_found && e->next->start > e->end) { 504 printf("first_free %p should be %p\n", 505 map->first_free, e); 506 return -1; 507 } 508 if (hint == e) { 509 hint_found = true; 510 } 511 512 e = e->next; 513 if (e == &map->header) { 514 break; 515 } 516 } 517 if (!first_free_found) { 518 printf("stale first_free\n"); 519 return -1; 520 } 521 if (!hint_found) { 522 printf("stale hint\n"); 523 return -1; 524 } 525 return 0; 526 } 527 528 int 529 _uvm_tree_sanity(struct vm_map *map) 530 { 531 struct vm_map_entry *tmp, *trtmp; 532 int n = 0, i = 1; 533 534 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 535 if (tmp->gap != uvm_rb_gap(tmp)) { 536 printf("%d/%d gap %lx != %lx %s\n", 537 n + 1, map->nentries, 538 (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp), 539 tmp->next == &map->header ? "(last)" : ""); 540 goto error; 541 } 542 /* 543 * If any entries are out of order, tmp->gap will be unsigned 544 * and will likely exceed the size of the map. 
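	 * (For example, if a corrupt list made next->start smaller than
	 * end, the unsigned subtraction in uvm_rb_gap() would wrap around
	 * and yield a huge value, which the check below catches.)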
545 */ 546 if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) { 547 printf("too large gap %zu\n", (size_t)tmp->gap); 548 goto error; 549 } 550 n++; 551 } 552 553 if (n != map->nentries) { 554 printf("nentries: %d vs %d\n", n, map->nentries); 555 goto error; 556 } 557 558 trtmp = NULL; 559 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 560 if (tmp->maxgap != uvm_rb_maxgap(tmp)) { 561 printf("maxgap %lx != %lx\n", 562 (ulong)tmp->maxgap, 563 (ulong)uvm_rb_maxgap(tmp)); 564 goto error; 565 } 566 if (trtmp != NULL && trtmp->start >= tmp->start) { 567 printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n", 568 trtmp->start, tmp->start); 569 goto error; 570 } 571 572 trtmp = tmp; 573 } 574 575 for (tmp = map->header.next; tmp != &map->header; 576 tmp = tmp->next, i++) { 577 trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT); 578 if (trtmp == NULL) 579 trtmp = &map->header; 580 if (tmp->prev != trtmp) { 581 printf("lookup: %d: %p->prev=%p: %p\n", 582 i, tmp, tmp->prev, trtmp); 583 goto error; 584 } 585 trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT); 586 if (trtmp == NULL) 587 trtmp = &map->header; 588 if (tmp->next != trtmp) { 589 printf("lookup: %d: %p->next=%p: %p\n", 590 i, tmp, tmp->next, trtmp); 591 goto error; 592 } 593 trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start); 594 if (trtmp != tmp) { 595 printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, 596 PARENT_ENTRY(map, tmp)); 597 goto error; 598 } 599 } 600 601 return (0); 602 error: 603 return (-1); 604 } 605 #endif /* defined(DEBUG) || defined(DDB) */ 606 607 /* 608 * vm_map_lock: acquire an exclusive (write) lock on a map. 609 * 610 * => The locking protocol provides for guaranteed upgrade from shared -> 611 * exclusive by whichever thread currently has the map marked busy. 612 * See "LOCKING PROTOCOL NOTES" in uvm_map.h. This is horrible; among 613 * other problems, it defeats any fairness guarantees provided by RW 614 * locks. 615 */ 616 617 void 618 vm_map_lock(struct vm_map *map) 619 { 620 621 for (;;) { 622 rw_enter(&map->lock, RW_WRITER); 623 if (map->busy == NULL || map->busy == curlwp) { 624 break; 625 } 626 mutex_enter(&map->misc_lock); 627 rw_exit(&map->lock); 628 if (map->busy != NULL) { 629 cv_wait(&map->cv, &map->misc_lock); 630 } 631 mutex_exit(&map->misc_lock); 632 } 633 map->timestamp++; 634 } 635 636 /* 637 * vm_map_lock_try: try to lock a map, failing if it is already locked. 638 */ 639 640 bool 641 vm_map_lock_try(struct vm_map *map) 642 { 643 644 if (!rw_tryenter(&map->lock, RW_WRITER)) { 645 return false; 646 } 647 if (map->busy != NULL) { 648 rw_exit(&map->lock); 649 return false; 650 } 651 map->timestamp++; 652 return true; 653 } 654 655 /* 656 * vm_map_unlock: release an exclusive lock on a map. 657 */ 658 659 void 660 vm_map_unlock(struct vm_map *map) 661 { 662 663 KASSERT(rw_write_held(&map->lock)); 664 KASSERT(map->busy == NULL || map->busy == curlwp); 665 rw_exit(&map->lock); 666 } 667 668 /* 669 * vm_map_unbusy: mark the map as unbusy, and wake any waiters that 670 * want an exclusive lock. 
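 *
 * An illustrative sketch of how busy/unbusy bracket an operation that
 * has to sleep with the map temporarily unlocked (real callers differ
 * in detail):
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);		(requires the write lock)
 *	vm_map_unlock(map);
 *	... work that may sleep; other vm_map_lock() callers wait ...
 *	vm_map_lock(map);		(the busy owner always gets back in)
 *	vm_map_unbusy(map);		(wakes the waiters)
 *	vm_map_unlock(map);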
671 */ 672 673 void 674 vm_map_unbusy(struct vm_map *map) 675 { 676 677 KASSERT(map->busy == curlwp); 678 679 /* 680 * Safe to clear 'busy' and 'waiters' with only a read lock held: 681 * 682 * o they can only be set with a write lock held 683 * o writers are blocked out with a read or write hold 684 * o at any time, only one thread owns the set of values 685 */ 686 mutex_enter(&map->misc_lock); 687 map->busy = NULL; 688 cv_broadcast(&map->cv); 689 mutex_exit(&map->misc_lock); 690 } 691 692 /* 693 * vm_map_lock_read: acquire a shared (read) lock on a map. 694 */ 695 696 void 697 vm_map_lock_read(struct vm_map *map) 698 { 699 700 rw_enter(&map->lock, RW_READER); 701 } 702 703 /* 704 * vm_map_unlock_read: release a shared lock on a map. 705 */ 706 707 void 708 vm_map_unlock_read(struct vm_map *map) 709 { 710 711 rw_exit(&map->lock); 712 } 713 714 /* 715 * vm_map_busy: mark a map as busy. 716 * 717 * => the caller must hold the map write locked 718 */ 719 720 void 721 vm_map_busy(struct vm_map *map) 722 { 723 724 KASSERT(rw_write_held(&map->lock)); 725 KASSERT(map->busy == NULL); 726 727 map->busy = curlwp; 728 } 729 730 /* 731 * vm_map_locked_p: return true if the map is write locked. 732 * 733 * => only for debug purposes like KASSERTs. 734 * => should not be used to verify that a map is not locked. 735 */ 736 737 bool 738 vm_map_locked_p(struct vm_map *map) 739 { 740 741 return rw_write_held(&map->lock); 742 } 743 744 /* 745 * uvm_mapent_alloc: allocate a map entry 746 */ 747 748 static struct vm_map_entry * 749 uvm_mapent_alloc(struct vm_map *map, int flags) 750 { 751 struct vm_map_entry *me; 752 int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK; 753 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); 754 755 me = pool_cache_get(&uvm_map_entry_cache, pflags); 756 if (__predict_false(me == NULL)) { 757 return NULL; 758 } 759 me->flags = 0; 760 761 UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, 762 (map == kernel_map), 0, 0); 763 return me; 764 } 765 766 /* 767 * uvm_mapent_free: free map entry 768 */ 769 770 static void 771 uvm_mapent_free(struct vm_map_entry *me) 772 { 773 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); 774 775 UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", 776 me, me->flags, 0, 0); 777 pool_cache_put(&uvm_map_entry_cache, me); 778 } 779 780 /* 781 * uvm_mapent_copy: copy a map entry, preserving flags 782 */ 783 784 static inline void 785 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 786 { 787 788 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 789 ((char *)src)); 790 } 791 792 #if defined(DEBUG) 793 static void 794 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line) 795 { 796 797 if (entry->start >= entry->end) { 798 goto bad; 799 } 800 if (UVM_ET_ISOBJ(entry)) { 801 if (entry->object.uvm_obj == NULL) { 802 goto bad; 803 } 804 } else if (UVM_ET_ISSUBMAP(entry)) { 805 if (entry->object.sub_map == NULL) { 806 goto bad; 807 } 808 } else { 809 if (entry->object.uvm_obj != NULL || 810 entry->object.sub_map != NULL) { 811 goto bad; 812 } 813 } 814 if (!UVM_ET_ISOBJ(entry)) { 815 if (entry->offset != 0) { 816 goto bad; 817 } 818 } 819 820 return; 821 822 bad: 823 panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line); 824 } 825 #endif /* defined(DEBUG) */ 826 827 /* 828 * uvm_map_entry_unwire: unwire a map entry 829 * 830 * => map should be locked by caller 831 */ 832 833 static inline void 834 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry 
*entry) 835 { 836 837 entry->wired_count = 0; 838 uvm_fault_unwire_locked(map, entry->start, entry->end); 839 } 840 841 842 /* 843 * wrapper for calling amap_ref() 844 */ 845 static inline void 846 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 847 { 848 849 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 850 (entry->end - entry->start) >> PAGE_SHIFT, flags); 851 } 852 853 854 /* 855 * wrapper for calling amap_unref() 856 */ 857 static inline void 858 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 859 { 860 861 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 862 (entry->end - entry->start) >> PAGE_SHIFT, flags); 863 } 864 865 866 /* 867 * uvm_map_init: init mapping system at boot time. 868 */ 869 870 void 871 uvm_map_init(void) 872 { 873 #if defined(UVMHIST) 874 static struct kern_history_ent maphistbuf[100]; 875 static struct kern_history_ent pdhistbuf[100]; 876 #endif 877 878 /* 879 * first, init logging system. 880 */ 881 882 UVMHIST_FUNC("uvm_map_init"); 883 UVMHIST_INIT_STATIC(maphist, maphistbuf); 884 UVMHIST_INIT_STATIC(pdhist, pdhistbuf); 885 UVMHIST_CALLED(maphist); 886 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 887 888 /* 889 * initialize the global lock for kernel map entry. 890 */ 891 892 mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); 893 } 894 895 /* 896 * uvm_map_init_caches: init mapping system caches. 897 */ 898 void 899 uvm_map_init_caches(void) 900 { 901 /* 902 * initialize caches. 903 */ 904 905 pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), 906 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); 907 pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), 908 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); 909 } 910 911 /* 912 * clippers 913 */ 914 915 /* 916 * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy. 917 */ 918 919 static void 920 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2, 921 vaddr_t splitat) 922 { 923 vaddr_t adj; 924 925 KASSERT(entry1->start < splitat); 926 KASSERT(splitat < entry1->end); 927 928 adj = splitat - entry1->start; 929 entry1->end = entry2->start = splitat; 930 931 if (entry1->aref.ar_amap) { 932 amap_splitref(&entry1->aref, &entry2->aref, adj); 933 } 934 if (UVM_ET_ISSUBMAP(entry1)) { 935 /* ... unlikely to happen, but play it safe */ 936 uvm_map_reference(entry1->object.sub_map); 937 } else if (UVM_ET_ISOBJ(entry1)) { 938 KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */ 939 entry2->offset += adj; 940 if (entry1->object.uvm_obj->pgops && 941 entry1->object.uvm_obj->pgops->pgo_reference) 942 entry1->object.uvm_obj->pgops->pgo_reference( 943 entry1->object.uvm_obj); 944 } 945 } 946 947 /* 948 * uvm_map_clip_start: ensure that the entry begins at or after 949 * the starting address, if it doesn't we split the entry. 950 * 951 * => caller should use UVM_MAP_CLIP_START macro rather than calling 952 * this directly 953 * => map must be locked by caller 954 */ 955 956 void 957 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, 958 vaddr_t start) 959 { 960 struct vm_map_entry *new_entry; 961 962 /* uvm_map_simplify_entry(map, entry); */ /* XXX */ 963 964 uvm_map_check(map, "clip_start entry"); 965 uvm_mapent_check(entry); 966 967 /* 968 * Split off the front portion. note that we must insert the new 969 * entry BEFORE this one, so that this entry has the specified 970 * starting address. 
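	 * (For example, with invented addresses: clipping an entry that
	 * covers [A, C) at start = B leaves the original entry covering
	 * [B, C) and links a new entry covering [A, B) in front of it;
	 * uvm_mapent_splitadj() divides the amap and object references
	 * between the two halves.)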
	 */
	new_entry = uvm_mapent_alloc(map, 0);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(new_entry, entry, start);
	uvm_map_entry_link(map, entry->prev, new_entry);

	uvm_map_check(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 *	the ending address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
{
	struct vm_map_entry *new_entry;

	uvm_map_check(map, "clip_end entry");
	uvm_mapent_check(entry);

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */
	new_entry = uvm_mapent_alloc(map, 0);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(entry, new_entry, end);
	uvm_map_entry_link(map, entry, new_entry);

	uvm_map_check(map, "clip_end leave");
}

/*
 *   M A P   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	[3] <uobj,uoffset>		== normal mapping
 *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *    case [4] is for kernel mappings where we don't know the offset until
 *    we've found a virtual address.  note that kernel object offsets are
 *    always relative to vm_map_min(kernel_map).
 *
 * => if `align' is non-zero, we align the virtual address to the specified
 *	alignment.
 *	this is provided as a mechanism for large pages.
 *
 * => XXXCDC: need way to map in external amap?
 */

int
uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
{
	struct uvm_map_args args;
	struct vm_map_entry *new_entry;
	int error;

	KASSERT((size & PAGE_MASK) == 0);

#ifndef __USER_VA0_IS_SAFE
	if ((flags & UVM_FLAG_FIXED) && *startp == 0 &&
	    !VM_MAP_IS_KERNEL(map) && user_va0_disable)
		return EACCES;
#endif

	/*
	 * for pager_map, allocate the new entry first to avoid sleeping
	 * for memory while we have the map locked.
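	 *
	 * (As an aside, a typical call of uvm_map() from elsewhere in the
	 * kernel looks roughly like the following illustrative fragment;
	 * the protections, advice and flags are of course caller-specific:
	 *
	 *	vaddr_t va = 0;
	 *	error = uvm_map(kernel_map, &va, size, NULL,
	 *	    UVM_UNKNOWN_OFFSET, 0,
	 *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	 *	    UVM_ADV_RANDOM, 0));
	 *
	 * i.e. case [2] in the comment above: no backing object and no
	 * PMAP_PREFER hint.)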
1056 */ 1057 1058 new_entry = NULL; 1059 if (map == pager_map) { 1060 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT)); 1061 if (__predict_false(new_entry == NULL)) 1062 return ENOMEM; 1063 } 1064 if (map == pager_map) 1065 flags |= UVM_FLAG_NOMERGE; 1066 1067 error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align, 1068 flags, &args); 1069 if (!error) { 1070 error = uvm_map_enter(map, &args, new_entry); 1071 *startp = args.uma_start; 1072 } else if (new_entry) { 1073 uvm_mapent_free(new_entry); 1074 } 1075 1076 #if defined(DEBUG) 1077 if (!error && VM_MAP_IS_KERNEL(map)) { 1078 uvm_km_check_empty(map, *startp, *startp + size); 1079 } 1080 #endif /* defined(DEBUG) */ 1081 1082 return error; 1083 } 1084 1085 /* 1086 * uvm_map_prepare: 1087 * 1088 * called with map unlocked. 1089 * on success, returns the map locked. 1090 */ 1091 1092 int 1093 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, 1094 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 1095 struct uvm_map_args *args) 1096 { 1097 struct vm_map_entry *prev_entry; 1098 vm_prot_t prot = UVM_PROTECTION(flags); 1099 vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1100 1101 UVMHIST_FUNC("uvm_map_prepare"); 1102 UVMHIST_CALLED(maphist); 1103 1104 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1105 map, start, size, flags); 1106 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1107 1108 /* 1109 * detect a popular device driver bug. 1110 */ 1111 1112 KASSERT(doing_shutdown || curlwp != NULL); 1113 1114 /* 1115 * zero-sized mapping doesn't make any sense. 1116 */ 1117 KASSERT(size > 0); 1118 1119 KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0); 1120 1121 uvm_map_check(map, "map entry"); 1122 1123 /* 1124 * check sanity of protection code 1125 */ 1126 1127 if ((prot & maxprot) != prot) { 1128 UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x", 1129 prot, maxprot,0,0); 1130 return EACCES; 1131 } 1132 1133 /* 1134 * figure out where to put new VM range 1135 */ 1136 retry: 1137 if (vm_map_lock_try(map) == false) { 1138 if ((flags & UVM_FLAG_TRYLOCK) != 0) { 1139 return EAGAIN; 1140 } 1141 vm_map_lock(map); /* could sleep here */ 1142 } 1143 prev_entry = uvm_map_findspace(map, start, size, &start, 1144 uobj, uoffset, align, flags); 1145 if (prev_entry == NULL) { 1146 unsigned int timestamp; 1147 1148 timestamp = map->timestamp; 1149 UVMHIST_LOG(maphist,"waiting va timestamp=0x%x", 1150 timestamp,0,0,0); 1151 map->flags |= VM_MAP_WANTVA; 1152 vm_map_unlock(map); 1153 1154 /* 1155 * try to reclaim kva and wait until someone does unmap. 1156 * fragile locking here, so we awaken every second to 1157 * recheck the condition. 1158 */ 1159 1160 mutex_enter(&map->misc_lock); 1161 while ((map->flags & VM_MAP_WANTVA) != 0 && 1162 map->timestamp == timestamp) { 1163 if ((flags & UVM_FLAG_WAITVA) == 0) { 1164 mutex_exit(&map->misc_lock); 1165 UVMHIST_LOG(maphist, 1166 "<- uvm_map_findspace failed!", 0,0,0,0); 1167 return ENOMEM; 1168 } else { 1169 cv_timedwait(&map->cv, &map->misc_lock, hz); 1170 } 1171 } 1172 mutex_exit(&map->misc_lock); 1173 goto retry; 1174 } 1175 1176 #ifdef PMAP_GROWKERNEL 1177 /* 1178 * If the kernel pmap can't map the requested space, 1179 * then allocate more resources for it. 
1180 */ 1181 if (map == kernel_map && uvm_maxkaddr < (start + size)) 1182 uvm_maxkaddr = pmap_growkernel(start + size); 1183 #endif 1184 1185 UVMMAP_EVCNT_INCR(map_call); 1186 1187 /* 1188 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 1189 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 1190 * either case we want to zero it before storing it in the map entry 1191 * (because it looks strange and confusing when debugging...) 1192 * 1193 * if uobj is not null 1194 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 1195 * and we do not need to change uoffset. 1196 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 1197 * now (based on the starting address of the map). this case is 1198 * for kernel object mappings where we don't know the offset until 1199 * the virtual address is found (with uvm_map_findspace). the 1200 * offset is the distance we are from the start of the map. 1201 */ 1202 1203 if (uobj == NULL) { 1204 uoffset = 0; 1205 } else { 1206 if (uoffset == UVM_UNKNOWN_OFFSET) { 1207 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1208 uoffset = start - vm_map_min(kernel_map); 1209 } 1210 } 1211 1212 args->uma_flags = flags; 1213 args->uma_prev = prev_entry; 1214 args->uma_start = start; 1215 args->uma_size = size; 1216 args->uma_uobj = uobj; 1217 args->uma_uoffset = uoffset; 1218 1219 UVMHIST_LOG(maphist, "<- done!", 0,0,0,0); 1220 return 0; 1221 } 1222 1223 /* 1224 * uvm_map_enter: 1225 * 1226 * called with map locked. 1227 * unlock the map before returning. 1228 */ 1229 1230 int 1231 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args, 1232 struct vm_map_entry *new_entry) 1233 { 1234 struct vm_map_entry *prev_entry = args->uma_prev; 1235 struct vm_map_entry *dead = NULL; 1236 1237 const uvm_flag_t flags = args->uma_flags; 1238 const vm_prot_t prot = UVM_PROTECTION(flags); 1239 const vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1240 const vm_inherit_t inherit = UVM_INHERIT(flags); 1241 const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ? 1242 AMAP_EXTEND_NOWAIT : 0; 1243 const int advice = UVM_ADVICE(flags); 1244 1245 vaddr_t start = args->uma_start; 1246 vsize_t size = args->uma_size; 1247 struct uvm_object *uobj = args->uma_uobj; 1248 voff_t uoffset = args->uma_uoffset; 1249 1250 const int kmap = (vm_map_pmap(map) == pmap_kernel()); 1251 int merged = 0; 1252 int error; 1253 int newetype; 1254 1255 UVMHIST_FUNC("uvm_map_enter"); 1256 UVMHIST_CALLED(maphist); 1257 1258 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1259 map, start, size, flags); 1260 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1261 1262 KASSERT(map->hint == prev_entry); /* bimerge case assumes this */ 1263 KASSERT(vm_map_locked_p(map)); 1264 1265 if (uobj) 1266 newetype = UVM_ET_OBJ; 1267 else 1268 newetype = 0; 1269 1270 if (flags & UVM_FLAG_COPYONW) { 1271 newetype |= UVM_ET_COPYONWRITE; 1272 if ((flags & UVM_FLAG_OVERLAY) == 0) 1273 newetype |= UVM_ET_NEEDSCOPY; 1274 } 1275 1276 /* 1277 * try and insert in map by extending previous entry, if possible. 1278 * XXX: we don't try and pull back the next entry. might be useful 1279 * for a stack, but we are currently allocating our stack in advance. 
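	 *
	 * A sketch of the merge cases handled below, for neighbours that
	 * pass UVM_ET_ISCOMPATIBLE (addresses invented):
	 *
	 *	back merge:    [ prev ][ new ]          -> prev grows forward
	 *	forward merge:         [ new ][ next ]  -> next grows backward
	 *	bimerge:       [ prev ][ new ][ next ]  -> prev absorbs both,
	 *	                                           next is freed
	 *
	 * amaps are the main complication: a shared amap (refs != 1) is
	 * never extended, and two existing amaps are never merged.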
1280 */ 1281 1282 if (flags & UVM_FLAG_NOMERGE) 1283 goto nomerge; 1284 1285 if (prev_entry->end == start && 1286 prev_entry != &map->header && 1287 UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0, 1288 prot, maxprot, inherit, advice, 0)) { 1289 1290 if (uobj && prev_entry->offset + 1291 (prev_entry->end - prev_entry->start) != uoffset) 1292 goto forwardmerge; 1293 1294 /* 1295 * can't extend a shared amap. note: no need to lock amap to 1296 * look at refs since we don't care about its exact value. 1297 * if it is one (i.e. we have only reference) it will stay there 1298 */ 1299 1300 if (prev_entry->aref.ar_amap && 1301 amap_refs(prev_entry->aref.ar_amap) != 1) { 1302 goto forwardmerge; 1303 } 1304 1305 if (prev_entry->aref.ar_amap) { 1306 error = amap_extend(prev_entry, size, 1307 amapwaitflag | AMAP_EXTEND_FORWARDS); 1308 if (error) 1309 goto nomerge; 1310 } 1311 1312 if (kmap) { 1313 UVMMAP_EVCNT_INCR(kbackmerge); 1314 } else { 1315 UVMMAP_EVCNT_INCR(ubackmerge); 1316 } 1317 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 1318 1319 /* 1320 * drop our reference to uobj since we are extending a reference 1321 * that we already have (the ref count can not drop to zero). 1322 */ 1323 1324 if (uobj && uobj->pgops->pgo_detach) 1325 uobj->pgops->pgo_detach(uobj); 1326 1327 /* 1328 * Now that we've merged the entries, note that we've grown 1329 * and our gap has shrunk. Then fix the tree. 1330 */ 1331 prev_entry->end += size; 1332 prev_entry->gap -= size; 1333 uvm_rb_fixup(map, prev_entry); 1334 1335 uvm_map_check(map, "map backmerged"); 1336 1337 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 1338 merged++; 1339 } 1340 1341 forwardmerge: 1342 if (prev_entry->next->start == (start + size) && 1343 prev_entry->next != &map->header && 1344 UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0, 1345 prot, maxprot, inherit, advice, 0)) { 1346 1347 if (uobj && prev_entry->next->offset != uoffset + size) 1348 goto nomerge; 1349 1350 /* 1351 * can't extend a shared amap. note: no need to lock amap to 1352 * look at refs since we don't care about its exact value. 1353 * if it is one (i.e. we have only reference) it will stay there. 1354 * 1355 * note that we also can't merge two amaps, so if we 1356 * merged with the previous entry which has an amap, 1357 * and the next entry also has an amap, we give up. 1358 * 1359 * Interesting cases: 1360 * amap, new, amap -> give up second merge (single fwd extend) 1361 * amap, new, none -> double forward extend (extend again here) 1362 * none, new, amap -> double backward extend (done here) 1363 * uobj, new, amap -> single backward extend (done here) 1364 * 1365 * XXX should we attempt to deal with someone refilling 1366 * the deallocated region between two entries that are 1367 * backed by the same amap (ie, arefs is 2, "prev" and 1368 * "next" refer to it, and adding this allocation will 1369 * close the hole, thus restoring arefs to 1 and 1370 * deallocating the "next" vm_map_entry)? -- @@@ 1371 */ 1372 1373 if (prev_entry->next->aref.ar_amap && 1374 (amap_refs(prev_entry->next->aref.ar_amap) != 1 || 1375 (merged && prev_entry->aref.ar_amap))) { 1376 goto nomerge; 1377 } 1378 1379 if (merged) { 1380 /* 1381 * Try to extend the amap of the previous entry to 1382 * cover the next entry as well. If it doesn't work 1383 * just skip on, don't actually give up, since we've 1384 * already completed the back merge. 
1385 */ 1386 if (prev_entry->aref.ar_amap) { 1387 if (amap_extend(prev_entry, 1388 prev_entry->next->end - 1389 prev_entry->next->start, 1390 amapwaitflag | AMAP_EXTEND_FORWARDS)) 1391 goto nomerge; 1392 } 1393 1394 /* 1395 * Try to extend the amap of the *next* entry 1396 * back to cover the new allocation *and* the 1397 * previous entry as well (the previous merge 1398 * didn't have an amap already otherwise we 1399 * wouldn't be checking here for an amap). If 1400 * it doesn't work just skip on, again, don't 1401 * actually give up, since we've already 1402 * completed the back merge. 1403 */ 1404 else if (prev_entry->next->aref.ar_amap) { 1405 if (amap_extend(prev_entry->next, 1406 prev_entry->end - 1407 prev_entry->start, 1408 amapwaitflag | AMAP_EXTEND_BACKWARDS)) 1409 goto nomerge; 1410 } 1411 } else { 1412 /* 1413 * Pull the next entry's amap backwards to cover this 1414 * new allocation. 1415 */ 1416 if (prev_entry->next->aref.ar_amap) { 1417 error = amap_extend(prev_entry->next, size, 1418 amapwaitflag | AMAP_EXTEND_BACKWARDS); 1419 if (error) 1420 goto nomerge; 1421 } 1422 } 1423 1424 if (merged) { 1425 if (kmap) { 1426 UVMMAP_EVCNT_DECR(kbackmerge); 1427 UVMMAP_EVCNT_INCR(kbimerge); 1428 } else { 1429 UVMMAP_EVCNT_DECR(ubackmerge); 1430 UVMMAP_EVCNT_INCR(ubimerge); 1431 } 1432 } else { 1433 if (kmap) { 1434 UVMMAP_EVCNT_INCR(kforwmerge); 1435 } else { 1436 UVMMAP_EVCNT_INCR(uforwmerge); 1437 } 1438 } 1439 UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0); 1440 1441 /* 1442 * drop our reference to uobj since we are extending a reference 1443 * that we already have (the ref count can not drop to zero). 1444 * (if merged, we've already detached) 1445 */ 1446 if (uobj && uobj->pgops->pgo_detach && !merged) 1447 uobj->pgops->pgo_detach(uobj); 1448 1449 if (merged) { 1450 dead = prev_entry->next; 1451 prev_entry->end = dead->end; 1452 uvm_map_entry_unlink(map, dead); 1453 if (dead->aref.ar_amap != NULL) { 1454 prev_entry->aref = dead->aref; 1455 dead->aref.ar_amap = NULL; 1456 } 1457 } else { 1458 prev_entry->next->start -= size; 1459 if (prev_entry != &map->header) { 1460 prev_entry->gap -= size; 1461 KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry)); 1462 uvm_rb_fixup(map, prev_entry); 1463 } 1464 if (uobj) 1465 prev_entry->next->offset = uoffset; 1466 } 1467 1468 uvm_map_check(map, "map forwardmerged"); 1469 1470 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0); 1471 merged++; 1472 } 1473 1474 nomerge: 1475 if (!merged) { 1476 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 1477 if (kmap) { 1478 UVMMAP_EVCNT_INCR(knomerge); 1479 } else { 1480 UVMMAP_EVCNT_INCR(unomerge); 1481 } 1482 1483 /* 1484 * allocate new entry and link it in. 
1485 */ 1486 1487 if (new_entry == NULL) { 1488 new_entry = uvm_mapent_alloc(map, 1489 (flags & UVM_FLAG_NOWAIT)); 1490 if (__predict_false(new_entry == NULL)) { 1491 error = ENOMEM; 1492 goto done; 1493 } 1494 } 1495 new_entry->start = start; 1496 new_entry->end = new_entry->start + size; 1497 new_entry->object.uvm_obj = uobj; 1498 new_entry->offset = uoffset; 1499 1500 new_entry->etype = newetype; 1501 1502 if (flags & UVM_FLAG_NOMERGE) { 1503 new_entry->flags |= UVM_MAP_NOMERGE; 1504 } 1505 1506 new_entry->protection = prot; 1507 new_entry->max_protection = maxprot; 1508 new_entry->inheritance = inherit; 1509 new_entry->wired_count = 0; 1510 new_entry->advice = advice; 1511 if (flags & UVM_FLAG_OVERLAY) { 1512 1513 /* 1514 * to_add: for BSS we overallocate a little since we 1515 * are likely to extend 1516 */ 1517 1518 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 1519 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 1520 struct vm_amap *amap = amap_alloc(size, to_add, 1521 (flags & UVM_FLAG_NOWAIT)); 1522 if (__predict_false(amap == NULL)) { 1523 error = ENOMEM; 1524 goto done; 1525 } 1526 new_entry->aref.ar_pageoff = 0; 1527 new_entry->aref.ar_amap = amap; 1528 } else { 1529 new_entry->aref.ar_pageoff = 0; 1530 new_entry->aref.ar_amap = NULL; 1531 } 1532 uvm_map_entry_link(map, prev_entry, new_entry); 1533 1534 /* 1535 * Update the free space hint 1536 */ 1537 1538 if ((map->first_free == prev_entry) && 1539 (prev_entry->end >= new_entry->start)) 1540 map->first_free = new_entry; 1541 1542 new_entry = NULL; 1543 } 1544 1545 map->size += size; 1546 1547 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1548 1549 error = 0; 1550 done: 1551 vm_map_unlock(map); 1552 1553 if (new_entry) { 1554 uvm_mapent_free(new_entry); 1555 } 1556 1557 if (dead) { 1558 KDASSERT(merged); 1559 uvm_mapent_free(dead); 1560 } 1561 1562 return error; 1563 } 1564 1565 /* 1566 * uvm_map_lookup_entry_bytree: lookup an entry in tree 1567 */ 1568 1569 static inline bool 1570 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address, 1571 struct vm_map_entry **entry /* OUT */) 1572 { 1573 struct vm_map_entry *prev = &map->header; 1574 struct vm_map_entry *cur = ROOT_ENTRY(map); 1575 1576 while (cur) { 1577 UVMMAP_EVCNT_INCR(mlk_treeloop); 1578 if (address >= cur->start) { 1579 if (address < cur->end) { 1580 *entry = cur; 1581 return true; 1582 } 1583 prev = cur; 1584 cur = RIGHT_ENTRY(cur); 1585 } else 1586 cur = LEFT_ENTRY(cur); 1587 } 1588 *entry = prev; 1589 return false; 1590 } 1591 1592 /* 1593 * uvm_map_lookup_entry: find map entry at or before an address 1594 * 1595 * => map must at least be read-locked by caller 1596 * => entry is returned in "entry" 1597 * => return value is true if address is in the returned entry 1598 */ 1599 1600 bool 1601 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1602 struct vm_map_entry **entry /* OUT */) 1603 { 1604 struct vm_map_entry *cur; 1605 bool use_tree = false; 1606 UVMHIST_FUNC("uvm_map_lookup_entry"); 1607 UVMHIST_CALLED(maphist); 1608 1609 UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)", 1610 map, address, entry, 0); 1611 1612 /* 1613 * start looking either from the head of the 1614 * list, or from the hint. 1615 */ 1616 1617 cur = map->hint; 1618 1619 if (cur == &map->header) 1620 cur = cur->next; 1621 1622 UVMMAP_EVCNT_INCR(mlk_call); 1623 if (address >= cur->start) { 1624 1625 /* 1626 * go from hint to end of list. 1627 * 1628 * but first, make a quick check to see if 1629 * we are already looking at the entry we 1630 * want (which is usually the case). 
1631 * note also that we don't need to save the hint 1632 * here... it is the same hint (unless we are 1633 * at the header, in which case the hint didn't 1634 * buy us anything anyway). 1635 */ 1636 1637 if (cur != &map->header && cur->end > address) { 1638 UVMMAP_EVCNT_INCR(mlk_hint); 1639 *entry = cur; 1640 UVMHIST_LOG(maphist,"<- got it via hint (0x%x)", 1641 cur, 0, 0, 0); 1642 uvm_mapent_check(*entry); 1643 return (true); 1644 } 1645 1646 if (map->nentries > 15) 1647 use_tree = true; 1648 } else { 1649 1650 /* 1651 * invalid hint. use tree. 1652 */ 1653 use_tree = true; 1654 } 1655 1656 uvm_map_check(map, __func__); 1657 1658 if (use_tree) { 1659 /* 1660 * Simple lookup in the tree. Happens when the hint is 1661 * invalid, or nentries reach a threshold. 1662 */ 1663 UVMMAP_EVCNT_INCR(mlk_tree); 1664 if (uvm_map_lookup_entry_bytree(map, address, entry)) { 1665 goto got; 1666 } else { 1667 goto failed; 1668 } 1669 } 1670 1671 /* 1672 * search linearly 1673 */ 1674 1675 UVMMAP_EVCNT_INCR(mlk_list); 1676 while (cur != &map->header) { 1677 UVMMAP_EVCNT_INCR(mlk_listloop); 1678 if (cur->end > address) { 1679 if (address >= cur->start) { 1680 /* 1681 * save this lookup for future 1682 * hints, and return 1683 */ 1684 1685 *entry = cur; 1686 got: 1687 SAVE_HINT(map, map->hint, *entry); 1688 UVMHIST_LOG(maphist,"<- search got it (0x%x)", 1689 cur, 0, 0, 0); 1690 KDASSERT((*entry)->start <= address); 1691 KDASSERT(address < (*entry)->end); 1692 uvm_mapent_check(*entry); 1693 return (true); 1694 } 1695 break; 1696 } 1697 cur = cur->next; 1698 } 1699 *entry = cur->prev; 1700 failed: 1701 SAVE_HINT(map, map->hint, *entry); 1702 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); 1703 KDASSERT((*entry) == &map->header || (*entry)->end <= address); 1704 KDASSERT((*entry)->next == &map->header || 1705 address < (*entry)->next->start); 1706 return (false); 1707 } 1708 1709 /* 1710 * See if the range between start and start + length fits in the gap 1711 * entry->next->start and entry->end. Returns 1 if fits, 0 if doesn't 1712 * fit, and -1 address wraps around. 1713 */ 1714 static int 1715 uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset, 1716 vsize_t align, int flags, int topdown, struct vm_map_entry *entry) 1717 { 1718 vaddr_t end; 1719 1720 #ifdef PMAP_PREFER 1721 /* 1722 * push start address forward as needed to avoid VAC alias problems. 1723 * we only do this if a valid offset is specified. 1724 */ 1725 1726 if (uoffset != UVM_UNKNOWN_OFFSET) 1727 PMAP_PREFER(uoffset, start, length, topdown); 1728 #endif 1729 if ((flags & UVM_FLAG_COLORMATCH) != 0) { 1730 KASSERT(align < uvmexp.ncolors); 1731 if (uvmexp.ncolors > 1) { 1732 const u_int colormask = uvmexp.colormask; 1733 const u_int colorsize = colormask + 1; 1734 vaddr_t hint = atop(*start); 1735 const u_int color = hint & colormask; 1736 if (color != align) { 1737 hint -= color; /* adjust to color boundary */ 1738 KASSERT((hint & colormask) == 0); 1739 if (topdown) { 1740 if (align > color) 1741 hint -= colorsize; 1742 } else { 1743 if (align < color) 1744 hint += colorsize; 1745 } 1746 *start = ptoa(hint + align); /* adjust to color */ 1747 } 1748 } 1749 } else if (align != 0) { 1750 if ((*start & (align - 1)) != 0) { 1751 if (topdown) 1752 *start &= ~(align - 1); 1753 else 1754 *start = roundup(*start, align); 1755 } 1756 /* 1757 * XXX Should we PMAP_PREFER() here again? 1758 * eh...i think we're okay 1759 */ 1760 } 1761 1762 /* 1763 * Find the end of the proposed new region. 
Be sure we didn't 1764 * wrap around the address; if so, we lose. Otherwise, if the 1765 * proposed new region fits before the next entry, we win. 1766 */ 1767 1768 end = *start + length; 1769 if (end < *start) 1770 return (-1); 1771 1772 if (entry->next->start >= end && *start >= entry->end) 1773 return (1); 1774 1775 return (0); 1776 } 1777 1778 /* 1779 * uvm_map_findspace: find "length" sized space in "map". 1780 * 1781 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is 1782 * set in "flags" (in which case we insist on using "hint"). 1783 * => "result" is VA returned 1784 * => uobj/uoffset are to be used to handle VAC alignment, if required 1785 * => if "align" is non-zero, we attempt to align to that value. 1786 * => caller must at least have read-locked map 1787 * => returns NULL on failure, or pointer to prev. map entry if success 1788 * => note this is a cross between the old vm_map_findspace and vm_map_find 1789 */ 1790 1791 struct vm_map_entry * 1792 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1793 vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, 1794 vsize_t align, int flags) 1795 { 1796 struct vm_map_entry *entry; 1797 struct vm_map_entry *child, *prev, *tmp; 1798 vaddr_t orig_hint; 1799 const int topdown = map->flags & VM_MAP_TOPDOWN; 1800 UVMHIST_FUNC("uvm_map_findspace"); 1801 UVMHIST_CALLED(maphist); 1802 1803 UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", 1804 map, hint, length, flags); 1805 KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || (align & (align - 1)) == 0); 1806 KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors); 1807 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1808 1809 uvm_map_check(map, "map_findspace entry"); 1810 1811 /* 1812 * remember the original hint. if we are aligning, then we 1813 * may have to try again with no alignment constraint if 1814 * we fail the first time. 1815 */ 1816 1817 orig_hint = hint; 1818 if (hint < vm_map_min(map)) { /* check ranges ... */ 1819 if (flags & UVM_FLAG_FIXED) { 1820 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1821 return (NULL); 1822 } 1823 hint = vm_map_min(map); 1824 } 1825 if (hint > vm_map_max(map)) { 1826 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]", 1827 hint, vm_map_min(map), vm_map_max(map), 0); 1828 return (NULL); 1829 } 1830 1831 /* 1832 * Look for the first possible address; if there's already 1833 * something at this address, we have to start after it. 1834 */ 1835 1836 /* 1837 * @@@: there are four, no, eight cases to consider. 1838 * 1839 * 0: found, fixed, bottom up -> fail 1840 * 1: found, fixed, top down -> fail 1841 * 2: found, not fixed, bottom up -> start after entry->end, 1842 * loop up 1843 * 3: found, not fixed, top down -> start before entry->start, 1844 * loop down 1845 * 4: not found, fixed, bottom up -> check entry->next->start, fail 1846 * 5: not found, fixed, top down -> check entry->next->start, fail 1847 * 6: not found, not fixed, bottom up -> check entry->next->start, 1848 * loop up 1849 * 7: not found, not fixed, top down -> check entry->next->start, 1850 * loop down 1851 * 1852 * as you can see, it reduces to roughly five cases, and that 1853 * adding top down mapping only adds one unique case (without 1854 * it, there would be four cases). 1855 */ 1856 1857 if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) { 1858 entry = map->first_free; 1859 } else { 1860 if (uvm_map_lookup_entry(map, hint, &entry)) { 1861 /* "hint" address already in use ... 
*/ 1862 if (flags & UVM_FLAG_FIXED) { 1863 UVMHIST_LOG(maphist, "<- fixed & VA in use", 1864 0, 0, 0, 0); 1865 return (NULL); 1866 } 1867 if (topdown) 1868 /* Start from lower gap. */ 1869 entry = entry->prev; 1870 } else if (flags & UVM_FLAG_FIXED) { 1871 if (entry->next->start >= hint + length && 1872 hint + length > hint) 1873 goto found; 1874 1875 /* "hint" address is gap but too small */ 1876 UVMHIST_LOG(maphist, "<- fixed mapping failed", 1877 0, 0, 0, 0); 1878 return (NULL); /* only one shot at it ... */ 1879 } else { 1880 /* 1881 * See if given hint fits in this gap. 1882 */ 1883 switch (uvm_map_space_avail(&hint, length, 1884 uoffset, align, flags, topdown, entry)) { 1885 case 1: 1886 goto found; 1887 case -1: 1888 goto wraparound; 1889 } 1890 1891 if (topdown) { 1892 /* 1893 * Still there is a chance to fit 1894 * if hint > entry->end. 1895 */ 1896 } else { 1897 /* Start from higher gap. */ 1898 entry = entry->next; 1899 if (entry == &map->header) 1900 goto notfound; 1901 goto nextgap; 1902 } 1903 } 1904 } 1905 1906 /* 1907 * Note that all UVM_FLAGS_FIXED case is already handled. 1908 */ 1909 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 1910 1911 /* Try to find the space in the red-black tree */ 1912 1913 /* Check slot before any entry */ 1914 hint = topdown ? entry->next->start - length : entry->end; 1915 switch (uvm_map_space_avail(&hint, length, uoffset, align, flags, 1916 topdown, entry)) { 1917 case 1: 1918 goto found; 1919 case -1: 1920 goto wraparound; 1921 } 1922 1923 nextgap: 1924 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 1925 /* If there is not enough space in the whole tree, we fail */ 1926 tmp = ROOT_ENTRY(map); 1927 if (tmp == NULL || tmp->maxgap < length) 1928 goto notfound; 1929 1930 prev = NULL; /* previous candidate */ 1931 1932 /* Find an entry close to hint that has enough space */ 1933 for (; tmp;) { 1934 KASSERT(tmp->next->start == tmp->end + tmp->gap); 1935 if (topdown) { 1936 if (tmp->next->start < hint + length && 1937 (prev == NULL || tmp->end > prev->end)) { 1938 if (tmp->gap >= length) 1939 prev = tmp; 1940 else if ((child = LEFT_ENTRY(tmp)) != NULL 1941 && child->maxgap >= length) 1942 prev = tmp; 1943 } 1944 } else { 1945 if (tmp->end >= hint && 1946 (prev == NULL || tmp->end < prev->end)) { 1947 if (tmp->gap >= length) 1948 prev = tmp; 1949 else if ((child = RIGHT_ENTRY(tmp)) != NULL 1950 && child->maxgap >= length) 1951 prev = tmp; 1952 } 1953 } 1954 if (tmp->next->start < hint + length) 1955 child = RIGHT_ENTRY(tmp); 1956 else if (tmp->end > hint) 1957 child = LEFT_ENTRY(tmp); 1958 else { 1959 if (tmp->gap >= length) 1960 break; 1961 if (topdown) 1962 child = LEFT_ENTRY(tmp); 1963 else 1964 child = RIGHT_ENTRY(tmp); 1965 } 1966 if (child == NULL || child->maxgap < length) 1967 break; 1968 tmp = child; 1969 } 1970 1971 if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) { 1972 /* 1973 * Check if the entry that we found satifies the 1974 * space requirement 1975 */ 1976 if (topdown) { 1977 if (hint > tmp->next->start - length) 1978 hint = tmp->next->start - length; 1979 } else { 1980 if (hint < tmp->end) 1981 hint = tmp->end; 1982 } 1983 switch (uvm_map_space_avail(&hint, length, uoffset, align, 1984 flags, topdown, tmp)) { 1985 case 1: 1986 entry = tmp; 1987 goto found; 1988 case -1: 1989 goto wraparound; 1990 } 1991 if (tmp->gap >= length) 1992 goto listsearch; 1993 } 1994 if (prev == NULL) 1995 goto notfound; 1996 1997 if (topdown) { 1998 KASSERT(orig_hint >= prev->next->start - length || 1999 prev->next->start - length > 
prev->next->start); 2000 hint = prev->next->start - length; 2001 } else { 2002 KASSERT(orig_hint <= prev->end); 2003 hint = prev->end; 2004 } 2005 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2006 flags, topdown, prev)) { 2007 case 1: 2008 entry = prev; 2009 goto found; 2010 case -1: 2011 goto wraparound; 2012 } 2013 if (prev->gap >= length) 2014 goto listsearch; 2015 2016 if (topdown) 2017 tmp = LEFT_ENTRY(prev); 2018 else 2019 tmp = RIGHT_ENTRY(prev); 2020 for (;;) { 2021 KASSERT(tmp && tmp->maxgap >= length); 2022 if (topdown) 2023 child = RIGHT_ENTRY(tmp); 2024 else 2025 child = LEFT_ENTRY(tmp); 2026 if (child && child->maxgap >= length) { 2027 tmp = child; 2028 continue; 2029 } 2030 if (tmp->gap >= length) 2031 break; 2032 if (topdown) 2033 tmp = LEFT_ENTRY(tmp); 2034 else 2035 tmp = RIGHT_ENTRY(tmp); 2036 } 2037 2038 if (topdown) { 2039 KASSERT(orig_hint >= tmp->next->start - length || 2040 tmp->next->start - length > tmp->next->start); 2041 hint = tmp->next->start - length; 2042 } else { 2043 KASSERT(orig_hint <= tmp->end); 2044 hint = tmp->end; 2045 } 2046 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2047 flags, topdown, tmp)) { 2048 case 1: 2049 entry = tmp; 2050 goto found; 2051 case -1: 2052 goto wraparound; 2053 } 2054 2055 /* 2056 * The tree fails to find an entry because of offset or alignment 2057 * restrictions. Search the list instead. 2058 */ 2059 listsearch: 2060 /* 2061 * Look through the rest of the map, trying to fit a new region in 2062 * the gap between existing regions, or after the very last region. 2063 * note: entry->end = base VA of current gap, 2064 * entry->next->start = VA of end of current gap 2065 */ 2066 2067 for (;;) { 2068 /* Update hint for current gap. */ 2069 hint = topdown ? entry->next->start - length : entry->end; 2070 2071 /* See if it fits. */ 2072 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2073 flags, topdown, entry)) { 2074 case 1: 2075 goto found; 2076 case -1: 2077 goto wraparound; 2078 } 2079 2080 /* Advance to next/previous gap */ 2081 if (topdown) { 2082 if (entry == &map->header) { 2083 UVMHIST_LOG(maphist, "<- failed (off start)", 2084 0,0,0,0); 2085 goto notfound; 2086 } 2087 entry = entry->prev; 2088 } else { 2089 entry = entry->next; 2090 if (entry == &map->header) { 2091 UVMHIST_LOG(maphist, "<- failed (off end)", 2092 0,0,0,0); 2093 goto notfound; 2094 } 2095 } 2096 } 2097 2098 found: 2099 SAVE_HINT(map, map->hint, entry); 2100 *result = hint; 2101 UVMHIST_LOG(maphist,"<- got it! 
(result=0x%x)", hint, 0,0,0); 2102 KASSERT( topdown || hint >= orig_hint); 2103 KASSERT(!topdown || hint <= orig_hint); 2104 KASSERT(entry->end <= hint); 2105 KASSERT(hint + length <= entry->next->start); 2106 return (entry); 2107 2108 wraparound: 2109 UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0); 2110 2111 return (NULL); 2112 2113 notfound: 2114 UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0); 2115 2116 return (NULL); 2117 } 2118 2119 /* 2120 * U N M A P - m a i n h e l p e r f u n c t i o n s 2121 */ 2122 2123 /* 2124 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 2125 * 2126 * => caller must check alignment and size 2127 * => map must be locked by caller 2128 * => we return a list of map entries that we've remove from the map 2129 * in "entry_list" 2130 */ 2131 2132 void 2133 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2134 struct vm_map_entry **entry_list /* OUT */, int flags) 2135 { 2136 struct vm_map_entry *entry, *first_entry, *next; 2137 vaddr_t len; 2138 UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist); 2139 2140 UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", 2141 map, start, end, 0); 2142 VM_MAP_RANGE_CHECK(map, start, end); 2143 2144 uvm_map_check(map, "unmap_remove entry"); 2145 2146 /* 2147 * find first entry 2148 */ 2149 2150 if (uvm_map_lookup_entry(map, start, &first_entry) == true) { 2151 /* clip and go... */ 2152 entry = first_entry; 2153 UVM_MAP_CLIP_START(map, entry, start); 2154 /* critical! prevents stale hint */ 2155 SAVE_HINT(map, entry, entry->prev); 2156 } else { 2157 entry = first_entry->next; 2158 } 2159 2160 /* 2161 * Save the free space hint 2162 */ 2163 2164 if (map->first_free != &map->header && map->first_free->start >= start) 2165 map->first_free = entry->prev; 2166 2167 /* 2168 * note: we now re-use first_entry for a different task. we remove 2169 * a number of map entries from the map and save them in a linked 2170 * list headed by "first_entry". once we remove them from the map 2171 * the caller should unlock the map and drop the references to the 2172 * backing objects [c.f. uvm_unmap_detach]. the object is to 2173 * separate unmapping from reference dropping. why? 2174 * [1] the map has to be locked for unmapping 2175 * [2] the map need not be locked for reference dropping 2176 * [3] dropping references may trigger pager I/O, and if we hit 2177 * a pager that does synchronous I/O we may have to wait for it. 2178 * [4] we would like all waiting for I/O to occur with maps unlocked 2179 * so that we don't block other threads. 2180 */ 2181 2182 first_entry = NULL; 2183 *entry_list = NULL; 2184 2185 /* 2186 * break up the area into map entry sized regions and unmap. note 2187 * that all mappings have to be removed before we can even consider 2188 * dropping references to amaps or VM objects (otherwise we could end 2189 * up with a mapping to a page on the free list which would be very bad) 2190 */ 2191 2192 while ((entry != &map->header) && (entry->start < end)) { 2193 KASSERT((entry->flags & UVM_MAP_STATIC) == 0); 2194 2195 UVM_MAP_CLIP_END(map, entry, end); 2196 next = entry->next; 2197 len = entry->end - entry->start; 2198 2199 /* 2200 * unwire before removing addresses from the pmap; otherwise 2201 * unwiring will put the entries back into the pmap (XXX). 
2202 */ 2203 2204 if (VM_MAPENT_ISWIRED(entry)) { 2205 uvm_map_entry_unwire(map, entry); 2206 } 2207 if (flags & UVM_FLAG_VAONLY) { 2208 2209 /* nothing */ 2210 2211 } else if ((map->flags & VM_MAP_PAGEABLE) == 0) { 2212 2213 /* 2214 * if the map is non-pageable, any pages mapped there 2215 * must be wired and entered with pmap_kenter_pa(), 2216 * and we should free any such pages immediately. 2217 * this is mostly used for kmem_map. 2218 */ 2219 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2220 2221 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2222 uvm_km_pgremove_intrsafe(map, entry->start, 2223 entry->end); 2224 } 2225 } else if (UVM_ET_ISOBJ(entry) && 2226 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2227 panic("%s: kernel object %p %p\n", 2228 __func__, map, entry); 2229 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2230 /* 2231 * remove mappings the standard way. lock object 2232 * and/or amap to ensure vm_page state does not 2233 * change while in pmap_remove(). 2234 */ 2235 2236 uvm_map_lock_entry(entry); 2237 pmap_remove(map->pmap, entry->start, entry->end); 2238 uvm_map_unlock_entry(entry); 2239 } 2240 2241 #if defined(DEBUG) 2242 if ((entry->flags & UVM_MAP_KMAPENT) == 0) { 2243 2244 /* 2245 * check if there's remaining mapping, 2246 * which is a bug in caller. 2247 */ 2248 2249 vaddr_t va; 2250 for (va = entry->start; va < entry->end; 2251 va += PAGE_SIZE) { 2252 if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2253 panic("%s: %#"PRIxVADDR" has mapping", 2254 __func__, va); 2255 } 2256 } 2257 2258 if (VM_MAP_IS_KERNEL(map)) { 2259 uvm_km_check_empty(map, entry->start, 2260 entry->end); 2261 } 2262 } 2263 #endif /* defined(DEBUG) */ 2264 2265 /* 2266 * remove entry from map and put it on our list of entries 2267 * that we've nuked. then go to next entry. 2268 */ 2269 2270 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); 2271 2272 /* critical! prevents stale hint */ 2273 SAVE_HINT(map, entry, entry->prev); 2274 2275 uvm_map_entry_unlink(map, entry); 2276 KASSERT(map->size >= len); 2277 map->size -= len; 2278 entry->prev = NULL; 2279 entry->next = first_entry; 2280 first_entry = entry; 2281 entry = next; 2282 } 2283 2284 /* 2285 * Note: if map is dying, leave pmap_update() for pmap_destroy(), 2286 * which will be called later. 2287 */ 2288 if ((map->flags & VM_MAP_DYING) == 0) { 2289 pmap_update(vm_map_pmap(map)); 2290 } else { 2291 KASSERT(vm_map_pmap(map) != pmap_kernel()); 2292 } 2293 2294 uvm_map_check(map, "unmap_remove leave"); 2295 2296 /* 2297 * now we've cleaned up the map and are ready for the caller to drop 2298 * references to the mapped objects. 2299 */ 2300 2301 *entry_list = first_entry; 2302 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2303 2304 if (map->flags & VM_MAP_WANTVA) { 2305 mutex_enter(&map->misc_lock); 2306 map->flags &= ~VM_MAP_WANTVA; 2307 cv_broadcast(&map->cv); 2308 mutex_exit(&map->misc_lock); 2309 } 2310 } 2311 2312 /* 2313 * uvm_unmap_detach: drop references in a chain of map entries 2314 * 2315 * => we will free the map entries as we traverse the list. 
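 * => editor's illustrative sketch (not part of the original source; the
 *    variable names are hypothetical): the usual pairing with
 *    uvm_unmap_remove() is to unmap while holding the map lock and then
 *    drop the references with the map unlocked, roughly:
 *
 *	struct vm_map_entry *dead_entries;
 *
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead_entries, 0);
 *	vm_map_unlock(map);
 *	if (dead_entries != NULL)
 *		uvm_unmap_detach(dead_entries, 0);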
2316 */ 2317 2318 void 2319 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2320 { 2321 struct vm_map_entry *next_entry; 2322 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 2323 2324 while (first_entry) { 2325 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2326 UVMHIST_LOG(maphist, 2327 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", 2328 first_entry, first_entry->aref.ar_amap, 2329 first_entry->object.uvm_obj, 2330 UVM_ET_ISSUBMAP(first_entry)); 2331 2332 /* 2333 * drop reference to amap, if we've got one 2334 */ 2335 2336 if (first_entry->aref.ar_amap) 2337 uvm_map_unreference_amap(first_entry, flags); 2338 2339 /* 2340 * drop reference to our backing object, if we've got one 2341 */ 2342 2343 KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2344 if (UVM_ET_ISOBJ(first_entry) && 2345 first_entry->object.uvm_obj->pgops->pgo_detach) { 2346 (*first_entry->object.uvm_obj->pgops->pgo_detach) 2347 (first_entry->object.uvm_obj); 2348 } 2349 next_entry = first_entry->next; 2350 uvm_mapent_free(first_entry); 2351 first_entry = next_entry; 2352 } 2353 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2354 } 2355 2356 /* 2357 * E X T R A C T I O N F U N C T I O N S 2358 */ 2359 2360 /* 2361 * uvm_map_reserve: reserve space in a vm_map for future use. 2362 * 2363 * => we reserve space in a map by putting a dummy map entry in the 2364 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2365 * => map should be unlocked (we will write lock it) 2366 * => we return true if we were able to reserve space 2367 * => XXXCDC: should be inline? 2368 */ 2369 2370 int 2371 uvm_map_reserve(struct vm_map *map, vsize_t size, 2372 vaddr_t offset /* hint for pmap_prefer */, 2373 vsize_t align /* alignment */, 2374 vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2375 uvm_flag_t flags /* UVM_FLAG_FIXED or 0 */) 2376 { 2377 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 2378 2379 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", 2380 map,size,offset,raddr); 2381 2382 size = round_page(size); 2383 2384 /* 2385 * reserve some virtual space. 2386 */ 2387 2388 if (uvm_map(map, raddr, size, NULL, offset, align, 2389 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 2390 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) { 2391 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 2392 return (false); 2393 } 2394 2395 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); 2396 return (true); 2397 } 2398 2399 /* 2400 * uvm_map_replace: replace a reserved (blank) area of memory with 2401 * real mappings. 
2402 * 2403 * => caller must WRITE-LOCK the map 2404 * => we return true if replacement was a success 2405 * => we expect the newents chain to have nnewents entrys on it and 2406 * we expect newents->prev to point to the last entry on the list 2407 * => note newents is allowed to be NULL 2408 */ 2409 2410 static int 2411 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 2412 struct vm_map_entry *newents, int nnewents, vsize_t nsize, 2413 struct vm_map_entry **oldentryp) 2414 { 2415 struct vm_map_entry *oldent, *last; 2416 2417 uvm_map_check(map, "map_replace entry"); 2418 2419 /* 2420 * first find the blank map entry at the specified address 2421 */ 2422 2423 if (!uvm_map_lookup_entry(map, start, &oldent)) { 2424 return (false); 2425 } 2426 2427 /* 2428 * check to make sure we have a proper blank entry 2429 */ 2430 2431 if (end < oldent->end) { 2432 UVM_MAP_CLIP_END(map, oldent, end); 2433 } 2434 if (oldent->start != start || oldent->end != end || 2435 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 2436 return (false); 2437 } 2438 2439 #ifdef DIAGNOSTIC 2440 2441 /* 2442 * sanity check the newents chain 2443 */ 2444 2445 { 2446 struct vm_map_entry *tmpent = newents; 2447 int nent = 0; 2448 vsize_t sz = 0; 2449 vaddr_t cur = start; 2450 2451 while (tmpent) { 2452 nent++; 2453 sz += tmpent->end - tmpent->start; 2454 if (tmpent->start < cur) 2455 panic("uvm_map_replace1"); 2456 if (tmpent->start >= tmpent->end || tmpent->end > end) { 2457 panic("uvm_map_replace2: " 2458 "tmpent->start=0x%"PRIxVADDR 2459 ", tmpent->end=0x%"PRIxVADDR 2460 ", end=0x%"PRIxVADDR, 2461 tmpent->start, tmpent->end, end); 2462 } 2463 cur = tmpent->end; 2464 if (tmpent->next) { 2465 if (tmpent->next->prev != tmpent) 2466 panic("uvm_map_replace3"); 2467 } else { 2468 if (newents->prev != tmpent) 2469 panic("uvm_map_replace4"); 2470 } 2471 tmpent = tmpent->next; 2472 } 2473 if (nent != nnewents) 2474 panic("uvm_map_replace5"); 2475 if (sz != nsize) 2476 panic("uvm_map_replace6"); 2477 } 2478 #endif 2479 2480 /* 2481 * map entry is a valid blank! replace it. (this does all the 2482 * work of map entry link/unlink...). 2483 */ 2484 2485 if (newents) { 2486 last = newents->prev; 2487 2488 /* critical: flush stale hints out of map */ 2489 SAVE_HINT(map, map->hint, newents); 2490 if (map->first_free == oldent) 2491 map->first_free = last; 2492 2493 last->next = oldent->next; 2494 last->next->prev = last; 2495 2496 /* Fix RB tree */ 2497 uvm_rb_remove(map, oldent); 2498 2499 newents->prev = oldent->prev; 2500 newents->prev->next = newents; 2501 map->nentries = map->nentries + (nnewents - 1); 2502 2503 /* Fixup the RB tree */ 2504 { 2505 int i; 2506 struct vm_map_entry *tmp; 2507 2508 tmp = newents; 2509 for (i = 0; i < nnewents && tmp; i++) { 2510 uvm_rb_insert(map, tmp); 2511 tmp = tmp->next; 2512 } 2513 } 2514 } else { 2515 /* NULL list of new entries: just remove the old one */ 2516 clear_hints(map, oldent); 2517 uvm_map_entry_unlink(map, oldent); 2518 } 2519 map->size -= end - start - nsize; 2520 2521 uvm_map_check(map, "map_replace leave"); 2522 2523 /* 2524 * now we can free the old blank entry and return. 
2525 */ 2526 2527 *oldentryp = oldent; 2528 return (true); 2529 } 2530 2531 /* 2532 * uvm_map_extract: extract a mapping from a map and put it somewhere 2533 * (maybe removing the old mapping) 2534 * 2535 * => maps should be unlocked (we will write lock them) 2536 * => returns 0 on success, error code otherwise 2537 * => start must be page aligned 2538 * => len must be page sized 2539 * => flags: 2540 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2541 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2542 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2543 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2544 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2545 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2546 * be used from within the kernel in a kernel level map <<< 2547 */ 2548 2549 int 2550 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2551 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2552 { 2553 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2554 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2555 *deadentry, *oldentry; 2556 struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */ 2557 vsize_t elen; 2558 int nchain, error, copy_ok; 2559 vsize_t nsize; 2560 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 2561 2562 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, 2563 len,0); 2564 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); 2565 2566 /* 2567 * step 0: sanity check: start must be on a page boundary, length 2568 * must be page sized. can't ask for CONTIG/QREF if you asked for 2569 * REMOVE. 2570 */ 2571 2572 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 2573 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2574 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2575 2576 /* 2577 * step 1: reserve space in the target map for the extracted area 2578 */ 2579 2580 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2581 dstaddr = vm_map_min(dstmap); 2582 if (!uvm_map_reserve(dstmap, len, start, 0, &dstaddr, 0)) 2583 return (ENOMEM); 2584 *dstaddrp = dstaddr; /* pass address back to caller */ 2585 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); 2586 } else { 2587 dstaddr = *dstaddrp; 2588 } 2589 2590 /* 2591 * step 2: setup for the extraction process loop by init'ing the 2592 * map entry chain, locking src map, and looking up the first useful 2593 * entry in the map. 2594 */ 2595 2596 end = start + len; 2597 newend = dstaddr + len; 2598 chain = endchain = NULL; 2599 nchain = 0; 2600 nsize = 0; 2601 vm_map_lock(srcmap); 2602 2603 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2604 2605 /* "start" is within an entry */ 2606 if (flags & UVM_EXTRACT_QREF) { 2607 2608 /* 2609 * for quick references we don't clip the entry, so 2610 * the entry may map space "before" the starting 2611 * virtual address... this is the "fudge" factor 2612 * (which can be non-zero only the first time 2613 * through the "while" loop in step 3). 2614 */ 2615 2616 fudge = start - entry->start; 2617 } else { 2618 2619 /* 2620 * normal reference: we clip the map to fit (thus 2621 * fudge is zero) 2622 */ 2623 2624 UVM_MAP_CLIP_START(srcmap, entry, start); 2625 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2626 fudge = 0; 2627 } 2628 } else { 2629 2630 /* "start" is not within an entry ... 
skip to next entry */ 2631 if (flags & UVM_EXTRACT_CONTIG) { 2632 error = EINVAL; 2633 goto bad; /* definite hole here ... */ 2634 } 2635 2636 entry = entry->next; 2637 fudge = 0; 2638 } 2639 2640 /* save values from srcmap for step 6 */ 2641 orig_entry = entry; 2642 orig_fudge = fudge; 2643 2644 /* 2645 * step 3: now start looping through the map entries, extracting 2646 * as we go. 2647 */ 2648 2649 while (entry->start < end && entry != &srcmap->header) { 2650 2651 /* if we are not doing a quick reference, clip it */ 2652 if ((flags & UVM_EXTRACT_QREF) == 0) 2653 UVM_MAP_CLIP_END(srcmap, entry, end); 2654 2655 /* clear needs_copy (allow chunking) */ 2656 if (UVM_ET_ISNEEDSCOPY(entry)) { 2657 amap_copy(srcmap, entry, 2658 AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2659 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2660 error = ENOMEM; 2661 goto bad; 2662 } 2663 2664 /* amap_copy could clip (during chunk)! update fudge */ 2665 if (fudge) { 2666 fudge = start - entry->start; 2667 orig_fudge = fudge; 2668 } 2669 } 2670 2671 /* calculate the offset of this from "start" */ 2672 oldoffset = (entry->start + fudge) - start; 2673 2674 /* allocate a new map entry */ 2675 newentry = uvm_mapent_alloc(dstmap, 0); 2676 if (newentry == NULL) { 2677 error = ENOMEM; 2678 goto bad; 2679 } 2680 2681 /* set up new map entry */ 2682 newentry->next = NULL; 2683 newentry->prev = endchain; 2684 newentry->start = dstaddr + oldoffset; 2685 newentry->end = 2686 newentry->start + (entry->end - (entry->start + fudge)); 2687 if (newentry->end > newend || newentry->end < newentry->start) 2688 newentry->end = newend; 2689 newentry->object.uvm_obj = entry->object.uvm_obj; 2690 if (newentry->object.uvm_obj) { 2691 if (newentry->object.uvm_obj->pgops->pgo_reference) 2692 newentry->object.uvm_obj->pgops-> 2693 pgo_reference(newentry->object.uvm_obj); 2694 newentry->offset = entry->offset + fudge; 2695 } else { 2696 newentry->offset = 0; 2697 } 2698 newentry->etype = entry->etype; 2699 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2700 entry->max_protection : entry->protection; 2701 newentry->max_protection = entry->max_protection; 2702 newentry->inheritance = entry->inheritance; 2703 newentry->wired_count = 0; 2704 newentry->aref.ar_amap = entry->aref.ar_amap; 2705 if (newentry->aref.ar_amap) { 2706 newentry->aref.ar_pageoff = 2707 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2708 uvm_map_reference_amap(newentry, AMAP_SHARED | 2709 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2710 } else { 2711 newentry->aref.ar_pageoff = 0; 2712 } 2713 newentry->advice = entry->advice; 2714 if ((flags & UVM_EXTRACT_QREF) != 0) { 2715 newentry->flags |= UVM_MAP_NOMERGE; 2716 } 2717 2718 /* now link it on the chain */ 2719 nchain++; 2720 nsize += newentry->end - newentry->start; 2721 if (endchain == NULL) { 2722 chain = endchain = newentry; 2723 } else { 2724 endchain->next = newentry; 2725 endchain = newentry; 2726 } 2727 2728 /* end of 'while' loop! */ 2729 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2730 (entry->next == &srcmap->header || 2731 entry->next->start != entry->end)) { 2732 error = EINVAL; 2733 goto bad; 2734 } 2735 entry = entry->next; 2736 fudge = 0; 2737 } 2738 2739 /* 2740 * step 4: close off chain (in format expected by uvm_map_replace) 2741 */ 2742 2743 if (chain) 2744 chain->prev = endchain; 2745 2746 /* 2747 * step 5: attempt to lock the dest map so we can pmap_copy. 
2748 * note usage of copy_ok: 2749 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2750 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2751 */ 2752 2753 if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) { 2754 copy_ok = 1; 2755 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2756 nchain, nsize, &resentry)) { 2757 if (srcmap != dstmap) 2758 vm_map_unlock(dstmap); 2759 error = EIO; 2760 goto bad; 2761 } 2762 } else { 2763 copy_ok = 0; 2764 /* replace defered until step 7 */ 2765 } 2766 2767 /* 2768 * step 6: traverse the srcmap a second time to do the following: 2769 * - if we got a lock on the dstmap do pmap_copy 2770 * - if UVM_EXTRACT_REMOVE remove the entries 2771 * we make use of orig_entry and orig_fudge (saved in step 2) 2772 */ 2773 2774 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2775 2776 /* purge possible stale hints from srcmap */ 2777 if (flags & UVM_EXTRACT_REMOVE) { 2778 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2779 if (srcmap->first_free != &srcmap->header && 2780 srcmap->first_free->start >= start) 2781 srcmap->first_free = orig_entry->prev; 2782 } 2783 2784 entry = orig_entry; 2785 fudge = orig_fudge; 2786 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2787 2788 while (entry->start < end && entry != &srcmap->header) { 2789 if (copy_ok) { 2790 oldoffset = (entry->start + fudge) - start; 2791 elen = MIN(end, entry->end) - 2792 (entry->start + fudge); 2793 pmap_copy(dstmap->pmap, srcmap->pmap, 2794 dstaddr + oldoffset, elen, 2795 entry->start + fudge); 2796 } 2797 2798 /* we advance "entry" in the following if statement */ 2799 if (flags & UVM_EXTRACT_REMOVE) { 2800 uvm_map_lock_entry(entry); 2801 pmap_remove(srcmap->pmap, entry->start, 2802 entry->end); 2803 uvm_map_unlock_entry(entry); 2804 oldentry = entry; /* save entry */ 2805 entry = entry->next; /* advance */ 2806 uvm_map_entry_unlink(srcmap, oldentry); 2807 /* add to dead list */ 2808 oldentry->next = deadentry; 2809 deadentry = oldentry; 2810 } else { 2811 entry = entry->next; /* advance */ 2812 } 2813 2814 /* end of 'while' loop */ 2815 fudge = 0; 2816 } 2817 pmap_update(srcmap->pmap); 2818 2819 /* 2820 * unlock dstmap. we will dispose of deadentry in 2821 * step 7 if needed 2822 */ 2823 2824 if (copy_ok && srcmap != dstmap) 2825 vm_map_unlock(dstmap); 2826 2827 } else { 2828 deadentry = NULL; 2829 } 2830 2831 /* 2832 * step 7: we are done with the source map, unlock. if copy_ok 2833 * is 0 then we have not replaced the dummy mapping in dstmap yet 2834 * and we need to do so now. 2835 */ 2836 2837 vm_map_unlock(srcmap); 2838 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2839 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2840 2841 /* now do the replacement if we didn't do it in step 5 */ 2842 if (copy_ok == 0) { 2843 vm_map_lock(dstmap); 2844 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2845 nchain, nsize, &resentry); 2846 vm_map_unlock(dstmap); 2847 2848 if (error == false) { 2849 error = EIO; 2850 goto bad2; 2851 } 2852 } 2853 2854 if (resentry != NULL) 2855 uvm_mapent_free(resentry); 2856 2857 return (0); 2858 2859 /* 2860 * bad: failure recovery 2861 */ 2862 bad: 2863 vm_map_unlock(srcmap); 2864 bad2: /* src already unlocked */ 2865 if (chain) 2866 uvm_unmap_detach(chain, 2867 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); 2868 2869 if (resentry != NULL) 2870 uvm_mapent_free(resentry); 2871 2872 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2873 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? 
*/ 2874 } 2875 return (error); 2876 } 2877 2878 /* end of extraction functions */ 2879 2880 /* 2881 * uvm_map_submap: punch down part of a map into a submap 2882 * 2883 * => only the kernel_map is allowed to be submapped 2884 * => the purpose of submapping is to break up the locking granularity 2885 * of a larger map 2886 * => the range specified must have been mapped previously with a uvm_map() 2887 * call [with uobj==NULL] to create a blank map entry in the main map. 2888 * [And it had better still be blank!] 2889 * => maps which contain submaps should never be copied or forked. 2890 * => to remove a submap, use uvm_unmap() on the main map 2891 * and then uvm_map_deallocate() the submap. 2892 * => main map must be unlocked. 2893 * => submap must have been init'd and have a zero reference count. 2894 * [need not be locked as we don't actually reference it] 2895 */ 2896 2897 int 2898 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2899 struct vm_map *submap) 2900 { 2901 struct vm_map_entry *entry; 2902 int error; 2903 2904 vm_map_lock(map); 2905 VM_MAP_RANGE_CHECK(map, start, end); 2906 2907 if (uvm_map_lookup_entry(map, start, &entry)) { 2908 UVM_MAP_CLIP_START(map, entry, start); 2909 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 2910 } else { 2911 entry = NULL; 2912 } 2913 2914 if (entry != NULL && 2915 entry->start == start && entry->end == end && 2916 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2917 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2918 entry->etype |= UVM_ET_SUBMAP; 2919 entry->object.sub_map = submap; 2920 entry->offset = 0; 2921 uvm_map_reference(submap); 2922 error = 0; 2923 } else { 2924 error = EINVAL; 2925 } 2926 vm_map_unlock(map); 2927 2928 return error; 2929 } 2930 2931 /* 2932 * uvm_map_protect: change map protection 2933 * 2934 * => set_max means set max_protection. 2935 * => map must be unlocked. 2936 */ 2937 2938 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2939 ~VM_PROT_WRITE : VM_PROT_ALL) 2940 2941 int 2942 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2943 vm_prot_t new_prot, bool set_max) 2944 { 2945 struct vm_map_entry *current, *entry; 2946 int error = 0; 2947 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 2948 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", 2949 map, start, end, new_prot); 2950 2951 vm_map_lock(map); 2952 VM_MAP_RANGE_CHECK(map, start, end); 2953 if (uvm_map_lookup_entry(map, start, &entry)) { 2954 UVM_MAP_CLIP_START(map, entry, start); 2955 } else { 2956 entry = entry->next; 2957 } 2958 2959 /* 2960 * make a first pass to check for protection violations. 2961 */ 2962 2963 current = entry; 2964 while ((current != &map->header) && (current->start < end)) { 2965 if (UVM_ET_ISSUBMAP(current)) { 2966 error = EINVAL; 2967 goto out; 2968 } 2969 if ((new_prot & current->max_protection) != new_prot) { 2970 error = EACCES; 2971 goto out; 2972 } 2973 /* 2974 * Don't allow VM_PROT_EXECUTE to be set on entries that 2975 * point to vnodes that are associated with a NOEXEC file 2976 * system. 2977 */ 2978 if (UVM_ET_ISOBJ(current) && 2979 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 2980 struct vnode *vp = 2981 (struct vnode *) current->object.uvm_obj; 2982 2983 if ((new_prot & VM_PROT_EXECUTE) != 0 && 2984 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 2985 error = EACCES; 2986 goto out; 2987 } 2988 } 2989 2990 current = current->next; 2991 } 2992 2993 /* go back and fix up protections (no need to clip this time). 
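 * editor's note (not in the original source): for a copy-on-write entry,
 * MASK() strips VM_PROT_WRITE before the pmap_protect() call below, so a
 * request such as
 *
 *	uvm_map_protect(map, start, end, VM_PROT_READ|VM_PROT_WRITE, false);
 *
 * enters only VM_PROT_READ into the pmap; write access is granted later by
 * the copy-on-write fault path.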
*/ 2994 2995 current = entry; 2996 while ((current != &map->header) && (current->start < end)) { 2997 vm_prot_t old_prot; 2998 2999 UVM_MAP_CLIP_END(map, current, end); 3000 old_prot = current->protection; 3001 if (set_max) 3002 current->protection = 3003 (current->max_protection = new_prot) & old_prot; 3004 else 3005 current->protection = new_prot; 3006 3007 /* 3008 * update physical map if necessary. worry about copy-on-write 3009 * here -- CHECK THIS XXX 3010 */ 3011 3012 if (current->protection != old_prot) { 3013 /* update pmap! */ 3014 uvm_map_lock_entry(current); 3015 pmap_protect(map->pmap, current->start, current->end, 3016 current->protection & MASK(entry)); 3017 uvm_map_unlock_entry(current); 3018 3019 /* 3020 * If this entry points at a vnode, and the 3021 * protection includes VM_PROT_EXECUTE, mark 3022 * the vnode as VEXECMAP. 3023 */ 3024 if (UVM_ET_ISOBJ(current)) { 3025 struct uvm_object *uobj = 3026 current->object.uvm_obj; 3027 3028 if (UVM_OBJ_IS_VNODE(uobj) && 3029 (current->protection & VM_PROT_EXECUTE)) { 3030 vn_markexec((struct vnode *) uobj); 3031 } 3032 } 3033 } 3034 3035 /* 3036 * If the map is configured to lock any future mappings, 3037 * wire this entry now if the old protection was VM_PROT_NONE 3038 * and the new protection is not VM_PROT_NONE. 3039 */ 3040 3041 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3042 VM_MAPENT_ISWIRED(entry) == 0 && 3043 old_prot == VM_PROT_NONE && 3044 new_prot != VM_PROT_NONE) { 3045 if (uvm_map_pageable(map, entry->start, 3046 entry->end, false, 3047 UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 3048 3049 /* 3050 * If locking the entry fails, remember the 3051 * error if it's the first one. Note we 3052 * still continue setting the protection in 3053 * the map, but will return the error 3054 * condition regardless. 3055 * 3056 * XXX Ignore what the actual error is, 3057 * XXX just call it a resource shortage 3058 * XXX so that it doesn't get confused 3059 * XXX what uvm_map_protect() itself would 3060 * XXX normally return. 3061 */ 3062 3063 error = ENOMEM; 3064 } 3065 } 3066 current = current->next; 3067 } 3068 pmap_update(map->pmap); 3069 3070 out: 3071 vm_map_unlock(map); 3072 3073 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); 3074 return error; 3075 } 3076 3077 #undef MASK 3078 3079 /* 3080 * uvm_map_inherit: set inheritance code for range of addrs in map. 3081 * 3082 * => map must be unlocked 3083 * => note that the inherit code is used during a "fork". see fork 3084 * code for details. 
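 * => editor's illustrative sketch (not part of the original source): a
 *    minherit(2)-style request reduces to a single call, e.g.
 *
 *	error = uvm_map_inherit(map, start, end, MAP_INHERIT_SHARE);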
3085 */ 3086 3087 int 3088 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3089 vm_inherit_t new_inheritance) 3090 { 3091 struct vm_map_entry *entry, *temp_entry; 3092 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 3093 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", 3094 map, start, end, new_inheritance); 3095 3096 switch (new_inheritance) { 3097 case MAP_INHERIT_NONE: 3098 case MAP_INHERIT_COPY: 3099 case MAP_INHERIT_SHARE: 3100 break; 3101 default: 3102 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3103 return EINVAL; 3104 } 3105 3106 vm_map_lock(map); 3107 VM_MAP_RANGE_CHECK(map, start, end); 3108 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3109 entry = temp_entry; 3110 UVM_MAP_CLIP_START(map, entry, start); 3111 } else { 3112 entry = temp_entry->next; 3113 } 3114 while ((entry != &map->header) && (entry->start < end)) { 3115 UVM_MAP_CLIP_END(map, entry, end); 3116 entry->inheritance = new_inheritance; 3117 entry = entry->next; 3118 } 3119 vm_map_unlock(map); 3120 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3121 return 0; 3122 } 3123 3124 /* 3125 * uvm_map_advice: set advice code for range of addrs in map. 3126 * 3127 * => map must be unlocked 3128 */ 3129 3130 int 3131 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3132 { 3133 struct vm_map_entry *entry, *temp_entry; 3134 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 3135 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", 3136 map, start, end, new_advice); 3137 3138 vm_map_lock(map); 3139 VM_MAP_RANGE_CHECK(map, start, end); 3140 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3141 entry = temp_entry; 3142 UVM_MAP_CLIP_START(map, entry, start); 3143 } else { 3144 entry = temp_entry->next; 3145 } 3146 3147 /* 3148 * XXXJRT: disallow holes? 3149 */ 3150 3151 while ((entry != &map->header) && (entry->start < end)) { 3152 UVM_MAP_CLIP_END(map, entry, end); 3153 3154 switch (new_advice) { 3155 case MADV_NORMAL: 3156 case MADV_RANDOM: 3157 case MADV_SEQUENTIAL: 3158 /* nothing special here */ 3159 break; 3160 3161 default: 3162 vm_map_unlock(map); 3163 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3164 return EINVAL; 3165 } 3166 entry->advice = new_advice; 3167 entry = entry->next; 3168 } 3169 3170 vm_map_unlock(map); 3171 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3172 return 0; 3173 } 3174 3175 /* 3176 * uvm_map_willneed: apply MADV_WILLNEED 3177 */ 3178 3179 int 3180 uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end) 3181 { 3182 struct vm_map_entry *entry; 3183 UVMHIST_FUNC("uvm_map_willneed"); UVMHIST_CALLED(maphist); 3184 UVMHIST_LOG(maphist,"(map=0x%lx,start=0x%lx,end=0x%lx)", 3185 map, start, end, 0); 3186 3187 vm_map_lock_read(map); 3188 VM_MAP_RANGE_CHECK(map, start, end); 3189 if (!uvm_map_lookup_entry(map, start, &entry)) { 3190 entry = entry->next; 3191 } 3192 while (entry->start < end) { 3193 struct vm_amap * const amap = entry->aref.ar_amap; 3194 struct uvm_object * const uobj = entry->object.uvm_obj; 3195 3196 KASSERT(entry != &map->header); 3197 KASSERT(start < entry->end); 3198 /* 3199 * For now, we handle only the easy but commonly-requested case. 3200 * ie. start prefetching of backing uobj pages. 3201 * 3202 * XXX It might be useful to pmap_enter() the already-in-core 3203 * pages by inventing a "weak" mode for uvm_fault() which would 3204 * only do the PGO_LOCKED pgo_get(). 
3205 */ 3206 if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) { 3207 off_t offset; 3208 off_t size; 3209 3210 offset = entry->offset; 3211 if (start < entry->start) { 3212 offset += entry->start - start; 3213 } 3214 size = entry->offset + (entry->end - entry->start); 3215 if (entry->end < end) { 3216 size -= end - entry->end; 3217 } 3218 uvm_readahead(uobj, offset, size); 3219 } 3220 entry = entry->next; 3221 } 3222 vm_map_unlock_read(map); 3223 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3224 return 0; 3225 } 3226 3227 /* 3228 * uvm_map_pageable: sets the pageability of a range in a map. 3229 * 3230 * => wires map entries. should not be used for transient page locking. 3231 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3232 * => regions specified as not pageable require lock-down (wired) memory 3233 * and page tables. 3234 * => map must never be read-locked 3235 * => if islocked is true, map is already write-locked 3236 * => we always unlock the map, since we must downgrade to a read-lock 3237 * to call uvm_fault_wire() 3238 * => XXXCDC: check this and try and clean it up. 3239 */ 3240 3241 int 3242 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3243 bool new_pageable, int lockflags) 3244 { 3245 struct vm_map_entry *entry, *start_entry, *failed_entry; 3246 int rv; 3247 #ifdef DIAGNOSTIC 3248 u_int timestamp_save; 3249 #endif 3250 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 3251 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", 3252 map, start, end, new_pageable); 3253 KASSERT(map->flags & VM_MAP_PAGEABLE); 3254 3255 if ((lockflags & UVM_LK_ENTER) == 0) 3256 vm_map_lock(map); 3257 VM_MAP_RANGE_CHECK(map, start, end); 3258 3259 /* 3260 * only one pageability change may take place at one time, since 3261 * uvm_fault_wire assumes it will be called only once for each 3262 * wiring/unwiring. therefore, we have to make sure we're actually 3263 * changing the pageability for the entire region. we do so before 3264 * making any changes. 3265 */ 3266 3267 if (uvm_map_lookup_entry(map, start, &start_entry) == false) { 3268 if ((lockflags & UVM_LK_EXIT) == 0) 3269 vm_map_unlock(map); 3270 3271 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3272 return EFAULT; 3273 } 3274 entry = start_entry; 3275 3276 /* 3277 * handle wiring and unwiring separately. 3278 */ 3279 3280 if (new_pageable) { /* unwire */ 3281 UVM_MAP_CLIP_START(map, entry, start); 3282 3283 /* 3284 * unwiring. first ensure that the range to be unwired is 3285 * really wired down and that there are no holes. 3286 */ 3287 3288 while ((entry != &map->header) && (entry->start < end)) { 3289 if (entry->wired_count == 0 || 3290 (entry->end < end && 3291 (entry->next == &map->header || 3292 entry->next->start > entry->end))) { 3293 if ((lockflags & UVM_LK_EXIT) == 0) 3294 vm_map_unlock(map); 3295 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3296 return EINVAL; 3297 } 3298 entry = entry->next; 3299 } 3300 3301 /* 3302 * POSIX 1003.1b - a single munlock call unlocks a region, 3303 * regardless of the number of mlock calls made on that 3304 * region. 
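 * editor's illustrative sketch (not part of the original source): with
 * lockflags == 0 the function takes and releases the map lock itself, so
 * mlock(2)/munlock(2)-style wiring and unwiring reduce to
 *
 *	error = uvm_map_pageable(map, start, end, false, 0);   (wire)
 *	error = uvm_map_pageable(map, start, end, true, 0);    (unwire)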
3305 */ 3306 3307 entry = start_entry; 3308 while ((entry != &map->header) && (entry->start < end)) { 3309 UVM_MAP_CLIP_END(map, entry, end); 3310 if (VM_MAPENT_ISWIRED(entry)) 3311 uvm_map_entry_unwire(map, entry); 3312 entry = entry->next; 3313 } 3314 if ((lockflags & UVM_LK_EXIT) == 0) 3315 vm_map_unlock(map); 3316 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3317 return 0; 3318 } 3319 3320 /* 3321 * wire case: in two passes [XXXCDC: ugly block of code here] 3322 * 3323 * 1: holding the write lock, we create any anonymous maps that need 3324 * to be created. then we clip each map entry to the region to 3325 * be wired and increment its wiring count. 3326 * 3327 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3328 * in the pages for any newly wired area (wired_count == 1). 3329 * 3330 * downgrading to a read lock for uvm_fault_wire avoids a possible 3331 * deadlock with another thread that may have faulted on one of 3332 * the pages to be wired (it would mark the page busy, blocking 3333 * us, then in turn block on the map lock that we hold). because 3334 * of problems in the recursive lock package, we cannot upgrade 3335 * to a write lock in vm_map_lookup. thus, any actions that 3336 * require the write lock must be done beforehand. because we 3337 * keep the read lock on the map, the copy-on-write status of the 3338 * entries we modify here cannot change. 3339 */ 3340 3341 while ((entry != &map->header) && (entry->start < end)) { 3342 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3343 3344 /* 3345 * perform actions of vm_map_lookup that need the 3346 * write lock on the map: create an anonymous map 3347 * for a copy-on-write region, or an anonymous map 3348 * for a zero-fill region. (XXXCDC: submap case 3349 * ok?) 3350 */ 3351 3352 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3353 if (UVM_ET_ISNEEDSCOPY(entry) && 3354 ((entry->max_protection & VM_PROT_WRITE) || 3355 (entry->object.uvm_obj == NULL))) { 3356 amap_copy(map, entry, 0, start, end); 3357 /* XXXCDC: wait OK? */ 3358 } 3359 } 3360 } 3361 UVM_MAP_CLIP_START(map, entry, start); 3362 UVM_MAP_CLIP_END(map, entry, end); 3363 entry->wired_count++; 3364 3365 /* 3366 * Check for holes 3367 */ 3368 3369 if (entry->protection == VM_PROT_NONE || 3370 (entry->end < end && 3371 (entry->next == &map->header || 3372 entry->next->start > entry->end))) { 3373 3374 /* 3375 * found one. amap creation actions do not need to 3376 * be undone, but the wired counts need to be restored. 3377 */ 3378 3379 while (entry != &map->header && entry->end > start) { 3380 entry->wired_count--; 3381 entry = entry->prev; 3382 } 3383 if ((lockflags & UVM_LK_EXIT) == 0) 3384 vm_map_unlock(map); 3385 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3386 return EINVAL; 3387 } 3388 entry = entry->next; 3389 } 3390 3391 /* 3392 * Pass 2. 3393 */ 3394 3395 #ifdef DIAGNOSTIC 3396 timestamp_save = map->timestamp; 3397 #endif 3398 vm_map_busy(map); 3399 vm_map_unlock(map); 3400 3401 rv = 0; 3402 entry = start_entry; 3403 while (entry != &map->header && entry->start < end) { 3404 if (entry->wired_count == 1) { 3405 rv = uvm_fault_wire(map, entry->start, entry->end, 3406 entry->max_protection, 1); 3407 if (rv) { 3408 3409 /* 3410 * wiring failed. break out of the loop. 3411 * we'll clean up the map below, once we 3412 * have a write lock again. 3413 */ 3414 3415 break; 3416 } 3417 } 3418 entry = entry->next; 3419 } 3420 3421 if (rv) { /* failed? */ 3422 3423 /* 3424 * Get back to an exclusive (write) lock. 
3425 */ 3426 3427 vm_map_lock(map); 3428 vm_map_unbusy(map); 3429 3430 #ifdef DIAGNOSTIC 3431 if (timestamp_save + 1 != map->timestamp) 3432 panic("uvm_map_pageable: stale map"); 3433 #endif 3434 3435 /* 3436 * first drop the wiring count on all the entries 3437 * which haven't actually been wired yet. 3438 */ 3439 3440 failed_entry = entry; 3441 while (entry != &map->header && entry->start < end) { 3442 entry->wired_count--; 3443 entry = entry->next; 3444 } 3445 3446 /* 3447 * now, unwire all the entries that were successfully 3448 * wired above. 3449 */ 3450 3451 entry = start_entry; 3452 while (entry != failed_entry) { 3453 entry->wired_count--; 3454 if (VM_MAPENT_ISWIRED(entry) == 0) 3455 uvm_map_entry_unwire(map, entry); 3456 entry = entry->next; 3457 } 3458 if ((lockflags & UVM_LK_EXIT) == 0) 3459 vm_map_unlock(map); 3460 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); 3461 return (rv); 3462 } 3463 3464 if ((lockflags & UVM_LK_EXIT) == 0) { 3465 vm_map_unbusy(map); 3466 } else { 3467 3468 /* 3469 * Get back to an exclusive (write) lock. 3470 */ 3471 3472 vm_map_lock(map); 3473 vm_map_unbusy(map); 3474 } 3475 3476 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3477 return 0; 3478 } 3479 3480 /* 3481 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3482 * all mapped regions. 3483 * 3484 * => map must not be locked. 3485 * => if no flags are specified, all regions are unwired. 3486 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3487 */ 3488 3489 int 3490 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3491 { 3492 struct vm_map_entry *entry, *failed_entry; 3493 vsize_t size; 3494 int rv; 3495 #ifdef DIAGNOSTIC 3496 u_int timestamp_save; 3497 #endif 3498 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 3499 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0); 3500 3501 KASSERT(map->flags & VM_MAP_PAGEABLE); 3502 3503 vm_map_lock(map); 3504 3505 /* 3506 * handle wiring and unwiring separately. 3507 */ 3508 3509 if (flags == 0) { /* unwire */ 3510 3511 /* 3512 * POSIX 1003.1b -- munlockall unlocks all regions, 3513 * regardless of how many times mlockall has been called. 3514 */ 3515 3516 for (entry = map->header.next; entry != &map->header; 3517 entry = entry->next) { 3518 if (VM_MAPENT_ISWIRED(entry)) 3519 uvm_map_entry_unwire(map, entry); 3520 } 3521 map->flags &= ~VM_MAP_WIREFUTURE; 3522 vm_map_unlock(map); 3523 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3524 return 0; 3525 } 3526 3527 if (flags & MCL_FUTURE) { 3528 3529 /* 3530 * must wire all future mappings; remember this. 3531 */ 3532 3533 map->flags |= VM_MAP_WIREFUTURE; 3534 } 3535 3536 if ((flags & MCL_CURRENT) == 0) { 3537 3538 /* 3539 * no more work to do! 3540 */ 3541 3542 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3543 vm_map_unlock(map); 3544 return 0; 3545 } 3546 3547 /* 3548 * wire case: in three passes [XXXCDC: ugly block of code here] 3549 * 3550 * 1: holding the write lock, count all pages mapped by non-wired 3551 * entries. if this would cause us to go over our limit, we fail. 3552 * 3553 * 2: still holding the write lock, we create any anonymous maps that 3554 * need to be created. then we increment its wiring count. 3555 * 3556 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3557 * in the pages for any newly wired area (wired_count == 1). 
3558 * 3559 * downgrading to a read lock for uvm_fault_wire avoids a possible 3560 * deadlock with another thread that may have faulted on one of 3561 * the pages to be wired (it would mark the page busy, blocking 3562 * us, then in turn block on the map lock that we hold). because 3563 * of problems in the recursive lock package, we cannot upgrade 3564 * to a write lock in vm_map_lookup. thus, any actions that 3565 * require the write lock must be done beforehand. because we 3566 * keep the read lock on the map, the copy-on-write status of the 3567 * entries we modify here cannot change. 3568 */ 3569 3570 for (size = 0, entry = map->header.next; entry != &map->header; 3571 entry = entry->next) { 3572 if (entry->protection != VM_PROT_NONE && 3573 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3574 size += entry->end - entry->start; 3575 } 3576 } 3577 3578 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3579 vm_map_unlock(map); 3580 return ENOMEM; 3581 } 3582 3583 if (limit != 0 && 3584 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3585 vm_map_unlock(map); 3586 return ENOMEM; 3587 } 3588 3589 /* 3590 * Pass 2. 3591 */ 3592 3593 for (entry = map->header.next; entry != &map->header; 3594 entry = entry->next) { 3595 if (entry->protection == VM_PROT_NONE) 3596 continue; 3597 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3598 3599 /* 3600 * perform actions of vm_map_lookup that need the 3601 * write lock on the map: create an anonymous map 3602 * for a copy-on-write region, or an anonymous map 3603 * for a zero-fill region. (XXXCDC: submap case 3604 * ok?) 3605 */ 3606 3607 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3608 if (UVM_ET_ISNEEDSCOPY(entry) && 3609 ((entry->max_protection & VM_PROT_WRITE) || 3610 (entry->object.uvm_obj == NULL))) { 3611 amap_copy(map, entry, 0, entry->start, 3612 entry->end); 3613 /* XXXCDC: wait OK? */ 3614 } 3615 } 3616 } 3617 entry->wired_count++; 3618 } 3619 3620 /* 3621 * Pass 3. 3622 */ 3623 3624 #ifdef DIAGNOSTIC 3625 timestamp_save = map->timestamp; 3626 #endif 3627 vm_map_busy(map); 3628 vm_map_unlock(map); 3629 3630 rv = 0; 3631 for (entry = map->header.next; entry != &map->header; 3632 entry = entry->next) { 3633 if (entry->wired_count == 1) { 3634 rv = uvm_fault_wire(map, entry->start, entry->end, 3635 entry->max_protection, 1); 3636 if (rv) { 3637 3638 /* 3639 * wiring failed. break out of the loop. 3640 * we'll clean up the map below, once we 3641 * have a write lock again. 3642 */ 3643 3644 break; 3645 } 3646 } 3647 } 3648 3649 if (rv) { 3650 3651 /* 3652 * Get back an exclusive (write) lock. 3653 */ 3654 3655 vm_map_lock(map); 3656 vm_map_unbusy(map); 3657 3658 #ifdef DIAGNOSTIC 3659 if (timestamp_save + 1 != map->timestamp) 3660 panic("uvm_map_pageable_all: stale map"); 3661 #endif 3662 3663 /* 3664 * first drop the wiring count on all the entries 3665 * which haven't actually been wired yet. 3666 * 3667 * Skip VM_PROT_NONE entries like we did above. 3668 */ 3669 3670 failed_entry = entry; 3671 for (/* nothing */; entry != &map->header; 3672 entry = entry->next) { 3673 if (entry->protection == VM_PROT_NONE) 3674 continue; 3675 entry->wired_count--; 3676 } 3677 3678 /* 3679 * now, unwire all the entries that were successfully 3680 * wired above. 3681 * 3682 * Skip VM_PROT_NONE entries like we did above. 
3683 */ 3684 3685 for (entry = map->header.next; entry != failed_entry; 3686 entry = entry->next) { 3687 if (entry->protection == VM_PROT_NONE) 3688 continue; 3689 entry->wired_count--; 3690 if (VM_MAPENT_ISWIRED(entry)) 3691 uvm_map_entry_unwire(map, entry); 3692 } 3693 vm_map_unlock(map); 3694 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0); 3695 return (rv); 3696 } 3697 3698 vm_map_unbusy(map); 3699 3700 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3701 return 0; 3702 } 3703 3704 /* 3705 * uvm_map_clean: clean out a map range 3706 * 3707 * => valid flags: 3708 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3709 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3710 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3711 * if (flags & PGO_FREE): any cached pages are freed after clean 3712 * => returns an error if any part of the specified range isn't mapped 3713 * => never a need to flush amap layer since the anonymous memory has 3714 * no permanent home, but may deactivate pages there 3715 * => called from sys_msync() and sys_madvise() 3716 * => caller must not write-lock map (read OK). 3717 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3718 */ 3719 3720 int 3721 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3722 { 3723 struct vm_map_entry *current, *entry; 3724 struct uvm_object *uobj; 3725 struct vm_amap *amap; 3726 struct vm_anon *anon, *anon_tofree; 3727 struct vm_page *pg; 3728 vaddr_t offset; 3729 vsize_t size; 3730 voff_t uoff; 3731 int error, refs; 3732 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3733 3734 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", 3735 map, start, end, flags); 3736 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3737 (PGO_FREE|PGO_DEACTIVATE)); 3738 3739 vm_map_lock_read(map); 3740 VM_MAP_RANGE_CHECK(map, start, end); 3741 if (uvm_map_lookup_entry(map, start, &entry) == false) { 3742 vm_map_unlock_read(map); 3743 return EFAULT; 3744 } 3745 3746 /* 3747 * Make a first pass to check for holes and wiring problems. 3748 */ 3749 3750 for (current = entry; current->start < end; current = current->next) { 3751 if (UVM_ET_ISSUBMAP(current)) { 3752 vm_map_unlock_read(map); 3753 return EINVAL; 3754 } 3755 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3756 vm_map_unlock_read(map); 3757 return EBUSY; 3758 } 3759 if (end <= current->end) { 3760 break; 3761 } 3762 if (current->end != current->next->start) { 3763 vm_map_unlock_read(map); 3764 return EFAULT; 3765 } 3766 } 3767 3768 error = 0; 3769 for (current = entry; start < end; current = current->next) { 3770 amap = current->aref.ar_amap; /* upper layer */ 3771 uobj = current->object.uvm_obj; /* lower layer */ 3772 KASSERT(start >= current->start); 3773 3774 /* 3775 * No amap cleaning necessary if: 3776 * 3777 * (1) There's no amap. 3778 * 3779 * (2) We're not deactivating or freeing pages. 
3780 */
3781
3782 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
3783 goto flush_object;
3784
3785 offset = start - current->start;
3786 size = MIN(end, current->end) - start;
3787 anon_tofree = NULL;
3788
3789 amap_lock(amap);
3790 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
3791 anon = amap_lookup(&current->aref, offset);
3792 if (anon == NULL)
3793 continue;
3794
3795 KASSERT(anon->an_lock == amap->am_lock);
3796 pg = anon->an_page;
3797 if (pg == NULL) {
3798 continue;
3799 }
3800
3801 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
3802
3803 /*
3804 * In these first 3 cases, we just deactivate the page.
3805 */
3806
3807 case PGO_CLEANIT|PGO_FREE:
3808 case PGO_CLEANIT|PGO_DEACTIVATE:
3809 case PGO_DEACTIVATE:
3810 deactivate_it:
3811 /*
3812 * skip the page if it's loaned or wired,
3813 * since it shouldn't be on a paging queue
3814 * at all in these cases.
3815 */
3816
3817 mutex_enter(&uvm_pageqlock);
3818 if (pg->loan_count != 0 ||
3819 pg->wire_count != 0) {
3820 mutex_exit(&uvm_pageqlock);
3821 continue;
3822 }
3823 KASSERT(pg->uanon == anon);
3824 uvm_pagedeactivate(pg);
3825 mutex_exit(&uvm_pageqlock);
3826 continue;
3827
3828 case PGO_FREE:
3829
3830 /*
3831 * If there are multiple references to
3832 * the amap, just deactivate the page.
3833 */
3834
3835 if (amap_refs(amap) > 1)
3836 goto deactivate_it;
3837
3838 /* skip the page if it's wired */
3839 if (pg->wire_count != 0) {
3840 continue;
3841 }
3842 amap_unadd(&current->aref, offset);
3843 refs = --anon->an_ref;
3844 if (refs == 0) {
3845 anon->an_link = anon_tofree;
3846 anon_tofree = anon;
3847 }
3848 continue;
3849 }
3850 }
3851 uvm_anon_freelst(amap, anon_tofree);
3852
3853 flush_object:
3854 /*
3855 * flush pages if we've got a valid backing object.
3856 * note that we must always clean object pages before
3857 * freeing them since otherwise we could reveal stale
3858 * data from files.
3859 */
3860
3861 uoff = current->offset + (start - current->start);
3862 size = MIN(end, current->end) - start;
3863 if (uobj != NULL) {
3864 mutex_enter(uobj->vmobjlock);
3865 if (uobj->pgops->pgo_put != NULL)
3866 error = (uobj->pgops->pgo_put)(uobj, uoff,
3867 uoff + size, flags | PGO_CLEANIT);
3868 else
3869 error = 0;
3870 }
3871 start += size;
3872 }
3873 vm_map_unlock_read(map);
3874 return (error);
3875 }
3876
3877
3878 /*
3879 * uvm_map_checkprot: check protection in map
3880 *
3881 * => must allow specified protection in a fully allocated region.
3882 * => map must be read or write locked by caller.
3883 */
3884
3885 bool
3886 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3887 vm_prot_t protection)
3888 {
3889 struct vm_map_entry *entry;
3890 struct vm_map_entry *tmp_entry;
3891
3892 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3893 return (false);
3894 }
3895 entry = tmp_entry;
3896 while (start < end) {
3897 if (entry == &map->header) {
3898 return (false);
3899 }
3900
3901 /*
3902 * no holes allowed
3903 */
3904
3905 if (start < entry->start) {
3906 return (false);
3907 }
3908
3909 /*
3910 * check protection associated with entry
3911 */
3912
3913 if ((entry->protection & protection) != protection) {
3914 return (false);
3915 }
3916 start = entry->end;
3917 entry = entry->next;
3918 }
3919 return (true);
3920 }
3921
3922 /*
3923 * uvmspace_alloc: allocate a vmspace structure.
3924 * 3925 * - structure includes vm_map and pmap 3926 * - XXX: no locking on this structure 3927 * - refcnt set to 1, rest must be init'd by caller 3928 */ 3929 struct vmspace * 3930 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) 3931 { 3932 struct vmspace *vm; 3933 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 3934 3935 vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); 3936 uvmspace_init(vm, NULL, vmin, vmax); 3937 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); 3938 return (vm); 3939 } 3940 3941 /* 3942 * uvmspace_init: initialize a vmspace structure. 3943 * 3944 * - XXX: no locking on this structure 3945 * - refcnt set to 1, rest must be init'd by caller 3946 */ 3947 void 3948 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) 3949 { 3950 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 3951 3952 memset(vm, 0, sizeof(*vm)); 3953 uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 3954 #ifdef __USING_TOPDOWN_VM 3955 | VM_MAP_TOPDOWN 3956 #endif 3957 ); 3958 if (pmap) 3959 pmap_reference(pmap); 3960 else 3961 pmap = pmap_create(); 3962 vm->vm_map.pmap = pmap; 3963 vm->vm_refcnt = 1; 3964 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3965 } 3966 3967 /* 3968 * uvmspace_share: share a vmspace between two processes 3969 * 3970 * - used for vfork, threads(?) 3971 */ 3972 3973 void 3974 uvmspace_share(struct proc *p1, struct proc *p2) 3975 { 3976 3977 uvmspace_addref(p1->p_vmspace); 3978 p2->p_vmspace = p1->p_vmspace; 3979 } 3980 3981 #if 0 3982 3983 /* 3984 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 3985 * 3986 * - XXX: no locking on vmspace 3987 */ 3988 3989 void 3990 uvmspace_unshare(struct lwp *l) 3991 { 3992 struct proc *p = l->l_proc; 3993 struct vmspace *nvm, *ovm = p->p_vmspace; 3994 3995 if (ovm->vm_refcnt == 1) 3996 /* nothing to do: vmspace isn't shared in the first place */ 3997 return; 3998 3999 /* make a new vmspace, still holding old one */ 4000 nvm = uvmspace_fork(ovm); 4001 4002 kpreempt_disable(); 4003 pmap_deactivate(l); /* unbind old vmspace */ 4004 p->p_vmspace = nvm; 4005 pmap_activate(l); /* switch to new vmspace */ 4006 kpreempt_enable(); 4007 4008 uvmspace_free(ovm); /* drop reference to old vmspace */ 4009 } 4010 4011 #endif 4012 4013 /* 4014 * uvmspace_exec: the process wants to exec a new program 4015 */ 4016 4017 void 4018 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end) 4019 { 4020 struct proc *p = l->l_proc; 4021 struct vmspace *nvm, *ovm = p->p_vmspace; 4022 struct vm_map *map; 4023 4024 #ifdef __HAVE_CPU_VMSPACE_EXEC 4025 cpu_vmspace_exec(l, start, end); 4026 #endif 4027 4028 /* 4029 * Special case: no vmspace yet (see posix_spawn) - 4030 * no races possible in this case. 4031 */ 4032 if (ovm == NULL) { 4033 ovm = uvmspace_alloc(start, end); 4034 kpreempt_disable(); 4035 p->p_vmspace = ovm; 4036 pmap_activate(l); 4037 kpreempt_enable(); 4038 return; 4039 } 4040 4041 map = &ovm->vm_map; 4042 /* 4043 * see if more than one process is using this vmspace... 4044 */ 4045 4046 if (ovm->vm_refcnt == 1) { 4047 4048 /* 4049 * if p is the only process using its vmspace then we can safely 4050 * recycle that vmspace for the program that is being exec'd. 4051 */ 4052 4053 #ifdef SYSVSHM 4054 /* 4055 * SYSV SHM semantics require us to kill all segments on an exec 4056 */ 4057 4058 if (ovm->vm_shm) 4059 shmexit(ovm); 4060 #endif 4061 4062 /* 4063 * POSIX 1003.1b -- "lock future mappings" is revoked 4064 * when a process execs another program image. 
		 */

		map->flags &= ~VM_MAP_WIREFUTURE;

		/*
		 * now unmap the old program
		 */

		pmap_remove_all(map->pmap);
		uvm_unmap(map, vm_map_min(map), vm_map_max(map));
		KASSERT(map->header.prev == &map->header);
		KASSERT(map->nentries == 0);

		/*
		 * resize the map
		 */

		vm_map_setmin(map, start);
		vm_map_setmax(map, end);
	} else {

		/*
		 * p's vmspace is being shared, so we can't reuse it for p since
		 * it is still being used for others.  allocate a new vmspace
		 * for p
		 */

		nvm = uvmspace_alloc(start, end);

		/*
		 * install new vmspace and drop our ref to the old one.
		 */

		kpreempt_disable();
		pmap_deactivate(l);
		p->p_vmspace = nvm;
		pmap_activate(l);
		kpreempt_enable();

		uvmspace_free(ovm);
	}
}

/*
 * uvmspace_addref: add a reference to a vmspace.
 */

void
uvmspace_addref(struct vmspace *vm)
{
	struct vm_map *map = &vm->vm_map;

	KASSERT((map->flags & VM_MAP_DYING) == 0);

	mutex_enter(&map->misc_lock);
	KASSERT(vm->vm_refcnt > 0);
	vm->vm_refcnt++;
	mutex_exit(&map->misc_lock);
}

/*
 * uvmspace_free: free a vmspace data structure
 */

void
uvmspace_free(struct vmspace *vm)
{
	struct vm_map_entry *dead_entries;
	struct vm_map *map = &vm->vm_map;
	int n;

	UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0);
	mutex_enter(&map->misc_lock);
	n = --vm->vm_refcnt;
	mutex_exit(&map->misc_lock);
	if (n > 0)
		return;

	/*
	 * at this point, there should be no other references to the map.
	 * delete all of the mappings, then destroy the pmap.
	 */

	map->flags |= VM_MAP_DYING;
	pmap_remove_all(map->pmap);
#ifdef SYSVSHM
	/* Get rid of any SYSV shared memory segments. */
	if (vm->vm_shm != NULL)
		shmexit(vm);
#endif

	if (map->nentries) {
		uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
		    &dead_entries, 0);
		if (dead_entries != NULL)
			uvm_unmap_detach(dead_entries, 0);
	}
	KASSERT(map->nentries == 0);
	KASSERT(map->size == 0);

	mutex_destroy(&map->misc_lock);
	rw_destroy(&map->lock);
	cv_destroy(&map->cv);
	pmap_destroy(map->pmap);
	pool_cache_put(&uvm_vmspace_cache, vm);
}
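
/*
 * Editor's illustrative sketch (not part of the original source):
 * code that keeps a vmspace pointer beyond the context it was found
 * in is expected to pair uvmspace_addref() with uvmspace_free(), so
 * the structure cannot be torn down underneath it:
 *
 *	uvmspace_addref(vm);		// take our own reference
 *	// ... use vm->vm_map, possibly sleeping ...
 *	uvmspace_free(vm);		// drop it; the last ref frees
 */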

/*
 *   F O R K   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvmspace_fork: fork a process' main map
 *
 * => create a new vmspace for child process from parent.
 * => parent's map must not be locked.
 */

struct vmspace *
uvmspace_fork(struct vmspace *vm1)
{
	struct vmspace *vm2;
	struct vm_map *old_map = &vm1->vm_map;
	struct vm_map *new_map;
	struct vm_map_entry *old_entry;
	struct vm_map_entry *new_entry;
	UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);

	vm_map_lock(old_map);

	vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map));
	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
	    (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy);
	new_map = &vm2->vm_map;		  /* XXX */

	old_entry = old_map->header.next;
	new_map->size = old_map->size;

	/*
	 * go entry-by-entry
	 */

	while (old_entry != &old_map->header) {

		/*
		 * first, some sanity checks on the old entry
		 */

		KASSERT(!UVM_ET_ISSUBMAP(old_entry));
		KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
		    !UVM_ET_ISNEEDSCOPY(old_entry));

		switch (old_entry->inheritance) {
		case MAP_INHERIT_NONE:

			/*
			 * drop the mapping, modify size
			 */
			new_map->size -= old_entry->end - old_entry->start;
			break;

		case MAP_INHERIT_SHARE:

			/*
			 * share the mapping: this means we want the old and
			 * new entries to share amaps and backing objects.
			 */
			/*
			 * if the old_entry needs a new amap (due to prev fork)
			 * then we need to allocate it now so that we have
			 * something we own to share with the new_entry.   [in
			 * other words, we need to clear needs_copy]
			 */

			if (UVM_ET_ISNEEDSCOPY(old_entry)) {
				/* get our own amap, clears needs_copy */
				amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK,
				    0, 0);
				/* XXXCDC: WAITOK??? */
			}

			new_entry = uvm_mapent_alloc(new_map, 0);
			/* old_entry -> new_entry */
			uvm_mapent_copy(old_entry, new_entry);

			/* new pmap has nothing wired in it */
			new_entry->wired_count = 0;

			/*
			 * gain reference to object backing the map (can't
			 * be a submap, already checked this case).
			 */

			if (new_entry->aref.ar_amap)
				uvm_map_reference_amap(new_entry, AMAP_SHARED);

			if (new_entry->object.uvm_obj &&
			    new_entry->object.uvm_obj->pgops->pgo_reference)
				new_entry->object.uvm_obj->
				    pgops->pgo_reference(
					new_entry->object.uvm_obj);

			/* insert entry at end of new_map's entry list */
			uvm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);

			break;

		case MAP_INHERIT_COPY:

			/*
			 * copy-on-write the mapping (using mmap's
			 * MAP_PRIVATE semantics)
			 *
			 * allocate new_entry, adjust reference counts.
			 * (note that new references are read-only).
			 */

			new_entry = uvm_mapent_alloc(new_map, 0);
			/* old_entry -> new_entry */
			uvm_mapent_copy(old_entry, new_entry);

			if (new_entry->aref.ar_amap)
				uvm_map_reference_amap(new_entry, 0);

			if (new_entry->object.uvm_obj &&
			    new_entry->object.uvm_obj->pgops->pgo_reference)
				new_entry->object.uvm_obj->pgops->pgo_reference
				    (new_entry->object.uvm_obj);

			/* new pmap has nothing wired in it */
			new_entry->wired_count = 0;

			new_entry->etype |=
			    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
			uvm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);

			/*
			 * the new entry will need an amap.  it will either
			 * need to be copied from the old entry or created
			 * from scratch (if the old entry does not have an
			 * amap).  can we defer this process until later
			 * (by setting "needs_copy") or do we need to copy
			 * the amap now?
			 *
			 * we must copy the amap now if any of the following
			 * conditions hold:
			 * 1. the old entry has an amap and that amap is
			 *    being shared.  this means that the old (parent)
			 *    process is sharing the amap with another
			 *    process.  if we do not clear needs_copy here
			 *    we will end up in a situation where both the
			 *    parent and child process are referring to the
			 *    same amap with "needs_copy" set.  if the
			 *    parent write-faults, the fault routine will
			 *    clear "needs_copy" in the parent by allocating
			 *    a new amap.  this is wrong because the
			 *    parent is supposed to be sharing the old amap
			 *    and the new amap will break that.
			 *
			 * 2. if the old entry has an amap and a non-zero
			 *    wire count then we are going to have to call
			 *    amap_cow_now to avoid page faults in the
			 *    parent process.  since amap_cow_now requires
			 *    "needs_copy" to be clear we might as well
			 *    clear it here as well.
			 *
			 */

			if (old_entry->aref.ar_amap != NULL) {
				if ((amap_flags(old_entry->aref.ar_amap) &
				    AMAP_SHARED) != 0 ||
				    VM_MAPENT_ISWIRED(old_entry)) {

					amap_copy(new_map, new_entry,
					    AMAP_COPY_NOCHUNK, 0, 0);
					/* XXXCDC: M_WAITOK ... ok? */
				}
			}

			/*
			 * if the parent's entry is wired down, then the
			 * parent process does not want page faults on
			 * access to that memory.  this means that we
			 * cannot do copy-on-write because we can't write
			 * protect the old entry.  in this case we
			 * resolve all copy-on-write faults now, using
			 * amap_cow_now.  note that we have already
			 * allocated any needed amap (above).
			 */

			if (VM_MAPENT_ISWIRED(old_entry)) {

				/*
				 * resolve all copy-on-write faults now
				 * (note that there is nothing to do if
				 * the old mapping does not have an amap).
				 */
				if (old_entry->aref.ar_amap)
					amap_cow_now(new_map, new_entry);

			} else {

				/*
				 * setup mappings to trigger copy-on-write faults
				 * we must write-protect the parent if it has
				 * an amap and it is not already "needs_copy"...
				 * if it is already "needs_copy" then the parent
				 * has already been write-protected by a previous
				 * fork operation.
				 */

				if (old_entry->aref.ar_amap &&
				    !UVM_ET_ISNEEDSCOPY(old_entry)) {
					if (old_entry->max_protection & VM_PROT_WRITE) {
						pmap_protect(old_map->pmap,
						    old_entry->start,
						    old_entry->end,
						    old_entry->protection &
						    ~VM_PROT_WRITE);
					}
					old_entry->etype |= UVM_ET_NEEDSCOPY;
				}
			}
			break;
		}	/* end of switch statement */
		old_entry = old_entry->next;
	}

	pmap_update(old_map->pmap);
	vm_map_unlock(old_map);

#ifdef SYSVSHM
	if (vm1->vm_shm)
		shmfork(vm1, vm2);
#endif

#ifdef PMAP_FORK
	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
#endif

	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
	return (vm2);
}
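
/*
 * Editor's illustrative note (not part of the original source): the
 * per-entry inheritance codes handled by the switch above are
 * normally controlled from userland with minherit(2).  For example,
 * a parent that wants a buffer shared with its children instead of
 * copied would do roughly:
 *
 *	minherit(buf, len, MAP_INHERIT_SHARE);
 *
 * so that uvmspace_fork() takes the MAP_INHERIT_SHARE branch for
 * that mapping rather than the usual MAP_INHERIT_COPY one.
 */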

/*
 * uvm_mapent_trymerge: try to merge an entry with its neighbors.
 *
 * => called with map locked.
 * => return non-zero if successfully merged.
 */

int
uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
{
	struct uvm_object *uobj;
	struct vm_map_entry *next;
	struct vm_map_entry *prev;
	vsize_t size;
	int merged = 0;
	bool copying;
	int newetype;

	if (entry->aref.ar_amap != NULL) {
		return 0;
	}
	if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
		return 0;
	}

	uobj = entry->object.uvm_obj;
	size = entry->end - entry->start;
	copying = (flags & UVM_MERGE_COPYING) != 0;
	newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;

	next = entry->next;
	if (next != &map->header &&
	    next->start == entry->end &&
	    ((copying && next->aref.ar_amap != NULL &&
	    amap_refs(next->aref.ar_amap) == 1) ||
	    (!copying && next->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(next, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL || entry->offset + size == next->offset)) {
		int error;

		if (copying) {
			error = amap_extend(next, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
			}

			entry->end = next->end;
			clear_hints(map, next);
			uvm_map_entry_unlink(map, next);
			if (copying) {
				entry->aref = next->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge forwardmerge");
			uvm_mapent_free(next);
			merged++;
		}
	}

	prev = entry->prev;
	if (prev != &map->header &&
	    prev->end == entry->start &&
	    ((copying && !merged && prev->aref.ar_amap != NULL &&
	    amap_refs(prev->aref.ar_amap) == 1) ||
	    (!copying && prev->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(prev, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL ||
	    prev->offset + prev->end - prev->start == entry->offset)) {
		int error;

		if (copying) {
			error = amap_extend(prev, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
				entry->offset = prev->offset;
			}

			entry->start = prev->start;
			clear_hints(map, prev);
			uvm_map_entry_unlink(map, prev);
			if (copying) {
				entry->aref = prev->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge backmerge");
			uvm_mapent_free(prev);
			merged++;
		}
	}

	return merged;
}
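
/*
 * Editor's illustrative note (not part of the original source):
 * merging is purely an optimization and callers can opt out of it.
 * A mapping established with UVM_FLAG_NOMERGE is expected to carry
 * the UVM_MAP_NOMERGE entry flag checked above, so a sketch like the
 * following should keep its entry unmerged:
 *
 *	error = uvm_map(map, &va, size, NULL, UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
 *	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE));
 */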

/*
 * uvm_map_setup: init map
 *
 * => map must not be in service yet.
 */

void
uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags)
{

	rb_tree_init(&map->rb_tree, &uvm_map_tree_ops);
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->ref_count = 1;
	vm_map_setmin(map, vmin);
	vm_map_setmax(map, vmax);
	map->flags = flags;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	map->busy = NULL;

	rw_init(&map->lock);
	cv_init(&map->cv, "vm_map");
	mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE);
}

/*
 *   U N M A P   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "end")
 *
 * => caller must check alignment and size
 * => map must be unlocked (we will lock it)
 * => flags is UVM_FLAG_QUANTUM or 0.
 */

void
uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
{
	struct vm_map_entry *dead_entries;
	UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  (map=0x%x, start=0x%x, end=0x%x)",
	    map, start, end, 0);
	if (map == kernel_map) {
		LOCKDEBUG_MEM_CHECK((void *)start, end - start);
	}
	/*
	 * work now done by helper functions.  wipe the pmap and then
	 * detach from the dead entries...
	 */
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead_entries, flags);
	vm_map_unlock(map);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}
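
/*
 * Editor's illustrative sketch (not part of the original source):
 * most callers use the uvm_unmap() wrapper from uvm_map.h, which is
 * equivalent to calling uvm_unmap1() with flags of 0, e.g. to tear
 * down a previously established kernel mapping:
 *
 *	uvm_unmap(kernel_map, kva, kva + size);
 */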

/*
 * uvm_map_reference: add reference to a map
 *
 * => map need not be locked (we use misc_lock).
 */

void
uvm_map_reference(struct vm_map *map)
{
	mutex_enter(&map->misc_lock);
	map->ref_count++;
	mutex_exit(&map->misc_lock);
}

/*
 * vm_map_starved_p: return true if the map is short on virtual addresses.
 */

bool
vm_map_starved_p(struct vm_map *map)
{

	if ((map->flags & VM_MAP_WANTVA) != 0) {
		return true;
	}
	/*
	 * XXX rough heuristic: consider the map starved once more than
	 * 15/16 of its virtual address range is in use.
	 */
	if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) {
		return true;
	}
	return false;
}

/*
 * uvm_map_lock_entry: lock the amap and the backing object (if any) of
 * a map entry.
 */

void
uvm_map_lock_entry(struct vm_map_entry *entry)
{

	if (entry->aref.ar_amap != NULL) {
		amap_lock(entry->aref.ar_amap);
	}
	if (UVM_ET_ISOBJ(entry)) {
		mutex_enter(entry->object.uvm_obj->vmobjlock);
	}
}

/*
 * uvm_map_unlock_entry: release the locks taken by uvm_map_lock_entry.
 */

void
uvm_map_unlock_entry(struct vm_map_entry *entry)
{

	if (UVM_ET_ISOBJ(entry)) {
		mutex_exit(entry->object.uvm_obj->vmobjlock);
	}
	if (entry->aref.ar_amap != NULL) {
		amap_unlock(entry->aref.ar_amap);
	}
}

#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_map_printit: actually prints the map
 */

void
uvm_map_printit(struct vm_map *map, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_map_entry *entry;

	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map),
	    vm_map_max(map));
	(*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
	    map->nentries, map->size, map->ref_count, map->timestamp,
	    map->flags);
	(*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
	    pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
	if (!full)
		return;
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
		    entry, entry->start, entry->end, entry->object.uvm_obj,
		    (long long)entry->offset, entry->aref.ar_amap,
		    entry->aref.ar_pageoff);
		(*pr)(
		    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
		    "wc=%d, adv=%d\n",
		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
		    entry->protection, entry->max_protection,
		    entry->inheritance, entry->wired_count, entry->advice);
	}
}

void
uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...))
{
	struct vm_map *map;

	for (map = kernel_map;;) {
		struct vm_map_entry *entry;

		if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) {
			break;
		}
		(*pr)("%p is %p+%zu from VMMAP %p\n",
		    (void *)addr, (void *)entry->start,
		    (size_t)(addr - (uintptr_t)entry->start), map);
		if (!UVM_ET_ISSUBMAP(entry)) {
			break;
		}
		map = entry->object.sub_map;
	}
}

#endif /* DDB || DEBUGPRINT */

#ifndef __USER_VA0_IS_SAFE
static int
sysctl_user_va0_disable(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int t, error;

	node = *rnode;
	node.sysctl_data = &t;
	t = user_va0_disable;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (!t && user_va0_disable &&
	    kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0,
	    NULL, NULL, NULL))
		return EPERM;

	user_va0_disable = !!t;
	return 0;
}

SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "user_va0_disable",
	    SYSCTL_DESCR("Disable VA 0"),
	    sysctl_user_va0_disable, 0, &user_va0_disable, 0,
	    CTL_VM, CTL_CREATE, CTL_EOL);
}
#endif
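
/*
 * Editor's illustrative note (not part of the original source): on
 * ports without __USER_VA0_IS_SAFE the node created above appears as
 * vm.user_va0_disable and can be inspected or toggled with sysctl(8),
 * e.g.:
 *
 *	sysctl -w vm.user_va0_disable=1
 */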