1 /* $NetBSD: uvm_map.c,v 1.324 2012/11/02 16:43:16 matt Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993, The Regents of the University of California. 6 * 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * The Mach Operating System project at Carnegie-Mellon University. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 37 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 38 * 39 * 40 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 41 * All rights reserved. 42 * 43 * Permission to use, copy, modify and distribute this software and 44 * its documentation is hereby granted, provided that both the copyright 45 * notice and this permission notice appear in all copies of the 46 * software, derivative works or modified versions, and any portions 47 * thereof, and that both notices appear in supporting documentation. 48 * 49 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 50 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 51 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 52 * 53 * Carnegie Mellon requests users of this software to return to 54 * 55 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 56 * School of Computer Science 57 * Carnegie Mellon University 58 * Pittsburgh PA 15213-3890 59 * 60 * any improvements or extensions that they make and grant Carnegie the 61 * rights to redistribute these changes. 
62 */ 63 64 /* 65 * uvm_map.c: uvm map operations 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.324 2012/11/02 16:43:16 matt Exp $"); 70 71 #include "opt_ddb.h" 72 #include "opt_uvmhist.h" 73 #include "opt_uvm.h" 74 #include "opt_sysv.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/mman.h> 79 #include <sys/proc.h> 80 #include <sys/pool.h> 81 #include <sys/kernel.h> 82 #include <sys/mount.h> 83 #include <sys/vnode.h> 84 #include <sys/lockdebug.h> 85 #include <sys/atomic.h> 86 #ifndef __USER_VA0_IS_SAFE 87 #include <sys/sysctl.h> 88 #include <sys/kauth.h> 89 #include "opt_user_va0_disable_default.h" 90 #endif 91 92 #ifdef SYSVSHM 93 #include <sys/shm.h> 94 #endif 95 96 #include <uvm/uvm.h> 97 #include <uvm/uvm_readahead.h> 98 99 #if defined(DDB) || defined(DEBUGPRINT) 100 #include <uvm/uvm_ddb.h> 101 #endif 102 103 #ifdef UVMHIST 104 UVMHIST_DEFINE(maphist); 105 #endif 106 107 #if !defined(UVMMAP_COUNTERS) 108 109 #define UVMMAP_EVCNT_DEFINE(name) /* nothing */ 110 #define UVMMAP_EVCNT_INCR(ev) /* nothing */ 111 #define UVMMAP_EVCNT_DECR(ev) /* nothing */ 112 113 #else /* defined(UVMMAP_NOCOUNTERS) */ 114 115 #include <sys/evcnt.h> 116 #define UVMMAP_EVCNT_DEFINE(name) \ 117 struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 118 "uvmmap", #name); \ 119 EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name); 120 #define UVMMAP_EVCNT_INCR(ev) uvmmap_evcnt_##ev.ev_count++ 121 #define UVMMAP_EVCNT_DECR(ev) uvmmap_evcnt_##ev.ev_count-- 122 123 #endif /* defined(UVMMAP_NOCOUNTERS) */ 124 125 UVMMAP_EVCNT_DEFINE(ubackmerge) 126 UVMMAP_EVCNT_DEFINE(uforwmerge) 127 UVMMAP_EVCNT_DEFINE(ubimerge) 128 UVMMAP_EVCNT_DEFINE(unomerge) 129 UVMMAP_EVCNT_DEFINE(kbackmerge) 130 UVMMAP_EVCNT_DEFINE(kforwmerge) 131 UVMMAP_EVCNT_DEFINE(kbimerge) 132 UVMMAP_EVCNT_DEFINE(knomerge) 133 UVMMAP_EVCNT_DEFINE(map_call) 134 UVMMAP_EVCNT_DEFINE(mlk_call) 135 UVMMAP_EVCNT_DEFINE(mlk_hint) 136 UVMMAP_EVCNT_DEFINE(mlk_list) 137 UVMMAP_EVCNT_DEFINE(mlk_tree) 138 UVMMAP_EVCNT_DEFINE(mlk_treeloop) 139 UVMMAP_EVCNT_DEFINE(mlk_listloop) 140 141 const char vmmapbsy[] = "vmmapbsy"; 142 143 /* 144 * cache for vmspace structures. 145 */ 146 147 static struct pool_cache uvm_vmspace_cache; 148 149 /* 150 * cache for dynamically-allocated map entries. 151 */ 152 153 static struct pool_cache uvm_map_entry_cache; 154 155 #ifdef PMAP_GROWKERNEL 156 /* 157 * This global represents the end of the kernel virtual address 158 * space. If we want to exceed this, we must grow the kernel 159 * virtual address space dynamically. 160 * 161 * Note, this variable is locked by kernel_map's lock. 
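 *
 * A minimal sketch of how it is consumed (the authoritative code path is
 * uvm_map_prepare() later in this file): when a kernel_map allocation
 * would run past uvm_maxkaddr, the kernel page tables are grown first:
 *
 *	if (map == kernel_map && uvm_maxkaddr < (start + size))
 *		uvm_maxkaddr = pmap_growkernel(start + size);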
162 */ 163 vaddr_t uvm_maxkaddr; 164 #endif 165 166 #ifndef __USER_VA0_IS_SAFE 167 #ifndef __USER_VA0_DISABLE_DEFAULT 168 #define __USER_VA0_DISABLE_DEFAULT 1 169 #endif 170 #ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */ 171 #undef __USER_VA0_DISABLE_DEFAULT 172 #define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT 173 #endif 174 static int user_va0_disable = __USER_VA0_DISABLE_DEFAULT; 175 #endif 176 177 /* 178 * macros 179 */ 180 181 /* 182 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging 183 */ 184 extern struct vm_map *pager_map; 185 186 #define UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \ 187 prot, maxprot, inh, adv, wire) \ 188 ((ent)->etype == (type) && \ 189 (((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \ 190 (ent)->object.uvm_obj == (uobj) && \ 191 (ent)->protection == (prot) && \ 192 (ent)->max_protection == (maxprot) && \ 193 (ent)->inheritance == (inh) && \ 194 (ent)->advice == (adv) && \ 195 (ent)->wired_count == (wire)) 196 197 /* 198 * uvm_map_entry_link: insert entry into a map 199 * 200 * => map must be locked 201 */ 202 #define uvm_map_entry_link(map, after_where, entry) do { \ 203 uvm_mapent_check(entry); \ 204 (map)->nentries++; \ 205 (entry)->prev = (after_where); \ 206 (entry)->next = (after_where)->next; \ 207 (entry)->prev->next = (entry); \ 208 (entry)->next->prev = (entry); \ 209 uvm_rb_insert((map), (entry)); \ 210 } while (/*CONSTCOND*/ 0) 211 212 /* 213 * uvm_map_entry_unlink: remove entry from a map 214 * 215 * => map must be locked 216 */ 217 #define uvm_map_entry_unlink(map, entry) do { \ 218 KASSERT((entry) != (map)->first_free); \ 219 KASSERT((entry) != (map)->hint); \ 220 uvm_mapent_check(entry); \ 221 (map)->nentries--; \ 222 (entry)->next->prev = (entry)->prev; \ 223 (entry)->prev->next = (entry)->next; \ 224 uvm_rb_remove((map), (entry)); \ 225 } while (/*CONSTCOND*/ 0) 226 227 /* 228 * SAVE_HINT: saves the specified entry as the hint for future lookups. 229 * 230 * => map need not be locked. 231 */ 232 #define SAVE_HINT(map, check, value) do { \ 233 if ((map)->hint == (check)) \ 234 (map)->hint = (value); \ 235 } while (/*CONSTCOND*/ 0) 236 237 /* 238 * clear_hints: ensure that hints don't point to the entry. 239 * 240 * => map must be write-locked. 
241 */ 242 static void 243 clear_hints(struct vm_map *map, struct vm_map_entry *ent) 244 { 245 246 SAVE_HINT(map, ent, ent->prev); 247 if (map->first_free == ent) { 248 map->first_free = ent->prev; 249 } 250 } 251 252 /* 253 * VM_MAP_RANGE_CHECK: check and correct range 254 * 255 * => map must at least be read locked 256 */ 257 258 #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 259 if (start < vm_map_min(map)) \ 260 start = vm_map_min(map); \ 261 if (end > vm_map_max(map)) \ 262 end = vm_map_max(map); \ 263 if (start > end) \ 264 start = end; \ 265 } while (/*CONSTCOND*/ 0) 266 267 /* 268 * local prototypes 269 */ 270 271 static struct vm_map_entry * 272 uvm_mapent_alloc(struct vm_map *, int); 273 static void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 274 static void uvm_mapent_free(struct vm_map_entry *); 275 #if defined(DEBUG) 276 static void _uvm_mapent_check(const struct vm_map_entry *, const char *, 277 int); 278 #define uvm_mapent_check(map) _uvm_mapent_check(map, __FILE__, __LINE__) 279 #else /* defined(DEBUG) */ 280 #define uvm_mapent_check(e) /* nothing */ 281 #endif /* defined(DEBUG) */ 282 283 static void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 284 static void uvm_map_reference_amap(struct vm_map_entry *, int); 285 static int uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int, 286 int, struct vm_map_entry *); 287 static void uvm_map_unreference_amap(struct vm_map_entry *, int); 288 289 int _uvm_map_sanity(struct vm_map *); 290 int _uvm_tree_sanity(struct vm_map *); 291 static vsize_t uvm_rb_maxgap(const struct vm_map_entry *); 292 293 #define ROOT_ENTRY(map) ((struct vm_map_entry *)(map)->rb_tree.rbt_root) 294 #define LEFT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_left) 295 #define RIGHT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_right) 296 #define PARENT_ENTRY(map, entry) \ 297 (ROOT_ENTRY(map) == (entry) \ 298 ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node)) 299 300 static int 301 uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey) 302 { 303 const struct vm_map_entry *eparent = nparent; 304 const struct vm_map_entry *ekey = nkey; 305 306 KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end); 307 KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end); 308 309 if (eparent->start < ekey->start) 310 return -1; 311 if (eparent->end >= ekey->start) 312 return 1; 313 return 0; 314 } 315 316 static int 317 uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey) 318 { 319 const struct vm_map_entry *eparent = nparent; 320 const vaddr_t va = *(const vaddr_t *) vkey; 321 322 if (eparent->start < va) 323 return -1; 324 if (eparent->end >= va) 325 return 1; 326 return 0; 327 } 328 329 static const rb_tree_ops_t uvm_map_tree_ops = { 330 .rbto_compare_nodes = uvm_map_compare_nodes, 331 .rbto_compare_key = uvm_map_compare_key, 332 .rbto_node_offset = offsetof(struct vm_map_entry, rb_node), 333 .rbto_context = NULL 334 }; 335 336 /* 337 * uvm_rb_gap: return the gap size between our entry and next entry. 338 */ 339 static inline vsize_t 340 uvm_rb_gap(const struct vm_map_entry *entry) 341 { 342 343 KASSERT(entry->next != NULL); 344 return entry->next->start - entry->end; 345 } 346 347 static vsize_t 348 uvm_rb_maxgap(const struct vm_map_entry *entry) 349 { 350 struct vm_map_entry *child; 351 vsize_t maxgap = entry->gap; 352 353 /* 354 * We need maxgap to be the largest gap of us or any of our 355 * descendents. 
Since each of our children's maxgap is the 356 * cached value of their largest gap of themselves or their 357 * descendents, we can just use that value and avoid recursing 358 * down the tree to calculate it. 359 */ 360 if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 361 maxgap = child->maxgap; 362 363 if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 364 maxgap = child->maxgap; 365 366 return maxgap; 367 } 368 369 static void 370 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) 371 { 372 struct vm_map_entry *parent; 373 374 KASSERT(entry->gap == uvm_rb_gap(entry)); 375 entry->maxgap = uvm_rb_maxgap(entry); 376 377 while ((parent = PARENT_ENTRY(map, entry)) != NULL) { 378 struct vm_map_entry *brother; 379 vsize_t maxgap = parent->gap; 380 unsigned int which; 381 382 KDASSERT(parent->gap == uvm_rb_gap(parent)); 383 if (maxgap < entry->maxgap) 384 maxgap = entry->maxgap; 385 /* 386 * Since we work towards the root, we know entry's maxgap 387 * value is OK, but its brothers may now be out-of-date due 388 * to rebalancing. So refresh it. 389 */ 390 which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER; 391 brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which]; 392 if (brother != NULL) { 393 KDASSERT(brother->gap == uvm_rb_gap(brother)); 394 brother->maxgap = uvm_rb_maxgap(brother); 395 if (maxgap < brother->maxgap) 396 maxgap = brother->maxgap; 397 } 398 399 parent->maxgap = maxgap; 400 entry = parent; 401 } 402 } 403 404 static void 405 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) 406 { 407 struct vm_map_entry *ret; 408 409 entry->gap = entry->maxgap = uvm_rb_gap(entry); 410 if (entry->prev != &map->header) 411 entry->prev->gap = uvm_rb_gap(entry->prev); 412 413 ret = rb_tree_insert_node(&map->rb_tree, entry); 414 KASSERTMSG(ret == entry, 415 "uvm_rb_insert: map %p: duplicate entry %p", map, ret); 416 417 /* 418 * If the previous entry is not our immediate left child, then it's an 419 * ancestor and will be fixed up on the way to the root. We don't 420 * have to check entry->prev against &map->header since &map->header 421 * will never be in the tree. 422 */ 423 uvm_rb_fixup(map, 424 LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry); 425 } 426 427 static void 428 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 429 { 430 struct vm_map_entry *prev_parent = NULL, *next_parent = NULL; 431 432 /* 433 * If we are removing an interior node, then an adjacent node will 434 * be used to replace its position in the tree. Therefore we will 435 * need to fixup the tree starting at the parent of the replacement 436 * node. So record their parents for later use. 437 */ 438 if (entry->prev != &map->header) 439 prev_parent = PARENT_ENTRY(map, entry->prev); 440 if (entry->next != &map->header) 441 next_parent = PARENT_ENTRY(map, entry->next); 442 443 rb_tree_remove_node(&map->rb_tree, entry); 444 445 /* 446 * If the previous node has a new parent, fixup the tree starting 447 * at the previous node's old parent. 448 */ 449 if (entry->prev != &map->header) { 450 /* 451 * Update the previous entry's gap due to our absence. 452 */ 453 entry->prev->gap = uvm_rb_gap(entry->prev); 454 uvm_rb_fixup(map, entry->prev); 455 if (prev_parent != NULL 456 && prev_parent != entry 457 && prev_parent != PARENT_ENTRY(map, entry->prev)) 458 uvm_rb_fixup(map, prev_parent); 459 } 460 461 /* 462 * If the next node has a new parent, fixup the tree starting 463 * at the next node's old parent. 
464 */ 465 if (entry->next != &map->header) { 466 uvm_rb_fixup(map, entry->next); 467 if (next_parent != NULL 468 && next_parent != entry 469 && next_parent != PARENT_ENTRY(map, entry->next)) 470 uvm_rb_fixup(map, next_parent); 471 } 472 } 473 474 #if defined(DEBUG) 475 int uvm_debug_check_map = 0; 476 int uvm_debug_check_rbtree = 0; 477 #define uvm_map_check(map, name) \ 478 _uvm_map_check((map), (name), __FILE__, __LINE__) 479 static void 480 _uvm_map_check(struct vm_map *map, const char *name, 481 const char *file, int line) 482 { 483 484 if ((uvm_debug_check_map && _uvm_map_sanity(map)) || 485 (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) { 486 panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)", 487 name, map, file, line); 488 } 489 } 490 #else /* defined(DEBUG) */ 491 #define uvm_map_check(map, name) /* nothing */ 492 #endif /* defined(DEBUG) */ 493 494 #if defined(DEBUG) || defined(DDB) 495 int 496 _uvm_map_sanity(struct vm_map *map) 497 { 498 bool first_free_found = false; 499 bool hint_found = false; 500 const struct vm_map_entry *e; 501 struct vm_map_entry *hint = map->hint; 502 503 e = &map->header; 504 for (;;) { 505 if (map->first_free == e) { 506 first_free_found = true; 507 } else if (!first_free_found && e->next->start > e->end) { 508 printf("first_free %p should be %p\n", 509 map->first_free, e); 510 return -1; 511 } 512 if (hint == e) { 513 hint_found = true; 514 } 515 516 e = e->next; 517 if (e == &map->header) { 518 break; 519 } 520 } 521 if (!first_free_found) { 522 printf("stale first_free\n"); 523 return -1; 524 } 525 if (!hint_found) { 526 printf("stale hint\n"); 527 return -1; 528 } 529 return 0; 530 } 531 532 int 533 _uvm_tree_sanity(struct vm_map *map) 534 { 535 struct vm_map_entry *tmp, *trtmp; 536 int n = 0, i = 1; 537 538 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 539 if (tmp->gap != uvm_rb_gap(tmp)) { 540 printf("%d/%d gap %lx != %lx %s\n", 541 n + 1, map->nentries, 542 (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp), 543 tmp->next == &map->header ? "(last)" : ""); 544 goto error; 545 } 546 /* 547 * If any entries are out of order, tmp->gap will be unsigned 548 * and will likely exceed the size of the map. 
549 */ 550 if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) { 551 printf("too large gap %zu\n", (size_t)tmp->gap); 552 goto error; 553 } 554 n++; 555 } 556 557 if (n != map->nentries) { 558 printf("nentries: %d vs %d\n", n, map->nentries); 559 goto error; 560 } 561 562 trtmp = NULL; 563 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 564 if (tmp->maxgap != uvm_rb_maxgap(tmp)) { 565 printf("maxgap %lx != %lx\n", 566 (ulong)tmp->maxgap, 567 (ulong)uvm_rb_maxgap(tmp)); 568 goto error; 569 } 570 if (trtmp != NULL && trtmp->start >= tmp->start) { 571 printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n", 572 trtmp->start, tmp->start); 573 goto error; 574 } 575 576 trtmp = tmp; 577 } 578 579 for (tmp = map->header.next; tmp != &map->header; 580 tmp = tmp->next, i++) { 581 trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT); 582 if (trtmp == NULL) 583 trtmp = &map->header; 584 if (tmp->prev != trtmp) { 585 printf("lookup: %d: %p->prev=%p: %p\n", 586 i, tmp, tmp->prev, trtmp); 587 goto error; 588 } 589 trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT); 590 if (trtmp == NULL) 591 trtmp = &map->header; 592 if (tmp->next != trtmp) { 593 printf("lookup: %d: %p->next=%p: %p\n", 594 i, tmp, tmp->next, trtmp); 595 goto error; 596 } 597 trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start); 598 if (trtmp != tmp) { 599 printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, 600 PARENT_ENTRY(map, tmp)); 601 goto error; 602 } 603 } 604 605 return (0); 606 error: 607 return (-1); 608 } 609 #endif /* defined(DEBUG) || defined(DDB) */ 610 611 /* 612 * vm_map_lock: acquire an exclusive (write) lock on a map. 613 * 614 * => The locking protocol provides for guaranteed upgrade from shared -> 615 * exclusive by whichever thread currently has the map marked busy. 616 * See "LOCKING PROTOCOL NOTES" in uvm_map.h. This is horrible; among 617 * other problems, it defeats any fairness guarantees provided by RW 618 * locks. 619 */ 620 621 void 622 vm_map_lock(struct vm_map *map) 623 { 624 625 for (;;) { 626 rw_enter(&map->lock, RW_WRITER); 627 if (map->busy == NULL || map->busy == curlwp) { 628 break; 629 } 630 mutex_enter(&map->misc_lock); 631 rw_exit(&map->lock); 632 if (map->busy != NULL) { 633 cv_wait(&map->cv, &map->misc_lock); 634 } 635 mutex_exit(&map->misc_lock); 636 } 637 map->timestamp++; 638 } 639 640 /* 641 * vm_map_lock_try: try to lock a map, failing if it is already locked. 642 */ 643 644 bool 645 vm_map_lock_try(struct vm_map *map) 646 { 647 648 if (!rw_tryenter(&map->lock, RW_WRITER)) { 649 return false; 650 } 651 if (map->busy != NULL) { 652 rw_exit(&map->lock); 653 return false; 654 } 655 map->timestamp++; 656 return true; 657 } 658 659 /* 660 * vm_map_unlock: release an exclusive lock on a map. 661 */ 662 663 void 664 vm_map_unlock(struct vm_map *map) 665 { 666 667 KASSERT(rw_write_held(&map->lock)); 668 KASSERT(map->busy == NULL || map->busy == curlwp); 669 rw_exit(&map->lock); 670 } 671 672 /* 673 * vm_map_unbusy: mark the map as unbusy, and wake any waiters that 674 * want an exclusive lock. 
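 *
 * A sketch of the intended busy/unbusy pattern (illustrative only, pieced
 * together from the assertions in vm_map_busy(), vm_map_unbusy() and
 * vm_map_lock() rather than copied from a particular caller):
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);		(needs the write lock; busy was NULL)
 *	vm_map_unlock(map);
 *	... long-running work; readers may still take the lock, but
 *	    vm_map_lock() callers wait until we unbusy ...
 *	vm_map_lock_read(map);
 *	vm_map_unbusy(map);		(clears busy, wakes waiting writers)
 *	vm_map_unlock_read(map);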
675 */ 676 677 void 678 vm_map_unbusy(struct vm_map *map) 679 { 680 681 KASSERT(map->busy == curlwp); 682 683 /* 684 * Safe to clear 'busy' and 'waiters' with only a read lock held: 685 * 686 * o they can only be set with a write lock held 687 * o writers are blocked out with a read or write hold 688 * o at any time, only one thread owns the set of values 689 */ 690 mutex_enter(&map->misc_lock); 691 map->busy = NULL; 692 cv_broadcast(&map->cv); 693 mutex_exit(&map->misc_lock); 694 } 695 696 /* 697 * vm_map_lock_read: acquire a shared (read) lock on a map. 698 */ 699 700 void 701 vm_map_lock_read(struct vm_map *map) 702 { 703 704 rw_enter(&map->lock, RW_READER); 705 } 706 707 /* 708 * vm_map_unlock_read: release a shared lock on a map. 709 */ 710 711 void 712 vm_map_unlock_read(struct vm_map *map) 713 { 714 715 rw_exit(&map->lock); 716 } 717 718 /* 719 * vm_map_busy: mark a map as busy. 720 * 721 * => the caller must hold the map write locked 722 */ 723 724 void 725 vm_map_busy(struct vm_map *map) 726 { 727 728 KASSERT(rw_write_held(&map->lock)); 729 KASSERT(map->busy == NULL); 730 731 map->busy = curlwp; 732 } 733 734 /* 735 * vm_map_locked_p: return true if the map is write locked. 736 * 737 * => only for debug purposes like KASSERTs. 738 * => should not be used to verify that a map is not locked. 739 */ 740 741 bool 742 vm_map_locked_p(struct vm_map *map) 743 { 744 745 return rw_write_held(&map->lock); 746 } 747 748 /* 749 * uvm_mapent_alloc: allocate a map entry 750 */ 751 752 static struct vm_map_entry * 753 uvm_mapent_alloc(struct vm_map *map, int flags) 754 { 755 struct vm_map_entry *me; 756 int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK; 757 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); 758 759 me = pool_cache_get(&uvm_map_entry_cache, pflags); 760 if (__predict_false(me == NULL)) { 761 return NULL; 762 } 763 me->flags = 0; 764 765 UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, 766 (map == kernel_map), 0, 0); 767 return me; 768 } 769 770 /* 771 * uvm_mapent_free: free map entry 772 */ 773 774 static void 775 uvm_mapent_free(struct vm_map_entry *me) 776 { 777 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); 778 779 UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", 780 me, me->flags, 0, 0); 781 pool_cache_put(&uvm_map_entry_cache, me); 782 } 783 784 /* 785 * uvm_mapent_copy: copy a map entry, preserving flags 786 */ 787 788 static inline void 789 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 790 { 791 792 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 793 ((char *)src)); 794 } 795 796 #if defined(DEBUG) 797 static void 798 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line) 799 { 800 801 if (entry->start >= entry->end) { 802 goto bad; 803 } 804 if (UVM_ET_ISOBJ(entry)) { 805 if (entry->object.uvm_obj == NULL) { 806 goto bad; 807 } 808 } else if (UVM_ET_ISSUBMAP(entry)) { 809 if (entry->object.sub_map == NULL) { 810 goto bad; 811 } 812 } else { 813 if (entry->object.uvm_obj != NULL || 814 entry->object.sub_map != NULL) { 815 goto bad; 816 } 817 } 818 if (!UVM_ET_ISOBJ(entry)) { 819 if (entry->offset != 0) { 820 goto bad; 821 } 822 } 823 824 return; 825 826 bad: 827 panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line); 828 } 829 #endif /* defined(DEBUG) */ 830 831 /* 832 * uvm_map_entry_unwire: unwire a map entry 833 * 834 * => map should be locked by caller 835 */ 836 837 static inline void 838 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry 
*entry) 839 { 840 841 entry->wired_count = 0; 842 uvm_fault_unwire_locked(map, entry->start, entry->end); 843 } 844 845 846 /* 847 * wrapper for calling amap_ref() 848 */ 849 static inline void 850 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 851 { 852 853 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 854 (entry->end - entry->start) >> PAGE_SHIFT, flags); 855 } 856 857 858 /* 859 * wrapper for calling amap_unref() 860 */ 861 static inline void 862 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 863 { 864 865 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 866 (entry->end - entry->start) >> PAGE_SHIFT, flags); 867 } 868 869 870 /* 871 * uvm_map_init: init mapping system at boot time. 872 */ 873 874 void 875 uvm_map_init(void) 876 { 877 #if defined(UVMHIST) 878 static struct kern_history_ent maphistbuf[100]; 879 static struct kern_history_ent pdhistbuf[100]; 880 #endif 881 882 /* 883 * first, init logging system. 884 */ 885 886 UVMHIST_FUNC("uvm_map_init"); 887 UVMHIST_INIT_STATIC(maphist, maphistbuf); 888 UVMHIST_INIT_STATIC(pdhist, pdhistbuf); 889 UVMHIST_CALLED(maphist); 890 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 891 892 /* 893 * initialize the global lock for kernel map entry. 894 */ 895 896 mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); 897 } 898 899 /* 900 * uvm_map_init_caches: init mapping system caches. 901 */ 902 void 903 uvm_map_init_caches(void) 904 { 905 /* 906 * initialize caches. 907 */ 908 909 pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), 910 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); 911 pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), 912 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); 913 } 914 915 /* 916 * clippers 917 */ 918 919 /* 920 * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy. 921 */ 922 923 static void 924 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2, 925 vaddr_t splitat) 926 { 927 vaddr_t adj; 928 929 KASSERT(entry1->start < splitat); 930 KASSERT(splitat < entry1->end); 931 932 adj = splitat - entry1->start; 933 entry1->end = entry2->start = splitat; 934 935 if (entry1->aref.ar_amap) { 936 amap_splitref(&entry1->aref, &entry2->aref, adj); 937 } 938 if (UVM_ET_ISSUBMAP(entry1)) { 939 /* ... unlikely to happen, but play it safe */ 940 uvm_map_reference(entry1->object.sub_map); 941 } else if (UVM_ET_ISOBJ(entry1)) { 942 KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */ 943 entry2->offset += adj; 944 if (entry1->object.uvm_obj->pgops && 945 entry1->object.uvm_obj->pgops->pgo_reference) 946 entry1->object.uvm_obj->pgops->pgo_reference( 947 entry1->object.uvm_obj); 948 } 949 } 950 951 /* 952 * uvm_map_clip_start: ensure that the entry begins at or after 953 * the starting address, if it doesn't we split the entry. 954 * 955 * => caller should use UVM_MAP_CLIP_START macro rather than calling 956 * this directly 957 * => map must be locked by caller 958 */ 959 960 void 961 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, 962 vaddr_t start) 963 { 964 struct vm_map_entry *new_entry; 965 966 /* uvm_map_simplify_entry(map, entry); */ /* XXX */ 967 968 uvm_map_check(map, "clip_start entry"); 969 uvm_mapent_check(entry); 970 971 /* 972 * Split off the front portion. note that we must insert the new 973 * entry BEFORE this one, so that this entry has the specified 974 * starting address. 
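	 *
	 * A worked example (addresses purely illustrative): clipping an
	 * entry that covers [0x1000, 0x4000) at start = 0x2000 allocates
	 * a new entry for [0x1000, 0x2000), links it in just before this
	 * one, and shrinks this entry to [0x2000, 0x4000); the amap and
	 * object bookkeeping is adjusted by uvm_mapent_splitadj() above.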
	 */
	new_entry = uvm_mapent_alloc(map, 0);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(new_entry, entry, start);
	uvm_map_entry_link(map, entry->prev, new_entry);

	uvm_map_check(map, "clip_start leave");
}

/*
 * uvm_map_clip_end: ensure that the entry ends at or before
 *	the ending address, if it doesn't we split the entry.
 *
 * => caller should use UVM_MAP_CLIP_END macro rather than calling
 *    this directly
 * => map must be locked by caller
 */

void
uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
{
	struct vm_map_entry *new_entry;

	uvm_map_check(map, "clip_end entry");
	uvm_mapent_check(entry);

	/*
	 * Create a new entry and insert it
	 * AFTER the specified entry
	 */
	new_entry = uvm_mapent_alloc(map, 0);
	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
	uvm_mapent_splitadj(entry, new_entry, end);
	uvm_map_entry_link(map, entry, new_entry);

	uvm_map_check(map, "clip_end leave");
}

/*
 *   M A P   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvm_map: establish a valid mapping in a map
 *
 * => assume startp is page aligned.
 * => assume size is a multiple of PAGE_SIZE.
 * => assume sys_mmap provides enough of a "hint" to have us skip
 *	over text/data/bss area.
 * => map must be unlocked (we will lock it)
 * => <uobj,uoffset> value meanings (4 cases):
 *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	[3] <uobj,uoffset>		== normal mapping
 *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *    case [4] is for kernel mappings where we don't know the offset until
 *    we've found a virtual address.  note that kernel object offsets are
 *    always relative to vm_map_min(kernel_map).
 *
 * => if `align' is non-zero, we align the virtual address to the specified
 *	alignment.
 *	this is provided as a mechanism for large pages.
 *
 * => XXXCDC: need way to map in external amap?
 */

int
uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
    struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
{
	struct uvm_map_args args;
	struct vm_map_entry *new_entry;
	int error;

	KASSERT((size & PAGE_MASK) == 0);

#ifndef __USER_VA0_IS_SAFE
	if ((flags & UVM_FLAG_FIXED) && *startp == 0 &&
	    !VM_MAP_IS_KERNEL(map) && user_va0_disable)
		return EACCES;
#endif

	/*
	 * for pager_map, allocate the new entry first to avoid sleeping
	 * for memory while we have the map locked.
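	 * (pager_map entries are also never merged: UVM_FLAG_NOMERGE is
	 * forced just below, and a pre-allocated entry that ends up unused
	 * is simply freed again.)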
1060 */ 1061 1062 new_entry = NULL; 1063 if (map == pager_map) { 1064 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT)); 1065 if (__predict_false(new_entry == NULL)) 1066 return ENOMEM; 1067 } 1068 if (map == pager_map) 1069 flags |= UVM_FLAG_NOMERGE; 1070 1071 error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align, 1072 flags, &args); 1073 if (!error) { 1074 error = uvm_map_enter(map, &args, new_entry); 1075 *startp = args.uma_start; 1076 } else if (new_entry) { 1077 uvm_mapent_free(new_entry); 1078 } 1079 1080 #if defined(DEBUG) 1081 if (!error && VM_MAP_IS_KERNEL(map)) { 1082 uvm_km_check_empty(map, *startp, *startp + size); 1083 } 1084 #endif /* defined(DEBUG) */ 1085 1086 return error; 1087 } 1088 1089 /* 1090 * uvm_map_prepare: 1091 * 1092 * called with map unlocked. 1093 * on success, returns the map locked. 1094 */ 1095 1096 int 1097 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, 1098 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 1099 struct uvm_map_args *args) 1100 { 1101 struct vm_map_entry *prev_entry; 1102 vm_prot_t prot = UVM_PROTECTION(flags); 1103 vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1104 1105 UVMHIST_FUNC("uvm_map_prepare"); 1106 UVMHIST_CALLED(maphist); 1107 1108 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1109 map, start, size, flags); 1110 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1111 1112 /* 1113 * detect a popular device driver bug. 1114 */ 1115 1116 KASSERT(doing_shutdown || curlwp != NULL); 1117 1118 /* 1119 * zero-sized mapping doesn't make any sense. 1120 */ 1121 KASSERT(size > 0); 1122 1123 KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0); 1124 1125 uvm_map_check(map, "map entry"); 1126 1127 /* 1128 * check sanity of protection code 1129 */ 1130 1131 if ((prot & maxprot) != prot) { 1132 UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x", 1133 prot, maxprot,0,0); 1134 return EACCES; 1135 } 1136 1137 /* 1138 * figure out where to put new VM range 1139 */ 1140 retry: 1141 if (vm_map_lock_try(map) == false) { 1142 if ((flags & UVM_FLAG_TRYLOCK) != 0) { 1143 return EAGAIN; 1144 } 1145 vm_map_lock(map); /* could sleep here */ 1146 } 1147 prev_entry = uvm_map_findspace(map, start, size, &start, 1148 uobj, uoffset, align, flags); 1149 if (prev_entry == NULL) { 1150 unsigned int timestamp; 1151 1152 timestamp = map->timestamp; 1153 UVMHIST_LOG(maphist,"waiting va timestamp=0x%x", 1154 timestamp,0,0,0); 1155 map->flags |= VM_MAP_WANTVA; 1156 vm_map_unlock(map); 1157 1158 /* 1159 * try to reclaim kva and wait until someone does unmap. 1160 * fragile locking here, so we awaken every second to 1161 * recheck the condition. 1162 */ 1163 1164 mutex_enter(&map->misc_lock); 1165 while ((map->flags & VM_MAP_WANTVA) != 0 && 1166 map->timestamp == timestamp) { 1167 if ((flags & UVM_FLAG_WAITVA) == 0) { 1168 mutex_exit(&map->misc_lock); 1169 UVMHIST_LOG(maphist, 1170 "<- uvm_map_findspace failed!", 0,0,0,0); 1171 return ENOMEM; 1172 } else { 1173 cv_timedwait(&map->cv, &map->misc_lock, hz); 1174 } 1175 } 1176 mutex_exit(&map->misc_lock); 1177 goto retry; 1178 } 1179 1180 #ifdef PMAP_GROWKERNEL 1181 /* 1182 * If the kernel pmap can't map the requested space, 1183 * then allocate more resources for it. 
1184 */ 1185 if (map == kernel_map && uvm_maxkaddr < (start + size)) 1186 uvm_maxkaddr = pmap_growkernel(start + size); 1187 #endif 1188 1189 UVMMAP_EVCNT_INCR(map_call); 1190 1191 /* 1192 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 1193 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 1194 * either case we want to zero it before storing it in the map entry 1195 * (because it looks strange and confusing when debugging...) 1196 * 1197 * if uobj is not null 1198 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 1199 * and we do not need to change uoffset. 1200 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 1201 * now (based on the starting address of the map). this case is 1202 * for kernel object mappings where we don't know the offset until 1203 * the virtual address is found (with uvm_map_findspace). the 1204 * offset is the distance we are from the start of the map. 1205 */ 1206 1207 if (uobj == NULL) { 1208 uoffset = 0; 1209 } else { 1210 if (uoffset == UVM_UNKNOWN_OFFSET) { 1211 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1212 uoffset = start - vm_map_min(kernel_map); 1213 } 1214 } 1215 1216 args->uma_flags = flags; 1217 args->uma_prev = prev_entry; 1218 args->uma_start = start; 1219 args->uma_size = size; 1220 args->uma_uobj = uobj; 1221 args->uma_uoffset = uoffset; 1222 1223 UVMHIST_LOG(maphist, "<- done!", 0,0,0,0); 1224 return 0; 1225 } 1226 1227 /* 1228 * uvm_map_enter: 1229 * 1230 * called with map locked. 1231 * unlock the map before returning. 1232 */ 1233 1234 int 1235 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args, 1236 struct vm_map_entry *new_entry) 1237 { 1238 struct vm_map_entry *prev_entry = args->uma_prev; 1239 struct vm_map_entry *dead = NULL; 1240 1241 const uvm_flag_t flags = args->uma_flags; 1242 const vm_prot_t prot = UVM_PROTECTION(flags); 1243 const vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1244 const vm_inherit_t inherit = UVM_INHERIT(flags); 1245 const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ? 1246 AMAP_EXTEND_NOWAIT : 0; 1247 const int advice = UVM_ADVICE(flags); 1248 1249 vaddr_t start = args->uma_start; 1250 vsize_t size = args->uma_size; 1251 struct uvm_object *uobj = args->uma_uobj; 1252 voff_t uoffset = args->uma_uoffset; 1253 1254 const int kmap = (vm_map_pmap(map) == pmap_kernel()); 1255 int merged = 0; 1256 int error; 1257 int newetype; 1258 1259 UVMHIST_FUNC("uvm_map_enter"); 1260 UVMHIST_CALLED(maphist); 1261 1262 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1263 map, start, size, flags); 1264 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1265 1266 KASSERT(map->hint == prev_entry); /* bimerge case assumes this */ 1267 KASSERT(vm_map_locked_p(map)); 1268 1269 if (uobj) 1270 newetype = UVM_ET_OBJ; 1271 else 1272 newetype = 0; 1273 1274 if (flags & UVM_FLAG_COPYONW) { 1275 newetype |= UVM_ET_COPYONWRITE; 1276 if ((flags & UVM_FLAG_OVERLAY) == 0) 1277 newetype |= UVM_ET_NEEDSCOPY; 1278 } 1279 1280 /* 1281 * try and insert in map by extending previous entry, if possible. 1282 * XXX: we don't try and pull back the next entry. might be useful 1283 * for a stack, but we are currently allocating our stack in advance. 
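	 *
	 * Illustrative back-merge (values hypothetical): if prev_entry
	 * covers [0x1000, 0x2000) and the request is a compatible mapping
	 * of 0x1000 bytes starting at 0x2000, prev_entry is simply grown
	 * to [0x1000, 0x3000) and no new entry is allocated; see
	 * UVM_ET_ISCOMPATIBLE for what "compatible" means here.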
1284 */ 1285 1286 if (flags & UVM_FLAG_NOMERGE) 1287 goto nomerge; 1288 1289 if (prev_entry->end == start && 1290 prev_entry != &map->header && 1291 UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0, 1292 prot, maxprot, inherit, advice, 0)) { 1293 1294 if (uobj && prev_entry->offset + 1295 (prev_entry->end - prev_entry->start) != uoffset) 1296 goto forwardmerge; 1297 1298 /* 1299 * can't extend a shared amap. note: no need to lock amap to 1300 * look at refs since we don't care about its exact value. 1301 * if it is one (i.e. we have only reference) it will stay there 1302 */ 1303 1304 if (prev_entry->aref.ar_amap && 1305 amap_refs(prev_entry->aref.ar_amap) != 1) { 1306 goto forwardmerge; 1307 } 1308 1309 if (prev_entry->aref.ar_amap) { 1310 error = amap_extend(prev_entry, size, 1311 amapwaitflag | AMAP_EXTEND_FORWARDS); 1312 if (error) 1313 goto nomerge; 1314 } 1315 1316 if (kmap) { 1317 UVMMAP_EVCNT_INCR(kbackmerge); 1318 } else { 1319 UVMMAP_EVCNT_INCR(ubackmerge); 1320 } 1321 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 1322 1323 /* 1324 * drop our reference to uobj since we are extending a reference 1325 * that we already have (the ref count can not drop to zero). 1326 */ 1327 1328 if (uobj && uobj->pgops->pgo_detach) 1329 uobj->pgops->pgo_detach(uobj); 1330 1331 /* 1332 * Now that we've merged the entries, note that we've grown 1333 * and our gap has shrunk. Then fix the tree. 1334 */ 1335 prev_entry->end += size; 1336 prev_entry->gap -= size; 1337 uvm_rb_fixup(map, prev_entry); 1338 1339 uvm_map_check(map, "map backmerged"); 1340 1341 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 1342 merged++; 1343 } 1344 1345 forwardmerge: 1346 if (prev_entry->next->start == (start + size) && 1347 prev_entry->next != &map->header && 1348 UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0, 1349 prot, maxprot, inherit, advice, 0)) { 1350 1351 if (uobj && prev_entry->next->offset != uoffset + size) 1352 goto nomerge; 1353 1354 /* 1355 * can't extend a shared amap. note: no need to lock amap to 1356 * look at refs since we don't care about its exact value. 1357 * if it is one (i.e. we have only reference) it will stay there. 1358 * 1359 * note that we also can't merge two amaps, so if we 1360 * merged with the previous entry which has an amap, 1361 * and the next entry also has an amap, we give up. 1362 * 1363 * Interesting cases: 1364 * amap, new, amap -> give up second merge (single fwd extend) 1365 * amap, new, none -> double forward extend (extend again here) 1366 * none, new, amap -> double backward extend (done here) 1367 * uobj, new, amap -> single backward extend (done here) 1368 * 1369 * XXX should we attempt to deal with someone refilling 1370 * the deallocated region between two entries that are 1371 * backed by the same amap (ie, arefs is 2, "prev" and 1372 * "next" refer to it, and adding this allocation will 1373 * close the hole, thus restoring arefs to 1 and 1374 * deallocating the "next" vm_map_entry)? -- @@@ 1375 */ 1376 1377 if (prev_entry->next->aref.ar_amap && 1378 (amap_refs(prev_entry->next->aref.ar_amap) != 1 || 1379 (merged && prev_entry->aref.ar_amap))) { 1380 goto nomerge; 1381 } 1382 1383 if (merged) { 1384 /* 1385 * Try to extend the amap of the previous entry to 1386 * cover the next entry as well. If it doesn't work 1387 * just skip on, don't actually give up, since we've 1388 * already completed the back merge. 
1389 */ 1390 if (prev_entry->aref.ar_amap) { 1391 if (amap_extend(prev_entry, 1392 prev_entry->next->end - 1393 prev_entry->next->start, 1394 amapwaitflag | AMAP_EXTEND_FORWARDS)) 1395 goto nomerge; 1396 } 1397 1398 /* 1399 * Try to extend the amap of the *next* entry 1400 * back to cover the new allocation *and* the 1401 * previous entry as well (the previous merge 1402 * didn't have an amap already otherwise we 1403 * wouldn't be checking here for an amap). If 1404 * it doesn't work just skip on, again, don't 1405 * actually give up, since we've already 1406 * completed the back merge. 1407 */ 1408 else if (prev_entry->next->aref.ar_amap) { 1409 if (amap_extend(prev_entry->next, 1410 prev_entry->end - 1411 prev_entry->start, 1412 amapwaitflag | AMAP_EXTEND_BACKWARDS)) 1413 goto nomerge; 1414 } 1415 } else { 1416 /* 1417 * Pull the next entry's amap backwards to cover this 1418 * new allocation. 1419 */ 1420 if (prev_entry->next->aref.ar_amap) { 1421 error = amap_extend(prev_entry->next, size, 1422 amapwaitflag | AMAP_EXTEND_BACKWARDS); 1423 if (error) 1424 goto nomerge; 1425 } 1426 } 1427 1428 if (merged) { 1429 if (kmap) { 1430 UVMMAP_EVCNT_DECR(kbackmerge); 1431 UVMMAP_EVCNT_INCR(kbimerge); 1432 } else { 1433 UVMMAP_EVCNT_DECR(ubackmerge); 1434 UVMMAP_EVCNT_INCR(ubimerge); 1435 } 1436 } else { 1437 if (kmap) { 1438 UVMMAP_EVCNT_INCR(kforwmerge); 1439 } else { 1440 UVMMAP_EVCNT_INCR(uforwmerge); 1441 } 1442 } 1443 UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0); 1444 1445 /* 1446 * drop our reference to uobj since we are extending a reference 1447 * that we already have (the ref count can not drop to zero). 1448 */ 1449 if (uobj && uobj->pgops->pgo_detach) 1450 uobj->pgops->pgo_detach(uobj); 1451 1452 if (merged) { 1453 dead = prev_entry->next; 1454 prev_entry->end = dead->end; 1455 uvm_map_entry_unlink(map, dead); 1456 if (dead->aref.ar_amap != NULL) { 1457 prev_entry->aref = dead->aref; 1458 dead->aref.ar_amap = NULL; 1459 } 1460 } else { 1461 prev_entry->next->start -= size; 1462 if (prev_entry != &map->header) { 1463 prev_entry->gap -= size; 1464 KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry)); 1465 uvm_rb_fixup(map, prev_entry); 1466 } 1467 if (uobj) 1468 prev_entry->next->offset = uoffset; 1469 } 1470 1471 uvm_map_check(map, "map forwardmerged"); 1472 1473 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0); 1474 merged++; 1475 } 1476 1477 nomerge: 1478 if (!merged) { 1479 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 1480 if (kmap) { 1481 UVMMAP_EVCNT_INCR(knomerge); 1482 } else { 1483 UVMMAP_EVCNT_INCR(unomerge); 1484 } 1485 1486 /* 1487 * allocate new entry and link it in. 1488 */ 1489 1490 if (new_entry == NULL) { 1491 new_entry = uvm_mapent_alloc(map, 1492 (flags & UVM_FLAG_NOWAIT)); 1493 if (__predict_false(new_entry == NULL)) { 1494 error = ENOMEM; 1495 goto done; 1496 } 1497 } 1498 new_entry->start = start; 1499 new_entry->end = new_entry->start + size; 1500 new_entry->object.uvm_obj = uobj; 1501 new_entry->offset = uoffset; 1502 1503 new_entry->etype = newetype; 1504 1505 if (flags & UVM_FLAG_NOMERGE) { 1506 new_entry->flags |= UVM_MAP_NOMERGE; 1507 } 1508 1509 new_entry->protection = prot; 1510 new_entry->max_protection = maxprot; 1511 new_entry->inheritance = inherit; 1512 new_entry->wired_count = 0; 1513 new_entry->advice = advice; 1514 if (flags & UVM_FLAG_OVERLAY) { 1515 1516 /* 1517 * to_add: for BSS we overallocate a little since we 1518 * are likely to extend 1519 */ 1520 1521 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 
1522 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 1523 struct vm_amap *amap = amap_alloc(size, to_add, 1524 (flags & UVM_FLAG_NOWAIT)); 1525 if (__predict_false(amap == NULL)) { 1526 error = ENOMEM; 1527 goto done; 1528 } 1529 new_entry->aref.ar_pageoff = 0; 1530 new_entry->aref.ar_amap = amap; 1531 } else { 1532 new_entry->aref.ar_pageoff = 0; 1533 new_entry->aref.ar_amap = NULL; 1534 } 1535 uvm_map_entry_link(map, prev_entry, new_entry); 1536 1537 /* 1538 * Update the free space hint 1539 */ 1540 1541 if ((map->first_free == prev_entry) && 1542 (prev_entry->end >= new_entry->start)) 1543 map->first_free = new_entry; 1544 1545 new_entry = NULL; 1546 } 1547 1548 map->size += size; 1549 1550 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1551 1552 error = 0; 1553 done: 1554 vm_map_unlock(map); 1555 1556 if (new_entry) { 1557 uvm_mapent_free(new_entry); 1558 } 1559 1560 if (dead) { 1561 KDASSERT(merged); 1562 uvm_mapent_free(dead); 1563 } 1564 1565 return error; 1566 } 1567 1568 /* 1569 * uvm_map_lookup_entry_bytree: lookup an entry in tree 1570 */ 1571 1572 static inline bool 1573 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address, 1574 struct vm_map_entry **entry /* OUT */) 1575 { 1576 struct vm_map_entry *prev = &map->header; 1577 struct vm_map_entry *cur = ROOT_ENTRY(map); 1578 1579 while (cur) { 1580 UVMMAP_EVCNT_INCR(mlk_treeloop); 1581 if (address >= cur->start) { 1582 if (address < cur->end) { 1583 *entry = cur; 1584 return true; 1585 } 1586 prev = cur; 1587 cur = RIGHT_ENTRY(cur); 1588 } else 1589 cur = LEFT_ENTRY(cur); 1590 } 1591 *entry = prev; 1592 return false; 1593 } 1594 1595 /* 1596 * uvm_map_lookup_entry: find map entry at or before an address 1597 * 1598 * => map must at least be read-locked by caller 1599 * => entry is returned in "entry" 1600 * => return value is true if address is in the returned entry 1601 */ 1602 1603 bool 1604 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1605 struct vm_map_entry **entry /* OUT */) 1606 { 1607 struct vm_map_entry *cur; 1608 bool use_tree = false; 1609 UVMHIST_FUNC("uvm_map_lookup_entry"); 1610 UVMHIST_CALLED(maphist); 1611 1612 UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)", 1613 map, address, entry, 0); 1614 1615 /* 1616 * start looking either from the head of the 1617 * list, or from the hint. 1618 */ 1619 1620 cur = map->hint; 1621 1622 if (cur == &map->header) 1623 cur = cur->next; 1624 1625 UVMMAP_EVCNT_INCR(mlk_call); 1626 if (address >= cur->start) { 1627 1628 /* 1629 * go from hint to end of list. 1630 * 1631 * but first, make a quick check to see if 1632 * we are already looking at the entry we 1633 * want (which is usually the case). 1634 * note also that we don't need to save the hint 1635 * here... it is the same hint (unless we are 1636 * at the header, in which case the hint didn't 1637 * buy us anything anyway). 1638 */ 1639 1640 if (cur != &map->header && cur->end > address) { 1641 UVMMAP_EVCNT_INCR(mlk_hint); 1642 *entry = cur; 1643 UVMHIST_LOG(maphist,"<- got it via hint (0x%x)", 1644 cur, 0, 0, 0); 1645 uvm_mapent_check(*entry); 1646 return (true); 1647 } 1648 1649 if (map->nentries > 15) 1650 use_tree = true; 1651 } else { 1652 1653 /* 1654 * invalid hint. use tree. 1655 */ 1656 use_tree = true; 1657 } 1658 1659 uvm_map_check(map, __func__); 1660 1661 if (use_tree) { 1662 /* 1663 * Simple lookup in the tree. Happens when the hint is 1664 * invalid, or nentries reach a threshold. 
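		 * (The threshold is the "nentries > 15" test above; for
		 * small maps the linear scan below is assumed to be
		 * cheaper than descending the tree.)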
		 */
		UVMMAP_EVCNT_INCR(mlk_tree);
		if (uvm_map_lookup_entry_bytree(map, address, entry)) {
			goto got;
		} else {
			goto failed;
		}
	}

	/*
	 * search linearly
	 */

	UVMMAP_EVCNT_INCR(mlk_list);
	while (cur != &map->header) {
		UVMMAP_EVCNT_INCR(mlk_listloop);
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * save this lookup for future
				 * hints, and return
				 */

				*entry = cur;
got:
				SAVE_HINT(map, map->hint, *entry);
				UVMHIST_LOG(maphist,"<- search got it (0x%x)",
				    cur, 0, 0, 0);
				KDASSERT((*entry)->start <= address);
				KDASSERT(address < (*entry)->end);
				uvm_mapent_check(*entry);
				return (true);
			}
			break;
		}
		cur = cur->next;
	}
	*entry = cur->prev;
failed:
	SAVE_HINT(map, map->hint, *entry);
	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
	KDASSERT((*entry)->next == &map->header ||
	    address < (*entry)->next->start);
	return (false);
}

/*
 * See if the range between start and start + length fits in the gap
 * between entry->end and entry->next->start.  Returns 1 if it fits,
 * 0 if it doesn't, and -1 if the address wraps around.
 */
static int
uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
    vsize_t align, int flags, int topdown, struct vm_map_entry *entry)
{
	vaddr_t end;

#ifdef PMAP_PREFER
	/*
	 * push start address forward as needed to avoid VAC alias problems.
	 * we only do this if a valid offset is specified.
	 */

	if (uoffset != UVM_UNKNOWN_OFFSET)
		PMAP_PREFER(uoffset, start, length, topdown);
#endif
	if ((flags & UVM_FLAG_COLORMATCH) != 0) {
		KASSERT(align < uvmexp.ncolors);
		if (uvmexp.ncolors > 1) {
			const u_int colormask = uvmexp.colormask;
			const u_int colorsize = colormask + 1;
			vaddr_t hint = atop(*start);
			const u_int color = hint & colormask;
			if (color != align) {
				hint -= color;	/* adjust to color boundary */
				KASSERT((hint & colormask) == 0);
				if (topdown) {
					if (align > color)
						hint -= colorsize;
				} else {
					if (align < color)
						hint += colorsize;
				}
				*start = ptoa(hint + align); /* adjust to color */
			}
		}
	} else if (align != 0) {
		if ((*start & (align - 1)) != 0) {
			if (topdown)
				*start &= ~(align - 1);
			else
				*start = roundup(*start, align);
		}
		/*
		 * XXX Should we PMAP_PREFER() here again?
		 * eh...i think we're okay
		 */
	}

	/*
	 * Find the end of the proposed new region.  Be sure we didn't
	 * wrap around the address; if so, we lose.  Otherwise, if the
	 * proposed new region fits before the next entry, we win.
	 */

	end = *start + length;
	if (end < *start)
		return (-1);

	if (entry->next->start >= end && *start >= entry->end)
		return (1);

	return (0);
}

/*
 * uvm_map_findspace: find "length" sized space in "map".
 *
 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
 *	set in "flags" (in which case we insist on using "hint").
 * => "result" is VA returned
 * => uobj/uoffset are to be used to handle VAC alignment, if required
 * => if "align" is non-zero, we attempt to align to that value.
1789 * => caller must at least have read-locked map 1790 * => returns NULL on failure, or pointer to prev. map entry if success 1791 * => note this is a cross between the old vm_map_findspace and vm_map_find 1792 */ 1793 1794 struct vm_map_entry * 1795 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1796 vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, 1797 vsize_t align, int flags) 1798 { 1799 struct vm_map_entry *entry; 1800 struct vm_map_entry *child, *prev, *tmp; 1801 vaddr_t orig_hint; 1802 const int topdown = map->flags & VM_MAP_TOPDOWN; 1803 UVMHIST_FUNC("uvm_map_findspace"); 1804 UVMHIST_CALLED(maphist); 1805 1806 UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", 1807 map, hint, length, flags); 1808 KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || (align & (align - 1)) == 0); 1809 KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors); 1810 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1811 1812 uvm_map_check(map, "map_findspace entry"); 1813 1814 /* 1815 * remember the original hint. if we are aligning, then we 1816 * may have to try again with no alignment constraint if 1817 * we fail the first time. 1818 */ 1819 1820 orig_hint = hint; 1821 if (hint < vm_map_min(map)) { /* check ranges ... */ 1822 if (flags & UVM_FLAG_FIXED) { 1823 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1824 return (NULL); 1825 } 1826 hint = vm_map_min(map); 1827 } 1828 if (hint > vm_map_max(map)) { 1829 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]", 1830 hint, vm_map_min(map), vm_map_max(map), 0); 1831 return (NULL); 1832 } 1833 1834 /* 1835 * Look for the first possible address; if there's already 1836 * something at this address, we have to start after it. 1837 */ 1838 1839 /* 1840 * @@@: there are four, no, eight cases to consider. 1841 * 1842 * 0: found, fixed, bottom up -> fail 1843 * 1: found, fixed, top down -> fail 1844 * 2: found, not fixed, bottom up -> start after entry->end, 1845 * loop up 1846 * 3: found, not fixed, top down -> start before entry->start, 1847 * loop down 1848 * 4: not found, fixed, bottom up -> check entry->next->start, fail 1849 * 5: not found, fixed, top down -> check entry->next->start, fail 1850 * 6: not found, not fixed, bottom up -> check entry->next->start, 1851 * loop up 1852 * 7: not found, not fixed, top down -> check entry->next->start, 1853 * loop down 1854 * 1855 * as you can see, it reduces to roughly five cases, and that 1856 * adding top down mapping only adds one unique case (without 1857 * it, there would be four cases). 1858 */ 1859 1860 if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) { 1861 entry = map->first_free; 1862 } else { 1863 if (uvm_map_lookup_entry(map, hint, &entry)) { 1864 /* "hint" address already in use ... */ 1865 if (flags & UVM_FLAG_FIXED) { 1866 UVMHIST_LOG(maphist, "<- fixed & VA in use", 1867 0, 0, 0, 0); 1868 return (NULL); 1869 } 1870 if (topdown) 1871 /* Start from lower gap. */ 1872 entry = entry->prev; 1873 } else if (flags & UVM_FLAG_FIXED) { 1874 if (entry->next->start >= hint + length && 1875 hint + length > hint) 1876 goto found; 1877 1878 /* "hint" address is gap but too small */ 1879 UVMHIST_LOG(maphist, "<- fixed mapping failed", 1880 0, 0, 0, 0); 1881 return (NULL); /* only one shot at it ... */ 1882 } else { 1883 /* 1884 * See if given hint fits in this gap. 
			 */
			switch (uvm_map_space_avail(&hint, length,
			    uoffset, align, flags, topdown, entry)) {
			case 1:
				goto found;
			case -1:
				goto wraparound;
			}

			if (topdown) {
				/*
				 * There is still a chance to fit
				 * if hint > entry->end.
				 */
			} else {
				/* Start from higher gap. */
				entry = entry->next;
				if (entry == &map->header)
					goto notfound;
				goto nextgap;
			}
		}
	}

	/*
	 * Note that the UVM_FLAG_FIXED case has already been handled.
	 */
	KDASSERT((flags & UVM_FLAG_FIXED) == 0);

	/* Try to find the space in the red-black tree */

	/* Check slot before any entry */
	hint = topdown ? entry->next->start - length : entry->end;
	switch (uvm_map_space_avail(&hint, length, uoffset, align, flags,
	    topdown, entry)) {
	case 1:
		goto found;
	case -1:
		goto wraparound;
	}

nextgap:
	KDASSERT((flags & UVM_FLAG_FIXED) == 0);
	/* If there is not enough space in the whole tree, we fail */
	tmp = ROOT_ENTRY(map);
	if (tmp == NULL || tmp->maxgap < length)
		goto notfound;

	prev = NULL; /* previous candidate */

	/* Find an entry close to hint that has enough space */
	for (; tmp;) {
		KASSERT(tmp->next->start == tmp->end + tmp->gap);
		if (topdown) {
			if (tmp->next->start < hint + length &&
			    (prev == NULL || tmp->end > prev->end)) {
				if (tmp->gap >= length)
					prev = tmp;
				else if ((child = LEFT_ENTRY(tmp)) != NULL
				    && child->maxgap >= length)
					prev = tmp;
			}
		} else {
			if (tmp->end >= hint &&
			    (prev == NULL || tmp->end < prev->end)) {
				if (tmp->gap >= length)
					prev = tmp;
				else if ((child = RIGHT_ENTRY(tmp)) != NULL
				    && child->maxgap >= length)
					prev = tmp;
			}
		}
		if (tmp->next->start < hint + length)
			child = RIGHT_ENTRY(tmp);
		else if (tmp->end > hint)
			child = LEFT_ENTRY(tmp);
		else {
			if (tmp->gap >= length)
				break;
			if (topdown)
				child = LEFT_ENTRY(tmp);
			else
				child = RIGHT_ENTRY(tmp);
		}
		if (child == NULL || child->maxgap < length)
			break;
		tmp = child;
	}

	if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
		/*
		 * Check if the entry that we found satisfies the
		 * space requirement
		 */
		if (topdown) {
			if (hint > tmp->next->start - length)
				hint = tmp->next->start - length;
		} else {
			if (hint < tmp->end)
				hint = tmp->end;
		}
		switch (uvm_map_space_avail(&hint, length, uoffset, align,
		    flags, topdown, tmp)) {
		case 1:
			entry = tmp;
			goto found;
		case -1:
			goto wraparound;
		}
		if (tmp->gap >= length)
			goto listsearch;
	}
	if (prev == NULL)
		goto notfound;

	if (topdown) {
		KASSERT(orig_hint >= prev->next->start - length ||
		    prev->next->start - length > prev->next->start);
		hint = prev->next->start - length;
	} else {
		KASSERT(orig_hint <= prev->end);
		hint = prev->end;
	}
	switch (uvm_map_space_avail(&hint, length, uoffset, align,
	    flags, topdown, prev)) {
	case 1:
		entry = prev;
		goto found;
	case -1:
		goto wraparound;
	}
	if (prev->gap >= length)
		goto listsearch;

	if (topdown)
		tmp = LEFT_ENTRY(prev);
	else
		tmp = RIGHT_ENTRY(prev);
	for (;;) {
		KASSERT(tmp && tmp->maxgap >= length);
		if (topdown)
			child = RIGHT_ENTRY(tmp);
		else
			child = LEFT_ENTRY(tmp);
		if (child && child->maxgap >= length) {
			tmp = child;
			continue;
		}
		if (tmp->gap >= length)
			break;
		if (topdown)
			tmp = LEFT_ENTRY(tmp);
		else
			tmp = RIGHT_ENTRY(tmp);
	}

	if (topdown) {
		KASSERT(orig_hint >= tmp->next->start - length ||
		    tmp->next->start - length > tmp->next->start);
		hint = tmp->next->start - length;
	} else {
		KASSERT(orig_hint <= tmp->end);
		hint = tmp->end;
	}
	switch (uvm_map_space_avail(&hint, length, uoffset, align,
	    flags, topdown, tmp)) {
	case 1:
		entry = tmp;
		goto found;
	case -1:
		goto wraparound;
	}

	/*
	 * The tree fails to find an entry because of offset or alignment
	 * restrictions.  Search the list instead.
	 */
 listsearch:
	/*
	 * Look through the rest of the map, trying to fit a new region in
	 * the gap between existing regions, or after the very last region.
	 * note: entry->end = base VA of current gap,
	 *	 entry->next->start = VA of end of current gap
	 */

	for (;;) {
		/* Update hint for current gap. */
		hint = topdown ? entry->next->start - length : entry->end;

		/* See if it fits. */
		switch (uvm_map_space_avail(&hint, length, uoffset, align,
		    flags, topdown, entry)) {
		case 1:
			goto found;
		case -1:
			goto wraparound;
		}

		/* Advance to next/previous gap */
		if (topdown) {
			if (entry == &map->header) {
				UVMHIST_LOG(maphist, "<- failed (off start)",
				    0,0,0,0);
				goto notfound;
			}
			entry = entry->prev;
		} else {
			entry = entry->next;
			if (entry == &map->header) {
				UVMHIST_LOG(maphist, "<- failed (off end)",
				    0,0,0,0);
				goto notfound;
			}
		}
	}

 found:
	SAVE_HINT(map, map->hint, entry);
	*result = hint;
	UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0);
	KASSERT( topdown || hint >= orig_hint);
	KASSERT(!topdown || hint <= orig_hint);
	KASSERT(entry->end <= hint);
	KASSERT(hint + length <= entry->next->start);
	return (entry);

 wraparound:
	UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);

	return (NULL);

 notfound:
	UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);

	return (NULL);
}

/*
 *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
 */

/*
 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "end")
 *
 * => caller must check alignment and size
 * => map must be locked by caller
 * => we return a list of map entries that we've removed from the map
 *    in "entry_list"
 */

void
uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map_entry **entry_list /* OUT */, int flags)
{
	struct vm_map_entry *entry, *first_entry, *next;
	vaddr_t len;
	UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)",
	    map, start, end, 0);
	VM_MAP_RANGE_CHECK(map, start, end);

	uvm_map_check(map, "unmap_remove entry");

	/*
	 * find first entry
	 */

	if (uvm_map_lookup_entry(map, start, &first_entry) == true) {
		/* clip and go... */
		entry = first_entry;
		UVM_MAP_CLIP_START(map, entry, start);
		/* critical!
prevents stale hint */ 2158 SAVE_HINT(map, entry, entry->prev); 2159 } else { 2160 entry = first_entry->next; 2161 } 2162 2163 /* 2164 * Save the free space hint 2165 */ 2166 2167 if (map->first_free != &map->header && map->first_free->start >= start) 2168 map->first_free = entry->prev; 2169 2170 /* 2171 * note: we now re-use first_entry for a different task. we remove 2172 * a number of map entries from the map and save them in a linked 2173 * list headed by "first_entry". once we remove them from the map 2174 * the caller should unlock the map and drop the references to the 2175 * backing objects [c.f. uvm_unmap_detach]. the object is to 2176 * separate unmapping from reference dropping. why? 2177 * [1] the map has to be locked for unmapping 2178 * [2] the map need not be locked for reference dropping 2179 * [3] dropping references may trigger pager I/O, and if we hit 2180 * a pager that does synchronous I/O we may have to wait for it. 2181 * [4] we would like all waiting for I/O to occur with maps unlocked 2182 * so that we don't block other threads. 2183 */ 2184 2185 first_entry = NULL; 2186 *entry_list = NULL; 2187 2188 /* 2189 * break up the area into map entry sized regions and unmap. note 2190 * that all mappings have to be removed before we can even consider 2191 * dropping references to amaps or VM objects (otherwise we could end 2192 * up with a mapping to a page on the free list which would be very bad) 2193 */ 2194 2195 while ((entry != &map->header) && (entry->start < end)) { 2196 KASSERT((entry->flags & UVM_MAP_STATIC) == 0); 2197 2198 UVM_MAP_CLIP_END(map, entry, end); 2199 next = entry->next; 2200 len = entry->end - entry->start; 2201 2202 /* 2203 * unwire before removing addresses from the pmap; otherwise 2204 * unwiring will put the entries back into the pmap (XXX). 2205 */ 2206 2207 if (VM_MAPENT_ISWIRED(entry)) { 2208 uvm_map_entry_unwire(map, entry); 2209 } 2210 if (flags & UVM_FLAG_VAONLY) { 2211 2212 /* nothing */ 2213 2214 } else if ((map->flags & VM_MAP_PAGEABLE) == 0) { 2215 2216 /* 2217 * if the map is non-pageable, any pages mapped there 2218 * must be wired and entered with pmap_kenter_pa(), 2219 * and we should free any such pages immediately. 2220 * this is mostly used for kmem_map. 2221 */ 2222 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2223 2224 uvm_km_pgremove_intrsafe(map, entry->start, entry->end); 2225 } else if (UVM_ET_ISOBJ(entry) && 2226 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2227 panic("%s: kernel object %p %p\n", 2228 __func__, map, entry); 2229 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2230 /* 2231 * remove mappings the standard way. lock object 2232 * and/or amap to ensure vm_page state does not 2233 * change while in pmap_remove(). 2234 */ 2235 2236 uvm_map_lock_entry(entry); 2237 pmap_remove(map->pmap, entry->start, entry->end); 2238 uvm_map_unlock_entry(entry); 2239 } 2240 2241 #if defined(DEBUG) 2242 /* 2243 * check if there's remaining mapping, 2244 * which is a bug in caller. 2245 */ 2246 2247 vaddr_t va; 2248 for (va = entry->start; va < entry->end; 2249 va += PAGE_SIZE) { 2250 if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2251 panic("%s: %#"PRIxVADDR" has mapping", 2252 __func__, va); 2253 } 2254 } 2255 2256 if (VM_MAP_IS_KERNEL(map)) { 2257 uvm_km_check_empty(map, entry->start, 2258 entry->end); 2259 } 2260 #endif /* defined(DEBUG) */ 2261 2262 /* 2263 * remove entry from map and put it on our list of entries 2264 * that we've nuked. then go to next entry. 
2265 */ 2266 2267 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); 2268 2269 /* critical! prevents stale hint */ 2270 SAVE_HINT(map, entry, entry->prev); 2271 2272 uvm_map_entry_unlink(map, entry); 2273 KASSERT(map->size >= len); 2274 map->size -= len; 2275 entry->prev = NULL; 2276 entry->next = first_entry; 2277 first_entry = entry; 2278 entry = next; 2279 } 2280 2281 /* 2282 * Note: if map is dying, leave pmap_update() for pmap_destroy(), 2283 * which will be called later. 2284 */ 2285 if ((map->flags & VM_MAP_DYING) == 0) { 2286 pmap_update(vm_map_pmap(map)); 2287 } else { 2288 KASSERT(vm_map_pmap(map) != pmap_kernel()); 2289 } 2290 2291 uvm_map_check(map, "unmap_remove leave"); 2292 2293 /* 2294 * now we've cleaned up the map and are ready for the caller to drop 2295 * references to the mapped objects. 2296 */ 2297 2298 *entry_list = first_entry; 2299 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2300 2301 if (map->flags & VM_MAP_WANTVA) { 2302 mutex_enter(&map->misc_lock); 2303 map->flags &= ~VM_MAP_WANTVA; 2304 cv_broadcast(&map->cv); 2305 mutex_exit(&map->misc_lock); 2306 } 2307 } 2308 2309 /* 2310 * uvm_unmap_detach: drop references in a chain of map entries 2311 * 2312 * => we will free the map entries as we traverse the list. 2313 */ 2314 2315 void 2316 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2317 { 2318 struct vm_map_entry *next_entry; 2319 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 2320 2321 while (first_entry) { 2322 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2323 UVMHIST_LOG(maphist, 2324 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", 2325 first_entry, first_entry->aref.ar_amap, 2326 first_entry->object.uvm_obj, 2327 UVM_ET_ISSUBMAP(first_entry)); 2328 2329 /* 2330 * drop reference to amap, if we've got one 2331 */ 2332 2333 if (first_entry->aref.ar_amap) 2334 uvm_map_unreference_amap(first_entry, flags); 2335 2336 /* 2337 * drop reference to our backing object, if we've got one 2338 */ 2339 2340 KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2341 if (UVM_ET_ISOBJ(first_entry) && 2342 first_entry->object.uvm_obj->pgops->pgo_detach) { 2343 (*first_entry->object.uvm_obj->pgops->pgo_detach) 2344 (first_entry->object.uvm_obj); 2345 } 2346 next_entry = first_entry->next; 2347 uvm_mapent_free(first_entry); 2348 first_entry = next_entry; 2349 } 2350 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2351 } 2352 2353 /* 2354 * E X T R A C T I O N F U N C T I O N S 2355 */ 2356 2357 /* 2358 * uvm_map_reserve: reserve space in a vm_map for future use. 2359 * 2360 * => we reserve space in a map by putting a dummy map entry in the 2361 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2362 * => map should be unlocked (we will write lock it) 2363 * => we return true if we were able to reserve space 2364 * => XXXCDC: should be inline? 2365 */ 2366 2367 int 2368 uvm_map_reserve(struct vm_map *map, vsize_t size, 2369 vaddr_t offset /* hint for pmap_prefer */, 2370 vsize_t align /* alignment */, 2371 vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2372 uvm_flag_t flags /* UVM_FLAG_FIXED or UVM_FLAG_COLORMATCH or 0 */) 2373 { 2374 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 2375 2376 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", 2377 map,size,offset,raddr); 2378 2379 size = round_page(size); 2380 2381 /* 2382 * reserve some virtual space. 
2383 */ 2384 2385 if (uvm_map(map, raddr, size, NULL, offset, align, 2386 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, 2387 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) { 2388 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); 2389 return (false); 2390 } 2391 2392 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); 2393 return (true); 2394 } 2395 2396 /* 2397 * uvm_map_replace: replace a reserved (blank) area of memory with 2398 * real mappings. 2399 * 2400 * => caller must WRITE-LOCK the map 2401 * => we return true if replacement was a success 2402 * => we expect the newents chain to have nnewents entrys on it and 2403 * we expect newents->prev to point to the last entry on the list 2404 * => note newents is allowed to be NULL 2405 */ 2406 2407 static int 2408 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end, 2409 struct vm_map_entry *newents, int nnewents, vsize_t nsize, 2410 struct vm_map_entry **oldentryp) 2411 { 2412 struct vm_map_entry *oldent, *last; 2413 2414 uvm_map_check(map, "map_replace entry"); 2415 2416 /* 2417 * first find the blank map entry at the specified address 2418 */ 2419 2420 if (!uvm_map_lookup_entry(map, start, &oldent)) { 2421 return (false); 2422 } 2423 2424 /* 2425 * check to make sure we have a proper blank entry 2426 */ 2427 2428 if (end < oldent->end) { 2429 UVM_MAP_CLIP_END(map, oldent, end); 2430 } 2431 if (oldent->start != start || oldent->end != end || 2432 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { 2433 return (false); 2434 } 2435 2436 #ifdef DIAGNOSTIC 2437 2438 /* 2439 * sanity check the newents chain 2440 */ 2441 2442 { 2443 struct vm_map_entry *tmpent = newents; 2444 int nent = 0; 2445 vsize_t sz = 0; 2446 vaddr_t cur = start; 2447 2448 while (tmpent) { 2449 nent++; 2450 sz += tmpent->end - tmpent->start; 2451 if (tmpent->start < cur) 2452 panic("uvm_map_replace1"); 2453 if (tmpent->start >= tmpent->end || tmpent->end > end) { 2454 panic("uvm_map_replace2: " 2455 "tmpent->start=0x%"PRIxVADDR 2456 ", tmpent->end=0x%"PRIxVADDR 2457 ", end=0x%"PRIxVADDR, 2458 tmpent->start, tmpent->end, end); 2459 } 2460 cur = tmpent->end; 2461 if (tmpent->next) { 2462 if (tmpent->next->prev != tmpent) 2463 panic("uvm_map_replace3"); 2464 } else { 2465 if (newents->prev != tmpent) 2466 panic("uvm_map_replace4"); 2467 } 2468 tmpent = tmpent->next; 2469 } 2470 if (nent != nnewents) 2471 panic("uvm_map_replace5"); 2472 if (sz != nsize) 2473 panic("uvm_map_replace6"); 2474 } 2475 #endif 2476 2477 /* 2478 * map entry is a valid blank! replace it. (this does all the 2479 * work of map entry link/unlink...). 
2480 */ 2481 2482 if (newents) { 2483 last = newents->prev; 2484 2485 /* critical: flush stale hints out of map */ 2486 SAVE_HINT(map, map->hint, newents); 2487 if (map->first_free == oldent) 2488 map->first_free = last; 2489 2490 last->next = oldent->next; 2491 last->next->prev = last; 2492 2493 /* Fix RB tree */ 2494 uvm_rb_remove(map, oldent); 2495 2496 newents->prev = oldent->prev; 2497 newents->prev->next = newents; 2498 map->nentries = map->nentries + (nnewents - 1); 2499 2500 /* Fixup the RB tree */ 2501 { 2502 int i; 2503 struct vm_map_entry *tmp; 2504 2505 tmp = newents; 2506 for (i = 0; i < nnewents && tmp; i++) { 2507 uvm_rb_insert(map, tmp); 2508 tmp = tmp->next; 2509 } 2510 } 2511 } else { 2512 /* NULL list of new entries: just remove the old one */ 2513 clear_hints(map, oldent); 2514 uvm_map_entry_unlink(map, oldent); 2515 } 2516 map->size -= end - start - nsize; 2517 2518 uvm_map_check(map, "map_replace leave"); 2519 2520 /* 2521 * now we can free the old blank entry and return. 2522 */ 2523 2524 *oldentryp = oldent; 2525 return (true); 2526 } 2527 2528 /* 2529 * uvm_map_extract: extract a mapping from a map and put it somewhere 2530 * (maybe removing the old mapping) 2531 * 2532 * => maps should be unlocked (we will write lock them) 2533 * => returns 0 on success, error code otherwise 2534 * => start must be page aligned 2535 * => len must be page sized 2536 * => flags: 2537 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2538 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2539 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2540 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2541 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2542 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2543 * be used from within the kernel in a kernel level map <<< 2544 */ 2545 2546 int 2547 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2548 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2549 { 2550 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2551 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2552 *deadentry, *oldentry; 2553 struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */ 2554 vsize_t elen; 2555 int nchain, error, copy_ok; 2556 vsize_t nsize; 2557 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 2558 2559 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, 2560 len,0); 2561 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); 2562 2563 /* 2564 * step 0: sanity check: start must be on a page boundary, length 2565 * must be page sized. can't ask for CONTIG/QREF if you asked for 2566 * REMOVE. 
2567 */ 2568 2569 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 2570 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2571 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2572 2573 /* 2574 * step 1: reserve space in the target map for the extracted area 2575 */ 2576 2577 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2578 dstaddr = vm_map_min(dstmap); 2579 if (!uvm_map_reserve(dstmap, len, start, 2580 atop(start) & uvmexp.colormask, &dstaddr, 2581 UVM_FLAG_COLORMATCH)) 2582 return (ENOMEM); 2583 KASSERT((atop(start ^ dstaddr) & uvmexp.colormask) == 0); 2584 *dstaddrp = dstaddr; /* pass address back to caller */ 2585 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); 2586 } else { 2587 dstaddr = *dstaddrp; 2588 } 2589 2590 /* 2591 * step 2: setup for the extraction process loop by init'ing the 2592 * map entry chain, locking src map, and looking up the first useful 2593 * entry in the map. 2594 */ 2595 2596 end = start + len; 2597 newend = dstaddr + len; 2598 chain = endchain = NULL; 2599 nchain = 0; 2600 nsize = 0; 2601 vm_map_lock(srcmap); 2602 2603 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2604 2605 /* "start" is within an entry */ 2606 if (flags & UVM_EXTRACT_QREF) { 2607 2608 /* 2609 * for quick references we don't clip the entry, so 2610 * the entry may map space "before" the starting 2611 * virtual address... this is the "fudge" factor 2612 * (which can be non-zero only the first time 2613 * through the "while" loop in step 3). 2614 */ 2615 2616 fudge = start - entry->start; 2617 } else { 2618 2619 /* 2620 * normal reference: we clip the map to fit (thus 2621 * fudge is zero) 2622 */ 2623 2624 UVM_MAP_CLIP_START(srcmap, entry, start); 2625 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2626 fudge = 0; 2627 } 2628 } else { 2629 2630 /* "start" is not within an entry ... skip to next entry */ 2631 if (flags & UVM_EXTRACT_CONTIG) { 2632 error = EINVAL; 2633 goto bad; /* definite hole here ... */ 2634 } 2635 2636 entry = entry->next; 2637 fudge = 0; 2638 } 2639 2640 /* save values from srcmap for step 6 */ 2641 orig_entry = entry; 2642 orig_fudge = fudge; 2643 2644 /* 2645 * step 3: now start looping through the map entries, extracting 2646 * as we go. 2647 */ 2648 2649 while (entry->start < end && entry != &srcmap->header) { 2650 2651 /* if we are not doing a quick reference, clip it */ 2652 if ((flags & UVM_EXTRACT_QREF) == 0) 2653 UVM_MAP_CLIP_END(srcmap, entry, end); 2654 2655 /* clear needs_copy (allow chunking) */ 2656 if (UVM_ET_ISNEEDSCOPY(entry)) { 2657 amap_copy(srcmap, entry, 2658 AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2659 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2660 error = ENOMEM; 2661 goto bad; 2662 } 2663 2664 /* amap_copy could clip (during chunk)! 
update fudge */ 2665 if (fudge) { 2666 fudge = start - entry->start; 2667 orig_fudge = fudge; 2668 } 2669 } 2670 2671 /* calculate the offset of this from "start" */ 2672 oldoffset = (entry->start + fudge) - start; 2673 2674 /* allocate a new map entry */ 2675 newentry = uvm_mapent_alloc(dstmap, 0); 2676 if (newentry == NULL) { 2677 error = ENOMEM; 2678 goto bad; 2679 } 2680 2681 /* set up new map entry */ 2682 newentry->next = NULL; 2683 newentry->prev = endchain; 2684 newentry->start = dstaddr + oldoffset; 2685 newentry->end = 2686 newentry->start + (entry->end - (entry->start + fudge)); 2687 if (newentry->end > newend || newentry->end < newentry->start) 2688 newentry->end = newend; 2689 newentry->object.uvm_obj = entry->object.uvm_obj; 2690 if (newentry->object.uvm_obj) { 2691 if (newentry->object.uvm_obj->pgops->pgo_reference) 2692 newentry->object.uvm_obj->pgops-> 2693 pgo_reference(newentry->object.uvm_obj); 2694 newentry->offset = entry->offset + fudge; 2695 } else { 2696 newentry->offset = 0; 2697 } 2698 newentry->etype = entry->etype; 2699 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2700 entry->max_protection : entry->protection; 2701 newentry->max_protection = entry->max_protection; 2702 newentry->inheritance = entry->inheritance; 2703 newentry->wired_count = 0; 2704 newentry->aref.ar_amap = entry->aref.ar_amap; 2705 if (newentry->aref.ar_amap) { 2706 newentry->aref.ar_pageoff = 2707 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2708 uvm_map_reference_amap(newentry, AMAP_SHARED | 2709 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2710 } else { 2711 newentry->aref.ar_pageoff = 0; 2712 } 2713 newentry->advice = entry->advice; 2714 if ((flags & UVM_EXTRACT_QREF) != 0) { 2715 newentry->flags |= UVM_MAP_NOMERGE; 2716 } 2717 2718 /* now link it on the chain */ 2719 nchain++; 2720 nsize += newentry->end - newentry->start; 2721 if (endchain == NULL) { 2722 chain = endchain = newentry; 2723 } else { 2724 endchain->next = newentry; 2725 endchain = newentry; 2726 } 2727 2728 /* end of 'while' loop! */ 2729 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2730 (entry->next == &srcmap->header || 2731 entry->next->start != entry->end)) { 2732 error = EINVAL; 2733 goto bad; 2734 } 2735 entry = entry->next; 2736 fudge = 0; 2737 } 2738 2739 /* 2740 * step 4: close off chain (in format expected by uvm_map_replace) 2741 */ 2742 2743 if (chain) 2744 chain->prev = endchain; 2745 2746 /* 2747 * step 5: attempt to lock the dest map so we can pmap_copy. 
2748 * note usage of copy_ok: 2749 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2750 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2751 */ 2752 2753 if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) { 2754 copy_ok = 1; 2755 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2756 nchain, nsize, &resentry)) { 2757 if (srcmap != dstmap) 2758 vm_map_unlock(dstmap); 2759 error = EIO; 2760 goto bad; 2761 } 2762 } else { 2763 copy_ok = 0; 2764 /* replace defered until step 7 */ 2765 } 2766 2767 /* 2768 * step 6: traverse the srcmap a second time to do the following: 2769 * - if we got a lock on the dstmap do pmap_copy 2770 * - if UVM_EXTRACT_REMOVE remove the entries 2771 * we make use of orig_entry and orig_fudge (saved in step 2) 2772 */ 2773 2774 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2775 2776 /* purge possible stale hints from srcmap */ 2777 if (flags & UVM_EXTRACT_REMOVE) { 2778 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2779 if (srcmap->first_free != &srcmap->header && 2780 srcmap->first_free->start >= start) 2781 srcmap->first_free = orig_entry->prev; 2782 } 2783 2784 entry = orig_entry; 2785 fudge = orig_fudge; 2786 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2787 2788 while (entry->start < end && entry != &srcmap->header) { 2789 if (copy_ok) { 2790 oldoffset = (entry->start + fudge) - start; 2791 elen = MIN(end, entry->end) - 2792 (entry->start + fudge); 2793 pmap_copy(dstmap->pmap, srcmap->pmap, 2794 dstaddr + oldoffset, elen, 2795 entry->start + fudge); 2796 } 2797 2798 /* we advance "entry" in the following if statement */ 2799 if (flags & UVM_EXTRACT_REMOVE) { 2800 uvm_map_lock_entry(entry); 2801 pmap_remove(srcmap->pmap, entry->start, 2802 entry->end); 2803 uvm_map_unlock_entry(entry); 2804 oldentry = entry; /* save entry */ 2805 entry = entry->next; /* advance */ 2806 uvm_map_entry_unlink(srcmap, oldentry); 2807 /* add to dead list */ 2808 oldentry->next = deadentry; 2809 deadentry = oldentry; 2810 } else { 2811 entry = entry->next; /* advance */ 2812 } 2813 2814 /* end of 'while' loop */ 2815 fudge = 0; 2816 } 2817 pmap_update(srcmap->pmap); 2818 2819 /* 2820 * unlock dstmap. we will dispose of deadentry in 2821 * step 7 if needed 2822 */ 2823 2824 if (copy_ok && srcmap != dstmap) 2825 vm_map_unlock(dstmap); 2826 2827 } else { 2828 deadentry = NULL; 2829 } 2830 2831 /* 2832 * step 7: we are done with the source map, unlock. if copy_ok 2833 * is 0 then we have not replaced the dummy mapping in dstmap yet 2834 * and we need to do so now. 2835 */ 2836 2837 vm_map_unlock(srcmap); 2838 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2839 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2840 2841 /* now do the replacement if we didn't do it in step 5 */ 2842 if (copy_ok == 0) { 2843 vm_map_lock(dstmap); 2844 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2845 nchain, nsize, &resentry); 2846 vm_map_unlock(dstmap); 2847 2848 if (error == false) { 2849 error = EIO; 2850 goto bad2; 2851 } 2852 } 2853 2854 if (resentry != NULL) 2855 uvm_mapent_free(resentry); 2856 2857 return (0); 2858 2859 /* 2860 * bad: failure recovery 2861 */ 2862 bad: 2863 vm_map_unlock(srcmap); 2864 bad2: /* src already unlocked */ 2865 if (chain) 2866 uvm_unmap_detach(chain, 2867 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); 2868 2869 if (resentry != NULL) 2870 uvm_mapent_free(resentry); 2871 2872 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2873 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? 
*/ 2874 } 2875 return (error); 2876 } 2877 2878 /* end of extraction functions */ 2879 2880 /* 2881 * uvm_map_submap: punch down part of a map into a submap 2882 * 2883 * => only the kernel_map is allowed to be submapped 2884 * => the purpose of submapping is to break up the locking granularity 2885 * of a larger map 2886 * => the range specified must have been mapped previously with a uvm_map() 2887 * call [with uobj==NULL] to create a blank map entry in the main map. 2888 * [And it had better still be blank!] 2889 * => maps which contain submaps should never be copied or forked. 2890 * => to remove a submap, use uvm_unmap() on the main map 2891 * and then uvm_map_deallocate() the submap. 2892 * => main map must be unlocked. 2893 * => submap must have been init'd and have a zero reference count. 2894 * [need not be locked as we don't actually reference it] 2895 */ 2896 2897 int 2898 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2899 struct vm_map *submap) 2900 { 2901 struct vm_map_entry *entry; 2902 int error; 2903 2904 vm_map_lock(map); 2905 VM_MAP_RANGE_CHECK(map, start, end); 2906 2907 if (uvm_map_lookup_entry(map, start, &entry)) { 2908 UVM_MAP_CLIP_START(map, entry, start); 2909 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 2910 } else { 2911 entry = NULL; 2912 } 2913 2914 if (entry != NULL && 2915 entry->start == start && entry->end == end && 2916 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2917 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2918 entry->etype |= UVM_ET_SUBMAP; 2919 entry->object.sub_map = submap; 2920 entry->offset = 0; 2921 uvm_map_reference(submap); 2922 error = 0; 2923 } else { 2924 error = EINVAL; 2925 } 2926 vm_map_unlock(map); 2927 2928 return error; 2929 } 2930 2931 /* 2932 * uvm_map_protect: change map protection 2933 * 2934 * => set_max means set max_protection. 2935 * => map must be unlocked. 2936 */ 2937 2938 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2939 ~VM_PROT_WRITE : VM_PROT_ALL) 2940 2941 int 2942 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2943 vm_prot_t new_prot, bool set_max) 2944 { 2945 struct vm_map_entry *current, *entry; 2946 int error = 0; 2947 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 2948 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", 2949 map, start, end, new_prot); 2950 2951 vm_map_lock(map); 2952 VM_MAP_RANGE_CHECK(map, start, end); 2953 if (uvm_map_lookup_entry(map, start, &entry)) { 2954 UVM_MAP_CLIP_START(map, entry, start); 2955 } else { 2956 entry = entry->next; 2957 } 2958 2959 /* 2960 * make a first pass to check for protection violations. 2961 */ 2962 2963 current = entry; 2964 while ((current != &map->header) && (current->start < end)) { 2965 if (UVM_ET_ISSUBMAP(current)) { 2966 error = EINVAL; 2967 goto out; 2968 } 2969 if ((new_prot & current->max_protection) != new_prot) { 2970 error = EACCES; 2971 goto out; 2972 } 2973 /* 2974 * Don't allow VM_PROT_EXECUTE to be set on entries that 2975 * point to vnodes that are associated with a NOEXEC file 2976 * system. 2977 */ 2978 if (UVM_ET_ISOBJ(current) && 2979 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 2980 struct vnode *vp = 2981 (struct vnode *) current->object.uvm_obj; 2982 2983 if ((new_prot & VM_PROT_EXECUTE) != 0 && 2984 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 2985 error = EACCES; 2986 goto out; 2987 } 2988 } 2989 2990 current = current->next; 2991 } 2992 2993 /* go back and fix up protections (no need to clip this time). 
*/ 2994 2995 current = entry; 2996 while ((current != &map->header) && (current->start < end)) { 2997 vm_prot_t old_prot; 2998 2999 UVM_MAP_CLIP_END(map, current, end); 3000 old_prot = current->protection; 3001 if (set_max) 3002 current->protection = 3003 (current->max_protection = new_prot) & old_prot; 3004 else 3005 current->protection = new_prot; 3006 3007 /* 3008 * update physical map if necessary. worry about copy-on-write 3009 * here -- CHECK THIS XXX 3010 */ 3011 3012 if (current->protection != old_prot) { 3013 /* update pmap! */ 3014 uvm_map_lock_entry(current); 3015 pmap_protect(map->pmap, current->start, current->end, 3016 current->protection & MASK(entry)); 3017 uvm_map_unlock_entry(current); 3018 3019 /* 3020 * If this entry points at a vnode, and the 3021 * protection includes VM_PROT_EXECUTE, mark 3022 * the vnode as VEXECMAP. 3023 */ 3024 if (UVM_ET_ISOBJ(current)) { 3025 struct uvm_object *uobj = 3026 current->object.uvm_obj; 3027 3028 if (UVM_OBJ_IS_VNODE(uobj) && 3029 (current->protection & VM_PROT_EXECUTE)) { 3030 vn_markexec((struct vnode *) uobj); 3031 } 3032 } 3033 } 3034 3035 /* 3036 * If the map is configured to lock any future mappings, 3037 * wire this entry now if the old protection was VM_PROT_NONE 3038 * and the new protection is not VM_PROT_NONE. 3039 */ 3040 3041 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3042 VM_MAPENT_ISWIRED(entry) == 0 && 3043 old_prot == VM_PROT_NONE && 3044 new_prot != VM_PROT_NONE) { 3045 if (uvm_map_pageable(map, entry->start, 3046 entry->end, false, 3047 UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 3048 3049 /* 3050 * If locking the entry fails, remember the 3051 * error if it's the first one. Note we 3052 * still continue setting the protection in 3053 * the map, but will return the error 3054 * condition regardless. 3055 * 3056 * XXX Ignore what the actual error is, 3057 * XXX just call it a resource shortage 3058 * XXX so that it doesn't get confused 3059 * XXX what uvm_map_protect() itself would 3060 * XXX normally return. 3061 */ 3062 3063 error = ENOMEM; 3064 } 3065 } 3066 current = current->next; 3067 } 3068 pmap_update(map->pmap); 3069 3070 out: 3071 vm_map_unlock(map); 3072 3073 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); 3074 return error; 3075 } 3076 3077 #undef MASK 3078 3079 /* 3080 * uvm_map_inherit: set inheritance code for range of addrs in map. 3081 * 3082 * => map must be unlocked 3083 * => note that the inherit code is used during a "fork". see fork 3084 * code for details. 
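 *
 * => illustrative usage sketch (not taken from the sources; "p", "va" and
 *    "len" are hypothetical caller variables): a caller that wants a range
 *    copied rather than shared across a fork would do roughly
 *
 *	error = uvm_map_inherit(&p->p_vmspace->vm_map, va, va + len,
 *	    MAP_INHERIT_COPY);
 *	if (error != 0)
 *		return error;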
3085 */ 3086 3087 int 3088 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3089 vm_inherit_t new_inheritance) 3090 { 3091 struct vm_map_entry *entry, *temp_entry; 3092 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 3093 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", 3094 map, start, end, new_inheritance); 3095 3096 switch (new_inheritance) { 3097 case MAP_INHERIT_NONE: 3098 case MAP_INHERIT_COPY: 3099 case MAP_INHERIT_SHARE: 3100 break; 3101 default: 3102 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3103 return EINVAL; 3104 } 3105 3106 vm_map_lock(map); 3107 VM_MAP_RANGE_CHECK(map, start, end); 3108 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3109 entry = temp_entry; 3110 UVM_MAP_CLIP_START(map, entry, start); 3111 } else { 3112 entry = temp_entry->next; 3113 } 3114 while ((entry != &map->header) && (entry->start < end)) { 3115 UVM_MAP_CLIP_END(map, entry, end); 3116 entry->inheritance = new_inheritance; 3117 entry = entry->next; 3118 } 3119 vm_map_unlock(map); 3120 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3121 return 0; 3122 } 3123 3124 /* 3125 * uvm_map_advice: set advice code for range of addrs in map. 3126 * 3127 * => map must be unlocked 3128 */ 3129 3130 int 3131 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3132 { 3133 struct vm_map_entry *entry, *temp_entry; 3134 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 3135 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", 3136 map, start, end, new_advice); 3137 3138 vm_map_lock(map); 3139 VM_MAP_RANGE_CHECK(map, start, end); 3140 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3141 entry = temp_entry; 3142 UVM_MAP_CLIP_START(map, entry, start); 3143 } else { 3144 entry = temp_entry->next; 3145 } 3146 3147 /* 3148 * XXXJRT: disallow holes? 3149 */ 3150 3151 while ((entry != &map->header) && (entry->start < end)) { 3152 UVM_MAP_CLIP_END(map, entry, end); 3153 3154 switch (new_advice) { 3155 case MADV_NORMAL: 3156 case MADV_RANDOM: 3157 case MADV_SEQUENTIAL: 3158 /* nothing special here */ 3159 break; 3160 3161 default: 3162 vm_map_unlock(map); 3163 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3164 return EINVAL; 3165 } 3166 entry->advice = new_advice; 3167 entry = entry->next; 3168 } 3169 3170 vm_map_unlock(map); 3171 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3172 return 0; 3173 } 3174 3175 /* 3176 * uvm_map_willneed: apply MADV_WILLNEED 3177 */ 3178 3179 int 3180 uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end) 3181 { 3182 struct vm_map_entry *entry; 3183 UVMHIST_FUNC("uvm_map_willneed"); UVMHIST_CALLED(maphist); 3184 UVMHIST_LOG(maphist,"(map=0x%lx,start=0x%lx,end=0x%lx)", 3185 map, start, end, 0); 3186 3187 vm_map_lock_read(map); 3188 VM_MAP_RANGE_CHECK(map, start, end); 3189 if (!uvm_map_lookup_entry(map, start, &entry)) { 3190 entry = entry->next; 3191 } 3192 while (entry->start < end) { 3193 struct vm_amap * const amap = entry->aref.ar_amap; 3194 struct uvm_object * const uobj = entry->object.uvm_obj; 3195 3196 KASSERT(entry != &map->header); 3197 KASSERT(start < entry->end); 3198 /* 3199 * For now, we handle only the easy but commonly-requested case. 3200 * ie. start prefetching of backing uobj pages. 3201 * 3202 * XXX It might be useful to pmap_enter() the already-in-core 3203 * pages by inventing a "weak" mode for uvm_fault() which would 3204 * only do the PGO_LOCKED pgo_get(). 
3205 */ 3206 if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) { 3207 off_t offset; 3208 off_t size; 3209 3210 offset = entry->offset; 3211 if (start < entry->start) { 3212 offset += entry->start - start; 3213 } 3214 size = entry->offset + (entry->end - entry->start); 3215 if (entry->end < end) { 3216 size -= end - entry->end; 3217 } 3218 uvm_readahead(uobj, offset, size); 3219 } 3220 entry = entry->next; 3221 } 3222 vm_map_unlock_read(map); 3223 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3224 return 0; 3225 } 3226 3227 /* 3228 * uvm_map_pageable: sets the pageability of a range in a map. 3229 * 3230 * => wires map entries. should not be used for transient page locking. 3231 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3232 * => regions specified as not pageable require lock-down (wired) memory 3233 * and page tables. 3234 * => map must never be read-locked 3235 * => if islocked is true, map is already write-locked 3236 * => we always unlock the map, since we must downgrade to a read-lock 3237 * to call uvm_fault_wire() 3238 * => XXXCDC: check this and try and clean it up. 3239 */ 3240 3241 int 3242 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3243 bool new_pageable, int lockflags) 3244 { 3245 struct vm_map_entry *entry, *start_entry, *failed_entry; 3246 int rv; 3247 #ifdef DIAGNOSTIC 3248 u_int timestamp_save; 3249 #endif 3250 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 3251 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", 3252 map, start, end, new_pageable); 3253 KASSERT(map->flags & VM_MAP_PAGEABLE); 3254 3255 if ((lockflags & UVM_LK_ENTER) == 0) 3256 vm_map_lock(map); 3257 VM_MAP_RANGE_CHECK(map, start, end); 3258 3259 /* 3260 * only one pageability change may take place at one time, since 3261 * uvm_fault_wire assumes it will be called only once for each 3262 * wiring/unwiring. therefore, we have to make sure we're actually 3263 * changing the pageability for the entire region. we do so before 3264 * making any changes. 3265 */ 3266 3267 if (uvm_map_lookup_entry(map, start, &start_entry) == false) { 3268 if ((lockflags & UVM_LK_EXIT) == 0) 3269 vm_map_unlock(map); 3270 3271 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3272 return EFAULT; 3273 } 3274 entry = start_entry; 3275 3276 /* 3277 * handle wiring and unwiring separately. 3278 */ 3279 3280 if (new_pageable) { /* unwire */ 3281 UVM_MAP_CLIP_START(map, entry, start); 3282 3283 /* 3284 * unwiring. first ensure that the range to be unwired is 3285 * really wired down and that there are no holes. 3286 */ 3287 3288 while ((entry != &map->header) && (entry->start < end)) { 3289 if (entry->wired_count == 0 || 3290 (entry->end < end && 3291 (entry->next == &map->header || 3292 entry->next->start > entry->end))) { 3293 if ((lockflags & UVM_LK_EXIT) == 0) 3294 vm_map_unlock(map); 3295 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3296 return EINVAL; 3297 } 3298 entry = entry->next; 3299 } 3300 3301 /* 3302 * POSIX 1003.1b - a single munlock call unlocks a region, 3303 * regardless of the number of mlock calls made on that 3304 * region. 
3305 */ 3306 3307 entry = start_entry; 3308 while ((entry != &map->header) && (entry->start < end)) { 3309 UVM_MAP_CLIP_END(map, entry, end); 3310 if (VM_MAPENT_ISWIRED(entry)) 3311 uvm_map_entry_unwire(map, entry); 3312 entry = entry->next; 3313 } 3314 if ((lockflags & UVM_LK_EXIT) == 0) 3315 vm_map_unlock(map); 3316 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3317 return 0; 3318 } 3319 3320 /* 3321 * wire case: in two passes [XXXCDC: ugly block of code here] 3322 * 3323 * 1: holding the write lock, we create any anonymous maps that need 3324 * to be created. then we clip each map entry to the region to 3325 * be wired and increment its wiring count. 3326 * 3327 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3328 * in the pages for any newly wired area (wired_count == 1). 3329 * 3330 * downgrading to a read lock for uvm_fault_wire avoids a possible 3331 * deadlock with another thread that may have faulted on one of 3332 * the pages to be wired (it would mark the page busy, blocking 3333 * us, then in turn block on the map lock that we hold). because 3334 * of problems in the recursive lock package, we cannot upgrade 3335 * to a write lock in vm_map_lookup. thus, any actions that 3336 * require the write lock must be done beforehand. because we 3337 * keep the read lock on the map, the copy-on-write status of the 3338 * entries we modify here cannot change. 3339 */ 3340 3341 while ((entry != &map->header) && (entry->start < end)) { 3342 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3343 3344 /* 3345 * perform actions of vm_map_lookup that need the 3346 * write lock on the map: create an anonymous map 3347 * for a copy-on-write region, or an anonymous map 3348 * for a zero-fill region. (XXXCDC: submap case 3349 * ok?) 3350 */ 3351 3352 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3353 if (UVM_ET_ISNEEDSCOPY(entry) && 3354 ((entry->max_protection & VM_PROT_WRITE) || 3355 (entry->object.uvm_obj == NULL))) { 3356 amap_copy(map, entry, 0, start, end); 3357 /* XXXCDC: wait OK? */ 3358 } 3359 } 3360 } 3361 UVM_MAP_CLIP_START(map, entry, start); 3362 UVM_MAP_CLIP_END(map, entry, end); 3363 entry->wired_count++; 3364 3365 /* 3366 * Check for holes 3367 */ 3368 3369 if (entry->protection == VM_PROT_NONE || 3370 (entry->end < end && 3371 (entry->next == &map->header || 3372 entry->next->start > entry->end))) { 3373 3374 /* 3375 * found one. amap creation actions do not need to 3376 * be undone, but the wired counts need to be restored. 3377 */ 3378 3379 while (entry != &map->header && entry->end > start) { 3380 entry->wired_count--; 3381 entry = entry->prev; 3382 } 3383 if ((lockflags & UVM_LK_EXIT) == 0) 3384 vm_map_unlock(map); 3385 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3386 return EINVAL; 3387 } 3388 entry = entry->next; 3389 } 3390 3391 /* 3392 * Pass 2. 3393 */ 3394 3395 #ifdef DIAGNOSTIC 3396 timestamp_save = map->timestamp; 3397 #endif 3398 vm_map_busy(map); 3399 vm_map_unlock(map); 3400 3401 rv = 0; 3402 entry = start_entry; 3403 while (entry != &map->header && entry->start < end) { 3404 if (entry->wired_count == 1) { 3405 rv = uvm_fault_wire(map, entry->start, entry->end, 3406 entry->max_protection, 1); 3407 if (rv) { 3408 3409 /* 3410 * wiring failed. break out of the loop. 3411 * we'll clean up the map below, once we 3412 * have a write lock again. 3413 */ 3414 3415 break; 3416 } 3417 } 3418 entry = entry->next; 3419 } 3420 3421 if (rv) { /* failed? */ 3422 3423 /* 3424 * Get back to an exclusive (write) lock. 
3425 */ 3426 3427 vm_map_lock(map); 3428 vm_map_unbusy(map); 3429 3430 #ifdef DIAGNOSTIC 3431 if (timestamp_save + 1 != map->timestamp) 3432 panic("uvm_map_pageable: stale map"); 3433 #endif 3434 3435 /* 3436 * first drop the wiring count on all the entries 3437 * which haven't actually been wired yet. 3438 */ 3439 3440 failed_entry = entry; 3441 while (entry != &map->header && entry->start < end) { 3442 entry->wired_count--; 3443 entry = entry->next; 3444 } 3445 3446 /* 3447 * now, unwire all the entries that were successfully 3448 * wired above. 3449 */ 3450 3451 entry = start_entry; 3452 while (entry != failed_entry) { 3453 entry->wired_count--; 3454 if (VM_MAPENT_ISWIRED(entry) == 0) 3455 uvm_map_entry_unwire(map, entry); 3456 entry = entry->next; 3457 } 3458 if ((lockflags & UVM_LK_EXIT) == 0) 3459 vm_map_unlock(map); 3460 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); 3461 return (rv); 3462 } 3463 3464 if ((lockflags & UVM_LK_EXIT) == 0) { 3465 vm_map_unbusy(map); 3466 } else { 3467 3468 /* 3469 * Get back to an exclusive (write) lock. 3470 */ 3471 3472 vm_map_lock(map); 3473 vm_map_unbusy(map); 3474 } 3475 3476 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3477 return 0; 3478 } 3479 3480 /* 3481 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3482 * all mapped regions. 3483 * 3484 * => map must not be locked. 3485 * => if no flags are specified, all regions are unwired. 3486 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3487 */ 3488 3489 int 3490 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3491 { 3492 struct vm_map_entry *entry, *failed_entry; 3493 vsize_t size; 3494 int rv; 3495 #ifdef DIAGNOSTIC 3496 u_int timestamp_save; 3497 #endif 3498 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 3499 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0); 3500 3501 KASSERT(map->flags & VM_MAP_PAGEABLE); 3502 3503 vm_map_lock(map); 3504 3505 /* 3506 * handle wiring and unwiring separately. 3507 */ 3508 3509 if (flags == 0) { /* unwire */ 3510 3511 /* 3512 * POSIX 1003.1b -- munlockall unlocks all regions, 3513 * regardless of how many times mlockall has been called. 3514 */ 3515 3516 for (entry = map->header.next; entry != &map->header; 3517 entry = entry->next) { 3518 if (VM_MAPENT_ISWIRED(entry)) 3519 uvm_map_entry_unwire(map, entry); 3520 } 3521 map->flags &= ~VM_MAP_WIREFUTURE; 3522 vm_map_unlock(map); 3523 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3524 return 0; 3525 } 3526 3527 if (flags & MCL_FUTURE) { 3528 3529 /* 3530 * must wire all future mappings; remember this. 3531 */ 3532 3533 map->flags |= VM_MAP_WIREFUTURE; 3534 } 3535 3536 if ((flags & MCL_CURRENT) == 0) { 3537 3538 /* 3539 * no more work to do! 3540 */ 3541 3542 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3543 vm_map_unlock(map); 3544 return 0; 3545 } 3546 3547 /* 3548 * wire case: in three passes [XXXCDC: ugly block of code here] 3549 * 3550 * 1: holding the write lock, count all pages mapped by non-wired 3551 * entries. if this would cause us to go over our limit, we fail. 3552 * 3553 * 2: still holding the write lock, we create any anonymous maps that 3554 * need to be created. then we increment its wiring count. 3555 * 3556 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3557 * in the pages for any newly wired area (wired_count == 1). 
3558 * 3559 * downgrading to a read lock for uvm_fault_wire avoids a possible 3560 * deadlock with another thread that may have faulted on one of 3561 * the pages to be wired (it would mark the page busy, blocking 3562 * us, then in turn block on the map lock that we hold). because 3563 * of problems in the recursive lock package, we cannot upgrade 3564 * to a write lock in vm_map_lookup. thus, any actions that 3565 * require the write lock must be done beforehand. because we 3566 * keep the read lock on the map, the copy-on-write status of the 3567 * entries we modify here cannot change. 3568 */ 3569 3570 for (size = 0, entry = map->header.next; entry != &map->header; 3571 entry = entry->next) { 3572 if (entry->protection != VM_PROT_NONE && 3573 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3574 size += entry->end - entry->start; 3575 } 3576 } 3577 3578 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3579 vm_map_unlock(map); 3580 return ENOMEM; 3581 } 3582 3583 if (limit != 0 && 3584 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3585 vm_map_unlock(map); 3586 return ENOMEM; 3587 } 3588 3589 /* 3590 * Pass 2. 3591 */ 3592 3593 for (entry = map->header.next; entry != &map->header; 3594 entry = entry->next) { 3595 if (entry->protection == VM_PROT_NONE) 3596 continue; 3597 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3598 3599 /* 3600 * perform actions of vm_map_lookup that need the 3601 * write lock on the map: create an anonymous map 3602 * for a copy-on-write region, or an anonymous map 3603 * for a zero-fill region. (XXXCDC: submap case 3604 * ok?) 3605 */ 3606 3607 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3608 if (UVM_ET_ISNEEDSCOPY(entry) && 3609 ((entry->max_protection & VM_PROT_WRITE) || 3610 (entry->object.uvm_obj == NULL))) { 3611 amap_copy(map, entry, 0, entry->start, 3612 entry->end); 3613 /* XXXCDC: wait OK? */ 3614 } 3615 } 3616 } 3617 entry->wired_count++; 3618 } 3619 3620 /* 3621 * Pass 3. 3622 */ 3623 3624 #ifdef DIAGNOSTIC 3625 timestamp_save = map->timestamp; 3626 #endif 3627 vm_map_busy(map); 3628 vm_map_unlock(map); 3629 3630 rv = 0; 3631 for (entry = map->header.next; entry != &map->header; 3632 entry = entry->next) { 3633 if (entry->wired_count == 1) { 3634 rv = uvm_fault_wire(map, entry->start, entry->end, 3635 entry->max_protection, 1); 3636 if (rv) { 3637 3638 /* 3639 * wiring failed. break out of the loop. 3640 * we'll clean up the map below, once we 3641 * have a write lock again. 3642 */ 3643 3644 break; 3645 } 3646 } 3647 } 3648 3649 if (rv) { 3650 3651 /* 3652 * Get back an exclusive (write) lock. 3653 */ 3654 3655 vm_map_lock(map); 3656 vm_map_unbusy(map); 3657 3658 #ifdef DIAGNOSTIC 3659 if (timestamp_save + 1 != map->timestamp) 3660 panic("uvm_map_pageable_all: stale map"); 3661 #endif 3662 3663 /* 3664 * first drop the wiring count on all the entries 3665 * which haven't actually been wired yet. 3666 * 3667 * Skip VM_PROT_NONE entries like we did above. 3668 */ 3669 3670 failed_entry = entry; 3671 for (/* nothing */; entry != &map->header; 3672 entry = entry->next) { 3673 if (entry->protection == VM_PROT_NONE) 3674 continue; 3675 entry->wired_count--; 3676 } 3677 3678 /* 3679 * now, unwire all the entries that were successfully 3680 * wired above. 3681 * 3682 * Skip VM_PROT_NONE entries like we did above. 
3683 */ 3684 3685 for (entry = map->header.next; entry != failed_entry; 3686 entry = entry->next) { 3687 if (entry->protection == VM_PROT_NONE) 3688 continue; 3689 entry->wired_count--; 3690 if (VM_MAPENT_ISWIRED(entry)) 3691 uvm_map_entry_unwire(map, entry); 3692 } 3693 vm_map_unlock(map); 3694 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0); 3695 return (rv); 3696 } 3697 3698 vm_map_unbusy(map); 3699 3700 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3701 return 0; 3702 } 3703 3704 /* 3705 * uvm_map_clean: clean out a map range 3706 * 3707 * => valid flags: 3708 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3709 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3710 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3711 * if (flags & PGO_FREE): any cached pages are freed after clean 3712 * => returns an error if any part of the specified range isn't mapped 3713 * => never a need to flush amap layer since the anonymous memory has 3714 * no permanent home, but may deactivate pages there 3715 * => called from sys_msync() and sys_madvise() 3716 * => caller must not write-lock map (read OK). 3717 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3718 */ 3719 3720 int 3721 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3722 { 3723 struct vm_map_entry *current, *entry; 3724 struct uvm_object *uobj; 3725 struct vm_amap *amap; 3726 struct vm_anon *anon, *anon_tofree; 3727 struct vm_page *pg; 3728 vaddr_t offset; 3729 vsize_t size; 3730 voff_t uoff; 3731 int error, refs; 3732 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3733 3734 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", 3735 map, start, end, flags); 3736 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3737 (PGO_FREE|PGO_DEACTIVATE)); 3738 3739 vm_map_lock_read(map); 3740 VM_MAP_RANGE_CHECK(map, start, end); 3741 if (uvm_map_lookup_entry(map, start, &entry) == false) { 3742 vm_map_unlock_read(map); 3743 return EFAULT; 3744 } 3745 3746 /* 3747 * Make a first pass to check for holes and wiring problems. 3748 */ 3749 3750 for (current = entry; current->start < end; current = current->next) { 3751 if (UVM_ET_ISSUBMAP(current)) { 3752 vm_map_unlock_read(map); 3753 return EINVAL; 3754 } 3755 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3756 vm_map_unlock_read(map); 3757 return EBUSY; 3758 } 3759 if (end <= current->end) { 3760 break; 3761 } 3762 if (current->end != current->next->start) { 3763 vm_map_unlock_read(map); 3764 return EFAULT; 3765 } 3766 } 3767 3768 error = 0; 3769 for (current = entry; start < end; current = current->next) { 3770 amap = current->aref.ar_amap; /* upper layer */ 3771 uobj = current->object.uvm_obj; /* lower layer */ 3772 KASSERT(start >= current->start); 3773 3774 /* 3775 * No amap cleaning necessary if: 3776 * 3777 * (1) There's no amap. 3778 * 3779 * (2) We're not deactivating or freeing pages. 
3780 */ 3781 3782 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 3783 goto flush_object; 3784 3785 offset = start - current->start; 3786 size = MIN(end, current->end) - start; 3787 anon_tofree = NULL; 3788 3789 amap_lock(amap); 3790 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) { 3791 anon = amap_lookup(¤t->aref, offset); 3792 if (anon == NULL) 3793 continue; 3794 3795 KASSERT(anon->an_lock == amap->am_lock); 3796 pg = anon->an_page; 3797 if (pg == NULL) { 3798 continue; 3799 } 3800 3801 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 3802 3803 /* 3804 * In these first 3 cases, we just deactivate the page. 3805 */ 3806 3807 case PGO_CLEANIT|PGO_FREE: 3808 case PGO_CLEANIT|PGO_DEACTIVATE: 3809 case PGO_DEACTIVATE: 3810 deactivate_it: 3811 /* 3812 * skip the page if it's loaned or wired, 3813 * since it shouldn't be on a paging queue 3814 * at all in these cases. 3815 */ 3816 3817 mutex_enter(&uvm_pageqlock); 3818 if (pg->loan_count != 0 || 3819 pg->wire_count != 0) { 3820 mutex_exit(&uvm_pageqlock); 3821 continue; 3822 } 3823 KASSERT(pg->uanon == anon); 3824 uvm_pagedeactivate(pg); 3825 mutex_exit(&uvm_pageqlock); 3826 continue; 3827 3828 case PGO_FREE: 3829 3830 /* 3831 * If there are multiple references to 3832 * the amap, just deactivate the page. 3833 */ 3834 3835 if (amap_refs(amap) > 1) 3836 goto deactivate_it; 3837 3838 /* skip the page if it's wired */ 3839 if (pg->wire_count != 0) { 3840 continue; 3841 } 3842 amap_unadd(¤t->aref, offset); 3843 refs = --anon->an_ref; 3844 if (refs == 0) { 3845 anon->an_link = anon_tofree; 3846 anon_tofree = anon; 3847 } 3848 continue; 3849 } 3850 } 3851 uvm_anon_freelst(amap, anon_tofree); 3852 3853 flush_object: 3854 /* 3855 * flush pages if we've got a valid backing object. 3856 * note that we must always clean object pages before 3857 * freeing them since otherwise we could reveal stale 3858 * data from files. 3859 */ 3860 3861 uoff = current->offset + (start - current->start); 3862 size = MIN(end, current->end) - start; 3863 if (uobj != NULL) { 3864 mutex_enter(uobj->vmobjlock); 3865 if (uobj->pgops->pgo_put != NULL) 3866 error = (uobj->pgops->pgo_put)(uobj, uoff, 3867 uoff + size, flags | PGO_CLEANIT); 3868 else 3869 error = 0; 3870 } 3871 start += size; 3872 } 3873 vm_map_unlock_read(map); 3874 return (error); 3875 } 3876 3877 3878 /* 3879 * uvm_map_checkprot: check protection in map 3880 * 3881 * => must allow specified protection in a fully allocated region. 3882 * => map must be read or write locked by caller. 3883 */ 3884 3885 bool 3886 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3887 vm_prot_t protection) 3888 { 3889 struct vm_map_entry *entry; 3890 struct vm_map_entry *tmp_entry; 3891 3892 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { 3893 return (false); 3894 } 3895 entry = tmp_entry; 3896 while (start < end) { 3897 if (entry == &map->header) { 3898 return (false); 3899 } 3900 3901 /* 3902 * no holes allowed 3903 */ 3904 3905 if (start < entry->start) { 3906 return (false); 3907 } 3908 3909 /* 3910 * check protection associated with entry 3911 */ 3912 3913 if ((entry->protection & protection) != protection) { 3914 return (false); 3915 } 3916 start = entry->end; 3917 entry = entry->next; 3918 } 3919 return (true); 3920 } 3921 3922 /* 3923 * uvmspace_alloc: allocate a vmspace structure. 
3924 * 3925 * - structure includes vm_map and pmap 3926 * - XXX: no locking on this structure 3927 * - refcnt set to 1, rest must be init'd by caller 3928 */ 3929 struct vmspace * 3930 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) 3931 { 3932 struct vmspace *vm; 3933 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 3934 3935 vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); 3936 uvmspace_init(vm, NULL, vmin, vmax); 3937 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); 3938 return (vm); 3939 } 3940 3941 /* 3942 * uvmspace_init: initialize a vmspace structure. 3943 * 3944 * - XXX: no locking on this structure 3945 * - refcnt set to 1, rest must be init'd by caller 3946 */ 3947 void 3948 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax) 3949 { 3950 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 3951 3952 memset(vm, 0, sizeof(*vm)); 3953 uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 3954 #ifdef __USING_TOPDOWN_VM 3955 | VM_MAP_TOPDOWN 3956 #endif 3957 ); 3958 if (pmap) 3959 pmap_reference(pmap); 3960 else 3961 pmap = pmap_create(); 3962 vm->vm_map.pmap = pmap; 3963 vm->vm_refcnt = 1; 3964 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3965 } 3966 3967 /* 3968 * uvmspace_share: share a vmspace between two processes 3969 * 3970 * - used for vfork, threads(?) 3971 */ 3972 3973 void 3974 uvmspace_share(struct proc *p1, struct proc *p2) 3975 { 3976 3977 uvmspace_addref(p1->p_vmspace); 3978 p2->p_vmspace = p1->p_vmspace; 3979 } 3980 3981 #if 0 3982 3983 /* 3984 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 3985 * 3986 * - XXX: no locking on vmspace 3987 */ 3988 3989 void 3990 uvmspace_unshare(struct lwp *l) 3991 { 3992 struct proc *p = l->l_proc; 3993 struct vmspace *nvm, *ovm = p->p_vmspace; 3994 3995 if (ovm->vm_refcnt == 1) 3996 /* nothing to do: vmspace isn't shared in the first place */ 3997 return; 3998 3999 /* make a new vmspace, still holding old one */ 4000 nvm = uvmspace_fork(ovm); 4001 4002 kpreempt_disable(); 4003 pmap_deactivate(l); /* unbind old vmspace */ 4004 p->p_vmspace = nvm; 4005 pmap_activate(l); /* switch to new vmspace */ 4006 kpreempt_enable(); 4007 4008 uvmspace_free(ovm); /* drop reference to old vmspace */ 4009 } 4010 4011 #endif 4012 4013 4014 /* 4015 * uvmspace_spawn: a new process has been spawned and needs a vmspace 4016 */ 4017 4018 void 4019 uvmspace_spawn(struct lwp *l, vaddr_t start, vaddr_t end) 4020 { 4021 struct proc *p = l->l_proc; 4022 struct vmspace *nvm; 4023 4024 #ifdef __HAVE_CPU_VMSPACE_EXEC 4025 cpu_vmspace_exec(l, start, end); 4026 #endif 4027 4028 nvm = uvmspace_alloc(start, end); 4029 kpreempt_disable(); 4030 p->p_vmspace = nvm; 4031 pmap_activate(l); 4032 kpreempt_enable(); 4033 } 4034 4035 /* 4036 * uvmspace_exec: the process wants to exec a new program 4037 */ 4038 4039 void 4040 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end) 4041 { 4042 struct proc *p = l->l_proc; 4043 struct vmspace *nvm, *ovm = p->p_vmspace; 4044 struct vm_map *map; 4045 4046 KASSERT(ovm != NULL); 4047 #ifdef __HAVE_CPU_VMSPACE_EXEC 4048 cpu_vmspace_exec(l, start, end); 4049 #endif 4050 4051 map = &ovm->vm_map; 4052 /* 4053 * see if more than one process is using this vmspace... 4054 */ 4055 4056 if (ovm->vm_refcnt == 1) { 4057 4058 /* 4059 * if p is the only process using its vmspace then we can safely 4060 * recycle that vmspace for the program that is being exec'd. 
4061 */ 4062 4063 #ifdef SYSVSHM 4064 /* 4065 * SYSV SHM semantics require us to kill all segments on an exec 4066 */ 4067 4068 if (ovm->vm_shm) 4069 shmexit(ovm); 4070 #endif 4071 4072 /* 4073 * POSIX 1003.1b -- "lock future mappings" is revoked 4074 * when a process execs another program image. 4075 */ 4076 4077 map->flags &= ~VM_MAP_WIREFUTURE; 4078 4079 /* 4080 * now unmap the old program 4081 */ 4082 4083 pmap_remove_all(map->pmap); 4084 uvm_unmap(map, vm_map_min(map), vm_map_max(map)); 4085 KASSERT(map->header.prev == &map->header); 4086 KASSERT(map->nentries == 0); 4087 4088 /* 4089 * resize the map 4090 */ 4091 4092 vm_map_setmin(map, start); 4093 vm_map_setmax(map, end); 4094 } else { 4095 4096 /* 4097 * p's vmspace is being shared, so we can't reuse it for p since 4098 * it is still being used for others. allocate a new vmspace 4099 * for p 4100 */ 4101 4102 nvm = uvmspace_alloc(start, end); 4103 4104 /* 4105 * install new vmspace and drop our ref to the old one. 4106 */ 4107 4108 kpreempt_disable(); 4109 pmap_deactivate(l); 4110 p->p_vmspace = nvm; 4111 pmap_activate(l); 4112 kpreempt_enable(); 4113 4114 uvmspace_free(ovm); 4115 } 4116 } 4117 4118 /* 4119 * uvmspace_addref: add a reference to a vmspace. 4120 */ 4121 4122 void 4123 uvmspace_addref(struct vmspace *vm) 4124 { 4125 struct vm_map *map = &vm->vm_map; 4126 4127 KASSERT((map->flags & VM_MAP_DYING) == 0); 4128 4129 mutex_enter(&map->misc_lock); 4130 KASSERT(vm->vm_refcnt > 0); 4131 vm->vm_refcnt++; 4132 mutex_exit(&map->misc_lock); 4133 } 4134 4135 /* 4136 * uvmspace_free: free a vmspace data structure 4137 */ 4138 4139 void 4140 uvmspace_free(struct vmspace *vm) 4141 { 4142 struct vm_map_entry *dead_entries; 4143 struct vm_map *map = &vm->vm_map; 4144 int n; 4145 4146 UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); 4147 4148 UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); 4149 mutex_enter(&map->misc_lock); 4150 n = --vm->vm_refcnt; 4151 mutex_exit(&map->misc_lock); 4152 if (n > 0) 4153 return; 4154 4155 /* 4156 * at this point, there should be no other references to the map. 4157 * delete all of the mappings, then destroy the pmap. 4158 */ 4159 4160 map->flags |= VM_MAP_DYING; 4161 pmap_remove_all(map->pmap); 4162 #ifdef SYSVSHM 4163 /* Get rid of any SYSV shared memory segments. */ 4164 if (vm->vm_shm != NULL) 4165 shmexit(vm); 4166 #endif 4167 4168 if (map->nentries) { 4169 uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map), 4170 &dead_entries, 0); 4171 if (dead_entries != NULL) 4172 uvm_unmap_detach(dead_entries, 0); 4173 } 4174 KASSERT(map->nentries == 0); 4175 KASSERT(map->size == 0); 4176 4177 mutex_destroy(&map->misc_lock); 4178 rw_destroy(&map->lock); 4179 cv_destroy(&map->cv); 4180 pmap_destroy(map->pmap); 4181 pool_cache_put(&uvm_vmspace_cache, vm); 4182 } 4183 4184 /* 4185 * F O R K - m a i n e n t r y p o i n t 4186 */ 4187 /* 4188 * uvmspace_fork: fork a process' main map 4189 * 4190 * => create a new vmspace for child process from parent. 4191 * => parent's map must not be locked.
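 * => each entry in the parent map is handled according to its
 *    inheritance code: MAP_INHERIT_NONE entries are dropped from the
 *    child, MAP_INHERIT_SHARE entries share their amap and backing
 *    object with the parent, and MAP_INHERIT_COPY entries are set up
 *    for copy-on-write (see the switch statement below).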
4192 */ 4193 4194 struct vmspace * 4195 uvmspace_fork(struct vmspace *vm1) 4196 { 4197 struct vmspace *vm2; 4198 struct vm_map *old_map = &vm1->vm_map; 4199 struct vm_map *new_map; 4200 struct vm_map_entry *old_entry; 4201 struct vm_map_entry *new_entry; 4202 UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); 4203 4204 vm_map_lock(old_map); 4205 4206 vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map)); 4207 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4208 (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy); 4209 new_map = &vm2->vm_map; /* XXX */ 4210 4211 old_entry = old_map->header.next; 4212 new_map->size = old_map->size; 4213 4214 /* 4215 * go entry-by-entry 4216 */ 4217 4218 while (old_entry != &old_map->header) { 4219 4220 /* 4221 * first, some sanity checks on the old entry 4222 */ 4223 4224 KASSERT(!UVM_ET_ISSUBMAP(old_entry)); 4225 KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) || 4226 !UVM_ET_ISNEEDSCOPY(old_entry)); 4227 4228 switch (old_entry->inheritance) { 4229 case MAP_INHERIT_NONE: 4230 4231 /* 4232 * drop the mapping, modify size 4233 */ 4234 new_map->size -= old_entry->end - old_entry->start; 4235 break; 4236 4237 case MAP_INHERIT_SHARE: 4238 4239 /* 4240 * share the mapping: this means we want the old and 4241 * new entries to share amaps and backing objects. 4242 */ 4243 /* 4244 * if the old_entry needs a new amap (due to prev fork) 4245 * then we need to allocate it now so that we have 4246 * something we own to share with the new_entry. [in 4247 * other words, we need to clear needs_copy] 4248 */ 4249 4250 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 4251 /* get our own amap, clears needs_copy */ 4252 amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK, 4253 0, 0); 4254 /* XXXCDC: WAITOK??? */ 4255 } 4256 4257 new_entry = uvm_mapent_alloc(new_map, 0); 4258 /* old_entry -> new_entry */ 4259 uvm_mapent_copy(old_entry, new_entry); 4260 4261 /* new pmap has nothing wired in it */ 4262 new_entry->wired_count = 0; 4263 4264 /* 4265 * gain reference to object backing the map (can't 4266 * be a submap, already checked this case). 4267 */ 4268 4269 if (new_entry->aref.ar_amap) 4270 uvm_map_reference_amap(new_entry, AMAP_SHARED); 4271 4272 if (new_entry->object.uvm_obj && 4273 new_entry->object.uvm_obj->pgops->pgo_reference) 4274 new_entry->object.uvm_obj-> 4275 pgops->pgo_reference( 4276 new_entry->object.uvm_obj); 4277 4278 /* insert entry at end of new_map's entry list */ 4279 uvm_map_entry_link(new_map, new_map->header.prev, 4280 new_entry); 4281 4282 break; 4283 4284 case MAP_INHERIT_COPY: 4285 4286 /* 4287 * copy-on-write the mapping (using mmap's 4288 * MAP_PRIVATE semantics) 4289 * 4290 * allocate new_entry, adjust reference counts. 4291 * (note that new references are read-only). 4292 */ 4293 4294 new_entry = uvm_mapent_alloc(new_map, 0); 4295 /* old_entry -> new_entry */ 4296 uvm_mapent_copy(old_entry, new_entry); 4297 4298 if (new_entry->aref.ar_amap) 4299 uvm_map_reference_amap(new_entry, 0); 4300 4301 if (new_entry->object.uvm_obj && 4302 new_entry->object.uvm_obj->pgops->pgo_reference) 4303 new_entry->object.uvm_obj->pgops->pgo_reference 4304 (new_entry->object.uvm_obj); 4305 4306 /* new pmap has nothing wired in it */ 4307 new_entry->wired_count = 0; 4308 4309 new_entry->etype |= 4310 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4311 uvm_map_entry_link(new_map, new_map->header.prev, 4312 new_entry); 4313 4314 /* 4315 * the new entry will need an amap. 
it will either 4316 * need to be copied from the old entry or created 4317 * from scratch (if the old entry does not have an 4318 * amap). can we defer this process until later 4319 * (by setting "needs_copy") or do we need to copy 4320 * the amap now? 4321 * 4322 * we must copy the amap now if any of the following 4323 * conditions hold: 4324 * 1. the old entry has an amap and that amap is 4325 * being shared. this means that the old (parent) 4326 * process is sharing the amap with another 4327 * process. if we do not clear needs_copy here 4328 * we will end up in a situation where both the 4329 * parent and child process are referring to the 4330 * same amap with "needs_copy" set. if the 4331 * parent write-faults, the fault routine will 4332 * clear "needs_copy" in the parent by allocating 4333 * a new amap. this is wrong because the 4334 * parent is supposed to be sharing the old amap 4335 * and the new amap will break that. 4336 * 4337 * 2. if the old entry has an amap and a non-zero 4338 * wire count then we are going to have to call 4339 * amap_cow_now to avoid page faults in the 4340 * parent process. since amap_cow_now requires 4341 * "needs_copy" to be clear we might as well 4342 * clear it here as well. 4343 * 4344 */ 4345 4346 if (old_entry->aref.ar_amap != NULL) { 4347 if ((amap_flags(old_entry->aref.ar_amap) & 4348 AMAP_SHARED) != 0 || 4349 VM_MAPENT_ISWIRED(old_entry)) { 4350 4351 amap_copy(new_map, new_entry, 4352 AMAP_COPY_NOCHUNK, 0, 0); 4353 /* XXXCDC: M_WAITOK ... ok? */ 4354 } 4355 } 4356 4357 /* 4358 * if the parent's entry is wired down, then the 4359 * parent process does not want page faults on 4360 * access to that memory. this means that we 4361 * cannot do copy-on-write because we can't write 4362 * protect the old entry. in this case we 4363 * resolve all copy-on-write faults now, using 4364 * amap_cow_now. note that we have already 4365 * allocated any needed amap (above). 4366 */ 4367 4368 if (VM_MAPENT_ISWIRED(old_entry)) { 4369 4370 /* 4371 * resolve all copy-on-write faults now 4372 * (note that there is nothing to do if 4373 * the old mapping does not have an amap). 4374 */ 4375 if (old_entry->aref.ar_amap) 4376 amap_cow_now(new_map, new_entry); 4377 4378 } else { 4379 4380 /* 4381 * set up mappings to trigger copy-on-write faults: 4382 * we must write-protect the parent if it has 4383 * an amap and it is not already "needs_copy"... 4384 * if it is already "needs_copy" then the parent 4385 * has already been write-protected by a previous 4386 * fork operation. 4387 */ 4388 4389 if (old_entry->aref.ar_amap && 4390 !UVM_ET_ISNEEDSCOPY(old_entry)) { 4391 if (old_entry->max_protection & VM_PROT_WRITE) { 4392 pmap_protect(old_map->pmap, 4393 old_entry->start, 4394 old_entry->end, 4395 old_entry->protection & 4396 ~VM_PROT_WRITE); 4397 } 4398 old_entry->etype |= UVM_ET_NEEDSCOPY; 4399 } 4400 } 4401 break; 4402 } /* end of switch statement */ 4403 old_entry = old_entry->next; 4404 } 4405 4406 pmap_update(old_map->pmap); 4407 vm_map_unlock(old_map); 4408 4409 #ifdef SYSVSHM 4410 if (vm1->vm_shm) 4411 shmfork(vm1, vm2); 4412 #endif 4413 4414 #ifdef PMAP_FORK 4415 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 4416 #endif 4417 4418 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 4419 return (vm2); 4420 } 4421 4422 4423 /* 4424 * uvm_mapent_trymerge: try to merge an entry with its neighbors. 4425 * 4426 * => called with map locked. 4427 * => return non-zero if successfully merged.
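 * => entries that have their own amap, or that are marked
 *    UVM_MAP_NOMERGE, are never merged.
 * => a neighbor is merged only when it is virtually contiguous with
 *    "entry", passes UVM_ET_ISCOMPATIBLE() (same etype, protection,
 *    inheritance, advice, wiring and backing object), references that
 *    object at a contiguous offset, and meets the amap conditions
 *    checked below for the UVM_MERGE_COPYING case.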
4428 */ 4429 4430 int 4431 uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags) 4432 { 4433 struct uvm_object *uobj; 4434 struct vm_map_entry *next; 4435 struct vm_map_entry *prev; 4436 vsize_t size; 4437 int merged = 0; 4438 bool copying; 4439 int newetype; 4440 4441 if (entry->aref.ar_amap != NULL) { 4442 return 0; 4443 } 4444 if ((entry->flags & UVM_MAP_NOMERGE) != 0) { 4445 return 0; 4446 } 4447 4448 uobj = entry->object.uvm_obj; 4449 size = entry->end - entry->start; 4450 copying = (flags & UVM_MERGE_COPYING) != 0; 4451 newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype; 4452 4453 next = entry->next; 4454 if (next != &map->header && 4455 next->start == entry->end && 4456 ((copying && next->aref.ar_amap != NULL && 4457 amap_refs(next->aref.ar_amap) == 1) || 4458 (!copying && next->aref.ar_amap == NULL)) && 4459 UVM_ET_ISCOMPATIBLE(next, newetype, 4460 uobj, entry->flags, entry->protection, 4461 entry->max_protection, entry->inheritance, entry->advice, 4462 entry->wired_count) && 4463 (uobj == NULL || entry->offset + size == next->offset)) { 4464 int error; 4465 4466 if (copying) { 4467 error = amap_extend(next, size, 4468 AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS); 4469 } else { 4470 error = 0; 4471 } 4472 if (error == 0) { 4473 if (uobj) { 4474 if (uobj->pgops->pgo_detach) { 4475 uobj->pgops->pgo_detach(uobj); 4476 } 4477 } 4478 4479 entry->end = next->end; 4480 clear_hints(map, next); 4481 uvm_map_entry_unlink(map, next); 4482 if (copying) { 4483 entry->aref = next->aref; 4484 entry->etype &= ~UVM_ET_NEEDSCOPY; 4485 } 4486 uvm_map_check(map, "trymerge forwardmerge"); 4487 uvm_mapent_free(next); 4488 merged++; 4489 } 4490 } 4491 4492 prev = entry->prev; 4493 if (prev != &map->header && 4494 prev->end == entry->start && 4495 ((copying && !merged && prev->aref.ar_amap != NULL && 4496 amap_refs(prev->aref.ar_amap) == 1) || 4497 (!copying && prev->aref.ar_amap == NULL)) && 4498 UVM_ET_ISCOMPATIBLE(prev, newetype, 4499 uobj, entry->flags, entry->protection, 4500 entry->max_protection, entry->inheritance, entry->advice, 4501 entry->wired_count) && 4502 (uobj == NULL || 4503 prev->offset + prev->end - prev->start == entry->offset)) { 4504 int error; 4505 4506 if (copying) { 4507 error = amap_extend(prev, size, 4508 AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS); 4509 } else { 4510 error = 0; 4511 } 4512 if (error == 0) { 4513 if (uobj) { 4514 if (uobj->pgops->pgo_detach) { 4515 uobj->pgops->pgo_detach(uobj); 4516 } 4517 entry->offset = prev->offset; 4518 } 4519 4520 entry->start = prev->start; 4521 clear_hints(map, prev); 4522 uvm_map_entry_unlink(map, prev); 4523 if (copying) { 4524 entry->aref = prev->aref; 4525 entry->etype &= ~UVM_ET_NEEDSCOPY; 4526 } 4527 uvm_map_check(map, "trymerge backmerge"); 4528 uvm_mapent_free(prev); 4529 merged++; 4530 } 4531 } 4532 4533 return merged; 4534 } 4535 4536 /* 4537 * uvm_map_setup: init map 4538 * 4539 * => map must not be in service yet. 
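 * => a typical caller initializes the map and then attaches a pmap,
 *    much as uvmspace_init() does; roughly (sketch only):
 *
 *	uvm_map_setup(map, vmin, vmax, VM_MAP_PAGEABLE);
 *	map->pmap = pmap_create();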
4540 */ 4541 4542 void 4543 uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) 4544 { 4545 4546 rb_tree_init(&map->rb_tree, &uvm_map_tree_ops); 4547 map->header.next = map->header.prev = &map->header; 4548 map->nentries = 0; 4549 map->size = 0; 4550 map->ref_count = 1; 4551 vm_map_setmin(map, vmin); 4552 vm_map_setmax(map, vmax); 4553 map->flags = flags; 4554 map->first_free = &map->header; 4555 map->hint = &map->header; 4556 map->timestamp = 0; 4557 map->busy = NULL; 4558 4559 rw_init(&map->lock); 4560 cv_init(&map->cv, "vm_map"); 4561 mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE); 4562 } 4563 4564 /* 4565 * U N M A P - m a i n e n t r y p o i n t 4566 */ 4567 4568 /* 4569 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "end") 4570 * 4571 * => caller must check alignment and size 4572 * => map must be unlocked (we will lock it) 4573 * => flags is UVM_FLAG_QUANTUM or 0. 4574 */ 4575 4576 void 4577 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4578 { 4579 struct vm_map_entry *dead_entries; 4580 UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); 4581 4582 UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", 4583 map, start, end, 0); 4584 if (map == kernel_map) { 4585 LOCKDEBUG_MEM_CHECK((void *)start, end - start); 4586 } 4587 /* 4588 * work now done by helper functions. wipe the pmap mappings and then 4589 * detach from the dead entries... 4590 */ 4591 vm_map_lock(map); 4592 uvm_unmap_remove(map, start, end, &dead_entries, flags); 4593 vm_map_unlock(map); 4594 4595 if (dead_entries != NULL) 4596 uvm_unmap_detach(dead_entries, 0); 4597 4598 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 4599 } 4600 4601 4602 /* 4603 * uvm_map_reference: add reference to a map 4604 * 4605 * => map need not be locked (we use misc_lock).
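 * => the reference count is bumped under misc_lock, mirroring the way
 *    vm_refcnt is handled by uvmspace_addref() and uvmspace_free().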
4606 */ 4607 4608 void 4609 uvm_map_reference(struct vm_map *map) 4610 { 4611 mutex_enter(&map->misc_lock); 4612 map->ref_count++; 4613 mutex_exit(&map->misc_lock); 4614 } 4615 4616 bool 4617 vm_map_starved_p(struct vm_map *map) 4618 { 4619 4620 if ((map->flags & VM_MAP_WANTVA) != 0) { 4621 return true; 4622 } 4623 /* XXX */ 4624 if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) { 4625 return true; 4626 } 4627 return false; 4628 } 4629 4630 void 4631 uvm_map_lock_entry(struct vm_map_entry *entry) 4632 { 4633 4634 if (entry->aref.ar_amap != NULL) { 4635 amap_lock(entry->aref.ar_amap); 4636 } 4637 if (UVM_ET_ISOBJ(entry)) { 4638 mutex_enter(entry->object.uvm_obj->vmobjlock); 4639 } 4640 } 4641 4642 void 4643 uvm_map_unlock_entry(struct vm_map_entry *entry) 4644 { 4645 4646 if (UVM_ET_ISOBJ(entry)) { 4647 mutex_exit(entry->object.uvm_obj->vmobjlock); 4648 } 4649 if (entry->aref.ar_amap != NULL) { 4650 amap_unlock(entry->aref.ar_amap); 4651 } 4652 } 4653 4654 #if defined(DDB) || defined(DEBUGPRINT) 4655 4656 /* 4657 * uvm_map_printit: actually prints the map 4658 */ 4659 4660 void 4661 uvm_map_printit(struct vm_map *map, bool full, 4662 void (*pr)(const char *, ...)) 4663 { 4664 struct vm_map_entry *entry; 4665 4666 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map), 4667 vm_map_max(map)); 4668 (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n", 4669 map->nentries, map->size, map->ref_count, map->timestamp, 4670 map->flags); 4671 (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap, 4672 pmap_resident_count(map->pmap), pmap_wired_count(map->pmap)); 4673 if (!full) 4674 return; 4675 for (entry = map->header.next; entry != &map->header; 4676 entry = entry->next) { 4677 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 4678 entry, entry->start, entry->end, entry->object.uvm_obj, 4679 (long long)entry->offset, entry->aref.ar_amap, 4680 entry->aref.ar_pageoff); 4681 (*pr)( 4682 "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 4683 "wc=%d, adv=%d\n", 4684 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 4685 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 4686 (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', 4687 entry->protection, entry->max_protection, 4688 entry->inheritance, entry->wired_count, entry->advice); 4689 } 4690 } 4691 4692 void 4693 uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 4694 { 4695 struct vm_map *map; 4696 4697 for (map = kernel_map;;) { 4698 struct vm_map_entry *entry; 4699 4700 if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) { 4701 break; 4702 } 4703 (*pr)("%p is %p+%zu from VMMAP %p\n", 4704 (void *)addr, (void *)entry->start, 4705 (size_t)(addr - (uintptr_t)entry->start), map); 4706 if (!UVM_ET_ISSUBMAP(entry)) { 4707 break; 4708 } 4709 map = entry->object.sub_map; 4710 } 4711 } 4712 4713 #endif /* DDB || DEBUGPRINT */ 4714 4715 #ifndef __USER_VA0_IS_SAFE 4716 static int 4717 sysctl_user_va0_disable(SYSCTLFN_ARGS) 4718 { 4719 struct sysctlnode node; 4720 int t, error; 4721 4722 node = *rnode; 4723 node.sysctl_data = &t; 4724 t = user_va0_disable; 4725 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 4726 if (error || newp == NULL) 4727 return (error); 4728 4729 if (!t && user_va0_disable && 4730 kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0, 4731 NULL, NULL, NULL)) 4732 return EPERM; 4733 4734 user_va0_disable = !!t; 4735 return 0; 4736 } 4737 4738 SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup") 4739 { 4740 4741 sysctl_createv(clog, 0, NULL, NULL, 4742 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 4743 CTLTYPE_INT, "user_va0_disable", 4744 SYSCTL_DESCR("Disable VA 0"), 4745 sysctl_user_va0_disable, 0, &user_va0_disable, 0, 4746 CTL_VM, CTL_CREATE, CTL_EOL); 4747 } 4748 #endif 4749
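
/*
 * The function below is an illustrative sketch of how the vmspace
 * lifecycle and protection-check routines above fit together.  It is
 * disabled with "#if 0" (like uvmspace_unshare() earlier in this file),
 * its name is made up for the example, and the address range passed in
 * is assumed to be page aligned and within the machine's user VA limits.
 */
#if 0
static void
uvm_map_example_usage(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct vmspace *vm;
	bool ok;

	/* allocate a fresh vmspace covering [minaddr, maxaddr) */
	vm = uvmspace_alloc(minaddr, maxaddr);

	/* take an extra reference before publishing it to another user */
	uvmspace_addref(vm);

	/*
	 * uvm_map_checkprot() requires the map to be read or write
	 * locked by the caller, so take a read lock around the check.
	 */
	vm_map_lock_read(&vm->vm_map);
	ok = uvm_map_checkprot(&vm->vm_map, minaddr, minaddr + PAGE_SIZE,
	    VM_PROT_READ);
	vm_map_unlock_read(&vm->vm_map);
	if (!ok)
		printf("example: range is not fully mapped readable\n");

	/*
	 * drop both references; the final uvmspace_free() unmaps
	 * everything and destroys the pmap.
	 */
	uvmspace_free(vm);
	uvmspace_free(vm);
}
#endif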