1 /* $NetBSD: uvm_map.c,v 1.328 2014/03/05 05:35:55 matt Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * Copyright (c) 1991, 1993, The Regents of the University of California. 6 * 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * The Mach Operating System project at Carnegie-Mellon University. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 37 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 38 * 39 * 40 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 41 * All rights reserved. 42 * 43 * Permission to use, copy, modify and distribute this software and 44 * its documentation is hereby granted, provided that both the copyright 45 * notice and this permission notice appear in all copies of the 46 * software, derivative works or modified versions, and any portions 47 * thereof, and that both notices appear in supporting documentation. 48 * 49 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 50 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 51 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 52 * 53 * Carnegie Mellon requests users of this software to return to 54 * 55 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 56 * School of Computer Science 57 * Carnegie Mellon University 58 * Pittsburgh PA 15213-3890 59 * 60 * any improvements or extensions that they make and grant Carnegie the 61 * rights to redistribute these changes. 
62 */ 63 64 /* 65 * uvm_map.c: uvm map operations 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.328 2014/03/05 05:35:55 matt Exp $"); 70 71 #include "opt_ddb.h" 72 #include "opt_uvmhist.h" 73 #include "opt_uvm.h" 74 #include "opt_sysv.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/mman.h> 79 #include <sys/proc.h> 80 #include <sys/pool.h> 81 #include <sys/kernel.h> 82 #include <sys/mount.h> 83 #include <sys/vnode.h> 84 #include <sys/lockdebug.h> 85 #include <sys/atomic.h> 86 #ifndef __USER_VA0_IS_SAFE 87 #include <sys/sysctl.h> 88 #include <sys/kauth.h> 89 #include "opt_user_va0_disable_default.h" 90 #endif 91 92 #ifdef SYSVSHM 93 #include <sys/shm.h> 94 #endif 95 96 #include <uvm/uvm.h> 97 #include <uvm/uvm_readahead.h> 98 99 #if defined(DDB) || defined(DEBUGPRINT) 100 #include <uvm/uvm_ddb.h> 101 #endif 102 103 #ifdef UVMHIST 104 static struct kern_history_ent maphistbuf[100]; 105 UVMHIST_DEFINE(maphist) = UVMHIST_INITIALIZER(maphist, maphistbuf); 106 #endif 107 108 #if !defined(UVMMAP_COUNTERS) 109 110 #define UVMMAP_EVCNT_DEFINE(name) /* nothing */ 111 #define UVMMAP_EVCNT_INCR(ev) /* nothing */ 112 #define UVMMAP_EVCNT_DECR(ev) /* nothing */ 113 114 #else /* defined(UVMMAP_NOCOUNTERS) */ 115 116 #include <sys/evcnt.h> 117 #define UVMMAP_EVCNT_DEFINE(name) \ 118 struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 119 "uvmmap", #name); \ 120 EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name); 121 #define UVMMAP_EVCNT_INCR(ev) uvmmap_evcnt_##ev.ev_count++ 122 #define UVMMAP_EVCNT_DECR(ev) uvmmap_evcnt_##ev.ev_count-- 123 124 #endif /* defined(UVMMAP_NOCOUNTERS) */ 125 126 UVMMAP_EVCNT_DEFINE(ubackmerge) 127 UVMMAP_EVCNT_DEFINE(uforwmerge) 128 UVMMAP_EVCNT_DEFINE(ubimerge) 129 UVMMAP_EVCNT_DEFINE(unomerge) 130 UVMMAP_EVCNT_DEFINE(kbackmerge) 131 UVMMAP_EVCNT_DEFINE(kforwmerge) 132 UVMMAP_EVCNT_DEFINE(kbimerge) 133 UVMMAP_EVCNT_DEFINE(knomerge) 134 UVMMAP_EVCNT_DEFINE(map_call) 135 UVMMAP_EVCNT_DEFINE(mlk_call) 136 UVMMAP_EVCNT_DEFINE(mlk_hint) 137 UVMMAP_EVCNT_DEFINE(mlk_list) 138 UVMMAP_EVCNT_DEFINE(mlk_tree) 139 UVMMAP_EVCNT_DEFINE(mlk_treeloop) 140 UVMMAP_EVCNT_DEFINE(mlk_listloop) 141 142 const char vmmapbsy[] = "vmmapbsy"; 143 144 /* 145 * cache for vmspace structures. 146 */ 147 148 static struct pool_cache uvm_vmspace_cache; 149 150 /* 151 * cache for dynamically-allocated map entries. 152 */ 153 154 static struct pool_cache uvm_map_entry_cache; 155 156 #ifdef PMAP_GROWKERNEL 157 /* 158 * This global represents the end of the kernel virtual address 159 * space. If we want to exceed this, we must grow the kernel 160 * virtual address space dynamically. 161 * 162 * Note, this variable is locked by kernel_map's lock. 
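 *
 * An illustrative note (not part of the original source): callers that
 * carve new kernel VA out of kernel_map compare against this bound and
 * grow the kernel pmap on demand, as uvm_map_prepare() does later in
 * this file:
 *
 *	if (map == kernel_map && uvm_maxkaddr < (start + size))
 *		uvm_maxkaddr = pmap_growkernel(start + size);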
163 */ 164 vaddr_t uvm_maxkaddr; 165 #endif 166 167 #ifndef __USER_VA0_IS_SAFE 168 #ifndef __USER_VA0_DISABLE_DEFAULT 169 #define __USER_VA0_DISABLE_DEFAULT 1 170 #endif 171 #ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */ 172 #undef __USER_VA0_DISABLE_DEFAULT 173 #define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT 174 #endif 175 static int user_va0_disable = __USER_VA0_DISABLE_DEFAULT; 176 #endif 177 178 /* 179 * macros 180 */ 181 182 /* 183 * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging 184 */ 185 extern struct vm_map *pager_map; 186 187 #define UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \ 188 prot, maxprot, inh, adv, wire) \ 189 ((ent)->etype == (type) && \ 190 (((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \ 191 (ent)->object.uvm_obj == (uobj) && \ 192 (ent)->protection == (prot) && \ 193 (ent)->max_protection == (maxprot) && \ 194 (ent)->inheritance == (inh) && \ 195 (ent)->advice == (adv) && \ 196 (ent)->wired_count == (wire)) 197 198 /* 199 * uvm_map_entry_link: insert entry into a map 200 * 201 * => map must be locked 202 */ 203 #define uvm_map_entry_link(map, after_where, entry) do { \ 204 uvm_mapent_check(entry); \ 205 (map)->nentries++; \ 206 (entry)->prev = (after_where); \ 207 (entry)->next = (after_where)->next; \ 208 (entry)->prev->next = (entry); \ 209 (entry)->next->prev = (entry); \ 210 uvm_rb_insert((map), (entry)); \ 211 } while (/*CONSTCOND*/ 0) 212 213 /* 214 * uvm_map_entry_unlink: remove entry from a map 215 * 216 * => map must be locked 217 */ 218 #define uvm_map_entry_unlink(map, entry) do { \ 219 KASSERT((entry) != (map)->first_free); \ 220 KASSERT((entry) != (map)->hint); \ 221 uvm_mapent_check(entry); \ 222 (map)->nentries--; \ 223 (entry)->next->prev = (entry)->prev; \ 224 (entry)->prev->next = (entry)->next; \ 225 uvm_rb_remove((map), (entry)); \ 226 } while (/*CONSTCOND*/ 0) 227 228 /* 229 * SAVE_HINT: saves the specified entry as the hint for future lookups. 230 * 231 * => map need not be locked. 232 */ 233 #define SAVE_HINT(map, check, value) do { \ 234 if ((map)->hint == (check)) \ 235 (map)->hint = (value); \ 236 } while (/*CONSTCOND*/ 0) 237 238 /* 239 * clear_hints: ensure that hints don't point to the entry. 240 * 241 * => map must be write-locked. 
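 *
 * An illustrative note (not part of the original source): SAVE_HINT()
 * above only replaces the hint when it still equals the expected value,
 * so a removal path passes the entry being removed as "check" and the
 * hint is left untouched if it already points elsewhere:
 *
 *	SAVE_HINT(map, entry, entry->prev);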
242 */ 243 static void 244 clear_hints(struct vm_map *map, struct vm_map_entry *ent) 245 { 246 247 SAVE_HINT(map, ent, ent->prev); 248 if (map->first_free == ent) { 249 map->first_free = ent->prev; 250 } 251 } 252 253 /* 254 * VM_MAP_RANGE_CHECK: check and correct range 255 * 256 * => map must at least be read locked 257 */ 258 259 #define VM_MAP_RANGE_CHECK(map, start, end) do { \ 260 if (start < vm_map_min(map)) \ 261 start = vm_map_min(map); \ 262 if (end > vm_map_max(map)) \ 263 end = vm_map_max(map); \ 264 if (start > end) \ 265 start = end; \ 266 } while (/*CONSTCOND*/ 0) 267 268 /* 269 * local prototypes 270 */ 271 272 static struct vm_map_entry * 273 uvm_mapent_alloc(struct vm_map *, int); 274 static void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *); 275 static void uvm_mapent_free(struct vm_map_entry *); 276 #if defined(DEBUG) 277 static void _uvm_mapent_check(const struct vm_map_entry *, const char *, 278 int); 279 #define uvm_mapent_check(map) _uvm_mapent_check(map, __FILE__, __LINE__) 280 #else /* defined(DEBUG) */ 281 #define uvm_mapent_check(e) /* nothing */ 282 #endif /* defined(DEBUG) */ 283 284 static void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *); 285 static void uvm_map_reference_amap(struct vm_map_entry *, int); 286 static int uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int, 287 int, struct vm_map_entry *); 288 static void uvm_map_unreference_amap(struct vm_map_entry *, int); 289 290 int _uvm_map_sanity(struct vm_map *); 291 int _uvm_tree_sanity(struct vm_map *); 292 static vsize_t uvm_rb_maxgap(const struct vm_map_entry *); 293 294 #define ROOT_ENTRY(map) ((struct vm_map_entry *)(map)->rb_tree.rbt_root) 295 #define LEFT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_left) 296 #define RIGHT_ENTRY(entry) ((struct vm_map_entry *)(entry)->rb_node.rb_right) 297 #define PARENT_ENTRY(map, entry) \ 298 (ROOT_ENTRY(map) == (entry) \ 299 ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node)) 300 301 static int 302 uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey) 303 { 304 const struct vm_map_entry *eparent = nparent; 305 const struct vm_map_entry *ekey = nkey; 306 307 KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end); 308 KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end); 309 310 if (eparent->start < ekey->start) 311 return -1; 312 if (eparent->end >= ekey->start) 313 return 1; 314 return 0; 315 } 316 317 static int 318 uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey) 319 { 320 const struct vm_map_entry *eparent = nparent; 321 const vaddr_t va = *(const vaddr_t *) vkey; 322 323 if (eparent->start < va) 324 return -1; 325 if (eparent->end >= va) 326 return 1; 327 return 0; 328 } 329 330 static const rb_tree_ops_t uvm_map_tree_ops = { 331 .rbto_compare_nodes = uvm_map_compare_nodes, 332 .rbto_compare_key = uvm_map_compare_key, 333 .rbto_node_offset = offsetof(struct vm_map_entry, rb_node), 334 .rbto_context = NULL 335 }; 336 337 /* 338 * uvm_rb_gap: return the gap size between our entry and next entry. 339 */ 340 static inline vsize_t 341 uvm_rb_gap(const struct vm_map_entry *entry) 342 { 343 344 KASSERT(entry->next != NULL); 345 return entry->next->start - entry->end; 346 } 347 348 static vsize_t 349 uvm_rb_maxgap(const struct vm_map_entry *entry) 350 { 351 struct vm_map_entry *child; 352 vsize_t maxgap = entry->gap; 353 354 /* 355 * We need maxgap to be the largest gap of us or any of our 356 * descendents. 
Since each of our children's maxgap is the 357 * cached value of their largest gap of themselves or their 358 * descendents, we can just use that value and avoid recursing 359 * down the tree to calculate it. 360 */ 361 if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 362 maxgap = child->maxgap; 363 364 if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap) 365 maxgap = child->maxgap; 366 367 return maxgap; 368 } 369 370 static void 371 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry) 372 { 373 struct vm_map_entry *parent; 374 375 KASSERT(entry->gap == uvm_rb_gap(entry)); 376 entry->maxgap = uvm_rb_maxgap(entry); 377 378 while ((parent = PARENT_ENTRY(map, entry)) != NULL) { 379 struct vm_map_entry *brother; 380 vsize_t maxgap = parent->gap; 381 unsigned int which; 382 383 KDASSERT(parent->gap == uvm_rb_gap(parent)); 384 if (maxgap < entry->maxgap) 385 maxgap = entry->maxgap; 386 /* 387 * Since we work towards the root, we know entry's maxgap 388 * value is OK, but its brothers may now be out-of-date due 389 * to rebalancing. So refresh it. 390 */ 391 which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER; 392 brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which]; 393 if (brother != NULL) { 394 KDASSERT(brother->gap == uvm_rb_gap(brother)); 395 brother->maxgap = uvm_rb_maxgap(brother); 396 if (maxgap < brother->maxgap) 397 maxgap = brother->maxgap; 398 } 399 400 parent->maxgap = maxgap; 401 entry = parent; 402 } 403 } 404 405 static void 406 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry) 407 { 408 struct vm_map_entry *ret __diagused; 409 410 entry->gap = entry->maxgap = uvm_rb_gap(entry); 411 if (entry->prev != &map->header) 412 entry->prev->gap = uvm_rb_gap(entry->prev); 413 414 ret = rb_tree_insert_node(&map->rb_tree, entry); 415 KASSERTMSG(ret == entry, 416 "uvm_rb_insert: map %p: duplicate entry %p", map, ret); 417 418 /* 419 * If the previous entry is not our immediate left child, then it's an 420 * ancestor and will be fixed up on the way to the root. We don't 421 * have to check entry->prev against &map->header since &map->header 422 * will never be in the tree. 423 */ 424 uvm_rb_fixup(map, 425 LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry); 426 } 427 428 static void 429 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry) 430 { 431 struct vm_map_entry *prev_parent = NULL, *next_parent = NULL; 432 433 /* 434 * If we are removing an interior node, then an adjacent node will 435 * be used to replace its position in the tree. Therefore we will 436 * need to fixup the tree starting at the parent of the replacement 437 * node. So record their parents for later use. 438 */ 439 if (entry->prev != &map->header) 440 prev_parent = PARENT_ENTRY(map, entry->prev); 441 if (entry->next != &map->header) 442 next_parent = PARENT_ENTRY(map, entry->next); 443 444 rb_tree_remove_node(&map->rb_tree, entry); 445 446 /* 447 * If the previous node has a new parent, fixup the tree starting 448 * at the previous node's old parent. 449 */ 450 if (entry->prev != &map->header) { 451 /* 452 * Update the previous entry's gap due to our absence. 453 */ 454 entry->prev->gap = uvm_rb_gap(entry->prev); 455 uvm_rb_fixup(map, entry->prev); 456 if (prev_parent != NULL 457 && prev_parent != entry 458 && prev_parent != PARENT_ENTRY(map, entry->prev)) 459 uvm_rb_fixup(map, prev_parent); 460 } 461 462 /* 463 * If the next node has a new parent, fixup the tree starting 464 * at the next node's old parent. 
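 *
 * An illustrative summary (not part of the original source) of the
 * per-entry fields being repaired here, as defined by uvm_rb_gap() and
 * uvm_rb_maxgap() above and checked by _uvm_tree_sanity() below:
 *
 *	entry->gap    == entry->next->start - entry->end
 *	entry->maxgap == largest gap of entry or any of its descendents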
465 */ 466 if (entry->next != &map->header) { 467 uvm_rb_fixup(map, entry->next); 468 if (next_parent != NULL 469 && next_parent != entry 470 && next_parent != PARENT_ENTRY(map, entry->next)) 471 uvm_rb_fixup(map, next_parent); 472 } 473 } 474 475 #if defined(DEBUG) 476 int uvm_debug_check_map = 0; 477 int uvm_debug_check_rbtree = 0; 478 #define uvm_map_check(map, name) \ 479 _uvm_map_check((map), (name), __FILE__, __LINE__) 480 static void 481 _uvm_map_check(struct vm_map *map, const char *name, 482 const char *file, int line) 483 { 484 485 if ((uvm_debug_check_map && _uvm_map_sanity(map)) || 486 (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) { 487 panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)", 488 name, map, file, line); 489 } 490 } 491 #else /* defined(DEBUG) */ 492 #define uvm_map_check(map, name) /* nothing */ 493 #endif /* defined(DEBUG) */ 494 495 #if defined(DEBUG) || defined(DDB) 496 int 497 _uvm_map_sanity(struct vm_map *map) 498 { 499 bool first_free_found = false; 500 bool hint_found = false; 501 const struct vm_map_entry *e; 502 struct vm_map_entry *hint = map->hint; 503 504 e = &map->header; 505 for (;;) { 506 if (map->first_free == e) { 507 first_free_found = true; 508 } else if (!first_free_found && e->next->start > e->end) { 509 printf("first_free %p should be %p\n", 510 map->first_free, e); 511 return -1; 512 } 513 if (hint == e) { 514 hint_found = true; 515 } 516 517 e = e->next; 518 if (e == &map->header) { 519 break; 520 } 521 } 522 if (!first_free_found) { 523 printf("stale first_free\n"); 524 return -1; 525 } 526 if (!hint_found) { 527 printf("stale hint\n"); 528 return -1; 529 } 530 return 0; 531 } 532 533 int 534 _uvm_tree_sanity(struct vm_map *map) 535 { 536 struct vm_map_entry *tmp, *trtmp; 537 int n = 0, i = 1; 538 539 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 540 if (tmp->gap != uvm_rb_gap(tmp)) { 541 printf("%d/%d gap %lx != %lx %s\n", 542 n + 1, map->nentries, 543 (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp), 544 tmp->next == &map->header ? "(last)" : ""); 545 goto error; 546 } 547 /* 548 * If any entries are out of order, tmp->gap will be unsigned 549 * and will likely exceed the size of the map. 
550 */ 551 if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) { 552 printf("too large gap %zu\n", (size_t)tmp->gap); 553 goto error; 554 } 555 n++; 556 } 557 558 if (n != map->nentries) { 559 printf("nentries: %d vs %d\n", n, map->nentries); 560 goto error; 561 } 562 563 trtmp = NULL; 564 for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) { 565 if (tmp->maxgap != uvm_rb_maxgap(tmp)) { 566 printf("maxgap %lx != %lx\n", 567 (ulong)tmp->maxgap, 568 (ulong)uvm_rb_maxgap(tmp)); 569 goto error; 570 } 571 if (trtmp != NULL && trtmp->start >= tmp->start) { 572 printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n", 573 trtmp->start, tmp->start); 574 goto error; 575 } 576 577 trtmp = tmp; 578 } 579 580 for (tmp = map->header.next; tmp != &map->header; 581 tmp = tmp->next, i++) { 582 trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT); 583 if (trtmp == NULL) 584 trtmp = &map->header; 585 if (tmp->prev != trtmp) { 586 printf("lookup: %d: %p->prev=%p: %p\n", 587 i, tmp, tmp->prev, trtmp); 588 goto error; 589 } 590 trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT); 591 if (trtmp == NULL) 592 trtmp = &map->header; 593 if (tmp->next != trtmp) { 594 printf("lookup: %d: %p->next=%p: %p\n", 595 i, tmp, tmp->next, trtmp); 596 goto error; 597 } 598 trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start); 599 if (trtmp != tmp) { 600 printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp, 601 PARENT_ENTRY(map, tmp)); 602 goto error; 603 } 604 } 605 606 return (0); 607 error: 608 return (-1); 609 } 610 #endif /* defined(DEBUG) || defined(DDB) */ 611 612 /* 613 * vm_map_lock: acquire an exclusive (write) lock on a map. 614 * 615 * => The locking protocol provides for guaranteed upgrade from shared -> 616 * exclusive by whichever thread currently has the map marked busy. 617 * See "LOCKING PROTOCOL NOTES" in uvm_map.h. This is horrible; among 618 * other problems, it defeats any fairness guarantees provided by RW 619 * locks. 620 */ 621 622 void 623 vm_map_lock(struct vm_map *map) 624 { 625 626 for (;;) { 627 rw_enter(&map->lock, RW_WRITER); 628 if (map->busy == NULL || map->busy == curlwp) { 629 break; 630 } 631 mutex_enter(&map->misc_lock); 632 rw_exit(&map->lock); 633 if (map->busy != NULL) { 634 cv_wait(&map->cv, &map->misc_lock); 635 } 636 mutex_exit(&map->misc_lock); 637 } 638 map->timestamp++; 639 } 640 641 /* 642 * vm_map_lock_try: try to lock a map, failing if it is already locked. 643 */ 644 645 bool 646 vm_map_lock_try(struct vm_map *map) 647 { 648 649 if (!rw_tryenter(&map->lock, RW_WRITER)) { 650 return false; 651 } 652 if (map->busy != NULL) { 653 rw_exit(&map->lock); 654 return false; 655 } 656 map->timestamp++; 657 return true; 658 } 659 660 /* 661 * vm_map_unlock: release an exclusive lock on a map. 662 */ 663 664 void 665 vm_map_unlock(struct vm_map *map) 666 { 667 668 KASSERT(rw_write_held(&map->lock)); 669 KASSERT(map->busy == NULL || map->busy == curlwp); 670 rw_exit(&map->lock); 671 } 672 673 /* 674 * vm_map_unbusy: mark the map as unbusy, and wake any waiters that 675 * want an exclusive lock. 
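 *
 * An illustrative usage sketch (not part of the original source; exact
 * call sites vary): a thread that must block while keeping exclusive
 * lockers out marks the map busy under the write lock, drops the lock
 * across the blocking operation, and then clears the busy state, waking
 * anyone parked in vm_map_lock():
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);
 *	vm_map_unlock(map);
 *	...				(sleep, do I/O, etc.)
 *	vm_map_unbusy(map);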
676 */ 677 678 void 679 vm_map_unbusy(struct vm_map *map) 680 { 681 682 KASSERT(map->busy == curlwp); 683 684 /* 685 * Safe to clear 'busy' and 'waiters' with only a read lock held: 686 * 687 * o they can only be set with a write lock held 688 * o writers are blocked out with a read or write hold 689 * o at any time, only one thread owns the set of values 690 */ 691 mutex_enter(&map->misc_lock); 692 map->busy = NULL; 693 cv_broadcast(&map->cv); 694 mutex_exit(&map->misc_lock); 695 } 696 697 /* 698 * vm_map_lock_read: acquire a shared (read) lock on a map. 699 */ 700 701 void 702 vm_map_lock_read(struct vm_map *map) 703 { 704 705 rw_enter(&map->lock, RW_READER); 706 } 707 708 /* 709 * vm_map_unlock_read: release a shared lock on a map. 710 */ 711 712 void 713 vm_map_unlock_read(struct vm_map *map) 714 { 715 716 rw_exit(&map->lock); 717 } 718 719 /* 720 * vm_map_busy: mark a map as busy. 721 * 722 * => the caller must hold the map write locked 723 */ 724 725 void 726 vm_map_busy(struct vm_map *map) 727 { 728 729 KASSERT(rw_write_held(&map->lock)); 730 KASSERT(map->busy == NULL); 731 732 map->busy = curlwp; 733 } 734 735 /* 736 * vm_map_locked_p: return true if the map is write locked. 737 * 738 * => only for debug purposes like KASSERTs. 739 * => should not be used to verify that a map is not locked. 740 */ 741 742 bool 743 vm_map_locked_p(struct vm_map *map) 744 { 745 746 return rw_write_held(&map->lock); 747 } 748 749 /* 750 * uvm_mapent_alloc: allocate a map entry 751 */ 752 753 static struct vm_map_entry * 754 uvm_mapent_alloc(struct vm_map *map, int flags) 755 { 756 struct vm_map_entry *me; 757 int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK; 758 UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); 759 760 me = pool_cache_get(&uvm_map_entry_cache, pflags); 761 if (__predict_false(me == NULL)) { 762 return NULL; 763 } 764 me->flags = 0; 765 766 UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me, 767 (map == kernel_map), 0, 0); 768 return me; 769 } 770 771 /* 772 * uvm_mapent_free: free map entry 773 */ 774 775 static void 776 uvm_mapent_free(struct vm_map_entry *me) 777 { 778 UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist); 779 780 UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", 781 me, me->flags, 0, 0); 782 pool_cache_put(&uvm_map_entry_cache, me); 783 } 784 785 /* 786 * uvm_mapent_copy: copy a map entry, preserving flags 787 */ 788 789 static inline void 790 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 791 { 792 793 memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) - 794 ((char *)src)); 795 } 796 797 #if defined(DEBUG) 798 static void 799 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line) 800 { 801 802 if (entry->start >= entry->end) { 803 goto bad; 804 } 805 if (UVM_ET_ISOBJ(entry)) { 806 if (entry->object.uvm_obj == NULL) { 807 goto bad; 808 } 809 } else if (UVM_ET_ISSUBMAP(entry)) { 810 if (entry->object.sub_map == NULL) { 811 goto bad; 812 } 813 } else { 814 if (entry->object.uvm_obj != NULL || 815 entry->object.sub_map != NULL) { 816 goto bad; 817 } 818 } 819 if (!UVM_ET_ISOBJ(entry)) { 820 if (entry->offset != 0) { 821 goto bad; 822 } 823 } 824 825 return; 826 827 bad: 828 panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line); 829 } 830 #endif /* defined(DEBUG) */ 831 832 /* 833 * uvm_map_entry_unwire: unwire a map entry 834 * 835 * => map should be locked by caller 836 */ 837 838 static inline void 839 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry 
*entry) 840 { 841 842 entry->wired_count = 0; 843 uvm_fault_unwire_locked(map, entry->start, entry->end); 844 } 845 846 847 /* 848 * wrapper for calling amap_ref() 849 */ 850 static inline void 851 uvm_map_reference_amap(struct vm_map_entry *entry, int flags) 852 { 853 854 amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff, 855 (entry->end - entry->start) >> PAGE_SHIFT, flags); 856 } 857 858 859 /* 860 * wrapper for calling amap_unref() 861 */ 862 static inline void 863 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) 864 { 865 866 amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff, 867 (entry->end - entry->start) >> PAGE_SHIFT, flags); 868 } 869 870 871 /* 872 * uvm_map_init: init mapping system at boot time. 873 */ 874 875 void 876 uvm_map_init(void) 877 { 878 #if defined(UVMHIST) 879 static struct kern_history_ent pdhistbuf[100]; 880 #endif 881 882 /* 883 * first, init logging system. 884 */ 885 886 UVMHIST_FUNC("uvm_map_init"); 887 UVMHIST_LINK_STATIC(maphist); 888 UVMHIST_INIT_STATIC(pdhist, pdhistbuf); 889 UVMHIST_CALLED(maphist); 890 UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); 891 892 /* 893 * initialize the global lock for kernel map entry. 894 */ 895 896 mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); 897 } 898 899 /* 900 * uvm_map_init_caches: init mapping system caches. 901 */ 902 void 903 uvm_map_init_caches(void) 904 { 905 /* 906 * initialize caches. 907 */ 908 909 pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), 910 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); 911 pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), 912 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); 913 } 914 915 /* 916 * clippers 917 */ 918 919 /* 920 * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy. 921 */ 922 923 static void 924 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2, 925 vaddr_t splitat) 926 { 927 vaddr_t adj; 928 929 KASSERT(entry1->start < splitat); 930 KASSERT(splitat < entry1->end); 931 932 adj = splitat - entry1->start; 933 entry1->end = entry2->start = splitat; 934 935 if (entry1->aref.ar_amap) { 936 amap_splitref(&entry1->aref, &entry2->aref, adj); 937 } 938 if (UVM_ET_ISSUBMAP(entry1)) { 939 /* ... unlikely to happen, but play it safe */ 940 uvm_map_reference(entry1->object.sub_map); 941 } else if (UVM_ET_ISOBJ(entry1)) { 942 KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */ 943 entry2->offset += adj; 944 if (entry1->object.uvm_obj->pgops && 945 entry1->object.uvm_obj->pgops->pgo_reference) 946 entry1->object.uvm_obj->pgops->pgo_reference( 947 entry1->object.uvm_obj); 948 } 949 } 950 951 /* 952 * uvm_map_clip_start: ensure that the entry begins at or after 953 * the starting address, if it doesn't we split the entry. 954 * 955 * => caller should use UVM_MAP_CLIP_START macro rather than calling 956 * this directly 957 * => map must be locked by caller 958 */ 959 960 void 961 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, 962 vaddr_t start) 963 { 964 struct vm_map_entry *new_entry; 965 966 /* uvm_map_simplify_entry(map, entry); */ /* XXX */ 967 968 uvm_map_check(map, "clip_start entry"); 969 uvm_mapent_check(entry); 970 971 /* 972 * Split off the front portion. note that we must insert the new 973 * entry BEFORE this one, so that this entry has the specified 974 * starting address. 
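 *
 * An illustrative summary (not part of the original source): for an
 * entry covering [A, C) clipped at start == B, the code below yields
 *
 *	new_entry:  [A, B)   keeps the original offset and aref start
 *	entry:      [B, C)   offset and aref advanced by (B - A)
 *	                     in uvm_mapent_splitadj()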
975 */ 976 new_entry = uvm_mapent_alloc(map, 0); 977 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 978 uvm_mapent_splitadj(new_entry, entry, start); 979 uvm_map_entry_link(map, entry->prev, new_entry); 980 981 uvm_map_check(map, "clip_start leave"); 982 } 983 984 /* 985 * uvm_map_clip_end: ensure that the entry ends at or before 986 * the ending address, if it does't we split the reference 987 * 988 * => caller should use UVM_MAP_CLIP_END macro rather than calling 989 * this directly 990 * => map must be locked by caller 991 */ 992 993 void 994 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end) 995 { 996 struct vm_map_entry *new_entry; 997 998 uvm_map_check(map, "clip_end entry"); 999 uvm_mapent_check(entry); 1000 1001 /* 1002 * Create a new entry and insert it 1003 * AFTER the specified entry 1004 */ 1005 new_entry = uvm_mapent_alloc(map, 0); 1006 uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ 1007 uvm_mapent_splitadj(entry, new_entry, end); 1008 uvm_map_entry_link(map, entry, new_entry); 1009 1010 uvm_map_check(map, "clip_end leave"); 1011 } 1012 1013 /* 1014 * M A P - m a i n e n t r y p o i n t 1015 */ 1016 /* 1017 * uvm_map: establish a valid mapping in a map 1018 * 1019 * => assume startp is page aligned. 1020 * => assume size is a multiple of PAGE_SIZE. 1021 * => assume sys_mmap provides enough of a "hint" to have us skip 1022 * over text/data/bss area. 1023 * => map must be unlocked (we will lock it) 1024 * => <uobj,uoffset> value meanings (4 cases): 1025 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1026 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1027 * [3] <uobj,uoffset> == normal mapping 1028 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1029 * 1030 * case [4] is for kernel mappings where we don't know the offset until 1031 * we've found a virtual address. note that kernel object offsets are 1032 * always relative to vm_map_min(kernel_map). 1033 * 1034 * => if `align' is non-zero, we align the virtual address to the specified 1035 * alignment. 1036 * this is provided as a mechanism for large pages. 1037 * 1038 * => XXXCDC: need way to map in external amap? 1039 */ 1040 1041 int 1042 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size, 1043 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) 1044 { 1045 struct uvm_map_args args; 1046 struct vm_map_entry *new_entry; 1047 int error; 1048 1049 KASSERT((size & PAGE_MASK) == 0); 1050 1051 #ifndef __USER_VA0_IS_SAFE 1052 if ((flags & UVM_FLAG_FIXED) && *startp == 0 && 1053 !VM_MAP_IS_KERNEL(map) && user_va0_disable) 1054 return EACCES; 1055 #endif 1056 1057 /* 1058 * for pager_map, allocate the new entry first to avoid sleeping 1059 * for memory while we have the map locked. 
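 *
 * An illustrative caller sketch for the interface documented above (not
 * part of the original source; the particular protections and flags are
 * only for exposition), e.g. an anonymous copy-on-write mapping where
 * uvm_map() chooses the virtual address and size is a multiple of
 * PAGE_SIZE:
 *
 *	error = uvm_map(map, &va, size, NULL, UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_ALL, UVM_INH_COPY,
 *	    UVM_ADV_NORMAL, UVM_FLAG_COPYONW));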
1060 */ 1061 1062 new_entry = NULL; 1063 if (map == pager_map) { 1064 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT)); 1065 if (__predict_false(new_entry == NULL)) 1066 return ENOMEM; 1067 } 1068 if (map == pager_map) 1069 flags |= UVM_FLAG_NOMERGE; 1070 1071 error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align, 1072 flags, &args); 1073 if (!error) { 1074 error = uvm_map_enter(map, &args, new_entry); 1075 *startp = args.uma_start; 1076 } else if (new_entry) { 1077 uvm_mapent_free(new_entry); 1078 } 1079 1080 #if defined(DEBUG) 1081 if (!error && VM_MAP_IS_KERNEL(map)) { 1082 uvm_km_check_empty(map, *startp, *startp + size); 1083 } 1084 #endif /* defined(DEBUG) */ 1085 1086 return error; 1087 } 1088 1089 /* 1090 * uvm_map_prepare: 1091 * 1092 * called with map unlocked. 1093 * on success, returns the map locked. 1094 */ 1095 1096 int 1097 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, 1098 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags, 1099 struct uvm_map_args *args) 1100 { 1101 struct vm_map_entry *prev_entry; 1102 vm_prot_t prot = UVM_PROTECTION(flags); 1103 vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1104 1105 UVMHIST_FUNC("uvm_map_prepare"); 1106 UVMHIST_CALLED(maphist); 1107 1108 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1109 map, start, size, flags); 1110 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1111 1112 /* 1113 * detect a popular device driver bug. 1114 */ 1115 1116 KASSERT(doing_shutdown || curlwp != NULL); 1117 1118 /* 1119 * zero-sized mapping doesn't make any sense. 1120 */ 1121 KASSERT(size > 0); 1122 1123 KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0); 1124 1125 uvm_map_check(map, "map entry"); 1126 1127 /* 1128 * check sanity of protection code 1129 */ 1130 1131 if ((prot & maxprot) != prot) { 1132 UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x", 1133 prot, maxprot,0,0); 1134 return EACCES; 1135 } 1136 1137 /* 1138 * figure out where to put new VM range 1139 */ 1140 retry: 1141 if (vm_map_lock_try(map) == false) { 1142 if ((flags & UVM_FLAG_TRYLOCK) != 0) { 1143 return EAGAIN; 1144 } 1145 vm_map_lock(map); /* could sleep here */ 1146 } 1147 prev_entry = uvm_map_findspace(map, start, size, &start, 1148 uobj, uoffset, align, flags); 1149 if (prev_entry == NULL) { 1150 unsigned int timestamp; 1151 1152 timestamp = map->timestamp; 1153 UVMHIST_LOG(maphist,"waiting va timestamp=0x%x", 1154 timestamp,0,0,0); 1155 map->flags |= VM_MAP_WANTVA; 1156 vm_map_unlock(map); 1157 1158 /* 1159 * try to reclaim kva and wait until someone does unmap. 1160 * fragile locking here, so we awaken every second to 1161 * recheck the condition. 1162 */ 1163 1164 mutex_enter(&map->misc_lock); 1165 while ((map->flags & VM_MAP_WANTVA) != 0 && 1166 map->timestamp == timestamp) { 1167 if ((flags & UVM_FLAG_WAITVA) == 0) { 1168 mutex_exit(&map->misc_lock); 1169 UVMHIST_LOG(maphist, 1170 "<- uvm_map_findspace failed!", 0,0,0,0); 1171 return ENOMEM; 1172 } else { 1173 cv_timedwait(&map->cv, &map->misc_lock, hz); 1174 } 1175 } 1176 mutex_exit(&map->misc_lock); 1177 goto retry; 1178 } 1179 1180 #ifdef PMAP_GROWKERNEL 1181 /* 1182 * If the kernel pmap can't map the requested space, 1183 * then allocate more resources for it. 
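 *
 * (An added note, not part of the original source: uvm_maxkaddr is
 *  protected by kernel_map's lock, which is still held here because
 *  uvm_map_findspace() above succeeded with the map locked.)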
1184 */ 1185 if (map == kernel_map && uvm_maxkaddr < (start + size)) 1186 uvm_maxkaddr = pmap_growkernel(start + size); 1187 #endif 1188 1189 UVMMAP_EVCNT_INCR(map_call); 1190 1191 /* 1192 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER 1193 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in 1194 * either case we want to zero it before storing it in the map entry 1195 * (because it looks strange and confusing when debugging...) 1196 * 1197 * if uobj is not null 1198 * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping 1199 * and we do not need to change uoffset. 1200 * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset 1201 * now (based on the starting address of the map). this case is 1202 * for kernel object mappings where we don't know the offset until 1203 * the virtual address is found (with uvm_map_findspace). the 1204 * offset is the distance we are from the start of the map. 1205 */ 1206 1207 if (uobj == NULL) { 1208 uoffset = 0; 1209 } else { 1210 if (uoffset == UVM_UNKNOWN_OFFSET) { 1211 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1212 uoffset = start - vm_map_min(kernel_map); 1213 } 1214 } 1215 1216 args->uma_flags = flags; 1217 args->uma_prev = prev_entry; 1218 args->uma_start = start; 1219 args->uma_size = size; 1220 args->uma_uobj = uobj; 1221 args->uma_uoffset = uoffset; 1222 1223 UVMHIST_LOG(maphist, "<- done!", 0,0,0,0); 1224 return 0; 1225 } 1226 1227 /* 1228 * uvm_map_enter: 1229 * 1230 * called with map locked. 1231 * unlock the map before returning. 1232 */ 1233 1234 int 1235 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args, 1236 struct vm_map_entry *new_entry) 1237 { 1238 struct vm_map_entry *prev_entry = args->uma_prev; 1239 struct vm_map_entry *dead = NULL; 1240 1241 const uvm_flag_t flags = args->uma_flags; 1242 const vm_prot_t prot = UVM_PROTECTION(flags); 1243 const vm_prot_t maxprot = UVM_MAXPROTECTION(flags); 1244 const vm_inherit_t inherit = UVM_INHERIT(flags); 1245 const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ? 1246 AMAP_EXTEND_NOWAIT : 0; 1247 const int advice = UVM_ADVICE(flags); 1248 1249 vaddr_t start = args->uma_start; 1250 vsize_t size = args->uma_size; 1251 struct uvm_object *uobj = args->uma_uobj; 1252 voff_t uoffset = args->uma_uoffset; 1253 1254 const int kmap = (vm_map_pmap(map) == pmap_kernel()); 1255 int merged = 0; 1256 int error; 1257 int newetype; 1258 1259 UVMHIST_FUNC("uvm_map_enter"); 1260 UVMHIST_CALLED(maphist); 1261 1262 UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)", 1263 map, start, size, flags); 1264 UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); 1265 1266 KASSERT(map->hint == prev_entry); /* bimerge case assumes this */ 1267 KASSERT(vm_map_locked_p(map)); 1268 1269 if (uobj) 1270 newetype = UVM_ET_OBJ; 1271 else 1272 newetype = 0; 1273 1274 if (flags & UVM_FLAG_COPYONW) { 1275 newetype |= UVM_ET_COPYONWRITE; 1276 if ((flags & UVM_FLAG_OVERLAY) == 0) 1277 newetype |= UVM_ET_NEEDSCOPY; 1278 } 1279 1280 /* 1281 * try and insert in map by extending previous entry, if possible. 1282 * XXX: we don't try and pull back the next entry. might be useful 1283 * for a stack, but we are currently allocating our stack in advance. 
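 *
 * An illustrative summary (not part of the original source) of the back
 * merge performed below: when the previous entry ends exactly at "start"
 * and is compatible (same etype, object, protection, inheritance, advice,
 * unwired, and not marked UVM_MAP_NOMERGE), the new range is absorbed by
 * growing that entry in place:
 *
 *	prev_entry->end += size;
 *	prev_entry->gap -= size;
 *	uvm_rb_fixup(map, prev_entry);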
1284 */ 1285 1286 if (flags & UVM_FLAG_NOMERGE) 1287 goto nomerge; 1288 1289 if (prev_entry->end == start && 1290 prev_entry != &map->header && 1291 UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0, 1292 prot, maxprot, inherit, advice, 0)) { 1293 1294 if (uobj && prev_entry->offset + 1295 (prev_entry->end - prev_entry->start) != uoffset) 1296 goto forwardmerge; 1297 1298 /* 1299 * can't extend a shared amap. note: no need to lock amap to 1300 * look at refs since we don't care about its exact value. 1301 * if it is one (i.e. we have only reference) it will stay there 1302 */ 1303 1304 if (prev_entry->aref.ar_amap && 1305 amap_refs(prev_entry->aref.ar_amap) != 1) { 1306 goto forwardmerge; 1307 } 1308 1309 if (prev_entry->aref.ar_amap) { 1310 error = amap_extend(prev_entry, size, 1311 amapwaitflag | AMAP_EXTEND_FORWARDS); 1312 if (error) 1313 goto nomerge; 1314 } 1315 1316 if (kmap) { 1317 UVMMAP_EVCNT_INCR(kbackmerge); 1318 } else { 1319 UVMMAP_EVCNT_INCR(ubackmerge); 1320 } 1321 UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); 1322 1323 /* 1324 * drop our reference to uobj since we are extending a reference 1325 * that we already have (the ref count can not drop to zero). 1326 */ 1327 1328 if (uobj && uobj->pgops->pgo_detach) 1329 uobj->pgops->pgo_detach(uobj); 1330 1331 /* 1332 * Now that we've merged the entries, note that we've grown 1333 * and our gap has shrunk. Then fix the tree. 1334 */ 1335 prev_entry->end += size; 1336 prev_entry->gap -= size; 1337 uvm_rb_fixup(map, prev_entry); 1338 1339 uvm_map_check(map, "map backmerged"); 1340 1341 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); 1342 merged++; 1343 } 1344 1345 forwardmerge: 1346 if (prev_entry->next->start == (start + size) && 1347 prev_entry->next != &map->header && 1348 UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0, 1349 prot, maxprot, inherit, advice, 0)) { 1350 1351 if (uobj && prev_entry->next->offset != uoffset + size) 1352 goto nomerge; 1353 1354 /* 1355 * can't extend a shared amap. note: no need to lock amap to 1356 * look at refs since we don't care about its exact value. 1357 * if it is one (i.e. we have only reference) it will stay there. 1358 * 1359 * note that we also can't merge two amaps, so if we 1360 * merged with the previous entry which has an amap, 1361 * and the next entry also has an amap, we give up. 1362 * 1363 * Interesting cases: 1364 * amap, new, amap -> give up second merge (single fwd extend) 1365 * amap, new, none -> double forward extend (extend again here) 1366 * none, new, amap -> double backward extend (done here) 1367 * uobj, new, amap -> single backward extend (done here) 1368 * 1369 * XXX should we attempt to deal with someone refilling 1370 * the deallocated region between two entries that are 1371 * backed by the same amap (ie, arefs is 2, "prev" and 1372 * "next" refer to it, and adding this allocation will 1373 * close the hole, thus restoring arefs to 1 and 1374 * deallocating the "next" vm_map_entry)? -- @@@ 1375 */ 1376 1377 if (prev_entry->next->aref.ar_amap && 1378 (amap_refs(prev_entry->next->aref.ar_amap) != 1 || 1379 (merged && prev_entry->aref.ar_amap))) { 1380 goto nomerge; 1381 } 1382 1383 if (merged) { 1384 /* 1385 * Try to extend the amap of the previous entry to 1386 * cover the next entry as well. If it doesn't work 1387 * just skip on, don't actually give up, since we've 1388 * already completed the back merge. 
1389 */ 1390 if (prev_entry->aref.ar_amap) { 1391 if (amap_extend(prev_entry, 1392 prev_entry->next->end - 1393 prev_entry->next->start, 1394 amapwaitflag | AMAP_EXTEND_FORWARDS)) 1395 goto nomerge; 1396 } 1397 1398 /* 1399 * Try to extend the amap of the *next* entry 1400 * back to cover the new allocation *and* the 1401 * previous entry as well (the previous merge 1402 * didn't have an amap already otherwise we 1403 * wouldn't be checking here for an amap). If 1404 * it doesn't work just skip on, again, don't 1405 * actually give up, since we've already 1406 * completed the back merge. 1407 */ 1408 else if (prev_entry->next->aref.ar_amap) { 1409 if (amap_extend(prev_entry->next, 1410 prev_entry->end - 1411 prev_entry->start, 1412 amapwaitflag | AMAP_EXTEND_BACKWARDS)) 1413 goto nomerge; 1414 } 1415 } else { 1416 /* 1417 * Pull the next entry's amap backwards to cover this 1418 * new allocation. 1419 */ 1420 if (prev_entry->next->aref.ar_amap) { 1421 error = amap_extend(prev_entry->next, size, 1422 amapwaitflag | AMAP_EXTEND_BACKWARDS); 1423 if (error) 1424 goto nomerge; 1425 } 1426 } 1427 1428 if (merged) { 1429 if (kmap) { 1430 UVMMAP_EVCNT_DECR(kbackmerge); 1431 UVMMAP_EVCNT_INCR(kbimerge); 1432 } else { 1433 UVMMAP_EVCNT_DECR(ubackmerge); 1434 UVMMAP_EVCNT_INCR(ubimerge); 1435 } 1436 } else { 1437 if (kmap) { 1438 UVMMAP_EVCNT_INCR(kforwmerge); 1439 } else { 1440 UVMMAP_EVCNT_INCR(uforwmerge); 1441 } 1442 } 1443 UVMHIST_LOG(maphist," starting forward merge", 0, 0, 0, 0); 1444 1445 /* 1446 * drop our reference to uobj since we are extending a reference 1447 * that we already have (the ref count can not drop to zero). 1448 */ 1449 if (uobj && uobj->pgops->pgo_detach) 1450 uobj->pgops->pgo_detach(uobj); 1451 1452 if (merged) { 1453 dead = prev_entry->next; 1454 prev_entry->end = dead->end; 1455 uvm_map_entry_unlink(map, dead); 1456 if (dead->aref.ar_amap != NULL) { 1457 prev_entry->aref = dead->aref; 1458 dead->aref.ar_amap = NULL; 1459 } 1460 } else { 1461 prev_entry->next->start -= size; 1462 if (prev_entry != &map->header) { 1463 prev_entry->gap -= size; 1464 KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry)); 1465 uvm_rb_fixup(map, prev_entry); 1466 } 1467 if (uobj) 1468 prev_entry->next->offset = uoffset; 1469 } 1470 1471 uvm_map_check(map, "map forwardmerged"); 1472 1473 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0); 1474 merged++; 1475 } 1476 1477 nomerge: 1478 if (!merged) { 1479 UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); 1480 if (kmap) { 1481 UVMMAP_EVCNT_INCR(knomerge); 1482 } else { 1483 UVMMAP_EVCNT_INCR(unomerge); 1484 } 1485 1486 /* 1487 * allocate new entry and link it in. 1488 */ 1489 1490 if (new_entry == NULL) { 1491 new_entry = uvm_mapent_alloc(map, 1492 (flags & UVM_FLAG_NOWAIT)); 1493 if (__predict_false(new_entry == NULL)) { 1494 error = ENOMEM; 1495 goto done; 1496 } 1497 } 1498 new_entry->start = start; 1499 new_entry->end = new_entry->start + size; 1500 new_entry->object.uvm_obj = uobj; 1501 new_entry->offset = uoffset; 1502 1503 new_entry->etype = newetype; 1504 1505 if (flags & UVM_FLAG_NOMERGE) { 1506 new_entry->flags |= UVM_MAP_NOMERGE; 1507 } 1508 1509 new_entry->protection = prot; 1510 new_entry->max_protection = maxprot; 1511 new_entry->inheritance = inherit; 1512 new_entry->wired_count = 0; 1513 new_entry->advice = advice; 1514 if (flags & UVM_FLAG_OVERLAY) { 1515 1516 /* 1517 * to_add: for BSS we overallocate a little since we 1518 * are likely to extend 1519 */ 1520 1521 vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 
1522 UVM_AMAP_CHUNK << PAGE_SHIFT : 0; 1523 struct vm_amap *amap = amap_alloc(size, to_add, 1524 (flags & UVM_FLAG_NOWAIT)); 1525 if (__predict_false(amap == NULL)) { 1526 error = ENOMEM; 1527 goto done; 1528 } 1529 new_entry->aref.ar_pageoff = 0; 1530 new_entry->aref.ar_amap = amap; 1531 } else { 1532 new_entry->aref.ar_pageoff = 0; 1533 new_entry->aref.ar_amap = NULL; 1534 } 1535 uvm_map_entry_link(map, prev_entry, new_entry); 1536 1537 /* 1538 * Update the free space hint 1539 */ 1540 1541 if ((map->first_free == prev_entry) && 1542 (prev_entry->end >= new_entry->start)) 1543 map->first_free = new_entry; 1544 1545 new_entry = NULL; 1546 } 1547 1548 map->size += size; 1549 1550 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 1551 1552 error = 0; 1553 done: 1554 vm_map_unlock(map); 1555 1556 if (new_entry) { 1557 uvm_mapent_free(new_entry); 1558 } 1559 1560 if (dead) { 1561 KDASSERT(merged); 1562 uvm_mapent_free(dead); 1563 } 1564 1565 return error; 1566 } 1567 1568 /* 1569 * uvm_map_lookup_entry_bytree: lookup an entry in tree 1570 */ 1571 1572 static inline bool 1573 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address, 1574 struct vm_map_entry **entry /* OUT */) 1575 { 1576 struct vm_map_entry *prev = &map->header; 1577 struct vm_map_entry *cur = ROOT_ENTRY(map); 1578 1579 while (cur) { 1580 UVMMAP_EVCNT_INCR(mlk_treeloop); 1581 if (address >= cur->start) { 1582 if (address < cur->end) { 1583 *entry = cur; 1584 return true; 1585 } 1586 prev = cur; 1587 cur = RIGHT_ENTRY(cur); 1588 } else 1589 cur = LEFT_ENTRY(cur); 1590 } 1591 *entry = prev; 1592 return false; 1593 } 1594 1595 /* 1596 * uvm_map_lookup_entry: find map entry at or before an address 1597 * 1598 * => map must at least be read-locked by caller 1599 * => entry is returned in "entry" 1600 * => return value is true if address is in the returned entry 1601 */ 1602 1603 bool 1604 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1605 struct vm_map_entry **entry /* OUT */) 1606 { 1607 struct vm_map_entry *cur; 1608 bool use_tree = false; 1609 UVMHIST_FUNC("uvm_map_lookup_entry"); 1610 UVMHIST_CALLED(maphist); 1611 1612 UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)", 1613 map, address, entry, 0); 1614 1615 /* 1616 * start looking either from the head of the 1617 * list, or from the hint. 1618 */ 1619 1620 cur = map->hint; 1621 1622 if (cur == &map->header) 1623 cur = cur->next; 1624 1625 UVMMAP_EVCNT_INCR(mlk_call); 1626 if (address >= cur->start) { 1627 1628 /* 1629 * go from hint to end of list. 1630 * 1631 * but first, make a quick check to see if 1632 * we are already looking at the entry we 1633 * want (which is usually the case). 1634 * note also that we don't need to save the hint 1635 * here... it is the same hint (unless we are 1636 * at the header, in which case the hint didn't 1637 * buy us anything anyway). 1638 */ 1639 1640 if (cur != &map->header && cur->end > address) { 1641 UVMMAP_EVCNT_INCR(mlk_hint); 1642 *entry = cur; 1643 UVMHIST_LOG(maphist,"<- got it via hint (0x%x)", 1644 cur, 0, 0, 0); 1645 uvm_mapent_check(*entry); 1646 return (true); 1647 } 1648 1649 if (map->nentries > 15) 1650 use_tree = true; 1651 } else { 1652 1653 /* 1654 * invalid hint. use tree. 1655 */ 1656 use_tree = true; 1657 } 1658 1659 uvm_map_check(map, __func__); 1660 1661 if (use_tree) { 1662 /* 1663 * Simple lookup in the tree. Happens when the hint is 1664 * invalid, or nentries reach a threshold. 
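 *
 * An illustrative caller sketch (not part of the original source): with
 * the map at least read-locked, the lookup either finds the entry that
 * contains the address or reports the entry just before the gap that
 * holds it:
 *
 *	vm_map_lock_read(map);
 *	if (uvm_map_lookup_entry(map, va, &entry)) {
 *		...		(va lies inside "entry")
 *	} else {
 *		...		(va falls in the gap after "entry")
 *	}
 *	vm_map_unlock_read(map);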
1665 */ 1666 UVMMAP_EVCNT_INCR(mlk_tree); 1667 if (uvm_map_lookup_entry_bytree(map, address, entry)) { 1668 goto got; 1669 } else { 1670 goto failed; 1671 } 1672 } 1673 1674 /* 1675 * search linearly 1676 */ 1677 1678 UVMMAP_EVCNT_INCR(mlk_list); 1679 while (cur != &map->header) { 1680 UVMMAP_EVCNT_INCR(mlk_listloop); 1681 if (cur->end > address) { 1682 if (address >= cur->start) { 1683 /* 1684 * save this lookup for future 1685 * hints, and return 1686 */ 1687 1688 *entry = cur; 1689 got: 1690 SAVE_HINT(map, map->hint, *entry); 1691 UVMHIST_LOG(maphist,"<- search got it (0x%x)", 1692 cur, 0, 0, 0); 1693 KDASSERT((*entry)->start <= address); 1694 KDASSERT(address < (*entry)->end); 1695 uvm_mapent_check(*entry); 1696 return (true); 1697 } 1698 break; 1699 } 1700 cur = cur->next; 1701 } 1702 *entry = cur->prev; 1703 failed: 1704 SAVE_HINT(map, map->hint, *entry); 1705 UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); 1706 KDASSERT((*entry) == &map->header || (*entry)->end <= address); 1707 KDASSERT((*entry)->next == &map->header || 1708 address < (*entry)->next->start); 1709 return (false); 1710 } 1711 1712 /* 1713 * See if the range between start and start + length fits in the gap 1714 * entry->next->start and entry->end. Returns 1 if fits, 0 if doesn't 1715 * fit, and -1 address wraps around. 1716 */ 1717 static int 1718 uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset, 1719 vsize_t align, int flags, int topdown, struct vm_map_entry *entry) 1720 { 1721 vaddr_t end; 1722 1723 #ifdef PMAP_PREFER 1724 /* 1725 * push start address forward as needed to avoid VAC alias problems. 1726 * we only do this if a valid offset is specified. 1727 */ 1728 1729 if (uoffset != UVM_UNKNOWN_OFFSET) 1730 PMAP_PREFER(uoffset, start, length, topdown); 1731 #endif 1732 if ((flags & UVM_FLAG_COLORMATCH) != 0) { 1733 KASSERT(align < uvmexp.ncolors); 1734 if (uvmexp.ncolors > 1) { 1735 const u_int colormask = uvmexp.colormask; 1736 const u_int colorsize = colormask + 1; 1737 vaddr_t hint = atop(*start); 1738 const u_int color = hint & colormask; 1739 if (color != align) { 1740 hint -= color; /* adjust to color boundary */ 1741 KASSERT((hint & colormask) == 0); 1742 if (topdown) { 1743 if (align > color) 1744 hint -= colorsize; 1745 } else { 1746 if (align < color) 1747 hint += colorsize; 1748 } 1749 *start = ptoa(hint + align); /* adjust to color */ 1750 } 1751 } 1752 } else if (align != 0) { 1753 if ((*start & (align - 1)) != 0) { 1754 if (topdown) 1755 *start &= ~(align - 1); 1756 else 1757 *start = roundup(*start, align); 1758 } 1759 /* 1760 * XXX Should we PMAP_PREFER() here again? 1761 * eh...i think we're okay 1762 */ 1763 } 1764 1765 /* 1766 * Find the end of the proposed new region. Be sure we didn't 1767 * wrap around the address; if so, we lose. Otherwise, if the 1768 * proposed new region fits before the next entry, we win. 1769 */ 1770 1771 end = *start + length; 1772 if (end < *start) 1773 return (-1); 1774 1775 if (entry->next->start >= end && *start >= entry->end) 1776 return (1); 1777 1778 return (0); 1779 } 1780 1781 /* 1782 * uvm_map_findspace: find "length" sized space in "map". 1783 * 1784 * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is 1785 * set in "flags" (in which case we insist on using "hint"). 1786 * => "result" is VA returned 1787 * => uobj/uoffset are to be used to handle VAC alignment, if required 1788 * => if "align" is non-zero, we attempt to align to that value. 
1789 * => caller must at least have read-locked map 1790 * => returns NULL on failure, or pointer to prev. map entry if success 1791 * => note this is a cross between the old vm_map_findspace and vm_map_find 1792 */ 1793 1794 struct vm_map_entry * 1795 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length, 1796 vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset, 1797 vsize_t align, int flags) 1798 { 1799 struct vm_map_entry *entry; 1800 struct vm_map_entry *child, *prev, *tmp; 1801 vaddr_t orig_hint __diagused; 1802 const int topdown = map->flags & VM_MAP_TOPDOWN; 1803 UVMHIST_FUNC("uvm_map_findspace"); 1804 UVMHIST_CALLED(maphist); 1805 1806 UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)", 1807 map, hint, length, flags); 1808 KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || (align & (align - 1)) == 0); 1809 KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors); 1810 KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0); 1811 1812 uvm_map_check(map, "map_findspace entry"); 1813 1814 /* 1815 * remember the original hint. if we are aligning, then we 1816 * may have to try again with no alignment constraint if 1817 * we fail the first time. 1818 */ 1819 1820 orig_hint = hint; 1821 if (hint < vm_map_min(map)) { /* check ranges ... */ 1822 if (flags & UVM_FLAG_FIXED) { 1823 UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); 1824 return (NULL); 1825 } 1826 hint = vm_map_min(map); 1827 } 1828 if (hint > vm_map_max(map)) { 1829 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]", 1830 hint, vm_map_min(map), vm_map_max(map), 0); 1831 return (NULL); 1832 } 1833 1834 /* 1835 * Look for the first possible address; if there's already 1836 * something at this address, we have to start after it. 1837 */ 1838 1839 /* 1840 * @@@: there are four, no, eight cases to consider. 1841 * 1842 * 0: found, fixed, bottom up -> fail 1843 * 1: found, fixed, top down -> fail 1844 * 2: found, not fixed, bottom up -> start after entry->end, 1845 * loop up 1846 * 3: found, not fixed, top down -> start before entry->start, 1847 * loop down 1848 * 4: not found, fixed, bottom up -> check entry->next->start, fail 1849 * 5: not found, fixed, top down -> check entry->next->start, fail 1850 * 6: not found, not fixed, bottom up -> check entry->next->start, 1851 * loop up 1852 * 7: not found, not fixed, top down -> check entry->next->start, 1853 * loop down 1854 * 1855 * as you can see, it reduces to roughly five cases, and that 1856 * adding top down mapping only adds one unique case (without 1857 * it, there would be four cases). 1858 */ 1859 1860 if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) { 1861 entry = map->first_free; 1862 } else { 1863 if (uvm_map_lookup_entry(map, hint, &entry)) { 1864 /* "hint" address already in use ... */ 1865 if (flags & UVM_FLAG_FIXED) { 1866 UVMHIST_LOG(maphist, "<- fixed & VA in use", 1867 0, 0, 0, 0); 1868 return (NULL); 1869 } 1870 if (topdown) 1871 /* Start from lower gap. */ 1872 entry = entry->prev; 1873 } else if (flags & UVM_FLAG_FIXED) { 1874 if (entry->next->start >= hint + length && 1875 hint + length > hint) 1876 goto found; 1877 1878 /* "hint" address is gap but too small */ 1879 UVMHIST_LOG(maphist, "<- fixed mapping failed", 1880 0, 0, 0, 0); 1881 return (NULL); /* only one shot at it ... */ 1882 } else { 1883 /* 1884 * See if given hint fits in this gap. 
1885 */ 1886 switch (uvm_map_space_avail(&hint, length, 1887 uoffset, align, flags, topdown, entry)) { 1888 case 1: 1889 goto found; 1890 case -1: 1891 goto wraparound; 1892 } 1893 1894 if (topdown) { 1895 /* 1896 * Still there is a chance to fit 1897 * if hint > entry->end. 1898 */ 1899 } else { 1900 /* Start from higher gap. */ 1901 entry = entry->next; 1902 if (entry == &map->header) 1903 goto notfound; 1904 goto nextgap; 1905 } 1906 } 1907 } 1908 1909 /* 1910 * Note that all UVM_FLAGS_FIXED case is already handled. 1911 */ 1912 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 1913 1914 /* Try to find the space in the red-black tree */ 1915 1916 /* Check slot before any entry */ 1917 hint = topdown ? entry->next->start - length : entry->end; 1918 switch (uvm_map_space_avail(&hint, length, uoffset, align, flags, 1919 topdown, entry)) { 1920 case 1: 1921 goto found; 1922 case -1: 1923 goto wraparound; 1924 } 1925 1926 nextgap: 1927 KDASSERT((flags & UVM_FLAG_FIXED) == 0); 1928 /* If there is not enough space in the whole tree, we fail */ 1929 tmp = ROOT_ENTRY(map); 1930 if (tmp == NULL || tmp->maxgap < length) 1931 goto notfound; 1932 1933 prev = NULL; /* previous candidate */ 1934 1935 /* Find an entry close to hint that has enough space */ 1936 for (; tmp;) { 1937 KASSERT(tmp->next->start == tmp->end + tmp->gap); 1938 if (topdown) { 1939 if (tmp->next->start < hint + length && 1940 (prev == NULL || tmp->end > prev->end)) { 1941 if (tmp->gap >= length) 1942 prev = tmp; 1943 else if ((child = LEFT_ENTRY(tmp)) != NULL 1944 && child->maxgap >= length) 1945 prev = tmp; 1946 } 1947 } else { 1948 if (tmp->end >= hint && 1949 (prev == NULL || tmp->end < prev->end)) { 1950 if (tmp->gap >= length) 1951 prev = tmp; 1952 else if ((child = RIGHT_ENTRY(tmp)) != NULL 1953 && child->maxgap >= length) 1954 prev = tmp; 1955 } 1956 } 1957 if (tmp->next->start < hint + length) 1958 child = RIGHT_ENTRY(tmp); 1959 else if (tmp->end > hint) 1960 child = LEFT_ENTRY(tmp); 1961 else { 1962 if (tmp->gap >= length) 1963 break; 1964 if (topdown) 1965 child = LEFT_ENTRY(tmp); 1966 else 1967 child = RIGHT_ENTRY(tmp); 1968 } 1969 if (child == NULL || child->maxgap < length) 1970 break; 1971 tmp = child; 1972 } 1973 1974 if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) { 1975 /* 1976 * Check if the entry that we found satifies the 1977 * space requirement 1978 */ 1979 if (topdown) { 1980 if (hint > tmp->next->start - length) 1981 hint = tmp->next->start - length; 1982 } else { 1983 if (hint < tmp->end) 1984 hint = tmp->end; 1985 } 1986 switch (uvm_map_space_avail(&hint, length, uoffset, align, 1987 flags, topdown, tmp)) { 1988 case 1: 1989 entry = tmp; 1990 goto found; 1991 case -1: 1992 goto wraparound; 1993 } 1994 if (tmp->gap >= length) 1995 goto listsearch; 1996 } 1997 if (prev == NULL) 1998 goto notfound; 1999 2000 if (topdown) { 2001 KASSERT(orig_hint >= prev->next->start - length || 2002 prev->next->start - length > prev->next->start); 2003 hint = prev->next->start - length; 2004 } else { 2005 KASSERT(orig_hint <= prev->end); 2006 hint = prev->end; 2007 } 2008 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2009 flags, topdown, prev)) { 2010 case 1: 2011 entry = prev; 2012 goto found; 2013 case -1: 2014 goto wraparound; 2015 } 2016 if (prev->gap >= length) 2017 goto listsearch; 2018 2019 if (topdown) 2020 tmp = LEFT_ENTRY(prev); 2021 else 2022 tmp = RIGHT_ENTRY(prev); 2023 for (;;) { 2024 KASSERT(tmp && tmp->maxgap >= length); 2025 if (topdown) 2026 child = RIGHT_ENTRY(tmp); 2027 else 2028 
child = LEFT_ENTRY(tmp); 2029 if (child && child->maxgap >= length) { 2030 tmp = child; 2031 continue; 2032 } 2033 if (tmp->gap >= length) 2034 break; 2035 if (topdown) 2036 tmp = LEFT_ENTRY(tmp); 2037 else 2038 tmp = RIGHT_ENTRY(tmp); 2039 } 2040 2041 if (topdown) { 2042 KASSERT(orig_hint >= tmp->next->start - length || 2043 tmp->next->start - length > tmp->next->start); 2044 hint = tmp->next->start - length; 2045 } else { 2046 KASSERT(orig_hint <= tmp->end); 2047 hint = tmp->end; 2048 } 2049 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2050 flags, topdown, tmp)) { 2051 case 1: 2052 entry = tmp; 2053 goto found; 2054 case -1: 2055 goto wraparound; 2056 } 2057 2058 /* 2059 * The tree fails to find an entry because of offset or alignment 2060 * restrictions. Search the list instead. 2061 */ 2062 listsearch: 2063 /* 2064 * Look through the rest of the map, trying to fit a new region in 2065 * the gap between existing regions, or after the very last region. 2066 * note: entry->end = base VA of current gap, 2067 * entry->next->start = VA of end of current gap 2068 */ 2069 2070 for (;;) { 2071 /* Update hint for current gap. */ 2072 hint = topdown ? entry->next->start - length : entry->end; 2073 2074 /* See if it fits. */ 2075 switch (uvm_map_space_avail(&hint, length, uoffset, align, 2076 flags, topdown, entry)) { 2077 case 1: 2078 goto found; 2079 case -1: 2080 goto wraparound; 2081 } 2082 2083 /* Advance to next/previous gap */ 2084 if (topdown) { 2085 if (entry == &map->header) { 2086 UVMHIST_LOG(maphist, "<- failed (off start)", 2087 0,0,0,0); 2088 goto notfound; 2089 } 2090 entry = entry->prev; 2091 } else { 2092 entry = entry->next; 2093 if (entry == &map->header) { 2094 UVMHIST_LOG(maphist, "<- failed (off end)", 2095 0,0,0,0); 2096 goto notfound; 2097 } 2098 } 2099 } 2100 2101 found: 2102 SAVE_HINT(map, map->hint, entry); 2103 *result = hint; 2104 UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0); 2105 KASSERT( topdown || hint >= orig_hint); 2106 KASSERT(!topdown || hint <= orig_hint); 2107 KASSERT(entry->end <= hint); 2108 KASSERT(hint + length <= entry->next->start); 2109 return (entry); 2110 2111 wraparound: 2112 UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0); 2113 2114 return (NULL); 2115 2116 notfound: 2117 UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0); 2118 2119 return (NULL); 2120 } 2121 2122 /* 2123 * U N M A P - m a i n h e l p e r f u n c t i o n s 2124 */ 2125 2126 /* 2127 * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") 2128 * 2129 * => caller must check alignment and size 2130 * => map must be locked by caller 2131 * => we return a list of map entries that we've remove from the map 2132 * in "entry_list" 2133 */ 2134 2135 void 2136 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2137 struct vm_map_entry **entry_list /* OUT */, int flags) 2138 { 2139 struct vm_map_entry *entry, *first_entry, *next; 2140 vaddr_t len; 2141 UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist); 2142 2143 UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", 2144 map, start, end, 0); 2145 VM_MAP_RANGE_CHECK(map, start, end); 2146 2147 uvm_map_check(map, "unmap_remove entry"); 2148 2149 /* 2150 * find first entry 2151 */ 2152 2153 if (uvm_map_lookup_entry(map, start, &first_entry) == true) { 2154 /* clip and go... */ 2155 entry = first_entry; 2156 UVM_MAP_CLIP_START(map, entry, start); 2157 /* critical! 
prevents stale hint */ 2158 SAVE_HINT(map, entry, entry->prev); 2159 } else { 2160 entry = first_entry->next; 2161 } 2162 2163 /* 2164 * Save the free space hint 2165 */ 2166 2167 if (map->first_free != &map->header && map->first_free->start >= start) 2168 map->first_free = entry->prev; 2169 2170 /* 2171 * note: we now re-use first_entry for a different task. we remove 2172 * a number of map entries from the map and save them in a linked 2173 * list headed by "first_entry". once we remove them from the map 2174 * the caller should unlock the map and drop the references to the 2175 * backing objects [c.f. uvm_unmap_detach]. the object is to 2176 * separate unmapping from reference dropping. why? 2177 * [1] the map has to be locked for unmapping 2178 * [2] the map need not be locked for reference dropping 2179 * [3] dropping references may trigger pager I/O, and if we hit 2180 * a pager that does synchronous I/O we may have to wait for it. 2181 * [4] we would like all waiting for I/O to occur with maps unlocked 2182 * so that we don't block other threads. 2183 */ 2184 2185 first_entry = NULL; 2186 *entry_list = NULL; 2187 2188 /* 2189 * break up the area into map entry sized regions and unmap. note 2190 * that all mappings have to be removed before we can even consider 2191 * dropping references to amaps or VM objects (otherwise we could end 2192 * up with a mapping to a page on the free list which would be very bad) 2193 */ 2194 2195 while ((entry != &map->header) && (entry->start < end)) { 2196 KASSERT((entry->flags & UVM_MAP_STATIC) == 0); 2197 2198 UVM_MAP_CLIP_END(map, entry, end); 2199 next = entry->next; 2200 len = entry->end - entry->start; 2201 2202 /* 2203 * unwire before removing addresses from the pmap; otherwise 2204 * unwiring will put the entries back into the pmap (XXX). 2205 */ 2206 2207 if (VM_MAPENT_ISWIRED(entry)) { 2208 uvm_map_entry_unwire(map, entry); 2209 } 2210 if (flags & UVM_FLAG_VAONLY) { 2211 2212 /* nothing */ 2213 2214 } else if ((map->flags & VM_MAP_PAGEABLE) == 0) { 2215 2216 /* 2217 * if the map is non-pageable, any pages mapped there 2218 * must be wired and entered with pmap_kenter_pa(), 2219 * and we should free any such pages immediately. 2220 * this is mostly used for kmem_map. 2221 */ 2222 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2223 2224 uvm_km_pgremove_intrsafe(map, entry->start, entry->end); 2225 } else if (UVM_ET_ISOBJ(entry) && 2226 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2227 panic("%s: kernel object %p %p\n", 2228 __func__, map, entry); 2229 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) { 2230 /* 2231 * remove mappings the standard way. lock object 2232 * and/or amap to ensure vm_page state does not 2233 * change while in pmap_remove(). 2234 */ 2235 2236 uvm_map_lock_entry(entry); 2237 pmap_remove(map->pmap, entry->start, entry->end); 2238 uvm_map_unlock_entry(entry); 2239 } 2240 2241 #if defined(DEBUG) 2242 /* 2243 * check if there's remaining mapping, 2244 * which is a bug in caller. 2245 */ 2246 2247 vaddr_t va; 2248 for (va = entry->start; va < entry->end; 2249 va += PAGE_SIZE) { 2250 if (pmap_extract(vm_map_pmap(map), va, NULL)) { 2251 panic("%s: %#"PRIxVADDR" has mapping", 2252 __func__, va); 2253 } 2254 } 2255 2256 if (VM_MAP_IS_KERNEL(map)) { 2257 uvm_km_check_empty(map, entry->start, 2258 entry->end); 2259 } 2260 #endif /* defined(DEBUG) */ 2261 2262 /* 2263 * remove entry from map and put it on our list of entries 2264 * that we've nuked. then go to next entry. 
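 * (entries are pushed onto the head of the list through first_entry,
 * so the caller gets them back in reverse map order; the order does
 * not matter to uvm_unmap_detach().)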
2265 */ 2266 2267 UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); 2268 2269 /* critical! prevents stale hint */ 2270 SAVE_HINT(map, entry, entry->prev); 2271 2272 uvm_map_entry_unlink(map, entry); 2273 KASSERT(map->size >= len); 2274 map->size -= len; 2275 entry->prev = NULL; 2276 entry->next = first_entry; 2277 first_entry = entry; 2278 entry = next; 2279 } 2280 2281 /* 2282 * Note: if map is dying, leave pmap_update() for pmap_destroy(), 2283 * which will be called later. 2284 */ 2285 if ((map->flags & VM_MAP_DYING) == 0) { 2286 pmap_update(vm_map_pmap(map)); 2287 } else { 2288 KASSERT(vm_map_pmap(map) != pmap_kernel()); 2289 } 2290 2291 uvm_map_check(map, "unmap_remove leave"); 2292 2293 /* 2294 * now we've cleaned up the map and are ready for the caller to drop 2295 * references to the mapped objects. 2296 */ 2297 2298 *entry_list = first_entry; 2299 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); 2300 2301 if (map->flags & VM_MAP_WANTVA) { 2302 mutex_enter(&map->misc_lock); 2303 map->flags &= ~VM_MAP_WANTVA; 2304 cv_broadcast(&map->cv); 2305 mutex_exit(&map->misc_lock); 2306 } 2307 } 2308 2309 /* 2310 * uvm_unmap_detach: drop references in a chain of map entries 2311 * 2312 * => we will free the map entries as we traverse the list. 2313 */ 2314 2315 void 2316 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags) 2317 { 2318 struct vm_map_entry *next_entry; 2319 UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); 2320 2321 while (first_entry) { 2322 KASSERT(!VM_MAPENT_ISWIRED(first_entry)); 2323 UVMHIST_LOG(maphist, 2324 " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", 2325 first_entry, first_entry->aref.ar_amap, 2326 first_entry->object.uvm_obj, 2327 UVM_ET_ISSUBMAP(first_entry)); 2328 2329 /* 2330 * drop reference to amap, if we've got one 2331 */ 2332 2333 if (first_entry->aref.ar_amap) 2334 uvm_map_unreference_amap(first_entry, flags); 2335 2336 /* 2337 * drop reference to our backing object, if we've got one 2338 */ 2339 2340 KASSERT(!UVM_ET_ISSUBMAP(first_entry)); 2341 if (UVM_ET_ISOBJ(first_entry) && 2342 first_entry->object.uvm_obj->pgops->pgo_detach) { 2343 (*first_entry->object.uvm_obj->pgops->pgo_detach) 2344 (first_entry->object.uvm_obj); 2345 } 2346 next_entry = first_entry->next; 2347 uvm_mapent_free(first_entry); 2348 first_entry = next_entry; 2349 } 2350 UVMHIST_LOG(maphist, "<- done", 0,0,0,0); 2351 } 2352 2353 /* 2354 * E X T R A C T I O N F U N C T I O N S 2355 */ 2356 2357 /* 2358 * uvm_map_reserve: reserve space in a vm_map for future use. 2359 * 2360 * => we reserve space in a map by putting a dummy map entry in the 2361 * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) 2362 * => map should be unlocked (we will write lock it) 2363 * => we return true if we were able to reserve space 2364 * => XXXCDC: should be inline? 2365 */ 2366 2367 int 2368 uvm_map_reserve(struct vm_map *map, vsize_t size, 2369 vaddr_t offset /* hint for pmap_prefer */, 2370 vsize_t align /* alignment */, 2371 vaddr_t *raddr /* IN:hint, OUT: reserved VA */, 2372 uvm_flag_t flags /* UVM_FLAG_FIXED or UVM_FLAG_COLORMATCH or 0 */) 2373 { 2374 UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); 2375 2376 UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", 2377 map,size,offset,raddr); 2378 2379 size = round_page(size); 2380 2381 /* 2382 * reserve some virtual space. 
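 * we do this by asking uvm_map() for a UVM_PROT_NONE, UVM_FLAG_NOMERGE
 * entry with no backing object; the blank entry can later be filled in
 * with real mappings by uvm_map_replace().  a hypothetical caller might
 * do roughly:
 *
 *	vaddr_t va = 0;
 *
 *	if (uvm_map_reserve(map, size, 0, 0, &va, 0) == false)
 *		return ENOMEM;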
2383 */
2384
2385 if (uvm_map(map, raddr, size, NULL, offset, align,
2386 UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
2387 UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) {
2388 UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
2389 return (false);
2390 }
2391
2392 UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0);
2393 return (true);
2394 }
2395
2396 /*
2397 * uvm_map_replace: replace a reserved (blank) area of memory with
2398 * real mappings.
2399 *
2400 * => caller must WRITE-LOCK the map
2401 * => we return true if replacement was a success
2402 * => we expect the newents chain to have nnewents entries on it and
2403 * we expect newents->prev to point to the last entry on the list
2404 * => note newents is allowed to be NULL
2405 */
2406
2407 static int
2408 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
2409 struct vm_map_entry *newents, int nnewents, vsize_t nsize,
2410 struct vm_map_entry **oldentryp)
2411 {
2412 struct vm_map_entry *oldent, *last;
2413
2414 uvm_map_check(map, "map_replace entry");
2415
2416 /*
2417 * first find the blank map entry at the specified address
2418 */
2419
2420 if (!uvm_map_lookup_entry(map, start, &oldent)) {
2421 return (false);
2422 }
2423
2424 /*
2425 * check to make sure we have a proper blank entry
2426 */
2427
2428 if (end < oldent->end) {
2429 UVM_MAP_CLIP_END(map, oldent, end);
2430 }
2431 if (oldent->start != start || oldent->end != end ||
2432 oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
2433 return (false);
2434 }
2435
2436 #ifdef DIAGNOSTIC
2437
2438 /*
2439 * sanity check the newents chain
2440 */
2441
2442 {
2443 struct vm_map_entry *tmpent = newents;
2444 int nent = 0;
2445 vsize_t sz = 0;
2446 vaddr_t cur = start;
2447
2448 while (tmpent) {
2449 nent++;
2450 sz += tmpent->end - tmpent->start;
2451 if (tmpent->start < cur)
2452 panic("uvm_map_replace1");
2453 if (tmpent->start >= tmpent->end || tmpent->end > end) {
2454 panic("uvm_map_replace2: "
2455 "tmpent->start=0x%"PRIxVADDR
2456 ", tmpent->end=0x%"PRIxVADDR
2457 ", end=0x%"PRIxVADDR,
2458 tmpent->start, tmpent->end, end);
2459 }
2460 cur = tmpent->end;
2461 if (tmpent->next) {
2462 if (tmpent->next->prev != tmpent)
2463 panic("uvm_map_replace3");
2464 } else {
2465 if (newents->prev != tmpent)
2466 panic("uvm_map_replace4");
2467 }
2468 tmpent = tmpent->next;
2469 }
2470 if (nent != nnewents)
2471 panic("uvm_map_replace5");
2472 if (sz != nsize)
2473 panic("uvm_map_replace6");
2474 }
2475 #endif
2476
2477 /*
2478 * map entry is a valid blank! replace it. (this does all the
2479 * work of map entry link/unlink...).
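 * a NULL newents list simply deletes the blank reservation instead of
 * replacing it.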
2480 */ 2481 2482 if (newents) { 2483 last = newents->prev; 2484 2485 /* critical: flush stale hints out of map */ 2486 SAVE_HINT(map, map->hint, newents); 2487 if (map->first_free == oldent) 2488 map->first_free = last; 2489 2490 last->next = oldent->next; 2491 last->next->prev = last; 2492 2493 /* Fix RB tree */ 2494 uvm_rb_remove(map, oldent); 2495 2496 newents->prev = oldent->prev; 2497 newents->prev->next = newents; 2498 map->nentries = map->nentries + (nnewents - 1); 2499 2500 /* Fixup the RB tree */ 2501 { 2502 int i; 2503 struct vm_map_entry *tmp; 2504 2505 tmp = newents; 2506 for (i = 0; i < nnewents && tmp; i++) { 2507 uvm_rb_insert(map, tmp); 2508 tmp = tmp->next; 2509 } 2510 } 2511 } else { 2512 /* NULL list of new entries: just remove the old one */ 2513 clear_hints(map, oldent); 2514 uvm_map_entry_unlink(map, oldent); 2515 } 2516 map->size -= end - start - nsize; 2517 2518 uvm_map_check(map, "map_replace leave"); 2519 2520 /* 2521 * now we can free the old blank entry and return. 2522 */ 2523 2524 *oldentryp = oldent; 2525 return (true); 2526 } 2527 2528 /* 2529 * uvm_map_extract: extract a mapping from a map and put it somewhere 2530 * (maybe removing the old mapping) 2531 * 2532 * => maps should be unlocked (we will write lock them) 2533 * => returns 0 on success, error code otherwise 2534 * => start must be page aligned 2535 * => len must be page sized 2536 * => flags: 2537 * UVM_EXTRACT_REMOVE: remove mappings from srcmap 2538 * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) 2539 * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs 2540 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 2541 * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< 2542 * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only 2543 * be used from within the kernel in a kernel level map <<< 2544 */ 2545 2546 int 2547 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 2548 struct vm_map *dstmap, vaddr_t *dstaddrp, int flags) 2549 { 2550 vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge; 2551 struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry, 2552 *deadentry, *oldentry; 2553 struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */ 2554 vsize_t elen __unused; 2555 int nchain, error, copy_ok; 2556 vsize_t nsize; 2557 UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); 2558 2559 UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, 2560 len,0); 2561 UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); 2562 2563 /* 2564 * step 0: sanity check: start must be on a page boundary, length 2565 * must be page sized. can't ask for CONTIG/QREF if you asked for 2566 * REMOVE. 
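 * the rest of the function proceeds in numbered steps: reserve space
 * in dstmap (step 1), walk srcmap building a chain of new entries
 * (steps 2-3), close off the chain (step 4), install it in dstmap and
 * pmap_copy/remove as requested (steps 5-6), then unlock and clean up
 * (step 7).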
2567 */ 2568 2569 KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0); 2570 KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 || 2571 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0); 2572 2573 /* 2574 * step 1: reserve space in the target map for the extracted area 2575 */ 2576 2577 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2578 dstaddr = vm_map_min(dstmap); 2579 if (!uvm_map_reserve(dstmap, len, start, 2580 atop(start) & uvmexp.colormask, &dstaddr, 2581 UVM_FLAG_COLORMATCH)) 2582 return (ENOMEM); 2583 KASSERT((atop(start ^ dstaddr) & uvmexp.colormask) == 0); 2584 *dstaddrp = dstaddr; /* pass address back to caller */ 2585 UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); 2586 } else { 2587 dstaddr = *dstaddrp; 2588 } 2589 2590 /* 2591 * step 2: setup for the extraction process loop by init'ing the 2592 * map entry chain, locking src map, and looking up the first useful 2593 * entry in the map. 2594 */ 2595 2596 end = start + len; 2597 newend = dstaddr + len; 2598 chain = endchain = NULL; 2599 nchain = 0; 2600 nsize = 0; 2601 vm_map_lock(srcmap); 2602 2603 if (uvm_map_lookup_entry(srcmap, start, &entry)) { 2604 2605 /* "start" is within an entry */ 2606 if (flags & UVM_EXTRACT_QREF) { 2607 2608 /* 2609 * for quick references we don't clip the entry, so 2610 * the entry may map space "before" the starting 2611 * virtual address... this is the "fudge" factor 2612 * (which can be non-zero only the first time 2613 * through the "while" loop in step 3). 2614 */ 2615 2616 fudge = start - entry->start; 2617 } else { 2618 2619 /* 2620 * normal reference: we clip the map to fit (thus 2621 * fudge is zero) 2622 */ 2623 2624 UVM_MAP_CLIP_START(srcmap, entry, start); 2625 SAVE_HINT(srcmap, srcmap->hint, entry->prev); 2626 fudge = 0; 2627 } 2628 } else { 2629 2630 /* "start" is not within an entry ... skip to next entry */ 2631 if (flags & UVM_EXTRACT_CONTIG) { 2632 error = EINVAL; 2633 goto bad; /* definite hole here ... */ 2634 } 2635 2636 entry = entry->next; 2637 fudge = 0; 2638 } 2639 2640 /* save values from srcmap for step 6 */ 2641 orig_entry = entry; 2642 orig_fudge = fudge; 2643 2644 /* 2645 * step 3: now start looping through the map entries, extracting 2646 * as we go. 2647 */ 2648 2649 while (entry->start < end && entry != &srcmap->header) { 2650 2651 /* if we are not doing a quick reference, clip it */ 2652 if ((flags & UVM_EXTRACT_QREF) == 0) 2653 UVM_MAP_CLIP_END(srcmap, entry, end); 2654 2655 /* clear needs_copy (allow chunking) */ 2656 if (UVM_ET_ISNEEDSCOPY(entry)) { 2657 amap_copy(srcmap, entry, 2658 AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end); 2659 if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ 2660 error = ENOMEM; 2661 goto bad; 2662 } 2663 2664 /* amap_copy could clip (during chunk)! 
update fudge */ 2665 if (fudge) { 2666 fudge = start - entry->start; 2667 orig_fudge = fudge; 2668 } 2669 } 2670 2671 /* calculate the offset of this from "start" */ 2672 oldoffset = (entry->start + fudge) - start; 2673 2674 /* allocate a new map entry */ 2675 newentry = uvm_mapent_alloc(dstmap, 0); 2676 if (newentry == NULL) { 2677 error = ENOMEM; 2678 goto bad; 2679 } 2680 2681 /* set up new map entry */ 2682 newentry->next = NULL; 2683 newentry->prev = endchain; 2684 newentry->start = dstaddr + oldoffset; 2685 newentry->end = 2686 newentry->start + (entry->end - (entry->start + fudge)); 2687 if (newentry->end > newend || newentry->end < newentry->start) 2688 newentry->end = newend; 2689 newentry->object.uvm_obj = entry->object.uvm_obj; 2690 if (newentry->object.uvm_obj) { 2691 if (newentry->object.uvm_obj->pgops->pgo_reference) 2692 newentry->object.uvm_obj->pgops-> 2693 pgo_reference(newentry->object.uvm_obj); 2694 newentry->offset = entry->offset + fudge; 2695 } else { 2696 newentry->offset = 0; 2697 } 2698 newentry->etype = entry->etype; 2699 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? 2700 entry->max_protection : entry->protection; 2701 newentry->max_protection = entry->max_protection; 2702 newentry->inheritance = entry->inheritance; 2703 newentry->wired_count = 0; 2704 newentry->aref.ar_amap = entry->aref.ar_amap; 2705 if (newentry->aref.ar_amap) { 2706 newentry->aref.ar_pageoff = 2707 entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); 2708 uvm_map_reference_amap(newentry, AMAP_SHARED | 2709 ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); 2710 } else { 2711 newentry->aref.ar_pageoff = 0; 2712 } 2713 newentry->advice = entry->advice; 2714 if ((flags & UVM_EXTRACT_QREF) != 0) { 2715 newentry->flags |= UVM_MAP_NOMERGE; 2716 } 2717 2718 /* now link it on the chain */ 2719 nchain++; 2720 nsize += newentry->end - newentry->start; 2721 if (endchain == NULL) { 2722 chain = endchain = newentry; 2723 } else { 2724 endchain->next = newentry; 2725 endchain = newentry; 2726 } 2727 2728 /* end of 'while' loop! */ 2729 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && 2730 (entry->next == &srcmap->header || 2731 entry->next->start != entry->end)) { 2732 error = EINVAL; 2733 goto bad; 2734 } 2735 entry = entry->next; 2736 fudge = 0; 2737 } 2738 2739 /* 2740 * step 4: close off chain (in format expected by uvm_map_replace) 2741 */ 2742 2743 if (chain) 2744 chain->prev = endchain; 2745 2746 /* 2747 * step 5: attempt to lock the dest map so we can pmap_copy. 
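 * (we only try-lock dstmap here: srcmap is already locked, and
 * sleeping on dstmap while holding srcmap could deadlock.)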
2748 * note usage of copy_ok: 2749 * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) 2750 * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 2751 */ 2752 2753 if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) { 2754 copy_ok = 1; 2755 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2756 nchain, nsize, &resentry)) { 2757 if (srcmap != dstmap) 2758 vm_map_unlock(dstmap); 2759 error = EIO; 2760 goto bad; 2761 } 2762 } else { 2763 copy_ok = 0; 2764 /* replace defered until step 7 */ 2765 } 2766 2767 /* 2768 * step 6: traverse the srcmap a second time to do the following: 2769 * - if we got a lock on the dstmap do pmap_copy 2770 * - if UVM_EXTRACT_REMOVE remove the entries 2771 * we make use of orig_entry and orig_fudge (saved in step 2) 2772 */ 2773 2774 if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { 2775 2776 /* purge possible stale hints from srcmap */ 2777 if (flags & UVM_EXTRACT_REMOVE) { 2778 SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev); 2779 if (srcmap->first_free != &srcmap->header && 2780 srcmap->first_free->start >= start) 2781 srcmap->first_free = orig_entry->prev; 2782 } 2783 2784 entry = orig_entry; 2785 fudge = orig_fudge; 2786 deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ 2787 2788 while (entry->start < end && entry != &srcmap->header) { 2789 if (copy_ok) { 2790 oldoffset = (entry->start + fudge) - start; 2791 elen = MIN(end, entry->end) - 2792 (entry->start + fudge); 2793 pmap_copy(dstmap->pmap, srcmap->pmap, 2794 dstaddr + oldoffset, elen, 2795 entry->start + fudge); 2796 } 2797 2798 /* we advance "entry" in the following if statement */ 2799 if (flags & UVM_EXTRACT_REMOVE) { 2800 uvm_map_lock_entry(entry); 2801 pmap_remove(srcmap->pmap, entry->start, 2802 entry->end); 2803 uvm_map_unlock_entry(entry); 2804 oldentry = entry; /* save entry */ 2805 entry = entry->next; /* advance */ 2806 uvm_map_entry_unlink(srcmap, oldentry); 2807 /* add to dead list */ 2808 oldentry->next = deadentry; 2809 deadentry = oldentry; 2810 } else { 2811 entry = entry->next; /* advance */ 2812 } 2813 2814 /* end of 'while' loop */ 2815 fudge = 0; 2816 } 2817 pmap_update(srcmap->pmap); 2818 2819 /* 2820 * unlock dstmap. we will dispose of deadentry in 2821 * step 7 if needed 2822 */ 2823 2824 if (copy_ok && srcmap != dstmap) 2825 vm_map_unlock(dstmap); 2826 2827 } else { 2828 deadentry = NULL; 2829 } 2830 2831 /* 2832 * step 7: we are done with the source map, unlock. if copy_ok 2833 * is 0 then we have not replaced the dummy mapping in dstmap yet 2834 * and we need to do so now. 2835 */ 2836 2837 vm_map_unlock(srcmap); 2838 if ((flags & UVM_EXTRACT_REMOVE) && deadentry) 2839 uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ 2840 2841 /* now do the replacement if we didn't do it in step 5 */ 2842 if (copy_ok == 0) { 2843 vm_map_lock(dstmap); 2844 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, 2845 nchain, nsize, &resentry); 2846 vm_map_unlock(dstmap); 2847 2848 if (error == false) { 2849 error = EIO; 2850 goto bad2; 2851 } 2852 } 2853 2854 if (resentry != NULL) 2855 uvm_mapent_free(resentry); 2856 2857 return (0); 2858 2859 /* 2860 * bad: failure recovery 2861 */ 2862 bad: 2863 vm_map_unlock(srcmap); 2864 bad2: /* src already unlocked */ 2865 if (chain) 2866 uvm_unmap_detach(chain, 2867 (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); 2868 2869 if (resentry != NULL) 2870 uvm_mapent_free(resentry); 2871 2872 if ((flags & UVM_EXTRACT_RESERVED) == 0) { 2873 uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? 
*/ 2874 } 2875 return (error); 2876 } 2877 2878 /* end of extraction functions */ 2879 2880 /* 2881 * uvm_map_submap: punch down part of a map into a submap 2882 * 2883 * => only the kernel_map is allowed to be submapped 2884 * => the purpose of submapping is to break up the locking granularity 2885 * of a larger map 2886 * => the range specified must have been mapped previously with a uvm_map() 2887 * call [with uobj==NULL] to create a blank map entry in the main map. 2888 * [And it had better still be blank!] 2889 * => maps which contain submaps should never be copied or forked. 2890 * => to remove a submap, use uvm_unmap() on the main map 2891 * and then uvm_map_deallocate() the submap. 2892 * => main map must be unlocked. 2893 * => submap must have been init'd and have a zero reference count. 2894 * [need not be locked as we don't actually reference it] 2895 */ 2896 2897 int 2898 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 2899 struct vm_map *submap) 2900 { 2901 struct vm_map_entry *entry; 2902 int error; 2903 2904 vm_map_lock(map); 2905 VM_MAP_RANGE_CHECK(map, start, end); 2906 2907 if (uvm_map_lookup_entry(map, start, &entry)) { 2908 UVM_MAP_CLIP_START(map, entry, start); 2909 UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ 2910 } else { 2911 entry = NULL; 2912 } 2913 2914 if (entry != NULL && 2915 entry->start == start && entry->end == end && 2916 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 2917 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 2918 entry->etype |= UVM_ET_SUBMAP; 2919 entry->object.sub_map = submap; 2920 entry->offset = 0; 2921 uvm_map_reference(submap); 2922 error = 0; 2923 } else { 2924 error = EINVAL; 2925 } 2926 vm_map_unlock(map); 2927 2928 return error; 2929 } 2930 2931 /* 2932 * uvm_map_protect: change map protection 2933 * 2934 * => set_max means set max_protection. 2935 * => map must be unlocked. 2936 */ 2937 2938 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 2939 ~VM_PROT_WRITE : VM_PROT_ALL) 2940 2941 int 2942 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2943 vm_prot_t new_prot, bool set_max) 2944 { 2945 struct vm_map_entry *current, *entry; 2946 int error = 0; 2947 UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); 2948 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", 2949 map, start, end, new_prot); 2950 2951 vm_map_lock(map); 2952 VM_MAP_RANGE_CHECK(map, start, end); 2953 if (uvm_map_lookup_entry(map, start, &entry)) { 2954 UVM_MAP_CLIP_START(map, entry, start); 2955 } else { 2956 entry = entry->next; 2957 } 2958 2959 /* 2960 * make a first pass to check for protection violations. 2961 */ 2962 2963 current = entry; 2964 while ((current != &map->header) && (current->start < end)) { 2965 if (UVM_ET_ISSUBMAP(current)) { 2966 error = EINVAL; 2967 goto out; 2968 } 2969 if ((new_prot & current->max_protection) != new_prot) { 2970 error = EACCES; 2971 goto out; 2972 } 2973 /* 2974 * Don't allow VM_PROT_EXECUTE to be set on entries that 2975 * point to vnodes that are associated with a NOEXEC file 2976 * system. 2977 */ 2978 if (UVM_ET_ISOBJ(current) && 2979 UVM_OBJ_IS_VNODE(current->object.uvm_obj)) { 2980 struct vnode *vp = 2981 (struct vnode *) current->object.uvm_obj; 2982 2983 if ((new_prot & VM_PROT_EXECUTE) != 0 && 2984 (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) { 2985 error = EACCES; 2986 goto out; 2987 } 2988 } 2989 2990 current = current->next; 2991 } 2992 2993 /* go back and fix up protections (no need to clip this time). 
*/ 2994 2995 current = entry; 2996 while ((current != &map->header) && (current->start < end)) { 2997 vm_prot_t old_prot; 2998 2999 UVM_MAP_CLIP_END(map, current, end); 3000 old_prot = current->protection; 3001 if (set_max) 3002 current->protection = 3003 (current->max_protection = new_prot) & old_prot; 3004 else 3005 current->protection = new_prot; 3006 3007 /* 3008 * update physical map if necessary. worry about copy-on-write 3009 * here -- CHECK THIS XXX 3010 */ 3011 3012 if (current->protection != old_prot) { 3013 /* update pmap! */ 3014 uvm_map_lock_entry(current); 3015 pmap_protect(map->pmap, current->start, current->end, 3016 current->protection & MASK(entry)); 3017 uvm_map_unlock_entry(current); 3018 3019 /* 3020 * If this entry points at a vnode, and the 3021 * protection includes VM_PROT_EXECUTE, mark 3022 * the vnode as VEXECMAP. 3023 */ 3024 if (UVM_ET_ISOBJ(current)) { 3025 struct uvm_object *uobj = 3026 current->object.uvm_obj; 3027 3028 if (UVM_OBJ_IS_VNODE(uobj) && 3029 (current->protection & VM_PROT_EXECUTE)) { 3030 vn_markexec((struct vnode *) uobj); 3031 } 3032 } 3033 } 3034 3035 /* 3036 * If the map is configured to lock any future mappings, 3037 * wire this entry now if the old protection was VM_PROT_NONE 3038 * and the new protection is not VM_PROT_NONE. 3039 */ 3040 3041 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3042 VM_MAPENT_ISWIRED(entry) == 0 && 3043 old_prot == VM_PROT_NONE && 3044 new_prot != VM_PROT_NONE) { 3045 if (uvm_map_pageable(map, entry->start, 3046 entry->end, false, 3047 UVM_LK_ENTER|UVM_LK_EXIT) != 0) { 3048 3049 /* 3050 * If locking the entry fails, remember the 3051 * error if it's the first one. Note we 3052 * still continue setting the protection in 3053 * the map, but will return the error 3054 * condition regardless. 3055 * 3056 * XXX Ignore what the actual error is, 3057 * XXX just call it a resource shortage 3058 * XXX so that it doesn't get confused 3059 * XXX what uvm_map_protect() itself would 3060 * XXX normally return. 3061 */ 3062 3063 error = ENOMEM; 3064 } 3065 } 3066 current = current->next; 3067 } 3068 pmap_update(map->pmap); 3069 3070 out: 3071 vm_map_unlock(map); 3072 3073 UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0); 3074 return error; 3075 } 3076 3077 #undef MASK 3078 3079 /* 3080 * uvm_map_inherit: set inheritance code for range of addrs in map. 3081 * 3082 * => map must be unlocked 3083 * => note that the inherit code is used during a "fork". see fork 3084 * code for details. 
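 * => only MAP_INHERIT_NONE, MAP_INHERIT_COPY and MAP_INHERIT_SHARE
 *    are accepted; anything else fails with EINVAL.  a hypothetical
 *    caller (e.g. an minherit(2)-style path) would do roughly:
 *
 *	error = uvm_map_inherit(map, start, end, MAP_INHERIT_SHARE);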
3085 */ 3086 3087 int 3088 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3089 vm_inherit_t new_inheritance) 3090 { 3091 struct vm_map_entry *entry, *temp_entry; 3092 UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); 3093 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", 3094 map, start, end, new_inheritance); 3095 3096 switch (new_inheritance) { 3097 case MAP_INHERIT_NONE: 3098 case MAP_INHERIT_COPY: 3099 case MAP_INHERIT_SHARE: 3100 break; 3101 default: 3102 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3103 return EINVAL; 3104 } 3105 3106 vm_map_lock(map); 3107 VM_MAP_RANGE_CHECK(map, start, end); 3108 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3109 entry = temp_entry; 3110 UVM_MAP_CLIP_START(map, entry, start); 3111 } else { 3112 entry = temp_entry->next; 3113 } 3114 while ((entry != &map->header) && (entry->start < end)) { 3115 UVM_MAP_CLIP_END(map, entry, end); 3116 entry->inheritance = new_inheritance; 3117 entry = entry->next; 3118 } 3119 vm_map_unlock(map); 3120 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3121 return 0; 3122 } 3123 3124 /* 3125 * uvm_map_advice: set advice code for range of addrs in map. 3126 * 3127 * => map must be unlocked 3128 */ 3129 3130 int 3131 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3132 { 3133 struct vm_map_entry *entry, *temp_entry; 3134 UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); 3135 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", 3136 map, start, end, new_advice); 3137 3138 vm_map_lock(map); 3139 VM_MAP_RANGE_CHECK(map, start, end); 3140 if (uvm_map_lookup_entry(map, start, &temp_entry)) { 3141 entry = temp_entry; 3142 UVM_MAP_CLIP_START(map, entry, start); 3143 } else { 3144 entry = temp_entry->next; 3145 } 3146 3147 /* 3148 * XXXJRT: disallow holes? 3149 */ 3150 3151 while ((entry != &map->header) && (entry->start < end)) { 3152 UVM_MAP_CLIP_END(map, entry, end); 3153 3154 switch (new_advice) { 3155 case MADV_NORMAL: 3156 case MADV_RANDOM: 3157 case MADV_SEQUENTIAL: 3158 /* nothing special here */ 3159 break; 3160 3161 default: 3162 vm_map_unlock(map); 3163 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); 3164 return EINVAL; 3165 } 3166 entry->advice = new_advice; 3167 entry = entry->next; 3168 } 3169 3170 vm_map_unlock(map); 3171 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3172 return 0; 3173 } 3174 3175 /* 3176 * uvm_map_willneed: apply MADV_WILLNEED 3177 */ 3178 3179 int 3180 uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end) 3181 { 3182 struct vm_map_entry *entry; 3183 UVMHIST_FUNC("uvm_map_willneed"); UVMHIST_CALLED(maphist); 3184 UVMHIST_LOG(maphist,"(map=0x%lx,start=0x%lx,end=0x%lx)", 3185 map, start, end, 0); 3186 3187 vm_map_lock_read(map); 3188 VM_MAP_RANGE_CHECK(map, start, end); 3189 if (!uvm_map_lookup_entry(map, start, &entry)) { 3190 entry = entry->next; 3191 } 3192 while (entry->start < end) { 3193 struct vm_amap * const amap = entry->aref.ar_amap; 3194 struct uvm_object * const uobj = entry->object.uvm_obj; 3195 3196 KASSERT(entry != &map->header); 3197 KASSERT(start < entry->end); 3198 /* 3199 * For now, we handle only the easy but commonly-requested case. 3200 * ie. start prefetching of backing uobj pages. 3201 * 3202 * XXX It might be useful to pmap_enter() the already-in-core 3203 * pages by inventing a "weak" mode for uvm_fault() which would 3204 * only do the PGO_LOCKED pgo_get(). 
3205 */ 3206 if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) { 3207 off_t offset; 3208 off_t size; 3209 3210 offset = entry->offset; 3211 if (start < entry->start) { 3212 offset += entry->start - start; 3213 } 3214 size = entry->offset + (entry->end - entry->start); 3215 if (entry->end < end) { 3216 size -= end - entry->end; 3217 } 3218 uvm_readahead(uobj, offset, size); 3219 } 3220 entry = entry->next; 3221 } 3222 vm_map_unlock_read(map); 3223 UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); 3224 return 0; 3225 } 3226 3227 /* 3228 * uvm_map_pageable: sets the pageability of a range in a map. 3229 * 3230 * => wires map entries. should not be used for transient page locking. 3231 * for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()). 3232 * => regions specified as not pageable require lock-down (wired) memory 3233 * and page tables. 3234 * => map must never be read-locked 3235 * => if islocked is true, map is already write-locked 3236 * => we always unlock the map, since we must downgrade to a read-lock 3237 * to call uvm_fault_wire() 3238 * => XXXCDC: check this and try and clean it up. 3239 */ 3240 3241 int 3242 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 3243 bool new_pageable, int lockflags) 3244 { 3245 struct vm_map_entry *entry, *start_entry, *failed_entry; 3246 int rv; 3247 #ifdef DIAGNOSTIC 3248 u_int timestamp_save; 3249 #endif 3250 UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); 3251 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", 3252 map, start, end, new_pageable); 3253 KASSERT(map->flags & VM_MAP_PAGEABLE); 3254 3255 if ((lockflags & UVM_LK_ENTER) == 0) 3256 vm_map_lock(map); 3257 VM_MAP_RANGE_CHECK(map, start, end); 3258 3259 /* 3260 * only one pageability change may take place at one time, since 3261 * uvm_fault_wire assumes it will be called only once for each 3262 * wiring/unwiring. therefore, we have to make sure we're actually 3263 * changing the pageability for the entire region. we do so before 3264 * making any changes. 3265 */ 3266 3267 if (uvm_map_lookup_entry(map, start, &start_entry) == false) { 3268 if ((lockflags & UVM_LK_EXIT) == 0) 3269 vm_map_unlock(map); 3270 3271 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0); 3272 return EFAULT; 3273 } 3274 entry = start_entry; 3275 3276 /* 3277 * handle wiring and unwiring separately. 3278 */ 3279 3280 if (new_pageable) { /* unwire */ 3281 UVM_MAP_CLIP_START(map, entry, start); 3282 3283 /* 3284 * unwiring. first ensure that the range to be unwired is 3285 * really wired down and that there are no holes. 3286 */ 3287 3288 while ((entry != &map->header) && (entry->start < end)) { 3289 if (entry->wired_count == 0 || 3290 (entry->end < end && 3291 (entry->next == &map->header || 3292 entry->next->start > entry->end))) { 3293 if ((lockflags & UVM_LK_EXIT) == 0) 3294 vm_map_unlock(map); 3295 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0); 3296 return EINVAL; 3297 } 3298 entry = entry->next; 3299 } 3300 3301 /* 3302 * POSIX 1003.1b - a single munlock call unlocks a region, 3303 * regardless of the number of mlock calls made on that 3304 * region. 
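 * (so below we simply clear the wiring on every wired entry in the
 * range rather than keeping a per-mlock count.)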
3305 */ 3306 3307 entry = start_entry; 3308 while ((entry != &map->header) && (entry->start < end)) { 3309 UVM_MAP_CLIP_END(map, entry, end); 3310 if (VM_MAPENT_ISWIRED(entry)) 3311 uvm_map_entry_unwire(map, entry); 3312 entry = entry->next; 3313 } 3314 if ((lockflags & UVM_LK_EXIT) == 0) 3315 vm_map_unlock(map); 3316 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3317 return 0; 3318 } 3319 3320 /* 3321 * wire case: in two passes [XXXCDC: ugly block of code here] 3322 * 3323 * 1: holding the write lock, we create any anonymous maps that need 3324 * to be created. then we clip each map entry to the region to 3325 * be wired and increment its wiring count. 3326 * 3327 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 3328 * in the pages for any newly wired area (wired_count == 1). 3329 * 3330 * downgrading to a read lock for uvm_fault_wire avoids a possible 3331 * deadlock with another thread that may have faulted on one of 3332 * the pages to be wired (it would mark the page busy, blocking 3333 * us, then in turn block on the map lock that we hold). because 3334 * of problems in the recursive lock package, we cannot upgrade 3335 * to a write lock in vm_map_lookup. thus, any actions that 3336 * require the write lock must be done beforehand. because we 3337 * keep the read lock on the map, the copy-on-write status of the 3338 * entries we modify here cannot change. 3339 */ 3340 3341 while ((entry != &map->header) && (entry->start < end)) { 3342 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3343 3344 /* 3345 * perform actions of vm_map_lookup that need the 3346 * write lock on the map: create an anonymous map 3347 * for a copy-on-write region, or an anonymous map 3348 * for a zero-fill region. (XXXCDC: submap case 3349 * ok?) 3350 */ 3351 3352 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3353 if (UVM_ET_ISNEEDSCOPY(entry) && 3354 ((entry->max_protection & VM_PROT_WRITE) || 3355 (entry->object.uvm_obj == NULL))) { 3356 amap_copy(map, entry, 0, start, end); 3357 /* XXXCDC: wait OK? */ 3358 } 3359 } 3360 } 3361 UVM_MAP_CLIP_START(map, entry, start); 3362 UVM_MAP_CLIP_END(map, entry, end); 3363 entry->wired_count++; 3364 3365 /* 3366 * Check for holes 3367 */ 3368 3369 if (entry->protection == VM_PROT_NONE || 3370 (entry->end < end && 3371 (entry->next == &map->header || 3372 entry->next->start > entry->end))) { 3373 3374 /* 3375 * found one. amap creation actions do not need to 3376 * be undone, but the wired counts need to be restored. 3377 */ 3378 3379 while (entry != &map->header && entry->end > start) { 3380 entry->wired_count--; 3381 entry = entry->prev; 3382 } 3383 if ((lockflags & UVM_LK_EXIT) == 0) 3384 vm_map_unlock(map); 3385 UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); 3386 return EINVAL; 3387 } 3388 entry = entry->next; 3389 } 3390 3391 /* 3392 * Pass 2. 3393 */ 3394 3395 #ifdef DIAGNOSTIC 3396 timestamp_save = map->timestamp; 3397 #endif 3398 vm_map_busy(map); 3399 vm_map_unlock(map); 3400 3401 rv = 0; 3402 entry = start_entry; 3403 while (entry != &map->header && entry->start < end) { 3404 if (entry->wired_count == 1) { 3405 rv = uvm_fault_wire(map, entry->start, entry->end, 3406 entry->max_protection, 1); 3407 if (rv) { 3408 3409 /* 3410 * wiring failed. break out of the loop. 3411 * we'll clean up the map below, once we 3412 * have a write lock again. 3413 */ 3414 3415 break; 3416 } 3417 } 3418 entry = entry->next; 3419 } 3420 3421 if (rv) { /* failed? */ 3422 3423 /* 3424 * Get back to an exclusive (write) lock. 
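 * (the map was marked busy and unlocked across the uvm_fault_wire()
 * calls above; re-acquire it before undoing the partial wiring.)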
3425 */ 3426 3427 vm_map_lock(map); 3428 vm_map_unbusy(map); 3429 3430 #ifdef DIAGNOSTIC 3431 if (timestamp_save + 1 != map->timestamp) 3432 panic("uvm_map_pageable: stale map"); 3433 #endif 3434 3435 /* 3436 * first drop the wiring count on all the entries 3437 * which haven't actually been wired yet. 3438 */ 3439 3440 failed_entry = entry; 3441 while (entry != &map->header && entry->start < end) { 3442 entry->wired_count--; 3443 entry = entry->next; 3444 } 3445 3446 /* 3447 * now, unwire all the entries that were successfully 3448 * wired above. 3449 */ 3450 3451 entry = start_entry; 3452 while (entry != failed_entry) { 3453 entry->wired_count--; 3454 if (VM_MAPENT_ISWIRED(entry) == 0) 3455 uvm_map_entry_unwire(map, entry); 3456 entry = entry->next; 3457 } 3458 if ((lockflags & UVM_LK_EXIT) == 0) 3459 vm_map_unlock(map); 3460 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); 3461 return (rv); 3462 } 3463 3464 if ((lockflags & UVM_LK_EXIT) == 0) { 3465 vm_map_unbusy(map); 3466 } else { 3467 3468 /* 3469 * Get back to an exclusive (write) lock. 3470 */ 3471 3472 vm_map_lock(map); 3473 vm_map_unbusy(map); 3474 } 3475 3476 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3477 return 0; 3478 } 3479 3480 /* 3481 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 3482 * all mapped regions. 3483 * 3484 * => map must not be locked. 3485 * => if no flags are specified, all regions are unwired. 3486 * => XXXJRT: has some of the same problems as uvm_map_pageable() above. 3487 */ 3488 3489 int 3490 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 3491 { 3492 struct vm_map_entry *entry, *failed_entry; 3493 vsize_t size; 3494 int rv; 3495 #ifdef DIAGNOSTIC 3496 u_int timestamp_save; 3497 #endif 3498 UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist); 3499 UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0); 3500 3501 KASSERT(map->flags & VM_MAP_PAGEABLE); 3502 3503 vm_map_lock(map); 3504 3505 /* 3506 * handle wiring and unwiring separately. 3507 */ 3508 3509 if (flags == 0) { /* unwire */ 3510 3511 /* 3512 * POSIX 1003.1b -- munlockall unlocks all regions, 3513 * regardless of how many times mlockall has been called. 3514 */ 3515 3516 for (entry = map->header.next; entry != &map->header; 3517 entry = entry->next) { 3518 if (VM_MAPENT_ISWIRED(entry)) 3519 uvm_map_entry_unwire(map, entry); 3520 } 3521 map->flags &= ~VM_MAP_WIREFUTURE; 3522 vm_map_unlock(map); 3523 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); 3524 return 0; 3525 } 3526 3527 if (flags & MCL_FUTURE) { 3528 3529 /* 3530 * must wire all future mappings; remember this. 3531 */ 3532 3533 map->flags |= VM_MAP_WIREFUTURE; 3534 } 3535 3536 if ((flags & MCL_CURRENT) == 0) { 3537 3538 /* 3539 * no more work to do! 3540 */ 3541 3542 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0); 3543 vm_map_unlock(map); 3544 return 0; 3545 } 3546 3547 /* 3548 * wire case: in three passes [XXXCDC: ugly block of code here] 3549 * 3550 * 1: holding the write lock, count all pages mapped by non-wired 3551 * entries. if this would cause us to go over our limit, we fail. 3552 * 3553 * 2: still holding the write lock, we create any anonymous maps that 3554 * need to be created. then we increment its wiring count. 3555 * 3556 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault 3557 * in the pages for any newly wired area (wired_count == 1). 
3558 * 3559 * downgrading to a read lock for uvm_fault_wire avoids a possible 3560 * deadlock with another thread that may have faulted on one of 3561 * the pages to be wired (it would mark the page busy, blocking 3562 * us, then in turn block on the map lock that we hold). because 3563 * of problems in the recursive lock package, we cannot upgrade 3564 * to a write lock in vm_map_lookup. thus, any actions that 3565 * require the write lock must be done beforehand. because we 3566 * keep the read lock on the map, the copy-on-write status of the 3567 * entries we modify here cannot change. 3568 */ 3569 3570 for (size = 0, entry = map->header.next; entry != &map->header; 3571 entry = entry->next) { 3572 if (entry->protection != VM_PROT_NONE && 3573 VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3574 size += entry->end - entry->start; 3575 } 3576 } 3577 3578 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 3579 vm_map_unlock(map); 3580 return ENOMEM; 3581 } 3582 3583 if (limit != 0 && 3584 (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) { 3585 vm_map_unlock(map); 3586 return ENOMEM; 3587 } 3588 3589 /* 3590 * Pass 2. 3591 */ 3592 3593 for (entry = map->header.next; entry != &map->header; 3594 entry = entry->next) { 3595 if (entry->protection == VM_PROT_NONE) 3596 continue; 3597 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */ 3598 3599 /* 3600 * perform actions of vm_map_lookup that need the 3601 * write lock on the map: create an anonymous map 3602 * for a copy-on-write region, or an anonymous map 3603 * for a zero-fill region. (XXXCDC: submap case 3604 * ok?) 3605 */ 3606 3607 if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ 3608 if (UVM_ET_ISNEEDSCOPY(entry) && 3609 ((entry->max_protection & VM_PROT_WRITE) || 3610 (entry->object.uvm_obj == NULL))) { 3611 amap_copy(map, entry, 0, entry->start, 3612 entry->end); 3613 /* XXXCDC: wait OK? */ 3614 } 3615 } 3616 } 3617 entry->wired_count++; 3618 } 3619 3620 /* 3621 * Pass 3. 3622 */ 3623 3624 #ifdef DIAGNOSTIC 3625 timestamp_save = map->timestamp; 3626 #endif 3627 vm_map_busy(map); 3628 vm_map_unlock(map); 3629 3630 rv = 0; 3631 for (entry = map->header.next; entry != &map->header; 3632 entry = entry->next) { 3633 if (entry->wired_count == 1) { 3634 rv = uvm_fault_wire(map, entry->start, entry->end, 3635 entry->max_protection, 1); 3636 if (rv) { 3637 3638 /* 3639 * wiring failed. break out of the loop. 3640 * we'll clean up the map below, once we 3641 * have a write lock again. 3642 */ 3643 3644 break; 3645 } 3646 } 3647 } 3648 3649 if (rv) { 3650 3651 /* 3652 * Get back an exclusive (write) lock. 3653 */ 3654 3655 vm_map_lock(map); 3656 vm_map_unbusy(map); 3657 3658 #ifdef DIAGNOSTIC 3659 if (timestamp_save + 1 != map->timestamp) 3660 panic("uvm_map_pageable_all: stale map"); 3661 #endif 3662 3663 /* 3664 * first drop the wiring count on all the entries 3665 * which haven't actually been wired yet. 3666 * 3667 * Skip VM_PROT_NONE entries like we did above. 3668 */ 3669 3670 failed_entry = entry; 3671 for (/* nothing */; entry != &map->header; 3672 entry = entry->next) { 3673 if (entry->protection == VM_PROT_NONE) 3674 continue; 3675 entry->wired_count--; 3676 } 3677 3678 /* 3679 * now, unwire all the entries that were successfully 3680 * wired above. 3681 * 3682 * Skip VM_PROT_NONE entries like we did above. 
3683 */ 3684 3685 for (entry = map->header.next; entry != failed_entry; 3686 entry = entry->next) { 3687 if (entry->protection == VM_PROT_NONE) 3688 continue; 3689 entry->wired_count--; 3690 if (VM_MAPENT_ISWIRED(entry)) 3691 uvm_map_entry_unwire(map, entry); 3692 } 3693 vm_map_unlock(map); 3694 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0); 3695 return (rv); 3696 } 3697 3698 vm_map_unbusy(map); 3699 3700 UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); 3701 return 0; 3702 } 3703 3704 /* 3705 * uvm_map_clean: clean out a map range 3706 * 3707 * => valid flags: 3708 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3709 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3710 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3711 * if (flags & PGO_FREE): any cached pages are freed after clean 3712 * => returns an error if any part of the specified range isn't mapped 3713 * => never a need to flush amap layer since the anonymous memory has 3714 * no permanent home, but may deactivate pages there 3715 * => called from sys_msync() and sys_madvise() 3716 * => caller must not write-lock map (read OK). 3717 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3718 */ 3719 3720 int 3721 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3722 { 3723 struct vm_map_entry *current, *entry; 3724 struct uvm_object *uobj; 3725 struct vm_amap *amap; 3726 struct vm_anon *anon, *anon_tofree; 3727 struct vm_page *pg; 3728 vaddr_t offset; 3729 vsize_t size; 3730 voff_t uoff; 3731 int error, refs; 3732 UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); 3733 3734 UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", 3735 map, start, end, flags); 3736 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3737 (PGO_FREE|PGO_DEACTIVATE)); 3738 3739 vm_map_lock_read(map); 3740 VM_MAP_RANGE_CHECK(map, start, end); 3741 if (uvm_map_lookup_entry(map, start, &entry) == false) { 3742 vm_map_unlock_read(map); 3743 return EFAULT; 3744 } 3745 3746 /* 3747 * Make a first pass to check for holes and wiring problems. 3748 */ 3749 3750 for (current = entry; current->start < end; current = current->next) { 3751 if (UVM_ET_ISSUBMAP(current)) { 3752 vm_map_unlock_read(map); 3753 return EINVAL; 3754 } 3755 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) { 3756 vm_map_unlock_read(map); 3757 return EBUSY; 3758 } 3759 if (end <= current->end) { 3760 break; 3761 } 3762 if (current->end != current->next->start) { 3763 vm_map_unlock_read(map); 3764 return EFAULT; 3765 } 3766 } 3767 3768 error = 0; 3769 for (current = entry; start < end; current = current->next) { 3770 amap = current->aref.ar_amap; /* upper layer */ 3771 uobj = current->object.uvm_obj; /* lower layer */ 3772 KASSERT(start >= current->start); 3773 3774 /* 3775 * No amap cleaning necessary if: 3776 * 3777 * (1) There's no amap. 3778 * 3779 * (2) We're not deactivating or freeing pages. 
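 *
 * in either case we skip straight to flushing the backing object.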
3780 */
3781
3782 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
3783 goto flush_object;
3784
3785 offset = start - current->start;
3786 size = MIN(end, current->end) - start;
3787 anon_tofree = NULL;
3788
3789 amap_lock(amap);
3790 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
3791 anon = amap_lookup(&current->aref, offset);
3792 if (anon == NULL)
3793 continue;
3794
3795 KASSERT(anon->an_lock == amap->am_lock);
3796 pg = anon->an_page;
3797 if (pg == NULL) {
3798 continue;
3799 }
3800
3801 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
3802
3803 /*
3804 * In these first 3 cases, we just deactivate the page.
3805 */
3806
3807 case PGO_CLEANIT|PGO_FREE:
3808 case PGO_CLEANIT|PGO_DEACTIVATE:
3809 case PGO_DEACTIVATE:
3810 deactivate_it:
3811 /*
3812 * skip the page if it's loaned or wired,
3813 * since it shouldn't be on a paging queue
3814 * at all in these cases.
3815 */
3816
3817 mutex_enter(&uvm_pageqlock);
3818 if (pg->loan_count != 0 ||
3819 pg->wire_count != 0) {
3820 mutex_exit(&uvm_pageqlock);
3821 continue;
3822 }
3823 KASSERT(pg->uanon == anon);
3824 uvm_pagedeactivate(pg);
3825 mutex_exit(&uvm_pageqlock);
3826 continue;
3827
3828 case PGO_FREE:
3829
3830 /*
3831 * If there are multiple references to
3832 * the amap, just deactivate the page.
3833 */
3834
3835 if (amap_refs(amap) > 1)
3836 goto deactivate_it;
3837
3838 /* skip the page if it's wired */
3839 if (pg->wire_count != 0) {
3840 continue;
3841 }
3842 amap_unadd(&current->aref, offset);
3843 refs = --anon->an_ref;
3844 if (refs == 0) {
3845 anon->an_link = anon_tofree;
3846 anon_tofree = anon;
3847 }
3848 continue;
3849 }
3850 }
3851 uvm_anon_freelst(amap, anon_tofree);
3852
3853 flush_object:
3854 /*
3855 * flush pages if we've got a valid backing object.
3856 * note that we must always clean object pages before
3857 * freeing them since otherwise we could reveal stale
3858 * data from files.
3859 */
3860
3861 uoff = current->offset + (start - current->start);
3862 size = MIN(end, current->end) - start;
3863 if (uobj != NULL) {
3864 mutex_enter(uobj->vmobjlock);
3865 if (uobj->pgops->pgo_put != NULL)
3866 error = (uobj->pgops->pgo_put)(uobj, uoff,
3867 uoff + size, flags | PGO_CLEANIT);
3868 else
3869 error = 0;
3870 }
3871 start += size;
3872 }
3873 vm_map_unlock_read(map);
3874 return (error);
3875 }
3876
3877
3878 /*
3879 * uvm_map_checkprot: check protection in map
3880 *
3881 * => must allow specified protection in a fully allocated region.
3882 * => map must be read or write locked by caller.
3883 */
3884
3885 bool
3886 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3887 vm_prot_t protection)
3888 {
3889 struct vm_map_entry *entry;
3890 struct vm_map_entry *tmp_entry;
3891
3892 if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3893 return (false);
3894 }
3895 entry = tmp_entry;
3896 while (start < end) {
3897 if (entry == &map->header) {
3898 return (false);
3899 }
3900
3901 /*
3902 * no holes allowed
3903 */
3904
3905 if (start < entry->start) {
3906 return (false);
3907 }
3908
3909 /*
3910 * check protection associated with entry
3911 */
3912
3913 if ((entry->protection & protection) != protection) {
3914 return (false);
3915 }
3916 start = entry->end;
3917 entry = entry->next;
3918 }
3919 return (true);
3920 }
3921
3922 /*
3923 * uvmspace_alloc: allocate a vmspace structure.
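 *   (a thin wrapper: take a fresh vmspace from the pool cache and
 *   hand it to uvmspace_init())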
3924 * 3925 * - structure includes vm_map and pmap 3926 * - XXX: no locking on this structure 3927 * - refcnt set to 1, rest must be init'd by caller 3928 */ 3929 struct vmspace * 3930 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax, bool topdown) 3931 { 3932 struct vmspace *vm; 3933 UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); 3934 3935 vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); 3936 uvmspace_init(vm, NULL, vmin, vmax, topdown); 3937 UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); 3938 return (vm); 3939 } 3940 3941 /* 3942 * uvmspace_init: initialize a vmspace structure. 3943 * 3944 * - XXX: no locking on this structure 3945 * - refcnt set to 1, rest must be init'd by caller 3946 */ 3947 void 3948 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, 3949 vaddr_t vmax, bool topdown) 3950 { 3951 UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); 3952 3953 memset(vm, 0, sizeof(*vm)); 3954 uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE 3955 | (topdown ? VM_MAP_TOPDOWN : 0) 3956 ); 3957 if (pmap) 3958 pmap_reference(pmap); 3959 else 3960 pmap = pmap_create(); 3961 vm->vm_map.pmap = pmap; 3962 vm->vm_refcnt = 1; 3963 UVMHIST_LOG(maphist,"<- done",0,0,0,0); 3964 } 3965 3966 /* 3967 * uvmspace_share: share a vmspace between two processes 3968 * 3969 * - used for vfork, threads(?) 3970 */ 3971 3972 void 3973 uvmspace_share(struct proc *p1, struct proc *p2) 3974 { 3975 3976 uvmspace_addref(p1->p_vmspace); 3977 p2->p_vmspace = p1->p_vmspace; 3978 } 3979 3980 #if 0 3981 3982 /* 3983 * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace 3984 * 3985 * - XXX: no locking on vmspace 3986 */ 3987 3988 void 3989 uvmspace_unshare(struct lwp *l) 3990 { 3991 struct proc *p = l->l_proc; 3992 struct vmspace *nvm, *ovm = p->p_vmspace; 3993 3994 if (ovm->vm_refcnt == 1) 3995 /* nothing to do: vmspace isn't shared in the first place */ 3996 return; 3997 3998 /* make a new vmspace, still holding old one */ 3999 nvm = uvmspace_fork(ovm); 4000 4001 kpreempt_disable(); 4002 pmap_deactivate(l); /* unbind old vmspace */ 4003 p->p_vmspace = nvm; 4004 pmap_activate(l); /* switch to new vmspace */ 4005 kpreempt_enable(); 4006 4007 uvmspace_free(ovm); /* drop reference to old vmspace */ 4008 } 4009 4010 #endif 4011 4012 4013 /* 4014 * uvmspace_spawn: a new process has been spawned and needs a vmspace 4015 */ 4016 4017 void 4018 uvmspace_spawn(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown) 4019 { 4020 struct proc *p = l->l_proc; 4021 struct vmspace *nvm; 4022 4023 #ifdef __HAVE_CPU_VMSPACE_EXEC 4024 cpu_vmspace_exec(l, start, end); 4025 #endif 4026 4027 nvm = uvmspace_alloc(start, end, topdown); 4028 kpreempt_disable(); 4029 p->p_vmspace = nvm; 4030 pmap_activate(l); 4031 kpreempt_enable(); 4032 } 4033 4034 /* 4035 * uvmspace_exec: the process wants to exec a new program 4036 */ 4037 4038 void 4039 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown) 4040 { 4041 struct proc *p = l->l_proc; 4042 struct vmspace *nvm, *ovm = p->p_vmspace; 4043 struct vm_map *map; 4044 4045 KASSERT(ovm != NULL); 4046 #ifdef __HAVE_CPU_VMSPACE_EXEC 4047 cpu_vmspace_exec(l, start, end); 4048 #endif 4049 4050 map = &ovm->vm_map; 4051 /* 4052 * see if more than one process is using this vmspace... 4053 */ 4054 4055 if (ovm->vm_refcnt == 1 4056 && topdown == ((ovm->vm_map.flags & VM_MAP_TOPDOWN) != 0)) { 4057 4058 /* 4059 * if p is the only process using its vmspace then we can safely 4060 * recycle that vmspace for the program that is being exec'd. 
/*
 * uvmspace_exec: the process wants to exec a new program
 */

void
uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
{
	struct proc *p = l->l_proc;
	struct vmspace *nvm, *ovm = p->p_vmspace;
	struct vm_map *map;

	KASSERT(ovm != NULL);
#ifdef __HAVE_CPU_VMSPACE_EXEC
	cpu_vmspace_exec(l, start, end);
#endif

	map = &ovm->vm_map;
	/*
	 * see if more than one process is using this vmspace...
	 */

	if (ovm->vm_refcnt == 1
	    && topdown == ((ovm->vm_map.flags & VM_MAP_TOPDOWN) != 0)) {

		/*
		 * if p is the only process using its vmspace then we can safely
		 * recycle that vmspace for the program that is being exec'd.
		 * But only if TOPDOWN matches the requested value for the new
		 * vm space!
		 */

#ifdef SYSVSHM
		/*
		 * SYSV SHM semantics require us to kill all segments on an exec
		 */

		if (ovm->vm_shm)
			shmexit(ovm);
#endif

		/*
		 * POSIX 1003.1b -- "lock future mappings" is revoked
		 * when a process execs another program image.
		 */

		map->flags &= ~VM_MAP_WIREFUTURE;

		/*
		 * now unmap the old program
		 */

		pmap_remove_all(map->pmap);
		uvm_unmap(map, vm_map_min(map), vm_map_max(map));
		KASSERT(map->header.prev == &map->header);
		KASSERT(map->nentries == 0);

		/*
		 * resize the map
		 */

		vm_map_setmin(map, start);
		vm_map_setmax(map, end);
	} else {

		/*
		 * p's vmspace is being shared, so we can't reuse it for p since
		 * it is still being used for others.  allocate a new vmspace
		 * for p
		 */

		nvm = uvmspace_alloc(start, end, topdown);

		/*
		 * install new vmspace and drop our ref to the old one.
		 */

		kpreempt_disable();
		pmap_deactivate(l);
		p->p_vmspace = nvm;
		pmap_activate(l);
		kpreempt_enable();

		uvmspace_free(ovm);
	}
}

/*
 * uvmspace_addref: add a reference to a vmspace.
 */

void
uvmspace_addref(struct vmspace *vm)
{
	struct vm_map *map = &vm->vm_map;

	KASSERT((map->flags & VM_MAP_DYING) == 0);

	mutex_enter(&map->misc_lock);
	KASSERT(vm->vm_refcnt > 0);
	vm->vm_refcnt++;
	mutex_exit(&map->misc_lock);
}

/*
 * uvmspace_free: free a vmspace data structure
 */

void
uvmspace_free(struct vmspace *vm)
{
	struct vm_map_entry *dead_entries;
	struct vm_map *map = &vm->vm_map;
	int n;

	UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0);
	mutex_enter(&map->misc_lock);
	n = --vm->vm_refcnt;
	mutex_exit(&map->misc_lock);
	if (n > 0)
		return;

	/*
	 * at this point, there should be no other references to the map.
	 * delete all of the mappings, then destroy the pmap.
	 */

	map->flags |= VM_MAP_DYING;
	pmap_remove_all(map->pmap);
#ifdef SYSVSHM
	/* Get rid of any SYSV shared memory segments. */
	if (vm->vm_shm != NULL)
		shmexit(vm);
#endif

	if (map->nentries) {
		uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
		    &dead_entries, 0);
		if (dead_entries != NULL)
			uvm_unmap_detach(dead_entries, 0);
	}
	KASSERT(map->nentries == 0);
	KASSERT(map->size == 0);

	mutex_destroy(&map->misc_lock);
	rw_destroy(&map->lock);
	cv_destroy(&map->cv);
	pmap_destroy(map->pmap);
	pool_cache_put(&uvm_vmspace_cache, vm);
}
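/*
 * Illustrative userland sketch (added for exposition, not built as part of
 * the kernel): the POSIX 1003.1b rule applied in uvmspace_exec() above --
 * "lock future mappings" (mlockall(MCL_FUTURE), i.e. VM_MAP_WIREFUTURE) is
 * revoked when the process execs a new image.  The program sets MCL_FUTURE
 * and re-execs itself; the marker argument string is arbitrary.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(int argc, char **argv)
{

	if (argc > 1 && strcmp(argv[1], "after-exec") == 0) {
		/* mappings created now are no longer wired automatically */
		printf("exec'd image: MCL_FUTURE no longer in effect\n");
		return 0;
	}

	if (mlockall(MCL_FUTURE) == -1)
		err(1, "mlockall");

	/* uvmspace_exec() clears VM_MAP_WIREFUTURE during this exec */
	execl(argv[0], argv[0], "after-exec", (char *)NULL);
	err(1, "execl");
	/* NOTREACHED */
}
#endif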
/*
 *   F O R K   -   m a i n   e n t r y   p o i n t
 */
/*
 * uvmspace_fork: fork a process' main map
 *
 * => create a new vmspace for child process from parent.
 * => parent's map must not be locked.
 */

struct vmspace *
uvmspace_fork(struct vmspace *vm1)
{
	struct vmspace *vm2;
	struct vm_map *old_map = &vm1->vm_map;
	struct vm_map *new_map;
	struct vm_map_entry *old_entry;
	struct vm_map_entry *new_entry;
	UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);

	vm_map_lock(old_map);

	vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
	    vm1->vm_map.flags & VM_MAP_TOPDOWN);
	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
	    (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy);
	new_map = &vm2->vm_map;		  /* XXX */

	old_entry = old_map->header.next;
	new_map->size = old_map->size;

	/*
	 * go entry-by-entry
	 */

	while (old_entry != &old_map->header) {

		/*
		 * first, some sanity checks on the old entry
		 */

		KASSERT(!UVM_ET_ISSUBMAP(old_entry));
		KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
		    !UVM_ET_ISNEEDSCOPY(old_entry));

		switch (old_entry->inheritance) {
		case MAP_INHERIT_NONE:

			/*
			 * drop the mapping, modify size
			 */
			new_map->size -= old_entry->end - old_entry->start;
			break;

		case MAP_INHERIT_SHARE:

			/*
			 * share the mapping: this means we want the old and
			 * new entries to share amaps and backing objects.
			 */
			/*
			 * if the old_entry needs a new amap (due to prev fork)
			 * then we need to allocate it now so that we have
			 * something we own to share with the new_entry.   [in
			 * other words, we need to clear needs_copy]
			 */

			if (UVM_ET_ISNEEDSCOPY(old_entry)) {
				/* get our own amap, clears needs_copy */
				amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK,
				    0, 0);
				/* XXXCDC: WAITOK??? */
			}

			new_entry = uvm_mapent_alloc(new_map, 0);
			/* old_entry -> new_entry */
			uvm_mapent_copy(old_entry, new_entry);

			/* new pmap has nothing wired in it */
			new_entry->wired_count = 0;

			/*
			 * gain reference to object backing the map (can't
			 * be a submap, already checked this case).
			 */

			if (new_entry->aref.ar_amap)
				uvm_map_reference_amap(new_entry, AMAP_SHARED);

			if (new_entry->object.uvm_obj &&
			    new_entry->object.uvm_obj->pgops->pgo_reference)
				new_entry->object.uvm_obj->
				    pgops->pgo_reference(
					new_entry->object.uvm_obj);

			/* insert entry at end of new_map's entry list */
			uvm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);

			break;

		case MAP_INHERIT_COPY:

			/*
			 * copy-on-write the mapping (using mmap's
			 * MAP_PRIVATE semantics)
			 *
			 * allocate new_entry, adjust reference counts.
			 * (note that new references are read-only).
			 */

			new_entry = uvm_mapent_alloc(new_map, 0);
			/* old_entry -> new_entry */
			uvm_mapent_copy(old_entry, new_entry);

			if (new_entry->aref.ar_amap)
				uvm_map_reference_amap(new_entry, 0);

			if (new_entry->object.uvm_obj &&
			    new_entry->object.uvm_obj->pgops->pgo_reference)
				new_entry->object.uvm_obj->pgops->pgo_reference
				    (new_entry->object.uvm_obj);

			/* new pmap has nothing wired in it */
			new_entry->wired_count = 0;

			new_entry->etype |=
			    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
			uvm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);

			/*
			 * the new entry will need an amap.
			 * it will either need to be copied from the old
			 * entry or created from scratch (if the old entry
			 * does not have an amap).  can we defer this process
			 * until later (by setting "needs_copy") or do we
			 * need to copy the amap now?
			 *
			 * we must copy the amap now if any of the following
			 * conditions hold:
			 * 1. the old entry has an amap and that amap is
			 *    being shared.  this means that the old (parent)
			 *    process is sharing the amap with another
			 *    process.  if we do not clear needs_copy here
			 *    we will end up in a situation where both the
			 *    parent and child process are referring to the
			 *    same amap with "needs_copy" set.  if the
			 *    parent write-faults, the fault routine will
			 *    clear "needs_copy" in the parent by allocating
			 *    a new amap.   this is wrong because the
			 *    parent is supposed to be sharing the old amap
			 *    and the new amap will break that.
			 *
			 * 2. if the old entry has an amap and a non-zero
			 *    wire count then we are going to have to call
			 *    amap_cow_now to avoid page faults in the
			 *    parent process.   since amap_cow_now requires
			 *    "needs_copy" to be clear we might as well
			 *    clear it here as well.
			 *
			 */

			if (old_entry->aref.ar_amap != NULL) {
				if ((amap_flags(old_entry->aref.ar_amap) &
				    AMAP_SHARED) != 0 ||
				    VM_MAPENT_ISWIRED(old_entry)) {

					amap_copy(new_map, new_entry,
					    AMAP_COPY_NOCHUNK, 0, 0);
					/* XXXCDC: M_WAITOK ... ok? */
				}
			}

			/*
			 * if the parent's entry is wired down, then the
			 * parent process does not want page faults on
			 * access to that memory.  this means that we
			 * cannot do copy-on-write because we can't write
			 * protect the old entry.   in this case we
			 * resolve all copy-on-write faults now, using
			 * amap_cow_now.   note that we have already
			 * allocated any needed amap (above).
			 */

			if (VM_MAPENT_ISWIRED(old_entry)) {

				/*
				 * resolve all copy-on-write faults now
				 * (note that there is nothing to do if
				 * the old mapping does not have an amap).
				 */
				if (old_entry->aref.ar_amap)
					amap_cow_now(new_map, new_entry);

			} else {

				/*
				 * setup mappings to trigger copy-on-write faults
				 * we must write-protect the parent if it has
				 * an amap and it is not already "needs_copy"...
				 * if it is already "needs_copy" then the parent
				 * has already been write-protected by a previous
				 * fork operation.
				 */

				if (old_entry->aref.ar_amap &&
				    !UVM_ET_ISNEEDSCOPY(old_entry)) {
					if (old_entry->max_protection & VM_PROT_WRITE) {
						pmap_protect(old_map->pmap,
						    old_entry->start,
						    old_entry->end,
						    old_entry->protection &
						    ~VM_PROT_WRITE);
					}
					old_entry->etype |= UVM_ET_NEEDSCOPY;
				}
			}
			break;
		}  /* end of switch statement */
		old_entry = old_entry->next;
	}

	pmap_update(old_map->pmap);
	vm_map_unlock(old_map);

#ifdef SYSVSHM
	if (vm1->vm_shm)
		shmfork(vm1, vm2);
#endif

#ifdef PMAP_FORK
	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
#endif

	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
	return (vm2);
}
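/*
 * Illustrative userland sketch (added for exposition, not built as part of
 * the kernel): the MAP_INHERIT_COPY behaviour set up by uvmspace_fork()
 * above.  A private anonymous mapping is copy-on-write across fork(): the
 * child's store faults in its own page, and the parent keeps seeing the
 * original value.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int *p;
	pid_t pid;

	p = mmap(NULL, sizeof(*p), PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	*p = 1;

	pid = fork();
	if (pid == -1)
		err(1, "fork");
	if (pid == 0) {
		*p = 2;		/* write fault resolved by COW */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent still sees %d\n", *p);	/* prints 1 */
	return 0;
}
#endif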
/*
 * uvm_mapent_trymerge: try to merge an entry with its neighbors.
 *
 * => called with map locked.
 * => return non zero if successfully merged.
 */

int
uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
{
	struct uvm_object *uobj;
	struct vm_map_entry *next;
	struct vm_map_entry *prev;
	vsize_t size;
	int merged = 0;
	bool copying;
	int newetype;

	if (entry->aref.ar_amap != NULL) {
		return 0;
	}
	if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
		return 0;
	}

	uobj = entry->object.uvm_obj;
	size = entry->end - entry->start;
	copying = (flags & UVM_MERGE_COPYING) != 0;
	newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;

	next = entry->next;
	if (next != &map->header &&
	    next->start == entry->end &&
	    ((copying && next->aref.ar_amap != NULL &&
	    amap_refs(next->aref.ar_amap) == 1) ||
	    (!copying && next->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(next, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL || entry->offset + size == next->offset)) {
		int error;

		if (copying) {
			error = amap_extend(next, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
			}

			entry->end = next->end;
			clear_hints(map, next);
			uvm_map_entry_unlink(map, next);
			if (copying) {
				entry->aref = next->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge forwardmerge");
			uvm_mapent_free(next);
			merged++;
		}
	}

	prev = entry->prev;
	if (prev != &map->header &&
	    prev->end == entry->start &&
	    ((copying && !merged && prev->aref.ar_amap != NULL &&
	    amap_refs(prev->aref.ar_amap) == 1) ||
	    (!copying && prev->aref.ar_amap == NULL)) &&
	    UVM_ET_ISCOMPATIBLE(prev, newetype,
	    uobj, entry->flags, entry->protection,
	    entry->max_protection, entry->inheritance, entry->advice,
	    entry->wired_count) &&
	    (uobj == NULL ||
	    prev->offset + prev->end - prev->start == entry->offset)) {
		int error;

		if (copying) {
			error = amap_extend(prev, size,
			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
		} else {
			error = 0;
		}
		if (error == 0) {
			if (uobj) {
				if (uobj->pgops->pgo_detach) {
					uobj->pgops->pgo_detach(uobj);
				}
				entry->offset = prev->offset;
			}

			entry->start = prev->start;
			clear_hints(map, prev);
			uvm_map_entry_unlink(map, prev);
			if (copying) {
				entry->aref = prev->aref;
				entry->etype &= ~UVM_ET_NEEDSCOPY;
			}
			uvm_map_check(map, "trymerge backmerge");
			uvm_mapent_free(prev);
			merged++;
		}
	}

	return merged;
}
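/*
 * Illustrative sketch (added for exposition, not built): how a caller that
 * already holds the map lock might use uvm_mapent_trymerge().  Passing
 * UVM_MERGE_COPYING instead of 0 corresponds to the "copying" case handled
 * above, where a single-reference amap may be extended over the merged
 * range.  The wrapper name is hypothetical.
 */
#if 0
static void
example_coalesce_entry(struct vm_map *map, struct vm_map_entry *entry)
{

	/* caller holds the map lock, as trymerge requires */
	if (uvm_mapent_trymerge(map, entry, 0) != 0) {
		/* entry now covers what used to be two or three entries */
	}
}
#endif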
/*
 * uvm_map_setup: init map
 *
 * => map must not be in service yet.
 */

void
uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags)
{

	rb_tree_init(&map->rb_tree, &uvm_map_tree_ops);
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->ref_count = 1;
	vm_map_setmin(map, vmin);
	vm_map_setmax(map, vmax);
	map->flags = flags;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	map->busy = NULL;

	rw_init(&map->lock);
	cv_init(&map->cv, "vm_map");
	mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE);
}

/*
 *   U N M A P   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop")
 *
 * => caller must check alignment and size
 * => map must be unlocked (we will lock it)
 * => flags is UVM_FLAG_QUANTUM or 0.
 */

void
uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
{
	struct vm_map_entry *dead_entries;
	UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  (map=0x%x, start=0x%x, end=0x%x)",
	    map, start, end, 0);
	if (map == kernel_map) {
		LOCKDEBUG_MEM_CHECK((void *)start, end - start);
	}
	/*
	 * work now done by helper functions.   wipe the pmap's and then
	 * detach from the dead entries...
	 */
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead_entries, flags);
	vm_map_unlock(map);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}
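/*
 * Illustrative sketch (added for exposition, not built): the common way to
 * drive uvm_unmap1().  The map must be unlocked on entry (the function
 * takes and releases the lock itself), and a flags value of 0 gives the
 * plain uvm_unmap() behaviour used elsewhere in this file.  The wrapper
 * name is hypothetical; start/end are assumed already page aligned.
 */
#if 0
static void
example_unmap_range(struct vm_map *map, vaddr_t start, vaddr_t end)
{

	KASSERT(start < end);
	uvm_unmap1(map, start, end, 0);
}
#endif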
/*
 * uvm_map_reference: add reference to a map
 *
 * => map need not be locked (we use misc_lock).
 */

void
uvm_map_reference(struct vm_map *map)
{
	mutex_enter(&map->misc_lock);
	map->ref_count++;
	mutex_exit(&map->misc_lock);
}

bool
vm_map_starved_p(struct vm_map *map)
{

	if ((map->flags & VM_MAP_WANTVA) != 0) {
		return true;
	}
	/* XXX */
	if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) {
		return true;
	}
	return false;
}

void
uvm_map_lock_entry(struct vm_map_entry *entry)
{

	if (entry->aref.ar_amap != NULL) {
		amap_lock(entry->aref.ar_amap);
	}
	if (UVM_ET_ISOBJ(entry)) {
		mutex_enter(entry->object.uvm_obj->vmobjlock);
	}
}

void
uvm_map_unlock_entry(struct vm_map_entry *entry)
{

	if (UVM_ET_ISOBJ(entry)) {
		mutex_exit(entry->object.uvm_obj->vmobjlock);
	}
	if (entry->aref.ar_amap != NULL) {
		amap_unlock(entry->aref.ar_amap);
	}
}
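/*
 * Illustrative sketch (added for exposition, not built): the pairing and
 * ordering provided by uvm_map_lock_entry()/uvm_map_unlock_entry() above --
 * the amap lock is taken before the backing object's lock and dropped after
 * it.  The callback-style wrapper is hypothetical.
 */
#if 0
static void
example_with_entry_locked(struct vm_map_entry *entry,
    void (*fn)(struct vm_map_entry *))
{

	uvm_map_lock_entry(entry);	/* amap lock, then object lock */
	(*fn)(entry);
	uvm_map_unlock_entry(entry);	/* object lock, then amap lock */
}
#endif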
#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_map_printit: actually prints the map
 */

void
uvm_map_printit(struct vm_map *map, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_map_entry *entry;

	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map),
	    vm_map_max(map));
	(*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
	    map->nentries, map->size, map->ref_count, map->timestamp,
	    map->flags);
	(*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
	    pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
	if (!full)
		return;
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
		    entry, entry->start, entry->end, entry->object.uvm_obj,
		    (long long)entry->offset, entry->aref.ar_amap,
		    entry->aref.ar_pageoff);
		(*pr)(
		    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
		    "wc=%d, adv=%d\n",
		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
		    entry->protection, entry->max_protection,
		    entry->inheritance, entry->wired_count, entry->advice);
	}
}

void
uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...))
{
	struct vm_map *map;

	for (map = kernel_map;;) {
		struct vm_map_entry *entry;

		if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) {
			break;
		}
		(*pr)("%p is %p+%zu from VMMAP %p\n",
		    (void *)addr, (void *)entry->start,
		    (size_t)(addr - (uintptr_t)entry->start), map);
		if (!UVM_ET_ISSUBMAP(entry)) {
			break;
		}
		map = entry->object.sub_map;
	}
}

#endif /* DDB || DEBUGPRINT */

#ifndef __USER_VA0_IS_SAFE
static int
sysctl_user_va0_disable(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int t, error;

	node = *rnode;
	node.sysctl_data = &t;
	t = user_va0_disable;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	if (!t && user_va0_disable &&
	    kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0,
	    NULL, NULL, NULL))
		return EPERM;

	user_va0_disable = !!t;
	return 0;
}

SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "user_va0_disable",
	    SYSCTL_DESCR("Disable VA 0"),
	    sysctl_user_va0_disable, 0, &user_va0_disable, 0,
	    CTL_VM, CTL_CREATE, CTL_EOL);
}
#endif
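/*
 * Illustrative userland sketch (added for exposition, not built as part of
 * the kernel): reading the knob registered by the SYSCTL_SETUP block above.
 * On kernels built without __USER_VA0_IS_SAFE the node appears as
 * vm.user_va0_disable; whether it exists on a given kernel depends on that
 * build option.
 */
#if 0
#include <sys/param.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int val;
	size_t len = sizeof(val);

	if (sysctlbyname("vm.user_va0_disable", &val, &len, NULL, 0) == -1)
		err(1, "sysctlbyname");
	printf("vm.user_va0_disable = %d\n", val);
	return 0;
}
#endif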