1 /* $OpenBSD: uvm_map.c,v 1.317 2023/04/26 12:25:12 bluhm Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/acct.h> 90 #include <sys/mman.h> 91 #include <sys/proc.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/sysctl.h> 95 #include <sys/signalvar.h> 96 #include <sys/syslog.h> 97 #include <sys/user.h> 98 #include <sys/tracepoint.h> 99 100 #ifdef SYSVSHM 101 #include <sys/shm.h> 102 #endif 103 104 #include <uvm/uvm.h> 105 106 #ifdef DDB 107 #include <uvm/uvm_ddb.h> 108 #endif 109 110 #include <uvm/uvm_addr.h> 111 112 113 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 114 int uvm_mapent_isjoinable(struct vm_map*, 115 struct vm_map_entry*, struct vm_map_entry*); 116 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 117 struct vm_map_entry*, struct uvm_map_deadq*); 118 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 119 struct vm_map_entry*, struct uvm_map_deadq*); 120 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 121 struct vm_map_entry*, vaddr_t, vsize_t, int, 122 struct uvm_map_deadq*, struct vm_map_entry*); 123 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 124 void uvm_mapent_free(struct vm_map_entry*); 125 void uvm_unmap_kill_entry(struct vm_map*, 126 struct vm_map_entry*); 127 void uvm_unmap_kill_entry_withlock(struct vm_map *, 128 struct vm_map_entry *, int); 129 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 130 void uvm_mapent_mkfree(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry**, 132 struct uvm_map_deadq*, boolean_t); 133 void uvm_map_pageable_pgon(struct vm_map*, 134 struct vm_map_entry*, struct vm_map_entry*, 135 vaddr_t, vaddr_t); 136 int uvm_map_pageable_wire(struct vm_map*, 137 struct vm_map_entry*, struct vm_map_entry*, 138 vaddr_t, vaddr_t, int); 139 void uvm_map_setup_entries(struct vm_map*); 140 void uvm_map_setup_md(struct vm_map*); 141 void uvm_map_teardown(struct vm_map*); 142 void uvm_map_vmspace_update(struct vm_map*, 143 struct uvm_map_deadq*, int); 144 void uvm_map_kmem_grow(struct vm_map*, 145 struct uvm_map_deadq*, vsize_t, int); 146 void uvm_map_freelist_update_clear(struct vm_map*, 147 struct uvm_map_deadq*); 148 void uvm_map_freelist_update_refill(struct vm_map *, int); 149 void uvm_map_freelist_update(struct vm_map*, 150 struct uvm_map_deadq*, vaddr_t, vaddr_t, 151 vaddr_t, vaddr_t, int); 152 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 153 vaddr_t, vaddr_t, int); 154 int uvm_map_findspace(struct vm_map*, 155 struct vm_map_entry**, struct vm_map_entry**, 156 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 157 vaddr_t); 158 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 159 void uvm_map_addr_augment(struct vm_map_entry*); 160 161 int uvm_map_inentry_recheck(u_long, vaddr_t, 162 struct p_inentry *); 163 boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *, 164 vaddr_t, int (*)(vm_map_entry_t), u_long); 165 /* 166 * Tree 
management functions. 167 */ 168 169 static inline void uvm_mapent_copy(struct vm_map_entry*, 170 struct vm_map_entry*); 171 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 172 const struct vm_map_entry*); 173 void uvm_mapent_free_insert(struct vm_map*, 174 struct uvm_addr_state*, struct vm_map_entry*); 175 void uvm_mapent_free_remove(struct vm_map*, 176 struct uvm_addr_state*, struct vm_map_entry*); 177 void uvm_mapent_addr_insert(struct vm_map*, 178 struct vm_map_entry*); 179 void uvm_mapent_addr_remove(struct vm_map*, 180 struct vm_map_entry*); 181 void uvm_map_splitentry(struct vm_map*, 182 struct vm_map_entry*, struct vm_map_entry*, 183 vaddr_t); 184 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 185 186 /* 187 * uvm_vmspace_fork helper functions. 188 */ 189 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 190 vsize_t, vm_prot_t, vm_prot_t, 191 struct vm_map_entry*, struct uvm_map_deadq*, int, 192 int); 193 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 194 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 195 struct vm_map_entry*, struct uvm_map_deadq*); 196 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 197 struct vm_map*, struct vm_map_entry*, 198 struct uvm_map_deadq*); 199 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 200 struct vm_map*, struct vm_map_entry*, 201 struct uvm_map_deadq*); 202 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 203 struct vm_map*, struct vm_map_entry*, 204 struct uvm_map_deadq*); 205 206 /* 207 * Tree validation. 208 */ 209 #ifdef VMMAP_DEBUG 210 void uvm_tree_assert(struct vm_map*, int, char*, 211 char*, int); 212 #define UVM_ASSERT(map, cond, file, line) \ 213 uvm_tree_assert((map), (cond), #cond, (file), (line)) 214 void uvm_tree_sanity(struct vm_map*, char*, int); 215 void uvm_tree_size_chk(struct vm_map*, char*, int); 216 void vmspace_validate(struct vm_map*); 217 #else 218 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 219 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 220 #define vmspace_validate(_map) do {} while (0) 221 #endif 222 223 /* 224 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 225 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 226 * 227 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 228 * each time. 229 */ 230 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 231 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 232 #define VM_MAP_KSIZE_ALLOCMUL 4 233 234 /* auto-allocate address lower bound */ 235 #define VMMAP_MIN_ADDR PAGE_SIZE 236 237 238 #ifdef DEADBEEF0 239 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 240 #else 241 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 242 #endif 243 244 #ifdef DEBUG 245 int uvm_map_printlocks = 0; 246 247 #define LPRINTF(_args) \ 248 do { \ 249 if (uvm_map_printlocks) \ 250 printf _args; \ 251 } while (0) 252 #else 253 #define LPRINTF(_args) do {} while (0) 254 #endif 255 256 static struct mutex uvm_kmapent_mtx; 257 static struct timeval uvm_kmapent_last_warn_time; 258 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 259 260 const char vmmapbsy[] = "vmmapbsy"; 261 262 /* 263 * pool for vmspace structures. 264 */ 265 struct pool uvm_vmspace_pool; 266 267 /* 268 * pool for dynamically-allocated map entries. 
269 */ 270 struct pool uvm_map_entry_pool; 271 struct pool uvm_map_entry_kmem_pool; 272 273 /* 274 * This global represents the end of the kernel virtual address 275 * space. If we want to exceed this, we must grow the kernel 276 * virtual address space dynamically. 277 * 278 * Note, this variable is locked by kernel_map's lock. 279 */ 280 vaddr_t uvm_maxkaddr; 281 282 /* 283 * Locking predicate. 284 */ 285 #define UVM_MAP_REQ_WRITE(_map) \ 286 do { \ 287 if ((_map)->ref_count > 0) { \ 288 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 289 rw_assert_wrlock(&(_map)->lock); \ 290 else \ 291 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 292 } \ 293 } while (0) 294 295 #define vm_map_modflags(map, set, clear) \ 296 do { \ 297 mtx_enter(&(map)->flags_lock); \ 298 (map)->flags = ((map)->flags | (set)) & ~(clear); \ 299 mtx_leave(&(map)->flags_lock); \ 300 } while (0) 301 302 303 /* 304 * Tree describing entries by address. 305 * 306 * Addresses are unique. 307 * Entries with start == end may only exist if they are the first entry 308 * (sorted by address) within a free-memory tree. 309 */ 310 311 static inline int 312 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 313 const struct vm_map_entry *e2) 314 { 315 return e1->start < e2->start ? -1 : e1->start > e2->start; 316 } 317 318 /* 319 * Copy mapentry. 320 */ 321 static inline void 322 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 323 { 324 caddr_t csrc, cdst; 325 size_t sz; 326 327 csrc = (caddr_t)src; 328 cdst = (caddr_t)dst; 329 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 330 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 331 332 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 333 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 334 memcpy(cdst, csrc, sz); 335 } 336 337 /* 338 * Handle free-list insertion. 339 */ 340 void 341 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 342 struct vm_map_entry *entry) 343 { 344 const struct uvm_addr_functions *fun; 345 #ifdef VMMAP_DEBUG 346 vaddr_t min, max, bound; 347 #endif 348 349 #ifdef VMMAP_DEBUG 350 /* 351 * Boundary check. 352 * Boundaries are folded if they go on the same free list. 353 */ 354 min = VMMAP_FREE_START(entry); 355 max = VMMAP_FREE_END(entry); 356 357 while (min < max) { 358 bound = uvm_map_boundary(map, min, max); 359 KASSERT(uvm_map_uaddr(map, min) == uaddr); 360 min = bound; 361 } 362 #endif 363 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 364 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 365 366 UVM_MAP_REQ_WRITE(map); 367 368 /* Actual insert: forward to uaddr pointer. */ 369 if (uaddr != NULL) { 370 fun = uaddr->uaddr_functions; 371 KDASSERT(fun != NULL); 372 if (fun->uaddr_free_insert != NULL) 373 (*fun->uaddr_free_insert)(map, uaddr, entry); 374 entry->etype |= UVM_ET_FREEMAPPED; 375 } 376 377 /* Update fspace augmentation. */ 378 uvm_map_addr_augment(entry); 379 } 380 381 /* 382 * Handle free-list removal. 
383 */ 384 void 385 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 386 struct vm_map_entry *entry) 387 { 388 const struct uvm_addr_functions *fun; 389 390 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 391 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 392 UVM_MAP_REQ_WRITE(map); 393 394 if (uaddr != NULL) { 395 fun = uaddr->uaddr_functions; 396 if (fun->uaddr_free_remove != NULL) 397 (*fun->uaddr_free_remove)(map, uaddr, entry); 398 entry->etype &= ~UVM_ET_FREEMAPPED; 399 } 400 } 401 402 /* 403 * Handle address tree insertion. 404 */ 405 void 406 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 407 { 408 struct vm_map_entry *res; 409 410 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 411 panic("uvm_mapent_addr_insert: entry still in addr list"); 412 KDASSERT(entry->start <= entry->end); 413 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 414 (entry->end & (vaddr_t)PAGE_MASK) == 0); 415 416 TRACEPOINT(uvm, map_insert, 417 entry->start, entry->end, entry->protection, NULL); 418 419 UVM_MAP_REQ_WRITE(map); 420 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 421 if (res != NULL) { 422 panic("uvm_mapent_addr_insert: map %p entry %p " 423 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 424 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 425 map, entry, 426 entry->start, entry->end, entry->guard, entry->fspace, 427 res, res->start, res->end, res->guard, res->fspace); 428 } 429 } 430 431 /* 432 * Handle address tree removal. 433 */ 434 void 435 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 436 { 437 struct vm_map_entry *res; 438 439 TRACEPOINT(uvm, map_remove, 440 entry->start, entry->end, entry->protection, NULL); 441 442 UVM_MAP_REQ_WRITE(map); 443 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 444 if (res != entry) 445 panic("uvm_mapent_addr_remove"); 446 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 447 } 448 449 /* 450 * uvm_map_reference: add reference to a map 451 * 452 * => map need not be locked 453 */ 454 void 455 uvm_map_reference(struct vm_map *map) 456 { 457 atomic_inc_int(&map->ref_count); 458 } 459 460 void 461 uvm_map_lock_entry(struct vm_map_entry *entry) 462 { 463 if (entry->aref.ar_amap != NULL) { 464 amap_lock(entry->aref.ar_amap); 465 } 466 if (UVM_ET_ISOBJ(entry)) { 467 rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE); 468 } 469 } 470 471 void 472 uvm_map_unlock_entry(struct vm_map_entry *entry) 473 { 474 if (UVM_ET_ISOBJ(entry)) { 475 rw_exit(entry->object.uvm_obj->vmobjlock); 476 } 477 if (entry->aref.ar_amap != NULL) { 478 amap_unlock(entry->aref.ar_amap); 479 } 480 } 481 482 /* 483 * Calculate the dused delta. 484 */ 485 vsize_t 486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 487 { 488 struct vmspace *vm; 489 vsize_t sz; 490 vaddr_t lmax; 491 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 492 493 KASSERT(map->flags & VM_MAP_ISVMSPACE); 494 vm_map_assert_anylock(map); 495 496 vm = (struct vmspace *)map; 497 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 498 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 499 500 sz = 0; 501 while (min != max) { 502 lmax = max; 503 if (min < stack_begin && lmax > stack_begin) 504 lmax = stack_begin; 505 else if (min < stack_end && lmax > stack_end) 506 lmax = stack_end; 507 508 if (min >= stack_begin && min < stack_end) { 509 /* nothing */ 510 } else 511 sz += lmax - min; 512 min = lmax; 513 } 514 515 return sz >> PAGE_SHIFT; 516 } 517 518 /* 519 * Find the entry describing the given address. 520 */ 521 struct vm_map_entry* 522 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 523 { 524 struct vm_map_entry *iter; 525 526 iter = RBT_ROOT(uvm_map_addr, atree); 527 while (iter != NULL) { 528 if (iter->start > addr) 529 iter = RBT_LEFT(uvm_map_addr, iter); 530 else if (VMMAP_FREE_END(iter) <= addr) 531 iter = RBT_RIGHT(uvm_map_addr, iter); 532 else 533 return iter; 534 } 535 return NULL; 536 } 537 538 /* 539 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 540 * 541 * Push dead entries into a linked list. 542 * Since the linked list abuses the address tree for storage, the entry 543 * may not be linked in a map. 544 * 545 * *head must be initialized to NULL before the first call to this macro. 546 * uvm_unmap_detach(*head, 0) will remove dead entries. 547 */ 548 static inline void 549 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 550 { 551 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 552 } 553 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 554 dead_entry_push((_headptr), (_entry)) 555 556 /* 557 * Test if memory starting at addr with sz bytes is free. 558 * 559 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 560 * the space. 561 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 562 */ 563 int 564 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 565 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 566 vaddr_t addr, vsize_t sz) 567 { 568 struct uvm_addr_state *free; 569 struct uvm_map_addr *atree; 570 struct vm_map_entry *i, *i_end; 571 572 if (addr + sz < addr) 573 return 0; 574 575 vm_map_assert_anylock(map); 576 577 /* 578 * Kernel memory above uvm_maxkaddr is considered unavailable. 579 */ 580 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 581 if (addr + sz > uvm_maxkaddr) 582 return 0; 583 } 584 585 atree = &map->addr; 586 587 /* 588 * Fill in first, last, so they point at the entries containing the 589 * first and last address of the range. 590 * Note that if they are not NULL, we don't perform the lookup. 591 */ 592 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 593 if (*start_ptr == NULL) { 594 *start_ptr = uvm_map_entrybyaddr(atree, addr); 595 if (*start_ptr == NULL) 596 return 0; 597 } else 598 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 599 if (*end_ptr == NULL) { 600 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 601 *end_ptr = *start_ptr; 602 else { 603 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 604 if (*end_ptr == NULL) 605 return 0; 606 } 607 } else 608 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 609 610 /* Validation. 
*/ 611 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 612 KDASSERT((*start_ptr)->start <= addr && 613 VMMAP_FREE_END(*start_ptr) > addr && 614 (*end_ptr)->start < addr + sz && 615 VMMAP_FREE_END(*end_ptr) >= addr + sz); 616 617 /* 618 * Check the none of the entries intersects with <addr, addr+sz>. 619 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 620 * considered unavailable unless called by those allocators. 621 */ 622 i = *start_ptr; 623 i_end = RBT_NEXT(uvm_map_addr, *end_ptr); 624 for (; i != i_end; 625 i = RBT_NEXT(uvm_map_addr, i)) { 626 if (i->start != i->end && i->end > addr) 627 return 0; 628 629 /* 630 * uaddr_exe and uaddr_brk_stack may only be used 631 * by these allocators and the NULL uaddr (i.e. no 632 * uaddr). 633 * Reject if this requirement is not met. 634 */ 635 if (uaddr != NULL) { 636 free = uvm_map_uaddr_e(map, i); 637 638 if (uaddr != free && free != NULL && 639 (free == map->uaddr_exe || 640 free == map->uaddr_brk_stack)) 641 return 0; 642 } 643 } 644 645 return -1; 646 } 647 648 /* 649 * Invoke each address selector until an address is found. 650 * Will not invoke uaddr_exe. 651 */ 652 int 653 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 654 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 655 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 656 { 657 struct uvm_addr_state *uaddr; 658 int i; 659 660 /* 661 * Allocation for sz bytes at any address, 662 * using the addr selectors in order. 663 */ 664 for (i = 0; i < nitems(map->uaddr_any); i++) { 665 uaddr = map->uaddr_any[i]; 666 667 if (uvm_addr_invoke(map, uaddr, first, last, 668 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 669 return 0; 670 } 671 672 /* Fall back to brk() and stack() address selectors. */ 673 uaddr = map->uaddr_brk_stack; 674 if (uvm_addr_invoke(map, uaddr, first, last, 675 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 676 return 0; 677 678 return ENOMEM; 679 } 680 681 /* Calculate entry augmentation value. */ 682 vsize_t 683 uvm_map_addr_augment_get(struct vm_map_entry *entry) 684 { 685 vsize_t augment; 686 struct vm_map_entry *left, *right; 687 688 augment = entry->fspace; 689 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 690 augment = MAX(augment, left->fspace_augment); 691 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 692 augment = MAX(augment, right->fspace_augment); 693 return augment; 694 } 695 696 /* 697 * Update augmentation data in entry. 698 */ 699 void 700 uvm_map_addr_augment(struct vm_map_entry *entry) 701 { 702 vsize_t augment; 703 704 while (entry != NULL) { 705 /* Calculate value for augmentation. */ 706 augment = uvm_map_addr_augment_get(entry); 707 708 /* 709 * Descend update. 710 * Once we find an entry that already has the correct value, 711 * stop, since it means all its parents will use the correct 712 * value too. 713 */ 714 if (entry->fspace_augment == augment) 715 return; 716 entry->fspace_augment = augment; 717 entry = RBT_PARENT(uvm_map_addr, entry); 718 } 719 } 720 721 /* 722 * uvm_mapanon: establish a valid mapping in map for an anon 723 * 724 * => *addr and sz must be a multiple of PAGE_SIZE. 725 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 726 * => map must be unlocked. 727 * 728 * => align: align vaddr, must be a power-of-2. 729 * Align is only a hint and will be ignored if the alignment fails. 
730 */ 731 int 732 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 733 vsize_t align, unsigned int flags) 734 { 735 struct vm_map_entry *first, *last, *entry, *new; 736 struct uvm_map_deadq dead; 737 vm_prot_t prot; 738 vm_prot_t maxprot; 739 vm_inherit_t inherit; 740 int advice; 741 int error; 742 vaddr_t pmap_align, pmap_offset; 743 vaddr_t hint; 744 745 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 746 KASSERT(map != kernel_map); 747 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 748 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 749 splassert(IPL_NONE); 750 KASSERT((flags & UVM_FLAG_TRYLOCK) == 0); 751 752 /* 753 * We use pmap_align and pmap_offset as alignment and offset variables. 754 * 755 * Because the align parameter takes precedence over pmap prefer, 756 * the pmap_align will need to be set to align, with pmap_offset = 0, 757 * if pmap_prefer will not align. 758 */ 759 pmap_align = MAX(align, PAGE_SIZE); 760 pmap_offset = 0; 761 762 /* Decode parameters. */ 763 prot = UVM_PROTECTION(flags); 764 maxprot = UVM_MAXPROTECTION(flags); 765 advice = UVM_ADVICE(flags); 766 inherit = UVM_INHERIT(flags); 767 error = 0; 768 hint = trunc_page(*addr); 769 TAILQ_INIT(&dead); 770 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 771 KASSERT((align & (align - 1)) == 0); 772 773 /* Check protection. */ 774 if ((prot & maxprot) != prot) 775 return EACCES; 776 777 /* 778 * Before grabbing the lock, allocate a map entry for later 779 * use to ensure we don't wait for memory while holding the 780 * vm_map_lock. 781 */ 782 new = uvm_mapent_alloc(map, flags); 783 if (new == NULL) 784 return ENOMEM; 785 786 vm_map_lock(map); 787 first = last = NULL; 788 if (flags & UVM_FLAG_FIXED) { 789 /* 790 * Fixed location. 791 * 792 * Note: we ignore align, pmap_prefer. 793 * Fill in first, last and *addr. 794 */ 795 KASSERT((*addr & PAGE_MASK) == 0); 796 797 /* Check that the space is available. */ 798 if (flags & UVM_FLAG_UNMAP) { 799 if ((flags & UVM_FLAG_STACK) && 800 !uvm_map_is_stack_remappable(map, *addr, sz, 801 (flags & UVM_FLAG_SIGALTSTACK))) { 802 error = EINVAL; 803 goto unlock; 804 } 805 if (uvm_unmap_remove(map, *addr, *addr + sz, &dead, 806 FALSE, TRUE, 807 (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) { 808 error = EPERM; /* immutable entries found */ 809 goto unlock; 810 } 811 } 812 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 813 error = ENOMEM; 814 goto unlock; 815 } 816 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 817 (align == 0 || (*addr & (align - 1)) == 0) && 818 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 819 /* 820 * Address used as hint. 821 * 822 * Note: we enforce the alignment restriction, 823 * but ignore pmap_prefer. 824 */ 825 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 826 /* Run selection algorithm for executables. */ 827 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 828 addr, sz, pmap_align, pmap_offset, prot, hint); 829 830 if (error != 0) 831 goto unlock; 832 } else { 833 /* Update freelists from vmspace. */ 834 uvm_map_vmspace_update(map, &dead, flags); 835 836 error = uvm_map_findspace(map, &first, &last, addr, sz, 837 pmap_align, pmap_offset, prot, hint); 838 839 if (error != 0) 840 goto unlock; 841 } 842 843 /* Double-check if selected address doesn't cause overflow. */ 844 if (*addr + sz < *addr) { 845 error = ENOMEM; 846 goto unlock; 847 } 848 849 /* If we only want a query, return now. 
*/ 850 if (flags & UVM_FLAG_QUERY) { 851 error = 0; 852 goto unlock; 853 } 854 855 /* 856 * Create new entry. 857 * first and last may be invalidated after this call. 858 */ 859 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 860 new); 861 if (entry == NULL) { 862 error = ENOMEM; 863 goto unlock; 864 } 865 new = NULL; 866 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 867 entry->object.uvm_obj = NULL; 868 entry->offset = 0; 869 entry->protection = prot; 870 entry->max_protection = maxprot; 871 entry->inheritance = inherit; 872 entry->wired_count = 0; 873 entry->advice = advice; 874 if (prot & PROT_WRITE) 875 map->wserial++; 876 if (flags & UVM_FLAG_SYSCALL) { 877 entry->etype |= UVM_ET_SYSCALL; 878 map->wserial++; 879 } 880 if (flags & UVM_FLAG_STACK) { 881 entry->etype |= UVM_ET_STACK; 882 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 883 map->sserial++; 884 } 885 if (flags & UVM_FLAG_COPYONW) { 886 entry->etype |= UVM_ET_COPYONWRITE; 887 if ((flags & UVM_FLAG_OVERLAY) == 0) 888 entry->etype |= UVM_ET_NEEDSCOPY; 889 } 890 if (flags & UVM_FLAG_CONCEAL) 891 entry->etype |= UVM_ET_CONCEAL; 892 if (flags & UVM_FLAG_OVERLAY) { 893 entry->aref.ar_pageoff = 0; 894 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 895 } 896 897 /* Update map and process statistics. */ 898 map->size += sz; 899 if (prot != PROT_NONE) { 900 ((struct vmspace *)map)->vm_dused += 901 uvmspace_dused(map, *addr, *addr + sz); 902 } 903 904 unlock: 905 vm_map_unlock(map); 906 907 /* 908 * Remove dead entries. 909 * 910 * Dead entries may be the result of merging. 911 * uvm_map_mkentry may also create dead entries, when it attempts to 912 * destroy free-space entries. 913 */ 914 uvm_unmap_detach(&dead, 0); 915 916 if (new) 917 uvm_mapent_free(new); 918 return error; 919 } 920 921 /* 922 * uvm_map: establish a valid mapping in map 923 * 924 * => *addr and sz must be a multiple of PAGE_SIZE. 925 * => map must be unlocked. 926 * => <uobj,uoffset> value meanings (4 cases): 927 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 928 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 929 * [3] <uobj,uoffset> == normal mapping 930 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 931 * 932 * case [4] is for kernel mappings where we don't know the offset until 933 * we've found a virtual address. note that kernel object offsets are 934 * always relative to vm_map_min(kernel_map). 935 * 936 * => align: align vaddr, must be a power-of-2. 937 * Align is only a hint and will be ignored if the alignment fails. 938 */ 939 int 940 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 941 struct uvm_object *uobj, voff_t uoffset, 942 vsize_t align, unsigned int flags) 943 { 944 struct vm_map_entry *first, *last, *entry, *new; 945 struct uvm_map_deadq dead; 946 vm_prot_t prot; 947 vm_prot_t maxprot; 948 vm_inherit_t inherit; 949 int advice; 950 int error; 951 vaddr_t pmap_align, pmap_offset; 952 vaddr_t hint; 953 954 if ((map->flags & VM_MAP_INTRSAFE) == 0) 955 splassert(IPL_NONE); 956 else 957 splassert(IPL_VM); 958 959 /* 960 * We use pmap_align and pmap_offset as alignment and offset variables. 961 * 962 * Because the align parameter takes precedence over pmap prefer, 963 * the pmap_align will need to be set to align, with pmap_offset = 0, 964 * if pmap_prefer will not align. 
965 */ 966 if (uoffset == UVM_UNKNOWN_OFFSET) { 967 pmap_align = MAX(align, PAGE_SIZE); 968 pmap_offset = 0; 969 } else { 970 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 971 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 972 973 if (align == 0 || 974 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 975 /* pmap_offset satisfies align, no change. */ 976 } else { 977 /* Align takes precedence over pmap prefer. */ 978 pmap_align = align; 979 pmap_offset = 0; 980 } 981 } 982 983 /* Decode parameters. */ 984 prot = UVM_PROTECTION(flags); 985 maxprot = UVM_MAXPROTECTION(flags); 986 advice = UVM_ADVICE(flags); 987 inherit = UVM_INHERIT(flags); 988 error = 0; 989 hint = trunc_page(*addr); 990 TAILQ_INIT(&dead); 991 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 992 KASSERT((align & (align - 1)) == 0); 993 994 /* Holes are incompatible with other types of mappings. */ 995 if (flags & UVM_FLAG_HOLE) { 996 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 997 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 998 } 999 1000 /* Unset hint for kernel_map non-fixed allocations. */ 1001 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1002 hint = 0; 1003 1004 /* Check protection. */ 1005 if ((prot & maxprot) != prot) 1006 return EACCES; 1007 1008 if (map == kernel_map && 1009 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1010 panic("uvm_map: kernel map W^X violation requested"); 1011 1012 /* 1013 * Before grabbing the lock, allocate a map entry for later 1014 * use to ensure we don't wait for memory while holding the 1015 * vm_map_lock. 1016 */ 1017 new = uvm_mapent_alloc(map, flags); 1018 if (new == NULL) 1019 return ENOMEM; 1020 1021 if (flags & UVM_FLAG_TRYLOCK) { 1022 if (vm_map_lock_try(map) == FALSE) { 1023 error = EFAULT; 1024 goto out; 1025 } 1026 } else { 1027 vm_map_lock(map); 1028 } 1029 1030 first = last = NULL; 1031 if (flags & UVM_FLAG_FIXED) { 1032 /* 1033 * Fixed location. 1034 * 1035 * Note: we ignore align, pmap_prefer. 1036 * Fill in first, last and *addr. 1037 */ 1038 KASSERT((*addr & PAGE_MASK) == 0); 1039 1040 /* 1041 * Grow pmap to include allocated address. 1042 * If the growth fails, the allocation will fail too. 1043 */ 1044 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1045 uvm_maxkaddr < (*addr + sz)) { 1046 uvm_map_kmem_grow(map, &dead, 1047 *addr + sz - uvm_maxkaddr, flags); 1048 } 1049 1050 /* Check that the space is available. */ 1051 if (flags & UVM_FLAG_UNMAP) { 1052 if (uvm_unmap_remove(map, *addr, *addr + sz, &dead, 1053 FALSE, TRUE, TRUE) != 0) { 1054 error = EPERM; /* immutable entries found */ 1055 goto unlock; 1056 } 1057 } 1058 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1059 error = ENOMEM; 1060 goto unlock; 1061 } 1062 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1063 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1064 (align == 0 || (*addr & (align - 1)) == 0) && 1065 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1066 /* 1067 * Address used as hint. 1068 * 1069 * Note: we enforce the alignment restriction, 1070 * but ignore pmap_prefer. 1071 */ 1072 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1073 /* Run selection algorithm for executables. */ 1074 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1075 addr, sz, pmap_align, pmap_offset, prot, hint); 1076 1077 /* Grow kernel memory and try again. 
*/ 1078 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1079 uvm_map_kmem_grow(map, &dead, sz, flags); 1080 1081 error = uvm_addr_invoke(map, map->uaddr_exe, 1082 &first, &last, addr, sz, 1083 pmap_align, pmap_offset, prot, hint); 1084 } 1085 1086 if (error != 0) 1087 goto unlock; 1088 } else { 1089 /* Update freelists from vmspace. */ 1090 if (map->flags & VM_MAP_ISVMSPACE) 1091 uvm_map_vmspace_update(map, &dead, flags); 1092 1093 error = uvm_map_findspace(map, &first, &last, addr, sz, 1094 pmap_align, pmap_offset, prot, hint); 1095 1096 /* Grow kernel memory and try again. */ 1097 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1098 uvm_map_kmem_grow(map, &dead, sz, flags); 1099 1100 error = uvm_map_findspace(map, &first, &last, addr, sz, 1101 pmap_align, pmap_offset, prot, hint); 1102 } 1103 1104 if (error != 0) 1105 goto unlock; 1106 } 1107 1108 /* Double-check if selected address doesn't cause overflow. */ 1109 if (*addr + sz < *addr) { 1110 error = ENOMEM; 1111 goto unlock; 1112 } 1113 1114 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1115 uvm_maxkaddr >= *addr + sz); 1116 1117 /* If we only want a query, return now. */ 1118 if (flags & UVM_FLAG_QUERY) { 1119 error = 0; 1120 goto unlock; 1121 } 1122 1123 if (uobj == NULL) 1124 uoffset = 0; 1125 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1126 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1127 uoffset = *addr - vm_map_min(kernel_map); 1128 } 1129 1130 /* 1131 * Create new entry. 1132 * first and last may be invalidated after this call. 1133 */ 1134 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1135 new); 1136 if (entry == NULL) { 1137 error = ENOMEM; 1138 goto unlock; 1139 } 1140 new = NULL; 1141 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1142 entry->object.uvm_obj = uobj; 1143 entry->offset = uoffset; 1144 entry->protection = prot; 1145 entry->max_protection = maxprot; 1146 entry->inheritance = inherit; 1147 entry->wired_count = 0; 1148 entry->advice = advice; 1149 if (prot & PROT_WRITE) 1150 map->wserial++; 1151 if (flags & UVM_FLAG_SYSCALL) { 1152 entry->etype |= UVM_ET_SYSCALL; 1153 map->wserial++; 1154 } 1155 if (flags & UVM_FLAG_STACK) { 1156 entry->etype |= UVM_ET_STACK; 1157 if (flags & UVM_FLAG_UNMAP) 1158 map->sserial++; 1159 } 1160 if (uobj) 1161 entry->etype |= UVM_ET_OBJ; 1162 else if (flags & UVM_FLAG_HOLE) 1163 entry->etype |= UVM_ET_HOLE; 1164 if (flags & UVM_FLAG_NOFAULT) 1165 entry->etype |= UVM_ET_NOFAULT; 1166 if (flags & UVM_FLAG_WC) 1167 entry->etype |= UVM_ET_WC; 1168 if (flags & UVM_FLAG_COPYONW) { 1169 entry->etype |= UVM_ET_COPYONWRITE; 1170 if ((flags & UVM_FLAG_OVERLAY) == 0) 1171 entry->etype |= UVM_ET_NEEDSCOPY; 1172 } 1173 if (flags & UVM_FLAG_CONCEAL) 1174 entry->etype |= UVM_ET_CONCEAL; 1175 if (flags & UVM_FLAG_OVERLAY) { 1176 entry->aref.ar_pageoff = 0; 1177 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1178 } 1179 1180 /* Update map and process statistics. */ 1181 if (!(flags & UVM_FLAG_HOLE)) { 1182 map->size += sz; 1183 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && 1184 prot != PROT_NONE) { 1185 ((struct vmspace *)map)->vm_dused += 1186 uvmspace_dused(map, *addr, *addr + sz); 1187 } 1188 } 1189 1190 /* 1191 * Try to merge entry. 1192 * 1193 * Userland allocations are kept separated most of the time. 1194 * Forego the effort of merging what most of the time can't be merged 1195 * and only try the merge if it concerns a kernel entry. 
1196 */ 1197 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1198 (map->flags & VM_MAP_ISVMSPACE) == 0) 1199 uvm_mapent_tryjoin(map, entry, &dead); 1200 1201 unlock: 1202 vm_map_unlock(map); 1203 1204 /* 1205 * Remove dead entries. 1206 * 1207 * Dead entries may be the result of merging. 1208 * uvm_map_mkentry may also create dead entries, when it attempts to 1209 * destroy free-space entries. 1210 */ 1211 if (map->flags & VM_MAP_INTRSAFE) 1212 uvm_unmap_detach_intrsafe(&dead); 1213 else 1214 uvm_unmap_detach(&dead, 0); 1215 out: 1216 if (new) 1217 uvm_mapent_free(new); 1218 return error; 1219 } 1220 1221 /* 1222 * True iff e1 and e2 can be joined together. 1223 */ 1224 int 1225 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1226 struct vm_map_entry *e2) 1227 { 1228 KDASSERT(e1 != NULL && e2 != NULL); 1229 1230 /* Must be the same entry type and not have free memory between. */ 1231 if (e1->etype != e2->etype || e1->end != e2->start) 1232 return 0; 1233 1234 /* Submaps are never joined. */ 1235 if (UVM_ET_ISSUBMAP(e1)) 1236 return 0; 1237 1238 /* Never merge wired memory. */ 1239 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1240 return 0; 1241 1242 /* Protection, inheritance and advice must be equal. */ 1243 if (e1->protection != e2->protection || 1244 e1->max_protection != e2->max_protection || 1245 e1->inheritance != e2->inheritance || 1246 e1->advice != e2->advice) 1247 return 0; 1248 1249 /* If uvm_object: object itself and offsets within object must match. */ 1250 if (UVM_ET_ISOBJ(e1)) { 1251 if (e1->object.uvm_obj != e2->object.uvm_obj) 1252 return 0; 1253 if (e1->offset + (e1->end - e1->start) != e2->offset) 1254 return 0; 1255 } 1256 1257 /* 1258 * Cannot join shared amaps. 1259 * Note: no need to lock amap to look at refs, since we don't care 1260 * about its exact value. 1261 * If it is 1 (i.e. we have the only reference) it will stay there. 1262 */ 1263 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1264 return 0; 1265 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1266 return 0; 1267 1268 /* Apparently, e1 and e2 match. */ 1269 return 1; 1270 } 1271 1272 /* 1273 * Join support function. 1274 * 1275 * Returns the merged entry on success. 1276 * Returns NULL if the merge failed. 1277 */ 1278 struct vm_map_entry* 1279 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1280 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1281 { 1282 struct uvm_addr_state *free; 1283 1284 /* 1285 * Merging is not supported for map entries that 1286 * contain an amap in e1. This should never happen 1287 * anyway, because only kernel entries are merged. 1288 * These do not contain amaps. 1289 * e2 contains no real information in its amap, 1290 * so it can be erased immediately. 1291 */ 1292 KASSERT(e1->aref.ar_amap == NULL); 1293 1294 /* 1295 * Don't drop obj reference: 1296 * uvm_unmap_detach will do this for us. 1297 */ 1298 free = uvm_map_uaddr_e(map, e1); 1299 uvm_mapent_free_remove(map, free, e1); 1300 1301 free = uvm_map_uaddr_e(map, e2); 1302 uvm_mapent_free_remove(map, free, e2); 1303 uvm_mapent_addr_remove(map, e2); 1304 e1->end = e2->end; 1305 e1->guard = e2->guard; 1306 e1->fspace = e2->fspace; 1307 uvm_mapent_free_insert(map, free, e1); 1308 1309 DEAD_ENTRY_PUSH(dead, e2); 1310 return e1; 1311 } 1312 1313 /* 1314 * Attempt forward and backward joining of entry. 1315 * 1316 * Returns entry after joins. 1317 * We are guaranteed that the amap of entry is either non-existent or 1318 * has never been used. 
1319 */ 1320 struct vm_map_entry* 1321 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1322 struct uvm_map_deadq *dead) 1323 { 1324 struct vm_map_entry *other; 1325 struct vm_map_entry *merged; 1326 1327 /* Merge with previous entry. */ 1328 other = RBT_PREV(uvm_map_addr, entry); 1329 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1330 merged = uvm_mapent_merge(map, other, entry, dead); 1331 if (merged) 1332 entry = merged; 1333 } 1334 1335 /* 1336 * Merge with next entry. 1337 * 1338 * Because amap can only extend forward and the next entry 1339 * probably contains sensible info, only perform forward merging 1340 * in the absence of an amap. 1341 */ 1342 other = RBT_NEXT(uvm_map_addr, entry); 1343 if (other && entry->aref.ar_amap == NULL && 1344 other->aref.ar_amap == NULL && 1345 uvm_mapent_isjoinable(map, entry, other)) { 1346 merged = uvm_mapent_merge(map, entry, other, dead); 1347 if (merged) 1348 entry = merged; 1349 } 1350 1351 return entry; 1352 } 1353 1354 /* 1355 * Kill entries that are no longer in a map. 1356 */ 1357 void 1358 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1359 { 1360 struct vm_map_entry *entry, *tmp; 1361 int waitok = flags & UVM_PLA_WAITOK; 1362 1363 TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) { 1364 /* Drop reference to amap, if we've got one. */ 1365 if (entry->aref.ar_amap) 1366 amap_unref(entry->aref.ar_amap, 1367 entry->aref.ar_pageoff, 1368 atop(entry->end - entry->start), 1369 flags & AMAP_REFALL); 1370 1371 /* Skip entries for which we have to grab the kernel lock. */ 1372 if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry)) 1373 continue; 1374 1375 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1376 uvm_mapent_free(entry); 1377 } 1378 1379 if (TAILQ_EMPTY(deadq)) 1380 return; 1381 1382 KERNEL_LOCK(); 1383 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1384 if (waitok) 1385 uvm_pause(); 1386 /* Drop reference to our backing object, if we've got one. */ 1387 if (UVM_ET_ISSUBMAP(entry)) { 1388 /* ... unlikely to happen, but play it safe */ 1389 uvm_map_deallocate(entry->object.sub_map); 1390 } else if (UVM_ET_ISOBJ(entry) && 1391 entry->object.uvm_obj->pgops->pgo_detach) { 1392 entry->object.uvm_obj->pgops->pgo_detach( 1393 entry->object.uvm_obj); 1394 } 1395 1396 /* Step to next. */ 1397 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1398 uvm_mapent_free(entry); 1399 } 1400 KERNEL_UNLOCK(); 1401 } 1402 1403 void 1404 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1405 { 1406 struct vm_map_entry *entry; 1407 1408 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1409 KASSERT(entry->aref.ar_amap == NULL); 1410 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1411 KASSERT(!UVM_ET_ISOBJ(entry)); 1412 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1413 uvm_mapent_free(entry); 1414 } 1415 } 1416 1417 /* 1418 * Create and insert new entry. 1419 * 1420 * Returned entry contains new addresses and is inserted properly in the tree. 1421 * first and last are (probably) no longer valid. 
1422 */ 1423 struct vm_map_entry* 1424 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1425 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1426 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1427 { 1428 struct vm_map_entry *entry, *prev; 1429 struct uvm_addr_state *free; 1430 vaddr_t min, max; /* free space boundaries for new entry */ 1431 1432 KDASSERT(map != NULL); 1433 KDASSERT(first != NULL); 1434 KDASSERT(last != NULL); 1435 KDASSERT(dead != NULL); 1436 KDASSERT(sz > 0); 1437 KDASSERT(addr + sz > addr); 1438 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1439 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1440 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1441 uvm_tree_sanity(map, __FILE__, __LINE__); 1442 1443 min = addr + sz; 1444 max = VMMAP_FREE_END(last); 1445 1446 /* Initialize new entry. */ 1447 if (new == NULL) 1448 entry = uvm_mapent_alloc(map, flags); 1449 else 1450 entry = new; 1451 if (entry == NULL) 1452 return NULL; 1453 entry->offset = 0; 1454 entry->etype = 0; 1455 entry->wired_count = 0; 1456 entry->aref.ar_pageoff = 0; 1457 entry->aref.ar_amap = NULL; 1458 1459 entry->start = addr; 1460 entry->end = min; 1461 entry->guard = 0; 1462 entry->fspace = 0; 1463 1464 vm_map_assert_wrlock(map); 1465 1466 /* Reset free space in first. */ 1467 free = uvm_map_uaddr_e(map, first); 1468 uvm_mapent_free_remove(map, free, first); 1469 first->guard = 0; 1470 first->fspace = 0; 1471 1472 /* 1473 * Remove all entries that are fully replaced. 1474 * We are iterating using last in reverse order. 1475 */ 1476 for (; first != last; last = prev) { 1477 prev = RBT_PREV(uvm_map_addr, last); 1478 1479 KDASSERT(last->start == last->end); 1480 free = uvm_map_uaddr_e(map, last); 1481 uvm_mapent_free_remove(map, free, last); 1482 uvm_mapent_addr_remove(map, last); 1483 DEAD_ENTRY_PUSH(dead, last); 1484 } 1485 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1486 if (first->start == addr) { 1487 uvm_mapent_addr_remove(map, first); 1488 DEAD_ENTRY_PUSH(dead, first); 1489 } else { 1490 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1491 addr, flags); 1492 } 1493 1494 /* Finally, link in entry. 
*/ 1495 uvm_mapent_addr_insert(map, entry); 1496 uvm_map_fix_space(map, entry, min, max, flags); 1497 1498 uvm_tree_sanity(map, __FILE__, __LINE__); 1499 return entry; 1500 } 1501 1502 1503 /* 1504 * uvm_mapent_alloc: allocate a map entry 1505 */ 1506 struct vm_map_entry * 1507 uvm_mapent_alloc(struct vm_map *map, int flags) 1508 { 1509 struct vm_map_entry *me, *ne; 1510 int pool_flags; 1511 int i; 1512 1513 pool_flags = PR_WAITOK; 1514 if (flags & UVM_FLAG_TRYLOCK) 1515 pool_flags = PR_NOWAIT; 1516 1517 if (map->flags & VM_MAP_INTRSAFE || cold) { 1518 mtx_enter(&uvm_kmapent_mtx); 1519 if (SLIST_EMPTY(&uvm.kentry_free)) { 1520 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1521 &kd_nowait); 1522 if (ne == NULL) 1523 panic("uvm_mapent_alloc: cannot allocate map " 1524 "entry"); 1525 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1526 SLIST_INSERT_HEAD(&uvm.kentry_free, 1527 &ne[i], daddrs.addr_kentry); 1528 } 1529 if (ratecheck(&uvm_kmapent_last_warn_time, 1530 &uvm_kmapent_warn_rate)) 1531 printf("uvm_mapent_alloc: out of static " 1532 "map entries\n"); 1533 } 1534 me = SLIST_FIRST(&uvm.kentry_free); 1535 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1536 uvmexp.kmapent++; 1537 mtx_leave(&uvm_kmapent_mtx); 1538 me->flags = UVM_MAP_STATIC; 1539 } else if (map == kernel_map) { 1540 splassert(IPL_NONE); 1541 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1542 if (me == NULL) 1543 goto out; 1544 me->flags = UVM_MAP_KMEM; 1545 } else { 1546 splassert(IPL_NONE); 1547 me = pool_get(&uvm_map_entry_pool, pool_flags); 1548 if (me == NULL) 1549 goto out; 1550 me->flags = 0; 1551 } 1552 1553 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1554 out: 1555 return me; 1556 } 1557 1558 /* 1559 * uvm_mapent_free: free map entry 1560 * 1561 * => XXX: static pool for kernel map? 1562 */ 1563 void 1564 uvm_mapent_free(struct vm_map_entry *me) 1565 { 1566 if (me->flags & UVM_MAP_STATIC) { 1567 mtx_enter(&uvm_kmapent_mtx); 1568 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1569 uvmexp.kmapent--; 1570 mtx_leave(&uvm_kmapent_mtx); 1571 } else if (me->flags & UVM_MAP_KMEM) { 1572 splassert(IPL_NONE); 1573 pool_put(&uvm_map_entry_kmem_pool, me); 1574 } else { 1575 splassert(IPL_NONE); 1576 pool_put(&uvm_map_entry_pool, me); 1577 } 1578 } 1579 1580 /* 1581 * uvm_map_lookup_entry: find map entry at or before an address. 1582 * 1583 * => map must at least be read-locked by caller 1584 * => entry is returned in "entry" 1585 * => return value is true if address is in the returned entry 1586 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1587 * returned for those mappings. 1588 */ 1589 boolean_t 1590 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1591 struct vm_map_entry **entry) 1592 { 1593 vm_map_assert_anylock(map); 1594 1595 *entry = uvm_map_entrybyaddr(&map->addr, address); 1596 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1597 (*entry)->start <= address && (*entry)->end > address; 1598 } 1599 1600 /* 1601 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet 1602 * grown -- then uvm_map_check_region_range() should not cache the entry 1603 * because growth won't be seen. 1604 */ 1605 int 1606 uvm_map_inentry_sp(vm_map_entry_t entry) 1607 { 1608 if ((entry->etype & UVM_ET_STACK) == 0) { 1609 if (entry->protection == PROT_NONE) 1610 return (-1); /* don't update range */ 1611 return (0); 1612 } 1613 return (1); 1614 } 1615 1616 /* 1617 * The system call must not come from a writeable entry, W^X is violated. 
1618 * (Would be nice if we can spot aliasing, which is also kind of bad) 1619 * 1620 * The system call must come from an syscall-labeled entry (which are 1621 * the text regions of the main program, sigtramp, ld.so, or libc). 1622 */ 1623 int 1624 uvm_map_inentry_pc(vm_map_entry_t entry) 1625 { 1626 if (entry->protection & PROT_WRITE) 1627 return (0); /* not permitted */ 1628 if ((entry->etype & UVM_ET_SYSCALL) == 0) 1629 return (0); /* not permitted */ 1630 return (1); 1631 } 1632 1633 int 1634 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie) 1635 { 1636 return (serial != ie->ie_serial || ie->ie_start == 0 || 1637 addr < ie->ie_start || addr >= ie->ie_end); 1638 } 1639 1640 /* 1641 * Inside a vm_map find the reg address and verify it via function. 1642 * Remember low and high addresses of region if valid and return TRUE, 1643 * else return FALSE. 1644 */ 1645 boolean_t 1646 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1647 int (*fn)(vm_map_entry_t), u_long serial) 1648 { 1649 vm_map_t map = &p->p_vmspace->vm_map; 1650 vm_map_entry_t entry; 1651 int ret; 1652 1653 if (addr < map->min_offset || addr >= map->max_offset) 1654 return (FALSE); 1655 1656 /* lock map */ 1657 vm_map_lock_read(map); 1658 1659 /* lookup */ 1660 if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) { 1661 vm_map_unlock_read(map); 1662 return (FALSE); 1663 } 1664 1665 ret = (*fn)(entry); 1666 if (ret == 0) { 1667 vm_map_unlock_read(map); 1668 return (FALSE); 1669 } else if (ret == 1) { 1670 ie->ie_start = entry->start; 1671 ie->ie_end = entry->end; 1672 ie->ie_serial = serial; 1673 } else { 1674 /* do not update, re-check later */ 1675 } 1676 vm_map_unlock_read(map); 1677 return (TRUE); 1678 } 1679 1680 boolean_t 1681 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1682 const char *fmt, int (*fn)(vm_map_entry_t), u_long serial) 1683 { 1684 union sigval sv; 1685 boolean_t ok = TRUE; 1686 1687 if (uvm_map_inentry_recheck(serial, addr, ie)) { 1688 ok = uvm_map_inentry_fix(p, ie, addr, fn, serial); 1689 if (!ok) { 1690 KERNEL_LOCK(); 1691 printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid, 1692 addr, ie->ie_start, ie->ie_end-1); 1693 p->p_p->ps_acflag |= AMAP; 1694 sv.sival_ptr = (void *)PROC_PC(p); 1695 trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv); 1696 KERNEL_UNLOCK(); 1697 } 1698 } 1699 return (ok); 1700 } 1701 1702 /* 1703 * Check whether the given address range can be converted to a MAP_STACK 1704 * mapping. 1705 * 1706 * Must be called with map locked. 1707 */ 1708 boolean_t 1709 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz, 1710 int sigaltstack_check) 1711 { 1712 vaddr_t end = addr + sz; 1713 struct vm_map_entry *first, *iter, *prev = NULL; 1714 1715 vm_map_assert_anylock(map); 1716 1717 if (!uvm_map_lookup_entry(map, addr, &first)) { 1718 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1719 addr, end, map); 1720 return FALSE; 1721 } 1722 1723 /* 1724 * Check that the address range exists and is contiguous. 1725 */ 1726 for (iter = first; iter != NULL && iter->start < end; 1727 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1728 /* 1729 * Make sure that we do not have holes in the range. 
1730 */ 1731 #if 0 1732 if (prev != NULL) { 1733 printf("prev->start 0x%lx, prev->end 0x%lx, " 1734 "iter->start 0x%lx, iter->end 0x%lx\n", 1735 prev->start, prev->end, iter->start, iter->end); 1736 } 1737 #endif 1738 1739 if (prev != NULL && prev->end != iter->start) { 1740 printf("map stack 0x%lx-0x%lx of map %p failed: " 1741 "hole in range\n", addr, end, map); 1742 return FALSE; 1743 } 1744 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1745 printf("map stack 0x%lx-0x%lx of map %p failed: " 1746 "hole in range\n", addr, end, map); 1747 return FALSE; 1748 } 1749 if (sigaltstack_check) { 1750 if ((iter->etype & UVM_ET_SYSCALL)) 1751 return FALSE; 1752 if (iter->protection != (PROT_READ | PROT_WRITE)) 1753 return FALSE; 1754 } 1755 } 1756 1757 return TRUE; 1758 } 1759 1760 /* 1761 * Remap the middle-pages of an existing mapping as a stack range. 1762 * If there exists a previous contiguous mapping with the given range 1763 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1764 * mapping is dropped, and a new anon mapping is created and marked as 1765 * a stack. 1766 * 1767 * Must be called with map unlocked. 1768 */ 1769 int 1770 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1771 { 1772 vm_map_t map = &p->p_vmspace->vm_map; 1773 vaddr_t start, end; 1774 int error; 1775 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1776 PROT_READ | PROT_WRITE | PROT_EXEC, 1777 MAP_INHERIT_COPY, MADV_NORMAL, 1778 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1779 UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK); 1780 1781 start = round_page(addr); 1782 end = trunc_page(addr + sz); 1783 #ifdef MACHINE_STACK_GROWS_UP 1784 if (end == addr + sz) 1785 end -= PAGE_SIZE; 1786 #else 1787 if (start == addr) 1788 start += PAGE_SIZE; 1789 #endif 1790 1791 if (start < map->min_offset || end >= map->max_offset || end < start) 1792 return EINVAL; 1793 1794 /* 1795 * UVM_FLAG_SIGALTSTACK indicates that immutable may be bypassed, 1796 * but the range is checked that it is contiguous, is not a syscall 1797 * mapping, and protection RW. Then, a new mapping (all zero) is 1798 * placed upon the region, which prevents an attacker from pivoting 1799 * into pre-placed MAP_STACK space. 1800 */ 1801 error = uvm_mapanon(map, &start, end - start, 0, flags); 1802 if (error != 0) 1803 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1804 1805 return error; 1806 } 1807 1808 /* 1809 * uvm_map_pie: return a random load address for a PIE executable 1810 * properly aligned. 
1811 */ 1812 #ifndef VM_PIE_MAX_ADDR 1813 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1814 #endif 1815 1816 #ifndef VM_PIE_MIN_ADDR 1817 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1818 #endif 1819 1820 #ifndef VM_PIE_MIN_ALIGN 1821 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1822 #endif 1823 1824 vaddr_t 1825 uvm_map_pie(vaddr_t align) 1826 { 1827 vaddr_t addr, space, min; 1828 1829 align = MAX(align, VM_PIE_MIN_ALIGN); 1830 1831 /* round up to next alignment */ 1832 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1833 1834 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1835 return (align); 1836 1837 space = (VM_PIE_MAX_ADDR - min) / align; 1838 space = MIN(space, (u_int32_t)-1); 1839 1840 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1841 addr += min; 1842 1843 return (addr); 1844 } 1845 1846 void 1847 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1848 { 1849 struct uvm_map_deadq dead; 1850 1851 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1852 (end & (vaddr_t)PAGE_MASK) == 0); 1853 TAILQ_INIT(&dead); 1854 vm_map_lock(map); 1855 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE); 1856 vm_map_unlock(map); 1857 1858 if (map->flags & VM_MAP_INTRSAFE) 1859 uvm_unmap_detach_intrsafe(&dead); 1860 else 1861 uvm_unmap_detach(&dead, 0); 1862 } 1863 1864 /* 1865 * Mark entry as free. 1866 * 1867 * entry will be put on the dead list. 1868 * The free space will be merged into the previous or a new entry, 1869 * unless markfree is false. 1870 */ 1871 void 1872 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1873 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1874 boolean_t markfree) 1875 { 1876 struct uvm_addr_state *free; 1877 struct vm_map_entry *prev; 1878 vaddr_t addr; /* Start of freed range. */ 1879 vaddr_t end; /* End of freed range. */ 1880 1881 UVM_MAP_REQ_WRITE(map); 1882 1883 prev = *prev_ptr; 1884 if (prev == entry) 1885 *prev_ptr = prev = NULL; 1886 1887 if (prev == NULL || 1888 VMMAP_FREE_END(prev) != entry->start) 1889 prev = RBT_PREV(uvm_map_addr, entry); 1890 1891 /* Entry is describing only free memory and has nothing to drain into. */ 1892 if (prev == NULL && entry->start == entry->end && markfree) { 1893 *prev_ptr = entry; 1894 return; 1895 } 1896 1897 addr = entry->start; 1898 end = VMMAP_FREE_END(entry); 1899 free = uvm_map_uaddr_e(map, entry); 1900 uvm_mapent_free_remove(map, free, entry); 1901 uvm_mapent_addr_remove(map, entry); 1902 DEAD_ENTRY_PUSH(dead, entry); 1903 1904 if (markfree) { 1905 if (prev) { 1906 free = uvm_map_uaddr_e(map, prev); 1907 uvm_mapent_free_remove(map, free, prev); 1908 } 1909 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1910 } 1911 } 1912 1913 /* 1914 * Unwire and release referenced amap and object from map entry. 1915 */ 1916 void 1917 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry, 1918 int needlock) 1919 { 1920 /* Unwire removed map entry. */ 1921 if (VM_MAPENT_ISWIRED(entry)) { 1922 KERNEL_LOCK(); 1923 entry->wired_count = 0; 1924 uvm_fault_unwire_locked(map, entry->start, entry->end); 1925 KERNEL_UNLOCK(); 1926 } 1927 1928 if (needlock) 1929 uvm_map_lock_entry(entry); 1930 1931 /* Entry-type specific code. */ 1932 if (UVM_ET_ISHOLE(entry)) { 1933 /* Nothing to be done for holes. 
*/ 1934 } else if (map->flags & VM_MAP_INTRSAFE) { 1935 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1936 1937 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1938 } else if (UVM_ET_ISOBJ(entry) && 1939 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1940 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1941 /* 1942 * Note: kernel object mappings are currently used in 1943 * two ways: 1944 * [1] "normal" mappings of pages in the kernel object 1945 * [2] uvm_km_valloc'd allocations in which we 1946 * pmap_enter in some non-kernel-object page 1947 * (e.g. vmapbuf). 1948 * 1949 * for case [1], we need to remove the mapping from 1950 * the pmap and then remove the page from the kernel 1951 * object (because, once pages in a kernel object are 1952 * unmapped they are no longer needed, unlike, say, 1953 * a vnode where you might want the data to persist 1954 * until flushed out of a queue). 1955 * 1956 * for case [2], we need to remove the mapping from 1957 * the pmap. there shouldn't be any pages at the 1958 * specified offset in the kernel object [but it 1959 * doesn't hurt to call uvm_km_pgremove just to be 1960 * safe?] 1961 * 1962 * uvm_km_pgremove currently does the following: 1963 * for pages in the kernel object range: 1964 * - drops the swap slot 1965 * - uvm_pagefree the page 1966 * 1967 * note there is version of uvm_km_pgremove() that 1968 * is used for "intrsafe" objects. 1969 */ 1970 /* 1971 * remove mappings from pmap and drop the pages 1972 * from the object. offsets are always relative 1973 * to vm_map_min(kernel_map). 1974 */ 1975 uvm_km_pgremove(entry->object.uvm_obj, entry->start, 1976 entry->end); 1977 } else { 1978 /* remove mappings the standard way. */ 1979 pmap_remove(map->pmap, entry->start, entry->end); 1980 } 1981 1982 if (needlock) 1983 uvm_map_unlock_entry(entry); 1984 } 1985 1986 void 1987 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1988 { 1989 uvm_unmap_kill_entry_withlock(map, entry, 0); 1990 } 1991 1992 /* 1993 * Remove all entries from start to end. 1994 * 1995 * If remove_holes, then remove ET_HOLE entries as well. 1996 * If markfree, entry will be properly marked free, otherwise, no replacement 1997 * entry will be put in the tree (corrupting the tree). 1998 */ 1999 int 2000 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2001 struct uvm_map_deadq *dead, boolean_t remove_holes, 2002 boolean_t markfree, boolean_t checkimmutable) 2003 { 2004 struct vm_map_entry *prev_hint, *next, *entry; 2005 2006 start = MAX(start, map->min_offset); 2007 end = MIN(end, map->max_offset); 2008 if (start >= end) 2009 return 0; 2010 2011 vm_map_assert_wrlock(map); 2012 2013 /* Find first affected entry. */ 2014 entry = uvm_map_entrybyaddr(&map->addr, start); 2015 KDASSERT(entry != NULL && entry->start <= start); 2016 2017 if (checkimmutable) { 2018 struct vm_map_entry *entry1 = entry; 2019 2020 /* Refuse to unmap if any entries are immutable */ 2021 if (entry1->end <= start) 2022 entry1 = RBT_NEXT(uvm_map_addr, entry1); 2023 for (; entry1 != NULL && entry1->start < end; entry1 = next) { 2024 KDASSERT(entry1->start >= start); 2025 next = RBT_NEXT(uvm_map_addr, entry1); 2026 /* Treat memory holes as free space. 
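 * A zero-length entry or an UVM_ET_HOLE entry maps no pages, so the
 * immutability check below does not apply to it.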
*/ 2027 if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1)) 2028 continue; 2029 if (entry1->etype & UVM_ET_IMMUTABLE) 2030 return EPERM; 2031 } 2032 } 2033 2034 if (entry->end <= start && markfree) 2035 entry = RBT_NEXT(uvm_map_addr, entry); 2036 else 2037 UVM_MAP_CLIP_START(map, entry, start); 2038 2039 /* 2040 * Iterate entries until we reach end address. 2041 * prev_hint hints where the freed space can be appended to. 2042 */ 2043 prev_hint = NULL; 2044 for (; entry != NULL && entry->start < end; entry = next) { 2045 KDASSERT(entry->start >= start); 2046 if (entry->end > end || !markfree) 2047 UVM_MAP_CLIP_END(map, entry, end); 2048 KDASSERT(entry->start >= start && entry->end <= end); 2049 next = RBT_NEXT(uvm_map_addr, entry); 2050 2051 /* Don't remove holes unless asked to do so. */ 2052 if (UVM_ET_ISHOLE(entry)) { 2053 if (!remove_holes) { 2054 prev_hint = entry; 2055 continue; 2056 } 2057 } 2058 2059 /* A stack has been removed.. */ 2060 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2061 map->sserial++; 2062 2063 /* Kill entry. */ 2064 uvm_unmap_kill_entry_withlock(map, entry, 1); 2065 2066 /* Update space usage. */ 2067 if ((map->flags & VM_MAP_ISVMSPACE) && 2068 entry->object.uvm_obj == NULL && 2069 entry->protection != PROT_NONE && 2070 !UVM_ET_ISHOLE(entry)) { 2071 ((struct vmspace *)map)->vm_dused -= 2072 uvmspace_dused(map, entry->start, entry->end); 2073 } 2074 if (!UVM_ET_ISHOLE(entry)) 2075 map->size -= entry->end - entry->start; 2076 2077 /* Actual removal of entry. */ 2078 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2079 } 2080 2081 pmap_update(vm_map_pmap(map)); 2082 2083 #ifdef VMMAP_DEBUG 2084 if (markfree) { 2085 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2086 entry != NULL && entry->start < end; 2087 entry = RBT_NEXT(uvm_map_addr, entry)) { 2088 KDASSERT(entry->end <= start || 2089 entry->start == entry->end || 2090 UVM_ET_ISHOLE(entry)); 2091 } 2092 } else { 2093 vaddr_t a; 2094 for (a = start; a < end; a += PAGE_SIZE) 2095 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2096 } 2097 #endif 2098 return 0; 2099 } 2100 2101 /* 2102 * Mark all entries from first until end (exclusive) as pageable. 2103 * 2104 * Lock must be exclusive on entry and will not be touched. 2105 */ 2106 void 2107 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2108 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2109 { 2110 struct vm_map_entry *iter; 2111 2112 for (iter = first; iter != end; 2113 iter = RBT_NEXT(uvm_map_addr, iter)) { 2114 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2115 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2116 continue; 2117 2118 iter->wired_count = 0; 2119 uvm_fault_unwire_locked(map, iter->start, iter->end); 2120 } 2121 } 2122 2123 /* 2124 * Mark all entries from first until end (exclusive) as wired. 2125 * 2126 * Lockflags determines the lock state on return from this function. 2127 * Lock must be exclusive on entry. 2128 */ 2129 int 2130 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2131 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2132 int lockflags) 2133 { 2134 struct vm_map_entry *iter; 2135 #ifdef DIAGNOSTIC 2136 unsigned int timestamp_save; 2137 #endif 2138 int error; 2139 2140 /* 2141 * Wire pages in two passes: 2142 * 2143 * 1: holding the write lock, we create any anonymous maps that need 2144 * to be created. 
then we clip each map entry to the region to 2145 * be wired and increment its wiring count. 2146 * 2147 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2148 * in the pages for any newly wired area (wired_count == 1). 2149 * 2150 * downgrading to a read lock for uvm_fault_wire avoids a possible 2151 * deadlock with another thread that may have faulted on one of 2152 * the pages to be wired (it would mark the page busy, blocking 2153 * us, then in turn block on the map lock that we hold). 2154 * because we keep the read lock on the map, the copy-on-write 2155 * status of the entries we modify here cannot change. 2156 */ 2157 for (iter = first; iter != end; 2158 iter = RBT_NEXT(uvm_map_addr, iter)) { 2159 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2160 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2161 iter->protection == PROT_NONE) 2162 continue; 2163 2164 /* 2165 * Perform actions of vm_map_lookup that need the write lock. 2166 * - create an anonymous map for copy-on-write 2167 * - anonymous map for zero-fill 2168 * Skip submaps. 2169 */ 2170 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2171 UVM_ET_ISNEEDSCOPY(iter) && 2172 ((iter->protection & PROT_WRITE) || 2173 iter->object.uvm_obj == NULL)) { 2174 amap_copy(map, iter, M_WAITOK, 2175 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2176 iter->start, iter->end); 2177 } 2178 iter->wired_count++; 2179 } 2180 2181 /* 2182 * Pass 2. 2183 */ 2184 #ifdef DIAGNOSTIC 2185 timestamp_save = map->timestamp; 2186 #endif 2187 vm_map_busy(map); 2188 vm_map_downgrade(map); 2189 2190 error = 0; 2191 for (iter = first; error == 0 && iter != end; 2192 iter = RBT_NEXT(uvm_map_addr, iter)) { 2193 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2194 iter->protection == PROT_NONE) 2195 continue; 2196 2197 error = uvm_fault_wire(map, iter->start, iter->end, 2198 iter->protection); 2199 } 2200 2201 if (error) { 2202 /* 2203 * uvm_fault_wire failure 2204 * 2205 * Reacquire lock and undo our work. 2206 */ 2207 vm_map_upgrade(map); 2208 vm_map_unbusy(map); 2209 #ifdef DIAGNOSTIC 2210 if (timestamp_save != map->timestamp) 2211 panic("uvm_map_pageable_wire: stale map"); 2212 #endif 2213 2214 /* 2215 * first is no longer needed to restart loops. 2216 * Use it as iterator to unmap successful mappings. 2217 */ 2218 for (; first != iter; 2219 first = RBT_NEXT(uvm_map_addr, first)) { 2220 if (UVM_ET_ISHOLE(first) || 2221 first->start == first->end || 2222 first->protection == PROT_NONE) 2223 continue; 2224 2225 first->wired_count--; 2226 if (!VM_MAPENT_ISWIRED(first)) { 2227 uvm_fault_unwire_locked(map, 2228 first->start, first->end); 2229 } 2230 } 2231 2232 /* decrease counter in the rest of the entries */ 2233 for (; iter != end; 2234 iter = RBT_NEXT(uvm_map_addr, iter)) { 2235 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2236 iter->protection == PROT_NONE) 2237 continue; 2238 2239 iter->wired_count--; 2240 } 2241 2242 if ((lockflags & UVM_LK_EXIT) == 0) 2243 vm_map_unlock(map); 2244 return error; 2245 } 2246 2247 /* We are currently holding a read lock. */ 2248 if ((lockflags & UVM_LK_EXIT) == 0) { 2249 vm_map_unbusy(map); 2250 vm_map_unlock_read(map); 2251 } else { 2252 vm_map_upgrade(map); 2253 vm_map_unbusy(map); 2254 #ifdef DIAGNOSTIC 2255 if (timestamp_save != map->timestamp) 2256 panic("uvm_map_pageable_wire: stale map"); 2257 #endif 2258 } 2259 return 0; 2260 } 2261 2262 /* 2263 * uvm_map_pageable: set pageability of a range in a map. 
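 *
 * Typical calls (sketch only; start is truncated and end rounded to a
 * page boundary below, and lockflags 0 means the map is both locked and
 * unlocked inside this function):
 *
 *	error = uvm_map_pageable(map, start, end, FALSE, 0);	wire the range
 *	error = uvm_map_pageable(map, start, end, TRUE, 0);	make it pageable again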
2264 * 2265 * Flags: 2266 * UVM_LK_ENTER: map is already locked by caller 2267 * UVM_LK_EXIT: don't unlock map on exit 2268 * 2269 * The full range must be in use (entries may not have fspace != 0). 2270 * UVM_ET_HOLE counts as unmapped. 2271 */ 2272 int 2273 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2274 boolean_t new_pageable, int lockflags) 2275 { 2276 struct vm_map_entry *first, *last, *tmp; 2277 int error; 2278 2279 start = trunc_page(start); 2280 end = round_page(end); 2281 2282 if (start > end) 2283 return EINVAL; 2284 if (start == end) 2285 return 0; /* nothing to do */ 2286 if (start < map->min_offset) 2287 return EFAULT; /* why? see first XXX below */ 2288 if (end > map->max_offset) 2289 return EINVAL; /* why? see second XXX below */ 2290 2291 KASSERT(map->flags & VM_MAP_PAGEABLE); 2292 if ((lockflags & UVM_LK_ENTER) == 0) 2293 vm_map_lock(map); 2294 2295 /* 2296 * Find first entry. 2297 * 2298 * Initial test on start is different, because of the different 2299 * error returned. Rest is tested further down. 2300 */ 2301 first = uvm_map_entrybyaddr(&map->addr, start); 2302 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2303 /* 2304 * XXX if the first address is not mapped, it is EFAULT? 2305 */ 2306 error = EFAULT; 2307 goto out; 2308 } 2309 2310 /* Check that the range has no holes. */ 2311 for (last = first; last != NULL && last->start < end; 2312 last = RBT_NEXT(uvm_map_addr, last)) { 2313 if (UVM_ET_ISHOLE(last) || 2314 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2315 /* 2316 * XXX unmapped memory in range, why is it EINVAL 2317 * instead of EFAULT? 2318 */ 2319 error = EINVAL; 2320 goto out; 2321 } 2322 } 2323 2324 /* 2325 * Last ended at the first entry after the range. 2326 * Move back one step. 2327 * 2328 * Note that last may be NULL. 2329 */ 2330 if (last == NULL) { 2331 last = RBT_MAX(uvm_map_addr, &map->addr); 2332 if (last->end < end) { 2333 error = EINVAL; 2334 goto out; 2335 } 2336 } else { 2337 KASSERT(last != first); 2338 last = RBT_PREV(uvm_map_addr, last); 2339 } 2340 2341 /* Wire/unwire pages here. */ 2342 if (new_pageable) { 2343 /* 2344 * Mark pageable. 2345 * entries that are not wired are untouched. 2346 */ 2347 if (VM_MAPENT_ISWIRED(first)) 2348 UVM_MAP_CLIP_START(map, first, start); 2349 /* 2350 * Split last at end. 2351 * Make tmp be the first entry after what is to be touched. 2352 * If last is not wired, don't touch it. 2353 */ 2354 if (VM_MAPENT_ISWIRED(last)) { 2355 UVM_MAP_CLIP_END(map, last, end); 2356 tmp = RBT_NEXT(uvm_map_addr, last); 2357 } else 2358 tmp = last; 2359 2360 uvm_map_pageable_pgon(map, first, tmp, start, end); 2361 error = 0; 2362 2363 out: 2364 if ((lockflags & UVM_LK_EXIT) == 0) 2365 vm_map_unlock(map); 2366 return error; 2367 } else { 2368 /* 2369 * Mark entries wired. 2370 * entries are always touched (because recovery needs this). 2371 */ 2372 if (!VM_MAPENT_ISWIRED(first)) 2373 UVM_MAP_CLIP_START(map, first, start); 2374 /* 2375 * Split last at end. 2376 * Make tmp be the first entry after what is to be touched. 2377 * If last is not wired, don't touch it. 2378 */ 2379 if (!VM_MAPENT_ISWIRED(last)) { 2380 UVM_MAP_CLIP_END(map, last, end); 2381 tmp = RBT_NEXT(uvm_map_addr, last); 2382 } else 2383 tmp = last; 2384 2385 return uvm_map_pageable_wire(map, first, tmp, start, end, 2386 lockflags); 2387 } 2388 } 2389 2390 /* 2391 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2392 * all mapped regions. 2393 * 2394 * Map must not be locked. 
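 *
 * Sketch of an mlockall(2)-style caller (limit is the caller's wired
 * memory limit, e.g. lim_cur(RLIMIT_MEMLOCK); 0 skips that check):
 *
 *	error = uvm_map_pageable_all(map, MCL_CURRENT | MCL_FUTURE, limit);
 *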
2395 * If no flags are specified, all regions are unwired. 2396 */ 2397 int 2398 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2399 { 2400 vsize_t size; 2401 struct vm_map_entry *iter; 2402 2403 KASSERT(map->flags & VM_MAP_PAGEABLE); 2404 vm_map_lock(map); 2405 2406 if (flags == 0) { 2407 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2408 NULL, map->min_offset, map->max_offset); 2409 2410 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2411 vm_map_unlock(map); 2412 return 0; 2413 } 2414 2415 if (flags & MCL_FUTURE) 2416 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2417 if (!(flags & MCL_CURRENT)) { 2418 vm_map_unlock(map); 2419 return 0; 2420 } 2421 2422 /* 2423 * Count number of pages in all non-wired entries. 2424 * If the number exceeds the limit, abort. 2425 */ 2426 size = 0; 2427 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2428 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2429 continue; 2430 2431 size += iter->end - iter->start; 2432 } 2433 2434 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2435 vm_map_unlock(map); 2436 return ENOMEM; 2437 } 2438 2439 /* XXX non-pmap_wired_count case must be handled by caller */ 2440 #ifdef pmap_wired_count 2441 if (limit != 0 && 2442 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2443 vm_map_unlock(map); 2444 return ENOMEM; 2445 } 2446 #endif 2447 2448 /* 2449 * uvm_map_pageable_wire will release lock 2450 */ 2451 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2452 NULL, map->min_offset, map->max_offset, 0); 2453 } 2454 2455 /* 2456 * Initialize map. 2457 * 2458 * Allocates sufficient entries to describe the free memory in the map. 2459 */ 2460 void 2461 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max, 2462 int flags) 2463 { 2464 int i; 2465 2466 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2467 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2468 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2469 2470 /* 2471 * Update parameters. 2472 * 2473 * This code handles (vaddr_t)-1 and other page mask ending addresses 2474 * properly. 2475 * We lose the top page if the full virtual address space is used. 2476 */ 2477 if (max & (vaddr_t)PAGE_MASK) { 2478 max += 1; 2479 if (max == 0) /* overflow */ 2480 max -= PAGE_SIZE; 2481 } 2482 2483 RBT_INIT(uvm_map_addr, &map->addr); 2484 map->uaddr_exe = NULL; 2485 for (i = 0; i < nitems(map->uaddr_any); ++i) 2486 map->uaddr_any[i] = NULL; 2487 map->uaddr_brk_stack = NULL; 2488 2489 map->pmap = pmap; 2490 map->size = 0; 2491 map->ref_count = 0; 2492 map->min_offset = min; 2493 map->max_offset = max; 2494 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2495 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2496 map->flags = flags; 2497 map->timestamp = 0; 2498 if (flags & VM_MAP_ISVMSPACE) 2499 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2500 else 2501 rw_init(&map->lock, "kmmaplk"); 2502 mtx_init(&map->mtx, IPL_VM); 2503 mtx_init(&map->flags_lock, IPL_VM); 2504 2505 /* Configure the allocators. */ 2506 if (flags & VM_MAP_ISVMSPACE) 2507 uvm_map_setup_md(map); 2508 else 2509 map->uaddr_any[3] = &uaddr_kbootstrap; 2510 2511 /* 2512 * Fill map entries. 2513 * We do not need to write-lock the map here because only the current 2514 * thread sees it right now. Initialize ref_count to 0 above to avoid 2515 * bogus triggering of lock-not-held assertions. 
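 * (uvm_map_teardown() is the inverse of this function; uvm_map_create()
 * further below is the malloc(9)-backed wrapper around it.)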
2516 */ 2517 uvm_map_setup_entries(map); 2518 uvm_tree_sanity(map, __FILE__, __LINE__); 2519 map->ref_count = 1; 2520 } 2521 2522 /* 2523 * Destroy the map. 2524 * 2525 * This is the inverse operation to uvm_map_setup. 2526 */ 2527 void 2528 uvm_map_teardown(struct vm_map *map) 2529 { 2530 struct uvm_map_deadq dead_entries; 2531 struct vm_map_entry *entry, *tmp; 2532 #ifdef VMMAP_DEBUG 2533 size_t numq, numt; 2534 #endif 2535 int i; 2536 2537 KERNEL_ASSERT_LOCKED(); 2538 KERNEL_UNLOCK(); 2539 KERNEL_ASSERT_UNLOCKED(); 2540 2541 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2542 2543 vm_map_lock(map); 2544 2545 /* Remove address selectors. */ 2546 uvm_addr_destroy(map->uaddr_exe); 2547 map->uaddr_exe = NULL; 2548 for (i = 0; i < nitems(map->uaddr_any); i++) { 2549 uvm_addr_destroy(map->uaddr_any[i]); 2550 map->uaddr_any[i] = NULL; 2551 } 2552 uvm_addr_destroy(map->uaddr_brk_stack); 2553 map->uaddr_brk_stack = NULL; 2554 2555 /* 2556 * Remove entries. 2557 * 2558 * The following is based on graph breadth-first search. 2559 * 2560 * In color terms: 2561 * - the dead_entries set contains all nodes that are reachable 2562 * (i.e. both the black and the grey nodes) 2563 * - any entry not in dead_entries is white 2564 * - any entry that appears in dead_entries before entry, 2565 * is black, the rest is grey. 2566 * The set [entry, end] is also referred to as the wavefront. 2567 * 2568 * Since the tree is always a fully connected graph, the breadth-first 2569 * search guarantees that each vmmap_entry is visited exactly once. 2570 * The vm_map is broken down in linear time. 2571 */ 2572 TAILQ_INIT(&dead_entries); 2573 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2574 DEAD_ENTRY_PUSH(&dead_entries, entry); 2575 while (entry != NULL) { 2576 sched_pause(yield); 2577 uvm_unmap_kill_entry(map, entry); 2578 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2579 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2580 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2581 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2582 /* Update wave-front. */ 2583 entry = TAILQ_NEXT(entry, dfree.deadq); 2584 } 2585 2586 vm_map_unlock(map); 2587 2588 #ifdef VMMAP_DEBUG 2589 numt = numq = 0; 2590 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2591 numt++; 2592 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2593 numq++; 2594 KASSERT(numt == numq); 2595 #endif 2596 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2597 2598 KERNEL_LOCK(); 2599 2600 pmap_destroy(map->pmap); 2601 map->pmap = NULL; 2602 } 2603 2604 /* 2605 * Populate map with free-memory entries. 2606 * 2607 * Map must be initialized and empty. 2608 */ 2609 void 2610 uvm_map_setup_entries(struct vm_map *map) 2611 { 2612 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2613 2614 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2615 } 2616 2617 /* 2618 * Split entry at given address. 2619 * 2620 * orig: entry that is to be split. 2621 * next: a newly allocated map entry that is not linked. 2622 * split: address at which the split is done. 
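 *
 * Example (addresses illustrative): splitting an entry mapping
 * [0x1000, 0x5000) at split == 0x3000 shrinks orig to [0x1000, 0x3000)
 * and links next in as [0x3000, 0x5000), with next's object offset and
 * amap reference advanced by adj = split - orig->start so both halves
 * keep referencing the same backing store.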
2623 */ 2624 void 2625 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2626 struct vm_map_entry *next, vaddr_t split) 2627 { 2628 struct uvm_addr_state *free, *free_before; 2629 vsize_t adj; 2630 2631 if ((split & PAGE_MASK) != 0) { 2632 panic("uvm_map_splitentry: split address 0x%lx " 2633 "not on page boundary!", split); 2634 } 2635 KDASSERT(map != NULL && orig != NULL && next != NULL); 2636 uvm_tree_sanity(map, __FILE__, __LINE__); 2637 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2638 2639 #ifdef VMMAP_DEBUG 2640 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2641 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2642 #endif /* VMMAP_DEBUG */ 2643 2644 /* 2645 * Free space will change, unlink from free space tree. 2646 */ 2647 free = uvm_map_uaddr_e(map, orig); 2648 uvm_mapent_free_remove(map, free, orig); 2649 2650 adj = split - orig->start; 2651 2652 uvm_mapent_copy(orig, next); 2653 if (split >= orig->end) { 2654 next->etype = 0; 2655 next->offset = 0; 2656 next->wired_count = 0; 2657 next->start = next->end = split; 2658 next->guard = 0; 2659 next->fspace = VMMAP_FREE_END(orig) - split; 2660 next->aref.ar_amap = NULL; 2661 next->aref.ar_pageoff = 0; 2662 orig->guard = MIN(orig->guard, split - orig->end); 2663 orig->fspace = split - VMMAP_FREE_START(orig); 2664 } else { 2665 orig->fspace = 0; 2666 orig->guard = 0; 2667 orig->end = next->start = split; 2668 2669 if (next->aref.ar_amap) { 2670 amap_splitref(&orig->aref, &next->aref, adj); 2671 } 2672 if (UVM_ET_ISSUBMAP(orig)) { 2673 uvm_map_reference(next->object.sub_map); 2674 next->offset += adj; 2675 } else if (UVM_ET_ISOBJ(orig)) { 2676 if (next->object.uvm_obj->pgops && 2677 next->object.uvm_obj->pgops->pgo_reference) { 2678 KERNEL_LOCK(); 2679 next->object.uvm_obj->pgops->pgo_reference( 2680 next->object.uvm_obj); 2681 KERNEL_UNLOCK(); 2682 } 2683 next->offset += adj; 2684 } 2685 } 2686 2687 /* 2688 * Link next into address tree. 2689 * Link orig and next into free-space tree. 2690 * 2691 * Don't insert 'next' into the addr tree until orig has been linked, 2692 * in case the free-list looks at adjacent entries in the addr tree 2693 * for its decisions. 2694 */ 2695 if (orig->fspace > 0) 2696 free_before = free; 2697 else 2698 free_before = uvm_map_uaddr_e(map, orig); 2699 uvm_mapent_free_insert(map, free_before, orig); 2700 uvm_mapent_addr_insert(map, next); 2701 uvm_mapent_free_insert(map, free, next); 2702 2703 uvm_tree_sanity(map, __FILE__, __LINE__); 2704 } 2705 2706 2707 #ifdef VMMAP_DEBUG 2708 2709 void 2710 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2711 char *file, int line) 2712 { 2713 char* map_special; 2714 2715 if (test) 2716 return; 2717 2718 if (map == kernel_map) 2719 map_special = " (kernel_map)"; 2720 else if (map == kmem_map) 2721 map_special = " (kmem_map)"; 2722 else 2723 map_special = ""; 2724 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2725 line, test_str); 2726 } 2727 2728 /* 2729 * Check that map is sane. 2730 */ 2731 void 2732 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2733 { 2734 struct vm_map_entry *iter; 2735 vaddr_t addr; 2736 vaddr_t min, max, bound; /* Bounds checker. */ 2737 struct uvm_addr_state *free; 2738 2739 addr = vm_map_min(map); 2740 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2741 /* 2742 * Valid start, end. 2743 * Catch overflow for end+fspace. 
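 * (Each entry accounts for its mapped range [start, end) plus the free
 * space behind it, bounded by VMMAP_FREE_START()/VMMAP_FREE_END(); the
 * checks below also catch wrap-around in that accounting.)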
2744 */ 2745 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2746 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2747 2748 /* May not be empty. */ 2749 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2750 file, line); 2751 2752 /* Addresses for entry must lie within map boundaries. */ 2753 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2754 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2755 2756 /* Tree may not have gaps. */ 2757 UVM_ASSERT(map, iter->start == addr, file, line); 2758 addr = VMMAP_FREE_END(iter); 2759 2760 /* 2761 * Free space may not cross boundaries, unless the same 2762 * free list is used on both sides of the border. 2763 */ 2764 min = VMMAP_FREE_START(iter); 2765 max = VMMAP_FREE_END(iter); 2766 2767 while (min < max && 2768 (bound = uvm_map_boundary(map, min, max)) != max) { 2769 UVM_ASSERT(map, 2770 uvm_map_uaddr(map, bound - 1) == 2771 uvm_map_uaddr(map, bound), 2772 file, line); 2773 min = bound; 2774 } 2775 2776 free = uvm_map_uaddr_e(map, iter); 2777 if (free) { 2778 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2779 file, line); 2780 } else { 2781 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2782 file, line); 2783 } 2784 } 2785 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2786 } 2787 2788 void 2789 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2790 { 2791 struct vm_map_entry *iter; 2792 vsize_t size; 2793 2794 size = 0; 2795 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2796 if (!UVM_ET_ISHOLE(iter)) 2797 size += iter->end - iter->start; 2798 } 2799 2800 if (map->size != size) 2801 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2802 UVM_ASSERT(map, map->size == size, file, line); 2803 2804 vmspace_validate(map); 2805 } 2806 2807 /* 2808 * This function validates the statistics on vmspace. 2809 */ 2810 void 2811 vmspace_validate(struct vm_map *map) 2812 { 2813 struct vmspace *vm; 2814 struct vm_map_entry *iter; 2815 vaddr_t imin, imax; 2816 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2817 vsize_t stack, heap; /* Measured sizes. */ 2818 2819 if (!(map->flags & VM_MAP_ISVMSPACE)) 2820 return; 2821 2822 vm = (struct vmspace *)map; 2823 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2824 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2825 2826 stack = heap = 0; 2827 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2828 imin = imax = iter->start; 2829 2830 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL || 2831 iter->protection == PROT_NONE) 2832 continue; 2833 2834 /* 2835 * Update stack, heap. 2836 * Keep in mind that (theoretically) the entries of 2837 * userspace and stack may be joined. 2838 */ 2839 while (imin != iter->end) { 2840 /* 2841 * Set imax to the first boundary crossed between 2842 * imin and stack addresses.
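 * e.g. an entry [imin, iter->end) that straddles stack_begin (with
 * iter->end <= stack_end) is charged in two steps: [imin, stack_begin)
 * to heap, then [stack_begin, iter->end) to stack.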
2843 */ 2844 imax = iter->end; 2845 if (imin < stack_begin && imax > stack_begin) 2846 imax = stack_begin; 2847 else if (imin < stack_end && imax > stack_end) 2848 imax = stack_end; 2849 2850 if (imin >= stack_begin && imin < stack_end) 2851 stack += imax - imin; 2852 else 2853 heap += imax - imin; 2854 imin = imax; 2855 } 2856 } 2857 2858 heap >>= PAGE_SHIFT; 2859 if (heap != vm->vm_dused) { 2860 printf("vmspace stack range: 0x%lx-0x%lx\n", 2861 stack_begin, stack_end); 2862 panic("vmspace_validate: vmspace.vm_dused invalid, " 2863 "expected %ld pgs, got %d pgs in map %p", 2864 heap, vm->vm_dused, 2865 map); 2866 } 2867 } 2868 2869 #endif /* VMMAP_DEBUG */ 2870 2871 /* 2872 * uvm_map_init: init mapping system at boot time. note that we allocate 2873 * and init the static pool of structs vm_map_entry for the kernel here. 2874 */ 2875 void 2876 uvm_map_init(void) 2877 { 2878 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2879 int lcv; 2880 2881 /* now set up static pool of kernel map entries ... */ 2882 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2883 SLIST_INIT(&uvm.kentry_free); 2884 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2885 SLIST_INSERT_HEAD(&uvm.kentry_free, 2886 &kernel_map_entry[lcv], daddrs.addr_kentry); 2887 } 2888 2889 /* initialize the map-related pools. */ 2890 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 2891 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 2892 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 2893 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 2894 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 2895 IPL_VM, 0, "vmmpekpl", NULL); 2896 pool_sethiwat(&uvm_map_entry_pool, 8192); 2897 2898 uvm_addr_init(); 2899 } 2900 2901 #if defined(DDB) 2902 2903 /* 2904 * DDB hooks 2905 */ 2906 2907 /* 2908 * uvm_map_printit: actually prints the map 2909 */ 2910 void 2911 uvm_map_printit(struct vm_map *map, boolean_t full, 2912 int (*pr)(const char *, ...)) 2913 { 2914 struct vmspace *vm; 2915 struct vm_map_entry *entry; 2916 struct uvm_addr_state *free; 2917 int in_free, i; 2918 char buf[8]; 2919 2920 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2921 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2922 map->b_start, map->b_end); 2923 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2924 map->s_start, map->s_end); 2925 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2926 map->size, map->ref_count, map->timestamp, 2927 map->flags); 2928 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2929 pmap_resident_count(map->pmap)); 2930 2931 /* struct vmspace handling. */ 2932 if (map->flags & VM_MAP_ISVMSPACE) { 2933 vm = (struct vmspace *)map; 2934 2935 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2936 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2937 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2938 vm->vm_tsize, vm->vm_dsize); 2939 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2940 vm->vm_taddr, vm->vm_daddr); 2941 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2942 vm->vm_maxsaddr, vm->vm_minsaddr); 2943 } 2944 2945 if (!full) 2946 goto print_uaddr; 2947 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 2948 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2949 entry, entry->start, entry->end, entry->object.uvm_obj, 2950 (long long)entry->offset, entry->aref.ar_amap, 2951 entry->aref.ar_pageoff); 2952 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " 2953 "syscall=%c, prot(max)=%d/%d, inh=%d, " 2954 "wc=%d, adv=%d\n", 2955 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2956 (entry->etype & UVM_ET_COPYONWRITE) ? 
'T' : 'F', 2957 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2958 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 2959 (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F', 2960 entry->protection, entry->max_protection, 2961 entry->inheritance, entry->wired_count, entry->advice); 2962 2963 free = uvm_map_uaddr_e(map, entry); 2964 in_free = (free != NULL); 2965 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2966 "free=0x%lx-0x%lx\n", 2967 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2968 in_free ? 'T' : 'F', 2969 entry->guard, 2970 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2971 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2972 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2973 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 2974 if (free) { 2975 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2976 free->uaddr_minaddr, free->uaddr_maxaddr, 2977 free->uaddr_functions->uaddr_name); 2978 } 2979 } 2980 2981 print_uaddr: 2982 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2983 for (i = 0; i < nitems(map->uaddr_any); i++) { 2984 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2985 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2986 } 2987 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2988 } 2989 2990 /* 2991 * uvm_object_printit: actually prints the object 2992 */ 2993 void 2994 uvm_object_printit(struct uvm_object *uobj, boolean_t full, 2995 int (*pr)(const char *, ...)) 2996 { 2997 struct vm_page *pg; 2998 int cnt = 0; 2999 3000 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3001 uobj, uobj->pgops, uobj->uo_npages); 3002 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3003 (*pr)("refs=<SYSTEM>\n"); 3004 else 3005 (*pr)("refs=%d\n", uobj->uo_refs); 3006 3007 if (!full) { 3008 return; 3009 } 3010 (*pr)(" PAGES <pg,offset>:\n "); 3011 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3012 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3013 if ((cnt % 3) == 2) { 3014 (*pr)("\n "); 3015 } 3016 cnt++; 3017 } 3018 if ((cnt % 3) != 2) { 3019 (*pr)("\n"); 3020 } 3021 } 3022 3023 /* 3024 * uvm_page_printit: actually print the page 3025 */ 3026 static const char page_flagbits[] = 3027 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3028 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3029 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3030 3031 void 3032 uvm_page_printit(struct vm_page *pg, boolean_t full, 3033 int (*pr)(const char *, ...)) 3034 { 3035 struct vm_page *tpg; 3036 struct uvm_object *uobj; 3037 struct pglist *pgl; 3038 3039 (*pr)("PAGE %p:\n", pg); 3040 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3041 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3042 (long long)pg->phys_addr); 3043 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3044 pg->uobject, pg->uanon, (long long)pg->offset); 3045 #if defined(UVM_PAGE_TRKOWN) 3046 if (pg->pg_flags & PG_BUSY) 3047 (*pr)(" owning thread = %d, tag=%s", 3048 pg->owner, pg->owner_tag); 3049 else 3050 (*pr)(" page not busy, no owner"); 3051 #else 3052 (*pr)(" [page ownership tracking disabled]"); 3053 #endif 3054 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3055 3056 if (!full) 3057 return; 3058 3059 /* cross-verify object/anon */ 3060 if ((pg->pg_flags & PQ_FREE) == 0) { 3061 if (pg->pg_flags & PQ_ANON) { 3062 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3063 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3064 (pg->uanon) ? 
pg->uanon->an_page : NULL); 3065 else 3066 (*pr)(" anon backpointer is OK\n"); 3067 } else { 3068 uobj = pg->uobject; 3069 if (uobj) { 3070 (*pr)(" checking object list\n"); 3071 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3072 if (tpg == pg) { 3073 break; 3074 } 3075 } 3076 if (tpg) 3077 (*pr)(" page found on object list\n"); 3078 else 3079 (*pr)(" >>> PAGE NOT FOUND " 3080 "ON OBJECT LIST! <<<\n"); 3081 } 3082 } 3083 } 3084 3085 /* cross-verify page queue */ 3086 if (pg->pg_flags & PQ_FREE) { 3087 if (uvm_pmr_isfree(pg)) 3088 (*pr)(" page found in uvm_pmemrange\n"); 3089 else 3090 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3091 pgl = NULL; 3092 } else if (pg->pg_flags & PQ_INACTIVE) { 3093 pgl = &uvm.page_inactive; 3094 } else if (pg->pg_flags & PQ_ACTIVE) { 3095 pgl = &uvm.page_active; 3096 } else { 3097 pgl = NULL; 3098 } 3099 3100 if (pgl) { 3101 (*pr)(" checking pageq list\n"); 3102 TAILQ_FOREACH(tpg, pgl, pageq) { 3103 if (tpg == pg) { 3104 break; 3105 } 3106 } 3107 if (tpg) 3108 (*pr)(" page found on pageq list\n"); 3109 else 3110 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3111 } 3112 } 3113 #endif 3114 3115 /* 3116 * uvm_map_protect: change map protection 3117 * 3118 * => set_max means set max_protection. 3119 * => map must be unlocked. 3120 */ 3121 int 3122 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3123 vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable) 3124 { 3125 struct vm_map_entry *first, *iter; 3126 vm_prot_t old_prot; 3127 vm_prot_t mask; 3128 vsize_t dused; 3129 int error; 3130 3131 KASSERT((etype & ~UVM_ET_STACK) == 0); /* only UVM_ET_STACK allowed */ 3132 3133 if (start > end) 3134 return EINVAL; 3135 start = MAX(start, map->min_offset); 3136 end = MIN(end, map->max_offset); 3137 if (start >= end) 3138 return 0; 3139 3140 dused = 0; 3141 error = 0; 3142 vm_map_lock(map); 3143 3144 /* 3145 * Set up first and last. 3146 * - first will contain first entry at or after start. 3147 */ 3148 first = uvm_map_entrybyaddr(&map->addr, start); 3149 KDASSERT(first != NULL); 3150 if (first->end <= start) 3151 first = RBT_NEXT(uvm_map_addr, first); 3152 3153 /* First, check for protection violations. */ 3154 for (iter = first; iter != NULL && iter->start < end; 3155 iter = RBT_NEXT(uvm_map_addr, iter)) { 3156 /* Treat memory holes as free space. */ 3157 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3158 continue; 3159 3160 if (checkimmutable && 3161 (iter->etype & UVM_ET_IMMUTABLE)) { 3162 if (iter->protection == (PROT_READ | PROT_WRITE) && 3163 new_prot == PROT_READ) { 3164 /* Permit RW to R as a data-locking mechanism */ 3165 ; 3166 } else { 3167 error = EPERM; 3168 goto out; 3169 } 3170 } 3171 old_prot = iter->protection; 3172 if (old_prot == PROT_NONE && new_prot != old_prot) { 3173 dused += uvmspace_dused( 3174 map, MAX(start, iter->start), MIN(end, iter->end)); 3175 } 3176 3177 if (UVM_ET_ISSUBMAP(iter)) { 3178 error = EINVAL; 3179 goto out; 3180 } 3181 if ((new_prot & iter->max_protection) != new_prot) { 3182 error = EACCES; 3183 goto out; 3184 } 3185 if (map == kernel_map && 3186 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3187 panic("uvm_map_protect: kernel map W^X violation requested"); 3188 } 3189 3190 /* Check limits. 
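 * dused here counts the pages that would newly become accountable data
 * (entries going from PROT_NONE to something else); converted to bytes
 * it must fit under RLIMIT_DATA together with the vm_dused pages already
 * charged, otherwise the request fails with ENOMEM below.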
*/ 3191 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) { 3192 vsize_t limit = lim_cur(RLIMIT_DATA); 3193 dused = ptoa(dused); 3194 if (limit < dused || 3195 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) { 3196 error = ENOMEM; 3197 goto out; 3198 } 3199 } 3200 3201 /* only apply UVM_ET_STACK on a mapping changing to RW */ 3202 if (etype && new_prot != (PROT_READ|PROT_WRITE)) 3203 etype = 0; 3204 3205 /* Fix protections. */ 3206 for (iter = first; iter != NULL && iter->start < end; 3207 iter = RBT_NEXT(uvm_map_addr, iter)) { 3208 /* Treat memory holes as free space. */ 3209 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3210 continue; 3211 3212 old_prot = iter->protection; 3213 3214 /* 3215 * Skip adapting protection iff old and new protection 3216 * are equal. 3217 */ 3218 if (set_max) { 3219 if (old_prot == (new_prot & old_prot) && 3220 iter->max_protection == new_prot) 3221 continue; 3222 } else { 3223 if (old_prot == new_prot) 3224 continue; 3225 } 3226 3227 UVM_MAP_CLIP_START(map, iter, start); 3228 UVM_MAP_CLIP_END(map, iter, end); 3229 3230 if (set_max) { 3231 iter->max_protection = new_prot; 3232 iter->protection &= new_prot; 3233 } else 3234 iter->protection = new_prot; 3235 iter->etype |= etype; /* potentially add UVM_ET_STACK */ 3236 3237 /* 3238 * update physical map if necessary. worry about copy-on-write 3239 * here -- CHECK THIS XXX 3240 */ 3241 if (iter->protection != old_prot) { 3242 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3243 ~PROT_WRITE : PROT_MASK; 3244 3245 /* XXX should only wserial++ if no split occurs */ 3246 if (iter->protection & PROT_WRITE) 3247 map->wserial++; 3248 3249 if (map->flags & VM_MAP_ISVMSPACE) { 3250 if (old_prot == PROT_NONE) { 3251 ((struct vmspace *)map)->vm_dused += 3252 uvmspace_dused(map, iter->start, 3253 iter->end); 3254 } 3255 if (iter->protection == PROT_NONE) { 3256 ((struct vmspace *)map)->vm_dused -= 3257 uvmspace_dused(map, iter->start, 3258 iter->end); 3259 } 3260 } 3261 3262 /* update pmap */ 3263 if ((iter->protection & mask) == PROT_NONE && 3264 VM_MAPENT_ISWIRED(iter)) { 3265 /* 3266 * TODO(ariane) this is stupid. wired_count 3267 * is 0 if not wired, otherwise anything 3268 * larger than 0 (incremented once each time 3269 * wire is called). 3270 * Mostly to be able to undo the damage on 3271 * failure. Not the actually be a wired 3272 * refcounter... 3273 * Originally: iter->wired_count--; 3274 * (don't we have to unwire this in the pmap 3275 * as well?) 3276 */ 3277 iter->wired_count = 0; 3278 } 3279 uvm_map_lock_entry(iter); 3280 pmap_protect(map->pmap, iter->start, iter->end, 3281 iter->protection & mask); 3282 uvm_map_unlock_entry(iter); 3283 } 3284 3285 /* 3286 * If the map is configured to lock any future mappings, 3287 * wire this entry now if the old protection was PROT_NONE 3288 * and the new protection is not PROT_NONE. 3289 */ 3290 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3291 VM_MAPENT_ISWIRED(iter) == 0 && 3292 old_prot == PROT_NONE && 3293 new_prot != PROT_NONE) { 3294 if (uvm_map_pageable(map, iter->start, iter->end, 3295 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3296 /* 3297 * If locking the entry fails, remember the 3298 * error if it's the first one. Note we 3299 * still continue setting the protection in 3300 * the map, but it will return the resource 3301 * storage condition regardless. 
3302 * 3303 * XXX Ignore what the actual error is, 3304 * XXX just call it a resource shortage 3305 * XXX so that it doesn't get confused 3306 * XXX what uvm_map_protect() itself would 3307 * XXX normally return. 3308 */ 3309 error = ENOMEM; 3310 } 3311 } 3312 } 3313 pmap_update(map->pmap); 3314 3315 out: 3316 if (etype & UVM_ET_STACK) 3317 map->sserial++; 3318 vm_map_unlock(map); 3319 return error; 3320 } 3321 3322 /* 3323 * uvmspace_alloc: allocate a vmspace structure. 3324 * 3325 * - structure includes vm_map and pmap 3326 * - XXX: no locking on this structure 3327 * - refcnt set to 1, rest must be init'd by caller 3328 */ 3329 struct vmspace * 3330 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3331 boolean_t remove_holes) 3332 { 3333 struct vmspace *vm; 3334 3335 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3336 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3337 return (vm); 3338 } 3339 3340 /* 3341 * uvmspace_init: initialize a vmspace structure. 3342 * 3343 * - XXX: no locking on this structure 3344 * - refcnt set to 1, rest must be init'd by caller 3345 */ 3346 void 3347 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3348 boolean_t pageable, boolean_t remove_holes) 3349 { 3350 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3351 3352 if (pmap) 3353 pmap_reference(pmap); 3354 else 3355 pmap = pmap_create(); 3356 3357 uvm_map_setup(&vm->vm_map, pmap, min, max, 3358 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3359 3360 vm->vm_refcnt = 1; 3361 3362 if (remove_holes) 3363 pmap_remove_holes(vm); 3364 } 3365 3366 /* 3367 * uvmspace_share: share a vmspace between two processes 3368 * 3369 * - used for vfork 3370 */ 3371 3372 struct vmspace * 3373 uvmspace_share(struct process *pr) 3374 { 3375 struct vmspace *vm = pr->ps_vmspace; 3376 3377 uvmspace_addref(vm); 3378 return vm; 3379 } 3380 3381 /* 3382 * uvmspace_exec: the process wants to exec a new program 3383 * 3384 * - XXX: no locking on vmspace 3385 */ 3386 3387 void 3388 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3389 { 3390 struct process *pr = p->p_p; 3391 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3392 struct vm_map *map = &ovm->vm_map; 3393 struct uvm_map_deadq dead_entries; 3394 3395 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3396 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3397 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3398 3399 pmap_unuse_final(p); /* before stack addresses go away */ 3400 TAILQ_INIT(&dead_entries); 3401 3402 /* see if more than one process is using this vmspace... */ 3403 if (ovm->vm_refcnt == 1) { 3404 /* 3405 * If pr is the only process using its vmspace then 3406 * we can safely recycle that vmspace for the program 3407 * that is being exec'd. 3408 */ 3409 3410 #ifdef SYSVSHM 3411 /* 3412 * SYSV SHM semantics require us to kill all segments on an exec 3413 */ 3414 if (ovm->vm_shm) 3415 shmexit(ovm); 3416 #endif 3417 3418 /* 3419 * POSIX 1003.1b -- "lock future mappings" is revoked 3420 * when a process execs another program image. 3421 */ 3422 vm_map_lock(map); 3423 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE); 3424 3425 /* 3426 * now unmap the old program 3427 * 3428 * Instead of attempting to keep the map valid, we simply 3429 * nuke all entries and ask uvm_map_setup to reinitialize 3430 * the map to the new boundaries. 3431 * 3432 * uvm_unmap_remove will actually nuke all entries for us 3433 * (as in, not replace them with free-memory entries). 
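 * (Hence the call below passes remove_holes = TRUE and markfree = FALSE
 * and skips the immutability check, leaving the address tree empty for
 * uvm_map_setup_entries() to repopulate.)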
3434 */ 3435 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3436 &dead_entries, TRUE, FALSE, FALSE); 3437 3438 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3439 3440 /* Nuke statistics and boundaries. */ 3441 memset(&ovm->vm_startcopy, 0, 3442 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3443 3444 3445 if (end & (vaddr_t)PAGE_MASK) { 3446 end += 1; 3447 if (end == 0) /* overflow */ 3448 end -= PAGE_SIZE; 3449 } 3450 3451 /* Setup new boundaries and populate map with entries. */ 3452 map->min_offset = start; 3453 map->max_offset = end; 3454 uvm_map_setup_entries(map); 3455 vm_map_unlock(map); 3456 3457 /* but keep MMU holes unavailable */ 3458 pmap_remove_holes(ovm); 3459 } else { 3460 /* 3461 * pr's vmspace is being shared, so we can't reuse 3462 * it for pr since it is still being used for others. 3463 * allocate a new vmspace for pr 3464 */ 3465 nvm = uvmspace_alloc(start, end, 3466 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3467 3468 /* install new vmspace and drop our ref to the old one. */ 3469 pmap_deactivate(p); 3470 p->p_vmspace = pr->ps_vmspace = nvm; 3471 pmap_activate(p); 3472 3473 uvmspace_free(ovm); 3474 } 3475 #ifdef PMAP_CHECK_COPYIN 3476 p->p_vmspace->vm_map.check_copyin_count = 0; /* disable checks */ 3477 #endif 3478 3479 /* Release dead entries */ 3480 uvm_unmap_detach(&dead_entries, 0); 3481 } 3482 3483 /* 3484 * uvmspace_addref: add a reference to a vmspace. 3485 */ 3486 void 3487 uvmspace_addref(struct vmspace *vm) 3488 { 3489 KERNEL_ASSERT_LOCKED(); 3490 KASSERT(vm->vm_refcnt > 0); 3491 3492 vm->vm_refcnt++; 3493 } 3494 3495 /* 3496 * uvmspace_free: free a vmspace data structure 3497 */ 3498 void 3499 uvmspace_free(struct vmspace *vm) 3500 { 3501 KERNEL_ASSERT_LOCKED(); 3502 3503 if (--vm->vm_refcnt == 0) { 3504 /* 3505 * lock the map, to wait out all other references to it. delete 3506 * all of the mappings and pages they hold, then call the pmap 3507 * module to reclaim anything left. 3508 */ 3509 #ifdef SYSVSHM 3510 /* Get rid of any SYSV shared memory segments. */ 3511 if (vm->vm_shm != NULL) 3512 shmexit(vm); 3513 #endif 3514 3515 uvm_map_teardown(&vm->vm_map); 3516 pool_put(&uvm_vmspace_pool, vm); 3517 } 3518 } 3519 3520 /* 3521 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3522 * srcmap to the address range [dstaddr, dstaddr + sz) in 3523 * dstmap. 3524 * 3525 * The whole address range in srcmap must be backed by an object 3526 * (no holes). 3527 * 3528 * If successful, the address ranges share memory and the destination 3529 * address range uses the protection flags in prot. 3530 * 3531 * This routine assumes that sz is a multiple of PAGE_SIZE and 3532 * that dstaddr and srcaddr are page-aligned. 
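 *
 * Minimal usage sketch (names illustrative, error handling omitted);
 * the destination range must still be available in dstmap:
 *
 *	error = uvm_share(dstmap, dstaddr, PROT_READ,
 *	    srcmap, srcaddr, round_page(len));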
3533 */ 3534 int 3535 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3536 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3537 { 3538 int ret = 0; 3539 vaddr_t unmap_end; 3540 vaddr_t dstva; 3541 vsize_t s_off, len, n = sz, remain; 3542 struct vm_map_entry *first = NULL, *last = NULL; 3543 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3544 struct uvm_map_deadq dead; 3545 3546 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3547 return EINVAL; 3548 3549 TAILQ_INIT(&dead); 3550 vm_map_lock(dstmap); 3551 vm_map_lock_read(srcmap); 3552 3553 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3554 ret = ENOMEM; 3555 goto exit_unlock; 3556 } 3557 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3558 ret = EINVAL; 3559 goto exit_unlock; 3560 } 3561 3562 dstva = dstaddr; 3563 unmap_end = dstaddr; 3564 for (; src_entry != NULL; 3565 psrc_entry = src_entry, 3566 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3567 /* hole in address space, bail out */ 3568 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3569 break; 3570 if (src_entry->start >= srcaddr + sz) 3571 break; 3572 3573 if (UVM_ET_ISSUBMAP(src_entry)) 3574 panic("uvm_share: encountered a submap (illegal)"); 3575 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3576 UVM_ET_ISNEEDSCOPY(src_entry)) 3577 panic("uvm_share: non-copy_on_write map entries " 3578 "marked needs_copy (illegal)"); 3579 3580 /* 3581 * srcaddr > map entry start? means we are in the middle of a 3582 * map, so we calculate the offset to use in the source map. 3583 */ 3584 if (srcaddr > src_entry->start) 3585 s_off = srcaddr - src_entry->start; 3586 else if (srcaddr == src_entry->start) 3587 s_off = 0; 3588 else 3589 panic("uvm_share: map entry start > srcaddr"); 3590 3591 remain = src_entry->end - src_entry->start - s_off; 3592 3593 /* Determine how many bytes to share in this pass */ 3594 if (n < remain) 3595 len = n; 3596 else 3597 len = remain; 3598 3599 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot, 3600 srcmap, src_entry, &dead) == NULL) 3601 break; 3602 3603 n -= len; 3604 dstva += len; 3605 srcaddr += len; 3606 unmap_end = dstva + len; 3607 if (n == 0) 3608 goto exit_unlock; 3609 } 3610 3611 ret = EINVAL; 3612 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE); 3613 3614 exit_unlock: 3615 vm_map_unlock_read(srcmap); 3616 vm_map_unlock(dstmap); 3617 uvm_unmap_detach(&dead, 0); 3618 3619 return ret; 3620 } 3621 3622 /* 3623 * Clone map entry into other map. 3624 * 3625 * Mapping will be placed at dstaddr, for the same length. 3626 * Space must be available. 3627 * Reference counters are incremented. 3628 */ 3629 struct vm_map_entry * 3630 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3631 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3632 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3633 int mapent_flags, int amap_share_flags) 3634 { 3635 struct vm_map_entry *new_entry, *first, *last; 3636 3637 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3638 3639 /* Create new entry (linked in on creation). Fill in first, last. 
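 * uvm_map_isavail() also hands back the entries bounding the free range
 * in first/last, which uvm_map_mkentry() needs to link the clone into
 * the destination map.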
*/ 3640 first = last = NULL; 3641 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3642 panic("uvm_mapent_clone: no space in map for " 3643 "entry in empty map"); 3644 } 3645 new_entry = uvm_map_mkentry(dstmap, first, last, 3646 dstaddr, dstlen, mapent_flags, dead, NULL); 3647 if (new_entry == NULL) 3648 return NULL; 3649 /* old_entry -> new_entry */ 3650 new_entry->object = old_entry->object; 3651 new_entry->offset = old_entry->offset; 3652 new_entry->aref = old_entry->aref; 3653 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3654 new_entry->protection = prot; 3655 new_entry->max_protection = maxprot; 3656 new_entry->inheritance = old_entry->inheritance; 3657 new_entry->advice = old_entry->advice; 3658 3659 /* gain reference to object backing the map (can't be a submap). */ 3660 if (new_entry->aref.ar_amap) { 3661 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3662 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3663 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3664 amap_share_flags); 3665 } 3666 3667 if (UVM_ET_ISOBJ(new_entry) && 3668 new_entry->object.uvm_obj->pgops->pgo_reference) { 3669 new_entry->offset += off; 3670 new_entry->object.uvm_obj->pgops->pgo_reference 3671 (new_entry->object.uvm_obj); 3672 } 3673 3674 return new_entry; 3675 } 3676 3677 struct vm_map_entry * 3678 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3679 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3680 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3681 { 3682 /* 3683 * If old_entry refers to a copy-on-write region that has not yet been 3684 * written to (needs_copy flag is set), then we need to allocate a new 3685 * amap for old_entry. 3686 * 3687 * If we do not do this, and the process owning old_entry does a copy-on 3688 * write later, old_entry and new_entry will refer to different memory 3689 * regions, and the memory between the processes is no longer shared. 3690 * 3691 * [in other words, we need to clear needs_copy] 3692 */ 3693 3694 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3695 /* get our own amap, clears needs_copy */ 3696 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0); 3697 /* XXXCDC: WAITOK??? */ 3698 } 3699 3700 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3701 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3702 } 3703 3704 /* 3705 * share the mapping: this means we want the old and 3706 * new entries to share amaps and backing objects. 3707 */ 3708 struct vm_map_entry * 3709 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3710 struct vm_map *old_map, 3711 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3712 { 3713 struct vm_map_entry *new_entry; 3714 3715 new_entry = uvm_mapent_share(new_map, old_entry->start, 3716 old_entry->end - old_entry->start, 0, old_entry->protection, 3717 old_entry->max_protection, old_map, old_entry, dead); 3718 3719 return (new_entry); 3720 } 3721 3722 /* 3723 * copy-on-write the mapping (using mmap's 3724 * MAP_PRIVATE semantics) 3725 * 3726 * allocate new_entry, adjust reference counts. 3727 * (note that new references are read-only). 
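 *
 * In short: the child entry is marked COPYONWRITE|NEEDSCOPY; the amap is
 * copied immediately only if it is shared or the parent entry is wired,
 * otherwise the copy is deferred by write-protecting the parent (or,
 * when the parent has no amap yet, the child) mappings.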
3728 */ 3729 struct vm_map_entry * 3730 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3731 struct vm_map *old_map, 3732 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3733 { 3734 struct vm_map_entry *new_entry; 3735 boolean_t protect_child; 3736 3737 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3738 old_entry->end - old_entry->start, 0, old_entry->protection, 3739 old_entry->max_protection, old_entry, dead, 0, 0); 3740 3741 new_entry->etype |= 3742 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3743 3744 /* 3745 * the new entry will need an amap. it will either 3746 * need to be copied from the old entry or created 3747 * from scratch (if the old entry does not have an 3748 * amap). can we defer this process until later 3749 * (by setting "needs_copy") or do we need to copy 3750 * the amap now? 3751 * 3752 * we must copy the amap now if any of the following 3753 * conditions hold: 3754 * 1. the old entry has an amap and that amap is 3755 * being shared. this means that the old (parent) 3756 * process is sharing the amap with another 3757 * process. if we do not clear needs_copy here 3758 * we will end up in a situation where both the 3759 * parent and child process are referring to the 3760 * same amap with "needs_copy" set. if the 3761 * parent write-faults, the fault routine will 3762 * clear "needs_copy" in the parent by allocating 3763 * a new amap. this is wrong because the 3764 * parent is supposed to be sharing the old amap 3765 * and the new amap will break that. 3766 * 3767 * 2. if the old entry has an amap and a non-zero 3768 * wire count then we are going to have to call 3769 * amap_cow_now to avoid page faults in the 3770 * parent process. since amap_cow_now requires 3771 * "needs_copy" to be clear we might as well 3772 * clear it here as well. 3773 * 3774 */ 3775 if (old_entry->aref.ar_amap != NULL && 3776 ((amap_flags(old_entry->aref.ar_amap) & 3777 AMAP_SHARED) != 0 || 3778 VM_MAPENT_ISWIRED(old_entry))) { 3779 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3780 0, 0); 3781 /* XXXCDC: M_WAITOK ... ok? */ 3782 } 3783 3784 /* 3785 * if the parent's entry is wired down, then the 3786 * parent process does not want page faults on 3787 * access to that memory. this means that we 3788 * cannot do copy-on-write because we can't write 3789 * protect the old entry. in this case we 3790 * resolve all copy-on-write faults now, using 3791 * amap_cow_now. note that we have already 3792 * allocated any needed amap (above). 3793 */ 3794 if (VM_MAPENT_ISWIRED(old_entry)) { 3795 /* 3796 * resolve all copy-on-write faults now 3797 * (note that there is nothing to do if 3798 * the old mapping does not have an amap). 3799 */ 3800 if (old_entry->aref.ar_amap) 3801 amap_cow_now(new_map, new_entry); 3802 } else { 3803 if (old_entry->aref.ar_amap) { 3804 /* 3805 * setup mappings to trigger copy-on-write faults 3806 * we must write-protect the parent if it has 3807 * an amap and it is not already "needs_copy"... 3808 * if it is already "needs_copy" then the parent 3809 * has already been write-protected by a previous 3810 * fork operation. 3811 * 3812 * if we do not write-protect the parent, then 3813 * we must be sure to write-protect the child. 
3814 */ 3815 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3816 if (old_entry->max_protection & PROT_WRITE) { 3817 uvm_map_lock_entry(old_entry); 3818 pmap_protect(old_map->pmap, 3819 old_entry->start, 3820 old_entry->end, 3821 old_entry->protection & 3822 ~PROT_WRITE); 3823 uvm_map_unlock_entry(old_entry); 3824 pmap_update(old_map->pmap); 3825 } 3826 old_entry->etype |= UVM_ET_NEEDSCOPY; 3827 } 3828 3829 /* parent must now be write-protected */ 3830 protect_child = FALSE; 3831 } else { 3832 /* 3833 * we only need to protect the child if the 3834 * parent has write access. 3835 */ 3836 if (old_entry->max_protection & PROT_WRITE) 3837 protect_child = TRUE; 3838 else 3839 protect_child = FALSE; 3840 } 3841 3842 /* protect the child's mappings if necessary */ 3843 if (protect_child) { 3844 pmap_protect(new_map->pmap, new_entry->start, 3845 new_entry->end, 3846 new_entry->protection & 3847 ~PROT_WRITE); 3848 } 3849 } 3850 3851 return (new_entry); 3852 } 3853 3854 /* 3855 * zero the mapping: the new entry will be zero initialized 3856 */ 3857 struct vm_map_entry * 3858 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3859 struct vm_map *old_map, 3860 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3861 { 3862 struct vm_map_entry *new_entry; 3863 3864 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3865 old_entry->end - old_entry->start, 0, old_entry->protection, 3866 old_entry->max_protection, old_entry, dead, 0, 0); 3867 3868 new_entry->etype |= 3869 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3870 3871 if (new_entry->aref.ar_amap) { 3872 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3873 atop(new_entry->end - new_entry->start), 0); 3874 new_entry->aref.ar_amap = NULL; 3875 new_entry->aref.ar_pageoff = 0; 3876 } 3877 3878 if (UVM_ET_ISOBJ(new_entry)) { 3879 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3880 new_entry->object.uvm_obj->pgops->pgo_detach( 3881 new_entry->object.uvm_obj); 3882 new_entry->object.uvm_obj = NULL; 3883 new_entry->etype &= ~UVM_ET_OBJ; 3884 } 3885 3886 return (new_entry); 3887 } 3888 3889 /* 3890 * uvmspace_fork: fork a process' main map 3891 * 3892 * => create a new vmspace for child process from parent. 3893 * => parent's map must not be locked. 3894 */ 3895 struct vmspace * 3896 uvmspace_fork(struct process *pr) 3897 { 3898 struct vmspace *vm1 = pr->ps_vmspace; 3899 struct vmspace *vm2; 3900 struct vm_map *old_map = &vm1->vm_map; 3901 struct vm_map *new_map; 3902 struct vm_map_entry *old_entry, *new_entry; 3903 struct uvm_map_deadq dead; 3904 3905 vm_map_lock(old_map); 3906 3907 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3908 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3909 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3910 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3911 vm2->vm_dused = 0; /* Statistic managed by us. */ 3912 new_map = &vm2->vm_map; 3913 vm_map_lock(new_map); 3914 3915 /* go entry-by-entry */ 3916 TAILQ_INIT(&dead); 3917 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3918 if (old_entry->start == old_entry->end) 3919 continue; 3920 3921 /* first, some sanity checks on the old entry */ 3922 if (UVM_ET_ISSUBMAP(old_entry)) { 3923 panic("fork: encountered a submap during fork " 3924 "(illegal)"); 3925 } 3926 3927 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3928 UVM_ET_ISNEEDSCOPY(old_entry)) { 3929 panic("fork: non-copy_on_write map entry marked " 3930 "needs_copy (illegal)"); 3931 } 3932 3933 /* Apply inheritance. 
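 * MAP_INHERIT_SHARE -> uvm_mapent_forkshared() (share amap/object),
 * MAP_INHERIT_COPY  -> uvm_mapent_forkcopy()  (copy-on-write),
 * MAP_INHERIT_ZERO  -> uvm_mapent_forkzero()  (fresh zero-fill);
 * anything else (e.g. MAP_INHERIT_NONE) is simply skipped.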
*/ 3934 switch (old_entry->inheritance) { 3935 case MAP_INHERIT_SHARE: 3936 new_entry = uvm_mapent_forkshared(vm2, new_map, 3937 old_map, old_entry, &dead); 3938 break; 3939 case MAP_INHERIT_COPY: 3940 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3941 old_map, old_entry, &dead); 3942 break; 3943 case MAP_INHERIT_ZERO: 3944 new_entry = uvm_mapent_forkzero(vm2, new_map, 3945 old_map, old_entry, &dead); 3946 break; 3947 default: 3948 continue; 3949 } 3950 3951 /* Update process statistics. */ 3952 if (!UVM_ET_ISHOLE(new_entry)) 3953 new_map->size += new_entry->end - new_entry->start; 3954 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) && 3955 new_entry->protection != PROT_NONE) { 3956 vm2->vm_dused += uvmspace_dused( 3957 new_map, new_entry->start, new_entry->end); 3958 } 3959 } 3960 new_map->flags |= old_map->flags & VM_MAP_SYSCALL_ONCE; 3961 #ifdef PMAP_CHECK_COPYIN 3962 if (PMAP_CHECK_COPYIN) { 3963 memcpy(&new_map->check_copyin, &old_map->check_copyin, 3964 sizeof(new_map->check_copyin)); 3965 membar_producer(); 3966 new_map->check_copyin_count = old_map->check_copyin_count; 3967 } 3968 #endif 3969 3970 vm_map_unlock(old_map); 3971 vm_map_unlock(new_map); 3972 3973 /* 3974 * This can actually happen, if multiple entries described a 3975 * space in which an entry was inherited. 3976 */ 3977 uvm_unmap_detach(&dead, 0); 3978 3979 #ifdef SYSVSHM 3980 if (vm1->vm_shm) 3981 shmfork(vm1, vm2); 3982 #endif 3983 3984 return vm2; 3985 } 3986 3987 /* 3988 * uvm_map_hint: return the beginning of the best area suitable for 3989 * creating a new mapping with "prot" protection. 3990 */ 3991 vaddr_t 3992 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3993 vaddr_t maxaddr) 3994 { 3995 vaddr_t addr; 3996 vaddr_t spacing; 3997 3998 #ifdef __i386__ 3999 /* 4000 * If executable skip first two pages, otherwise start 4001 * after data + heap region. 4002 */ 4003 if ((prot & PROT_EXEC) != 0 && 4004 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4005 addr = (PAGE_SIZE*2) + 4006 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4007 return (round_page(addr)); 4008 } 4009 #endif 4010 4011 #if defined (__LP64__) 4012 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4013 #else 4014 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4015 #endif 4016 4017 /* 4018 * Start malloc/mmap after the brk. 4019 */ 4020 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 4021 addr = MAX(addr, minaddr); 4022 4023 if (addr < maxaddr) { 4024 while (spacing > maxaddr - addr) 4025 spacing >>= 1; 4026 } 4027 addr += arc4random() & spacing; 4028 return (round_page(addr)); 4029 } 4030 4031 /* 4032 * uvm_map_submap: punch down part of a map into a submap 4033 * 4034 * => only the kernel_map is allowed to be submapped 4035 * => the purpose of submapping is to break up the locking granularity 4036 * of a larger map 4037 * => the range specified must have been mapped previously with a uvm_map() 4038 * call [with uobj==NULL] to create a blank map entry in the main map. 4039 * [And it had better still be blank!] 4040 * => maps which contain submaps should never be copied or forked. 4041 * => to remove a submap, use uvm_unmap() on the main map 4042 * and then uvm_map_deallocate() the submap. 4043 * => main map must be unlocked. 4044 * => submap must have been init'd and have a zero reference count. 
4045 * [need not be locked as we don't actually reference it] 4046 */ 4047 int 4048 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 4049 struct vm_map *submap) 4050 { 4051 struct vm_map_entry *entry; 4052 int result; 4053 4054 if (start > map->max_offset || end > map->max_offset || 4055 start < map->min_offset || end < map->min_offset) 4056 return EINVAL; 4057 4058 vm_map_lock(map); 4059 4060 if (uvm_map_lookup_entry(map, start, &entry)) { 4061 UVM_MAP_CLIP_START(map, entry, start); 4062 UVM_MAP_CLIP_END(map, entry, end); 4063 } else 4064 entry = NULL; 4065 4066 if (entry != NULL && 4067 entry->start == start && entry->end == end && 4068 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 4069 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 4070 entry->etype |= UVM_ET_SUBMAP; 4071 entry->object.sub_map = submap; 4072 entry->offset = 0; 4073 uvm_map_reference(submap); 4074 result = 0; 4075 } else 4076 result = EINVAL; 4077 4078 vm_map_unlock(map); 4079 return result; 4080 } 4081 4082 /* 4083 * uvm_map_checkprot: check protection in map 4084 * 4085 * => must allow specific protection in a fully allocated region. 4086 * => map must be read or write locked by caller. 4087 */ 4088 boolean_t 4089 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 4090 vm_prot_t protection) 4091 { 4092 struct vm_map_entry *entry; 4093 4094 vm_map_assert_anylock(map); 4095 4096 if (start < map->min_offset || end > map->max_offset || start > end) 4097 return FALSE; 4098 if (start == end) 4099 return TRUE; 4100 4101 /* 4102 * Iterate entries. 4103 */ 4104 for (entry = uvm_map_entrybyaddr(&map->addr, start); 4105 entry != NULL && entry->start < end; 4106 entry = RBT_NEXT(uvm_map_addr, entry)) { 4107 /* Fail if a hole is found. */ 4108 if (UVM_ET_ISHOLE(entry) || 4109 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 4110 return FALSE; 4111 4112 /* Check protection. */ 4113 if ((entry->protection & protection) != protection) 4114 return FALSE; 4115 } 4116 return TRUE; 4117 } 4118 4119 /* 4120 * uvm_map_create: create map 4121 */ 4122 vm_map_t 4123 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4124 { 4125 vm_map_t map; 4126 4127 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4128 uvm_map_setup(map, pmap, min, max, flags); 4129 return (map); 4130 } 4131 4132 /* 4133 * uvm_map_deallocate: drop reference to a map 4134 * 4135 * => caller must not lock map 4136 * => we will zap map if ref count goes to zero 4137 */ 4138 void 4139 uvm_map_deallocate(vm_map_t map) 4140 { 4141 int c; 4142 struct uvm_map_deadq dead; 4143 4144 c = atomic_dec_int_nv(&map->ref_count); 4145 if (c > 0) { 4146 return; 4147 } 4148 4149 /* 4150 * all references gone. unmap and free. 4151 * 4152 * No lock required: we are only one to access this map. 4153 */ 4154 TAILQ_INIT(&dead); 4155 uvm_tree_sanity(map, __FILE__, __LINE__); 4156 vm_map_lock(map); 4157 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4158 TRUE, FALSE, FALSE); 4159 vm_map_unlock(map); 4160 pmap_destroy(map->pmap); 4161 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4162 free(map, M_VMMAP, sizeof *map); 4163 4164 uvm_unmap_detach(&dead, 0); 4165 } 4166 4167 /* 4168 * uvm_map_inherit: set inheritance code for range of addrs in map. 4169 * 4170 * => map must be unlocked 4171 * => note that the inherit code is used during a "fork". see fork 4172 * code for details. 
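 * => (added, hedged example: userland reaches this through
 *     minherit(2), e.g. minherit(buf, len, MAP_INHERIT_SHARE) to keep
 *     a region visible in children across fork(2).)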
4173 */ 4174 int 4175 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4176 vm_inherit_t new_inheritance) 4177 { 4178 struct vm_map_entry *entry; 4179 4180 switch (new_inheritance) { 4181 case MAP_INHERIT_NONE: 4182 case MAP_INHERIT_COPY: 4183 case MAP_INHERIT_SHARE: 4184 case MAP_INHERIT_ZERO: 4185 break; 4186 default: 4187 return (EINVAL); 4188 } 4189 4190 if (start > end) 4191 return EINVAL; 4192 start = MAX(start, map->min_offset); 4193 end = MIN(end, map->max_offset); 4194 if (start >= end) 4195 return 0; 4196 4197 vm_map_lock(map); 4198 4199 entry = uvm_map_entrybyaddr(&map->addr, start); 4200 if (entry->end > start) 4201 UVM_MAP_CLIP_START(map, entry, start); 4202 else 4203 entry = RBT_NEXT(uvm_map_addr, entry); 4204 4205 while (entry != NULL && entry->start < end) { 4206 UVM_MAP_CLIP_END(map, entry, end); 4207 entry->inheritance = new_inheritance; 4208 entry = RBT_NEXT(uvm_map_addr, entry); 4209 } 4210 4211 vm_map_unlock(map); 4212 return (0); 4213 } 4214 4215 #ifdef PMAP_CHECK_COPYIN 4216 static void inline 4217 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end) 4218 { 4219 if (PMAP_CHECK_COPYIN == 0 || 4220 map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX) 4221 return; 4222 vm_map_assert_wrlock(map); 4223 map->check_copyin[map->check_copyin_count].start = start; 4224 map->check_copyin[map->check_copyin_count].end = end; 4225 membar_producer(); 4226 map->check_copyin_count++; 4227 } 4228 4229 /* 4230 * uvm_map_check_copyin_add: remember regions which are X-only for copyin(), 4231 * copyinstr(), uiomove(), and others 4232 * 4233 * => map must be unlocked 4234 */ 4235 int 4236 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end) 4237 { 4238 if (start > end) 4239 return EINVAL; 4240 start = MAX(start, map->min_offset); 4241 end = MIN(end, map->max_offset); 4242 if (start >= end) 4243 return 0; 4244 vm_map_lock(map); 4245 check_copyin_add(map, start, end); 4246 vm_map_unlock(map); 4247 return (0); 4248 } 4249 #endif /* PMAP_CHECK_COPYIN */ 4250 4251 /* 4252 * uvm_map_syscall: permit system calls for range of addrs in map. 4253 * 4254 * => map must be unlocked 4255 */ 4256 int 4257 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end) 4258 { 4259 struct vm_map_entry *entry; 4260 4261 if (start > end) 4262 return EINVAL; 4263 start = MAX(start, map->min_offset); 4264 end = MIN(end, map->max_offset); 4265 if (start >= end) 4266 return 0; 4267 if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */ 4268 return (EPERM); 4269 4270 vm_map_lock(map); 4271 4272 entry = uvm_map_entrybyaddr(&map->addr, start); 4273 if (entry->end > start) 4274 UVM_MAP_CLIP_START(map, entry, start); 4275 else 4276 entry = RBT_NEXT(uvm_map_addr, entry); 4277 4278 while (entry != NULL && entry->start < end) { 4279 UVM_MAP_CLIP_END(map, entry, end); 4280 entry->etype |= UVM_ET_SYSCALL; 4281 entry = RBT_NEXT(uvm_map_addr, entry); 4282 } 4283 4284 #ifdef PMAP_CHECK_COPYIN 4285 check_copyin_add(map, start, end); /* Add libc's text segment */ 4286 #endif 4287 map->wserial++; 4288 map->flags |= VM_MAP_SYSCALL_ONCE; 4289 vm_map_unlock(map); 4290 return (0); 4291 } 4292 4293 /* 4294 * uvm_map_immutable: block mapping/mprotect for range of addrs in map. 
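 * (added, hedged note: this is the backend of mimmutable(2); once
 * UVM_ET_IMMUTABLE is set on an entry, later attempts to remap,
 * mprotect or unmap that range are refused.)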
4295 * 4296 * => map must be unlocked 4297 */ 4298 int 4299 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut) 4300 { 4301 struct vm_map_entry *entry; 4302 4303 if (start > end) 4304 return EINVAL; 4305 start = MAX(start, map->min_offset); 4306 end = MIN(end, map->max_offset); 4307 if (start >= end) 4308 return 0; 4309 4310 vm_map_lock(map); 4311 4312 entry = uvm_map_entrybyaddr(&map->addr, start); 4313 if (entry->end > start) 4314 UVM_MAP_CLIP_START(map, entry, start); 4315 else 4316 entry = RBT_NEXT(uvm_map_addr, entry); 4317 4318 while (entry != NULL && entry->start < end) { 4319 UVM_MAP_CLIP_END(map, entry, end); 4320 if (imut) 4321 entry->etype |= UVM_ET_IMMUTABLE; 4322 else 4323 entry->etype &= ~UVM_ET_IMMUTABLE; 4324 entry = RBT_NEXT(uvm_map_addr, entry); 4325 } 4326 4327 map->wserial++; 4328 vm_map_unlock(map); 4329 return (0); 4330 } 4331 4332 /* 4333 * uvm_map_advice: set advice code for range of addrs in map. 4334 * 4335 * => map must be unlocked 4336 */ 4337 int 4338 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4339 { 4340 struct vm_map_entry *entry; 4341 4342 switch (new_advice) { 4343 case MADV_NORMAL: 4344 case MADV_RANDOM: 4345 case MADV_SEQUENTIAL: 4346 break; 4347 default: 4348 return (EINVAL); 4349 } 4350 4351 if (start > end) 4352 return EINVAL; 4353 start = MAX(start, map->min_offset); 4354 end = MIN(end, map->max_offset); 4355 if (start >= end) 4356 return 0; 4357 4358 vm_map_lock(map); 4359 4360 entry = uvm_map_entrybyaddr(&map->addr, start); 4361 if (entry != NULL && entry->end > start) 4362 UVM_MAP_CLIP_START(map, entry, start); 4363 else if (entry!= NULL) 4364 entry = RBT_NEXT(uvm_map_addr, entry); 4365 4366 /* 4367 * XXXJRT: disallow holes? 4368 */ 4369 while (entry != NULL && entry->start < end) { 4370 UVM_MAP_CLIP_END(map, entry, end); 4371 entry->advice = new_advice; 4372 entry = RBT_NEXT(uvm_map_addr, entry); 4373 } 4374 4375 vm_map_unlock(map); 4376 return (0); 4377 } 4378 4379 /* 4380 * uvm_map_extract: extract a mapping from a map and put it somewhere 4381 * in the kernel_map, setting protection to max_prot. 4382 * 4383 * => map should be unlocked (we will write lock it and kernel_map) 4384 * => returns 0 on success, error code otherwise 4385 * => start must be page aligned 4386 * => len must be page sized 4387 * => flags: 4388 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4389 * Mappings are QREF's. 4390 */ 4391 int 4392 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4393 vaddr_t *dstaddrp, int flags) 4394 { 4395 struct uvm_map_deadq dead; 4396 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4397 vaddr_t dstaddr; 4398 vaddr_t end; 4399 vaddr_t cp_start; 4400 vsize_t cp_len, cp_off; 4401 int error; 4402 4403 TAILQ_INIT(&dead); 4404 end = start + len; 4405 4406 /* 4407 * Sanity check on the parameters. 4408 * Also, since the mapping may not contain gaps, error out if the 4409 * mapped area is not in source map. 4410 */ 4411 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4412 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4413 return EINVAL; 4414 if (start < srcmap->min_offset || end > srcmap->max_offset) 4415 return EINVAL; 4416 4417 /* Initialize dead entries. Handle len == 0 case. */ 4418 if (len == 0) 4419 return 0; 4420 4421 /* Acquire lock on srcmap. */ 4422 vm_map_lock(srcmap); 4423 4424 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4425 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4426 4427 /* Check that the range is contiguous. 
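 * (a gap shows up either as an UVM_ET_ISHOLE() entry or as free space
 * trailing an entry, i.e. VMMAP_FREE_END(entry) != entry->end before
 * `end' is reached; either case fails with EINVAL below.)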
*/ 4428 for (entry = first; entry != NULL && entry->end < end; 4429 entry = RBT_NEXT(uvm_map_addr, entry)) { 4430 if (VMMAP_FREE_END(entry) != entry->end || 4431 UVM_ET_ISHOLE(entry)) { 4432 error = EINVAL; 4433 goto fail; 4434 } 4435 } 4436 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4437 error = EINVAL; 4438 goto fail; 4439 } 4440 4441 /* 4442 * Handle need-copy flag. 4443 */ 4444 for (entry = first; entry != NULL && entry->start < end; 4445 entry = RBT_NEXT(uvm_map_addr, entry)) { 4446 if (UVM_ET_ISNEEDSCOPY(entry)) 4447 amap_copy(srcmap, entry, M_NOWAIT, 4448 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4449 if (UVM_ET_ISNEEDSCOPY(entry)) { 4450 /* 4451 * amap_copy failure 4452 */ 4453 error = ENOMEM; 4454 goto fail; 4455 } 4456 } 4457 4458 /* Lock destination map (kernel_map). */ 4459 vm_map_lock(kernel_map); 4460 4461 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4462 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4463 PROT_NONE, 0) != 0) { 4464 error = ENOMEM; 4465 goto fail2; 4466 } 4467 *dstaddrp = dstaddr; 4468 4469 /* 4470 * We now have srcmap and kernel_map locked. 4471 * dstaddr contains the destination offset in dstmap. 4472 */ 4473 /* step 1: start looping through map entries, performing extraction. */ 4474 for (entry = first; entry != NULL && entry->start < end; 4475 entry = RBT_NEXT(uvm_map_addr, entry)) { 4476 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4477 if (UVM_ET_ISHOLE(entry)) 4478 continue; 4479 4480 /* Calculate uvm_mapent_clone parameters. */ 4481 cp_start = entry->start; 4482 if (cp_start < start) { 4483 cp_off = start - cp_start; 4484 cp_start = start; 4485 } else 4486 cp_off = 0; 4487 cp_len = MIN(entry->end, end) - cp_start; 4488 4489 newentry = uvm_mapent_clone(kernel_map, 4490 cp_start - start + dstaddr, cp_len, cp_off, 4491 entry->protection, entry->max_protection, 4492 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4493 if (newentry == NULL) { 4494 error = ENOMEM; 4495 goto fail2_unmap; 4496 } 4497 kernel_map->size += cp_len; 4498 4499 /* Figure out the best protection */ 4500 if ((flags & UVM_EXTRACT_FIXPROT) && 4501 newentry->protection != PROT_NONE) 4502 newentry->protection = newentry->max_protection; 4503 newentry->protection &= ~PROT_EXEC; 4504 } 4505 pmap_update(kernel_map->pmap); 4506 4507 error = 0; 4508 4509 /* Unmap copied entries on failure. */ 4510 fail2_unmap: 4511 if (error) { 4512 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4513 FALSE, TRUE, FALSE); 4514 } 4515 4516 /* Release maps, release dead entries. 
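 * (unwind order: fail2 drops kernel_map, fail drops srcmap; any
 * entries queued on the dead list, including clones torn back down by
 * the failure path above, are then freed via uvm_unmap_detach() with
 * no locks held.)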
*/ 4517 fail2: 4518 vm_map_unlock(kernel_map); 4519 4520 fail: 4521 vm_map_unlock(srcmap); 4522 4523 uvm_unmap_detach(&dead, 0); 4524 4525 return error; 4526 } 4527 4528 /* 4529 * uvm_map_clean: clean out a map range 4530 * 4531 * => valid flags: 4532 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4533 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4534 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4535 * if (flags & PGO_FREE): any cached pages are freed after clean 4536 * => returns an error if any part of the specified range isn't mapped 4537 * => never a need to flush amap layer since the anonymous memory has 4538 * no permanent home, but may deactivate pages there 4539 * => called from sys_msync() and sys_madvise() 4540 * => caller must not have map locked 4541 */ 4542 4543 int 4544 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4545 { 4546 struct vm_map_entry *first, *entry; 4547 struct vm_amap *amap; 4548 struct vm_anon *anon; 4549 struct vm_page *pg; 4550 struct uvm_object *uobj; 4551 vaddr_t cp_start, cp_end; 4552 int refs; 4553 int error; 4554 boolean_t rv; 4555 4556 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4557 (PGO_FREE|PGO_DEACTIVATE)); 4558 4559 if (start > end || start < map->min_offset || end > map->max_offset) 4560 return EINVAL; 4561 4562 vm_map_lock(map); 4563 first = uvm_map_entrybyaddr(&map->addr, start); 4564 4565 /* Make a first pass to check for holes. */ 4566 for (entry = first; entry != NULL && entry->start < end; 4567 entry = RBT_NEXT(uvm_map_addr, entry)) { 4568 if (UVM_ET_ISSUBMAP(entry)) { 4569 vm_map_unlock(map); 4570 return EINVAL; 4571 } 4572 if (UVM_ET_ISSUBMAP(entry) || 4573 UVM_ET_ISHOLE(entry) || 4574 (entry->end < end && 4575 VMMAP_FREE_END(entry) != entry->end)) { 4576 vm_map_unlock(map); 4577 return EFAULT; 4578 } 4579 } 4580 4581 vm_map_busy(map); 4582 vm_map_unlock(map); 4583 error = 0; 4584 for (entry = first; entry != NULL && entry->start < end; 4585 entry = RBT_NEXT(uvm_map_addr, entry)) { 4586 amap = entry->aref.ar_amap; /* top layer */ 4587 if (UVM_ET_ISOBJ(entry)) 4588 uobj = entry->object.uvm_obj; 4589 else 4590 uobj = NULL; 4591 4592 /* 4593 * No amap cleaning necessary if: 4594 * - there's no amap 4595 * - we're not deactivating or freeing pages. 4596 */ 4597 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4598 goto flush_object; 4599 4600 cp_start = MAX(entry->start, start); 4601 cp_end = MIN(entry->end, end); 4602 4603 amap_lock(amap); 4604 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4605 anon = amap_lookup(&entry->aref, 4606 cp_start - entry->start); 4607 if (anon == NULL) 4608 continue; 4609 4610 KASSERT(anon->an_lock == amap->am_lock); 4611 pg = anon->an_page; 4612 if (pg == NULL) { 4613 continue; 4614 } 4615 KASSERT(pg->pg_flags & PQ_ANON); 4616 4617 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4618 /* 4619 * XXX In these first 3 cases, we always just 4620 * XXX deactivate the page. We may want to 4621 * XXX handle the different cases more 4622 * XXX specifically, in the future. 4623 */ 4624 case PGO_CLEANIT|PGO_FREE: 4625 case PGO_CLEANIT|PGO_DEACTIVATE: 4626 case PGO_DEACTIVATE: 4627 deactivate_it: 4628 /* skip the page if it's wired */ 4629 if (pg->wire_count != 0) 4630 break; 4631 4632 uvm_lock_pageq(); 4633 4634 KASSERT(pg->uanon == anon); 4635 4636 /* zap all mappings for the page. */ 4637 pmap_page_protect(pg, PROT_NONE); 4638 4639 /* ...and deactivate the page. 
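 * (all pmap mappings were removed just above, so a later touch of
 * this page simply refaults and reactivates it.)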
*/ 4640 uvm_pagedeactivate(pg); 4641 4642 uvm_unlock_pageq(); 4643 break; 4644 case PGO_FREE: 4645 /* 4646 * If there are multiple references to 4647 * the amap, just deactivate the page. 4648 */ 4649 if (amap_refs(amap) > 1) 4650 goto deactivate_it; 4651 4652 /* XXX skip the page if it's wired */ 4653 if (pg->wire_count != 0) { 4654 break; 4655 } 4656 amap_unadd(&entry->aref, 4657 cp_start - entry->start); 4658 refs = --anon->an_ref; 4659 if (refs == 0) 4660 uvm_anfree(anon); 4661 break; 4662 default: 4663 panic("uvm_map_clean: weird flags"); 4664 } 4665 } 4666 amap_unlock(amap); 4667 4668 flush_object: 4669 cp_start = MAX(entry->start, start); 4670 cp_end = MIN(entry->end, end); 4671 4672 /* 4673 * flush pages if we've got a valid backing object. 4674 * 4675 * Don't PGO_FREE if we don't have write permission 4676 * and don't flush if this is a copy-on-write object 4677 * since we can't know our permissions on it. 4678 */ 4679 if (uobj != NULL && 4680 ((flags & PGO_FREE) == 0 || 4681 ((entry->max_protection & PROT_WRITE) != 0 && 4682 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4683 rw_enter(uobj->vmobjlock, RW_WRITE); 4684 rv = uobj->pgops->pgo_flush(uobj, 4685 cp_start - entry->start + entry->offset, 4686 cp_end - entry->start + entry->offset, flags); 4687 rw_exit(uobj->vmobjlock); 4688 4689 if (rv == FALSE) 4690 error = EFAULT; 4691 } 4692 } 4693 4694 vm_map_unbusy(map); 4695 return error; 4696 } 4697 4698 /* 4699 * UVM_MAP_CLIP_END implementation 4700 */ 4701 void 4702 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4703 { 4704 struct vm_map_entry *tmp; 4705 4706 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4707 tmp = uvm_mapent_alloc(map, 0); 4708 4709 /* Invoke splitentry. */ 4710 uvm_map_splitentry(map, entry, tmp, addr); 4711 } 4712 4713 /* 4714 * UVM_MAP_CLIP_START implementation 4715 * 4716 * Clippers are required to not change the pointers to the entry they are 4717 * clipping on. 4718 * Since uvm_map_splitentry turns the original entry into the lowest 4719 * entry (address wise) we do a swap between the new entry and the original 4720 * entry, prior to calling uvm_map_splitentry. 4721 */ 4722 void 4723 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4724 { 4725 struct vm_map_entry *tmp; 4726 struct uvm_addr_state *free; 4727 4728 /* Unlink original. */ 4729 free = uvm_map_uaddr_e(map, entry); 4730 uvm_mapent_free_remove(map, free, entry); 4731 uvm_mapent_addr_remove(map, entry); 4732 4733 /* Copy entry. */ 4734 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4735 tmp = uvm_mapent_alloc(map, 0); 4736 uvm_mapent_copy(entry, tmp); 4737 4738 /* Put new entry in place of original entry. */ 4739 uvm_mapent_addr_insert(map, tmp); 4740 uvm_mapent_free_insert(map, free, tmp); 4741 4742 /* Invoke splitentry. */ 4743 uvm_map_splitentry(map, tmp, entry, addr); 4744 } 4745 4746 /* 4747 * Boundary fixer. 4748 */ 4749 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4750 static inline vaddr_t 4751 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4752 { 4753 return (min < bound && max > bound) ? bound : max; 4754 } 4755 4756 /* 4757 * Choose free list based on address at start of free space. 
4758 * 4759 * The uvm_addr_state returned contains addr and is the first of: 4760 * - uaddr_exe 4761 * - uaddr_brk_stack 4762 * - uaddr_any 4763 */ 4764 struct uvm_addr_state* 4765 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4766 { 4767 struct uvm_addr_state *uaddr; 4768 int i; 4769 4770 /* Special case the first page, to prevent mmap from returning 0. */ 4771 if (addr < VMMAP_MIN_ADDR) 4772 return NULL; 4773 4774 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4775 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4776 if (addr >= uvm_maxkaddr) 4777 return NULL; 4778 } 4779 4780 /* Is the address inside the exe-only map? */ 4781 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4782 addr < map->uaddr_exe->uaddr_maxaddr) 4783 return map->uaddr_exe; 4784 4785 /* Check if the space falls inside brk/stack area. */ 4786 if ((addr >= map->b_start && addr < map->b_end) || 4787 (addr >= map->s_start && addr < map->s_end)) { 4788 if (map->uaddr_brk_stack != NULL && 4789 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4790 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4791 return map->uaddr_brk_stack; 4792 } else 4793 return NULL; 4794 } 4795 4796 /* 4797 * Check the other selectors. 4798 * 4799 * These selectors are only marked as the owner, if they have insert 4800 * functions. 4801 */ 4802 for (i = 0; i < nitems(map->uaddr_any); i++) { 4803 uaddr = map->uaddr_any[i]; 4804 if (uaddr == NULL) 4805 continue; 4806 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4807 continue; 4808 4809 if (addr >= uaddr->uaddr_minaddr && 4810 addr < uaddr->uaddr_maxaddr) 4811 return uaddr; 4812 } 4813 4814 return NULL; 4815 } 4816 4817 /* 4818 * Choose free list based on address at start of free space. 4819 * 4820 * The uvm_addr_state returned contains addr and is the first of: 4821 * - uaddr_exe 4822 * - uaddr_brk_stack 4823 * - uaddr_any 4824 */ 4825 struct uvm_addr_state* 4826 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4827 { 4828 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4829 } 4830 4831 /* 4832 * Returns the first free-memory boundary that is crossed by [min-max]. 4833 */ 4834 vsize_t 4835 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4836 { 4837 struct uvm_addr_state *uaddr; 4838 int i; 4839 4840 /* Never return first page. */ 4841 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4842 4843 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4844 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4845 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4846 4847 /* Check for exe-only boundaries. */ 4848 if (map->uaddr_exe != NULL) { 4849 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4850 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4851 } 4852 4853 /* Check for exe-only boundaries. */ 4854 if (map->uaddr_brk_stack != NULL) { 4855 max = uvm_map_boundfix(min, max, 4856 map->uaddr_brk_stack->uaddr_minaddr); 4857 max = uvm_map_boundfix(min, max, 4858 map->uaddr_brk_stack->uaddr_maxaddr); 4859 } 4860 4861 /* Check other boundaries. */ 4862 for (i = 0; i < nitems(map->uaddr_any); i++) { 4863 uaddr = map->uaddr_any[i]; 4864 if (uaddr != NULL) { 4865 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4866 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4867 } 4868 } 4869 4870 /* Boundaries at stack and brk() area. 
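 * (added note: clipping [min, max) at these boundaries means the
 * chunks handed out by uvm_map_fix_space() never straddle the brk or
 * stack ranges, so each chunk belongs to exactly one selector.)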
*/ 4871 max = uvm_map_boundfix(min, max, map->s_start); 4872 max = uvm_map_boundfix(min, max, map->s_end); 4873 max = uvm_map_boundfix(min, max, map->b_start); 4874 max = uvm_map_boundfix(min, max, map->b_end); 4875 4876 return max; 4877 } 4878 4879 /* 4880 * Update map allocation start and end addresses from proc vmspace. 4881 */ 4882 void 4883 uvm_map_vmspace_update(struct vm_map *map, 4884 struct uvm_map_deadq *dead, int flags) 4885 { 4886 struct vmspace *vm; 4887 vaddr_t b_start, b_end, s_start, s_end; 4888 4889 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4890 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4891 4892 /* 4893 * Derive actual allocation boundaries from vmspace. 4894 */ 4895 vm = (struct vmspace *)map; 4896 b_start = (vaddr_t)vm->vm_daddr; 4897 b_end = b_start + BRKSIZ; 4898 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4899 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4900 #ifdef DIAGNOSTIC 4901 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4902 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4903 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4904 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4905 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4906 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4907 vm, b_start, b_end, s_start, s_end); 4908 } 4909 #endif 4910 4911 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4912 map->s_start == s_start && map->s_end == s_end)) 4913 return; 4914 4915 uvm_map_freelist_update(map, dead, b_start, b_end, 4916 s_start, s_end, flags); 4917 } 4918 4919 /* 4920 * Grow kernel memory. 4921 * 4922 * This function is only called for kernel maps when an allocation fails. 4923 * 4924 * If the map has a gap that is large enough to accommodate alloc_sz, this 4925 * function will make sure map->free will include it. 4926 */ 4927 void 4928 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4929 vsize_t alloc_sz, int flags) 4930 { 4931 vsize_t sz; 4932 vaddr_t end; 4933 struct vm_map_entry *entry; 4934 4935 /* Kernel memory only. */ 4936 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4937 /* Destroy free list. */ 4938 uvm_map_freelist_update_clear(map, dead); 4939 4940 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 4941 if (map->flags & VM_MAP_GUARDPAGES) 4942 alloc_sz += PAGE_SIZE; 4943 4944 /* 4945 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4946 * 4947 * Don't handle the case where the multiplication overflows: 4948 * if that happens, the allocation is probably too big anyway. 4949 */ 4950 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4951 4952 /* 4953 * Walk forward until a gap large enough for alloc_sz shows up. 4954 * 4955 * We assume the kernel map has no boundaries. 4956 * uvm_maxkaddr may be zero. 4957 */ 4958 end = MAX(uvm_maxkaddr, map->min_offset); 4959 entry = uvm_map_entrybyaddr(&map->addr, end); 4960 while (entry && entry->fspace < alloc_sz) 4961 entry = RBT_NEXT(uvm_map_addr, entry); 4962 if (entry) { 4963 end = MAX(VMMAP_FREE_START(entry), end); 4964 end += MIN(sz, map->max_offset - end); 4965 } else 4966 end = map->max_offset; 4967 4968 /* Reserve pmap entries. */ 4969 #ifdef PMAP_GROWKERNEL 4970 uvm_maxkaddr = pmap_growkernel(end); 4971 #else 4972 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4973 #endif 4974 4975 /* Rebuild free list. */ 4976 uvm_map_freelist_update_refill(map, flags); 4977 } 4978 4979 /* 4980 * Freelist update subfunction: unlink all entries from freelists. 
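 * (zero-length entries that only described free space are removed as
 * well: their free range is folded into the previous entry's fspace
 * and the entry itself is pushed onto the dead queue.)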
4981 */ 4982 void 4983 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4984 { 4985 struct uvm_addr_state *free; 4986 struct vm_map_entry *entry, *prev, *next; 4987 4988 prev = NULL; 4989 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4990 entry = next) { 4991 next = RBT_NEXT(uvm_map_addr, entry); 4992 4993 free = uvm_map_uaddr_e(map, entry); 4994 uvm_mapent_free_remove(map, free, entry); 4995 4996 if (prev != NULL && entry->start == entry->end) { 4997 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4998 uvm_mapent_addr_remove(map, entry); 4999 DEAD_ENTRY_PUSH(dead, entry); 5000 } else 5001 prev = entry; 5002 } 5003 } 5004 5005 /* 5006 * Freelist update subfunction: refill the freelists with entries. 5007 */ 5008 void 5009 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 5010 { 5011 struct vm_map_entry *entry; 5012 vaddr_t min, max; 5013 5014 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5015 min = VMMAP_FREE_START(entry); 5016 max = VMMAP_FREE_END(entry); 5017 entry->fspace = 0; 5018 5019 entry = uvm_map_fix_space(map, entry, min, max, flags); 5020 } 5021 5022 uvm_tree_sanity(map, __FILE__, __LINE__); 5023 } 5024 5025 /* 5026 * Change {a,b}_{start,end} allocation ranges and associated free lists. 5027 */ 5028 void 5029 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 5030 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 5031 { 5032 KDASSERT(b_end >= b_start && s_end >= s_start); 5033 vm_map_assert_wrlock(map); 5034 5035 /* Clear all free lists. */ 5036 uvm_map_freelist_update_clear(map, dead); 5037 5038 /* Apply new bounds. */ 5039 map->b_start = b_start; 5040 map->b_end = b_end; 5041 map->s_start = s_start; 5042 map->s_end = s_end; 5043 5044 /* Refill free lists. */ 5045 uvm_map_freelist_update_refill(map, flags); 5046 } 5047 5048 /* 5049 * Assign a uvm_addr_state to the specified pointer in vm_map. 5050 * 5051 * May sleep. 5052 */ 5053 void 5054 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 5055 struct uvm_addr_state *newval) 5056 { 5057 struct uvm_map_deadq dead; 5058 5059 /* Pointer which must be in this map. */ 5060 KASSERT(which != NULL); 5061 KASSERT((void*)map <= (void*)(which) && 5062 (void*)(which) < (void*)(map + 1)); 5063 5064 vm_map_lock(map); 5065 TAILQ_INIT(&dead); 5066 uvm_map_freelist_update_clear(map, &dead); 5067 5068 uvm_addr_destroy(*which); 5069 *which = newval; 5070 5071 uvm_map_freelist_update_refill(map, 0); 5072 vm_map_unlock(map); 5073 uvm_unmap_detach(&dead, 0); 5074 } 5075 5076 /* 5077 * Correct space insert. 5078 * 5079 * Entry must not be on any freelist. 5080 */ 5081 struct vm_map_entry* 5082 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5083 vaddr_t min, vaddr_t max, int flags) 5084 { 5085 struct uvm_addr_state *free, *entfree; 5086 vaddr_t lmax; 5087 5088 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5089 KDASSERT(min <= max); 5090 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5091 min == map->min_offset); 5092 5093 UVM_MAP_REQ_WRITE(map); 5094 5095 /* 5096 * During the function, entfree will always point at the uaddr state 5097 * for entry. 5098 */ 5099 entfree = (entry == NULL ? NULL : 5100 uvm_map_uaddr_e(map, entry)); 5101 5102 while (min != max) { 5103 /* Claim guard page for entry. 
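 * (on VM_MAP_GUARDPAGES maps the first page of free space after an
 * allocated entry, or both pages when the gap is exactly two pages,
 * is kept back as guard space and never put on a free list.)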
*/ 5104 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5105 VMMAP_FREE_END(entry) == entry->end && 5106 entry->start != entry->end) { 5107 if (max - min == 2 * PAGE_SIZE) { 5108 /* 5109 * If the free-space gap is exactly 2 pages, 5110 * we make the guard 2 pages instead of 1. 5111 * Because in a guarded map, an area needs 5112 * at least 2 pages to allocate from: 5113 * one page for the allocation and one for 5114 * the guard. 5115 */ 5116 entry->guard = 2 * PAGE_SIZE; 5117 min = max; 5118 } else { 5119 entry->guard = PAGE_SIZE; 5120 min += PAGE_SIZE; 5121 } 5122 continue; 5123 } 5124 5125 /* 5126 * Handle the case where entry has a 2-page guard, but the 5127 * space after entry is freed. 5128 */ 5129 if (entry != NULL && entry->fspace == 0 && 5130 entry->guard > PAGE_SIZE) { 5131 entry->guard = PAGE_SIZE; 5132 min = VMMAP_FREE_START(entry); 5133 } 5134 5135 lmax = uvm_map_boundary(map, min, max); 5136 free = uvm_map_uaddr(map, min); 5137 5138 /* 5139 * Entries are merged if they point at the same uvm_free(). 5140 * Exception to that rule: if min == uvm_maxkaddr, a new 5141 * entry is started regardless (otherwise the allocators 5142 * will get confused). 5143 */ 5144 if (entry != NULL && free == entfree && 5145 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5146 min == uvm_maxkaddr)) { 5147 KDASSERT(VMMAP_FREE_END(entry) == min); 5148 entry->fspace += lmax - min; 5149 } else { 5150 /* 5151 * Commit entry to free list: it'll not be added to 5152 * anymore. 5153 * We'll start a new entry and add to that entry 5154 * instead. 5155 */ 5156 if (entry != NULL) 5157 uvm_mapent_free_insert(map, entfree, entry); 5158 5159 /* New entry for new uaddr. */ 5160 entry = uvm_mapent_alloc(map, flags); 5161 KDASSERT(entry != NULL); 5162 entry->end = entry->start = min; 5163 entry->guard = 0; 5164 entry->fspace = lmax - min; 5165 entry->object.uvm_obj = NULL; 5166 entry->offset = 0; 5167 entry->etype = 0; 5168 entry->protection = entry->max_protection = 0; 5169 entry->inheritance = 0; 5170 entry->wired_count = 0; 5171 entry->advice = 0; 5172 entry->aref.ar_pageoff = 0; 5173 entry->aref.ar_amap = NULL; 5174 uvm_mapent_addr_insert(map, entry); 5175 5176 entfree = free; 5177 } 5178 5179 min = lmax; 5180 } 5181 /* Finally put entry on the uaddr state. */ 5182 if (entry != NULL) 5183 uvm_mapent_free_insert(map, entfree, entry); 5184 5185 return entry; 5186 } 5187 5188 /* 5189 * MQuery style of allocation. 5190 * 5191 * This allocator searches forward until sufficient space is found to map 5192 * the given size. 5193 * 5194 * XXX: factor in offset (via pmap_prefer) and protection? 5195 */ 5196 int 5197 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5198 int flags) 5199 { 5200 struct vm_map_entry *entry, *last; 5201 vaddr_t addr; 5202 vaddr_t tmp, pmap_align, pmap_offset; 5203 int error; 5204 5205 addr = *addr_p; 5206 vm_map_lock_read(map); 5207 5208 /* Configure pmap prefer. */ 5209 if (offset != UVM_UNKNOWN_OFFSET) { 5210 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5211 pmap_offset = PMAP_PREFER_OFFSET(offset); 5212 } else { 5213 pmap_align = PAGE_SIZE; 5214 pmap_offset = 0; 5215 } 5216 5217 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5218 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5219 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5220 if (tmp < addr) 5221 tmp += pmap_align; 5222 addr = tmp; 5223 } 5224 5225 /* First, check if the requested range is fully available. 
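 * (added, hedged note: uvm_map_mquery() backs mquery(2), which ld.so
 * uses to probe for usable address space without mapping it; with
 * UVM_FLAG_FIXED the hint must be usable as-is or EINVAL is returned,
 * otherwise the search below moves forward from the hint.)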
*/ 5226 entry = uvm_map_entrybyaddr(&map->addr, addr); 5227 last = NULL; 5228 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5229 error = 0; 5230 goto out; 5231 } 5232 if (flags & UVM_FLAG_FIXED) { 5233 error = EINVAL; 5234 goto out; 5235 } 5236 5237 error = ENOMEM; /* Default error from here. */ 5238 5239 /* 5240 * At this point, the memory at <addr, sz> is not available. 5241 * The reasons are: 5242 * [1] it's outside the map, 5243 * [2] it starts in used memory (and therefore needs to move 5244 * toward the first free page in entry), 5245 * [3] it starts in free memory but bumps into used memory. 5246 * 5247 * Note that for case [2], the forward moving is handled by the 5248 * for loop below. 5249 */ 5250 if (entry == NULL) { 5251 /* [1] Outside the map. */ 5252 if (addr >= map->max_offset) 5253 goto out; 5254 else 5255 entry = RBT_MIN(uvm_map_addr, &map->addr); 5256 } else if (VMMAP_FREE_START(entry) <= addr) { 5257 /* [3] Bumped into used memory. */ 5258 entry = RBT_NEXT(uvm_map_addr, entry); 5259 } 5260 5261 /* Test if the next entry is sufficient for the allocation. */ 5262 for (; entry != NULL; 5263 entry = RBT_NEXT(uvm_map_addr, entry)) { 5264 if (entry->fspace == 0) 5265 continue; 5266 addr = VMMAP_FREE_START(entry); 5267 5268 restart: /* Restart address checks on address change. */ 5269 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5270 if (tmp < addr) 5271 tmp += pmap_align; 5272 addr = tmp; 5273 if (addr >= VMMAP_FREE_END(entry)) 5274 continue; 5275 5276 /* Skip brk() allocation addresses. */ 5277 if (addr + sz > map->b_start && addr < map->b_end) { 5278 if (VMMAP_FREE_END(entry) > map->b_end) { 5279 addr = map->b_end; 5280 goto restart; 5281 } else 5282 continue; 5283 } 5284 /* Skip stack allocation addresses. */ 5285 if (addr + sz > map->s_start && addr < map->s_end) { 5286 if (VMMAP_FREE_END(entry) > map->s_end) { 5287 addr = map->s_end; 5288 goto restart; 5289 } else 5290 continue; 5291 } 5292 5293 last = NULL; 5294 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5295 error = 0; 5296 goto out; 5297 } 5298 } 5299 5300 out: 5301 vm_map_unlock_read(map); 5302 if (error == 0) 5303 *addr_p = addr; 5304 return error; 5305 } 5306 5307 boolean_t 5308 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5309 { 5310 boolean_t rv; 5311 5312 if (map->flags & VM_MAP_INTRSAFE) { 5313 rv = mtx_enter_try(&map->mtx); 5314 } else { 5315 mtx_enter(&map->flags_lock); 5316 if (map->flags & VM_MAP_BUSY) { 5317 mtx_leave(&map->flags_lock); 5318 return (FALSE); 5319 } 5320 mtx_leave(&map->flags_lock); 5321 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5322 /* check if the lock is busy and back out if we won the race */ 5323 if (rv) { 5324 mtx_enter(&map->flags_lock); 5325 if (map->flags & VM_MAP_BUSY) { 5326 rw_exit(&map->lock); 5327 rv = FALSE; 5328 } 5329 mtx_leave(&map->flags_lock); 5330 } 5331 } 5332 5333 if (rv) { 5334 map->timestamp++; 5335 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5336 uvm_tree_sanity(map, file, line); 5337 uvm_tree_size_chk(map, file, line); 5338 } 5339 5340 return (rv); 5341 } 5342 5343 void 5344 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5345 { 5346 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5347 do { 5348 mtx_enter(&map->flags_lock); 5349 tryagain: 5350 while (map->flags & VM_MAP_BUSY) { 5351 map->flags |= VM_MAP_WANTLOCK; 5352 msleep_nsec(&map->flags, &map->flags_lock, 5353 PVM, vmmapbsy, INFSLP); 5354 } 5355 mtx_leave(&map->flags_lock); 5356 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 
0); 5357 /* check if the lock is busy and back out if we won the race */ 5358 mtx_enter(&map->flags_lock); 5359 if (map->flags & VM_MAP_BUSY) { 5360 rw_exit(&map->lock); 5361 goto tryagain; 5362 } 5363 mtx_leave(&map->flags_lock); 5364 } else { 5365 mtx_enter(&map->mtx); 5366 } 5367 5368 map->timestamp++; 5369 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5370 uvm_tree_sanity(map, file, line); 5371 uvm_tree_size_chk(map, file, line); 5372 } 5373 5374 void 5375 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5376 { 5377 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5378 rw_enter_read(&map->lock); 5379 else 5380 mtx_enter(&map->mtx); 5381 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5382 uvm_tree_sanity(map, file, line); 5383 uvm_tree_size_chk(map, file, line); 5384 } 5385 5386 void 5387 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5388 { 5389 uvm_tree_sanity(map, file, line); 5390 uvm_tree_size_chk(map, file, line); 5391 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5392 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5393 rw_exit(&map->lock); 5394 else 5395 mtx_leave(&map->mtx); 5396 } 5397 5398 void 5399 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5400 { 5401 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5402 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5403 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5404 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5405 rw_exit_read(&map->lock); 5406 else 5407 mtx_leave(&map->mtx); 5408 } 5409 5410 void 5411 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5412 { 5413 uvm_tree_sanity(map, file, line); 5414 uvm_tree_size_chk(map, file, line); 5415 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5416 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5417 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5418 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5419 rw_enter(&map->lock, RW_DOWNGRADE); 5420 } 5421 5422 void 5423 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5424 { 5425 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5426 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5427 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5428 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5429 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5430 rw_exit_read(&map->lock); 5431 rw_enter_write(&map->lock); 5432 } 5433 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5434 uvm_tree_sanity(map, file, line); 5435 } 5436 5437 void 5438 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5439 { 5440 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5441 mtx_enter(&map->flags_lock); 5442 map->flags |= VM_MAP_BUSY; 5443 mtx_leave(&map->flags_lock); 5444 } 5445 5446 void 5447 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5448 { 5449 int oflags; 5450 5451 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5452 mtx_enter(&map->flags_lock); 5453 oflags = map->flags; 5454 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5455 mtx_leave(&map->flags_lock); 5456 if (oflags & VM_MAP_WANTLOCK) 5457 wakeup(&map->flags); 5458 } 5459 5460 void 5461 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line) 5462 { 5463 LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line)); 5464 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5465 rw_assert_anylock(&map->lock); 5466 else 5467 MUTEX_ASSERT_LOCKED(&map->mtx); 5468 } 5469 5470 void 5471 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line) 5472 { 5473 
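	/*
	 * (added note: for maps that are not INTRSAFE the write lock is
	 * a sleeping rwlock, hence the splassert(IPL_NONE) below before
	 * asserting that it is write-held.)
	 */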
LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line)); 5474 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5475 splassert(IPL_NONE); 5476 rw_assert_wrlock(&map->lock); 5477 } else 5478 MUTEX_ASSERT_LOCKED(&map->mtx); 5479 } 5480 5481 #ifndef SMALL_KERNEL 5482 int 5483 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5484 size_t *lenp) 5485 { 5486 struct vm_map_entry *entry; 5487 vaddr_t start; 5488 int cnt, maxcnt, error = 0; 5489 5490 KASSERT(*lenp > 0); 5491 KASSERT((*lenp % sizeof(*kve)) == 0); 5492 cnt = 0; 5493 maxcnt = *lenp / sizeof(*kve); 5494 KASSERT(maxcnt > 0); 5495 5496 /* 5497 * Return only entries whose address is above the given base 5498 * address. This allows userland to iterate without knowing the 5499 * number of entries beforehand. 5500 */ 5501 start = (vaddr_t)kve[0].kve_start; 5502 5503 vm_map_lock(map); 5504 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5505 if (cnt == maxcnt) { 5506 error = ENOMEM; 5507 break; 5508 } 5509 if (start != 0 && entry->start < start) 5510 continue; 5511 kve->kve_start = entry->start; 5512 kve->kve_end = entry->end; 5513 kve->kve_guard = entry->guard; 5514 kve->kve_fspace = entry->fspace; 5515 kve->kve_fspace_augment = entry->fspace_augment; 5516 kve->kve_offset = entry->offset; 5517 kve->kve_wired_count = entry->wired_count; 5518 kve->kve_etype = entry->etype; 5519 kve->kve_protection = entry->protection; 5520 kve->kve_max_protection = entry->max_protection; 5521 kve->kve_advice = entry->advice; 5522 kve->kve_inheritance = entry->inheritance; 5523 kve->kve_flags = entry->flags; 5524 kve++; 5525 cnt++; 5526 } 5527 vm_map_unlock(map); 5528 5529 KASSERT(cnt <= maxcnt); 5530 5531 *lenp = sizeof(*kve) * cnt; 5532 return error; 5533 } 5534 #endif 5535 5536 5537 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5538 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5539 5540 5541 /* 5542 * MD code: vmspace allocator setup. 5543 */ 5544 5545 #ifdef __i386__ 5546 void 5547 uvm_map_setup_md(struct vm_map *map) 5548 { 5549 vaddr_t min, max; 5550 5551 min = map->min_offset; 5552 max = map->max_offset; 5553 5554 /* 5555 * Ensure the selectors will not try to manage page 0; 5556 * it's too special. 5557 */ 5558 if (min < VMMAP_MIN_ADDR) 5559 min = VMMAP_MIN_ADDR; 5560 5561 #if 0 /* Cool stuff, not yet */ 5562 /* Executable code is special. */ 5563 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5564 /* Place normal allocations beyond executable mappings. */ 5565 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5566 #else /* Crappy stuff, for now */ 5567 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5568 #endif 5569 5570 #ifndef SMALL_KERNEL 5571 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5572 #endif /* !SMALL_KERNEL */ 5573 } 5574 #elif __LP64__ 5575 void 5576 uvm_map_setup_md(struct vm_map *map) 5577 { 5578 vaddr_t min, max; 5579 5580 min = map->min_offset; 5581 max = map->max_offset; 5582 5583 /* 5584 * Ensure the selectors will not try to manage page 0; 5585 * it's too special. 
5586 */ 5587 if (min < VMMAP_MIN_ADDR) 5588 min = VMMAP_MIN_ADDR; 5589 5590 #if 0 /* Cool stuff, not yet */ 5591 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5592 #else /* Crappy stuff, for now */ 5593 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5594 #endif 5595 5596 #ifndef SMALL_KERNEL 5597 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5598 #endif /* !SMALL_KERNEL */ 5599 } 5600 #else /* non-i386, 32 bit */ 5601 void 5602 uvm_map_setup_md(struct vm_map *map) 5603 { 5604 vaddr_t min, max; 5605 5606 min = map->min_offset; 5607 max = map->max_offset; 5608 5609 /* 5610 * Ensure the selectors will not try to manage page 0; 5611 * it's too special. 5612 */ 5613 if (min < VMMAP_MIN_ADDR) 5614 min = VMMAP_MIN_ADDR; 5615 5616 #if 0 /* Cool stuff, not yet */ 5617 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5618 #else /* Crappy stuff, for now */ 5619 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5620 #endif 5621 5622 #ifndef SMALL_KERNEL 5623 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5624 #endif /* !SMALL_KERNEL */ 5625 } 5626 #endif 5627
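
/*
 * (Added, hedged usage sketch; not part of the original file.)
 *
 * uvm_map_fill_vmmap() above is the kernel side of the kern.proc.vmmap
 * sysctl.  Userland seeds kve_start of the first element with the
 * address where the previous call stopped, roughly:
 *
 *	struct kinfo_vmentry kve[64];
 *	size_t len = sizeof(kve);
 *	int mib[3] = { CTL_KERN, KERN_PROC_VMMAP, pid };
 *
 *	kve[0].kve_start = 0;		(or the last kve_end seen)
 *	if (sysctl(mib, 3, kve, &len, NULL, 0) == 0)
 *		... len / sizeof(kve[0]) entries were filled in ...
 *
 * Repeating the call with kve[0].kve_start set to the last kve_end
 * returned walks the entire address space without knowing the number
 * of entries up front.  (The exact mib layout and the KERN_PROC_VMMAP
 * node are assumptions here; the resume convention follows from the
 * "above the given base address" comment in uvm_map_fill_vmmap().)
 */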