1 /* $OpenBSD: uvm_map.c,v 1.290 2022/03/12 08:11:07 mpi Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/acct.h> 90 #include <sys/mman.h> 91 #include <sys/proc.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/sysctl.h> 95 #include <sys/signalvar.h> 96 #include <sys/syslog.h> 97 #include <sys/user.h> 98 #include <sys/tracepoint.h> 99 100 #ifdef SYSVSHM 101 #include <sys/shm.h> 102 #endif 103 104 #include <uvm/uvm.h> 105 106 #ifdef DDB 107 #include <uvm/uvm_ddb.h> 108 #endif 109 110 #include <uvm/uvm_addr.h> 111 112 113 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 114 int uvm_mapent_isjoinable(struct vm_map*, 115 struct vm_map_entry*, struct vm_map_entry*); 116 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 117 struct vm_map_entry*, struct uvm_map_deadq*); 118 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 119 struct vm_map_entry*, struct uvm_map_deadq*); 120 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 121 struct vm_map_entry*, vaddr_t, vsize_t, int, 122 struct uvm_map_deadq*, struct vm_map_entry*); 123 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 124 void uvm_mapent_free(struct vm_map_entry*); 125 void uvm_unmap_kill_entry(struct vm_map*, 126 struct vm_map_entry*); 127 void uvm_unmap_kill_entry_withlock(struct vm_map *, 128 struct vm_map_entry *, int); 129 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 130 void uvm_mapent_mkfree(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry**, 132 struct uvm_map_deadq*, boolean_t); 133 void uvm_map_pageable_pgon(struct vm_map*, 134 struct vm_map_entry*, struct vm_map_entry*, 135 vaddr_t, vaddr_t); 136 int uvm_map_pageable_wire(struct vm_map*, 137 struct vm_map_entry*, struct vm_map_entry*, 138 vaddr_t, vaddr_t, int); 139 void uvm_map_setup_entries(struct vm_map*); 140 void uvm_map_setup_md(struct vm_map*); 141 void uvm_map_teardown(struct vm_map*); 142 void uvm_map_vmspace_update(struct vm_map*, 143 struct uvm_map_deadq*, int); 144 void uvm_map_kmem_grow(struct vm_map*, 145 struct uvm_map_deadq*, vsize_t, int); 146 void uvm_map_freelist_update_clear(struct vm_map*, 147 struct uvm_map_deadq*); 148 void uvm_map_freelist_update_refill(struct vm_map *, int); 149 void uvm_map_freelist_update(struct vm_map*, 150 struct uvm_map_deadq*, vaddr_t, vaddr_t, 151 vaddr_t, vaddr_t, int); 152 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 153 vaddr_t, vaddr_t, int); 154 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 155 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 156 int); 157 int uvm_map_findspace(struct vm_map*, 158 struct vm_map_entry**, struct vm_map_entry**, 159 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 160 vaddr_t); 161 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 162 void uvm_map_addr_augment(struct vm_map_entry*); 163 164 int uvm_map_inentry_recheck(u_long, vaddr_t, 165 struct p_inentry *); 166 
boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
			    vaddr_t, int (*)(vm_map_entry_t), u_long);
/*
 * Tree management functions.
 */

static inline void	 uvm_mapent_copy(struct vm_map_entry*,
			    struct vm_map_entry*);
static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
			    const struct vm_map_entry*);
void			 uvm_mapent_free_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_free_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_addr_insert(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_mapent_addr_remove(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_map_splitentry(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t);
vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);

/*
 * uvm_vmspace_fork helper functions.
 */
struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t,
			    struct vm_map_entry*, struct uvm_map_deadq*, int,
			    int);
struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);

/*
 * Tree validation.
 */
#ifdef VMMAP_DEBUG
void			 uvm_tree_assert(struct vm_map*, int, char*,
			    char*, int);
#define UVM_ASSERT(map, cond, file, line)				\
	uvm_tree_assert((map), (cond), #cond, (file), (line))
void			 uvm_tree_sanity(struct vm_map*, char*, int);
void			 uvm_tree_size_chk(struct vm_map*, char*, int);
void			 vmspace_validate(struct vm_map*);
#else
#define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
#define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
#define vmspace_validate(_map)				do {} while (0)
#endif

/*
 * All architectures will have pmap_prefer; provide no-op defaults for
 * those that do not define PMAP_PREFER themselves.
 */
#ifndef PMAP_PREFER
#define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
#define PMAP_PREFER_OFFSET(off)	0
#define PMAP_PREFER(addr, off)	(addr)
#endif

/*
 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
 *
 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size
 * each time.
 */
#define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_ALLOCMUL	4
/*
 * When selecting a random free-space block, look at most FSPACE_DELTA blocks
 * ahead.
 */
#define FSPACE_DELTA		8
/*
 * Put allocations adjacent to previous allocations when the free-space tree
 * is larger than FSPACE_COMPACT entries.
 *
 * Alignment and PMAP_PREFER may still cause the entry to not be fully
 * adjacent.  Note that this strategy reduces memory fragmentation (by leaving
 * a large space before or after the allocation).
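 *
 * For example (illustrative, not part of the original comment): with
 * FSPACE_COMPACT at 128, a map whose free-space tree holds fewer than 128
 * entries keeps using randomized placement within the selected free block,
 * while a more fragmented map places new allocations directly next to
 * existing ones.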
257 */ 258 #define FSPACE_COMPACT 128 259 /* 260 * Make the address selection skip at most this many bytes from the start of 261 * the free space in which the allocation takes place. 262 * 263 * The main idea behind a randomized address space is that an attacker cannot 264 * know where to target his attack. Therefore, the location of objects must be 265 * as random as possible. However, the goal is not to create the most sparse 266 * map that is possible. 267 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 268 * sizes, thereby reducing the sparseness. The biggest randomization comes 269 * from fragmentation, i.e. FSPACE_COMPACT. 270 */ 271 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 272 /* 273 * Allow for small gaps in the overflow areas. 274 * Gap size is in bytes and does not have to be a multiple of page-size. 275 */ 276 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 277 278 /* auto-allocate address lower bound */ 279 #define VMMAP_MIN_ADDR PAGE_SIZE 280 281 282 #ifdef DEADBEEF0 283 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 284 #else 285 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 286 #endif 287 288 #ifdef DEBUG 289 int uvm_map_printlocks = 0; 290 291 #define LPRINTF(_args) \ 292 do { \ 293 if (uvm_map_printlocks) \ 294 printf _args; \ 295 } while (0) 296 #else 297 #define LPRINTF(_args) do {} while (0) 298 #endif 299 300 static struct mutex uvm_kmapent_mtx; 301 static struct timeval uvm_kmapent_last_warn_time; 302 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 303 304 const char vmmapbsy[] = "vmmapbsy"; 305 306 /* 307 * pool for vmspace structures. 308 */ 309 struct pool uvm_vmspace_pool; 310 311 /* 312 * pool for dynamically-allocated map entries. 313 */ 314 struct pool uvm_map_entry_pool; 315 struct pool uvm_map_entry_kmem_pool; 316 317 /* 318 * This global represents the end of the kernel virtual address 319 * space. If we want to exceed this, we must grow the kernel 320 * virtual address space dynamically. 321 * 322 * Note, this variable is locked by kernel_map's lock. 323 */ 324 vaddr_t uvm_maxkaddr; 325 326 /* 327 * Locking predicate. 328 */ 329 #define UVM_MAP_REQ_WRITE(_map) \ 330 do { \ 331 if ((_map)->ref_count > 0) { \ 332 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 333 rw_assert_wrlock(&(_map)->lock); \ 334 else \ 335 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 336 } \ 337 } while (0) 338 339 #define vm_map_modflags(map, set, clear) \ 340 do { \ 341 mtx_enter(&(map)->flags_lock); \ 342 (map)->flags = ((map)->flags | (set)) & ~(clear); \ 343 mtx_leave(&(map)->flags_lock); \ 344 } while (0) 345 346 347 /* 348 * Tree describing entries by address. 349 * 350 * Addresses are unique. 351 * Entries with start == end may only exist if they are the first entry 352 * (sorted by address) within a free-memory tree. 353 */ 354 355 static inline int 356 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 357 const struct vm_map_entry *e2) 358 { 359 return e1->start < e2->start ? -1 : e1->start > e2->start; 360 } 361 362 /* 363 * Copy mapentry. 
364 */ 365 static inline void 366 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 367 { 368 caddr_t csrc, cdst; 369 size_t sz; 370 371 csrc = (caddr_t)src; 372 cdst = (caddr_t)dst; 373 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 374 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 375 376 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 377 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 378 memcpy(cdst, csrc, sz); 379 } 380 381 /* 382 * Handle free-list insertion. 383 */ 384 void 385 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 386 struct vm_map_entry *entry) 387 { 388 const struct uvm_addr_functions *fun; 389 #ifdef VMMAP_DEBUG 390 vaddr_t min, max, bound; 391 #endif 392 393 #ifdef VMMAP_DEBUG 394 /* 395 * Boundary check. 396 * Boundaries are folded if they go on the same free list. 397 */ 398 min = VMMAP_FREE_START(entry); 399 max = VMMAP_FREE_END(entry); 400 401 while (min < max) { 402 bound = uvm_map_boundary(map, min, max); 403 KASSERT(uvm_map_uaddr(map, min) == uaddr); 404 min = bound; 405 } 406 #endif 407 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 408 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 409 410 UVM_MAP_REQ_WRITE(map); 411 412 /* Actual insert: forward to uaddr pointer. */ 413 if (uaddr != NULL) { 414 fun = uaddr->uaddr_functions; 415 KDASSERT(fun != NULL); 416 if (fun->uaddr_free_insert != NULL) 417 (*fun->uaddr_free_insert)(map, uaddr, entry); 418 entry->etype |= UVM_ET_FREEMAPPED; 419 } 420 421 /* Update fspace augmentation. */ 422 uvm_map_addr_augment(entry); 423 } 424 425 /* 426 * Handle free-list removal. 427 */ 428 void 429 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 430 struct vm_map_entry *entry) 431 { 432 const struct uvm_addr_functions *fun; 433 434 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 435 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 436 UVM_MAP_REQ_WRITE(map); 437 438 if (uaddr != NULL) { 439 fun = uaddr->uaddr_functions; 440 if (fun->uaddr_free_remove != NULL) 441 (*fun->uaddr_free_remove)(map, uaddr, entry); 442 entry->etype &= ~UVM_ET_FREEMAPPED; 443 } 444 } 445 446 /* 447 * Handle address tree insertion. 448 */ 449 void 450 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 451 { 452 struct vm_map_entry *res; 453 454 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 455 panic("uvm_mapent_addr_insert: entry still in addr list"); 456 KDASSERT(entry->start <= entry->end); 457 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 458 (entry->end & (vaddr_t)PAGE_MASK) == 0); 459 460 TRACEPOINT(uvm, map_insert, 461 entry->start, entry->end, entry->protection, NULL); 462 463 UVM_MAP_REQ_WRITE(map); 464 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 465 if (res != NULL) { 466 panic("uvm_mapent_addr_insert: map %p entry %p " 467 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 468 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 469 map, entry, 470 entry->start, entry->end, entry->guard, entry->fspace, 471 res, res->start, res->end, res->guard, res->fspace); 472 } 473 } 474 475 /* 476 * Handle address tree removal. 
 */
void
uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *res;

	TRACEPOINT(uvm, map_remove,
	    entry->start, entry->end, entry->protection, NULL);

	UVM_MAP_REQ_WRITE(map);
	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
	if (res != entry)
		panic("uvm_mapent_addr_remove");
	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
}

/*
 * uvm_map_reference: add reference to a map
 *
 * => map need not be locked
 */
void
uvm_map_reference(struct vm_map *map)
{
	atomic_inc_int(&map->ref_count);
}

void
uvm_map_lock_entry(struct vm_map_entry *entry)
{
	if (entry->aref.ar_amap != NULL) {
		amap_lock(entry->aref.ar_amap);
	}
	if (UVM_ET_ISOBJ(entry)) {
		rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
	}
}

void
uvm_map_unlock_entry(struct vm_map_entry *entry)
{
	if (UVM_ET_ISOBJ(entry)) {
		rw_exit(entry->object.uvm_obj->vmobjlock);
	}
	if (entry->aref.ar_amap != NULL) {
		amap_unlock(entry->aref.ar_amap);
	}
}

/*
 * Calculate the dused delta.
 */
vsize_t
uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
{
	struct vmspace *vm;
	vsize_t sz;
	vaddr_t lmax;
	vaddr_t stack_begin, stack_end; /* Position of stack. */

	KASSERT(map->flags & VM_MAP_ISVMSPACE);
	vm = (struct vmspace *)map;
	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);

	sz = 0;
	while (min != max) {
		lmax = max;
		if (min < stack_begin && lmax > stack_begin)
			lmax = stack_begin;
		else if (min < stack_end && lmax > stack_end)
			lmax = stack_end;

		if (min >= stack_begin && min < stack_end) {
			/* nothing */
		} else
			sz += lmax - min;
		min = lmax;
	}

	return sz >> PAGE_SHIFT;
}

/*
 * Find the entry describing the given address.
 */
struct vm_map_entry*
uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
{
	struct vm_map_entry *iter;

	iter = RBT_ROOT(uvm_map_addr, atree);
	while (iter != NULL) {
		if (iter->start > addr)
			iter = RBT_LEFT(uvm_map_addr, iter);
		else if (VMMAP_FREE_END(iter) <= addr)
			iter = RBT_RIGHT(uvm_map_addr, iter);
		else
			return iter;
	}
	return NULL;
}

/*
 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
 *
 * Push dead entries into a linked list.
 * Since the linked list abuses the address tree for storage, the entry
 * may not be linked in a map.
 *
 * The deadq must be initialized with TAILQ_INIT() before the first call
 * to this macro.
 * uvm_unmap_detach(deadq, 0) will remove and free the dead entries.
 */
static inline void
dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
{
	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
}
#define DEAD_ENTRY_PUSH(_headptr, _entry)	\
	dead_entry_push((_headptr), (_entry))

/*
 * Helper function for uvm_map_findspace_tree.
 *
 * Given allocation constraints and pmap constraints, finds the
 * lowest and highest address in a range that can be used for the
 * allocation.
 *
 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
 *
 *
 * Big chunk of math with a seasoning of dragons.
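 *
 * Worked example (illustrative, assuming a 4 KB PAGE_SIZE and no guard
 * page): with VMMAP_FREE_START(sel) == 0x1000, VMMAP_FREE_END(sel) ==
 * 0xb000, sz == 0x2000 and align == 0x4000, the selectable window starts
 * out as [0x1000, 0x9000]; rounding for the alignment yields sel_min ==
 * 0x4000 and sel_max == 0x8000, and either aligned address keeps the
 * whole allocation inside the free block.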
609 */ 610 int 611 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 612 struct vm_map_entry *sel, vaddr_t align, 613 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 614 { 615 vaddr_t sel_min, sel_max; 616 #ifdef PMAP_PREFER 617 vaddr_t pmap_min, pmap_max; 618 #endif /* PMAP_PREFER */ 619 #ifdef DIAGNOSTIC 620 int bad; 621 #endif /* DIAGNOSTIC */ 622 623 sel_min = VMMAP_FREE_START(sel); 624 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 625 626 #ifdef PMAP_PREFER 627 628 /* 629 * There are two special cases, in which we can satisfy the align 630 * requirement and the pmap_prefer requirement. 631 * - when pmap_off == 0, we always select the largest of the two 632 * - when pmap_off % align == 0 and pmap_align > align, we simply 633 * satisfy the pmap_align requirement and automatically 634 * satisfy the align requirement. 635 */ 636 if (align > PAGE_SIZE && 637 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 638 /* 639 * Simple case: only use align. 640 */ 641 sel_min = roundup(sel_min, align); 642 sel_max &= ~(align - 1); 643 644 if (sel_min > sel_max) 645 return ENOMEM; 646 647 /* Correct for bias. */ 648 if (sel_max - sel_min > FSPACE_BIASGAP) { 649 if (bias > 0) { 650 sel_min = sel_max - FSPACE_BIASGAP; 651 sel_min = roundup(sel_min, align); 652 } else if (bias < 0) { 653 sel_max = sel_min + FSPACE_BIASGAP; 654 sel_max &= ~(align - 1); 655 } 656 } 657 } else if (pmap_align != 0) { 658 /* 659 * Special case: satisfy both pmap_prefer and 660 * align argument. 661 */ 662 pmap_max = sel_max & ~(pmap_align - 1); 663 pmap_min = sel_min; 664 if (pmap_max < sel_min) 665 return ENOMEM; 666 667 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 668 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 669 pmap_min = pmap_max - FSPACE_BIASGAP; 670 /* Align pmap_min. */ 671 pmap_min &= ~(pmap_align - 1); 672 if (pmap_min < sel_min) 673 pmap_min += pmap_align; 674 if (pmap_min > pmap_max) 675 return ENOMEM; 676 677 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 678 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 679 pmap_max = (pmap_min + FSPACE_BIASGAP) & 680 ~(pmap_align - 1); 681 } 682 if (pmap_min > pmap_max) 683 return ENOMEM; 684 685 /* Apply pmap prefer offset. */ 686 pmap_max |= pmap_off; 687 if (pmap_max > sel_max) 688 pmap_max -= pmap_align; 689 pmap_min |= pmap_off; 690 if (pmap_min < sel_min) 691 pmap_min += pmap_align; 692 693 /* 694 * Fixup: it's possible that pmap_min and pmap_max 695 * cross each other. In this case, try to find one 696 * address that is allowed. 697 * (This usually happens in biased case.) 698 */ 699 if (pmap_min > pmap_max) { 700 if (pmap_min < sel_max) 701 pmap_max = pmap_min; 702 else if (pmap_max > sel_min) 703 pmap_min = pmap_max; 704 else 705 return ENOMEM; 706 } 707 708 /* Internal validation. 
*/ 709 KDASSERT(pmap_min <= pmap_max); 710 711 sel_min = pmap_min; 712 sel_max = pmap_max; 713 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 714 sel_min = sel_max - FSPACE_BIASGAP; 715 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 716 sel_max = sel_min + FSPACE_BIASGAP; 717 718 #else 719 720 if (align > PAGE_SIZE) { 721 sel_min = roundup(sel_min, align); 722 sel_max &= ~(align - 1); 723 if (sel_min > sel_max) 724 return ENOMEM; 725 726 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 727 if (bias > 0) { 728 sel_min = roundup(sel_max - FSPACE_BIASGAP, 729 align); 730 } else { 731 sel_max = (sel_min + FSPACE_BIASGAP) & 732 ~(align - 1); 733 } 734 } 735 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 736 sel_min = sel_max - FSPACE_BIASGAP; 737 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 738 sel_max = sel_min + FSPACE_BIASGAP; 739 740 #endif 741 742 if (sel_min > sel_max) 743 return ENOMEM; 744 745 #ifdef DIAGNOSTIC 746 bad = 0; 747 /* Lower boundary check. */ 748 if (sel_min < VMMAP_FREE_START(sel)) { 749 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 750 sel_min, VMMAP_FREE_START(sel)); 751 bad++; 752 } 753 /* Upper boundary check. */ 754 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 755 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 756 sel_max, 757 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 758 bad++; 759 } 760 /* Lower boundary alignment. */ 761 if (align != 0 && (sel_min & (align - 1)) != 0) { 762 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 763 sel_min, align); 764 bad++; 765 } 766 /* Upper boundary alignment. */ 767 if (align != 0 && (sel_max & (align - 1)) != 0) { 768 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 769 sel_max, align); 770 bad++; 771 } 772 /* Lower boundary PMAP_PREFER check. */ 773 if (pmap_align != 0 && align == 0 && 774 (sel_min & (pmap_align - 1)) != pmap_off) { 775 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 776 sel_min, sel_min & (pmap_align - 1), pmap_off); 777 bad++; 778 } 779 /* Upper boundary PMAP_PREFER check. */ 780 if (pmap_align != 0 && align == 0 && 781 (sel_max & (pmap_align - 1)) != pmap_off) { 782 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 783 sel_max, sel_max & (pmap_align - 1), pmap_off); 784 bad++; 785 } 786 787 if (bad) { 788 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 789 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 790 "bias = %d, " 791 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 792 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 793 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 794 } 795 #endif /* DIAGNOSTIC */ 796 797 *min = sel_min; 798 *max = sel_max; 799 return 0; 800 } 801 802 /* 803 * Test if memory starting at addr with sz bytes is free. 804 * 805 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 806 * the space. 807 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 808 */ 809 int 810 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 811 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 812 vaddr_t addr, vsize_t sz) 813 { 814 struct uvm_addr_state *free; 815 struct uvm_map_addr *atree; 816 struct vm_map_entry *i, *i_end; 817 818 if (addr + sz < addr) 819 return 0; 820 821 /* 822 * Kernel memory above uvm_maxkaddr is considered unavailable. 
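	 * (uvm_maxkaddr only moves up on demand, via the uvm_map_kmem_grow()
	 * calls in uvm_map() below.)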
	 */
	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
		if (addr + sz > uvm_maxkaddr)
			return 0;
	}

	atree = &map->addr;

	/*
	 * Fill in first, last, so they point at the entries containing the
	 * first and last address of the range.
	 * Note that if they are not NULL, we don't perform the lookup.
	 */
	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
	if (*start_ptr == NULL) {
		*start_ptr = uvm_map_entrybyaddr(atree, addr);
		if (*start_ptr == NULL)
			return 0;
	} else
		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
	if (*end_ptr == NULL) {
		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
			*end_ptr = *start_ptr;
		else {
			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
			if (*end_ptr == NULL)
				return 0;
		}
	} else
		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));

	/* Validation. */
	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
	KDASSERT((*start_ptr)->start <= addr &&
	    VMMAP_FREE_END(*start_ptr) > addr &&
	    (*end_ptr)->start < addr + sz &&
	    VMMAP_FREE_END(*end_ptr) >= addr + sz);

	/*
	 * Check that none of the entries intersects with <addr, addr+sz>.
	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
	 * considered unavailable unless called by those allocators.
	 */
	i = *start_ptr;
	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
	for (; i != i_end;
	    i = RBT_NEXT(uvm_map_addr, i)) {
		if (i->start != i->end && i->end > addr)
			return 0;

		/*
		 * uaddr_exe and uaddr_brk_stack may only be used
		 * by these allocators and the NULL uaddr (i.e. no
		 * uaddr).
		 * Reject if this requirement is not met.
		 */
		if (uaddr != NULL) {
			free = uvm_map_uaddr_e(map, i);

			if (uaddr != free && free != NULL &&
			    (free == map->uaddr_exe ||
			     free == map->uaddr_brk_stack))
				return 0;
		}
	}

	return -1;
}

/*
 * Invoke each address selector until an address is found.
 * Will not invoke uaddr_exe.
 */
int
uvm_map_findspace(struct vm_map *map, struct vm_map_entry **first,
    struct vm_map_entry **last, vaddr_t *addr, vsize_t sz,
    vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
{
	struct uvm_addr_state *uaddr;
	int i;

	/*
	 * Allocation for sz bytes at any address,
	 * using the addr selectors in order.
	 */
	for (i = 0; i < nitems(map->uaddr_any); i++) {
		uaddr = map->uaddr_any[i];

		if (uvm_addr_invoke(map, uaddr, first, last,
		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
			return 0;
	}

	/* Fall back to brk() and stack() address selectors. */
	uaddr = map->uaddr_brk_stack;
	if (uvm_addr_invoke(map, uaddr, first, last,
	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
		return 0;

	return ENOMEM;
}

/* Calculate entry augmentation value. */
vsize_t
uvm_map_addr_augment_get(struct vm_map_entry *entry)
{
	vsize_t augment;
	struct vm_map_entry *left, *right;

	augment = entry->fspace;
	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, left->fspace_augment);
	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, right->fspace_augment);
	return augment;
}

/*
 * Update augmentation data in entry.
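 *
 * Each entry caches the largest free-space run in its subtree:
 * fspace_augment = MAX(own fspace, left child's fspace_augment,
 * right child's fspace_augment), as computed by uvm_map_addr_augment_get()
 * above.  The loop below propagates a changed value towards the root and
 * stops at the first entry whose cached value is already correct.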
942 */ 943 void 944 uvm_map_addr_augment(struct vm_map_entry *entry) 945 { 946 vsize_t augment; 947 948 while (entry != NULL) { 949 /* Calculate value for augmentation. */ 950 augment = uvm_map_addr_augment_get(entry); 951 952 /* 953 * Descend update. 954 * Once we find an entry that already has the correct value, 955 * stop, since it means all its parents will use the correct 956 * value too. 957 */ 958 if (entry->fspace_augment == augment) 959 return; 960 entry->fspace_augment = augment; 961 entry = RBT_PARENT(uvm_map_addr, entry); 962 } 963 } 964 965 /* 966 * uvm_mapanon: establish a valid mapping in map for an anon 967 * 968 * => *addr and sz must be a multiple of PAGE_SIZE. 969 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 970 * => map must be unlocked. 971 * 972 * => align: align vaddr, must be a power-of-2. 973 * Align is only a hint and will be ignored if the alignment fails. 974 */ 975 int 976 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 977 vsize_t align, unsigned int flags) 978 { 979 struct vm_map_entry *first, *last, *entry, *new; 980 struct uvm_map_deadq dead; 981 vm_prot_t prot; 982 vm_prot_t maxprot; 983 vm_inherit_t inherit; 984 int advice; 985 int error; 986 vaddr_t pmap_align, pmap_offset; 987 vaddr_t hint; 988 989 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 990 KASSERT(map != kernel_map); 991 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 992 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 993 splassert(IPL_NONE); 994 KASSERT((flags & UVM_FLAG_TRYLOCK) == 0); 995 996 /* 997 * We use pmap_align and pmap_offset as alignment and offset variables. 998 * 999 * Because the align parameter takes precedence over pmap prefer, 1000 * the pmap_align will need to be set to align, with pmap_offset = 0, 1001 * if pmap_prefer will not align. 1002 */ 1003 pmap_align = MAX(align, PAGE_SIZE); 1004 pmap_offset = 0; 1005 1006 /* Decode parameters. */ 1007 prot = UVM_PROTECTION(flags); 1008 maxprot = UVM_MAXPROTECTION(flags); 1009 advice = UVM_ADVICE(flags); 1010 inherit = UVM_INHERIT(flags); 1011 error = 0; 1012 hint = trunc_page(*addr); 1013 TAILQ_INIT(&dead); 1014 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1015 KASSERT((align & (align - 1)) == 0); 1016 1017 /* Check protection. */ 1018 if ((prot & maxprot) != prot) 1019 return EACCES; 1020 1021 /* 1022 * Before grabbing the lock, allocate a map entry for later 1023 * use to ensure we don't wait for memory while holding the 1024 * vm_map_lock. 1025 */ 1026 new = uvm_mapent_alloc(map, flags); 1027 if (new == NULL) 1028 return ENOMEM; 1029 1030 vm_map_lock(map); 1031 first = last = NULL; 1032 if (flags & UVM_FLAG_FIXED) { 1033 /* 1034 * Fixed location. 1035 * 1036 * Note: we ignore align, pmap_prefer. 1037 * Fill in first, last and *addr. 1038 */ 1039 KASSERT((*addr & PAGE_MASK) == 0); 1040 1041 /* Check that the space is available. */ 1042 if (flags & UVM_FLAG_UNMAP) { 1043 if ((flags & UVM_FLAG_STACK) && 1044 !uvm_map_is_stack_remappable(map, *addr, sz)) { 1045 error = EINVAL; 1046 goto unlock; 1047 } 1048 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1049 } 1050 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1051 error = ENOMEM; 1052 goto unlock; 1053 } 1054 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1055 (align == 0 || (*addr & (align - 1)) == 0) && 1056 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1057 /* 1058 * Address used as hint. 1059 * 1060 * Note: we enforce the alignment restriction, 1061 * but ignore pmap_prefer. 
1062 */ 1063 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1064 /* Run selection algorithm for executables. */ 1065 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1066 addr, sz, pmap_align, pmap_offset, prot, hint); 1067 1068 if (error != 0) 1069 goto unlock; 1070 } else { 1071 /* Update freelists from vmspace. */ 1072 uvm_map_vmspace_update(map, &dead, flags); 1073 1074 error = uvm_map_findspace(map, &first, &last, addr, sz, 1075 pmap_align, pmap_offset, prot, hint); 1076 1077 if (error != 0) 1078 goto unlock; 1079 } 1080 1081 /* Double-check if selected address doesn't cause overflow. */ 1082 if (*addr + sz < *addr) { 1083 error = ENOMEM; 1084 goto unlock; 1085 } 1086 1087 /* If we only want a query, return now. */ 1088 if (flags & UVM_FLAG_QUERY) { 1089 error = 0; 1090 goto unlock; 1091 } 1092 1093 /* 1094 * Create new entry. 1095 * first and last may be invalidated after this call. 1096 */ 1097 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1098 new); 1099 if (entry == NULL) { 1100 error = ENOMEM; 1101 goto unlock; 1102 } 1103 new = NULL; 1104 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1105 entry->object.uvm_obj = NULL; 1106 entry->offset = 0; 1107 entry->protection = prot; 1108 entry->max_protection = maxprot; 1109 entry->inheritance = inherit; 1110 entry->wired_count = 0; 1111 entry->advice = advice; 1112 if (prot & PROT_WRITE) 1113 map->wserial++; 1114 if (flags & UVM_FLAG_SYSCALL) { 1115 entry->etype |= UVM_ET_SYSCALL; 1116 map->wserial++; 1117 } 1118 if (flags & UVM_FLAG_STACK) { 1119 entry->etype |= UVM_ET_STACK; 1120 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 1121 map->sserial++; 1122 } 1123 if (flags & UVM_FLAG_COPYONW) { 1124 entry->etype |= UVM_ET_COPYONWRITE; 1125 if ((flags & UVM_FLAG_OVERLAY) == 0) 1126 entry->etype |= UVM_ET_NEEDSCOPY; 1127 } 1128 if (flags & UVM_FLAG_CONCEAL) 1129 entry->etype |= UVM_ET_CONCEAL; 1130 if (flags & UVM_FLAG_OVERLAY) { 1131 entry->aref.ar_pageoff = 0; 1132 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1133 } 1134 1135 /* Update map and process statistics. */ 1136 map->size += sz; 1137 if (prot != PROT_NONE) { 1138 ((struct vmspace *)map)->vm_dused += 1139 uvmspace_dused(map, *addr, *addr + sz); 1140 } 1141 1142 unlock: 1143 vm_map_unlock(map); 1144 1145 /* 1146 * Remove dead entries. 1147 * 1148 * Dead entries may be the result of merging. 1149 * uvm_map_mkentry may also create dead entries, when it attempts to 1150 * destroy free-space entries. 1151 */ 1152 uvm_unmap_detach(&dead, 0); 1153 1154 if (new) 1155 uvm_mapent_free(new); 1156 return error; 1157 } 1158 1159 /* 1160 * uvm_map: establish a valid mapping in map 1161 * 1162 * => *addr and sz must be a multiple of PAGE_SIZE. 1163 * => map must be unlocked. 1164 * => <uobj,uoffset> value meanings (4 cases): 1165 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1166 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1167 * [3] <uobj,uoffset> == normal mapping 1168 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1169 * 1170 * case [4] is for kernel mappings where we don't know the offset until 1171 * we've found a virtual address. note that kernel object offsets are 1172 * always relative to vm_map_min(kernel_map). 1173 * 1174 * => align: align vaddr, must be a power-of-2. 1175 * Align is only a hint and will be ignored if the alignment fails. 
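 *
 * Usage sketch (illustrative only, not taken from the original comment;
 * uobj stands for some kernel object), showing case [4]:
 *
 *	vaddr_t va = 0;
 *	int error = uvm_map(kernel_map, &va, PAGE_SIZE, uobj,
 *	    UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
 *	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
 *
 * On success, va holds the kernel-chosen address and the object offset is
 * derived from va relative to vm_map_min(kernel_map).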
1176 */ 1177 int 1178 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1179 struct uvm_object *uobj, voff_t uoffset, 1180 vsize_t align, unsigned int flags) 1181 { 1182 struct vm_map_entry *first, *last, *entry, *new; 1183 struct uvm_map_deadq dead; 1184 vm_prot_t prot; 1185 vm_prot_t maxprot; 1186 vm_inherit_t inherit; 1187 int advice; 1188 int error; 1189 vaddr_t pmap_align, pmap_offset; 1190 vaddr_t hint; 1191 1192 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1193 splassert(IPL_NONE); 1194 else 1195 splassert(IPL_VM); 1196 1197 /* 1198 * We use pmap_align and pmap_offset as alignment and offset variables. 1199 * 1200 * Because the align parameter takes precedence over pmap prefer, 1201 * the pmap_align will need to be set to align, with pmap_offset = 0, 1202 * if pmap_prefer will not align. 1203 */ 1204 if (uoffset == UVM_UNKNOWN_OFFSET) { 1205 pmap_align = MAX(align, PAGE_SIZE); 1206 pmap_offset = 0; 1207 } else { 1208 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1209 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1210 1211 if (align == 0 || 1212 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1213 /* pmap_offset satisfies align, no change. */ 1214 } else { 1215 /* Align takes precedence over pmap prefer. */ 1216 pmap_align = align; 1217 pmap_offset = 0; 1218 } 1219 } 1220 1221 /* Decode parameters. */ 1222 prot = UVM_PROTECTION(flags); 1223 maxprot = UVM_MAXPROTECTION(flags); 1224 advice = UVM_ADVICE(flags); 1225 inherit = UVM_INHERIT(flags); 1226 error = 0; 1227 hint = trunc_page(*addr); 1228 TAILQ_INIT(&dead); 1229 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1230 KASSERT((align & (align - 1)) == 0); 1231 1232 /* Holes are incompatible with other types of mappings. */ 1233 if (flags & UVM_FLAG_HOLE) { 1234 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1235 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1236 } 1237 1238 /* Unset hint for kernel_map non-fixed allocations. */ 1239 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1240 hint = 0; 1241 1242 /* Check protection. */ 1243 if ((prot & maxprot) != prot) 1244 return EACCES; 1245 1246 if (map == kernel_map && 1247 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1248 panic("uvm_map: kernel map W^X violation requested"); 1249 1250 /* 1251 * Before grabbing the lock, allocate a map entry for later 1252 * use to ensure we don't wait for memory while holding the 1253 * vm_map_lock. 1254 */ 1255 new = uvm_mapent_alloc(map, flags); 1256 if (new == NULL) 1257 return ENOMEM; 1258 1259 if (flags & UVM_FLAG_TRYLOCK) { 1260 if (vm_map_lock_try(map) == FALSE) { 1261 error = EFAULT; 1262 goto out; 1263 } 1264 } else { 1265 vm_map_lock(map); 1266 } 1267 1268 first = last = NULL; 1269 if (flags & UVM_FLAG_FIXED) { 1270 /* 1271 * Fixed location. 1272 * 1273 * Note: we ignore align, pmap_prefer. 1274 * Fill in first, last and *addr. 1275 */ 1276 KASSERT((*addr & PAGE_MASK) == 0); 1277 1278 /* 1279 * Grow pmap to include allocated address. 1280 * If the growth fails, the allocation will fail too. 1281 */ 1282 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1283 uvm_maxkaddr < (*addr + sz)) { 1284 uvm_map_kmem_grow(map, &dead, 1285 *addr + sz - uvm_maxkaddr, flags); 1286 } 1287 1288 /* Check that the space is available. 
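	 * (The availability check runs after the optional forced unmap
	 * performed for UVM_FLAG_UNMAP.)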
*/ 1289 if (flags & UVM_FLAG_UNMAP) 1290 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1291 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1292 error = ENOMEM; 1293 goto unlock; 1294 } 1295 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1296 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1297 (align == 0 || (*addr & (align - 1)) == 0) && 1298 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1299 /* 1300 * Address used as hint. 1301 * 1302 * Note: we enforce the alignment restriction, 1303 * but ignore pmap_prefer. 1304 */ 1305 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1306 /* Run selection algorithm for executables. */ 1307 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1308 addr, sz, pmap_align, pmap_offset, prot, hint); 1309 1310 /* Grow kernel memory and try again. */ 1311 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1312 uvm_map_kmem_grow(map, &dead, sz, flags); 1313 1314 error = uvm_addr_invoke(map, map->uaddr_exe, 1315 &first, &last, addr, sz, 1316 pmap_align, pmap_offset, prot, hint); 1317 } 1318 1319 if (error != 0) 1320 goto unlock; 1321 } else { 1322 /* Update freelists from vmspace. */ 1323 if (map->flags & VM_MAP_ISVMSPACE) 1324 uvm_map_vmspace_update(map, &dead, flags); 1325 1326 error = uvm_map_findspace(map, &first, &last, addr, sz, 1327 pmap_align, pmap_offset, prot, hint); 1328 1329 /* Grow kernel memory and try again. */ 1330 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1331 uvm_map_kmem_grow(map, &dead, sz, flags); 1332 1333 error = uvm_map_findspace(map, &first, &last, addr, sz, 1334 pmap_align, pmap_offset, prot, hint); 1335 } 1336 1337 if (error != 0) 1338 goto unlock; 1339 } 1340 1341 /* Double-check if selected address doesn't cause overflow. */ 1342 if (*addr + sz < *addr) { 1343 error = ENOMEM; 1344 goto unlock; 1345 } 1346 1347 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1348 uvm_maxkaddr >= *addr + sz); 1349 1350 /* If we only want a query, return now. */ 1351 if (flags & UVM_FLAG_QUERY) { 1352 error = 0; 1353 goto unlock; 1354 } 1355 1356 if (uobj == NULL) 1357 uoffset = 0; 1358 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1359 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1360 uoffset = *addr - vm_map_min(kernel_map); 1361 } 1362 1363 /* 1364 * Create new entry. 1365 * first and last may be invalidated after this call. 
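	 * (uvm_map_mkentry() may have pushed them onto the dead queue, so
	 * they must not be dereferenced afterwards.)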
1366 */ 1367 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1368 new); 1369 if (entry == NULL) { 1370 error = ENOMEM; 1371 goto unlock; 1372 } 1373 new = NULL; 1374 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1375 entry->object.uvm_obj = uobj; 1376 entry->offset = uoffset; 1377 entry->protection = prot; 1378 entry->max_protection = maxprot; 1379 entry->inheritance = inherit; 1380 entry->wired_count = 0; 1381 entry->advice = advice; 1382 if (prot & PROT_WRITE) 1383 map->wserial++; 1384 if (flags & UVM_FLAG_SYSCALL) { 1385 entry->etype |= UVM_ET_SYSCALL; 1386 map->wserial++; 1387 } 1388 if (flags & UVM_FLAG_STACK) { 1389 entry->etype |= UVM_ET_STACK; 1390 if (flags & UVM_FLAG_UNMAP) 1391 map->sserial++; 1392 } 1393 if (uobj) 1394 entry->etype |= UVM_ET_OBJ; 1395 else if (flags & UVM_FLAG_HOLE) 1396 entry->etype |= UVM_ET_HOLE; 1397 if (flags & UVM_FLAG_NOFAULT) 1398 entry->etype |= UVM_ET_NOFAULT; 1399 if (flags & UVM_FLAG_WC) 1400 entry->etype |= UVM_ET_WC; 1401 if (flags & UVM_FLAG_COPYONW) { 1402 entry->etype |= UVM_ET_COPYONWRITE; 1403 if ((flags & UVM_FLAG_OVERLAY) == 0) 1404 entry->etype |= UVM_ET_NEEDSCOPY; 1405 } 1406 if (flags & UVM_FLAG_CONCEAL) 1407 entry->etype |= UVM_ET_CONCEAL; 1408 if (flags & UVM_FLAG_OVERLAY) { 1409 entry->aref.ar_pageoff = 0; 1410 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1411 } 1412 1413 /* Update map and process statistics. */ 1414 if (!(flags & UVM_FLAG_HOLE)) { 1415 map->size += sz; 1416 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && 1417 prot != PROT_NONE) { 1418 ((struct vmspace *)map)->vm_dused += 1419 uvmspace_dused(map, *addr, *addr + sz); 1420 } 1421 } 1422 1423 /* 1424 * Try to merge entry. 1425 * 1426 * Userland allocations are kept separated most of the time. 1427 * Forego the effort of merging what most of the time can't be merged 1428 * and only try the merge if it concerns a kernel entry. 1429 */ 1430 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1431 (map->flags & VM_MAP_ISVMSPACE) == 0) 1432 uvm_mapent_tryjoin(map, entry, &dead); 1433 1434 unlock: 1435 vm_map_unlock(map); 1436 1437 /* 1438 * Remove dead entries. 1439 * 1440 * Dead entries may be the result of merging. 1441 * uvm_map_mkentry may also create dead entries, when it attempts to 1442 * destroy free-space entries. 1443 */ 1444 if (map->flags & VM_MAP_INTRSAFE) 1445 uvm_unmap_detach_intrsafe(&dead); 1446 else 1447 uvm_unmap_detach(&dead, 0); 1448 out: 1449 if (new) 1450 uvm_mapent_free(new); 1451 return error; 1452 } 1453 1454 /* 1455 * True iff e1 and e2 can be joined together. 1456 */ 1457 int 1458 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1459 struct vm_map_entry *e2) 1460 { 1461 KDASSERT(e1 != NULL && e2 != NULL); 1462 1463 /* Must be the same entry type and not have free memory between. */ 1464 if (e1->etype != e2->etype || e1->end != e2->start) 1465 return 0; 1466 1467 /* Submaps are never joined. */ 1468 if (UVM_ET_ISSUBMAP(e1)) 1469 return 0; 1470 1471 /* Never merge wired memory. */ 1472 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1473 return 0; 1474 1475 /* Protection, inheritance and advice must be equal. */ 1476 if (e1->protection != e2->protection || 1477 e1->max_protection != e2->max_protection || 1478 e1->inheritance != e2->inheritance || 1479 e1->advice != e2->advice) 1480 return 0; 1481 1482 /* If uvm_object: object itself and offsets within object must match. 
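	 * E.g. e1 = [A, B) at offset off and e2 = [B, C) at offset
	 * off + (B - A) describe a contiguous range of the same object and
	 * may be joined.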
*/ 1483 if (UVM_ET_ISOBJ(e1)) { 1484 if (e1->object.uvm_obj != e2->object.uvm_obj) 1485 return 0; 1486 if (e1->offset + (e1->end - e1->start) != e2->offset) 1487 return 0; 1488 } 1489 1490 /* 1491 * Cannot join shared amaps. 1492 * Note: no need to lock amap to look at refs, since we don't care 1493 * about its exact value. 1494 * If it is 1 (i.e. we have the only reference) it will stay there. 1495 */ 1496 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1497 return 0; 1498 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1499 return 0; 1500 1501 /* Apparently, e1 and e2 match. */ 1502 return 1; 1503 } 1504 1505 /* 1506 * Join support function. 1507 * 1508 * Returns the merged entry on success. 1509 * Returns NULL if the merge failed. 1510 */ 1511 struct vm_map_entry* 1512 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1513 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1514 { 1515 struct uvm_addr_state *free; 1516 1517 /* 1518 * Merging is not supported for map entries that 1519 * contain an amap in e1. This should never happen 1520 * anyway, because only kernel entries are merged. 1521 * These do not contain amaps. 1522 * e2 contains no real information in its amap, 1523 * so it can be erased immediately. 1524 */ 1525 KASSERT(e1->aref.ar_amap == NULL); 1526 1527 /* 1528 * Don't drop obj reference: 1529 * uvm_unmap_detach will do this for us. 1530 */ 1531 free = uvm_map_uaddr_e(map, e1); 1532 uvm_mapent_free_remove(map, free, e1); 1533 1534 free = uvm_map_uaddr_e(map, e2); 1535 uvm_mapent_free_remove(map, free, e2); 1536 uvm_mapent_addr_remove(map, e2); 1537 e1->end = e2->end; 1538 e1->guard = e2->guard; 1539 e1->fspace = e2->fspace; 1540 uvm_mapent_free_insert(map, free, e1); 1541 1542 DEAD_ENTRY_PUSH(dead, e2); 1543 return e1; 1544 } 1545 1546 /* 1547 * Attempt forward and backward joining of entry. 1548 * 1549 * Returns entry after joins. 1550 * We are guaranteed that the amap of entry is either non-existent or 1551 * has never been used. 1552 */ 1553 struct vm_map_entry* 1554 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1555 struct uvm_map_deadq *dead) 1556 { 1557 struct vm_map_entry *other; 1558 struct vm_map_entry *merged; 1559 1560 /* Merge with previous entry. */ 1561 other = RBT_PREV(uvm_map_addr, entry); 1562 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1563 merged = uvm_mapent_merge(map, other, entry, dead); 1564 if (merged) 1565 entry = merged; 1566 } 1567 1568 /* 1569 * Merge with next entry. 1570 * 1571 * Because amap can only extend forward and the next entry 1572 * probably contains sensible info, only perform forward merging 1573 * in the absence of an amap. 1574 */ 1575 other = RBT_NEXT(uvm_map_addr, entry); 1576 if (other && entry->aref.ar_amap == NULL && 1577 other->aref.ar_amap == NULL && 1578 uvm_mapent_isjoinable(map, entry, other)) { 1579 merged = uvm_mapent_merge(map, entry, other, dead); 1580 if (merged) 1581 entry = merged; 1582 } 1583 1584 return entry; 1585 } 1586 1587 /* 1588 * Kill entries that are no longer in a map. 1589 */ 1590 void 1591 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1592 { 1593 struct vm_map_entry *entry, *tmp; 1594 int waitok = flags & UVM_PLA_WAITOK; 1595 1596 TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) { 1597 /* Drop reference to amap, if we've got one. 
*/ 1598 if (entry->aref.ar_amap) 1599 amap_unref(entry->aref.ar_amap, 1600 entry->aref.ar_pageoff, 1601 atop(entry->end - entry->start), 1602 flags & AMAP_REFALL); 1603 1604 /* Skip entries for which we have to grab the kernel lock. */ 1605 if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry)) 1606 continue; 1607 1608 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1609 uvm_mapent_free(entry); 1610 } 1611 1612 if (TAILQ_EMPTY(deadq)) 1613 return; 1614 1615 KERNEL_LOCK(); 1616 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1617 if (waitok) 1618 uvm_pause(); 1619 /* Drop reference to our backing object, if we've got one. */ 1620 if (UVM_ET_ISSUBMAP(entry)) { 1621 /* ... unlikely to happen, but play it safe */ 1622 uvm_map_deallocate(entry->object.sub_map); 1623 } else if (UVM_ET_ISOBJ(entry) && 1624 entry->object.uvm_obj->pgops->pgo_detach) { 1625 entry->object.uvm_obj->pgops->pgo_detach( 1626 entry->object.uvm_obj); 1627 } 1628 1629 /* Step to next. */ 1630 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1631 uvm_mapent_free(entry); 1632 } 1633 KERNEL_UNLOCK(); 1634 } 1635 1636 void 1637 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1638 { 1639 struct vm_map_entry *entry; 1640 1641 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1642 KASSERT(entry->aref.ar_amap == NULL); 1643 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1644 KASSERT(!UVM_ET_ISOBJ(entry)); 1645 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1646 uvm_mapent_free(entry); 1647 } 1648 } 1649 1650 /* 1651 * Create and insert new entry. 1652 * 1653 * Returned entry contains new addresses and is inserted properly in the tree. 1654 * first and last are (probably) no longer valid. 1655 */ 1656 struct vm_map_entry* 1657 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1658 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1659 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1660 { 1661 struct vm_map_entry *entry, *prev; 1662 struct uvm_addr_state *free; 1663 vaddr_t min, max; /* free space boundaries for new entry */ 1664 1665 KDASSERT(map != NULL); 1666 KDASSERT(first != NULL); 1667 KDASSERT(last != NULL); 1668 KDASSERT(dead != NULL); 1669 KDASSERT(sz > 0); 1670 KDASSERT(addr + sz > addr); 1671 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1672 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1673 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1674 uvm_tree_sanity(map, __FILE__, __LINE__); 1675 1676 min = addr + sz; 1677 max = VMMAP_FREE_END(last); 1678 1679 /* Initialize new entry. */ 1680 if (new == NULL) 1681 entry = uvm_mapent_alloc(map, flags); 1682 else 1683 entry = new; 1684 if (entry == NULL) 1685 return NULL; 1686 entry->offset = 0; 1687 entry->etype = 0; 1688 entry->wired_count = 0; 1689 entry->aref.ar_pageoff = 0; 1690 entry->aref.ar_amap = NULL; 1691 1692 entry->start = addr; 1693 entry->end = min; 1694 entry->guard = 0; 1695 entry->fspace = 0; 1696 1697 /* Reset free space in first. */ 1698 free = uvm_map_uaddr_e(map, first); 1699 uvm_mapent_free_remove(map, free, first); 1700 first->guard = 0; 1701 first->fspace = 0; 1702 1703 /* 1704 * Remove all entries that are fully replaced. 1705 * We are iterating using last in reverse order. 
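	 * Each removed entry is pushed onto the dead queue; the caller
	 * reclaims it later via uvm_unmap_detach() or
	 * uvm_unmap_detach_intrsafe().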
1706 */ 1707 for (; first != last; last = prev) { 1708 prev = RBT_PREV(uvm_map_addr, last); 1709 1710 KDASSERT(last->start == last->end); 1711 free = uvm_map_uaddr_e(map, last); 1712 uvm_mapent_free_remove(map, free, last); 1713 uvm_mapent_addr_remove(map, last); 1714 DEAD_ENTRY_PUSH(dead, last); 1715 } 1716 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1717 if (first->start == addr) { 1718 uvm_mapent_addr_remove(map, first); 1719 DEAD_ENTRY_PUSH(dead, first); 1720 } else { 1721 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1722 addr, flags); 1723 } 1724 1725 /* Finally, link in entry. */ 1726 uvm_mapent_addr_insert(map, entry); 1727 uvm_map_fix_space(map, entry, min, max, flags); 1728 1729 uvm_tree_sanity(map, __FILE__, __LINE__); 1730 return entry; 1731 } 1732 1733 1734 /* 1735 * uvm_mapent_alloc: allocate a map entry 1736 */ 1737 struct vm_map_entry * 1738 uvm_mapent_alloc(struct vm_map *map, int flags) 1739 { 1740 struct vm_map_entry *me, *ne; 1741 int pool_flags; 1742 int i; 1743 1744 pool_flags = PR_WAITOK; 1745 if (flags & UVM_FLAG_TRYLOCK) 1746 pool_flags = PR_NOWAIT; 1747 1748 if (map->flags & VM_MAP_INTRSAFE || cold) { 1749 mtx_enter(&uvm_kmapent_mtx); 1750 if (SLIST_EMPTY(&uvm.kentry_free)) { 1751 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1752 &kd_nowait); 1753 if (ne == NULL) 1754 panic("uvm_mapent_alloc: cannot allocate map " 1755 "entry"); 1756 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1757 SLIST_INSERT_HEAD(&uvm.kentry_free, 1758 &ne[i], daddrs.addr_kentry); 1759 } 1760 if (ratecheck(&uvm_kmapent_last_warn_time, 1761 &uvm_kmapent_warn_rate)) 1762 printf("uvm_mapent_alloc: out of static " 1763 "map entries\n"); 1764 } 1765 me = SLIST_FIRST(&uvm.kentry_free); 1766 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1767 uvmexp.kmapent++; 1768 mtx_leave(&uvm_kmapent_mtx); 1769 me->flags = UVM_MAP_STATIC; 1770 } else if (map == kernel_map) { 1771 splassert(IPL_NONE); 1772 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1773 if (me == NULL) 1774 goto out; 1775 me->flags = UVM_MAP_KMEM; 1776 } else { 1777 splassert(IPL_NONE); 1778 me = pool_get(&uvm_map_entry_pool, pool_flags); 1779 if (me == NULL) 1780 goto out; 1781 me->flags = 0; 1782 } 1783 1784 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1785 out: 1786 return me; 1787 } 1788 1789 /* 1790 * uvm_mapent_free: free map entry 1791 * 1792 * => XXX: static pool for kernel map? 1793 */ 1794 void 1795 uvm_mapent_free(struct vm_map_entry *me) 1796 { 1797 if (me->flags & UVM_MAP_STATIC) { 1798 mtx_enter(&uvm_kmapent_mtx); 1799 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1800 uvmexp.kmapent--; 1801 mtx_leave(&uvm_kmapent_mtx); 1802 } else if (me->flags & UVM_MAP_KMEM) { 1803 splassert(IPL_NONE); 1804 pool_put(&uvm_map_entry_kmem_pool, me); 1805 } else { 1806 splassert(IPL_NONE); 1807 pool_put(&uvm_map_entry_pool, me); 1808 } 1809 } 1810 1811 /* 1812 * uvm_map_lookup_entry: find map entry at or before an address. 1813 * 1814 * => map must at least be read-locked by caller 1815 * => entry is returned in "entry" 1816 * => return value is true if address is in the returned entry 1817 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1818 * returned for those mappings. 
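 *
 * Usage sketch (illustrative, mirrors uvm_map_inentry_fix() below):
 *
 *	struct vm_map_entry *entry;
 *
 *	vm_map_lock_read(map);
 *	if (uvm_map_lookup_entry(map, trunc_page(addr), &entry))
 *		... entry->start <= trunc_page(addr) < entry->end ...
 *	vm_map_unlock_read(map);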
 */
boolean_t
uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry)
{
	*entry = uvm_map_entrybyaddr(&map->addr, address);
	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
	    (*entry)->start <= address && (*entry)->end > address;
}

/*
 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
 * grown -- then uvm_map_check_region_range() should not cache the entry
 * because growth won't be seen.
 */
int
uvm_map_inentry_sp(vm_map_entry_t entry)
{
	if ((entry->etype & UVM_ET_STACK) == 0) {
		if (entry->protection == PROT_NONE)
			return (-1); /* don't update range */
		return (0);
	}
	return (1);
}

/*
 * The system call must not come from a writeable entry, or W^X is violated.
 * (It would be nice if we could spot aliasing, which is also kind of bad.)
 *
 * The system call must come from a syscall-labeled entry (which are
 * the text regions of the main program, sigtramp, ld.so, or libc).
 */
int
uvm_map_inentry_pc(vm_map_entry_t entry)
{
	if (entry->protection & PROT_WRITE)
		return (0);	/* not permitted */
	if ((entry->etype & UVM_ET_SYSCALL) == 0)
		return (0);	/* not permitted */
	return (1);
}

int
uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
{
	return (serial != ie->ie_serial || ie->ie_start == 0 ||
	    addr < ie->ie_start || addr >= ie->ie_end);
}

/*
 * Inside a vm_map, find the entry containing the given address and verify
 * it via the supplied function.  Remember the low and high addresses of
 * the region if it is valid and return TRUE, else return FALSE.
 */
boolean_t
uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    int (*fn)(vm_map_entry_t), u_long serial)
{
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int ret;

	if (addr < map->min_offset || addr >= map->max_offset)
		return (FALSE);

	/* lock map */
	vm_map_lock_read(map);

	/* lookup */
	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}

	ret = (*fn)(entry);
	if (ret == 0) {
		vm_map_unlock_read(map);
		return (FALSE);
	} else if (ret == 1) {
		ie->ie_start = entry->start;
		ie->ie_end = entry->end;
		ie->ie_serial = serial;
	} else {
		/* do not update, re-check later */
	}
	vm_map_unlock_read(map);
	return (TRUE);
}

boolean_t
uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
{
	union sigval sv;
	boolean_t ok = TRUE;

	if (uvm_map_inentry_recheck(serial, addr, ie)) {
		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
		if (!ok) {
			KERNEL_LOCK();
			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
			    addr, ie->ie_start, ie->ie_end-1);
			p->p_p->ps_acflag |= AMAP;
			sv.sival_ptr = (void *)PROC_PC(p);
			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
			KERNEL_UNLOCK();
		}
	}
	return (ok);
}

/*
 * Check whether the given address range can be converted to a MAP_STACK
 * mapping.
 *
 * Must be called with map locked.
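 * (uvm_mapanon() performs this check when asked to remap a stack with
 * UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP; see above.)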
1936 */ 1937 boolean_t 1938 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz) 1939 { 1940 vaddr_t end = addr + sz; 1941 struct vm_map_entry *first, *iter, *prev = NULL; 1942 1943 if (!uvm_map_lookup_entry(map, addr, &first)) { 1944 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1945 addr, end, map); 1946 return FALSE; 1947 } 1948 1949 /* 1950 * Check that the address range exists and is contiguous. 1951 */ 1952 for (iter = first; iter != NULL && iter->start < end; 1953 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1954 /* 1955 * Make sure that we do not have holes in the range. 1956 */ 1957 #if 0 1958 if (prev != NULL) { 1959 printf("prev->start 0x%lx, prev->end 0x%lx, " 1960 "iter->start 0x%lx, iter->end 0x%lx\n", 1961 prev->start, prev->end, iter->start, iter->end); 1962 } 1963 #endif 1964 1965 if (prev != NULL && prev->end != iter->start) { 1966 printf("map stack 0x%lx-0x%lx of map %p failed: " 1967 "hole in range\n", addr, end, map); 1968 return FALSE; 1969 } 1970 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1971 printf("map stack 0x%lx-0x%lx of map %p failed: " 1972 "hole in range\n", addr, end, map); 1973 return FALSE; 1974 } 1975 } 1976 1977 return TRUE; 1978 } 1979 1980 /* 1981 * Remap the middle-pages of an existing mapping as a stack range. 1982 * If there exists a previous contiguous mapping with the given range 1983 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1984 * mapping is dropped, and a new anon mapping is created and marked as 1985 * a stack. 1986 * 1987 * Must be called with map unlocked. 1988 */ 1989 int 1990 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1991 { 1992 vm_map_t map = &p->p_vmspace->vm_map; 1993 vaddr_t start, end; 1994 int error; 1995 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1996 PROT_READ | PROT_WRITE | PROT_EXEC, 1997 MAP_INHERIT_COPY, MADV_NORMAL, 1998 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1999 UVM_FLAG_COPYONW); 2000 2001 start = round_page(addr); 2002 end = trunc_page(addr + sz); 2003 #ifdef MACHINE_STACK_GROWS_UP 2004 if (end == addr + sz) 2005 end -= PAGE_SIZE; 2006 #else 2007 if (start == addr) 2008 start += PAGE_SIZE; 2009 #endif 2010 2011 if (start < map->min_offset || end >= map->max_offset || end < start) 2012 return EINVAL; 2013 2014 error = uvm_mapanon(map, &start, end - start, 0, flags); 2015 if (error != 0) 2016 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 2017 2018 return error; 2019 } 2020 2021 /* 2022 * uvm_map_pie: return a random load address for a PIE executable 2023 * properly aligned. 
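 *
 * Sketch of the computation done below, in terms of the VM_PIE_*
 * limits defined right after this comment:
 *
 *	min  = roundup(VM_PIE_MIN_ADDR, align);
 *	addr = min + align * arc4random_uniform((VM_PIE_MAX_ADDR - min) / align);
 *
 * with the number of candidate slots clamped to 2^32 - 1.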
2024 */ 2025 #ifndef VM_PIE_MAX_ADDR 2026 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 2027 #endif 2028 2029 #ifndef VM_PIE_MIN_ADDR 2030 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 2031 #endif 2032 2033 #ifndef VM_PIE_MIN_ALIGN 2034 #define VM_PIE_MIN_ALIGN PAGE_SIZE 2035 #endif 2036 2037 vaddr_t 2038 uvm_map_pie(vaddr_t align) 2039 { 2040 vaddr_t addr, space, min; 2041 2042 align = MAX(align, VM_PIE_MIN_ALIGN); 2043 2044 /* round up to next alignment */ 2045 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 2046 2047 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 2048 return (align); 2049 2050 space = (VM_PIE_MAX_ADDR - min) / align; 2051 space = MIN(space, (u_int32_t)-1); 2052 2053 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 2054 addr += min; 2055 2056 return (addr); 2057 } 2058 2059 void 2060 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 2061 { 2062 struct uvm_map_deadq dead; 2063 2064 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 2065 (end & (vaddr_t)PAGE_MASK) == 0); 2066 TAILQ_INIT(&dead); 2067 vm_map_lock(map); 2068 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 2069 vm_map_unlock(map); 2070 2071 if (map->flags & VM_MAP_INTRSAFE) 2072 uvm_unmap_detach_intrsafe(&dead); 2073 else 2074 uvm_unmap_detach(&dead, 0); 2075 } 2076 2077 /* 2078 * Mark entry as free. 2079 * 2080 * entry will be put on the dead list. 2081 * The free space will be merged into the previous or a new entry, 2082 * unless markfree is false. 2083 */ 2084 void 2085 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 2086 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 2087 boolean_t markfree) 2088 { 2089 struct uvm_addr_state *free; 2090 struct vm_map_entry *prev; 2091 vaddr_t addr; /* Start of freed range. */ 2092 vaddr_t end; /* End of freed range. */ 2093 2094 prev = *prev_ptr; 2095 if (prev == entry) 2096 *prev_ptr = prev = NULL; 2097 2098 if (prev == NULL || 2099 VMMAP_FREE_END(prev) != entry->start) 2100 prev = RBT_PREV(uvm_map_addr, entry); 2101 2102 /* Entry is describing only free memory and has nothing to drain into. */ 2103 if (prev == NULL && entry->start == entry->end && markfree) { 2104 *prev_ptr = entry; 2105 return; 2106 } 2107 2108 addr = entry->start; 2109 end = VMMAP_FREE_END(entry); 2110 free = uvm_map_uaddr_e(map, entry); 2111 uvm_mapent_free_remove(map, free, entry); 2112 uvm_mapent_addr_remove(map, entry); 2113 DEAD_ENTRY_PUSH(dead, entry); 2114 2115 if (markfree) { 2116 if (prev) { 2117 free = uvm_map_uaddr_e(map, prev); 2118 uvm_mapent_free_remove(map, free, prev); 2119 } 2120 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 2121 } 2122 } 2123 2124 /* 2125 * Unwire and release referenced amap and object from map entry. 2126 */ 2127 void 2128 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry, 2129 int needlock) 2130 { 2131 /* Unwire removed map entry. */ 2132 if (VM_MAPENT_ISWIRED(entry)) { 2133 KERNEL_LOCK(); 2134 entry->wired_count = 0; 2135 uvm_fault_unwire_locked(map, entry->start, entry->end); 2136 KERNEL_UNLOCK(); 2137 } 2138 2139 if (needlock) 2140 uvm_map_lock_entry(entry); 2141 2142 /* Entry-type specific code. */ 2143 if (UVM_ET_ISHOLE(entry)) { 2144 /* Nothing to be done for holes. 
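 * A hole merely reserves a range of the address space; no pages were
 * entered in the pmap for it, so there is nothing to tear down here.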
*/ 2145 } else if (map->flags & VM_MAP_INTRSAFE) { 2146 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2147 2148 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2149 } else if (UVM_ET_ISOBJ(entry) && 2150 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2151 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2152 /* 2153 * Note: kernel object mappings are currently used in 2154 * two ways: 2155 * [1] "normal" mappings of pages in the kernel object 2156 * [2] uvm_km_valloc'd allocations in which we 2157 * pmap_enter in some non-kernel-object page 2158 * (e.g. vmapbuf). 2159 * 2160 * for case [1], we need to remove the mapping from 2161 * the pmap and then remove the page from the kernel 2162 * object (because, once pages in a kernel object are 2163 * unmapped they are no longer needed, unlike, say, 2164 * a vnode where you might want the data to persist 2165 * until flushed out of a queue). 2166 * 2167 * for case [2], we need to remove the mapping from 2168 * the pmap. there shouldn't be any pages at the 2169 * specified offset in the kernel object [but it 2170 * doesn't hurt to call uvm_km_pgremove just to be 2171 * safe?] 2172 * 2173 * uvm_km_pgremove currently does the following: 2174 * for pages in the kernel object range: 2175 * - drops the swap slot 2176 * - uvm_pagefree the page 2177 * 2178 * note there is version of uvm_km_pgremove() that 2179 * is used for "intrsafe" objects. 2180 */ 2181 /* 2182 * remove mappings from pmap and drop the pages 2183 * from the object. offsets are always relative 2184 * to vm_map_min(kernel_map). 2185 */ 2186 uvm_km_pgremove(entry->object.uvm_obj, entry->start, 2187 entry->end); 2188 } else { 2189 /* remove mappings the standard way. */ 2190 pmap_remove(map->pmap, entry->start, entry->end); 2191 } 2192 2193 if (needlock) 2194 uvm_map_unlock_entry(entry); 2195 } 2196 2197 void 2198 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2199 { 2200 uvm_unmap_kill_entry_withlock(map, entry, 0); 2201 } 2202 2203 /* 2204 * Remove all entries from start to end. 2205 * 2206 * If remove_holes, then remove ET_HOLE entries as well. 2207 * If markfree, entry will be properly marked free, otherwise, no replacement 2208 * entry will be put in the tree (corrupting the tree). 2209 */ 2210 void 2211 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2212 struct uvm_map_deadq *dead, boolean_t remove_holes, 2213 boolean_t markfree) 2214 { 2215 struct vm_map_entry *prev_hint, *next, *entry; 2216 2217 start = MAX(start, map->min_offset); 2218 end = MIN(end, map->max_offset); 2219 if (start >= end) 2220 return; 2221 2222 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2223 splassert(IPL_NONE); 2224 else 2225 splassert(IPL_VM); 2226 2227 /* Find first affected entry. */ 2228 entry = uvm_map_entrybyaddr(&map->addr, start); 2229 KDASSERT(entry != NULL && entry->start <= start); 2230 if (entry->end <= start && markfree) 2231 entry = RBT_NEXT(uvm_map_addr, entry); 2232 else 2233 UVM_MAP_CLIP_START(map, entry, start); 2234 2235 /* 2236 * Iterate entries until we reach end address. 2237 * prev_hint hints where the freed space can be appended to. 2238 */ 2239 prev_hint = NULL; 2240 for (; entry != NULL && entry->start < end; entry = next) { 2241 KDASSERT(entry->start >= start); 2242 if (entry->end > end || !markfree) 2243 UVM_MAP_CLIP_END(map, entry, end); 2244 KDASSERT(entry->start >= start && entry->end <= end); 2245 next = RBT_NEXT(uvm_map_addr, entry); 2246 2247 /* Don't remove holes unless asked to do so. 
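 * (uvmspace_exec() below passes remove_holes = TRUE when it nukes the
 * whole address space.)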
*/ 2248 if (UVM_ET_ISHOLE(entry)) { 2249 if (!remove_holes) { 2250 prev_hint = entry; 2251 continue; 2252 } 2253 } 2254 2255 /* A stack has been removed.. */ 2256 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2257 map->sserial++; 2258 2259 /* Kill entry. */ 2260 uvm_unmap_kill_entry_withlock(map, entry, 1); 2261 2262 /* Update space usage. */ 2263 if ((map->flags & VM_MAP_ISVMSPACE) && 2264 entry->object.uvm_obj == NULL && 2265 entry->protection != PROT_NONE && 2266 !UVM_ET_ISHOLE(entry)) { 2267 ((struct vmspace *)map)->vm_dused -= 2268 uvmspace_dused(map, entry->start, entry->end); 2269 } 2270 if (!UVM_ET_ISHOLE(entry)) 2271 map->size -= entry->end - entry->start; 2272 2273 /* Actual removal of entry. */ 2274 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2275 } 2276 2277 pmap_update(vm_map_pmap(map)); 2278 2279 #ifdef VMMAP_DEBUG 2280 if (markfree) { 2281 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2282 entry != NULL && entry->start < end; 2283 entry = RBT_NEXT(uvm_map_addr, entry)) { 2284 KDASSERT(entry->end <= start || 2285 entry->start == entry->end || 2286 UVM_ET_ISHOLE(entry)); 2287 } 2288 } else { 2289 vaddr_t a; 2290 for (a = start; a < end; a += PAGE_SIZE) 2291 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2292 } 2293 #endif 2294 } 2295 2296 /* 2297 * Mark all entries from first until end (exclusive) as pageable. 2298 * 2299 * Lock must be exclusive on entry and will not be touched. 2300 */ 2301 void 2302 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2303 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2304 { 2305 struct vm_map_entry *iter; 2306 2307 for (iter = first; iter != end; 2308 iter = RBT_NEXT(uvm_map_addr, iter)) { 2309 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2310 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2311 continue; 2312 2313 iter->wired_count = 0; 2314 uvm_fault_unwire_locked(map, iter->start, iter->end); 2315 } 2316 } 2317 2318 /* 2319 * Mark all entries from first until end (exclusive) as wired. 2320 * 2321 * Lockflags determines the lock state on return from this function. 2322 * Lock must be exclusive on entry. 2323 */ 2324 int 2325 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2326 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2327 int lockflags) 2328 { 2329 struct vm_map_entry *iter; 2330 #ifdef DIAGNOSTIC 2331 unsigned int timestamp_save; 2332 #endif 2333 int error; 2334 2335 /* 2336 * Wire pages in two passes: 2337 * 2338 * 1: holding the write lock, we create any anonymous maps that need 2339 * to be created. then we clip each map entry to the region to 2340 * be wired and increment its wiring count. 2341 * 2342 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2343 * in the pages for any newly wired area (wired_count == 1). 2344 * 2345 * downgrading to a read lock for uvm_fault_wire avoids a possible 2346 * deadlock with another thread that may have faulted on one of 2347 * the pages to be wired (it would mark the page busy, blocking 2348 * us, then in turn block on the map lock that we hold). 2349 * because we keep the read lock on the map, the copy-on-write 2350 * status of the entries we modify here cannot change. 
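 *
 * If uvm_fault_wire fails partway through pass 2, the error path below
 * unwires the entries that were already faulted in and drops the
 * wired_count taken on the remaining ones.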
2351 */ 2352 for (iter = first; iter != end; 2353 iter = RBT_NEXT(uvm_map_addr, iter)) { 2354 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2355 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2356 iter->protection == PROT_NONE) 2357 continue; 2358 2359 /* 2360 * Perform actions of vm_map_lookup that need the write lock. 2361 * - create an anonymous map for copy-on-write 2362 * - anonymous map for zero-fill 2363 * Skip submaps. 2364 */ 2365 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2366 UVM_ET_ISNEEDSCOPY(iter) && 2367 ((iter->protection & PROT_WRITE) || 2368 iter->object.uvm_obj == NULL)) { 2369 amap_copy(map, iter, M_WAITOK, 2370 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2371 iter->start, iter->end); 2372 } 2373 iter->wired_count++; 2374 } 2375 2376 /* 2377 * Pass 2. 2378 */ 2379 #ifdef DIAGNOSTIC 2380 timestamp_save = map->timestamp; 2381 #endif 2382 vm_map_busy(map); 2383 vm_map_downgrade(map); 2384 2385 error = 0; 2386 for (iter = first; error == 0 && iter != end; 2387 iter = RBT_NEXT(uvm_map_addr, iter)) { 2388 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2389 iter->protection == PROT_NONE) 2390 continue; 2391 2392 error = uvm_fault_wire(map, iter->start, iter->end, 2393 iter->protection); 2394 } 2395 2396 if (error) { 2397 /* 2398 * uvm_fault_wire failure 2399 * 2400 * Reacquire lock and undo our work. 2401 */ 2402 vm_map_upgrade(map); 2403 vm_map_unbusy(map); 2404 #ifdef DIAGNOSTIC 2405 if (timestamp_save != map->timestamp) 2406 panic("uvm_map_pageable_wire: stale map"); 2407 #endif 2408 2409 /* 2410 * first is no longer needed to restart loops. 2411 * Use it as iterator to unmap successful mappings. 2412 */ 2413 for (; first != iter; 2414 first = RBT_NEXT(uvm_map_addr, first)) { 2415 if (UVM_ET_ISHOLE(first) || 2416 first->start == first->end || 2417 first->protection == PROT_NONE) 2418 continue; 2419 2420 first->wired_count--; 2421 if (!VM_MAPENT_ISWIRED(first)) { 2422 uvm_fault_unwire_locked(map, 2423 first->start, first->end); 2424 } 2425 } 2426 2427 /* decrease counter in the rest of the entries */ 2428 for (; iter != end; 2429 iter = RBT_NEXT(uvm_map_addr, iter)) { 2430 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2431 iter->protection == PROT_NONE) 2432 continue; 2433 2434 iter->wired_count--; 2435 } 2436 2437 if ((lockflags & UVM_LK_EXIT) == 0) 2438 vm_map_unlock(map); 2439 return error; 2440 } 2441 2442 /* We are currently holding a read lock. */ 2443 if ((lockflags & UVM_LK_EXIT) == 0) { 2444 vm_map_unbusy(map); 2445 vm_map_unlock_read(map); 2446 } else { 2447 vm_map_upgrade(map); 2448 vm_map_unbusy(map); 2449 #ifdef DIAGNOSTIC 2450 if (timestamp_save != map->timestamp) 2451 panic("uvm_map_pageable_wire: stale map"); 2452 #endif 2453 } 2454 return 0; 2455 } 2456 2457 /* 2458 * uvm_map_pageable: set pageability of a range in a map. 2459 * 2460 * Flags: 2461 * UVM_LK_ENTER: map is already locked by caller 2462 * UVM_LK_EXIT: don't unlock map on exit 2463 * 2464 * The full range must be in use (entries may not have fspace != 0). 2465 * UVM_ET_HOLE counts as unmapped. 2466 */ 2467 int 2468 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2469 boolean_t new_pageable, int lockflags) 2470 { 2471 struct vm_map_entry *first, *last, *tmp; 2472 int error; 2473 2474 start = trunc_page(start); 2475 end = round_page(end); 2476 2477 if (start > end) 2478 return EINVAL; 2479 if (start == end) 2480 return 0; /* nothing to do */ 2481 if (start < map->min_offset) 2482 return EFAULT; /* why? 
see first XXX below */ 2483 if (end > map->max_offset) 2484 return EINVAL; /* why? see second XXX below */ 2485 2486 KASSERT(map->flags & VM_MAP_PAGEABLE); 2487 if ((lockflags & UVM_LK_ENTER) == 0) 2488 vm_map_lock(map); 2489 2490 /* 2491 * Find first entry. 2492 * 2493 * Initial test on start is different, because of the different 2494 * error returned. Rest is tested further down. 2495 */ 2496 first = uvm_map_entrybyaddr(&map->addr, start); 2497 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2498 /* 2499 * XXX if the first address is not mapped, it is EFAULT? 2500 */ 2501 error = EFAULT; 2502 goto out; 2503 } 2504 2505 /* Check that the range has no holes. */ 2506 for (last = first; last != NULL && last->start < end; 2507 last = RBT_NEXT(uvm_map_addr, last)) { 2508 if (UVM_ET_ISHOLE(last) || 2509 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2510 /* 2511 * XXX unmapped memory in range, why is it EINVAL 2512 * instead of EFAULT? 2513 */ 2514 error = EINVAL; 2515 goto out; 2516 } 2517 } 2518 2519 /* 2520 * Last ended at the first entry after the range. 2521 * Move back one step. 2522 * 2523 * Note that last may be NULL. 2524 */ 2525 if (last == NULL) { 2526 last = RBT_MAX(uvm_map_addr, &map->addr); 2527 if (last->end < end) { 2528 error = EINVAL; 2529 goto out; 2530 } 2531 } else { 2532 KASSERT(last != first); 2533 last = RBT_PREV(uvm_map_addr, last); 2534 } 2535 2536 /* Wire/unwire pages here. */ 2537 if (new_pageable) { 2538 /* 2539 * Mark pageable. 2540 * entries that are not wired are untouched. 2541 */ 2542 if (VM_MAPENT_ISWIRED(first)) 2543 UVM_MAP_CLIP_START(map, first, start); 2544 /* 2545 * Split last at end. 2546 * Make tmp be the first entry after what is to be touched. 2547 * If last is not wired, don't touch it. 2548 */ 2549 if (VM_MAPENT_ISWIRED(last)) { 2550 UVM_MAP_CLIP_END(map, last, end); 2551 tmp = RBT_NEXT(uvm_map_addr, last); 2552 } else 2553 tmp = last; 2554 2555 uvm_map_pageable_pgon(map, first, tmp, start, end); 2556 error = 0; 2557 2558 out: 2559 if ((lockflags & UVM_LK_EXIT) == 0) 2560 vm_map_unlock(map); 2561 return error; 2562 } else { 2563 /* 2564 * Mark entries wired. 2565 * entries are always touched (because recovery needs this). 2566 */ 2567 if (!VM_MAPENT_ISWIRED(first)) 2568 UVM_MAP_CLIP_START(map, first, start); 2569 /* 2570 * Split last at end. 2571 * Make tmp be the first entry after what is to be touched. 2572 * If last is not wired, don't touch it. 2573 */ 2574 if (!VM_MAPENT_ISWIRED(last)) { 2575 UVM_MAP_CLIP_END(map, last, end); 2576 tmp = RBT_NEXT(uvm_map_addr, last); 2577 } else 2578 tmp = last; 2579 2580 return uvm_map_pageable_wire(map, first, tmp, start, end, 2581 lockflags); 2582 } 2583 } 2584 2585 /* 2586 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2587 * all mapped regions. 2588 * 2589 * Map must not be locked. 2590 * If no flags are specified, all ragions are unwired. 
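 *
 * MCL_CURRENT wires everything currently mapped (subject to the wired
 * page limits checked below); MCL_FUTURE sets VM_MAP_WIREFUTURE so that
 * future mappings are wired as they are established.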
2591 */ 2592 int 2593 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2594 { 2595 vsize_t size; 2596 struct vm_map_entry *iter; 2597 2598 KASSERT(map->flags & VM_MAP_PAGEABLE); 2599 vm_map_lock(map); 2600 2601 if (flags == 0) { 2602 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2603 NULL, map->min_offset, map->max_offset); 2604 2605 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2606 vm_map_unlock(map); 2607 return 0; 2608 } 2609 2610 if (flags & MCL_FUTURE) 2611 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2612 if (!(flags & MCL_CURRENT)) { 2613 vm_map_unlock(map); 2614 return 0; 2615 } 2616 2617 /* 2618 * Count number of pages in all non-wired entries. 2619 * If the number exceeds the limit, abort. 2620 */ 2621 size = 0; 2622 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2623 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2624 continue; 2625 2626 size += iter->end - iter->start; 2627 } 2628 2629 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2630 vm_map_unlock(map); 2631 return ENOMEM; 2632 } 2633 2634 /* XXX non-pmap_wired_count case must be handled by caller */ 2635 #ifdef pmap_wired_count 2636 if (limit != 0 && 2637 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2638 vm_map_unlock(map); 2639 return ENOMEM; 2640 } 2641 #endif 2642 2643 /* 2644 * uvm_map_pageable_wire will release lock 2645 */ 2646 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2647 NULL, map->min_offset, map->max_offset, 0); 2648 } 2649 2650 /* 2651 * Initialize map. 2652 * 2653 * Allocates sufficient entries to describe the free memory in the map. 2654 */ 2655 void 2656 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max, 2657 int flags) 2658 { 2659 int i; 2660 2661 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2662 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2663 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2664 2665 /* 2666 * Update parameters. 2667 * 2668 * This code handles (vaddr_t)-1 and other page mask ending addresses 2669 * properly. 2670 * We lose the top page if the full virtual address space is used. 2671 */ 2672 if (max & (vaddr_t)PAGE_MASK) { 2673 max += 1; 2674 if (max == 0) /* overflow */ 2675 max -= PAGE_SIZE; 2676 } 2677 2678 RBT_INIT(uvm_map_addr, &map->addr); 2679 map->uaddr_exe = NULL; 2680 for (i = 0; i < nitems(map->uaddr_any); ++i) 2681 map->uaddr_any[i] = NULL; 2682 map->uaddr_brk_stack = NULL; 2683 2684 map->pmap = pmap; 2685 map->size = 0; 2686 map->ref_count = 0; 2687 map->min_offset = min; 2688 map->max_offset = max; 2689 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2690 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2691 map->flags = flags; 2692 map->timestamp = 0; 2693 if (flags & VM_MAP_ISVMSPACE) 2694 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2695 else 2696 rw_init(&map->lock, "kmmaplk"); 2697 mtx_init(&map->mtx, IPL_VM); 2698 mtx_init(&map->flags_lock, IPL_VM); 2699 2700 /* Configure the allocators. */ 2701 if (flags & VM_MAP_ISVMSPACE) 2702 uvm_map_setup_md(map); 2703 else 2704 map->uaddr_any[3] = &uaddr_kbootstrap; 2705 2706 /* 2707 * Fill map entries. 2708 * We do not need to write-lock the map here because only the current 2709 * thread sees it right now. Initialize ref_count to 0 above to avoid 2710 * bogus triggering of lock-not-held assertions. 2711 */ 2712 uvm_map_setup_entries(map); 2713 uvm_tree_sanity(map, __FILE__, __LINE__); 2714 map->ref_count = 1; 2715 } 2716 2717 /* 2718 * Destroy the map. 
2719 * 2720 * This is the inverse operation to uvm_map_setup. 2721 */ 2722 void 2723 uvm_map_teardown(struct vm_map *map) 2724 { 2725 struct uvm_map_deadq dead_entries; 2726 struct vm_map_entry *entry, *tmp; 2727 #ifdef VMMAP_DEBUG 2728 size_t numq, numt; 2729 #endif 2730 int i; 2731 2732 KERNEL_ASSERT_LOCKED(); 2733 KERNEL_UNLOCK(); 2734 KERNEL_ASSERT_UNLOCKED(); 2735 2736 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2737 2738 /* Remove address selectors. */ 2739 uvm_addr_destroy(map->uaddr_exe); 2740 map->uaddr_exe = NULL; 2741 for (i = 0; i < nitems(map->uaddr_any); i++) { 2742 uvm_addr_destroy(map->uaddr_any[i]); 2743 map->uaddr_any[i] = NULL; 2744 } 2745 uvm_addr_destroy(map->uaddr_brk_stack); 2746 map->uaddr_brk_stack = NULL; 2747 2748 /* 2749 * Remove entries. 2750 * 2751 * The following is based on graph breadth-first search. 2752 * 2753 * In color terms: 2754 * - the dead_entries set contains all nodes that are reachable 2755 * (i.e. both the black and the grey nodes) 2756 * - any entry not in dead_entries is white 2757 * - any entry that appears in dead_entries before entry, 2758 * is black, the rest is grey. 2759 * The set [entry, end] is also referred to as the wavefront. 2760 * 2761 * Since the tree is always a fully connected graph, the breadth-first 2762 * search guarantees that each vmmap_entry is visited exactly once. 2763 * The vm_map is broken down in linear time. 2764 */ 2765 TAILQ_INIT(&dead_entries); 2766 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2767 DEAD_ENTRY_PUSH(&dead_entries, entry); 2768 while (entry != NULL) { 2769 sched_pause(yield); 2770 uvm_unmap_kill_entry(map, entry); 2771 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2772 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2773 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2774 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2775 /* Update wave-front. */ 2776 entry = TAILQ_NEXT(entry, dfree.deadq); 2777 } 2778 2779 #ifdef VMMAP_DEBUG 2780 numt = numq = 0; 2781 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2782 numt++; 2783 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2784 numq++; 2785 KASSERT(numt == numq); 2786 #endif 2787 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2788 2789 KERNEL_LOCK(); 2790 2791 pmap_destroy(map->pmap); 2792 map->pmap = NULL; 2793 } 2794 2795 /* 2796 * Populate map with free-memory entries. 2797 * 2798 * Map must be initialized and empty. 2799 */ 2800 void 2801 uvm_map_setup_entries(struct vm_map *map) 2802 { 2803 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2804 2805 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2806 } 2807 2808 /* 2809 * Split entry at given address. 2810 * 2811 * orig: entry that is to be split. 2812 * next: a newly allocated map entry that is not linked. 2813 * split: address at which the split is done. 
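 *
 * Two cases are handled below: if split lies at or beyond orig->end only
 * the free space behind orig is divided; otherwise the mapped region
 * itself is cut and next's amap/object references are adjusted.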
2814 */ 2815 void 2816 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2817 struct vm_map_entry *next, vaddr_t split) 2818 { 2819 struct uvm_addr_state *free, *free_before; 2820 vsize_t adj; 2821 2822 if ((split & PAGE_MASK) != 0) { 2823 panic("uvm_map_splitentry: split address 0x%lx " 2824 "not on page boundary!", split); 2825 } 2826 KDASSERT(map != NULL && orig != NULL && next != NULL); 2827 uvm_tree_sanity(map, __FILE__, __LINE__); 2828 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2829 2830 #ifdef VMMAP_DEBUG 2831 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2832 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2833 #endif /* VMMAP_DEBUG */ 2834 2835 /* 2836 * Free space will change, unlink from free space tree. 2837 */ 2838 free = uvm_map_uaddr_e(map, orig); 2839 uvm_mapent_free_remove(map, free, orig); 2840 2841 adj = split - orig->start; 2842 2843 uvm_mapent_copy(orig, next); 2844 if (split >= orig->end) { 2845 next->etype = 0; 2846 next->offset = 0; 2847 next->wired_count = 0; 2848 next->start = next->end = split; 2849 next->guard = 0; 2850 next->fspace = VMMAP_FREE_END(orig) - split; 2851 next->aref.ar_amap = NULL; 2852 next->aref.ar_pageoff = 0; 2853 orig->guard = MIN(orig->guard, split - orig->end); 2854 orig->fspace = split - VMMAP_FREE_START(orig); 2855 } else { 2856 orig->fspace = 0; 2857 orig->guard = 0; 2858 orig->end = next->start = split; 2859 2860 if (next->aref.ar_amap) { 2861 amap_splitref(&orig->aref, &next->aref, adj); 2862 } 2863 if (UVM_ET_ISSUBMAP(orig)) { 2864 uvm_map_reference(next->object.sub_map); 2865 next->offset += adj; 2866 } else if (UVM_ET_ISOBJ(orig)) { 2867 if (next->object.uvm_obj->pgops && 2868 next->object.uvm_obj->pgops->pgo_reference) { 2869 KERNEL_LOCK(); 2870 next->object.uvm_obj->pgops->pgo_reference( 2871 next->object.uvm_obj); 2872 KERNEL_UNLOCK(); 2873 } 2874 next->offset += adj; 2875 } 2876 } 2877 2878 /* 2879 * Link next into address tree. 2880 * Link orig and next into free-space tree. 2881 * 2882 * Don't insert 'next' into the addr tree until orig has been linked, 2883 * in case the free-list looks at adjecent entries in the addr tree 2884 * for its decisions. 2885 */ 2886 if (orig->fspace > 0) 2887 free_before = free; 2888 else 2889 free_before = uvm_map_uaddr_e(map, orig); 2890 uvm_mapent_free_insert(map, free_before, orig); 2891 uvm_mapent_addr_insert(map, next); 2892 uvm_mapent_free_insert(map, free, next); 2893 2894 uvm_tree_sanity(map, __FILE__, __LINE__); 2895 } 2896 2897 2898 #ifdef VMMAP_DEBUG 2899 2900 void 2901 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2902 char *file, int line) 2903 { 2904 char* map_special; 2905 2906 if (test) 2907 return; 2908 2909 if (map == kernel_map) 2910 map_special = " (kernel_map)"; 2911 else if (map == kmem_map) 2912 map_special = " (kmem_map)"; 2913 else 2914 map_special = ""; 2915 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2916 line, test_str); 2917 } 2918 2919 /* 2920 * Check that map is sane. 2921 */ 2922 void 2923 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2924 { 2925 struct vm_map_entry *iter; 2926 vaddr_t addr; 2927 vaddr_t min, max, bound; /* Bounds checker. */ 2928 struct uvm_addr_state *free; 2929 2930 addr = vm_map_min(map); 2931 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2932 /* 2933 * Valid start, end. 2934 * Catch overflow for end+fspace. 
2935 */ 2936 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2937 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2938 2939 /* May not be empty. */ 2940 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2941 file, line); 2942 2943 /* Addresses for entry must lie within map boundaries. */ 2944 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2945 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2946 2947 /* Tree may not have gaps. */ 2948 UVM_ASSERT(map, iter->start == addr, file, line); 2949 addr = VMMAP_FREE_END(iter); 2950 2951 /* 2952 * Free space may not cross boundaries, unless the same 2953 * free list is used on both sides of the border. 2954 */ 2955 min = VMMAP_FREE_START(iter); 2956 max = VMMAP_FREE_END(iter); 2957 2958 while (min < max && 2959 (bound = uvm_map_boundary(map, min, max)) != max) { 2960 UVM_ASSERT(map, 2961 uvm_map_uaddr(map, bound - 1) == 2962 uvm_map_uaddr(map, bound), 2963 file, line); 2964 min = bound; 2965 } 2966 2967 free = uvm_map_uaddr_e(map, iter); 2968 if (free) { 2969 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2970 file, line); 2971 } else { 2972 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2973 file, line); 2974 } 2975 } 2976 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2977 } 2978 2979 void 2980 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2981 { 2982 struct vm_map_entry *iter; 2983 vsize_t size; 2984 2985 size = 0; 2986 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2987 if (!UVM_ET_ISHOLE(iter)) 2988 size += iter->end - iter->start; 2989 } 2990 2991 if (map->size != size) 2992 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2993 UVM_ASSERT(map, map->size == size, file, line); 2994 2995 vmspace_validate(map); 2996 } 2997 2998 /* 2999 * This function validates the statistics on vmspace. 3000 */ 3001 void 3002 vmspace_validate(struct vm_map *map) 3003 { 3004 struct vmspace *vm; 3005 struct vm_map_entry *iter; 3006 vaddr_t imin, imax; 3007 vaddr_t stack_begin, stack_end; /* Position of stack. */ 3008 vsize_t stack, heap; /* Measured sizes. */ 3009 3010 if (!(map->flags & VM_MAP_ISVMSPACE)) 3011 return; 3012 3013 vm = (struct vmspace *)map; 3014 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 3015 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 3016 3017 stack = heap = 0; 3018 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 3019 imin = imax = iter->start; 3020 3021 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL || 3022 iter->protection == PROT_NONE) 3023 continue; 3024 3025 /* 3026 * Update stack, heap. 3027 * Keep in mind that (theoretically) the entries of 3028 * userspace and stack may be joined. 3029 */ 3030 while (imin != iter->end) { 3031 /* 3032 * Set imax to the first boundary crossed between 3033 * imin and stack addresses.
3034 */ 3035 imax = iter->end; 3036 if (imin < stack_begin && imax > stack_begin) 3037 imax = stack_begin; 3038 else if (imin < stack_end && imax > stack_end) 3039 imax = stack_end; 3040 3041 if (imin >= stack_begin && imin < stack_end) 3042 stack += imax - imin; 3043 else 3044 heap += imax - imin; 3045 imin = imax; 3046 } 3047 } 3048 3049 heap >>= PAGE_SHIFT; 3050 if (heap != vm->vm_dused) { 3051 printf("vmspace stack range: 0x%lx-0x%lx\n", 3052 stack_begin, stack_end); 3053 panic("vmspace_validate: vmspace.vm_dused invalid, " 3054 "expected %ld pgs, got %ld pgs in map %p", 3055 heap, vm->vm_dused, 3056 map); 3057 } 3058 } 3059 3060 #endif /* VMMAP_DEBUG */ 3061 3062 /* 3063 * uvm_map_init: init mapping system at boot time. note that we allocate 3064 * and init the static pool of structs vm_map_entry for the kernel here. 3065 */ 3066 void 3067 uvm_map_init(void) 3068 { 3069 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 3070 int lcv; 3071 3072 /* now set up static pool of kernel map entries ... */ 3073 mtx_init(&uvm_kmapent_mtx, IPL_VM); 3074 SLIST_INIT(&uvm.kentry_free); 3075 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 3076 SLIST_INSERT_HEAD(&uvm.kentry_free, 3077 &kernel_map_entry[lcv], daddrs.addr_kentry); 3078 } 3079 3080 /* initialize the map-related pools. */ 3081 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 3082 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 3083 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 3084 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 3085 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 3086 IPL_VM, 0, "vmmpekpl", NULL); 3087 pool_sethiwat(&uvm_map_entry_pool, 8192); 3088 3089 uvm_addr_init(); 3090 } 3091 3092 #if defined(DDB) 3093 3094 /* 3095 * DDB hooks 3096 */ 3097 3098 /* 3099 * uvm_map_printit: actually prints the map 3100 */ 3101 void 3102 uvm_map_printit(struct vm_map *map, boolean_t full, 3103 int (*pr)(const char *, ...)) 3104 { 3105 struct vmspace *vm; 3106 struct vm_map_entry *entry; 3107 struct uvm_addr_state *free; 3108 int in_free, i; 3109 char buf[8]; 3110 3111 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3112 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 3113 map->b_start, map->b_end); 3114 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 3115 map->s_start, map->s_end); 3116 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 3117 map->size, map->ref_count, map->timestamp, 3118 map->flags); 3119 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3120 pmap_resident_count(map->pmap)); 3121 3122 /* struct vmspace handling. */ 3123 if (map->flags & VM_MAP_ISVMSPACE) { 3124 vm = (struct vmspace *)map; 3125 3126 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 3127 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 3128 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 3129 vm->vm_tsize, vm->vm_dsize); 3130 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 3131 vm->vm_taddr, vm->vm_daddr); 3132 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 3133 vm->vm_maxsaddr, vm->vm_minsaddr); 3134 } 3135 3136 if (!full) 3137 goto print_uaddr; 3138 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3139 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3140 entry, entry->start, entry->end, entry->object.uvm_obj, 3141 (long long)entry->offset, entry->aref.ar_amap, 3142 entry->aref.ar_pageoff); 3143 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " 3144 "syscall=%c, prot(max)=%d/%d, inh=%d, " 3145 "wc=%d, adv=%d\n", 3146 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3147 (entry->etype & UVM_ET_COPYONWRITE) ? 
'T' : 'F', 3148 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3149 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3150 (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F', 3151 entry->protection, entry->max_protection, 3152 entry->inheritance, entry->wired_count, entry->advice); 3153 3154 free = uvm_map_uaddr_e(map, entry); 3155 in_free = (free != NULL); 3156 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3157 "free=0x%lx-0x%lx\n", 3158 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3159 in_free ? 'T' : 'F', 3160 entry->guard, 3161 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3162 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3163 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3164 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3165 if (free) { 3166 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3167 free->uaddr_minaddr, free->uaddr_maxaddr, 3168 free->uaddr_functions->uaddr_name); 3169 } 3170 } 3171 3172 print_uaddr: 3173 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3174 for (i = 0; i < nitems(map->uaddr_any); i++) { 3175 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3176 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3177 } 3178 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3179 } 3180 3181 /* 3182 * uvm_object_printit: actually prints the object 3183 */ 3184 void 3185 uvm_object_printit(struct uvm_object *uobj, boolean_t full, 3186 int (*pr)(const char *, ...)) 3187 { 3188 struct vm_page *pg; 3189 int cnt = 0; 3190 3191 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3192 uobj, uobj->pgops, uobj->uo_npages); 3193 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3194 (*pr)("refs=<SYSTEM>\n"); 3195 else 3196 (*pr)("refs=%d\n", uobj->uo_refs); 3197 3198 if (!full) { 3199 return; 3200 } 3201 (*pr)(" PAGES <pg,offset>:\n "); 3202 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3203 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3204 if ((cnt % 3) == 2) { 3205 (*pr)("\n "); 3206 } 3207 cnt++; 3208 } 3209 if ((cnt % 3) != 2) { 3210 (*pr)("\n"); 3211 } 3212 } 3213 3214 /* 3215 * uvm_page_printit: actually print the page 3216 */ 3217 static const char page_flagbits[] = 3218 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3219 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3220 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3221 3222 void 3223 uvm_page_printit(struct vm_page *pg, boolean_t full, 3224 int (*pr)(const char *, ...)) 3225 { 3226 struct vm_page *tpg; 3227 struct uvm_object *uobj; 3228 struct pglist *pgl; 3229 3230 (*pr)("PAGE %p:\n", pg); 3231 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3232 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3233 (long long)pg->phys_addr); 3234 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3235 pg->uobject, pg->uanon, (long long)pg->offset); 3236 #if defined(UVM_PAGE_TRKOWN) 3237 if (pg->pg_flags & PG_BUSY) 3238 (*pr)(" owning thread = %d, tag=%s", 3239 pg->owner, pg->owner_tag); 3240 else 3241 (*pr)(" page not busy, no owner"); 3242 #else 3243 (*pr)(" [page ownership tracking disabled]"); 3244 #endif 3245 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3246 3247 if (!full) 3248 return; 3249 3250 /* cross-verify object/anon */ 3251 if ((pg->pg_flags & PQ_FREE) == 0) { 3252 if (pg->pg_flags & PQ_ANON) { 3253 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3254 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3255 (pg->uanon) ? 
pg->uanon->an_page : NULL); 3256 else 3257 (*pr)(" anon backpointer is OK\n"); 3258 } else { 3259 uobj = pg->uobject; 3260 if (uobj) { 3261 (*pr)(" checking object list\n"); 3262 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3263 if (tpg == pg) { 3264 break; 3265 } 3266 } 3267 if (tpg) 3268 (*pr)(" page found on object list\n"); 3269 else 3270 (*pr)(" >>> PAGE NOT FOUND " 3271 "ON OBJECT LIST! <<<\n"); 3272 } 3273 } 3274 } 3275 3276 /* cross-verify page queue */ 3277 if (pg->pg_flags & PQ_FREE) { 3278 if (uvm_pmr_isfree(pg)) 3279 (*pr)(" page found in uvm_pmemrange\n"); 3280 else 3281 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3282 pgl = NULL; 3283 } else if (pg->pg_flags & PQ_INACTIVE) { 3284 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3285 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3286 } else if (pg->pg_flags & PQ_ACTIVE) { 3287 pgl = &uvm.page_active; 3288 } else { 3289 pgl = NULL; 3290 } 3291 3292 if (pgl) { 3293 (*pr)(" checking pageq list\n"); 3294 TAILQ_FOREACH(tpg, pgl, pageq) { 3295 if (tpg == pg) { 3296 break; 3297 } 3298 } 3299 if (tpg) 3300 (*pr)(" page found on pageq list\n"); 3301 else 3302 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3303 } 3304 } 3305 #endif 3306 3307 /* 3308 * uvm_map_protect: change map protection 3309 * 3310 * => set_max means set max_protection. 3311 * => map must be unlocked. 3312 */ 3313 int 3314 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3315 vm_prot_t new_prot, boolean_t set_max) 3316 { 3317 struct vm_map_entry *first, *iter; 3318 vm_prot_t old_prot; 3319 vm_prot_t mask; 3320 vsize_t dused; 3321 int error; 3322 3323 if (start > end) 3324 return EINVAL; 3325 start = MAX(start, map->min_offset); 3326 end = MIN(end, map->max_offset); 3327 if (start >= end) 3328 return 0; 3329 3330 dused = 0; 3331 error = 0; 3332 vm_map_lock(map); 3333 3334 /* 3335 * Set up first and last. 3336 * - first will contain first entry at or after start. 3337 */ 3338 first = uvm_map_entrybyaddr(&map->addr, start); 3339 KDASSERT(first != NULL); 3340 if (first->end <= start) 3341 first = RBT_NEXT(uvm_map_addr, first); 3342 3343 /* First, check for protection violations. */ 3344 for (iter = first; iter != NULL && iter->start < end; 3345 iter = RBT_NEXT(uvm_map_addr, iter)) { 3346 /* Treat memory holes as free space. */ 3347 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3348 continue; 3349 3350 old_prot = iter->protection; 3351 if (old_prot == PROT_NONE && new_prot != old_prot) { 3352 dused += uvmspace_dused( 3353 map, MAX(start, iter->start), MIN(end, iter->end)); 3354 } 3355 3356 if (UVM_ET_ISSUBMAP(iter)) { 3357 error = EINVAL; 3358 goto out; 3359 } 3360 if ((new_prot & iter->max_protection) != new_prot) { 3361 error = EACCES; 3362 goto out; 3363 } 3364 if (map == kernel_map && 3365 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3366 panic("uvm_map_protect: kernel map W^X violation requested"); 3367 } 3368 3369 /* Check limits. */ 3370 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) { 3371 vsize_t limit = lim_cur(RLIMIT_DATA); 3372 dused = ptoa(dused); 3373 if (limit < dused || 3374 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) { 3375 error = ENOMEM; 3376 goto out; 3377 } 3378 } 3379 3380 /* Fix protections. */ 3381 for (iter = first; iter != NULL && iter->start < end; 3382 iter = RBT_NEXT(uvm_map_addr, iter)) { 3383 /* Treat memory holes as free space. 
*/ 3384 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3385 continue; 3386 3387 old_prot = iter->protection; 3388 3389 /* 3390 * Skip adapting protection iff old and new protection 3391 * are equal. 3392 */ 3393 if (set_max) { 3394 if (old_prot == (new_prot & old_prot) && 3395 iter->max_protection == new_prot) 3396 continue; 3397 } else { 3398 if (old_prot == new_prot) 3399 continue; 3400 } 3401 3402 UVM_MAP_CLIP_START(map, iter, start); 3403 UVM_MAP_CLIP_END(map, iter, end); 3404 3405 if (set_max) { 3406 iter->max_protection = new_prot; 3407 iter->protection &= new_prot; 3408 } else 3409 iter->protection = new_prot; 3410 3411 /* 3412 * update physical map if necessary. worry about copy-on-write 3413 * here -- CHECK THIS XXX 3414 */ 3415 if (iter->protection != old_prot) { 3416 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3417 ~PROT_WRITE : PROT_MASK; 3418 3419 /* XXX should only wserial++ if no split occurs */ 3420 if (iter->protection & PROT_WRITE) 3421 map->wserial++; 3422 3423 if (map->flags & VM_MAP_ISVMSPACE) { 3424 if (old_prot == PROT_NONE) { 3425 ((struct vmspace *)map)->vm_dused += 3426 uvmspace_dused(map, iter->start, 3427 iter->end); 3428 } 3429 if (iter->protection == PROT_NONE) { 3430 ((struct vmspace *)map)->vm_dused -= 3431 uvmspace_dused(map, iter->start, 3432 iter->end); 3433 } 3434 } 3435 3436 /* update pmap */ 3437 if ((iter->protection & mask) == PROT_NONE && 3438 VM_MAPENT_ISWIRED(iter)) { 3439 /* 3440 * TODO(ariane) this is stupid. wired_count 3441 * is 0 if not wired, otherwise anything 3442 * larger than 0 (incremented once each time 3443 * wire is called). 3444 * Mostly to be able to undo the damage on 3445 * failure. Not the actually be a wired 3446 * refcounter... 3447 * Originally: iter->wired_count--; 3448 * (don't we have to unwire this in the pmap 3449 * as well?) 3450 */ 3451 iter->wired_count = 0; 3452 } 3453 uvm_map_lock_entry(iter); 3454 pmap_protect(map->pmap, iter->start, iter->end, 3455 iter->protection & mask); 3456 uvm_map_unlock_entry(iter); 3457 } 3458 3459 /* 3460 * If the map is configured to lock any future mappings, 3461 * wire this entry now if the old protection was PROT_NONE 3462 * and the new protection is not PROT_NONE. 3463 */ 3464 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3465 VM_MAPENT_ISWIRED(iter) == 0 && 3466 old_prot == PROT_NONE && 3467 new_prot != PROT_NONE) { 3468 if (uvm_map_pageable(map, iter->start, iter->end, 3469 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3470 /* 3471 * If locking the entry fails, remember the 3472 * error if it's the first one. Note we 3473 * still continue setting the protection in 3474 * the map, but it will return the resource 3475 * storage condition regardless. 3476 * 3477 * XXX Ignore what the actual error is, 3478 * XXX just call it a resource shortage 3479 * XXX so that it doesn't get confused 3480 * XXX what uvm_map_protect() itself would 3481 * XXX normally return. 3482 */ 3483 error = ENOMEM; 3484 } 3485 } 3486 } 3487 pmap_update(map->pmap); 3488 3489 out: 3490 vm_map_unlock(map); 3491 return error; 3492 } 3493 3494 /* 3495 * uvmspace_alloc: allocate a vmspace structure. 
3496 * 3497 * - structure includes vm_map and pmap 3498 * - XXX: no locking on this structure 3499 * - refcnt set to 1, rest must be init'd by caller 3500 */ 3501 struct vmspace * 3502 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3503 boolean_t remove_holes) 3504 { 3505 struct vmspace *vm; 3506 3507 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3508 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3509 return (vm); 3510 } 3511 3512 /* 3513 * uvmspace_init: initialize a vmspace structure. 3514 * 3515 * - XXX: no locking on this structure 3516 * - refcnt set to 1, rest must be init'd by caller 3517 */ 3518 void 3519 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3520 boolean_t pageable, boolean_t remove_holes) 3521 { 3522 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3523 3524 if (pmap) 3525 pmap_reference(pmap); 3526 else 3527 pmap = pmap_create(); 3528 3529 uvm_map_setup(&vm->vm_map, pmap, min, max, 3530 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3531 3532 vm->vm_refcnt = 1; 3533 3534 if (remove_holes) 3535 pmap_remove_holes(vm); 3536 } 3537 3538 /* 3539 * uvmspace_share: share a vmspace between two processes 3540 * 3541 * - used for vfork 3542 */ 3543 3544 struct vmspace * 3545 uvmspace_share(struct process *pr) 3546 { 3547 struct vmspace *vm = pr->ps_vmspace; 3548 3549 uvmspace_addref(vm); 3550 return vm; 3551 } 3552 3553 /* 3554 * uvmspace_exec: the process wants to exec a new program 3555 * 3556 * - XXX: no locking on vmspace 3557 */ 3558 3559 void 3560 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3561 { 3562 struct process *pr = p->p_p; 3563 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3564 struct vm_map *map = &ovm->vm_map; 3565 struct uvm_map_deadq dead_entries; 3566 3567 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3568 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3569 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3570 3571 pmap_unuse_final(p); /* before stack addresses go away */ 3572 TAILQ_INIT(&dead_entries); 3573 3574 /* see if more than one process is using this vmspace... */ 3575 if (ovm->vm_refcnt == 1) { 3576 /* 3577 * If pr is the only process using its vmspace then 3578 * we can safely recycle that vmspace for the program 3579 * that is being exec'd. 3580 */ 3581 3582 #ifdef SYSVSHM 3583 /* 3584 * SYSV SHM semantics require us to kill all segments on an exec 3585 */ 3586 if (ovm->vm_shm) 3587 shmexit(ovm); 3588 #endif 3589 3590 /* 3591 * POSIX 1003.1b -- "lock future mappings" is revoked 3592 * when a process execs another program image. 3593 */ 3594 vm_map_lock(map); 3595 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE); 3596 3597 /* 3598 * now unmap the old program 3599 * 3600 * Instead of attempting to keep the map valid, we simply 3601 * nuke all entries and ask uvm_map_setup to reinitialize 3602 * the map to the new boundaries. 3603 * 3604 * uvm_unmap_remove will actually nuke all entries for us 3605 * (as in, not replace them with free-memory entries). 3606 */ 3607 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3608 &dead_entries, TRUE, FALSE); 3609 3610 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3611 3612 /* Nuke statistics and boundaries. */ 3613 memset(&ovm->vm_startcopy, 0, 3614 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3615 3616 3617 if (end & (vaddr_t)PAGE_MASK) { 3618 end += 1; 3619 if (end == 0) /* overflow */ 3620 end -= PAGE_SIZE; 3621 } 3622 3623 /* Setup new boundaries and populate map with entries. 
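 * uvm_map_setup_entries() recreates the free-space entries spanning the
 * new [min_offset, max_offset) range.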
*/ 3624 map->min_offset = start; 3625 map->max_offset = end; 3626 uvm_map_setup_entries(map); 3627 vm_map_unlock(map); 3628 3629 /* but keep MMU holes unavailable */ 3630 pmap_remove_holes(ovm); 3631 } else { 3632 /* 3633 * pr's vmspace is being shared, so we can't reuse 3634 * it for pr since it is still being used for others. 3635 * allocate a new vmspace for pr 3636 */ 3637 nvm = uvmspace_alloc(start, end, 3638 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3639 3640 /* install new vmspace and drop our ref to the old one. */ 3641 pmap_deactivate(p); 3642 p->p_vmspace = pr->ps_vmspace = nvm; 3643 pmap_activate(p); 3644 3645 uvmspace_free(ovm); 3646 } 3647 3648 /* Release dead entries */ 3649 uvm_unmap_detach(&dead_entries, 0); 3650 } 3651 3652 /* 3653 * uvmspace_addref: add a reference to a vmspace. 3654 */ 3655 void 3656 uvmspace_addref(struct vmspace *vm) 3657 { 3658 KERNEL_ASSERT_LOCKED(); 3659 KASSERT(vm->vm_refcnt > 0); 3660 3661 vm->vm_refcnt++; 3662 } 3663 3664 /* 3665 * uvmspace_free: free a vmspace data structure 3666 */ 3667 void 3668 uvmspace_free(struct vmspace *vm) 3669 { 3670 KERNEL_ASSERT_LOCKED(); 3671 3672 if (--vm->vm_refcnt == 0) { 3673 /* 3674 * lock the map, to wait out all other references to it. delete 3675 * all of the mappings and pages they hold, then call the pmap 3676 * module to reclaim anything left. 3677 */ 3678 #ifdef SYSVSHM 3679 /* Get rid of any SYSV shared memory segments. */ 3680 if (vm->vm_shm != NULL) 3681 shmexit(vm); 3682 #endif 3683 3684 uvm_map_teardown(&vm->vm_map); 3685 pool_put(&uvm_vmspace_pool, vm); 3686 } 3687 } 3688 3689 /* 3690 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3691 * srcmap to the address range [dstaddr, dstaddr + sz) in 3692 * dstmap. 3693 * 3694 * The whole address range in srcmap must be backed by an object 3695 * (no holes). 3696 * 3697 * If successful, the address ranges share memory and the destination 3698 * address range uses the protection flags in prot. 3699 * 3700 * This routine assumes that sz is a multiple of PAGE_SIZE and 3701 * that dstaddr and srcaddr are page-aligned. 
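 *
 * Illustrative call (a sketch; the variable names are assumptions made
 * for the example):
 *
 *	error = uvm_share(dstmap, dstaddr, PROT_READ | PROT_WRITE,
 *	    srcmap, srcaddr, round_page(len));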
3702 */ 3703 int 3704 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3705 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3706 { 3707 int ret = 0; 3708 vaddr_t unmap_end; 3709 vaddr_t dstva; 3710 vsize_t s_off, len, n = sz, remain; 3711 struct vm_map_entry *first = NULL, *last = NULL; 3712 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3713 struct uvm_map_deadq dead; 3714 3715 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3716 return EINVAL; 3717 3718 TAILQ_INIT(&dead); 3719 vm_map_lock(dstmap); 3720 vm_map_lock_read(srcmap); 3721 3722 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3723 ret = ENOMEM; 3724 goto exit_unlock; 3725 } 3726 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3727 ret = EINVAL; 3728 goto exit_unlock; 3729 } 3730 3731 dstva = dstaddr; 3732 unmap_end = dstaddr; 3733 for (; src_entry != NULL; 3734 psrc_entry = src_entry, 3735 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3736 /* hole in address space, bail out */ 3737 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3738 break; 3739 if (src_entry->start >= srcaddr + sz) 3740 break; 3741 3742 if (UVM_ET_ISSUBMAP(src_entry)) 3743 panic("uvm_share: encountered a submap (illegal)"); 3744 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3745 UVM_ET_ISNEEDSCOPY(src_entry)) 3746 panic("uvm_share: non-copy_on_write map entries " 3747 "marked needs_copy (illegal)"); 3748 3749 /* 3750 * srcaddr > map entry start? means we are in the middle of a 3751 * map, so we calculate the offset to use in the source map. 3752 */ 3753 if (srcaddr > src_entry->start) 3754 s_off = srcaddr - src_entry->start; 3755 else if (srcaddr == src_entry->start) 3756 s_off = 0; 3757 else 3758 panic("uvm_share: map entry start > srcaddr"); 3759 3760 remain = src_entry->end - src_entry->start - s_off; 3761 3762 /* Determine how many bytes to share in this pass */ 3763 if (n < remain) 3764 len = n; 3765 else 3766 len = remain; 3767 3768 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot, 3769 srcmap, src_entry, &dead) == NULL) 3770 break; 3771 3772 n -= len; 3773 dstva += len; 3774 srcaddr += len; 3775 unmap_end = dstva + len; 3776 if (n == 0) 3777 goto exit_unlock; 3778 } 3779 3780 ret = EINVAL; 3781 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3782 3783 exit_unlock: 3784 vm_map_unlock_read(srcmap); 3785 vm_map_unlock(dstmap); 3786 uvm_unmap_detach(&dead, 0); 3787 3788 return ret; 3789 } 3790 3791 /* 3792 * Clone map entry into other map. 3793 * 3794 * Mapping will be placed at dstaddr, for the same length. 3795 * Space must be available. 3796 * Reference counters are incremented. 3797 */ 3798 struct vm_map_entry * 3799 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3800 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3801 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3802 int mapent_flags, int amap_share_flags) 3803 { 3804 struct vm_map_entry *new_entry, *first, *last; 3805 3806 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3807 3808 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3809 first = last = NULL; 3810 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3811 panic("uvm_mapent_clone: no space in map for " 3812 "entry in empty map"); 3813 } 3814 new_entry = uvm_map_mkentry(dstmap, first, last, 3815 dstaddr, dstlen, mapent_flags, dead, NULL); 3816 if (new_entry == NULL) 3817 return NULL; 3818 /* old_entry -> new_entry */ 3819 new_entry->object = old_entry->object; 3820 new_entry->offset = old_entry->offset; 3821 new_entry->aref = old_entry->aref; 3822 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3823 new_entry->protection = prot; 3824 new_entry->max_protection = maxprot; 3825 new_entry->inheritance = old_entry->inheritance; 3826 new_entry->advice = old_entry->advice; 3827 3828 /* gain reference to object backing the map (can't be a submap). */ 3829 if (new_entry->aref.ar_amap) { 3830 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3831 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3832 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3833 amap_share_flags); 3834 } 3835 3836 if (UVM_ET_ISOBJ(new_entry) && 3837 new_entry->object.uvm_obj->pgops->pgo_reference) { 3838 new_entry->offset += off; 3839 new_entry->object.uvm_obj->pgops->pgo_reference 3840 (new_entry->object.uvm_obj); 3841 } 3842 3843 return new_entry; 3844 } 3845 3846 struct vm_map_entry * 3847 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3848 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3849 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3850 { 3851 /* 3852 * If old_entry refers to a copy-on-write region that has not yet been 3853 * written to (needs_copy flag is set), then we need to allocate a new 3854 * amap for old_entry. 3855 * 3856 * If we do not do this, and the process owning old_entry does a copy-on 3857 * write later, old_entry and new_entry will refer to different memory 3858 * regions, and the memory between the processes is no longer shared. 3859 * 3860 * [in other words, we need to clear needs_copy] 3861 */ 3862 3863 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3864 /* get our own amap, clears needs_copy */ 3865 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0); 3866 /* XXXCDC: WAITOK??? */ 3867 } 3868 3869 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3870 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3871 } 3872 3873 /* 3874 * share the mapping: this means we want the old and 3875 * new entries to share amaps and backing objects. 3876 */ 3877 struct vm_map_entry * 3878 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3879 struct vm_map *old_map, 3880 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3881 { 3882 struct vm_map_entry *new_entry; 3883 3884 new_entry = uvm_mapent_share(new_map, old_entry->start, 3885 old_entry->end - old_entry->start, 0, old_entry->protection, 3886 old_entry->max_protection, old_map, old_entry, dead); 3887 3888 /* 3889 * pmap_copy the mappings: this routine is optional 3890 * but if it is there it will reduce the number of 3891 * page faults in the new proc. 3892 */ 3893 if (!UVM_ET_ISHOLE(new_entry)) 3894 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3895 (new_entry->end - new_entry->start), new_entry->start); 3896 3897 return (new_entry); 3898 } 3899 3900 /* 3901 * copy-on-write the mapping (using mmap's 3902 * MAP_PRIVATE semantics) 3903 * 3904 * allocate new_entry, adjust reference counts. 3905 * (note that new references are read-only). 
3906 */ 3907 struct vm_map_entry * 3908 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3909 struct vm_map *old_map, 3910 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3911 { 3912 struct vm_map_entry *new_entry; 3913 boolean_t protect_child; 3914 3915 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3916 old_entry->end - old_entry->start, 0, old_entry->protection, 3917 old_entry->max_protection, old_entry, dead, 0, 0); 3918 3919 new_entry->etype |= 3920 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3921 3922 /* 3923 * the new entry will need an amap. it will either 3924 * need to be copied from the old entry or created 3925 * from scratch (if the old entry does not have an 3926 * amap). can we defer this process until later 3927 * (by setting "needs_copy") or do we need to copy 3928 * the amap now? 3929 * 3930 * we must copy the amap now if any of the following 3931 * conditions hold: 3932 * 1. the old entry has an amap and that amap is 3933 * being shared. this means that the old (parent) 3934 * process is sharing the amap with another 3935 * process. if we do not clear needs_copy here 3936 * we will end up in a situation where both the 3937 * parent and child process are referring to the 3938 * same amap with "needs_copy" set. if the 3939 * parent write-faults, the fault routine will 3940 * clear "needs_copy" in the parent by allocating 3941 * a new amap. this is wrong because the 3942 * parent is supposed to be sharing the old amap 3943 * and the new amap will break that. 3944 * 3945 * 2. if the old entry has an amap and a non-zero 3946 * wire count then we are going to have to call 3947 * amap_cow_now to avoid page faults in the 3948 * parent process. since amap_cow_now requires 3949 * "needs_copy" to be clear we might as well 3950 * clear it here as well. 3951 * 3952 */ 3953 if (old_entry->aref.ar_amap != NULL && 3954 ((amap_flags(old_entry->aref.ar_amap) & 3955 AMAP_SHARED) != 0 || 3956 VM_MAPENT_ISWIRED(old_entry))) { 3957 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3958 0, 0); 3959 /* XXXCDC: M_WAITOK ... ok? */ 3960 } 3961 3962 /* 3963 * if the parent's entry is wired down, then the 3964 * parent process does not want page faults on 3965 * access to that memory. this means that we 3966 * cannot do copy-on-write because we can't write 3967 * protect the old entry. in this case we 3968 * resolve all copy-on-write faults now, using 3969 * amap_cow_now. note that we have already 3970 * allocated any needed amap (above). 3971 */ 3972 if (VM_MAPENT_ISWIRED(old_entry)) { 3973 /* 3974 * resolve all copy-on-write faults now 3975 * (note that there is nothing to do if 3976 * the old mapping does not have an amap). 3977 * XXX: is it worthwhile to bother with 3978 * pmap_copy in this case? 3979 */ 3980 if (old_entry->aref.ar_amap) 3981 amap_cow_now(new_map, new_entry); 3982 } else { 3983 if (old_entry->aref.ar_amap) { 3984 /* 3985 * setup mappings to trigger copy-on-write faults 3986 * we must write-protect the parent if it has 3987 * an amap and it is not already "needs_copy"... 3988 * if it is already "needs_copy" then the parent 3989 * has already been write-protected by a previous 3990 * fork operation. 3991 * 3992 * if we do not write-protect the parent, then 3993 * we must be sure to write-protect the child 3994 * after the pmap_copy() operation. 3995 * 3996 * XXX: pmap_copy should have some way of telling 3997 * us that it didn't do anything so we can avoid 3998 * calling pmap_protect needlessly. 
3999 */ 4000 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 4001 if (old_entry->max_protection & PROT_WRITE) { 4002 uvm_map_lock_entry(old_entry); 4003 pmap_protect(old_map->pmap, 4004 old_entry->start, 4005 old_entry->end, 4006 old_entry->protection & 4007 ~PROT_WRITE); 4008 uvm_map_unlock_entry(old_entry); 4009 pmap_update(old_map->pmap); 4010 } 4011 old_entry->etype |= UVM_ET_NEEDSCOPY; 4012 } 4013 4014 /* parent must now be write-protected */ 4015 protect_child = FALSE; 4016 } else { 4017 /* 4018 * we only need to protect the child if the 4019 * parent has write access. 4020 */ 4021 if (old_entry->max_protection & PROT_WRITE) 4022 protect_child = TRUE; 4023 else 4024 protect_child = FALSE; 4025 } 4026 /* 4027 * copy the mappings 4028 * XXX: need a way to tell if this does anything 4029 */ 4030 if (!UVM_ET_ISHOLE(new_entry)) 4031 pmap_copy(new_map->pmap, old_map->pmap, 4032 new_entry->start, 4033 (old_entry->end - old_entry->start), 4034 old_entry->start); 4035 4036 /* protect the child's mappings if necessary */ 4037 if (protect_child) { 4038 pmap_protect(new_map->pmap, new_entry->start, 4039 new_entry->end, 4040 new_entry->protection & 4041 ~PROT_WRITE); 4042 } 4043 } 4044 4045 return (new_entry); 4046 } 4047 4048 /* 4049 * zero the mapping: the new entry will be zero initialized 4050 */ 4051 struct vm_map_entry * 4052 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 4053 struct vm_map *old_map, 4054 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 4055 { 4056 struct vm_map_entry *new_entry; 4057 4058 new_entry = uvm_mapent_clone(new_map, old_entry->start, 4059 old_entry->end - old_entry->start, 0, old_entry->protection, 4060 old_entry->max_protection, old_entry, dead, 0, 0); 4061 4062 new_entry->etype |= 4063 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4064 4065 if (new_entry->aref.ar_amap) { 4066 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 4067 atop(new_entry->end - new_entry->start), 0); 4068 new_entry->aref.ar_amap = NULL; 4069 new_entry->aref.ar_pageoff = 0; 4070 } 4071 4072 if (UVM_ET_ISOBJ(new_entry)) { 4073 if (new_entry->object.uvm_obj->pgops->pgo_detach) 4074 new_entry->object.uvm_obj->pgops->pgo_detach( 4075 new_entry->object.uvm_obj); 4076 new_entry->object.uvm_obj = NULL; 4077 new_entry->etype &= ~UVM_ET_OBJ; 4078 } 4079 4080 return (new_entry); 4081 } 4082 4083 /* 4084 * uvmspace_fork: fork a process' main map 4085 * 4086 * => create a new vmspace for child process from parent. 4087 * => parent's map must not be locked. 4088 */ 4089 struct vmspace * 4090 uvmspace_fork(struct process *pr) 4091 { 4092 struct vmspace *vm1 = pr->ps_vmspace; 4093 struct vmspace *vm2; 4094 struct vm_map *old_map = &vm1->vm_map; 4095 struct vm_map *new_map; 4096 struct vm_map_entry *old_entry, *new_entry; 4097 struct uvm_map_deadq dead; 4098 4099 vm_map_lock(old_map); 4100 4101 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 4102 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 4103 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4104 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 4105 vm2->vm_dused = 0; /* Statistic managed by us. 
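 * (vm_dused is recomputed below as the child's entries are created.)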
*/ 4106 new_map = &vm2->vm_map; 4107 vm_map_lock(new_map); 4108 4109 /* go entry-by-entry */ 4110 TAILQ_INIT(&dead); 4111 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 4112 if (old_entry->start == old_entry->end) 4113 continue; 4114 4115 /* first, some sanity checks on the old entry */ 4116 if (UVM_ET_ISSUBMAP(old_entry)) { 4117 panic("fork: encountered a submap during fork " 4118 "(illegal)"); 4119 } 4120 4121 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 4122 UVM_ET_ISNEEDSCOPY(old_entry)) { 4123 panic("fork: non-copy_on_write map entry marked " 4124 "needs_copy (illegal)"); 4125 } 4126 4127 /* Apply inheritance. */ 4128 switch (old_entry->inheritance) { 4129 case MAP_INHERIT_SHARE: 4130 new_entry = uvm_mapent_forkshared(vm2, new_map, 4131 old_map, old_entry, &dead); 4132 break; 4133 case MAP_INHERIT_COPY: 4134 new_entry = uvm_mapent_forkcopy(vm2, new_map, 4135 old_map, old_entry, &dead); 4136 break; 4137 case MAP_INHERIT_ZERO: 4138 new_entry = uvm_mapent_forkzero(vm2, new_map, 4139 old_map, old_entry, &dead); 4140 break; 4141 default: 4142 continue; 4143 } 4144 4145 /* Update process statistics. */ 4146 if (!UVM_ET_ISHOLE(new_entry)) 4147 new_map->size += new_entry->end - new_entry->start; 4148 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) && 4149 new_entry->protection != PROT_NONE) { 4150 vm2->vm_dused += uvmspace_dused( 4151 new_map, new_entry->start, new_entry->end); 4152 } 4153 } 4154 4155 vm_map_unlock(old_map); 4156 vm_map_unlock(new_map); 4157 4158 /* 4159 * This can actually happen, if multiple entries described a 4160 * space in which an entry was inherited. 4161 */ 4162 uvm_unmap_detach(&dead, 0); 4163 4164 #ifdef SYSVSHM 4165 if (vm1->vm_shm) 4166 shmfork(vm1, vm2); 4167 #endif 4168 4169 return vm2; 4170 } 4171 4172 /* 4173 * uvm_map_hint: return the beginning of the best area suitable for 4174 * creating a new mapping with "prot" protection. 4175 */ 4176 vaddr_t 4177 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 4178 vaddr_t maxaddr) 4179 { 4180 vaddr_t addr; 4181 vaddr_t spacing; 4182 4183 #ifdef __i386__ 4184 /* 4185 * If executable skip first two pages, otherwise start 4186 * after data + heap region. 4187 */ 4188 if ((prot & PROT_EXEC) != 0 && 4189 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4190 addr = (PAGE_SIZE*2) + 4191 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4192 return (round_page(addr)); 4193 } 4194 #endif 4195 4196 #if defined (__LP64__) 4197 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4198 #else 4199 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4200 #endif 4201 4202 /* 4203 * Start malloc/mmap after the brk. 4204 */ 4205 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 4206 addr = MAX(addr, minaddr); 4207 4208 if (addr < maxaddr) { 4209 while (spacing > maxaddr - addr) 4210 spacing >>= 1; 4211 } 4212 addr += arc4random() & spacing; 4213 return (round_page(addr)); 4214 } 4215 4216 /* 4217 * uvm_map_submap: punch down part of a map into a submap 4218 * 4219 * => only the kernel_map is allowed to be submapped 4220 * => the purpose of submapping is to break up the locking granularity 4221 * of a larger map 4222 * => the range specified must have been mapped previously with a uvm_map() 4223 * call [with uobj==NULL] to create a blank map entry in the main map. 4224 * [And it had better still be blank!] 4225 * => maps which contain submaps should never be copied or forked. 4226 * => to remove a submap, use uvm_unmap() on the main map 4227 * and then uvm_map_deallocate() the submap. 4228 * => main map must be unlocked. 
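 * => illustrative call sequence (sketch only, most arguments elided):
 *	uvm_map(kernel_map, &addr, len, NULL, UVM_UNKNOWN_OFFSET, ...);
 *	uvm_map_submap(kernel_map, addr, addr + len, submap);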
4229  * => submap must have been init'd and have a zero reference count.
4230  *	[need not be locked as we don't actually reference it]
4231  */
4232 int
4233 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4234     struct vm_map *submap)
4235 {
4236 	struct vm_map_entry *entry;
4237 	int result;
4238 
4239 	if (start > map->max_offset || end > map->max_offset ||
4240 	    start < map->min_offset || end < map->min_offset)
4241 		return EINVAL;
4242 
4243 	vm_map_lock(map);
4244 
4245 	if (uvm_map_lookup_entry(map, start, &entry)) {
4246 		UVM_MAP_CLIP_START(map, entry, start);
4247 		UVM_MAP_CLIP_END(map, entry, end);
4248 	} else
4249 		entry = NULL;
4250 
4251 	if (entry != NULL &&
4252 	    entry->start == start && entry->end == end &&
4253 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4254 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4255 		entry->etype |= UVM_ET_SUBMAP;
4256 		entry->object.sub_map = submap;
4257 		entry->offset = 0;
4258 		uvm_map_reference(submap);
4259 		result = 0;
4260 	} else
4261 		result = EINVAL;
4262 
4263 	vm_map_unlock(map);
4264 	return result;
4265 }
4266 
4267 /*
4268  * uvm_map_checkprot: check protection in map
4269  *
4270  * => must allow specific protection in a fully allocated region.
4271  * => map must be read or write locked by caller.
4272  */
4273 boolean_t
4274 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4275     vm_prot_t protection)
4276 {
4277 	struct vm_map_entry *entry;
4278 
4279 	if (start < map->min_offset || end > map->max_offset || start > end)
4280 		return FALSE;
4281 	if (start == end)
4282 		return TRUE;
4283 
4284 	/*
4285 	 * Iterate entries.
4286 	 */
4287 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4288 	    entry != NULL && entry->start < end;
4289 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4290 		/* Fail if a hole is found. */
4291 		if (UVM_ET_ISHOLE(entry) ||
4292 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4293 			return FALSE;
4294 
4295 		/* Check protection. */
4296 		if ((entry->protection & protection) != protection)
4297 			return FALSE;
4298 	}
4299 	return TRUE;
4300 }
4301 
4302 /*
4303  * uvm_map_create: create map
4304  */
4305 vm_map_t
4306 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4307 {
4308 	vm_map_t map;
4309 
4310 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4311 	uvm_map_setup(map, pmap, min, max, flags);
4312 	return (map);
4313 }
4314 
4315 /*
4316  * uvm_map_deallocate: drop reference to a map
4317  *
4318  * => caller must not lock map
4319  * => we will zap map if ref count goes to zero
4320  */
4321 void
4322 uvm_map_deallocate(vm_map_t map)
4323 {
4324 	int c;
4325 	struct uvm_map_deadq dead;
4326 
4327 	c = atomic_dec_int_nv(&map->ref_count);
4328 	if (c > 0) {
4329 		return;
4330 	}
4331 
4332 	/*
4333 	 * all references gone. unmap and free.
4334 	 *
4335 	 * No lock required: we are only one to access this map.
4336 	 */
4337 	TAILQ_INIT(&dead);
4338 	uvm_tree_sanity(map, __FILE__, __LINE__);
4339 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4340 	    TRUE, FALSE);
4341 	pmap_destroy(map->pmap);
4342 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4343 	free(map, M_VMMAP, sizeof *map);
4344 
4345 	uvm_unmap_detach(&dead, 0);
4346 }
4347 
4348 /*
4349  * uvm_map_inherit: set inheritance code for range of addrs in map.
4350  *
4351  * => map must be unlocked
4352  * => note that the inherit code is used during a "fork". see fork
4353  *	code for details.
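 * => MAP_INHERIT_SHARE, _COPY and _ZERO select uvm_mapent_forkshared(),
 *	uvm_mapent_forkcopy() and uvm_mapent_forkzero() respectively in
 *	uvmspace_fork(); MAP_INHERIT_NONE causes the entry to be skipped
 *	in the child.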
4354  */
4355 int
4356 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4357     vm_inherit_t new_inheritance)
4358 {
4359 	struct vm_map_entry *entry;
4360 
4361 	switch (new_inheritance) {
4362 	case MAP_INHERIT_NONE:
4363 	case MAP_INHERIT_COPY:
4364 	case MAP_INHERIT_SHARE:
4365 	case MAP_INHERIT_ZERO:
4366 		break;
4367 	default:
4368 		return (EINVAL);
4369 	}
4370 
4371 	if (start > end)
4372 		return EINVAL;
4373 	start = MAX(start, map->min_offset);
4374 	end = MIN(end, map->max_offset);
4375 	if (start >= end)
4376 		return 0;
4377 
4378 	vm_map_lock(map);
4379 
4380 	entry = uvm_map_entrybyaddr(&map->addr, start);
4381 	if (entry->end > start)
4382 		UVM_MAP_CLIP_START(map, entry, start);
4383 	else
4384 		entry = RBT_NEXT(uvm_map_addr, entry);
4385 
4386 	while (entry != NULL && entry->start < end) {
4387 		UVM_MAP_CLIP_END(map, entry, end);
4388 		entry->inheritance = new_inheritance;
4389 		entry = RBT_NEXT(uvm_map_addr, entry);
4390 	}
4391 
4392 	vm_map_unlock(map);
4393 	return (0);
4394 }
4395 
4396 /*
4397  * uvm_map_syscall: permit system calls for range of addrs in map.
4398  *
4399  * => map must be unlocked
4400  */
4401 int
4402 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
4403 {
4404 	struct vm_map_entry *entry;
4405 
4406 	if (start > end)
4407 		return EINVAL;
4408 	start = MAX(start, map->min_offset);
4409 	end = MIN(end, map->max_offset);
4410 	if (start >= end)
4411 		return 0;
4412 	if (map->flags & VM_MAP_SYSCALL_ONCE)	/* only allowed once */
4413 		return (EPERM);
4414 
4415 	vm_map_lock(map);
4416 
4417 	entry = uvm_map_entrybyaddr(&map->addr, start);
4418 	if (entry->end > start)
4419 		UVM_MAP_CLIP_START(map, entry, start);
4420 	else
4421 		entry = RBT_NEXT(uvm_map_addr, entry);
4422 
4423 	while (entry != NULL && entry->start < end) {
4424 		UVM_MAP_CLIP_END(map, entry, end);
4425 		entry->etype |= UVM_ET_SYSCALL;
4426 		entry = RBT_NEXT(uvm_map_addr, entry);
4427 	}
4428 
4429 	map->wserial++;
4430 	map->flags |= VM_MAP_SYSCALL_ONCE;
4431 	vm_map_unlock(map);
4432 	return (0);
4433 }
4434 
4435 /*
4436  * uvm_map_advice: set advice code for range of addrs in map.
4437  *
4438  * => map must be unlocked
4439  */
4440 int
4441 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4442 {
4443 	struct vm_map_entry *entry;
4444 
4445 	switch (new_advice) {
4446 	case MADV_NORMAL:
4447 	case MADV_RANDOM:
4448 	case MADV_SEQUENTIAL:
4449 		break;
4450 	default:
4451 		return (EINVAL);
4452 	}
4453 
4454 	if (start > end)
4455 		return EINVAL;
4456 	start = MAX(start, map->min_offset);
4457 	end = MIN(end, map->max_offset);
4458 	if (start >= end)
4459 		return 0;
4460 
4461 	vm_map_lock(map);
4462 
4463 	entry = uvm_map_entrybyaddr(&map->addr, start);
4464 	if (entry != NULL && entry->end > start)
4465 		UVM_MAP_CLIP_START(map, entry, start);
4466 	else if (entry != NULL)
4467 		entry = RBT_NEXT(uvm_map_addr, entry);
4468 
4469 	/*
4470 	 * XXXJRT: disallow holes?
4471 	 */
4472 	while (entry != NULL && entry->start < end) {
4473 		UVM_MAP_CLIP_END(map, entry, end);
4474 		entry->advice = new_advice;
4475 		entry = RBT_NEXT(uvm_map_addr, entry);
4476 	}
4477 
4478 	vm_map_unlock(map);
4479 	return (0);
4480 }
4481 
4482 /*
4483  * uvm_map_extract: extract a mapping from a map and put it somewhere
4484  * in the kernel_map, setting protection to max_prot.
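 * On success, *dstaddrp is set to the kernel_map address of the copy.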
4485 * 4486 * => map should be unlocked (we will write lock it and kernel_map) 4487 * => returns 0 on success, error code otherwise 4488 * => start must be page aligned 4489 * => len must be page sized 4490 * => flags: 4491 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4492 * Mappings are QREF's. 4493 */ 4494 int 4495 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4496 vaddr_t *dstaddrp, int flags) 4497 { 4498 struct uvm_map_deadq dead; 4499 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4500 vaddr_t dstaddr; 4501 vaddr_t end; 4502 vaddr_t cp_start; 4503 vsize_t cp_len, cp_off; 4504 int error; 4505 4506 TAILQ_INIT(&dead); 4507 end = start + len; 4508 4509 /* 4510 * Sanity check on the parameters. 4511 * Also, since the mapping may not contain gaps, error out if the 4512 * mapped area is not in source map. 4513 */ 4514 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4515 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4516 return EINVAL; 4517 if (start < srcmap->min_offset || end > srcmap->max_offset) 4518 return EINVAL; 4519 4520 /* Initialize dead entries. Handle len == 0 case. */ 4521 if (len == 0) 4522 return 0; 4523 4524 /* Acquire lock on srcmap. */ 4525 vm_map_lock(srcmap); 4526 4527 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4528 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4529 4530 /* Check that the range is contiguous. */ 4531 for (entry = first; entry != NULL && entry->end < end; 4532 entry = RBT_NEXT(uvm_map_addr, entry)) { 4533 if (VMMAP_FREE_END(entry) != entry->end || 4534 UVM_ET_ISHOLE(entry)) { 4535 error = EINVAL; 4536 goto fail; 4537 } 4538 } 4539 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4540 error = EINVAL; 4541 goto fail; 4542 } 4543 4544 /* 4545 * Handle need-copy flag. 4546 */ 4547 for (entry = first; entry != NULL && entry->start < end; 4548 entry = RBT_NEXT(uvm_map_addr, entry)) { 4549 if (UVM_ET_ISNEEDSCOPY(entry)) 4550 amap_copy(srcmap, entry, M_NOWAIT, 4551 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4552 if (UVM_ET_ISNEEDSCOPY(entry)) { 4553 /* 4554 * amap_copy failure 4555 */ 4556 error = ENOMEM; 4557 goto fail; 4558 } 4559 } 4560 4561 /* Lock destination map (kernel_map). */ 4562 vm_map_lock(kernel_map); 4563 4564 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4565 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4566 PROT_NONE, 0) != 0) { 4567 error = ENOMEM; 4568 goto fail2; 4569 } 4570 *dstaddrp = dstaddr; 4571 4572 /* 4573 * We now have srcmap and kernel_map locked. 4574 * dstaddr contains the destination offset in dstmap. 4575 */ 4576 /* step 1: start looping through map entries, performing extraction. */ 4577 for (entry = first; entry != NULL && entry->start < end; 4578 entry = RBT_NEXT(uvm_map_addr, entry)) { 4579 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4580 if (UVM_ET_ISHOLE(entry)) 4581 continue; 4582 4583 /* Calculate uvm_mapent_clone parameters. 
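 * (cp_start/cp_len clamp the clone to the requested [start, end) window;
 * cp_off is the offset of that window into the source entry.)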
*/ 4584 cp_start = entry->start; 4585 if (cp_start < start) { 4586 cp_off = start - cp_start; 4587 cp_start = start; 4588 } else 4589 cp_off = 0; 4590 cp_len = MIN(entry->end, end) - cp_start; 4591 4592 newentry = uvm_mapent_clone(kernel_map, 4593 cp_start - start + dstaddr, cp_len, cp_off, 4594 entry->protection, entry->max_protection, 4595 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4596 if (newentry == NULL) { 4597 error = ENOMEM; 4598 goto fail2_unmap; 4599 } 4600 kernel_map->size += cp_len; 4601 if (flags & UVM_EXTRACT_FIXPROT) 4602 newentry->protection = newentry->max_protection; 4603 4604 /* 4605 * Step 2: perform pmap copy. 4606 * (Doing this in the loop saves one RB traversal.) 4607 */ 4608 pmap_copy(kernel_map->pmap, srcmap->pmap, 4609 cp_start - start + dstaddr, cp_len, cp_start); 4610 } 4611 pmap_update(kernel_map->pmap); 4612 4613 error = 0; 4614 4615 /* Unmap copied entries on failure. */ 4616 fail2_unmap: 4617 if (error) { 4618 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4619 FALSE, TRUE); 4620 } 4621 4622 /* Release maps, release dead entries. */ 4623 fail2: 4624 vm_map_unlock(kernel_map); 4625 4626 fail: 4627 vm_map_unlock(srcmap); 4628 4629 uvm_unmap_detach(&dead, 0); 4630 4631 return error; 4632 } 4633 4634 /* 4635 * uvm_map_clean: clean out a map range 4636 * 4637 * => valid flags: 4638 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4639 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4640 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4641 * if (flags & PGO_FREE): any cached pages are freed after clean 4642 * => returns an error if any part of the specified range isn't mapped 4643 * => never a need to flush amap layer since the anonymous memory has 4644 * no permanent home, but may deactivate pages there 4645 * => called from sys_msync() and sys_madvise() 4646 * => caller must not write-lock map (read OK). 4647 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4648 */ 4649 4650 int 4651 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4652 { 4653 struct vm_map_entry *first, *entry; 4654 struct vm_amap *amap; 4655 struct vm_anon *anon; 4656 struct vm_page *pg; 4657 struct uvm_object *uobj; 4658 vaddr_t cp_start, cp_end; 4659 int refs; 4660 int error; 4661 boolean_t rv; 4662 4663 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4664 (PGO_FREE|PGO_DEACTIVATE)); 4665 4666 if (start > end || start < map->min_offset || end > map->max_offset) 4667 return EINVAL; 4668 4669 vm_map_lock_read(map); 4670 first = uvm_map_entrybyaddr(&map->addr, start); 4671 4672 /* Make a first pass to check for holes. */ 4673 for (entry = first; entry != NULL && entry->start < end; 4674 entry = RBT_NEXT(uvm_map_addr, entry)) { 4675 if (UVM_ET_ISSUBMAP(entry)) { 4676 vm_map_unlock_read(map); 4677 return EINVAL; 4678 } 4679 if (UVM_ET_ISSUBMAP(entry) || 4680 UVM_ET_ISHOLE(entry) || 4681 (entry->end < end && 4682 VMMAP_FREE_END(entry) != entry->end)) { 4683 vm_map_unlock_read(map); 4684 return EFAULT; 4685 } 4686 } 4687 4688 error = 0; 4689 for (entry = first; entry != NULL && entry->start < end; 4690 entry = RBT_NEXT(uvm_map_addr, entry)) { 4691 amap = entry->aref.ar_amap; /* top layer */ 4692 if (UVM_ET_ISOBJ(entry)) 4693 uobj = entry->object.uvm_obj; 4694 else 4695 uobj = NULL; 4696 4697 /* 4698 * No amap cleaning necessary if: 4699 * - there's no amap 4700 * - we're not deactivating or freeing pages. 
4701 */ 4702 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4703 goto flush_object; 4704 4705 cp_start = MAX(entry->start, start); 4706 cp_end = MIN(entry->end, end); 4707 4708 amap_lock(amap); 4709 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4710 anon = amap_lookup(&entry->aref, 4711 cp_start - entry->start); 4712 if (anon == NULL) 4713 continue; 4714 4715 KASSERT(anon->an_lock == amap->am_lock); 4716 pg = anon->an_page; 4717 if (pg == NULL) { 4718 continue; 4719 } 4720 KASSERT(pg->pg_flags & PQ_ANON); 4721 4722 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4723 /* 4724 * XXX In these first 3 cases, we always just 4725 * XXX deactivate the page. We may want to 4726 * XXX handle the different cases more 4727 * XXX specifically, in the future. 4728 */ 4729 case PGO_CLEANIT|PGO_FREE: 4730 case PGO_CLEANIT|PGO_DEACTIVATE: 4731 case PGO_DEACTIVATE: 4732 deactivate_it: 4733 /* skip the page if it's wired */ 4734 if (pg->wire_count != 0) 4735 break; 4736 4737 uvm_lock_pageq(); 4738 4739 KASSERT(pg->uanon == anon); 4740 4741 /* zap all mappings for the page. */ 4742 pmap_page_protect(pg, PROT_NONE); 4743 4744 /* ...and deactivate the page. */ 4745 uvm_pagedeactivate(pg); 4746 4747 uvm_unlock_pageq(); 4748 break; 4749 case PGO_FREE: 4750 /* 4751 * If there are multiple references to 4752 * the amap, just deactivate the page. 4753 */ 4754 if (amap_refs(amap) > 1) 4755 goto deactivate_it; 4756 4757 /* XXX skip the page if it's wired */ 4758 if (pg->wire_count != 0) { 4759 break; 4760 } 4761 amap_unadd(&entry->aref, 4762 cp_start - entry->start); 4763 refs = --anon->an_ref; 4764 if (refs == 0) 4765 uvm_anfree(anon); 4766 break; 4767 default: 4768 panic("uvm_map_clean: weird flags"); 4769 } 4770 } 4771 amap_unlock(amap); 4772 4773 flush_object: 4774 cp_start = MAX(entry->start, start); 4775 cp_end = MIN(entry->end, end); 4776 4777 /* 4778 * flush pages if we've got a valid backing object. 4779 * 4780 * Don't PGO_FREE if we don't have write permission 4781 * and don't flush if this is a copy-on-write object 4782 * since we can't know our permissions on it. 4783 */ 4784 if (uobj != NULL && 4785 ((flags & PGO_FREE) == 0 || 4786 ((entry->max_protection & PROT_WRITE) != 0 && 4787 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4788 rw_enter(uobj->vmobjlock, RW_WRITE); 4789 rv = uobj->pgops->pgo_flush(uobj, 4790 cp_start - entry->start + entry->offset, 4791 cp_end - entry->start + entry->offset, flags); 4792 rw_exit(uobj->vmobjlock); 4793 4794 if (rv == FALSE) 4795 error = EFAULT; 4796 } 4797 } 4798 4799 vm_map_unlock_read(map); 4800 return error; 4801 } 4802 4803 /* 4804 * UVM_MAP_CLIP_END implementation 4805 */ 4806 void 4807 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4808 { 4809 struct vm_map_entry *tmp; 4810 4811 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4812 tmp = uvm_mapent_alloc(map, 0); 4813 4814 /* Invoke splitentry. */ 4815 uvm_map_splitentry(map, entry, tmp, addr); 4816 } 4817 4818 /* 4819 * UVM_MAP_CLIP_START implementation 4820 * 4821 * Clippers are required to not change the pointers to the entry they are 4822 * clipping on. 4823 * Since uvm_map_splitentry turns the original entry into the lowest 4824 * entry (address wise) we do a swap between the new entry and the original 4825 * entry, prior to calling uvm_map_splitentry. 
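 *
 * Schematically (informal sketch, addresses increase to the right):
 *	before:	[ entry              )
 *	after:	[ tmp      )[ entry  )		(split at addr)
 * so the caller's entry pointer ends up describing the piece that
 * starts at addr.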
4826 */ 4827 void 4828 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4829 { 4830 struct vm_map_entry *tmp; 4831 struct uvm_addr_state *free; 4832 4833 /* Unlink original. */ 4834 free = uvm_map_uaddr_e(map, entry); 4835 uvm_mapent_free_remove(map, free, entry); 4836 uvm_mapent_addr_remove(map, entry); 4837 4838 /* Copy entry. */ 4839 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4840 tmp = uvm_mapent_alloc(map, 0); 4841 uvm_mapent_copy(entry, tmp); 4842 4843 /* Put new entry in place of original entry. */ 4844 uvm_mapent_addr_insert(map, tmp); 4845 uvm_mapent_free_insert(map, free, tmp); 4846 4847 /* Invoke splitentry. */ 4848 uvm_map_splitentry(map, tmp, entry, addr); 4849 } 4850 4851 /* 4852 * Boundary fixer. 4853 */ 4854 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4855 static inline vaddr_t 4856 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4857 { 4858 return (min < bound && max > bound) ? bound : max; 4859 } 4860 4861 /* 4862 * Choose free list based on address at start of free space. 4863 * 4864 * The uvm_addr_state returned contains addr and is the first of: 4865 * - uaddr_exe 4866 * - uaddr_brk_stack 4867 * - uaddr_any 4868 */ 4869 struct uvm_addr_state* 4870 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4871 { 4872 struct uvm_addr_state *uaddr; 4873 int i; 4874 4875 /* Special case the first page, to prevent mmap from returning 0. */ 4876 if (addr < VMMAP_MIN_ADDR) 4877 return NULL; 4878 4879 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4880 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4881 if (addr >= uvm_maxkaddr) 4882 return NULL; 4883 } 4884 4885 /* Is the address inside the exe-only map? */ 4886 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4887 addr < map->uaddr_exe->uaddr_maxaddr) 4888 return map->uaddr_exe; 4889 4890 /* Check if the space falls inside brk/stack area. */ 4891 if ((addr >= map->b_start && addr < map->b_end) || 4892 (addr >= map->s_start && addr < map->s_end)) { 4893 if (map->uaddr_brk_stack != NULL && 4894 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4895 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4896 return map->uaddr_brk_stack; 4897 } else 4898 return NULL; 4899 } 4900 4901 /* 4902 * Check the other selectors. 4903 * 4904 * These selectors are only marked as the owner, if they have insert 4905 * functions. 4906 */ 4907 for (i = 0; i < nitems(map->uaddr_any); i++) { 4908 uaddr = map->uaddr_any[i]; 4909 if (uaddr == NULL) 4910 continue; 4911 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4912 continue; 4913 4914 if (addr >= uaddr->uaddr_minaddr && 4915 addr < uaddr->uaddr_maxaddr) 4916 return uaddr; 4917 } 4918 4919 return NULL; 4920 } 4921 4922 /* 4923 * Choose free list based on address at start of free space. 4924 * 4925 * The uvm_addr_state returned contains addr and is the first of: 4926 * - uaddr_exe 4927 * - uaddr_brk_stack 4928 * - uaddr_any 4929 */ 4930 struct uvm_addr_state* 4931 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4932 { 4933 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4934 } 4935 4936 /* 4937 * Returns the first free-memory boundary that is crossed by [min-max]. 4938 */ 4939 vsize_t 4940 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4941 { 4942 struct uvm_addr_state *uaddr; 4943 int i; 4944 4945 /* Never return first page. */ 4946 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4947 4948 /* Treat the maxkaddr special, if the map is a kernel_map. 
*/ 4949 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4950 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4951 4952 /* Check for exe-only boundaries. */ 4953 if (map->uaddr_exe != NULL) { 4954 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4955 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4956 } 4957 4958 /* Check for exe-only boundaries. */ 4959 if (map->uaddr_brk_stack != NULL) { 4960 max = uvm_map_boundfix(min, max, 4961 map->uaddr_brk_stack->uaddr_minaddr); 4962 max = uvm_map_boundfix(min, max, 4963 map->uaddr_brk_stack->uaddr_maxaddr); 4964 } 4965 4966 /* Check other boundaries. */ 4967 for (i = 0; i < nitems(map->uaddr_any); i++) { 4968 uaddr = map->uaddr_any[i]; 4969 if (uaddr != NULL) { 4970 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4971 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4972 } 4973 } 4974 4975 /* Boundaries at stack and brk() area. */ 4976 max = uvm_map_boundfix(min, max, map->s_start); 4977 max = uvm_map_boundfix(min, max, map->s_end); 4978 max = uvm_map_boundfix(min, max, map->b_start); 4979 max = uvm_map_boundfix(min, max, map->b_end); 4980 4981 return max; 4982 } 4983 4984 /* 4985 * Update map allocation start and end addresses from proc vmspace. 4986 */ 4987 void 4988 uvm_map_vmspace_update(struct vm_map *map, 4989 struct uvm_map_deadq *dead, int flags) 4990 { 4991 struct vmspace *vm; 4992 vaddr_t b_start, b_end, s_start, s_end; 4993 4994 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4995 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4996 4997 /* 4998 * Derive actual allocation boundaries from vmspace. 4999 */ 5000 vm = (struct vmspace *)map; 5001 b_start = (vaddr_t)vm->vm_daddr; 5002 b_end = b_start + BRKSIZ; 5003 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 5004 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 5005 #ifdef DIAGNOSTIC 5006 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 5007 (b_end & (vaddr_t)PAGE_MASK) != 0 || 5008 (s_start & (vaddr_t)PAGE_MASK) != 0 || 5009 (s_end & (vaddr_t)PAGE_MASK) != 0) { 5010 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 5011 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 5012 vm, b_start, b_end, s_start, s_end); 5013 } 5014 #endif 5015 5016 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 5017 map->s_start == s_start && map->s_end == s_end)) 5018 return; 5019 5020 uvm_map_freelist_update(map, dead, b_start, b_end, 5021 s_start, s_end, flags); 5022 } 5023 5024 /* 5025 * Grow kernel memory. 5026 * 5027 * This function is only called for kernel maps when an allocation fails. 5028 * 5029 * If the map has a gap that is large enough to accommodate alloc_sz, this 5030 * function will make sure map->free will include it. 5031 */ 5032 void 5033 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 5034 vsize_t alloc_sz, int flags) 5035 { 5036 vsize_t sz; 5037 vaddr_t end; 5038 struct vm_map_entry *entry; 5039 5040 /* Kernel memory only. */ 5041 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 5042 /* Destroy free list. */ 5043 uvm_map_freelist_update_clear(map, dead); 5044 5045 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 5046 if (map->flags & VM_MAP_GUARDPAGES) 5047 alloc_sz += PAGE_SIZE; 5048 5049 /* 5050 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 5051 * 5052 * Don't handle the case where the multiplication overflows: 5053 * if that happens, the allocation is probably too big anyway. 
5054 */ 5055 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 5056 5057 /* 5058 * Walk forward until a gap large enough for alloc_sz shows up. 5059 * 5060 * We assume the kernel map has no boundaries. 5061 * uvm_maxkaddr may be zero. 5062 */ 5063 end = MAX(uvm_maxkaddr, map->min_offset); 5064 entry = uvm_map_entrybyaddr(&map->addr, end); 5065 while (entry && entry->fspace < alloc_sz) 5066 entry = RBT_NEXT(uvm_map_addr, entry); 5067 if (entry) { 5068 end = MAX(VMMAP_FREE_START(entry), end); 5069 end += MIN(sz, map->max_offset - end); 5070 } else 5071 end = map->max_offset; 5072 5073 /* Reserve pmap entries. */ 5074 #ifdef PMAP_GROWKERNEL 5075 uvm_maxkaddr = pmap_growkernel(end); 5076 #else 5077 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 5078 #endif 5079 5080 /* Rebuild free list. */ 5081 uvm_map_freelist_update_refill(map, flags); 5082 } 5083 5084 /* 5085 * Freelist update subfunction: unlink all entries from freelists. 5086 */ 5087 void 5088 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 5089 { 5090 struct uvm_addr_state *free; 5091 struct vm_map_entry *entry, *prev, *next; 5092 5093 prev = NULL; 5094 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 5095 entry = next) { 5096 next = RBT_NEXT(uvm_map_addr, entry); 5097 5098 free = uvm_map_uaddr_e(map, entry); 5099 uvm_mapent_free_remove(map, free, entry); 5100 5101 if (prev != NULL && entry->start == entry->end) { 5102 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 5103 uvm_mapent_addr_remove(map, entry); 5104 DEAD_ENTRY_PUSH(dead, entry); 5105 } else 5106 prev = entry; 5107 } 5108 } 5109 5110 /* 5111 * Freelist update subfunction: refill the freelists with entries. 5112 */ 5113 void 5114 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 5115 { 5116 struct vm_map_entry *entry; 5117 vaddr_t min, max; 5118 5119 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5120 min = VMMAP_FREE_START(entry); 5121 max = VMMAP_FREE_END(entry); 5122 entry->fspace = 0; 5123 5124 entry = uvm_map_fix_space(map, entry, min, max, flags); 5125 } 5126 5127 uvm_tree_sanity(map, __FILE__, __LINE__); 5128 } 5129 5130 /* 5131 * Change {a,b}_{start,end} allocation ranges and associated free lists. 5132 */ 5133 void 5134 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 5135 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 5136 { 5137 KDASSERT(b_end >= b_start && s_end >= s_start); 5138 5139 /* Clear all free lists. */ 5140 uvm_map_freelist_update_clear(map, dead); 5141 5142 /* Apply new bounds. */ 5143 map->b_start = b_start; 5144 map->b_end = b_end; 5145 map->s_start = s_start; 5146 map->s_end = s_end; 5147 5148 /* Refill free lists. */ 5149 uvm_map_freelist_update_refill(map, flags); 5150 } 5151 5152 /* 5153 * Assign a uvm_addr_state to the specified pointer in vm_map. 5154 * 5155 * May sleep. 5156 */ 5157 void 5158 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 5159 struct uvm_addr_state *newval) 5160 { 5161 struct uvm_map_deadq dead; 5162 5163 /* Pointer which must be in this map. */ 5164 KASSERT(which != NULL); 5165 KASSERT((void*)map <= (void*)(which) && 5166 (void*)(which) < (void*)(map + 1)); 5167 5168 vm_map_lock(map); 5169 TAILQ_INIT(&dead); 5170 uvm_map_freelist_update_clear(map, &dead); 5171 5172 uvm_addr_destroy(*which); 5173 *which = newval; 5174 5175 uvm_map_freelist_update_refill(map, 0); 5176 vm_map_unlock(map); 5177 uvm_unmap_detach(&dead, 0); 5178 } 5179 5180 /* 5181 * Correct space insert. 
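 * Insert the free space [min, max) into the map's free lists, extending
 * entry or creating new entries as needed, while honoring guard pages
 * and allocator boundaries (uvm_map_boundary()).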
5182 * 5183 * Entry must not be on any freelist. 5184 */ 5185 struct vm_map_entry* 5186 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5187 vaddr_t min, vaddr_t max, int flags) 5188 { 5189 struct uvm_addr_state *free, *entfree; 5190 vaddr_t lmax; 5191 5192 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5193 KDASSERT(min <= max); 5194 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5195 min == map->min_offset); 5196 5197 /* 5198 * During the function, entfree will always point at the uaddr state 5199 * for entry. 5200 */ 5201 entfree = (entry == NULL ? NULL : 5202 uvm_map_uaddr_e(map, entry)); 5203 5204 while (min != max) { 5205 /* Claim guard page for entry. */ 5206 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5207 VMMAP_FREE_END(entry) == entry->end && 5208 entry->start != entry->end) { 5209 if (max - min == 2 * PAGE_SIZE) { 5210 /* 5211 * If the free-space gap is exactly 2 pages, 5212 * we make the guard 2 pages instead of 1. 5213 * Because in a guarded map, an area needs 5214 * at least 2 pages to allocate from: 5215 * one page for the allocation and one for 5216 * the guard. 5217 */ 5218 entry->guard = 2 * PAGE_SIZE; 5219 min = max; 5220 } else { 5221 entry->guard = PAGE_SIZE; 5222 min += PAGE_SIZE; 5223 } 5224 continue; 5225 } 5226 5227 /* 5228 * Handle the case where entry has a 2-page guard, but the 5229 * space after entry is freed. 5230 */ 5231 if (entry != NULL && entry->fspace == 0 && 5232 entry->guard > PAGE_SIZE) { 5233 entry->guard = PAGE_SIZE; 5234 min = VMMAP_FREE_START(entry); 5235 } 5236 5237 lmax = uvm_map_boundary(map, min, max); 5238 free = uvm_map_uaddr(map, min); 5239 5240 /* 5241 * Entries are merged if they point at the same uvm_free(). 5242 * Exception to that rule: if min == uvm_maxkaddr, a new 5243 * entry is started regardless (otherwise the allocators 5244 * will get confused). 5245 */ 5246 if (entry != NULL && free == entfree && 5247 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5248 min == uvm_maxkaddr)) { 5249 KDASSERT(VMMAP_FREE_END(entry) == min); 5250 entry->fspace += lmax - min; 5251 } else { 5252 /* 5253 * Commit entry to free list: it'll not be added to 5254 * anymore. 5255 * We'll start a new entry and add to that entry 5256 * instead. 5257 */ 5258 if (entry != NULL) 5259 uvm_mapent_free_insert(map, entfree, entry); 5260 5261 /* New entry for new uaddr. */ 5262 entry = uvm_mapent_alloc(map, flags); 5263 KDASSERT(entry != NULL); 5264 entry->end = entry->start = min; 5265 entry->guard = 0; 5266 entry->fspace = lmax - min; 5267 entry->object.uvm_obj = NULL; 5268 entry->offset = 0; 5269 entry->etype = 0; 5270 entry->protection = entry->max_protection = 0; 5271 entry->inheritance = 0; 5272 entry->wired_count = 0; 5273 entry->advice = 0; 5274 entry->aref.ar_pageoff = 0; 5275 entry->aref.ar_amap = NULL; 5276 uvm_mapent_addr_insert(map, entry); 5277 5278 entfree = free; 5279 } 5280 5281 min = lmax; 5282 } 5283 /* Finally put entry on the uaddr state. */ 5284 if (entry != NULL) 5285 uvm_mapent_free_insert(map, entfree, entry); 5286 5287 return entry; 5288 } 5289 5290 /* 5291 * MQuery style of allocation. 5292 * 5293 * This allocator searches forward until sufficient space is found to map 5294 * the given size. 5295 * 5296 * XXX: factor in offset (via pmap_prefer) and protection? 
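 *
 * On success 0 is returned and *addr_p is updated with the address that
 * was found; EINVAL is returned when UVM_FLAG_FIXED is set and the
 * requested range is unavailable, ENOMEM when no suitable space exists.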
5297 */ 5298 int 5299 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5300 int flags) 5301 { 5302 struct vm_map_entry *entry, *last; 5303 vaddr_t addr; 5304 vaddr_t tmp, pmap_align, pmap_offset; 5305 int error; 5306 5307 addr = *addr_p; 5308 vm_map_lock_read(map); 5309 5310 /* Configure pmap prefer. */ 5311 if (offset != UVM_UNKNOWN_OFFSET) { 5312 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5313 pmap_offset = PMAP_PREFER_OFFSET(offset); 5314 } else { 5315 pmap_align = PAGE_SIZE; 5316 pmap_offset = 0; 5317 } 5318 5319 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5320 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5321 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5322 if (tmp < addr) 5323 tmp += pmap_align; 5324 addr = tmp; 5325 } 5326 5327 /* First, check if the requested range is fully available. */ 5328 entry = uvm_map_entrybyaddr(&map->addr, addr); 5329 last = NULL; 5330 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5331 error = 0; 5332 goto out; 5333 } 5334 if (flags & UVM_FLAG_FIXED) { 5335 error = EINVAL; 5336 goto out; 5337 } 5338 5339 error = ENOMEM; /* Default error from here. */ 5340 5341 /* 5342 * At this point, the memory at <addr, sz> is not available. 5343 * The reasons are: 5344 * [1] it's outside the map, 5345 * [2] it starts in used memory (and therefore needs to move 5346 * toward the first free page in entry), 5347 * [3] it starts in free memory but bumps into used memory. 5348 * 5349 * Note that for case [2], the forward moving is handled by the 5350 * for loop below. 5351 */ 5352 if (entry == NULL) { 5353 /* [1] Outside the map. */ 5354 if (addr >= map->max_offset) 5355 goto out; 5356 else 5357 entry = RBT_MIN(uvm_map_addr, &map->addr); 5358 } else if (VMMAP_FREE_START(entry) <= addr) { 5359 /* [3] Bumped into used memory. */ 5360 entry = RBT_NEXT(uvm_map_addr, entry); 5361 } 5362 5363 /* Test if the next entry is sufficient for the allocation. */ 5364 for (; entry != NULL; 5365 entry = RBT_NEXT(uvm_map_addr, entry)) { 5366 if (entry->fspace == 0) 5367 continue; 5368 addr = VMMAP_FREE_START(entry); 5369 5370 restart: /* Restart address checks on address change. */ 5371 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5372 if (tmp < addr) 5373 tmp += pmap_align; 5374 addr = tmp; 5375 if (addr >= VMMAP_FREE_END(entry)) 5376 continue; 5377 5378 /* Skip brk() allocation addresses. */ 5379 if (addr + sz > map->b_start && addr < map->b_end) { 5380 if (VMMAP_FREE_END(entry) > map->b_end) { 5381 addr = map->b_end; 5382 goto restart; 5383 } else 5384 continue; 5385 } 5386 /* Skip stack allocation addresses. 
*/ 5387 if (addr + sz > map->s_start && addr < map->s_end) { 5388 if (VMMAP_FREE_END(entry) > map->s_end) { 5389 addr = map->s_end; 5390 goto restart; 5391 } else 5392 continue; 5393 } 5394 5395 last = NULL; 5396 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5397 error = 0; 5398 goto out; 5399 } 5400 } 5401 5402 out: 5403 vm_map_unlock_read(map); 5404 if (error == 0) 5405 *addr_p = addr; 5406 return error; 5407 } 5408 5409 boolean_t 5410 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5411 { 5412 boolean_t rv; 5413 5414 if (map->flags & VM_MAP_INTRSAFE) { 5415 rv = mtx_enter_try(&map->mtx); 5416 } else { 5417 mtx_enter(&map->flags_lock); 5418 if (map->flags & VM_MAP_BUSY) { 5419 mtx_leave(&map->flags_lock); 5420 return (FALSE); 5421 } 5422 mtx_leave(&map->flags_lock); 5423 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5424 /* check if the lock is busy and back out if we won the race */ 5425 if (rv) { 5426 mtx_enter(&map->flags_lock); 5427 if (map->flags & VM_MAP_BUSY) { 5428 rw_exit(&map->lock); 5429 rv = FALSE; 5430 } 5431 mtx_leave(&map->flags_lock); 5432 } 5433 } 5434 5435 if (rv) { 5436 map->timestamp++; 5437 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5438 uvm_tree_sanity(map, file, line); 5439 uvm_tree_size_chk(map, file, line); 5440 } 5441 5442 return (rv); 5443 } 5444 5445 void 5446 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5447 { 5448 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5449 do { 5450 mtx_enter(&map->flags_lock); 5451 tryagain: 5452 while (map->flags & VM_MAP_BUSY) { 5453 map->flags |= VM_MAP_WANTLOCK; 5454 msleep_nsec(&map->flags, &map->flags_lock, 5455 PVM, vmmapbsy, INFSLP); 5456 } 5457 mtx_leave(&map->flags_lock); 5458 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5459 /* check if the lock is busy and back out if we won the race */ 5460 mtx_enter(&map->flags_lock); 5461 if (map->flags & VM_MAP_BUSY) { 5462 rw_exit(&map->lock); 5463 goto tryagain; 5464 } 5465 mtx_leave(&map->flags_lock); 5466 } else { 5467 mtx_enter(&map->mtx); 5468 } 5469 5470 map->timestamp++; 5471 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5472 uvm_tree_sanity(map, file, line); 5473 uvm_tree_size_chk(map, file, line); 5474 } 5475 5476 void 5477 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5478 { 5479 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5480 rw_enter_read(&map->lock); 5481 else 5482 mtx_enter(&map->mtx); 5483 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5484 uvm_tree_sanity(map, file, line); 5485 uvm_tree_size_chk(map, file, line); 5486 } 5487 5488 void 5489 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5490 { 5491 uvm_tree_sanity(map, file, line); 5492 uvm_tree_size_chk(map, file, line); 5493 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5494 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5495 rw_exit(&map->lock); 5496 else 5497 mtx_leave(&map->mtx); 5498 } 5499 5500 void 5501 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5502 { 5503 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5504 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5505 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5506 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5507 rw_exit_read(&map->lock); 5508 else 5509 mtx_leave(&map->mtx); 5510 } 5511 5512 void 5513 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5514 { 5515 uvm_tree_sanity(map, file, line); 5516 uvm_tree_size_chk(map, file, line); 5517 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, 
line)); 5518 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5519 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5520 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5521 rw_enter(&map->lock, RW_DOWNGRADE); 5522 } 5523 5524 void 5525 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5526 { 5527 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5528 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5529 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5530 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5531 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5532 rw_exit_read(&map->lock); 5533 rw_enter_write(&map->lock); 5534 } 5535 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5536 uvm_tree_sanity(map, file, line); 5537 } 5538 5539 void 5540 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5541 { 5542 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5543 mtx_enter(&map->flags_lock); 5544 map->flags |= VM_MAP_BUSY; 5545 mtx_leave(&map->flags_lock); 5546 } 5547 5548 void 5549 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5550 { 5551 int oflags; 5552 5553 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5554 mtx_enter(&map->flags_lock); 5555 oflags = map->flags; 5556 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5557 mtx_leave(&map->flags_lock); 5558 if (oflags & VM_MAP_WANTLOCK) 5559 wakeup(&map->flags); 5560 } 5561 5562 #ifndef SMALL_KERNEL 5563 int 5564 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5565 size_t *lenp) 5566 { 5567 struct vm_map_entry *entry; 5568 vaddr_t start; 5569 int cnt, maxcnt, error = 0; 5570 5571 KASSERT(*lenp > 0); 5572 KASSERT((*lenp % sizeof(*kve)) == 0); 5573 cnt = 0; 5574 maxcnt = *lenp / sizeof(*kve); 5575 KASSERT(maxcnt > 0); 5576 5577 /* 5578 * Return only entries whose address is above the given base 5579 * address. This allows userland to iterate without knowing the 5580 * number of entries beforehand. 5581 */ 5582 start = (vaddr_t)kve[0].kve_start; 5583 5584 vm_map_lock(map); 5585 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5586 if (cnt == maxcnt) { 5587 error = ENOMEM; 5588 break; 5589 } 5590 if (start != 0 && entry->start < start) 5591 continue; 5592 kve->kve_start = entry->start; 5593 kve->kve_end = entry->end; 5594 kve->kve_guard = entry->guard; 5595 kve->kve_fspace = entry->fspace; 5596 kve->kve_fspace_augment = entry->fspace_augment; 5597 kve->kve_offset = entry->offset; 5598 kve->kve_wired_count = entry->wired_count; 5599 kve->kve_etype = entry->etype; 5600 kve->kve_protection = entry->protection; 5601 kve->kve_max_protection = entry->max_protection; 5602 kve->kve_advice = entry->advice; 5603 kve->kve_inheritance = entry->inheritance; 5604 kve->kve_flags = entry->flags; 5605 kve++; 5606 cnt++; 5607 } 5608 vm_map_unlock(map); 5609 5610 KASSERT(cnt <= maxcnt); 5611 5612 *lenp = sizeof(*kve) * cnt; 5613 return error; 5614 } 5615 #endif 5616 5617 5618 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5619 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5620 5621 5622 /* 5623 * MD code: vmspace allocator setup. 5624 */ 5625 5626 #ifdef __i386__ 5627 void 5628 uvm_map_setup_md(struct vm_map *map) 5629 { 5630 vaddr_t min, max; 5631 5632 min = map->min_offset; 5633 max = map->max_offset; 5634 5635 /* 5636 * Ensure the selectors will not try to manage page 0; 5637 * it's too special. 5638 */ 5639 if (min < VMMAP_MIN_ADDR) 5640 min = VMMAP_MIN_ADDR; 5641 5642 #if 0 /* Cool stuff, not yet */ 5643 /* Executable code is special. 
*/ 5644 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5645 /* Place normal allocations beyond executable mappings. */ 5646 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5647 #else /* Crappy stuff, for now */ 5648 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5649 #endif 5650 5651 #ifndef SMALL_KERNEL 5652 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5653 #endif /* !SMALL_KERNEL */ 5654 } 5655 #elif __LP64__ 5656 void 5657 uvm_map_setup_md(struct vm_map *map) 5658 { 5659 vaddr_t min, max; 5660 5661 min = map->min_offset; 5662 max = map->max_offset; 5663 5664 /* 5665 * Ensure the selectors will not try to manage page 0; 5666 * it's too special. 5667 */ 5668 if (min < VMMAP_MIN_ADDR) 5669 min = VMMAP_MIN_ADDR; 5670 5671 #if 0 /* Cool stuff, not yet */ 5672 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5673 #else /* Crappy stuff, for now */ 5674 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5675 #endif 5676 5677 #ifndef SMALL_KERNEL 5678 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5679 #endif /* !SMALL_KERNEL */ 5680 } 5681 #else /* non-i386, 32 bit */ 5682 void 5683 uvm_map_setup_md(struct vm_map *map) 5684 { 5685 vaddr_t min, max; 5686 5687 min = map->min_offset; 5688 max = map->max_offset; 5689 5690 /* 5691 * Ensure the selectors will not try to manage page 0; 5692 * it's too special. 5693 */ 5694 if (min < VMMAP_MIN_ADDR) 5695 min = VMMAP_MIN_ADDR; 5696 5697 #if 0 /* Cool stuff, not yet */ 5698 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5699 #else /* Crappy stuff, for now */ 5700 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5701 #endif 5702 5703 #ifndef SMALL_KERNEL 5704 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5705 #endif /* !SMALL_KERNEL */ 5706 } 5707 #endif 5708