1 /* $OpenBSD: uvm_map.c,v 1.277 2021/06/17 16:10:39 mpi Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/acct.h> 90 #include <sys/mman.h> 91 #include <sys/proc.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/sysctl.h> 95 #include <sys/signalvar.h> 96 #include <sys/syslog.h> 97 #include <sys/user.h> 98 #include <sys/tracepoint.h> 99 100 #ifdef SYSVSHM 101 #include <sys/shm.h> 102 #endif 103 104 #include <uvm/uvm.h> 105 106 #ifdef DDB 107 #include <uvm/uvm_ddb.h> 108 #endif 109 110 #include <uvm/uvm_addr.h> 111 112 113 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 114 int uvm_mapent_isjoinable(struct vm_map*, 115 struct vm_map_entry*, struct vm_map_entry*); 116 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 117 struct vm_map_entry*, struct uvm_map_deadq*); 118 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 119 struct vm_map_entry*, struct uvm_map_deadq*); 120 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 121 struct vm_map_entry*, vaddr_t, vsize_t, int, 122 struct uvm_map_deadq*, struct vm_map_entry*); 123 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 124 void uvm_mapent_free(struct vm_map_entry*); 125 void uvm_unmap_kill_entry(struct vm_map*, 126 struct vm_map_entry*); 127 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 128 void uvm_mapent_mkfree(struct vm_map*, 129 struct vm_map_entry*, struct vm_map_entry**, 130 struct uvm_map_deadq*, boolean_t); 131 void uvm_map_pageable_pgon(struct vm_map*, 132 struct vm_map_entry*, struct vm_map_entry*, 133 vaddr_t, vaddr_t); 134 int uvm_map_pageable_wire(struct vm_map*, 135 struct vm_map_entry*, struct vm_map_entry*, 136 vaddr_t, vaddr_t, int); 137 void uvm_map_setup_entries(struct vm_map*); 138 void uvm_map_setup_md(struct vm_map*); 139 void uvm_map_teardown(struct vm_map*); 140 void uvm_map_vmspace_update(struct vm_map*, 141 struct uvm_map_deadq*, int); 142 void uvm_map_kmem_grow(struct vm_map*, 143 struct uvm_map_deadq*, vsize_t, int); 144 void uvm_map_freelist_update_clear(struct vm_map*, 145 struct uvm_map_deadq*); 146 void uvm_map_freelist_update_refill(struct vm_map *, int); 147 void uvm_map_freelist_update(struct vm_map*, 148 struct uvm_map_deadq*, vaddr_t, vaddr_t, 149 vaddr_t, vaddr_t, int); 150 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 151 vaddr_t, vaddr_t, int); 152 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 153 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 154 int); 155 int uvm_map_findspace(struct vm_map*, 156 struct vm_map_entry**, struct vm_map_entry**, 157 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 158 vaddr_t); 159 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 160 void uvm_map_addr_augment(struct vm_map_entry*); 161 162 int uvm_map_inentry_recheck(u_long, vaddr_t, 163 struct p_inentry *); 164 boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *, 165 vaddr_t, int 
(*)(vm_map_entry_t), u_long); 166 /* 167 * Tree management functions. 168 */ 169 170 static inline void uvm_mapent_copy(struct vm_map_entry*, 171 struct vm_map_entry*); 172 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 173 const struct vm_map_entry*); 174 void uvm_mapent_free_insert(struct vm_map*, 175 struct uvm_addr_state*, struct vm_map_entry*); 176 void uvm_mapent_free_remove(struct vm_map*, 177 struct uvm_addr_state*, struct vm_map_entry*); 178 void uvm_mapent_addr_insert(struct vm_map*, 179 struct vm_map_entry*); 180 void uvm_mapent_addr_remove(struct vm_map*, 181 struct vm_map_entry*); 182 void uvm_map_splitentry(struct vm_map*, 183 struct vm_map_entry*, struct vm_map_entry*, 184 vaddr_t); 185 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 186 187 /* 188 * uvm_vmspace_fork helper functions. 189 */ 190 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 191 vsize_t, vm_prot_t, vm_prot_t, 192 struct vm_map_entry*, struct uvm_map_deadq*, int, 193 int); 194 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 195 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 196 struct vm_map_entry*, struct uvm_map_deadq*); 197 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 198 struct vm_map*, struct vm_map_entry*, 199 struct uvm_map_deadq*); 200 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 201 struct vm_map*, struct vm_map_entry*, 202 struct uvm_map_deadq*); 203 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 204 struct vm_map*, struct vm_map_entry*, 205 struct uvm_map_deadq*); 206 207 /* 208 * Tree validation. 209 */ 210 #ifdef VMMAP_DEBUG 211 void uvm_tree_assert(struct vm_map*, int, char*, 212 char*, int); 213 #define UVM_ASSERT(map, cond, file, line) \ 214 uvm_tree_assert((map), (cond), #cond, (file), (line)) 215 void uvm_tree_sanity(struct vm_map*, char*, int); 216 void uvm_tree_size_chk(struct vm_map*, char*, int); 217 void vmspace_validate(struct vm_map*); 218 #else 219 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 220 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 221 #define vmspace_validate(_map) do {} while (0) 222 #endif 223 224 /* 225 * All architectures will have pmap_prefer. 226 */ 227 #ifndef PMAP_PREFER 228 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 229 #define PMAP_PREFER_OFFSET(off) 0 230 #define PMAP_PREFER(addr, off) (addr) 231 #endif 232 233 /* 234 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 235 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 236 * 237 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 238 * each time. 239 */ 240 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 241 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 242 #define VM_MAP_KSIZE_ALLOCMUL 4 243 /* 244 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 245 * ahead. 246 */ 247 #define FSPACE_DELTA 8 248 /* 249 * Put allocations adjecent to previous allocations when the free-space tree 250 * is larger than FSPACE_COMPACT entries. 251 * 252 * Alignment and PMAP_PREFER may still cause the entry to not be fully 253 * adjecent. Note that this strategy reduces memory fragmentation (by leaving 254 * a large space before or after the allocation). 255 */ 256 #define FSPACE_COMPACT 128 257 /* 258 * Make the address selection skip at most this many bytes from the start of 259 * the free space in which the allocation takes place. 
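 *
 * As an illustrative sketch only (not the selector code itself; "entry",
 * "sz" and "addr" are hypothetical locals, and sz is assumed to fit in the
 * free range), the clamp amounts to something like:
 *
 *	vsize_t room = MIN(entry->fspace - sz, FSPACE_MAXOFF);
 *	vaddr_t addr = VMMAP_FREE_START(entry) +
 *	    ((vaddr_t)arc4random_uniform(atop(room) + 1) << PAGE_SHIFT);
 *
 * so the random displacement within a single free range never exceeds
 * FSPACE_MAXOFF, no matter how large that range is.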
260 * 261 * The main idea behind a randomized address space is that an attacker cannot 262 * know where to target his attack. Therefore, the location of objects must be 263 * as random as possible. However, the goal is not to create the most sparse 264 * map that is possible. 265 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 266 * sizes, thereby reducing the sparseness. The biggest randomization comes 267 * from fragmentation, i.e. FSPACE_COMPACT. 268 */ 269 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 270 /* 271 * Allow for small gaps in the overflow areas. 272 * Gap size is in bytes and does not have to be a multiple of page-size. 273 */ 274 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 275 276 /* auto-allocate address lower bound */ 277 #define VMMAP_MIN_ADDR PAGE_SIZE 278 279 280 #ifdef DEADBEEF0 281 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 282 #else 283 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 284 #endif 285 286 #ifdef DEBUG 287 int uvm_map_printlocks = 0; 288 289 #define LPRINTF(_args) \ 290 do { \ 291 if (uvm_map_printlocks) \ 292 printf _args; \ 293 } while (0) 294 #else 295 #define LPRINTF(_args) do {} while (0) 296 #endif 297 298 static struct mutex uvm_kmapent_mtx; 299 static struct timeval uvm_kmapent_last_warn_time; 300 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 301 302 const char vmmapbsy[] = "vmmapbsy"; 303 304 /* 305 * pool for vmspace structures. 306 */ 307 struct pool uvm_vmspace_pool; 308 309 /* 310 * pool for dynamically-allocated map entries. 311 */ 312 struct pool uvm_map_entry_pool; 313 struct pool uvm_map_entry_kmem_pool; 314 315 /* 316 * This global represents the end of the kernel virtual address 317 * space. If we want to exceed this, we must grow the kernel 318 * virtual address space dynamically. 319 * 320 * Note, this variable is locked by kernel_map's lock. 321 */ 322 vaddr_t uvm_maxkaddr; 323 324 /* 325 * Locking predicate. 326 */ 327 #define UVM_MAP_REQ_WRITE(_map) \ 328 do { \ 329 if ((_map)->ref_count > 0) { \ 330 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 331 rw_assert_wrlock(&(_map)->lock); \ 332 else \ 333 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 334 } \ 335 } while (0) 336 337 #define vm_map_modflags(map, set, clear) \ 338 do { \ 339 mtx_enter(&(map)->flags_lock); \ 340 (map)->flags = ((map)->flags | (set)) & ~(clear); \ 341 mtx_leave(&(map)->flags_lock); \ 342 } while (0) 343 344 345 /* 346 * Tree describing entries by address. 347 * 348 * Addresses are unique. 349 * Entries with start == end may only exist if they are the first entry 350 * (sorted by address) within a free-memory tree. 351 */ 352 353 static inline int 354 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 355 const struct vm_map_entry *e2) 356 { 357 return e1->start < e2->start ? -1 : e1->start > e2->start; 358 } 359 360 /* 361 * Copy mapentry. 362 */ 363 static inline void 364 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 365 { 366 caddr_t csrc, cdst; 367 size_t sz; 368 369 csrc = (caddr_t)src; 370 cdst = (caddr_t)dst; 371 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 372 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 373 374 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 375 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 376 memcpy(cdst, csrc, sz); 377 } 378 379 /* 380 * Handle free-list insertion. 
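 *
 * A minimal sketch of how call sites in this file use the free-list
 * helpers when they change an entry's free range (see uvm_mapent_merge()
 * and uvm_map_mkentry() below for the real thing):
 *
 *	struct uvm_addr_state *free;
 *
 *	free = uvm_map_uaddr_e(map, entry);
 *	uvm_mapent_free_remove(map, free, entry);
 *	entry->guard = new_guard;
 *	entry->fspace = new_fspace;
 *	free = uvm_map_uaddr_e(map, entry);
 *	uvm_mapent_free_insert(map, free, entry);
 *
 * (new_guard/new_fspace are placeholders): take the entry off the free
 * list that currently holds it, adjust the free range, then insert it
 * into whatever free list matches the new range.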
381 */ 382 void 383 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 384 struct vm_map_entry *entry) 385 { 386 const struct uvm_addr_functions *fun; 387 #ifdef VMMAP_DEBUG 388 vaddr_t min, max, bound; 389 #endif 390 391 #ifdef VMMAP_DEBUG 392 /* 393 * Boundary check. 394 * Boundaries are folded if they go on the same free list. 395 */ 396 min = VMMAP_FREE_START(entry); 397 max = VMMAP_FREE_END(entry); 398 399 while (min < max) { 400 bound = uvm_map_boundary(map, min, max); 401 KASSERT(uvm_map_uaddr(map, min) == uaddr); 402 min = bound; 403 } 404 #endif 405 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 406 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 407 408 UVM_MAP_REQ_WRITE(map); 409 410 /* Actual insert: forward to uaddr pointer. */ 411 if (uaddr != NULL) { 412 fun = uaddr->uaddr_functions; 413 KDASSERT(fun != NULL); 414 if (fun->uaddr_free_insert != NULL) 415 (*fun->uaddr_free_insert)(map, uaddr, entry); 416 entry->etype |= UVM_ET_FREEMAPPED; 417 } 418 419 /* Update fspace augmentation. */ 420 uvm_map_addr_augment(entry); 421 } 422 423 /* 424 * Handle free-list removal. 425 */ 426 void 427 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 428 struct vm_map_entry *entry) 429 { 430 const struct uvm_addr_functions *fun; 431 432 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 433 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 434 UVM_MAP_REQ_WRITE(map); 435 436 if (uaddr != NULL) { 437 fun = uaddr->uaddr_functions; 438 if (fun->uaddr_free_remove != NULL) 439 (*fun->uaddr_free_remove)(map, uaddr, entry); 440 entry->etype &= ~UVM_ET_FREEMAPPED; 441 } 442 } 443 444 /* 445 * Handle address tree insertion. 446 */ 447 void 448 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 449 { 450 struct vm_map_entry *res; 451 452 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 453 panic("uvm_mapent_addr_insert: entry still in addr list"); 454 KDASSERT(entry->start <= entry->end); 455 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 456 (entry->end & (vaddr_t)PAGE_MASK) == 0); 457 458 TRACEPOINT(uvm, map_insert, 459 entry->start, entry->end, entry->protection, NULL); 460 461 UVM_MAP_REQ_WRITE(map); 462 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 463 if (res != NULL) { 464 panic("uvm_mapent_addr_insert: map %p entry %p " 465 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 466 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 467 map, entry, 468 entry->start, entry->end, entry->guard, entry->fspace, 469 res, res->start, res->end, res->guard, res->fspace); 470 } 471 } 472 473 /* 474 * Handle address tree removal. 475 */ 476 void 477 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 478 { 479 struct vm_map_entry *res; 480 481 TRACEPOINT(uvm, map_remove, 482 entry->start, entry->end, entry->protection, NULL); 483 484 UVM_MAP_REQ_WRITE(map); 485 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 486 if (res != entry) 487 panic("uvm_mapent_addr_remove"); 488 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 489 } 490 491 /* 492 * uvm_map_reference: add reference to a map 493 * 494 * => map need not be locked 495 */ 496 void 497 uvm_map_reference(struct vm_map *map) 498 { 499 atomic_inc_int(&map->ref_count); 500 } 501 502 /* 503 * Calculate the dused delta. 504 */ 505 vsize_t 506 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 507 { 508 struct vmspace *vm; 509 vsize_t sz; 510 vaddr_t lmax; 511 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 512 513 KASSERT(map->flags & VM_MAP_ISVMSPACE); 514 vm = (struct vmspace *)map; 515 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 516 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 517 518 sz = 0; 519 while (min != max) { 520 lmax = max; 521 if (min < stack_begin && lmax > stack_begin) 522 lmax = stack_begin; 523 else if (min < stack_end && lmax > stack_end) 524 lmax = stack_end; 525 526 if (min >= stack_begin && min < stack_end) { 527 /* nothing */ 528 } else 529 sz += lmax - min; 530 min = lmax; 531 } 532 533 return sz >> PAGE_SHIFT; 534 } 535 536 /* 537 * Find the entry describing the given address. 538 */ 539 struct vm_map_entry* 540 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 541 { 542 struct vm_map_entry *iter; 543 544 iter = RBT_ROOT(uvm_map_addr, atree); 545 while (iter != NULL) { 546 if (iter->start > addr) 547 iter = RBT_LEFT(uvm_map_addr, iter); 548 else if (VMMAP_FREE_END(iter) <= addr) 549 iter = RBT_RIGHT(uvm_map_addr, iter); 550 else 551 return iter; 552 } 553 return NULL; 554 } 555 556 /* 557 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 558 * 559 * Push dead entries into a linked list. 560 * Since the linked list abuses the address tree for storage, the entry 561 * may not be linked in a map. 562 * 563 * *head must be initialized to NULL before the first call to this macro. 564 * uvm_unmap_detach(*head, 0) will remove dead entries. 565 */ 566 static inline void 567 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 568 { 569 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 570 } 571 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 572 dead_entry_push((_headptr), (_entry)) 573 574 /* 575 * Helper function for uvm_map_findspace_tree. 576 * 577 * Given allocation constraints and pmap constraints, finds the 578 * lowest and highest address in a range that can be used for the 579 * allocation. 580 * 581 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 582 * 583 * 584 * Big chunk of math with a seasoning of dragons. 585 */ 586 int 587 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 588 struct vm_map_entry *sel, vaddr_t align, 589 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 590 { 591 vaddr_t sel_min, sel_max; 592 #ifdef PMAP_PREFER 593 vaddr_t pmap_min, pmap_max; 594 #endif /* PMAP_PREFER */ 595 #ifdef DIAGNOSTIC 596 int bad; 597 #endif /* DIAGNOSTIC */ 598 599 sel_min = VMMAP_FREE_START(sel); 600 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 601 602 #ifdef PMAP_PREFER 603 604 /* 605 * There are two special cases, in which we can satisfy the align 606 * requirement and the pmap_prefer requirement. 607 * - when pmap_off == 0, we always select the largest of the two 608 * - when pmap_off % align == 0 and pmap_align > align, we simply 609 * satisfy the pmap_align requirement and automatically 610 * satisfy the align requirement. 611 */ 612 if (align > PAGE_SIZE && 613 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 614 /* 615 * Simple case: only use align. 616 */ 617 sel_min = roundup(sel_min, align); 618 sel_max &= ~(align - 1); 619 620 if (sel_min > sel_max) 621 return ENOMEM; 622 623 /* Correct for bias. 
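 *
 * An illustrative example of the clamp performed here, with made-up
 * numbers (bias > 0, align = 0x2000, FSPACE_BIASGAP = 32 * 1024):
 *
 *	sel_min = 0x1000000, sel_max = 0x2000000
 *	sel_min = roundup(0x2000000 - 0x8000, 0x2000) = 0x1ff8000
 *
 * leaving only the topmost, suitably aligned 32 KB of the window as
 * candidates, so a top-biased allocation stays close to the end of the
 * free range.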
*/ 624 if (sel_max - sel_min > FSPACE_BIASGAP) { 625 if (bias > 0) { 626 sel_min = sel_max - FSPACE_BIASGAP; 627 sel_min = roundup(sel_min, align); 628 } else if (bias < 0) { 629 sel_max = sel_min + FSPACE_BIASGAP; 630 sel_max &= ~(align - 1); 631 } 632 } 633 } else if (pmap_align != 0) { 634 /* 635 * Special case: satisfy both pmap_prefer and 636 * align argument. 637 */ 638 pmap_max = sel_max & ~(pmap_align - 1); 639 pmap_min = sel_min; 640 if (pmap_max < sel_min) 641 return ENOMEM; 642 643 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 644 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 645 pmap_min = pmap_max - FSPACE_BIASGAP; 646 /* Align pmap_min. */ 647 pmap_min &= ~(pmap_align - 1); 648 if (pmap_min < sel_min) 649 pmap_min += pmap_align; 650 if (pmap_min > pmap_max) 651 return ENOMEM; 652 653 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 654 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 655 pmap_max = (pmap_min + FSPACE_BIASGAP) & 656 ~(pmap_align - 1); 657 } 658 if (pmap_min > pmap_max) 659 return ENOMEM; 660 661 /* Apply pmap prefer offset. */ 662 pmap_max |= pmap_off; 663 if (pmap_max > sel_max) 664 pmap_max -= pmap_align; 665 pmap_min |= pmap_off; 666 if (pmap_min < sel_min) 667 pmap_min += pmap_align; 668 669 /* 670 * Fixup: it's possible that pmap_min and pmap_max 671 * cross each other. In this case, try to find one 672 * address that is allowed. 673 * (This usually happens in biased case.) 674 */ 675 if (pmap_min > pmap_max) { 676 if (pmap_min < sel_max) 677 pmap_max = pmap_min; 678 else if (pmap_max > sel_min) 679 pmap_min = pmap_max; 680 else 681 return ENOMEM; 682 } 683 684 /* Internal validation. */ 685 KDASSERT(pmap_min <= pmap_max); 686 687 sel_min = pmap_min; 688 sel_max = pmap_max; 689 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 690 sel_min = sel_max - FSPACE_BIASGAP; 691 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 692 sel_max = sel_min + FSPACE_BIASGAP; 693 694 #else 695 696 if (align > PAGE_SIZE) { 697 sel_min = roundup(sel_min, align); 698 sel_max &= ~(align - 1); 699 if (sel_min > sel_max) 700 return ENOMEM; 701 702 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 703 if (bias > 0) { 704 sel_min = roundup(sel_max - FSPACE_BIASGAP, 705 align); 706 } else { 707 sel_max = (sel_min + FSPACE_BIASGAP) & 708 ~(align - 1); 709 } 710 } 711 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 712 sel_min = sel_max - FSPACE_BIASGAP; 713 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 714 sel_max = sel_min + FSPACE_BIASGAP; 715 716 #endif 717 718 if (sel_min > sel_max) 719 return ENOMEM; 720 721 #ifdef DIAGNOSTIC 722 bad = 0; 723 /* Lower boundary check. */ 724 if (sel_min < VMMAP_FREE_START(sel)) { 725 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 726 sel_min, VMMAP_FREE_START(sel)); 727 bad++; 728 } 729 /* Upper boundary check. */ 730 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 731 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 732 sel_max, 733 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 734 bad++; 735 } 736 /* Lower boundary alignment. */ 737 if (align != 0 && (sel_min & (align - 1)) != 0) { 738 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 739 sel_min, align); 740 bad++; 741 } 742 /* Upper boundary alignment. */ 743 if (align != 0 && (sel_max & (align - 1)) != 0) { 744 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 745 sel_max, align); 746 bad++; 747 } 748 /* Lower boundary PMAP_PREFER check. 
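 *
 * What the two checks below verify is a congruence: with PMAP_PREFER in
 * effect and no explicit align argument, both selection boundaries must
 * satisfy
 *
 *	(sel_min & (pmap_align - 1)) == pmap_off
 *	(sel_max & (pmap_align - 1)) == pmap_off
 *
 * For example (made-up values), with pmap_align = 0x10000 and
 * pmap_off = 0x3000, boundaries such as 0x243000 or 0x1153000 pass,
 * while 0x240000 does not.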
*/
749 if (pmap_align != 0 && align == 0 &&
750 (sel_min & (pmap_align - 1)) != pmap_off) {
751 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
752 sel_min, sel_min & (pmap_align - 1), pmap_off);
753 bad++;
754 }
755 /* Upper boundary PMAP_PREFER check. */
756 if (pmap_align != 0 && align == 0 &&
757 (sel_max & (pmap_align - 1)) != pmap_off) {
758 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
759 sel_max, sel_max & (pmap_align - 1), pmap_off);
760 bad++;
761 }
762
763 if (bad) {
764 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
765 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
766 "bias = %d, "
767 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
768 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
769 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
770 }
771 #endif /* DIAGNOSTIC */
772
773 *min = sel_min;
774 *max = sel_max;
775 return 0;
776 }
777
778 /*
779 * Test if memory starting at addr with sz bytes is free.
780 *
781 * Fills in *start_ptr and *end_ptr to be the first and last entry describing
782 * the space.
783 * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
784 */
785 int
786 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
787 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
788 vaddr_t addr, vsize_t sz)
789 {
790 struct uvm_addr_state *free;
791 struct uvm_map_addr *atree;
792 struct vm_map_entry *i, *i_end;
793
794 if (addr + sz < addr)
795 return 0;
796
797 /*
798 * Kernel memory above uvm_maxkaddr is considered unavailable.
799 */
800 if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
801 if (addr + sz > uvm_maxkaddr)
802 return 0;
803 }
804
805 atree = &map->addr;
806
807 /*
808 * Fill in first, last, so they point at the entries containing the
809 * first and last address of the range.
810 * Note that if they are not NULL, we don't perform the lookup.
811 */
812 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
813 if (*start_ptr == NULL) {
814 *start_ptr = uvm_map_entrybyaddr(atree, addr);
815 if (*start_ptr == NULL)
816 return 0;
817 } else
818 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
819 if (*end_ptr == NULL) {
820 if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
821 *end_ptr = *start_ptr;
822 else {
823 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
824 if (*end_ptr == NULL)
825 return 0;
826 }
827 } else
828 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
829
830 /* Validation. */
831 KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
832 KDASSERT((*start_ptr)->start <= addr &&
833 VMMAP_FREE_END(*start_ptr) > addr &&
834 (*end_ptr)->start < addr + sz &&
835 VMMAP_FREE_END(*end_ptr) >= addr + sz);
836
837 /*
838 * Check that none of the entries intersect with <addr, addr+sz>.
839 * Also, if an entry belongs to uaddr_exe or uaddr_brk_stack, it is
840 * considered unavailable unless called by those allocators.
841 */
842 i = *start_ptr;
843 i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
844 for (; i != i_end;
845 i = RBT_NEXT(uvm_map_addr, i)) {
846 if (i->start != i->end && i->end > addr)
847 return 0;
848
849 /*
850 * uaddr_exe and uaddr_brk_stack may only be used
851 * by these allocators and the NULL uaddr (i.e. no
852 * uaddr).
853 * Reject if this requirement is not met.
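 *
 * A few concrete cases of the check below (hypothetical, using this
 * map's selector fields):
 *
 *	uaddr == map->uaddr_any[0], free == map->uaddr_brk_stack
 *		-> rejected; the brk/stack reservation is not for
 *		   general allocations
 *	uaddr == map->uaddr_brk_stack, free == map->uaddr_brk_stack
 *		-> accepted; the owning selector may use its own range
 *	uaddr == NULL, free == map->uaddr_exe
 *		-> accepted; with no selector (e.g. a fixed-address
 *		   request) the restriction is not applied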
854 */ 855 if (uaddr != NULL) { 856 free = uvm_map_uaddr_e(map, i); 857 858 if (uaddr != free && free != NULL && 859 (free == map->uaddr_exe || 860 free == map->uaddr_brk_stack)) 861 return 0; 862 } 863 } 864 865 return -1; 866 } 867 868 /* 869 * Invoke each address selector until an address is found. 870 * Will not invoke uaddr_exe. 871 */ 872 int 873 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 874 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 875 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 876 { 877 struct uvm_addr_state *uaddr; 878 int i; 879 880 /* 881 * Allocation for sz bytes at any address, 882 * using the addr selectors in order. 883 */ 884 for (i = 0; i < nitems(map->uaddr_any); i++) { 885 uaddr = map->uaddr_any[i]; 886 887 if (uvm_addr_invoke(map, uaddr, first, last, 888 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 889 return 0; 890 } 891 892 /* Fall back to brk() and stack() address selectors. */ 893 uaddr = map->uaddr_brk_stack; 894 if (uvm_addr_invoke(map, uaddr, first, last, 895 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 896 return 0; 897 898 return ENOMEM; 899 } 900 901 /* Calculate entry augmentation value. */ 902 vsize_t 903 uvm_map_addr_augment_get(struct vm_map_entry *entry) 904 { 905 vsize_t augment; 906 struct vm_map_entry *left, *right; 907 908 augment = entry->fspace; 909 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 910 augment = MAX(augment, left->fspace_augment); 911 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 912 augment = MAX(augment, right->fspace_augment); 913 return augment; 914 } 915 916 /* 917 * Update augmentation data in entry. 918 */ 919 void 920 uvm_map_addr_augment(struct vm_map_entry *entry) 921 { 922 vsize_t augment; 923 924 while (entry != NULL) { 925 /* Calculate value for augmentation. */ 926 augment = uvm_map_addr_augment_get(entry); 927 928 /* 929 * Descend update. 930 * Once we find an entry that already has the correct value, 931 * stop, since it means all its parents will use the correct 932 * value too. 933 */ 934 if (entry->fspace_augment == augment) 935 return; 936 entry->fspace_augment = augment; 937 entry = RBT_PARENT(uvm_map_addr, entry); 938 } 939 } 940 941 /* 942 * uvm_mapanon: establish a valid mapping in map for an anon 943 * 944 * => *addr and sz must be a multiple of PAGE_SIZE. 945 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 946 * => map must be unlocked. 947 * 948 * => align: align vaddr, must be a power-of-2. 949 * Align is only a hint and will be ignored if the alignment fails. 950 */ 951 int 952 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 953 vsize_t align, unsigned int flags) 954 { 955 struct vm_map_entry *first, *last, *entry, *new; 956 struct uvm_map_deadq dead; 957 vm_prot_t prot; 958 vm_prot_t maxprot; 959 vm_inherit_t inherit; 960 int advice; 961 int error; 962 vaddr_t pmap_align, pmap_offset; 963 vaddr_t hint; 964 965 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 966 KASSERT(map != kernel_map); 967 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 968 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 969 splassert(IPL_NONE); 970 KASSERT((flags & UVM_FLAG_TRYLOCK) == 0); 971 972 /* 973 * We use pmap_align and pmap_offset as alignment and offset variables. 974 * 975 * Because the align parameter takes precedence over pmap prefer, 976 * the pmap_align will need to be set to align, with pmap_offset = 0, 977 * if pmap_prefer will not align. 
978 */ 979 pmap_align = MAX(align, PAGE_SIZE); 980 pmap_offset = 0; 981 982 /* Decode parameters. */ 983 prot = UVM_PROTECTION(flags); 984 maxprot = UVM_MAXPROTECTION(flags); 985 advice = UVM_ADVICE(flags); 986 inherit = UVM_INHERIT(flags); 987 error = 0; 988 hint = trunc_page(*addr); 989 TAILQ_INIT(&dead); 990 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 991 KASSERT((align & (align - 1)) == 0); 992 993 /* Check protection. */ 994 if ((prot & maxprot) != prot) 995 return EACCES; 996 997 /* 998 * Before grabbing the lock, allocate a map entry for later 999 * use to ensure we don't wait for memory while holding the 1000 * vm_map_lock. 1001 */ 1002 new = uvm_mapent_alloc(map, flags); 1003 if (new == NULL) 1004 return ENOMEM; 1005 1006 vm_map_lock(map); 1007 first = last = NULL; 1008 if (flags & UVM_FLAG_FIXED) { 1009 /* 1010 * Fixed location. 1011 * 1012 * Note: we ignore align, pmap_prefer. 1013 * Fill in first, last and *addr. 1014 */ 1015 KASSERT((*addr & PAGE_MASK) == 0); 1016 1017 /* Check that the space is available. */ 1018 if (flags & UVM_FLAG_UNMAP) { 1019 if ((flags & UVM_FLAG_STACK) && 1020 !uvm_map_is_stack_remappable(map, *addr, sz)) { 1021 error = EINVAL; 1022 goto unlock; 1023 } 1024 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1025 } 1026 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1027 error = ENOMEM; 1028 goto unlock; 1029 } 1030 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1031 (align == 0 || (*addr & (align - 1)) == 0) && 1032 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1033 /* 1034 * Address used as hint. 1035 * 1036 * Note: we enforce the alignment restriction, 1037 * but ignore pmap_prefer. 1038 */ 1039 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1040 /* Run selection algorithm for executables. */ 1041 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1042 addr, sz, pmap_align, pmap_offset, prot, hint); 1043 1044 if (error != 0) 1045 goto unlock; 1046 } else { 1047 /* Update freelists from vmspace. */ 1048 uvm_map_vmspace_update(map, &dead, flags); 1049 1050 error = uvm_map_findspace(map, &first, &last, addr, sz, 1051 pmap_align, pmap_offset, prot, hint); 1052 1053 if (error != 0) 1054 goto unlock; 1055 } 1056 1057 /* Double-check if selected address doesn't cause overflow. */ 1058 if (*addr + sz < *addr) { 1059 error = ENOMEM; 1060 goto unlock; 1061 } 1062 1063 /* If we only want a query, return now. */ 1064 if (flags & UVM_FLAG_QUERY) { 1065 error = 0; 1066 goto unlock; 1067 } 1068 1069 /* 1070 * Create new entry. 1071 * first and last may be invalidated after this call. 
1072 */ 1073 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1074 new); 1075 if (entry == NULL) { 1076 error = ENOMEM; 1077 goto unlock; 1078 } 1079 new = NULL; 1080 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1081 entry->object.uvm_obj = NULL; 1082 entry->offset = 0; 1083 entry->protection = prot; 1084 entry->max_protection = maxprot; 1085 entry->inheritance = inherit; 1086 entry->wired_count = 0; 1087 entry->advice = advice; 1088 if (prot & PROT_WRITE) 1089 map->wserial++; 1090 if (flags & UVM_FLAG_SYSCALL) { 1091 entry->etype |= UVM_ET_SYSCALL; 1092 map->wserial++; 1093 } 1094 if (flags & UVM_FLAG_STACK) { 1095 entry->etype |= UVM_ET_STACK; 1096 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 1097 map->sserial++; 1098 } 1099 if (flags & UVM_FLAG_COPYONW) { 1100 entry->etype |= UVM_ET_COPYONWRITE; 1101 if ((flags & UVM_FLAG_OVERLAY) == 0) 1102 entry->etype |= UVM_ET_NEEDSCOPY; 1103 } 1104 if (flags & UVM_FLAG_CONCEAL) 1105 entry->etype |= UVM_ET_CONCEAL; 1106 if (flags & UVM_FLAG_OVERLAY) { 1107 entry->aref.ar_pageoff = 0; 1108 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1109 } 1110 1111 /* Update map and process statistics. */ 1112 map->size += sz; 1113 if (prot != PROT_NONE) { 1114 ((struct vmspace *)map)->vm_dused += 1115 uvmspace_dused(map, *addr, *addr + sz); 1116 } 1117 1118 unlock: 1119 vm_map_unlock(map); 1120 1121 /* 1122 * Remove dead entries. 1123 * 1124 * Dead entries may be the result of merging. 1125 * uvm_map_mkentry may also create dead entries, when it attempts to 1126 * destroy free-space entries. 1127 */ 1128 uvm_unmap_detach(&dead, 0); 1129 1130 if (new) 1131 uvm_mapent_free(new); 1132 return error; 1133 } 1134 1135 /* 1136 * uvm_map: establish a valid mapping in map 1137 * 1138 * => *addr and sz must be a multiple of PAGE_SIZE. 1139 * => map must be unlocked. 1140 * => <uobj,uoffset> value meanings (4 cases): 1141 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1142 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1143 * [3] <uobj,uoffset> == normal mapping 1144 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1145 * 1146 * case [4] is for kernel mappings where we don't know the offset until 1147 * we've found a virtual address. note that kernel object offsets are 1148 * always relative to vm_map_min(kernel_map). 1149 * 1150 * => align: align vaddr, must be a power-of-2. 1151 * Align is only a hint and will be ignored if the alignment fails. 1152 */ 1153 int 1154 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1155 struct uvm_object *uobj, voff_t uoffset, 1156 vsize_t align, unsigned int flags) 1157 { 1158 struct vm_map_entry *first, *last, *entry, *new; 1159 struct uvm_map_deadq dead; 1160 vm_prot_t prot; 1161 vm_prot_t maxprot; 1162 vm_inherit_t inherit; 1163 int advice; 1164 int error; 1165 vaddr_t pmap_align, pmap_offset; 1166 vaddr_t hint; 1167 1168 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1169 splassert(IPL_NONE); 1170 else 1171 splassert(IPL_VM); 1172 1173 /* 1174 * We use pmap_align and pmap_offset as alignment and offset variables. 1175 * 1176 * Because the align parameter takes precedence over pmap prefer, 1177 * the pmap_align will need to be set to align, with pmap_offset = 0, 1178 * if pmap_prefer will not align. 
1179 */ 1180 if (uoffset == UVM_UNKNOWN_OFFSET) { 1181 pmap_align = MAX(align, PAGE_SIZE); 1182 pmap_offset = 0; 1183 } else { 1184 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1185 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1186 1187 if (align == 0 || 1188 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1189 /* pmap_offset satisfies align, no change. */ 1190 } else { 1191 /* Align takes precedence over pmap prefer. */ 1192 pmap_align = align; 1193 pmap_offset = 0; 1194 } 1195 } 1196 1197 /* Decode parameters. */ 1198 prot = UVM_PROTECTION(flags); 1199 maxprot = UVM_MAXPROTECTION(flags); 1200 advice = UVM_ADVICE(flags); 1201 inherit = UVM_INHERIT(flags); 1202 error = 0; 1203 hint = trunc_page(*addr); 1204 TAILQ_INIT(&dead); 1205 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1206 KASSERT((align & (align - 1)) == 0); 1207 1208 /* Holes are incompatible with other types of mappings. */ 1209 if (flags & UVM_FLAG_HOLE) { 1210 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1211 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1212 } 1213 1214 /* Unset hint for kernel_map non-fixed allocations. */ 1215 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1216 hint = 0; 1217 1218 /* Check protection. */ 1219 if ((prot & maxprot) != prot) 1220 return EACCES; 1221 1222 if (map == kernel_map && 1223 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1224 panic("uvm_map: kernel map W^X violation requested"); 1225 1226 /* 1227 * Before grabbing the lock, allocate a map entry for later 1228 * use to ensure we don't wait for memory while holding the 1229 * vm_map_lock. 1230 */ 1231 new = uvm_mapent_alloc(map, flags); 1232 if (new == NULL) 1233 return ENOMEM; 1234 1235 if (flags & UVM_FLAG_TRYLOCK) { 1236 if (vm_map_lock_try(map) == FALSE) { 1237 error = EFAULT; 1238 goto out; 1239 } 1240 } else { 1241 vm_map_lock(map); 1242 } 1243 1244 first = last = NULL; 1245 if (flags & UVM_FLAG_FIXED) { 1246 /* 1247 * Fixed location. 1248 * 1249 * Note: we ignore align, pmap_prefer. 1250 * Fill in first, last and *addr. 1251 */ 1252 KASSERT((*addr & PAGE_MASK) == 0); 1253 1254 /* 1255 * Grow pmap to include allocated address. 1256 * If the growth fails, the allocation will fail too. 1257 */ 1258 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1259 uvm_maxkaddr < (*addr + sz)) { 1260 uvm_map_kmem_grow(map, &dead, 1261 *addr + sz - uvm_maxkaddr, flags); 1262 } 1263 1264 /* Check that the space is available. */ 1265 if (flags & UVM_FLAG_UNMAP) 1266 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1267 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1268 error = ENOMEM; 1269 goto unlock; 1270 } 1271 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1272 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1273 (align == 0 || (*addr & (align - 1)) == 0) && 1274 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1275 /* 1276 * Address used as hint. 1277 * 1278 * Note: we enforce the alignment restriction, 1279 * but ignore pmap_prefer. 1280 */ 1281 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1282 /* Run selection algorithm for executables. */ 1283 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1284 addr, sz, pmap_align, pmap_offset, prot, hint); 1285 1286 /* Grow kernel memory and try again. 
*/ 1287 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1288 uvm_map_kmem_grow(map, &dead, sz, flags); 1289 1290 error = uvm_addr_invoke(map, map->uaddr_exe, 1291 &first, &last, addr, sz, 1292 pmap_align, pmap_offset, prot, hint); 1293 } 1294 1295 if (error != 0) 1296 goto unlock; 1297 } else { 1298 /* Update freelists from vmspace. */ 1299 if (map->flags & VM_MAP_ISVMSPACE) 1300 uvm_map_vmspace_update(map, &dead, flags); 1301 1302 error = uvm_map_findspace(map, &first, &last, addr, sz, 1303 pmap_align, pmap_offset, prot, hint); 1304 1305 /* Grow kernel memory and try again. */ 1306 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1307 uvm_map_kmem_grow(map, &dead, sz, flags); 1308 1309 error = uvm_map_findspace(map, &first, &last, addr, sz, 1310 pmap_align, pmap_offset, prot, hint); 1311 } 1312 1313 if (error != 0) 1314 goto unlock; 1315 } 1316 1317 /* Double-check if selected address doesn't cause overflow. */ 1318 if (*addr + sz < *addr) { 1319 error = ENOMEM; 1320 goto unlock; 1321 } 1322 1323 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1324 uvm_maxkaddr >= *addr + sz); 1325 1326 /* If we only want a query, return now. */ 1327 if (flags & UVM_FLAG_QUERY) { 1328 error = 0; 1329 goto unlock; 1330 } 1331 1332 if (uobj == NULL) 1333 uoffset = 0; 1334 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1335 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1336 uoffset = *addr - vm_map_min(kernel_map); 1337 } 1338 1339 /* 1340 * Create new entry. 1341 * first and last may be invalidated after this call. 1342 */ 1343 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1344 new); 1345 if (entry == NULL) { 1346 error = ENOMEM; 1347 goto unlock; 1348 } 1349 new = NULL; 1350 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1351 entry->object.uvm_obj = uobj; 1352 entry->offset = uoffset; 1353 entry->protection = prot; 1354 entry->max_protection = maxprot; 1355 entry->inheritance = inherit; 1356 entry->wired_count = 0; 1357 entry->advice = advice; 1358 if (prot & PROT_WRITE) 1359 map->wserial++; 1360 if (flags & UVM_FLAG_SYSCALL) { 1361 entry->etype |= UVM_ET_SYSCALL; 1362 map->wserial++; 1363 } 1364 if (flags & UVM_FLAG_STACK) { 1365 entry->etype |= UVM_ET_STACK; 1366 if (flags & UVM_FLAG_UNMAP) 1367 map->sserial++; 1368 } 1369 if (uobj) 1370 entry->etype |= UVM_ET_OBJ; 1371 else if (flags & UVM_FLAG_HOLE) 1372 entry->etype |= UVM_ET_HOLE; 1373 if (flags & UVM_FLAG_NOFAULT) 1374 entry->etype |= UVM_ET_NOFAULT; 1375 if (flags & UVM_FLAG_WC) 1376 entry->etype |= UVM_ET_WC; 1377 if (flags & UVM_FLAG_COPYONW) { 1378 entry->etype |= UVM_ET_COPYONWRITE; 1379 if ((flags & UVM_FLAG_OVERLAY) == 0) 1380 entry->etype |= UVM_ET_NEEDSCOPY; 1381 } 1382 if (flags & UVM_FLAG_CONCEAL) 1383 entry->etype |= UVM_ET_CONCEAL; 1384 if (flags & UVM_FLAG_OVERLAY) { 1385 entry->aref.ar_pageoff = 0; 1386 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1387 } 1388 1389 /* Update map and process statistics. */ 1390 if (!(flags & UVM_FLAG_HOLE)) { 1391 map->size += sz; 1392 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && 1393 prot != PROT_NONE) { 1394 ((struct vmspace *)map)->vm_dused += 1395 uvmspace_dused(map, *addr, *addr + sz); 1396 } 1397 } 1398 1399 /* 1400 * Try to merge entry. 1401 * 1402 * Userland allocations are kept separated most of the time. 1403 * Forego the effort of merging what most of the time can't be merged 1404 * and only try the merge if it concerns a kernel entry. 
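 *
 * In sketch form, uvm_mapent_isjoinable() (defined below) only allows a
 * join of adjacent entries e1 and e2 when
 *
 *	e1->etype == e2->etype && e1->end == e2->start &&
 *	!UVM_ET_ISSUBMAP(e1) &&
 *	!VM_MAPENT_ISWIRED(e1) && !VM_MAPENT_ISWIRED(e2) &&
 *	e1->protection == e2->protection &&
 *	e1->max_protection == e2->max_protection &&
 *	e1->inheritance == e2->inheritance &&
 *	e1->advice == e2->advice
 *
 * (plus object offset and amap reference conditions). Back-to-back kernel
 * allocations commonly satisfy all of this; userland entries rarely do,
 * hence the VM_MAP_ISVMSPACE test below.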
1405 */ 1406 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1407 (map->flags & VM_MAP_ISVMSPACE) == 0) 1408 uvm_mapent_tryjoin(map, entry, &dead); 1409 1410 unlock: 1411 vm_map_unlock(map); 1412 1413 /* 1414 * Remove dead entries. 1415 * 1416 * Dead entries may be the result of merging. 1417 * uvm_map_mkentry may also create dead entries, when it attempts to 1418 * destroy free-space entries. 1419 */ 1420 if (map->flags & VM_MAP_INTRSAFE) 1421 uvm_unmap_detach_intrsafe(&dead); 1422 else 1423 uvm_unmap_detach(&dead, 0); 1424 out: 1425 if (new) 1426 uvm_mapent_free(new); 1427 return error; 1428 } 1429 1430 /* 1431 * True iff e1 and e2 can be joined together. 1432 */ 1433 int 1434 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1435 struct vm_map_entry *e2) 1436 { 1437 KDASSERT(e1 != NULL && e2 != NULL); 1438 1439 /* Must be the same entry type and not have free memory between. */ 1440 if (e1->etype != e2->etype || e1->end != e2->start) 1441 return 0; 1442 1443 /* Submaps are never joined. */ 1444 if (UVM_ET_ISSUBMAP(e1)) 1445 return 0; 1446 1447 /* Never merge wired memory. */ 1448 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1449 return 0; 1450 1451 /* Protection, inheritance and advice must be equal. */ 1452 if (e1->protection != e2->protection || 1453 e1->max_protection != e2->max_protection || 1454 e1->inheritance != e2->inheritance || 1455 e1->advice != e2->advice) 1456 return 0; 1457 1458 /* If uvm_object: object itself and offsets within object must match. */ 1459 if (UVM_ET_ISOBJ(e1)) { 1460 if (e1->object.uvm_obj != e2->object.uvm_obj) 1461 return 0; 1462 if (e1->offset + (e1->end - e1->start) != e2->offset) 1463 return 0; 1464 } 1465 1466 /* 1467 * Cannot join shared amaps. 1468 * Note: no need to lock amap to look at refs, since we don't care 1469 * about its exact value. 1470 * If it is 1 (i.e. we have the only reference) it will stay there. 1471 */ 1472 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1473 return 0; 1474 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1475 return 0; 1476 1477 /* Apparently, e1 and e2 match. */ 1478 return 1; 1479 } 1480 1481 /* 1482 * Join support function. 1483 * 1484 * Returns the merged entry on success. 1485 * Returns NULL if the merge failed. 1486 */ 1487 struct vm_map_entry* 1488 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1489 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1490 { 1491 struct uvm_addr_state *free; 1492 1493 /* 1494 * Merging is not supported for map entries that 1495 * contain an amap in e1. This should never happen 1496 * anyway, because only kernel entries are merged. 1497 * These do not contain amaps. 1498 * e2 contains no real information in its amap, 1499 * so it can be erased immediately. 1500 */ 1501 KASSERT(e1->aref.ar_amap == NULL); 1502 1503 /* 1504 * Don't drop obj reference: 1505 * uvm_unmap_detach will do this for us. 1506 */ 1507 free = uvm_map_uaddr_e(map, e1); 1508 uvm_mapent_free_remove(map, free, e1); 1509 1510 free = uvm_map_uaddr_e(map, e2); 1511 uvm_mapent_free_remove(map, free, e2); 1512 uvm_mapent_addr_remove(map, e2); 1513 e1->end = e2->end; 1514 e1->guard = e2->guard; 1515 e1->fspace = e2->fspace; 1516 uvm_mapent_free_insert(map, free, e1); 1517 1518 DEAD_ENTRY_PUSH(dead, e2); 1519 return e1; 1520 } 1521 1522 /* 1523 * Attempt forward and backward joining of entry. 1524 * 1525 * Returns entry after joins. 1526 * We are guaranteed that the amap of entry is either non-existent or 1527 * has never been used. 
1528 */ 1529 struct vm_map_entry* 1530 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1531 struct uvm_map_deadq *dead) 1532 { 1533 struct vm_map_entry *other; 1534 struct vm_map_entry *merged; 1535 1536 /* Merge with previous entry. */ 1537 other = RBT_PREV(uvm_map_addr, entry); 1538 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1539 merged = uvm_mapent_merge(map, other, entry, dead); 1540 if (merged) 1541 entry = merged; 1542 } 1543 1544 /* 1545 * Merge with next entry. 1546 * 1547 * Because amap can only extend forward and the next entry 1548 * probably contains sensible info, only perform forward merging 1549 * in the absence of an amap. 1550 */ 1551 other = RBT_NEXT(uvm_map_addr, entry); 1552 if (other && entry->aref.ar_amap == NULL && 1553 other->aref.ar_amap == NULL && 1554 uvm_mapent_isjoinable(map, entry, other)) { 1555 merged = uvm_mapent_merge(map, entry, other, dead); 1556 if (merged) 1557 entry = merged; 1558 } 1559 1560 return entry; 1561 } 1562 1563 /* 1564 * Kill entries that are no longer in a map. 1565 */ 1566 void 1567 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1568 { 1569 struct vm_map_entry *entry, *tmp; 1570 int waitok = flags & UVM_PLA_WAITOK; 1571 1572 TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) { 1573 /* Skip entries for which we have to grab the kernel lock. */ 1574 if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) || 1575 UVM_ET_ISOBJ(entry)) 1576 continue; 1577 1578 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1579 uvm_mapent_free(entry); 1580 } 1581 1582 if (TAILQ_EMPTY(deadq)) 1583 return; 1584 1585 KERNEL_LOCK(); 1586 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1587 if (waitok) 1588 uvm_pause(); 1589 /* Drop reference to amap, if we've got one. */ 1590 if (entry->aref.ar_amap) 1591 amap_unref(entry->aref.ar_amap, 1592 entry->aref.ar_pageoff, 1593 atop(entry->end - entry->start), 1594 flags & AMAP_REFALL); 1595 1596 /* Drop reference to our backing object, if we've got one. */ 1597 if (UVM_ET_ISSUBMAP(entry)) { 1598 /* ... unlikely to happen, but play it safe */ 1599 uvm_map_deallocate(entry->object.sub_map); 1600 } else if (UVM_ET_ISOBJ(entry) && 1601 entry->object.uvm_obj->pgops->pgo_detach) { 1602 entry->object.uvm_obj->pgops->pgo_detach( 1603 entry->object.uvm_obj); 1604 } 1605 1606 /* Step to next. */ 1607 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1608 uvm_mapent_free(entry); 1609 } 1610 KERNEL_UNLOCK(); 1611 } 1612 1613 void 1614 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1615 { 1616 struct vm_map_entry *entry; 1617 1618 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1619 KASSERT(entry->aref.ar_amap == NULL); 1620 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1621 KASSERT(!UVM_ET_ISOBJ(entry)); 1622 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1623 uvm_mapent_free(entry); 1624 } 1625 } 1626 1627 /* 1628 * Create and insert new entry. 1629 * 1630 * Returned entry contains new addresses and is inserted properly in the tree. 1631 * first and last are (probably) no longer valid. 
1632 */ 1633 struct vm_map_entry* 1634 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1635 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1636 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1637 { 1638 struct vm_map_entry *entry, *prev; 1639 struct uvm_addr_state *free; 1640 vaddr_t min, max; /* free space boundaries for new entry */ 1641 1642 KDASSERT(map != NULL); 1643 KDASSERT(first != NULL); 1644 KDASSERT(last != NULL); 1645 KDASSERT(dead != NULL); 1646 KDASSERT(sz > 0); 1647 KDASSERT(addr + sz > addr); 1648 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1649 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1650 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1651 uvm_tree_sanity(map, __FILE__, __LINE__); 1652 1653 min = addr + sz; 1654 max = VMMAP_FREE_END(last); 1655 1656 /* Initialize new entry. */ 1657 if (new == NULL) 1658 entry = uvm_mapent_alloc(map, flags); 1659 else 1660 entry = new; 1661 if (entry == NULL) 1662 return NULL; 1663 entry->offset = 0; 1664 entry->etype = 0; 1665 entry->wired_count = 0; 1666 entry->aref.ar_pageoff = 0; 1667 entry->aref.ar_amap = NULL; 1668 1669 entry->start = addr; 1670 entry->end = min; 1671 entry->guard = 0; 1672 entry->fspace = 0; 1673 1674 /* Reset free space in first. */ 1675 free = uvm_map_uaddr_e(map, first); 1676 uvm_mapent_free_remove(map, free, first); 1677 first->guard = 0; 1678 first->fspace = 0; 1679 1680 /* 1681 * Remove all entries that are fully replaced. 1682 * We are iterating using last in reverse order. 1683 */ 1684 for (; first != last; last = prev) { 1685 prev = RBT_PREV(uvm_map_addr, last); 1686 1687 KDASSERT(last->start == last->end); 1688 free = uvm_map_uaddr_e(map, last); 1689 uvm_mapent_free_remove(map, free, last); 1690 uvm_mapent_addr_remove(map, last); 1691 DEAD_ENTRY_PUSH(dead, last); 1692 } 1693 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1694 if (first->start == addr) { 1695 uvm_mapent_addr_remove(map, first); 1696 DEAD_ENTRY_PUSH(dead, first); 1697 } else { 1698 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1699 addr, flags); 1700 } 1701 1702 /* Finally, link in entry. 
*/ 1703 uvm_mapent_addr_insert(map, entry); 1704 uvm_map_fix_space(map, entry, min, max, flags); 1705 1706 uvm_tree_sanity(map, __FILE__, __LINE__); 1707 return entry; 1708 } 1709 1710 1711 /* 1712 * uvm_mapent_alloc: allocate a map entry 1713 */ 1714 struct vm_map_entry * 1715 uvm_mapent_alloc(struct vm_map *map, int flags) 1716 { 1717 struct vm_map_entry *me, *ne; 1718 int pool_flags; 1719 int i; 1720 1721 pool_flags = PR_WAITOK; 1722 if (flags & UVM_FLAG_TRYLOCK) 1723 pool_flags = PR_NOWAIT; 1724 1725 if (map->flags & VM_MAP_INTRSAFE || cold) { 1726 mtx_enter(&uvm_kmapent_mtx); 1727 if (SLIST_EMPTY(&uvm.kentry_free)) { 1728 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1729 &kd_nowait); 1730 if (ne == NULL) 1731 panic("uvm_mapent_alloc: cannot allocate map " 1732 "entry"); 1733 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1734 SLIST_INSERT_HEAD(&uvm.kentry_free, 1735 &ne[i], daddrs.addr_kentry); 1736 } 1737 if (ratecheck(&uvm_kmapent_last_warn_time, 1738 &uvm_kmapent_warn_rate)) 1739 printf("uvm_mapent_alloc: out of static " 1740 "map entries\n"); 1741 } 1742 me = SLIST_FIRST(&uvm.kentry_free); 1743 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1744 uvmexp.kmapent++; 1745 mtx_leave(&uvm_kmapent_mtx); 1746 me->flags = UVM_MAP_STATIC; 1747 } else if (map == kernel_map) { 1748 splassert(IPL_NONE); 1749 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1750 if (me == NULL) 1751 goto out; 1752 me->flags = UVM_MAP_KMEM; 1753 } else { 1754 splassert(IPL_NONE); 1755 me = pool_get(&uvm_map_entry_pool, pool_flags); 1756 if (me == NULL) 1757 goto out; 1758 me->flags = 0; 1759 } 1760 1761 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1762 out: 1763 return me; 1764 } 1765 1766 /* 1767 * uvm_mapent_free: free map entry 1768 * 1769 * => XXX: static pool for kernel map? 1770 */ 1771 void 1772 uvm_mapent_free(struct vm_map_entry *me) 1773 { 1774 if (me->flags & UVM_MAP_STATIC) { 1775 mtx_enter(&uvm_kmapent_mtx); 1776 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1777 uvmexp.kmapent--; 1778 mtx_leave(&uvm_kmapent_mtx); 1779 } else if (me->flags & UVM_MAP_KMEM) { 1780 splassert(IPL_NONE); 1781 pool_put(&uvm_map_entry_kmem_pool, me); 1782 } else { 1783 splassert(IPL_NONE); 1784 pool_put(&uvm_map_entry_pool, me); 1785 } 1786 } 1787 1788 /* 1789 * uvm_map_lookup_entry: find map entry at or before an address. 1790 * 1791 * => map must at least be read-locked by caller 1792 * => entry is returned in "entry" 1793 * => return value is true if address is in the returned entry 1794 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1795 * returned for those mappings. 1796 */ 1797 boolean_t 1798 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1799 struct vm_map_entry **entry) 1800 { 1801 *entry = uvm_map_entrybyaddr(&map->addr, address); 1802 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1803 (*entry)->start <= address && (*entry)->end > address; 1804 } 1805 1806 /* 1807 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet 1808 * grown -- then uvm_map_check_region_range() should not cache the entry 1809 * because growth won't be seen. 1810 */ 1811 int 1812 uvm_map_inentry_sp(vm_map_entry_t entry) 1813 { 1814 if ((entry->etype & UVM_ET_STACK) == 0) { 1815 if (entry->protection == PROT_NONE) 1816 return (-1); /* don't update range */ 1817 return (0); 1818 } 1819 return (1); 1820 } 1821 1822 /* 1823 * The system call must not come from a writeable entry, W^X is violated. 
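 *
 * (Condensed into code, the whole check implemented below is simply
 *
 *	if (entry->protection & PROT_WRITE)
 *		return 0;
 *	if ((entry->etype & UVM_ET_SYSCALL) == 0)
 *		return 0;
 *	return 1;
 *
 * i.e. both of the requirements described in this comment must hold.)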
1824 * (Would be nice if we can spot aliasing, which is also kind of bad) 1825 * 1826 * The system call must come from an syscall-labeled entry (which are 1827 * the text regions of the main program, sigtramp, ld.so, or libc). 1828 */ 1829 int 1830 uvm_map_inentry_pc(vm_map_entry_t entry) 1831 { 1832 if (entry->protection & PROT_WRITE) 1833 return (0); /* not permitted */ 1834 if ((entry->etype & UVM_ET_SYSCALL) == 0) 1835 return (0); /* not permitted */ 1836 return (1); 1837 } 1838 1839 int 1840 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie) 1841 { 1842 return (serial != ie->ie_serial || ie->ie_start == 0 || 1843 addr < ie->ie_start || addr >= ie->ie_end); 1844 } 1845 1846 /* 1847 * Inside a vm_map find the reg address and verify it via function. 1848 * Remember low and high addresses of region if valid and return TRUE, 1849 * else return FALSE. 1850 */ 1851 boolean_t 1852 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1853 int (*fn)(vm_map_entry_t), u_long serial) 1854 { 1855 vm_map_t map = &p->p_vmspace->vm_map; 1856 vm_map_entry_t entry; 1857 int ret; 1858 1859 if (addr < map->min_offset || addr >= map->max_offset) 1860 return (FALSE); 1861 1862 /* lock map */ 1863 vm_map_lock_read(map); 1864 1865 /* lookup */ 1866 if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) { 1867 vm_map_unlock_read(map); 1868 return (FALSE); 1869 } 1870 1871 ret = (*fn)(entry); 1872 if (ret == 0) { 1873 vm_map_unlock_read(map); 1874 return (FALSE); 1875 } else if (ret == 1) { 1876 ie->ie_start = entry->start; 1877 ie->ie_end = entry->end; 1878 ie->ie_serial = serial; 1879 } else { 1880 /* do not update, re-check later */ 1881 } 1882 vm_map_unlock_read(map); 1883 return (TRUE); 1884 } 1885 1886 boolean_t 1887 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1888 const char *fmt, int (*fn)(vm_map_entry_t), u_long serial) 1889 { 1890 union sigval sv; 1891 boolean_t ok = TRUE; 1892 1893 if (uvm_map_inentry_recheck(serial, addr, ie)) { 1894 ok = uvm_map_inentry_fix(p, ie, addr, fn, serial); 1895 if (!ok) { 1896 KERNEL_LOCK(); 1897 printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid, 1898 addr, ie->ie_start, ie->ie_end); 1899 p->p_p->ps_acflag |= AMAP; 1900 sv.sival_ptr = (void *)PROC_PC(p); 1901 trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv); 1902 KERNEL_UNLOCK(); 1903 } 1904 } 1905 return (ok); 1906 } 1907 1908 /* 1909 * Check whether the given address range can be converted to a MAP_STACK 1910 * mapping. 1911 * 1912 * Must be called with map locked. 1913 */ 1914 boolean_t 1915 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz) 1916 { 1917 vaddr_t end = addr + sz; 1918 struct vm_map_entry *first, *iter, *prev = NULL; 1919 1920 if (!uvm_map_lookup_entry(map, addr, &first)) { 1921 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1922 addr, end, map); 1923 return FALSE; 1924 } 1925 1926 /* 1927 * Check that the address range exists and is contiguous. 1928 */ 1929 for (iter = first; iter != NULL && iter->start < end; 1930 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1931 /* 1932 * Make sure that we do not have holes in the range. 
1933 */ 1934 #if 0 1935 if (prev != NULL) { 1936 printf("prev->start 0x%lx, prev->end 0x%lx, " 1937 "iter->start 0x%lx, iter->end 0x%lx\n", 1938 prev->start, prev->end, iter->start, iter->end); 1939 } 1940 #endif 1941 1942 if (prev != NULL && prev->end != iter->start) { 1943 printf("map stack 0x%lx-0x%lx of map %p failed: " 1944 "hole in range\n", addr, end, map); 1945 return FALSE; 1946 } 1947 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1948 printf("map stack 0x%lx-0x%lx of map %p failed: " 1949 "hole in range\n", addr, end, map); 1950 return FALSE; 1951 } 1952 } 1953 1954 return TRUE; 1955 } 1956 1957 /* 1958 * Remap the middle-pages of an existing mapping as a stack range. 1959 * If there exists a previous contiguous mapping with the given range 1960 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1961 * mapping is dropped, and a new anon mapping is created and marked as 1962 * a stack. 1963 * 1964 * Must be called with map unlocked. 1965 */ 1966 int 1967 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1968 { 1969 vm_map_t map = &p->p_vmspace->vm_map; 1970 vaddr_t start, end; 1971 int error; 1972 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1973 PROT_READ | PROT_WRITE | PROT_EXEC, 1974 MAP_INHERIT_COPY, MADV_NORMAL, 1975 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1976 UVM_FLAG_COPYONW); 1977 1978 start = round_page(addr); 1979 end = trunc_page(addr + sz); 1980 #ifdef MACHINE_STACK_GROWS_UP 1981 if (end == addr + sz) 1982 end -= PAGE_SIZE; 1983 #else 1984 if (start == addr) 1985 start += PAGE_SIZE; 1986 #endif 1987 1988 if (start < map->min_offset || end >= map->max_offset || end < start) 1989 return EINVAL; 1990 1991 error = uvm_mapanon(map, &start, end - start, 0, flags); 1992 if (error != 0) 1993 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1994 1995 return error; 1996 } 1997 1998 /* 1999 * uvm_map_pie: return a random load address for a PIE executable 2000 * properly aligned. 2001 */ 2002 #ifndef VM_PIE_MAX_ADDR 2003 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 2004 #endif 2005 2006 #ifndef VM_PIE_MIN_ADDR 2007 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 2008 #endif 2009 2010 #ifndef VM_PIE_MIN_ALIGN 2011 #define VM_PIE_MIN_ALIGN PAGE_SIZE 2012 #endif 2013 2014 vaddr_t 2015 uvm_map_pie(vaddr_t align) 2016 { 2017 vaddr_t addr, space, min; 2018 2019 align = MAX(align, VM_PIE_MIN_ALIGN); 2020 2021 /* round up to next alignment */ 2022 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 2023 2024 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 2025 return (align); 2026 2027 space = (VM_PIE_MAX_ADDR - min) / align; 2028 space = MIN(space, (u_int32_t)-1); 2029 2030 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 2031 addr += min; 2032 2033 return (addr); 2034 } 2035 2036 void 2037 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 2038 { 2039 struct uvm_map_deadq dead; 2040 2041 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 2042 (end & (vaddr_t)PAGE_MASK) == 0); 2043 TAILQ_INIT(&dead); 2044 vm_map_lock(map); 2045 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 2046 vm_map_unlock(map); 2047 2048 if (map->flags & VM_MAP_INTRSAFE) 2049 uvm_unmap_detach_intrsafe(&dead); 2050 else 2051 uvm_unmap_detach(&dead, 0); 2052 } 2053 2054 /* 2055 * Mark entry as free. 2056 * 2057 * entry will be put on the dead list. 2058 * The free space will be merged into the previous or a new entry, 2059 * unless markfree is false. 
2060 */ 2061 void 2062 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 2063 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 2064 boolean_t markfree) 2065 { 2066 struct uvm_addr_state *free; 2067 struct vm_map_entry *prev; 2068 vaddr_t addr; /* Start of freed range. */ 2069 vaddr_t end; /* End of freed range. */ 2070 2071 prev = *prev_ptr; 2072 if (prev == entry) 2073 *prev_ptr = prev = NULL; 2074 2075 if (prev == NULL || 2076 VMMAP_FREE_END(prev) != entry->start) 2077 prev = RBT_PREV(uvm_map_addr, entry); 2078 2079 /* Entry is describing only free memory and has nothing to drain into. */ 2080 if (prev == NULL && entry->start == entry->end && markfree) { 2081 *prev_ptr = entry; 2082 return; 2083 } 2084 2085 addr = entry->start; 2086 end = VMMAP_FREE_END(entry); 2087 free = uvm_map_uaddr_e(map, entry); 2088 uvm_mapent_free_remove(map, free, entry); 2089 uvm_mapent_addr_remove(map, entry); 2090 DEAD_ENTRY_PUSH(dead, entry); 2091 2092 if (markfree) { 2093 if (prev) { 2094 free = uvm_map_uaddr_e(map, prev); 2095 uvm_mapent_free_remove(map, free, prev); 2096 } 2097 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 2098 } 2099 } 2100 2101 /* 2102 * Unwire and release referenced amap and object from map entry. 2103 */ 2104 void 2105 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2106 { 2107 /* Unwire removed map entry. */ 2108 if (VM_MAPENT_ISWIRED(entry)) { 2109 KERNEL_LOCK(); 2110 entry->wired_count = 0; 2111 uvm_fault_unwire_locked(map, entry->start, entry->end); 2112 KERNEL_UNLOCK(); 2113 } 2114 2115 /* Entry-type specific code. */ 2116 if (UVM_ET_ISHOLE(entry)) { 2117 /* Nothing to be done for holes. */ 2118 } else if (map->flags & VM_MAP_INTRSAFE) { 2119 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2120 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2121 pmap_kremove(entry->start, entry->end - entry->start); 2122 } else if (UVM_ET_ISOBJ(entry) && 2123 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2124 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2125 /* 2126 * Note: kernel object mappings are currently used in 2127 * two ways: 2128 * [1] "normal" mappings of pages in the kernel object 2129 * [2] uvm_km_valloc'd allocations in which we 2130 * pmap_enter in some non-kernel-object page 2131 * (e.g. vmapbuf). 2132 * 2133 * for case [1], we need to remove the mapping from 2134 * the pmap and then remove the page from the kernel 2135 * object (because, once pages in a kernel object are 2136 * unmapped they are no longer needed, unlike, say, 2137 * a vnode where you might want the data to persist 2138 * until flushed out of a queue). 2139 * 2140 * for case [2], we need to remove the mapping from 2141 * the pmap. there shouldn't be any pages at the 2142 * specified offset in the kernel object [but it 2143 * doesn't hurt to call uvm_km_pgremove just to be 2144 * safe?] 2145 * 2146 * uvm_km_pgremove currently does the following: 2147 * for pages in the kernel object range: 2148 * - drops the swap slot 2149 * - uvm_pagefree the page 2150 * 2151 * note there is version of uvm_km_pgremove() that 2152 * is used for "intrsafe" objects. 2153 */ 2154 /* 2155 * remove mappings from pmap and drop the pages 2156 * from the object. offsets are always relative 2157 * to vm_map_min(kernel_map). 
2158 */ 2159 pmap_remove(pmap_kernel(), entry->start, entry->end); 2160 uvm_km_pgremove(entry->object.uvm_obj, 2161 entry->start - vm_map_min(kernel_map), 2162 entry->end - vm_map_min(kernel_map)); 2163 2164 /* 2165 * null out kernel_object reference, we've just 2166 * dropped it 2167 */ 2168 entry->etype &= ~UVM_ET_OBJ; 2169 entry->object.uvm_obj = NULL; /* to be safe */ 2170 } else { 2171 /* remove mappings the standard way. */ 2172 pmap_remove(map->pmap, entry->start, entry->end); 2173 } 2174 } 2175 2176 /* 2177 * Remove all entries from start to end. 2178 * 2179 * If remove_holes, then remove ET_HOLE entries as well. 2180 * If markfree, entry will be properly marked free, otherwise, no replacement 2181 * entry will be put in the tree (corrupting the tree). 2182 */ 2183 void 2184 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2185 struct uvm_map_deadq *dead, boolean_t remove_holes, 2186 boolean_t markfree) 2187 { 2188 struct vm_map_entry *prev_hint, *next, *entry; 2189 2190 start = MAX(start, map->min_offset); 2191 end = MIN(end, map->max_offset); 2192 if (start >= end) 2193 return; 2194 2195 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2196 splassert(IPL_NONE); 2197 else 2198 splassert(IPL_VM); 2199 2200 /* Find first affected entry. */ 2201 entry = uvm_map_entrybyaddr(&map->addr, start); 2202 KDASSERT(entry != NULL && entry->start <= start); 2203 if (entry->end <= start && markfree) 2204 entry = RBT_NEXT(uvm_map_addr, entry); 2205 else 2206 UVM_MAP_CLIP_START(map, entry, start); 2207 2208 /* 2209 * Iterate entries until we reach end address. 2210 * prev_hint hints where the freed space can be appended to. 2211 */ 2212 prev_hint = NULL; 2213 for (; entry != NULL && entry->start < end; entry = next) { 2214 KDASSERT(entry->start >= start); 2215 if (entry->end > end || !markfree) 2216 UVM_MAP_CLIP_END(map, entry, end); 2217 KDASSERT(entry->start >= start && entry->end <= end); 2218 next = RBT_NEXT(uvm_map_addr, entry); 2219 2220 /* Don't remove holes unless asked to do so. */ 2221 if (UVM_ET_ISHOLE(entry)) { 2222 if (!remove_holes) { 2223 prev_hint = entry; 2224 continue; 2225 } 2226 } 2227 2228 /* A stack has been removed.. */ 2229 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2230 map->sserial++; 2231 2232 /* Kill entry. */ 2233 uvm_unmap_kill_entry(map, entry); 2234 2235 /* Update space usage. */ 2236 if ((map->flags & VM_MAP_ISVMSPACE) && 2237 entry->object.uvm_obj == NULL && 2238 entry->protection != PROT_NONE && 2239 !UVM_ET_ISHOLE(entry)) { 2240 ((struct vmspace *)map)->vm_dused -= 2241 uvmspace_dused(map, entry->start, entry->end); 2242 } 2243 if (!UVM_ET_ISHOLE(entry)) 2244 map->size -= entry->end - entry->start; 2245 2246 /* Actual removal of entry. */ 2247 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2248 } 2249 2250 pmap_update(vm_map_pmap(map)); 2251 2252 #ifdef VMMAP_DEBUG 2253 if (markfree) { 2254 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2255 entry != NULL && entry->start < end; 2256 entry = RBT_NEXT(uvm_map_addr, entry)) { 2257 KDASSERT(entry->end <= start || 2258 entry->start == entry->end || 2259 UVM_ET_ISHOLE(entry)); 2260 } 2261 } else { 2262 vaddr_t a; 2263 for (a = start; a < end; a += PAGE_SIZE) 2264 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2265 } 2266 #endif 2267 } 2268 2269 /* 2270 * Mark all entries from first until end (exclusive) as pageable. 2271 * 2272 * Lock must be exclusive on entry and will not be touched. 
2273 */ 2274 void 2275 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2276 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2277 { 2278 struct vm_map_entry *iter; 2279 2280 for (iter = first; iter != end; 2281 iter = RBT_NEXT(uvm_map_addr, iter)) { 2282 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2283 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2284 continue; 2285 2286 iter->wired_count = 0; 2287 uvm_fault_unwire_locked(map, iter->start, iter->end); 2288 } 2289 } 2290 2291 /* 2292 * Mark all entries from first until end (exclusive) as wired. 2293 * 2294 * Lockflags determines the lock state on return from this function. 2295 * Lock must be exclusive on entry. 2296 */ 2297 int 2298 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2299 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2300 int lockflags) 2301 { 2302 struct vm_map_entry *iter; 2303 #ifdef DIAGNOSTIC 2304 unsigned int timestamp_save; 2305 #endif 2306 int error; 2307 2308 /* 2309 * Wire pages in two passes: 2310 * 2311 * 1: holding the write lock, we create any anonymous maps that need 2312 * to be created. then we clip each map entry to the region to 2313 * be wired and increment its wiring count. 2314 * 2315 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2316 * in the pages for any newly wired area (wired_count == 1). 2317 * 2318 * downgrading to a read lock for uvm_fault_wire avoids a possible 2319 * deadlock with another thread that may have faulted on one of 2320 * the pages to be wired (it would mark the page busy, blocking 2321 * us, then in turn block on the map lock that we hold). 2322 * because we keep the read lock on the map, the copy-on-write 2323 * status of the entries we modify here cannot change. 2324 */ 2325 for (iter = first; iter != end; 2326 iter = RBT_NEXT(uvm_map_addr, iter)) { 2327 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2328 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2329 iter->protection == PROT_NONE) 2330 continue; 2331 2332 /* 2333 * Perform actions of vm_map_lookup that need the write lock. 2334 * - create an anonymous map for copy-on-write 2335 * - anonymous map for zero-fill 2336 * Skip submaps. 2337 */ 2338 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2339 UVM_ET_ISNEEDSCOPY(iter) && 2340 ((iter->protection & PROT_WRITE) || 2341 iter->object.uvm_obj == NULL)) { 2342 amap_copy(map, iter, M_WAITOK, 2343 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2344 iter->start, iter->end); 2345 } 2346 iter->wired_count++; 2347 } 2348 2349 /* 2350 * Pass 2. 2351 */ 2352 #ifdef DIAGNOSTIC 2353 timestamp_save = map->timestamp; 2354 #endif 2355 vm_map_busy(map); 2356 vm_map_downgrade(map); 2357 2358 error = 0; 2359 for (iter = first; error == 0 && iter != end; 2360 iter = RBT_NEXT(uvm_map_addr, iter)) { 2361 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2362 iter->protection == PROT_NONE) 2363 continue; 2364 2365 error = uvm_fault_wire(map, iter->start, iter->end, 2366 iter->protection); 2367 } 2368 2369 if (error) { 2370 /* 2371 * uvm_fault_wire failure 2372 * 2373 * Reacquire lock and undo our work. 2374 */ 2375 vm_map_upgrade(map); 2376 vm_map_unbusy(map); 2377 #ifdef DIAGNOSTIC 2378 if (timestamp_save != map->timestamp) 2379 panic("uvm_map_pageable_wire: stale map"); 2380 #endif 2381 2382 /* 2383 * first is no longer needed to restart loops. 2384 * Use it as iterator to unmap successful mappings. 
2385 */ 2386 for (; first != iter; 2387 first = RBT_NEXT(uvm_map_addr, first)) { 2388 if (UVM_ET_ISHOLE(first) || 2389 first->start == first->end || 2390 first->protection == PROT_NONE) 2391 continue; 2392 2393 first->wired_count--; 2394 if (!VM_MAPENT_ISWIRED(first)) { 2395 uvm_fault_unwire_locked(map, 2396 iter->start, iter->end); 2397 } 2398 } 2399 2400 /* decrease counter in the rest of the entries */ 2401 for (; iter != end; 2402 iter = RBT_NEXT(uvm_map_addr, iter)) { 2403 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2404 iter->protection == PROT_NONE) 2405 continue; 2406 2407 iter->wired_count--; 2408 } 2409 2410 if ((lockflags & UVM_LK_EXIT) == 0) 2411 vm_map_unlock(map); 2412 return error; 2413 } 2414 2415 /* We are currently holding a read lock. */ 2416 if ((lockflags & UVM_LK_EXIT) == 0) { 2417 vm_map_unbusy(map); 2418 vm_map_unlock_read(map); 2419 } else { 2420 vm_map_upgrade(map); 2421 vm_map_unbusy(map); 2422 #ifdef DIAGNOSTIC 2423 if (timestamp_save != map->timestamp) 2424 panic("uvm_map_pageable_wire: stale map"); 2425 #endif 2426 } 2427 return 0; 2428 } 2429 2430 /* 2431 * uvm_map_pageable: set pageability of a range in a map. 2432 * 2433 * Flags: 2434 * UVM_LK_ENTER: map is already locked by caller 2435 * UVM_LK_EXIT: don't unlock map on exit 2436 * 2437 * The full range must be in use (entries may not have fspace != 0). 2438 * UVM_ET_HOLE counts as unmapped. 2439 */ 2440 int 2441 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2442 boolean_t new_pageable, int lockflags) 2443 { 2444 struct vm_map_entry *first, *last, *tmp; 2445 int error; 2446 2447 start = trunc_page(start); 2448 end = round_page(end); 2449 2450 if (start > end) 2451 return EINVAL; 2452 if (start == end) 2453 return 0; /* nothing to do */ 2454 if (start < map->min_offset) 2455 return EFAULT; /* why? see first XXX below */ 2456 if (end > map->max_offset) 2457 return EINVAL; /* why? see second XXX below */ 2458 2459 KASSERT(map->flags & VM_MAP_PAGEABLE); 2460 if ((lockflags & UVM_LK_ENTER) == 0) 2461 vm_map_lock(map); 2462 2463 /* 2464 * Find first entry. 2465 * 2466 * Initial test on start is different, because of the different 2467 * error returned. Rest is tested further down. 2468 */ 2469 first = uvm_map_entrybyaddr(&map->addr, start); 2470 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2471 /* 2472 * XXX if the first address is not mapped, it is EFAULT? 2473 */ 2474 error = EFAULT; 2475 goto out; 2476 } 2477 2478 /* Check that the range has no holes. */ 2479 for (last = first; last != NULL && last->start < end; 2480 last = RBT_NEXT(uvm_map_addr, last)) { 2481 if (UVM_ET_ISHOLE(last) || 2482 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2483 /* 2484 * XXX unmapped memory in range, why is it EINVAL 2485 * instead of EFAULT? 2486 */ 2487 error = EINVAL; 2488 goto out; 2489 } 2490 } 2491 2492 /* 2493 * Last ended at the first entry after the range. 2494 * Move back one step. 2495 * 2496 * Note that last may be NULL. 2497 */ 2498 if (last == NULL) { 2499 last = RBT_MAX(uvm_map_addr, &map->addr); 2500 if (last->end < end) { 2501 error = EINVAL; 2502 goto out; 2503 } 2504 } else { 2505 KASSERT(last != first); 2506 last = RBT_PREV(uvm_map_addr, last); 2507 } 2508 2509 /* Wire/unwire pages here. */ 2510 if (new_pageable) { 2511 /* 2512 * Mark pageable. 2513 * entries that are not wired are untouched. 2514 */ 2515 if (VM_MAPENT_ISWIRED(first)) 2516 UVM_MAP_CLIP_START(map, first, start); 2517 /* 2518 * Split last at end. 
2519 * Make tmp be the first entry after what is to be touched. 2520 * If last is not wired, don't touch it. 2521 */ 2522 if (VM_MAPENT_ISWIRED(last)) { 2523 UVM_MAP_CLIP_END(map, last, end); 2524 tmp = RBT_NEXT(uvm_map_addr, last); 2525 } else 2526 tmp = last; 2527 2528 uvm_map_pageable_pgon(map, first, tmp, start, end); 2529 error = 0; 2530 2531 out: 2532 if ((lockflags & UVM_LK_EXIT) == 0) 2533 vm_map_unlock(map); 2534 return error; 2535 } else { 2536 /* 2537 * Mark entries wired. 2538 * entries are always touched (because recovery needs this). 2539 */ 2540 if (!VM_MAPENT_ISWIRED(first)) 2541 UVM_MAP_CLIP_START(map, first, start); 2542 /* 2543 * Split last at end. 2544 * Make tmp be the first entry after what is to be touched. 2545 * If last is not wired, don't touch it. 2546 */ 2547 if (!VM_MAPENT_ISWIRED(last)) { 2548 UVM_MAP_CLIP_END(map, last, end); 2549 tmp = RBT_NEXT(uvm_map_addr, last); 2550 } else 2551 tmp = last; 2552 2553 return uvm_map_pageable_wire(map, first, tmp, start, end, 2554 lockflags); 2555 } 2556 } 2557 2558 /* 2559 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2560 * all mapped regions. 2561 * 2562 * Map must not be locked. 2563 * If no flags are specified, all ragions are unwired. 2564 */ 2565 int 2566 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2567 { 2568 vsize_t size; 2569 struct vm_map_entry *iter; 2570 2571 KASSERT(map->flags & VM_MAP_PAGEABLE); 2572 vm_map_lock(map); 2573 2574 if (flags == 0) { 2575 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2576 NULL, map->min_offset, map->max_offset); 2577 2578 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2579 vm_map_unlock(map); 2580 return 0; 2581 } 2582 2583 if (flags & MCL_FUTURE) 2584 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2585 if (!(flags & MCL_CURRENT)) { 2586 vm_map_unlock(map); 2587 return 0; 2588 } 2589 2590 /* 2591 * Count number of pages in all non-wired entries. 2592 * If the number exceeds the limit, abort. 2593 */ 2594 size = 0; 2595 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2596 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2597 continue; 2598 2599 size += iter->end - iter->start; 2600 } 2601 2602 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2603 vm_map_unlock(map); 2604 return ENOMEM; 2605 } 2606 2607 /* XXX non-pmap_wired_count case must be handled by caller */ 2608 #ifdef pmap_wired_count 2609 if (limit != 0 && 2610 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2611 vm_map_unlock(map); 2612 return ENOMEM; 2613 } 2614 #endif 2615 2616 /* 2617 * uvm_map_pageable_wire will release lock 2618 */ 2619 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2620 NULL, map->min_offset, map->max_offset, 0); 2621 } 2622 2623 /* 2624 * Initialize map. 2625 * 2626 * Allocates sufficient entries to describe the free memory in the map. 2627 */ 2628 void 2629 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max, 2630 int flags) 2631 { 2632 int i; 2633 2634 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2635 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2636 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2637 2638 /* 2639 * Update parameters. 2640 * 2641 * This code handles (vaddr_t)-1 and other page mask ending addresses 2642 * properly. 2643 * We lose the top page if the full virtual address space is used. 
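	 * For example, max == (vaddr_t)-1 is incremented below and wraps to
	 * 0; the overflow check then pulls it back by one page, so the map
	 * ends at -PAGE_SIZE and the topmost page stays unused.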
2644 */ 2645 if (max & (vaddr_t)PAGE_MASK) { 2646 max += 1; 2647 if (max == 0) /* overflow */ 2648 max -= PAGE_SIZE; 2649 } 2650 2651 RBT_INIT(uvm_map_addr, &map->addr); 2652 map->uaddr_exe = NULL; 2653 for (i = 0; i < nitems(map->uaddr_any); ++i) 2654 map->uaddr_any[i] = NULL; 2655 map->uaddr_brk_stack = NULL; 2656 2657 map->pmap = pmap; 2658 map->size = 0; 2659 map->ref_count = 0; 2660 map->min_offset = min; 2661 map->max_offset = max; 2662 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2663 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2664 map->flags = flags; 2665 map->timestamp = 0; 2666 if (flags & VM_MAP_ISVMSPACE) 2667 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2668 else 2669 rw_init(&map->lock, "kmmaplk"); 2670 mtx_init(&map->mtx, IPL_VM); 2671 mtx_init(&map->flags_lock, IPL_VM); 2672 2673 /* Configure the allocators. */ 2674 if (flags & VM_MAP_ISVMSPACE) 2675 uvm_map_setup_md(map); 2676 else 2677 map->uaddr_any[3] = &uaddr_kbootstrap; 2678 2679 /* 2680 * Fill map entries. 2681 * We do not need to write-lock the map here because only the current 2682 * thread sees it right now. Initialize ref_count to 0 above to avoid 2683 * bogus triggering of lock-not-held assertions. 2684 */ 2685 uvm_map_setup_entries(map); 2686 uvm_tree_sanity(map, __FILE__, __LINE__); 2687 map->ref_count = 1; 2688 } 2689 2690 /* 2691 * Destroy the map. 2692 * 2693 * This is the inverse operation to uvm_map_setup. 2694 */ 2695 void 2696 uvm_map_teardown(struct vm_map *map) 2697 { 2698 struct uvm_map_deadq dead_entries; 2699 struct vm_map_entry *entry, *tmp; 2700 #ifdef VMMAP_DEBUG 2701 size_t numq, numt; 2702 #endif 2703 int i; 2704 2705 KERNEL_ASSERT_LOCKED(); 2706 KERNEL_UNLOCK(); 2707 KERNEL_ASSERT_UNLOCKED(); 2708 2709 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2710 2711 /* Remove address selectors. */ 2712 uvm_addr_destroy(map->uaddr_exe); 2713 map->uaddr_exe = NULL; 2714 for (i = 0; i < nitems(map->uaddr_any); i++) { 2715 uvm_addr_destroy(map->uaddr_any[i]); 2716 map->uaddr_any[i] = NULL; 2717 } 2718 uvm_addr_destroy(map->uaddr_brk_stack); 2719 map->uaddr_brk_stack = NULL; 2720 2721 /* 2722 * Remove entries. 2723 * 2724 * The following is based on graph breadth-first search. 2725 * 2726 * In color terms: 2727 * - the dead_entries set contains all nodes that are reachable 2728 * (i.e. both the black and the grey nodes) 2729 * - any entry not in dead_entries is white 2730 * - any entry that appears in dead_entries before entry, 2731 * is black, the rest is grey. 2732 * The set [entry, end] is also referred to as the wavefront. 2733 * 2734 * Since the tree is always a fully connected graph, the breadth-first 2735 * search guarantees that each vmmap_entry is visited exactly once. 2736 * The vm_map is broken down in linear time. 2737 */ 2738 TAILQ_INIT(&dead_entries); 2739 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2740 DEAD_ENTRY_PUSH(&dead_entries, entry); 2741 while (entry != NULL) { 2742 sched_pause(yield); 2743 uvm_unmap_kill_entry(map, entry); 2744 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2745 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2746 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2747 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2748 /* Update wave-front. 
*/ 2749 entry = TAILQ_NEXT(entry, dfree.deadq); 2750 } 2751 2752 #ifdef VMMAP_DEBUG 2753 numt = numq = 0; 2754 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2755 numt++; 2756 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2757 numq++; 2758 KASSERT(numt == numq); 2759 #endif 2760 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2761 2762 KERNEL_LOCK(); 2763 2764 pmap_destroy(map->pmap); 2765 map->pmap = NULL; 2766 } 2767 2768 /* 2769 * Populate map with free-memory entries. 2770 * 2771 * Map must be initialized and empty. 2772 */ 2773 void 2774 uvm_map_setup_entries(struct vm_map *map) 2775 { 2776 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2777 2778 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2779 } 2780 2781 /* 2782 * Split entry at given address. 2783 * 2784 * orig: entry that is to be split. 2785 * next: a newly allocated map entry that is not linked. 2786 * split: address at which the split is done. 2787 */ 2788 void 2789 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2790 struct vm_map_entry *next, vaddr_t split) 2791 { 2792 struct uvm_addr_state *free, *free_before; 2793 vsize_t adj; 2794 2795 if ((split & PAGE_MASK) != 0) { 2796 panic("uvm_map_splitentry: split address 0x%lx " 2797 "not on page boundary!", split); 2798 } 2799 KDASSERT(map != NULL && orig != NULL && next != NULL); 2800 uvm_tree_sanity(map, __FILE__, __LINE__); 2801 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2802 2803 #ifdef VMMAP_DEBUG 2804 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2805 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2806 #endif /* VMMAP_DEBUG */ 2807 2808 /* 2809 * Free space will change, unlink from free space tree. 2810 */ 2811 free = uvm_map_uaddr_e(map, orig); 2812 uvm_mapent_free_remove(map, free, orig); 2813 2814 adj = split - orig->start; 2815 2816 uvm_mapent_copy(orig, next); 2817 if (split >= orig->end) { 2818 next->etype = 0; 2819 next->offset = 0; 2820 next->wired_count = 0; 2821 next->start = next->end = split; 2822 next->guard = 0; 2823 next->fspace = VMMAP_FREE_END(orig) - split; 2824 next->aref.ar_amap = NULL; 2825 next->aref.ar_pageoff = 0; 2826 orig->guard = MIN(orig->guard, split - orig->end); 2827 orig->fspace = split - VMMAP_FREE_START(orig); 2828 } else { 2829 orig->fspace = 0; 2830 orig->guard = 0; 2831 orig->end = next->start = split; 2832 2833 if (next->aref.ar_amap) { 2834 amap_splitref(&orig->aref, &next->aref, adj); 2835 } 2836 if (UVM_ET_ISSUBMAP(orig)) { 2837 uvm_map_reference(next->object.sub_map); 2838 next->offset += adj; 2839 } else if (UVM_ET_ISOBJ(orig)) { 2840 if (next->object.uvm_obj->pgops && 2841 next->object.uvm_obj->pgops->pgo_reference) { 2842 KERNEL_LOCK(); 2843 next->object.uvm_obj->pgops->pgo_reference( 2844 next->object.uvm_obj); 2845 KERNEL_UNLOCK(); 2846 } 2847 next->offset += adj; 2848 } 2849 } 2850 2851 /* 2852 * Link next into address tree. 2853 * Link orig and next into free-space tree. 2854 * 2855 * Don't insert 'next' into the addr tree until orig has been linked, 2856 * in case the free-list looks at adjecent entries in the addr tree 2857 * for its decisions. 
2858 */ 2859 if (orig->fspace > 0) 2860 free_before = free; 2861 else 2862 free_before = uvm_map_uaddr_e(map, orig); 2863 uvm_mapent_free_insert(map, free_before, orig); 2864 uvm_mapent_addr_insert(map, next); 2865 uvm_mapent_free_insert(map, free, next); 2866 2867 uvm_tree_sanity(map, __FILE__, __LINE__); 2868 } 2869 2870 2871 #ifdef VMMAP_DEBUG 2872 2873 void 2874 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2875 char *file, int line) 2876 { 2877 char* map_special; 2878 2879 if (test) 2880 return; 2881 2882 if (map == kernel_map) 2883 map_special = " (kernel_map)"; 2884 else if (map == kmem_map) 2885 map_special = " (kmem_map)"; 2886 else 2887 map_special = ""; 2888 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2889 line, test_str); 2890 } 2891 2892 /* 2893 * Check that map is sane. 2894 */ 2895 void 2896 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2897 { 2898 struct vm_map_entry *iter; 2899 vaddr_t addr; 2900 vaddr_t min, max, bound; /* Bounds checker. */ 2901 struct uvm_addr_state *free; 2902 2903 addr = vm_map_min(map); 2904 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2905 /* 2906 * Valid start, end. 2907 * Catch overflow for end+fspace. 2908 */ 2909 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2910 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2911 2912 /* May not be empty. */ 2913 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2914 file, line); 2915 2916 /* Addresses for entry must lie within map boundaries. */ 2917 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2918 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2919 2920 /* Tree may not have gaps. */ 2921 UVM_ASSERT(map, iter->start == addr, file, line); 2922 addr = VMMAP_FREE_END(iter); 2923 2924 /* 2925 * Free space may not cross boundaries, unless the same 2926 * free list is used on both sides of the border. 2927 */ 2928 min = VMMAP_FREE_START(iter); 2929 max = VMMAP_FREE_END(iter); 2930 2931 while (min < max && 2932 (bound = uvm_map_boundary(map, min, max)) != max) { 2933 UVM_ASSERT(map, 2934 uvm_map_uaddr(map, bound - 1) == 2935 uvm_map_uaddr(map, bound), 2936 file, line); 2937 min = bound; 2938 } 2939 2940 free = uvm_map_uaddr_e(map, iter); 2941 if (free) { 2942 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2943 file, line); 2944 } else { 2945 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2946 file, line); 2947 } 2948 } 2949 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2950 } 2951 2952 void 2953 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2954 { 2955 struct vm_map_entry *iter; 2956 vsize_t size; 2957 2958 size = 0; 2959 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2960 if (!UVM_ET_ISHOLE(iter)) 2961 size += iter->end - iter->start; 2962 } 2963 2964 if (map->size != size) 2965 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2966 UVM_ASSERT(map, map->size == size, file, line); 2967 2968 vmspace_validate(map); 2969 } 2970 2971 /* 2972 * This function validates the statistics on vmspace. 2973 */ 2974 void 2975 vmspace_validate(struct vm_map *map) 2976 { 2977 struct vmspace *vm; 2978 struct vm_map_entry *iter; 2979 vaddr_t imin, imax; 2980 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2981 vsize_t stack, heap; /* Measured sizes. 
 */

	if (!(map->flags & VM_MAP_ISVMSPACE))
		return;

	vm = (struct vmspace *)map;
	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);

	stack = heap = 0;
	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
		imin = imax = iter->start;

		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
		    iter->protection == PROT_NONE)
			continue;

		/*
		 * Update stack, heap.
		 * Keep in mind that (theoretically) the entries of
		 * userspace and stack may be joined.
		 */
		while (imin != iter->end) {
			/*
			 * Set imax to the first boundary crossed between
			 * imin and stack addresses.
			 */
			imax = iter->end;
			if (imin < stack_begin && imax > stack_begin)
				imax = stack_begin;
			else if (imin < stack_end && imax > stack_end)
				imax = stack_end;

			if (imin >= stack_begin && imin < stack_end)
				stack += imax - imin;
			else
				heap += imax - imin;
			imin = imax;
		}
	}

	heap >>= PAGE_SHIFT;
	if (heap != vm->vm_dused) {
		printf("vmspace stack range: 0x%lx-0x%lx\n",
		    stack_begin, stack_end);
		panic("vmspace_validate: vmspace.vm_dused invalid, "
		    "expected %ld pgs, got %ld pgs in map %p",
		    heap, vm->vm_dused,
		    map);
	}
}

#endif /* VMMAP_DEBUG */

/*
 * uvm_map_init: init mapping system at boot time. note that we allocate
 * and init the static pool of structs vm_map_entry for the kernel here.
 */
void
uvm_map_init(void)
{
	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
	int lcv;

	/* now set up static pool of kernel map entries ... */
	mtx_init(&uvm_kmapent_mtx, IPL_VM);
	SLIST_INIT(&uvm.kentry_free);
	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
		SLIST_INSERT_HEAD(&uvm.kentry_free,
		    &kernel_map_entry[lcv], daddrs.addr_kentry);
	}

	/* initialize the map-related pools. */
	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
	    IPL_VM, 0, "vmmpekpl", NULL);
	pool_sethiwat(&uvm_map_entry_pool, 8192);

	uvm_addr_init();
}

#if defined(DDB)

/*
 * DDB hooks
 */

/*
 * uvm_map_printit: actually prints the map
 */
void
uvm_map_printit(struct vm_map *map, boolean_t full,
    int (*pr)(const char *, ...))
{
	struct vmspace *vm;
	struct vm_map_entry *entry;
	struct uvm_addr_state *free;
	int in_free, i;
	char buf[8];

	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
	    map->b_start, map->b_end);
	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
	    map->s_start, map->s_end);
	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
	    map->size, map->ref_count, map->timestamp,
	    map->flags);
	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
	    pmap_resident_count(map->pmap));

	/* struct vmspace handling.
*/ 3096 if (map->flags & VM_MAP_ISVMSPACE) { 3097 vm = (struct vmspace *)map; 3098 3099 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 3100 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 3101 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 3102 vm->vm_tsize, vm->vm_dsize); 3103 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 3104 vm->vm_taddr, vm->vm_daddr); 3105 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 3106 vm->vm_maxsaddr, vm->vm_minsaddr); 3107 } 3108 3109 if (!full) 3110 goto print_uaddr; 3111 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3112 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3113 entry, entry->start, entry->end, entry->object.uvm_obj, 3114 (long long)entry->offset, entry->aref.ar_amap, 3115 entry->aref.ar_pageoff); 3116 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " 3117 "syscall=%c, prot(max)=%d/%d, inh=%d, " 3118 "wc=%d, adv=%d\n", 3119 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3120 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3121 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3122 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3123 (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F', 3124 entry->protection, entry->max_protection, 3125 entry->inheritance, entry->wired_count, entry->advice); 3126 3127 free = uvm_map_uaddr_e(map, entry); 3128 in_free = (free != NULL); 3129 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3130 "free=0x%lx-0x%lx\n", 3131 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3132 in_free ? 'T' : 'F', 3133 entry->guard, 3134 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3135 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3136 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3137 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3138 if (free) { 3139 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3140 free->uaddr_minaddr, free->uaddr_maxaddr, 3141 free->uaddr_functions->uaddr_name); 3142 } 3143 } 3144 3145 print_uaddr: 3146 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3147 for (i = 0; i < nitems(map->uaddr_any); i++) { 3148 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3149 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3150 } 3151 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3152 } 3153 3154 /* 3155 * uvm_object_printit: actually prints the object 3156 */ 3157 void 3158 uvm_object_printit(struct uvm_object *uobj, boolean_t full, 3159 int (*pr)(const char *, ...)) 3160 { 3161 struct vm_page *pg; 3162 int cnt = 0; 3163 3164 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3165 uobj, uobj->pgops, uobj->uo_npages); 3166 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3167 (*pr)("refs=<SYSTEM>\n"); 3168 else 3169 (*pr)("refs=%d\n", uobj->uo_refs); 3170 3171 if (!full) { 3172 return; 3173 } 3174 (*pr)(" PAGES <pg,offset>:\n "); 3175 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3176 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3177 if ((cnt % 3) == 2) { 3178 (*pr)("\n "); 3179 } 3180 cnt++; 3181 } 3182 if ((cnt % 3) != 2) { 3183 (*pr)("\n"); 3184 } 3185 } 3186 3187 /* 3188 * uvm_page_printit: actually print the page 3189 */ 3190 static const char page_flagbits[] = 3191 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3192 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3193 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3194 3195 void 3196 uvm_page_printit(struct vm_page *pg, boolean_t full, 3197 int (*pr)(const char *, ...)) 3198 { 3199 struct vm_page *tpg; 3200 struct uvm_object *uobj; 3201 struct pglist *pgl; 3202 3203 (*pr)("PAGE %p:\n", pg); 3204 (*pr)(" flags=%b, vers=%d, wire_count=%d, 
pa=0x%llx\n", 3205 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3206 (long long)pg->phys_addr); 3207 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3208 pg->uobject, pg->uanon, (long long)pg->offset); 3209 #if defined(UVM_PAGE_TRKOWN) 3210 if (pg->pg_flags & PG_BUSY) 3211 (*pr)(" owning thread = %d, tag=%s", 3212 pg->owner, pg->owner_tag); 3213 else 3214 (*pr)(" page not busy, no owner"); 3215 #else 3216 (*pr)(" [page ownership tracking disabled]"); 3217 #endif 3218 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3219 3220 if (!full) 3221 return; 3222 3223 /* cross-verify object/anon */ 3224 if ((pg->pg_flags & PQ_FREE) == 0) { 3225 if (pg->pg_flags & PQ_ANON) { 3226 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3227 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3228 (pg->uanon) ? pg->uanon->an_page : NULL); 3229 else 3230 (*pr)(" anon backpointer is OK\n"); 3231 } else { 3232 uobj = pg->uobject; 3233 if (uobj) { 3234 (*pr)(" checking object list\n"); 3235 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3236 if (tpg == pg) { 3237 break; 3238 } 3239 } 3240 if (tpg) 3241 (*pr)(" page found on object list\n"); 3242 else 3243 (*pr)(" >>> PAGE NOT FOUND " 3244 "ON OBJECT LIST! <<<\n"); 3245 } 3246 } 3247 } 3248 3249 /* cross-verify page queue */ 3250 if (pg->pg_flags & PQ_FREE) { 3251 if (uvm_pmr_isfree(pg)) 3252 (*pr)(" page found in uvm_pmemrange\n"); 3253 else 3254 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3255 pgl = NULL; 3256 } else if (pg->pg_flags & PQ_INACTIVE) { 3257 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3258 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3259 } else if (pg->pg_flags & PQ_ACTIVE) { 3260 pgl = &uvm.page_active; 3261 } else { 3262 pgl = NULL; 3263 } 3264 3265 if (pgl) { 3266 (*pr)(" checking pageq list\n"); 3267 TAILQ_FOREACH(tpg, pgl, pageq) { 3268 if (tpg == pg) { 3269 break; 3270 } 3271 } 3272 if (tpg) 3273 (*pr)(" page found on pageq list\n"); 3274 else 3275 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3276 } 3277 } 3278 #endif 3279 3280 /* 3281 * uvm_map_protect: change map protection 3282 * 3283 * => set_max means set max_protection. 3284 * => map must be unlocked. 3285 */ 3286 int 3287 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3288 vm_prot_t new_prot, boolean_t set_max) 3289 { 3290 struct vm_map_entry *first, *iter; 3291 vm_prot_t old_prot; 3292 vm_prot_t mask; 3293 vsize_t dused; 3294 int error; 3295 3296 if (start > end) 3297 return EINVAL; 3298 start = MAX(start, map->min_offset); 3299 end = MIN(end, map->max_offset); 3300 if (start >= end) 3301 return 0; 3302 3303 dused = 0; 3304 error = 0; 3305 vm_map_lock(map); 3306 3307 /* 3308 * Set up first and last. 3309 * - first will contain first entry at or after start. 3310 */ 3311 first = uvm_map_entrybyaddr(&map->addr, start); 3312 KDASSERT(first != NULL); 3313 if (first->end <= start) 3314 first = RBT_NEXT(uvm_map_addr, first); 3315 3316 /* First, check for protection violations. */ 3317 for (iter = first; iter != NULL && iter->start < end; 3318 iter = RBT_NEXT(uvm_map_addr, iter)) { 3319 /* Treat memory holes as free space. 
*/ 3320 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3321 continue; 3322 3323 old_prot = iter->protection; 3324 if (old_prot == PROT_NONE && new_prot != old_prot) { 3325 dused += uvmspace_dused( 3326 map, MAX(start, iter->start), MIN(end, iter->end)); 3327 } 3328 3329 if (UVM_ET_ISSUBMAP(iter)) { 3330 error = EINVAL; 3331 goto out; 3332 } 3333 if ((new_prot & iter->max_protection) != new_prot) { 3334 error = EACCES; 3335 goto out; 3336 } 3337 if (map == kernel_map && 3338 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3339 panic("uvm_map_protect: kernel map W^X violation requested"); 3340 } 3341 3342 /* Check limits. */ 3343 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) { 3344 vsize_t limit = lim_cur(RLIMIT_DATA); 3345 dused = ptoa(dused); 3346 if (limit < dused || 3347 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) { 3348 error = ENOMEM; 3349 goto out; 3350 } 3351 } 3352 3353 /* Fix protections. */ 3354 for (iter = first; iter != NULL && iter->start < end; 3355 iter = RBT_NEXT(uvm_map_addr, iter)) { 3356 /* Treat memory holes as free space. */ 3357 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3358 continue; 3359 3360 old_prot = iter->protection; 3361 3362 /* 3363 * Skip adapting protection iff old and new protection 3364 * are equal. 3365 */ 3366 if (set_max) { 3367 if (old_prot == (new_prot & old_prot) && 3368 iter->max_protection == new_prot) 3369 continue; 3370 } else { 3371 if (old_prot == new_prot) 3372 continue; 3373 } 3374 3375 UVM_MAP_CLIP_START(map, iter, start); 3376 UVM_MAP_CLIP_END(map, iter, end); 3377 3378 if (set_max) { 3379 iter->max_protection = new_prot; 3380 iter->protection &= new_prot; 3381 } else 3382 iter->protection = new_prot; 3383 3384 /* 3385 * update physical map if necessary. worry about copy-on-write 3386 * here -- CHECK THIS XXX 3387 */ 3388 if (iter->protection != old_prot) { 3389 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3390 ~PROT_WRITE : PROT_MASK; 3391 3392 /* XXX should only wserial++ if no split occurs */ 3393 if (iter->protection & PROT_WRITE) 3394 map->wserial++; 3395 3396 if (map->flags & VM_MAP_ISVMSPACE) { 3397 if (old_prot == PROT_NONE) { 3398 ((struct vmspace *)map)->vm_dused += 3399 uvmspace_dused(map, iter->start, 3400 iter->end); 3401 } 3402 if (iter->protection == PROT_NONE) { 3403 ((struct vmspace *)map)->vm_dused -= 3404 uvmspace_dused(map, iter->start, 3405 iter->end); 3406 } 3407 } 3408 3409 /* update pmap */ 3410 if ((iter->protection & mask) == PROT_NONE && 3411 VM_MAPENT_ISWIRED(iter)) { 3412 /* 3413 * TODO(ariane) this is stupid. wired_count 3414 * is 0 if not wired, otherwise anything 3415 * larger than 0 (incremented once each time 3416 * wire is called). 3417 * Mostly to be able to undo the damage on 3418 * failure. Not the actually be a wired 3419 * refcounter... 3420 * Originally: iter->wired_count--; 3421 * (don't we have to unwire this in the pmap 3422 * as well?) 3423 */ 3424 iter->wired_count = 0; 3425 } 3426 pmap_protect(map->pmap, iter->start, iter->end, 3427 iter->protection & mask); 3428 } 3429 3430 /* 3431 * If the map is configured to lock any future mappings, 3432 * wire this entry now if the old protection was PROT_NONE 3433 * and the new protection is not PROT_NONE. 
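		 * (VM_MAP_WIREFUTURE is set via mlockall(MCL_FUTURE), see
		 * uvm_map_pageable_all(); memory that only now becomes
		 * accessible must be wired right away.)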
3434 */ 3435 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3436 VM_MAPENT_ISWIRED(iter) == 0 && 3437 old_prot == PROT_NONE && 3438 new_prot != PROT_NONE) { 3439 if (uvm_map_pageable(map, iter->start, iter->end, 3440 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3441 /* 3442 * If locking the entry fails, remember the 3443 * error if it's the first one. Note we 3444 * still continue setting the protection in 3445 * the map, but it will return the resource 3446 * storage condition regardless. 3447 * 3448 * XXX Ignore what the actual error is, 3449 * XXX just call it a resource shortage 3450 * XXX so that it doesn't get confused 3451 * XXX what uvm_map_protect() itself would 3452 * XXX normally return. 3453 */ 3454 error = ENOMEM; 3455 } 3456 } 3457 } 3458 pmap_update(map->pmap); 3459 3460 out: 3461 vm_map_unlock(map); 3462 return error; 3463 } 3464 3465 /* 3466 * uvmspace_alloc: allocate a vmspace structure. 3467 * 3468 * - structure includes vm_map and pmap 3469 * - XXX: no locking on this structure 3470 * - refcnt set to 1, rest must be init'd by caller 3471 */ 3472 struct vmspace * 3473 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3474 boolean_t remove_holes) 3475 { 3476 struct vmspace *vm; 3477 3478 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3479 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3480 return (vm); 3481 } 3482 3483 /* 3484 * uvmspace_init: initialize a vmspace structure. 3485 * 3486 * - XXX: no locking on this structure 3487 * - refcnt set to 1, rest must be init'd by caller 3488 */ 3489 void 3490 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3491 boolean_t pageable, boolean_t remove_holes) 3492 { 3493 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3494 3495 if (pmap) 3496 pmap_reference(pmap); 3497 else 3498 pmap = pmap_create(); 3499 3500 uvm_map_setup(&vm->vm_map, pmap, min, max, 3501 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3502 3503 vm->vm_refcnt = 1; 3504 3505 if (remove_holes) 3506 pmap_remove_holes(vm); 3507 } 3508 3509 /* 3510 * uvmspace_share: share a vmspace between two processes 3511 * 3512 * - used for vfork 3513 */ 3514 3515 struct vmspace * 3516 uvmspace_share(struct process *pr) 3517 { 3518 struct vmspace *vm = pr->ps_vmspace; 3519 3520 uvmspace_addref(vm); 3521 return vm; 3522 } 3523 3524 /* 3525 * uvmspace_exec: the process wants to exec a new program 3526 * 3527 * - XXX: no locking on vmspace 3528 */ 3529 3530 void 3531 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3532 { 3533 struct process *pr = p->p_p; 3534 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3535 struct vm_map *map = &ovm->vm_map; 3536 struct uvm_map_deadq dead_entries; 3537 3538 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3539 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3540 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3541 3542 pmap_unuse_final(p); /* before stack addresses go away */ 3543 TAILQ_INIT(&dead_entries); 3544 3545 /* see if more than one process is using this vmspace... */ 3546 if (ovm->vm_refcnt == 1) { 3547 /* 3548 * If pr is the only process using its vmspace then 3549 * we can safely recycle that vmspace for the program 3550 * that is being exec'd. 3551 */ 3552 3553 #ifdef SYSVSHM 3554 /* 3555 * SYSV SHM semantics require us to kill all segments on an exec 3556 */ 3557 if (ovm->vm_shm) 3558 shmexit(ovm); 3559 #endif 3560 3561 /* 3562 * POSIX 1003.1b -- "lock future mappings" is revoked 3563 * when a process execs another program image. 
3564 */ 3565 vm_map_lock(map); 3566 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE); 3567 3568 /* 3569 * now unmap the old program 3570 * 3571 * Instead of attempting to keep the map valid, we simply 3572 * nuke all entries and ask uvm_map_setup to reinitialize 3573 * the map to the new boundaries. 3574 * 3575 * uvm_unmap_remove will actually nuke all entries for us 3576 * (as in, not replace them with free-memory entries). 3577 */ 3578 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3579 &dead_entries, TRUE, FALSE); 3580 3581 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3582 3583 /* Nuke statistics and boundaries. */ 3584 memset(&ovm->vm_startcopy, 0, 3585 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3586 3587 3588 if (end & (vaddr_t)PAGE_MASK) { 3589 end += 1; 3590 if (end == 0) /* overflow */ 3591 end -= PAGE_SIZE; 3592 } 3593 3594 /* Setup new boundaries and populate map with entries. */ 3595 map->min_offset = start; 3596 map->max_offset = end; 3597 uvm_map_setup_entries(map); 3598 vm_map_unlock(map); 3599 3600 /* but keep MMU holes unavailable */ 3601 pmap_remove_holes(ovm); 3602 } else { 3603 /* 3604 * pr's vmspace is being shared, so we can't reuse 3605 * it for pr since it is still being used for others. 3606 * allocate a new vmspace for pr 3607 */ 3608 nvm = uvmspace_alloc(start, end, 3609 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3610 3611 /* install new vmspace and drop our ref to the old one. */ 3612 pmap_deactivate(p); 3613 p->p_vmspace = pr->ps_vmspace = nvm; 3614 pmap_activate(p); 3615 3616 uvmspace_free(ovm); 3617 } 3618 3619 /* Release dead entries */ 3620 uvm_unmap_detach(&dead_entries, 0); 3621 } 3622 3623 /* 3624 * uvmspace_addref: add a reference to a vmspace. 3625 */ 3626 void 3627 uvmspace_addref(struct vmspace *vm) 3628 { 3629 KERNEL_ASSERT_LOCKED(); 3630 KASSERT(vm->vm_refcnt > 0); 3631 3632 vm->vm_refcnt++; 3633 } 3634 3635 /* 3636 * uvmspace_free: free a vmspace data structure 3637 */ 3638 void 3639 uvmspace_free(struct vmspace *vm) 3640 { 3641 KERNEL_ASSERT_LOCKED(); 3642 3643 if (--vm->vm_refcnt == 0) { 3644 /* 3645 * lock the map, to wait out all other references to it. delete 3646 * all of the mappings and pages they hold, then call the pmap 3647 * module to reclaim anything left. 3648 */ 3649 #ifdef SYSVSHM 3650 /* Get rid of any SYSV shared memory segments. */ 3651 if (vm->vm_shm != NULL) 3652 shmexit(vm); 3653 #endif 3654 3655 uvm_map_teardown(&vm->vm_map); 3656 pool_put(&uvm_vmspace_pool, vm); 3657 } 3658 } 3659 3660 /* 3661 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3662 * srcmap to the address range [dstaddr, dstaddr + sz) in 3663 * dstmap. 3664 * 3665 * The whole address range in srcmap must be backed by an object 3666 * (no holes). 3667 * 3668 * If successful, the address ranges share memory and the destination 3669 * address range uses the protection flags in prot. 3670 * 3671 * This routine assumes that sz is a multiple of PAGE_SIZE and 3672 * that dstaddr and srcaddr are page-aligned. 
3673 */ 3674 int 3675 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3676 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3677 { 3678 int ret = 0; 3679 vaddr_t unmap_end; 3680 vaddr_t dstva; 3681 vsize_t s_off, len, n = sz, remain; 3682 struct vm_map_entry *first = NULL, *last = NULL; 3683 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3684 struct uvm_map_deadq dead; 3685 3686 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3687 return EINVAL; 3688 3689 TAILQ_INIT(&dead); 3690 vm_map_lock(dstmap); 3691 vm_map_lock_read(srcmap); 3692 3693 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3694 ret = ENOMEM; 3695 goto exit_unlock; 3696 } 3697 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3698 ret = EINVAL; 3699 goto exit_unlock; 3700 } 3701 3702 dstva = dstaddr; 3703 unmap_end = dstaddr; 3704 for (; src_entry != NULL; 3705 psrc_entry = src_entry, 3706 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3707 /* hole in address space, bail out */ 3708 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3709 break; 3710 if (src_entry->start >= srcaddr + sz) 3711 break; 3712 3713 if (UVM_ET_ISSUBMAP(src_entry)) 3714 panic("uvm_share: encountered a submap (illegal)"); 3715 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3716 UVM_ET_ISNEEDSCOPY(src_entry)) 3717 panic("uvm_share: non-copy_on_write map entries " 3718 "marked needs_copy (illegal)"); 3719 3720 /* 3721 * srcaddr > map entry start? means we are in the middle of a 3722 * map, so we calculate the offset to use in the source map. 3723 */ 3724 if (srcaddr > src_entry->start) 3725 s_off = srcaddr - src_entry->start; 3726 else if (srcaddr == src_entry->start) 3727 s_off = 0; 3728 else 3729 panic("uvm_share: map entry start > srcaddr"); 3730 3731 remain = src_entry->end - src_entry->start - s_off; 3732 3733 /* Determine how many bytes to share in this pass */ 3734 if (n < remain) 3735 len = n; 3736 else 3737 len = remain; 3738 3739 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot, 3740 srcmap, src_entry, &dead) == NULL) 3741 break; 3742 3743 n -= len; 3744 dstva += len; 3745 srcaddr += len; 3746 unmap_end = dstva + len; 3747 if (n == 0) 3748 goto exit_unlock; 3749 } 3750 3751 ret = EINVAL; 3752 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3753 3754 exit_unlock: 3755 vm_map_unlock_read(srcmap); 3756 vm_map_unlock(dstmap); 3757 uvm_unmap_detach(&dead, 0); 3758 3759 return ret; 3760 } 3761 3762 /* 3763 * Clone map entry into other map. 3764 * 3765 * Mapping will be placed at dstaddr, for the same length. 3766 * Space must be available. 3767 * Reference counters are incremented. 3768 */ 3769 struct vm_map_entry * 3770 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3771 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3772 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3773 int mapent_flags, int amap_share_flags) 3774 { 3775 struct vm_map_entry *new_entry, *first, *last; 3776 3777 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3778 3779 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3780 first = last = NULL; 3781 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3782 panic("uvm_mapent_clone: no space in map for " 3783 "entry in empty map"); 3784 } 3785 new_entry = uvm_map_mkentry(dstmap, first, last, 3786 dstaddr, dstlen, mapent_flags, dead, NULL); 3787 if (new_entry == NULL) 3788 return NULL; 3789 /* old_entry -> new_entry */ 3790 new_entry->object = old_entry->object; 3791 new_entry->offset = old_entry->offset; 3792 new_entry->aref = old_entry->aref; 3793 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3794 new_entry->protection = prot; 3795 new_entry->max_protection = maxprot; 3796 new_entry->inheritance = old_entry->inheritance; 3797 new_entry->advice = old_entry->advice; 3798 3799 /* gain reference to object backing the map (can't be a submap). */ 3800 if (new_entry->aref.ar_amap) { 3801 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3802 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3803 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3804 amap_share_flags); 3805 } 3806 3807 if (UVM_ET_ISOBJ(new_entry) && 3808 new_entry->object.uvm_obj->pgops->pgo_reference) { 3809 new_entry->offset += off; 3810 new_entry->object.uvm_obj->pgops->pgo_reference 3811 (new_entry->object.uvm_obj); 3812 } 3813 3814 return new_entry; 3815 } 3816 3817 struct vm_map_entry * 3818 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3819 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3820 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3821 { 3822 /* 3823 * If old_entry refers to a copy-on-write region that has not yet been 3824 * written to (needs_copy flag is set), then we need to allocate a new 3825 * amap for old_entry. 3826 * 3827 * If we do not do this, and the process owning old_entry does a copy-on 3828 * write later, old_entry and new_entry will refer to different memory 3829 * regions, and the memory between the processes is no longer shared. 3830 * 3831 * [in other words, we need to clear needs_copy] 3832 */ 3833 3834 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3835 /* get our own amap, clears needs_copy */ 3836 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0); 3837 /* XXXCDC: WAITOK??? */ 3838 } 3839 3840 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3841 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3842 } 3843 3844 /* 3845 * share the mapping: this means we want the old and 3846 * new entries to share amaps and backing objects. 3847 */ 3848 struct vm_map_entry * 3849 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3850 struct vm_map *old_map, 3851 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3852 { 3853 struct vm_map_entry *new_entry; 3854 3855 new_entry = uvm_mapent_share(new_map, old_entry->start, 3856 old_entry->end - old_entry->start, 0, old_entry->protection, 3857 old_entry->max_protection, old_map, old_entry, dead); 3858 3859 /* 3860 * pmap_copy the mappings: this routine is optional 3861 * but if it is there it will reduce the number of 3862 * page faults in the new proc. 3863 */ 3864 if (!UVM_ET_ISHOLE(new_entry)) 3865 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3866 (new_entry->end - new_entry->start), new_entry->start); 3867 3868 return (new_entry); 3869 } 3870 3871 /* 3872 * copy-on-write the mapping (using mmap's 3873 * MAP_PRIVATE semantics) 3874 * 3875 * allocate new_entry, adjust reference counts. 3876 * (note that new references are read-only). 
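 * Both parent and child keep read access to the shared pages; the first
 * write fault on either side is what allocates the private copy (see the
 * amap_copy()/amap_cow_now() handling below).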
3877 */ 3878 struct vm_map_entry * 3879 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3880 struct vm_map *old_map, 3881 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3882 { 3883 struct vm_map_entry *new_entry; 3884 boolean_t protect_child; 3885 3886 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3887 old_entry->end - old_entry->start, 0, old_entry->protection, 3888 old_entry->max_protection, old_entry, dead, 0, 0); 3889 3890 new_entry->etype |= 3891 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3892 3893 /* 3894 * the new entry will need an amap. it will either 3895 * need to be copied from the old entry or created 3896 * from scratch (if the old entry does not have an 3897 * amap). can we defer this process until later 3898 * (by setting "needs_copy") or do we need to copy 3899 * the amap now? 3900 * 3901 * we must copy the amap now if any of the following 3902 * conditions hold: 3903 * 1. the old entry has an amap and that amap is 3904 * being shared. this means that the old (parent) 3905 * process is sharing the amap with another 3906 * process. if we do not clear needs_copy here 3907 * we will end up in a situation where both the 3908 * parent and child process are referring to the 3909 * same amap with "needs_copy" set. if the 3910 * parent write-faults, the fault routine will 3911 * clear "needs_copy" in the parent by allocating 3912 * a new amap. this is wrong because the 3913 * parent is supposed to be sharing the old amap 3914 * and the new amap will break that. 3915 * 3916 * 2. if the old entry has an amap and a non-zero 3917 * wire count then we are going to have to call 3918 * amap_cow_now to avoid page faults in the 3919 * parent process. since amap_cow_now requires 3920 * "needs_copy" to be clear we might as well 3921 * clear it here as well. 3922 * 3923 */ 3924 if (old_entry->aref.ar_amap != NULL && 3925 ((amap_flags(old_entry->aref.ar_amap) & 3926 AMAP_SHARED) != 0 || 3927 VM_MAPENT_ISWIRED(old_entry))) { 3928 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3929 0, 0); 3930 /* XXXCDC: M_WAITOK ... ok? */ 3931 } 3932 3933 /* 3934 * if the parent's entry is wired down, then the 3935 * parent process does not want page faults on 3936 * access to that memory. this means that we 3937 * cannot do copy-on-write because we can't write 3938 * protect the old entry. in this case we 3939 * resolve all copy-on-write faults now, using 3940 * amap_cow_now. note that we have already 3941 * allocated any needed amap (above). 3942 */ 3943 if (VM_MAPENT_ISWIRED(old_entry)) { 3944 /* 3945 * resolve all copy-on-write faults now 3946 * (note that there is nothing to do if 3947 * the old mapping does not have an amap). 3948 * XXX: is it worthwhile to bother with 3949 * pmap_copy in this case? 3950 */ 3951 if (old_entry->aref.ar_amap) 3952 amap_cow_now(new_map, new_entry); 3953 } else { 3954 if (old_entry->aref.ar_amap) { 3955 /* 3956 * setup mappings to trigger copy-on-write faults 3957 * we must write-protect the parent if it has 3958 * an amap and it is not already "needs_copy"... 3959 * if it is already "needs_copy" then the parent 3960 * has already been write-protected by a previous 3961 * fork operation. 3962 * 3963 * if we do not write-protect the parent, then 3964 * we must be sure to write-protect the child 3965 * after the pmap_copy() operation. 3966 * 3967 * XXX: pmap_copy should have some way of telling 3968 * us that it didn't do anything so we can avoid 3969 * calling pmap_protect needlessly. 
3970 */ 3971 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3972 if (old_entry->max_protection & PROT_WRITE) { 3973 pmap_protect(old_map->pmap, 3974 old_entry->start, 3975 old_entry->end, 3976 old_entry->protection & 3977 ~PROT_WRITE); 3978 pmap_update(old_map->pmap); 3979 } 3980 old_entry->etype |= UVM_ET_NEEDSCOPY; 3981 } 3982 3983 /* parent must now be write-protected */ 3984 protect_child = FALSE; 3985 } else { 3986 /* 3987 * we only need to protect the child if the 3988 * parent has write access. 3989 */ 3990 if (old_entry->max_protection & PROT_WRITE) 3991 protect_child = TRUE; 3992 else 3993 protect_child = FALSE; 3994 } 3995 /* 3996 * copy the mappings 3997 * XXX: need a way to tell if this does anything 3998 */ 3999 if (!UVM_ET_ISHOLE(new_entry)) 4000 pmap_copy(new_map->pmap, old_map->pmap, 4001 new_entry->start, 4002 (old_entry->end - old_entry->start), 4003 old_entry->start); 4004 4005 /* protect the child's mappings if necessary */ 4006 if (protect_child) { 4007 pmap_protect(new_map->pmap, new_entry->start, 4008 new_entry->end, 4009 new_entry->protection & 4010 ~PROT_WRITE); 4011 } 4012 } 4013 4014 return (new_entry); 4015 } 4016 4017 /* 4018 * zero the mapping: the new entry will be zero initialized 4019 */ 4020 struct vm_map_entry * 4021 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 4022 struct vm_map *old_map, 4023 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 4024 { 4025 struct vm_map_entry *new_entry; 4026 4027 new_entry = uvm_mapent_clone(new_map, old_entry->start, 4028 old_entry->end - old_entry->start, 0, old_entry->protection, 4029 old_entry->max_protection, old_entry, dead, 0, 0); 4030 4031 new_entry->etype |= 4032 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4033 4034 if (new_entry->aref.ar_amap) { 4035 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 4036 atop(new_entry->end - new_entry->start), 0); 4037 new_entry->aref.ar_amap = NULL; 4038 new_entry->aref.ar_pageoff = 0; 4039 } 4040 4041 if (UVM_ET_ISOBJ(new_entry)) { 4042 if (new_entry->object.uvm_obj->pgops->pgo_detach) 4043 new_entry->object.uvm_obj->pgops->pgo_detach( 4044 new_entry->object.uvm_obj); 4045 new_entry->object.uvm_obj = NULL; 4046 new_entry->etype &= ~UVM_ET_OBJ; 4047 } 4048 4049 return (new_entry); 4050 } 4051 4052 /* 4053 * uvmspace_fork: fork a process' main map 4054 * 4055 * => create a new vmspace for child process from parent. 4056 * => parent's map must not be locked. 4057 */ 4058 struct vmspace * 4059 uvmspace_fork(struct process *pr) 4060 { 4061 struct vmspace *vm1 = pr->ps_vmspace; 4062 struct vmspace *vm2; 4063 struct vm_map *old_map = &vm1->vm_map; 4064 struct vm_map *new_map; 4065 struct vm_map_entry *old_entry, *new_entry; 4066 struct uvm_map_deadq dead; 4067 4068 vm_map_lock(old_map); 4069 4070 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 4071 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 4072 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4073 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 4074 vm2->vm_dused = 0; /* Statistic managed by us. 
*/ 4075 new_map = &vm2->vm_map; 4076 vm_map_lock(new_map); 4077 4078 /* go entry-by-entry */ 4079 TAILQ_INIT(&dead); 4080 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 4081 if (old_entry->start == old_entry->end) 4082 continue; 4083 4084 /* first, some sanity checks on the old entry */ 4085 if (UVM_ET_ISSUBMAP(old_entry)) { 4086 panic("fork: encountered a submap during fork " 4087 "(illegal)"); 4088 } 4089 4090 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 4091 UVM_ET_ISNEEDSCOPY(old_entry)) { 4092 panic("fork: non-copy_on_write map entry marked " 4093 "needs_copy (illegal)"); 4094 } 4095 4096 /* Apply inheritance. */ 4097 switch (old_entry->inheritance) { 4098 case MAP_INHERIT_SHARE: 4099 new_entry = uvm_mapent_forkshared(vm2, new_map, 4100 old_map, old_entry, &dead); 4101 break; 4102 case MAP_INHERIT_COPY: 4103 new_entry = uvm_mapent_forkcopy(vm2, new_map, 4104 old_map, old_entry, &dead); 4105 break; 4106 case MAP_INHERIT_ZERO: 4107 new_entry = uvm_mapent_forkzero(vm2, new_map, 4108 old_map, old_entry, &dead); 4109 break; 4110 default: 4111 continue; 4112 } 4113 4114 /* Update process statistics. */ 4115 if (!UVM_ET_ISHOLE(new_entry)) 4116 new_map->size += new_entry->end - new_entry->start; 4117 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) && 4118 new_entry->protection != PROT_NONE) { 4119 vm2->vm_dused += uvmspace_dused( 4120 new_map, new_entry->start, new_entry->end); 4121 } 4122 } 4123 4124 vm_map_unlock(old_map); 4125 vm_map_unlock(new_map); 4126 4127 /* 4128 * This can actually happen, if multiple entries described a 4129 * space in which an entry was inherited. 4130 */ 4131 uvm_unmap_detach(&dead, 0); 4132 4133 #ifdef SYSVSHM 4134 if (vm1->vm_shm) 4135 shmfork(vm1, vm2); 4136 #endif 4137 4138 return vm2; 4139 } 4140 4141 /* 4142 * uvm_map_hint: return the beginning of the best area suitable for 4143 * creating a new mapping with "prot" protection. 4144 */ 4145 vaddr_t 4146 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 4147 vaddr_t maxaddr) 4148 { 4149 vaddr_t addr; 4150 vaddr_t spacing; 4151 4152 #ifdef __i386__ 4153 /* 4154 * If executable skip first two pages, otherwise start 4155 * after data + heap region. 4156 */ 4157 if ((prot & PROT_EXEC) != 0 && 4158 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4159 addr = (PAGE_SIZE*2) + 4160 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4161 return (round_page(addr)); 4162 } 4163 #endif 4164 4165 #if defined (__LP64__) 4166 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4167 #else 4168 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4169 #endif 4170 4171 /* 4172 * Start malloc/mmap after the brk. 4173 */ 4174 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 4175 addr = MAX(addr, minaddr); 4176 4177 if (addr < maxaddr) { 4178 while (spacing > maxaddr - addr) 4179 spacing >>= 1; 4180 } 4181 addr += arc4random() & spacing; 4182 return (round_page(addr)); 4183 } 4184 4185 /* 4186 * uvm_map_submap: punch down part of a map into a submap 4187 * 4188 * => only the kernel_map is allowed to be submapped 4189 * => the purpose of submapping is to break up the locking granularity 4190 * of a larger map 4191 * => the range specified must have been mapped previously with a uvm_map() 4192 * call [with uobj==NULL] to create a blank map entry in the main map. 4193 * [And it had better still be blank!] 4194 * => maps which contain submaps should never be copied or forked. 4195 * => to remove a submap, use uvm_unmap() on the main map 4196 * and then uvm_map_deallocate() the submap. 4197 * => main map must be unlocked. 
4198 * => submap must have been init'd and have a zero reference count. 4199 * [need not be locked as we don't actually reference it] 4200 */ 4201 int 4202 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 4203 struct vm_map *submap) 4204 { 4205 struct vm_map_entry *entry; 4206 int result; 4207 4208 if (start > map->max_offset || end > map->max_offset || 4209 start < map->min_offset || end < map->min_offset) 4210 return EINVAL; 4211 4212 vm_map_lock(map); 4213 4214 if (uvm_map_lookup_entry(map, start, &entry)) { 4215 UVM_MAP_CLIP_START(map, entry, start); 4216 UVM_MAP_CLIP_END(map, entry, end); 4217 } else 4218 entry = NULL; 4219 4220 if (entry != NULL && 4221 entry->start == start && entry->end == end && 4222 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 4223 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 4224 entry->etype |= UVM_ET_SUBMAP; 4225 entry->object.sub_map = submap; 4226 entry->offset = 0; 4227 uvm_map_reference(submap); 4228 result = 0; 4229 } else 4230 result = EINVAL; 4231 4232 vm_map_unlock(map); 4233 return result; 4234 } 4235 4236 /* 4237 * uvm_map_checkprot: check protection in map 4238 * 4239 * => must allow specific protection in a fully allocated region. 4240 * => map mut be read or write locked by caller. 4241 */ 4242 boolean_t 4243 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 4244 vm_prot_t protection) 4245 { 4246 struct vm_map_entry *entry; 4247 4248 if (start < map->min_offset || end > map->max_offset || start > end) 4249 return FALSE; 4250 if (start == end) 4251 return TRUE; 4252 4253 /* 4254 * Iterate entries. 4255 */ 4256 for (entry = uvm_map_entrybyaddr(&map->addr, start); 4257 entry != NULL && entry->start < end; 4258 entry = RBT_NEXT(uvm_map_addr, entry)) { 4259 /* Fail if a hole is found. */ 4260 if (UVM_ET_ISHOLE(entry) || 4261 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 4262 return FALSE; 4263 4264 /* Check protection. */ 4265 if ((entry->protection & protection) != protection) 4266 return FALSE; 4267 } 4268 return TRUE; 4269 } 4270 4271 /* 4272 * uvm_map_create: create map 4273 */ 4274 vm_map_t 4275 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4276 { 4277 vm_map_t map; 4278 4279 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4280 uvm_map_setup(map, pmap, min, max, flags); 4281 return (map); 4282 } 4283 4284 /* 4285 * uvm_map_deallocate: drop reference to a map 4286 * 4287 * => caller must not lock map 4288 * => we will zap map if ref count goes to zero 4289 */ 4290 void 4291 uvm_map_deallocate(vm_map_t map) 4292 { 4293 int c; 4294 struct uvm_map_deadq dead; 4295 4296 c = atomic_dec_int_nv(&map->ref_count); 4297 if (c > 0) { 4298 return; 4299 } 4300 4301 /* 4302 * all references gone. unmap and free. 4303 * 4304 * No lock required: we are only one to access this map. 4305 */ 4306 TAILQ_INIT(&dead); 4307 uvm_tree_sanity(map, __FILE__, __LINE__); 4308 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4309 TRUE, FALSE); 4310 pmap_destroy(map->pmap); 4311 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4312 free(map, M_VMMAP, sizeof *map); 4313 4314 uvm_unmap_detach(&dead, 0); 4315 } 4316 4317 /* 4318 * uvm_map_inherit: set inheritance code for range of addrs in map. 4319 * 4320 * => map must be unlocked 4321 * => note that the inherit code is used during a "fork". see fork 4322 * code for details. 
4323 */ 4324 int 4325 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4326 vm_inherit_t new_inheritance) 4327 { 4328 struct vm_map_entry *entry; 4329 4330 switch (new_inheritance) { 4331 case MAP_INHERIT_NONE: 4332 case MAP_INHERIT_COPY: 4333 case MAP_INHERIT_SHARE: 4334 case MAP_INHERIT_ZERO: 4335 break; 4336 default: 4337 return (EINVAL); 4338 } 4339 4340 if (start > end) 4341 return EINVAL; 4342 start = MAX(start, map->min_offset); 4343 end = MIN(end, map->max_offset); 4344 if (start >= end) 4345 return 0; 4346 4347 vm_map_lock(map); 4348 4349 entry = uvm_map_entrybyaddr(&map->addr, start); 4350 if (entry->end > start) 4351 UVM_MAP_CLIP_START(map, entry, start); 4352 else 4353 entry = RBT_NEXT(uvm_map_addr, entry); 4354 4355 while (entry != NULL && entry->start < end) { 4356 UVM_MAP_CLIP_END(map, entry, end); 4357 entry->inheritance = new_inheritance; 4358 entry = RBT_NEXT(uvm_map_addr, entry); 4359 } 4360 4361 vm_map_unlock(map); 4362 return (0); 4363 } 4364 4365 /* 4366 * uvm_map_syscall: permit system calls for range of addrs in map. 4367 * 4368 * => map must be unlocked 4369 */ 4370 int 4371 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end) 4372 { 4373 struct vm_map_entry *entry; 4374 4375 if (start > end) 4376 return EINVAL; 4377 start = MAX(start, map->min_offset); 4378 end = MIN(end, map->max_offset); 4379 if (start >= end) 4380 return 0; 4381 if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */ 4382 return (EPERM); 4383 4384 vm_map_lock(map); 4385 4386 entry = uvm_map_entrybyaddr(&map->addr, start); 4387 if (entry->end > start) 4388 UVM_MAP_CLIP_START(map, entry, start); 4389 else 4390 entry = RBT_NEXT(uvm_map_addr, entry); 4391 4392 while (entry != NULL && entry->start < end) { 4393 UVM_MAP_CLIP_END(map, entry, end); 4394 entry->etype |= UVM_ET_SYSCALL; 4395 entry = RBT_NEXT(uvm_map_addr, entry); 4396 } 4397 4398 map->wserial++; 4399 map->flags |= VM_MAP_SYSCALL_ONCE; 4400 vm_map_unlock(map); 4401 return (0); 4402 } 4403 4404 /* 4405 * uvm_map_advice: set advice code for range of addrs in map. 4406 * 4407 * => map must be unlocked 4408 */ 4409 int 4410 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4411 { 4412 struct vm_map_entry *entry; 4413 4414 switch (new_advice) { 4415 case MADV_NORMAL: 4416 case MADV_RANDOM: 4417 case MADV_SEQUENTIAL: 4418 break; 4419 default: 4420 return (EINVAL); 4421 } 4422 4423 if (start > end) 4424 return EINVAL; 4425 start = MAX(start, map->min_offset); 4426 end = MIN(end, map->max_offset); 4427 if (start >= end) 4428 return 0; 4429 4430 vm_map_lock(map); 4431 4432 entry = uvm_map_entrybyaddr(&map->addr, start); 4433 if (entry != NULL && entry->end > start) 4434 UVM_MAP_CLIP_START(map, entry, start); 4435 else if (entry!= NULL) 4436 entry = RBT_NEXT(uvm_map_addr, entry); 4437 4438 /* 4439 * XXXJRT: disallow holes? 4440 */ 4441 while (entry != NULL && entry->start < end) { 4442 UVM_MAP_CLIP_END(map, entry, end); 4443 entry->advice = new_advice; 4444 entry = RBT_NEXT(uvm_map_addr, entry); 4445 } 4446 4447 vm_map_unlock(map); 4448 return (0); 4449 } 4450 4451 /* 4452 * uvm_map_extract: extract a mapping from a map and put it somewhere 4453 * in the kernel_map, setting protection to max_prot. 
4454 * 4455 * => map should be unlocked (we will write lock it and kernel_map) 4456 * => returns 0 on success, error code otherwise 4457 * => start must be page aligned 4458 * => len must be page sized 4459 * => flags: 4460 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4461 * Mappings are QREF's. 4462 */ 4463 int 4464 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4465 vaddr_t *dstaddrp, int flags) 4466 { 4467 struct uvm_map_deadq dead; 4468 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4469 vaddr_t dstaddr; 4470 vaddr_t end; 4471 vaddr_t cp_start; 4472 vsize_t cp_len, cp_off; 4473 int error; 4474 4475 TAILQ_INIT(&dead); 4476 end = start + len; 4477 4478 /* 4479 * Sanity check on the parameters. 4480 * Also, since the mapping may not contain gaps, error out if the 4481 * mapped area is not in source map. 4482 */ 4483 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4484 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4485 return EINVAL; 4486 if (start < srcmap->min_offset || end > srcmap->max_offset) 4487 return EINVAL; 4488 4489 /* Initialize dead entries. Handle len == 0 case. */ 4490 if (len == 0) 4491 return 0; 4492 4493 /* Acquire lock on srcmap. */ 4494 vm_map_lock(srcmap); 4495 4496 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4497 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4498 4499 /* Check that the range is contiguous. */ 4500 for (entry = first; entry != NULL && entry->end < end; 4501 entry = RBT_NEXT(uvm_map_addr, entry)) { 4502 if (VMMAP_FREE_END(entry) != entry->end || 4503 UVM_ET_ISHOLE(entry)) { 4504 error = EINVAL; 4505 goto fail; 4506 } 4507 } 4508 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4509 error = EINVAL; 4510 goto fail; 4511 } 4512 4513 /* 4514 * Handle need-copy flag. 4515 */ 4516 for (entry = first; entry != NULL && entry->start < end; 4517 entry = RBT_NEXT(uvm_map_addr, entry)) { 4518 if (UVM_ET_ISNEEDSCOPY(entry)) 4519 amap_copy(srcmap, entry, M_NOWAIT, 4520 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4521 if (UVM_ET_ISNEEDSCOPY(entry)) { 4522 /* 4523 * amap_copy failure 4524 */ 4525 error = ENOMEM; 4526 goto fail; 4527 } 4528 } 4529 4530 /* Lock destination map (kernel_map). */ 4531 vm_map_lock(kernel_map); 4532 4533 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4534 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4535 PROT_NONE, 0) != 0) { 4536 error = ENOMEM; 4537 goto fail2; 4538 } 4539 *dstaddrp = dstaddr; 4540 4541 /* 4542 * We now have srcmap and kernel_map locked. 4543 * dstaddr contains the destination offset in dstmap. 4544 */ 4545 /* step 1: start looping through map entries, performing extraction. */ 4546 for (entry = first; entry != NULL && entry->start < end; 4547 entry = RBT_NEXT(uvm_map_addr, entry)) { 4548 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4549 if (UVM_ET_ISHOLE(entry)) 4550 continue; 4551 4552 /* Calculate uvm_mapent_clone parameters. 
*/ 4553 cp_start = entry->start; 4554 if (cp_start < start) { 4555 cp_off = start - cp_start; 4556 cp_start = start; 4557 } else 4558 cp_off = 0; 4559 cp_len = MIN(entry->end, end) - cp_start; 4560 4561 newentry = uvm_mapent_clone(kernel_map, 4562 cp_start - start + dstaddr, cp_len, cp_off, 4563 entry->protection, entry->max_protection, 4564 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4565 if (newentry == NULL) { 4566 error = ENOMEM; 4567 goto fail2_unmap; 4568 } 4569 kernel_map->size += cp_len; 4570 if (flags & UVM_EXTRACT_FIXPROT) 4571 newentry->protection = newentry->max_protection; 4572 4573 /* 4574 * Step 2: perform pmap copy. 4575 * (Doing this in the loop saves one RB traversal.) 4576 */ 4577 pmap_copy(kernel_map->pmap, srcmap->pmap, 4578 cp_start - start + dstaddr, cp_len, cp_start); 4579 } 4580 pmap_update(kernel_map->pmap); 4581 4582 error = 0; 4583 4584 /* Unmap copied entries on failure. */ 4585 fail2_unmap: 4586 if (error) { 4587 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4588 FALSE, TRUE); 4589 } 4590 4591 /* Release maps, release dead entries. */ 4592 fail2: 4593 vm_map_unlock(kernel_map); 4594 4595 fail: 4596 vm_map_unlock(srcmap); 4597 4598 uvm_unmap_detach(&dead, 0); 4599 4600 return error; 4601 } 4602 4603 /* 4604 * uvm_map_clean: clean out a map range 4605 * 4606 * => valid flags: 4607 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4608 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4609 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4610 * if (flags & PGO_FREE): any cached pages are freed after clean 4611 * => returns an error if any part of the specified range isn't mapped 4612 * => never a need to flush amap layer since the anonymous memory has 4613 * no permanent home, but may deactivate pages there 4614 * => called from sys_msync() and sys_madvise() 4615 * => caller must not write-lock map (read OK). 4616 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4617 */ 4618 4619 int 4620 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4621 { 4622 struct vm_map_entry *first, *entry; 4623 struct vm_amap *amap; 4624 struct vm_anon *anon; 4625 struct vm_page *pg; 4626 struct uvm_object *uobj; 4627 vaddr_t cp_start, cp_end; 4628 int refs; 4629 int error; 4630 boolean_t rv; 4631 4632 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4633 (PGO_FREE|PGO_DEACTIVATE)); 4634 4635 if (start > end || start < map->min_offset || end > map->max_offset) 4636 return EINVAL; 4637 4638 vm_map_lock_read(map); 4639 first = uvm_map_entrybyaddr(&map->addr, start); 4640 4641 /* Make a first pass to check for holes. */ 4642 for (entry = first; entry != NULL && entry->start < end; 4643 entry = RBT_NEXT(uvm_map_addr, entry)) { 4644 if (UVM_ET_ISSUBMAP(entry)) { 4645 vm_map_unlock_read(map); 4646 return EINVAL; 4647 } 4648 if (UVM_ET_ISSUBMAP(entry) || 4649 UVM_ET_ISHOLE(entry) || 4650 (entry->end < end && 4651 VMMAP_FREE_END(entry) != entry->end)) { 4652 vm_map_unlock_read(map); 4653 return EFAULT; 4654 } 4655 } 4656 4657 error = 0; 4658 for (entry = first; entry != NULL && entry->start < end; 4659 entry = RBT_NEXT(uvm_map_addr, entry)) { 4660 amap = entry->aref.ar_amap; /* top layer */ 4661 if (UVM_ET_ISOBJ(entry)) 4662 uobj = entry->object.uvm_obj; 4663 else 4664 uobj = NULL; 4665 4666 /* 4667 * No amap cleaning necessary if: 4668 * - there's no amap 4669 * - we're not deactivating or freeing pages. 
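 *
 * for example (illustrative): a plain msync(2) MS_SYNC request
 * typically arrives here as PGO_CLEANIT|PGO_SYNCIO and goes straight
 * to flush_object, while madvise(2) MADV_FREE arrives as PGO_FREE and
 * walks the amap first.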
4670 */ 4671 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4672 goto flush_object; 4673 4674 cp_start = MAX(entry->start, start); 4675 cp_end = MIN(entry->end, end); 4676 4677 amap_lock(amap); 4678 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4679 anon = amap_lookup(&entry->aref, 4680 cp_start - entry->start); 4681 if (anon == NULL) 4682 continue; 4683 4684 KASSERT(anon->an_lock == amap->am_lock); 4685 pg = anon->an_page; 4686 if (pg == NULL) { 4687 continue; 4688 } 4689 KASSERT(pg->pg_flags & PQ_ANON); 4690 4691 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4692 /* 4693 * XXX In these first 3 cases, we always just 4694 * XXX deactivate the page. We may want to 4695 * XXX handle the different cases more 4696 * XXX specifically, in the future. 4697 */ 4698 case PGO_CLEANIT|PGO_FREE: 4699 case PGO_CLEANIT|PGO_DEACTIVATE: 4700 case PGO_DEACTIVATE: 4701 deactivate_it: 4702 /* skip the page if it's wired */ 4703 if (pg->wire_count != 0) 4704 break; 4705 4706 uvm_lock_pageq(); 4707 4708 KASSERT(pg->uanon == anon); 4709 4710 /* zap all mappings for the page. */ 4711 pmap_page_protect(pg, PROT_NONE); 4712 4713 /* ...and deactivate the page. */ 4714 uvm_pagedeactivate(pg); 4715 4716 uvm_unlock_pageq(); 4717 break; 4718 case PGO_FREE: 4719 /* 4720 * If there are multiple references to 4721 * the amap, just deactivate the page. 4722 */ 4723 if (amap_refs(amap) > 1) 4724 goto deactivate_it; 4725 4726 /* XXX skip the page if it's wired */ 4727 if (pg->wire_count != 0) { 4728 break; 4729 } 4730 amap_unadd(&entry->aref, 4731 cp_start - entry->start); 4732 refs = --anon->an_ref; 4733 if (refs == 0) 4734 uvm_anfree(anon); 4735 break; 4736 default: 4737 panic("uvm_map_clean: weird flags"); 4738 } 4739 } 4740 amap_unlock(amap); 4741 4742 flush_object: 4743 cp_start = MAX(entry->start, start); 4744 cp_end = MIN(entry->end, end); 4745 4746 /* 4747 * flush pages if we've got a valid backing object. 4748 * 4749 * Don't PGO_FREE if we don't have write permission 4750 * and don't flush if this is a copy-on-write object 4751 * since we can't know our permissions on it. 4752 */ 4753 if (uobj != NULL && 4754 ((flags & PGO_FREE) == 0 || 4755 ((entry->max_protection & PROT_WRITE) != 0 && 4756 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4757 rv = uobj->pgops->pgo_flush(uobj, 4758 cp_start - entry->start + entry->offset, 4759 cp_end - entry->start + entry->offset, flags); 4760 4761 if (rv == FALSE) 4762 error = EFAULT; 4763 } 4764 } 4765 4766 vm_map_unlock_read(map); 4767 return error; 4768 } 4769 4770 /* 4771 * UVM_MAP_CLIP_END implementation 4772 */ 4773 void 4774 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4775 { 4776 struct vm_map_entry *tmp; 4777 4778 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4779 tmp = uvm_mapent_alloc(map, 0); 4780 4781 /* Invoke splitentry. */ 4782 uvm_map_splitentry(map, entry, tmp, addr); 4783 } 4784 4785 /* 4786 * UVM_MAP_CLIP_START implementation 4787 * 4788 * Clippers are required to not change the pointers to the entry they are 4789 * clipping on. 4790 * Since uvm_map_splitentry turns the original entry into the lowest 4791 * entry (address wise) we do a swap between the new entry and the original 4792 * entry, prior to calling uvm_map_splitentry. 4793 */ 4794 void 4795 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4796 { 4797 struct vm_map_entry *tmp; 4798 struct uvm_addr_state *free; 4799 4800 /* Unlink original. 
*/ 4801 free = uvm_map_uaddr_e(map, entry); 4802 uvm_mapent_free_remove(map, free, entry); 4803 uvm_mapent_addr_remove(map, entry); 4804 4805 /* Copy entry. */ 4806 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4807 tmp = uvm_mapent_alloc(map, 0); 4808 uvm_mapent_copy(entry, tmp); 4809 4810 /* Put new entry in place of original entry. */ 4811 uvm_mapent_addr_insert(map, tmp); 4812 uvm_mapent_free_insert(map, free, tmp); 4813 4814 /* Invoke splitentry. */ 4815 uvm_map_splitentry(map, tmp, entry, addr); 4816 } 4817 4818 /* 4819 * Boundary fixer. 4820 */ 4821 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4822 static inline vaddr_t 4823 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4824 { 4825 return (min < bound && max > bound) ? bound : max; 4826 } 4827 4828 /* 4829 * Choose free list based on address at start of free space. 4830 * 4831 * The uvm_addr_state returned contains addr and is the first of: 4832 * - uaddr_exe 4833 * - uaddr_brk_stack 4834 * - uaddr_any 4835 */ 4836 struct uvm_addr_state* 4837 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4838 { 4839 struct uvm_addr_state *uaddr; 4840 int i; 4841 4842 /* Special case the first page, to prevent mmap from returning 0. */ 4843 if (addr < VMMAP_MIN_ADDR) 4844 return NULL; 4845 4846 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4847 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4848 if (addr >= uvm_maxkaddr) 4849 return NULL; 4850 } 4851 4852 /* Is the address inside the exe-only map? */ 4853 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4854 addr < map->uaddr_exe->uaddr_maxaddr) 4855 return map->uaddr_exe; 4856 4857 /* Check if the space falls inside brk/stack area. */ 4858 if ((addr >= map->b_start && addr < map->b_end) || 4859 (addr >= map->s_start && addr < map->s_end)) { 4860 if (map->uaddr_brk_stack != NULL && 4861 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4862 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4863 return map->uaddr_brk_stack; 4864 } else 4865 return NULL; 4866 } 4867 4868 /* 4869 * Check the other selectors. 4870 * 4871 * These selectors are only marked as the owner, if they have insert 4872 * functions. 4873 */ 4874 for (i = 0; i < nitems(map->uaddr_any); i++) { 4875 uaddr = map->uaddr_any[i]; 4876 if (uaddr == NULL) 4877 continue; 4878 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4879 continue; 4880 4881 if (addr >= uaddr->uaddr_minaddr && 4882 addr < uaddr->uaddr_maxaddr) 4883 return uaddr; 4884 } 4885 4886 return NULL; 4887 } 4888 4889 /* 4890 * Choose free list based on address at start of free space. 4891 * 4892 * The uvm_addr_state returned contains addr and is the first of: 4893 * - uaddr_exe 4894 * - uaddr_brk_stack 4895 * - uaddr_any 4896 */ 4897 struct uvm_addr_state* 4898 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4899 { 4900 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4901 } 4902 4903 /* 4904 * Returns the first free-memory boundary that is crossed by [min-max]. 4905 */ 4906 vsize_t 4907 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4908 { 4909 struct uvm_addr_state *uaddr; 4910 int i; 4911 4912 /* Never return first page. */ 4913 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4914 4915 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4916 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4917 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4918 4919 /* Check for exe-only boundaries. 
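 *
 * uvm_map_boundfix() clamps max to the first boundary that lies
 * strictly inside (min, max).  Worked example (illustrative): with
 * min = 0x1000, max = 0x9000 and an exe-only selector ending at
 * 0x4000, max becomes 0x4000, so no free-space chunk straddles the
 * selector boundary.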
 */
	if (map->uaddr_exe != NULL) {
		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
	}

	/* Check for brk/stack area boundaries. */
	if (map->uaddr_brk_stack != NULL) {
		max = uvm_map_boundfix(min, max,
		    map->uaddr_brk_stack->uaddr_minaddr);
		max = uvm_map_boundfix(min, max,
		    map->uaddr_brk_stack->uaddr_maxaddr);
	}

	/* Check other boundaries. */
	for (i = 0; i < nitems(map->uaddr_any); i++) {
		uaddr = map->uaddr_any[i];
		if (uaddr != NULL) {
			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
		}
	}

	/* Boundaries at stack and brk() area. */
	max = uvm_map_boundfix(min, max, map->s_start);
	max = uvm_map_boundfix(min, max, map->s_end);
	max = uvm_map_boundfix(min, max, map->b_start);
	max = uvm_map_boundfix(min, max, map->b_end);

	return max;
}

/*
 * Update map allocation start and end addresses from proc vmspace.
 */
void
uvm_map_vmspace_update(struct vm_map *map,
    struct uvm_map_deadq *dead, int flags)
{
	struct vmspace *vm;
	vaddr_t b_start, b_end, s_start, s_end;

	KASSERT(map->flags & VM_MAP_ISVMSPACE);
	KASSERT(offsetof(struct vmspace, vm_map) == 0);

	/*
	 * Derive actual allocation boundaries from vmspace.
	 */
	vm = (struct vmspace *)map;
	b_start = (vaddr_t)vm->vm_daddr;
	b_end = b_start + BRKSIZ;
	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
#ifdef DIAGNOSTIC
	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
		    vm, b_start, b_end, s_start, s_end);
	}
#endif

	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
	    map->s_start == s_start && map->s_end == s_end))
		return;

	uvm_map_freelist_update(map, dead, b_start, b_end,
	    s_start, s_end, flags);
}

/*
 * Grow kernel memory.
 *
 * This function is only called for kernel maps when an allocation fails.
 *
 * If the map has a gap that is large enough to accommodate alloc_sz, this
 * function will make sure map->free will include it.
 */
void
uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
    vsize_t alloc_sz, int flags)
{
	vsize_t sz;
	vaddr_t end;
	struct vm_map_entry *entry;

	/* Kernel memory only. */
	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
	/* Destroy free list. */
	uvm_map_freelist_update_clear(map, dead);

	/* Include the guard page in the hard minimum requirement of alloc_sz. */
	if (map->flags & VM_MAP_GUARDPAGES)
		alloc_sz += PAGE_SIZE;

	/*
	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
	 *
	 * Don't handle the case where the multiplication overflows:
	 * if that happens, the allocation is probably too big anyway.
	 */
	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);

	/*
	 * Walk forward until a gap large enough for alloc_sz shows up.
	 *
	 * We assume the kernel map has no boundaries.
	 * uvm_maxkaddr may be zero.
	 */
	end = MAX(uvm_maxkaddr, map->min_offset);
	entry = uvm_map_entrybyaddr(&map->addr, end);
	while (entry && entry->fspace < alloc_sz)
		entry = RBT_NEXT(uvm_map_addr, entry);
	if (entry) {
		end = MAX(VMMAP_FREE_START(entry), end);
		end += MIN(sz, map->max_offset - end);
	} else
		end = map->max_offset;

	/* Reserve pmap entries. */
#ifdef PMAP_GROWKERNEL
	uvm_maxkaddr = pmap_growkernel(end);
#else
	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
#endif

	/* Rebuild free list. */
	uvm_map_freelist_update_refill(map, flags);
}

/*
 * Freelist update subfunction: unlink all entries from freelists.
 */
void
uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
{
	struct uvm_addr_state *free;
	struct vm_map_entry *entry, *prev, *next;

	prev = NULL;
	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
	    entry = next) {
		next = RBT_NEXT(uvm_map_addr, entry);

		free = uvm_map_uaddr_e(map, entry);
		uvm_mapent_free_remove(map, free, entry);

		if (prev != NULL && entry->start == entry->end) {
			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
			uvm_mapent_addr_remove(map, entry);
			DEAD_ENTRY_PUSH(dead, entry);
		} else
			prev = entry;
	}
}

/*
 * Freelist update subfunction: refill the freelists with entries.
 */
void
uvm_map_freelist_update_refill(struct vm_map *map, int flags)
{
	struct vm_map_entry *entry;
	vaddr_t min, max;

	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
		min = VMMAP_FREE_START(entry);
		max = VMMAP_FREE_END(entry);
		entry->fspace = 0;

		entry = uvm_map_fix_space(map, entry, min, max, flags);
	}

	uvm_tree_sanity(map, __FILE__, __LINE__);
}

/*
 * Change {b,s}_{start,end} allocation ranges and associated free lists.
 */
void
uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
    vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
{
	KDASSERT(b_end >= b_start && s_end >= s_start);

	/* Clear all free lists. */
	uvm_map_freelist_update_clear(map, dead);

	/* Apply new bounds. */
	map->b_start = b_start;
	map->b_end = b_end;
	map->s_start = s_start;
	map->s_end = s_end;

	/* Refill free lists. */
	uvm_map_freelist_update_refill(map, flags);
}

/*
 * Assign a uvm_addr_state to the specified pointer in vm_map.
 *
 * May sleep.
 */
void
uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
    struct uvm_addr_state *newval)
{
	struct uvm_map_deadq dead;

	/* Pointer which must be in this map. */
	KASSERT(which != NULL);
	KASSERT((void*)map <= (void*)(which) &&
	    (void*)(which) < (void*)(map + 1));

	vm_map_lock(map);
	TAILQ_INIT(&dead);
	uvm_map_freelist_update_clear(map, &dead);

	uvm_addr_destroy(*which);
	*which = newval;

	uvm_map_freelist_update_refill(map, 0);
	vm_map_unlock(map);
	uvm_unmap_detach(&dead, 0);
}

/*
 * Correct space insert.
 *
 * Entry must not be on any freelist.
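 *
 * Sketch of the contract (illustrative): given the entry that ends at
 * min (or NULL at the very start of the map) and the free range
 * [min, max), either grow entry->fspace or start new free-space
 * entries, splitting at every address returned by uvm_map_boundary()
 * so that no free-space chunk is shared by two uaddr selectors.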
5151 */ 5152 struct vm_map_entry* 5153 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5154 vaddr_t min, vaddr_t max, int flags) 5155 { 5156 struct uvm_addr_state *free, *entfree; 5157 vaddr_t lmax; 5158 5159 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5160 KDASSERT(min <= max); 5161 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5162 min == map->min_offset); 5163 5164 /* 5165 * During the function, entfree will always point at the uaddr state 5166 * for entry. 5167 */ 5168 entfree = (entry == NULL ? NULL : 5169 uvm_map_uaddr_e(map, entry)); 5170 5171 while (min != max) { 5172 /* Claim guard page for entry. */ 5173 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5174 VMMAP_FREE_END(entry) == entry->end && 5175 entry->start != entry->end) { 5176 if (max - min == 2 * PAGE_SIZE) { 5177 /* 5178 * If the free-space gap is exactly 2 pages, 5179 * we make the guard 2 pages instead of 1. 5180 * Because in a guarded map, an area needs 5181 * at least 2 pages to allocate from: 5182 * one page for the allocation and one for 5183 * the guard. 5184 */ 5185 entry->guard = 2 * PAGE_SIZE; 5186 min = max; 5187 } else { 5188 entry->guard = PAGE_SIZE; 5189 min += PAGE_SIZE; 5190 } 5191 continue; 5192 } 5193 5194 /* 5195 * Handle the case where entry has a 2-page guard, but the 5196 * space after entry is freed. 5197 */ 5198 if (entry != NULL && entry->fspace == 0 && 5199 entry->guard > PAGE_SIZE) { 5200 entry->guard = PAGE_SIZE; 5201 min = VMMAP_FREE_START(entry); 5202 } 5203 5204 lmax = uvm_map_boundary(map, min, max); 5205 free = uvm_map_uaddr(map, min); 5206 5207 /* 5208 * Entries are merged if they point at the same uvm_free(). 5209 * Exception to that rule: if min == uvm_maxkaddr, a new 5210 * entry is started regardless (otherwise the allocators 5211 * will get confused). 5212 */ 5213 if (entry != NULL && free == entfree && 5214 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5215 min == uvm_maxkaddr)) { 5216 KDASSERT(VMMAP_FREE_END(entry) == min); 5217 entry->fspace += lmax - min; 5218 } else { 5219 /* 5220 * Commit entry to free list: it'll not be added to 5221 * anymore. 5222 * We'll start a new entry and add to that entry 5223 * instead. 5224 */ 5225 if (entry != NULL) 5226 uvm_mapent_free_insert(map, entfree, entry); 5227 5228 /* New entry for new uaddr. */ 5229 entry = uvm_mapent_alloc(map, flags); 5230 KDASSERT(entry != NULL); 5231 entry->end = entry->start = min; 5232 entry->guard = 0; 5233 entry->fspace = lmax - min; 5234 entry->object.uvm_obj = NULL; 5235 entry->offset = 0; 5236 entry->etype = 0; 5237 entry->protection = entry->max_protection = 0; 5238 entry->inheritance = 0; 5239 entry->wired_count = 0; 5240 entry->advice = 0; 5241 entry->aref.ar_pageoff = 0; 5242 entry->aref.ar_amap = NULL; 5243 uvm_mapent_addr_insert(map, entry); 5244 5245 entfree = free; 5246 } 5247 5248 min = lmax; 5249 } 5250 /* Finally put entry on the uaddr state. */ 5251 if (entry != NULL) 5252 uvm_mapent_free_insert(map, entfree, entry); 5253 5254 return entry; 5255 } 5256 5257 /* 5258 * MQuery style of allocation. 5259 * 5260 * This allocator searches forward until sufficient space is found to map 5261 * the given size. 5262 * 5263 * XXX: factor in offset (via pmap_prefer) and protection? 
5264 */ 5265 int 5266 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5267 int flags) 5268 { 5269 struct vm_map_entry *entry, *last; 5270 vaddr_t addr; 5271 vaddr_t tmp, pmap_align, pmap_offset; 5272 int error; 5273 5274 addr = *addr_p; 5275 vm_map_lock_read(map); 5276 5277 /* Configure pmap prefer. */ 5278 if (offset != UVM_UNKNOWN_OFFSET) { 5279 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5280 pmap_offset = PMAP_PREFER_OFFSET(offset); 5281 } else { 5282 pmap_align = PAGE_SIZE; 5283 pmap_offset = 0; 5284 } 5285 5286 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5287 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5288 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5289 if (tmp < addr) 5290 tmp += pmap_align; 5291 addr = tmp; 5292 } 5293 5294 /* First, check if the requested range is fully available. */ 5295 entry = uvm_map_entrybyaddr(&map->addr, addr); 5296 last = NULL; 5297 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5298 error = 0; 5299 goto out; 5300 } 5301 if (flags & UVM_FLAG_FIXED) { 5302 error = EINVAL; 5303 goto out; 5304 } 5305 5306 error = ENOMEM; /* Default error from here. */ 5307 5308 /* 5309 * At this point, the memory at <addr, sz> is not available. 5310 * The reasons are: 5311 * [1] it's outside the map, 5312 * [2] it starts in used memory (and therefore needs to move 5313 * toward the first free page in entry), 5314 * [3] it starts in free memory but bumps into used memory. 5315 * 5316 * Note that for case [2], the forward moving is handled by the 5317 * for loop below. 5318 */ 5319 if (entry == NULL) { 5320 /* [1] Outside the map. */ 5321 if (addr >= map->max_offset) 5322 goto out; 5323 else 5324 entry = RBT_MIN(uvm_map_addr, &map->addr); 5325 } else if (VMMAP_FREE_START(entry) <= addr) { 5326 /* [3] Bumped into used memory. */ 5327 entry = RBT_NEXT(uvm_map_addr, entry); 5328 } 5329 5330 /* Test if the next entry is sufficient for the allocation. */ 5331 for (; entry != NULL; 5332 entry = RBT_NEXT(uvm_map_addr, entry)) { 5333 if (entry->fspace == 0) 5334 continue; 5335 addr = VMMAP_FREE_START(entry); 5336 5337 restart: /* Restart address checks on address change. */ 5338 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5339 if (tmp < addr) 5340 tmp += pmap_align; 5341 addr = tmp; 5342 if (addr >= VMMAP_FREE_END(entry)) 5343 continue; 5344 5345 /* Skip brk() allocation addresses. */ 5346 if (addr + sz > map->b_start && addr < map->b_end) { 5347 if (VMMAP_FREE_END(entry) > map->b_end) { 5348 addr = map->b_end; 5349 goto restart; 5350 } else 5351 continue; 5352 } 5353 /* Skip stack allocation addresses. 
*/ 5354 if (addr + sz > map->s_start && addr < map->s_end) { 5355 if (VMMAP_FREE_END(entry) > map->s_end) { 5356 addr = map->s_end; 5357 goto restart; 5358 } else 5359 continue; 5360 } 5361 5362 last = NULL; 5363 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5364 error = 0; 5365 goto out; 5366 } 5367 } 5368 5369 out: 5370 vm_map_unlock_read(map); 5371 if (error == 0) 5372 *addr_p = addr; 5373 return error; 5374 } 5375 5376 boolean_t 5377 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5378 { 5379 boolean_t rv; 5380 5381 if (map->flags & VM_MAP_INTRSAFE) { 5382 rv = mtx_enter_try(&map->mtx); 5383 } else { 5384 mtx_enter(&map->flags_lock); 5385 if (map->flags & VM_MAP_BUSY) { 5386 mtx_leave(&map->flags_lock); 5387 return (FALSE); 5388 } 5389 mtx_leave(&map->flags_lock); 5390 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5391 /* check if the lock is busy and back out if we won the race */ 5392 if (rv) { 5393 mtx_enter(&map->flags_lock); 5394 if (map->flags & VM_MAP_BUSY) { 5395 rw_exit(&map->lock); 5396 rv = FALSE; 5397 } 5398 mtx_leave(&map->flags_lock); 5399 } 5400 } 5401 5402 if (rv) { 5403 map->timestamp++; 5404 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5405 uvm_tree_sanity(map, file, line); 5406 uvm_tree_size_chk(map, file, line); 5407 } 5408 5409 return (rv); 5410 } 5411 5412 void 5413 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5414 { 5415 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5416 do { 5417 mtx_enter(&map->flags_lock); 5418 tryagain: 5419 while (map->flags & VM_MAP_BUSY) { 5420 map->flags |= VM_MAP_WANTLOCK; 5421 msleep_nsec(&map->flags, &map->flags_lock, 5422 PVM, vmmapbsy, INFSLP); 5423 } 5424 mtx_leave(&map->flags_lock); 5425 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5426 /* check if the lock is busy and back out if we won the race */ 5427 mtx_enter(&map->flags_lock); 5428 if (map->flags & VM_MAP_BUSY) { 5429 rw_exit(&map->lock); 5430 goto tryagain; 5431 } 5432 mtx_leave(&map->flags_lock); 5433 } else { 5434 mtx_enter(&map->mtx); 5435 } 5436 5437 map->timestamp++; 5438 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5439 uvm_tree_sanity(map, file, line); 5440 uvm_tree_size_chk(map, file, line); 5441 } 5442 5443 void 5444 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5445 { 5446 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5447 rw_enter_read(&map->lock); 5448 else 5449 mtx_enter(&map->mtx); 5450 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5451 uvm_tree_sanity(map, file, line); 5452 uvm_tree_size_chk(map, file, line); 5453 } 5454 5455 void 5456 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5457 { 5458 uvm_tree_sanity(map, file, line); 5459 uvm_tree_size_chk(map, file, line); 5460 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5461 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5462 rw_exit(&map->lock); 5463 else 5464 mtx_leave(&map->mtx); 5465 } 5466 5467 void 5468 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5469 { 5470 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5471 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5472 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5473 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5474 rw_exit_read(&map->lock); 5475 else 5476 mtx_leave(&map->mtx); 5477 } 5478 5479 void 5480 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5481 { 5482 uvm_tree_sanity(map, file, line); 5483 uvm_tree_size_chk(map, file, line); 5484 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, 
line)); 5485 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5486 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5487 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5488 rw_enter(&map->lock, RW_DOWNGRADE); 5489 } 5490 5491 void 5492 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5493 { 5494 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5495 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5496 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5497 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5498 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5499 rw_exit_read(&map->lock); 5500 rw_enter_write(&map->lock); 5501 } 5502 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5503 uvm_tree_sanity(map, file, line); 5504 } 5505 5506 void 5507 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5508 { 5509 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5510 mtx_enter(&map->flags_lock); 5511 map->flags |= VM_MAP_BUSY; 5512 mtx_leave(&map->flags_lock); 5513 } 5514 5515 void 5516 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5517 { 5518 int oflags; 5519 5520 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5521 mtx_enter(&map->flags_lock); 5522 oflags = map->flags; 5523 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5524 mtx_leave(&map->flags_lock); 5525 if (oflags & VM_MAP_WANTLOCK) 5526 wakeup(&map->flags); 5527 } 5528 5529 #ifndef SMALL_KERNEL 5530 int 5531 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5532 size_t *lenp) 5533 { 5534 struct vm_map_entry *entry; 5535 vaddr_t start; 5536 int cnt, maxcnt, error = 0; 5537 5538 KASSERT(*lenp > 0); 5539 KASSERT((*lenp % sizeof(*kve)) == 0); 5540 cnt = 0; 5541 maxcnt = *lenp / sizeof(*kve); 5542 KASSERT(maxcnt > 0); 5543 5544 /* 5545 * Return only entries whose address is above the given base 5546 * address. This allows userland to iterate without knowing the 5547 * number of entries beforehand. 5548 */ 5549 start = (vaddr_t)kve[0].kve_start; 5550 5551 vm_map_lock(map); 5552 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5553 if (cnt == maxcnt) { 5554 error = ENOMEM; 5555 break; 5556 } 5557 if (start != 0 && entry->start < start) 5558 continue; 5559 kve->kve_start = entry->start; 5560 kve->kve_end = entry->end; 5561 kve->kve_guard = entry->guard; 5562 kve->kve_fspace = entry->fspace; 5563 kve->kve_fspace_augment = entry->fspace_augment; 5564 kve->kve_offset = entry->offset; 5565 kve->kve_wired_count = entry->wired_count; 5566 kve->kve_etype = entry->etype; 5567 kve->kve_protection = entry->protection; 5568 kve->kve_max_protection = entry->max_protection; 5569 kve->kve_advice = entry->advice; 5570 kve->kve_inheritance = entry->inheritance; 5571 kve->kve_flags = entry->flags; 5572 kve++; 5573 cnt++; 5574 } 5575 vm_map_unlock(map); 5576 5577 KASSERT(cnt <= maxcnt); 5578 5579 *lenp = sizeof(*kve) * cnt; 5580 return error; 5581 } 5582 #endif 5583 5584 5585 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5586 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5587 5588 5589 /* 5590 * MD code: vmspace allocator setup. 5591 */ 5592 5593 #ifdef __i386__ 5594 void 5595 uvm_map_setup_md(struct vm_map *map) 5596 { 5597 vaddr_t min, max; 5598 5599 min = map->min_offset; 5600 max = map->max_offset; 5601 5602 /* 5603 * Ensure the selectors will not try to manage page 0; 5604 * it's too special. 5605 */ 5606 if (min < VMMAP_MIN_ADDR) 5607 min = VMMAP_MIN_ADDR; 5608 5609 #if 0 /* Cool stuff, not yet */ 5610 /* Executable code is special. 
*/ 5611 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5612 /* Place normal allocations beyond executable mappings. */ 5613 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5614 #else /* Crappy stuff, for now */ 5615 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5616 #endif 5617 5618 #ifndef SMALL_KERNEL 5619 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5620 #endif /* !SMALL_KERNEL */ 5621 } 5622 #elif __LP64__ 5623 void 5624 uvm_map_setup_md(struct vm_map *map) 5625 { 5626 vaddr_t min, max; 5627 5628 min = map->min_offset; 5629 max = map->max_offset; 5630 5631 /* 5632 * Ensure the selectors will not try to manage page 0; 5633 * it's too special. 5634 */ 5635 if (min < VMMAP_MIN_ADDR) 5636 min = VMMAP_MIN_ADDR; 5637 5638 #if 0 /* Cool stuff, not yet */ 5639 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5640 #else /* Crappy stuff, for now */ 5641 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5642 #endif 5643 5644 #ifndef SMALL_KERNEL 5645 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5646 #endif /* !SMALL_KERNEL */ 5647 } 5648 #else /* non-i386, 32 bit */ 5649 void 5650 uvm_map_setup_md(struct vm_map *map) 5651 { 5652 vaddr_t min, max; 5653 5654 min = map->min_offset; 5655 max = map->max_offset; 5656 5657 /* 5658 * Ensure the selectors will not try to manage page 0; 5659 * it's too special. 5660 */ 5661 if (min < VMMAP_MIN_ADDR) 5662 min = VMMAP_MIN_ADDR; 5663 5664 #if 0 /* Cool stuff, not yet */ 5665 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5666 #else /* Crappy stuff, for now */ 5667 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5668 #endif 5669 5670 #ifndef SMALL_KERNEL 5671 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5672 #endif /* !SMALL_KERNEL */ 5673 } 5674 #endif 5675
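
/*
 * Userland view of uvm_map_fill_vmmap() (illustrative sketch, not part
 * of the kernel build; needs <sys/types.h>, <sys/sysctl.h>, <string.h>,
 * <stdio.h> and <unistd.h>).  uvm_map_fill_vmmap() exports map entries
 * through the KERN_PROC_VMMAP sysctl; the first element's kve_start
 * acts as the iteration cursor, as described in that function:
 *
 *	struct kinfo_vmentry kve[64];
 *	size_t len = sizeof(kve);
 *	int mib[3] = { CTL_KERN, KERN_PROC_VMMAP, getpid() };
 *
 *	memset(kve, 0, sizeof(kve));
 *	kve[0].kve_start = 0;
 *	if (sysctl(mib, 3, kve, &len, NULL, 0) == 0)
 *		printf("%zu entries\n", len / sizeof(kve[0]));
 */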