1 /* $OpenBSD: uvm_map.c,v 1.263 2020/03/04 21:15:38 kettenis Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/acct.h> 90 #include <sys/mman.h> 91 #include <sys/proc.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/sysctl.h> 95 #include <sys/signalvar.h> 96 #include <sys/syslog.h> 97 #include <sys/user.h> 98 99 #ifdef SYSVSHM 100 #include <sys/shm.h> 101 #endif 102 103 #include <uvm/uvm.h> 104 105 #ifdef DDB 106 #include <uvm/uvm_ddb.h> 107 #endif 108 109 #include <uvm/uvm_addr.h> 110 111 112 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 113 int uvm_mapent_isjoinable(struct vm_map*, 114 struct vm_map_entry*, struct vm_map_entry*); 115 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 116 struct vm_map_entry*, struct uvm_map_deadq*); 117 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 118 struct vm_map_entry*, struct uvm_map_deadq*); 119 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 120 struct vm_map_entry*, vaddr_t, vsize_t, int, 121 struct uvm_map_deadq*, struct vm_map_entry*); 122 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 123 void uvm_mapent_free(struct vm_map_entry*); 124 void uvm_unmap_kill_entry(struct vm_map*, 125 struct vm_map_entry*); 126 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 127 void uvm_mapent_mkfree(struct vm_map*, 128 struct vm_map_entry*, struct vm_map_entry**, 129 struct uvm_map_deadq*, boolean_t); 130 void uvm_map_pageable_pgon(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry*, 132 vaddr_t, vaddr_t); 133 int uvm_map_pageable_wire(struct vm_map*, 134 struct vm_map_entry*, struct vm_map_entry*, 135 vaddr_t, vaddr_t, int); 136 void uvm_map_setup_entries(struct vm_map*); 137 void uvm_map_setup_md(struct vm_map*); 138 void uvm_map_teardown(struct vm_map*); 139 void uvm_map_vmspace_update(struct vm_map*, 140 struct uvm_map_deadq*, int); 141 void uvm_map_kmem_grow(struct vm_map*, 142 struct uvm_map_deadq*, vsize_t, int); 143 void uvm_map_freelist_update_clear(struct vm_map*, 144 struct uvm_map_deadq*); 145 void uvm_map_freelist_update_refill(struct vm_map *, int); 146 void uvm_map_freelist_update(struct vm_map*, 147 struct uvm_map_deadq*, vaddr_t, vaddr_t, 148 vaddr_t, vaddr_t, int); 149 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 150 vaddr_t, vaddr_t, int); 151 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 152 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 153 int); 154 int uvm_map_findspace(struct vm_map*, 155 struct vm_map_entry**, struct vm_map_entry**, 156 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 157 vaddr_t); 158 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 159 void uvm_map_addr_augment(struct vm_map_entry*); 160 161 int uvm_map_inentry_recheck(u_long, vaddr_t, 162 struct p_inentry *); 163 boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *, 164 vaddr_t, int (*)(vm_map_entry_t), u_long); 165 /* 166 * 
Tree management functions.
 */

static __inline void	uvm_mapent_copy(struct vm_map_entry*,
			    struct vm_map_entry*);
static inline int	uvm_mapentry_addrcmp(const struct vm_map_entry*,
			    const struct vm_map_entry*);
void			uvm_mapent_free_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			uvm_mapent_free_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			uvm_mapent_addr_insert(struct vm_map*,
			    struct vm_map_entry*);
void			uvm_mapent_addr_remove(struct vm_map*,
			    struct vm_map_entry*);
void			uvm_map_splitentry(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t);
vsize_t			uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
int			uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);

/*
 * uvm_vmspace_fork helper functions.
 */
struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t,
			    struct vm_map_entry*, struct uvm_map_deadq*, int,
			    int);
struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);

/*
 * Tree validation.
 */
#ifdef VMMAP_DEBUG
void			uvm_tree_assert(struct vm_map*, int, char*,
			    char*, int);
#define UVM_ASSERT(map, cond, file, line)				\
	uvm_tree_assert((map), (cond), #cond, (file), (line))
void			uvm_tree_sanity(struct vm_map*, char*, int);
void			uvm_tree_size_chk(struct vm_map*, char*, int);
void			vmspace_validate(struct vm_map*);
#else
#define uvm_tree_sanity(_map, _file, _line)	do {} while (0)
#define uvm_tree_size_chk(_map, _file, _line)	do {} while (0)
#define vmspace_validate(_map)			do {} while (0)
#endif

/*
 * All architectures will have pmap_prefer.
 */
#ifndef PMAP_PREFER
#define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
#define PMAP_PREFER_OFFSET(off)	0
#define PMAP_PREFER(addr, off)	(addr)
#endif

/*
 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
 *
 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size
 * each time.
 */
#define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_ALLOCMUL	4
/*
 * When selecting a random free-space block, look at most FSPACE_DELTA blocks
 * ahead.
 */
#define FSPACE_DELTA		8
/*
 * Put allocations adjacent to previous allocations when the free-space tree
 * is larger than FSPACE_COMPACT entries.
 *
 * Alignment and PMAP_PREFER may still cause the entry to not be fully
 * adjacent. Note that this strategy reduces memory fragmentation (by leaving
 * a large space before or after the allocation).
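 * (With the value defined below, compaction only kicks in once the
 * free-space tree holds more than 128 entries.)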
255 */ 256 #define FSPACE_COMPACT 128 257 /* 258 * Make the address selection skip at most this many bytes from the start of 259 * the free space in which the allocation takes place. 260 * 261 * The main idea behind a randomized address space is that an attacker cannot 262 * know where to target his attack. Therefore, the location of objects must be 263 * as random as possible. However, the goal is not to create the most sparse 264 * map that is possible. 265 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 266 * sizes, thereby reducing the sparseness. The biggest randomization comes 267 * from fragmentation, i.e. FSPACE_COMPACT. 268 */ 269 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 270 /* 271 * Allow for small gaps in the overflow areas. 272 * Gap size is in bytes and does not have to be a multiple of page-size. 273 */ 274 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 275 276 /* auto-allocate address lower bound */ 277 #define VMMAP_MIN_ADDR PAGE_SIZE 278 279 280 #ifdef DEADBEEF0 281 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 282 #else 283 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 284 #endif 285 286 #ifdef DEBUG 287 int uvm_map_printlocks = 0; 288 289 #define LPRINTF(_args) \ 290 do { \ 291 if (uvm_map_printlocks) \ 292 printf _args; \ 293 } while (0) 294 #else 295 #define LPRINTF(_args) do {} while (0) 296 #endif 297 298 static struct mutex uvm_kmapent_mtx; 299 static struct timeval uvm_kmapent_last_warn_time; 300 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 301 302 const char vmmapbsy[] = "vmmapbsy"; 303 304 /* 305 * pool for vmspace structures. 306 */ 307 struct pool uvm_vmspace_pool; 308 309 /* 310 * pool for dynamically-allocated map entries. 311 */ 312 struct pool uvm_map_entry_pool; 313 struct pool uvm_map_entry_kmem_pool; 314 315 /* 316 * This global represents the end of the kernel virtual address 317 * space. If we want to exceed this, we must grow the kernel 318 * virtual address space dynamically. 319 * 320 * Note, this variable is locked by kernel_map's lock. 321 */ 322 vaddr_t uvm_maxkaddr; 323 324 /* 325 * Locking predicate. 326 */ 327 #define UVM_MAP_REQ_WRITE(_map) \ 328 do { \ 329 if ((_map)->ref_count > 0) { \ 330 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 331 rw_assert_wrlock(&(_map)->lock); \ 332 else \ 333 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 334 } \ 335 } while (0) 336 337 #define vm_map_modflags(map, set, clear) \ 338 do { \ 339 mtx_enter(&(map)->flags_lock); \ 340 (map)->flags = ((map)->flags | (set)) & ~(clear); \ 341 mtx_leave(&(map)->flags_lock); \ 342 } while (0) 343 344 345 /* 346 * Tree describing entries by address. 347 * 348 * Addresses are unique. 349 * Entries with start == end may only exist if they are the first entry 350 * (sorted by address) within a free-memory tree. 351 */ 352 353 static inline int 354 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 355 const struct vm_map_entry *e2) 356 { 357 return e1->start < e2->start ? -1 : e1->start > e2->start; 358 } 359 360 /* 361 * Copy mapentry. 
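 * Only the region between uvm_map_entry_start_copy and
 * uvm_map_entry_stop_copy is copied; the tree linkage fields outside
 * those markers are left untouched.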
362 */ 363 static __inline void 364 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 365 { 366 caddr_t csrc, cdst; 367 size_t sz; 368 369 csrc = (caddr_t)src; 370 cdst = (caddr_t)dst; 371 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 372 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 373 374 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 375 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 376 memcpy(cdst, csrc, sz); 377 } 378 379 /* 380 * Handle free-list insertion. 381 */ 382 void 383 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 384 struct vm_map_entry *entry) 385 { 386 const struct uvm_addr_functions *fun; 387 #ifdef VMMAP_DEBUG 388 vaddr_t min, max, bound; 389 #endif 390 391 #ifdef VMMAP_DEBUG 392 /* 393 * Boundary check. 394 * Boundaries are folded if they go on the same free list. 395 */ 396 min = VMMAP_FREE_START(entry); 397 max = VMMAP_FREE_END(entry); 398 399 while (min < max) { 400 bound = uvm_map_boundary(map, min, max); 401 KASSERT(uvm_map_uaddr(map, min) == uaddr); 402 min = bound; 403 } 404 #endif 405 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 406 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 407 408 UVM_MAP_REQ_WRITE(map); 409 410 /* Actual insert: forward to uaddr pointer. */ 411 if (uaddr != NULL) { 412 fun = uaddr->uaddr_functions; 413 KDASSERT(fun != NULL); 414 if (fun->uaddr_free_insert != NULL) 415 (*fun->uaddr_free_insert)(map, uaddr, entry); 416 entry->etype |= UVM_ET_FREEMAPPED; 417 } 418 419 /* Update fspace augmentation. */ 420 uvm_map_addr_augment(entry); 421 } 422 423 /* 424 * Handle free-list removal. 425 */ 426 void 427 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 428 struct vm_map_entry *entry) 429 { 430 const struct uvm_addr_functions *fun; 431 432 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 433 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 434 UVM_MAP_REQ_WRITE(map); 435 436 if (uaddr != NULL) { 437 fun = uaddr->uaddr_functions; 438 if (fun->uaddr_free_remove != NULL) 439 (*fun->uaddr_free_remove)(map, uaddr, entry); 440 entry->etype &= ~UVM_ET_FREEMAPPED; 441 } 442 } 443 444 /* 445 * Handle address tree insertion. 446 */ 447 void 448 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 449 { 450 struct vm_map_entry *res; 451 452 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 453 panic("uvm_mapent_addr_insert: entry still in addr list"); 454 KDASSERT(entry->start <= entry->end); 455 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 456 (entry->end & (vaddr_t)PAGE_MASK) == 0); 457 458 UVM_MAP_REQ_WRITE(map); 459 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 460 if (res != NULL) { 461 panic("uvm_mapent_addr_insert: map %p entry %p " 462 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 463 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 464 map, entry, 465 entry->start, entry->end, entry->guard, entry->fspace, 466 res, res->start, res->end, res->guard, res->fspace); 467 } 468 } 469 470 /* 471 * Handle address tree removal. 
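 * The removed entry is poisoned with UVMMAP_DEADBEEF on the way out, so a
 * stale re-insert trips the RBT_CHECK() in uvm_mapent_addr_insert().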
 */
void
uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *res;

	UVM_MAP_REQ_WRITE(map);
	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
	if (res != entry)
		panic("uvm_mapent_addr_remove");
	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
}

/*
 * uvm_map_reference: add reference to a map
 *
 * XXX check map reference counter lock
 */
#define uvm_map_reference(_map)						\
	do {								\
		map->ref_count++;					\
	} while (0)

/*
 * Calculate the dused delta.
 */
vsize_t
uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
{
	struct vmspace *vm;
	vsize_t sz;
	vaddr_t lmax;
	vaddr_t stack_begin, stack_end; /* Position of stack. */

	KASSERT(map->flags & VM_MAP_ISVMSPACE);
	vm = (struct vmspace *)map;
	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);

	sz = 0;
	while (min != max) {
		lmax = max;
		if (min < stack_begin && lmax > stack_begin)
			lmax = stack_begin;
		else if (min < stack_end && lmax > stack_end)
			lmax = stack_end;

		if (min >= stack_begin && min < stack_end) {
			/* nothing */
		} else
			sz += lmax - min;
		min = lmax;
	}

	return sz >> PAGE_SHIFT;
}

/*
 * Find the entry describing the given address.
 */
struct vm_map_entry*
uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
{
	struct vm_map_entry *iter;

	iter = RBT_ROOT(uvm_map_addr, atree);
	while (iter != NULL) {
		if (iter->start > addr)
			iter = RBT_LEFT(uvm_map_addr, iter);
		else if (VMMAP_FREE_END(iter) <= addr)
			iter = RBT_RIGHT(uvm_map_addr, iter);
		else
			return iter;
	}
	return NULL;
}

/*
 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
 *
 * Push dead entries into a linked list.
 * Since the linked list abuses the address tree for storage, the entry
 * may not be linked in a map.
 *
 * *head must be initialized to NULL before the first call to this macro.
 * uvm_unmap_detach(*head, 0) will remove dead entries.
 */
static __inline void
dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
{
	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
}
#define DEAD_ENTRY_PUSH(_headptr, _entry)				\
	dead_entry_push((_headptr), (_entry))

/*
 * Helper function for uvm_map_findspace_tree.
 *
 * Given allocation constraints and pmap constraints, finds the
 * lowest and highest address in a range that can be used for the
 * allocation.
 *
 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
 *
 *
 * Big chunk of math with a seasoning of dragons.
 */
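/*
 * Illustrative example of the simple (align-only) case: with a free range
 * of [0x1000, 0x9000), sz = 0x2000, no guard page and align = 0x4000, the
 * window becomes sel_min = roundup(0x1000, 0x4000) = 0x4000 and
 * sel_max = (0x9000 - 0x2000) & ~(0x4000 - 1) = 0x4000, i.e. exactly one
 * candidate address remains.
 */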
int
uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
    struct vm_map_entry *sel, vaddr_t align,
    vaddr_t pmap_align, vaddr_t pmap_off, int bias)
{
	vaddr_t sel_min, sel_max;
#ifdef PMAP_PREFER
	vaddr_t pmap_min, pmap_max;
#endif /* PMAP_PREFER */
#ifdef DIAGNOSTIC
	int bad;
#endif /* DIAGNOSTIC */

	sel_min = VMMAP_FREE_START(sel);
	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);

#ifdef PMAP_PREFER

	/*
	 * There are two special cases, in which we can satisfy the align
	 * requirement and the pmap_prefer requirement.
	 * - when pmap_off == 0, we always select the largest of the two
	 * - when pmap_off % align == 0 and pmap_align > align, we simply
	 *   satisfy the pmap_align requirement and automatically
	 *   satisfy the align requirement.
	 */
	if (align > PAGE_SIZE &&
	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
		/*
		 * Simple case: only use align.
		 */
		sel_min = roundup(sel_min, align);
		sel_max &= ~(align - 1);

		if (sel_min > sel_max)
			return ENOMEM;

		/* Correct for bias. */
		if (sel_max - sel_min > FSPACE_BIASGAP) {
			if (bias > 0) {
				sel_min = sel_max - FSPACE_BIASGAP;
				sel_min = roundup(sel_min, align);
			} else if (bias < 0) {
				sel_max = sel_min + FSPACE_BIASGAP;
				sel_max &= ~(align - 1);
			}
		}
	} else if (pmap_align != 0) {
		/*
		 * Special case: satisfy both pmap_prefer and
		 * align argument.
		 */
		pmap_max = sel_max & ~(pmap_align - 1);
		pmap_min = sel_min;
		if (pmap_max < sel_min)
			return ENOMEM;

		/* Adjust pmap_min for BIASGAP for top-addr bias. */
		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
			pmap_min = pmap_max - FSPACE_BIASGAP;
		/* Align pmap_min. */
		pmap_min &= ~(pmap_align - 1);
		if (pmap_min < sel_min)
			pmap_min += pmap_align;
		if (pmap_min > pmap_max)
			return ENOMEM;

		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
			pmap_max = (pmap_min + FSPACE_BIASGAP) &
			    ~(pmap_align - 1);
		}
		if (pmap_min > pmap_max)
			return ENOMEM;

		/* Apply pmap prefer offset. */
		pmap_max |= pmap_off;
		if (pmap_max > sel_max)
			pmap_max -= pmap_align;
		pmap_min |= pmap_off;
		if (pmap_min < sel_min)
			pmap_min += pmap_align;

		/*
		 * Fixup: it's possible that pmap_min and pmap_max
		 * cross each other. In this case, try to find one
		 * address that is allowed.
		 * (This usually happens in biased case.)
		 */
		if (pmap_min > pmap_max) {
			if (pmap_min < sel_max)
				pmap_max = pmap_min;
			else if (pmap_max > sel_min)
				pmap_min = pmap_max;
			else
				return ENOMEM;
		}

		/* Internal validation. */
		KDASSERT(pmap_min <= pmap_max);

		sel_min = pmap_min;
		sel_max = pmap_max;
	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_min = sel_max - FSPACE_BIASGAP;
	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_max = sel_min + FSPACE_BIASGAP;

#else

	if (align > PAGE_SIZE) {
		sel_min = roundup(sel_min, align);
		sel_max &= ~(align - 1);
		if (sel_min > sel_max)
			return ENOMEM;

		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
			if (bias > 0) {
				sel_min = roundup(sel_max - FSPACE_BIASGAP,
				    align);
			} else {
				sel_max = (sel_min + FSPACE_BIASGAP) &
				    ~(align - 1);
			}
		}
	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_min = sel_max - FSPACE_BIASGAP;
	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_max = sel_min + FSPACE_BIASGAP;

#endif

	if (sel_min > sel_max)
		return ENOMEM;

#ifdef DIAGNOSTIC
	bad = 0;
	/* Lower boundary check. */
	if (sel_min < VMMAP_FREE_START(sel)) {
		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
		    sel_min, VMMAP_FREE_START(sel));
		bad++;
	}
	/* Upper boundary check. */
	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
		    sel_max,
		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
		bad++;
	}
	/* Lower boundary alignment. */
	if (align != 0 && (sel_min & (align - 1)) != 0) {
		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
		    sel_min, align);
		bad++;
	}
	/* Upper boundary alignment. */
	if (align != 0 && (sel_max & (align - 1)) != 0) {
		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
		    sel_max, align);
		bad++;
	}
	/* Lower boundary PMAP_PREFER check. */
	if (pmap_align != 0 && align == 0 &&
	    (sel_min & (pmap_align - 1)) != pmap_off) {
		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
		    sel_min, sel_min & (pmap_align - 1), pmap_off);
		bad++;
	}
	/* Upper boundary PMAP_PREFER check. */
	if (pmap_align != 0 && align == 0 &&
	    (sel_max & (pmap_align - 1)) != pmap_off) {
		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
		    sel_max, sel_max & (pmap_align - 1), pmap_off);
		bad++;
	}

	if (bad) {
		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
		    "bias = %d, "
		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
	}
#endif /* DIAGNOSTIC */

	*min = sel_min;
	*max = sel_max;
	return 0;
}
/*
 * Test if memory starting at addr with sz bytes is free.
 *
 * Fills in *start_ptr and *end_ptr to be the first and last entry describing
 * the space.
 * If called with prefilled *start_ptr and *end_ptr, they must be correct.
 */
int
uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
    vaddr_t addr, vsize_t sz)
{
	struct uvm_addr_state *free;
	struct uvm_map_addr *atree;
	struct vm_map_entry *i, *i_end;

	if (addr + sz < addr)
		return 0;

	/*
	 * Kernel memory above uvm_maxkaddr is considered unavailable.
	 */
	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
		if (addr + sz > uvm_maxkaddr)
			return 0;
	}

	atree = &map->addr;

	/*
	 * Fill in first, last, so they point at the entries containing the
	 * first and last address of the range.
	 * Note that if they are not NULL, we don't perform the lookup.
	 */
	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
	if (*start_ptr == NULL) {
		*start_ptr = uvm_map_entrybyaddr(atree, addr);
		if (*start_ptr == NULL)
			return 0;
	} else
		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
	if (*end_ptr == NULL) {
		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
			*end_ptr = *start_ptr;
		else {
			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
			if (*end_ptr == NULL)
				return 0;
		}
	} else
		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));

	/* Validation. */
	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
	KDASSERT((*start_ptr)->start <= addr &&
	    VMMAP_FREE_END(*start_ptr) > addr &&
	    (*end_ptr)->start < addr + sz &&
	    VMMAP_FREE_END(*end_ptr) >= addr + sz);

	/*
	 * Check that none of the entries intersects with <addr, addr+sz>.
	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
	 * considered unavailable unless called by those allocators.
	 */
	i = *start_ptr;
	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
	for (; i != i_end;
	    i = RBT_NEXT(uvm_map_addr, i)) {
		if (i->start != i->end && i->end > addr)
			return 0;

		/*
		 * uaddr_exe and uaddr_brk_stack may only be used
		 * by these allocators and the NULL uaddr (i.e. no
		 * uaddr).
		 * Reject if this requirement is not met.
		 */
		if (uaddr != NULL) {
			free = uvm_map_uaddr_e(map, i);

			if (uaddr != free && free != NULL &&
			    (free == map->uaddr_exe ||
			     free == map->uaddr_brk_stack))
				return 0;
		}
	}

	return -1;
}
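/*
 * Usage sketch, mirroring the callers in this file (e.g. uvm_mapanon()):
 * pass NULL start/end pointers when the surrounding entries are not yet
 * known and let uvm_map_isavail() perform the lookups.
 *
 *	struct vm_map_entry *first = NULL, *last = NULL;
 *
 *	if (!uvm_map_isavail(map, NULL, &first, &last, addr, sz))
 *		error = ENOMEM;
 */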
/*
 * Invoke each address selector until an address is found.
 * Will not invoke uaddr_exe.
 */
int
uvm_map_findspace(struct vm_map *map, struct vm_map_entry **first,
    struct vm_map_entry **last, vaddr_t *addr, vsize_t sz,
    vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
{
	struct uvm_addr_state *uaddr;
	int i;

	/*
	 * Allocation for sz bytes at any address,
	 * using the addr selectors in order.
	 */
	for (i = 0; i < nitems(map->uaddr_any); i++) {
		uaddr = map->uaddr_any[i];

		if (uvm_addr_invoke(map, uaddr, first, last,
		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
			return 0;
	}

	/* Fall back to brk() and stack() address selectors. */
	uaddr = map->uaddr_brk_stack;
	if (uvm_addr_invoke(map, uaddr, first, last,
	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
		return 0;

	return ENOMEM;
}

/* Calculate entry augmentation value. */
vsize_t
uvm_map_addr_augment_get(struct vm_map_entry *entry)
{
	vsize_t augment;
	struct vm_map_entry *left, *right;

	augment = entry->fspace;
	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, left->fspace_augment);
	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, right->fspace_augment);
	return augment;
}

/*
 * Update augmentation data in entry.
 */
void
uvm_map_addr_augment(struct vm_map_entry *entry)
{
	vsize_t augment;

	while (entry != NULL) {
		/* Calculate value for augmentation. */
		augment = uvm_map_addr_augment_get(entry);

		/*
		 * Descend update.
		 * Once we find an entry that already has the correct value,
		 * stop, since it means all its parents will use the correct
		 * value too.
		 */
		if (entry->fspace_augment == augment)
			return;
		entry->fspace_augment = augment;
		entry = RBT_PARENT(uvm_map_addr, entry);
	}
}

/*
 * uvm_mapanon: establish a valid mapping in map for an anon
 *
 * => *addr and sz must be a multiple of PAGE_SIZE.
 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
 * => map must be unlocked.
 *
 * => align: align vaddr, must be a power-of-2.
 *    Align is only a hint and will be ignored if the alignment fails.
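 *
 * An anonymous read/write mapping request would typically be encoded with
 * UVM_MAPFLAG() (the exact flag mix below is only illustrative):
 *
 *	vaddr_t va = 0;
 *	error = uvm_mapanon(map, &va, PAGE_SIZE, 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE,
 *	    PROT_READ | PROT_WRITE | PROT_EXEC,
 *	    MAP_INHERIT_COPY, MADV_NORMAL, UVM_FLAG_COPYONW));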
943 */ 944 int 945 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 946 vsize_t align, unsigned int flags) 947 { 948 struct vm_map_entry *first, *last, *entry, *new; 949 struct uvm_map_deadq dead; 950 vm_prot_t prot; 951 vm_prot_t maxprot; 952 vm_inherit_t inherit; 953 int advice; 954 int error; 955 vaddr_t pmap_align, pmap_offset; 956 vaddr_t hint; 957 958 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 959 KASSERT(map != kernel_map); 960 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 961 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 962 splassert(IPL_NONE); 963 KASSERT((flags & UVM_FLAG_TRYLOCK) == 0); 964 965 /* 966 * We use pmap_align and pmap_offset as alignment and offset variables. 967 * 968 * Because the align parameter takes precedence over pmap prefer, 969 * the pmap_align will need to be set to align, with pmap_offset = 0, 970 * if pmap_prefer will not align. 971 */ 972 pmap_align = MAX(align, PAGE_SIZE); 973 pmap_offset = 0; 974 975 /* Decode parameters. */ 976 prot = UVM_PROTECTION(flags); 977 maxprot = UVM_MAXPROTECTION(flags); 978 advice = UVM_ADVICE(flags); 979 inherit = UVM_INHERIT(flags); 980 error = 0; 981 hint = trunc_page(*addr); 982 TAILQ_INIT(&dead); 983 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 984 KASSERT((align & (align - 1)) == 0); 985 986 /* Check protection. */ 987 if ((prot & maxprot) != prot) 988 return EACCES; 989 990 /* 991 * Before grabbing the lock, allocate a map entry for later 992 * use to ensure we don't wait for memory while holding the 993 * vm_map_lock. 994 */ 995 new = uvm_mapent_alloc(map, flags); 996 if (new == NULL) 997 return(ENOMEM); 998 999 vm_map_lock(map); 1000 first = last = NULL; 1001 if (flags & UVM_FLAG_FIXED) { 1002 /* 1003 * Fixed location. 1004 * 1005 * Note: we ignore align, pmap_prefer. 1006 * Fill in first, last and *addr. 1007 */ 1008 KASSERT((*addr & PAGE_MASK) == 0); 1009 1010 /* Check that the space is available. */ 1011 if (flags & UVM_FLAG_UNMAP) { 1012 if ((flags & UVM_FLAG_STACK) && 1013 !uvm_map_is_stack_remappable(map, *addr, sz)) { 1014 error = EINVAL; 1015 goto unlock; 1016 } 1017 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1018 } 1019 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1020 error = ENOMEM; 1021 goto unlock; 1022 } 1023 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1024 (align == 0 || (*addr & (align - 1)) == 0) && 1025 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1026 /* 1027 * Address used as hint. 1028 * 1029 * Note: we enforce the alignment restriction, 1030 * but ignore pmap_prefer. 1031 */ 1032 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1033 /* Run selection algorithm for executables. */ 1034 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1035 addr, sz, pmap_align, pmap_offset, prot, hint); 1036 1037 if (error != 0) 1038 goto unlock; 1039 } else { 1040 /* Update freelists from vmspace. */ 1041 uvm_map_vmspace_update(map, &dead, flags); 1042 1043 error = uvm_map_findspace(map, &first, &last, addr, sz, 1044 pmap_align, pmap_offset, prot, hint); 1045 1046 if (error != 0) 1047 goto unlock; 1048 } 1049 1050 /* Double-check if selected address doesn't cause overflow. */ 1051 if (*addr + sz < *addr) { 1052 error = ENOMEM; 1053 goto unlock; 1054 } 1055 1056 /* If we only want a query, return now. */ 1057 if (flags & UVM_FLAG_QUERY) { 1058 error = 0; 1059 goto unlock; 1060 } 1061 1062 /* 1063 * Create new entry. 1064 * first and last may be invalidated after this call. 
1065 */ 1066 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1067 new); 1068 if (entry == NULL) { 1069 error = ENOMEM; 1070 goto unlock; 1071 } 1072 new = NULL; 1073 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1074 entry->object.uvm_obj = NULL; 1075 entry->offset = 0; 1076 entry->protection = prot; 1077 entry->max_protection = maxprot; 1078 entry->inheritance = inherit; 1079 entry->wired_count = 0; 1080 entry->advice = advice; 1081 if (prot & PROT_WRITE) 1082 map->wserial++; 1083 if (flags & UVM_FLAG_SYSCALL) { 1084 entry->etype |= UVM_ET_SYSCALL; 1085 map->wserial++; 1086 } 1087 if (flags & UVM_FLAG_STACK) { 1088 entry->etype |= UVM_ET_STACK; 1089 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 1090 map->sserial++; 1091 } 1092 if (flags & UVM_FLAG_COPYONW) { 1093 entry->etype |= UVM_ET_COPYONWRITE; 1094 if ((flags & UVM_FLAG_OVERLAY) == 0) 1095 entry->etype |= UVM_ET_NEEDSCOPY; 1096 } 1097 if (flags & UVM_FLAG_CONCEAL) 1098 entry->etype |= UVM_ET_CONCEAL; 1099 if (flags & UVM_FLAG_OVERLAY) { 1100 KERNEL_LOCK(); 1101 entry->aref.ar_pageoff = 0; 1102 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1103 KERNEL_UNLOCK(); 1104 } 1105 1106 /* Update map and process statistics. */ 1107 map->size += sz; 1108 if (prot != PROT_NONE) { 1109 ((struct vmspace *)map)->vm_dused += 1110 uvmspace_dused(map, *addr, *addr + sz); 1111 } 1112 1113 unlock: 1114 vm_map_unlock(map); 1115 1116 /* 1117 * Remove dead entries. 1118 * 1119 * Dead entries may be the result of merging. 1120 * uvm_map_mkentry may also create dead entries, when it attempts to 1121 * destroy free-space entries. 1122 */ 1123 uvm_unmap_detach(&dead, 0); 1124 1125 if (new) 1126 uvm_mapent_free(new); 1127 return error; 1128 } 1129 1130 /* 1131 * uvm_map: establish a valid mapping in map 1132 * 1133 * => *addr and sz must be a multiple of PAGE_SIZE. 1134 * => map must be unlocked. 1135 * => <uobj,uoffset> value meanings (4 cases): 1136 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1137 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1138 * [3] <uobj,uoffset> == normal mapping 1139 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1140 * 1141 * case [4] is for kernel mappings where we don't know the offset until 1142 * we've found a virtual address. note that kernel object offsets are 1143 * always relative to vm_map_min(kernel_map). 1144 * 1145 * => align: align vaddr, must be a power-of-2. 1146 * Align is only a hint and will be ignored if the alignment fails. 1147 */ 1148 int 1149 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1150 struct uvm_object *uobj, voff_t uoffset, 1151 vsize_t align, unsigned int flags) 1152 { 1153 struct vm_map_entry *first, *last, *entry, *new; 1154 struct uvm_map_deadq dead; 1155 vm_prot_t prot; 1156 vm_prot_t maxprot; 1157 vm_inherit_t inherit; 1158 int advice; 1159 int error; 1160 vaddr_t pmap_align, pmap_offset; 1161 vaddr_t hint; 1162 1163 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1164 splassert(IPL_NONE); 1165 else 1166 splassert(IPL_VM); 1167 1168 /* 1169 * We use pmap_align and pmap_offset as alignment and offset variables. 1170 * 1171 * Because the align parameter takes precedence over pmap prefer, 1172 * the pmap_align will need to be set to align, with pmap_offset = 0, 1173 * if pmap_prefer will not align. 
1174 */ 1175 if (uoffset == UVM_UNKNOWN_OFFSET) { 1176 pmap_align = MAX(align, PAGE_SIZE); 1177 pmap_offset = 0; 1178 } else { 1179 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1180 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1181 1182 if (align == 0 || 1183 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1184 /* pmap_offset satisfies align, no change. */ 1185 } else { 1186 /* Align takes precedence over pmap prefer. */ 1187 pmap_align = align; 1188 pmap_offset = 0; 1189 } 1190 } 1191 1192 /* Decode parameters. */ 1193 prot = UVM_PROTECTION(flags); 1194 maxprot = UVM_MAXPROTECTION(flags); 1195 advice = UVM_ADVICE(flags); 1196 inherit = UVM_INHERIT(flags); 1197 error = 0; 1198 hint = trunc_page(*addr); 1199 TAILQ_INIT(&dead); 1200 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1201 KASSERT((align & (align - 1)) == 0); 1202 1203 /* Holes are incompatible with other types of mappings. */ 1204 if (flags & UVM_FLAG_HOLE) { 1205 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1206 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1207 } 1208 1209 /* Unset hint for kernel_map non-fixed allocations. */ 1210 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1211 hint = 0; 1212 1213 /* Check protection. */ 1214 if ((prot & maxprot) != prot) 1215 return EACCES; 1216 1217 if (map == kernel_map && 1218 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1219 panic("uvm_map: kernel map W^X violation requested"); 1220 1221 /* 1222 * Before grabbing the lock, allocate a map entry for later 1223 * use to ensure we don't wait for memory while holding the 1224 * vm_map_lock. 1225 */ 1226 new = uvm_mapent_alloc(map, flags); 1227 if (new == NULL) 1228 return(ENOMEM); 1229 1230 if (flags & UVM_FLAG_TRYLOCK) { 1231 if (vm_map_lock_try(map) == FALSE) { 1232 error = EFAULT; 1233 goto out; 1234 } 1235 } else { 1236 vm_map_lock(map); 1237 } 1238 1239 first = last = NULL; 1240 if (flags & UVM_FLAG_FIXED) { 1241 /* 1242 * Fixed location. 1243 * 1244 * Note: we ignore align, pmap_prefer. 1245 * Fill in first, last and *addr. 1246 */ 1247 KASSERT((*addr & PAGE_MASK) == 0); 1248 1249 /* 1250 * Grow pmap to include allocated address. 1251 * If the growth fails, the allocation will fail too. 1252 */ 1253 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1254 uvm_maxkaddr < (*addr + sz)) { 1255 uvm_map_kmem_grow(map, &dead, 1256 *addr + sz - uvm_maxkaddr, flags); 1257 } 1258 1259 /* Check that the space is available. */ 1260 if (flags & UVM_FLAG_UNMAP) 1261 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1262 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1263 error = ENOMEM; 1264 goto unlock; 1265 } 1266 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1267 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1268 (align == 0 || (*addr & (align - 1)) == 0) && 1269 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1270 /* 1271 * Address used as hint. 1272 * 1273 * Note: we enforce the alignment restriction, 1274 * but ignore pmap_prefer. 1275 */ 1276 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1277 /* Run selection algorithm for executables. */ 1278 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1279 addr, sz, pmap_align, pmap_offset, prot, hint); 1280 1281 /* Grow kernel memory and try again. 
*/ 1282 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1283 uvm_map_kmem_grow(map, &dead, sz, flags); 1284 1285 error = uvm_addr_invoke(map, map->uaddr_exe, 1286 &first, &last, addr, sz, 1287 pmap_align, pmap_offset, prot, hint); 1288 } 1289 1290 if (error != 0) 1291 goto unlock; 1292 } else { 1293 /* Update freelists from vmspace. */ 1294 if (map->flags & VM_MAP_ISVMSPACE) 1295 uvm_map_vmspace_update(map, &dead, flags); 1296 1297 error = uvm_map_findspace(map, &first, &last, addr, sz, 1298 pmap_align, pmap_offset, prot, hint); 1299 1300 /* Grow kernel memory and try again. */ 1301 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1302 uvm_map_kmem_grow(map, &dead, sz, flags); 1303 1304 error = uvm_map_findspace(map, &first, &last, addr, sz, 1305 pmap_align, pmap_offset, prot, hint); 1306 } 1307 1308 if (error != 0) 1309 goto unlock; 1310 } 1311 1312 /* Double-check if selected address doesn't cause overflow. */ 1313 if (*addr + sz < *addr) { 1314 error = ENOMEM; 1315 goto unlock; 1316 } 1317 1318 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1319 uvm_maxkaddr >= *addr + sz); 1320 1321 /* If we only want a query, return now. */ 1322 if (flags & UVM_FLAG_QUERY) { 1323 error = 0; 1324 goto unlock; 1325 } 1326 1327 if (uobj == NULL) 1328 uoffset = 0; 1329 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1330 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1331 uoffset = *addr - vm_map_min(kernel_map); 1332 } 1333 1334 /* 1335 * Create new entry. 1336 * first and last may be invalidated after this call. 1337 */ 1338 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1339 new); 1340 if (entry == NULL) { 1341 error = ENOMEM; 1342 goto unlock; 1343 } 1344 new = NULL; 1345 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1346 entry->object.uvm_obj = uobj; 1347 entry->offset = uoffset; 1348 entry->protection = prot; 1349 entry->max_protection = maxprot; 1350 entry->inheritance = inherit; 1351 entry->wired_count = 0; 1352 entry->advice = advice; 1353 if (prot & PROT_WRITE) 1354 map->wserial++; 1355 if (flags & UVM_FLAG_SYSCALL) { 1356 entry->etype |= UVM_ET_SYSCALL; 1357 map->wserial++; 1358 } 1359 if (flags & UVM_FLAG_STACK) { 1360 entry->etype |= UVM_ET_STACK; 1361 if (flags & UVM_FLAG_UNMAP) 1362 map->sserial++; 1363 } 1364 if (uobj) 1365 entry->etype |= UVM_ET_OBJ; 1366 else if (flags & UVM_FLAG_HOLE) 1367 entry->etype |= UVM_ET_HOLE; 1368 if (flags & UVM_FLAG_NOFAULT) 1369 entry->etype |= UVM_ET_NOFAULT; 1370 if (flags & UVM_FLAG_WC) 1371 entry->etype |= UVM_ET_WC; 1372 if (flags & UVM_FLAG_COPYONW) { 1373 entry->etype |= UVM_ET_COPYONWRITE; 1374 if ((flags & UVM_FLAG_OVERLAY) == 0) 1375 entry->etype |= UVM_ET_NEEDSCOPY; 1376 } 1377 if (flags & UVM_FLAG_CONCEAL) 1378 entry->etype |= UVM_ET_CONCEAL; 1379 if (flags & UVM_FLAG_OVERLAY) { 1380 entry->aref.ar_pageoff = 0; 1381 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1382 } 1383 1384 /* Update map and process statistics. */ 1385 if (!(flags & UVM_FLAG_HOLE)) { 1386 map->size += sz; 1387 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && 1388 prot != PROT_NONE) { 1389 ((struct vmspace *)map)->vm_dused += 1390 uvmspace_dused(map, *addr, *addr + sz); 1391 } 1392 } 1393 1394 /* 1395 * Try to merge entry. 1396 * 1397 * Userland allocations are kept separated most of the time. 1398 * Forego the effort of merging what most of the time can't be merged 1399 * and only try the merge if it concerns a kernel entry. 
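 * (Userland maps carry VM_MAP_ISVMSPACE, so the check below skips them and
 * only kernel-map entries are ever joined here.)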
 */
	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
	    (map->flags & VM_MAP_ISVMSPACE) == 0)
		uvm_mapent_tryjoin(map, entry, &dead);

unlock:
	vm_map_unlock(map);

	/*
	 * Remove dead entries.
	 *
	 * Dead entries may be the result of merging.
	 * uvm_map_mkentry may also create dead entries, when it attempts to
	 * destroy free-space entries.
	 */
	if (map->flags & VM_MAP_INTRSAFE)
		uvm_unmap_detach_intrsafe(&dead);
	else
		uvm_unmap_detach(&dead, 0);
out:
	if (new)
		uvm_mapent_free(new);
	return error;
}

/*
 * True iff e1 and e2 can be joined together.
 */
int
uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
    struct vm_map_entry *e2)
{
	KDASSERT(e1 != NULL && e2 != NULL);

	/* Must be the same entry type and not have free memory between. */
	if (e1->etype != e2->etype || e1->end != e2->start)
		return 0;

	/* Submaps are never joined. */
	if (UVM_ET_ISSUBMAP(e1))
		return 0;

	/* Never merge wired memory. */
	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
		return 0;

	/* Protection, inheritance and advice must be equal. */
	if (e1->protection != e2->protection ||
	    e1->max_protection != e2->max_protection ||
	    e1->inheritance != e2->inheritance ||
	    e1->advice != e2->advice)
		return 0;

	/* If uvm_object: object itself and offsets within object must match. */
	if (UVM_ET_ISOBJ(e1)) {
		if (e1->object.uvm_obj != e2->object.uvm_obj)
			return 0;
		if (e1->offset + (e1->end - e1->start) != e2->offset)
			return 0;
	}

	/*
	 * Cannot join shared amaps.
	 * Note: no need to lock amap to look at refs, since we don't care
	 * about its exact value.
	 * If it is 1 (i.e. we have the only reference) it will stay there.
	 */
	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
		return 0;
	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
		return 0;

	/* Apparently, e1 and e2 match. */
	return 1;
}

/*
 * Join support function.
 *
 * Returns the merged entry on success.
 * Returns NULL if the merge failed.
 */
struct vm_map_entry*
uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
    struct vm_map_entry *e2, struct uvm_map_deadq *dead)
{
	struct uvm_addr_state *free;

	/*
	 * Merging is not supported for map entries that
	 * contain an amap in e1. This should never happen
	 * anyway, because only kernel entries are merged.
	 * These do not contain amaps.
	 * e2 contains no real information in its amap,
	 * so it can be erased immediately.
	 */
	KASSERT(e1->aref.ar_amap == NULL);

	/*
	 * Don't drop obj reference:
	 * uvm_unmap_detach will do this for us.
	 */
	free = uvm_map_uaddr_e(map, e1);
	uvm_mapent_free_remove(map, free, e1);

	free = uvm_map_uaddr_e(map, e2);
	uvm_mapent_free_remove(map, free, e2);
	uvm_mapent_addr_remove(map, e2);
	e1->end = e2->end;
	e1->guard = e2->guard;
	e1->fspace = e2->fspace;
	uvm_mapent_free_insert(map, free, e1);

	DEAD_ENTRY_PUSH(dead, e2);
	return e1;
}
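/*
 * As seen in uvm_map() above, joining is only attempted for kernel maps:
 *
 *	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
 *	    (map->flags & VM_MAP_ISVMSPACE) == 0)
 *		uvm_mapent_tryjoin(map, entry, &dead);
 */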
/*
 * Attempt forward and backward joining of entry.
 *
 * Returns entry after joins.
 * We are guaranteed that the amap of entry is either non-existent or
 * has never been used.
 */
struct vm_map_entry*
uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
    struct uvm_map_deadq *dead)
{
	struct vm_map_entry *other;
	struct vm_map_entry *merged;

	/* Merge with previous entry. */
	other = RBT_PREV(uvm_map_addr, entry);
	if (other && uvm_mapent_isjoinable(map, other, entry)) {
		merged = uvm_mapent_merge(map, other, entry, dead);
		if (merged)
			entry = merged;
	}

	/*
	 * Merge with next entry.
	 *
	 * Because amap can only extend forward and the next entry
	 * probably contains sensible info, only perform forward merging
	 * in the absence of an amap.
	 */
	other = RBT_NEXT(uvm_map_addr, entry);
	if (other && entry->aref.ar_amap == NULL &&
	    other->aref.ar_amap == NULL &&
	    uvm_mapent_isjoinable(map, entry, other)) {
		merged = uvm_mapent_merge(map, entry, other, dead);
		if (merged)
			entry = merged;
	}

	return entry;
}

/*
 * Kill entries that are no longer in a map.
 */
void
uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
{
	struct vm_map_entry *entry, *tmp;
	int waitok = flags & UVM_PLA_WAITOK;

	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
		/* Skip entries for which we have to grab the kernel lock. */
		if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) ||
		    UVM_ET_ISOBJ(entry))
			continue;

		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}

	if (TAILQ_EMPTY(deadq))
		return;

	KERNEL_LOCK();
	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
		if (waitok)
			uvm_pause();
		/* Drop reference to amap, if we've got one. */
		if (entry->aref.ar_amap)
			amap_unref(entry->aref.ar_amap,
			    entry->aref.ar_pageoff,
			    atop(entry->end - entry->start),
			    flags & AMAP_REFALL);

		/* Drop reference to our backing object, if we've got one. */
		if (UVM_ET_ISSUBMAP(entry)) {
			/* ... unlikely to happen, but play it safe */
			uvm_map_deallocate(entry->object.sub_map);
		} else if (UVM_ET_ISOBJ(entry) &&
		    entry->object.uvm_obj->pgops->pgo_detach) {
			entry->object.uvm_obj->pgops->pgo_detach(
			    entry->object.uvm_obj);
		}

		/* Step to next. */
		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}
	KERNEL_UNLOCK();
}

void
uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
{
	struct vm_map_entry *entry;

	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
		KASSERT(entry->aref.ar_amap == NULL);
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(!UVM_ET_ISOBJ(entry));
		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}
}
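/*
 * Dead-queue lifecycle, as used by uvm_unmap() below: entries are collected
 * under the map lock and only torn down after it has been dropped.
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);
 */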
/*
 * Create and insert new entry.
 *
 * Returned entry contains new addresses and is inserted properly in the tree.
 * first and last are (probably) no longer valid.
 */
struct vm_map_entry*
uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
    struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
    struct uvm_map_deadq *dead, struct vm_map_entry *new)
{
	struct vm_map_entry *entry, *prev;
	struct uvm_addr_state *free;
	vaddr_t min, max;	/* free space boundaries for new entry */

	KDASSERT(map != NULL);
	KDASSERT(first != NULL);
	KDASSERT(last != NULL);
	KDASSERT(dead != NULL);
	KDASSERT(sz > 0);
	KDASSERT(addr + sz > addr);
	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
	uvm_tree_sanity(map, __FILE__, __LINE__);

	min = addr + sz;
	max = VMMAP_FREE_END(last);

	/* Initialize new entry. */
	if (new == NULL)
		entry = uvm_mapent_alloc(map, flags);
	else
		entry = new;
	if (entry == NULL)
		return NULL;
	entry->offset = 0;
	entry->etype = 0;
	entry->wired_count = 0;
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = NULL;

	entry->start = addr;
	entry->end = min;
	entry->guard = 0;
	entry->fspace = 0;

	/* Reset free space in first. */
	free = uvm_map_uaddr_e(map, first);
	uvm_mapent_free_remove(map, free, first);
	first->guard = 0;
	first->fspace = 0;

	/*
	 * Remove all entries that are fully replaced.
	 * We are iterating using last in reverse order.
	 */
	for (; first != last; last = prev) {
		prev = RBT_PREV(uvm_map_addr, last);

		KDASSERT(last->start == last->end);
		free = uvm_map_uaddr_e(map, last);
		uvm_mapent_free_remove(map, free, last);
		uvm_mapent_addr_remove(map, last);
		DEAD_ENTRY_PUSH(dead, last);
	}
	/* Remove first if it is entirely inside <addr, addr+sz>. */
	if (first->start == addr) {
		uvm_mapent_addr_remove(map, first);
		DEAD_ENTRY_PUSH(dead, first);
	} else {
		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
		    addr, flags);
	}

	/* Finally, link in entry.
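	 * The uvm_map_fix_space() call that follows re-creates the
	 * free-space bookkeeping for the remaining [min, max) range.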
*/ 1698 uvm_mapent_addr_insert(map, entry); 1699 uvm_map_fix_space(map, entry, min, max, flags); 1700 1701 uvm_tree_sanity(map, __FILE__, __LINE__); 1702 return entry; 1703 } 1704 1705 1706 /* 1707 * uvm_mapent_alloc: allocate a map entry 1708 */ 1709 struct vm_map_entry * 1710 uvm_mapent_alloc(struct vm_map *map, int flags) 1711 { 1712 struct vm_map_entry *me, *ne; 1713 int pool_flags; 1714 int i; 1715 1716 pool_flags = PR_WAITOK; 1717 if (flags & UVM_FLAG_TRYLOCK) 1718 pool_flags = PR_NOWAIT; 1719 1720 if (map->flags & VM_MAP_INTRSAFE || cold) { 1721 mtx_enter(&uvm_kmapent_mtx); 1722 if (SLIST_EMPTY(&uvm.kentry_free)) { 1723 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1724 &kd_nowait); 1725 if (ne == NULL) 1726 panic("uvm_mapent_alloc: cannot allocate map " 1727 "entry"); 1728 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1729 SLIST_INSERT_HEAD(&uvm.kentry_free, 1730 &ne[i], daddrs.addr_kentry); 1731 } 1732 if (ratecheck(&uvm_kmapent_last_warn_time, 1733 &uvm_kmapent_warn_rate)) 1734 printf("uvm_mapent_alloc: out of static " 1735 "map entries\n"); 1736 } 1737 me = SLIST_FIRST(&uvm.kentry_free); 1738 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1739 uvmexp.kmapent++; 1740 mtx_leave(&uvm_kmapent_mtx); 1741 me->flags = UVM_MAP_STATIC; 1742 } else if (map == kernel_map) { 1743 splassert(IPL_NONE); 1744 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1745 if (me == NULL) 1746 goto out; 1747 me->flags = UVM_MAP_KMEM; 1748 } else { 1749 splassert(IPL_NONE); 1750 me = pool_get(&uvm_map_entry_pool, pool_flags); 1751 if (me == NULL) 1752 goto out; 1753 me->flags = 0; 1754 } 1755 1756 if (me != NULL) { 1757 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1758 } 1759 1760 out: 1761 return(me); 1762 } 1763 1764 /* 1765 * uvm_mapent_free: free map entry 1766 * 1767 * => XXX: static pool for kernel map? 1768 */ 1769 void 1770 uvm_mapent_free(struct vm_map_entry *me) 1771 { 1772 if (me->flags & UVM_MAP_STATIC) { 1773 mtx_enter(&uvm_kmapent_mtx); 1774 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1775 uvmexp.kmapent--; 1776 mtx_leave(&uvm_kmapent_mtx); 1777 } else if (me->flags & UVM_MAP_KMEM) { 1778 splassert(IPL_NONE); 1779 pool_put(&uvm_map_entry_kmem_pool, me); 1780 } else { 1781 splassert(IPL_NONE); 1782 pool_put(&uvm_map_entry_pool, me); 1783 } 1784 } 1785 1786 /* 1787 * uvm_map_lookup_entry: find map entry at or before an address. 1788 * 1789 * => map must at least be read-locked by caller 1790 * => entry is returned in "entry" 1791 * => return value is true if address is in the returned entry 1792 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1793 * returned for those mappings. 1794 */ 1795 boolean_t 1796 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1797 struct vm_map_entry **entry) 1798 { 1799 *entry = uvm_map_entrybyaddr(&map->addr, address); 1800 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1801 (*entry)->start <= address && (*entry)->end > address; 1802 } 1803 1804 /* 1805 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet 1806 * grown -- then uvm_map_check_region_range() should not cache the entry 1807 * because growth won't be seen. 1808 */ 1809 int 1810 uvm_map_inentry_sp(vm_map_entry_t entry) 1811 { 1812 if ((entry->etype & UVM_ET_STACK) == 0) { 1813 if (entry->protection == PROT_NONE) 1814 return (-1); /* don't update range */ 1815 return (0); 1816 } 1817 return (1); 1818 } 1819 1820 /* 1821 * The system call must not come from a writeable entry, W^X is violated. 
 * (Would be nice if we can spot aliasing, which is also kind of bad)
 *
 * The system call must come from a syscall-labeled entry (which are
 * the text regions of the main program, sigtramp, ld.so, or libc).
 */
int
uvm_map_inentry_pc(vm_map_entry_t entry)
{
	if (entry->protection & PROT_WRITE)
		return (0);	/* not permitted */
	if ((entry->etype & UVM_ET_SYSCALL) == 0)
		return (0);	/* not permitted */
	return (1);
}

int
uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
{
	return (serial != ie->ie_serial || ie->ie_start == 0 ||
	    addr < ie->ie_start || addr >= ie->ie_end);
}

/*
 * Inside a vm_map find the reg address and verify it via function.
 * Remember low and high addresses of region if valid and return TRUE,
 * else return FALSE.
 */
boolean_t
uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    int (*fn)(vm_map_entry_t), u_long serial)
{
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int ret;

	if (addr < map->min_offset || addr >= map->max_offset)
		return (FALSE);

	/* lock map */
	vm_map_lock_read(map);

	/* lookup */
	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}

	ret = (*fn)(entry);
	if (ret == 0) {
		vm_map_unlock_read(map);
		return (FALSE);
	} else if (ret == 1) {
		ie->ie_start = entry->start;
		ie->ie_end = entry->end;
		ie->ie_serial = serial;
	} else {
		/* do not update, re-check later */
	}
	vm_map_unlock_read(map);
	return (TRUE);
}

boolean_t
uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
{
	union sigval sv;
	boolean_t ok = TRUE;

	if (uvm_map_inentry_recheck(serial, addr, ie)) {
		KERNEL_LOCK();
		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
		if (!ok) {
			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
			    addr, ie->ie_start, ie->ie_end);
			p->p_p->ps_acflag |= AMAP;
			sv.sival_ptr = (void *)PROC_PC(p);
			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
		}
		KERNEL_UNLOCK();
	}
	return (ok);
}

/*
 * Check whether the given address range can be converted to a MAP_STACK
 * mapping.
 *
 * Must be called with map locked.
 */
boolean_t
uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
{
	vaddr_t end = addr + sz;
	struct vm_map_entry *first, *iter, *prev = NULL;

	if (!uvm_map_lookup_entry(map, addr, &first)) {
		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
		    addr, end, map);
		return FALSE;
	}

	/*
	 * Check that the address range exists and is contiguous.
	 */
	for (iter = first; iter != NULL && iter->start < end;
	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
		/*
		 * Make sure that we do not have holes in the range.
1931 */ 1932 #if 0 1933 if (prev != NULL) { 1934 printf("prev->start 0x%lx, prev->end 0x%lx, " 1935 "iter->start 0x%lx, iter->end 0x%lx\n", 1936 prev->start, prev->end, iter->start, iter->end); 1937 } 1938 #endif 1939 1940 if (prev != NULL && prev->end != iter->start) { 1941 printf("map stack 0x%lx-0x%lx of map %p failed: " 1942 "hole in range\n", addr, end, map); 1943 return FALSE; 1944 } 1945 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1946 printf("map stack 0x%lx-0x%lx of map %p failed: " 1947 "hole in range\n", addr, end, map); 1948 return FALSE; 1949 } 1950 } 1951 1952 return TRUE; 1953 } 1954 1955 /* 1956 * Remap the middle-pages of an existing mapping as a stack range. 1957 * If there exists a previous contiguous mapping with the given range 1958 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1959 * mapping is dropped, and a new anon mapping is created and marked as 1960 * a stack. 1961 * 1962 * Must be called with map unlocked. 1963 */ 1964 int 1965 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1966 { 1967 vm_map_t map = &p->p_vmspace->vm_map; 1968 vaddr_t start, end; 1969 int error; 1970 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1971 PROT_READ | PROT_WRITE | PROT_EXEC, 1972 MAP_INHERIT_COPY, MADV_NORMAL, 1973 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1974 UVM_FLAG_COPYONW); 1975 1976 start = round_page(addr); 1977 end = trunc_page(addr + sz); 1978 #ifdef MACHINE_STACK_GROWS_UP 1979 if (end == addr + sz) 1980 end -= PAGE_SIZE; 1981 #else 1982 if (start == addr) 1983 start += PAGE_SIZE; 1984 #endif 1985 1986 if (start < map->min_offset || end >= map->max_offset || end < start) 1987 return EINVAL; 1988 1989 error = uvm_mapanon(map, &start, end - start, 0, flags); 1990 if (error != 0) 1991 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1992 1993 return error; 1994 } 1995 1996 /* 1997 * uvm_map_pie: return a random load address for a PIE executable 1998 * properly aligned. 1999 */ 2000 #ifndef VM_PIE_MAX_ADDR 2001 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 2002 #endif 2003 2004 #ifndef VM_PIE_MIN_ADDR 2005 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 2006 #endif 2007 2008 #ifndef VM_PIE_MIN_ALIGN 2009 #define VM_PIE_MIN_ALIGN PAGE_SIZE 2010 #endif 2011 2012 vaddr_t 2013 uvm_map_pie(vaddr_t align) 2014 { 2015 vaddr_t addr, space, min; 2016 2017 align = MAX(align, VM_PIE_MIN_ALIGN); 2018 2019 /* round up to next alignment */ 2020 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 2021 2022 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 2023 return (align); 2024 2025 space = (VM_PIE_MAX_ADDR - min) / align; 2026 space = MIN(space, (u_int32_t)-1); 2027 2028 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 2029 addr += min; 2030 2031 return (addr); 2032 } 2033 2034 void 2035 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 2036 { 2037 struct uvm_map_deadq dead; 2038 2039 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 2040 (end & (vaddr_t)PAGE_MASK) == 0); 2041 TAILQ_INIT(&dead); 2042 vm_map_lock(map); 2043 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 2044 vm_map_unlock(map); 2045 2046 if (map->flags & VM_MAP_INTRSAFE) 2047 uvm_unmap_detach_intrsafe(&dead); 2048 else 2049 uvm_unmap_detach(&dead, 0); 2050 } 2051 2052 /* 2053 * Mark entry as free. 2054 * 2055 * entry will be put on the dead list. 2056 * The free space will be merged into the previous or a new entry, 2057 * unless markfree is false. 
2058 */ 2059 void 2060 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 2061 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 2062 boolean_t markfree) 2063 { 2064 struct uvm_addr_state *free; 2065 struct vm_map_entry *prev; 2066 vaddr_t addr; /* Start of freed range. */ 2067 vaddr_t end; /* End of freed range. */ 2068 2069 prev = *prev_ptr; 2070 if (prev == entry) 2071 *prev_ptr = prev = NULL; 2072 2073 if (prev == NULL || 2074 VMMAP_FREE_END(prev) != entry->start) 2075 prev = RBT_PREV(uvm_map_addr, entry); 2076 2077 /* Entry is describing only free memory and has nothing to drain into. */ 2078 if (prev == NULL && entry->start == entry->end && markfree) { 2079 *prev_ptr = entry; 2080 return; 2081 } 2082 2083 addr = entry->start; 2084 end = VMMAP_FREE_END(entry); 2085 free = uvm_map_uaddr_e(map, entry); 2086 uvm_mapent_free_remove(map, free, entry); 2087 uvm_mapent_addr_remove(map, entry); 2088 DEAD_ENTRY_PUSH(dead, entry); 2089 2090 if (markfree) { 2091 if (prev) { 2092 free = uvm_map_uaddr_e(map, prev); 2093 uvm_mapent_free_remove(map, free, prev); 2094 } 2095 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 2096 } 2097 } 2098 2099 /* 2100 * Unwire and release referenced amap and object from map entry. 2101 */ 2102 void 2103 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2104 { 2105 /* Unwire removed map entry. */ 2106 if (VM_MAPENT_ISWIRED(entry)) { 2107 KERNEL_LOCK(); 2108 entry->wired_count = 0; 2109 uvm_fault_unwire_locked(map, entry->start, entry->end); 2110 KERNEL_UNLOCK(); 2111 } 2112 2113 /* Entry-type specific code. */ 2114 if (UVM_ET_ISHOLE(entry)) { 2115 /* Nothing to be done for holes. */ 2116 } else if (map->flags & VM_MAP_INTRSAFE) { 2117 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2118 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2119 pmap_kremove(entry->start, entry->end - entry->start); 2120 } else if (UVM_ET_ISOBJ(entry) && 2121 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2122 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2123 /* 2124 * Note: kernel object mappings are currently used in 2125 * two ways: 2126 * [1] "normal" mappings of pages in the kernel object 2127 * [2] uvm_km_valloc'd allocations in which we 2128 * pmap_enter in some non-kernel-object page 2129 * (e.g. vmapbuf). 2130 * 2131 * for case [1], we need to remove the mapping from 2132 * the pmap and then remove the page from the kernel 2133 * object (because, once pages in a kernel object are 2134 * unmapped they are no longer needed, unlike, say, 2135 * a vnode where you might want the data to persist 2136 * until flushed out of a queue). 2137 * 2138 * for case [2], we need to remove the mapping from 2139 * the pmap. there shouldn't be any pages at the 2140 * specified offset in the kernel object [but it 2141 * doesn't hurt to call uvm_km_pgremove just to be 2142 * safe?] 2143 * 2144 * uvm_km_pgremove currently does the following: 2145 * for pages in the kernel object range: 2146 * - drops the swap slot 2147 * - uvm_pagefree the page 2148 * 2149 * note there is version of uvm_km_pgremove() that 2150 * is used for "intrsafe" objects. 2151 */ 2152 /* 2153 * remove mappings from pmap and drop the pages 2154 * from the object. offsets are always relative 2155 * to vm_map_min(kernel_map). 
2156 */ 2157 pmap_remove(pmap_kernel(), entry->start, entry->end); 2158 uvm_km_pgremove(entry->object.uvm_obj, 2159 entry->start - vm_map_min(kernel_map), 2160 entry->end - vm_map_min(kernel_map)); 2161 2162 /* 2163 * null out kernel_object reference, we've just 2164 * dropped it 2165 */ 2166 entry->etype &= ~UVM_ET_OBJ; 2167 entry->object.uvm_obj = NULL; /* to be safe */ 2168 } else { 2169 /* remove mappings the standard way. */ 2170 pmap_remove(map->pmap, entry->start, entry->end); 2171 } 2172 } 2173 2174 /* 2175 * Remove all entries from start to end. 2176 * 2177 * If remove_holes, then remove ET_HOLE entries as well. 2178 * If markfree, entry will be properly marked free, otherwise, no replacement 2179 * entry will be put in the tree (corrupting the tree). 2180 */ 2181 void 2182 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2183 struct uvm_map_deadq *dead, boolean_t remove_holes, 2184 boolean_t markfree) 2185 { 2186 struct vm_map_entry *prev_hint, *next, *entry; 2187 2188 start = MAX(start, map->min_offset); 2189 end = MIN(end, map->max_offset); 2190 if (start >= end) 2191 return; 2192 2193 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2194 splassert(IPL_NONE); 2195 else 2196 splassert(IPL_VM); 2197 2198 /* Find first affected entry. */ 2199 entry = uvm_map_entrybyaddr(&map->addr, start); 2200 KDASSERT(entry != NULL && entry->start <= start); 2201 if (entry->end <= start && markfree) 2202 entry = RBT_NEXT(uvm_map_addr, entry); 2203 else 2204 UVM_MAP_CLIP_START(map, entry, start); 2205 2206 /* 2207 * Iterate entries until we reach end address. 2208 * prev_hint hints where the freed space can be appended to. 2209 */ 2210 prev_hint = NULL; 2211 for (; entry != NULL && entry->start < end; entry = next) { 2212 KDASSERT(entry->start >= start); 2213 if (entry->end > end || !markfree) 2214 UVM_MAP_CLIP_END(map, entry, end); 2215 KDASSERT(entry->start >= start && entry->end <= end); 2216 next = RBT_NEXT(uvm_map_addr, entry); 2217 2218 /* Don't remove holes unless asked to do so. */ 2219 if (UVM_ET_ISHOLE(entry)) { 2220 if (!remove_holes) { 2221 prev_hint = entry; 2222 continue; 2223 } 2224 } 2225 2226 /* A stack has been removed.. */ 2227 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2228 map->sserial++; 2229 2230 /* Kill entry. */ 2231 uvm_unmap_kill_entry(map, entry); 2232 2233 /* Update space usage. */ 2234 if ((map->flags & VM_MAP_ISVMSPACE) && 2235 entry->object.uvm_obj == NULL && 2236 entry->protection != PROT_NONE && 2237 !UVM_ET_ISHOLE(entry)) { 2238 ((struct vmspace *)map)->vm_dused -= 2239 uvmspace_dused(map, entry->start, entry->end); 2240 } 2241 if (!UVM_ET_ISHOLE(entry)) 2242 map->size -= entry->end - entry->start; 2243 2244 /* Actual removal of entry. */ 2245 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2246 } 2247 2248 pmap_update(vm_map_pmap(map)); 2249 2250 #ifdef VMMAP_DEBUG 2251 if (markfree) { 2252 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2253 entry != NULL && entry->start < end; 2254 entry = RBT_NEXT(uvm_map_addr, entry)) { 2255 KDASSERT(entry->end <= start || 2256 entry->start == entry->end || 2257 UVM_ET_ISHOLE(entry)); 2258 } 2259 } else { 2260 vaddr_t a; 2261 for (a = start; a < end; a += PAGE_SIZE) 2262 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2263 } 2264 #endif 2265 } 2266 2267 /* 2268 * Mark all entries from first until end (exclusive) as pageable. 2269 * 2270 * Lock must be exclusive on entry and will not be touched. 
2271 */ 2272 void 2273 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2274 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2275 { 2276 struct vm_map_entry *iter; 2277 2278 for (iter = first; iter != end; 2279 iter = RBT_NEXT(uvm_map_addr, iter)) { 2280 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2281 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2282 continue; 2283 2284 iter->wired_count = 0; 2285 uvm_fault_unwire_locked(map, iter->start, iter->end); 2286 } 2287 } 2288 2289 /* 2290 * Mark all entries from first until end (exclusive) as wired. 2291 * 2292 * Lockflags determines the lock state on return from this function. 2293 * Lock must be exclusive on entry. 2294 */ 2295 int 2296 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2297 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2298 int lockflags) 2299 { 2300 struct vm_map_entry *iter; 2301 #ifdef DIAGNOSTIC 2302 unsigned int timestamp_save; 2303 #endif 2304 int error; 2305 2306 /* 2307 * Wire pages in two passes: 2308 * 2309 * 1: holding the write lock, we create any anonymous maps that need 2310 * to be created. then we clip each map entry to the region to 2311 * be wired and increment its wiring count. 2312 * 2313 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2314 * in the pages for any newly wired area (wired_count == 1). 2315 * 2316 * downgrading to a read lock for uvm_fault_wire avoids a possible 2317 * deadlock with another thread that may have faulted on one of 2318 * the pages to be wired (it would mark the page busy, blocking 2319 * us, then in turn block on the map lock that we hold). 2320 * because we keep the read lock on the map, the copy-on-write 2321 * status of the entries we modify here cannot change. 2322 */ 2323 for (iter = first; iter != end; 2324 iter = RBT_NEXT(uvm_map_addr, iter)) { 2325 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2326 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2327 iter->protection == PROT_NONE) 2328 continue; 2329 2330 /* 2331 * Perform actions of vm_map_lookup that need the write lock. 2332 * - create an anonymous map for copy-on-write 2333 * - anonymous map for zero-fill 2334 * Skip submaps. 2335 */ 2336 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2337 UVM_ET_ISNEEDSCOPY(iter) && 2338 ((iter->protection & PROT_WRITE) || 2339 iter->object.uvm_obj == NULL)) { 2340 amap_copy(map, iter, M_WAITOK, 2341 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2342 iter->start, iter->end); 2343 } 2344 iter->wired_count++; 2345 } 2346 2347 /* 2348 * Pass 2. 2349 */ 2350 #ifdef DIAGNOSTIC 2351 timestamp_save = map->timestamp; 2352 #endif 2353 vm_map_busy(map); 2354 vm_map_downgrade(map); 2355 2356 error = 0; 2357 for (iter = first; error == 0 && iter != end; 2358 iter = RBT_NEXT(uvm_map_addr, iter)) { 2359 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2360 iter->protection == PROT_NONE) 2361 continue; 2362 2363 error = uvm_fault_wire(map, iter->start, iter->end, 2364 iter->protection); 2365 } 2366 2367 if (error) { 2368 /* 2369 * uvm_fault_wire failure 2370 * 2371 * Reacquire lock and undo our work. 2372 */ 2373 vm_map_upgrade(map); 2374 vm_map_unbusy(map); 2375 #ifdef DIAGNOSTIC 2376 if (timestamp_save != map->timestamp) 2377 panic("uvm_map_pageable_wire: stale map"); 2378 #endif 2379 2380 /* 2381 * first is no longer needed to restart loops. 2382 * Use it as iterator to unmap successful mappings. 
2383 */ 2384 for (; first != iter; 2385 first = RBT_NEXT(uvm_map_addr, first)) { 2386 if (UVM_ET_ISHOLE(first) || 2387 first->start == first->end || 2388 first->protection == PROT_NONE) 2389 continue; 2390 2391 first->wired_count--; 2392 if (!VM_MAPENT_ISWIRED(first)) { 2393 uvm_fault_unwire_locked(map, 2394 iter->start, iter->end); 2395 } 2396 } 2397 2398 /* decrease counter in the rest of the entries */ 2399 for (; iter != end; 2400 iter = RBT_NEXT(uvm_map_addr, iter)) { 2401 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2402 iter->protection == PROT_NONE) 2403 continue; 2404 2405 iter->wired_count--; 2406 } 2407 2408 if ((lockflags & UVM_LK_EXIT) == 0) 2409 vm_map_unlock(map); 2410 return error; 2411 } 2412 2413 /* We are currently holding a read lock. */ 2414 if ((lockflags & UVM_LK_EXIT) == 0) { 2415 vm_map_unbusy(map); 2416 vm_map_unlock_read(map); 2417 } else { 2418 vm_map_upgrade(map); 2419 vm_map_unbusy(map); 2420 #ifdef DIAGNOSTIC 2421 if (timestamp_save != map->timestamp) 2422 panic("uvm_map_pageable_wire: stale map"); 2423 #endif 2424 } 2425 return 0; 2426 } 2427 2428 /* 2429 * uvm_map_pageable: set pageability of a range in a map. 2430 * 2431 * Flags: 2432 * UVM_LK_ENTER: map is already locked by caller 2433 * UVM_LK_EXIT: don't unlock map on exit 2434 * 2435 * The full range must be in use (entries may not have fspace != 0). 2436 * UVM_ET_HOLE counts as unmapped. 2437 */ 2438 int 2439 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2440 boolean_t new_pageable, int lockflags) 2441 { 2442 struct vm_map_entry *first, *last, *tmp; 2443 int error; 2444 2445 start = trunc_page(start); 2446 end = round_page(end); 2447 2448 if (start > end) 2449 return EINVAL; 2450 if (start == end) 2451 return 0; /* nothing to do */ 2452 if (start < map->min_offset) 2453 return EFAULT; /* why? see first XXX below */ 2454 if (end > map->max_offset) 2455 return EINVAL; /* why? see second XXX below */ 2456 2457 KASSERT(map->flags & VM_MAP_PAGEABLE); 2458 if ((lockflags & UVM_LK_ENTER) == 0) 2459 vm_map_lock(map); 2460 2461 /* 2462 * Find first entry. 2463 * 2464 * Initial test on start is different, because of the different 2465 * error returned. Rest is tested further down. 2466 */ 2467 first = uvm_map_entrybyaddr(&map->addr, start); 2468 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2469 /* 2470 * XXX if the first address is not mapped, it is EFAULT? 2471 */ 2472 error = EFAULT; 2473 goto out; 2474 } 2475 2476 /* Check that the range has no holes. */ 2477 for (last = first; last != NULL && last->start < end; 2478 last = RBT_NEXT(uvm_map_addr, last)) { 2479 if (UVM_ET_ISHOLE(last) || 2480 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2481 /* 2482 * XXX unmapped memory in range, why is it EINVAL 2483 * instead of EFAULT? 2484 */ 2485 error = EINVAL; 2486 goto out; 2487 } 2488 } 2489 2490 /* 2491 * Last ended at the first entry after the range. 2492 * Move back one step. 2493 * 2494 * Note that last may be NULL. 2495 */ 2496 if (last == NULL) { 2497 last = RBT_MAX(uvm_map_addr, &map->addr); 2498 if (last->end < end) { 2499 error = EINVAL; 2500 goto out; 2501 } 2502 } else { 2503 KASSERT(last != first); 2504 last = RBT_PREV(uvm_map_addr, last); 2505 } 2506 2507 /* Wire/unwire pages here. */ 2508 if (new_pageable) { 2509 /* 2510 * Mark pageable. 2511 * entries that are not wired are untouched. 2512 */ 2513 if (VM_MAPENT_ISWIRED(first)) 2514 UVM_MAP_CLIP_START(map, first, start); 2515 /* 2516 * Split last at end. 
2517 * Make tmp be the first entry after what is to be touched. 2518 * If last is not wired, don't touch it. 2519 */ 2520 if (VM_MAPENT_ISWIRED(last)) { 2521 UVM_MAP_CLIP_END(map, last, end); 2522 tmp = RBT_NEXT(uvm_map_addr, last); 2523 } else 2524 tmp = last; 2525 2526 uvm_map_pageable_pgon(map, first, tmp, start, end); 2527 error = 0; 2528 2529 out: 2530 if ((lockflags & UVM_LK_EXIT) == 0) 2531 vm_map_unlock(map); 2532 return error; 2533 } else { 2534 /* 2535 * Mark entries wired. 2536 * entries are always touched (because recovery needs this). 2537 */ 2538 if (!VM_MAPENT_ISWIRED(first)) 2539 UVM_MAP_CLIP_START(map, first, start); 2540 /* 2541 * Split last at end. 2542 * Make tmp be the first entry after what is to be touched. 2543 * If last is not wired, don't touch it. 2544 */ 2545 if (!VM_MAPENT_ISWIRED(last)) { 2546 UVM_MAP_CLIP_END(map, last, end); 2547 tmp = RBT_NEXT(uvm_map_addr, last); 2548 } else 2549 tmp = last; 2550 2551 return uvm_map_pageable_wire(map, first, tmp, start, end, 2552 lockflags); 2553 } 2554 } 2555 2556 /* 2557 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2558 * all mapped regions. 2559 * 2560 * Map must not be locked. 2561 * If no flags are specified, all ragions are unwired. 2562 */ 2563 int 2564 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2565 { 2566 vsize_t size; 2567 struct vm_map_entry *iter; 2568 2569 KASSERT(map->flags & VM_MAP_PAGEABLE); 2570 vm_map_lock(map); 2571 2572 if (flags == 0) { 2573 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2574 NULL, map->min_offset, map->max_offset); 2575 2576 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2577 vm_map_unlock(map); 2578 return 0; 2579 } 2580 2581 if (flags & MCL_FUTURE) 2582 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2583 if (!(flags & MCL_CURRENT)) { 2584 vm_map_unlock(map); 2585 return 0; 2586 } 2587 2588 /* 2589 * Count number of pages in all non-wired entries. 2590 * If the number exceeds the limit, abort. 2591 */ 2592 size = 0; 2593 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2594 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2595 continue; 2596 2597 size += iter->end - iter->start; 2598 } 2599 2600 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2601 vm_map_unlock(map); 2602 return ENOMEM; 2603 } 2604 2605 /* XXX non-pmap_wired_count case must be handled by caller */ 2606 #ifdef pmap_wired_count 2607 if (limit != 0 && 2608 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2609 vm_map_unlock(map); 2610 return ENOMEM; 2611 } 2612 #endif 2613 2614 /* 2615 * uvm_map_pageable_wire will release lcok 2616 */ 2617 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2618 NULL, map->min_offset, map->max_offset, 0); 2619 } 2620 2621 /* 2622 * Initialize map. 2623 * 2624 * Allocates sufficient entries to describe the free memory in the map. 2625 */ 2626 void 2627 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max, 2628 int flags) 2629 { 2630 int i; 2631 2632 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2633 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2634 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2635 2636 /* 2637 * Update parameters. 2638 * 2639 * This code handles (vaddr_t)-1 and other page mask ending addresses 2640 * properly. 2641 * We lose the top page if the full virtual address space is used. 
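     * Illustrative example, assuming 4KB pages: for max == (vaddr_t)-1
     * the low bits are set, so max + 1 wraps to 0 and PAGE_SIZE is
     * subtracted again, leaving max_offset at the start of the topmost
     * page.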
2642 */ 2643 if (max & (vaddr_t)PAGE_MASK) { 2644 max += 1; 2645 if (max == 0) /* overflow */ 2646 max -= PAGE_SIZE; 2647 } 2648 2649 RBT_INIT(uvm_map_addr, &map->addr); 2650 map->uaddr_exe = NULL; 2651 for (i = 0; i < nitems(map->uaddr_any); ++i) 2652 map->uaddr_any[i] = NULL; 2653 map->uaddr_brk_stack = NULL; 2654 2655 map->pmap = pmap; 2656 map->size = 0; 2657 map->ref_count = 0; 2658 map->min_offset = min; 2659 map->max_offset = max; 2660 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2661 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2662 map->flags = flags; 2663 map->timestamp = 0; 2664 if (flags & VM_MAP_ISVMSPACE) 2665 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2666 else 2667 rw_init(&map->lock, "kmmaplk"); 2668 mtx_init(&map->mtx, IPL_VM); 2669 mtx_init(&map->flags_lock, IPL_VM); 2670 2671 /* Configure the allocators. */ 2672 if (flags & VM_MAP_ISVMSPACE) 2673 uvm_map_setup_md(map); 2674 else 2675 map->uaddr_any[3] = &uaddr_kbootstrap; 2676 2677 /* 2678 * Fill map entries. 2679 * We do not need to write-lock the map here because only the current 2680 * thread sees it right now. Initialize ref_count to 0 above to avoid 2681 * bogus triggering of lock-not-held assertions. 2682 */ 2683 uvm_map_setup_entries(map); 2684 uvm_tree_sanity(map, __FILE__, __LINE__); 2685 map->ref_count = 1; 2686 } 2687 2688 /* 2689 * Destroy the map. 2690 * 2691 * This is the inverse operation to uvm_map_setup. 2692 */ 2693 void 2694 uvm_map_teardown(struct vm_map *map) 2695 { 2696 struct uvm_map_deadq dead_entries; 2697 struct vm_map_entry *entry, *tmp; 2698 #ifdef VMMAP_DEBUG 2699 size_t numq, numt; 2700 #endif 2701 int i; 2702 2703 KERNEL_ASSERT_LOCKED(); 2704 KERNEL_UNLOCK(); 2705 KERNEL_ASSERT_UNLOCKED(); 2706 2707 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2708 2709 /* Remove address selectors. */ 2710 uvm_addr_destroy(map->uaddr_exe); 2711 map->uaddr_exe = NULL; 2712 for (i = 0; i < nitems(map->uaddr_any); i++) { 2713 uvm_addr_destroy(map->uaddr_any[i]); 2714 map->uaddr_any[i] = NULL; 2715 } 2716 uvm_addr_destroy(map->uaddr_brk_stack); 2717 map->uaddr_brk_stack = NULL; 2718 2719 /* 2720 * Remove entries. 2721 * 2722 * The following is based on graph breadth-first search. 2723 * 2724 * In color terms: 2725 * - the dead_entries set contains all nodes that are reachable 2726 * (i.e. both the black and the grey nodes) 2727 * - any entry not in dead_entries is white 2728 * - any entry that appears in dead_entries before entry, 2729 * is black, the rest is grey. 2730 * The set [entry, end] is also referred to as the wavefront. 2731 * 2732 * Since the tree is always a fully connected graph, the breadth-first 2733 * search guarantees that each vmmap_entry is visited exactly once. 2734 * The vm_map is broken down in linear time. 2735 */ 2736 TAILQ_INIT(&dead_entries); 2737 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2738 DEAD_ENTRY_PUSH(&dead_entries, entry); 2739 while (entry != NULL) { 2740 sched_pause(yield); 2741 uvm_unmap_kill_entry(map, entry); 2742 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2743 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2744 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2745 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2746 /* Update wave-front. 
*/ 2747 entry = TAILQ_NEXT(entry, dfree.deadq); 2748 } 2749 2750 #ifdef VMMAP_DEBUG 2751 numt = numq = 0; 2752 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2753 numt++; 2754 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2755 numq++; 2756 KASSERT(numt == numq); 2757 #endif 2758 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2759 2760 KERNEL_LOCK(); 2761 2762 pmap_destroy(map->pmap); 2763 map->pmap = NULL; 2764 } 2765 2766 /* 2767 * Populate map with free-memory entries. 2768 * 2769 * Map must be initialized and empty. 2770 */ 2771 void 2772 uvm_map_setup_entries(struct vm_map *map) 2773 { 2774 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2775 2776 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2777 } 2778 2779 /* 2780 * Split entry at given address. 2781 * 2782 * orig: entry that is to be split. 2783 * next: a newly allocated map entry that is not linked. 2784 * split: address at which the split is done. 2785 */ 2786 void 2787 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2788 struct vm_map_entry *next, vaddr_t split) 2789 { 2790 struct uvm_addr_state *free, *free_before; 2791 vsize_t adj; 2792 2793 if ((split & PAGE_MASK) != 0) { 2794 panic("uvm_map_splitentry: split address 0x%lx " 2795 "not on page boundary!", split); 2796 } 2797 KDASSERT(map != NULL && orig != NULL && next != NULL); 2798 uvm_tree_sanity(map, __FILE__, __LINE__); 2799 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2800 2801 #ifdef VMMAP_DEBUG 2802 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2803 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2804 #endif /* VMMAP_DEBUG */ 2805 2806 /* 2807 * Free space will change, unlink from free space tree. 2808 */ 2809 free = uvm_map_uaddr_e(map, orig); 2810 uvm_mapent_free_remove(map, free, orig); 2811 2812 adj = split - orig->start; 2813 2814 uvm_mapent_copy(orig, next); 2815 if (split >= orig->end) { 2816 next->etype = 0; 2817 next->offset = 0; 2818 next->wired_count = 0; 2819 next->start = next->end = split; 2820 next->guard = 0; 2821 next->fspace = VMMAP_FREE_END(orig) - split; 2822 next->aref.ar_amap = NULL; 2823 next->aref.ar_pageoff = 0; 2824 orig->guard = MIN(orig->guard, split - orig->end); 2825 orig->fspace = split - VMMAP_FREE_START(orig); 2826 } else { 2827 orig->fspace = 0; 2828 orig->guard = 0; 2829 orig->end = next->start = split; 2830 2831 if (next->aref.ar_amap) { 2832 KERNEL_LOCK(); 2833 amap_splitref(&orig->aref, &next->aref, adj); 2834 KERNEL_UNLOCK(); 2835 } 2836 if (UVM_ET_ISSUBMAP(orig)) { 2837 uvm_map_reference(next->object.sub_map); 2838 next->offset += adj; 2839 } else if (UVM_ET_ISOBJ(orig)) { 2840 if (next->object.uvm_obj->pgops && 2841 next->object.uvm_obj->pgops->pgo_reference) { 2842 KERNEL_LOCK(); 2843 next->object.uvm_obj->pgops->pgo_reference( 2844 next->object.uvm_obj); 2845 KERNEL_UNLOCK(); 2846 } 2847 next->offset += adj; 2848 } 2849 } 2850 2851 /* 2852 * Link next into address tree. 2853 * Link orig and next into free-space tree. 2854 * 2855 * Don't insert 'next' into the addr tree until orig has been linked, 2856 * in case the free-list looks at adjecent entries in the addr tree 2857 * for its decisions. 
2858 */ 2859 if (orig->fspace > 0) 2860 free_before = free; 2861 else 2862 free_before = uvm_map_uaddr_e(map, orig); 2863 uvm_mapent_free_insert(map, free_before, orig); 2864 uvm_mapent_addr_insert(map, next); 2865 uvm_mapent_free_insert(map, free, next); 2866 2867 uvm_tree_sanity(map, __FILE__, __LINE__); 2868 } 2869 2870 2871 #ifdef VMMAP_DEBUG 2872 2873 void 2874 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2875 char *file, int line) 2876 { 2877 char* map_special; 2878 2879 if (test) 2880 return; 2881 2882 if (map == kernel_map) 2883 map_special = " (kernel_map)"; 2884 else if (map == kmem_map) 2885 map_special = " (kmem_map)"; 2886 else 2887 map_special = ""; 2888 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2889 line, test_str); 2890 } 2891 2892 /* 2893 * Check that map is sane. 2894 */ 2895 void 2896 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2897 { 2898 struct vm_map_entry *iter; 2899 vaddr_t addr; 2900 vaddr_t min, max, bound; /* Bounds checker. */ 2901 struct uvm_addr_state *free; 2902 2903 addr = vm_map_min(map); 2904 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2905 /* 2906 * Valid start, end. 2907 * Catch overflow for end+fspace. 2908 */ 2909 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2910 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2911 2912 /* May not be empty. */ 2913 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2914 file, line); 2915 2916 /* Addresses for entry must lie within map boundaries. */ 2917 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2918 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2919 2920 /* Tree may not have gaps. */ 2921 UVM_ASSERT(map, iter->start == addr, file, line); 2922 addr = VMMAP_FREE_END(iter); 2923 2924 /* 2925 * Free space may not cross boundaries, unless the same 2926 * free list is used on both sides of the border. 2927 */ 2928 min = VMMAP_FREE_START(iter); 2929 max = VMMAP_FREE_END(iter); 2930 2931 while (min < max && 2932 (bound = uvm_map_boundary(map, min, max)) != max) { 2933 UVM_ASSERT(map, 2934 uvm_map_uaddr(map, bound - 1) == 2935 uvm_map_uaddr(map, bound), 2936 file, line); 2937 min = bound; 2938 } 2939 2940 free = uvm_map_uaddr_e(map, iter); 2941 if (free) { 2942 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2943 file, line); 2944 } else { 2945 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2946 file, line); 2947 } 2948 } 2949 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2950 } 2951 2952 void 2953 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2954 { 2955 struct vm_map_entry *iter; 2956 vsize_t size; 2957 2958 size = 0; 2959 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2960 if (!UVM_ET_ISHOLE(iter)) 2961 size += iter->end - iter->start; 2962 } 2963 2964 if (map->size != size) 2965 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2966 UVM_ASSERT(map, map->size == size, file, line); 2967 2968 vmspace_validate(map); 2969 } 2970 2971 /* 2972 * This function validates the statistics on vmspace. 2973 */ 2974 void 2975 vmspace_validate(struct vm_map *map) 2976 { 2977 struct vmspace *vm; 2978 struct vm_map_entry *iter; 2979 vaddr_t imin, imax; 2980 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2981 vsize_t stack, heap; /* Measured sizes. 
*/ 2982 2983 if (!(map->flags & VM_MAP_ISVMSPACE)) 2984 return; 2985 2986 vm = (struct vmspace *)map; 2987 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2988 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2989 2990 stack = heap = 0; 2991 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2992 imin = imax = iter->start; 2993 2994 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL || 2995 iter->prot != PROT_NONE) 2996 continue; 2997 2998 /* 2999 * Update stack, heap. 3000 * Keep in mind that (theoretically) the entries of 3001 * userspace and stack may be joined. 3002 */ 3003 while (imin != iter->end) { 3004 /* 3005 * Set imax to the first boundary crossed between 3006 * imin and stack addresses. 3007 */ 3008 imax = iter->end; 3009 if (imin < stack_begin && imax > stack_begin) 3010 imax = stack_begin; 3011 else if (imin < stack_end && imax > stack_end) 3012 imax = stack_end; 3013 3014 if (imin >= stack_begin && imin < stack_end) 3015 stack += imax - imin; 3016 else 3017 heap += imax - imin; 3018 imin = imax; 3019 } 3020 } 3021 3022 heap >>= PAGE_SHIFT; 3023 if (heap != vm->vm_dused) { 3024 printf("vmspace stack range: 0x%lx-0x%lx\n", 3025 stack_begin, stack_end); 3026 panic("vmspace_validate: vmspace.vm_dused invalid, " 3027 "expected %ld pgs, got %ld pgs in map %p", 3028 heap, vm->vm_dused, 3029 map); 3030 } 3031 } 3032 3033 #endif /* VMMAP_DEBUG */ 3034 3035 /* 3036 * uvm_map_init: init mapping system at boot time. note that we allocate 3037 * and init the static pool of structs vm_map_entry for the kernel here. 3038 */ 3039 void 3040 uvm_map_init(void) 3041 { 3042 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 3043 int lcv; 3044 3045 /* now set up static pool of kernel map entries ... */ 3046 mtx_init(&uvm_kmapent_mtx, IPL_VM); 3047 SLIST_INIT(&uvm.kentry_free); 3048 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 3049 SLIST_INSERT_HEAD(&uvm.kentry_free, 3050 &kernel_map_entry[lcv], daddrs.addr_kentry); 3051 } 3052 3053 /* initialize the map-related pools. */ 3054 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 3055 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 3056 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 3057 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 3058 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 3059 IPL_VM, 0, "vmmpekpl", NULL); 3060 pool_sethiwat(&uvm_map_entry_pool, 8192); 3061 3062 uvm_addr_init(); 3063 } 3064 3065 #if defined(DDB) 3066 3067 /* 3068 * DDB hooks 3069 */ 3070 3071 /* 3072 * uvm_map_printit: actually prints the map 3073 */ 3074 void 3075 uvm_map_printit(struct vm_map *map, boolean_t full, 3076 int (*pr)(const char *, ...)) 3077 { 3078 struct vmspace *vm; 3079 struct vm_map_entry *entry; 3080 struct uvm_addr_state *free; 3081 int in_free, i; 3082 char buf[8]; 3083 3084 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3085 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 3086 map->b_start, map->b_end); 3087 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 3088 map->s_start, map->s_end); 3089 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 3090 map->size, map->ref_count, map->timestamp, 3091 map->flags); 3092 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3093 pmap_resident_count(map->pmap)); 3094 3095 /* struct vmspace handling. 
*/ 3096 if (map->flags & VM_MAP_ISVMSPACE) { 3097 vm = (struct vmspace *)map; 3098 3099 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 3100 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 3101 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 3102 vm->vm_tsize, vm->vm_dsize); 3103 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 3104 vm->vm_taddr, vm->vm_daddr); 3105 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 3106 vm->vm_maxsaddr, vm->vm_minsaddr); 3107 } 3108 3109 if (!full) 3110 goto print_uaddr; 3111 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3112 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3113 entry, entry->start, entry->end, entry->object.uvm_obj, 3114 (long long)entry->offset, entry->aref.ar_amap, 3115 entry->aref.ar_pageoff); 3116 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " 3117 "syscall=%c, prot(max)=%d/%d, inh=%d, " 3118 "wc=%d, adv=%d\n", 3119 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3120 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3121 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3122 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3123 (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F', 3124 entry->protection, entry->max_protection, 3125 entry->inheritance, entry->wired_count, entry->advice); 3126 3127 free = uvm_map_uaddr_e(map, entry); 3128 in_free = (free != NULL); 3129 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3130 "free=0x%lx-0x%lx\n", 3131 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3132 in_free ? 'T' : 'F', 3133 entry->guard, 3134 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3135 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3136 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3137 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3138 if (free) { 3139 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3140 free->uaddr_minaddr, free->uaddr_maxaddr, 3141 free->uaddr_functions->uaddr_name); 3142 } 3143 } 3144 3145 print_uaddr: 3146 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3147 for (i = 0; i < nitems(map->uaddr_any); i++) { 3148 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3149 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3150 } 3151 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3152 } 3153 3154 /* 3155 * uvm_object_printit: actually prints the object 3156 */ 3157 void 3158 uvm_object_printit(uobj, full, pr) 3159 struct uvm_object *uobj; 3160 boolean_t full; 3161 int (*pr)(const char *, ...); 3162 { 3163 struct vm_page *pg; 3164 int cnt = 0; 3165 3166 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3167 uobj, uobj->pgops, uobj->uo_npages); 3168 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3169 (*pr)("refs=<SYSTEM>\n"); 3170 else 3171 (*pr)("refs=%d\n", uobj->uo_refs); 3172 3173 if (!full) { 3174 return; 3175 } 3176 (*pr)(" PAGES <pg,offset>:\n "); 3177 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3178 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3179 if ((cnt % 3) == 2) { 3180 (*pr)("\n "); 3181 } 3182 cnt++; 3183 } 3184 if ((cnt % 3) != 2) { 3185 (*pr)("\n"); 3186 } 3187 } 3188 3189 /* 3190 * uvm_page_printit: actually print the page 3191 */ 3192 static const char page_flagbits[] = 3193 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3194 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3195 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3196 3197 void 3198 uvm_page_printit(pg, full, pr) 3199 struct vm_page *pg; 3200 boolean_t full; 3201 int (*pr)(const char *, ...); 3202 { 3203 struct vm_page *tpg; 3204 struct uvm_object *uobj; 3205 struct pglist *pgl; 3206 3207 (*pr)("PAGE %p:\n", pg); 3208 
(*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3209 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3210 (long long)pg->phys_addr); 3211 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3212 pg->uobject, pg->uanon, (long long)pg->offset); 3213 #if defined(UVM_PAGE_TRKOWN) 3214 if (pg->pg_flags & PG_BUSY) 3215 (*pr)(" owning thread = %d, tag=%s", 3216 pg->owner, pg->owner_tag); 3217 else 3218 (*pr)(" page not busy, no owner"); 3219 #else 3220 (*pr)(" [page ownership tracking disabled]"); 3221 #endif 3222 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3223 3224 if (!full) 3225 return; 3226 3227 /* cross-verify object/anon */ 3228 if ((pg->pg_flags & PQ_FREE) == 0) { 3229 if (pg->pg_flags & PQ_ANON) { 3230 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3231 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3232 (pg->uanon) ? pg->uanon->an_page : NULL); 3233 else 3234 (*pr)(" anon backpointer is OK\n"); 3235 } else { 3236 uobj = pg->uobject; 3237 if (uobj) { 3238 (*pr)(" checking object list\n"); 3239 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3240 if (tpg == pg) { 3241 break; 3242 } 3243 } 3244 if (tpg) 3245 (*pr)(" page found on object list\n"); 3246 else 3247 (*pr)(" >>> PAGE NOT FOUND " 3248 "ON OBJECT LIST! <<<\n"); 3249 } 3250 } 3251 } 3252 3253 /* cross-verify page queue */ 3254 if (pg->pg_flags & PQ_FREE) { 3255 if (uvm_pmr_isfree(pg)) 3256 (*pr)(" page found in uvm_pmemrange\n"); 3257 else 3258 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3259 pgl = NULL; 3260 } else if (pg->pg_flags & PQ_INACTIVE) { 3261 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3262 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3263 } else if (pg->pg_flags & PQ_ACTIVE) { 3264 pgl = &uvm.page_active; 3265 } else { 3266 pgl = NULL; 3267 } 3268 3269 if (pgl) { 3270 (*pr)(" checking pageq list\n"); 3271 TAILQ_FOREACH(tpg, pgl, pageq) { 3272 if (tpg == pg) { 3273 break; 3274 } 3275 } 3276 if (tpg) 3277 (*pr)(" page found on pageq list\n"); 3278 else 3279 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3280 } 3281 } 3282 #endif 3283 3284 /* 3285 * uvm_map_protect: change map protection 3286 * 3287 * => set_max means set max_protection. 3288 * => map must be unlocked. 3289 */ 3290 int 3291 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3292 vm_prot_t new_prot, boolean_t set_max) 3293 { 3294 struct vm_map_entry *first, *iter; 3295 vm_prot_t old_prot; 3296 vm_prot_t mask; 3297 vsize_t dused; 3298 int error; 3299 3300 if (start > end) 3301 return EINVAL; 3302 start = MAX(start, map->min_offset); 3303 end = MIN(end, map->max_offset); 3304 if (start >= end) 3305 return 0; 3306 3307 dused = 0; 3308 error = 0; 3309 vm_map_lock(map); 3310 3311 /* 3312 * Set up first and last. 3313 * - first will contain first entry at or after start. 3314 */ 3315 first = uvm_map_entrybyaddr(&map->addr, start); 3316 KDASSERT(first != NULL); 3317 if (first->end <= start) 3318 first = RBT_NEXT(uvm_map_addr, first); 3319 3320 /* First, check for protection violations. */ 3321 for (iter = first; iter != NULL && iter->start < end; 3322 iter = RBT_NEXT(uvm_map_addr, iter)) { 3323 /* Treat memory holes as free space. 
*/ 3324 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3325 continue; 3326 3327 old_prot = iter->protection; 3328 if (old_prot == PROT_NONE && new_prot != old_prot) { 3329 dused += uvmspace_dused( 3330 map, MAX(start, iter->start), MIN(end, iter->end)); 3331 } 3332 3333 if (UVM_ET_ISSUBMAP(iter)) { 3334 error = EINVAL; 3335 goto out; 3336 } 3337 if ((new_prot & iter->max_protection) != new_prot) { 3338 error = EACCES; 3339 goto out; 3340 } 3341 if (map == kernel_map && 3342 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3343 panic("uvm_map_protect: kernel map W^X violation requested"); 3344 } 3345 3346 /* Check limits. */ 3347 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) { 3348 vsize_t limit = lim_cur(RLIMIT_DATA); 3349 dused = ptoa(dused); 3350 if (limit < dused || 3351 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) { 3352 error = ENOMEM; 3353 goto out; 3354 } 3355 } 3356 3357 /* Fix protections. */ 3358 for (iter = first; iter != NULL && iter->start < end; 3359 iter = RBT_NEXT(uvm_map_addr, iter)) { 3360 /* Treat memory holes as free space. */ 3361 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3362 continue; 3363 3364 old_prot = iter->protection; 3365 3366 /* 3367 * Skip adapting protection iff old and new protection 3368 * are equal. 3369 */ 3370 if (set_max) { 3371 if (old_prot == (new_prot & old_prot) && 3372 iter->max_protection == new_prot) 3373 continue; 3374 } else { 3375 if (old_prot == new_prot) 3376 continue; 3377 } 3378 3379 UVM_MAP_CLIP_START(map, iter, start); 3380 UVM_MAP_CLIP_END(map, iter, end); 3381 3382 if (set_max) { 3383 iter->max_protection = new_prot; 3384 iter->protection &= new_prot; 3385 } else 3386 iter->protection = new_prot; 3387 3388 /* 3389 * update physical map if necessary. worry about copy-on-write 3390 * here -- CHECK THIS XXX 3391 */ 3392 if (iter->protection != old_prot) { 3393 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3394 ~PROT_WRITE : PROT_MASK; 3395 3396 /* XXX should only wserial++ if no split occurs */ 3397 if (iter->protection & PROT_WRITE) 3398 map->wserial++; 3399 3400 if (map->flags & VM_MAP_ISVMSPACE) { 3401 if (old_prot == PROT_NONE) { 3402 ((struct vmspace *)map)->vm_dused += 3403 uvmspace_dused(map, iter->start, 3404 iter->end); 3405 } 3406 if (iter->protection == PROT_NONE) { 3407 ((struct vmspace *)map)->vm_dused -= 3408 uvmspace_dused(map, iter->start, 3409 iter->end); 3410 } 3411 } 3412 3413 /* update pmap */ 3414 if ((iter->protection & mask) == PROT_NONE && 3415 VM_MAPENT_ISWIRED(iter)) { 3416 /* 3417 * TODO(ariane) this is stupid. wired_count 3418 * is 0 if not wired, otherwise anything 3419 * larger than 0 (incremented once each time 3420 * wire is called). 3421 * Mostly to be able to undo the damage on 3422 * failure. Not the actually be a wired 3423 * refcounter... 3424 * Originally: iter->wired_count--; 3425 * (don't we have to unwire this in the pmap 3426 * as well?) 3427 */ 3428 iter->wired_count = 0; 3429 } 3430 pmap_protect(map->pmap, iter->start, iter->end, 3431 iter->protection & mask); 3432 } 3433 3434 /* 3435 * If the map is configured to lock any future mappings, 3436 * wire this entry now if the old protection was PROT_NONE 3437 * and the new protection is not PROT_NONE. 
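 * (VM_MAP_WIREFUTURE is set by uvm_map_pageable_all() for
 * mlockall(MCL_FUTURE); memory that becomes accessible here is
 * therefore wired immediately.)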
3438 */ 3439 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3440 VM_MAPENT_ISWIRED(iter) == 0 && 3441 old_prot == PROT_NONE && 3442 new_prot != PROT_NONE) { 3443 if (uvm_map_pageable(map, iter->start, iter->end, 3444 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3445 /* 3446 * If locking the entry fails, remember the 3447 * error if it's the first one. Note we 3448 * still continue setting the protection in 3449 * the map, but it will return the resource 3450 * storage condition regardless. 3451 * 3452 * XXX Ignore what the actual error is, 3453 * XXX just call it a resource shortage 3454 * XXX so that it doesn't get confused 3455 * XXX what uvm_map_protect() itself would 3456 * XXX normally return. 3457 */ 3458 error = ENOMEM; 3459 } 3460 } 3461 } 3462 pmap_update(map->pmap); 3463 3464 out: 3465 vm_map_unlock(map); 3466 return error; 3467 } 3468 3469 /* 3470 * uvmspace_alloc: allocate a vmspace structure. 3471 * 3472 * - structure includes vm_map and pmap 3473 * - XXX: no locking on this structure 3474 * - refcnt set to 1, rest must be init'd by caller 3475 */ 3476 struct vmspace * 3477 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3478 boolean_t remove_holes) 3479 { 3480 struct vmspace *vm; 3481 3482 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3483 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3484 return (vm); 3485 } 3486 3487 /* 3488 * uvmspace_init: initialize a vmspace structure. 3489 * 3490 * - XXX: no locking on this structure 3491 * - refcnt set to 1, rest must be init'd by caller 3492 */ 3493 void 3494 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3495 boolean_t pageable, boolean_t remove_holes) 3496 { 3497 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3498 3499 if (pmap) 3500 pmap_reference(pmap); 3501 else 3502 pmap = pmap_create(); 3503 3504 uvm_map_setup(&vm->vm_map, pmap, min, max, 3505 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3506 3507 vm->vm_refcnt = 1; 3508 3509 if (remove_holes) 3510 pmap_remove_holes(vm); 3511 } 3512 3513 /* 3514 * uvmspace_share: share a vmspace between two processes 3515 * 3516 * - XXX: no locking on vmspace 3517 * - used for vfork 3518 */ 3519 3520 struct vmspace * 3521 uvmspace_share(struct process *pr) 3522 { 3523 struct vmspace *vm = pr->ps_vmspace; 3524 3525 vm->vm_refcnt++; 3526 return vm; 3527 } 3528 3529 /* 3530 * uvmspace_exec: the process wants to exec a new program 3531 * 3532 * - XXX: no locking on vmspace 3533 */ 3534 3535 void 3536 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3537 { 3538 struct process *pr = p->p_p; 3539 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3540 struct vm_map *map = &ovm->vm_map; 3541 struct uvm_map_deadq dead_entries; 3542 3543 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3544 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3545 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3546 3547 pmap_unuse_final(p); /* before stack addresses go away */ 3548 TAILQ_INIT(&dead_entries); 3549 3550 /* see if more than one process is using this vmspace... */ 3551 if (ovm->vm_refcnt == 1) { 3552 /* 3553 * If pr is the only process using its vmspace then 3554 * we can safely recycle that vmspace for the program 3555 * that is being exec'd. 3556 */ 3557 3558 #ifdef SYSVSHM 3559 /* 3560 * SYSV SHM semantics require us to kill all segments on an exec 3561 */ 3562 if (ovm->vm_shm) 3563 shmexit(ovm); 3564 #endif 3565 3566 /* 3567 * POSIX 1003.1b -- "lock future mappings" is revoked 3568 * when a process execs another program image. 
3569 */ 3570 vm_map_lock(map); 3571 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE); 3572 3573 /* 3574 * now unmap the old program 3575 * 3576 * Instead of attempting to keep the map valid, we simply 3577 * nuke all entries and ask uvm_map_setup to reinitialize 3578 * the map to the new boundaries. 3579 * 3580 * uvm_unmap_remove will actually nuke all entries for us 3581 * (as in, not replace them with free-memory entries). 3582 */ 3583 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3584 &dead_entries, TRUE, FALSE); 3585 3586 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3587 3588 /* Nuke statistics and boundaries. */ 3589 memset(&ovm->vm_startcopy, 0, 3590 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3591 3592 3593 if (end & (vaddr_t)PAGE_MASK) { 3594 end += 1; 3595 if (end == 0) /* overflow */ 3596 end -= PAGE_SIZE; 3597 } 3598 3599 /* Setup new boundaries and populate map with entries. */ 3600 map->min_offset = start; 3601 map->max_offset = end; 3602 uvm_map_setup_entries(map); 3603 vm_map_unlock(map); 3604 3605 /* but keep MMU holes unavailable */ 3606 pmap_remove_holes(ovm); 3607 } else { 3608 /* 3609 * pr's vmspace is being shared, so we can't reuse 3610 * it for pr since it is still being used for others. 3611 * allocate a new vmspace for pr 3612 */ 3613 nvm = uvmspace_alloc(start, end, 3614 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3615 3616 /* install new vmspace and drop our ref to the old one. */ 3617 pmap_deactivate(p); 3618 p->p_vmspace = pr->ps_vmspace = nvm; 3619 pmap_activate(p); 3620 3621 uvmspace_free(ovm); 3622 } 3623 3624 /* Release dead entries */ 3625 uvm_unmap_detach(&dead_entries, 0); 3626 } 3627 3628 /* 3629 * uvmspace_free: free a vmspace data structure 3630 * 3631 * - XXX: no locking on vmspace 3632 */ 3633 void 3634 uvmspace_free(struct vmspace *vm) 3635 { 3636 if (--vm->vm_refcnt == 0) { 3637 /* 3638 * lock the map, to wait out all other references to it. delete 3639 * all of the mappings and pages they hold, then call the pmap 3640 * module to reclaim anything left. 3641 */ 3642 #ifdef SYSVSHM 3643 /* Get rid of any SYSV shared memory segments. */ 3644 if (vm->vm_shm != NULL) 3645 shmexit(vm); 3646 #endif 3647 3648 uvm_map_teardown(&vm->vm_map); 3649 pool_put(&uvm_vmspace_pool, vm); 3650 } 3651 } 3652 3653 /* 3654 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3655 * srcmap to the address range [dstaddr, dstaddr + sz) in 3656 * dstmap. 3657 * 3658 * The whole address range in srcmap must be backed by an object 3659 * (no holes). 3660 * 3661 * If successful, the address ranges share memory and the destination 3662 * address range uses the protection flags in prot. 3663 * 3664 * This routine assumes that sz is a multiple of PAGE_SIZE and 3665 * that dstaddr and srcaddr are page-aligned. 
3666 */ 3667 int 3668 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3669 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3670 { 3671 int ret = 0; 3672 vaddr_t unmap_end; 3673 vaddr_t dstva; 3674 vsize_t s_off, len, n = sz, remain; 3675 struct vm_map_entry *first = NULL, *last = NULL; 3676 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3677 struct uvm_map_deadq dead; 3678 3679 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3680 return EINVAL; 3681 3682 TAILQ_INIT(&dead); 3683 vm_map_lock(dstmap); 3684 vm_map_lock_read(srcmap); 3685 3686 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3687 ret = ENOMEM; 3688 goto exit_unlock; 3689 } 3690 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3691 ret = EINVAL; 3692 goto exit_unlock; 3693 } 3694 3695 dstva = dstaddr; 3696 unmap_end = dstaddr; 3697 for (; src_entry != NULL; 3698 psrc_entry = src_entry, 3699 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3700 /* hole in address space, bail out */ 3701 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3702 break; 3703 if (src_entry->start >= srcaddr + sz) 3704 break; 3705 3706 if (UVM_ET_ISSUBMAP(src_entry)) 3707 panic("uvm_share: encountered a submap (illegal)"); 3708 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3709 UVM_ET_ISNEEDSCOPY(src_entry)) 3710 panic("uvm_share: non-copy_on_write map entries " 3711 "marked needs_copy (illegal)"); 3712 3713 /* 3714 * srcaddr > map entry start? means we are in the middle of a 3715 * map, so we calculate the offset to use in the source map. 3716 */ 3717 if (srcaddr > src_entry->start) 3718 s_off = srcaddr - src_entry->start; 3719 else if (srcaddr == src_entry->start) 3720 s_off = 0; 3721 else 3722 panic("uvm_share: map entry start > srcaddr"); 3723 3724 remain = src_entry->end - src_entry->start - s_off; 3725 3726 /* Determine how many bytes to share in this pass */ 3727 if (n < remain) 3728 len = n; 3729 else 3730 len = remain; 3731 3732 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot, 3733 srcmap, src_entry, &dead) == NULL) 3734 break; 3735 3736 n -= len; 3737 dstva += len; 3738 srcaddr += len; 3739 unmap_end = dstva + len; 3740 if (n == 0) 3741 goto exit_unlock; 3742 } 3743 3744 ret = EINVAL; 3745 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3746 3747 exit_unlock: 3748 vm_map_unlock_read(srcmap); 3749 vm_map_unlock(dstmap); 3750 uvm_unmap_detach(&dead, 0); 3751 3752 return ret; 3753 } 3754 3755 /* 3756 * Clone map entry into other map. 3757 * 3758 * Mapping will be placed at dstaddr, for the same length. 3759 * Space must be available. 3760 * Reference counters are incremented. 3761 */ 3762 struct vm_map_entry * 3763 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3764 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3765 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3766 int mapent_flags, int amap_share_flags) 3767 { 3768 struct vm_map_entry *new_entry, *first, *last; 3769 3770 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3771 3772 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3773 first = last = NULL; 3774 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3775 panic("uvm_mapent_clone: no space in map for " 3776 "entry in empty map"); 3777 } 3778 new_entry = uvm_map_mkentry(dstmap, first, last, 3779 dstaddr, dstlen, mapent_flags, dead, NULL); 3780 if (new_entry == NULL) 3781 return NULL; 3782 /* old_entry -> new_entry */ 3783 new_entry->object = old_entry->object; 3784 new_entry->offset = old_entry->offset; 3785 new_entry->aref = old_entry->aref; 3786 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3787 new_entry->protection = prot; 3788 new_entry->max_protection = maxprot; 3789 new_entry->inheritance = old_entry->inheritance; 3790 new_entry->advice = old_entry->advice; 3791 3792 /* gain reference to object backing the map (can't be a submap). */ 3793 if (new_entry->aref.ar_amap) { 3794 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3795 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3796 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3797 amap_share_flags); 3798 } 3799 3800 if (UVM_ET_ISOBJ(new_entry) && 3801 new_entry->object.uvm_obj->pgops->pgo_reference) { 3802 new_entry->offset += off; 3803 new_entry->object.uvm_obj->pgops->pgo_reference 3804 (new_entry->object.uvm_obj); 3805 } 3806 3807 return new_entry; 3808 } 3809 3810 struct vm_map_entry * 3811 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3812 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3813 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3814 { 3815 /* 3816 * If old_entry refers to a copy-on-write region that has not yet been 3817 * written to (needs_copy flag is set), then we need to allocate a new 3818 * amap for old_entry. 3819 * 3820 * If we do not do this, and the process owning old_entry does a copy-on 3821 * write later, old_entry and new_entry will refer to different memory 3822 * regions, and the memory between the processes is no longer shared. 3823 * 3824 * [in other words, we need to clear needs_copy] 3825 */ 3826 3827 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3828 /* get our own amap, clears needs_copy */ 3829 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0); 3830 /* XXXCDC: WAITOK??? */ 3831 } 3832 3833 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3834 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3835 } 3836 3837 /* 3838 * share the mapping: this means we want the old and 3839 * new entries to share amaps and backing objects. 3840 */ 3841 struct vm_map_entry * 3842 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3843 struct vm_map *old_map, 3844 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3845 { 3846 struct vm_map_entry *new_entry; 3847 3848 new_entry = uvm_mapent_share(new_map, old_entry->start, 3849 old_entry->end - old_entry->start, 0, old_entry->protection, 3850 old_entry->max_protection, old_map, old_entry, dead); 3851 3852 /* 3853 * pmap_copy the mappings: this routine is optional 3854 * but if it is there it will reduce the number of 3855 * page faults in the new proc. 3856 */ 3857 if (!UVM_ET_ISHOLE(new_entry)) 3858 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3859 (new_entry->end - new_entry->start), new_entry->start); 3860 3861 return (new_entry); 3862 } 3863 3864 /* 3865 * copy-on-write the mapping (using mmap's 3866 * MAP_PRIVATE semantics) 3867 * 3868 * allocate new_entry, adjust reference counts. 3869 * (note that new references are read-only). 
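 * The clone is marked UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY; the amap is
 * copied eagerly only when it is shared or the parent entry is wired
 * (the two numbered cases explained in the function body), otherwise
 * the copy is deferred until the first write fault.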
3870 */ 3871 struct vm_map_entry * 3872 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3873 struct vm_map *old_map, 3874 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3875 { 3876 struct vm_map_entry *new_entry; 3877 boolean_t protect_child; 3878 3879 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3880 old_entry->end - old_entry->start, 0, old_entry->protection, 3881 old_entry->max_protection, old_entry, dead, 0, 0); 3882 3883 new_entry->etype |= 3884 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3885 3886 /* 3887 * the new entry will need an amap. it will either 3888 * need to be copied from the old entry or created 3889 * from scratch (if the old entry does not have an 3890 * amap). can we defer this process until later 3891 * (by setting "needs_copy") or do we need to copy 3892 * the amap now? 3893 * 3894 * we must copy the amap now if any of the following 3895 * conditions hold: 3896 * 1. the old entry has an amap and that amap is 3897 * being shared. this means that the old (parent) 3898 * process is sharing the amap with another 3899 * process. if we do not clear needs_copy here 3900 * we will end up in a situation where both the 3901 * parent and child process are referring to the 3902 * same amap with "needs_copy" set. if the 3903 * parent write-faults, the fault routine will 3904 * clear "needs_copy" in the parent by allocating 3905 * a new amap. this is wrong because the 3906 * parent is supposed to be sharing the old amap 3907 * and the new amap will break that. 3908 * 3909 * 2. if the old entry has an amap and a non-zero 3910 * wire count then we are going to have to call 3911 * amap_cow_now to avoid page faults in the 3912 * parent process. since amap_cow_now requires 3913 * "needs_copy" to be clear we might as well 3914 * clear it here as well. 3915 * 3916 */ 3917 if (old_entry->aref.ar_amap != NULL && 3918 ((amap_flags(old_entry->aref.ar_amap) & 3919 AMAP_SHARED) != 0 || 3920 VM_MAPENT_ISWIRED(old_entry))) { 3921 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3922 0, 0); 3923 /* XXXCDC: M_WAITOK ... ok? */ 3924 } 3925 3926 /* 3927 * if the parent's entry is wired down, then the 3928 * parent process does not want page faults on 3929 * access to that memory. this means that we 3930 * cannot do copy-on-write because we can't write 3931 * protect the old entry. in this case we 3932 * resolve all copy-on-write faults now, using 3933 * amap_cow_now. note that we have already 3934 * allocated any needed amap (above). 3935 */ 3936 if (VM_MAPENT_ISWIRED(old_entry)) { 3937 /* 3938 * resolve all copy-on-write faults now 3939 * (note that there is nothing to do if 3940 * the old mapping does not have an amap). 3941 * XXX: is it worthwhile to bother with 3942 * pmap_copy in this case? 3943 */ 3944 if (old_entry->aref.ar_amap) 3945 amap_cow_now(new_map, new_entry); 3946 } else { 3947 if (old_entry->aref.ar_amap) { 3948 /* 3949 * setup mappings to trigger copy-on-write faults 3950 * we must write-protect the parent if it has 3951 * an amap and it is not already "needs_copy"... 3952 * if it is already "needs_copy" then the parent 3953 * has already been write-protected by a previous 3954 * fork operation. 3955 * 3956 * if we do not write-protect the parent, then 3957 * we must be sure to write-protect the child 3958 * after the pmap_copy() operation. 3959 * 3960 * XXX: pmap_copy should have some way of telling 3961 * us that it didn't do anything so we can avoid 3962 * calling pmap_protect needlessly. 
3963 */ 3964 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3965 if (old_entry->max_protection & PROT_WRITE) { 3966 pmap_protect(old_map->pmap, 3967 old_entry->start, 3968 old_entry->end, 3969 old_entry->protection & 3970 ~PROT_WRITE); 3971 pmap_update(old_map->pmap); 3972 } 3973 old_entry->etype |= UVM_ET_NEEDSCOPY; 3974 } 3975 3976 /* parent must now be write-protected */ 3977 protect_child = FALSE; 3978 } else { 3979 /* 3980 * we only need to protect the child if the 3981 * parent has write access. 3982 */ 3983 if (old_entry->max_protection & PROT_WRITE) 3984 protect_child = TRUE; 3985 else 3986 protect_child = FALSE; 3987 } 3988 /* 3989 * copy the mappings 3990 * XXX: need a way to tell if this does anything 3991 */ 3992 if (!UVM_ET_ISHOLE(new_entry)) 3993 pmap_copy(new_map->pmap, old_map->pmap, 3994 new_entry->start, 3995 (old_entry->end - old_entry->start), 3996 old_entry->start); 3997 3998 /* protect the child's mappings if necessary */ 3999 if (protect_child) { 4000 pmap_protect(new_map->pmap, new_entry->start, 4001 new_entry->end, 4002 new_entry->protection & 4003 ~PROT_WRITE); 4004 } 4005 } 4006 4007 return (new_entry); 4008 } 4009 4010 /* 4011 * zero the mapping: the new entry will be zero initialized 4012 */ 4013 struct vm_map_entry * 4014 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 4015 struct vm_map *old_map, 4016 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 4017 { 4018 struct vm_map_entry *new_entry; 4019 4020 new_entry = uvm_mapent_clone(new_map, old_entry->start, 4021 old_entry->end - old_entry->start, 0, old_entry->protection, 4022 old_entry->max_protection, old_entry, dead, 0, 0); 4023 4024 new_entry->etype |= 4025 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4026 4027 if (new_entry->aref.ar_amap) { 4028 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 4029 atop(new_entry->end - new_entry->start), 0); 4030 new_entry->aref.ar_amap = NULL; 4031 new_entry->aref.ar_pageoff = 0; 4032 } 4033 4034 if (UVM_ET_ISOBJ(new_entry)) { 4035 if (new_entry->object.uvm_obj->pgops->pgo_detach) 4036 new_entry->object.uvm_obj->pgops->pgo_detach( 4037 new_entry->object.uvm_obj); 4038 new_entry->object.uvm_obj = NULL; 4039 new_entry->etype &= ~UVM_ET_OBJ; 4040 } 4041 4042 return (new_entry); 4043 } 4044 4045 /* 4046 * uvmspace_fork: fork a process' main map 4047 * 4048 * => create a new vmspace for child process from parent. 4049 * => parent's map must not be locked. 4050 */ 4051 struct vmspace * 4052 uvmspace_fork(struct process *pr) 4053 { 4054 struct vmspace *vm1 = pr->ps_vmspace; 4055 struct vmspace *vm2; 4056 struct vm_map *old_map = &vm1->vm_map; 4057 struct vm_map *new_map; 4058 struct vm_map_entry *old_entry, *new_entry; 4059 struct uvm_map_deadq dead; 4060 4061 vm_map_lock(old_map); 4062 4063 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 4064 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 4065 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4066 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 4067 vm2->vm_dused = 0; /* Statistic managed by us. 
*/ 4068 new_map = &vm2->vm_map; 4069 vm_map_lock(new_map); 4070 4071 /* go entry-by-entry */ 4072 TAILQ_INIT(&dead); 4073 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 4074 if (old_entry->start == old_entry->end) 4075 continue; 4076 4077 /* first, some sanity checks on the old entry */ 4078 if (UVM_ET_ISSUBMAP(old_entry)) { 4079 panic("fork: encountered a submap during fork " 4080 "(illegal)"); 4081 } 4082 4083 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 4084 UVM_ET_ISNEEDSCOPY(old_entry)) { 4085 panic("fork: non-copy_on_write map entry marked " 4086 "needs_copy (illegal)"); 4087 } 4088 4089 /* Apply inheritance. */ 4090 switch (old_entry->inheritance) { 4091 case MAP_INHERIT_SHARE: 4092 new_entry = uvm_mapent_forkshared(vm2, new_map, 4093 old_map, old_entry, &dead); 4094 break; 4095 case MAP_INHERIT_COPY: 4096 new_entry = uvm_mapent_forkcopy(vm2, new_map, 4097 old_map, old_entry, &dead); 4098 break; 4099 case MAP_INHERIT_ZERO: 4100 new_entry = uvm_mapent_forkzero(vm2, new_map, 4101 old_map, old_entry, &dead); 4102 break; 4103 default: 4104 continue; 4105 } 4106 4107 /* Update process statistics. */ 4108 if (!UVM_ET_ISHOLE(new_entry)) 4109 new_map->size += new_entry->end - new_entry->start; 4110 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) && 4111 new_entry->protection != PROT_NONE) { 4112 vm2->vm_dused += uvmspace_dused( 4113 new_map, new_entry->start, new_entry->end); 4114 } 4115 } 4116 4117 vm_map_unlock(old_map); 4118 vm_map_unlock(new_map); 4119 4120 /* 4121 * This can actually happen, if multiple entries described a 4122 * space in which an entry was inherited. 4123 */ 4124 uvm_unmap_detach(&dead, 0); 4125 4126 #ifdef SYSVSHM 4127 if (vm1->vm_shm) 4128 shmfork(vm1, vm2); 4129 #endif 4130 4131 return vm2; 4132 } 4133 4134 /* 4135 * uvm_map_hint: return the beginning of the best area suitable for 4136 * creating a new mapping with "prot" protection. 4137 */ 4138 vaddr_t 4139 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 4140 vaddr_t maxaddr) 4141 { 4142 vaddr_t addr; 4143 vaddr_t spacing; 4144 4145 #ifdef __i386__ 4146 /* 4147 * If executable skip first two pages, otherwise start 4148 * after data + heap region. 4149 */ 4150 if ((prot & PROT_EXEC) != 0 && 4151 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4152 addr = (PAGE_SIZE*2) + 4153 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4154 return (round_page(addr)); 4155 } 4156 #endif 4157 4158 #if defined (__LP64__) 4159 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4160 #else 4161 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4162 #endif 4163 4164 /* 4165 * Start malloc/mmap after the brk. 4166 */ 4167 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 4168 addr = MAX(addr, minaddr); 4169 4170 if (addr < maxaddr) { 4171 while (spacing > maxaddr - addr) 4172 spacing >>= 1; 4173 } 4174 addr += arc4random() & spacing; 4175 return (round_page(addr)); 4176 } 4177 4178 /* 4179 * uvm_map_submap: punch down part of a map into a submap 4180 * 4181 * => only the kernel_map is allowed to be submapped 4182 * => the purpose of submapping is to break up the locking granularity 4183 * of a larger map 4184 * => the range specified must have been mapped previously with a uvm_map() 4185 * call [with uobj==NULL] to create a blank map entry in the main map. 4186 * [And it had better still be blank!] 4187 * => maps which contain submaps should never be copied or forked. 4188 * => to remove a submap, use uvm_unmap() on the main map 4189 * and then uvm_map_deallocate() the submap. 4190 * => main map must be unlocked. 
 * => submap must have been init'd and have a zero reference count.
 *	[need not be locked as we don't actually reference it]
 */
int
uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map *submap)
{
        struct vm_map_entry *entry;
        int result;

        if (start > map->max_offset || end > map->max_offset ||
            start < map->min_offset || end < map->min_offset)
                return EINVAL;

        vm_map_lock(map);

        if (uvm_map_lookup_entry(map, start, &entry)) {
                UVM_MAP_CLIP_START(map, entry, start);
                UVM_MAP_CLIP_END(map, entry, end);
        } else
                entry = NULL;

        if (entry != NULL &&
            entry->start == start && entry->end == end &&
            entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
            !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
                entry->etype |= UVM_ET_SUBMAP;
                entry->object.sub_map = submap;
                entry->offset = 0;
                uvm_map_reference(submap);
                result = 0;
        } else
                result = EINVAL;

        vm_map_unlock(map);
        return (result);
}

/*
 * uvm_map_checkprot: check protection in map
 *
 * => must allow the specified protection in a fully allocated region.
 * => map must be read or write locked by caller.
 */
boolean_t
uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_prot_t protection)
{
        struct vm_map_entry *entry;

        if (start < map->min_offset || end > map->max_offset || start > end)
                return FALSE;
        if (start == end)
                return TRUE;

        /*
         * Iterate entries.
         */
        for (entry = uvm_map_entrybyaddr(&map->addr, start);
            entry != NULL && entry->start < end;
            entry = RBT_NEXT(uvm_map_addr, entry)) {
                /* Fail if a hole is found. */
                if (UVM_ET_ISHOLE(entry) ||
                    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
                        return FALSE;

                /* Check protection. */
                if ((entry->protection & protection) != protection)
                        return FALSE;
        }
        return TRUE;
}

/*
 * uvm_map_create: create map
 */
vm_map_t
uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
{
        vm_map_t map;

        map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
        uvm_map_setup(map, pmap, min, max, flags);
        return (map);
}

/*
 * uvm_map_deallocate: drop reference to a map
 *
 * => caller must not lock map
 * => we will zap map if ref count goes to zero
 */
void
uvm_map_deallocate(vm_map_t map)
{
        int c;
        struct uvm_map_deadq dead;

        c = --map->ref_count;
        if (c > 0) {
                return;
        }

        /*
         * all references gone. unmap and free.
         *
         * No lock required: we are the only one to access this map.
         */
        TAILQ_INIT(&dead);
        uvm_tree_sanity(map, __FILE__, __LINE__);
        uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
            TRUE, FALSE);
        pmap_destroy(map->pmap);
        KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
        free(map, M_VMMAP, sizeof *map);

        uvm_unmap_detach(&dead, 0);
}

/*
 * uvm_map_inherit: set inheritance code for range of addrs in map.
 *
 * => map must be unlocked
 * => note that the inherit code is used during a "fork". see fork
 *	code for details.
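 *
 * Illustrative userland view (hedged; based on the minherit(2)
 * interface, which ends up here): before fork(), a process may do
 *
 *	minherit(addr, len, MAP_INHERIT_SHARE);	(child shares the pages)
 *	minherit(addr, len, MAP_INHERIT_ZERO);	(child gets zero-filled pages)
 *
 * and the codes stored by those calls select the fork variant used above.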
4316 */ 4317 int 4318 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4319 vm_inherit_t new_inheritance) 4320 { 4321 struct vm_map_entry *entry; 4322 4323 switch (new_inheritance) { 4324 case MAP_INHERIT_NONE: 4325 case MAP_INHERIT_COPY: 4326 case MAP_INHERIT_SHARE: 4327 case MAP_INHERIT_ZERO: 4328 break; 4329 default: 4330 return (EINVAL); 4331 } 4332 4333 if (start > end) 4334 return EINVAL; 4335 start = MAX(start, map->min_offset); 4336 end = MIN(end, map->max_offset); 4337 if (start >= end) 4338 return 0; 4339 4340 vm_map_lock(map); 4341 4342 entry = uvm_map_entrybyaddr(&map->addr, start); 4343 if (entry->end > start) 4344 UVM_MAP_CLIP_START(map, entry, start); 4345 else 4346 entry = RBT_NEXT(uvm_map_addr, entry); 4347 4348 while (entry != NULL && entry->start < end) { 4349 UVM_MAP_CLIP_END(map, entry, end); 4350 entry->inheritance = new_inheritance; 4351 entry = RBT_NEXT(uvm_map_addr, entry); 4352 } 4353 4354 vm_map_unlock(map); 4355 return (0); 4356 } 4357 4358 /* 4359 * uvm_map_syscall: permit system calls for range of addrs in map. 4360 * 4361 * => map must be unlocked 4362 */ 4363 int 4364 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end) 4365 { 4366 struct vm_map_entry *entry; 4367 4368 if (start > end) 4369 return EINVAL; 4370 start = MAX(start, map->min_offset); 4371 end = MIN(end, map->max_offset); 4372 if (start >= end) 4373 return 0; 4374 if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */ 4375 return (EPERM); 4376 4377 vm_map_lock(map); 4378 4379 entry = uvm_map_entrybyaddr(&map->addr, start); 4380 if (entry->end > start) 4381 UVM_MAP_CLIP_START(map, entry, start); 4382 else 4383 entry = RBT_NEXT(uvm_map_addr, entry); 4384 4385 while (entry != NULL && entry->start < end) { 4386 UVM_MAP_CLIP_END(map, entry, end); 4387 entry->etype |= UVM_ET_SYSCALL; 4388 entry = RBT_NEXT(uvm_map_addr, entry); 4389 } 4390 4391 map->wserial++; 4392 map->flags |= VM_MAP_SYSCALL_ONCE; 4393 vm_map_unlock(map); 4394 return (0); 4395 } 4396 4397 /* 4398 * uvm_map_advice: set advice code for range of addrs in map. 4399 * 4400 * => map must be unlocked 4401 */ 4402 int 4403 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4404 { 4405 struct vm_map_entry *entry; 4406 4407 switch (new_advice) { 4408 case MADV_NORMAL: 4409 case MADV_RANDOM: 4410 case MADV_SEQUENTIAL: 4411 break; 4412 default: 4413 return (EINVAL); 4414 } 4415 4416 if (start > end) 4417 return EINVAL; 4418 start = MAX(start, map->min_offset); 4419 end = MIN(end, map->max_offset); 4420 if (start >= end) 4421 return 0; 4422 4423 vm_map_lock(map); 4424 4425 entry = uvm_map_entrybyaddr(&map->addr, start); 4426 if (entry != NULL && entry->end > start) 4427 UVM_MAP_CLIP_START(map, entry, start); 4428 else if (entry!= NULL) 4429 entry = RBT_NEXT(uvm_map_addr, entry); 4430 4431 /* 4432 * XXXJRT: disallow holes? 4433 */ 4434 while (entry != NULL && entry->start < end) { 4435 UVM_MAP_CLIP_END(map, entry, end); 4436 entry->advice = new_advice; 4437 entry = RBT_NEXT(uvm_map_addr, entry); 4438 } 4439 4440 vm_map_unlock(map); 4441 return (0); 4442 } 4443 4444 /* 4445 * uvm_map_extract: extract a mapping from a map and put it somewhere 4446 * in the kernel_map, setting protection to max_prot. 
4447 * 4448 * => map should be unlocked (we will write lock it and kernel_map) 4449 * => returns 0 on success, error code otherwise 4450 * => start must be page aligned 4451 * => len must be page sized 4452 * => flags: 4453 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4454 * Mappings are QREF's. 4455 */ 4456 int 4457 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4458 vaddr_t *dstaddrp, int flags) 4459 { 4460 struct uvm_map_deadq dead; 4461 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4462 vaddr_t dstaddr; 4463 vaddr_t end; 4464 vaddr_t cp_start; 4465 vsize_t cp_len, cp_off; 4466 int error; 4467 4468 TAILQ_INIT(&dead); 4469 end = start + len; 4470 4471 /* 4472 * Sanity check on the parameters. 4473 * Also, since the mapping may not contain gaps, error out if the 4474 * mapped area is not in source map. 4475 */ 4476 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4477 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4478 return EINVAL; 4479 if (start < srcmap->min_offset || end > srcmap->max_offset) 4480 return EINVAL; 4481 4482 /* Initialize dead entries. Handle len == 0 case. */ 4483 if (len == 0) 4484 return 0; 4485 4486 /* Acquire lock on srcmap. */ 4487 vm_map_lock(srcmap); 4488 4489 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4490 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4491 4492 /* Check that the range is contiguous. */ 4493 for (entry = first; entry != NULL && entry->end < end; 4494 entry = RBT_NEXT(uvm_map_addr, entry)) { 4495 if (VMMAP_FREE_END(entry) != entry->end || 4496 UVM_ET_ISHOLE(entry)) { 4497 error = EINVAL; 4498 goto fail; 4499 } 4500 } 4501 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4502 error = EINVAL; 4503 goto fail; 4504 } 4505 4506 /* 4507 * Handle need-copy flag. 4508 */ 4509 for (entry = first; entry != NULL && entry->start < end; 4510 entry = RBT_NEXT(uvm_map_addr, entry)) { 4511 if (UVM_ET_ISNEEDSCOPY(entry)) 4512 amap_copy(srcmap, entry, M_NOWAIT, 4513 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4514 if (UVM_ET_ISNEEDSCOPY(entry)) { 4515 /* 4516 * amap_copy failure 4517 */ 4518 error = ENOMEM; 4519 goto fail; 4520 } 4521 } 4522 4523 /* Lock destination map (kernel_map). */ 4524 vm_map_lock(kernel_map); 4525 4526 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4527 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4528 PROT_NONE, 0) != 0) { 4529 error = ENOMEM; 4530 goto fail2; 4531 } 4532 *dstaddrp = dstaddr; 4533 4534 /* 4535 * We now have srcmap and kernel_map locked. 4536 * dstaddr contains the destination offset in dstmap. 4537 */ 4538 /* step 1: start looping through map entries, performing extraction. */ 4539 for (entry = first; entry != NULL && entry->start < end; 4540 entry = RBT_NEXT(uvm_map_addr, entry)) { 4541 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4542 if (UVM_ET_ISHOLE(entry)) 4543 continue; 4544 4545 /* Calculate uvm_mapent_clone parameters. 
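 * Worked example (assuming 4 KB pages): for an entry covering
 * [0x2000, 0x6000) and an extraction range [0x3000, 0x5000) this
 * yields cp_off = 0x1000, cp_start = 0x3000 and cp_len = 0x2000,
 * i.e. only the two overlapping pages are cloned.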
*/ 4546 cp_start = entry->start; 4547 if (cp_start < start) { 4548 cp_off = start - cp_start; 4549 cp_start = start; 4550 } else 4551 cp_off = 0; 4552 cp_len = MIN(entry->end, end) - cp_start; 4553 4554 newentry = uvm_mapent_clone(kernel_map, 4555 cp_start - start + dstaddr, cp_len, cp_off, 4556 entry->protection, entry->max_protection, 4557 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4558 if (newentry == NULL) { 4559 error = ENOMEM; 4560 goto fail2_unmap; 4561 } 4562 kernel_map->size += cp_len; 4563 if (flags & UVM_EXTRACT_FIXPROT) 4564 newentry->protection = newentry->max_protection; 4565 4566 /* 4567 * Step 2: perform pmap copy. 4568 * (Doing this in the loop saves one RB traversal.) 4569 */ 4570 pmap_copy(kernel_map->pmap, srcmap->pmap, 4571 cp_start - start + dstaddr, cp_len, cp_start); 4572 } 4573 pmap_update(kernel_map->pmap); 4574 4575 error = 0; 4576 4577 /* Unmap copied entries on failure. */ 4578 fail2_unmap: 4579 if (error) { 4580 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4581 FALSE, TRUE); 4582 } 4583 4584 /* Release maps, release dead entries. */ 4585 fail2: 4586 vm_map_unlock(kernel_map); 4587 4588 fail: 4589 vm_map_unlock(srcmap); 4590 4591 uvm_unmap_detach(&dead, 0); 4592 4593 return error; 4594 } 4595 4596 /* 4597 * uvm_map_clean: clean out a map range 4598 * 4599 * => valid flags: 4600 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4601 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4602 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4603 * if (flags & PGO_FREE): any cached pages are freed after clean 4604 * => returns an error if any part of the specified range isn't mapped 4605 * => never a need to flush amap layer since the anonymous memory has 4606 * no permanent home, but may deactivate pages there 4607 * => called from sys_msync() and sys_madvise() 4608 * => caller must not write-lock map (read OK). 4609 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4610 */ 4611 4612 int 4613 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4614 { 4615 struct vm_map_entry *first, *entry; 4616 struct vm_amap *amap; 4617 struct vm_anon *anon; 4618 struct vm_page *pg; 4619 struct uvm_object *uobj; 4620 vaddr_t cp_start, cp_end; 4621 int refs; 4622 int error; 4623 boolean_t rv; 4624 4625 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4626 (PGO_FREE|PGO_DEACTIVATE)); 4627 4628 if (start > end || start < map->min_offset || end > map->max_offset) 4629 return EINVAL; 4630 4631 vm_map_lock_read(map); 4632 first = uvm_map_entrybyaddr(&map->addr, start); 4633 4634 /* Make a first pass to check for holes. */ 4635 for (entry = first; entry != NULL && entry->start < end; 4636 entry = RBT_NEXT(uvm_map_addr, entry)) { 4637 if (UVM_ET_ISSUBMAP(entry)) { 4638 vm_map_unlock_read(map); 4639 return EINVAL; 4640 } 4641 if (UVM_ET_ISSUBMAP(entry) || 4642 UVM_ET_ISHOLE(entry) || 4643 (entry->end < end && 4644 VMMAP_FREE_END(entry) != entry->end)) { 4645 vm_map_unlock_read(map); 4646 return EFAULT; 4647 } 4648 } 4649 4650 error = 0; 4651 for (entry = first; entry != NULL && entry->start < end; 4652 entry = RBT_NEXT(uvm_map_addr, entry)) { 4653 amap = entry->aref.ar_amap; /* top layer */ 4654 if (UVM_ET_ISOBJ(entry)) 4655 uobj = entry->object.uvm_obj; 4656 else 4657 uobj = NULL; 4658 4659 /* 4660 * No amap cleaning necessary if: 4661 * - there's no amap 4662 * - we're not deactivating or freeing pages. 
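 * (The amap layer itself never needs writing back: anonymous memory
 * has no permanent backing store, so with PGO_CLEANIT alone we skip
 * straight to the backing object.)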
4663 */ 4664 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4665 goto flush_object; 4666 4667 cp_start = MAX(entry->start, start); 4668 cp_end = MIN(entry->end, end); 4669 4670 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4671 anon = amap_lookup(&entry->aref, 4672 cp_start - entry->start); 4673 if (anon == NULL) 4674 continue; 4675 4676 pg = anon->an_page; 4677 if (pg == NULL) { 4678 continue; 4679 } 4680 KASSERT(pg->pg_flags & PQ_ANON); 4681 4682 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4683 /* 4684 * XXX In these first 3 cases, we always just 4685 * XXX deactivate the page. We may want to 4686 * XXX handle the different cases more 4687 * XXX specifically, in the future. 4688 */ 4689 case PGO_CLEANIT|PGO_FREE: 4690 case PGO_CLEANIT|PGO_DEACTIVATE: 4691 case PGO_DEACTIVATE: 4692 deactivate_it: 4693 /* skip the page if it's wired */ 4694 if (pg->wire_count != 0) 4695 break; 4696 4697 uvm_lock_pageq(); 4698 4699 KASSERT(pg->uanon == anon); 4700 4701 /* zap all mappings for the page. */ 4702 pmap_page_protect(pg, PROT_NONE); 4703 4704 /* ...and deactivate the page. */ 4705 uvm_pagedeactivate(pg); 4706 4707 uvm_unlock_pageq(); 4708 break; 4709 case PGO_FREE: 4710 /* 4711 * If there are multiple references to 4712 * the amap, just deactivate the page. 4713 */ 4714 if (amap_refs(amap) > 1) 4715 goto deactivate_it; 4716 4717 /* XXX skip the page if it's wired */ 4718 if (pg->wire_count != 0) { 4719 break; 4720 } 4721 amap_unadd(&entry->aref, 4722 cp_start - entry->start); 4723 refs = --anon->an_ref; 4724 if (refs == 0) 4725 uvm_anfree(anon); 4726 break; 4727 default: 4728 panic("uvm_map_clean: weird flags"); 4729 } 4730 } 4731 4732 flush_object: 4733 cp_start = MAX(entry->start, start); 4734 cp_end = MIN(entry->end, end); 4735 4736 /* 4737 * flush pages if we've got a valid backing object. 4738 * 4739 * Don't PGO_FREE if we don't have write permission 4740 * and don't flush if this is a copy-on-write object 4741 * since we can't know our permissions on it. 4742 */ 4743 if (uobj != NULL && 4744 ((flags & PGO_FREE) == 0 || 4745 ((entry->max_protection & PROT_WRITE) != 0 && 4746 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4747 rv = uobj->pgops->pgo_flush(uobj, 4748 cp_start - entry->start + entry->offset, 4749 cp_end - entry->start + entry->offset, flags); 4750 4751 if (rv == FALSE) 4752 error = EFAULT; 4753 } 4754 } 4755 4756 vm_map_unlock_read(map); 4757 return error; 4758 } 4759 4760 /* 4761 * UVM_MAP_CLIP_END implementation 4762 */ 4763 void 4764 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4765 { 4766 struct vm_map_entry *tmp; 4767 4768 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4769 tmp = uvm_mapent_alloc(map, 0); 4770 4771 /* Invoke splitentry. */ 4772 uvm_map_splitentry(map, entry, tmp, addr); 4773 } 4774 4775 /* 4776 * UVM_MAP_CLIP_START implementation 4777 * 4778 * Clippers are required to not change the pointers to the entry they are 4779 * clipping on. 4780 * Since uvm_map_splitentry turns the original entry into the lowest 4781 * entry (address wise) we do a swap between the new entry and the original 4782 * entry, prior to calling uvm_map_splitentry. 4783 */ 4784 void 4785 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4786 { 4787 struct vm_map_entry *tmp; 4788 struct uvm_addr_state *free; 4789 4790 /* Unlink original. 
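 * The unlink/copy/swap below keeps the caller's pointer stable; the
 * net effect is roughly:
 *
 *	before:	entry = [ start ............... end )
 *	after:	tmp   = [ start .. addr )
 *		entry =           [ addr ....... end )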
*/ 4791 free = uvm_map_uaddr_e(map, entry); 4792 uvm_mapent_free_remove(map, free, entry); 4793 uvm_mapent_addr_remove(map, entry); 4794 4795 /* Copy entry. */ 4796 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4797 tmp = uvm_mapent_alloc(map, 0); 4798 uvm_mapent_copy(entry, tmp); 4799 4800 /* Put new entry in place of original entry. */ 4801 uvm_mapent_addr_insert(map, tmp); 4802 uvm_mapent_free_insert(map, free, tmp); 4803 4804 /* Invoke splitentry. */ 4805 uvm_map_splitentry(map, tmp, entry, addr); 4806 } 4807 4808 /* 4809 * Boundary fixer. 4810 */ 4811 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4812 static __inline vaddr_t 4813 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4814 { 4815 return (min < bound && max > bound) ? bound : max; 4816 } 4817 4818 /* 4819 * Choose free list based on address at start of free space. 4820 * 4821 * The uvm_addr_state returned contains addr and is the first of: 4822 * - uaddr_exe 4823 * - uaddr_brk_stack 4824 * - uaddr_any 4825 */ 4826 struct uvm_addr_state* 4827 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4828 { 4829 struct uvm_addr_state *uaddr; 4830 int i; 4831 4832 /* Special case the first page, to prevent mmap from returning 0. */ 4833 if (addr < VMMAP_MIN_ADDR) 4834 return NULL; 4835 4836 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4837 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4838 if (addr >= uvm_maxkaddr) 4839 return NULL; 4840 } 4841 4842 /* Is the address inside the exe-only map? */ 4843 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4844 addr < map->uaddr_exe->uaddr_maxaddr) 4845 return map->uaddr_exe; 4846 4847 /* Check if the space falls inside brk/stack area. */ 4848 if ((addr >= map->b_start && addr < map->b_end) || 4849 (addr >= map->s_start && addr < map->s_end)) { 4850 if (map->uaddr_brk_stack != NULL && 4851 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4852 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4853 return map->uaddr_brk_stack; 4854 } else 4855 return NULL; 4856 } 4857 4858 /* 4859 * Check the other selectors. 4860 * 4861 * These selectors are only marked as the owner, if they have insert 4862 * functions. 4863 */ 4864 for (i = 0; i < nitems(map->uaddr_any); i++) { 4865 uaddr = map->uaddr_any[i]; 4866 if (uaddr == NULL) 4867 continue; 4868 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4869 continue; 4870 4871 if (addr >= uaddr->uaddr_minaddr && 4872 addr < uaddr->uaddr_maxaddr) 4873 return uaddr; 4874 } 4875 4876 return NULL; 4877 } 4878 4879 /* 4880 * Choose free list based on address at start of free space. 4881 * 4882 * The uvm_addr_state returned contains addr and is the first of: 4883 * - uaddr_exe 4884 * - uaddr_brk_stack 4885 * - uaddr_any 4886 */ 4887 struct uvm_addr_state* 4888 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4889 { 4890 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4891 } 4892 4893 /* 4894 * Returns the first free-memory boundary that is crossed by [min-max]. 4895 */ 4896 vsize_t 4897 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4898 { 4899 struct uvm_addr_state *uaddr; 4900 int i; 4901 4902 /* Never return first page. */ 4903 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4904 4905 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4906 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4907 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4908 4909 /* Check for exe-only boundaries. 
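 * (uvm_map_boundfix() clamps max to the first boundary that falls
 * strictly inside (min, max); e.g. min = 0x1000, max = 0x9000 and a
 * boundary at 0x4000 yield 0x4000, while a boundary at or beyond max
 * leaves max unchanged.)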
 */
        if (map->uaddr_exe != NULL) {
                max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
                max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
        }

        /* Check for brk()/stack boundaries. */
        if (map->uaddr_brk_stack != NULL) {
                max = uvm_map_boundfix(min, max,
                    map->uaddr_brk_stack->uaddr_minaddr);
                max = uvm_map_boundfix(min, max,
                    map->uaddr_brk_stack->uaddr_maxaddr);
        }

        /* Check other boundaries. */
        for (i = 0; i < nitems(map->uaddr_any); i++) {
                uaddr = map->uaddr_any[i];
                if (uaddr != NULL) {
                        max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
                        max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
                }
        }

        /* Boundaries at stack and brk() area. */
        max = uvm_map_boundfix(min, max, map->s_start);
        max = uvm_map_boundfix(min, max, map->s_end);
        max = uvm_map_boundfix(min, max, map->b_start);
        max = uvm_map_boundfix(min, max, map->b_end);

        return max;
}

/*
 * Update map allocation start and end addresses from proc vmspace.
 */
void
uvm_map_vmspace_update(struct vm_map *map,
    struct uvm_map_deadq *dead, int flags)
{
        struct vmspace *vm;
        vaddr_t b_start, b_end, s_start, s_end;

        KASSERT(map->flags & VM_MAP_ISVMSPACE);
        KASSERT(offsetof(struct vmspace, vm_map) == 0);

        /*
         * Derive actual allocation boundaries from vmspace.
         */
        vm = (struct vmspace *)map;
        b_start = (vaddr_t)vm->vm_daddr;
        b_end = b_start + BRKSIZ;
        s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
        s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
#ifdef DIAGNOSTIC
        if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
            (b_end & (vaddr_t)PAGE_MASK) != 0 ||
            (s_start & (vaddr_t)PAGE_MASK) != 0 ||
            (s_end & (vaddr_t)PAGE_MASK) != 0) {
                panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
                    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
                    vm, b_start, b_end, s_start, s_end);
        }
#endif

        if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
            map->s_start == s_start && map->s_end == s_end))
                return;

        uvm_map_freelist_update(map, dead, b_start, b_end,
            s_start, s_end, flags);
}

/*
 * Grow kernel memory.
 *
 * This function is only called for kernel maps when an allocation fails.
 *
 * If the map has a gap that is large enough to accommodate alloc_sz, this
 * function will make sure map->free will include it.
 */
void
uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
    vsize_t alloc_sz, int flags)
{
        vsize_t sz;
        vaddr_t end;
        struct vm_map_entry *entry;

        /* Kernel memory only. */
        KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
        /* Destroy free list. */
        uvm_map_freelist_update_clear(map, dead);

        /* Include the guard page in the hard minimum requirement of alloc_sz. */
        if (map->flags & VM_MAP_GUARDPAGES)
                alloc_sz += PAGE_SIZE;

        /*
         * Grow by VM_MAP_KSIZE_ALLOCMUL * alloc_sz, but at least
         * VM_MAP_KSIZE_DELTA.
         *
         * Don't handle the case where the multiplication overflows:
         * if that happens, the allocation is probably too big anyway.
         */
        sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);

        /*
         * Walk forward until a gap large enough for alloc_sz shows up.
         *
         * We assume the kernel map has no boundaries.
5018 * uvm_maxkaddr may be zero. 5019 */ 5020 end = MAX(uvm_maxkaddr, map->min_offset); 5021 entry = uvm_map_entrybyaddr(&map->addr, end); 5022 while (entry && entry->fspace < alloc_sz) 5023 entry = RBT_NEXT(uvm_map_addr, entry); 5024 if (entry) { 5025 end = MAX(VMMAP_FREE_START(entry), end); 5026 end += MIN(sz, map->max_offset - end); 5027 } else 5028 end = map->max_offset; 5029 5030 /* Reserve pmap entries. */ 5031 #ifdef PMAP_GROWKERNEL 5032 uvm_maxkaddr = pmap_growkernel(end); 5033 #else 5034 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 5035 #endif 5036 5037 /* Rebuild free list. */ 5038 uvm_map_freelist_update_refill(map, flags); 5039 } 5040 5041 /* 5042 * Freelist update subfunction: unlink all entries from freelists. 5043 */ 5044 void 5045 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 5046 { 5047 struct uvm_addr_state *free; 5048 struct vm_map_entry *entry, *prev, *next; 5049 5050 prev = NULL; 5051 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 5052 entry = next) { 5053 next = RBT_NEXT(uvm_map_addr, entry); 5054 5055 free = uvm_map_uaddr_e(map, entry); 5056 uvm_mapent_free_remove(map, free, entry); 5057 5058 if (prev != NULL && entry->start == entry->end) { 5059 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 5060 uvm_mapent_addr_remove(map, entry); 5061 DEAD_ENTRY_PUSH(dead, entry); 5062 } else 5063 prev = entry; 5064 } 5065 } 5066 5067 /* 5068 * Freelist update subfunction: refill the freelists with entries. 5069 */ 5070 void 5071 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 5072 { 5073 struct vm_map_entry *entry; 5074 vaddr_t min, max; 5075 5076 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5077 min = VMMAP_FREE_START(entry); 5078 max = VMMAP_FREE_END(entry); 5079 entry->fspace = 0; 5080 5081 entry = uvm_map_fix_space(map, entry, min, max, flags); 5082 } 5083 5084 uvm_tree_sanity(map, __FILE__, __LINE__); 5085 } 5086 5087 /* 5088 * Change {a,b}_{start,end} allocation ranges and associated free lists. 5089 */ 5090 void 5091 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 5092 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 5093 { 5094 KDASSERT(b_end >= b_start && s_end >= s_start); 5095 5096 /* Clear all free lists. */ 5097 uvm_map_freelist_update_clear(map, dead); 5098 5099 /* Apply new bounds. */ 5100 map->b_start = b_start; 5101 map->b_end = b_end; 5102 map->s_start = s_start; 5103 map->s_end = s_end; 5104 5105 /* Refill free lists. */ 5106 uvm_map_freelist_update_refill(map, flags); 5107 } 5108 5109 /* 5110 * Assign a uvm_addr_state to the specified pointer in vm_map. 5111 * 5112 * May sleep. 5113 */ 5114 void 5115 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 5116 struct uvm_addr_state *newval) 5117 { 5118 struct uvm_map_deadq dead; 5119 5120 /* Pointer which must be in this map. */ 5121 KASSERT(which != NULL); 5122 KASSERT((void*)map <= (void*)(which) && 5123 (void*)(which) < (void*)(map + 1)); 5124 5125 vm_map_lock(map); 5126 TAILQ_INIT(&dead); 5127 uvm_map_freelist_update_clear(map, &dead); 5128 5129 uvm_addr_destroy(*which); 5130 *which = newval; 5131 5132 uvm_map_freelist_update_refill(map, 0); 5133 vm_map_unlock(map); 5134 uvm_unmap_detach(&dead, 0); 5135 } 5136 5137 /* 5138 * Correct space insert. 5139 * 5140 * Entry must not be on any freelist. 
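 *
 * Reminder of the layout this maintains (see the VMMAP_FREE_START and
 * VMMAP_FREE_END macros in uvm_map.h): each entry describes its mapped
 * range [start, end), an optional guard after it, and then fspace bytes
 * of free space, so VMMAP_FREE_START(entry) == end + guard and
 * VMMAP_FREE_END(entry) == VMMAP_FREE_START(entry) + fspace.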
5141 */ 5142 struct vm_map_entry* 5143 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5144 vaddr_t min, vaddr_t max, int flags) 5145 { 5146 struct uvm_addr_state *free, *entfree; 5147 vaddr_t lmax; 5148 5149 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5150 KDASSERT(min <= max); 5151 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5152 min == map->min_offset); 5153 5154 /* 5155 * During the function, entfree will always point at the uaddr state 5156 * for entry. 5157 */ 5158 entfree = (entry == NULL ? NULL : 5159 uvm_map_uaddr_e(map, entry)); 5160 5161 while (min != max) { 5162 /* Claim guard page for entry. */ 5163 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5164 VMMAP_FREE_END(entry) == entry->end && 5165 entry->start != entry->end) { 5166 if (max - min == 2 * PAGE_SIZE) { 5167 /* 5168 * If the free-space gap is exactly 2 pages, 5169 * we make the guard 2 pages instead of 1. 5170 * Because in a guarded map, an area needs 5171 * at least 2 pages to allocate from: 5172 * one page for the allocation and one for 5173 * the guard. 5174 */ 5175 entry->guard = 2 * PAGE_SIZE; 5176 min = max; 5177 } else { 5178 entry->guard = PAGE_SIZE; 5179 min += PAGE_SIZE; 5180 } 5181 continue; 5182 } 5183 5184 /* 5185 * Handle the case where entry has a 2-page guard, but the 5186 * space after entry is freed. 5187 */ 5188 if (entry != NULL && entry->fspace == 0 && 5189 entry->guard > PAGE_SIZE) { 5190 entry->guard = PAGE_SIZE; 5191 min = VMMAP_FREE_START(entry); 5192 } 5193 5194 lmax = uvm_map_boundary(map, min, max); 5195 free = uvm_map_uaddr(map, min); 5196 5197 /* 5198 * Entries are merged if they point at the same uvm_free(). 5199 * Exception to that rule: if min == uvm_maxkaddr, a new 5200 * entry is started regardless (otherwise the allocators 5201 * will get confused). 5202 */ 5203 if (entry != NULL && free == entfree && 5204 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5205 min == uvm_maxkaddr)) { 5206 KDASSERT(VMMAP_FREE_END(entry) == min); 5207 entry->fspace += lmax - min; 5208 } else { 5209 /* 5210 * Commit entry to free list: it'll not be added to 5211 * anymore. 5212 * We'll start a new entry and add to that entry 5213 * instead. 5214 */ 5215 if (entry != NULL) 5216 uvm_mapent_free_insert(map, entfree, entry); 5217 5218 /* New entry for new uaddr. */ 5219 entry = uvm_mapent_alloc(map, flags); 5220 KDASSERT(entry != NULL); 5221 entry->end = entry->start = min; 5222 entry->guard = 0; 5223 entry->fspace = lmax - min; 5224 entry->object.uvm_obj = NULL; 5225 entry->offset = 0; 5226 entry->etype = 0; 5227 entry->protection = entry->max_protection = 0; 5228 entry->inheritance = 0; 5229 entry->wired_count = 0; 5230 entry->advice = 0; 5231 entry->aref.ar_pageoff = 0; 5232 entry->aref.ar_amap = NULL; 5233 uvm_mapent_addr_insert(map, entry); 5234 5235 entfree = free; 5236 } 5237 5238 min = lmax; 5239 } 5240 /* Finally put entry on the uaddr state. */ 5241 if (entry != NULL) 5242 uvm_mapent_free_insert(map, entfree, entry); 5243 5244 return entry; 5245 } 5246 5247 /* 5248 * MQuery style of allocation. 5249 * 5250 * This allocator searches forward until sufficient space is found to map 5251 * the given size. 5252 * 5253 * XXX: factor in offset (via pmap_prefer) and protection? 
5254 */ 5255 int 5256 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5257 int flags) 5258 { 5259 struct vm_map_entry *entry, *last; 5260 vaddr_t addr; 5261 vaddr_t tmp, pmap_align, pmap_offset; 5262 int error; 5263 5264 addr = *addr_p; 5265 vm_map_lock_read(map); 5266 5267 /* Configure pmap prefer. */ 5268 if (offset != UVM_UNKNOWN_OFFSET) { 5269 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5270 pmap_offset = PMAP_PREFER_OFFSET(offset); 5271 } else { 5272 pmap_align = PAGE_SIZE; 5273 pmap_offset = 0; 5274 } 5275 5276 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5277 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5278 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5279 if (tmp < addr) 5280 tmp += pmap_align; 5281 addr = tmp; 5282 } 5283 5284 /* First, check if the requested range is fully available. */ 5285 entry = uvm_map_entrybyaddr(&map->addr, addr); 5286 last = NULL; 5287 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5288 error = 0; 5289 goto out; 5290 } 5291 if (flags & UVM_FLAG_FIXED) { 5292 error = EINVAL; 5293 goto out; 5294 } 5295 5296 error = ENOMEM; /* Default error from here. */ 5297 5298 /* 5299 * At this point, the memory at <addr, sz> is not available. 5300 * The reasons are: 5301 * [1] it's outside the map, 5302 * [2] it starts in used memory (and therefore needs to move 5303 * toward the first free page in entry), 5304 * [3] it starts in free memory but bumps into used memory. 5305 * 5306 * Note that for case [2], the forward moving is handled by the 5307 * for loop below. 5308 */ 5309 if (entry == NULL) { 5310 /* [1] Outside the map. */ 5311 if (addr >= map->max_offset) 5312 goto out; 5313 else 5314 entry = RBT_MIN(uvm_map_addr, &map->addr); 5315 } else if (VMMAP_FREE_START(entry) <= addr) { 5316 /* [3] Bumped into used memory. */ 5317 entry = RBT_NEXT(uvm_map_addr, entry); 5318 } 5319 5320 /* Test if the next entry is sufficient for the allocation. */ 5321 for (; entry != NULL; 5322 entry = RBT_NEXT(uvm_map_addr, entry)) { 5323 if (entry->fspace == 0) 5324 continue; 5325 addr = VMMAP_FREE_START(entry); 5326 5327 restart: /* Restart address checks on address change. */ 5328 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5329 if (tmp < addr) 5330 tmp += pmap_align; 5331 addr = tmp; 5332 if (addr >= VMMAP_FREE_END(entry)) 5333 continue; 5334 5335 /* Skip brk() allocation addresses. */ 5336 if (addr + sz > map->b_start && addr < map->b_end) { 5337 if (VMMAP_FREE_END(entry) > map->b_end) { 5338 addr = map->b_end; 5339 goto restart; 5340 } else 5341 continue; 5342 } 5343 /* Skip stack allocation addresses. */ 5344 if (addr + sz > map->s_start && addr < map->s_end) { 5345 if (VMMAP_FREE_END(entry) > map->s_end) { 5346 addr = map->s_end; 5347 goto restart; 5348 } else 5349 continue; 5350 } 5351 5352 last = NULL; 5353 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5354 error = 0; 5355 goto out; 5356 } 5357 } 5358 5359 out: 5360 vm_map_unlock_read(map); 5361 if (error == 0) 5362 *addr_p = addr; 5363 return error; 5364 } 5365 5366 /* 5367 * Determine allocation bias. 5368 * 5369 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5370 * addresses, or 0 for no bias. 5371 * The bias mechanism is intended to avoid clashing with brk() and stack 5372 * areas. 
5373 */ 5374 int 5375 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5376 { 5377 vaddr_t start, end; 5378 5379 start = VMMAP_FREE_START(entry); 5380 end = VMMAP_FREE_END(entry); 5381 5382 /* Stay at the top of brk() area. */ 5383 if (end >= map->b_start && start < map->b_end) 5384 return 1; 5385 /* Stay at the far end of the stack area. */ 5386 if (end >= map->s_start && start < map->s_end) { 5387 #ifdef MACHINE_STACK_GROWS_UP 5388 return 1; 5389 #else 5390 return -1; 5391 #endif 5392 } 5393 5394 /* No bias, this area is meant for us. */ 5395 return 0; 5396 } 5397 5398 5399 boolean_t 5400 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5401 { 5402 boolean_t rv; 5403 5404 if (map->flags & VM_MAP_INTRSAFE) { 5405 rv = mtx_enter_try(&map->mtx); 5406 } else { 5407 mtx_enter(&map->flags_lock); 5408 if (map->flags & VM_MAP_BUSY) { 5409 mtx_leave(&map->flags_lock); 5410 return (FALSE); 5411 } 5412 mtx_leave(&map->flags_lock); 5413 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5414 /* check if the lock is busy and back out if we won the race */ 5415 if (rv) { 5416 mtx_enter(&map->flags_lock); 5417 if (map->flags & VM_MAP_BUSY) { 5418 rw_exit(&map->lock); 5419 rv = FALSE; 5420 } 5421 mtx_leave(&map->flags_lock); 5422 } 5423 } 5424 5425 if (rv) { 5426 map->timestamp++; 5427 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5428 uvm_tree_sanity(map, file, line); 5429 uvm_tree_size_chk(map, file, line); 5430 } 5431 5432 return (rv); 5433 } 5434 5435 void 5436 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5437 { 5438 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5439 do { 5440 mtx_enter(&map->flags_lock); 5441 tryagain: 5442 while (map->flags & VM_MAP_BUSY) { 5443 map->flags |= VM_MAP_WANTLOCK; 5444 msleep_nsec(&map->flags, &map->flags_lock, 5445 PVM, vmmapbsy, INFSLP); 5446 } 5447 mtx_leave(&map->flags_lock); 5448 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5449 /* check if the lock is busy and back out if we won the race */ 5450 mtx_enter(&map->flags_lock); 5451 if (map->flags & VM_MAP_BUSY) { 5452 rw_exit(&map->lock); 5453 goto tryagain; 5454 } 5455 mtx_leave(&map->flags_lock); 5456 } else { 5457 mtx_enter(&map->mtx); 5458 } 5459 5460 map->timestamp++; 5461 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5462 uvm_tree_sanity(map, file, line); 5463 uvm_tree_size_chk(map, file, line); 5464 } 5465 5466 void 5467 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5468 { 5469 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5470 rw_enter_read(&map->lock); 5471 else 5472 mtx_enter(&map->mtx); 5473 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5474 uvm_tree_sanity(map, file, line); 5475 uvm_tree_size_chk(map, file, line); 5476 } 5477 5478 void 5479 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5480 { 5481 uvm_tree_sanity(map, file, line); 5482 uvm_tree_size_chk(map, file, line); 5483 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5484 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5485 rw_exit(&map->lock); 5486 else 5487 mtx_leave(&map->mtx); 5488 } 5489 5490 void 5491 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5492 { 5493 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5494 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5495 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5496 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5497 rw_exit_read(&map->lock); 5498 else 5499 mtx_leave(&map->mtx); 5500 } 5501 5502 void 5503 vm_map_downgrade_ln(struct vm_map *map, char *file, 
int line) 5504 { 5505 uvm_tree_sanity(map, file, line); 5506 uvm_tree_size_chk(map, file, line); 5507 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5508 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5509 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5510 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5511 rw_enter(&map->lock, RW_DOWNGRADE); 5512 } 5513 5514 void 5515 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5516 { 5517 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5518 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5519 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5520 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5521 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5522 rw_exit_read(&map->lock); 5523 rw_enter_write(&map->lock); 5524 } 5525 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5526 uvm_tree_sanity(map, file, line); 5527 } 5528 5529 void 5530 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5531 { 5532 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5533 mtx_enter(&map->flags_lock); 5534 map->flags |= VM_MAP_BUSY; 5535 mtx_leave(&map->flags_lock); 5536 } 5537 5538 void 5539 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5540 { 5541 int oflags; 5542 5543 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5544 mtx_enter(&map->flags_lock); 5545 oflags = map->flags; 5546 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5547 mtx_leave(&map->flags_lock); 5548 if (oflags & VM_MAP_WANTLOCK) 5549 wakeup(&map->flags); 5550 } 5551 5552 #ifndef SMALL_KERNEL 5553 int 5554 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5555 size_t *lenp) 5556 { 5557 struct vm_map_entry *entry; 5558 vaddr_t start; 5559 int cnt, maxcnt, error = 0; 5560 5561 KASSERT(*lenp > 0); 5562 KASSERT((*lenp % sizeof(*kve)) == 0); 5563 cnt = 0; 5564 maxcnt = *lenp / sizeof(*kve); 5565 KASSERT(maxcnt > 0); 5566 5567 /* 5568 * Return only entries whose address is above the given base 5569 * address. This allows userland to iterate without knowing the 5570 * number of entries beforehand. 5571 */ 5572 start = (vaddr_t)kve[0].kve_start; 5573 5574 vm_map_lock(map); 5575 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5576 if (cnt == maxcnt) { 5577 error = ENOMEM; 5578 break; 5579 } 5580 if (start != 0 && entry->start < start) 5581 continue; 5582 kve->kve_start = entry->start; 5583 kve->kve_end = entry->end; 5584 kve->kve_guard = entry->guard; 5585 kve->kve_fspace = entry->fspace; 5586 kve->kve_fspace_augment = entry->fspace_augment; 5587 kve->kve_offset = entry->offset; 5588 kve->kve_wired_count = entry->wired_count; 5589 kve->kve_etype = entry->etype; 5590 kve->kve_protection = entry->protection; 5591 kve->kve_max_protection = entry->max_protection; 5592 kve->kve_advice = entry->advice; 5593 kve->kve_inheritance = entry->inheritance; 5594 kve->kve_flags = entry->flags; 5595 kve++; 5596 cnt++; 5597 } 5598 vm_map_unlock(map); 5599 5600 KASSERT(cnt <= maxcnt); 5601 5602 *lenp = sizeof(*kve) * cnt; 5603 return error; 5604 } 5605 #endif 5606 5607 5608 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5609 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5610 5611 5612 /* 5613 * MD code: vmspace allocator setup. 5614 */ 5615 5616 #ifdef __i386__ 5617 void 5618 uvm_map_setup_md(struct vm_map *map) 5619 { 5620 vaddr_t min, max; 5621 5622 min = map->min_offset; 5623 max = map->max_offset; 5624 5625 /* 5626 * Ensure the selectors will not try to manage page 0; 5627 * it's too special. 
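 * (This mirrors the VMMAP_MIN_ADDR check in uvm_map_uaddr(): keeping
 * page 0 out of every selector means mmap(2) can never hand out
 * address 0.)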
5628 */ 5629 if (min < VMMAP_MIN_ADDR) 5630 min = VMMAP_MIN_ADDR; 5631 5632 #if 0 /* Cool stuff, not yet */ 5633 /* Executable code is special. */ 5634 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5635 /* Place normal allocations beyond executable mappings. */ 5636 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5637 #else /* Crappy stuff, for now */ 5638 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5639 #endif 5640 5641 #ifndef SMALL_KERNEL 5642 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5643 #endif /* !SMALL_KERNEL */ 5644 } 5645 #elif __LP64__ 5646 void 5647 uvm_map_setup_md(struct vm_map *map) 5648 { 5649 vaddr_t min, max; 5650 5651 min = map->min_offset; 5652 max = map->max_offset; 5653 5654 /* 5655 * Ensure the selectors will not try to manage page 0; 5656 * it's too special. 5657 */ 5658 if (min < VMMAP_MIN_ADDR) 5659 min = VMMAP_MIN_ADDR; 5660 5661 #if 0 /* Cool stuff, not yet */ 5662 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5663 #else /* Crappy stuff, for now */ 5664 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5665 #endif 5666 5667 #ifndef SMALL_KERNEL 5668 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5669 #endif /* !SMALL_KERNEL */ 5670 } 5671 #else /* non-i386, 32 bit */ 5672 void 5673 uvm_map_setup_md(struct vm_map *map) 5674 { 5675 vaddr_t min, max; 5676 5677 min = map->min_offset; 5678 max = map->max_offset; 5679 5680 /* 5681 * Ensure the selectors will not try to manage page 0; 5682 * it's too special. 5683 */ 5684 if (min < VMMAP_MIN_ADDR) 5685 min = VMMAP_MIN_ADDR; 5686 5687 #if 0 /* Cool stuff, not yet */ 5688 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5689 #else /* Crappy stuff, for now */ 5690 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5691 #endif 5692 5693 #ifndef SMALL_KERNEL 5694 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5695 #endif /* !SMALL_KERNEL */ 5696 } 5697 #endif 5698