/*	$OpenBSD: uvm_map.c,v 1.269 2020/10/19 08:19:46 mpi Exp $	*/
/*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_map.c: uvm map operations
 */

/* #define DEBUG */
/* #define VMMAP_DEBUG */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/acct.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/user.h>
#include <sys/tracepoint.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <uvm/uvm.h>

#ifdef DDB
#include <uvm/uvm_ddb.h>
#endif

#include <uvm/uvm_addr.h>


vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
int			 uvm_mapent_isjoinable(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*);
struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
			    struct vm_map_entry*, vaddr_t, vsize_t, int,
			    struct uvm_map_deadq*, struct vm_map_entry*);
struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
void			 uvm_mapent_free(struct vm_map_entry*);
void			 uvm_unmap_kill_entry(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
void			 uvm_mapent_mkfree(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry**,
			    struct uvm_map_deadq*, boolean_t);
void			 uvm_map_pageable_pgon(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t, vaddr_t);
int			 uvm_map_pageable_wire(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t, vaddr_t, int);
void			 uvm_map_setup_entries(struct vm_map*);
void			 uvm_map_setup_md(struct vm_map*);
void			 uvm_map_teardown(struct vm_map*);
void			 uvm_map_vmspace_update(struct vm_map*,
			    struct uvm_map_deadq*, int);
void			 uvm_map_kmem_grow(struct vm_map*,
			    struct uvm_map_deadq*, vsize_t, int);
void			 uvm_map_freelist_update_clear(struct vm_map*,
			    struct uvm_map_deadq*);
void			 uvm_map_freelist_update_refill(struct vm_map *, int);
void			 uvm_map_freelist_update(struct vm_map*,
			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
			    vaddr_t, vaddr_t, int);
struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
			    vaddr_t, vaddr_t, int);
int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
			    int);
int			 uvm_map_findspace(struct vm_map*,
			    struct vm_map_entry**, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
void			 uvm_map_addr_augment(struct vm_map_entry*);

int			 uvm_map_inentry_recheck(u_long, vaddr_t,
			    struct p_inentry *);
boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
			    vaddr_t, int (*)(vm_map_entry_t), u_long);

/*
 * Tree management functions.
 */

static inline void	 uvm_mapent_copy(struct vm_map_entry*,
			    struct vm_map_entry*);
static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
			    const struct vm_map_entry*);
void			 uvm_mapent_free_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_free_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_addr_insert(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_mapent_addr_remove(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_map_splitentry(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t);
vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);

/*
 * uvm_vmspace_fork helper functions.
 */
struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t,
			    struct vm_map_entry*, struct uvm_map_deadq*, int,
			    int);
struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);

/*
 * Tree validation.
 */
#ifdef VMMAP_DEBUG
void			 uvm_tree_assert(struct vm_map*, int, char*,
			    char*, int);
#define UVM_ASSERT(map, cond, file, line)				\
	uvm_tree_assert((map), (cond), #cond, (file), (line))
void			 uvm_tree_sanity(struct vm_map*, char*, int);
void			 uvm_tree_size_chk(struct vm_map*, char*, int);
void			 vmspace_validate(struct vm_map*);
#else
#define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
#define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
#define vmspace_validate(_map)				do {} while (0)
#endif

/*
 * All architectures will have pmap_prefer.
 */
#ifndef PMAP_PREFER
#define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
#define PMAP_PREFER_OFFSET(off)	0
#define PMAP_PREFER(addr, off)	(addr)
#endif

/*
 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
 *
 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size
 * each time.
 */
#define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_ALLOCMUL	4
/*
 * When selecting a random free-space block, look at most FSPACE_DELTA blocks
 * ahead.
 */
#define FSPACE_DELTA		8
/*
 * Put allocations adjacent to previous allocations when the free-space tree
 * is larger than FSPACE_COMPACT entries.
 *
 * Alignment and PMAP_PREFER may still cause the entry to not be fully
 * adjacent. Note that this strategy reduces memory fragmentation (by leaving
 * a large space before or after the allocation).
 */
#define FSPACE_COMPACT		128
/*
 * Make the address selection skip at most this many bytes from the start of
 * the free space in which the allocation takes place.
 *
 * The main idea behind a randomized address space is that an attacker cannot
 * know where to target his attack. Therefore, the location of objects must be
 * as random as possible. However, the goal is not to create the most sparse
 * map that is possible.
 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
 * sizes, thereby reducing the sparseness. The biggest randomization comes
 * from fragmentation, i.e. FSPACE_COMPACT.
 */
#define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
/*
 * Allow for small gaps in the overflow areas.
 * Gap size is in bytes and does not have to be a multiple of page-size.
 */
#define FSPACE_BIASGAP		((vaddr_t)32 * 1024)

/* auto-allocate address lower bound */
#define VMMAP_MIN_ADDR		PAGE_SIZE


#ifdef DEADBEEF0
#define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
#else
#define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
#endif

#ifdef DEBUG
int uvm_map_printlocks = 0;

#define LPRINTF(_args)							\
	do {								\
		if (uvm_map_printlocks)					\
			printf _args;					\
	} while (0)
#else
#define LPRINTF(_args)	do {} while (0)
#endif

static struct mutex uvm_kmapent_mtx;
static struct timeval uvm_kmapent_last_warn_time;
static struct timeval uvm_kmapent_warn_rate = { 10, 0 };

const char vmmapbsy[] = "vmmapbsy";

/*
 * pool for vmspace structures.
 */
struct pool uvm_vmspace_pool;

/*
 * pool for dynamically-allocated map entries.
 */
struct pool uvm_map_entry_pool;
struct pool uvm_map_entry_kmem_pool;

/*
 * This global represents the end of the kernel virtual address
 * space. If we want to exceed this, we must grow the kernel
 * virtual address space dynamically.
 *
 * Note, this variable is locked by kernel_map's lock.
 */
vaddr_t uvm_maxkaddr;

/*
 * Locking predicate.
 */
#define UVM_MAP_REQ_WRITE(_map)						\
	do {								\
		if ((_map)->ref_count > 0) {				\
			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
				rw_assert_wrlock(&(_map)->lock);	\
			else						\
				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
		}							\
	} while (0)

#define	vm_map_modflags(map, set, clear)				\
	do {								\
		mtx_enter(&(map)->flags_lock);				\
		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
		mtx_leave(&(map)->flags_lock);				\
	} while (0)


/*
 * Tree describing entries by address.
 *
 * Addresses are unique.
 * Entries with start == end may only exist if they are the first entry
 * (sorted by address) within a free-memory tree.
 */

static inline int
uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
    const struct vm_map_entry *e2)
{
	return e1->start < e2->start ? -1 : e1->start > e2->start;
}

/*
 * Copy mapentry.
 */
static inline void
uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
{
	caddr_t csrc, cdst;
	size_t sz;

	csrc = (caddr_t)src;
	cdst = (caddr_t)dst;
	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);

	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
	memcpy(cdst, csrc, sz);
}

/*
 * Handle free-list insertion.
 */
void
uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry *entry)
{
	const struct uvm_addr_functions *fun;
#ifdef VMMAP_DEBUG
	vaddr_t min, max, bound;
#endif

#ifdef VMMAP_DEBUG
	/*
	 * Boundary check.
	 * Boundaries are folded if they go on the same free list.
	 */
	min = VMMAP_FREE_START(entry);
	max = VMMAP_FREE_END(entry);

	while (min < max) {
		bound = uvm_map_boundary(map, min, max);
		KASSERT(uvm_map_uaddr(map, min) == uaddr);
		min = bound;
	}
#endif
	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);

	UVM_MAP_REQ_WRITE(map);

	/* Actual insert: forward to uaddr pointer. */
	if (uaddr != NULL) {
		fun = uaddr->uaddr_functions;
		KDASSERT(fun != NULL);
		if (fun->uaddr_free_insert != NULL)
			(*fun->uaddr_free_insert)(map, uaddr, entry);
		entry->etype |= UVM_ET_FREEMAPPED;
	}

	/* Update fspace augmentation. */
	uvm_map_addr_augment(entry);
}

/*
 * Handle free-list removal.
 */
void
uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry *entry)
{
	const struct uvm_addr_functions *fun;

	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
	UVM_MAP_REQ_WRITE(map);

	if (uaddr != NULL) {
		fun = uaddr->uaddr_functions;
		if (fun->uaddr_free_remove != NULL)
			(*fun->uaddr_free_remove)(map, uaddr, entry);
		entry->etype &= ~UVM_ET_FREEMAPPED;
	}
}

/*
 * Handle address tree insertion.
 */
void
uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *res;

	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
		panic("uvm_mapent_addr_insert: entry still in addr list");
	KDASSERT(entry->start <= entry->end);
	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
	    (entry->end & (vaddr_t)PAGE_MASK) == 0);

	TRACEPOINT(uvm, map_insert,
	    entry->start, entry->end, entry->protection, NULL);

	UVM_MAP_REQ_WRITE(map);
	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
	if (res != NULL) {
		panic("uvm_mapent_addr_insert: map %p entry %p "
		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
		    map, entry,
		    entry->start, entry->end, entry->guard, entry->fspace,
		    res, res->start, res->end, res->guard, res->fspace);
	}
}

/*
 * Handle address tree removal.
 */
void
uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *res;

	TRACEPOINT(uvm, map_remove,
	    entry->start, entry->end, entry->protection, NULL);

	UVM_MAP_REQ_WRITE(map);
	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
	if (res != entry)
		panic("uvm_mapent_addr_remove");
	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
}

/*
 * uvm_map_reference: add reference to a map
 *
 * XXX check map reference counter lock
 */
#define uvm_map_reference(_map)						\
	do {								\
		(_map)->ref_count++;					\
	} while (0)

/*
 * Calculate the dused delta.
 */
vsize_t
uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
{
	struct vmspace *vm;
	vsize_t sz;
	vaddr_t lmax;
	vaddr_t stack_begin, stack_end; /* Position of stack. */

	KASSERT(map->flags & VM_MAP_ISVMSPACE);
	vm = (struct vmspace *)map;
	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);

	sz = 0;
	while (min != max) {
		lmax = max;
		if (min < stack_begin && lmax > stack_begin)
			lmax = stack_begin;
		else if (min < stack_end && lmax > stack_end)
			lmax = stack_end;

		if (min >= stack_begin && min < stack_end) {
			/* nothing */
		} else
			sz += lmax - min;
		min = lmax;
	}

	return sz >> PAGE_SHIFT;
}

/*
 * Find the entry describing the given address.
 */
struct vm_map_entry*
uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
{
	struct vm_map_entry *iter;

	iter = RBT_ROOT(uvm_map_addr, atree);
	while (iter != NULL) {
		if (iter->start > addr)
			iter = RBT_LEFT(uvm_map_addr, iter);
		else if (VMMAP_FREE_END(iter) <= addr)
			iter = RBT_RIGHT(uvm_map_addr, iter);
		else
			return iter;
	}
	return NULL;
}

/*
 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
 *
 * Push dead entries into a linked list.
 * Since the linked list abuses the address tree for storage, the entry
 * may not be linked in a map.
 *
 * *head must be initialized to NULL before the first call to this macro.
 * uvm_unmap_detach(*head, 0) will remove dead entries.
 */
static inline void
dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
{
	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
}
#define DEAD_ENTRY_PUSH(_headptr, _entry)				\
	dead_entry_push((_headptr), (_entry))

/*
 * Helper function for uvm_map_findspace_tree.
 *
 * Given allocation constraints and pmap constraints, finds the
 * lowest and highest address in a range that can be used for the
 * allocation.
 *
 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
 *
 *
 * Big chunk of math with a seasoning of dragons.
 */
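/*
 * Illustrative worked example (not part of the original source, numbers
 * assume 4KB pages): for a free range [0x1000, 0x9000), sz = 0x2000, a
 * guard page and align = 0x2000, the limits start out as
 * sel_min = 0x1000 and sel_max = 0x9000 - 0x2000 - PAGE_SIZE = 0x6000;
 * rounding sel_min up and masking sel_max down to the alignment then
 * leaves [0x2000, 0x6000] as the window of valid start addresses.
 */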
int
uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
    struct vm_map_entry *sel, vaddr_t align,
    vaddr_t pmap_align, vaddr_t pmap_off, int bias)
{
	vaddr_t sel_min, sel_max;
#ifdef PMAP_PREFER
	vaddr_t pmap_min, pmap_max;
#endif /* PMAP_PREFER */
#ifdef DIAGNOSTIC
	int bad;
#endif /* DIAGNOSTIC */

	sel_min = VMMAP_FREE_START(sel);
	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);

#ifdef PMAP_PREFER

	/*
	 * There are two special cases, in which we can satisfy the align
	 * requirement and the pmap_prefer requirement.
	 * - when pmap_off == 0, we always select the largest of the two
	 * - when pmap_off % align == 0 and pmap_align > align, we simply
	 *   satisfy the pmap_align requirement and automatically
	 *   satisfy the align requirement.
	 */
	if (align > PAGE_SIZE &&
	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
		/*
		 * Simple case: only use align.
		 */
		sel_min = roundup(sel_min, align);
		sel_max &= ~(align - 1);

		if (sel_min > sel_max)
			return ENOMEM;

		/* Correct for bias. */
		if (sel_max - sel_min > FSPACE_BIASGAP) {
			if (bias > 0) {
				sel_min = sel_max - FSPACE_BIASGAP;
				sel_min = roundup(sel_min, align);
			} else if (bias < 0) {
				sel_max = sel_min + FSPACE_BIASGAP;
				sel_max &= ~(align - 1);
			}
		}
	} else if (pmap_align != 0) {
		/*
		 * Special case: satisfy both pmap_prefer and
		 * align argument.
		 */
		pmap_max = sel_max & ~(pmap_align - 1);
		pmap_min = sel_min;
		if (pmap_max < sel_min)
			return ENOMEM;

		/* Adjust pmap_min for BIASGAP for top-addr bias. */
		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
			pmap_min = pmap_max - FSPACE_BIASGAP;
		/* Align pmap_min. */
		pmap_min &= ~(pmap_align - 1);
		if (pmap_min < sel_min)
			pmap_min += pmap_align;
		if (pmap_min > pmap_max)
			return ENOMEM;

		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
			pmap_max = (pmap_min + FSPACE_BIASGAP) &
			    ~(pmap_align - 1);
		}
		if (pmap_min > pmap_max)
			return ENOMEM;

		/* Apply pmap prefer offset. */
		pmap_max |= pmap_off;
		if (pmap_max > sel_max)
			pmap_max -= pmap_align;
		pmap_min |= pmap_off;
		if (pmap_min < sel_min)
			pmap_min += pmap_align;

		/*
		 * Fixup: it's possible that pmap_min and pmap_max
		 * cross each other. In this case, try to find one
		 * address that is allowed.
		 * (This usually happens in biased case.)
		 */
		if (pmap_min > pmap_max) {
			if (pmap_min < sel_max)
				pmap_max = pmap_min;
			else if (pmap_max > sel_min)
				pmap_min = pmap_max;
			else
				return ENOMEM;
		}

		/* Internal validation. */
		KDASSERT(pmap_min <= pmap_max);

		sel_min = pmap_min;
		sel_max = pmap_max;
	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_min = sel_max - FSPACE_BIASGAP;
	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_max = sel_min + FSPACE_BIASGAP;

#else

	if (align > PAGE_SIZE) {
		sel_min = roundup(sel_min, align);
		sel_max &= ~(align - 1);
		if (sel_min > sel_max)
			return ENOMEM;

		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
			if (bias > 0) {
				sel_min = roundup(sel_max - FSPACE_BIASGAP,
				    align);
			} else {
				sel_max = (sel_min + FSPACE_BIASGAP) &
				    ~(align - 1);
			}
		}
	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_min = sel_max - FSPACE_BIASGAP;
	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
		sel_max = sel_min + FSPACE_BIASGAP;

#endif

	if (sel_min > sel_max)
		return ENOMEM;

#ifdef DIAGNOSTIC
	bad = 0;
	/* Lower boundary check. */
	if (sel_min < VMMAP_FREE_START(sel)) {
		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
		    sel_min, VMMAP_FREE_START(sel));
		bad++;
	}
	/* Upper boundary check. */
	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
		    sel_max,
		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
		bad++;
	}
	/* Lower boundary alignment. */
	if (align != 0 && (sel_min & (align - 1)) != 0) {
		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
		    sel_min, align);
		bad++;
	}
	/* Upper boundary alignment. */
	if (align != 0 && (sel_max & (align - 1)) != 0) {
		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
		    sel_max, align);
		bad++;
	}
	/* Lower boundary PMAP_PREFER check. */
	if (pmap_align != 0 && align == 0 &&
	    (sel_min & (pmap_align - 1)) != pmap_off) {
		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
		    sel_min, sel_min & (pmap_align - 1), pmap_off);
		bad++;
	}
	/* Upper boundary PMAP_PREFER check. */
	if (pmap_align != 0 && align == 0 &&
	    (sel_max & (pmap_align - 1)) != pmap_off) {
		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
		    sel_max, sel_max & (pmap_align - 1), pmap_off);
		bad++;
	}

	if (bad) {
		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
		    "bias = %d, "
		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
	}
#endif /* DIAGNOSTIC */

	*min = sel_min;
	*max = sel_max;
	return 0;
}

/*
 * Test if memory starting at addr with sz bytes is free.
 *
 * Fills in *start_ptr and *end_ptr to be the first and last entry describing
 * the space.
 * If called with prefilled *start_ptr and *end_ptr, they must be correct.
 */
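/*
 * Illustrative sketch (not part of the original source): a caller with no
 * cached entries passes NULL pointers, which the lookup fills in on success:
 *
 *	struct vm_map_entry *first = NULL, *last = NULL;
 *
 *	if (uvm_map_isavail(map, NULL, &first, &last, addr, sz)) {
 *		... <addr, addr+sz> is free; first/last now bracket it ...
 *	}
 */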
int
uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
    vaddr_t addr, vsize_t sz)
{
	struct uvm_addr_state *free;
	struct uvm_map_addr *atree;
	struct vm_map_entry *i, *i_end;

	if (addr + sz < addr)
		return 0;

	/*
	 * Kernel memory above uvm_maxkaddr is considered unavailable.
	 */
	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
		if (addr + sz > uvm_maxkaddr)
			return 0;
	}

	atree = &map->addr;

	/*
	 * Fill in first, last, so they point at the entries containing the
	 * first and last address of the range.
	 * Note that if they are not NULL, we don't perform the lookup.
	 */
	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
	if (*start_ptr == NULL) {
		*start_ptr = uvm_map_entrybyaddr(atree, addr);
		if (*start_ptr == NULL)
			return 0;
	} else
		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
	if (*end_ptr == NULL) {
		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
			*end_ptr = *start_ptr;
		else {
			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
			if (*end_ptr == NULL)
				return 0;
		}
	} else
		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));

	/* Validation. */
	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
	KDASSERT((*start_ptr)->start <= addr &&
	    VMMAP_FREE_END(*start_ptr) > addr &&
	    (*end_ptr)->start < addr + sz &&
	    VMMAP_FREE_END(*end_ptr) >= addr + sz);

	/*
	 * Check that none of the entries intersects with <addr, addr+sz>.
	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
	 * considered unavailable unless called by those allocators.
	 */
	i = *start_ptr;
	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
	for (; i != i_end;
	    i = RBT_NEXT(uvm_map_addr, i)) {
		if (i->start != i->end && i->end > addr)
			return 0;

		/*
		 * uaddr_exe and uaddr_brk_stack may only be used
		 * by these allocators and the NULL uaddr (i.e. no
		 * uaddr).
		 * Reject if this requirement is not met.
		 */
		if (uaddr != NULL) {
			free = uvm_map_uaddr_e(map, i);

			if (uaddr != free && free != NULL &&
			    (free == map->uaddr_exe ||
			    free == map->uaddr_brk_stack))
				return 0;
		}
	}

	return -1;
}

/*
 * Invoke each address selector until an address is found.
 * Will not invoke uaddr_exe.
 */
int
uvm_map_findspace(struct vm_map *map, struct vm_map_entry **first,
    struct vm_map_entry **last, vaddr_t *addr, vsize_t sz,
    vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
{
	struct uvm_addr_state *uaddr;
	int i;

	/*
	 * Allocation for sz bytes at any address,
	 * using the addr selectors in order.
	 */
	for (i = 0; i < nitems(map->uaddr_any); i++) {
		uaddr = map->uaddr_any[i];

		if (uvm_addr_invoke(map, uaddr, first, last,
		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
			return 0;
	}

	/* Fall back to brk() and stack() address selectors. */
	uaddr = map->uaddr_brk_stack;
	if (uvm_addr_invoke(map, uaddr, first, last,
	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
		return 0;

	return ENOMEM;
}

/* Calculate entry augmentation value. */
vsize_t
uvm_map_addr_augment_get(struct vm_map_entry *entry)
{
	vsize_t augment;
	struct vm_map_entry *left, *right;

	augment = entry->fspace;
	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, left->fspace_augment);
	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, right->fspace_augment);
	return augment;
}

/*
 * Update augmentation data in entry.
 */
void
uvm_map_addr_augment(struct vm_map_entry *entry)
{
	vsize_t augment;

	while (entry != NULL) {
		/* Calculate value for augmentation. */
		augment = uvm_map_addr_augment_get(entry);

		/*
		 * Descend update.
		 * Once we find an entry that already has the correct value,
		 * stop, since it means all its parents will use the correct
		 * value too.
		 */
		if (entry->fspace_augment == augment)
			return;
		entry->fspace_augment = augment;
		entry = RBT_PARENT(uvm_map_addr, entry);
	}
}

/*
 * uvm_mapanon: establish a valid mapping in map for an anon
 *
 * => *addr and sz must be a multiple of PAGE_SIZE.
 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
 * => map must be unlocked.
 *
 * => align: align vaddr, must be a power-of-2.
 *    Align is only a hint and will be ignored if the alignment fails.
 */
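/*
 * Illustrative sketch (not part of the original source): an anonymous,
 * copy-on-write mapping at a map-chosen address in a process map (the
 * only kind of map uvm_mapanon accepts) could be requested as:
 *
 *	vaddr_t va = 0;
 *	int error;
 *
 *	error = uvm_mapanon(map, &va, round_page(len), 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE,
 *	    PROT_READ | PROT_WRITE | PROT_EXEC,
 *	    MAP_INHERIT_COPY, MADV_NORMAL, UVM_FLAG_COPYONW));
 *
 * len is a placeholder for the caller's size; on success va holds the
 * selected address.
 */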
int
uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
    vsize_t align, unsigned int flags)
{
	struct vm_map_entry *first, *last, *entry, *new;
	struct uvm_map_deadq dead;
	vm_prot_t prot;
	vm_prot_t maxprot;
	vm_inherit_t inherit;
	int advice;
	int error;
	vaddr_t pmap_align, pmap_offset;
	vaddr_t hint;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
	KASSERT(map != kernel_map);
	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
	splassert(IPL_NONE);
	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);

	/*
	 * We use pmap_align and pmap_offset as alignment and offset variables.
	 *
	 * Because the align parameter takes precedence over pmap prefer,
	 * the pmap_align will need to be set to align, with pmap_offset = 0,
	 * if pmap_prefer will not align.
	 */
	pmap_align = MAX(align, PAGE_SIZE);
	pmap_offset = 0;

	/* Decode parameters. */
	prot = UVM_PROTECTION(flags);
	maxprot = UVM_MAXPROTECTION(flags);
	advice = UVM_ADVICE(flags);
	inherit = UVM_INHERIT(flags);
	error = 0;
	hint = trunc_page(*addr);
	TAILQ_INIT(&dead);
	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((align & (align - 1)) == 0);

	/* Check protection. */
	if ((prot & maxprot) != prot)
		return EACCES;

	/*
	 * Before grabbing the lock, allocate a map entry for later
	 * use to ensure we don't wait for memory while holding the
	 * vm_map_lock.
	 */
	new = uvm_mapent_alloc(map, flags);
	if (new == NULL)
		return(ENOMEM);

	vm_map_lock(map);
	first = last = NULL;
	if (flags & UVM_FLAG_FIXED) {
		/*
		 * Fixed location.
		 *
		 * Note: we ignore align, pmap_prefer.
		 * Fill in first, last and *addr.
		 */
		KASSERT((*addr & PAGE_MASK) == 0);

		/* Check that the space is available. */
		if (flags & UVM_FLAG_UNMAP) {
			if ((flags & UVM_FLAG_STACK) &&
			    !uvm_map_is_stack_remappable(map, *addr, sz)) {
				error = EINVAL;
				goto unlock;
			}
			uvm_unmap_remove(map, *addr, *addr + sz, &dead,
			    FALSE, TRUE);
		}
		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
			error = ENOMEM;
			goto unlock;
		}
	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
	    (align == 0 || (*addr & (align - 1)) == 0) &&
	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
		/*
		 * Address used as hint.
		 *
		 * Note: we enforce the alignment restriction,
		 * but ignore pmap_prefer.
		 */
	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
		/* Run selection algorithm for executables. */
		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
		    addr, sz, pmap_align, pmap_offset, prot, hint);

		if (error != 0)
			goto unlock;
	} else {
		/* Update freelists from vmspace. */
		uvm_map_vmspace_update(map, &dead, flags);

		error = uvm_map_findspace(map, &first, &last, addr, sz,
		    pmap_align, pmap_offset, prot, hint);

		if (error != 0)
			goto unlock;
	}

	/* Double-check if selected address doesn't cause overflow. */
	if (*addr + sz < *addr) {
		error = ENOMEM;
		goto unlock;
	}

	/* If we only want a query, return now. */
	if (flags & UVM_FLAG_QUERY) {
		error = 0;
		goto unlock;
	}

	/*
	 * Create new entry.
	 * first and last may be invalidated after this call.
	 */
	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
	    new);
	if (entry == NULL) {
		error = ENOMEM;
		goto unlock;
	}
	new = NULL;
	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
	entry->object.uvm_obj = NULL;
	entry->offset = 0;
	entry->protection = prot;
	entry->max_protection = maxprot;
	entry->inheritance = inherit;
	entry->wired_count = 0;
	entry->advice = advice;
	if (prot & PROT_WRITE)
		map->wserial++;
	if (flags & UVM_FLAG_SYSCALL) {
		entry->etype |= UVM_ET_SYSCALL;
		map->wserial++;
	}
	if (flags & UVM_FLAG_STACK) {
		entry->etype |= UVM_ET_STACK;
		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
			map->sserial++;
	}
	if (flags & UVM_FLAG_COPYONW) {
		entry->etype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			entry->etype |= UVM_ET_NEEDSCOPY;
	}
	if (flags & UVM_FLAG_CONCEAL)
		entry->etype |= UVM_ET_CONCEAL;
	if (flags & UVM_FLAG_OVERLAY) {
		KERNEL_LOCK();
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
		KERNEL_UNLOCK();
	}

	/* Update map and process statistics. */
	map->size += sz;
	if (prot != PROT_NONE) {
		((struct vmspace *)map)->vm_dused +=
		    uvmspace_dused(map, *addr, *addr + sz);
	}

unlock:
	vm_map_unlock(map);

	/*
	 * Remove dead entries.
	 *
	 * Dead entries may be the result of merging.
	 * uvm_map_mkentry may also create dead entries, when it attempts to
	 * destroy free-space entries.
	 */
	uvm_unmap_detach(&dead, 0);

	if (new)
		uvm_mapent_free(new);
	return error;
}

/*
 * uvm_map: establish a valid mapping in map
 *
 * => *addr and sz must be a multiple of PAGE_SIZE.
 * => map must be unlocked.
 * => <uobj,uoffset> value meanings (4 cases):
 *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	[3] <uobj,uoffset>		== normal mapping
 *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 *   case [4] is for kernel mappings where we don't know the offset until
 *   we've found a virtual address. note that kernel object offsets are
 *   always relative to vm_map_min(kernel_map).
 *
 * => align: align vaddr, must be a power-of-2.
 *    Align is only a hint and will be ignored if the alignment fails.
 */
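/*
 * Illustrative sketch (not part of the original source): a kernel mapping
 * of a kernel object, letting uvm_map pick both the address and the offset
 * (case [4] above), might look like:
 *
 *	vaddr_t va = 0;
 *	int error;
 *
 *	error = uvm_map(kernel_map, &va, round_page(len), uobj,
 *	    UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
 *	    PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, MADV_NORMAL, 0));
 *
 * len and uobj are placeholders for the caller's size and kernel object.
 */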
int
uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
    struct uvm_object *uobj, voff_t uoffset,
    vsize_t align, unsigned int flags)
{
	struct vm_map_entry *first, *last, *entry, *new;
	struct uvm_map_deadq dead;
	vm_prot_t prot;
	vm_prot_t maxprot;
	vm_inherit_t inherit;
	int advice;
	int error;
	vaddr_t pmap_align, pmap_offset;
	vaddr_t hint;

	if ((map->flags & VM_MAP_INTRSAFE) == 0)
		splassert(IPL_NONE);
	else
		splassert(IPL_VM);

	/*
	 * We use pmap_align and pmap_offset as alignment and offset variables.
	 *
	 * Because the align parameter takes precedence over pmap prefer,
	 * the pmap_align will need to be set to align, with pmap_offset = 0,
	 * if pmap_prefer will not align.
	 */
	if (uoffset == UVM_UNKNOWN_OFFSET) {
		pmap_align = MAX(align, PAGE_SIZE);
		pmap_offset = 0;
	} else {
		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
		pmap_offset = PMAP_PREFER_OFFSET(uoffset);

		if (align == 0 ||
		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
			/* pmap_offset satisfies align, no change. */
		} else {
			/* Align takes precedence over pmap prefer. */
			pmap_align = align;
			pmap_offset = 0;
		}
	}

	/* Decode parameters. */
	prot = UVM_PROTECTION(flags);
	maxprot = UVM_MAXPROTECTION(flags);
	advice = UVM_ADVICE(flags);
	inherit = UVM_INHERIT(flags);
	error = 0;
	hint = trunc_page(*addr);
	TAILQ_INIT(&dead);
	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((align & (align - 1)) == 0);

	/* Holes are incompatible with other types of mappings. */
	if (flags & UVM_FLAG_HOLE) {
		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
	}

	/* Unset hint for kernel_map non-fixed allocations. */
	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
		hint = 0;

	/* Check protection. */
	if ((prot & maxprot) != prot)
		return EACCES;

	if (map == kernel_map &&
	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
		panic("uvm_map: kernel map W^X violation requested");

	/*
	 * Before grabbing the lock, allocate a map entry for later
	 * use to ensure we don't wait for memory while holding the
	 * vm_map_lock.
	 */
	new = uvm_mapent_alloc(map, flags);
	if (new == NULL)
		return(ENOMEM);

	if (flags & UVM_FLAG_TRYLOCK) {
		if (vm_map_lock_try(map) == FALSE) {
			error = EFAULT;
			goto out;
		}
	} else {
		vm_map_lock(map);
	}

	first = last = NULL;
	if (flags & UVM_FLAG_FIXED) {
		/*
		 * Fixed location.
		 *
		 * Note: we ignore align, pmap_prefer.
		 * Fill in first, last and *addr.
		 */
		KASSERT((*addr & PAGE_MASK) == 0);

		/*
		 * Grow pmap to include allocated address.
		 * If the growth fails, the allocation will fail too.
		 */
		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
		    uvm_maxkaddr < (*addr + sz)) {
			uvm_map_kmem_grow(map, &dead,
			    *addr + sz - uvm_maxkaddr, flags);
		}

		/* Check that the space is available. */
		if (flags & UVM_FLAG_UNMAP)
			uvm_unmap_remove(map, *addr, *addr + sz, &dead,
			    FALSE, TRUE);
		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
			error = ENOMEM;
			goto unlock;
		}
	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
	    (align == 0 || (*addr & (align - 1)) == 0) &&
	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
		/*
		 * Address used as hint.
		 *
		 * Note: we enforce the alignment restriction,
		 * but ignore pmap_prefer.
		 */
	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
		/* Run selection algorithm for executables. */
		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
		    addr, sz, pmap_align, pmap_offset, prot, hint);

		/* Grow kernel memory and try again. */
		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
			uvm_map_kmem_grow(map, &dead, sz, flags);

			error = uvm_addr_invoke(map, map->uaddr_exe,
			    &first, &last, addr, sz,
			    pmap_align, pmap_offset, prot, hint);
		}

		if (error != 0)
			goto unlock;
	} else {
		/* Update freelists from vmspace. */
		if (map->flags & VM_MAP_ISVMSPACE)
			uvm_map_vmspace_update(map, &dead, flags);

		error = uvm_map_findspace(map, &first, &last, addr, sz,
		    pmap_align, pmap_offset, prot, hint);

		/* Grow kernel memory and try again. */
		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
			uvm_map_kmem_grow(map, &dead, sz, flags);

			error = uvm_map_findspace(map, &first, &last, addr, sz,
			    pmap_align, pmap_offset, prot, hint);
		}

		if (error != 0)
			goto unlock;
	}

	/* Double-check if selected address doesn't cause overflow. */
	if (*addr + sz < *addr) {
		error = ENOMEM;
		goto unlock;
	}

	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
	    uvm_maxkaddr >= *addr + sz);

	/* If we only want a query, return now. */
	if (flags & UVM_FLAG_QUERY) {
		error = 0;
		goto unlock;
	}

	if (uobj == NULL)
		uoffset = 0;
	else if (uoffset == UVM_UNKNOWN_OFFSET) {
		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
		uoffset = *addr - vm_map_min(kernel_map);
	}

	/*
	 * Create new entry.
	 * first and last may be invalidated after this call.
	 */
	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
	    new);
	if (entry == NULL) {
		error = ENOMEM;
		goto unlock;
	}
	new = NULL;
	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
	entry->object.uvm_obj = uobj;
	entry->offset = uoffset;
	entry->protection = prot;
	entry->max_protection = maxprot;
	entry->inheritance = inherit;
	entry->wired_count = 0;
	entry->advice = advice;
	if (prot & PROT_WRITE)
		map->wserial++;
	if (flags & UVM_FLAG_SYSCALL) {
		entry->etype |= UVM_ET_SYSCALL;
		map->wserial++;
	}
	if (flags & UVM_FLAG_STACK) {
		entry->etype |= UVM_ET_STACK;
		if (flags & UVM_FLAG_UNMAP)
			map->sserial++;
	}
	if (uobj)
		entry->etype |= UVM_ET_OBJ;
	else if (flags & UVM_FLAG_HOLE)
		entry->etype |= UVM_ET_HOLE;
	if (flags & UVM_FLAG_NOFAULT)
		entry->etype |= UVM_ET_NOFAULT;
	if (flags & UVM_FLAG_WC)
		entry->etype |= UVM_ET_WC;
	if (flags & UVM_FLAG_COPYONW) {
		entry->etype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			entry->etype |= UVM_ET_NEEDSCOPY;
	}
	if (flags & UVM_FLAG_CONCEAL)
		entry->etype |= UVM_ET_CONCEAL;
	if (flags & UVM_FLAG_OVERLAY) {
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
	}

	/* Update map and process statistics. */
	if (!(flags & UVM_FLAG_HOLE)) {
		map->size += sz;
		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
		    prot != PROT_NONE) {
			((struct vmspace *)map)->vm_dused +=
			    uvmspace_dused(map, *addr, *addr + sz);
		}
	}

	/*
	 * Try to merge entry.
	 *
	 * Userland allocations are kept separated most of the time.
	 * Forego the effort of merging what most of the time can't be merged
	 * and only try the merge if it concerns a kernel entry.
	 */
	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
	    (map->flags & VM_MAP_ISVMSPACE) == 0)
		uvm_mapent_tryjoin(map, entry, &dead);

unlock:
	vm_map_unlock(map);

	/*
	 * Remove dead entries.
	 *
	 * Dead entries may be the result of merging.
	 * uvm_map_mkentry may also create dead entries, when it attempts to
	 * destroy free-space entries.
	 */
	if (map->flags & VM_MAP_INTRSAFE)
		uvm_unmap_detach_intrsafe(&dead);
	else
		uvm_unmap_detach(&dead, 0);
out:
	if (new)
		uvm_mapent_free(new);
	return error;
}

/*
 * True iff e1 and e2 can be joined together.
 */
int
uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
    struct vm_map_entry *e2)
{
	KDASSERT(e1 != NULL && e2 != NULL);

	/* Must be the same entry type and not have free memory between. */
	if (e1->etype != e2->etype || e1->end != e2->start)
		return 0;

	/* Submaps are never joined. */
	if (UVM_ET_ISSUBMAP(e1))
		return 0;

	/* Never merge wired memory. */
	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
		return 0;

	/* Protection, inheritance and advice must be equal. */
	if (e1->protection != e2->protection ||
	    e1->max_protection != e2->max_protection ||
	    e1->inheritance != e2->inheritance ||
	    e1->advice != e2->advice)
		return 0;

	/* If uvm_object: object itself and offsets within object must match. */
	if (UVM_ET_ISOBJ(e1)) {
		if (e1->object.uvm_obj != e2->object.uvm_obj)
			return 0;
		if (e1->offset + (e1->end - e1->start) != e2->offset)
			return 0;
	}

	/*
	 * Cannot join shared amaps.
	 * Note: no need to lock amap to look at refs, since we don't care
	 * about its exact value.
	 * If it is 1 (i.e. we have the only reference) it will stay there.
	 */
	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
		return 0;
	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
		return 0;

	/* Apparently, e1 and e2 match. */
	return 1;
}

/*
 * Join support function.
 *
 * Returns the merged entry on success.
 * Returns NULL if the merge failed.
 */
struct vm_map_entry*
uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
    struct vm_map_entry *e2, struct uvm_map_deadq *dead)
{
	struct uvm_addr_state *free;

	/*
	 * Merging is not supported for map entries that
	 * contain an amap in e1. This should never happen
	 * anyway, because only kernel entries are merged.
	 * These do not contain amaps.
	 * e2 contains no real information in its amap,
	 * so it can be erased immediately.
	 */
	KASSERT(e1->aref.ar_amap == NULL);

	/*
	 * Don't drop obj reference:
	 * uvm_unmap_detach will do this for us.
	 */
	free = uvm_map_uaddr_e(map, e1);
	uvm_mapent_free_remove(map, free, e1);

	free = uvm_map_uaddr_e(map, e2);
	uvm_mapent_free_remove(map, free, e2);
	uvm_mapent_addr_remove(map, e2);
	e1->end = e2->end;
	e1->guard = e2->guard;
	e1->fspace = e2->fspace;
	uvm_mapent_free_insert(map, free, e1);

	DEAD_ENTRY_PUSH(dead, e2);
	return e1;
}

/*
 * Attempt forward and backward joining of entry.
 *
 * Returns entry after joins.
 * We are guaranteed that the amap of entry is either non-existent or
 * has never been used.
 */
struct vm_map_entry*
uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
    struct uvm_map_deadq *dead)
{
	struct vm_map_entry *other;
	struct vm_map_entry *merged;

	/* Merge with previous entry. */
	other = RBT_PREV(uvm_map_addr, entry);
	if (other && uvm_mapent_isjoinable(map, other, entry)) {
		merged = uvm_mapent_merge(map, other, entry, dead);
		if (merged)
			entry = merged;
	}

	/*
	 * Merge with next entry.
	 *
	 * Because amap can only extend forward and the next entry
	 * probably contains sensible info, only perform forward merging
	 * in the absence of an amap.
	 */
	other = RBT_NEXT(uvm_map_addr, entry);
	if (other && entry->aref.ar_amap == NULL &&
	    other->aref.ar_amap == NULL &&
	    uvm_mapent_isjoinable(map, entry, other)) {
		merged = uvm_mapent_merge(map, entry, other, dead);
		if (merged)
			entry = merged;
	}

	return entry;
}

/*
 * Kill entries that are no longer in a map.
 */
void
uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
{
	struct vm_map_entry *entry, *tmp;
	int waitok = flags & UVM_PLA_WAITOK;

	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
		/* Skip entries for which we have to grab the kernel lock. */
		if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) ||
		    UVM_ET_ISOBJ(entry))
			continue;

		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}

	if (TAILQ_EMPTY(deadq))
		return;

	KERNEL_LOCK();
	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
		if (waitok)
			uvm_pause();
		/* Drop reference to amap, if we've got one. */
		if (entry->aref.ar_amap)
			amap_unref(entry->aref.ar_amap,
			    entry->aref.ar_pageoff,
			    atop(entry->end - entry->start),
			    flags & AMAP_REFALL);

		/* Drop reference to our backing object, if we've got one. */
		if (UVM_ET_ISSUBMAP(entry)) {
			/* ... unlikely to happen, but play it safe */
			uvm_map_deallocate(entry->object.sub_map);
		} else if (UVM_ET_ISOBJ(entry) &&
		    entry->object.uvm_obj->pgops->pgo_detach) {
			entry->object.uvm_obj->pgops->pgo_detach(
			    entry->object.uvm_obj);
		}

		/* Step to next. */
		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}
	KERNEL_UNLOCK();
}

void
uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
{
	struct vm_map_entry *entry;

	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
		KASSERT(entry->aref.ar_amap == NULL);
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(!UVM_ET_ISOBJ(entry));
		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}
}

/*
 * Create and insert new entry.
 *
 * Returned entry contains new addresses and is inserted properly in the tree.
 * first and last are (probably) no longer valid.
 */
struct vm_map_entry*
uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
    struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
    struct uvm_map_deadq *dead, struct vm_map_entry *new)
{
	struct vm_map_entry *entry, *prev;
	struct uvm_addr_state *free;
	vaddr_t min, max;	/* free space boundaries for new entry */

	KDASSERT(map != NULL);
	KDASSERT(first != NULL);
	KDASSERT(last != NULL);
	KDASSERT(dead != NULL);
	KDASSERT(sz > 0);
	KDASSERT(addr + sz > addr);
	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
	uvm_tree_sanity(map, __FILE__, __LINE__);

	min = addr + sz;
	max = VMMAP_FREE_END(last);

	/* Initialize new entry. */
	if (new == NULL)
		entry = uvm_mapent_alloc(map, flags);
	else
		entry = new;
	if (entry == NULL)
		return NULL;
	entry->offset = 0;
	entry->etype = 0;
	entry->wired_count = 0;
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = NULL;

	entry->start = addr;
	entry->end = min;
	entry->guard = 0;
	entry->fspace = 0;

	/* Reset free space in first. */
	free = uvm_map_uaddr_e(map, first);
	uvm_mapent_free_remove(map, free, first);
	first->guard = 0;
	first->fspace = 0;

	/*
	 * Remove all entries that are fully replaced.
	 * We are iterating using last in reverse order.
	 */
	for (; first != last; last = prev) {
		prev = RBT_PREV(uvm_map_addr, last);

		KDASSERT(last->start == last->end);
		free = uvm_map_uaddr_e(map, last);
		uvm_mapent_free_remove(map, free, last);
		uvm_mapent_addr_remove(map, last);
		DEAD_ENTRY_PUSH(dead, last);
	}
	/* Remove first if it is entirely inside <addr, addr+sz>. */
	if (first->start == addr) {
		uvm_mapent_addr_remove(map, first);
		DEAD_ENTRY_PUSH(dead, first);
	} else {
		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
		    addr, flags);
	}

	/* Finally, link in entry. */
	uvm_mapent_addr_insert(map, entry);
	uvm_map_fix_space(map, entry, min, max, flags);

	uvm_tree_sanity(map, __FILE__, __LINE__);
	return entry;
}


/*
 * uvm_mapent_alloc: allocate a map entry
 */
struct vm_map_entry *
uvm_mapent_alloc(struct vm_map *map, int flags)
{
	struct vm_map_entry *me, *ne;
	int pool_flags;
	int i;

	pool_flags = PR_WAITOK;
	if (flags & UVM_FLAG_TRYLOCK)
		pool_flags = PR_NOWAIT;

	if (map->flags & VM_MAP_INTRSAFE || cold) {
		mtx_enter(&uvm_kmapent_mtx);
		if (SLIST_EMPTY(&uvm.kentry_free)) {
			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
			    &kd_nowait);
			if (ne == NULL)
				panic("uvm_mapent_alloc: cannot allocate map "
				    "entry");
			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
				SLIST_INSERT_HEAD(&uvm.kentry_free,
				    &ne[i], daddrs.addr_kentry);
			}
			if (ratecheck(&uvm_kmapent_last_warn_time,
			    &uvm_kmapent_warn_rate))
				printf("uvm_mapent_alloc: out of static "
				    "map entries\n");
		}
		me = SLIST_FIRST(&uvm.kentry_free);
		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
		uvmexp.kmapent++;
		mtx_leave(&uvm_kmapent_mtx);
		me->flags = UVM_MAP_STATIC;
	} else if (map == kernel_map) {
		splassert(IPL_NONE);
		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
		if (me == NULL)
			goto out;
		me->flags = UVM_MAP_KMEM;
	} else {
		splassert(IPL_NONE);
		me = pool_get(&uvm_map_entry_pool, pool_flags);
		if (me == NULL)
			goto out;
		me->flags = 0;
	}

	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
out:
	return(me);
}

/*
 * uvm_mapent_free: free map entry
 *
 * => XXX: static pool for kernel map?
 */
void
uvm_mapent_free(struct vm_map_entry *me)
{
	if (me->flags & UVM_MAP_STATIC) {
		mtx_enter(&uvm_kmapent_mtx);
		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
		uvmexp.kmapent--;
		mtx_leave(&uvm_kmapent_mtx);
	} else if (me->flags & UVM_MAP_KMEM) {
		splassert(IPL_NONE);
		pool_put(&uvm_map_entry_kmem_pool, me);
	} else {
		splassert(IPL_NONE);
		pool_put(&uvm_map_entry_pool, me);
	}
}

/*
 * uvm_map_lookup_entry: find map entry at or before an address.
 *
 * => map must at least be read-locked by caller
 * => entry is returned in "entry"
 * => return value is true if address is in the returned entry
 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
 * returned for those mappings.
 */
boolean_t
uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry)
{
	*entry = uvm_map_entrybyaddr(&map->addr, address);
	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
	    (*entry)->start <= address && (*entry)->end > address;
}
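
/*
 * Illustrative sketch (not part of the original source): a caller that only
 * needs to inspect the entry covering addr takes the read lock:
 *
 *	struct vm_map_entry *entry;
 *
 *	vm_map_lock_read(map);
 *	if (uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
 *		... addr is mapped; inspect entry ...
 *	}
 *	vm_map_unlock_read(map);
 */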

/*
 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
 * grown -- then uvm_map_check_region_range() should not cache the entry
 * because growth won't be seen.
 */
int
uvm_map_inentry_sp(vm_map_entry_t entry)
{
	if ((entry->etype & UVM_ET_STACK) == 0) {
		if (entry->protection == PROT_NONE)
			return (-1); /* don't update range */
		return (0);
	}
	return (1);
}

/*
 * The system call must not come from a writeable entry, W^X is violated.
 * (Would be nice if we could spot aliasing, which is also kind of bad)
 *
 * The system call must come from a syscall-labeled entry (which are
 * the text regions of the main program, sigtramp, ld.so, or libc).
 */
int
uvm_map_inentry_pc(vm_map_entry_t entry)
{
	if (entry->protection & PROT_WRITE)
		return (0);	/* not permitted */
	if ((entry->etype & UVM_ET_SYSCALL) == 0)
		return (0);	/* not permitted */
	return (1);
}

int
uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
{
	return (serial != ie->ie_serial || ie->ie_start == 0 ||
	    addr < ie->ie_start || addr >= ie->ie_end);
}

/*
 * Inside a vm_map find the reg address and verify it via function.
 * Remember low and high addresses of region if valid and return TRUE,
 * else return FALSE.
 */
boolean_t
uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    int (*fn)(vm_map_entry_t), u_long serial)
{
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int ret;

	if (addr < map->min_offset || addr >= map->max_offset)
		return (FALSE);

	/* lock map */
	vm_map_lock_read(map);

	/* lookup */
	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}

	ret = (*fn)(entry);
	if (ret == 0) {
		vm_map_unlock_read(map);
		return (FALSE);
	} else if (ret == 1) {
		ie->ie_start = entry->start;
		ie->ie_end = entry->end;
		ie->ie_serial = serial;
	} else {
		/* do not update, re-check later */
	}
	vm_map_unlock_read(map);
	return (TRUE);
}

boolean_t
uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
{
	union sigval sv;
	boolean_t ok = TRUE;

	if (uvm_map_inentry_recheck(serial, addr, ie)) {
		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
		if (!ok) {
			KERNEL_LOCK();
			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
			    addr, ie->ie_start, ie->ie_end);
			p->p_p->ps_acflag |= AMAP;
			sv.sival_ptr = (void *)PROC_PC(p);
			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
			KERNEL_UNLOCK();
		}
	}
	return (ok);
}

/*
 * Check whether the given address range can be converted to a MAP_STACK
 * mapping.
 *
 * Must be called with map locked.
 */
boolean_t
uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
{
	vaddr_t end = addr + sz;
	struct vm_map_entry *first, *iter, *prev = NULL;

	if (!uvm_map_lookup_entry(map, addr, &first)) {
		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
		    addr, end, map);
		return FALSE;
	}

	/*
	 * Check that the address range exists and is contiguous.
	 */
	for (iter = first; iter != NULL && iter->start < end;
	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
		/*
		 * Make sure that we do not have holes in the range.
1935 */ 1936 #if 0 1937 if (prev != NULL) { 1938 printf("prev->start 0x%lx, prev->end 0x%lx, " 1939 "iter->start 0x%lx, iter->end 0x%lx\n", 1940 prev->start, prev->end, iter->start, iter->end); 1941 } 1942 #endif 1943 1944 if (prev != NULL && prev->end != iter->start) { 1945 printf("map stack 0x%lx-0x%lx of map %p failed: " 1946 "hole in range\n", addr, end, map); 1947 return FALSE; 1948 } 1949 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1950 printf("map stack 0x%lx-0x%lx of map %p failed: " 1951 "hole in range\n", addr, end, map); 1952 return FALSE; 1953 } 1954 } 1955 1956 return TRUE; 1957 } 1958 1959 /* 1960 * Remap the middle-pages of an existing mapping as a stack range. 1961 * If there exists a previous contiguous mapping with the given range 1962 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1963 * mapping is dropped, and a new anon mapping is created and marked as 1964 * a stack. 1965 * 1966 * Must be called with map unlocked. 1967 */ 1968 int 1969 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1970 { 1971 vm_map_t map = &p->p_vmspace->vm_map; 1972 vaddr_t start, end; 1973 int error; 1974 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1975 PROT_READ | PROT_WRITE | PROT_EXEC, 1976 MAP_INHERIT_COPY, MADV_NORMAL, 1977 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1978 UVM_FLAG_COPYONW); 1979 1980 start = round_page(addr); 1981 end = trunc_page(addr + sz); 1982 #ifdef MACHINE_STACK_GROWS_UP 1983 if (end == addr + sz) 1984 end -= PAGE_SIZE; 1985 #else 1986 if (start == addr) 1987 start += PAGE_SIZE; 1988 #endif 1989 1990 if (start < map->min_offset || end >= map->max_offset || end < start) 1991 return EINVAL; 1992 1993 error = uvm_mapanon(map, &start, end - start, 0, flags); 1994 if (error != 0) 1995 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1996 1997 return error; 1998 } 1999 2000 /* 2001 * uvm_map_pie: return a random load address for a PIE executable 2002 * properly aligned. 2003 */ 2004 #ifndef VM_PIE_MAX_ADDR 2005 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 2006 #endif 2007 2008 #ifndef VM_PIE_MIN_ADDR 2009 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 2010 #endif 2011 2012 #ifndef VM_PIE_MIN_ALIGN 2013 #define VM_PIE_MIN_ALIGN PAGE_SIZE 2014 #endif 2015 2016 vaddr_t 2017 uvm_map_pie(vaddr_t align) 2018 { 2019 vaddr_t addr, space, min; 2020 2021 align = MAX(align, VM_PIE_MIN_ALIGN); 2022 2023 /* round up to next alignment */ 2024 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 2025 2026 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 2027 return (align); 2028 2029 space = (VM_PIE_MAX_ADDR - min) / align; 2030 space = MIN(space, (u_int32_t)-1); 2031 2032 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 2033 addr += min; 2034 2035 return (addr); 2036 } 2037 2038 void 2039 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 2040 { 2041 struct uvm_map_deadq dead; 2042 2043 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 2044 (end & (vaddr_t)PAGE_MASK) == 0); 2045 TAILQ_INIT(&dead); 2046 vm_map_lock(map); 2047 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 2048 vm_map_unlock(map); 2049 2050 if (map->flags & VM_MAP_INTRSAFE) 2051 uvm_unmap_detach_intrsafe(&dead); 2052 else 2053 uvm_unmap_detach(&dead, 0); 2054 } 2055 2056 /* 2057 * Mark entry as free. 2058 * 2059 * entry will be put on the dead list. 2060 * The free space will be merged into the previous or a new entry, 2061 * unless markfree is false. 
2062 */ 2063 void 2064 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 2065 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 2066 boolean_t markfree) 2067 { 2068 struct uvm_addr_state *free; 2069 struct vm_map_entry *prev; 2070 vaddr_t addr; /* Start of freed range. */ 2071 vaddr_t end; /* End of freed range. */ 2072 2073 prev = *prev_ptr; 2074 if (prev == entry) 2075 *prev_ptr = prev = NULL; 2076 2077 if (prev == NULL || 2078 VMMAP_FREE_END(prev) != entry->start) 2079 prev = RBT_PREV(uvm_map_addr, entry); 2080 2081 /* Entry is describing only free memory and has nothing to drain into. */ 2082 if (prev == NULL && entry->start == entry->end && markfree) { 2083 *prev_ptr = entry; 2084 return; 2085 } 2086 2087 addr = entry->start; 2088 end = VMMAP_FREE_END(entry); 2089 free = uvm_map_uaddr_e(map, entry); 2090 uvm_mapent_free_remove(map, free, entry); 2091 uvm_mapent_addr_remove(map, entry); 2092 DEAD_ENTRY_PUSH(dead, entry); 2093 2094 if (markfree) { 2095 if (prev) { 2096 free = uvm_map_uaddr_e(map, prev); 2097 uvm_mapent_free_remove(map, free, prev); 2098 } 2099 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 2100 } 2101 } 2102 2103 /* 2104 * Unwire and release referenced amap and object from map entry. 2105 */ 2106 void 2107 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2108 { 2109 /* Unwire removed map entry. */ 2110 if (VM_MAPENT_ISWIRED(entry)) { 2111 KERNEL_LOCK(); 2112 entry->wired_count = 0; 2113 uvm_fault_unwire_locked(map, entry->start, entry->end); 2114 KERNEL_UNLOCK(); 2115 } 2116 2117 /* Entry-type specific code. */ 2118 if (UVM_ET_ISHOLE(entry)) { 2119 /* Nothing to be done for holes. */ 2120 } else if (map->flags & VM_MAP_INTRSAFE) { 2121 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2122 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2123 pmap_kremove(entry->start, entry->end - entry->start); 2124 } else if (UVM_ET_ISOBJ(entry) && 2125 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2126 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2127 /* 2128 * Note: kernel object mappings are currently used in 2129 * two ways: 2130 * [1] "normal" mappings of pages in the kernel object 2131 * [2] uvm_km_valloc'd allocations in which we 2132 * pmap_enter in some non-kernel-object page 2133 * (e.g. vmapbuf). 2134 * 2135 * for case [1], we need to remove the mapping from 2136 * the pmap and then remove the page from the kernel 2137 * object (because, once pages in a kernel object are 2138 * unmapped they are no longer needed, unlike, say, 2139 * a vnode where you might want the data to persist 2140 * until flushed out of a queue). 2141 * 2142 * for case [2], we need to remove the mapping from 2143 * the pmap. there shouldn't be any pages at the 2144 * specified offset in the kernel object [but it 2145 * doesn't hurt to call uvm_km_pgremove just to be 2146 * safe?] 2147 * 2148 * uvm_km_pgremove currently does the following: 2149 * for pages in the kernel object range: 2150 * - drops the swap slot 2151 * - uvm_pagefree the page 2152 * 2153 * note there is version of uvm_km_pgremove() that 2154 * is used for "intrsafe" objects. 2155 */ 2156 /* 2157 * remove mappings from pmap and drop the pages 2158 * from the object. offsets are always relative 2159 * to vm_map_min(kernel_map). 
2160 */ 2161 pmap_remove(pmap_kernel(), entry->start, entry->end); 2162 uvm_km_pgremove(entry->object.uvm_obj, 2163 entry->start - vm_map_min(kernel_map), 2164 entry->end - vm_map_min(kernel_map)); 2165 2166 /* 2167 * null out kernel_object reference, we've just 2168 * dropped it 2169 */ 2170 entry->etype &= ~UVM_ET_OBJ; 2171 entry->object.uvm_obj = NULL; /* to be safe */ 2172 } else { 2173 /* remove mappings the standard way. */ 2174 pmap_remove(map->pmap, entry->start, entry->end); 2175 } 2176 } 2177 2178 /* 2179 * Remove all entries from start to end. 2180 * 2181 * If remove_holes, then remove ET_HOLE entries as well. 2182 * If markfree, entry will be properly marked free, otherwise, no replacement 2183 * entry will be put in the tree (corrupting the tree). 2184 */ 2185 void 2186 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2187 struct uvm_map_deadq *dead, boolean_t remove_holes, 2188 boolean_t markfree) 2189 { 2190 struct vm_map_entry *prev_hint, *next, *entry; 2191 2192 start = MAX(start, map->min_offset); 2193 end = MIN(end, map->max_offset); 2194 if (start >= end) 2195 return; 2196 2197 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2198 splassert(IPL_NONE); 2199 else 2200 splassert(IPL_VM); 2201 2202 /* Find first affected entry. */ 2203 entry = uvm_map_entrybyaddr(&map->addr, start); 2204 KDASSERT(entry != NULL && entry->start <= start); 2205 if (entry->end <= start && markfree) 2206 entry = RBT_NEXT(uvm_map_addr, entry); 2207 else 2208 UVM_MAP_CLIP_START(map, entry, start); 2209 2210 /* 2211 * Iterate entries until we reach end address. 2212 * prev_hint hints where the freed space can be appended to. 2213 */ 2214 prev_hint = NULL; 2215 for (; entry != NULL && entry->start < end; entry = next) { 2216 KDASSERT(entry->start >= start); 2217 if (entry->end > end || !markfree) 2218 UVM_MAP_CLIP_END(map, entry, end); 2219 KDASSERT(entry->start >= start && entry->end <= end); 2220 next = RBT_NEXT(uvm_map_addr, entry); 2221 2222 /* Don't remove holes unless asked to do so. */ 2223 if (UVM_ET_ISHOLE(entry)) { 2224 if (!remove_holes) { 2225 prev_hint = entry; 2226 continue; 2227 } 2228 } 2229 2230 /* A stack has been removed.. */ 2231 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2232 map->sserial++; 2233 2234 /* Kill entry. */ 2235 uvm_unmap_kill_entry(map, entry); 2236 2237 /* Update space usage. */ 2238 if ((map->flags & VM_MAP_ISVMSPACE) && 2239 entry->object.uvm_obj == NULL && 2240 entry->protection != PROT_NONE && 2241 !UVM_ET_ISHOLE(entry)) { 2242 ((struct vmspace *)map)->vm_dused -= 2243 uvmspace_dused(map, entry->start, entry->end); 2244 } 2245 if (!UVM_ET_ISHOLE(entry)) 2246 map->size -= entry->end - entry->start; 2247 2248 /* Actual removal of entry. */ 2249 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2250 } 2251 2252 pmap_update(vm_map_pmap(map)); 2253 2254 #ifdef VMMAP_DEBUG 2255 if (markfree) { 2256 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2257 entry != NULL && entry->start < end; 2258 entry = RBT_NEXT(uvm_map_addr, entry)) { 2259 KDASSERT(entry->end <= start || 2260 entry->start == entry->end || 2261 UVM_ET_ISHOLE(entry)); 2262 } 2263 } else { 2264 vaddr_t a; 2265 for (a = start; a < end; a += PAGE_SIZE) 2266 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2267 } 2268 #endif 2269 } 2270 2271 /* 2272 * Mark all entries from first until end (exclusive) as pageable. 2273 * 2274 * Lock must be exclusive on entry and will not be touched. 
2275 */ 2276 void 2277 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2278 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2279 { 2280 struct vm_map_entry *iter; 2281 2282 for (iter = first; iter != end; 2283 iter = RBT_NEXT(uvm_map_addr, iter)) { 2284 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2285 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2286 continue; 2287 2288 iter->wired_count = 0; 2289 uvm_fault_unwire_locked(map, iter->start, iter->end); 2290 } 2291 } 2292 2293 /* 2294 * Mark all entries from first until end (exclusive) as wired. 2295 * 2296 * Lockflags determines the lock state on return from this function. 2297 * Lock must be exclusive on entry. 2298 */ 2299 int 2300 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2301 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2302 int lockflags) 2303 { 2304 struct vm_map_entry *iter; 2305 #ifdef DIAGNOSTIC 2306 unsigned int timestamp_save; 2307 #endif 2308 int error; 2309 2310 /* 2311 * Wire pages in two passes: 2312 * 2313 * 1: holding the write lock, we create any anonymous maps that need 2314 * to be created. then we clip each map entry to the region to 2315 * be wired and increment its wiring count. 2316 * 2317 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2318 * in the pages for any newly wired area (wired_count == 1). 2319 * 2320 * downgrading to a read lock for uvm_fault_wire avoids a possible 2321 * deadlock with another thread that may have faulted on one of 2322 * the pages to be wired (it would mark the page busy, blocking 2323 * us, then in turn block on the map lock that we hold). 2324 * because we keep the read lock on the map, the copy-on-write 2325 * status of the entries we modify here cannot change. 2326 */ 2327 for (iter = first; iter != end; 2328 iter = RBT_NEXT(uvm_map_addr, iter)) { 2329 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2330 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2331 iter->protection == PROT_NONE) 2332 continue; 2333 2334 /* 2335 * Perform actions of vm_map_lookup that need the write lock. 2336 * - create an anonymous map for copy-on-write 2337 * - anonymous map for zero-fill 2338 * Skip submaps. 2339 */ 2340 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2341 UVM_ET_ISNEEDSCOPY(iter) && 2342 ((iter->protection & PROT_WRITE) || 2343 iter->object.uvm_obj == NULL)) { 2344 amap_copy(map, iter, M_WAITOK, 2345 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2346 iter->start, iter->end); 2347 } 2348 iter->wired_count++; 2349 } 2350 2351 /* 2352 * Pass 2. 2353 */ 2354 #ifdef DIAGNOSTIC 2355 timestamp_save = map->timestamp; 2356 #endif 2357 vm_map_busy(map); 2358 vm_map_downgrade(map); 2359 2360 error = 0; 2361 for (iter = first; error == 0 && iter != end; 2362 iter = RBT_NEXT(uvm_map_addr, iter)) { 2363 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2364 iter->protection == PROT_NONE) 2365 continue; 2366 2367 error = uvm_fault_wire(map, iter->start, iter->end, 2368 iter->protection); 2369 } 2370 2371 if (error) { 2372 /* 2373 * uvm_fault_wire failure 2374 * 2375 * Reacquire lock and undo our work. 2376 */ 2377 vm_map_upgrade(map); 2378 vm_map_unbusy(map); 2379 #ifdef DIAGNOSTIC 2380 if (timestamp_save != map->timestamp) 2381 panic("uvm_map_pageable_wire: stale map"); 2382 #endif 2383 2384 /* 2385 * first is no longer needed to restart loops. 2386 * Use it as iterator to unmap successful mappings. 
2387 */ 2388 for (; first != iter; 2389 first = RBT_NEXT(uvm_map_addr, first)) { 2390 if (UVM_ET_ISHOLE(first) || 2391 first->start == first->end || 2392 first->protection == PROT_NONE) 2393 continue; 2394 2395 first->wired_count--; 2396 if (!VM_MAPENT_ISWIRED(first)) { 2397 uvm_fault_unwire_locked(map, 2398 iter->start, iter->end); 2399 } 2400 } 2401 2402 /* decrease counter in the rest of the entries */ 2403 for (; iter != end; 2404 iter = RBT_NEXT(uvm_map_addr, iter)) { 2405 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2406 iter->protection == PROT_NONE) 2407 continue; 2408 2409 iter->wired_count--; 2410 } 2411 2412 if ((lockflags & UVM_LK_EXIT) == 0) 2413 vm_map_unlock(map); 2414 return error; 2415 } 2416 2417 /* We are currently holding a read lock. */ 2418 if ((lockflags & UVM_LK_EXIT) == 0) { 2419 vm_map_unbusy(map); 2420 vm_map_unlock_read(map); 2421 } else { 2422 vm_map_upgrade(map); 2423 vm_map_unbusy(map); 2424 #ifdef DIAGNOSTIC 2425 if (timestamp_save != map->timestamp) 2426 panic("uvm_map_pageable_wire: stale map"); 2427 #endif 2428 } 2429 return 0; 2430 } 2431 2432 /* 2433 * uvm_map_pageable: set pageability of a range in a map. 2434 * 2435 * Flags: 2436 * UVM_LK_ENTER: map is already locked by caller 2437 * UVM_LK_EXIT: don't unlock map on exit 2438 * 2439 * The full range must be in use (entries may not have fspace != 0). 2440 * UVM_ET_HOLE counts as unmapped. 2441 */ 2442 int 2443 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2444 boolean_t new_pageable, int lockflags) 2445 { 2446 struct vm_map_entry *first, *last, *tmp; 2447 int error; 2448 2449 start = trunc_page(start); 2450 end = round_page(end); 2451 2452 if (start > end) 2453 return EINVAL; 2454 if (start == end) 2455 return 0; /* nothing to do */ 2456 if (start < map->min_offset) 2457 return EFAULT; /* why? see first XXX below */ 2458 if (end > map->max_offset) 2459 return EINVAL; /* why? see second XXX below */ 2460 2461 KASSERT(map->flags & VM_MAP_PAGEABLE); 2462 if ((lockflags & UVM_LK_ENTER) == 0) 2463 vm_map_lock(map); 2464 2465 /* 2466 * Find first entry. 2467 * 2468 * Initial test on start is different, because of the different 2469 * error returned. Rest is tested further down. 2470 */ 2471 first = uvm_map_entrybyaddr(&map->addr, start); 2472 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2473 /* 2474 * XXX if the first address is not mapped, it is EFAULT? 2475 */ 2476 error = EFAULT; 2477 goto out; 2478 } 2479 2480 /* Check that the range has no holes. */ 2481 for (last = first; last != NULL && last->start < end; 2482 last = RBT_NEXT(uvm_map_addr, last)) { 2483 if (UVM_ET_ISHOLE(last) || 2484 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2485 /* 2486 * XXX unmapped memory in range, why is it EINVAL 2487 * instead of EFAULT? 2488 */ 2489 error = EINVAL; 2490 goto out; 2491 } 2492 } 2493 2494 /* 2495 * Last ended at the first entry after the range. 2496 * Move back one step. 2497 * 2498 * Note that last may be NULL. 2499 */ 2500 if (last == NULL) { 2501 last = RBT_MAX(uvm_map_addr, &map->addr); 2502 if (last->end < end) { 2503 error = EINVAL; 2504 goto out; 2505 } 2506 } else { 2507 KASSERT(last != first); 2508 last = RBT_PREV(uvm_map_addr, last); 2509 } 2510 2511 /* Wire/unwire pages here. */ 2512 if (new_pageable) { 2513 /* 2514 * Mark pageable. 2515 * entries that are not wired are untouched. 2516 */ 2517 if (VM_MAPENT_ISWIRED(first)) 2518 UVM_MAP_CLIP_START(map, first, start); 2519 /* 2520 * Split last at end. 
2521 * Make tmp be the first entry after what is to be touched. 2522 * If last is not wired, don't touch it. 2523 */ 2524 if (VM_MAPENT_ISWIRED(last)) { 2525 UVM_MAP_CLIP_END(map, last, end); 2526 tmp = RBT_NEXT(uvm_map_addr, last); 2527 } else 2528 tmp = last; 2529 2530 uvm_map_pageable_pgon(map, first, tmp, start, end); 2531 error = 0; 2532 2533 out: 2534 if ((lockflags & UVM_LK_EXIT) == 0) 2535 vm_map_unlock(map); 2536 return error; 2537 } else { 2538 /* 2539 * Mark entries wired. 2540 * entries are always touched (because recovery needs this). 2541 */ 2542 if (!VM_MAPENT_ISWIRED(first)) 2543 UVM_MAP_CLIP_START(map, first, start); 2544 /* 2545 * Split last at end. 2546 * Make tmp be the first entry after what is to be touched. 2547 * If last is not wired, don't touch it. 2548 */ 2549 if (!VM_MAPENT_ISWIRED(last)) { 2550 UVM_MAP_CLIP_END(map, last, end); 2551 tmp = RBT_NEXT(uvm_map_addr, last); 2552 } else 2553 tmp = last; 2554 2555 return uvm_map_pageable_wire(map, first, tmp, start, end, 2556 lockflags); 2557 } 2558 } 2559 2560 /* 2561 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2562 * all mapped regions. 2563 * 2564 * Map must not be locked. 2565 * If no flags are specified, all ragions are unwired. 2566 */ 2567 int 2568 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2569 { 2570 vsize_t size; 2571 struct vm_map_entry *iter; 2572 2573 KASSERT(map->flags & VM_MAP_PAGEABLE); 2574 vm_map_lock(map); 2575 2576 if (flags == 0) { 2577 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2578 NULL, map->min_offset, map->max_offset); 2579 2580 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2581 vm_map_unlock(map); 2582 return 0; 2583 } 2584 2585 if (flags & MCL_FUTURE) 2586 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2587 if (!(flags & MCL_CURRENT)) { 2588 vm_map_unlock(map); 2589 return 0; 2590 } 2591 2592 /* 2593 * Count number of pages in all non-wired entries. 2594 * If the number exceeds the limit, abort. 2595 */ 2596 size = 0; 2597 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2598 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2599 continue; 2600 2601 size += iter->end - iter->start; 2602 } 2603 2604 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2605 vm_map_unlock(map); 2606 return ENOMEM; 2607 } 2608 2609 /* XXX non-pmap_wired_count case must be handled by caller */ 2610 #ifdef pmap_wired_count 2611 if (limit != 0 && 2612 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2613 vm_map_unlock(map); 2614 return ENOMEM; 2615 } 2616 #endif 2617 2618 /* 2619 * uvm_map_pageable_wire will release lock 2620 */ 2621 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2622 NULL, map->min_offset, map->max_offset, 0); 2623 } 2624 2625 /* 2626 * Initialize map. 2627 * 2628 * Allocates sufficient entries to describe the free memory in the map. 2629 */ 2630 void 2631 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max, 2632 int flags) 2633 { 2634 int i; 2635 2636 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2637 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2638 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2639 2640 /* 2641 * Update parameters. 2642 * 2643 * This code handles (vaddr_t)-1 and other page mask ending addresses 2644 * properly. 2645 * We lose the top page if the full virtual address space is used. 
2646 */ 2647 if (max & (vaddr_t)PAGE_MASK) { 2648 max += 1; 2649 if (max == 0) /* overflow */ 2650 max -= PAGE_SIZE; 2651 } 2652 2653 RBT_INIT(uvm_map_addr, &map->addr); 2654 map->uaddr_exe = NULL; 2655 for (i = 0; i < nitems(map->uaddr_any); ++i) 2656 map->uaddr_any[i] = NULL; 2657 map->uaddr_brk_stack = NULL; 2658 2659 map->pmap = pmap; 2660 map->size = 0; 2661 map->ref_count = 0; 2662 map->min_offset = min; 2663 map->max_offset = max; 2664 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2665 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2666 map->flags = flags; 2667 map->timestamp = 0; 2668 if (flags & VM_MAP_ISVMSPACE) 2669 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2670 else 2671 rw_init(&map->lock, "kmmaplk"); 2672 mtx_init(&map->mtx, IPL_VM); 2673 mtx_init(&map->flags_lock, IPL_VM); 2674 2675 /* Configure the allocators. */ 2676 if (flags & VM_MAP_ISVMSPACE) 2677 uvm_map_setup_md(map); 2678 else 2679 map->uaddr_any[3] = &uaddr_kbootstrap; 2680 2681 /* 2682 * Fill map entries. 2683 * We do not need to write-lock the map here because only the current 2684 * thread sees it right now. Initialize ref_count to 0 above to avoid 2685 * bogus triggering of lock-not-held assertions. 2686 */ 2687 uvm_map_setup_entries(map); 2688 uvm_tree_sanity(map, __FILE__, __LINE__); 2689 map->ref_count = 1; 2690 } 2691 2692 /* 2693 * Destroy the map. 2694 * 2695 * This is the inverse operation to uvm_map_setup. 2696 */ 2697 void 2698 uvm_map_teardown(struct vm_map *map) 2699 { 2700 struct uvm_map_deadq dead_entries; 2701 struct vm_map_entry *entry, *tmp; 2702 #ifdef VMMAP_DEBUG 2703 size_t numq, numt; 2704 #endif 2705 int i; 2706 2707 KERNEL_ASSERT_LOCKED(); 2708 KERNEL_UNLOCK(); 2709 KERNEL_ASSERT_UNLOCKED(); 2710 2711 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2712 2713 /* Remove address selectors. */ 2714 uvm_addr_destroy(map->uaddr_exe); 2715 map->uaddr_exe = NULL; 2716 for (i = 0; i < nitems(map->uaddr_any); i++) { 2717 uvm_addr_destroy(map->uaddr_any[i]); 2718 map->uaddr_any[i] = NULL; 2719 } 2720 uvm_addr_destroy(map->uaddr_brk_stack); 2721 map->uaddr_brk_stack = NULL; 2722 2723 /* 2724 * Remove entries. 2725 * 2726 * The following is based on graph breadth-first search. 2727 * 2728 * In color terms: 2729 * - the dead_entries set contains all nodes that are reachable 2730 * (i.e. both the black and the grey nodes) 2731 * - any entry not in dead_entries is white 2732 * - any entry that appears in dead_entries before entry, 2733 * is black, the rest is grey. 2734 * The set [entry, end] is also referred to as the wavefront. 2735 * 2736 * Since the tree is always a fully connected graph, the breadth-first 2737 * search guarantees that each vmmap_entry is visited exactly once. 2738 * The vm_map is broken down in linear time. 2739 */ 2740 TAILQ_INIT(&dead_entries); 2741 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2742 DEAD_ENTRY_PUSH(&dead_entries, entry); 2743 while (entry != NULL) { 2744 sched_pause(yield); 2745 uvm_unmap_kill_entry(map, entry); 2746 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2747 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2748 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2749 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2750 /* Update wave-front. 
*/ 2751 entry = TAILQ_NEXT(entry, dfree.deadq); 2752 } 2753 2754 #ifdef VMMAP_DEBUG 2755 numt = numq = 0; 2756 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2757 numt++; 2758 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2759 numq++; 2760 KASSERT(numt == numq); 2761 #endif 2762 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2763 2764 KERNEL_LOCK(); 2765 2766 pmap_destroy(map->pmap); 2767 map->pmap = NULL; 2768 } 2769 2770 /* 2771 * Populate map with free-memory entries. 2772 * 2773 * Map must be initialized and empty. 2774 */ 2775 void 2776 uvm_map_setup_entries(struct vm_map *map) 2777 { 2778 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2779 2780 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2781 } 2782 2783 /* 2784 * Split entry at given address. 2785 * 2786 * orig: entry that is to be split. 2787 * next: a newly allocated map entry that is not linked. 2788 * split: address at which the split is done. 2789 */ 2790 void 2791 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2792 struct vm_map_entry *next, vaddr_t split) 2793 { 2794 struct uvm_addr_state *free, *free_before; 2795 vsize_t adj; 2796 2797 if ((split & PAGE_MASK) != 0) { 2798 panic("uvm_map_splitentry: split address 0x%lx " 2799 "not on page boundary!", split); 2800 } 2801 KDASSERT(map != NULL && orig != NULL && next != NULL); 2802 uvm_tree_sanity(map, __FILE__, __LINE__); 2803 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2804 2805 #ifdef VMMAP_DEBUG 2806 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2807 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2808 #endif /* VMMAP_DEBUG */ 2809 2810 /* 2811 * Free space will change, unlink from free space tree. 2812 */ 2813 free = uvm_map_uaddr_e(map, orig); 2814 uvm_mapent_free_remove(map, free, orig); 2815 2816 adj = split - orig->start; 2817 2818 uvm_mapent_copy(orig, next); 2819 if (split >= orig->end) { 2820 next->etype = 0; 2821 next->offset = 0; 2822 next->wired_count = 0; 2823 next->start = next->end = split; 2824 next->guard = 0; 2825 next->fspace = VMMAP_FREE_END(orig) - split; 2826 next->aref.ar_amap = NULL; 2827 next->aref.ar_pageoff = 0; 2828 orig->guard = MIN(orig->guard, split - orig->end); 2829 orig->fspace = split - VMMAP_FREE_START(orig); 2830 } else { 2831 orig->fspace = 0; 2832 orig->guard = 0; 2833 orig->end = next->start = split; 2834 2835 if (next->aref.ar_amap) { 2836 KERNEL_LOCK(); 2837 amap_splitref(&orig->aref, &next->aref, adj); 2838 KERNEL_UNLOCK(); 2839 } 2840 if (UVM_ET_ISSUBMAP(orig)) { 2841 uvm_map_reference(next->object.sub_map); 2842 next->offset += adj; 2843 } else if (UVM_ET_ISOBJ(orig)) { 2844 if (next->object.uvm_obj->pgops && 2845 next->object.uvm_obj->pgops->pgo_reference) { 2846 KERNEL_LOCK(); 2847 next->object.uvm_obj->pgops->pgo_reference( 2848 next->object.uvm_obj); 2849 KERNEL_UNLOCK(); 2850 } 2851 next->offset += adj; 2852 } 2853 } 2854 2855 /* 2856 * Link next into address tree. 2857 * Link orig and next into free-space tree. 2858 * 2859 * Don't insert 'next' into the addr tree until orig has been linked, 2860 * in case the free-list looks at adjacent entries in the addr tree 2861 * for its decisions.
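 *
 * For example (sketch): when an entry covering [A, C) is split at B,
 * orig ends up describing [A, B) and next describes [B, C); orig is
 * re-linked first, then next is added to both trees below.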
2862 */ 2863 if (orig->fspace > 0) 2864 free_before = free; 2865 else 2866 free_before = uvm_map_uaddr_e(map, orig); 2867 uvm_mapent_free_insert(map, free_before, orig); 2868 uvm_mapent_addr_insert(map, next); 2869 uvm_mapent_free_insert(map, free, next); 2870 2871 uvm_tree_sanity(map, __FILE__, __LINE__); 2872 } 2873 2874 2875 #ifdef VMMAP_DEBUG 2876 2877 void 2878 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2879 char *file, int line) 2880 { 2881 char* map_special; 2882 2883 if (test) 2884 return; 2885 2886 if (map == kernel_map) 2887 map_special = " (kernel_map)"; 2888 else if (map == kmem_map) 2889 map_special = " (kmem_map)"; 2890 else 2891 map_special = ""; 2892 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2893 line, test_str); 2894 } 2895 2896 /* 2897 * Check that map is sane. 2898 */ 2899 void 2900 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2901 { 2902 struct vm_map_entry *iter; 2903 vaddr_t addr; 2904 vaddr_t min, max, bound; /* Bounds checker. */ 2905 struct uvm_addr_state *free; 2906 2907 addr = vm_map_min(map); 2908 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2909 /* 2910 * Valid start, end. 2911 * Catch overflow for end+fspace. 2912 */ 2913 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2914 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2915 2916 /* May not be empty. */ 2917 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2918 file, line); 2919 2920 /* Addresses for entry must lie within map boundaries. */ 2921 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2922 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2923 2924 /* Tree may not have gaps. */ 2925 UVM_ASSERT(map, iter->start == addr, file, line); 2926 addr = VMMAP_FREE_END(iter); 2927 2928 /* 2929 * Free space may not cross boundaries, unless the same 2930 * free list is used on both sides of the border. 2931 */ 2932 min = VMMAP_FREE_START(iter); 2933 max = VMMAP_FREE_END(iter); 2934 2935 while (min < max && 2936 (bound = uvm_map_boundary(map, min, max)) != max) { 2937 UVM_ASSERT(map, 2938 uvm_map_uaddr(map, bound - 1) == 2939 uvm_map_uaddr(map, bound), 2940 file, line); 2941 min = bound; 2942 } 2943 2944 free = uvm_map_uaddr_e(map, iter); 2945 if (free) { 2946 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2947 file, line); 2948 } else { 2949 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2950 file, line); 2951 } 2952 } 2953 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2954 } 2955 2956 void 2957 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2958 { 2959 struct vm_map_entry *iter; 2960 vsize_t size; 2961 2962 size = 0; 2963 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2964 if (!UVM_ET_ISHOLE(iter)) 2965 size += iter->end - iter->start; 2966 } 2967 2968 if (map->size != size) 2969 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2970 UVM_ASSERT(map, map->size == size, file, line); 2971 2972 vmspace_validate(map); 2973 } 2974 2975 /* 2976 * This function validates the statistics on vmspace. 2977 */ 2978 void 2979 vmspace_validate(struct vm_map *map) 2980 { 2981 struct vmspace *vm; 2982 struct vm_map_entry *iter; 2983 vaddr_t imin, imax; 2984 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2985 vsize_t stack, heap; /* Measured sizes. 
*/ 2986 2987 if (!(map->flags & VM_MAP_ISVMSPACE)) 2988 return; 2989 2990 vm = (struct vmspace *)map; 2991 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2992 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2993 2994 stack = heap = 0; 2995 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2996 imin = imax = iter->start; 2997 2998 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL || 2999 iter->protection == PROT_NONE) 3000 continue; 3001 3002 /* 3003 * Update stack, heap. 3004 * Keep in mind that (theoretically) the entries of 3005 * userspace and stack may be joined. 3006 */ 3007 while (imin != iter->end) { 3008 /* 3009 * Set imax to the first boundary crossed between 3010 * imin and stack addresses. 3011 */ 3012 imax = iter->end; 3013 if (imin < stack_begin && imax > stack_begin) 3014 imax = stack_begin; 3015 else if (imin < stack_end && imax > stack_end) 3016 imax = stack_end; 3017 3018 if (imin >= stack_begin && imin < stack_end) 3019 stack += imax - imin; 3020 else 3021 heap += imax - imin; 3022 imin = imax; 3023 } 3024 } 3025 3026 heap >>= PAGE_SHIFT; 3027 if (heap != vm->vm_dused) { 3028 printf("vmspace stack range: 0x%lx-0x%lx\n", 3029 stack_begin, stack_end); 3030 panic("vmspace_validate: vmspace.vm_dused invalid, " 3031 "expected %ld pgs, got %ld pgs in map %p", 3032 heap, vm->vm_dused, 3033 map); 3034 } 3035 } 3036 3037 #endif /* VMMAP_DEBUG */ 3038 3039 /* 3040 * uvm_map_init: init mapping system at boot time. note that we allocate 3041 * and init the static pool of structs vm_map_entry for the kernel here. 3042 */ 3043 void 3044 uvm_map_init(void) 3045 { 3046 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 3047 int lcv; 3048 3049 /* now set up static pool of kernel map entries ... */ 3050 mtx_init(&uvm_kmapent_mtx, IPL_VM); 3051 SLIST_INIT(&uvm.kentry_free); 3052 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 3053 SLIST_INSERT_HEAD(&uvm.kentry_free, 3054 &kernel_map_entry[lcv], daddrs.addr_kentry); 3055 } 3056 3057 /* initialize the map-related pools. */ 3058 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 3059 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 3060 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 3061 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 3062 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 3063 IPL_VM, 0, "vmmpekpl", NULL); 3064 pool_sethiwat(&uvm_map_entry_pool, 8192); 3065 3066 uvm_addr_init(); 3067 } 3068 3069 #if defined(DDB) 3070 3071 /* 3072 * DDB hooks 3073 */ 3074 3075 /* 3076 * uvm_map_printit: actually prints the map 3077 */ 3078 void 3079 uvm_map_printit(struct vm_map *map, boolean_t full, 3080 int (*pr)(const char *, ...)) 3081 { 3082 struct vmspace *vm; 3083 struct vm_map_entry *entry; 3084 struct uvm_addr_state *free; 3085 int in_free, i; 3086 char buf[8]; 3087 3088 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3089 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 3090 map->b_start, map->b_end); 3091 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 3092 map->s_start, map->s_end); 3093 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 3094 map->size, map->ref_count, map->timestamp, 3095 map->flags); 3096 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3097 pmap_resident_count(map->pmap)); 3098 3099 /* struct vmspace handling.
*/ 3100 if (map->flags & VM_MAP_ISVMSPACE) { 3101 vm = (struct vmspace *)map; 3102 3103 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 3104 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 3105 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 3106 vm->vm_tsize, vm->vm_dsize); 3107 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 3108 vm->vm_taddr, vm->vm_daddr); 3109 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 3110 vm->vm_maxsaddr, vm->vm_minsaddr); 3111 } 3112 3113 if (!full) 3114 goto print_uaddr; 3115 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3116 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3117 entry, entry->start, entry->end, entry->object.uvm_obj, 3118 (long long)entry->offset, entry->aref.ar_amap, 3119 entry->aref.ar_pageoff); 3120 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " 3121 "syscall=%c, prot(max)=%d/%d, inh=%d, " 3122 "wc=%d, adv=%d\n", 3123 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3124 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3125 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3126 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3127 (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F', 3128 entry->protection, entry->max_protection, 3129 entry->inheritance, entry->wired_count, entry->advice); 3130 3131 free = uvm_map_uaddr_e(map, entry); 3132 in_free = (free != NULL); 3133 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3134 "free=0x%lx-0x%lx\n", 3135 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3136 in_free ? 'T' : 'F', 3137 entry->guard, 3138 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3139 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3140 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3141 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3142 if (free) { 3143 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3144 free->uaddr_minaddr, free->uaddr_maxaddr, 3145 free->uaddr_functions->uaddr_name); 3146 } 3147 } 3148 3149 print_uaddr: 3150 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3151 for (i = 0; i < nitems(map->uaddr_any); i++) { 3152 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3153 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3154 } 3155 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3156 } 3157 3158 /* 3159 * uvm_object_printit: actually prints the object 3160 */ 3161 void 3162 uvm_object_printit(uobj, full, pr) 3163 struct uvm_object *uobj; 3164 boolean_t full; 3165 int (*pr)(const char *, ...); 3166 { 3167 struct vm_page *pg; 3168 int cnt = 0; 3169 3170 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3171 uobj, uobj->pgops, uobj->uo_npages); 3172 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3173 (*pr)("refs=<SYSTEM>\n"); 3174 else 3175 (*pr)("refs=%d\n", uobj->uo_refs); 3176 3177 if (!full) { 3178 return; 3179 } 3180 (*pr)(" PAGES <pg,offset>:\n "); 3181 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3182 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3183 if ((cnt % 3) == 2) { 3184 (*pr)("\n "); 3185 } 3186 cnt++; 3187 } 3188 if ((cnt % 3) != 2) { 3189 (*pr)("\n"); 3190 } 3191 } 3192 3193 /* 3194 * uvm_page_printit: actually print the page 3195 */ 3196 static const char page_flagbits[] = 3197 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3198 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3199 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3200 3201 void 3202 uvm_page_printit(pg, full, pr) 3203 struct vm_page *pg; 3204 boolean_t full; 3205 int (*pr)(const char *, ...); 3206 { 3207 struct vm_page *tpg; 3208 struct uvm_object *uobj; 3209 struct pglist *pgl; 3210 3211 (*pr)("PAGE %p:\n", pg); 3212 
(*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3213 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3214 (long long)pg->phys_addr); 3215 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3216 pg->uobject, pg->uanon, (long long)pg->offset); 3217 #if defined(UVM_PAGE_TRKOWN) 3218 if (pg->pg_flags & PG_BUSY) 3219 (*pr)(" owning thread = %d, tag=%s", 3220 pg->owner, pg->owner_tag); 3221 else 3222 (*pr)(" page not busy, no owner"); 3223 #else 3224 (*pr)(" [page ownership tracking disabled]"); 3225 #endif 3226 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3227 3228 if (!full) 3229 return; 3230 3231 /* cross-verify object/anon */ 3232 if ((pg->pg_flags & PQ_FREE) == 0) { 3233 if (pg->pg_flags & PQ_ANON) { 3234 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3235 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3236 (pg->uanon) ? pg->uanon->an_page : NULL); 3237 else 3238 (*pr)(" anon backpointer is OK\n"); 3239 } else { 3240 uobj = pg->uobject; 3241 if (uobj) { 3242 (*pr)(" checking object list\n"); 3243 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3244 if (tpg == pg) { 3245 break; 3246 } 3247 } 3248 if (tpg) 3249 (*pr)(" page found on object list\n"); 3250 else 3251 (*pr)(" >>> PAGE NOT FOUND " 3252 "ON OBJECT LIST! <<<\n"); 3253 } 3254 } 3255 } 3256 3257 /* cross-verify page queue */ 3258 if (pg->pg_flags & PQ_FREE) { 3259 if (uvm_pmr_isfree(pg)) 3260 (*pr)(" page found in uvm_pmemrange\n"); 3261 else 3262 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3263 pgl = NULL; 3264 } else if (pg->pg_flags & PQ_INACTIVE) { 3265 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3266 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3267 } else if (pg->pg_flags & PQ_ACTIVE) { 3268 pgl = &uvm.page_active; 3269 } else { 3270 pgl = NULL; 3271 } 3272 3273 if (pgl) { 3274 (*pr)(" checking pageq list\n"); 3275 TAILQ_FOREACH(tpg, pgl, pageq) { 3276 if (tpg == pg) { 3277 break; 3278 } 3279 } 3280 if (tpg) 3281 (*pr)(" page found on pageq list\n"); 3282 else 3283 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3284 } 3285 } 3286 #endif 3287 3288 /* 3289 * uvm_map_protect: change map protection 3290 * 3291 * => set_max means set max_protection. 3292 * => map must be unlocked. 3293 */ 3294 int 3295 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3296 vm_prot_t new_prot, boolean_t set_max) 3297 { 3298 struct vm_map_entry *first, *iter; 3299 vm_prot_t old_prot; 3300 vm_prot_t mask; 3301 vsize_t dused; 3302 int error; 3303 3304 if (start > end) 3305 return EINVAL; 3306 start = MAX(start, map->min_offset); 3307 end = MIN(end, map->max_offset); 3308 if (start >= end) 3309 return 0; 3310 3311 dused = 0; 3312 error = 0; 3313 vm_map_lock(map); 3314 3315 /* 3316 * Set up first and last. 3317 * - first will contain first entry at or after start. 3318 */ 3319 first = uvm_map_entrybyaddr(&map->addr, start); 3320 KDASSERT(first != NULL); 3321 if (first->end <= start) 3322 first = RBT_NEXT(uvm_map_addr, first); 3323 3324 /* First, check for protection violations. */ 3325 for (iter = first; iter != NULL && iter->start < end; 3326 iter = RBT_NEXT(uvm_map_addr, iter)) { 3327 /* Treat memory holes as free space. 
*/ 3328 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3329 continue; 3330 3331 old_prot = iter->protection; 3332 if (old_prot == PROT_NONE && new_prot != old_prot) { 3333 dused += uvmspace_dused( 3334 map, MAX(start, iter->start), MIN(end, iter->end)); 3335 } 3336 3337 if (UVM_ET_ISSUBMAP(iter)) { 3338 error = EINVAL; 3339 goto out; 3340 } 3341 if ((new_prot & iter->max_protection) != new_prot) { 3342 error = EACCES; 3343 goto out; 3344 } 3345 if (map == kernel_map && 3346 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3347 panic("uvm_map_protect: kernel map W^X violation requested"); 3348 } 3349 3350 /* Check limits. */ 3351 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) { 3352 vsize_t limit = lim_cur(RLIMIT_DATA); 3353 dused = ptoa(dused); 3354 if (limit < dused || 3355 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) { 3356 error = ENOMEM; 3357 goto out; 3358 } 3359 } 3360 3361 /* Fix protections. */ 3362 for (iter = first; iter != NULL && iter->start < end; 3363 iter = RBT_NEXT(uvm_map_addr, iter)) { 3364 /* Treat memory holes as free space. */ 3365 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3366 continue; 3367 3368 old_prot = iter->protection; 3369 3370 /* 3371 * Skip adapting protection iff old and new protection 3372 * are equal. 3373 */ 3374 if (set_max) { 3375 if (old_prot == (new_prot & old_prot) && 3376 iter->max_protection == new_prot) 3377 continue; 3378 } else { 3379 if (old_prot == new_prot) 3380 continue; 3381 } 3382 3383 UVM_MAP_CLIP_START(map, iter, start); 3384 UVM_MAP_CLIP_END(map, iter, end); 3385 3386 if (set_max) { 3387 iter->max_protection = new_prot; 3388 iter->protection &= new_prot; 3389 } else 3390 iter->protection = new_prot; 3391 3392 /* 3393 * update physical map if necessary. worry about copy-on-write 3394 * here -- CHECK THIS XXX 3395 */ 3396 if (iter->protection != old_prot) { 3397 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3398 ~PROT_WRITE : PROT_MASK; 3399 3400 /* XXX should only wserial++ if no split occurs */ 3401 if (iter->protection & PROT_WRITE) 3402 map->wserial++; 3403 3404 if (map->flags & VM_MAP_ISVMSPACE) { 3405 if (old_prot == PROT_NONE) { 3406 ((struct vmspace *)map)->vm_dused += 3407 uvmspace_dused(map, iter->start, 3408 iter->end); 3409 } 3410 if (iter->protection == PROT_NONE) { 3411 ((struct vmspace *)map)->vm_dused -= 3412 uvmspace_dused(map, iter->start, 3413 iter->end); 3414 } 3415 } 3416 3417 /* update pmap */ 3418 if ((iter->protection & mask) == PROT_NONE && 3419 VM_MAPENT_ISWIRED(iter)) { 3420 /* 3421 * TODO(ariane) this is stupid. wired_count 3422 * is 0 if not wired, otherwise anything 3423 * larger than 0 (incremented once each time 3424 * wire is called). 3425 * Mostly to be able to undo the damage on 3426 * failure. Not the actually be a wired 3427 * refcounter... 3428 * Originally: iter->wired_count--; 3429 * (don't we have to unwire this in the pmap 3430 * as well?) 3431 */ 3432 iter->wired_count = 0; 3433 } 3434 pmap_protect(map->pmap, iter->start, iter->end, 3435 iter->protection & mask); 3436 } 3437 3438 /* 3439 * If the map is configured to lock any future mappings, 3440 * wire this entry now if the old protection was PROT_NONE 3441 * and the new protection is not PROT_NONE. 
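 *
 * For example, once mlockall(MCL_FUTURE) has set VM_MAP_WIREFUTURE on
 * this map, an mprotect() that turns a PROT_NONE reservation into an
 * accessible mapping must wire the affected pages right here.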
3442 */ 3443 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3444 VM_MAPENT_ISWIRED(iter) == 0 && 3445 old_prot == PROT_NONE && 3446 new_prot != PROT_NONE) { 3447 if (uvm_map_pageable(map, iter->start, iter->end, 3448 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3449 /* 3450 * If locking the entry fails, remember the 3451 * error if it's the first one. Note we 3452 * still continue setting the protection in 3453 * the map, but it will return the resource 3454 * storage condition regardless. 3455 * 3456 * XXX Ignore what the actual error is, 3457 * XXX just call it a resource shortage 3458 * XXX so that it doesn't get confused 3459 * XXX what uvm_map_protect() itself would 3460 * XXX normally return. 3461 */ 3462 error = ENOMEM; 3463 } 3464 } 3465 } 3466 pmap_update(map->pmap); 3467 3468 out: 3469 vm_map_unlock(map); 3470 return error; 3471 } 3472 3473 /* 3474 * uvmspace_alloc: allocate a vmspace structure. 3475 * 3476 * - structure includes vm_map and pmap 3477 * - XXX: no locking on this structure 3478 * - refcnt set to 1, rest must be init'd by caller 3479 */ 3480 struct vmspace * 3481 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3482 boolean_t remove_holes) 3483 { 3484 struct vmspace *vm; 3485 3486 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3487 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3488 return (vm); 3489 } 3490 3491 /* 3492 * uvmspace_init: initialize a vmspace structure. 3493 * 3494 * - XXX: no locking on this structure 3495 * - refcnt set to 1, rest must be init'd by caller 3496 */ 3497 void 3498 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3499 boolean_t pageable, boolean_t remove_holes) 3500 { 3501 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3502 3503 if (pmap) 3504 pmap_reference(pmap); 3505 else 3506 pmap = pmap_create(); 3507 3508 uvm_map_setup(&vm->vm_map, pmap, min, max, 3509 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3510 3511 vm->vm_refcnt = 1; 3512 3513 if (remove_holes) 3514 pmap_remove_holes(vm); 3515 } 3516 3517 /* 3518 * uvmspace_share: share a vmspace between two processes 3519 * 3520 * - used for vfork 3521 */ 3522 3523 struct vmspace * 3524 uvmspace_share(struct process *pr) 3525 { 3526 struct vmspace *vm = pr->ps_vmspace; 3527 3528 uvmspace_addref(vm); 3529 return vm; 3530 } 3531 3532 /* 3533 * uvmspace_exec: the process wants to exec a new program 3534 * 3535 * - XXX: no locking on vmspace 3536 */ 3537 3538 void 3539 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3540 { 3541 struct process *pr = p->p_p; 3542 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3543 struct vm_map *map = &ovm->vm_map; 3544 struct uvm_map_deadq dead_entries; 3545 3546 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3547 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3548 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3549 3550 pmap_unuse_final(p); /* before stack addresses go away */ 3551 TAILQ_INIT(&dead_entries); 3552 3553 /* see if more than one process is using this vmspace... */ 3554 if (ovm->vm_refcnt == 1) { 3555 /* 3556 * If pr is the only process using its vmspace then 3557 * we can safely recycle that vmspace for the program 3558 * that is being exec'd. 3559 */ 3560 3561 #ifdef SYSVSHM 3562 /* 3563 * SYSV SHM semantics require us to kill all segments on an exec 3564 */ 3565 if (ovm->vm_shm) 3566 shmexit(ovm); 3567 #endif 3568 3569 /* 3570 * POSIX 1003.1b -- "lock future mappings" is revoked 3571 * when a process execs another program image. 
3572 */ 3573 vm_map_lock(map); 3574 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE); 3575 3576 /* 3577 * now unmap the old program 3578 * 3579 * Instead of attempting to keep the map valid, we simply 3580 * nuke all entries and ask uvm_map_setup to reinitialize 3581 * the map to the new boundaries. 3582 * 3583 * uvm_unmap_remove will actually nuke all entries for us 3584 * (as in, not replace them with free-memory entries). 3585 */ 3586 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3587 &dead_entries, TRUE, FALSE); 3588 3589 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3590 3591 /* Nuke statistics and boundaries. */ 3592 memset(&ovm->vm_startcopy, 0, 3593 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3594 3595 3596 if (end & (vaddr_t)PAGE_MASK) { 3597 end += 1; 3598 if (end == 0) /* overflow */ 3599 end -= PAGE_SIZE; 3600 } 3601 3602 /* Setup new boundaries and populate map with entries. */ 3603 map->min_offset = start; 3604 map->max_offset = end; 3605 uvm_map_setup_entries(map); 3606 vm_map_unlock(map); 3607 3608 /* but keep MMU holes unavailable */ 3609 pmap_remove_holes(ovm); 3610 } else { 3611 /* 3612 * pr's vmspace is being shared, so we can't reuse 3613 * it for pr since it is still being used for others. 3614 * allocate a new vmspace for pr 3615 */ 3616 nvm = uvmspace_alloc(start, end, 3617 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3618 3619 /* install new vmspace and drop our ref to the old one. */ 3620 pmap_deactivate(p); 3621 p->p_vmspace = pr->ps_vmspace = nvm; 3622 pmap_activate(p); 3623 3624 uvmspace_free(ovm); 3625 } 3626 3627 /* Release dead entries */ 3628 uvm_unmap_detach(&dead_entries, 0); 3629 } 3630 3631 /* 3632 * uvmspace_addref: add a reference to a vmspace. 3633 */ 3634 void 3635 uvmspace_addref(struct vmspace *vm) 3636 { 3637 KERNEL_ASSERT_LOCKED(); 3638 KASSERT(vm->vm_refcnt > 0); 3639 3640 vm->vm_refcnt++; 3641 } 3642 3643 /* 3644 * uvmspace_free: free a vmspace data structure 3645 */ 3646 void 3647 uvmspace_free(struct vmspace *vm) 3648 { 3649 KERNEL_ASSERT_LOCKED(); 3650 3651 if (--vm->vm_refcnt == 0) { 3652 /* 3653 * lock the map, to wait out all other references to it. delete 3654 * all of the mappings and pages they hold, then call the pmap 3655 * module to reclaim anything left. 3656 */ 3657 #ifdef SYSVSHM 3658 /* Get rid of any SYSV shared memory segments. */ 3659 if (vm->vm_shm != NULL) 3660 shmexit(vm); 3661 #endif 3662 3663 uvm_map_teardown(&vm->vm_map); 3664 pool_put(&uvm_vmspace_pool, vm); 3665 } 3666 } 3667 3668 /* 3669 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3670 * srcmap to the address range [dstaddr, dstaddr + sz) in 3671 * dstmap. 3672 * 3673 * The whole address range in srcmap must be backed by an object 3674 * (no holes). 3675 * 3676 * If successful, the address ranges share memory and the destination 3677 * address range uses the protection flags in prot. 3678 * 3679 * This routine assumes that sz is a multiple of PAGE_SIZE and 3680 * that dstaddr and srcaddr are page-aligned. 
3681 */ 3682 int 3683 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3684 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3685 { 3686 int ret = 0; 3687 vaddr_t unmap_end; 3688 vaddr_t dstva; 3689 vsize_t s_off, len, n = sz, remain; 3690 struct vm_map_entry *first = NULL, *last = NULL; 3691 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3692 struct uvm_map_deadq dead; 3693 3694 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3695 return EINVAL; 3696 3697 TAILQ_INIT(&dead); 3698 vm_map_lock(dstmap); 3699 vm_map_lock_read(srcmap); 3700 3701 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3702 ret = ENOMEM; 3703 goto exit_unlock; 3704 } 3705 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3706 ret = EINVAL; 3707 goto exit_unlock; 3708 } 3709 3710 dstva = dstaddr; 3711 unmap_end = dstaddr; 3712 for (; src_entry != NULL; 3713 psrc_entry = src_entry, 3714 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3715 /* hole in address space, bail out */ 3716 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3717 break; 3718 if (src_entry->start >= srcaddr + sz) 3719 break; 3720 3721 if (UVM_ET_ISSUBMAP(src_entry)) 3722 panic("uvm_share: encountered a submap (illegal)"); 3723 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3724 UVM_ET_ISNEEDSCOPY(src_entry)) 3725 panic("uvm_share: non-copy_on_write map entries " 3726 "marked needs_copy (illegal)"); 3727 3728 /* 3729 * srcaddr > map entry start? means we are in the middle of a 3730 * map, so we calculate the offset to use in the source map. 3731 */ 3732 if (srcaddr > src_entry->start) 3733 s_off = srcaddr - src_entry->start; 3734 else if (srcaddr == src_entry->start) 3735 s_off = 0; 3736 else 3737 panic("uvm_share: map entry start > srcaddr"); 3738 3739 remain = src_entry->end - src_entry->start - s_off; 3740 3741 /* Determine how many bytes to share in this pass */ 3742 if (n < remain) 3743 len = n; 3744 else 3745 len = remain; 3746 3747 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot, 3748 srcmap, src_entry, &dead) == NULL) 3749 break; 3750 3751 n -= len; 3752 dstva += len; 3753 srcaddr += len; 3754 unmap_end = dstva + len; 3755 if (n == 0) 3756 goto exit_unlock; 3757 } 3758 3759 ret = EINVAL; 3760 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3761 3762 exit_unlock: 3763 vm_map_unlock_read(srcmap); 3764 vm_map_unlock(dstmap); 3765 uvm_unmap_detach(&dead, 0); 3766 3767 return ret; 3768 } 3769 3770 /* 3771 * Clone map entry into other map. 3772 * 3773 * Mapping will be placed at dstaddr, for the same length. 3774 * Space must be available. 3775 * Reference counters are incremented. 3776 */ 3777 struct vm_map_entry * 3778 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3779 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3780 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3781 int mapent_flags, int amap_share_flags) 3782 { 3783 struct vm_map_entry *new_entry, *first, *last; 3784 3785 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3786 3787 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3788 first = last = NULL; 3789 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3790 panic("uvm_mapent_clone: no space in map for " 3791 "entry in empty map"); 3792 } 3793 new_entry = uvm_map_mkentry(dstmap, first, last, 3794 dstaddr, dstlen, mapent_flags, dead, NULL); 3795 if (new_entry == NULL) 3796 return NULL; 3797 /* old_entry -> new_entry */ 3798 new_entry->object = old_entry->object; 3799 new_entry->offset = old_entry->offset; 3800 new_entry->aref = old_entry->aref; 3801 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3802 new_entry->protection = prot; 3803 new_entry->max_protection = maxprot; 3804 new_entry->inheritance = old_entry->inheritance; 3805 new_entry->advice = old_entry->advice; 3806 3807 /* gain reference to object backing the map (can't be a submap). */ 3808 if (new_entry->aref.ar_amap) { 3809 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3810 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3811 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3812 amap_share_flags); 3813 } 3814 3815 if (UVM_ET_ISOBJ(new_entry) && 3816 new_entry->object.uvm_obj->pgops->pgo_reference) { 3817 new_entry->offset += off; 3818 new_entry->object.uvm_obj->pgops->pgo_reference 3819 (new_entry->object.uvm_obj); 3820 } 3821 3822 return new_entry; 3823 } 3824 3825 struct vm_map_entry * 3826 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3827 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3828 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3829 { 3830 /* 3831 * If old_entry refers to a copy-on-write region that has not yet been 3832 * written to (needs_copy flag is set), then we need to allocate a new 3833 * amap for old_entry. 3834 * 3835 * If we do not do this, and the process owning old_entry does a copy-on 3836 * write later, old_entry and new_entry will refer to different memory 3837 * regions, and the memory between the processes is no longer shared. 3838 * 3839 * [in other words, we need to clear needs_copy] 3840 */ 3841 3842 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3843 /* get our own amap, clears needs_copy */ 3844 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0); 3845 /* XXXCDC: WAITOK??? */ 3846 } 3847 3848 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3849 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3850 } 3851 3852 /* 3853 * share the mapping: this means we want the old and 3854 * new entries to share amaps and backing objects. 3855 */ 3856 struct vm_map_entry * 3857 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3858 struct vm_map *old_map, 3859 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3860 { 3861 struct vm_map_entry *new_entry; 3862 3863 new_entry = uvm_mapent_share(new_map, old_entry->start, 3864 old_entry->end - old_entry->start, 0, old_entry->protection, 3865 old_entry->max_protection, old_map, old_entry, dead); 3866 3867 /* 3868 * pmap_copy the mappings: this routine is optional 3869 * but if it is there it will reduce the number of 3870 * page faults in the new proc. 3871 */ 3872 if (!UVM_ET_ISHOLE(new_entry)) 3873 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3874 (new_entry->end - new_entry->start), new_entry->start); 3875 3876 return (new_entry); 3877 } 3878 3879 /* 3880 * copy-on-write the mapping (using mmap's 3881 * MAP_PRIVATE semantics) 3882 * 3883 * allocate new_entry, adjust reference counts. 3884 * (note that new references are read-only). 
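 *
 * In rough terms: after fork() the parent and child keep sharing the
 * existing amap pages copy-on-write; the first write fault in either
 * process copies only the page being written (this is what the
 * needs_copy handling below arranges).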
3885 */ 3886 struct vm_map_entry * 3887 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3888 struct vm_map *old_map, 3889 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3890 { 3891 struct vm_map_entry *new_entry; 3892 boolean_t protect_child; 3893 3894 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3895 old_entry->end - old_entry->start, 0, old_entry->protection, 3896 old_entry->max_protection, old_entry, dead, 0, 0); 3897 3898 new_entry->etype |= 3899 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3900 3901 /* 3902 * the new entry will need an amap. it will either 3903 * need to be copied from the old entry or created 3904 * from scratch (if the old entry does not have an 3905 * amap). can we defer this process until later 3906 * (by setting "needs_copy") or do we need to copy 3907 * the amap now? 3908 * 3909 * we must copy the amap now if any of the following 3910 * conditions hold: 3911 * 1. the old entry has an amap and that amap is 3912 * being shared. this means that the old (parent) 3913 * process is sharing the amap with another 3914 * process. if we do not clear needs_copy here 3915 * we will end up in a situation where both the 3916 * parent and child process are referring to the 3917 * same amap with "needs_copy" set. if the 3918 * parent write-faults, the fault routine will 3919 * clear "needs_copy" in the parent by allocating 3920 * a new amap. this is wrong because the 3921 * parent is supposed to be sharing the old amap 3922 * and the new amap will break that. 3923 * 3924 * 2. if the old entry has an amap and a non-zero 3925 * wire count then we are going to have to call 3926 * amap_cow_now to avoid page faults in the 3927 * parent process. since amap_cow_now requires 3928 * "needs_copy" to be clear we might as well 3929 * clear it here as well. 3930 * 3931 */ 3932 if (old_entry->aref.ar_amap != NULL && 3933 ((amap_flags(old_entry->aref.ar_amap) & 3934 AMAP_SHARED) != 0 || 3935 VM_MAPENT_ISWIRED(old_entry))) { 3936 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3937 0, 0); 3938 /* XXXCDC: M_WAITOK ... ok? */ 3939 } 3940 3941 /* 3942 * if the parent's entry is wired down, then the 3943 * parent process does not want page faults on 3944 * access to that memory. this means that we 3945 * cannot do copy-on-write because we can't write 3946 * protect the old entry. in this case we 3947 * resolve all copy-on-write faults now, using 3948 * amap_cow_now. note that we have already 3949 * allocated any needed amap (above). 3950 */ 3951 if (VM_MAPENT_ISWIRED(old_entry)) { 3952 /* 3953 * resolve all copy-on-write faults now 3954 * (note that there is nothing to do if 3955 * the old mapping does not have an amap). 3956 * XXX: is it worthwhile to bother with 3957 * pmap_copy in this case? 3958 */ 3959 if (old_entry->aref.ar_amap) 3960 amap_cow_now(new_map, new_entry); 3961 } else { 3962 if (old_entry->aref.ar_amap) { 3963 /* 3964 * setup mappings to trigger copy-on-write faults 3965 * we must write-protect the parent if it has 3966 * an amap and it is not already "needs_copy"... 3967 * if it is already "needs_copy" then the parent 3968 * has already been write-protected by a previous 3969 * fork operation. 3970 * 3971 * if we do not write-protect the parent, then 3972 * we must be sure to write-protect the child 3973 * after the pmap_copy() operation. 3974 * 3975 * XXX: pmap_copy should have some way of telling 3976 * us that it didn't do anything so we can avoid 3977 * calling pmap_protect needlessly. 
3978 */ 3979 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3980 if (old_entry->max_protection & PROT_WRITE) { 3981 pmap_protect(old_map->pmap, 3982 old_entry->start, 3983 old_entry->end, 3984 old_entry->protection & 3985 ~PROT_WRITE); 3986 pmap_update(old_map->pmap); 3987 } 3988 old_entry->etype |= UVM_ET_NEEDSCOPY; 3989 } 3990 3991 /* parent must now be write-protected */ 3992 protect_child = FALSE; 3993 } else { 3994 /* 3995 * we only need to protect the child if the 3996 * parent has write access. 3997 */ 3998 if (old_entry->max_protection & PROT_WRITE) 3999 protect_child = TRUE; 4000 else 4001 protect_child = FALSE; 4002 } 4003 /* 4004 * copy the mappings 4005 * XXX: need a way to tell if this does anything 4006 */ 4007 if (!UVM_ET_ISHOLE(new_entry)) 4008 pmap_copy(new_map->pmap, old_map->pmap, 4009 new_entry->start, 4010 (old_entry->end - old_entry->start), 4011 old_entry->start); 4012 4013 /* protect the child's mappings if necessary */ 4014 if (protect_child) { 4015 pmap_protect(new_map->pmap, new_entry->start, 4016 new_entry->end, 4017 new_entry->protection & 4018 ~PROT_WRITE); 4019 } 4020 } 4021 4022 return (new_entry); 4023 } 4024 4025 /* 4026 * zero the mapping: the new entry will be zero initialized 4027 */ 4028 struct vm_map_entry * 4029 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 4030 struct vm_map *old_map, 4031 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 4032 { 4033 struct vm_map_entry *new_entry; 4034 4035 new_entry = uvm_mapent_clone(new_map, old_entry->start, 4036 old_entry->end - old_entry->start, 0, old_entry->protection, 4037 old_entry->max_protection, old_entry, dead, 0, 0); 4038 4039 new_entry->etype |= 4040 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 4041 4042 if (new_entry->aref.ar_amap) { 4043 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 4044 atop(new_entry->end - new_entry->start), 0); 4045 new_entry->aref.ar_amap = NULL; 4046 new_entry->aref.ar_pageoff = 0; 4047 } 4048 4049 if (UVM_ET_ISOBJ(new_entry)) { 4050 if (new_entry->object.uvm_obj->pgops->pgo_detach) 4051 new_entry->object.uvm_obj->pgops->pgo_detach( 4052 new_entry->object.uvm_obj); 4053 new_entry->object.uvm_obj = NULL; 4054 new_entry->etype &= ~UVM_ET_OBJ; 4055 } 4056 4057 return (new_entry); 4058 } 4059 4060 /* 4061 * uvmspace_fork: fork a process' main map 4062 * 4063 * => create a new vmspace for child process from parent. 4064 * => parent's map must not be locked. 4065 */ 4066 struct vmspace * 4067 uvmspace_fork(struct process *pr) 4068 { 4069 struct vmspace *vm1 = pr->ps_vmspace; 4070 struct vmspace *vm2; 4071 struct vm_map *old_map = &vm1->vm_map; 4072 struct vm_map *new_map; 4073 struct vm_map_entry *old_entry, *new_entry; 4074 struct uvm_map_deadq dead; 4075 4076 vm_map_lock(old_map); 4077 4078 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 4079 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 4080 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 4081 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 4082 vm2->vm_dused = 0; /* Statistic managed by us. 
*/ 4083 new_map = &vm2->vm_map; 4084 vm_map_lock(new_map); 4085 4086 /* go entry-by-entry */ 4087 TAILQ_INIT(&dead); 4088 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 4089 if (old_entry->start == old_entry->end) 4090 continue; 4091 4092 /* first, some sanity checks on the old entry */ 4093 if (UVM_ET_ISSUBMAP(old_entry)) { 4094 panic("fork: encountered a submap during fork " 4095 "(illegal)"); 4096 } 4097 4098 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 4099 UVM_ET_ISNEEDSCOPY(old_entry)) { 4100 panic("fork: non-copy_on_write map entry marked " 4101 "needs_copy (illegal)"); 4102 } 4103 4104 /* Apply inheritance. */ 4105 switch (old_entry->inheritance) { 4106 case MAP_INHERIT_SHARE: 4107 new_entry = uvm_mapent_forkshared(vm2, new_map, 4108 old_map, old_entry, &dead); 4109 break; 4110 case MAP_INHERIT_COPY: 4111 new_entry = uvm_mapent_forkcopy(vm2, new_map, 4112 old_map, old_entry, &dead); 4113 break; 4114 case MAP_INHERIT_ZERO: 4115 new_entry = uvm_mapent_forkzero(vm2, new_map, 4116 old_map, old_entry, &dead); 4117 break; 4118 default: 4119 continue; 4120 } 4121 4122 /* Update process statistics. */ 4123 if (!UVM_ET_ISHOLE(new_entry)) 4124 new_map->size += new_entry->end - new_entry->start; 4125 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) && 4126 new_entry->protection != PROT_NONE) { 4127 vm2->vm_dused += uvmspace_dused( 4128 new_map, new_entry->start, new_entry->end); 4129 } 4130 } 4131 4132 vm_map_unlock(old_map); 4133 vm_map_unlock(new_map); 4134 4135 /* 4136 * This can actually happen, if multiple entries described a 4137 * space in which an entry was inherited. 4138 */ 4139 uvm_unmap_detach(&dead, 0); 4140 4141 #ifdef SYSVSHM 4142 if (vm1->vm_shm) 4143 shmfork(vm1, vm2); 4144 #endif 4145 4146 return vm2; 4147 } 4148 4149 /* 4150 * uvm_map_hint: return the beginning of the best area suitable for 4151 * creating a new mapping with "prot" protection. 4152 */ 4153 vaddr_t 4154 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 4155 vaddr_t maxaddr) 4156 { 4157 vaddr_t addr; 4158 vaddr_t spacing; 4159 4160 #ifdef __i386__ 4161 /* 4162 * If executable skip first two pages, otherwise start 4163 * after data + heap region. 4164 */ 4165 if ((prot & PROT_EXEC) != 0 && 4166 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4167 addr = (PAGE_SIZE*2) + 4168 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4169 return (round_page(addr)); 4170 } 4171 #endif 4172 4173 #if defined (__LP64__) 4174 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4175 #else 4176 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4177 #endif 4178 4179 /* 4180 * Start malloc/mmap after the brk. 4181 */ 4182 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 4183 addr = MAX(addr, minaddr); 4184 4185 if (addr < maxaddr) { 4186 while (spacing > maxaddr - addr) 4187 spacing >>= 1; 4188 } 4189 addr += arc4random() & spacing; 4190 return (round_page(addr)); 4191 } 4192 4193 /* 4194 * uvm_map_submap: punch down part of a map into a submap 4195 * 4196 * => only the kernel_map is allowed to be submapped 4197 * => the purpose of submapping is to break up the locking granularity 4198 * of a larger map 4199 * => the range specified must have been mapped previously with a uvm_map() 4200 * call [with uobj==NULL] to create a blank map entry in the main map. 4201 * [And it had better still be blank!] 4202 * => maps which contain submaps should never be copied or forked. 4203 * => to remove a submap, use uvm_unmap() on the main map 4204 * and then uvm_map_deallocate() the submap. 4205 * => main map must be unlocked. 
4206 * => submap must have been init'd and have a zero reference count. 4207 * [need not be locked as we don't actually reference it] 4208 */ 4209 int 4210 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 4211 struct vm_map *submap) 4212 { 4213 struct vm_map_entry *entry; 4214 int result; 4215 4216 if (start > map->max_offset || end > map->max_offset || 4217 start < map->min_offset || end < map->min_offset) 4218 return EINVAL; 4219 4220 vm_map_lock(map); 4221 4222 if (uvm_map_lookup_entry(map, start, &entry)) { 4223 UVM_MAP_CLIP_START(map, entry, start); 4224 UVM_MAP_CLIP_END(map, entry, end); 4225 } else 4226 entry = NULL; 4227 4228 if (entry != NULL && 4229 entry->start == start && entry->end == end && 4230 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 4231 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 4232 entry->etype |= UVM_ET_SUBMAP; 4233 entry->object.sub_map = submap; 4234 entry->offset = 0; 4235 uvm_map_reference(submap); 4236 result = 0; 4237 } else 4238 result = EINVAL; 4239 4240 vm_map_unlock(map); 4241 return(result); 4242 } 4243 4244 /* 4245 * uvm_map_checkprot: check protection in map 4246 * 4247 * => must allow specific protection in a fully allocated region. 4248 * => map mut be read or write locked by caller. 4249 */ 4250 boolean_t 4251 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 4252 vm_prot_t protection) 4253 { 4254 struct vm_map_entry *entry; 4255 4256 if (start < map->min_offset || end > map->max_offset || start > end) 4257 return FALSE; 4258 if (start == end) 4259 return TRUE; 4260 4261 /* 4262 * Iterate entries. 4263 */ 4264 for (entry = uvm_map_entrybyaddr(&map->addr, start); 4265 entry != NULL && entry->start < end; 4266 entry = RBT_NEXT(uvm_map_addr, entry)) { 4267 /* Fail if a hole is found. */ 4268 if (UVM_ET_ISHOLE(entry) || 4269 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 4270 return FALSE; 4271 4272 /* Check protection. */ 4273 if ((entry->protection & protection) != protection) 4274 return FALSE; 4275 } 4276 return TRUE; 4277 } 4278 4279 /* 4280 * uvm_map_create: create map 4281 */ 4282 vm_map_t 4283 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4284 { 4285 vm_map_t map; 4286 4287 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4288 uvm_map_setup(map, pmap, min, max, flags); 4289 return (map); 4290 } 4291 4292 /* 4293 * uvm_map_deallocate: drop reference to a map 4294 * 4295 * => caller must not lock map 4296 * => we will zap map if ref count goes to zero 4297 */ 4298 void 4299 uvm_map_deallocate(vm_map_t map) 4300 { 4301 int c; 4302 struct uvm_map_deadq dead; 4303 4304 c = --map->ref_count; 4305 if (c > 0) { 4306 return; 4307 } 4308 4309 /* 4310 * all references gone. unmap and free. 4311 * 4312 * No lock required: we are only one to access this map. 4313 */ 4314 TAILQ_INIT(&dead); 4315 uvm_tree_sanity(map, __FILE__, __LINE__); 4316 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4317 TRUE, FALSE); 4318 pmap_destroy(map->pmap); 4319 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4320 free(map, M_VMMAP, sizeof *map); 4321 4322 uvm_unmap_detach(&dead, 0); 4323 } 4324 4325 /* 4326 * uvm_map_inherit: set inheritance code for range of addrs in map. 4327 * 4328 * => map must be unlocked 4329 * => note that the inherit code is used during a "fork". see fork 4330 * code for details. 
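 *
 * Illustrative userland trigger (sketch only):
 *	minherit(addr, len, MAP_INHERIT_SHARE);
 * marks [addr, addr+len) to be shared with children created by a
 * later fork(); MAP_INHERIT_COPY and MAP_INHERIT_ZERO select the
 * other fork behaviours handled in uvmspace_fork() above.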
4331 */ 4332 int 4333 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4334 vm_inherit_t new_inheritance) 4335 { 4336 struct vm_map_entry *entry; 4337 4338 switch (new_inheritance) { 4339 case MAP_INHERIT_NONE: 4340 case MAP_INHERIT_COPY: 4341 case MAP_INHERIT_SHARE: 4342 case MAP_INHERIT_ZERO: 4343 break; 4344 default: 4345 return (EINVAL); 4346 } 4347 4348 if (start > end) 4349 return EINVAL; 4350 start = MAX(start, map->min_offset); 4351 end = MIN(end, map->max_offset); 4352 if (start >= end) 4353 return 0; 4354 4355 vm_map_lock(map); 4356 4357 entry = uvm_map_entrybyaddr(&map->addr, start); 4358 if (entry->end > start) 4359 UVM_MAP_CLIP_START(map, entry, start); 4360 else 4361 entry = RBT_NEXT(uvm_map_addr, entry); 4362 4363 while (entry != NULL && entry->start < end) { 4364 UVM_MAP_CLIP_END(map, entry, end); 4365 entry->inheritance = new_inheritance; 4366 entry = RBT_NEXT(uvm_map_addr, entry); 4367 } 4368 4369 vm_map_unlock(map); 4370 return (0); 4371 } 4372 4373 /* 4374 * uvm_map_syscall: permit system calls for range of addrs in map. 4375 * 4376 * => map must be unlocked 4377 */ 4378 int 4379 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end) 4380 { 4381 struct vm_map_entry *entry; 4382 4383 if (start > end) 4384 return EINVAL; 4385 start = MAX(start, map->min_offset); 4386 end = MIN(end, map->max_offset); 4387 if (start >= end) 4388 return 0; 4389 if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */ 4390 return (EPERM); 4391 4392 vm_map_lock(map); 4393 4394 entry = uvm_map_entrybyaddr(&map->addr, start); 4395 if (entry->end > start) 4396 UVM_MAP_CLIP_START(map, entry, start); 4397 else 4398 entry = RBT_NEXT(uvm_map_addr, entry); 4399 4400 while (entry != NULL && entry->start < end) { 4401 UVM_MAP_CLIP_END(map, entry, end); 4402 entry->etype |= UVM_ET_SYSCALL; 4403 entry = RBT_NEXT(uvm_map_addr, entry); 4404 } 4405 4406 map->wserial++; 4407 map->flags |= VM_MAP_SYSCALL_ONCE; 4408 vm_map_unlock(map); 4409 return (0); 4410 } 4411 4412 /* 4413 * uvm_map_advice: set advice code for range of addrs in map. 4414 * 4415 * => map must be unlocked 4416 */ 4417 int 4418 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4419 { 4420 struct vm_map_entry *entry; 4421 4422 switch (new_advice) { 4423 case MADV_NORMAL: 4424 case MADV_RANDOM: 4425 case MADV_SEQUENTIAL: 4426 break; 4427 default: 4428 return (EINVAL); 4429 } 4430 4431 if (start > end) 4432 return EINVAL; 4433 start = MAX(start, map->min_offset); 4434 end = MIN(end, map->max_offset); 4435 if (start >= end) 4436 return 0; 4437 4438 vm_map_lock(map); 4439 4440 entry = uvm_map_entrybyaddr(&map->addr, start); 4441 if (entry != NULL && entry->end > start) 4442 UVM_MAP_CLIP_START(map, entry, start); 4443 else if (entry!= NULL) 4444 entry = RBT_NEXT(uvm_map_addr, entry); 4445 4446 /* 4447 * XXXJRT: disallow holes? 4448 */ 4449 while (entry != NULL && entry->start < end) { 4450 UVM_MAP_CLIP_END(map, entry, end); 4451 entry->advice = new_advice; 4452 entry = RBT_NEXT(uvm_map_addr, entry); 4453 } 4454 4455 vm_map_unlock(map); 4456 return (0); 4457 } 4458 4459 /* 4460 * uvm_map_extract: extract a mapping from a map and put it somewhere 4461 * in the kernel_map, setting protection to max_prot. 
4462 * 4463 * => map should be unlocked (we will write lock it and kernel_map) 4464 * => returns 0 on success, error code otherwise 4465 * => start must be page aligned 4466 * => len must be page sized 4467 * => flags: 4468 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4469 * Mappings are QREF's. 4470 */ 4471 int 4472 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4473 vaddr_t *dstaddrp, int flags) 4474 { 4475 struct uvm_map_deadq dead; 4476 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4477 vaddr_t dstaddr; 4478 vaddr_t end; 4479 vaddr_t cp_start; 4480 vsize_t cp_len, cp_off; 4481 int error; 4482 4483 TAILQ_INIT(&dead); 4484 end = start + len; 4485 4486 /* 4487 * Sanity check on the parameters. 4488 * Also, since the mapping may not contain gaps, error out if the 4489 * mapped area is not in source map. 4490 */ 4491 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4492 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4493 return EINVAL; 4494 if (start < srcmap->min_offset || end > srcmap->max_offset) 4495 return EINVAL; 4496 4497 /* Initialize dead entries. Handle len == 0 case. */ 4498 if (len == 0) 4499 return 0; 4500 4501 /* Acquire lock on srcmap. */ 4502 vm_map_lock(srcmap); 4503 4504 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4505 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4506 4507 /* Check that the range is contiguous. */ 4508 for (entry = first; entry != NULL && entry->end < end; 4509 entry = RBT_NEXT(uvm_map_addr, entry)) { 4510 if (VMMAP_FREE_END(entry) != entry->end || 4511 UVM_ET_ISHOLE(entry)) { 4512 error = EINVAL; 4513 goto fail; 4514 } 4515 } 4516 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4517 error = EINVAL; 4518 goto fail; 4519 } 4520 4521 /* 4522 * Handle need-copy flag. 4523 */ 4524 for (entry = first; entry != NULL && entry->start < end; 4525 entry = RBT_NEXT(uvm_map_addr, entry)) { 4526 if (UVM_ET_ISNEEDSCOPY(entry)) 4527 amap_copy(srcmap, entry, M_NOWAIT, 4528 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4529 if (UVM_ET_ISNEEDSCOPY(entry)) { 4530 /* 4531 * amap_copy failure 4532 */ 4533 error = ENOMEM; 4534 goto fail; 4535 } 4536 } 4537 4538 /* Lock destination map (kernel_map). */ 4539 vm_map_lock(kernel_map); 4540 4541 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4542 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4543 PROT_NONE, 0) != 0) { 4544 error = ENOMEM; 4545 goto fail2; 4546 } 4547 *dstaddrp = dstaddr; 4548 4549 /* 4550 * We now have srcmap and kernel_map locked. 4551 * dstaddr contains the destination offset in dstmap. 4552 */ 4553 /* step 1: start looping through map entries, performing extraction. */ 4554 for (entry = first; entry != NULL && entry->start < end; 4555 entry = RBT_NEXT(uvm_map_addr, entry)) { 4556 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4557 if (UVM_ET_ISHOLE(entry)) 4558 continue; 4559 4560 /* Calculate uvm_mapent_clone parameters. 
*/ 4561 cp_start = entry->start; 4562 if (cp_start < start) { 4563 cp_off = start - cp_start; 4564 cp_start = start; 4565 } else 4566 cp_off = 0; 4567 cp_len = MIN(entry->end, end) - cp_start; 4568 4569 newentry = uvm_mapent_clone(kernel_map, 4570 cp_start - start + dstaddr, cp_len, cp_off, 4571 entry->protection, entry->max_protection, 4572 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4573 if (newentry == NULL) { 4574 error = ENOMEM; 4575 goto fail2_unmap; 4576 } 4577 kernel_map->size += cp_len; 4578 if (flags & UVM_EXTRACT_FIXPROT) 4579 newentry->protection = newentry->max_protection; 4580 4581 /* 4582 * Step 2: perform pmap copy. 4583 * (Doing this in the loop saves one RB traversal.) 4584 */ 4585 pmap_copy(kernel_map->pmap, srcmap->pmap, 4586 cp_start - start + dstaddr, cp_len, cp_start); 4587 } 4588 pmap_update(kernel_map->pmap); 4589 4590 error = 0; 4591 4592 /* Unmap copied entries on failure. */ 4593 fail2_unmap: 4594 if (error) { 4595 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4596 FALSE, TRUE); 4597 } 4598 4599 /* Release maps, release dead entries. */ 4600 fail2: 4601 vm_map_unlock(kernel_map); 4602 4603 fail: 4604 vm_map_unlock(srcmap); 4605 4606 uvm_unmap_detach(&dead, 0); 4607 4608 return error; 4609 } 4610 4611 /* 4612 * uvm_map_clean: clean out a map range 4613 * 4614 * => valid flags: 4615 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4616 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4617 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4618 * if (flags & PGO_FREE): any cached pages are freed after clean 4619 * => returns an error if any part of the specified range isn't mapped 4620 * => never a need to flush amap layer since the anonymous memory has 4621 * no permanent home, but may deactivate pages there 4622 * => called from sys_msync() and sys_madvise() 4623 * => caller must not write-lock map (read OK). 4624 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4625 */ 4626 4627 int 4628 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4629 { 4630 struct vm_map_entry *first, *entry; 4631 struct vm_amap *amap; 4632 struct vm_anon *anon; 4633 struct vm_page *pg; 4634 struct uvm_object *uobj; 4635 vaddr_t cp_start, cp_end; 4636 int refs; 4637 int error; 4638 boolean_t rv; 4639 4640 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4641 (PGO_FREE|PGO_DEACTIVATE)); 4642 4643 if (start > end || start < map->min_offset || end > map->max_offset) 4644 return EINVAL; 4645 4646 vm_map_lock_read(map); 4647 first = uvm_map_entrybyaddr(&map->addr, start); 4648 4649 /* Make a first pass to check for holes. */ 4650 for (entry = first; entry != NULL && entry->start < end; 4651 entry = RBT_NEXT(uvm_map_addr, entry)) { 4652 if (UVM_ET_ISSUBMAP(entry)) { 4653 vm_map_unlock_read(map); 4654 return EINVAL; 4655 } 4656 if (UVM_ET_ISSUBMAP(entry) || 4657 UVM_ET_ISHOLE(entry) || 4658 (entry->end < end && 4659 VMMAP_FREE_END(entry) != entry->end)) { 4660 vm_map_unlock_read(map); 4661 return EFAULT; 4662 } 4663 } 4664 4665 error = 0; 4666 for (entry = first; entry != NULL && entry->start < end; 4667 entry = RBT_NEXT(uvm_map_addr, entry)) { 4668 amap = entry->aref.ar_amap; /* top layer */ 4669 if (UVM_ET_ISOBJ(entry)) 4670 uobj = entry->object.uvm_obj; 4671 else 4672 uobj = NULL; 4673 4674 /* 4675 * No amap cleaning necessary if: 4676 * - there's no amap 4677 * - we're not deactivating or freeing pages. 
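 *
 * (When amap cleaning does happen: the PGO_DEACTIVATE cases, and
 * currently also PGO_CLEANIT|PGO_FREE, just deactivate the resident
 * anon pages; plain PGO_FREE removes the anon from the amap and
 * drops its reference, unless the amap is shared, in which case it
 * falls back to deactivation.)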
4678 */ 4679 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4680 goto flush_object; 4681 4682 cp_start = MAX(entry->start, start); 4683 cp_end = MIN(entry->end, end); 4684 4685 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4686 anon = amap_lookup(&entry->aref, 4687 cp_start - entry->start); 4688 if (anon == NULL) 4689 continue; 4690 4691 pg = anon->an_page; 4692 if (pg == NULL) { 4693 continue; 4694 } 4695 KASSERT(pg->pg_flags & PQ_ANON); 4696 4697 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4698 /* 4699 * XXX In these first 3 cases, we always just 4700 * XXX deactivate the page. We may want to 4701 * XXX handle the different cases more 4702 * XXX specifically, in the future. 4703 */ 4704 case PGO_CLEANIT|PGO_FREE: 4705 case PGO_CLEANIT|PGO_DEACTIVATE: 4706 case PGO_DEACTIVATE: 4707 deactivate_it: 4708 /* skip the page if it's wired */ 4709 if (pg->wire_count != 0) 4710 break; 4711 4712 uvm_lock_pageq(); 4713 4714 KASSERT(pg->uanon == anon); 4715 4716 /* zap all mappings for the page. */ 4717 pmap_page_protect(pg, PROT_NONE); 4718 4719 /* ...and deactivate the page. */ 4720 uvm_pagedeactivate(pg); 4721 4722 uvm_unlock_pageq(); 4723 break; 4724 case PGO_FREE: 4725 /* 4726 * If there are multiple references to 4727 * the amap, just deactivate the page. 4728 */ 4729 if (amap_refs(amap) > 1) 4730 goto deactivate_it; 4731 4732 /* XXX skip the page if it's wired */ 4733 if (pg->wire_count != 0) { 4734 break; 4735 } 4736 amap_unadd(&entry->aref, 4737 cp_start - entry->start); 4738 refs = --anon->an_ref; 4739 if (refs == 0) 4740 uvm_anfree(anon); 4741 break; 4742 default: 4743 panic("uvm_map_clean: weird flags"); 4744 } 4745 } 4746 4747 flush_object: 4748 cp_start = MAX(entry->start, start); 4749 cp_end = MIN(entry->end, end); 4750 4751 /* 4752 * flush pages if we've got a valid backing object. 4753 * 4754 * Don't PGO_FREE if we don't have write permission 4755 * and don't flush if this is a copy-on-write object 4756 * since we can't know our permissions on it. 4757 */ 4758 if (uobj != NULL && 4759 ((flags & PGO_FREE) == 0 || 4760 ((entry->max_protection & PROT_WRITE) != 0 && 4761 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4762 rv = uobj->pgops->pgo_flush(uobj, 4763 cp_start - entry->start + entry->offset, 4764 cp_end - entry->start + entry->offset, flags); 4765 4766 if (rv == FALSE) 4767 error = EFAULT; 4768 } 4769 } 4770 4771 vm_map_unlock_read(map); 4772 return error; 4773 } 4774 4775 /* 4776 * UVM_MAP_CLIP_END implementation 4777 */ 4778 void 4779 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4780 { 4781 struct vm_map_entry *tmp; 4782 4783 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4784 tmp = uvm_mapent_alloc(map, 0); 4785 4786 /* Invoke splitentry. */ 4787 uvm_map_splitentry(map, entry, tmp, addr); 4788 } 4789 4790 /* 4791 * UVM_MAP_CLIP_START implementation 4792 * 4793 * Clippers are required to not change the pointers to the entry they are 4794 * clipping on. 4795 * Since uvm_map_splitentry turns the original entry into the lowest 4796 * entry (address wise) we do a swap between the new entry and the original 4797 * entry, prior to calling uvm_map_splitentry. 4798 */ 4799 void 4800 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4801 { 4802 struct vm_map_entry *tmp; 4803 struct uvm_addr_state *free; 4804 4805 /* Unlink original. 
*/ 4806 free = uvm_map_uaddr_e(map, entry); 4807 uvm_mapent_free_remove(map, free, entry); 4808 uvm_mapent_addr_remove(map, entry); 4809 4810 /* Copy entry. */ 4811 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4812 tmp = uvm_mapent_alloc(map, 0); 4813 uvm_mapent_copy(entry, tmp); 4814 4815 /* Put new entry in place of original entry. */ 4816 uvm_mapent_addr_insert(map, tmp); 4817 uvm_mapent_free_insert(map, free, tmp); 4818 4819 /* Invoke splitentry. */ 4820 uvm_map_splitentry(map, tmp, entry, addr); 4821 } 4822 4823 /* 4824 * Boundary fixer. 4825 */ 4826 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4827 static inline vaddr_t 4828 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4829 { 4830 return (min < bound && max > bound) ? bound : max; 4831 } 4832 4833 /* 4834 * Choose free list based on address at start of free space. 4835 * 4836 * The uvm_addr_state returned contains addr and is the first of: 4837 * - uaddr_exe 4838 * - uaddr_brk_stack 4839 * - uaddr_any 4840 */ 4841 struct uvm_addr_state* 4842 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4843 { 4844 struct uvm_addr_state *uaddr; 4845 int i; 4846 4847 /* Special case the first page, to prevent mmap from returning 0. */ 4848 if (addr < VMMAP_MIN_ADDR) 4849 return NULL; 4850 4851 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4852 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4853 if (addr >= uvm_maxkaddr) 4854 return NULL; 4855 } 4856 4857 /* Is the address inside the exe-only map? */ 4858 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4859 addr < map->uaddr_exe->uaddr_maxaddr) 4860 return map->uaddr_exe; 4861 4862 /* Check if the space falls inside brk/stack area. */ 4863 if ((addr >= map->b_start && addr < map->b_end) || 4864 (addr >= map->s_start && addr < map->s_end)) { 4865 if (map->uaddr_brk_stack != NULL && 4866 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4867 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4868 return map->uaddr_brk_stack; 4869 } else 4870 return NULL; 4871 } 4872 4873 /* 4874 * Check the other selectors. 4875 * 4876 * These selectors are only marked as the owner, if they have insert 4877 * functions. 4878 */ 4879 for (i = 0; i < nitems(map->uaddr_any); i++) { 4880 uaddr = map->uaddr_any[i]; 4881 if (uaddr == NULL) 4882 continue; 4883 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4884 continue; 4885 4886 if (addr >= uaddr->uaddr_minaddr && 4887 addr < uaddr->uaddr_maxaddr) 4888 return uaddr; 4889 } 4890 4891 return NULL; 4892 } 4893 4894 /* 4895 * Choose free list based on address at start of free space. 4896 * 4897 * The uvm_addr_state returned contains addr and is the first of: 4898 * - uaddr_exe 4899 * - uaddr_brk_stack 4900 * - uaddr_any 4901 */ 4902 struct uvm_addr_state* 4903 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4904 { 4905 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4906 } 4907 4908 /* 4909 * Returns the first free-memory boundary that is crossed by [min-max]. 4910 */ 4911 vsize_t 4912 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4913 { 4914 struct uvm_addr_state *uaddr; 4915 int i; 4916 4917 /* Never return first page. */ 4918 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4919 4920 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4921 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4922 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4923 4924 /* Check for exe-only boundaries. 
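 * (uvm_map_boundfix() clamps max down to a boundary only when that
 * boundary lies strictly between min and max, otherwise it leaves
 * max untouched; applying it repeatedly therefore yields the lowest
 * boundary crossed by the range.)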
*/ 4925 if (map->uaddr_exe != NULL) { 4926 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4927 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4928 } 4929 4930 /* Check for exe-only boundaries. */ 4931 if (map->uaddr_brk_stack != NULL) { 4932 max = uvm_map_boundfix(min, max, 4933 map->uaddr_brk_stack->uaddr_minaddr); 4934 max = uvm_map_boundfix(min, max, 4935 map->uaddr_brk_stack->uaddr_maxaddr); 4936 } 4937 4938 /* Check other boundaries. */ 4939 for (i = 0; i < nitems(map->uaddr_any); i++) { 4940 uaddr = map->uaddr_any[i]; 4941 if (uaddr != NULL) { 4942 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4943 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4944 } 4945 } 4946 4947 /* Boundaries at stack and brk() area. */ 4948 max = uvm_map_boundfix(min, max, map->s_start); 4949 max = uvm_map_boundfix(min, max, map->s_end); 4950 max = uvm_map_boundfix(min, max, map->b_start); 4951 max = uvm_map_boundfix(min, max, map->b_end); 4952 4953 return max; 4954 } 4955 4956 /* 4957 * Update map allocation start and end addresses from proc vmspace. 4958 */ 4959 void 4960 uvm_map_vmspace_update(struct vm_map *map, 4961 struct uvm_map_deadq *dead, int flags) 4962 { 4963 struct vmspace *vm; 4964 vaddr_t b_start, b_end, s_start, s_end; 4965 4966 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4967 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4968 4969 /* 4970 * Derive actual allocation boundaries from vmspace. 4971 */ 4972 vm = (struct vmspace *)map; 4973 b_start = (vaddr_t)vm->vm_daddr; 4974 b_end = b_start + BRKSIZ; 4975 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4976 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4977 #ifdef DIAGNOSTIC 4978 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4979 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4980 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4981 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4982 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4983 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4984 vm, b_start, b_end, s_start, s_end); 4985 } 4986 #endif 4987 4988 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4989 map->s_start == s_start && map->s_end == s_end)) 4990 return; 4991 4992 uvm_map_freelist_update(map, dead, b_start, b_end, 4993 s_start, s_end, flags); 4994 } 4995 4996 /* 4997 * Grow kernel memory. 4998 * 4999 * This function is only called for kernel maps when an allocation fails. 5000 * 5001 * If the map has a gap that is large enough to accommodate alloc_sz, this 5002 * function will make sure map->free will include it. 5003 */ 5004 void 5005 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 5006 vsize_t alloc_sz, int flags) 5007 { 5008 vsize_t sz; 5009 vaddr_t end; 5010 struct vm_map_entry *entry; 5011 5012 /* Kernel memory only. */ 5013 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 5014 /* Destroy free list. */ 5015 uvm_map_freelist_update_clear(map, dead); 5016 5017 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 5018 if (map->flags & VM_MAP_GUARDPAGES) 5019 alloc_sz += PAGE_SIZE; 5020 5021 /* 5022 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 5023 * 5024 * Don't handle the case where the multiplication overflows: 5025 * if that happens, the allocation is probably too big anyway. 5026 */ 5027 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 5028 5029 /* 5030 * Walk forward until a gap large enough for alloc_sz shows up. 5031 * 5032 * We assume the kernel map has no boundaries. 
5033 * uvm_maxkaddr may be zero. 5034 */ 5035 end = MAX(uvm_maxkaddr, map->min_offset); 5036 entry = uvm_map_entrybyaddr(&map->addr, end); 5037 while (entry && entry->fspace < alloc_sz) 5038 entry = RBT_NEXT(uvm_map_addr, entry); 5039 if (entry) { 5040 end = MAX(VMMAP_FREE_START(entry), end); 5041 end += MIN(sz, map->max_offset - end); 5042 } else 5043 end = map->max_offset; 5044 5045 /* Reserve pmap entries. */ 5046 #ifdef PMAP_GROWKERNEL 5047 uvm_maxkaddr = pmap_growkernel(end); 5048 #else 5049 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 5050 #endif 5051 5052 /* Rebuild free list. */ 5053 uvm_map_freelist_update_refill(map, flags); 5054 } 5055 5056 /* 5057 * Freelist update subfunction: unlink all entries from freelists. 5058 */ 5059 void 5060 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 5061 { 5062 struct uvm_addr_state *free; 5063 struct vm_map_entry *entry, *prev, *next; 5064 5065 prev = NULL; 5066 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 5067 entry = next) { 5068 next = RBT_NEXT(uvm_map_addr, entry); 5069 5070 free = uvm_map_uaddr_e(map, entry); 5071 uvm_mapent_free_remove(map, free, entry); 5072 5073 if (prev != NULL && entry->start == entry->end) { 5074 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 5075 uvm_mapent_addr_remove(map, entry); 5076 DEAD_ENTRY_PUSH(dead, entry); 5077 } else 5078 prev = entry; 5079 } 5080 } 5081 5082 /* 5083 * Freelist update subfunction: refill the freelists with entries. 5084 */ 5085 void 5086 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 5087 { 5088 struct vm_map_entry *entry; 5089 vaddr_t min, max; 5090 5091 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5092 min = VMMAP_FREE_START(entry); 5093 max = VMMAP_FREE_END(entry); 5094 entry->fspace = 0; 5095 5096 entry = uvm_map_fix_space(map, entry, min, max, flags); 5097 } 5098 5099 uvm_tree_sanity(map, __FILE__, __LINE__); 5100 } 5101 5102 /* 5103 * Change {a,b}_{start,end} allocation ranges and associated free lists. 5104 */ 5105 void 5106 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 5107 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 5108 { 5109 KDASSERT(b_end >= b_start && s_end >= s_start); 5110 5111 /* Clear all free lists. */ 5112 uvm_map_freelist_update_clear(map, dead); 5113 5114 /* Apply new bounds. */ 5115 map->b_start = b_start; 5116 map->b_end = b_end; 5117 map->s_start = s_start; 5118 map->s_end = s_end; 5119 5120 /* Refill free lists. */ 5121 uvm_map_freelist_update_refill(map, flags); 5122 } 5123 5124 /* 5125 * Assign a uvm_addr_state to the specified pointer in vm_map. 5126 * 5127 * May sleep. 5128 */ 5129 void 5130 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 5131 struct uvm_addr_state *newval) 5132 { 5133 struct uvm_map_deadq dead; 5134 5135 /* Pointer which must be in this map. */ 5136 KASSERT(which != NULL); 5137 KASSERT((void*)map <= (void*)(which) && 5138 (void*)(which) < (void*)(map + 1)); 5139 5140 vm_map_lock(map); 5141 TAILQ_INIT(&dead); 5142 uvm_map_freelist_update_clear(map, &dead); 5143 5144 uvm_addr_destroy(*which); 5145 *which = newval; 5146 5147 uvm_map_freelist_update_refill(map, 0); 5148 vm_map_unlock(map); 5149 uvm_unmap_detach(&dead, 0); 5150 } 5151 5152 /* 5153 * Correct space insert. 5154 * 5155 * Entry must not be on any freelist. 
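 *
 * Walks [min, max), splitting the range at uvm_map_boundary()
 * boundaries; free space is either merged into entry's fspace or
 * tracked by freshly allocated zero-sized entries, which are put on
 * the matching uaddr free list. In maps with VM_MAP_GUARDPAGES set,
 * a guard page (or two, for an exactly two-page gap) is claimed for
 * entry first.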
5156 */ 5157 struct vm_map_entry* 5158 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5159 vaddr_t min, vaddr_t max, int flags) 5160 { 5161 struct uvm_addr_state *free, *entfree; 5162 vaddr_t lmax; 5163 5164 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5165 KDASSERT(min <= max); 5166 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5167 min == map->min_offset); 5168 5169 /* 5170 * During the function, entfree will always point at the uaddr state 5171 * for entry. 5172 */ 5173 entfree = (entry == NULL ? NULL : 5174 uvm_map_uaddr_e(map, entry)); 5175 5176 while (min != max) { 5177 /* Claim guard page for entry. */ 5178 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5179 VMMAP_FREE_END(entry) == entry->end && 5180 entry->start != entry->end) { 5181 if (max - min == 2 * PAGE_SIZE) { 5182 /* 5183 * If the free-space gap is exactly 2 pages, 5184 * we make the guard 2 pages instead of 1. 5185 * Because in a guarded map, an area needs 5186 * at least 2 pages to allocate from: 5187 * one page for the allocation and one for 5188 * the guard. 5189 */ 5190 entry->guard = 2 * PAGE_SIZE; 5191 min = max; 5192 } else { 5193 entry->guard = PAGE_SIZE; 5194 min += PAGE_SIZE; 5195 } 5196 continue; 5197 } 5198 5199 /* 5200 * Handle the case where entry has a 2-page guard, but the 5201 * space after entry is freed. 5202 */ 5203 if (entry != NULL && entry->fspace == 0 && 5204 entry->guard > PAGE_SIZE) { 5205 entry->guard = PAGE_SIZE; 5206 min = VMMAP_FREE_START(entry); 5207 } 5208 5209 lmax = uvm_map_boundary(map, min, max); 5210 free = uvm_map_uaddr(map, min); 5211 5212 /* 5213 * Entries are merged if they point at the same uvm_free(). 5214 * Exception to that rule: if min == uvm_maxkaddr, a new 5215 * entry is started regardless (otherwise the allocators 5216 * will get confused). 5217 */ 5218 if (entry != NULL && free == entfree && 5219 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5220 min == uvm_maxkaddr)) { 5221 KDASSERT(VMMAP_FREE_END(entry) == min); 5222 entry->fspace += lmax - min; 5223 } else { 5224 /* 5225 * Commit entry to free list: it'll not be added to 5226 * anymore. 5227 * We'll start a new entry and add to that entry 5228 * instead. 5229 */ 5230 if (entry != NULL) 5231 uvm_mapent_free_insert(map, entfree, entry); 5232 5233 /* New entry for new uaddr. */ 5234 entry = uvm_mapent_alloc(map, flags); 5235 KDASSERT(entry != NULL); 5236 entry->end = entry->start = min; 5237 entry->guard = 0; 5238 entry->fspace = lmax - min; 5239 entry->object.uvm_obj = NULL; 5240 entry->offset = 0; 5241 entry->etype = 0; 5242 entry->protection = entry->max_protection = 0; 5243 entry->inheritance = 0; 5244 entry->wired_count = 0; 5245 entry->advice = 0; 5246 entry->aref.ar_pageoff = 0; 5247 entry->aref.ar_amap = NULL; 5248 uvm_mapent_addr_insert(map, entry); 5249 5250 entfree = free; 5251 } 5252 5253 min = lmax; 5254 } 5255 /* Finally put entry on the uaddr state. */ 5256 if (entry != NULL) 5257 uvm_mapent_free_insert(map, entfree, entry); 5258 5259 return entry; 5260 } 5261 5262 /* 5263 * MQuery style of allocation. 5264 * 5265 * This allocator searches forward until sufficient space is found to map 5266 * the given size. 5267 * 5268 * XXX: factor in offset (via pmap_prefer) and protection? 
5269 */ 5270 int 5271 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5272 int flags) 5273 { 5274 struct vm_map_entry *entry, *last; 5275 vaddr_t addr; 5276 vaddr_t tmp, pmap_align, pmap_offset; 5277 int error; 5278 5279 addr = *addr_p; 5280 vm_map_lock_read(map); 5281 5282 /* Configure pmap prefer. */ 5283 if (offset != UVM_UNKNOWN_OFFSET) { 5284 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5285 pmap_offset = PMAP_PREFER_OFFSET(offset); 5286 } else { 5287 pmap_align = PAGE_SIZE; 5288 pmap_offset = 0; 5289 } 5290 5291 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5292 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5293 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5294 if (tmp < addr) 5295 tmp += pmap_align; 5296 addr = tmp; 5297 } 5298 5299 /* First, check if the requested range is fully available. */ 5300 entry = uvm_map_entrybyaddr(&map->addr, addr); 5301 last = NULL; 5302 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5303 error = 0; 5304 goto out; 5305 } 5306 if (flags & UVM_FLAG_FIXED) { 5307 error = EINVAL; 5308 goto out; 5309 } 5310 5311 error = ENOMEM; /* Default error from here. */ 5312 5313 /* 5314 * At this point, the memory at <addr, sz> is not available. 5315 * The reasons are: 5316 * [1] it's outside the map, 5317 * [2] it starts in used memory (and therefore needs to move 5318 * toward the first free page in entry), 5319 * [3] it starts in free memory but bumps into used memory. 5320 * 5321 * Note that for case [2], the forward moving is handled by the 5322 * for loop below. 5323 */ 5324 if (entry == NULL) { 5325 /* [1] Outside the map. */ 5326 if (addr >= map->max_offset) 5327 goto out; 5328 else 5329 entry = RBT_MIN(uvm_map_addr, &map->addr); 5330 } else if (VMMAP_FREE_START(entry) <= addr) { 5331 /* [3] Bumped into used memory. */ 5332 entry = RBT_NEXT(uvm_map_addr, entry); 5333 } 5334 5335 /* Test if the next entry is sufficient for the allocation. */ 5336 for (; entry != NULL; 5337 entry = RBT_NEXT(uvm_map_addr, entry)) { 5338 if (entry->fspace == 0) 5339 continue; 5340 addr = VMMAP_FREE_START(entry); 5341 5342 restart: /* Restart address checks on address change. */ 5343 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5344 if (tmp < addr) 5345 tmp += pmap_align; 5346 addr = tmp; 5347 if (addr >= VMMAP_FREE_END(entry)) 5348 continue; 5349 5350 /* Skip brk() allocation addresses. */ 5351 if (addr + sz > map->b_start && addr < map->b_end) { 5352 if (VMMAP_FREE_END(entry) > map->b_end) { 5353 addr = map->b_end; 5354 goto restart; 5355 } else 5356 continue; 5357 } 5358 /* Skip stack allocation addresses. */ 5359 if (addr + sz > map->s_start && addr < map->s_end) { 5360 if (VMMAP_FREE_END(entry) > map->s_end) { 5361 addr = map->s_end; 5362 goto restart; 5363 } else 5364 continue; 5365 } 5366 5367 last = NULL; 5368 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5369 error = 0; 5370 goto out; 5371 } 5372 } 5373 5374 out: 5375 vm_map_unlock_read(map); 5376 if (error == 0) 5377 *addr_p = addr; 5378 return error; 5379 } 5380 5381 /* 5382 * Determine allocation bias. 5383 * 5384 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5385 * addresses, or 0 for no bias. 5386 * The bias mechanism is intended to avoid clashing with brk() and stack 5387 * areas. 
5388 */ 5389 int 5390 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5391 { 5392 vaddr_t start, end; 5393 5394 start = VMMAP_FREE_START(entry); 5395 end = VMMAP_FREE_END(entry); 5396 5397 /* Stay at the top of brk() area. */ 5398 if (end >= map->b_start && start < map->b_end) 5399 return 1; 5400 /* Stay at the far end of the stack area. */ 5401 if (end >= map->s_start && start < map->s_end) { 5402 #ifdef MACHINE_STACK_GROWS_UP 5403 return 1; 5404 #else 5405 return -1; 5406 #endif 5407 } 5408 5409 /* No bias, this area is meant for us. */ 5410 return 0; 5411 } 5412 5413 5414 boolean_t 5415 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5416 { 5417 boolean_t rv; 5418 5419 if (map->flags & VM_MAP_INTRSAFE) { 5420 rv = mtx_enter_try(&map->mtx); 5421 } else { 5422 mtx_enter(&map->flags_lock); 5423 if (map->flags & VM_MAP_BUSY) { 5424 mtx_leave(&map->flags_lock); 5425 return (FALSE); 5426 } 5427 mtx_leave(&map->flags_lock); 5428 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5429 /* check if the lock is busy and back out if we won the race */ 5430 if (rv) { 5431 mtx_enter(&map->flags_lock); 5432 if (map->flags & VM_MAP_BUSY) { 5433 rw_exit(&map->lock); 5434 rv = FALSE; 5435 } 5436 mtx_leave(&map->flags_lock); 5437 } 5438 } 5439 5440 if (rv) { 5441 map->timestamp++; 5442 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5443 uvm_tree_sanity(map, file, line); 5444 uvm_tree_size_chk(map, file, line); 5445 } 5446 5447 return (rv); 5448 } 5449 5450 void 5451 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5452 { 5453 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5454 do { 5455 mtx_enter(&map->flags_lock); 5456 tryagain: 5457 while (map->flags & VM_MAP_BUSY) { 5458 map->flags |= VM_MAP_WANTLOCK; 5459 msleep_nsec(&map->flags, &map->flags_lock, 5460 PVM, vmmapbsy, INFSLP); 5461 } 5462 mtx_leave(&map->flags_lock); 5463 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5464 /* check if the lock is busy and back out if we won the race */ 5465 mtx_enter(&map->flags_lock); 5466 if (map->flags & VM_MAP_BUSY) { 5467 rw_exit(&map->lock); 5468 goto tryagain; 5469 } 5470 mtx_leave(&map->flags_lock); 5471 } else { 5472 mtx_enter(&map->mtx); 5473 } 5474 5475 map->timestamp++; 5476 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5477 uvm_tree_sanity(map, file, line); 5478 uvm_tree_size_chk(map, file, line); 5479 } 5480 5481 void 5482 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5483 { 5484 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5485 rw_enter_read(&map->lock); 5486 else 5487 mtx_enter(&map->mtx); 5488 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5489 uvm_tree_sanity(map, file, line); 5490 uvm_tree_size_chk(map, file, line); 5491 } 5492 5493 void 5494 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5495 { 5496 uvm_tree_sanity(map, file, line); 5497 uvm_tree_size_chk(map, file, line); 5498 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5499 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5500 rw_exit(&map->lock); 5501 else 5502 mtx_leave(&map->mtx); 5503 } 5504 5505 void 5506 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5507 { 5508 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5509 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5510 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5511 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5512 rw_exit_read(&map->lock); 5513 else 5514 mtx_leave(&map->mtx); 5515 } 5516 5517 void 5518 vm_map_downgrade_ln(struct vm_map *map, char *file, 
int line) 5519 { 5520 uvm_tree_sanity(map, file, line); 5521 uvm_tree_size_chk(map, file, line); 5522 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5523 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5524 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5525 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5526 rw_enter(&map->lock, RW_DOWNGRADE); 5527 } 5528 5529 void 5530 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5531 { 5532 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5533 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5534 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5535 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5536 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5537 rw_exit_read(&map->lock); 5538 rw_enter_write(&map->lock); 5539 } 5540 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5541 uvm_tree_sanity(map, file, line); 5542 } 5543 5544 void 5545 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5546 { 5547 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5548 mtx_enter(&map->flags_lock); 5549 map->flags |= VM_MAP_BUSY; 5550 mtx_leave(&map->flags_lock); 5551 } 5552 5553 void 5554 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5555 { 5556 int oflags; 5557 5558 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5559 mtx_enter(&map->flags_lock); 5560 oflags = map->flags; 5561 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5562 mtx_leave(&map->flags_lock); 5563 if (oflags & VM_MAP_WANTLOCK) 5564 wakeup(&map->flags); 5565 } 5566 5567 #ifndef SMALL_KERNEL 5568 int 5569 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5570 size_t *lenp) 5571 { 5572 struct vm_map_entry *entry; 5573 vaddr_t start; 5574 int cnt, maxcnt, error = 0; 5575 5576 KASSERT(*lenp > 0); 5577 KASSERT((*lenp % sizeof(*kve)) == 0); 5578 cnt = 0; 5579 maxcnt = *lenp / sizeof(*kve); 5580 KASSERT(maxcnt > 0); 5581 5582 /* 5583 * Return only entries whose address is above the given base 5584 * address. This allows userland to iterate without knowing the 5585 * number of entries beforehand. 5586 */ 5587 start = (vaddr_t)kve[0].kve_start; 5588 5589 vm_map_lock(map); 5590 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5591 if (cnt == maxcnt) { 5592 error = ENOMEM; 5593 break; 5594 } 5595 if (start != 0 && entry->start < start) 5596 continue; 5597 kve->kve_start = entry->start; 5598 kve->kve_end = entry->end; 5599 kve->kve_guard = entry->guard; 5600 kve->kve_fspace = entry->fspace; 5601 kve->kve_fspace_augment = entry->fspace_augment; 5602 kve->kve_offset = entry->offset; 5603 kve->kve_wired_count = entry->wired_count; 5604 kve->kve_etype = entry->etype; 5605 kve->kve_protection = entry->protection; 5606 kve->kve_max_protection = entry->max_protection; 5607 kve->kve_advice = entry->advice; 5608 kve->kve_inheritance = entry->inheritance; 5609 kve->kve_flags = entry->flags; 5610 kve++; 5611 cnt++; 5612 } 5613 vm_map_unlock(map); 5614 5615 KASSERT(cnt <= maxcnt); 5616 5617 *lenp = sizeof(*kve) * cnt; 5618 return error; 5619 } 5620 #endif 5621 5622 5623 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5624 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5625 5626 5627 /* 5628 * MD code: vmspace allocator setup. 5629 */ 5630 5631 #ifdef __i386__ 5632 void 5633 uvm_map_setup_md(struct vm_map *map) 5634 { 5635 vaddr_t min, max; 5636 5637 min = map->min_offset; 5638 max = map->max_offset; 5639 5640 /* 5641 * Ensure the selectors will not try to manage page 0; 5642 * it's too special. 
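 * (Keeping the first page out of every selector means no allocator
 * can ever hand out address 0, so NULL dereferences keep faulting;
 * see also the VMMAP_MIN_ADDR check in uvm_map_uaddr() above.)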
5643 */ 5644 if (min < VMMAP_MIN_ADDR) 5645 min = VMMAP_MIN_ADDR; 5646 5647 #if 0 /* Cool stuff, not yet */ 5648 /* Executable code is special. */ 5649 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5650 /* Place normal allocations beyond executable mappings. */ 5651 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5652 #else /* Crappy stuff, for now */ 5653 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5654 #endif 5655 5656 #ifndef SMALL_KERNEL 5657 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5658 #endif /* !SMALL_KERNEL */ 5659 } 5660 #elif __LP64__ 5661 void 5662 uvm_map_setup_md(struct vm_map *map) 5663 { 5664 vaddr_t min, max; 5665 5666 min = map->min_offset; 5667 max = map->max_offset; 5668 5669 /* 5670 * Ensure the selectors will not try to manage page 0; 5671 * it's too special. 5672 */ 5673 if (min < VMMAP_MIN_ADDR) 5674 min = VMMAP_MIN_ADDR; 5675 5676 #if 0 /* Cool stuff, not yet */ 5677 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5678 #else /* Crappy stuff, for now */ 5679 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5680 #endif 5681 5682 #ifndef SMALL_KERNEL 5683 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5684 #endif /* !SMALL_KERNEL */ 5685 } 5686 #else /* non-i386, 32 bit */ 5687 void 5688 uvm_map_setup_md(struct vm_map *map) 5689 { 5690 vaddr_t min, max; 5691 5692 min = map->min_offset; 5693 max = map->max_offset; 5694 5695 /* 5696 * Ensure the selectors will not try to manage page 0; 5697 * it's too special. 5698 */ 5699 if (min < VMMAP_MIN_ADDR) 5700 min = VMMAP_MIN_ADDR; 5701 5702 #if 0 /* Cool stuff, not yet */ 5703 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5704 #else /* Crappy stuff, for now */ 5705 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5706 #endif 5707 5708 #ifndef SMALL_KERNEL 5709 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5710 #endif /* !SMALL_KERNEL */ 5711 } 5712 #endif 5713