1 /* $OpenBSD: uvm_map.c,v 1.245 2019/06/01 22:42:20 deraadt Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/sysctl.h> 94 #include <sys/signalvar.h> 95 #include <sys/syslog.h> 96 #include <sys/user.h> 97 98 #ifdef SYSVSHM 99 #include <sys/shm.h> 100 #endif 101 102 #include <uvm/uvm.h> 103 104 #ifdef DDB 105 #include <uvm/uvm_ddb.h> 106 #endif 107 108 #include <uvm/uvm_addr.h> 109 110 111 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 112 int uvm_mapent_isjoinable(struct vm_map*, 113 struct vm_map_entry*, struct vm_map_entry*); 114 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 115 struct vm_map_entry*, struct uvm_map_deadq*); 116 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 117 struct vm_map_entry*, struct uvm_map_deadq*); 118 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 119 struct vm_map_entry*, vaddr_t, vsize_t, int, 120 struct uvm_map_deadq*, struct vm_map_entry*); 121 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 122 void uvm_mapent_free(struct vm_map_entry*); 123 void uvm_unmap_kill_entry(struct vm_map*, 124 struct vm_map_entry*); 125 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 126 void uvm_mapent_mkfree(struct vm_map*, 127 struct vm_map_entry*, struct vm_map_entry**, 128 struct uvm_map_deadq*, boolean_t); 129 void uvm_map_pageable_pgon(struct vm_map*, 130 struct vm_map_entry*, struct vm_map_entry*, 131 vaddr_t, vaddr_t); 132 int uvm_map_pageable_wire(struct vm_map*, 133 struct vm_map_entry*, struct vm_map_entry*, 134 vaddr_t, vaddr_t, int); 135 void uvm_map_setup_entries(struct vm_map*); 136 void uvm_map_setup_md(struct vm_map*); 137 void uvm_map_teardown(struct vm_map*); 138 void uvm_map_vmspace_update(struct vm_map*, 139 struct uvm_map_deadq*, int); 140 void uvm_map_kmem_grow(struct vm_map*, 141 struct uvm_map_deadq*, vsize_t, int); 142 void uvm_map_freelist_update_clear(struct vm_map*, 143 struct uvm_map_deadq*); 144 void uvm_map_freelist_update_refill(struct vm_map *, int); 145 void uvm_map_freelist_update(struct vm_map*, 146 struct uvm_map_deadq*, vaddr_t, vaddr_t, 147 vaddr_t, vaddr_t, int); 148 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 149 vaddr_t, vaddr_t, int); 150 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 151 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 152 int); 153 int uvm_map_findspace(struct vm_map*, 154 struct vm_map_entry**, struct vm_map_entry**, 155 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 156 vaddr_t); 157 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 158 void uvm_map_addr_augment(struct vm_map_entry*); 159 160 /* 161 * Tree management functions. 
162 */ 163 164 static __inline void uvm_mapent_copy(struct vm_map_entry*, 165 struct vm_map_entry*); 166 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 167 const struct vm_map_entry*); 168 void uvm_mapent_free_insert(struct vm_map*, 169 struct uvm_addr_state*, struct vm_map_entry*); 170 void uvm_mapent_free_remove(struct vm_map*, 171 struct uvm_addr_state*, struct vm_map_entry*); 172 void uvm_mapent_addr_insert(struct vm_map*, 173 struct vm_map_entry*); 174 void uvm_mapent_addr_remove(struct vm_map*, 175 struct vm_map_entry*); 176 void uvm_map_splitentry(struct vm_map*, 177 struct vm_map_entry*, struct vm_map_entry*, 178 vaddr_t); 179 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 180 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 181 182 /* 183 * uvm_vmspace_fork helper functions. 184 */ 185 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 186 vsize_t, vm_prot_t, vm_prot_t, 187 struct vm_map_entry*, struct uvm_map_deadq*, int, 188 int); 189 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 190 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 191 struct vm_map_entry*, struct uvm_map_deadq*); 192 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 193 struct vm_map*, struct vm_map_entry*, 194 struct uvm_map_deadq*); 195 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 196 struct vm_map*, struct vm_map_entry*, 197 struct uvm_map_deadq*); 198 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 199 struct vm_map*, struct vm_map_entry*, 200 struct uvm_map_deadq*); 201 202 /* 203 * Tree validation. 204 */ 205 #ifdef VMMAP_DEBUG 206 void uvm_tree_assert(struct vm_map*, int, char*, 207 char*, int); 208 #define UVM_ASSERT(map, cond, file, line) \ 209 uvm_tree_assert((map), (cond), #cond, (file), (line)) 210 void uvm_tree_sanity(struct vm_map*, char*, int); 211 void uvm_tree_size_chk(struct vm_map*, char*, int); 212 void vmspace_validate(struct vm_map*); 213 #else 214 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 215 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 216 #define vmspace_validate(_map) do {} while (0) 217 #endif 218 219 /* 220 * All architectures will have pmap_prefer. 221 */ 222 #ifndef PMAP_PREFER 223 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 224 #define PMAP_PREFER_OFFSET(off) 0 225 #define PMAP_PREFER(addr, off) (addr) 226 #endif 227 228 229 /* 230 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 231 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 232 * 233 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 234 * each time. 235 */ 236 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 237 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 238 #define VM_MAP_KSIZE_ALLOCMUL 4 239 /* 240 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 241 * ahead. 242 */ 243 #define FSPACE_DELTA 8 244 /* 245 * Put allocations adjecent to previous allocations when the free-space tree 246 * is larger than FSPACE_COMPACT entries. 247 * 248 * Alignment and PMAP_PREFER may still cause the entry to not be fully 249 * adjecent. Note that this strategy reduces memory fragmentation (by leaving 250 * a large space before or after the allocation). 251 */ 252 #define FSPACE_COMPACT 128 253 /* 254 * Make the address selection skip at most this many bytes from the start of 255 * the free space in which the allocation takes place. 
256 * 257 * The main idea behind a randomized address space is that an attacker cannot 258 * know where to target his attack. Therefore, the location of objects must be 259 * as random as possible. However, the goal is not to create the most sparse 260 * map that is possible. 261 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 262 * sizes, thereby reducing the sparseness. The biggest randomization comes 263 * from fragmentation, i.e. FSPACE_COMPACT. 264 */ 265 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 266 /* 267 * Allow for small gaps in the overflow areas. 268 * Gap size is in bytes and does not have to be a multiple of page-size. 269 */ 270 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 271 272 /* auto-allocate address lower bound */ 273 #define VMMAP_MIN_ADDR PAGE_SIZE 274 275 276 #ifdef DEADBEEF0 277 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 278 #else 279 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 280 #endif 281 282 #ifdef DEBUG 283 int uvm_map_printlocks = 0; 284 285 #define LPRINTF(_args) \ 286 do { \ 287 if (uvm_map_printlocks) \ 288 printf _args; \ 289 } while (0) 290 #else 291 #define LPRINTF(_args) do {} while (0) 292 #endif 293 294 static struct mutex uvm_kmapent_mtx; 295 static struct timeval uvm_kmapent_last_warn_time; 296 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 297 298 const char vmmapbsy[] = "vmmapbsy"; 299 300 /* 301 * pool for vmspace structures. 302 */ 303 struct pool uvm_vmspace_pool; 304 305 /* 306 * pool for dynamically-allocated map entries. 307 */ 308 struct pool uvm_map_entry_pool; 309 struct pool uvm_map_entry_kmem_pool; 310 311 /* 312 * This global represents the end of the kernel virtual address 313 * space. If we want to exceed this, we must grow the kernel 314 * virtual address space dynamically. 315 * 316 * Note, this variable is locked by kernel_map's lock. 317 */ 318 vaddr_t uvm_maxkaddr; 319 320 /* 321 * Locking predicate. 322 */ 323 #define UVM_MAP_REQ_WRITE(_map) \ 324 do { \ 325 if ((_map)->ref_count > 0) { \ 326 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 327 rw_assert_wrlock(&(_map)->lock); \ 328 else \ 329 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 330 } \ 331 } while (0) 332 333 /* 334 * Tree describing entries by address. 335 * 336 * Addresses are unique. 337 * Entries with start == end may only exist if they are the first entry 338 * (sorted by address) within a free-memory tree. 339 */ 340 341 static inline int 342 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 343 const struct vm_map_entry *e2) 344 { 345 return e1->start < e2->start ? -1 : e1->start > e2->start; 346 } 347 348 /* 349 * Copy mapentry. 350 */ 351 static __inline void 352 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 353 { 354 caddr_t csrc, cdst; 355 size_t sz; 356 357 csrc = (caddr_t)src; 358 cdst = (caddr_t)dst; 359 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 360 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 361 362 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 363 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 364 memcpy(cdst, csrc, sz); 365 } 366 367 /* 368 * Handle free-list insertion. 369 */ 370 void 371 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 372 struct vm_map_entry *entry) 373 { 374 const struct uvm_addr_functions *fun; 375 #ifdef VMMAP_DEBUG 376 vaddr_t min, max, bound; 377 #endif 378 379 #ifdef VMMAP_DEBUG 380 /* 381 * Boundary check. 
382 * Boundaries are folded if they go on the same free list. 383 */ 384 min = VMMAP_FREE_START(entry); 385 max = VMMAP_FREE_END(entry); 386 387 while (min < max) { 388 bound = uvm_map_boundary(map, min, max); 389 KASSERT(uvm_map_uaddr(map, min) == uaddr); 390 min = bound; 391 } 392 #endif 393 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 394 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 395 396 UVM_MAP_REQ_WRITE(map); 397 398 /* Actual insert: forward to uaddr pointer. */ 399 if (uaddr != NULL) { 400 fun = uaddr->uaddr_functions; 401 KDASSERT(fun != NULL); 402 if (fun->uaddr_free_insert != NULL) 403 (*fun->uaddr_free_insert)(map, uaddr, entry); 404 entry->etype |= UVM_ET_FREEMAPPED; 405 } 406 407 /* Update fspace augmentation. */ 408 uvm_map_addr_augment(entry); 409 } 410 411 /* 412 * Handle free-list removal. 413 */ 414 void 415 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 416 struct vm_map_entry *entry) 417 { 418 const struct uvm_addr_functions *fun; 419 420 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 421 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 422 UVM_MAP_REQ_WRITE(map); 423 424 if (uaddr != NULL) { 425 fun = uaddr->uaddr_functions; 426 if (fun->uaddr_free_remove != NULL) 427 (*fun->uaddr_free_remove)(map, uaddr, entry); 428 entry->etype &= ~UVM_ET_FREEMAPPED; 429 } 430 } 431 432 /* 433 * Handle address tree insertion. 434 */ 435 void 436 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 437 { 438 struct vm_map_entry *res; 439 440 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 441 panic("uvm_mapent_addr_insert: entry still in addr list"); 442 KDASSERT(entry->start <= entry->end); 443 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 444 (entry->end & (vaddr_t)PAGE_MASK) == 0); 445 446 UVM_MAP_REQ_WRITE(map); 447 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 448 if (res != NULL) { 449 panic("uvm_mapent_addr_insert: map %p entry %p " 450 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 451 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 452 map, entry, 453 entry->start, entry->end, entry->guard, entry->fspace, 454 res, res->start, res->end, res->guard, res->fspace); 455 } 456 } 457 458 /* 459 * Handle address tree removal. 460 */ 461 void 462 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 463 { 464 struct vm_map_entry *res; 465 466 UVM_MAP_REQ_WRITE(map); 467 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 468 if (res != entry) 469 panic("uvm_mapent_addr_remove"); 470 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 471 } 472 473 /* 474 * uvm_map_reference: add reference to a map 475 * 476 * XXX check map reference counter lock 477 */ 478 #define uvm_map_reference(_map) \ 479 do { \ 480 map->ref_count++; \ 481 } while (0) 482 483 /* 484 * Calculate the dused delta. 485 */ 486 vsize_t 487 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 488 { 489 struct vmspace *vm; 490 vsize_t sz; 491 vaddr_t lmax; 492 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 493 494 KASSERT(map->flags & VM_MAP_ISVMSPACE); 495 vm = (struct vmspace *)map; 496 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 497 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 498 499 sz = 0; 500 while (min != max) { 501 lmax = max; 502 if (min < stack_begin && lmax > stack_begin) 503 lmax = stack_begin; 504 else if (min < stack_end && lmax > stack_end) 505 lmax = stack_end; 506 507 if (min >= stack_begin && min < stack_end) { 508 /* nothing */ 509 } else 510 sz += lmax - min; 511 min = lmax; 512 } 513 514 return sz >> PAGE_SHIFT; 515 } 516 517 /* 518 * Find the entry describing the given address. 519 */ 520 struct vm_map_entry* 521 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 522 { 523 struct vm_map_entry *iter; 524 525 iter = RBT_ROOT(uvm_map_addr, atree); 526 while (iter != NULL) { 527 if (iter->start > addr) 528 iter = RBT_LEFT(uvm_map_addr, iter); 529 else if (VMMAP_FREE_END(iter) <= addr) 530 iter = RBT_RIGHT(uvm_map_addr, iter); 531 else 532 return iter; 533 } 534 return NULL; 535 } 536 537 /* 538 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 539 * 540 * Push dead entries into a linked list. 541 * Since the linked list abuses the address tree for storage, the entry 542 * may not be linked in a map. 543 * 544 * *head must be initialized to NULL before the first call to this macro. 545 * uvm_unmap_detach(*head, 0) will remove dead entries. 546 */ 547 static __inline void 548 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 549 { 550 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 551 } 552 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 553 dead_entry_push((_headptr), (_entry)) 554 555 /* 556 * Helper function for uvm_map_findspace_tree. 557 * 558 * Given allocation constraints and pmap constraints, finds the 559 * lowest and highest address in a range that can be used for the 560 * allocation. 561 * 562 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 563 * 564 * 565 * Big chunk of math with a seasoning of dragons. 566 */ 567 int 568 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 569 struct vm_map_entry *sel, vaddr_t align, 570 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 571 { 572 vaddr_t sel_min, sel_max; 573 #ifdef PMAP_PREFER 574 vaddr_t pmap_min, pmap_max; 575 #endif /* PMAP_PREFER */ 576 #ifdef DIAGNOSTIC 577 int bad; 578 #endif /* DIAGNOSTIC */ 579 580 sel_min = VMMAP_FREE_START(sel); 581 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 582 583 #ifdef PMAP_PREFER 584 585 /* 586 * There are two special cases, in which we can satisfy the align 587 * requirement and the pmap_prefer requirement. 588 * - when pmap_off == 0, we always select the largest of the two 589 * - when pmap_off % align == 0 and pmap_align > align, we simply 590 * satisfy the pmap_align requirement and automatically 591 * satisfy the align requirement. 592 */ 593 if (align > PAGE_SIZE && 594 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 595 /* 596 * Simple case: only use align. 597 */ 598 sel_min = roundup(sel_min, align); 599 sel_max &= ~(align - 1); 600 601 if (sel_min > sel_max) 602 return ENOMEM; 603 604 /* Correct for bias. 
*/ 605 if (sel_max - sel_min > FSPACE_BIASGAP) { 606 if (bias > 0) { 607 sel_min = sel_max - FSPACE_BIASGAP; 608 sel_min = roundup(sel_min, align); 609 } else if (bias < 0) { 610 sel_max = sel_min + FSPACE_BIASGAP; 611 sel_max &= ~(align - 1); 612 } 613 } 614 } else if (pmap_align != 0) { 615 /* 616 * Special case: satisfy both pmap_prefer and 617 * align argument. 618 */ 619 pmap_max = sel_max & ~(pmap_align - 1); 620 pmap_min = sel_min; 621 if (pmap_max < sel_min) 622 return ENOMEM; 623 624 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 625 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 626 pmap_min = pmap_max - FSPACE_BIASGAP; 627 /* Align pmap_min. */ 628 pmap_min &= ~(pmap_align - 1); 629 if (pmap_min < sel_min) 630 pmap_min += pmap_align; 631 if (pmap_min > pmap_max) 632 return ENOMEM; 633 634 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 635 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 636 pmap_max = (pmap_min + FSPACE_BIASGAP) & 637 ~(pmap_align - 1); 638 } 639 if (pmap_min > pmap_max) 640 return ENOMEM; 641 642 /* Apply pmap prefer offset. */ 643 pmap_max |= pmap_off; 644 if (pmap_max > sel_max) 645 pmap_max -= pmap_align; 646 pmap_min |= pmap_off; 647 if (pmap_min < sel_min) 648 pmap_min += pmap_align; 649 650 /* 651 * Fixup: it's possible that pmap_min and pmap_max 652 * cross eachother. In this case, try to find one 653 * address that is allowed. 654 * (This usually happens in biased case.) 655 */ 656 if (pmap_min > pmap_max) { 657 if (pmap_min < sel_max) 658 pmap_max = pmap_min; 659 else if (pmap_max > sel_min) 660 pmap_min = pmap_max; 661 else 662 return ENOMEM; 663 } 664 665 /* Internal validation. */ 666 KDASSERT(pmap_min <= pmap_max); 667 668 sel_min = pmap_min; 669 sel_max = pmap_max; 670 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 671 sel_min = sel_max - FSPACE_BIASGAP; 672 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 673 sel_max = sel_min + FSPACE_BIASGAP; 674 675 #else 676 677 if (align > PAGE_SIZE) { 678 sel_min = roundup(sel_min, align); 679 sel_max &= ~(align - 1); 680 if (sel_min > sel_max) 681 return ENOMEM; 682 683 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 684 if (bias > 0) { 685 sel_min = roundup(sel_max - FSPACE_BIASGAP, 686 align); 687 } else { 688 sel_max = (sel_min + FSPACE_BIASGAP) & 689 ~(align - 1); 690 } 691 } 692 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 693 sel_min = sel_max - FSPACE_BIASGAP; 694 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 695 sel_max = sel_min + FSPACE_BIASGAP; 696 697 #endif 698 699 if (sel_min > sel_max) 700 return ENOMEM; 701 702 #ifdef DIAGNOSTIC 703 bad = 0; 704 /* Lower boundary check. */ 705 if (sel_min < VMMAP_FREE_START(sel)) { 706 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 707 sel_min, VMMAP_FREE_START(sel)); 708 bad++; 709 } 710 /* Upper boundary check. */ 711 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 712 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 713 sel_max, 714 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 715 bad++; 716 } 717 /* Lower boundary alignment. */ 718 if (align != 0 && (sel_min & (align - 1)) != 0) { 719 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 720 sel_min, align); 721 bad++; 722 } 723 /* Upper boundary alignment. */ 724 if (align != 0 && (sel_max & (align - 1)) != 0) { 725 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 726 sel_max, align); 727 bad++; 728 } 729 /* Lower boundary PMAP_PREFER check. 
*/ 730 if (pmap_align != 0 && align == 0 && 731 (sel_min & (pmap_align - 1)) != pmap_off) { 732 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 733 sel_min, sel_min & (pmap_align - 1), pmap_off); 734 bad++; 735 } 736 /* Upper boundary PMAP_PREFER check. */ 737 if (pmap_align != 0 && align == 0 && 738 (sel_max & (pmap_align - 1)) != pmap_off) { 739 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 740 sel_max, sel_max & (pmap_align - 1), pmap_off); 741 bad++; 742 } 743 744 if (bad) { 745 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 746 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 747 "bias = %d, " 748 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 749 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 750 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 751 } 752 #endif /* DIAGNOSTIC */ 753 754 *min = sel_min; 755 *max = sel_max; 756 return 0; 757 } 758 759 /* 760 * Test if memory starting at addr with sz bytes is free. 761 * 762 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 763 * the space. 764 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 765 */ 766 int 767 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 768 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 769 vaddr_t addr, vsize_t sz) 770 { 771 struct uvm_addr_state *free; 772 struct uvm_map_addr *atree; 773 struct vm_map_entry *i, *i_end; 774 775 if (addr + sz < addr) 776 return 0; 777 778 /* 779 * Kernel memory above uvm_maxkaddr is considered unavailable. 780 */ 781 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 782 if (addr + sz > uvm_maxkaddr) 783 return 0; 784 } 785 786 atree = &map->addr; 787 788 /* 789 * Fill in first, last, so they point at the entries containing the 790 * first and last address of the range. 791 * Note that if they are not NULL, we don't perform the lookup. 792 */ 793 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 794 if (*start_ptr == NULL) { 795 *start_ptr = uvm_map_entrybyaddr(atree, addr); 796 if (*start_ptr == NULL) 797 return 0; 798 } else 799 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 800 if (*end_ptr == NULL) { 801 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 802 *end_ptr = *start_ptr; 803 else { 804 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 805 if (*end_ptr == NULL) 806 return 0; 807 } 808 } else 809 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 810 811 /* Validation. */ 812 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 813 KDASSERT((*start_ptr)->start <= addr && 814 VMMAP_FREE_END(*start_ptr) > addr && 815 (*end_ptr)->start < addr + sz && 816 VMMAP_FREE_END(*end_ptr) >= addr + sz); 817 818 /* 819 * Check the none of the entries intersects with <addr, addr+sz>. 820 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 821 * considered unavailable unless called by those allocators. 822 */ 823 i = *start_ptr; 824 i_end = RBT_NEXT(uvm_map_addr, *end_ptr); 825 for (; i != i_end; 826 i = RBT_NEXT(uvm_map_addr, i)) { 827 if (i->start != i->end && i->end > addr) 828 return 0; 829 830 /* 831 * uaddr_exe and uaddr_brk_stack may only be used 832 * by these allocators and the NULL uaddr (i.e. no 833 * uaddr). 834 * Reject if this requirement is not met. 
835 */ 836 if (uaddr != NULL) { 837 free = uvm_map_uaddr_e(map, i); 838 839 if (uaddr != free && free != NULL && 840 (free == map->uaddr_exe || 841 free == map->uaddr_brk_stack)) 842 return 0; 843 } 844 } 845 846 return -1; 847 } 848 849 /* 850 * Invoke each address selector until an address is found. 851 * Will not invoke uaddr_exe. 852 */ 853 int 854 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 855 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 856 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 857 { 858 struct uvm_addr_state *uaddr; 859 int i; 860 861 /* 862 * Allocation for sz bytes at any address, 863 * using the addr selectors in order. 864 */ 865 for (i = 0; i < nitems(map->uaddr_any); i++) { 866 uaddr = map->uaddr_any[i]; 867 868 if (uvm_addr_invoke(map, uaddr, first, last, 869 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 870 return 0; 871 } 872 873 /* Fall back to brk() and stack() address selectors. */ 874 uaddr = map->uaddr_brk_stack; 875 if (uvm_addr_invoke(map, uaddr, first, last, 876 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 877 return 0; 878 879 return ENOMEM; 880 } 881 882 /* Calculate entry augmentation value. */ 883 vsize_t 884 uvm_map_addr_augment_get(struct vm_map_entry *entry) 885 { 886 vsize_t augment; 887 struct vm_map_entry *left, *right; 888 889 augment = entry->fspace; 890 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 891 augment = MAX(augment, left->fspace_augment); 892 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 893 augment = MAX(augment, right->fspace_augment); 894 return augment; 895 } 896 897 /* 898 * Update augmentation data in entry. 899 */ 900 void 901 uvm_map_addr_augment(struct vm_map_entry *entry) 902 { 903 vsize_t augment; 904 905 while (entry != NULL) { 906 /* Calculate value for augmentation. */ 907 augment = uvm_map_addr_augment_get(entry); 908 909 /* 910 * Descend update. 911 * Once we find an entry that already has the correct value, 912 * stop, since it means all its parents will use the correct 913 * value too. 914 */ 915 if (entry->fspace_augment == augment) 916 return; 917 entry->fspace_augment = augment; 918 entry = RBT_PARENT(uvm_map_addr, entry); 919 } 920 } 921 922 /* 923 * uvm_mapanon: establish a valid mapping in map for an anon 924 * 925 * => *addr and sz must be a multiple of PAGE_SIZE. 926 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 927 * => map must be unlocked. 928 * 929 * => align: align vaddr, must be a power-of-2. 930 * Align is only a hint and will be ignored if the alignment fails. 931 */ 932 int 933 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 934 vsize_t align, unsigned int flags) 935 { 936 struct vm_map_entry *first, *last, *entry, *new; 937 struct uvm_map_deadq dead; 938 vm_prot_t prot; 939 vm_prot_t maxprot; 940 vm_inherit_t inherit; 941 int advice; 942 int error; 943 vaddr_t pmap_align, pmap_offset; 944 vaddr_t hint; 945 946 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 947 KASSERT(map != kernel_map); 948 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 949 950 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 951 splassert(IPL_NONE); 952 953 /* 954 * We use pmap_align and pmap_offset as alignment and offset variables. 955 * 956 * Because the align parameter takes precedence over pmap prefer, 957 * the pmap_align will need to be set to align, with pmap_offset = 0, 958 * if pmap_prefer will not align. 959 */ 960 pmap_align = MAX(align, PAGE_SIZE); 961 pmap_offset = 0; 962 963 /* Decode parameters. 
*/ 964 prot = UVM_PROTECTION(flags); 965 maxprot = UVM_MAXPROTECTION(flags); 966 advice = UVM_ADVICE(flags); 967 inherit = UVM_INHERIT(flags); 968 error = 0; 969 hint = trunc_page(*addr); 970 TAILQ_INIT(&dead); 971 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 972 KASSERT((align & (align - 1)) == 0); 973 974 /* Check protection. */ 975 if ((prot & maxprot) != prot) 976 return EACCES; 977 978 /* 979 * Before grabbing the lock, allocate a map entry for later 980 * use to ensure we don't wait for memory while holding the 981 * vm_map_lock. 982 */ 983 new = uvm_mapent_alloc(map, flags); 984 if (new == NULL) 985 return(ENOMEM); 986 987 if (flags & UVM_FLAG_TRYLOCK) { 988 if (vm_map_lock_try(map) == FALSE) { 989 error = EFAULT; 990 goto out; 991 } 992 } else 993 vm_map_lock(map); 994 995 first = last = NULL; 996 if (flags & UVM_FLAG_FIXED) { 997 /* 998 * Fixed location. 999 * 1000 * Note: we ignore align, pmap_prefer. 1001 * Fill in first, last and *addr. 1002 */ 1003 KASSERT((*addr & PAGE_MASK) == 0); 1004 1005 /* Check that the space is available. */ 1006 if (flags & UVM_FLAG_UNMAP) { 1007 if ((flags & UVM_FLAG_STACK) && 1008 !uvm_map_is_stack_remappable(map, *addr, sz)) { 1009 error = EINVAL; 1010 goto unlock; 1011 } 1012 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1013 } 1014 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1015 error = ENOMEM; 1016 goto unlock; 1017 } 1018 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1019 (align == 0 || (*addr & (align - 1)) == 0) && 1020 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1021 /* 1022 * Address used as hint. 1023 * 1024 * Note: we enforce the alignment restriction, 1025 * but ignore pmap_prefer. 1026 */ 1027 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1028 /* Run selection algorithm for executables. */ 1029 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1030 addr, sz, pmap_align, pmap_offset, prot, hint); 1031 1032 if (error != 0) 1033 goto unlock; 1034 } else { 1035 /* Update freelists from vmspace. */ 1036 uvm_map_vmspace_update(map, &dead, flags); 1037 1038 error = uvm_map_findspace(map, &first, &last, addr, sz, 1039 pmap_align, pmap_offset, prot, hint); 1040 1041 if (error != 0) 1042 goto unlock; 1043 } 1044 1045 /* Double-check if selected address doesn't cause overflow. */ 1046 if (*addr + sz < *addr) { 1047 error = ENOMEM; 1048 goto unlock; 1049 } 1050 1051 /* If we only want a query, return now. */ 1052 if (flags & UVM_FLAG_QUERY) { 1053 error = 0; 1054 goto unlock; 1055 } 1056 1057 /* 1058 * Create new entry. 1059 * first and last may be invalidated after this call. 
1060 */ 1061 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1062 new); 1063 if (entry == NULL) { 1064 error = ENOMEM; 1065 goto unlock; 1066 } 1067 new = NULL; 1068 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1069 entry->object.uvm_obj = NULL; 1070 entry->offset = 0; 1071 entry->protection = prot; 1072 entry->max_protection = maxprot; 1073 entry->inheritance = inherit; 1074 entry->wired_count = 0; 1075 entry->advice = advice; 1076 if (prot & PROT_WRITE) 1077 map->wserial++; 1078 if (flags & UVM_FLAG_STACK) { 1079 entry->etype |= UVM_ET_STACK; 1080 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 1081 map->sserial++; 1082 } 1083 if (flags & UVM_FLAG_COPYONW) { 1084 entry->etype |= UVM_ET_COPYONWRITE; 1085 if ((flags & UVM_FLAG_OVERLAY) == 0) 1086 entry->etype |= UVM_ET_NEEDSCOPY; 1087 } 1088 if (flags & UVM_FLAG_CONCEAL) 1089 entry->etype |= UVM_ET_CONCEAL; 1090 if (flags & UVM_FLAG_OVERLAY) { 1091 KERNEL_LOCK(); 1092 entry->aref.ar_pageoff = 0; 1093 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1094 KERNEL_UNLOCK(); 1095 } 1096 1097 /* Update map and process statistics. */ 1098 map->size += sz; 1099 ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz); 1100 1101 unlock: 1102 vm_map_unlock(map); 1103 1104 /* 1105 * Remove dead entries. 1106 * 1107 * Dead entries may be the result of merging. 1108 * uvm_map_mkentry may also create dead entries, when it attempts to 1109 * destroy free-space entries. 1110 */ 1111 uvm_unmap_detach(&dead, 0); 1112 out: 1113 if (new) 1114 uvm_mapent_free(new); 1115 return error; 1116 } 1117 1118 /* 1119 * uvm_map: establish a valid mapping in map 1120 * 1121 * => *addr and sz must be a multiple of PAGE_SIZE. 1122 * => map must be unlocked. 1123 * => <uobj,uoffset> value meanings (4 cases): 1124 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1125 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1126 * [3] <uobj,uoffset> == normal mapping 1127 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1128 * 1129 * case [4] is for kernel mappings where we don't know the offset until 1130 * we've found a virtual address. note that kernel object offsets are 1131 * always relative to vm_map_min(kernel_map). 1132 * 1133 * => align: align vaddr, must be a power-of-2. 1134 * Align is only a hint and will be ignored if the alignment fails. 1135 */ 1136 int 1137 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1138 struct uvm_object *uobj, voff_t uoffset, 1139 vsize_t align, unsigned int flags) 1140 { 1141 struct vm_map_entry *first, *last, *entry, *new; 1142 struct uvm_map_deadq dead; 1143 vm_prot_t prot; 1144 vm_prot_t maxprot; 1145 vm_inherit_t inherit; 1146 int advice; 1147 int error; 1148 vaddr_t pmap_align, pmap_offset; 1149 vaddr_t hint; 1150 1151 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1152 splassert(IPL_NONE); 1153 else 1154 splassert(IPL_VM); 1155 1156 /* 1157 * We use pmap_align and pmap_offset as alignment and offset variables. 1158 * 1159 * Because the align parameter takes precedence over pmap prefer, 1160 * the pmap_align will need to be set to align, with pmap_offset = 0, 1161 * if pmap_prefer will not align. 
1162 */ 1163 if (uoffset == UVM_UNKNOWN_OFFSET) { 1164 pmap_align = MAX(align, PAGE_SIZE); 1165 pmap_offset = 0; 1166 } else { 1167 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1168 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1169 1170 if (align == 0 || 1171 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1172 /* pmap_offset satisfies align, no change. */ 1173 } else { 1174 /* Align takes precedence over pmap prefer. */ 1175 pmap_align = align; 1176 pmap_offset = 0; 1177 } 1178 } 1179 1180 /* Decode parameters. */ 1181 prot = UVM_PROTECTION(flags); 1182 maxprot = UVM_MAXPROTECTION(flags); 1183 advice = UVM_ADVICE(flags); 1184 inherit = UVM_INHERIT(flags); 1185 error = 0; 1186 hint = trunc_page(*addr); 1187 TAILQ_INIT(&dead); 1188 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1189 KASSERT((align & (align - 1)) == 0); 1190 1191 /* Holes are incompatible with other types of mappings. */ 1192 if (flags & UVM_FLAG_HOLE) { 1193 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1194 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1195 } 1196 1197 /* Unset hint for kernel_map non-fixed allocations. */ 1198 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1199 hint = 0; 1200 1201 /* Check protection. */ 1202 if ((prot & maxprot) != prot) 1203 return EACCES; 1204 1205 if (map == kernel_map && 1206 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1207 panic("uvm_map: kernel map W^X violation requested"); 1208 1209 /* 1210 * Before grabbing the lock, allocate a map entry for later 1211 * use to ensure we don't wait for memory while holding the 1212 * vm_map_lock. 1213 */ 1214 new = uvm_mapent_alloc(map, flags); 1215 if (new == NULL) 1216 return(ENOMEM); 1217 1218 if (flags & UVM_FLAG_TRYLOCK) { 1219 if (vm_map_lock_try(map) == FALSE) { 1220 error = EFAULT; 1221 goto out; 1222 } 1223 } else { 1224 vm_map_lock(map); 1225 } 1226 1227 first = last = NULL; 1228 if (flags & UVM_FLAG_FIXED) { 1229 /* 1230 * Fixed location. 1231 * 1232 * Note: we ignore align, pmap_prefer. 1233 * Fill in first, last and *addr. 1234 */ 1235 KASSERT((*addr & PAGE_MASK) == 0); 1236 1237 /* 1238 * Grow pmap to include allocated address. 1239 * If the growth fails, the allocation will fail too. 1240 */ 1241 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1242 uvm_maxkaddr < (*addr + sz)) { 1243 uvm_map_kmem_grow(map, &dead, 1244 *addr + sz - uvm_maxkaddr, flags); 1245 } 1246 1247 /* Check that the space is available. */ 1248 if (flags & UVM_FLAG_UNMAP) 1249 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1250 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1251 error = ENOMEM; 1252 goto unlock; 1253 } 1254 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1255 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1256 (align == 0 || (*addr & (align - 1)) == 0) && 1257 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1258 /* 1259 * Address used as hint. 1260 * 1261 * Note: we enforce the alignment restriction, 1262 * but ignore pmap_prefer. 1263 */ 1264 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1265 /* Run selection algorithm for executables. */ 1266 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1267 addr, sz, pmap_align, pmap_offset, prot, hint); 1268 1269 /* Grow kernel memory and try again. 
*/ 1270 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1271 uvm_map_kmem_grow(map, &dead, sz, flags); 1272 1273 error = uvm_addr_invoke(map, map->uaddr_exe, 1274 &first, &last, addr, sz, 1275 pmap_align, pmap_offset, prot, hint); 1276 } 1277 1278 if (error != 0) 1279 goto unlock; 1280 } else { 1281 /* Update freelists from vmspace. */ 1282 if (map->flags & VM_MAP_ISVMSPACE) 1283 uvm_map_vmspace_update(map, &dead, flags); 1284 1285 error = uvm_map_findspace(map, &first, &last, addr, sz, 1286 pmap_align, pmap_offset, prot, hint); 1287 1288 /* Grow kernel memory and try again. */ 1289 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1290 uvm_map_kmem_grow(map, &dead, sz, flags); 1291 1292 error = uvm_map_findspace(map, &first, &last, addr, sz, 1293 pmap_align, pmap_offset, prot, hint); 1294 } 1295 1296 if (error != 0) 1297 goto unlock; 1298 } 1299 1300 /* Double-check if selected address doesn't cause overflow. */ 1301 if (*addr + sz < *addr) { 1302 error = ENOMEM; 1303 goto unlock; 1304 } 1305 1306 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1307 uvm_maxkaddr >= *addr + sz); 1308 1309 /* If we only want a query, return now. */ 1310 if (flags & UVM_FLAG_QUERY) { 1311 error = 0; 1312 goto unlock; 1313 } 1314 1315 if (uobj == NULL) 1316 uoffset = 0; 1317 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1318 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1319 uoffset = *addr - vm_map_min(kernel_map); 1320 } 1321 1322 /* 1323 * Create new entry. 1324 * first and last may be invalidated after this call. 1325 */ 1326 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1327 new); 1328 if (entry == NULL) { 1329 error = ENOMEM; 1330 goto unlock; 1331 } 1332 new = NULL; 1333 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1334 entry->object.uvm_obj = uobj; 1335 entry->offset = uoffset; 1336 entry->protection = prot; 1337 entry->max_protection = maxprot; 1338 entry->inheritance = inherit; 1339 entry->wired_count = 0; 1340 entry->advice = advice; 1341 if (prot & PROT_WRITE) 1342 map->wserial++; 1343 if (flags & UVM_FLAG_STACK) { 1344 entry->etype |= UVM_ET_STACK; 1345 if (flags & UVM_FLAG_UNMAP) 1346 map->sserial++; 1347 } 1348 if (uobj) 1349 entry->etype |= UVM_ET_OBJ; 1350 else if (flags & UVM_FLAG_HOLE) 1351 entry->etype |= UVM_ET_HOLE; 1352 if (flags & UVM_FLAG_NOFAULT) 1353 entry->etype |= UVM_ET_NOFAULT; 1354 if (flags & UVM_FLAG_WC) 1355 entry->etype |= UVM_ET_WC; 1356 if (flags & UVM_FLAG_COPYONW) { 1357 entry->etype |= UVM_ET_COPYONWRITE; 1358 if ((flags & UVM_FLAG_OVERLAY) == 0) 1359 entry->etype |= UVM_ET_NEEDSCOPY; 1360 } 1361 if (flags & UVM_FLAG_CONCEAL) 1362 entry->etype |= UVM_ET_CONCEAL; 1363 if (flags & UVM_FLAG_OVERLAY) { 1364 entry->aref.ar_pageoff = 0; 1365 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1366 } 1367 1368 /* Update map and process statistics. */ 1369 if (!(flags & UVM_FLAG_HOLE)) { 1370 map->size += sz; 1371 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1372 ((struct vmspace *)map)->vm_dused += 1373 uvmspace_dused(map, *addr, *addr + sz); 1374 } 1375 } 1376 1377 /* 1378 * Try to merge entry. 1379 * 1380 * Userland allocations are kept separated most of the time. 1381 * Forego the effort of merging what most of the time can't be merged 1382 * and only try the merge if it concerns a kernel entry. 
1383 */ 1384 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1385 (map->flags & VM_MAP_ISVMSPACE) == 0) 1386 uvm_mapent_tryjoin(map, entry, &dead); 1387 1388 unlock: 1389 vm_map_unlock(map); 1390 1391 /* 1392 * Remove dead entries. 1393 * 1394 * Dead entries may be the result of merging. 1395 * uvm_map_mkentry may also create dead entries, when it attempts to 1396 * destroy free-space entries. 1397 */ 1398 if (map->flags & VM_MAP_INTRSAFE) 1399 uvm_unmap_detach_intrsafe(&dead); 1400 else 1401 uvm_unmap_detach(&dead, 0); 1402 out: 1403 if (new) 1404 uvm_mapent_free(new); 1405 return error; 1406 } 1407 1408 /* 1409 * True iff e1 and e2 can be joined together. 1410 */ 1411 int 1412 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1413 struct vm_map_entry *e2) 1414 { 1415 KDASSERT(e1 != NULL && e2 != NULL); 1416 1417 /* Must be the same entry type and not have free memory between. */ 1418 if (e1->etype != e2->etype || e1->end != e2->start) 1419 return 0; 1420 1421 /* Submaps are never joined. */ 1422 if (UVM_ET_ISSUBMAP(e1)) 1423 return 0; 1424 1425 /* Never merge wired memory. */ 1426 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1427 return 0; 1428 1429 /* Protection, inheritance and advice must be equal. */ 1430 if (e1->protection != e2->protection || 1431 e1->max_protection != e2->max_protection || 1432 e1->inheritance != e2->inheritance || 1433 e1->advice != e2->advice) 1434 return 0; 1435 1436 /* If uvm_object: object itself and offsets within object must match. */ 1437 if (UVM_ET_ISOBJ(e1)) { 1438 if (e1->object.uvm_obj != e2->object.uvm_obj) 1439 return 0; 1440 if (e1->offset + (e1->end - e1->start) != e2->offset) 1441 return 0; 1442 } 1443 1444 /* 1445 * Cannot join shared amaps. 1446 * Note: no need to lock amap to look at refs, since we don't care 1447 * about its exact value. 1448 * If it is 1 (i.e. we have the only reference) it will stay there. 1449 */ 1450 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1451 return 0; 1452 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1453 return 0; 1454 1455 /* Apprently, e1 and e2 match. */ 1456 return 1; 1457 } 1458 1459 /* 1460 * Join support function. 1461 * 1462 * Returns the merged entry on succes. 1463 * Returns NULL if the merge failed. 1464 */ 1465 struct vm_map_entry* 1466 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1467 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1468 { 1469 struct uvm_addr_state *free; 1470 1471 /* 1472 * Merging is not supported for map entries that 1473 * contain an amap in e1. This should never happen 1474 * anyway, because only kernel entries are merged. 1475 * These do not contain amaps. 1476 * e2 contains no real information in its amap, 1477 * so it can be erased immediately. 1478 */ 1479 KASSERT(e1->aref.ar_amap == NULL); 1480 1481 /* 1482 * Don't drop obj reference: 1483 * uvm_unmap_detach will do this for us. 1484 */ 1485 free = uvm_map_uaddr_e(map, e1); 1486 uvm_mapent_free_remove(map, free, e1); 1487 1488 free = uvm_map_uaddr_e(map, e2); 1489 uvm_mapent_free_remove(map, free, e2); 1490 uvm_mapent_addr_remove(map, e2); 1491 e1->end = e2->end; 1492 e1->guard = e2->guard; 1493 e1->fspace = e2->fspace; 1494 uvm_mapent_free_insert(map, free, e1); 1495 1496 DEAD_ENTRY_PUSH(dead, e2); 1497 return e1; 1498 } 1499 1500 /* 1501 * Attempt forward and backward joining of entry. 1502 * 1503 * Returns entry after joins. 1504 * We are guaranteed that the amap of entry is either non-existent or 1505 * has never been used. 
1506 */ 1507 struct vm_map_entry* 1508 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1509 struct uvm_map_deadq *dead) 1510 { 1511 struct vm_map_entry *other; 1512 struct vm_map_entry *merged; 1513 1514 /* Merge with previous entry. */ 1515 other = RBT_PREV(uvm_map_addr, entry); 1516 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1517 merged = uvm_mapent_merge(map, other, entry, dead); 1518 if (merged) 1519 entry = merged; 1520 } 1521 1522 /* 1523 * Merge with next entry. 1524 * 1525 * Because amap can only extend forward and the next entry 1526 * probably contains sensible info, only perform forward merging 1527 * in the absence of an amap. 1528 */ 1529 other = RBT_NEXT(uvm_map_addr, entry); 1530 if (other && entry->aref.ar_amap == NULL && 1531 other->aref.ar_amap == NULL && 1532 uvm_mapent_isjoinable(map, entry, other)) { 1533 merged = uvm_mapent_merge(map, entry, other, dead); 1534 if (merged) 1535 entry = merged; 1536 } 1537 1538 return entry; 1539 } 1540 1541 /* 1542 * Kill entries that are no longer in a map. 1543 */ 1544 void 1545 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1546 { 1547 struct vm_map_entry *entry, *tmp; 1548 int waitok = flags & UVM_PLA_WAITOK; 1549 1550 TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) { 1551 /* Skip entries for which we have to grab the kernel lock. */ 1552 if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) || 1553 UVM_ET_ISOBJ(entry)) 1554 continue; 1555 1556 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1557 uvm_mapent_free(entry); 1558 } 1559 1560 if (TAILQ_EMPTY(deadq)) 1561 return; 1562 1563 KERNEL_LOCK(); 1564 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1565 if (waitok) 1566 uvm_pause(); 1567 /* Drop reference to amap, if we've got one. */ 1568 if (entry->aref.ar_amap) 1569 amap_unref(entry->aref.ar_amap, 1570 entry->aref.ar_pageoff, 1571 atop(entry->end - entry->start), 1572 flags & AMAP_REFALL); 1573 1574 /* Drop reference to our backing object, if we've got one. */ 1575 if (UVM_ET_ISSUBMAP(entry)) { 1576 /* ... unlikely to happen, but play it safe */ 1577 uvm_map_deallocate(entry->object.sub_map); 1578 } else if (UVM_ET_ISOBJ(entry) && 1579 entry->object.uvm_obj->pgops->pgo_detach) { 1580 entry->object.uvm_obj->pgops->pgo_detach( 1581 entry->object.uvm_obj); 1582 } 1583 1584 /* Step to next. */ 1585 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1586 uvm_mapent_free(entry); 1587 } 1588 KERNEL_UNLOCK(); 1589 } 1590 1591 void 1592 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1593 { 1594 struct vm_map_entry *entry; 1595 1596 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1597 KASSERT(entry->aref.ar_amap == NULL); 1598 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1599 KASSERT(!UVM_ET_ISOBJ(entry)); 1600 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1601 uvm_mapent_free(entry); 1602 } 1603 } 1604 1605 /* 1606 * Create and insert new entry. 1607 * 1608 * Returned entry contains new addresses and is inserted properly in the tree. 1609 * first and last are (probably) no longer valid. 
1610 */ 1611 struct vm_map_entry* 1612 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1613 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1614 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1615 { 1616 struct vm_map_entry *entry, *prev; 1617 struct uvm_addr_state *free; 1618 vaddr_t min, max; /* free space boundaries for new entry */ 1619 1620 KDASSERT(map != NULL); 1621 KDASSERT(first != NULL); 1622 KDASSERT(last != NULL); 1623 KDASSERT(dead != NULL); 1624 KDASSERT(sz > 0); 1625 KDASSERT(addr + sz > addr); 1626 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1627 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1628 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1629 uvm_tree_sanity(map, __FILE__, __LINE__); 1630 1631 min = addr + sz; 1632 max = VMMAP_FREE_END(last); 1633 1634 /* Initialize new entry. */ 1635 if (new == NULL) 1636 entry = uvm_mapent_alloc(map, flags); 1637 else 1638 entry = new; 1639 if (entry == NULL) 1640 return NULL; 1641 entry->offset = 0; 1642 entry->etype = 0; 1643 entry->wired_count = 0; 1644 entry->aref.ar_pageoff = 0; 1645 entry->aref.ar_amap = NULL; 1646 1647 entry->start = addr; 1648 entry->end = min; 1649 entry->guard = 0; 1650 entry->fspace = 0; 1651 1652 /* Reset free space in first. */ 1653 free = uvm_map_uaddr_e(map, first); 1654 uvm_mapent_free_remove(map, free, first); 1655 first->guard = 0; 1656 first->fspace = 0; 1657 1658 /* 1659 * Remove all entries that are fully replaced. 1660 * We are iterating using last in reverse order. 1661 */ 1662 for (; first != last; last = prev) { 1663 prev = RBT_PREV(uvm_map_addr, last); 1664 1665 KDASSERT(last->start == last->end); 1666 free = uvm_map_uaddr_e(map, last); 1667 uvm_mapent_free_remove(map, free, last); 1668 uvm_mapent_addr_remove(map, last); 1669 DEAD_ENTRY_PUSH(dead, last); 1670 } 1671 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1672 if (first->start == addr) { 1673 uvm_mapent_addr_remove(map, first); 1674 DEAD_ENTRY_PUSH(dead, first); 1675 } else { 1676 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1677 addr, flags); 1678 } 1679 1680 /* Finally, link in entry. 
*/ 1681 uvm_mapent_addr_insert(map, entry); 1682 uvm_map_fix_space(map, entry, min, max, flags); 1683 1684 uvm_tree_sanity(map, __FILE__, __LINE__); 1685 return entry; 1686 } 1687 1688 1689 /* 1690 * uvm_mapent_alloc: allocate a map entry 1691 */ 1692 struct vm_map_entry * 1693 uvm_mapent_alloc(struct vm_map *map, int flags) 1694 { 1695 struct vm_map_entry *me, *ne; 1696 int pool_flags; 1697 int i; 1698 1699 pool_flags = PR_WAITOK; 1700 if (flags & UVM_FLAG_TRYLOCK) 1701 pool_flags = PR_NOWAIT; 1702 1703 if (map->flags & VM_MAP_INTRSAFE || cold) { 1704 mtx_enter(&uvm_kmapent_mtx); 1705 if (SLIST_EMPTY(&uvm.kentry_free)) { 1706 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1707 &kd_nowait); 1708 if (ne == NULL) 1709 panic("uvm_mapent_alloc: cannot allocate map " 1710 "entry"); 1711 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1712 SLIST_INSERT_HEAD(&uvm.kentry_free, 1713 &ne[i], daddrs.addr_kentry); 1714 } 1715 if (ratecheck(&uvm_kmapent_last_warn_time, 1716 &uvm_kmapent_warn_rate)) 1717 printf("uvm_mapent_alloc: out of static " 1718 "map entries\n"); 1719 } 1720 me = SLIST_FIRST(&uvm.kentry_free); 1721 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1722 uvmexp.kmapent++; 1723 mtx_leave(&uvm_kmapent_mtx); 1724 me->flags = UVM_MAP_STATIC; 1725 } else if (map == kernel_map) { 1726 splassert(IPL_NONE); 1727 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1728 if (me == NULL) 1729 goto out; 1730 me->flags = UVM_MAP_KMEM; 1731 } else { 1732 splassert(IPL_NONE); 1733 me = pool_get(&uvm_map_entry_pool, pool_flags); 1734 if (me == NULL) 1735 goto out; 1736 me->flags = 0; 1737 } 1738 1739 if (me != NULL) { 1740 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1741 } 1742 1743 out: 1744 return(me); 1745 } 1746 1747 /* 1748 * uvm_mapent_free: free map entry 1749 * 1750 * => XXX: static pool for kernel map? 1751 */ 1752 void 1753 uvm_mapent_free(struct vm_map_entry *me) 1754 { 1755 if (me->flags & UVM_MAP_STATIC) { 1756 mtx_enter(&uvm_kmapent_mtx); 1757 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1758 uvmexp.kmapent--; 1759 mtx_leave(&uvm_kmapent_mtx); 1760 } else if (me->flags & UVM_MAP_KMEM) { 1761 splassert(IPL_NONE); 1762 pool_put(&uvm_map_entry_kmem_pool, me); 1763 } else { 1764 splassert(IPL_NONE); 1765 pool_put(&uvm_map_entry_pool, me); 1766 } 1767 } 1768 1769 /* 1770 * uvm_map_lookup_entry: find map entry at or before an address. 1771 * 1772 * => map must at least be read-locked by caller 1773 * => entry is returned in "entry" 1774 * => return value is true if address is in the returned entry 1775 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1776 * returned for those mappings. 1777 */ 1778 boolean_t 1779 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1780 struct vm_map_entry **entry) 1781 { 1782 *entry = uvm_map_entrybyaddr(&map->addr, address); 1783 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1784 (*entry)->start <= address && (*entry)->end > address; 1785 } 1786 1787 /* 1788 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet 1789 * grown -- then uvm_map_check_region_range() should not cache the entry 1790 * because growth won't be seen. 1791 */ 1792 int 1793 uvm_map_inentry_sp(vm_map_entry_t entry) 1794 { 1795 if ((entry->etype & UVM_ET_STACK) == 0) { 1796 if (entry->protection == PROT_NONE) 1797 return (-1); /* don't update range */ 1798 return (0); 1799 } 1800 return (1); 1801 } 1802 1803 /* 1804 * If a syscall comes from a writeable entry, W^X is violated. 
1805 * (Would be nice if we can spot aliasing, which is also kind of bad) 1806 */ 1807 int 1808 uvm_map_inentry_pc(vm_map_entry_t entry) 1809 { 1810 if (entry->protection & PROT_WRITE) 1811 return (0); /* not permitted */ 1812 return (1); 1813 } 1814 1815 int 1816 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie) 1817 { 1818 return (serial != ie->ie_serial || ie->ie_start == 0 || 1819 addr < ie->ie_start || addr >= ie->ie_end); 1820 } 1821 1822 /* 1823 * Inside a vm_map find the reg address and verify it via function. 1824 * Remember low and high addresses of region if valid and return TRUE, 1825 * else return FALSE. 1826 */ 1827 boolean_t 1828 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1829 int (*fn)(vm_map_entry_t), u_long serial) 1830 { 1831 vm_map_t map = &p->p_vmspace->vm_map; 1832 vm_map_entry_t entry; 1833 int ret; 1834 1835 if (addr < map->min_offset || addr >= map->max_offset) 1836 return (FALSE); 1837 1838 /* lock map */ 1839 vm_map_lock_read(map); 1840 1841 /* lookup */ 1842 if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) { 1843 vm_map_unlock_read(map); 1844 return (FALSE); 1845 } 1846 1847 ret = (*fn)(entry); 1848 if (ret == 0) { 1849 vm_map_unlock_read(map); 1850 return (FALSE); 1851 } else if (ret == 1) { 1852 ie->ie_start = entry->start; 1853 ie->ie_end = entry->end; 1854 ie->ie_serial = serial; 1855 } else { 1856 /* do not update, re-check later */ 1857 } 1858 vm_map_unlock_read(map); 1859 return (TRUE); 1860 } 1861 1862 boolean_t 1863 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr, char *name, 1864 int (*fn)(vm_map_entry_t), u_long serial) 1865 { 1866 union sigval sv; 1867 boolean_t ok = TRUE; 1868 1869 if (uvm_map_inentry_recheck(serial, addr, ie)) { 1870 KERNEL_LOCK(); 1871 ok = uvm_map_inentry_fix(p, ie, addr, fn, serial); 1872 if (!ok) { 1873 printf("[%s]%d/%d %s %lx not inside %lx-%lx\n", 1874 p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid, 1875 name, addr, ie->ie_start, ie->ie_end); 1876 sv.sival_ptr = (void *)PROC_PC(p); 1877 trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv); 1878 } 1879 KERNEL_UNLOCK(); 1880 } 1881 return (ok); 1882 } 1883 1884 /* 1885 * Check whether the given address range can be converted to a MAP_STACK 1886 * mapping. 1887 * 1888 * Must be called with map locked. 1889 */ 1890 boolean_t 1891 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz) 1892 { 1893 vaddr_t end = addr + sz; 1894 struct vm_map_entry *first, *iter, *prev = NULL; 1895 1896 if (!uvm_map_lookup_entry(map, addr, &first)) { 1897 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1898 addr, end, map); 1899 return FALSE; 1900 } 1901 1902 /* 1903 * Check that the address range exists and is contiguous. 1904 */ 1905 for (iter = first; iter != NULL && iter->start < end; 1906 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1907 /* 1908 * Make sure that we do not have holes in the range. 
1909 */ 1910 #if 0 1911 if (prev != NULL) { 1912 printf("prev->start 0x%lx, prev->end 0x%lx, " 1913 "iter->start 0x%lx, iter->end 0x%lx\n", 1914 prev->start, prev->end, iter->start, iter->end); 1915 } 1916 #endif 1917 1918 if (prev != NULL && prev->end != iter->start) { 1919 printf("map stack 0x%lx-0x%lx of map %p failed: " 1920 "hole in range\n", addr, end, map); 1921 return FALSE; 1922 } 1923 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1924 printf("map stack 0x%lx-0x%lx of map %p failed: " 1925 "hole in range\n", addr, end, map); 1926 return FALSE; 1927 } 1928 } 1929 1930 return TRUE; 1931 } 1932 1933 /* 1934 * Remap the middle-pages of an existing mapping as a stack range. 1935 * If there exists a previous contiguous mapping with the given range 1936 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1937 * mapping is dropped, and a new anon mapping is created and marked as 1938 * a stack. 1939 * 1940 * Must be called with map unlocked. 1941 */ 1942 int 1943 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1944 { 1945 vm_map_t map = &p->p_vmspace->vm_map; 1946 vaddr_t start, end; 1947 int error; 1948 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1949 PROT_READ | PROT_WRITE | PROT_EXEC, 1950 MAP_INHERIT_COPY, MADV_NORMAL, 1951 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1952 UVM_FLAG_COPYONW); 1953 1954 start = round_page(addr); 1955 end = trunc_page(addr + sz); 1956 #ifdef MACHINE_STACK_GROWS_UP 1957 if (end == addr + sz) 1958 end -= PAGE_SIZE; 1959 #else 1960 if (start == addr) 1961 start += PAGE_SIZE; 1962 #endif 1963 1964 if (start < map->min_offset || end >= map->max_offset || end < start) 1965 return EINVAL; 1966 1967 error = uvm_mapanon(map, &start, end - start, 0, flags); 1968 if (error != 0) 1969 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1970 1971 return error; 1972 } 1973 1974 /* 1975 * uvm_map_pie: return a random load address for a PIE executable 1976 * properly aligned. 1977 */ 1978 #ifndef VM_PIE_MAX_ADDR 1979 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1980 #endif 1981 1982 #ifndef VM_PIE_MIN_ADDR 1983 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1984 #endif 1985 1986 #ifndef VM_PIE_MIN_ALIGN 1987 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1988 #endif 1989 1990 vaddr_t 1991 uvm_map_pie(vaddr_t align) 1992 { 1993 vaddr_t addr, space, min; 1994 1995 align = MAX(align, VM_PIE_MIN_ALIGN); 1996 1997 /* round up to next alignment */ 1998 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1999 2000 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 2001 return (align); 2002 2003 space = (VM_PIE_MAX_ADDR - min) / align; 2004 space = MIN(space, (u_int32_t)-1); 2005 2006 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 2007 addr += min; 2008 2009 return (addr); 2010 } 2011 2012 void 2013 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 2014 { 2015 struct uvm_map_deadq dead; 2016 2017 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 2018 (end & (vaddr_t)PAGE_MASK) == 0); 2019 TAILQ_INIT(&dead); 2020 vm_map_lock(map); 2021 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 2022 vm_map_unlock(map); 2023 2024 if (map->flags & VM_MAP_INTRSAFE) 2025 uvm_unmap_detach_intrsafe(&dead); 2026 else 2027 uvm_unmap_detach(&dead, 0); 2028 } 2029 2030 /* 2031 * Mark entry as free. 2032 * 2033 * entry will be put on the dead list. 2034 * The free space will be merged into the previous or a new entry, 2035 * unless markfree is false. 
2036 */ 2037 void 2038 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 2039 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 2040 boolean_t markfree) 2041 { 2042 struct uvm_addr_state *free; 2043 struct vm_map_entry *prev; 2044 vaddr_t addr; /* Start of freed range. */ 2045 vaddr_t end; /* End of freed range. */ 2046 2047 prev = *prev_ptr; 2048 if (prev == entry) 2049 *prev_ptr = prev = NULL; 2050 2051 if (prev == NULL || 2052 VMMAP_FREE_END(prev) != entry->start) 2053 prev = RBT_PREV(uvm_map_addr, entry); 2054 2055 /* Entry is describing only free memory and has nothing to drain into. */ 2056 if (prev == NULL && entry->start == entry->end && markfree) { 2057 *prev_ptr = entry; 2058 return; 2059 } 2060 2061 addr = entry->start; 2062 end = VMMAP_FREE_END(entry); 2063 free = uvm_map_uaddr_e(map, entry); 2064 uvm_mapent_free_remove(map, free, entry); 2065 uvm_mapent_addr_remove(map, entry); 2066 DEAD_ENTRY_PUSH(dead, entry); 2067 2068 if (markfree) { 2069 if (prev) { 2070 free = uvm_map_uaddr_e(map, prev); 2071 uvm_mapent_free_remove(map, free, prev); 2072 } 2073 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 2074 } 2075 } 2076 2077 /* 2078 * Unwire and release referenced amap and object from map entry. 2079 */ 2080 void 2081 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2082 { 2083 /* Unwire removed map entry. */ 2084 if (VM_MAPENT_ISWIRED(entry)) { 2085 KERNEL_LOCK(); 2086 entry->wired_count = 0; 2087 uvm_fault_unwire_locked(map, entry->start, entry->end); 2088 KERNEL_UNLOCK(); 2089 } 2090 2091 /* Entry-type specific code. */ 2092 if (UVM_ET_ISHOLE(entry)) { 2093 /* Nothing to be done for holes. */ 2094 } else if (map->flags & VM_MAP_INTRSAFE) { 2095 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2096 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2097 pmap_kremove(entry->start, entry->end - entry->start); 2098 } else if (UVM_ET_ISOBJ(entry) && 2099 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2100 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2101 /* 2102 * Note: kernel object mappings are currently used in 2103 * two ways: 2104 * [1] "normal" mappings of pages in the kernel object 2105 * [2] uvm_km_valloc'd allocations in which we 2106 * pmap_enter in some non-kernel-object page 2107 * (e.g. vmapbuf). 2108 * 2109 * for case [1], we need to remove the mapping from 2110 * the pmap and then remove the page from the kernel 2111 * object (because, once pages in a kernel object are 2112 * unmapped they are no longer needed, unlike, say, 2113 * a vnode where you might want the data to persist 2114 * until flushed out of a queue). 2115 * 2116 * for case [2], we need to remove the mapping from 2117 * the pmap. there shouldn't be any pages at the 2118 * specified offset in the kernel object [but it 2119 * doesn't hurt to call uvm_km_pgremove just to be 2120 * safe?] 2121 * 2122 * uvm_km_pgremove currently does the following: 2123 * for pages in the kernel object range: 2124 * - drops the swap slot 2125 * - uvm_pagefree the page 2126 * 2127 * note there is version of uvm_km_pgremove() that 2128 * is used for "intrsafe" objects. 2129 */ 2130 /* 2131 * remove mappings from pmap and drop the pages 2132 * from the object. offsets are always relative 2133 * to vm_map_min(kernel_map). 
2134 */ 2135 pmap_remove(pmap_kernel(), entry->start, entry->end); 2136 uvm_km_pgremove(entry->object.uvm_obj, 2137 entry->start - vm_map_min(kernel_map), 2138 entry->end - vm_map_min(kernel_map)); 2139 2140 /* 2141 * null out kernel_object reference, we've just 2142 * dropped it 2143 */ 2144 entry->etype &= ~UVM_ET_OBJ; 2145 entry->object.uvm_obj = NULL; /* to be safe */ 2146 } else { 2147 /* remove mappings the standard way. */ 2148 pmap_remove(map->pmap, entry->start, entry->end); 2149 } 2150 } 2151 2152 /* 2153 * Remove all entries from start to end. 2154 * 2155 * If remove_holes, then remove ET_HOLE entries as well. 2156 * If markfree, entry will be properly marked free, otherwise, no replacement 2157 * entry will be put in the tree (corrupting the tree). 2158 */ 2159 void 2160 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2161 struct uvm_map_deadq *dead, boolean_t remove_holes, 2162 boolean_t markfree) 2163 { 2164 struct vm_map_entry *prev_hint, *next, *entry; 2165 2166 start = MAX(start, map->min_offset); 2167 end = MIN(end, map->max_offset); 2168 if (start >= end) 2169 return; 2170 2171 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2172 splassert(IPL_NONE); 2173 else 2174 splassert(IPL_VM); 2175 2176 /* Find first affected entry. */ 2177 entry = uvm_map_entrybyaddr(&map->addr, start); 2178 KDASSERT(entry != NULL && entry->start <= start); 2179 if (entry->end <= start && markfree) 2180 entry = RBT_NEXT(uvm_map_addr, entry); 2181 else 2182 UVM_MAP_CLIP_START(map, entry, start); 2183 2184 /* 2185 * Iterate entries until we reach end address. 2186 * prev_hint hints where the freed space can be appended to. 2187 */ 2188 prev_hint = NULL; 2189 for (; entry != NULL && entry->start < end; entry = next) { 2190 KDASSERT(entry->start >= start); 2191 if (entry->end > end || !markfree) 2192 UVM_MAP_CLIP_END(map, entry, end); 2193 KDASSERT(entry->start >= start && entry->end <= end); 2194 next = RBT_NEXT(uvm_map_addr, entry); 2195 2196 /* Don't remove holes unless asked to do so. */ 2197 if (UVM_ET_ISHOLE(entry)) { 2198 if (!remove_holes) { 2199 prev_hint = entry; 2200 continue; 2201 } 2202 } 2203 2204 /* A stack has been removed.. */ 2205 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2206 map->sserial++; 2207 2208 /* Kill entry. */ 2209 uvm_unmap_kill_entry(map, entry); 2210 2211 /* Update space usage. */ 2212 if ((map->flags & VM_MAP_ISVMSPACE) && 2213 entry->object.uvm_obj == NULL && 2214 !UVM_ET_ISHOLE(entry)) { 2215 ((struct vmspace *)map)->vm_dused -= 2216 uvmspace_dused(map, entry->start, entry->end); 2217 } 2218 if (!UVM_ET_ISHOLE(entry)) 2219 map->size -= entry->end - entry->start; 2220 2221 /* Actual removal of entry. */ 2222 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2223 } 2224 2225 pmap_update(vm_map_pmap(map)); 2226 2227 #ifdef VMMAP_DEBUG 2228 if (markfree) { 2229 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2230 entry != NULL && entry->start < end; 2231 entry = RBT_NEXT(uvm_map_addr, entry)) { 2232 KDASSERT(entry->end <= start || 2233 entry->start == entry->end || 2234 UVM_ET_ISHOLE(entry)); 2235 } 2236 } else { 2237 vaddr_t a; 2238 for (a = start; a < end; a += PAGE_SIZE) 2239 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2240 } 2241 #endif 2242 } 2243 2244 /* 2245 * Mark all entries from first until end (exclusive) as pageable. 2246 * 2247 * Lock must be exclusive on entry and will not be touched. 
2248 */ 2249 void 2250 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2251 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2252 { 2253 struct vm_map_entry *iter; 2254 2255 for (iter = first; iter != end; 2256 iter = RBT_NEXT(uvm_map_addr, iter)) { 2257 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2258 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2259 continue; 2260 2261 iter->wired_count = 0; 2262 uvm_fault_unwire_locked(map, iter->start, iter->end); 2263 } 2264 } 2265 2266 /* 2267 * Mark all entries from first until end (exclusive) as wired. 2268 * 2269 * Lockflags determines the lock state on return from this function. 2270 * Lock must be exclusive on entry. 2271 */ 2272 int 2273 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2274 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2275 int lockflags) 2276 { 2277 struct vm_map_entry *iter; 2278 #ifdef DIAGNOSTIC 2279 unsigned int timestamp_save; 2280 #endif 2281 int error; 2282 2283 /* 2284 * Wire pages in two passes: 2285 * 2286 * 1: holding the write lock, we create any anonymous maps that need 2287 * to be created. then we clip each map entry to the region to 2288 * be wired and increment its wiring count. 2289 * 2290 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2291 * in the pages for any newly wired area (wired_count == 1). 2292 * 2293 * downgrading to a read lock for uvm_fault_wire avoids a possible 2294 * deadlock with another thread that may have faulted on one of 2295 * the pages to be wired (it would mark the page busy, blocking 2296 * us, then in turn block on the map lock that we hold). 2297 * because we keep the read lock on the map, the copy-on-write 2298 * status of the entries we modify here cannot change. 2299 */ 2300 for (iter = first; iter != end; 2301 iter = RBT_NEXT(uvm_map_addr, iter)) { 2302 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2303 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2304 iter->protection == PROT_NONE) 2305 continue; 2306 2307 /* 2308 * Perform actions of vm_map_lookup that need the write lock. 2309 * - create an anonymous map for copy-on-write 2310 * - anonymous map for zero-fill 2311 * Skip submaps. 2312 */ 2313 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2314 UVM_ET_ISNEEDSCOPY(iter) && 2315 ((iter->protection & PROT_WRITE) || 2316 iter->object.uvm_obj == NULL)) { 2317 amap_copy(map, iter, M_WAITOK, 2318 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2319 iter->start, iter->end); 2320 } 2321 iter->wired_count++; 2322 } 2323 2324 /* 2325 * Pass 2. 2326 */ 2327 #ifdef DIAGNOSTIC 2328 timestamp_save = map->timestamp; 2329 #endif 2330 vm_map_busy(map); 2331 vm_map_downgrade(map); 2332 2333 error = 0; 2334 for (iter = first; error == 0 && iter != end; 2335 iter = RBT_NEXT(uvm_map_addr, iter)) { 2336 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2337 iter->protection == PROT_NONE) 2338 continue; 2339 2340 error = uvm_fault_wire(map, iter->start, iter->end, 2341 iter->protection); 2342 } 2343 2344 if (error) { 2345 /* 2346 * uvm_fault_wire failure 2347 * 2348 * Reacquire lock and undo our work. 2349 */ 2350 vm_map_upgrade(map); 2351 vm_map_unbusy(map); 2352 #ifdef DIAGNOSTIC 2353 if (timestamp_save != map->timestamp) 2354 panic("uvm_map_pageable_wire: stale map"); 2355 #endif 2356 2357 /* 2358 * first is no longer needed to restart loops. 2359 * Use it as iterator to unmap successful mappings. 
2360 */ 2361 for (; first != iter; 2362 first = RBT_NEXT(uvm_map_addr, first)) { 2363 if (UVM_ET_ISHOLE(first) || 2364 first->start == first->end || 2365 first->protection == PROT_NONE) 2366 continue; 2367 2368 first->wired_count--; 2369 if (!VM_MAPENT_ISWIRED(first)) { 2370 uvm_fault_unwire_locked(map, 2371 iter->start, iter->end); 2372 } 2373 } 2374 2375 /* decrease counter in the rest of the entries */ 2376 for (; iter != end; 2377 iter = RBT_NEXT(uvm_map_addr, iter)) { 2378 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2379 iter->protection == PROT_NONE) 2380 continue; 2381 2382 iter->wired_count--; 2383 } 2384 2385 if ((lockflags & UVM_LK_EXIT) == 0) 2386 vm_map_unlock(map); 2387 return error; 2388 } 2389 2390 /* We are currently holding a read lock. */ 2391 if ((lockflags & UVM_LK_EXIT) == 0) { 2392 vm_map_unbusy(map); 2393 vm_map_unlock_read(map); 2394 } else { 2395 vm_map_upgrade(map); 2396 vm_map_unbusy(map); 2397 #ifdef DIAGNOSTIC 2398 if (timestamp_save != map->timestamp) 2399 panic("uvm_map_pageable_wire: stale map"); 2400 #endif 2401 } 2402 return 0; 2403 } 2404 2405 /* 2406 * uvm_map_pageable: set pageability of a range in a map. 2407 * 2408 * Flags: 2409 * UVM_LK_ENTER: map is already locked by caller 2410 * UVM_LK_EXIT: don't unlock map on exit 2411 * 2412 * The full range must be in use (entries may not have fspace != 0). 2413 * UVM_ET_HOLE counts as unmapped. 2414 */ 2415 int 2416 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2417 boolean_t new_pageable, int lockflags) 2418 { 2419 struct vm_map_entry *first, *last, *tmp; 2420 int error; 2421 2422 start = trunc_page(start); 2423 end = round_page(end); 2424 2425 if (start > end) 2426 return EINVAL; 2427 if (start == end) 2428 return 0; /* nothing to do */ 2429 if (start < map->min_offset) 2430 return EFAULT; /* why? see first XXX below */ 2431 if (end > map->max_offset) 2432 return EINVAL; /* why? see second XXX below */ 2433 2434 KASSERT(map->flags & VM_MAP_PAGEABLE); 2435 if ((lockflags & UVM_LK_ENTER) == 0) 2436 vm_map_lock(map); 2437 2438 /* 2439 * Find first entry. 2440 * 2441 * Initial test on start is different, because of the different 2442 * error returned. Rest is tested further down. 2443 */ 2444 first = uvm_map_entrybyaddr(&map->addr, start); 2445 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2446 /* 2447 * XXX if the first address is not mapped, it is EFAULT? 2448 */ 2449 error = EFAULT; 2450 goto out; 2451 } 2452 2453 /* Check that the range has no holes. */ 2454 for (last = first; last != NULL && last->start < end; 2455 last = RBT_NEXT(uvm_map_addr, last)) { 2456 if (UVM_ET_ISHOLE(last) || 2457 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2458 /* 2459 * XXX unmapped memory in range, why is it EINVAL 2460 * instead of EFAULT? 2461 */ 2462 error = EINVAL; 2463 goto out; 2464 } 2465 } 2466 2467 /* 2468 * Last ended at the first entry after the range. 2469 * Move back one step. 2470 * 2471 * Note that last may be NULL. 2472 */ 2473 if (last == NULL) { 2474 last = RBT_MAX(uvm_map_addr, &map->addr); 2475 if (last->end < end) { 2476 error = EINVAL; 2477 goto out; 2478 } 2479 } else { 2480 KASSERT(last != first); 2481 last = RBT_PREV(uvm_map_addr, last); 2482 } 2483 2484 /* Wire/unwire pages here. */ 2485 if (new_pageable) { 2486 /* 2487 * Mark pageable. 2488 * entries that are not wired are untouched. 2489 */ 2490 if (VM_MAPENT_ISWIRED(first)) 2491 UVM_MAP_CLIP_START(map, first, start); 2492 /* 2493 * Split last at end. 
2494 * Make tmp be the first entry after what is to be touched. 2495 * If last is not wired, don't touch it. 2496 */ 2497 if (VM_MAPENT_ISWIRED(last)) { 2498 UVM_MAP_CLIP_END(map, last, end); 2499 tmp = RBT_NEXT(uvm_map_addr, last); 2500 } else 2501 tmp = last; 2502 2503 uvm_map_pageable_pgon(map, first, tmp, start, end); 2504 error = 0; 2505 2506 out: 2507 if ((lockflags & UVM_LK_EXIT) == 0) 2508 vm_map_unlock(map); 2509 return error; 2510 } else { 2511 /* 2512 * Mark entries wired. 2513 * entries are always touched (because recovery needs this). 2514 */ 2515 if (!VM_MAPENT_ISWIRED(first)) 2516 UVM_MAP_CLIP_START(map, first, start); 2517 /* 2518 * Split last at end. 2519 * Make tmp be the first entry after what is to be touched. 2520 * If last is not wired, don't touch it. 2521 */ 2522 if (!VM_MAPENT_ISWIRED(last)) { 2523 UVM_MAP_CLIP_END(map, last, end); 2524 tmp = RBT_NEXT(uvm_map_addr, last); 2525 } else 2526 tmp = last; 2527 2528 return uvm_map_pageable_wire(map, first, tmp, start, end, 2529 lockflags); 2530 } 2531 } 2532 2533 /* 2534 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2535 * all mapped regions. 2536 * 2537 * Map must not be locked. 2538 * If no flags are specified, all ragions are unwired. 2539 */ 2540 int 2541 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2542 { 2543 vsize_t size; 2544 struct vm_map_entry *iter; 2545 2546 KASSERT(map->flags & VM_MAP_PAGEABLE); 2547 vm_map_lock(map); 2548 2549 if (flags == 0) { 2550 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2551 NULL, map->min_offset, map->max_offset); 2552 2553 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2554 vm_map_unlock(map); 2555 return 0; 2556 } 2557 2558 if (flags & MCL_FUTURE) 2559 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2560 if (!(flags & MCL_CURRENT)) { 2561 vm_map_unlock(map); 2562 return 0; 2563 } 2564 2565 /* 2566 * Count number of pages in all non-wired entries. 2567 * If the number exceeds the limit, abort. 2568 */ 2569 size = 0; 2570 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2571 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2572 continue; 2573 2574 size += iter->end - iter->start; 2575 } 2576 2577 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2578 vm_map_unlock(map); 2579 return ENOMEM; 2580 } 2581 2582 /* XXX non-pmap_wired_count case must be handled by caller */ 2583 #ifdef pmap_wired_count 2584 if (limit != 0 && 2585 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2586 vm_map_unlock(map); 2587 return ENOMEM; 2588 } 2589 #endif 2590 2591 /* 2592 * uvm_map_pageable_wire will release lcok 2593 */ 2594 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2595 NULL, map->min_offset, map->max_offset, 0); 2596 } 2597 2598 /* 2599 * Initialize map. 2600 * 2601 * Allocates sufficient entries to describe the free memory in the map. 2602 */ 2603 void 2604 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2605 { 2606 int i; 2607 2608 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2609 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2610 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2611 2612 /* 2613 * Update parameters. 2614 * 2615 * This code handles (vaddr_t)-1 and other page mask ending addresses 2616 * properly. 2617 * We lose the top page if the full virtual address space is used. 
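* Example: max == (vaddr_t)-1 wraps to 0 after the increment below; the
* overflow is detected and max is pulled back by one page, so the map
* ends one page short of the top of the address space.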
2618 */ 2619 if (max & (vaddr_t)PAGE_MASK) { 2620 max += 1; 2621 if (max == 0) /* overflow */ 2622 max -= PAGE_SIZE; 2623 } 2624 2625 RBT_INIT(uvm_map_addr, &map->addr); 2626 map->uaddr_exe = NULL; 2627 for (i = 0; i < nitems(map->uaddr_any); ++i) 2628 map->uaddr_any[i] = NULL; 2629 map->uaddr_brk_stack = NULL; 2630 2631 map->size = 0; 2632 map->ref_count = 0; 2633 map->min_offset = min; 2634 map->max_offset = max; 2635 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2636 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2637 map->flags = flags; 2638 map->timestamp = 0; 2639 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2640 mtx_init(&map->mtx, IPL_VM); 2641 mtx_init(&map->flags_lock, IPL_VM); 2642 2643 /* Configure the allocators. */ 2644 if (flags & VM_MAP_ISVMSPACE) 2645 uvm_map_setup_md(map); 2646 else 2647 map->uaddr_any[3] = &uaddr_kbootstrap; 2648 2649 /* 2650 * Fill map entries. 2651 * We do not need to write-lock the map here because only the current 2652 * thread sees it right now. Initialize ref_count to 0 above to avoid 2653 * bogus triggering of lock-not-held assertions. 2654 */ 2655 uvm_map_setup_entries(map); 2656 uvm_tree_sanity(map, __FILE__, __LINE__); 2657 map->ref_count = 1; 2658 } 2659 2660 /* 2661 * Destroy the map. 2662 * 2663 * This is the inverse operation to uvm_map_setup. 2664 */ 2665 void 2666 uvm_map_teardown(struct vm_map *map) 2667 { 2668 struct uvm_map_deadq dead_entries; 2669 struct vm_map_entry *entry, *tmp; 2670 #ifdef VMMAP_DEBUG 2671 size_t numq, numt; 2672 #endif 2673 int i; 2674 2675 KERNEL_ASSERT_LOCKED(); 2676 KERNEL_UNLOCK(); 2677 KERNEL_ASSERT_UNLOCKED(); 2678 2679 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2680 2681 /* Remove address selectors. */ 2682 uvm_addr_destroy(map->uaddr_exe); 2683 map->uaddr_exe = NULL; 2684 for (i = 0; i < nitems(map->uaddr_any); i++) { 2685 uvm_addr_destroy(map->uaddr_any[i]); 2686 map->uaddr_any[i] = NULL; 2687 } 2688 uvm_addr_destroy(map->uaddr_brk_stack); 2689 map->uaddr_brk_stack = NULL; 2690 2691 /* 2692 * Remove entries. 2693 * 2694 * The following is based on graph breadth-first search. 2695 * 2696 * In color terms: 2697 * - the dead_entries set contains all nodes that are reachable 2698 * (i.e. both the black and the grey nodes) 2699 * - any entry not in dead_entries is white 2700 * - any entry that appears in dead_entries before entry, 2701 * is black, the rest is grey. 2702 * The set [entry, end] is also referred to as the wavefront. 2703 * 2704 * Since the tree is always a fully connected graph, the breadth-first 2705 * search guarantees that each vmmap_entry is visited exactly once. 2706 * The vm_map is broken down in linear time. 2707 */ 2708 TAILQ_INIT(&dead_entries); 2709 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2710 DEAD_ENTRY_PUSH(&dead_entries, entry); 2711 while (entry != NULL) { 2712 sched_pause(yield); 2713 uvm_unmap_kill_entry(map, entry); 2714 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2715 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2716 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2717 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2718 /* Update wave-front. 
*/
2719 entry = TAILQ_NEXT(entry, dfree.deadq);
2720 }
2721
2722 #ifdef VMMAP_DEBUG
2723 numt = numq = 0;
2724 RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2725 numt++;
2726 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2727 numq++;
2728 KASSERT(numt == numq);
2729 #endif
2730 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2731
2732 KERNEL_LOCK();
2733
2734 pmap_destroy(map->pmap);
2735 map->pmap = NULL;
2736 }
2737
2738 /*
2739 * Populate map with free-memory entries.
2740 *
2741 * Map must be initialized and empty.
2742 */
2743 void
2744 uvm_map_setup_entries(struct vm_map *map)
2745 {
2746 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2747
2748 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2749 }
2750
2751 /*
2752 * Split entry at given address.
2753 *
2754 * orig: entry that is to be split.
2755 * next: a newly allocated map entry that is not linked.
2756 * split: address at which the split is done.
2757 */
2758 void
2759 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2760 struct vm_map_entry *next, vaddr_t split)
2761 {
2762 struct uvm_addr_state *free, *free_before;
2763 vsize_t adj;
2764
2765 if ((split & PAGE_MASK) != 0) {
2766 panic("uvm_map_splitentry: split address 0x%lx "
2767 "not on page boundary!", split);
2768 }
2769 KDASSERT(map != NULL && orig != NULL && next != NULL);
2770 uvm_tree_sanity(map, __FILE__, __LINE__);
2771 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2772
2773 #ifdef VMMAP_DEBUG
2774 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2775 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2776 #endif /* VMMAP_DEBUG */
2777
2778 /*
2779 * Free space will change, unlink from free space tree.
2780 */
2781 free = uvm_map_uaddr_e(map, orig);
2782 uvm_mapent_free_remove(map, free, orig);
2783
2784 adj = split - orig->start;
2785
2786 uvm_mapent_copy(orig, next);
2787 if (split >= orig->end) {
2788 next->etype = 0;
2789 next->offset = 0;
2790 next->wired_count = 0;
2791 next->start = next->end = split;
2792 next->guard = 0;
2793 next->fspace = VMMAP_FREE_END(orig) - split;
2794 next->aref.ar_amap = NULL;
2795 next->aref.ar_pageoff = 0;
2796 orig->guard = MIN(orig->guard, split - orig->end);
2797 orig->fspace = split - VMMAP_FREE_START(orig);
2798 } else {
2799 orig->fspace = 0;
2800 orig->guard = 0;
2801 orig->end = next->start = split;
2802
2803 if (next->aref.ar_amap) {
2804 KERNEL_LOCK();
2805 amap_splitref(&orig->aref, &next->aref, adj);
2806 KERNEL_UNLOCK();
2807 }
2808 if (UVM_ET_ISSUBMAP(orig)) {
2809 uvm_map_reference(next->object.sub_map);
2810 next->offset += adj;
2811 } else if (UVM_ET_ISOBJ(orig)) {
2812 if (next->object.uvm_obj->pgops &&
2813 next->object.uvm_obj->pgops->pgo_reference) {
2814 KERNEL_LOCK();
2815 next->object.uvm_obj->pgops->pgo_reference(
2816 next->object.uvm_obj);
2817 KERNEL_UNLOCK();
2818 }
2819 next->offset += adj;
2820 }
2821 }
2822
2823 /*
2824 * Link next into address tree.
2825 * Link orig and next into free-space tree.
2826 *
2827 * Don't insert 'next' into the addr tree until orig has been linked,
2828 * in case the free-list looks at adjacent entries in the addr tree
2829 * for its decisions.
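* At this point orig keeps the part of the range below split and next
* takes over from split onwards; the code below relinks them into the
* free-space and address trees.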
2830 */ 2831 if (orig->fspace > 0) 2832 free_before = free; 2833 else 2834 free_before = uvm_map_uaddr_e(map, orig); 2835 uvm_mapent_free_insert(map, free_before, orig); 2836 uvm_mapent_addr_insert(map, next); 2837 uvm_mapent_free_insert(map, free, next); 2838 2839 uvm_tree_sanity(map, __FILE__, __LINE__); 2840 } 2841 2842 2843 #ifdef VMMAP_DEBUG 2844 2845 void 2846 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2847 char *file, int line) 2848 { 2849 char* map_special; 2850 2851 if (test) 2852 return; 2853 2854 if (map == kernel_map) 2855 map_special = " (kernel_map)"; 2856 else if (map == kmem_map) 2857 map_special = " (kmem_map)"; 2858 else 2859 map_special = ""; 2860 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2861 line, test_str); 2862 } 2863 2864 /* 2865 * Check that map is sane. 2866 */ 2867 void 2868 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2869 { 2870 struct vm_map_entry *iter; 2871 vaddr_t addr; 2872 vaddr_t min, max, bound; /* Bounds checker. */ 2873 struct uvm_addr_state *free; 2874 2875 addr = vm_map_min(map); 2876 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2877 /* 2878 * Valid start, end. 2879 * Catch overflow for end+fspace. 2880 */ 2881 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2882 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2883 2884 /* May not be empty. */ 2885 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2886 file, line); 2887 2888 /* Addresses for entry must lie within map boundaries. */ 2889 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2890 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2891 2892 /* Tree may not have gaps. */ 2893 UVM_ASSERT(map, iter->start == addr, file, line); 2894 addr = VMMAP_FREE_END(iter); 2895 2896 /* 2897 * Free space may not cross boundaries, unless the same 2898 * free list is used on both sides of the border. 2899 */ 2900 min = VMMAP_FREE_START(iter); 2901 max = VMMAP_FREE_END(iter); 2902 2903 while (min < max && 2904 (bound = uvm_map_boundary(map, min, max)) != max) { 2905 UVM_ASSERT(map, 2906 uvm_map_uaddr(map, bound - 1) == 2907 uvm_map_uaddr(map, bound), 2908 file, line); 2909 min = bound; 2910 } 2911 2912 free = uvm_map_uaddr_e(map, iter); 2913 if (free) { 2914 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2915 file, line); 2916 } else { 2917 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2918 file, line); 2919 } 2920 } 2921 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2922 } 2923 2924 void 2925 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2926 { 2927 struct vm_map_entry *iter; 2928 vsize_t size; 2929 2930 size = 0; 2931 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2932 if (!UVM_ET_ISHOLE(iter)) 2933 size += iter->end - iter->start; 2934 } 2935 2936 if (map->size != size) 2937 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2938 UVM_ASSERT(map, map->size == size, file, line); 2939 2940 vmspace_validate(map); 2941 } 2942 2943 /* 2944 * This function validates the statistics on vmspace. 2945 */ 2946 void 2947 vmspace_validate(struct vm_map *map) 2948 { 2949 struct vmspace *vm; 2950 struct vm_map_entry *iter; 2951 vaddr_t imin, imax; 2952 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2953 vsize_t stack, heap; /* Measured sizes. 
*/ 2954 2955 if (!(map->flags & VM_MAP_ISVMSPACE)) 2956 return; 2957 2958 vm = (struct vmspace *)map; 2959 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2960 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2961 2962 stack = heap = 0; 2963 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2964 imin = imax = iter->start; 2965 2966 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2967 continue; 2968 2969 /* 2970 * Update stack, heap. 2971 * Keep in mind that (theoretically) the entries of 2972 * userspace and stack may be joined. 2973 */ 2974 while (imin != iter->end) { 2975 /* 2976 * Set imax to the first boundary crossed between 2977 * imin and stack addresses. 2978 */ 2979 imax = iter->end; 2980 if (imin < stack_begin && imax > stack_begin) 2981 imax = stack_begin; 2982 else if (imin < stack_end && imax > stack_end) 2983 imax = stack_end; 2984 2985 if (imin >= stack_begin && imin < stack_end) 2986 stack += imax - imin; 2987 else 2988 heap += imax - imin; 2989 imin = imax; 2990 } 2991 } 2992 2993 heap >>= PAGE_SHIFT; 2994 if (heap != vm->vm_dused) { 2995 printf("vmspace stack range: 0x%lx-0x%lx\n", 2996 stack_begin, stack_end); 2997 panic("vmspace_validate: vmspace.vm_dused invalid, " 2998 "expected %ld pgs, got %ld pgs in map %p", 2999 heap, vm->vm_dused, 3000 map); 3001 } 3002 } 3003 3004 #endif /* VMMAP_DEBUG */ 3005 3006 /* 3007 * uvm_map_init: init mapping system at boot time. note that we allocate 3008 * and init the static pool of structs vm_map_entry for the kernel here. 3009 */ 3010 void 3011 uvm_map_init(void) 3012 { 3013 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 3014 int lcv; 3015 3016 /* now set up static pool of kernel map entries ... */ 3017 mtx_init(&uvm_kmapent_mtx, IPL_VM); 3018 SLIST_INIT(&uvm.kentry_free); 3019 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 3020 SLIST_INSERT_HEAD(&uvm.kentry_free, 3021 &kernel_map_entry[lcv], daddrs.addr_kentry); 3022 } 3023 3024 /* initialize the map-related pools. */ 3025 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 3026 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 3027 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 3028 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 3029 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 3030 IPL_VM, 0, "vmmpekpl", NULL); 3031 pool_sethiwat(&uvm_map_entry_pool, 8192); 3032 3033 uvm_addr_init(); 3034 } 3035 3036 #if defined(DDB) 3037 3038 /* 3039 * DDB hooks 3040 */ 3041 3042 /* 3043 * uvm_map_printit: actually prints the map 3044 */ 3045 void 3046 uvm_map_printit(struct vm_map *map, boolean_t full, 3047 int (*pr)(const char *, ...)) 3048 { 3049 struct vmspace *vm; 3050 struct vm_map_entry *entry; 3051 struct uvm_addr_state *free; 3052 int in_free, i; 3053 char buf[8]; 3054 3055 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 3056 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 3057 map->b_start, map->b_end); 3058 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 3059 map->s_start, map->s_end); 3060 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 3061 map->size, map->ref_count, map->timestamp, 3062 map->flags); 3063 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 3064 pmap_resident_count(map->pmap)); 3065 3066 /* struct vmspace handling. 
*/ 3067 if (map->flags & VM_MAP_ISVMSPACE) { 3068 vm = (struct vmspace *)map; 3069 3070 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 3071 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 3072 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 3073 vm->vm_tsize, vm->vm_dsize); 3074 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 3075 vm->vm_taddr, vm->vm_daddr); 3076 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 3077 vm->vm_maxsaddr, vm->vm_minsaddr); 3078 } 3079 3080 if (!full) 3081 goto print_uaddr; 3082 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3083 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3084 entry, entry->start, entry->end, entry->object.uvm_obj, 3085 (long long)entry->offset, entry->aref.ar_amap, 3086 entry->aref.ar_pageoff); 3087 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, " 3088 "wc=%d, adv=%d\n", 3089 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3090 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 3091 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3092 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3093 entry->protection, entry->max_protection, 3094 entry->inheritance, entry->wired_count, entry->advice); 3095 3096 free = uvm_map_uaddr_e(map, entry); 3097 in_free = (free != NULL); 3098 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3099 "free=0x%lx-0x%lx\n", 3100 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3101 in_free ? 'T' : 'F', 3102 entry->guard, 3103 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3104 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3105 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3106 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3107 if (free) { 3108 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3109 free->uaddr_minaddr, free->uaddr_maxaddr, 3110 free->uaddr_functions->uaddr_name); 3111 } 3112 } 3113 3114 print_uaddr: 3115 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3116 for (i = 0; i < nitems(map->uaddr_any); i++) { 3117 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3118 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3119 } 3120 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3121 } 3122 3123 /* 3124 * uvm_object_printit: actually prints the object 3125 */ 3126 void 3127 uvm_object_printit(uobj, full, pr) 3128 struct uvm_object *uobj; 3129 boolean_t full; 3130 int (*pr)(const char *, ...); 3131 { 3132 struct vm_page *pg; 3133 int cnt = 0; 3134 3135 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3136 uobj, uobj->pgops, uobj->uo_npages); 3137 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3138 (*pr)("refs=<SYSTEM>\n"); 3139 else 3140 (*pr)("refs=%d\n", uobj->uo_refs); 3141 3142 if (!full) { 3143 return; 3144 } 3145 (*pr)(" PAGES <pg,offset>:\n "); 3146 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3147 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3148 if ((cnt % 3) == 2) { 3149 (*pr)("\n "); 3150 } 3151 cnt++; 3152 } 3153 if ((cnt % 3) != 2) { 3154 (*pr)("\n"); 3155 } 3156 } 3157 3158 /* 3159 * uvm_page_printit: actually print the page 3160 */ 3161 static const char page_flagbits[] = 3162 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3163 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3164 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3165 3166 void 3167 uvm_page_printit(pg, full, pr) 3168 struct vm_page *pg; 3169 boolean_t full; 3170 int (*pr)(const char *, ...); 3171 { 3172 struct vm_page *tpg; 3173 struct uvm_object *uobj; 3174 struct pglist *pgl; 3175 3176 (*pr)("PAGE %p:\n", pg); 3177 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3178 
pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3179 (long long)pg->phys_addr); 3180 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3181 pg->uobject, pg->uanon, (long long)pg->offset); 3182 #if defined(UVM_PAGE_TRKOWN) 3183 if (pg->pg_flags & PG_BUSY) 3184 (*pr)(" owning thread = %d, tag=%s", 3185 pg->owner, pg->owner_tag); 3186 else 3187 (*pr)(" page not busy, no owner"); 3188 #else 3189 (*pr)(" [page ownership tracking disabled]"); 3190 #endif 3191 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3192 3193 if (!full) 3194 return; 3195 3196 /* cross-verify object/anon */ 3197 if ((pg->pg_flags & PQ_FREE) == 0) { 3198 if (pg->pg_flags & PQ_ANON) { 3199 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3200 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3201 (pg->uanon) ? pg->uanon->an_page : NULL); 3202 else 3203 (*pr)(" anon backpointer is OK\n"); 3204 } else { 3205 uobj = pg->uobject; 3206 if (uobj) { 3207 (*pr)(" checking object list\n"); 3208 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3209 if (tpg == pg) { 3210 break; 3211 } 3212 } 3213 if (tpg) 3214 (*pr)(" page found on object list\n"); 3215 else 3216 (*pr)(" >>> PAGE NOT FOUND " 3217 "ON OBJECT LIST! <<<\n"); 3218 } 3219 } 3220 } 3221 3222 /* cross-verify page queue */ 3223 if (pg->pg_flags & PQ_FREE) { 3224 if (uvm_pmr_isfree(pg)) 3225 (*pr)(" page found in uvm_pmemrange\n"); 3226 else 3227 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3228 pgl = NULL; 3229 } else if (pg->pg_flags & PQ_INACTIVE) { 3230 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3231 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3232 } else if (pg->pg_flags & PQ_ACTIVE) { 3233 pgl = &uvm.page_active; 3234 } else { 3235 pgl = NULL; 3236 } 3237 3238 if (pgl) { 3239 (*pr)(" checking pageq list\n"); 3240 TAILQ_FOREACH(tpg, pgl, pageq) { 3241 if (tpg == pg) { 3242 break; 3243 } 3244 } 3245 if (tpg) 3246 (*pr)(" page found on pageq list\n"); 3247 else 3248 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3249 } 3250 } 3251 #endif 3252 3253 /* 3254 * uvm_map_protect: change map protection 3255 * 3256 * => set_max means set max_protection. 3257 * => map must be unlocked. 3258 */ 3259 int 3260 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3261 vm_prot_t new_prot, boolean_t set_max) 3262 { 3263 struct vm_map_entry *first, *iter; 3264 vm_prot_t old_prot; 3265 vm_prot_t mask; 3266 int error; 3267 3268 if (start > end) 3269 return EINVAL; 3270 start = MAX(start, map->min_offset); 3271 end = MIN(end, map->max_offset); 3272 if (start >= end) 3273 return 0; 3274 3275 error = 0; 3276 vm_map_lock(map); 3277 3278 /* 3279 * Set up first and last. 3280 * - first will contain first entry at or after start. 3281 */ 3282 first = uvm_map_entrybyaddr(&map->addr, start); 3283 KDASSERT(first != NULL); 3284 if (first->end <= start) 3285 first = RBT_NEXT(uvm_map_addr, first); 3286 3287 /* First, check for protection violations. */ 3288 for (iter = first; iter != NULL && iter->start < end; 3289 iter = RBT_NEXT(uvm_map_addr, iter)) { 3290 /* Treat memory holes as free space. */ 3291 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3292 continue; 3293 3294 if (UVM_ET_ISSUBMAP(iter)) { 3295 error = EINVAL; 3296 goto out; 3297 } 3298 if ((new_prot & iter->max_protection) != new_prot) { 3299 error = EACCES; 3300 goto out; 3301 } 3302 if (map == kernel_map && 3303 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3304 panic("uvm_map_protect: kernel map W^X violation requested"); 3305 } 3306 3307 /* Fix protections. 
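* Second pass: clip the affected entries to the range and apply the new
* protection to both the map entries and the pmap.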
*/ 3308 for (iter = first; iter != NULL && iter->start < end; 3309 iter = RBT_NEXT(uvm_map_addr, iter)) { 3310 /* Treat memory holes as free space. */ 3311 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3312 continue; 3313 3314 old_prot = iter->protection; 3315 3316 /* 3317 * Skip adapting protection iff old and new protection 3318 * are equal. 3319 */ 3320 if (set_max) { 3321 if (old_prot == (new_prot & old_prot) && 3322 iter->max_protection == new_prot) 3323 continue; 3324 } else { 3325 if (old_prot == new_prot) 3326 continue; 3327 } 3328 3329 UVM_MAP_CLIP_START(map, iter, start); 3330 UVM_MAP_CLIP_END(map, iter, end); 3331 3332 if (set_max) { 3333 iter->max_protection = new_prot; 3334 iter->protection &= new_prot; 3335 } else 3336 iter->protection = new_prot; 3337 3338 /* 3339 * update physical map if necessary. worry about copy-on-write 3340 * here -- CHECK THIS XXX 3341 */ 3342 if (iter->protection != old_prot) { 3343 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3344 ~PROT_WRITE : PROT_MASK; 3345 3346 /* XXX should only wserial++ if no split occurs */ 3347 if (iter->protection & PROT_WRITE) 3348 map->wserial++; 3349 3350 /* update pmap */ 3351 if ((iter->protection & mask) == PROT_NONE && 3352 VM_MAPENT_ISWIRED(iter)) { 3353 /* 3354 * TODO(ariane) this is stupid. wired_count 3355 * is 0 if not wired, otherwise anything 3356 * larger than 0 (incremented once each time 3357 * wire is called). 3358 * Mostly to be able to undo the damage on 3359 * failure. Not the actually be a wired 3360 * refcounter... 3361 * Originally: iter->wired_count--; 3362 * (don't we have to unwire this in the pmap 3363 * as well?) 3364 */ 3365 iter->wired_count = 0; 3366 } 3367 pmap_protect(map->pmap, iter->start, iter->end, 3368 iter->protection & mask); 3369 } 3370 3371 /* 3372 * If the map is configured to lock any future mappings, 3373 * wire this entry now if the old protection was PROT_NONE 3374 * and the new protection is not PROT_NONE. 3375 */ 3376 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3377 VM_MAPENT_ISWIRED(iter) == 0 && 3378 old_prot == PROT_NONE && 3379 new_prot != PROT_NONE) { 3380 if (uvm_map_pageable(map, iter->start, iter->end, 3381 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3382 /* 3383 * If locking the entry fails, remember the 3384 * error if it's the first one. Note we 3385 * still continue setting the protection in 3386 * the map, but it will return the resource 3387 * storage condition regardless. 3388 * 3389 * XXX Ignore what the actual error is, 3390 * XXX just call it a resource shortage 3391 * XXX so that it doesn't get confused 3392 * XXX what uvm_map_protect() itself would 3393 * XXX normally return. 3394 */ 3395 error = ENOMEM; 3396 } 3397 } 3398 } 3399 pmap_update(map->pmap); 3400 3401 out: 3402 vm_map_unlock(map); 3403 return error; 3404 } 3405 3406 /* 3407 * uvmspace_alloc: allocate a vmspace structure. 3408 * 3409 * - structure includes vm_map and pmap 3410 * - XXX: no locking on this structure 3411 * - refcnt set to 1, rest must be init'd by caller 3412 */ 3413 struct vmspace * 3414 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3415 boolean_t remove_holes) 3416 { 3417 struct vmspace *vm; 3418 3419 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3420 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3421 return (vm); 3422 } 3423 3424 /* 3425 * uvmspace_init: initialize a vmspace structure. 
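* (pmap may be NULL, in which case a fresh pmap is created; a non-NULL
* pmap must be pmap_kernel() and gains an extra reference)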
3426 * 3427 * - XXX: no locking on this structure 3428 * - refcnt set to 1, rest must be init'd by caller 3429 */ 3430 void 3431 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3432 boolean_t pageable, boolean_t remove_holes) 3433 { 3434 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3435 3436 if (pmap) 3437 pmap_reference(pmap); 3438 else 3439 pmap = pmap_create(); 3440 vm->vm_map.pmap = pmap; 3441 3442 uvm_map_setup(&vm->vm_map, min, max, 3443 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3444 3445 vm->vm_refcnt = 1; 3446 3447 if (remove_holes) 3448 pmap_remove_holes(vm); 3449 } 3450 3451 /* 3452 * uvmspace_share: share a vmspace between two processes 3453 * 3454 * - XXX: no locking on vmspace 3455 * - used for vfork 3456 */ 3457 3458 struct vmspace * 3459 uvmspace_share(struct process *pr) 3460 { 3461 struct vmspace *vm = pr->ps_vmspace; 3462 3463 vm->vm_refcnt++; 3464 return vm; 3465 } 3466 3467 /* 3468 * uvmspace_exec: the process wants to exec a new program 3469 * 3470 * - XXX: no locking on vmspace 3471 */ 3472 3473 void 3474 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3475 { 3476 struct process *pr = p->p_p; 3477 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3478 struct vm_map *map = &ovm->vm_map; 3479 struct uvm_map_deadq dead_entries; 3480 3481 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3482 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3483 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3484 3485 pmap_unuse_final(p); /* before stack addresses go away */ 3486 TAILQ_INIT(&dead_entries); 3487 3488 /* see if more than one process is using this vmspace... */ 3489 if (ovm->vm_refcnt == 1) { 3490 /* 3491 * If pr is the only process using its vmspace then 3492 * we can safely recycle that vmspace for the program 3493 * that is being exec'd. 3494 */ 3495 3496 #ifdef SYSVSHM 3497 /* 3498 * SYSV SHM semantics require us to kill all segments on an exec 3499 */ 3500 if (ovm->vm_shm) 3501 shmexit(ovm); 3502 #endif 3503 3504 /* 3505 * POSIX 1003.1b -- "lock future mappings" is revoked 3506 * when a process execs another program image. 3507 */ 3508 vm_map_lock(map); 3509 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3510 3511 /* 3512 * now unmap the old program 3513 * 3514 * Instead of attempting to keep the map valid, we simply 3515 * nuke all entries and ask uvm_map_setup to reinitialize 3516 * the map to the new boundaries. 3517 * 3518 * uvm_unmap_remove will actually nuke all entries for us 3519 * (as in, not replace them with free-memory entries). 3520 */ 3521 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3522 &dead_entries, TRUE, FALSE); 3523 3524 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3525 3526 /* Nuke statistics and boundaries. */ 3527 memset(&ovm->vm_startcopy, 0, 3528 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3529 3530 3531 if (end & (vaddr_t)PAGE_MASK) { 3532 end += 1; 3533 if (end == 0) /* overflow */ 3534 end -= PAGE_SIZE; 3535 } 3536 3537 /* Setup new boundaries and populate map with entries. */ 3538 map->min_offset = start; 3539 map->max_offset = end; 3540 uvm_map_setup_entries(map); 3541 vm_map_unlock(map); 3542 3543 /* but keep MMU holes unavailable */ 3544 pmap_remove_holes(ovm); 3545 } else { 3546 /* 3547 * pr's vmspace is being shared, so we can't reuse 3548 * it for pr since it is still being used for others. 3549 * allocate a new vmspace for pr 3550 */ 3551 nvm = uvmspace_alloc(start, end, 3552 (map->flags & VM_MAP_PAGEABLE) ? 
TRUE : FALSE, TRUE); 3553 3554 /* install new vmspace and drop our ref to the old one. */ 3555 pmap_deactivate(p); 3556 p->p_vmspace = pr->ps_vmspace = nvm; 3557 pmap_activate(p); 3558 3559 uvmspace_free(ovm); 3560 } 3561 3562 /* Release dead entries */ 3563 uvm_unmap_detach(&dead_entries, 0); 3564 } 3565 3566 /* 3567 * uvmspace_free: free a vmspace data structure 3568 * 3569 * - XXX: no locking on vmspace 3570 */ 3571 void 3572 uvmspace_free(struct vmspace *vm) 3573 { 3574 if (--vm->vm_refcnt == 0) { 3575 /* 3576 * lock the map, to wait out all other references to it. delete 3577 * all of the mappings and pages they hold, then call the pmap 3578 * module to reclaim anything left. 3579 */ 3580 #ifdef SYSVSHM 3581 /* Get rid of any SYSV shared memory segments. */ 3582 if (vm->vm_shm != NULL) 3583 shmexit(vm); 3584 #endif 3585 3586 uvm_map_teardown(&vm->vm_map); 3587 pool_put(&uvm_vmspace_pool, vm); 3588 } 3589 } 3590 3591 /* 3592 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3593 * srcmap to the address range [dstaddr, dstaddr + sz) in 3594 * dstmap. 3595 * 3596 * The whole address range in srcmap must be backed by an object 3597 * (no holes). 3598 * 3599 * If successful, the address ranges share memory and the destination 3600 * address range uses the protection flags in prot. 3601 * 3602 * This routine assumes that sz is a multiple of PAGE_SIZE and 3603 * that dstaddr and srcaddr are page-aligned. 3604 */ 3605 int 3606 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3607 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3608 { 3609 int ret = 0; 3610 vaddr_t unmap_end; 3611 vaddr_t dstva; 3612 vsize_t off, len, n = sz; 3613 struct vm_map_entry *first = NULL, *last = NULL; 3614 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3615 struct uvm_map_deadq dead; 3616 3617 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3618 return EINVAL; 3619 3620 TAILQ_INIT(&dead); 3621 vm_map_lock(dstmap); 3622 vm_map_lock_read(srcmap); 3623 3624 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3625 ret = ENOMEM; 3626 goto exit_unlock; 3627 } 3628 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3629 ret = EINVAL; 3630 goto exit_unlock; 3631 } 3632 3633 unmap_end = dstaddr; 3634 for (; src_entry != NULL; 3635 psrc_entry = src_entry, 3636 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3637 /* hole in address space, bail out */ 3638 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3639 break; 3640 if (src_entry->start >= srcaddr + sz) 3641 break; 3642 3643 if (UVM_ET_ISSUBMAP(src_entry)) 3644 panic("uvm_share: encountered a submap (illegal)"); 3645 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3646 UVM_ET_ISNEEDSCOPY(src_entry)) 3647 panic("uvm_share: non-copy_on_write map entries " 3648 "marked needs_copy (illegal)"); 3649 3650 dstva = dstaddr; 3651 if (src_entry->start > srcaddr) { 3652 dstva += src_entry->start - srcaddr; 3653 off = 0; 3654 } else 3655 off = srcaddr - src_entry->start; 3656 3657 if (n < src_entry->end - src_entry->start) 3658 len = n; 3659 else 3660 len = src_entry->end - src_entry->start; 3661 n -= len; 3662 3663 if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot, 3664 srcmap, src_entry, &dead) == NULL) 3665 break; 3666 3667 unmap_end = dstva + len; 3668 if (n == 0) 3669 goto exit_unlock; 3670 } 3671 3672 ret = EINVAL; 3673 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3674 3675 exit_unlock: 3676 vm_map_unlock_read(srcmap); 3677 vm_map_unlock(dstmap); 3678 
uvm_unmap_detach(&dead, 0); 3679 3680 return ret; 3681 } 3682 3683 /* 3684 * Clone map entry into other map. 3685 * 3686 * Mapping will be placed at dstaddr, for the same length. 3687 * Space must be available. 3688 * Reference counters are incremented. 3689 */ 3690 struct vm_map_entry * 3691 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3692 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3693 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3694 int mapent_flags, int amap_share_flags) 3695 { 3696 struct vm_map_entry *new_entry, *first, *last; 3697 3698 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3699 3700 /* Create new entry (linked in on creation). Fill in first, last. */ 3701 first = last = NULL; 3702 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3703 panic("uvmspace_fork: no space in map for " 3704 "entry in empty map"); 3705 } 3706 new_entry = uvm_map_mkentry(dstmap, first, last, 3707 dstaddr, dstlen, mapent_flags, dead, NULL); 3708 if (new_entry == NULL) 3709 return NULL; 3710 /* old_entry -> new_entry */ 3711 new_entry->object = old_entry->object; 3712 new_entry->offset = old_entry->offset; 3713 new_entry->aref = old_entry->aref; 3714 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3715 new_entry->protection = prot; 3716 new_entry->max_protection = maxprot; 3717 new_entry->inheritance = old_entry->inheritance; 3718 new_entry->advice = old_entry->advice; 3719 3720 /* gain reference to object backing the map (can't be a submap). */ 3721 if (new_entry->aref.ar_amap) { 3722 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3723 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3724 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3725 amap_share_flags); 3726 } 3727 3728 if (UVM_ET_ISOBJ(new_entry) && 3729 new_entry->object.uvm_obj->pgops->pgo_reference) { 3730 new_entry->offset += off; 3731 new_entry->object.uvm_obj->pgops->pgo_reference 3732 (new_entry->object.uvm_obj); 3733 } 3734 3735 return new_entry; 3736 } 3737 3738 struct vm_map_entry * 3739 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3740 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3741 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3742 { 3743 /* 3744 * If old_entry refers to a copy-on-write region that has not yet been 3745 * written to (needs_copy flag is set), then we need to allocate a new 3746 * amap for old_entry. 3747 * 3748 * If we do not do this, and the process owning old_entry does a copy-on 3749 * write later, old_entry and new_entry will refer to different memory 3750 * regions, and the memory between the processes is no longer shared. 3751 * 3752 * [in other words, we need to clear needs_copy] 3753 */ 3754 3755 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3756 /* get our own amap, clears needs_copy */ 3757 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3758 0, 0); 3759 /* XXXCDC: WAITOK??? */ 3760 } 3761 3762 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3763 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3764 } 3765 3766 /* 3767 * share the mapping: this means we want the old and 3768 * new entries to share amaps and backing objects. 
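* (called from uvmspace_fork() for MAP_INHERIT_SHARE entries)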
3769 */ 3770 struct vm_map_entry * 3771 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3772 struct vm_map *old_map, 3773 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3774 { 3775 struct vm_map_entry *new_entry; 3776 3777 new_entry = uvm_mapent_share(new_map, old_entry->start, 3778 old_entry->end - old_entry->start, 0, old_entry->protection, 3779 old_entry->max_protection, old_map, old_entry, dead); 3780 3781 /* 3782 * pmap_copy the mappings: this routine is optional 3783 * but if it is there it will reduce the number of 3784 * page faults in the new proc. 3785 */ 3786 if (!UVM_ET_ISHOLE(new_entry)) 3787 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3788 (new_entry->end - new_entry->start), new_entry->start); 3789 3790 return (new_entry); 3791 } 3792 3793 /* 3794 * copy-on-write the mapping (using mmap's 3795 * MAP_PRIVATE semantics) 3796 * 3797 * allocate new_entry, adjust reference counts. 3798 * (note that new references are read-only). 3799 */ 3800 struct vm_map_entry * 3801 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3802 struct vm_map *old_map, 3803 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3804 { 3805 struct vm_map_entry *new_entry; 3806 boolean_t protect_child; 3807 3808 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3809 old_entry->end - old_entry->start, 0, old_entry->protection, 3810 old_entry->max_protection, old_entry, dead, 0, 0); 3811 3812 new_entry->etype |= 3813 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3814 3815 /* 3816 * the new entry will need an amap. it will either 3817 * need to be copied from the old entry or created 3818 * from scratch (if the old entry does not have an 3819 * amap). can we defer this process until later 3820 * (by setting "needs_copy") or do we need to copy 3821 * the amap now? 3822 * 3823 * we must copy the amap now if any of the following 3824 * conditions hold: 3825 * 1. the old entry has an amap and that amap is 3826 * being shared. this means that the old (parent) 3827 * process is sharing the amap with another 3828 * process. if we do not clear needs_copy here 3829 * we will end up in a situation where both the 3830 * parent and child process are referring to the 3831 * same amap with "needs_copy" set. if the 3832 * parent write-faults, the fault routine will 3833 * clear "needs_copy" in the parent by allocating 3834 * a new amap. this is wrong because the 3835 * parent is supposed to be sharing the old amap 3836 * and the new amap will break that. 3837 * 3838 * 2. if the old entry has an amap and a non-zero 3839 * wire count then we are going to have to call 3840 * amap_cow_now to avoid page faults in the 3841 * parent process. since amap_cow_now requires 3842 * "needs_copy" to be clear we might as well 3843 * clear it here as well. 3844 * 3845 */ 3846 if (old_entry->aref.ar_amap != NULL && 3847 ((amap_flags(old_entry->aref.ar_amap) & 3848 AMAP_SHARED) != 0 || 3849 VM_MAPENT_ISWIRED(old_entry))) { 3850 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3851 0, 0); 3852 /* XXXCDC: M_WAITOK ... ok? */ 3853 } 3854 3855 /* 3856 * if the parent's entry is wired down, then the 3857 * parent process does not want page faults on 3858 * access to that memory. this means that we 3859 * cannot do copy-on-write because we can't write 3860 * protect the old entry. in this case we 3861 * resolve all copy-on-write faults now, using 3862 * amap_cow_now. note that we have already 3863 * allocated any needed amap (above). 
3864 */ 3865 if (VM_MAPENT_ISWIRED(old_entry)) { 3866 /* 3867 * resolve all copy-on-write faults now 3868 * (note that there is nothing to do if 3869 * the old mapping does not have an amap). 3870 * XXX: is it worthwhile to bother with 3871 * pmap_copy in this case? 3872 */ 3873 if (old_entry->aref.ar_amap) 3874 amap_cow_now(new_map, new_entry); 3875 } else { 3876 if (old_entry->aref.ar_amap) { 3877 /* 3878 * setup mappings to trigger copy-on-write faults 3879 * we must write-protect the parent if it has 3880 * an amap and it is not already "needs_copy"... 3881 * if it is already "needs_copy" then the parent 3882 * has already been write-protected by a previous 3883 * fork operation. 3884 * 3885 * if we do not write-protect the parent, then 3886 * we must be sure to write-protect the child 3887 * after the pmap_copy() operation. 3888 * 3889 * XXX: pmap_copy should have some way of telling 3890 * us that it didn't do anything so we can avoid 3891 * calling pmap_protect needlessly. 3892 */ 3893 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3894 if (old_entry->max_protection & PROT_WRITE) { 3895 pmap_protect(old_map->pmap, 3896 old_entry->start, 3897 old_entry->end, 3898 old_entry->protection & 3899 ~PROT_WRITE); 3900 pmap_update(old_map->pmap); 3901 } 3902 old_entry->etype |= UVM_ET_NEEDSCOPY; 3903 } 3904 3905 /* parent must now be write-protected */ 3906 protect_child = FALSE; 3907 } else { 3908 /* 3909 * we only need to protect the child if the 3910 * parent has write access. 3911 */ 3912 if (old_entry->max_protection & PROT_WRITE) 3913 protect_child = TRUE; 3914 else 3915 protect_child = FALSE; 3916 } 3917 /* 3918 * copy the mappings 3919 * XXX: need a way to tell if this does anything 3920 */ 3921 if (!UVM_ET_ISHOLE(new_entry)) 3922 pmap_copy(new_map->pmap, old_map->pmap, 3923 new_entry->start, 3924 (old_entry->end - old_entry->start), 3925 old_entry->start); 3926 3927 /* protect the child's mappings if necessary */ 3928 if (protect_child) { 3929 pmap_protect(new_map->pmap, new_entry->start, 3930 new_entry->end, 3931 new_entry->protection & 3932 ~PROT_WRITE); 3933 } 3934 } 3935 3936 return (new_entry); 3937 } 3938 3939 /* 3940 * zero the mapping: the new entry will be zero initialized 3941 */ 3942 struct vm_map_entry * 3943 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3944 struct vm_map *old_map, 3945 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3946 { 3947 struct vm_map_entry *new_entry; 3948 3949 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3950 old_entry->end - old_entry->start, 0, old_entry->protection, 3951 old_entry->max_protection, old_entry, dead, 0, 0); 3952 3953 new_entry->etype |= 3954 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3955 3956 if (new_entry->aref.ar_amap) { 3957 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3958 atop(new_entry->end - new_entry->start), 0); 3959 new_entry->aref.ar_amap = NULL; 3960 new_entry->aref.ar_pageoff = 0; 3961 } 3962 3963 if (UVM_ET_ISOBJ(new_entry)) { 3964 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3965 new_entry->object.uvm_obj->pgops->pgo_detach( 3966 new_entry->object.uvm_obj); 3967 new_entry->object.uvm_obj = NULL; 3968 new_entry->etype &= ~UVM_ET_OBJ; 3969 } 3970 3971 return (new_entry); 3972 } 3973 3974 /* 3975 * uvmspace_fork: fork a process' main map 3976 * 3977 * => create a new vmspace for child process from parent. 3978 * => parent's map must not be locked. 
3979 */ 3980 struct vmspace * 3981 uvmspace_fork(struct process *pr) 3982 { 3983 struct vmspace *vm1 = pr->ps_vmspace; 3984 struct vmspace *vm2; 3985 struct vm_map *old_map = &vm1->vm_map; 3986 struct vm_map *new_map; 3987 struct vm_map_entry *old_entry, *new_entry; 3988 struct uvm_map_deadq dead; 3989 3990 vm_map_lock(old_map); 3991 3992 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3993 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3994 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3995 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3996 vm2->vm_dused = 0; /* Statistic managed by us. */ 3997 new_map = &vm2->vm_map; 3998 vm_map_lock(new_map); 3999 4000 /* go entry-by-entry */ 4001 TAILQ_INIT(&dead); 4002 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 4003 if (old_entry->start == old_entry->end) 4004 continue; 4005 4006 /* first, some sanity checks on the old entry */ 4007 if (UVM_ET_ISSUBMAP(old_entry)) { 4008 panic("fork: encountered a submap during fork " 4009 "(illegal)"); 4010 } 4011 4012 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 4013 UVM_ET_ISNEEDSCOPY(old_entry)) { 4014 panic("fork: non-copy_on_write map entry marked " 4015 "needs_copy (illegal)"); 4016 } 4017 4018 /* Apply inheritance. */ 4019 switch (old_entry->inheritance) { 4020 case MAP_INHERIT_SHARE: 4021 new_entry = uvm_mapent_forkshared(vm2, new_map, 4022 old_map, old_entry, &dead); 4023 break; 4024 case MAP_INHERIT_COPY: 4025 new_entry = uvm_mapent_forkcopy(vm2, new_map, 4026 old_map, old_entry, &dead); 4027 break; 4028 case MAP_INHERIT_ZERO: 4029 new_entry = uvm_mapent_forkzero(vm2, new_map, 4030 old_map, old_entry, &dead); 4031 break; 4032 default: 4033 continue; 4034 } 4035 4036 /* Update process statistics. */ 4037 if (!UVM_ET_ISHOLE(new_entry)) 4038 new_map->size += new_entry->end - new_entry->start; 4039 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 4040 vm2->vm_dused += uvmspace_dused( 4041 new_map, new_entry->start, new_entry->end); 4042 } 4043 } 4044 4045 vm_map_unlock(old_map); 4046 vm_map_unlock(new_map); 4047 4048 /* 4049 * This can actually happen, if multiple entries described a 4050 * space in which an entry was inherited. 4051 */ 4052 uvm_unmap_detach(&dead, 0); 4053 4054 #ifdef SYSVSHM 4055 if (vm1->vm_shm) 4056 shmfork(vm1, vm2); 4057 #endif 4058 4059 return vm2; 4060 } 4061 4062 /* 4063 * uvm_map_hint: return the beginning of the best area suitable for 4064 * creating a new mapping with "prot" protection. 4065 */ 4066 vaddr_t 4067 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 4068 vaddr_t maxaddr) 4069 { 4070 vaddr_t addr; 4071 vaddr_t spacing; 4072 4073 #ifdef __i386__ 4074 /* 4075 * If executable skip first two pages, otherwise start 4076 * after data + heap region. 4077 */ 4078 if ((prot & PROT_EXEC) != 0 && 4079 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4080 addr = (PAGE_SIZE*2) + 4081 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4082 return (round_page(addr)); 4083 } 4084 #endif 4085 4086 #if defined (__LP64__) 4087 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4088 #else 4089 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4090 #endif 4091 4092 /* 4093 * Start malloc/mmap after the brk. 
4094 	 */
4095 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4096 	addr = MAX(addr, minaddr);
4097 
4098 	if (addr < maxaddr) {
4099 		while (spacing > maxaddr - addr)
4100 			spacing >>= 1;
4101 	}
4102 	addr += arc4random() & spacing;
4103 	return (round_page(addr));
4104 }
4105 
4106 /*
4107  * uvm_map_submap: punch down part of a map into a submap
4108  *
4109  * => only the kernel_map is allowed to be submapped
4110  * => the purpose of submapping is to break up the locking granularity
4111  *	of a larger map
4112  * => the range specified must have been mapped previously with a uvm_map()
4113  *	call [with uobj==NULL] to create a blank map entry in the main map.
4114  *	[And it had better still be blank!]
4115  * => maps which contain submaps should never be copied or forked.
4116  * => to remove a submap, use uvm_unmap() on the main map
4117  *	and then uvm_map_deallocate() the submap.
4118  * => main map must be unlocked.
4119  * => submap must have been init'd and have a zero reference count.
4120  *	[need not be locked as we don't actually reference it]
4121  */
4122 int
4123 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4124     struct vm_map *submap)
4125 {
4126 	struct vm_map_entry *entry;
4127 	int result;
4128 
4129 	if (start > map->max_offset || end > map->max_offset ||
4130 	    start < map->min_offset || end < map->min_offset)
4131 		return EINVAL;
4132 
4133 	vm_map_lock(map);
4134 
4135 	if (uvm_map_lookup_entry(map, start, &entry)) {
4136 		UVM_MAP_CLIP_START(map, entry, start);
4137 		UVM_MAP_CLIP_END(map, entry, end);
4138 	} else
4139 		entry = NULL;
4140 
4141 	if (entry != NULL &&
4142 	    entry->start == start && entry->end == end &&
4143 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4144 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4145 		entry->etype |= UVM_ET_SUBMAP;
4146 		entry->object.sub_map = submap;
4147 		entry->offset = 0;
4148 		uvm_map_reference(submap);
4149 		result = 0;
4150 	} else
4151 		result = EINVAL;
4152 
4153 	vm_map_unlock(map);
4154 	return(result);
4155 }
4156 
4157 /*
4158  * uvm_map_checkprot: check protection in map
4159  *
4160  * => must allow specified protection in a fully allocated region.
4161  * => map must be read or write locked by caller.
4162  */
4163 boolean_t
4164 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4165     vm_prot_t protection)
4166 {
4167 	struct vm_map_entry *entry;
4168 
4169 	if (start < map->min_offset || end > map->max_offset || start > end)
4170 		return FALSE;
4171 	if (start == end)
4172 		return TRUE;
4173 
4174 	/*
4175 	 * Iterate entries.
4176 	 */
4177 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4178 	    entry != NULL && entry->start < end;
4179 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4180 		/* Fail if a hole is found. */
4181 		if (UVM_ET_ISHOLE(entry) ||
4182 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4183 			return FALSE;
4184 
4185 		/* Check protection.
*/ 4186 if ((entry->protection & protection) != protection) 4187 return FALSE; 4188 } 4189 return TRUE; 4190 } 4191 4192 /* 4193 * uvm_map_create: create map 4194 */ 4195 vm_map_t 4196 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4197 { 4198 vm_map_t map; 4199 4200 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4201 map->pmap = pmap; 4202 uvm_map_setup(map, min, max, flags); 4203 return (map); 4204 } 4205 4206 /* 4207 * uvm_map_deallocate: drop reference to a map 4208 * 4209 * => caller must not lock map 4210 * => we will zap map if ref count goes to zero 4211 */ 4212 void 4213 uvm_map_deallocate(vm_map_t map) 4214 { 4215 int c; 4216 struct uvm_map_deadq dead; 4217 4218 c = --map->ref_count; 4219 if (c > 0) { 4220 return; 4221 } 4222 4223 /* 4224 * all references gone. unmap and free. 4225 * 4226 * No lock required: we are only one to access this map. 4227 */ 4228 TAILQ_INIT(&dead); 4229 uvm_tree_sanity(map, __FILE__, __LINE__); 4230 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4231 TRUE, FALSE); 4232 pmap_destroy(map->pmap); 4233 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4234 free(map, M_VMMAP, sizeof *map); 4235 4236 uvm_unmap_detach(&dead, 0); 4237 } 4238 4239 /* 4240 * uvm_map_inherit: set inheritance code for range of addrs in map. 4241 * 4242 * => map must be unlocked 4243 * => note that the inherit code is used during a "fork". see fork 4244 * code for details. 4245 */ 4246 int 4247 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4248 vm_inherit_t new_inheritance) 4249 { 4250 struct vm_map_entry *entry; 4251 4252 switch (new_inheritance) { 4253 case MAP_INHERIT_NONE: 4254 case MAP_INHERIT_COPY: 4255 case MAP_INHERIT_SHARE: 4256 case MAP_INHERIT_ZERO: 4257 break; 4258 default: 4259 return (EINVAL); 4260 } 4261 4262 if (start > end) 4263 return EINVAL; 4264 start = MAX(start, map->min_offset); 4265 end = MIN(end, map->max_offset); 4266 if (start >= end) 4267 return 0; 4268 4269 vm_map_lock(map); 4270 4271 entry = uvm_map_entrybyaddr(&map->addr, start); 4272 if (entry->end > start) 4273 UVM_MAP_CLIP_START(map, entry, start); 4274 else 4275 entry = RBT_NEXT(uvm_map_addr, entry); 4276 4277 while (entry != NULL && entry->start < end) { 4278 UVM_MAP_CLIP_END(map, entry, end); 4279 entry->inheritance = new_inheritance; 4280 entry = RBT_NEXT(uvm_map_addr, entry); 4281 } 4282 4283 vm_map_unlock(map); 4284 return (0); 4285 } 4286 4287 /* 4288 * uvm_map_advice: set advice code for range of addrs in map. 4289 * 4290 * => map must be unlocked 4291 */ 4292 int 4293 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4294 { 4295 struct vm_map_entry *entry; 4296 4297 switch (new_advice) { 4298 case MADV_NORMAL: 4299 case MADV_RANDOM: 4300 case MADV_SEQUENTIAL: 4301 break; 4302 default: 4303 return (EINVAL); 4304 } 4305 4306 if (start > end) 4307 return EINVAL; 4308 start = MAX(start, map->min_offset); 4309 end = MIN(end, map->max_offset); 4310 if (start >= end) 4311 return 0; 4312 4313 vm_map_lock(map); 4314 4315 entry = uvm_map_entrybyaddr(&map->addr, start); 4316 if (entry != NULL && entry->end > start) 4317 UVM_MAP_CLIP_START(map, entry, start); 4318 else if (entry!= NULL) 4319 entry = RBT_NEXT(uvm_map_addr, entry); 4320 4321 /* 4322 * XXXJRT: disallow holes? 
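 *
 * Illustrative sketch (an editor's addition, not part of the original
 * source): for the plain advice values accepted above, sys_madvise()
 * is the userland entry point that ends up here; only entries
 * overlapping the requested range are clipped and updated.
 */
#if 0	/* illustrative only */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 16 * getpagesize();
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* Hint a streaming access pattern for the whole range... */
	if (madvise(p, len, MADV_SEQUENTIAL) == -1)
		err(1, "madvise");
	/* ...then reset the first half to the default advice. */
	if (madvise(p, len / 2, MADV_NORMAL) == -1)
		err(1, "madvise");
	return 0;
}
#endif
/*
 * Advice values other than MADV_NORMAL, MADV_RANDOM and MADV_SEQUENTIAL
 * are rejected above before the map is locked.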
4323 */ 4324 while (entry != NULL && entry->start < end) { 4325 UVM_MAP_CLIP_END(map, entry, end); 4326 entry->advice = new_advice; 4327 entry = RBT_NEXT(uvm_map_addr, entry); 4328 } 4329 4330 vm_map_unlock(map); 4331 return (0); 4332 } 4333 4334 /* 4335 * uvm_map_extract: extract a mapping from a map and put it somewhere 4336 * in the kernel_map, setting protection to max_prot. 4337 * 4338 * => map should be unlocked (we will write lock it and kernel_map) 4339 * => returns 0 on success, error code otherwise 4340 * => start must be page aligned 4341 * => len must be page sized 4342 * => flags: 4343 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4344 * Mappings are QREF's. 4345 */ 4346 int 4347 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4348 vaddr_t *dstaddrp, int flags) 4349 { 4350 struct uvm_map_deadq dead; 4351 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4352 vaddr_t dstaddr; 4353 vaddr_t end; 4354 vaddr_t cp_start; 4355 vsize_t cp_len, cp_off; 4356 int error; 4357 4358 TAILQ_INIT(&dead); 4359 end = start + len; 4360 4361 /* 4362 * Sanity check on the parameters. 4363 * Also, since the mapping may not contain gaps, error out if the 4364 * mapped area is not in source map. 4365 */ 4366 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4367 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4368 return EINVAL; 4369 if (start < srcmap->min_offset || end > srcmap->max_offset) 4370 return EINVAL; 4371 4372 /* Initialize dead entries. Handle len == 0 case. */ 4373 if (len == 0) 4374 return 0; 4375 4376 /* Acquire lock on srcmap. */ 4377 vm_map_lock(srcmap); 4378 4379 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4380 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4381 4382 /* Check that the range is contiguous. */ 4383 for (entry = first; entry != NULL && entry->end < end; 4384 entry = RBT_NEXT(uvm_map_addr, entry)) { 4385 if (VMMAP_FREE_END(entry) != entry->end || 4386 UVM_ET_ISHOLE(entry)) { 4387 error = EINVAL; 4388 goto fail; 4389 } 4390 } 4391 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4392 error = EINVAL; 4393 goto fail; 4394 } 4395 4396 /* 4397 * Handle need-copy flag. 4398 */ 4399 for (entry = first; entry != NULL && entry->start < end; 4400 entry = RBT_NEXT(uvm_map_addr, entry)) { 4401 if (UVM_ET_ISNEEDSCOPY(entry)) 4402 amap_copy(srcmap, entry, M_NOWAIT, 4403 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4404 if (UVM_ET_ISNEEDSCOPY(entry)) { 4405 /* 4406 * amap_copy failure 4407 */ 4408 error = ENOMEM; 4409 goto fail; 4410 } 4411 } 4412 4413 /* Lock destination map (kernel_map). */ 4414 vm_map_lock(kernel_map); 4415 4416 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4417 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4418 PROT_NONE, 0) != 0) { 4419 error = ENOMEM; 4420 goto fail2; 4421 } 4422 *dstaddrp = dstaddr; 4423 4424 /* 4425 * We now have srcmap and kernel_map locked. 4426 * dstaddr contains the destination offset in dstmap. 4427 */ 4428 /* step 1: start looping through map entries, performing extraction. */ 4429 for (entry = first; entry != NULL && entry->start < end; 4430 entry = RBT_NEXT(uvm_map_addr, entry)) { 4431 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4432 if (UVM_ET_ISHOLE(entry)) 4433 continue; 4434 4435 /* Calculate uvm_mapent_clone parameters. 
*/ 4436 cp_start = entry->start; 4437 if (cp_start < start) { 4438 cp_off = start - cp_start; 4439 cp_start = start; 4440 } else 4441 cp_off = 0; 4442 cp_len = MIN(entry->end, end) - cp_start; 4443 4444 newentry = uvm_mapent_clone(kernel_map, 4445 cp_start - start + dstaddr, cp_len, cp_off, 4446 entry->protection, entry->max_protection, 4447 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4448 if (newentry == NULL) { 4449 error = ENOMEM; 4450 goto fail2_unmap; 4451 } 4452 kernel_map->size += cp_len; 4453 if (flags & UVM_EXTRACT_FIXPROT) 4454 newentry->protection = newentry->max_protection; 4455 4456 /* 4457 * Step 2: perform pmap copy. 4458 * (Doing this in the loop saves one RB traversal.) 4459 */ 4460 pmap_copy(kernel_map->pmap, srcmap->pmap, 4461 cp_start - start + dstaddr, cp_len, cp_start); 4462 } 4463 pmap_update(kernel_map->pmap); 4464 4465 error = 0; 4466 4467 /* Unmap copied entries on failure. */ 4468 fail2_unmap: 4469 if (error) { 4470 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4471 FALSE, TRUE); 4472 } 4473 4474 /* Release maps, release dead entries. */ 4475 fail2: 4476 vm_map_unlock(kernel_map); 4477 4478 fail: 4479 vm_map_unlock(srcmap); 4480 4481 uvm_unmap_detach(&dead, 0); 4482 4483 return error; 4484 } 4485 4486 /* 4487 * uvm_map_clean: clean out a map range 4488 * 4489 * => valid flags: 4490 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4491 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4492 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4493 * if (flags & PGO_FREE): any cached pages are freed after clean 4494 * => returns an error if any part of the specified range isn't mapped 4495 * => never a need to flush amap layer since the anonymous memory has 4496 * no permanent home, but may deactivate pages there 4497 * => called from sys_msync() and sys_madvise() 4498 * => caller must not write-lock map (read OK). 4499 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4500 */ 4501 4502 int 4503 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4504 { 4505 struct vm_map_entry *first, *entry; 4506 struct vm_amap *amap; 4507 struct vm_anon *anon; 4508 struct vm_page *pg; 4509 struct uvm_object *uobj; 4510 vaddr_t cp_start, cp_end; 4511 int refs; 4512 int error; 4513 boolean_t rv; 4514 4515 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4516 (PGO_FREE|PGO_DEACTIVATE)); 4517 4518 if (start > end || start < map->min_offset || end > map->max_offset) 4519 return EINVAL; 4520 4521 vm_map_lock_read(map); 4522 first = uvm_map_entrybyaddr(&map->addr, start); 4523 4524 /* Make a first pass to check for holes. */ 4525 for (entry = first; entry != NULL && entry->start < end; 4526 entry = RBT_NEXT(uvm_map_addr, entry)) { 4527 if (UVM_ET_ISSUBMAP(entry)) { 4528 vm_map_unlock_read(map); 4529 return EINVAL; 4530 } 4531 if (UVM_ET_ISSUBMAP(entry) || 4532 UVM_ET_ISHOLE(entry) || 4533 (entry->end < end && 4534 VMMAP_FREE_END(entry) != entry->end)) { 4535 vm_map_unlock_read(map); 4536 return EFAULT; 4537 } 4538 } 4539 4540 error = 0; 4541 for (entry = first; entry != NULL && entry->start < end; 4542 entry = RBT_NEXT(uvm_map_addr, entry)) { 4543 amap = entry->aref.ar_amap; /* top layer */ 4544 if (UVM_ET_ISOBJ(entry)) 4545 uobj = entry->object.uvm_obj; 4546 else 4547 uobj = NULL; 4548 4549 /* 4550 * No amap cleaning necessary if: 4551 * - there's no amap 4552 * - we're not deactivating or freeing pages. 
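 *
 * Illustrative sketch (an editor's addition, not part of the original
 * source): the flag combinations handled in this loop are driven from
 * userland mainly through msync(2); "scratch.tmp" below is just a
 * throw-away example file.
 */
#if 0	/* illustrative only */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	size_t len = getpagesize();
	char *p;
	int fd;

	if ((fd = open("scratch.tmp", O_RDWR | O_CREAT, 0600)) == -1)
		err(1, "open");
	if (ftruncate(fd, len) == -1)
		err(1, "ftruncate");
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	p[0] = 1;
	/* Write the dirty page back synchronously. */
	if (msync(p, len, MS_SYNC) == -1)
		err(1, "msync");
	/* Also ask for cached pages in the range to be invalidated. */
	if (msync(p, len, MS_SYNC | MS_INVALIDATE) == -1)
		err(1, "msync");
	return 0;
}
#endif
/*
 * Anonymous (amap) pages have no permanent backing store, which is why
 * the amap side of this loop only deactivates or frees them.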
4553 */ 4554 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4555 goto flush_object; 4556 4557 cp_start = MAX(entry->start, start); 4558 cp_end = MIN(entry->end, end); 4559 4560 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4561 anon = amap_lookup(&entry->aref, 4562 cp_start - entry->start); 4563 if (anon == NULL) 4564 continue; 4565 4566 pg = anon->an_page; 4567 if (pg == NULL) { 4568 continue; 4569 } 4570 KASSERT(pg->pg_flags & PQ_ANON); 4571 4572 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4573 /* 4574 * XXX In these first 3 cases, we always just 4575 * XXX deactivate the page. We may want to 4576 * XXX handle the different cases more 4577 * XXX specifically, in the future. 4578 */ 4579 case PGO_CLEANIT|PGO_FREE: 4580 case PGO_CLEANIT|PGO_DEACTIVATE: 4581 case PGO_DEACTIVATE: 4582 deactivate_it: 4583 /* skip the page if it's wired */ 4584 if (pg->wire_count != 0) 4585 break; 4586 4587 uvm_lock_pageq(); 4588 4589 KASSERT(pg->uanon == anon); 4590 4591 /* zap all mappings for the page. */ 4592 pmap_page_protect(pg, PROT_NONE); 4593 4594 /* ...and deactivate the page. */ 4595 uvm_pagedeactivate(pg); 4596 4597 uvm_unlock_pageq(); 4598 break; 4599 case PGO_FREE: 4600 /* 4601 * If there are multiple references to 4602 * the amap, just deactivate the page. 4603 */ 4604 if (amap_refs(amap) > 1) 4605 goto deactivate_it; 4606 4607 /* XXX skip the page if it's wired */ 4608 if (pg->wire_count != 0) { 4609 break; 4610 } 4611 amap_unadd(&entry->aref, 4612 cp_start - entry->start); 4613 refs = --anon->an_ref; 4614 if (refs == 0) 4615 uvm_anfree(anon); 4616 break; 4617 default: 4618 panic("uvm_map_clean: weird flags"); 4619 } 4620 } 4621 4622 flush_object: 4623 cp_start = MAX(entry->start, start); 4624 cp_end = MIN(entry->end, end); 4625 4626 /* 4627 * flush pages if we've got a valid backing object. 4628 * 4629 * Don't PGO_FREE if we don't have write permission 4630 * and don't flush if this is a copy-on-write object 4631 * since we can't know our permissions on it. 4632 */ 4633 if (uobj != NULL && 4634 ((flags & PGO_FREE) == 0 || 4635 ((entry->max_protection & PROT_WRITE) != 0 && 4636 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4637 rv = uobj->pgops->pgo_flush(uobj, 4638 cp_start - entry->start + entry->offset, 4639 cp_end - entry->start + entry->offset, flags); 4640 4641 if (rv == FALSE) 4642 error = EFAULT; 4643 } 4644 } 4645 4646 vm_map_unlock_read(map); 4647 return error; 4648 } 4649 4650 /* 4651 * UVM_MAP_CLIP_END implementation 4652 */ 4653 void 4654 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4655 { 4656 struct vm_map_entry *tmp; 4657 4658 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4659 tmp = uvm_mapent_alloc(map, 0); 4660 4661 /* Invoke splitentry. */ 4662 uvm_map_splitentry(map, entry, tmp, addr); 4663 } 4664 4665 /* 4666 * UVM_MAP_CLIP_START implementation 4667 * 4668 * Clippers are required to not change the pointers to the entry they are 4669 * clipping on. 4670 * Since uvm_map_splitentry turns the original entry into the lowest 4671 * entry (address wise) we do a swap between the new entry and the original 4672 * entry, prior to calling uvm_map_splitentry. 4673 */ 4674 void 4675 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4676 { 4677 struct vm_map_entry *tmp; 4678 struct uvm_addr_state *free; 4679 4680 /* Unlink original. 
*/ 4681 free = uvm_map_uaddr_e(map, entry); 4682 uvm_mapent_free_remove(map, free, entry); 4683 uvm_mapent_addr_remove(map, entry); 4684 4685 /* Copy entry. */ 4686 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4687 tmp = uvm_mapent_alloc(map, 0); 4688 uvm_mapent_copy(entry, tmp); 4689 4690 /* Put new entry in place of original entry. */ 4691 uvm_mapent_addr_insert(map, tmp); 4692 uvm_mapent_free_insert(map, free, tmp); 4693 4694 /* Invoke splitentry. */ 4695 uvm_map_splitentry(map, tmp, entry, addr); 4696 } 4697 4698 /* 4699 * Boundary fixer. 4700 */ 4701 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4702 static __inline vaddr_t 4703 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4704 { 4705 return (min < bound && max > bound) ? bound : max; 4706 } 4707 4708 /* 4709 * Choose free list based on address at start of free space. 4710 * 4711 * The uvm_addr_state returned contains addr and is the first of: 4712 * - uaddr_exe 4713 * - uaddr_brk_stack 4714 * - uaddr_any 4715 */ 4716 struct uvm_addr_state* 4717 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4718 { 4719 struct uvm_addr_state *uaddr; 4720 int i; 4721 4722 /* Special case the first page, to prevent mmap from returning 0. */ 4723 if (addr < VMMAP_MIN_ADDR) 4724 return NULL; 4725 4726 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4727 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4728 if (addr >= uvm_maxkaddr) 4729 return NULL; 4730 } 4731 4732 /* Is the address inside the exe-only map? */ 4733 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4734 addr < map->uaddr_exe->uaddr_maxaddr) 4735 return map->uaddr_exe; 4736 4737 /* Check if the space falls inside brk/stack area. */ 4738 if ((addr >= map->b_start && addr < map->b_end) || 4739 (addr >= map->s_start && addr < map->s_end)) { 4740 if (map->uaddr_brk_stack != NULL && 4741 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4742 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4743 return map->uaddr_brk_stack; 4744 } else 4745 return NULL; 4746 } 4747 4748 /* 4749 * Check the other selectors. 4750 * 4751 * These selectors are only marked as the owner, if they have insert 4752 * functions. 4753 */ 4754 for (i = 0; i < nitems(map->uaddr_any); i++) { 4755 uaddr = map->uaddr_any[i]; 4756 if (uaddr == NULL) 4757 continue; 4758 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4759 continue; 4760 4761 if (addr >= uaddr->uaddr_minaddr && 4762 addr < uaddr->uaddr_maxaddr) 4763 return uaddr; 4764 } 4765 4766 return NULL; 4767 } 4768 4769 /* 4770 * Choose free list based on address at start of free space. 4771 * 4772 * The uvm_addr_state returned contains addr and is the first of: 4773 * - uaddr_exe 4774 * - uaddr_brk_stack 4775 * - uaddr_any 4776 */ 4777 struct uvm_addr_state* 4778 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4779 { 4780 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4781 } 4782 4783 /* 4784 * Returns the first free-memory boundary that is crossed by [min-max]. 4785 */ 4786 vsize_t 4787 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4788 { 4789 struct uvm_addr_state *uaddr; 4790 int i; 4791 4792 /* Never return first page. */ 4793 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4794 4795 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4796 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4797 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4798 4799 /* Check for exe-only boundaries. 
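 *
 * Worked example for uvm_map_boundfix() above (an editor's addition,
 * not part of the original source), using made-up addresses:
 *
 *	uvm_map_boundfix(0x1000, 0x9000, 0x4000) == 0x4000
 *		the boundary lies inside the range, so the range is
 *		clipped at the boundary;
 *	uvm_map_boundfix(0x5000, 0x9000, 0x4000) == 0x9000
 *	uvm_map_boundfix(0x1000, 0x3000, 0x4000) == 0x3000
 *		a boundary at or outside [min, max] leaves max unchanged.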
 */
4800 	if (map->uaddr_exe != NULL) {
4801 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4802 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4803 	}
4804 
4805 	/* Check for brk/stack boundaries. */
4806 	if (map->uaddr_brk_stack != NULL) {
4807 		max = uvm_map_boundfix(min, max,
4808 		    map->uaddr_brk_stack->uaddr_minaddr);
4809 		max = uvm_map_boundfix(min, max,
4810 		    map->uaddr_brk_stack->uaddr_maxaddr);
4811 	}
4812 
4813 	/* Check other boundaries. */
4814 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4815 		uaddr = map->uaddr_any[i];
4816 		if (uaddr != NULL) {
4817 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4818 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4819 		}
4820 	}
4821 
4822 	/* Boundaries at stack and brk() area. */
4823 	max = uvm_map_boundfix(min, max, map->s_start);
4824 	max = uvm_map_boundfix(min, max, map->s_end);
4825 	max = uvm_map_boundfix(min, max, map->b_start);
4826 	max = uvm_map_boundfix(min, max, map->b_end);
4827 
4828 	return max;
4829 }
4830 
4831 /*
4832  * Update map allocation start and end addresses from proc vmspace.
4833  */
4834 void
4835 uvm_map_vmspace_update(struct vm_map *map,
4836     struct uvm_map_deadq *dead, int flags)
4837 {
4838 	struct vmspace *vm;
4839 	vaddr_t b_start, b_end, s_start, s_end;
4840 
4841 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4842 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4843 
4844 	/*
4845 	 * Derive actual allocation boundaries from vmspace.
4846 	 */
4847 	vm = (struct vmspace *)map;
4848 	b_start = (vaddr_t)vm->vm_daddr;
4849 	b_end = b_start + BRKSIZ;
4850 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4851 	s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4852 #ifdef DIAGNOSTIC
4853 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4854 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4855 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4856 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4857 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4858 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4859 		    vm, b_start, b_end, s_start, s_end);
4860 	}
4861 #endif
4862 
4863 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4864 	    map->s_start == s_start && map->s_end == s_end))
4865 		return;
4866 
4867 	uvm_map_freelist_update(map, dead, b_start, b_end,
4868 	    s_start, s_end, flags);
4869 }
4870 
4871 /*
4872  * Grow kernel memory.
4873  *
4874  * This function is only called for kernel maps when an allocation fails.
4875  *
4876  * If the map has a gap that is large enough to accommodate alloc_sz, this
4877  * function will make sure map->free will include it.
4878  */
4879 void
4880 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4881     vsize_t alloc_sz, int flags)
4882 {
4883 	vsize_t sz;
4884 	vaddr_t end;
4885 	struct vm_map_entry *entry;
4886 
4887 	/* Kernel memory only. */
4888 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4889 	/* Destroy free list. */
4890 	uvm_map_freelist_update_clear(map, dead);
4891 
4892 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
4893 	if (map->flags & VM_MAP_GUARDPAGES)
4894 		alloc_sz += PAGE_SIZE;
4895 
4896 	/*
4897 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4898 	 *
4899 	 * Don't handle the case where the multiplication overflows:
4900 	 * if that happens, the allocation is probably too big anyway.
4901 	 */
4902 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4903 
4904 	/*
4905 	 * Walk forward until a gap large enough for alloc_sz shows up.
4906 	 *
4907 	 * We assume the kernel map has no boundaries.
4908 * uvm_maxkaddr may be zero. 4909 */ 4910 end = MAX(uvm_maxkaddr, map->min_offset); 4911 entry = uvm_map_entrybyaddr(&map->addr, end); 4912 while (entry && entry->fspace < alloc_sz) 4913 entry = RBT_NEXT(uvm_map_addr, entry); 4914 if (entry) { 4915 end = MAX(VMMAP_FREE_START(entry), end); 4916 end += MIN(sz, map->max_offset - end); 4917 } else 4918 end = map->max_offset; 4919 4920 /* Reserve pmap entries. */ 4921 #ifdef PMAP_GROWKERNEL 4922 uvm_maxkaddr = pmap_growkernel(end); 4923 #else 4924 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4925 #endif 4926 4927 /* Rebuild free list. */ 4928 uvm_map_freelist_update_refill(map, flags); 4929 } 4930 4931 /* 4932 * Freelist update subfunction: unlink all entries from freelists. 4933 */ 4934 void 4935 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4936 { 4937 struct uvm_addr_state *free; 4938 struct vm_map_entry *entry, *prev, *next; 4939 4940 prev = NULL; 4941 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4942 entry = next) { 4943 next = RBT_NEXT(uvm_map_addr, entry); 4944 4945 free = uvm_map_uaddr_e(map, entry); 4946 uvm_mapent_free_remove(map, free, entry); 4947 4948 if (prev != NULL && entry->start == entry->end) { 4949 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4950 uvm_mapent_addr_remove(map, entry); 4951 DEAD_ENTRY_PUSH(dead, entry); 4952 } else 4953 prev = entry; 4954 } 4955 } 4956 4957 /* 4958 * Freelist update subfunction: refill the freelists with entries. 4959 */ 4960 void 4961 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4962 { 4963 struct vm_map_entry *entry; 4964 vaddr_t min, max; 4965 4966 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 4967 min = VMMAP_FREE_START(entry); 4968 max = VMMAP_FREE_END(entry); 4969 entry->fspace = 0; 4970 4971 entry = uvm_map_fix_space(map, entry, min, max, flags); 4972 } 4973 4974 uvm_tree_sanity(map, __FILE__, __LINE__); 4975 } 4976 4977 /* 4978 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4979 */ 4980 void 4981 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4982 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4983 { 4984 KDASSERT(b_end >= b_start && s_end >= s_start); 4985 4986 /* Clear all free lists. */ 4987 uvm_map_freelist_update_clear(map, dead); 4988 4989 /* Apply new bounds. */ 4990 map->b_start = b_start; 4991 map->b_end = b_end; 4992 map->s_start = s_start; 4993 map->s_end = s_end; 4994 4995 /* Refill free lists. */ 4996 uvm_map_freelist_update_refill(map, flags); 4997 } 4998 4999 /* 5000 * Assign a uvm_addr_state to the specified pointer in vm_map. 5001 * 5002 * May sleep. 5003 */ 5004 void 5005 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 5006 struct uvm_addr_state *newval) 5007 { 5008 struct uvm_map_deadq dead; 5009 5010 /* Pointer which must be in this map. */ 5011 KASSERT(which != NULL); 5012 KASSERT((void*)map <= (void*)(which) && 5013 (void*)(which) < (void*)(map + 1)); 5014 5015 vm_map_lock(map); 5016 TAILQ_INIT(&dead); 5017 uvm_map_freelist_update_clear(map, &dead); 5018 5019 uvm_addr_destroy(*which); 5020 *which = newval; 5021 5022 uvm_map_freelist_update_refill(map, 0); 5023 vm_map_unlock(map); 5024 uvm_unmap_detach(&dead, 0); 5025 } 5026 5027 /* 5028 * Correct space insert. 5029 * 5030 * Entry must not be on any freelist. 
5031 */ 5032 struct vm_map_entry* 5033 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5034 vaddr_t min, vaddr_t max, int flags) 5035 { 5036 struct uvm_addr_state *free, *entfree; 5037 vaddr_t lmax; 5038 5039 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5040 KDASSERT(min <= max); 5041 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5042 min == map->min_offset); 5043 5044 /* 5045 * During the function, entfree will always point at the uaddr state 5046 * for entry. 5047 */ 5048 entfree = (entry == NULL ? NULL : 5049 uvm_map_uaddr_e(map, entry)); 5050 5051 while (min != max) { 5052 /* Claim guard page for entry. */ 5053 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5054 VMMAP_FREE_END(entry) == entry->end && 5055 entry->start != entry->end) { 5056 if (max - min == 2 * PAGE_SIZE) { 5057 /* 5058 * If the free-space gap is exactly 2 pages, 5059 * we make the guard 2 pages instead of 1. 5060 * Because in a guarded map, an area needs 5061 * at least 2 pages to allocate from: 5062 * one page for the allocation and one for 5063 * the guard. 5064 */ 5065 entry->guard = 2 * PAGE_SIZE; 5066 min = max; 5067 } else { 5068 entry->guard = PAGE_SIZE; 5069 min += PAGE_SIZE; 5070 } 5071 continue; 5072 } 5073 5074 /* 5075 * Handle the case where entry has a 2-page guard, but the 5076 * space after entry is freed. 5077 */ 5078 if (entry != NULL && entry->fspace == 0 && 5079 entry->guard > PAGE_SIZE) { 5080 entry->guard = PAGE_SIZE; 5081 min = VMMAP_FREE_START(entry); 5082 } 5083 5084 lmax = uvm_map_boundary(map, min, max); 5085 free = uvm_map_uaddr(map, min); 5086 5087 /* 5088 * Entries are merged if they point at the same uvm_free(). 5089 * Exception to that rule: if min == uvm_maxkaddr, a new 5090 * entry is started regardless (otherwise the allocators 5091 * will get confused). 5092 */ 5093 if (entry != NULL && free == entfree && 5094 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5095 min == uvm_maxkaddr)) { 5096 KDASSERT(VMMAP_FREE_END(entry) == min); 5097 entry->fspace += lmax - min; 5098 } else { 5099 /* 5100 * Commit entry to free list: it'll not be added to 5101 * anymore. 5102 * We'll start a new entry and add to that entry 5103 * instead. 5104 */ 5105 if (entry != NULL) 5106 uvm_mapent_free_insert(map, entfree, entry); 5107 5108 /* New entry for new uaddr. */ 5109 entry = uvm_mapent_alloc(map, flags); 5110 KDASSERT(entry != NULL); 5111 entry->end = entry->start = min; 5112 entry->guard = 0; 5113 entry->fspace = lmax - min; 5114 entry->object.uvm_obj = NULL; 5115 entry->offset = 0; 5116 entry->etype = 0; 5117 entry->protection = entry->max_protection = 0; 5118 entry->inheritance = 0; 5119 entry->wired_count = 0; 5120 entry->advice = 0; 5121 entry->aref.ar_pageoff = 0; 5122 entry->aref.ar_amap = NULL; 5123 uvm_mapent_addr_insert(map, entry); 5124 5125 entfree = free; 5126 } 5127 5128 min = lmax; 5129 } 5130 /* Finally put entry on the uaddr state. */ 5131 if (entry != NULL) 5132 uvm_mapent_free_insert(map, entfree, entry); 5133 5134 return entry; 5135 } 5136 5137 /* 5138 * MQuery style of allocation. 5139 * 5140 * This allocator searches forward until sufficient space is found to map 5141 * the given size. 5142 * 5143 * XXX: factor in offset (via pmap_prefer) and protection? 
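 *
 * Illustrative sketch (an editor's addition, not part of the original
 * source): the mquery(2) system call exposes this search to userland;
 * it looks like mmap(2) but only reports where a mapping could go.
 */
#if 0	/* illustrative only */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	void *hint = (void *)0x10000000UL;	/* arbitrary starting point */
	void *where;

	/* Search forward from the hint for room for 64K; map nothing. */
	where = mquery(hint, 65536, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (where == MAP_FAILED)
		err(1, "mquery");
	printf("64K would fit at %p\n", where);

	/* With MAP_FIXED only the exact hint address is acceptable. */
	where = mquery(hint, 65536, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
	if (where == MAP_FAILED)
		warn("nothing free at the hint itself");
	return 0;
}
#endif
/*
 * The search below skips the brk() and stack ranges, so the answer is
 * also a hint about where a later mmap(2) without MAP_FIXED may end up.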
5144 */ 5145 int 5146 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5147 int flags) 5148 { 5149 struct vm_map_entry *entry, *last; 5150 vaddr_t addr; 5151 vaddr_t tmp, pmap_align, pmap_offset; 5152 int error; 5153 5154 addr = *addr_p; 5155 vm_map_lock_read(map); 5156 5157 /* Configure pmap prefer. */ 5158 if (offset != UVM_UNKNOWN_OFFSET) { 5159 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5160 pmap_offset = PMAP_PREFER_OFFSET(offset); 5161 } else { 5162 pmap_align = PAGE_SIZE; 5163 pmap_offset = 0; 5164 } 5165 5166 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5167 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5168 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5169 if (tmp < addr) 5170 tmp += pmap_align; 5171 addr = tmp; 5172 } 5173 5174 /* First, check if the requested range is fully available. */ 5175 entry = uvm_map_entrybyaddr(&map->addr, addr); 5176 last = NULL; 5177 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5178 error = 0; 5179 goto out; 5180 } 5181 if (flags & UVM_FLAG_FIXED) { 5182 error = EINVAL; 5183 goto out; 5184 } 5185 5186 error = ENOMEM; /* Default error from here. */ 5187 5188 /* 5189 * At this point, the memory at <addr, sz> is not available. 5190 * The reasons are: 5191 * [1] it's outside the map, 5192 * [2] it starts in used memory (and therefore needs to move 5193 * toward the first free page in entry), 5194 * [3] it starts in free memory but bumps into used memory. 5195 * 5196 * Note that for case [2], the forward moving is handled by the 5197 * for loop below. 5198 */ 5199 if (entry == NULL) { 5200 /* [1] Outside the map. */ 5201 if (addr >= map->max_offset) 5202 goto out; 5203 else 5204 entry = RBT_MIN(uvm_map_addr, &map->addr); 5205 } else if (VMMAP_FREE_START(entry) <= addr) { 5206 /* [3] Bumped into used memory. */ 5207 entry = RBT_NEXT(uvm_map_addr, entry); 5208 } 5209 5210 /* Test if the next entry is sufficient for the allocation. */ 5211 for (; entry != NULL; 5212 entry = RBT_NEXT(uvm_map_addr, entry)) { 5213 if (entry->fspace == 0) 5214 continue; 5215 addr = VMMAP_FREE_START(entry); 5216 5217 restart: /* Restart address checks on address change. */ 5218 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5219 if (tmp < addr) 5220 tmp += pmap_align; 5221 addr = tmp; 5222 if (addr >= VMMAP_FREE_END(entry)) 5223 continue; 5224 5225 /* Skip brk() allocation addresses. */ 5226 if (addr + sz > map->b_start && addr < map->b_end) { 5227 if (VMMAP_FREE_END(entry) > map->b_end) { 5228 addr = map->b_end; 5229 goto restart; 5230 } else 5231 continue; 5232 } 5233 /* Skip stack allocation addresses. */ 5234 if (addr + sz > map->s_start && addr < map->s_end) { 5235 if (VMMAP_FREE_END(entry) > map->s_end) { 5236 addr = map->s_end; 5237 goto restart; 5238 } else 5239 continue; 5240 } 5241 5242 last = NULL; 5243 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5244 error = 0; 5245 goto out; 5246 } 5247 } 5248 5249 out: 5250 vm_map_unlock_read(map); 5251 if (error == 0) 5252 *addr_p = addr; 5253 return error; 5254 } 5255 5256 /* 5257 * Determine allocation bias. 5258 * 5259 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5260 * addresses, or 0 for no bias. 5261 * The bias mechanism is intended to avoid clashing with brk() and stack 5262 * areas. 
5263 */ 5264 int 5265 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5266 { 5267 vaddr_t start, end; 5268 5269 start = VMMAP_FREE_START(entry); 5270 end = VMMAP_FREE_END(entry); 5271 5272 /* Stay at the top of brk() area. */ 5273 if (end >= map->b_start && start < map->b_end) 5274 return 1; 5275 /* Stay at the far end of the stack area. */ 5276 if (end >= map->s_start && start < map->s_end) { 5277 #ifdef MACHINE_STACK_GROWS_UP 5278 return 1; 5279 #else 5280 return -1; 5281 #endif 5282 } 5283 5284 /* No bias, this area is meant for us. */ 5285 return 0; 5286 } 5287 5288 5289 boolean_t 5290 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5291 { 5292 boolean_t rv; 5293 5294 if (map->flags & VM_MAP_INTRSAFE) { 5295 rv = mtx_enter_try(&map->mtx); 5296 } else { 5297 mtx_enter(&map->flags_lock); 5298 if (map->flags & VM_MAP_BUSY) { 5299 mtx_leave(&map->flags_lock); 5300 return (FALSE); 5301 } 5302 mtx_leave(&map->flags_lock); 5303 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5304 /* check if the lock is busy and back out if we won the race */ 5305 if (rv) { 5306 mtx_enter(&map->flags_lock); 5307 if (map->flags & VM_MAP_BUSY) { 5308 rw_exit(&map->lock); 5309 rv = FALSE; 5310 } 5311 mtx_leave(&map->flags_lock); 5312 } 5313 } 5314 5315 if (rv) { 5316 map->timestamp++; 5317 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5318 uvm_tree_sanity(map, file, line); 5319 uvm_tree_size_chk(map, file, line); 5320 } 5321 5322 return (rv); 5323 } 5324 5325 void 5326 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5327 { 5328 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5329 do { 5330 mtx_enter(&map->flags_lock); 5331 tryagain: 5332 while (map->flags & VM_MAP_BUSY) { 5333 map->flags |= VM_MAP_WANTLOCK; 5334 msleep(&map->flags, &map->flags_lock, 5335 PVM, vmmapbsy, 0); 5336 } 5337 mtx_leave(&map->flags_lock); 5338 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5339 /* check if the lock is busy and back out if we won the race */ 5340 mtx_enter(&map->flags_lock); 5341 if (map->flags & VM_MAP_BUSY) { 5342 rw_exit(&map->lock); 5343 goto tryagain; 5344 } 5345 mtx_leave(&map->flags_lock); 5346 } else { 5347 mtx_enter(&map->mtx); 5348 } 5349 5350 map->timestamp++; 5351 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5352 uvm_tree_sanity(map, file, line); 5353 uvm_tree_size_chk(map, file, line); 5354 } 5355 5356 void 5357 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5358 { 5359 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5360 rw_enter_read(&map->lock); 5361 else 5362 mtx_enter(&map->mtx); 5363 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5364 uvm_tree_sanity(map, file, line); 5365 uvm_tree_size_chk(map, file, line); 5366 } 5367 5368 void 5369 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5370 { 5371 uvm_tree_sanity(map, file, line); 5372 uvm_tree_size_chk(map, file, line); 5373 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5374 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5375 rw_exit(&map->lock); 5376 else 5377 mtx_leave(&map->mtx); 5378 } 5379 5380 void 5381 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5382 { 5383 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5384 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5385 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5386 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5387 rw_exit_read(&map->lock); 5388 else 5389 mtx_leave(&map->mtx); 5390 } 5391 5392 void 5393 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 
5394 { 5395 uvm_tree_sanity(map, file, line); 5396 uvm_tree_size_chk(map, file, line); 5397 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5398 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5399 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5400 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5401 rw_enter(&map->lock, RW_DOWNGRADE); 5402 } 5403 5404 void 5405 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5406 { 5407 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5408 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5409 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5410 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5411 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5412 rw_exit_read(&map->lock); 5413 rw_enter_write(&map->lock); 5414 } 5415 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5416 uvm_tree_sanity(map, file, line); 5417 } 5418 5419 void 5420 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5421 { 5422 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5423 mtx_enter(&map->flags_lock); 5424 map->flags |= VM_MAP_BUSY; 5425 mtx_leave(&map->flags_lock); 5426 } 5427 5428 void 5429 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5430 { 5431 int oflags; 5432 5433 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5434 mtx_enter(&map->flags_lock); 5435 oflags = map->flags; 5436 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5437 mtx_leave(&map->flags_lock); 5438 if (oflags & VM_MAP_WANTLOCK) 5439 wakeup(&map->flags); 5440 } 5441 5442 #ifndef SMALL_KERNEL 5443 int 5444 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5445 size_t *lenp) 5446 { 5447 struct vm_map_entry *entry; 5448 vaddr_t start; 5449 int cnt, maxcnt, error = 0; 5450 5451 KASSERT(*lenp > 0); 5452 KASSERT((*lenp % sizeof(*kve)) == 0); 5453 cnt = 0; 5454 maxcnt = *lenp / sizeof(*kve); 5455 KASSERT(maxcnt > 0); 5456 5457 /* 5458 * Return only entries whose address is above the given base 5459 * address. This allows userland to iterate without knowing the 5460 * number of entries beforehand. 5461 */ 5462 start = (vaddr_t)kve[0].kve_start; 5463 5464 vm_map_lock(map); 5465 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5466 if (cnt == maxcnt) { 5467 error = ENOMEM; 5468 break; 5469 } 5470 if (start != 0 && entry->start < start) 5471 continue; 5472 kve->kve_start = entry->start; 5473 kve->kve_end = entry->end; 5474 kve->kve_guard = entry->guard; 5475 kve->kve_fspace = entry->fspace; 5476 kve->kve_fspace_augment = entry->fspace_augment; 5477 kve->kve_offset = entry->offset; 5478 kve->kve_wired_count = entry->wired_count; 5479 kve->kve_etype = entry->etype; 5480 kve->kve_protection = entry->protection; 5481 kve->kve_max_protection = entry->max_protection; 5482 kve->kve_advice = entry->advice; 5483 kve->kve_inheritance = entry->inheritance; 5484 kve->kve_flags = entry->flags; 5485 kve++; 5486 cnt++; 5487 } 5488 vm_map_unlock(map); 5489 5490 KASSERT(cnt <= maxcnt); 5491 5492 *lenp = sizeof(*kve) * cnt; 5493 return error; 5494 } 5495 #endif 5496 5497 5498 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5499 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5500 5501 5502 /* 5503 * MD code: vmspace allocator setup. 5504 */ 5505 5506 #ifdef __i386__ 5507 void 5508 uvm_map_setup_md(struct vm_map *map) 5509 { 5510 vaddr_t min, max; 5511 5512 min = map->min_offset; 5513 max = map->max_offset; 5514 5515 /* 5516 * Ensure the selectors will not try to manage page 0; 5517 * it's too special. 
5518 */ 5519 if (min < VMMAP_MIN_ADDR) 5520 min = VMMAP_MIN_ADDR; 5521 5522 #if 0 /* Cool stuff, not yet */ 5523 /* Executable code is special. */ 5524 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5525 /* Place normal allocations beyond executable mappings. */ 5526 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5527 #else /* Crappy stuff, for now */ 5528 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5529 #endif 5530 5531 #ifndef SMALL_KERNEL 5532 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5533 #endif /* !SMALL_KERNEL */ 5534 } 5535 #elif __LP64__ 5536 void 5537 uvm_map_setup_md(struct vm_map *map) 5538 { 5539 vaddr_t min, max; 5540 5541 min = map->min_offset; 5542 max = map->max_offset; 5543 5544 /* 5545 * Ensure the selectors will not try to manage page 0; 5546 * it's too special. 5547 */ 5548 if (min < VMMAP_MIN_ADDR) 5549 min = VMMAP_MIN_ADDR; 5550 5551 #if 0 /* Cool stuff, not yet */ 5552 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5553 #else /* Crappy stuff, for now */ 5554 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5555 #endif 5556 5557 #ifndef SMALL_KERNEL 5558 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5559 #endif /* !SMALL_KERNEL */ 5560 } 5561 #else /* non-i386, 32 bit */ 5562 void 5563 uvm_map_setup_md(struct vm_map *map) 5564 { 5565 vaddr_t min, max; 5566 5567 min = map->min_offset; 5568 max = map->max_offset; 5569 5570 /* 5571 * Ensure the selectors will not try to manage page 0; 5572 * it's too special. 5573 */ 5574 if (min < VMMAP_MIN_ADDR) 5575 min = VMMAP_MIN_ADDR; 5576 5577 #if 0 /* Cool stuff, not yet */ 5578 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5579 #else /* Crappy stuff, for now */ 5580 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5581 #endif 5582 5583 #ifndef SMALL_KERNEL 5584 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5585 #endif /* !SMALL_KERNEL */ 5586 } 5587 #endif 5588
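
/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): userland reads the data filled in by uvm_map_fill_vmmap()
 * above through the KERN_PROC_VMMAP sysctl.  The handler uses the first
 * element's kve_start as the base address, so an iterating caller sets
 * it to just past the last entry it has already seen.  Error handling
 * is minimal and the batch size of 64 entries is arbitrary.
 */
#if 0	/* illustrative only */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct kinfo_vmentry kve[64];
	int mib[3];
	size_t len;
	u_long next = 0;
	int i, n;

	mib[0] = CTL_KERN;
	mib[1] = KERN_PROC_VMMAP;
	mib[2] = getpid();

	for (;;) {
		memset(kve, 0, sizeof(kve));
		kve[0].kve_start = next;	/* resume point */
		len = sizeof(kve);
		if (sysctl(mib, 3, kve, &len, NULL, 0) == -1) {
			/*
			 * Assumption: a full buffer surfaces as ENOMEM
			 * (see uvm_map_fill_vmmap above), with the batch
			 * gathered so far still returned.
			 */
			if (errno != ENOMEM)
				err(1, "sysctl");
		}
		n = len / sizeof(kve[0]);
		if (n == 0)
			break;
		for (i = 0; i < n; i++)
			printf("%#lx-%#lx\n", (u_long)kve[i].kve_start,
			    (u_long)kve[i].kve_end);
		if (n < 64)
			break;			/* short batch: done */
		next = kve[n - 1].kve_end;	/* continue past this batch */
	}
	return 0;
}
#endif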