1 /* $OpenBSD: uvm_map.c,v 1.243 2019/04/23 13:35:12 visa Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/sysctl.h> 94 #include <sys/syslog.h> 95 96 #ifdef SYSVSHM 97 #include <sys/shm.h> 98 #endif 99 100 #include <uvm/uvm.h> 101 102 #ifdef DDB 103 #include <uvm/uvm_ddb.h> 104 #endif 105 106 #include <uvm/uvm_addr.h> 107 108 109 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 110 int uvm_mapent_isjoinable(struct vm_map*, 111 struct vm_map_entry*, struct vm_map_entry*); 112 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 113 struct vm_map_entry*, struct uvm_map_deadq*); 114 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 115 struct vm_map_entry*, struct uvm_map_deadq*); 116 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 117 struct vm_map_entry*, vaddr_t, vsize_t, int, 118 struct uvm_map_deadq*, struct vm_map_entry*); 119 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 120 void uvm_mapent_free(struct vm_map_entry*); 121 void uvm_unmap_kill_entry(struct vm_map*, 122 struct vm_map_entry*); 123 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 124 void uvm_mapent_mkfree(struct vm_map*, 125 struct vm_map_entry*, struct vm_map_entry**, 126 struct uvm_map_deadq*, boolean_t); 127 void uvm_map_pageable_pgon(struct vm_map*, 128 struct vm_map_entry*, struct vm_map_entry*, 129 vaddr_t, vaddr_t); 130 int uvm_map_pageable_wire(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry*, 132 vaddr_t, vaddr_t, int); 133 void uvm_map_setup_entries(struct vm_map*); 134 void uvm_map_setup_md(struct vm_map*); 135 void uvm_map_teardown(struct vm_map*); 136 void uvm_map_vmspace_update(struct vm_map*, 137 struct uvm_map_deadq*, int); 138 void uvm_map_kmem_grow(struct vm_map*, 139 struct uvm_map_deadq*, vsize_t, int); 140 void uvm_map_freelist_update_clear(struct vm_map*, 141 struct uvm_map_deadq*); 142 void uvm_map_freelist_update_refill(struct vm_map *, int); 143 void uvm_map_freelist_update(struct vm_map*, 144 struct uvm_map_deadq*, vaddr_t, vaddr_t, 145 vaddr_t, vaddr_t, int); 146 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 147 vaddr_t, vaddr_t, int); 148 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 149 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 150 int); 151 int uvm_map_findspace(struct vm_map*, 152 struct vm_map_entry**, struct vm_map_entry**, 153 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 154 vaddr_t); 155 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 156 void uvm_map_addr_augment(struct vm_map_entry*); 157 158 /* 159 * Tree management functions. 
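 * These helpers keep the per-map address tree (entries keyed by their
 * start address) and the free-space selectors (struct uvm_addr_state)
 * consistent with the entries in the map.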
160 */ 161 162 static __inline void uvm_mapent_copy(struct vm_map_entry*, 163 struct vm_map_entry*); 164 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 165 const struct vm_map_entry*); 166 void uvm_mapent_free_insert(struct vm_map*, 167 struct uvm_addr_state*, struct vm_map_entry*); 168 void uvm_mapent_free_remove(struct vm_map*, 169 struct uvm_addr_state*, struct vm_map_entry*); 170 void uvm_mapent_addr_insert(struct vm_map*, 171 struct vm_map_entry*); 172 void uvm_mapent_addr_remove(struct vm_map*, 173 struct vm_map_entry*); 174 void uvm_map_splitentry(struct vm_map*, 175 struct vm_map_entry*, struct vm_map_entry*, 176 vaddr_t); 177 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 178 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 179 180 /* 181 * uvm_vmspace_fork helper functions. 182 */ 183 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 184 vsize_t, vm_prot_t, vm_prot_t, 185 struct vm_map_entry*, struct uvm_map_deadq*, int, 186 int); 187 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 188 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 189 struct vm_map_entry*, struct uvm_map_deadq*); 190 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 191 struct vm_map*, struct vm_map_entry*, 192 struct uvm_map_deadq*); 193 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 194 struct vm_map*, struct vm_map_entry*, 195 struct uvm_map_deadq*); 196 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 197 struct vm_map*, struct vm_map_entry*, 198 struct uvm_map_deadq*); 199 200 /* 201 * Tree validation. 202 */ 203 #ifdef VMMAP_DEBUG 204 void uvm_tree_assert(struct vm_map*, int, char*, 205 char*, int); 206 #define UVM_ASSERT(map, cond, file, line) \ 207 uvm_tree_assert((map), (cond), #cond, (file), (line)) 208 void uvm_tree_sanity(struct vm_map*, char*, int); 209 void uvm_tree_size_chk(struct vm_map*, char*, int); 210 void vmspace_validate(struct vm_map*); 211 #else 212 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 213 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 214 #define vmspace_validate(_map) do {} while (0) 215 #endif 216 217 /* 218 * All architectures will have pmap_prefer. 219 */ 220 #ifndef PMAP_PREFER 221 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 222 #define PMAP_PREFER_OFFSET(off) 0 223 #define PMAP_PREFER(addr, off) (addr) 224 #endif 225 226 227 /* 228 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 229 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 230 * 231 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 232 * each time. 233 */ 234 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 235 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 236 #define VM_MAP_KSIZE_ALLOCMUL 4 237 /* 238 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 239 * ahead. 240 */ 241 #define FSPACE_DELTA 8 242 /* 243 * Put allocations adjacent to previous allocations when the free-space tree 244 * is larger than FSPACE_COMPACT entries. 245 * 246 * Alignment and PMAP_PREFER may still cause the entry to not be fully 247 * adjacent. Note that this strategy reduces memory fragmentation (by leaving 248 * a large space before or after the allocation). 249 */ 250 #define FSPACE_COMPACT 128 251 /* 252 * Make the address selection skip at most this many bytes from the start of 253 * the free space in which the allocation takes place.
254 * 255 * The main idea behind a randomized address space is that an attacker cannot 256 * know where to target his attack. Therefore, the location of objects must be 257 * as random as possible. However, the goal is not to create the most sparse 258 * map that is possible. 259 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 260 * sizes, thereby reducing the sparseness. The biggest randomization comes 261 * from fragmentation, i.e. FSPACE_COMPACT. 262 */ 263 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 264 /* 265 * Allow for small gaps in the overflow areas. 266 * Gap size is in bytes and does not have to be a multiple of page-size. 267 */ 268 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 269 270 /* auto-allocate address lower bound */ 271 #define VMMAP_MIN_ADDR PAGE_SIZE 272 273 274 #ifdef DEADBEEF0 275 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 276 #else 277 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 278 #endif 279 280 #ifdef DEBUG 281 int uvm_map_printlocks = 0; 282 283 #define LPRINTF(_args) \ 284 do { \ 285 if (uvm_map_printlocks) \ 286 printf _args; \ 287 } while (0) 288 #else 289 #define LPRINTF(_args) do {} while (0) 290 #endif 291 292 static struct mutex uvm_kmapent_mtx; 293 static struct timeval uvm_kmapent_last_warn_time; 294 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 295 296 const char vmmapbsy[] = "vmmapbsy"; 297 298 /* 299 * pool for vmspace structures. 300 */ 301 struct pool uvm_vmspace_pool; 302 303 /* 304 * pool for dynamically-allocated map entries. 305 */ 306 struct pool uvm_map_entry_pool; 307 struct pool uvm_map_entry_kmem_pool; 308 309 /* 310 * This global represents the end of the kernel virtual address 311 * space. If we want to exceed this, we must grow the kernel 312 * virtual address space dynamically. 313 * 314 * Note, this variable is locked by kernel_map's lock. 315 */ 316 vaddr_t uvm_maxkaddr; 317 318 /* 319 * Locking predicate. 320 */ 321 #define UVM_MAP_REQ_WRITE(_map) \ 322 do { \ 323 if ((_map)->ref_count > 0) { \ 324 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 325 rw_assert_wrlock(&(_map)->lock); \ 326 else \ 327 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 328 } \ 329 } while (0) 330 331 /* 332 * Tree describing entries by address. 333 * 334 * Addresses are unique. 335 * Entries with start == end may only exist if they are the first entry 336 * (sorted by address) within a free-memory tree. 337 */ 338 339 static inline int 340 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 341 const struct vm_map_entry *e2) 342 { 343 return e1->start < e2->start ? -1 : e1->start > e2->start; 344 } 345 346 /* 347 * Copy mapentry. 348 */ 349 static __inline void 350 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 351 { 352 caddr_t csrc, cdst; 353 size_t sz; 354 355 csrc = (caddr_t)src; 356 cdst = (caddr_t)dst; 357 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 358 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 359 360 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 361 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 362 memcpy(cdst, csrc, sz); 363 } 364 365 /* 366 * Handle free-list insertion. 367 */ 368 void 369 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 370 struct vm_map_entry *entry) 371 { 372 const struct uvm_addr_functions *fun; 373 #ifdef VMMAP_DEBUG 374 vaddr_t min, max, bound; 375 #endif 376 377 #ifdef VMMAP_DEBUG 378 /* 379 * Boundary check. 
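 * The loop below walks [VMMAP_FREE_START(entry), VMMAP_FREE_END(entry))
 * chunk by chunk, as split by uvm_map_boundary(), and asserts that every
 * chunk selects the same free list as the one being inserted into.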
380 * Boundaries are folded if they go on the same free list. 381 */ 382 min = VMMAP_FREE_START(entry); 383 max = VMMAP_FREE_END(entry); 384 385 while (min < max) { 386 bound = uvm_map_boundary(map, min, max); 387 KASSERT(uvm_map_uaddr(map, min) == uaddr); 388 min = bound; 389 } 390 #endif 391 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 392 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 393 394 UVM_MAP_REQ_WRITE(map); 395 396 /* Actual insert: forward to uaddr pointer. */ 397 if (uaddr != NULL) { 398 fun = uaddr->uaddr_functions; 399 KDASSERT(fun != NULL); 400 if (fun->uaddr_free_insert != NULL) 401 (*fun->uaddr_free_insert)(map, uaddr, entry); 402 entry->etype |= UVM_ET_FREEMAPPED; 403 } 404 405 /* Update fspace augmentation. */ 406 uvm_map_addr_augment(entry); 407 } 408 409 /* 410 * Handle free-list removal. 411 */ 412 void 413 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 414 struct vm_map_entry *entry) 415 { 416 const struct uvm_addr_functions *fun; 417 418 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 419 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 420 UVM_MAP_REQ_WRITE(map); 421 422 if (uaddr != NULL) { 423 fun = uaddr->uaddr_functions; 424 if (fun->uaddr_free_remove != NULL) 425 (*fun->uaddr_free_remove)(map, uaddr, entry); 426 entry->etype &= ~UVM_ET_FREEMAPPED; 427 } 428 } 429 430 /* 431 * Handle address tree insertion. 432 */ 433 void 434 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 435 { 436 struct vm_map_entry *res; 437 438 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 439 panic("uvm_mapent_addr_insert: entry still in addr list"); 440 KDASSERT(entry->start <= entry->end); 441 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 442 (entry->end & (vaddr_t)PAGE_MASK) == 0); 443 444 UVM_MAP_REQ_WRITE(map); 445 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 446 if (res != NULL) { 447 panic("uvm_mapent_addr_insert: map %p entry %p " 448 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 449 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 450 map, entry, 451 entry->start, entry->end, entry->guard, entry->fspace, 452 res, res->start, res->end, res->guard, res->fspace); 453 } 454 } 455 456 /* 457 * Handle address tree removal. 458 */ 459 void 460 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 461 { 462 struct vm_map_entry *res; 463 464 UVM_MAP_REQ_WRITE(map); 465 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 466 if (res != entry) 467 panic("uvm_mapent_addr_remove"); 468 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 469 } 470 471 /* 472 * uvm_map_reference: add reference to a map 473 * 474 * XXX check map reference counter lock 475 */ 476 #define uvm_map_reference(_map) \ 477 do { \ 478 map->ref_count++; \ 479 } while (0) 480 481 /* 482 * Calculate the dused delta. 483 */ 484 vsize_t 485 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 486 { 487 struct vmspace *vm; 488 vsize_t sz; 489 vaddr_t lmax; 490 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 491 492 KASSERT(map->flags & VM_MAP_ISVMSPACE); 493 vm = (struct vmspace *)map; 494 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 495 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 496 497 sz = 0; 498 while (min != max) { 499 lmax = max; 500 if (min < stack_begin && lmax > stack_begin) 501 lmax = stack_begin; 502 else if (min < stack_end && lmax > stack_end) 503 lmax = stack_end; 504 505 if (min >= stack_begin && min < stack_end) { 506 /* nothing */ 507 } else 508 sz += lmax - min; 509 min = lmax; 510 } 511 512 return sz >> PAGE_SHIFT; 513 } 514 515 /* 516 * Find the entry describing the given address. 517 */ 518 struct vm_map_entry* 519 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 520 { 521 struct vm_map_entry *iter; 522 523 iter = RBT_ROOT(uvm_map_addr, atree); 524 while (iter != NULL) { 525 if (iter->start > addr) 526 iter = RBT_LEFT(uvm_map_addr, iter); 527 else if (VMMAP_FREE_END(iter) <= addr) 528 iter = RBT_RIGHT(uvm_map_addr, iter); 529 else 530 return iter; 531 } 532 return NULL; 533 } 534 535 /* 536 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 537 * 538 * Push dead entries into a linked list. 539 * Since the linked list abuses the address tree for storage, the entry 540 * may not be linked in a map. 541 * 542 * *head must be initialized to NULL before the first call to this macro. 543 * uvm_unmap_detach(*head, 0) will remove dead entries. 544 */ 545 static __inline void 546 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 547 { 548 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 549 } 550 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 551 dead_entry_push((_headptr), (_entry)) 552 553 /* 554 * Helper function for uvm_map_findspace_tree. 555 * 556 * Given allocation constraints and pmap constraints, finds the 557 * lowest and highest address in a range that can be used for the 558 * allocation. 559 * 560 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 561 * 562 * 563 * Big chunk of math with a seasoning of dragons. 564 */ 565 int 566 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 567 struct vm_map_entry *sel, vaddr_t align, 568 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 569 { 570 vaddr_t sel_min, sel_max; 571 #ifdef PMAP_PREFER 572 vaddr_t pmap_min, pmap_max; 573 #endif /* PMAP_PREFER */ 574 #ifdef DIAGNOSTIC 575 int bad; 576 #endif /* DIAGNOSTIC */ 577 578 sel_min = VMMAP_FREE_START(sel); 579 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 580 581 #ifdef PMAP_PREFER 582 583 /* 584 * There are two special cases, in which we can satisfy the align 585 * requirement and the pmap_prefer requirement. 586 * - when pmap_off == 0, we always select the largest of the two 587 * - when pmap_off % align == 0 and pmap_align > align, we simply 588 * satisfy the pmap_align requirement and automatically 589 * satisfy the align requirement. 590 */ 591 if (align > PAGE_SIZE && 592 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 593 /* 594 * Simple case: only use align. 595 */ 596 sel_min = roundup(sel_min, align); 597 sel_max &= ~(align - 1); 598 599 if (sel_min > sel_max) 600 return ENOMEM; 601 602 /* Correct for bias. 
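 * A positive bias restricts the selection to the top FSPACE_BIASGAP
 * bytes of the range; a negative bias restricts it to the bottom
 * FSPACE_BIASGAP bytes.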
*/ 603 if (sel_max - sel_min > FSPACE_BIASGAP) { 604 if (bias > 0) { 605 sel_min = sel_max - FSPACE_BIASGAP; 606 sel_min = roundup(sel_min, align); 607 } else if (bias < 0) { 608 sel_max = sel_min + FSPACE_BIASGAP; 609 sel_max &= ~(align - 1); 610 } 611 } 612 } else if (pmap_align != 0) { 613 /* 614 * Special case: satisfy both pmap_prefer and 615 * align argument. 616 */ 617 pmap_max = sel_max & ~(pmap_align - 1); 618 pmap_min = sel_min; 619 if (pmap_max < sel_min) 620 return ENOMEM; 621 622 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 623 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 624 pmap_min = pmap_max - FSPACE_BIASGAP; 625 /* Align pmap_min. */ 626 pmap_min &= ~(pmap_align - 1); 627 if (pmap_min < sel_min) 628 pmap_min += pmap_align; 629 if (pmap_min > pmap_max) 630 return ENOMEM; 631 632 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 633 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 634 pmap_max = (pmap_min + FSPACE_BIASGAP) & 635 ~(pmap_align - 1); 636 } 637 if (pmap_min > pmap_max) 638 return ENOMEM; 639 640 /* Apply pmap prefer offset. */ 641 pmap_max |= pmap_off; 642 if (pmap_max > sel_max) 643 pmap_max -= pmap_align; 644 pmap_min |= pmap_off; 645 if (pmap_min < sel_min) 646 pmap_min += pmap_align; 647 648 /* 649 * Fixup: it's possible that pmap_min and pmap_max 650 * cross eachother. In this case, try to find one 651 * address that is allowed. 652 * (This usually happens in biased case.) 653 */ 654 if (pmap_min > pmap_max) { 655 if (pmap_min < sel_max) 656 pmap_max = pmap_min; 657 else if (pmap_max > sel_min) 658 pmap_min = pmap_max; 659 else 660 return ENOMEM; 661 } 662 663 /* Internal validation. */ 664 KDASSERT(pmap_min <= pmap_max); 665 666 sel_min = pmap_min; 667 sel_max = pmap_max; 668 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 669 sel_min = sel_max - FSPACE_BIASGAP; 670 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 671 sel_max = sel_min + FSPACE_BIASGAP; 672 673 #else 674 675 if (align > PAGE_SIZE) { 676 sel_min = roundup(sel_min, align); 677 sel_max &= ~(align - 1); 678 if (sel_min > sel_max) 679 return ENOMEM; 680 681 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 682 if (bias > 0) { 683 sel_min = roundup(sel_max - FSPACE_BIASGAP, 684 align); 685 } else { 686 sel_max = (sel_min + FSPACE_BIASGAP) & 687 ~(align - 1); 688 } 689 } 690 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 691 sel_min = sel_max - FSPACE_BIASGAP; 692 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 693 sel_max = sel_min + FSPACE_BIASGAP; 694 695 #endif 696 697 if (sel_min > sel_max) 698 return ENOMEM; 699 700 #ifdef DIAGNOSTIC 701 bad = 0; 702 /* Lower boundary check. */ 703 if (sel_min < VMMAP_FREE_START(sel)) { 704 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 705 sel_min, VMMAP_FREE_START(sel)); 706 bad++; 707 } 708 /* Upper boundary check. */ 709 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 710 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 711 sel_max, 712 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 713 bad++; 714 } 715 /* Lower boundary alignment. */ 716 if (align != 0 && (sel_min & (align - 1)) != 0) { 717 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 718 sel_min, align); 719 bad++; 720 } 721 /* Upper boundary alignment. */ 722 if (align != 0 && (sel_max & (align - 1)) != 0) { 723 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 724 sel_max, align); 725 bad++; 726 } 727 /* Lower boundary PMAP_PREFER check. 
*/ 728 if (pmap_align != 0 && align == 0 && 729 (sel_min & (pmap_align - 1)) != pmap_off) { 730 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 731 sel_min, sel_min & (pmap_align - 1), pmap_off); 732 bad++; 733 } 734 /* Upper boundary PMAP_PREFER check. */ 735 if (pmap_align != 0 && align == 0 && 736 (sel_max & (pmap_align - 1)) != pmap_off) { 737 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 738 sel_max, sel_max & (pmap_align - 1), pmap_off); 739 bad++; 740 } 741 742 if (bad) { 743 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 744 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 745 "bias = %d, " 746 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 747 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 748 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 749 } 750 #endif /* DIAGNOSTIC */ 751 752 *min = sel_min; 753 *max = sel_max; 754 return 0; 755 } 756 757 /* 758 * Test if memory starting at addr with sz bytes is free. 759 * 760 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 761 * the space. 762 * If called with prefilled *start_ptr and *end_ptr, they must already be correct. 763 */ 764 int 765 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 766 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 767 vaddr_t addr, vsize_t sz) 768 { 769 struct uvm_addr_state *free; 770 struct uvm_map_addr *atree; 771 struct vm_map_entry *i, *i_end; 772 773 if (addr + sz < addr) 774 return 0; 775 776 /* 777 * Kernel memory above uvm_maxkaddr is considered unavailable. 778 */ 779 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 780 if (addr + sz > uvm_maxkaddr) 781 return 0; 782 } 783 784 atree = &map->addr; 785 786 /* 787 * Fill in first, last, so they point at the entries containing the 788 * first and last address of the range. 789 * Note that if they are not NULL, we don't perform the lookup. 790 */ 791 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 792 if (*start_ptr == NULL) { 793 *start_ptr = uvm_map_entrybyaddr(atree, addr); 794 if (*start_ptr == NULL) 795 return 0; 796 } else 797 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 798 if (*end_ptr == NULL) { 799 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 800 *end_ptr = *start_ptr; 801 else { 802 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 803 if (*end_ptr == NULL) 804 return 0; 805 } 806 } else 807 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 808 809 /* Validation. */ 810 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 811 KDASSERT((*start_ptr)->start <= addr && 812 VMMAP_FREE_END(*start_ptr) > addr && 813 (*end_ptr)->start < addr + sz && 814 VMMAP_FREE_END(*end_ptr) >= addr + sz); 815 816 /* 817 * Check that none of the entries intersect with <addr, addr+sz>. 818 * Also, if an entry belongs to uaddr_exe or uaddr_brk_stack, it is 819 * considered unavailable unless called by those allocators. 820 */ 821 i = *start_ptr; 822 i_end = RBT_NEXT(uvm_map_addr, *end_ptr); 823 for (; i != i_end; 824 i = RBT_NEXT(uvm_map_addr, i)) { 825 if (i->start != i->end && i->end > addr) 826 return 0; 827 828 /* 829 * uaddr_exe and uaddr_brk_stack may only be used 830 * by these allocators and the NULL uaddr (i.e. no 831 * uaddr). 832 * Reject if this requirement is not met.
833 */ 834 if (uaddr != NULL) { 835 free = uvm_map_uaddr_e(map, i); 836 837 if (uaddr != free && free != NULL && 838 (free == map->uaddr_exe || 839 free == map->uaddr_brk_stack)) 840 return 0; 841 } 842 } 843 844 return -1; 845 } 846 847 /* 848 * Invoke each address selector until an address is found. 849 * Will not invoke uaddr_exe. 850 */ 851 int 852 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 853 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 854 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 855 { 856 struct uvm_addr_state *uaddr; 857 int i; 858 859 /* 860 * Allocation for sz bytes at any address, 861 * using the addr selectors in order. 862 */ 863 for (i = 0; i < nitems(map->uaddr_any); i++) { 864 uaddr = map->uaddr_any[i]; 865 866 if (uvm_addr_invoke(map, uaddr, first, last, 867 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 868 return 0; 869 } 870 871 /* Fall back to brk() and stack() address selectors. */ 872 uaddr = map->uaddr_brk_stack; 873 if (uvm_addr_invoke(map, uaddr, first, last, 874 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 875 return 0; 876 877 return ENOMEM; 878 } 879 880 /* Calculate entry augmentation value. */ 881 vsize_t 882 uvm_map_addr_augment_get(struct vm_map_entry *entry) 883 { 884 vsize_t augment; 885 struct vm_map_entry *left, *right; 886 887 augment = entry->fspace; 888 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 889 augment = MAX(augment, left->fspace_augment); 890 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 891 augment = MAX(augment, right->fspace_augment); 892 return augment; 893 } 894 895 /* 896 * Update augmentation data in entry. 897 */ 898 void 899 uvm_map_addr_augment(struct vm_map_entry *entry) 900 { 901 vsize_t augment; 902 903 while (entry != NULL) { 904 /* Calculate value for augmentation. */ 905 augment = uvm_map_addr_augment_get(entry); 906 907 /* 908 * Descend update. 909 * Once we find an entry that already has the correct value, 910 * stop, since it means all its parents will use the correct 911 * value too. 912 */ 913 if (entry->fspace_augment == augment) 914 return; 915 entry->fspace_augment = augment; 916 entry = RBT_PARENT(uvm_map_addr, entry); 917 } 918 } 919 920 /* 921 * uvm_mapanon: establish a valid mapping in map for an anon 922 * 923 * => *addr and sz must be a multiple of PAGE_SIZE. 924 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 925 * => map must be unlocked. 926 * 927 * => align: align vaddr, must be a power-of-2. 928 * Align is only a hint and will be ignored if the alignment fails. 929 */ 930 int 931 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 932 vsize_t align, unsigned int flags) 933 { 934 struct vm_map_entry *first, *last, *entry, *new; 935 struct uvm_map_deadq dead; 936 vm_prot_t prot; 937 vm_prot_t maxprot; 938 vm_inherit_t inherit; 939 int advice; 940 int error; 941 vaddr_t pmap_align, pmap_offset; 942 vaddr_t hint; 943 944 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 945 KASSERT(map != kernel_map); 946 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 947 948 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 949 splassert(IPL_NONE); 950 951 /* 952 * We use pmap_align and pmap_offset as alignment and offset variables. 953 * 954 * Because the align parameter takes precedence over pmap prefer, 955 * the pmap_align will need to be set to align, with pmap_offset = 0, 956 * if pmap_prefer will not align. 957 */ 958 pmap_align = MAX(align, PAGE_SIZE); 959 pmap_offset = 0; 960 961 /* Decode parameters. 
*/ 962 prot = UVM_PROTECTION(flags); 963 maxprot = UVM_MAXPROTECTION(flags); 964 advice = UVM_ADVICE(flags); 965 inherit = UVM_INHERIT(flags); 966 error = 0; 967 hint = trunc_page(*addr); 968 TAILQ_INIT(&dead); 969 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 970 KASSERT((align & (align - 1)) == 0); 971 972 /* Check protection. */ 973 if ((prot & maxprot) != prot) 974 return EACCES; 975 976 /* 977 * Before grabbing the lock, allocate a map entry for later 978 * use to ensure we don't wait for memory while holding the 979 * vm_map_lock. 980 */ 981 new = uvm_mapent_alloc(map, flags); 982 if (new == NULL) 983 return(ENOMEM); 984 985 if (flags & UVM_FLAG_TRYLOCK) { 986 if (vm_map_lock_try(map) == FALSE) { 987 error = EFAULT; 988 goto out; 989 } 990 } else 991 vm_map_lock(map); 992 993 first = last = NULL; 994 if (flags & UVM_FLAG_FIXED) { 995 /* 996 * Fixed location. 997 * 998 * Note: we ignore align, pmap_prefer. 999 * Fill in first, last and *addr. 1000 */ 1001 KASSERT((*addr & PAGE_MASK) == 0); 1002 1003 /* Check that the space is available. */ 1004 if (flags & UVM_FLAG_UNMAP) { 1005 if ((flags & UVM_FLAG_STACK) && 1006 !uvm_map_is_stack_remappable(map, *addr, sz)) { 1007 error = EINVAL; 1008 goto unlock; 1009 } 1010 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1011 } 1012 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1013 error = ENOMEM; 1014 goto unlock; 1015 } 1016 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1017 (align == 0 || (*addr & (align - 1)) == 0) && 1018 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1019 /* 1020 * Address used as hint. 1021 * 1022 * Note: we enforce the alignment restriction, 1023 * but ignore pmap_prefer. 1024 */ 1025 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1026 /* Run selection algorithm for executables. */ 1027 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1028 addr, sz, pmap_align, pmap_offset, prot, hint); 1029 1030 if (error != 0) 1031 goto unlock; 1032 } else { 1033 /* Update freelists from vmspace. */ 1034 uvm_map_vmspace_update(map, &dead, flags); 1035 1036 error = uvm_map_findspace(map, &first, &last, addr, sz, 1037 pmap_align, pmap_offset, prot, hint); 1038 1039 if (error != 0) 1040 goto unlock; 1041 } 1042 1043 /* Double-check if selected address doesn't cause overflow. */ 1044 if (*addr + sz < *addr) { 1045 error = ENOMEM; 1046 goto unlock; 1047 } 1048 1049 /* If we only want a query, return now. */ 1050 if (flags & UVM_FLAG_QUERY) { 1051 error = 0; 1052 goto unlock; 1053 } 1054 1055 /* 1056 * Create new entry. 1057 * first and last may be invalidated after this call. 
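 * (uvm_map_mkentry may unlink entries that are fully replaced and push
 * them onto the dead queue, so first and last must not be dereferenced
 * after it returns.)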
1058 */ 1059 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1060 new); 1061 if (entry == NULL) { 1062 error = ENOMEM; 1063 goto unlock; 1064 } 1065 new = NULL; 1066 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1067 entry->object.uvm_obj = NULL; 1068 entry->offset = 0; 1069 entry->protection = prot; 1070 entry->max_protection = maxprot; 1071 entry->inheritance = inherit; 1072 entry->wired_count = 0; 1073 entry->advice = advice; 1074 if (flags & UVM_FLAG_STACK) { 1075 entry->etype |= UVM_ET_STACK; 1076 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 1077 map->serial++; 1078 } 1079 if (flags & UVM_FLAG_COPYONW) { 1080 entry->etype |= UVM_ET_COPYONWRITE; 1081 if ((flags & UVM_FLAG_OVERLAY) == 0) 1082 entry->etype |= UVM_ET_NEEDSCOPY; 1083 } 1084 if (flags & UVM_FLAG_CONCEAL) 1085 entry->etype |= UVM_ET_CONCEAL; 1086 if (flags & UVM_FLAG_OVERLAY) { 1087 KERNEL_LOCK(); 1088 entry->aref.ar_pageoff = 0; 1089 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1090 KERNEL_UNLOCK(); 1091 } 1092 1093 /* Update map and process statistics. */ 1094 map->size += sz; 1095 ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz); 1096 1097 unlock: 1098 vm_map_unlock(map); 1099 1100 /* 1101 * Remove dead entries. 1102 * 1103 * Dead entries may be the result of merging. 1104 * uvm_map_mkentry may also create dead entries, when it attempts to 1105 * destroy free-space entries. 1106 */ 1107 uvm_unmap_detach(&dead, 0); 1108 out: 1109 if (new) 1110 uvm_mapent_free(new); 1111 return error; 1112 } 1113 1114 /* 1115 * uvm_map: establish a valid mapping in map 1116 * 1117 * => *addr and sz must be a multiple of PAGE_SIZE. 1118 * => map must be unlocked. 1119 * => <uobj,uoffset> value meanings (4 cases): 1120 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1121 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1122 * [3] <uobj,uoffset> == normal mapping 1123 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1124 * 1125 * case [4] is for kernel mappings where we don't know the offset until 1126 * we've found a virtual address. note that kernel object offsets are 1127 * always relative to vm_map_min(kernel_map). 1128 * 1129 * => align: align vaddr, must be a power-of-2. 1130 * Align is only a hint and will be ignored if the alignment fails. 1131 */ 1132 int 1133 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1134 struct uvm_object *uobj, voff_t uoffset, 1135 vsize_t align, unsigned int flags) 1136 { 1137 struct vm_map_entry *first, *last, *entry, *new; 1138 struct uvm_map_deadq dead; 1139 vm_prot_t prot; 1140 vm_prot_t maxprot; 1141 vm_inherit_t inherit; 1142 int advice; 1143 int error; 1144 vaddr_t pmap_align, pmap_offset; 1145 vaddr_t hint; 1146 1147 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1148 splassert(IPL_NONE); 1149 else 1150 splassert(IPL_VM); 1151 1152 /* 1153 * We use pmap_align and pmap_offset as alignment and offset variables. 1154 * 1155 * Because the align parameter takes precedence over pmap prefer, 1156 * the pmap_align will need to be set to align, with pmap_offset = 0, 1157 * if pmap_prefer will not align. 1158 */ 1159 if (uoffset == UVM_UNKNOWN_OFFSET) { 1160 pmap_align = MAX(align, PAGE_SIZE); 1161 pmap_offset = 0; 1162 } else { 1163 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1164 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1165 1166 if (align == 0 || 1167 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1168 /* pmap_offset satisfies align, no change. 
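 * Since align divides pmap_align and pmap_offset is a multiple of
 * align, any address that satisfies the pmap_prefer constraint is
 * automatically aligned to align as well.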
*/ 1169 } else { 1170 /* Align takes precedence over pmap prefer. */ 1171 pmap_align = align; 1172 pmap_offset = 0; 1173 } 1174 } 1175 1176 /* Decode parameters. */ 1177 prot = UVM_PROTECTION(flags); 1178 maxprot = UVM_MAXPROTECTION(flags); 1179 advice = UVM_ADVICE(flags); 1180 inherit = UVM_INHERIT(flags); 1181 error = 0; 1182 hint = trunc_page(*addr); 1183 TAILQ_INIT(&dead); 1184 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1185 KASSERT((align & (align - 1)) == 0); 1186 1187 /* Holes are incompatible with other types of mappings. */ 1188 if (flags & UVM_FLAG_HOLE) { 1189 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1190 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1191 } 1192 1193 /* Unset hint for kernel_map non-fixed allocations. */ 1194 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1195 hint = 0; 1196 1197 /* Check protection. */ 1198 if ((prot & maxprot) != prot) 1199 return EACCES; 1200 1201 if (map == kernel_map && 1202 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1203 panic("uvm_map: kernel map W^X violation requested"); 1204 1205 /* 1206 * Before grabbing the lock, allocate a map entry for later 1207 * use to ensure we don't wait for memory while holding the 1208 * vm_map_lock. 1209 */ 1210 new = uvm_mapent_alloc(map, flags); 1211 if (new == NULL) 1212 return(ENOMEM); 1213 1214 if (flags & UVM_FLAG_TRYLOCK) { 1215 if (vm_map_lock_try(map) == FALSE) { 1216 error = EFAULT; 1217 goto out; 1218 } 1219 } else { 1220 vm_map_lock(map); 1221 } 1222 1223 first = last = NULL; 1224 if (flags & UVM_FLAG_FIXED) { 1225 /* 1226 * Fixed location. 1227 * 1228 * Note: we ignore align, pmap_prefer. 1229 * Fill in first, last and *addr. 1230 */ 1231 KASSERT((*addr & PAGE_MASK) == 0); 1232 1233 /* 1234 * Grow pmap to include allocated address. 1235 * If the growth fails, the allocation will fail too. 1236 */ 1237 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1238 uvm_maxkaddr < (*addr + sz)) { 1239 uvm_map_kmem_grow(map, &dead, 1240 *addr + sz - uvm_maxkaddr, flags); 1241 } 1242 1243 /* Check that the space is available. */ 1244 if (flags & UVM_FLAG_UNMAP) 1245 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1246 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1247 error = ENOMEM; 1248 goto unlock; 1249 } 1250 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1251 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1252 (align == 0 || (*addr & (align - 1)) == 0) && 1253 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1254 /* 1255 * Address used as hint. 1256 * 1257 * Note: we enforce the alignment restriction, 1258 * but ignore pmap_prefer. 1259 */ 1260 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1261 /* Run selection algorithm for executables. */ 1262 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1263 addr, sz, pmap_align, pmap_offset, prot, hint); 1264 1265 /* Grow kernel memory and try again. */ 1266 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1267 uvm_map_kmem_grow(map, &dead, sz, flags); 1268 1269 error = uvm_addr_invoke(map, map->uaddr_exe, 1270 &first, &last, addr, sz, 1271 pmap_align, pmap_offset, prot, hint); 1272 } 1273 1274 if (error != 0) 1275 goto unlock; 1276 } else { 1277 /* Update freelists from vmspace. */ 1278 if (map->flags & VM_MAP_ISVMSPACE) 1279 uvm_map_vmspace_update(map, &dead, flags); 1280 1281 error = uvm_map_findspace(map, &first, &last, addr, sz, 1282 pmap_align, pmap_offset, prot, hint); 1283 1284 /* Grow kernel memory and try again. 
*/ 1285 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1286 uvm_map_kmem_grow(map, &dead, sz, flags); 1287 1288 error = uvm_map_findspace(map, &first, &last, addr, sz, 1289 pmap_align, pmap_offset, prot, hint); 1290 } 1291 1292 if (error != 0) 1293 goto unlock; 1294 } 1295 1296 /* Double-check if selected address doesn't cause overflow. */ 1297 if (*addr + sz < *addr) { 1298 error = ENOMEM; 1299 goto unlock; 1300 } 1301 1302 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1303 uvm_maxkaddr >= *addr + sz); 1304 1305 /* If we only want a query, return now. */ 1306 if (flags & UVM_FLAG_QUERY) { 1307 error = 0; 1308 goto unlock; 1309 } 1310 1311 if (uobj == NULL) 1312 uoffset = 0; 1313 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1314 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1315 uoffset = *addr - vm_map_min(kernel_map); 1316 } 1317 1318 /* 1319 * Create new entry. 1320 * first and last may be invalidated after this call. 1321 */ 1322 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1323 new); 1324 if (entry == NULL) { 1325 error = ENOMEM; 1326 goto unlock; 1327 } 1328 new = NULL; 1329 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1330 entry->object.uvm_obj = uobj; 1331 entry->offset = uoffset; 1332 entry->protection = prot; 1333 entry->max_protection = maxprot; 1334 entry->inheritance = inherit; 1335 entry->wired_count = 0; 1336 entry->advice = advice; 1337 if (flags & UVM_FLAG_STACK) { 1338 entry->etype |= UVM_ET_STACK; 1339 if (flags & UVM_FLAG_UNMAP) 1340 map->serial++; 1341 } 1342 if (uobj) 1343 entry->etype |= UVM_ET_OBJ; 1344 else if (flags & UVM_FLAG_HOLE) 1345 entry->etype |= UVM_ET_HOLE; 1346 if (flags & UVM_FLAG_NOFAULT) 1347 entry->etype |= UVM_ET_NOFAULT; 1348 if (flags & UVM_FLAG_WC) 1349 entry->etype |= UVM_ET_WC; 1350 if (flags & UVM_FLAG_COPYONW) { 1351 entry->etype |= UVM_ET_COPYONWRITE; 1352 if ((flags & UVM_FLAG_OVERLAY) == 0) 1353 entry->etype |= UVM_ET_NEEDSCOPY; 1354 } 1355 if (flags & UVM_FLAG_CONCEAL) 1356 entry->etype |= UVM_ET_CONCEAL; 1357 if (flags & UVM_FLAG_OVERLAY) { 1358 entry->aref.ar_pageoff = 0; 1359 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1360 } 1361 1362 /* Update map and process statistics. */ 1363 if (!(flags & UVM_FLAG_HOLE)) { 1364 map->size += sz; 1365 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1366 ((struct vmspace *)map)->vm_dused += 1367 uvmspace_dused(map, *addr, *addr + sz); 1368 } 1369 } 1370 1371 /* 1372 * Try to merge entry. 1373 * 1374 * Userland allocations are kept separated most of the time. 1375 * Forego the effort of merging what most of the time can't be merged 1376 * and only try the merge if it concerns a kernel entry. 1377 */ 1378 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1379 (map->flags & VM_MAP_ISVMSPACE) == 0) 1380 uvm_mapent_tryjoin(map, entry, &dead); 1381 1382 unlock: 1383 vm_map_unlock(map); 1384 1385 /* 1386 * Remove dead entries. 1387 * 1388 * Dead entries may be the result of merging. 1389 * uvm_map_mkentry may also create dead entries, when it attempts to 1390 * destroy free-space entries. 1391 */ 1392 if (map->flags & VM_MAP_INTRSAFE) 1393 uvm_unmap_detach_intrsafe(&dead); 1394 else 1395 uvm_unmap_detach(&dead, 0); 1396 out: 1397 if (new) 1398 uvm_mapent_free(new); 1399 return error; 1400 } 1401 1402 /* 1403 * True iff e1 and e2 can be joined together. 
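 * Used by uvm_mapent_tryjoin() to decide whether two neighbouring
 * entries may be merged into one.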
1404 */ 1405 int 1406 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1407 struct vm_map_entry *e2) 1408 { 1409 KDASSERT(e1 != NULL && e2 != NULL); 1410 1411 /* Must be the same entry type and not have free memory between. */ 1412 if (e1->etype != e2->etype || e1->end != e2->start) 1413 return 0; 1414 1415 /* Submaps are never joined. */ 1416 if (UVM_ET_ISSUBMAP(e1)) 1417 return 0; 1418 1419 /* Never merge wired memory. */ 1420 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1421 return 0; 1422 1423 /* Protection, inheritance and advice must be equal. */ 1424 if (e1->protection != e2->protection || 1425 e1->max_protection != e2->max_protection || 1426 e1->inheritance != e2->inheritance || 1427 e1->advice != e2->advice) 1428 return 0; 1429 1430 /* If uvm_object: object itself and offsets within object must match. */ 1431 if (UVM_ET_ISOBJ(e1)) { 1432 if (e1->object.uvm_obj != e2->object.uvm_obj) 1433 return 0; 1434 if (e1->offset + (e1->end - e1->start) != e2->offset) 1435 return 0; 1436 } 1437 1438 /* 1439 * Cannot join shared amaps. 1440 * Note: no need to lock amap to look at refs, since we don't care 1441 * about its exact value. 1442 * If it is 1 (i.e. we have the only reference) it will stay there. 1443 */ 1444 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1445 return 0; 1446 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1447 return 0; 1448 1449 /* Apparently, e1 and e2 match. */ 1450 return 1; 1451 } 1452 1453 /* 1454 * Join support function. 1455 * 1456 * Returns the merged entry on success. 1457 * Returns NULL if the merge failed. 1458 */ 1459 struct vm_map_entry* 1460 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1461 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1462 { 1463 struct uvm_addr_state *free; 1464 1465 /* 1466 * Merging is not supported for map entries that 1467 * contain an amap in e1. This should never happen 1468 * anyway, because only kernel entries are merged. 1469 * These do not contain amaps. 1470 * e2 contains no real information in its amap, 1471 * so it can be erased immediately. 1472 */ 1473 KASSERT(e1->aref.ar_amap == NULL); 1474 1475 /* 1476 * Don't drop obj reference: 1477 * uvm_unmap_detach will do this for us. 1478 */ 1479 free = uvm_map_uaddr_e(map, e1); 1480 uvm_mapent_free_remove(map, free, e1); 1481 1482 free = uvm_map_uaddr_e(map, e2); 1483 uvm_mapent_free_remove(map, free, e2); 1484 uvm_mapent_addr_remove(map, e2); 1485 e1->end = e2->end; 1486 e1->guard = e2->guard; 1487 e1->fspace = e2->fspace; 1488 uvm_mapent_free_insert(map, free, e1); 1489 1490 DEAD_ENTRY_PUSH(dead, e2); 1491 return e1; 1492 } 1493 1494 /* 1495 * Attempt forward and backward joining of entry. 1496 * 1497 * Returns entry after joins. 1498 * We are guaranteed that the amap of entry is either non-existent or 1499 * has never been used. 1500 */ 1501 struct vm_map_entry* 1502 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1503 struct uvm_map_deadq *dead) 1504 { 1505 struct vm_map_entry *other; 1506 struct vm_map_entry *merged; 1507 1508 /* Merge with previous entry. */ 1509 other = RBT_PREV(uvm_map_addr, entry); 1510 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1511 merged = uvm_mapent_merge(map, other, entry, dead); 1512 if (merged) 1513 entry = merged; 1514 } 1515 1516 /* 1517 * Merge with next entry. 1518 * 1519 * Because amap can only extend forward and the next entry 1520 * probably contains sensible info, only perform forward merging 1521 * in the absence of an amap.
1522 */ 1523 other = RBT_NEXT(uvm_map_addr, entry); 1524 if (other && entry->aref.ar_amap == NULL && 1525 other->aref.ar_amap == NULL && 1526 uvm_mapent_isjoinable(map, entry, other)) { 1527 merged = uvm_mapent_merge(map, entry, other, dead); 1528 if (merged) 1529 entry = merged; 1530 } 1531 1532 return entry; 1533 } 1534 1535 /* 1536 * Kill entries that are no longer in a map. 1537 */ 1538 void 1539 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1540 { 1541 struct vm_map_entry *entry; 1542 int waitok = flags & UVM_PLA_WAITOK; 1543 1544 if (TAILQ_EMPTY(deadq)) 1545 return; 1546 1547 KERNEL_LOCK(); 1548 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1549 if (waitok) 1550 uvm_pause(); 1551 /* Drop reference to amap, if we've got one. */ 1552 if (entry->aref.ar_amap) 1553 amap_unref(entry->aref.ar_amap, 1554 entry->aref.ar_pageoff, 1555 atop(entry->end - entry->start), 1556 flags & AMAP_REFALL); 1557 1558 /* Drop reference to our backing object, if we've got one. */ 1559 if (UVM_ET_ISSUBMAP(entry)) { 1560 /* ... unlikely to happen, but play it safe */ 1561 uvm_map_deallocate(entry->object.sub_map); 1562 } else if (UVM_ET_ISOBJ(entry) && 1563 entry->object.uvm_obj->pgops->pgo_detach) { 1564 entry->object.uvm_obj->pgops->pgo_detach( 1565 entry->object.uvm_obj); 1566 } 1567 1568 /* Step to next. */ 1569 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1570 uvm_mapent_free(entry); 1571 } 1572 KERNEL_UNLOCK(); 1573 } 1574 1575 void 1576 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1577 { 1578 struct vm_map_entry *entry; 1579 1580 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1581 KASSERT(entry->aref.ar_amap == NULL); 1582 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1583 KASSERT(!UVM_ET_ISOBJ(entry)); 1584 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1585 uvm_mapent_free(entry); 1586 } 1587 } 1588 1589 /* 1590 * Create and insert new entry. 1591 * 1592 * Returned entry contains new addresses and is inserted properly in the tree. 1593 * first and last are (probably) no longer valid. 1594 */ 1595 struct vm_map_entry* 1596 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1597 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1598 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1599 { 1600 struct vm_map_entry *entry, *prev; 1601 struct uvm_addr_state *free; 1602 vaddr_t min, max; /* free space boundaries for new entry */ 1603 1604 KDASSERT(map != NULL); 1605 KDASSERT(first != NULL); 1606 KDASSERT(last != NULL); 1607 KDASSERT(dead != NULL); 1608 KDASSERT(sz > 0); 1609 KDASSERT(addr + sz > addr); 1610 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1611 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1612 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1613 uvm_tree_sanity(map, __FILE__, __LINE__); 1614 1615 min = addr + sz; 1616 max = VMMAP_FREE_END(last); 1617 1618 /* Initialize new entry. */ 1619 if (new == NULL) 1620 entry = uvm_mapent_alloc(map, flags); 1621 else 1622 entry = new; 1623 if (entry == NULL) 1624 return NULL; 1625 entry->offset = 0; 1626 entry->etype = 0; 1627 entry->wired_count = 0; 1628 entry->aref.ar_pageoff = 0; 1629 entry->aref.ar_amap = NULL; 1630 1631 entry->start = addr; 1632 entry->end = min; 1633 entry->guard = 0; 1634 entry->fspace = 0; 1635 1636 /* Reset free space in first. */ 1637 free = uvm_map_uaddr_e(map, first); 1638 uvm_mapent_free_remove(map, free, first); 1639 first->guard = 0; 1640 first->fspace = 0; 1641 1642 /* 1643 * Remove all entries that are fully replaced. 
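 * (Such entries describe only free space, i.e. start == end.)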
1644 * We are iterating using last in reverse order. 1645 */ 1646 for (; first != last; last = prev) { 1647 prev = RBT_PREV(uvm_map_addr, last); 1648 1649 KDASSERT(last->start == last->end); 1650 free = uvm_map_uaddr_e(map, last); 1651 uvm_mapent_free_remove(map, free, last); 1652 uvm_mapent_addr_remove(map, last); 1653 DEAD_ENTRY_PUSH(dead, last); 1654 } 1655 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1656 if (first->start == addr) { 1657 uvm_mapent_addr_remove(map, first); 1658 DEAD_ENTRY_PUSH(dead, first); 1659 } else { 1660 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1661 addr, flags); 1662 } 1663 1664 /* Finally, link in entry. */ 1665 uvm_mapent_addr_insert(map, entry); 1666 uvm_map_fix_space(map, entry, min, max, flags); 1667 1668 uvm_tree_sanity(map, __FILE__, __LINE__); 1669 return entry; 1670 } 1671 1672 1673 /* 1674 * uvm_mapent_alloc: allocate a map entry 1675 */ 1676 struct vm_map_entry * 1677 uvm_mapent_alloc(struct vm_map *map, int flags) 1678 { 1679 struct vm_map_entry *me, *ne; 1680 int pool_flags; 1681 int i; 1682 1683 pool_flags = PR_WAITOK; 1684 if (flags & UVM_FLAG_TRYLOCK) 1685 pool_flags = PR_NOWAIT; 1686 1687 if (map->flags & VM_MAP_INTRSAFE || cold) { 1688 mtx_enter(&uvm_kmapent_mtx); 1689 if (SLIST_EMPTY(&uvm.kentry_free)) { 1690 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1691 &kd_nowait); 1692 if (ne == NULL) 1693 panic("uvm_mapent_alloc: cannot allocate map " 1694 "entry"); 1695 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1696 SLIST_INSERT_HEAD(&uvm.kentry_free, 1697 &ne[i], daddrs.addr_kentry); 1698 } 1699 if (ratecheck(&uvm_kmapent_last_warn_time, 1700 &uvm_kmapent_warn_rate)) 1701 printf("uvm_mapent_alloc: out of static " 1702 "map entries\n"); 1703 } 1704 me = SLIST_FIRST(&uvm.kentry_free); 1705 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1706 uvmexp.kmapent++; 1707 mtx_leave(&uvm_kmapent_mtx); 1708 me->flags = UVM_MAP_STATIC; 1709 } else if (map == kernel_map) { 1710 splassert(IPL_NONE); 1711 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1712 if (me == NULL) 1713 goto out; 1714 me->flags = UVM_MAP_KMEM; 1715 } else { 1716 splassert(IPL_NONE); 1717 me = pool_get(&uvm_map_entry_pool, pool_flags); 1718 if (me == NULL) 1719 goto out; 1720 me->flags = 0; 1721 } 1722 1723 if (me != NULL) { 1724 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1725 } 1726 1727 out: 1728 return(me); 1729 } 1730 1731 /* 1732 * uvm_mapent_free: free map entry 1733 * 1734 * => XXX: static pool for kernel map? 1735 */ 1736 void 1737 uvm_mapent_free(struct vm_map_entry *me) 1738 { 1739 if (me->flags & UVM_MAP_STATIC) { 1740 mtx_enter(&uvm_kmapent_mtx); 1741 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1742 uvmexp.kmapent--; 1743 mtx_leave(&uvm_kmapent_mtx); 1744 } else if (me->flags & UVM_MAP_KMEM) { 1745 splassert(IPL_NONE); 1746 pool_put(&uvm_map_entry_kmem_pool, me); 1747 } else { 1748 splassert(IPL_NONE); 1749 pool_put(&uvm_map_entry_pool, me); 1750 } 1751 } 1752 1753 /* 1754 * uvm_map_lookup_entry: find map entry at or before an address. 1755 * 1756 * => map must at least be read-locked by caller 1757 * => entry is returned in "entry" 1758 * => return value is true if address is in the returned entry 1759 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1760 * returned for those mappings. 
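 * The tree lookup may also return an entry whose free space merely
 * covers the address; the start/end test below rejects that case.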
1761 */ 1762 boolean_t 1763 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1764 struct vm_map_entry **entry) 1765 { 1766 *entry = uvm_map_entrybyaddr(&map->addr, address); 1767 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1768 (*entry)->start <= address && (*entry)->end > address; 1769 } 1770 1771 /* 1772 * Inside a vm_map find the sp address and verify MAP_STACK, and also 1773 * remember low and high regions of that of region which is marked 1774 * with MAP_STACK. Return TRUE. 1775 * If sp isn't in a non-guard MAP_STACK region return FALSE. 1776 */ 1777 boolean_t 1778 uvm_map_check_stack_range(struct proc *p, vaddr_t sp) 1779 { 1780 vm_map_t map = &p->p_vmspace->vm_map; 1781 vm_map_entry_t entry; 1782 1783 if (sp < map->min_offset || sp >= map->max_offset) 1784 return(FALSE); 1785 1786 /* lock map */ 1787 vm_map_lock_read(map); 1788 1789 /* lookup */ 1790 if (!uvm_map_lookup_entry(map, trunc_page(sp), &entry)) { 1791 vm_map_unlock_read(map); 1792 return(FALSE); 1793 } 1794 1795 if ((entry->etype & UVM_ET_STACK) == 0) { 1796 int protection = entry->protection; 1797 1798 vm_map_unlock_read(map); 1799 if (protection == PROT_NONE) 1800 return (TRUE); /* don't update range */ 1801 return (FALSE); 1802 } 1803 p->p_spstart = entry->start; 1804 p->p_spend = entry->end; 1805 p->p_spserial = map->serial; 1806 vm_map_unlock_read(map); 1807 return(TRUE); 1808 } 1809 1810 /* 1811 * Check whether the given address range can be converted to a MAP_STACK 1812 * mapping. 1813 * 1814 * Must be called with map locked. 1815 */ 1816 boolean_t 1817 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz) 1818 { 1819 vaddr_t end = addr + sz; 1820 struct vm_map_entry *first, *iter, *prev = NULL; 1821 1822 if (!uvm_map_lookup_entry(map, addr, &first)) { 1823 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1824 addr, end, map); 1825 return FALSE; 1826 } 1827 1828 /* 1829 * Check that the address range exists and is contiguous. 1830 */ 1831 for (iter = first; iter != NULL && iter->start < end; 1832 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1833 /* 1834 * Make sure that we do not have holes in the range. 1835 */ 1836 #if 0 1837 if (prev != NULL) { 1838 printf("prev->start 0x%lx, prev->end 0x%lx, " 1839 "iter->start 0x%lx, iter->end 0x%lx\n", 1840 prev->start, prev->end, iter->start, iter->end); 1841 } 1842 #endif 1843 1844 if (prev != NULL && prev->end != iter->start) { 1845 printf("map stack 0x%lx-0x%lx of map %p failed: " 1846 "hole in range\n", addr, end, map); 1847 return FALSE; 1848 } 1849 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1850 printf("map stack 0x%lx-0x%lx of map %p failed: " 1851 "hole in range\n", addr, end, map); 1852 return FALSE; 1853 } 1854 } 1855 1856 return TRUE; 1857 } 1858 1859 /* 1860 * Remap the middle-pages of an existing mapping as a stack range. 1861 * If there exists a previous contiguous mapping with the given range 1862 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1863 * mapping is dropped, and a new anon mapping is created and marked as 1864 * a stack. 1865 * 1866 * Must be called with map unlocked. 
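 * (The range is first trimmed inward to page boundaries; if the
 * boundary at the stack-growth end was already page aligned, that page
 * is skipped as well, so only the middle pages are remapped.)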
1867 */ 1868 int 1869 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1870 { 1871 vm_map_t map = &p->p_vmspace->vm_map; 1872 vaddr_t start, end; 1873 int error; 1874 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1875 PROT_READ | PROT_WRITE | PROT_EXEC, 1876 MAP_INHERIT_COPY, MADV_NORMAL, 1877 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1878 UVM_FLAG_COPYONW); 1879 1880 start = round_page(addr); 1881 end = trunc_page(addr + sz); 1882 #ifdef MACHINE_STACK_GROWS_UP 1883 if (end == addr + sz) 1884 end -= PAGE_SIZE; 1885 #else 1886 if (start == addr) 1887 start += PAGE_SIZE; 1888 #endif 1889 1890 if (start < map->min_offset || end >= map->max_offset || end < start) 1891 return EINVAL; 1892 1893 error = uvm_mapanon(map, &start, end - start, 0, flags); 1894 if (error != 0) 1895 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1896 1897 return error; 1898 } 1899 1900 /* 1901 * uvm_map_pie: return a random load address for a PIE executable 1902 * properly aligned. 1903 */ 1904 #ifndef VM_PIE_MAX_ADDR 1905 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1906 #endif 1907 1908 #ifndef VM_PIE_MIN_ADDR 1909 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1910 #endif 1911 1912 #ifndef VM_PIE_MIN_ALIGN 1913 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1914 #endif 1915 1916 vaddr_t 1917 uvm_map_pie(vaddr_t align) 1918 { 1919 vaddr_t addr, space, min; 1920 1921 align = MAX(align, VM_PIE_MIN_ALIGN); 1922 1923 /* round up to next alignment */ 1924 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1925 1926 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1927 return (align); 1928 1929 space = (VM_PIE_MAX_ADDR - min) / align; 1930 space = MIN(space, (u_int32_t)-1); 1931 1932 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1933 addr += min; 1934 1935 return (addr); 1936 } 1937 1938 void 1939 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1940 { 1941 struct uvm_map_deadq dead; 1942 1943 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1944 (end & (vaddr_t)PAGE_MASK) == 0); 1945 TAILQ_INIT(&dead); 1946 vm_map_lock(map); 1947 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1948 vm_map_unlock(map); 1949 1950 if (map->flags & VM_MAP_INTRSAFE) 1951 uvm_unmap_detach_intrsafe(&dead); 1952 else 1953 uvm_unmap_detach(&dead, 0); 1954 } 1955 1956 /* 1957 * Mark entry as free. 1958 * 1959 * entry will be put on the dead list. 1960 * The free space will be merged into the previous or a new entry, 1961 * unless markfree is false. 1962 */ 1963 void 1964 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1965 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1966 boolean_t markfree) 1967 { 1968 struct uvm_addr_state *free; 1969 struct vm_map_entry *prev; 1970 vaddr_t addr; /* Start of freed range. */ 1971 vaddr_t end; /* End of freed range. */ 1972 1973 prev = *prev_ptr; 1974 if (prev == entry) 1975 *prev_ptr = prev = NULL; 1976 1977 if (prev == NULL || 1978 VMMAP_FREE_END(prev) != entry->start) 1979 prev = RBT_PREV(uvm_map_addr, entry); 1980 1981 /* Entry is describing only free memory and has nothing to drain into. 
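 * In that case the entry is the leftmost one in the tree and describes
 * only free space, so it is kept in place and simply becomes the new
 * prev hint instead of being moved to the dead queue.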
*/ 1982 if (prev == NULL && entry->start == entry->end && markfree) { 1983 *prev_ptr = entry; 1984 return; 1985 } 1986 1987 addr = entry->start; 1988 end = VMMAP_FREE_END(entry); 1989 free = uvm_map_uaddr_e(map, entry); 1990 uvm_mapent_free_remove(map, free, entry); 1991 uvm_mapent_addr_remove(map, entry); 1992 DEAD_ENTRY_PUSH(dead, entry); 1993 1994 if (markfree) { 1995 if (prev) { 1996 free = uvm_map_uaddr_e(map, prev); 1997 uvm_mapent_free_remove(map, free, prev); 1998 } 1999 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 2000 } 2001 } 2002 2003 /* 2004 * Unwire and release referenced amap and object from map entry. 2005 */ 2006 void 2007 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2008 { 2009 /* Unwire removed map entry. */ 2010 if (VM_MAPENT_ISWIRED(entry)) { 2011 KERNEL_LOCK(); 2012 entry->wired_count = 0; 2013 uvm_fault_unwire_locked(map, entry->start, entry->end); 2014 KERNEL_UNLOCK(); 2015 } 2016 2017 /* Entry-type specific code. */ 2018 if (UVM_ET_ISHOLE(entry)) { 2019 /* Nothing to be done for holes. */ 2020 } else if (map->flags & VM_MAP_INTRSAFE) { 2021 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2022 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2023 pmap_kremove(entry->start, entry->end - entry->start); 2024 } else if (UVM_ET_ISOBJ(entry) && 2025 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2026 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2027 /* 2028 * Note: kernel object mappings are currently used in 2029 * two ways: 2030 * [1] "normal" mappings of pages in the kernel object 2031 * [2] uvm_km_valloc'd allocations in which we 2032 * pmap_enter in some non-kernel-object page 2033 * (e.g. vmapbuf). 2034 * 2035 * for case [1], we need to remove the mapping from 2036 * the pmap and then remove the page from the kernel 2037 * object (because, once pages in a kernel object are 2038 * unmapped they are no longer needed, unlike, say, 2039 * a vnode where you might want the data to persist 2040 * until flushed out of a queue). 2041 * 2042 * for case [2], we need to remove the mapping from 2043 * the pmap. there shouldn't be any pages at the 2044 * specified offset in the kernel object [but it 2045 * doesn't hurt to call uvm_km_pgremove just to be 2046 * safe?] 2047 * 2048 * uvm_km_pgremove currently does the following: 2049 * for pages in the kernel object range: 2050 * - drops the swap slot 2051 * - uvm_pagefree the page 2052 * 2053 * note there is version of uvm_km_pgremove() that 2054 * is used for "intrsafe" objects. 2055 */ 2056 /* 2057 * remove mappings from pmap and drop the pages 2058 * from the object. offsets are always relative 2059 * to vm_map_min(kernel_map). 2060 */ 2061 pmap_remove(pmap_kernel(), entry->start, entry->end); 2062 uvm_km_pgremove(entry->object.uvm_obj, 2063 entry->start - vm_map_min(kernel_map), 2064 entry->end - vm_map_min(kernel_map)); 2065 2066 /* 2067 * null out kernel_object reference, we've just 2068 * dropped it 2069 */ 2070 entry->etype &= ~UVM_ET_OBJ; 2071 entry->object.uvm_obj = NULL; /* to be safe */ 2072 } else { 2073 /* remove mappings the standard way. */ 2074 pmap_remove(map->pmap, entry->start, entry->end); 2075 } 2076 } 2077 2078 /* 2079 * Remove all entries from start to end. 2080 * 2081 * If remove_holes, then remove ET_HOLE entries as well. 2082 * If markfree, entry will be properly marked free, otherwise, no replacement 2083 * entry will be put in the tree (corrupting the tree). 
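 *
 * The canonical calling sequence is the one used by uvm_unmap() above:
 * set up a dead-entry queue, hold the map lock across the removal, and
 * detach the dead entries only after the lock has been dropped:
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);
 *
 * (Intr-safe maps use uvm_unmap_detach_intrsafe() instead.)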
2084 */ 2085 void 2086 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2087 struct uvm_map_deadq *dead, boolean_t remove_holes, 2088 boolean_t markfree) 2089 { 2090 struct vm_map_entry *prev_hint, *next, *entry; 2091 2092 start = MAX(start, map->min_offset); 2093 end = MIN(end, map->max_offset); 2094 if (start >= end) 2095 return; 2096 2097 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2098 splassert(IPL_NONE); 2099 else 2100 splassert(IPL_VM); 2101 2102 /* Find first affected entry. */ 2103 entry = uvm_map_entrybyaddr(&map->addr, start); 2104 KDASSERT(entry != NULL && entry->start <= start); 2105 if (entry->end <= start && markfree) 2106 entry = RBT_NEXT(uvm_map_addr, entry); 2107 else 2108 UVM_MAP_CLIP_START(map, entry, start); 2109 2110 /* 2111 * Iterate entries until we reach end address. 2112 * prev_hint hints where the freed space can be appended to. 2113 */ 2114 prev_hint = NULL; 2115 for (; entry != NULL && entry->start < end; entry = next) { 2116 KDASSERT(entry->start >= start); 2117 if (entry->end > end || !markfree) 2118 UVM_MAP_CLIP_END(map, entry, end); 2119 KDASSERT(entry->start >= start && entry->end <= end); 2120 next = RBT_NEXT(uvm_map_addr, entry); 2121 2122 /* Don't remove holes unless asked to do so. */ 2123 if (UVM_ET_ISHOLE(entry)) { 2124 if (!remove_holes) { 2125 prev_hint = entry; 2126 continue; 2127 } 2128 } 2129 2130 /* A stack has been removed.. */ 2131 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2132 map->serial++; 2133 2134 /* Kill entry. */ 2135 uvm_unmap_kill_entry(map, entry); 2136 2137 /* Update space usage. */ 2138 if ((map->flags & VM_MAP_ISVMSPACE) && 2139 entry->object.uvm_obj == NULL && 2140 !UVM_ET_ISHOLE(entry)) { 2141 ((struct vmspace *)map)->vm_dused -= 2142 uvmspace_dused(map, entry->start, entry->end); 2143 } 2144 if (!UVM_ET_ISHOLE(entry)) 2145 map->size -= entry->end - entry->start; 2146 2147 /* Actual removal of entry. */ 2148 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2149 } 2150 2151 pmap_update(vm_map_pmap(map)); 2152 2153 #ifdef VMMAP_DEBUG 2154 if (markfree) { 2155 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2156 entry != NULL && entry->start < end; 2157 entry = RBT_NEXT(uvm_map_addr, entry)) { 2158 KDASSERT(entry->end <= start || 2159 entry->start == entry->end || 2160 UVM_ET_ISHOLE(entry)); 2161 } 2162 } else { 2163 vaddr_t a; 2164 for (a = start; a < end; a += PAGE_SIZE) 2165 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2166 } 2167 #endif 2168 } 2169 2170 /* 2171 * Mark all entries from first until end (exclusive) as pageable. 2172 * 2173 * Lock must be exclusive on entry and will not be touched. 2174 */ 2175 void 2176 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2177 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2178 { 2179 struct vm_map_entry *iter; 2180 2181 for (iter = first; iter != end; 2182 iter = RBT_NEXT(uvm_map_addr, iter)) { 2183 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2184 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2185 continue; 2186 2187 iter->wired_count = 0; 2188 uvm_fault_unwire_locked(map, iter->start, iter->end); 2189 } 2190 } 2191 2192 /* 2193 * Mark all entries from first until end (exclusive) as wired. 2194 * 2195 * Lockflags determines the lock state on return from this function. 2196 * Lock must be exclusive on entry. 
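 *
 * Unless UVM_LK_EXIT is set, the map lock is released before returning.
 * Sketch of a whole-map caller, as in uvm_map_pageable_all() below:
 *
 *	vm_map_lock(map);
 *	...
 *	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
 *	    NULL, map->min_offset, map->max_offset, 0);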
2197 */ 2198 int 2199 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2200 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2201 int lockflags) 2202 { 2203 struct vm_map_entry *iter; 2204 #ifdef DIAGNOSTIC 2205 unsigned int timestamp_save; 2206 #endif 2207 int error; 2208 2209 /* 2210 * Wire pages in two passes: 2211 * 2212 * 1: holding the write lock, we create any anonymous maps that need 2213 * to be created. then we clip each map entry to the region to 2214 * be wired and increment its wiring count. 2215 * 2216 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2217 * in the pages for any newly wired area (wired_count == 1). 2218 * 2219 * downgrading to a read lock for uvm_fault_wire avoids a possible 2220 * deadlock with another thread that may have faulted on one of 2221 * the pages to be wired (it would mark the page busy, blocking 2222 * us, then in turn block on the map lock that we hold). 2223 * because we keep the read lock on the map, the copy-on-write 2224 * status of the entries we modify here cannot change. 2225 */ 2226 for (iter = first; iter != end; 2227 iter = RBT_NEXT(uvm_map_addr, iter)) { 2228 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2229 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2230 iter->protection == PROT_NONE) 2231 continue; 2232 2233 /* 2234 * Perform actions of vm_map_lookup that need the write lock. 2235 * - create an anonymous map for copy-on-write 2236 * - anonymous map for zero-fill 2237 * Skip submaps. 2238 */ 2239 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2240 UVM_ET_ISNEEDSCOPY(iter) && 2241 ((iter->protection & PROT_WRITE) || 2242 iter->object.uvm_obj == NULL)) { 2243 amap_copy(map, iter, M_WAITOK, 2244 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2245 iter->start, iter->end); 2246 } 2247 iter->wired_count++; 2248 } 2249 2250 /* 2251 * Pass 2. 2252 */ 2253 #ifdef DIAGNOSTIC 2254 timestamp_save = map->timestamp; 2255 #endif 2256 vm_map_busy(map); 2257 vm_map_downgrade(map); 2258 2259 error = 0; 2260 for (iter = first; error == 0 && iter != end; 2261 iter = RBT_NEXT(uvm_map_addr, iter)) { 2262 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2263 iter->protection == PROT_NONE) 2264 continue; 2265 2266 error = uvm_fault_wire(map, iter->start, iter->end, 2267 iter->protection); 2268 } 2269 2270 if (error) { 2271 /* 2272 * uvm_fault_wire failure 2273 * 2274 * Reacquire lock and undo our work. 2275 */ 2276 vm_map_upgrade(map); 2277 vm_map_unbusy(map); 2278 #ifdef DIAGNOSTIC 2279 if (timestamp_save != map->timestamp) 2280 panic("uvm_map_pageable_wire: stale map"); 2281 #endif 2282 2283 /* 2284 * first is no longer needed to restart loops. 2285 * Use it as iterator to unmap successful mappings. 2286 */ 2287 for (; first != iter; 2288 first = RBT_NEXT(uvm_map_addr, first)) { 2289 if (UVM_ET_ISHOLE(first) || 2290 first->start == first->end || 2291 first->protection == PROT_NONE) 2292 continue; 2293 2294 first->wired_count--; 2295 if (!VM_MAPENT_ISWIRED(first)) { 2296 uvm_fault_unwire_locked(map, 2297 iter->start, iter->end); 2298 } 2299 } 2300 2301 /* decrease counter in the rest of the entries */ 2302 for (; iter != end; 2303 iter = RBT_NEXT(uvm_map_addr, iter)) { 2304 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2305 iter->protection == PROT_NONE) 2306 continue; 2307 2308 iter->wired_count--; 2309 } 2310 2311 if ((lockflags & UVM_LK_EXIT) == 0) 2312 vm_map_unlock(map); 2313 return error; 2314 } 2315 2316 /* We are currently holding a read lock. 
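 * It was taken in pass 2 via vm_map_downgrade(); depending on UVM_LK_EXIT
 * we either drop it here or upgrade back to the exclusive lock that the
 * caller expects to still hold.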
*/ 2317 if ((lockflags & UVM_LK_EXIT) == 0) { 2318 vm_map_unbusy(map); 2319 vm_map_unlock_read(map); 2320 } else { 2321 vm_map_upgrade(map); 2322 vm_map_unbusy(map); 2323 #ifdef DIAGNOSTIC 2324 if (timestamp_save != map->timestamp) 2325 panic("uvm_map_pageable_wire: stale map"); 2326 #endif 2327 } 2328 return 0; 2329 } 2330 2331 /* 2332 * uvm_map_pageable: set pageability of a range in a map. 2333 * 2334 * Flags: 2335 * UVM_LK_ENTER: map is already locked by caller 2336 * UVM_LK_EXIT: don't unlock map on exit 2337 * 2338 * The full range must be in use (entries may not have fspace != 0). 2339 * UVM_ET_HOLE counts as unmapped. 2340 */ 2341 int 2342 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2343 boolean_t new_pageable, int lockflags) 2344 { 2345 struct vm_map_entry *first, *last, *tmp; 2346 int error; 2347 2348 start = trunc_page(start); 2349 end = round_page(end); 2350 2351 if (start > end) 2352 return EINVAL; 2353 if (start == end) 2354 return 0; /* nothing to do */ 2355 if (start < map->min_offset) 2356 return EFAULT; /* why? see first XXX below */ 2357 if (end > map->max_offset) 2358 return EINVAL; /* why? see second XXX below */ 2359 2360 KASSERT(map->flags & VM_MAP_PAGEABLE); 2361 if ((lockflags & UVM_LK_ENTER) == 0) 2362 vm_map_lock(map); 2363 2364 /* 2365 * Find first entry. 2366 * 2367 * Initial test on start is different, because of the different 2368 * error returned. Rest is tested further down. 2369 */ 2370 first = uvm_map_entrybyaddr(&map->addr, start); 2371 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2372 /* 2373 * XXX if the first address is not mapped, it is EFAULT? 2374 */ 2375 error = EFAULT; 2376 goto out; 2377 } 2378 2379 /* Check that the range has no holes. */ 2380 for (last = first; last != NULL && last->start < end; 2381 last = RBT_NEXT(uvm_map_addr, last)) { 2382 if (UVM_ET_ISHOLE(last) || 2383 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2384 /* 2385 * XXX unmapped memory in range, why is it EINVAL 2386 * instead of EFAULT? 2387 */ 2388 error = EINVAL; 2389 goto out; 2390 } 2391 } 2392 2393 /* 2394 * Last ended at the first entry after the range. 2395 * Move back one step. 2396 * 2397 * Note that last may be NULL. 2398 */ 2399 if (last == NULL) { 2400 last = RBT_MAX(uvm_map_addr, &map->addr); 2401 if (last->end < end) { 2402 error = EINVAL; 2403 goto out; 2404 } 2405 } else { 2406 KASSERT(last != first); 2407 last = RBT_PREV(uvm_map_addr, last); 2408 } 2409 2410 /* Wire/unwire pages here. */ 2411 if (new_pageable) { 2412 /* 2413 * Mark pageable. 2414 * entries that are not wired are untouched. 2415 */ 2416 if (VM_MAPENT_ISWIRED(first)) 2417 UVM_MAP_CLIP_START(map, first, start); 2418 /* 2419 * Split last at end. 2420 * Make tmp be the first entry after what is to be touched. 2421 * If last is not wired, don't touch it. 2422 */ 2423 if (VM_MAPENT_ISWIRED(last)) { 2424 UVM_MAP_CLIP_END(map, last, end); 2425 tmp = RBT_NEXT(uvm_map_addr, last); 2426 } else 2427 tmp = last; 2428 2429 uvm_map_pageable_pgon(map, first, tmp, start, end); 2430 error = 0; 2431 2432 out: 2433 if ((lockflags & UVM_LK_EXIT) == 0) 2434 vm_map_unlock(map); 2435 return error; 2436 } else { 2437 /* 2438 * Mark entries wired. 2439 * entries are always touched (because recovery needs this). 2440 */ 2441 if (!VM_MAPENT_ISWIRED(first)) 2442 UVM_MAP_CLIP_START(map, first, start); 2443 /* 2444 * Split last at end. 2445 * Make tmp be the first entry after what is to be touched. 2446 * If last is not wired, don't touch it. 
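 * (Note: in this wiring branch the test below is reversed with respect to
 * the unwiring case above: a not-yet-wired last entry is clipped and
 * included in the range passed to uvm_map_pageable_wire(), while an
 * already-wired last entry is left untouched.)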
2447 */ 2448 if (!VM_MAPENT_ISWIRED(last)) { 2449 UVM_MAP_CLIP_END(map, last, end); 2450 tmp = RBT_NEXT(uvm_map_addr, last); 2451 } else 2452 tmp = last; 2453 2454 return uvm_map_pageable_wire(map, first, tmp, start, end, 2455 lockflags); 2456 } 2457 } 2458 2459 /* 2460 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2461 * all mapped regions. 2462 * 2463 * Map must not be locked. 2464 * If no flags are specified, all ragions are unwired. 2465 */ 2466 int 2467 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2468 { 2469 vsize_t size; 2470 struct vm_map_entry *iter; 2471 2472 KASSERT(map->flags & VM_MAP_PAGEABLE); 2473 vm_map_lock(map); 2474 2475 if (flags == 0) { 2476 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2477 NULL, map->min_offset, map->max_offset); 2478 2479 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2480 vm_map_unlock(map); 2481 return 0; 2482 } 2483 2484 if (flags & MCL_FUTURE) 2485 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2486 if (!(flags & MCL_CURRENT)) { 2487 vm_map_unlock(map); 2488 return 0; 2489 } 2490 2491 /* 2492 * Count number of pages in all non-wired entries. 2493 * If the number exceeds the limit, abort. 2494 */ 2495 size = 0; 2496 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2497 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2498 continue; 2499 2500 size += iter->end - iter->start; 2501 } 2502 2503 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2504 vm_map_unlock(map); 2505 return ENOMEM; 2506 } 2507 2508 /* XXX non-pmap_wired_count case must be handled by caller */ 2509 #ifdef pmap_wired_count 2510 if (limit != 0 && 2511 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2512 vm_map_unlock(map); 2513 return ENOMEM; 2514 } 2515 #endif 2516 2517 /* 2518 * uvm_map_pageable_wire will release lcok 2519 */ 2520 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2521 NULL, map->min_offset, map->max_offset, 0); 2522 } 2523 2524 /* 2525 * Initialize map. 2526 * 2527 * Allocates sufficient entries to describe the free memory in the map. 2528 */ 2529 void 2530 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2531 { 2532 int i; 2533 2534 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2535 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2536 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2537 2538 /* 2539 * Update parameters. 2540 * 2541 * This code handles (vaddr_t)-1 and other page mask ending addresses 2542 * properly. 2543 * We lose the top page if the full virtual address space is used. 2544 */ 2545 if (max & (vaddr_t)PAGE_MASK) { 2546 max += 1; 2547 if (max == 0) /* overflow */ 2548 max -= PAGE_SIZE; 2549 } 2550 2551 RBT_INIT(uvm_map_addr, &map->addr); 2552 map->uaddr_exe = NULL; 2553 for (i = 0; i < nitems(map->uaddr_any); ++i) 2554 map->uaddr_any[i] = NULL; 2555 map->uaddr_brk_stack = NULL; 2556 2557 map->size = 0; 2558 map->ref_count = 0; 2559 map->min_offset = min; 2560 map->max_offset = max; 2561 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2562 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2563 map->flags = flags; 2564 map->timestamp = 0; 2565 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2566 mtx_init(&map->mtx, IPL_VM); 2567 mtx_init(&map->flags_lock, IPL_VM); 2568 2569 /* Configure the allocators. */ 2570 if (flags & VM_MAP_ISVMSPACE) 2571 uvm_map_setup_md(map); 2572 else 2573 map->uaddr_any[3] = &uaddr_kbootstrap; 2574 2575 /* 2576 * Fill map entries. 
2577 * We do not need to write-lock the map here because only the current 2578 * thread sees it right now. Initialize ref_count to 0 above to avoid 2579 * bogus triggering of lock-not-held assertions. 2580 */ 2581 uvm_map_setup_entries(map); 2582 uvm_tree_sanity(map, __FILE__, __LINE__); 2583 map->ref_count = 1; 2584 } 2585 2586 /* 2587 * Destroy the map. 2588 * 2589 * This is the inverse operation to uvm_map_setup. 2590 */ 2591 void 2592 uvm_map_teardown(struct vm_map *map) 2593 { 2594 struct uvm_map_deadq dead_entries; 2595 struct vm_map_entry *entry, *tmp; 2596 #ifdef VMMAP_DEBUG 2597 size_t numq, numt; 2598 #endif 2599 int i; 2600 2601 KERNEL_ASSERT_LOCKED(); 2602 KERNEL_UNLOCK(); 2603 KERNEL_ASSERT_UNLOCKED(); 2604 2605 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2606 2607 /* Remove address selectors. */ 2608 uvm_addr_destroy(map->uaddr_exe); 2609 map->uaddr_exe = NULL; 2610 for (i = 0; i < nitems(map->uaddr_any); i++) { 2611 uvm_addr_destroy(map->uaddr_any[i]); 2612 map->uaddr_any[i] = NULL; 2613 } 2614 uvm_addr_destroy(map->uaddr_brk_stack); 2615 map->uaddr_brk_stack = NULL; 2616 2617 /* 2618 * Remove entries. 2619 * 2620 * The following is based on graph breadth-first search. 2621 * 2622 * In color terms: 2623 * - the dead_entries set contains all nodes that are reachable 2624 * (i.e. both the black and the grey nodes) 2625 * - any entry not in dead_entries is white 2626 * - any entry that appears in dead_entries before entry, 2627 * is black, the rest is grey. 2628 * The set [entry, end] is also referred to as the wavefront. 2629 * 2630 * Since the tree is always a fully connected graph, the breadth-first 2631 * search guarantees that each vmmap_entry is visited exactly once. 2632 * The vm_map is broken down in linear time. 2633 */ 2634 TAILQ_INIT(&dead_entries); 2635 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2636 DEAD_ENTRY_PUSH(&dead_entries, entry); 2637 while (entry != NULL) { 2638 sched_pause(yield); 2639 uvm_unmap_kill_entry(map, entry); 2640 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2641 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2642 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2643 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2644 /* Update wave-front. */ 2645 entry = TAILQ_NEXT(entry, dfree.deadq); 2646 } 2647 2648 #ifdef VMMAP_DEBUG 2649 numt = numq = 0; 2650 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2651 numt++; 2652 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2653 numq++; 2654 KASSERT(numt == numq); 2655 #endif 2656 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2657 2658 KERNEL_LOCK(); 2659 2660 pmap_destroy(map->pmap); 2661 map->pmap = NULL; 2662 } 2663 2664 /* 2665 * Populate map with free-memory entries. 2666 * 2667 * Map must be initialized and empty. 2668 */ 2669 void 2670 uvm_map_setup_entries(struct vm_map *map) 2671 { 2672 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2673 2674 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2675 } 2676 2677 /* 2678 * Split entry at given address. 2679 * 2680 * orig: entry that is to be split. 2681 * next: a newly allocated map entry that is not linked. 2682 * split: address at which the split is done. 
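 *
 * Illustrative layout (hypothetical addresses), splitting a mapped entry
 * orig = [0x1000, 0x4000) at split = 0x2000:
 *
 *	before:	orig [0x1000 ................... 0x4000)
 *	after:	orig [0x1000, 0x2000)  next [0x2000, 0x4000)
 *
 * Amap and object references are carried over into next with their
 * offsets advanced by split - orig->start (see the amap_splitref() and
 * pgo_reference() calls below).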
2683 */ 2684 void 2685 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2686 struct vm_map_entry *next, vaddr_t split) 2687 { 2688 struct uvm_addr_state *free, *free_before; 2689 vsize_t adj; 2690 2691 if ((split & PAGE_MASK) != 0) { 2692 panic("uvm_map_splitentry: split address 0x%lx " 2693 "not on page boundary!", split); 2694 } 2695 KDASSERT(map != NULL && orig != NULL && next != NULL); 2696 uvm_tree_sanity(map, __FILE__, __LINE__); 2697 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2698 2699 #ifdef VMMAP_DEBUG 2700 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2701 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2702 #endif /* VMMAP_DEBUG */ 2703 2704 /* 2705 * Free space will change, unlink from free space tree. 2706 */ 2707 free = uvm_map_uaddr_e(map, orig); 2708 uvm_mapent_free_remove(map, free, orig); 2709 2710 adj = split - orig->start; 2711 2712 uvm_mapent_copy(orig, next); 2713 if (split >= orig->end) { 2714 next->etype = 0; 2715 next->offset = 0; 2716 next->wired_count = 0; 2717 next->start = next->end = split; 2718 next->guard = 0; 2719 next->fspace = VMMAP_FREE_END(orig) - split; 2720 next->aref.ar_amap = NULL; 2721 next->aref.ar_pageoff = 0; 2722 orig->guard = MIN(orig->guard, split - orig->end); 2723 orig->fspace = split - VMMAP_FREE_START(orig); 2724 } else { 2725 orig->fspace = 0; 2726 orig->guard = 0; 2727 orig->end = next->start = split; 2728 2729 if (next->aref.ar_amap) { 2730 KERNEL_LOCK(); 2731 amap_splitref(&orig->aref, &next->aref, adj); 2732 KERNEL_UNLOCK(); 2733 } 2734 if (UVM_ET_ISSUBMAP(orig)) { 2735 uvm_map_reference(next->object.sub_map); 2736 next->offset += adj; 2737 } else if (UVM_ET_ISOBJ(orig)) { 2738 if (next->object.uvm_obj->pgops && 2739 next->object.uvm_obj->pgops->pgo_reference) { 2740 KERNEL_LOCK(); 2741 next->object.uvm_obj->pgops->pgo_reference( 2742 next->object.uvm_obj); 2743 KERNEL_UNLOCK(); 2744 } 2745 next->offset += adj; 2746 } 2747 } 2748 2749 /* 2750 * Link next into address tree. 2751 * Link orig and next into free-space tree. 2752 * 2753 * Don't insert 'next' into the addr tree until orig has been linked, 2754 * in case the free-list looks at adjecent entries in the addr tree 2755 * for its decisions. 2756 */ 2757 if (orig->fspace > 0) 2758 free_before = free; 2759 else 2760 free_before = uvm_map_uaddr_e(map, orig); 2761 uvm_mapent_free_insert(map, free_before, orig); 2762 uvm_mapent_addr_insert(map, next); 2763 uvm_mapent_free_insert(map, free, next); 2764 2765 uvm_tree_sanity(map, __FILE__, __LINE__); 2766 } 2767 2768 2769 #ifdef VMMAP_DEBUG 2770 2771 void 2772 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2773 char *file, int line) 2774 { 2775 char* map_special; 2776 2777 if (test) 2778 return; 2779 2780 if (map == kernel_map) 2781 map_special = " (kernel_map)"; 2782 else if (map == kmem_map) 2783 map_special = " (kmem_map)"; 2784 else 2785 map_special = ""; 2786 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2787 line, test_str); 2788 } 2789 2790 /* 2791 * Check that map is sane. 2792 */ 2793 void 2794 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2795 { 2796 struct vm_map_entry *iter; 2797 vaddr_t addr; 2798 vaddr_t min, max, bound; /* Bounds checker. */ 2799 struct uvm_addr_state *free; 2800 2801 addr = vm_map_min(map); 2802 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2803 /* 2804 * Valid start, end. 2805 * Catch overflow for end+fspace. 
2806 */ 2807 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2808 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2809 2810 /* May not be empty. */ 2811 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2812 file, line); 2813 2814 /* Addresses for entry must lie within map boundaries. */ 2815 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2816 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2817 2818 /* Tree may not have gaps. */ 2819 UVM_ASSERT(map, iter->start == addr, file, line); 2820 addr = VMMAP_FREE_END(iter); 2821 2822 /* 2823 * Free space may not cross boundaries, unless the same 2824 * free list is used on both sides of the border. 2825 */ 2826 min = VMMAP_FREE_START(iter); 2827 max = VMMAP_FREE_END(iter); 2828 2829 while (min < max && 2830 (bound = uvm_map_boundary(map, min, max)) != max) { 2831 UVM_ASSERT(map, 2832 uvm_map_uaddr(map, bound - 1) == 2833 uvm_map_uaddr(map, bound), 2834 file, line); 2835 min = bound; 2836 } 2837 2838 free = uvm_map_uaddr_e(map, iter); 2839 if (free) { 2840 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2841 file, line); 2842 } else { 2843 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2844 file, line); 2845 } 2846 } 2847 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2848 } 2849 2850 void 2851 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2852 { 2853 struct vm_map_entry *iter; 2854 vsize_t size; 2855 2856 size = 0; 2857 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2858 if (!UVM_ET_ISHOLE(iter)) 2859 size += iter->end - iter->start; 2860 } 2861 2862 if (map->size != size) 2863 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2864 UVM_ASSERT(map, map->size == size, file, line); 2865 2866 vmspace_validate(map); 2867 } 2868 2869 /* 2870 * This function validates the statistics on vmspace. 2871 */ 2872 void 2873 vmspace_validate(struct vm_map *map) 2874 { 2875 struct vmspace *vm; 2876 struct vm_map_entry *iter; 2877 vaddr_t imin, imax; 2878 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2879 vsize_t stack, heap; /* Measured sizes. */ 2880 2881 if (!(map->flags & VM_MAP_ISVMSPACE)) 2882 return; 2883 2884 vm = (struct vmspace *)map; 2885 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2886 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2887 2888 stack = heap = 0; 2889 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2890 imin = imax = iter->start; 2891 2892 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2893 continue; 2894 2895 /* 2896 * Update stack, heap. 2897 * Keep in mind that (theoretically) the entries of 2898 * userspace and stack may be joined. 2899 */ 2900 while (imin != iter->end) { 2901 /* 2902 * Set imax to the first boundary crossed between 2903 * imin and stack addresses. 
2904 */ 2905 imax = iter->end; 2906 if (imin < stack_begin && imax > stack_begin) 2907 imax = stack_begin; 2908 else if (imin < stack_end && imax > stack_end) 2909 imax = stack_end; 2910 2911 if (imin >= stack_begin && imin < stack_end) 2912 stack += imax - imin; 2913 else 2914 heap += imax - imin; 2915 imin = imax; 2916 } 2917 } 2918 2919 heap >>= PAGE_SHIFT; 2920 if (heap != vm->vm_dused) { 2921 printf("vmspace stack range: 0x%lx-0x%lx\n", 2922 stack_begin, stack_end); 2923 panic("vmspace_validate: vmspace.vm_dused invalid, " 2924 "expected %ld pgs, got %ld pgs in map %p", 2925 heap, vm->vm_dused, 2926 map); 2927 } 2928 } 2929 2930 #endif /* VMMAP_DEBUG */ 2931 2932 /* 2933 * uvm_map_init: init mapping system at boot time. note that we allocate 2934 * and init the static pool of structs vm_map_entry for the kernel here. 2935 */ 2936 void 2937 uvm_map_init(void) 2938 { 2939 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2940 int lcv; 2941 2942 /* now set up static pool of kernel map entries ... */ 2943 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2944 SLIST_INIT(&uvm.kentry_free); 2945 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2946 SLIST_INSERT_HEAD(&uvm.kentry_free, 2947 &kernel_map_entry[lcv], daddrs.addr_kentry); 2948 } 2949 2950 /* initialize the map-related pools. */ 2951 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 2952 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 2953 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 2954 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 2955 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 2956 IPL_VM, 0, "vmmpekpl", NULL); 2957 pool_sethiwat(&uvm_map_entry_pool, 8192); 2958 2959 uvm_addr_init(); 2960 } 2961 2962 #if defined(DDB) 2963 2964 /* 2965 * DDB hooks 2966 */ 2967 2968 /* 2969 * uvm_map_printit: actually prints the map 2970 */ 2971 void 2972 uvm_map_printit(struct vm_map *map, boolean_t full, 2973 int (*pr)(const char *, ...)) 2974 { 2975 struct vmspace *vm; 2976 struct vm_map_entry *entry; 2977 struct uvm_addr_state *free; 2978 int in_free, i; 2979 char buf[8]; 2980 2981 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2982 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2983 map->b_start, map->b_end); 2984 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2985 map->s_start, map->s_end); 2986 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2987 map->size, map->ref_count, map->timestamp, 2988 map->flags); 2989 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2990 pmap_resident_count(map->pmap)); 2991 2992 /* struct vmspace handling. */ 2993 if (map->flags & VM_MAP_ISVMSPACE) { 2994 vm = (struct vmspace *)map; 2995 2996 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2997 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2998 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2999 vm->vm_tsize, vm->vm_dsize); 3000 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 3001 vm->vm_taddr, vm->vm_daddr); 3002 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 3003 vm->vm_maxsaddr, vm->vm_minsaddr); 3004 } 3005 3006 if (!full) 3007 goto print_uaddr; 3008 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3009 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3010 entry, entry->start, entry->end, entry->object.uvm_obj, 3011 (long long)entry->offset, entry->aref.ar_amap, 3012 entry->aref.ar_pageoff); 3013 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, " 3014 "wc=%d, adv=%d\n", 3015 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3016 (entry->etype & UVM_ET_COPYONWRITE) ? 
'T' : 'F', 3017 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3018 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3019 entry->protection, entry->max_protection, 3020 entry->inheritance, entry->wired_count, entry->advice); 3021 3022 free = uvm_map_uaddr_e(map, entry); 3023 in_free = (free != NULL); 3024 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3025 "free=0x%lx-0x%lx\n", 3026 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3027 in_free ? 'T' : 'F', 3028 entry->guard, 3029 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3030 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3031 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3032 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3033 if (free) { 3034 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3035 free->uaddr_minaddr, free->uaddr_maxaddr, 3036 free->uaddr_functions->uaddr_name); 3037 } 3038 } 3039 3040 print_uaddr: 3041 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3042 for (i = 0; i < nitems(map->uaddr_any); i++) { 3043 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3044 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3045 } 3046 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3047 } 3048 3049 /* 3050 * uvm_object_printit: actually prints the object 3051 */ 3052 void 3053 uvm_object_printit(uobj, full, pr) 3054 struct uvm_object *uobj; 3055 boolean_t full; 3056 int (*pr)(const char *, ...); 3057 { 3058 struct vm_page *pg; 3059 int cnt = 0; 3060 3061 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3062 uobj, uobj->pgops, uobj->uo_npages); 3063 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3064 (*pr)("refs=<SYSTEM>\n"); 3065 else 3066 (*pr)("refs=%d\n", uobj->uo_refs); 3067 3068 if (!full) { 3069 return; 3070 } 3071 (*pr)(" PAGES <pg,offset>:\n "); 3072 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3073 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3074 if ((cnt % 3) == 2) { 3075 (*pr)("\n "); 3076 } 3077 cnt++; 3078 } 3079 if ((cnt % 3) != 2) { 3080 (*pr)("\n"); 3081 } 3082 } 3083 3084 /* 3085 * uvm_page_printit: actually print the page 3086 */ 3087 static const char page_flagbits[] = 3088 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3089 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3090 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3091 3092 void 3093 uvm_page_printit(pg, full, pr) 3094 struct vm_page *pg; 3095 boolean_t full; 3096 int (*pr)(const char *, ...); 3097 { 3098 struct vm_page *tpg; 3099 struct uvm_object *uobj; 3100 struct pglist *pgl; 3101 3102 (*pr)("PAGE %p:\n", pg); 3103 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3104 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3105 (long long)pg->phys_addr); 3106 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3107 pg->uobject, pg->uanon, (long long)pg->offset); 3108 #if defined(UVM_PAGE_TRKOWN) 3109 if (pg->pg_flags & PG_BUSY) 3110 (*pr)(" owning thread = %d, tag=%s", 3111 pg->owner, pg->owner_tag); 3112 else 3113 (*pr)(" page not busy, no owner"); 3114 #else 3115 (*pr)(" [page ownership tracking disabled]"); 3116 #endif 3117 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3118 3119 if (!full) 3120 return; 3121 3122 /* cross-verify object/anon */ 3123 if ((pg->pg_flags & PQ_FREE) == 0) { 3124 if (pg->pg_flags & PQ_ANON) { 3125 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3126 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3127 (pg->uanon) ? 
pg->uanon->an_page : NULL); 3128 else 3129 (*pr)(" anon backpointer is OK\n"); 3130 } else { 3131 uobj = pg->uobject; 3132 if (uobj) { 3133 (*pr)(" checking object list\n"); 3134 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3135 if (tpg == pg) { 3136 break; 3137 } 3138 } 3139 if (tpg) 3140 (*pr)(" page found on object list\n"); 3141 else 3142 (*pr)(" >>> PAGE NOT FOUND " 3143 "ON OBJECT LIST! <<<\n"); 3144 } 3145 } 3146 } 3147 3148 /* cross-verify page queue */ 3149 if (pg->pg_flags & PQ_FREE) { 3150 if (uvm_pmr_isfree(pg)) 3151 (*pr)(" page found in uvm_pmemrange\n"); 3152 else 3153 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3154 pgl = NULL; 3155 } else if (pg->pg_flags & PQ_INACTIVE) { 3156 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3157 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3158 } else if (pg->pg_flags & PQ_ACTIVE) { 3159 pgl = &uvm.page_active; 3160 } else { 3161 pgl = NULL; 3162 } 3163 3164 if (pgl) { 3165 (*pr)(" checking pageq list\n"); 3166 TAILQ_FOREACH(tpg, pgl, pageq) { 3167 if (tpg == pg) { 3168 break; 3169 } 3170 } 3171 if (tpg) 3172 (*pr)(" page found on pageq list\n"); 3173 else 3174 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3175 } 3176 } 3177 #endif 3178 3179 /* 3180 * uvm_map_protect: change map protection 3181 * 3182 * => set_max means set max_protection. 3183 * => map must be unlocked. 3184 */ 3185 int 3186 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3187 vm_prot_t new_prot, boolean_t set_max) 3188 { 3189 struct vm_map_entry *first, *iter; 3190 vm_prot_t old_prot; 3191 vm_prot_t mask; 3192 int error; 3193 3194 if (start > end) 3195 return EINVAL; 3196 start = MAX(start, map->min_offset); 3197 end = MIN(end, map->max_offset); 3198 if (start >= end) 3199 return 0; 3200 3201 error = 0; 3202 vm_map_lock(map); 3203 3204 /* 3205 * Set up first and last. 3206 * - first will contain first entry at or after start. 3207 */ 3208 first = uvm_map_entrybyaddr(&map->addr, start); 3209 KDASSERT(first != NULL); 3210 if (first->end <= start) 3211 first = RBT_NEXT(uvm_map_addr, first); 3212 3213 /* First, check for protection violations. */ 3214 for (iter = first; iter != NULL && iter->start < end; 3215 iter = RBT_NEXT(uvm_map_addr, iter)) { 3216 /* Treat memory holes as free space. */ 3217 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3218 continue; 3219 3220 if (UVM_ET_ISSUBMAP(iter)) { 3221 error = EINVAL; 3222 goto out; 3223 } 3224 if ((new_prot & iter->max_protection) != new_prot) { 3225 error = EACCES; 3226 goto out; 3227 } 3228 if (map == kernel_map && 3229 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3230 panic("uvm_map_protect: kernel map W^X violation requested"); 3231 } 3232 3233 /* Fix protections. */ 3234 for (iter = first; iter != NULL && iter->start < end; 3235 iter = RBT_NEXT(uvm_map_addr, iter)) { 3236 /* Treat memory holes as free space. */ 3237 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3238 continue; 3239 3240 old_prot = iter->protection; 3241 3242 /* 3243 * Skip adapting protection iff old and new protection 3244 * are equal. 
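 * For example, with set_max an entry with protection PROT_READ and
 * max_protection PROT_READ|PROT_WRITE is skipped when new_prot is
 * PROT_READ|PROT_WRITE, since neither field would change.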
3245 */ 3246 if (set_max) { 3247 if (old_prot == (new_prot & old_prot) && 3248 iter->max_protection == new_prot) 3249 continue; 3250 } else { 3251 if (old_prot == new_prot) 3252 continue; 3253 } 3254 3255 UVM_MAP_CLIP_START(map, iter, start); 3256 UVM_MAP_CLIP_END(map, iter, end); 3257 3258 if (set_max) { 3259 iter->max_protection = new_prot; 3260 iter->protection &= new_prot; 3261 } else 3262 iter->protection = new_prot; 3263 3264 /* 3265 * update physical map if necessary. worry about copy-on-write 3266 * here -- CHECK THIS XXX 3267 */ 3268 if (iter->protection != old_prot) { 3269 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3270 ~PROT_WRITE : PROT_MASK; 3271 3272 /* update pmap */ 3273 if ((iter->protection & mask) == PROT_NONE && 3274 VM_MAPENT_ISWIRED(iter)) { 3275 /* 3276 * TODO(ariane) this is stupid. wired_count 3277 * is 0 if not wired, otherwise anything 3278 * larger than 0 (incremented once each time 3279 * wire is called). 3280 * Mostly to be able to undo the damage on 3281 * failure. Not the actually be a wired 3282 * refcounter... 3283 * Originally: iter->wired_count--; 3284 * (don't we have to unwire this in the pmap 3285 * as well?) 3286 */ 3287 iter->wired_count = 0; 3288 } 3289 pmap_protect(map->pmap, iter->start, iter->end, 3290 iter->protection & mask); 3291 } 3292 3293 /* 3294 * If the map is configured to lock any future mappings, 3295 * wire this entry now if the old protection was PROT_NONE 3296 * and the new protection is not PROT_NONE. 3297 */ 3298 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3299 VM_MAPENT_ISWIRED(iter) == 0 && 3300 old_prot == PROT_NONE && 3301 new_prot != PROT_NONE) { 3302 if (uvm_map_pageable(map, iter->start, iter->end, 3303 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3304 /* 3305 * If locking the entry fails, remember the 3306 * error if it's the first one. Note we 3307 * still continue setting the protection in 3308 * the map, but it will return the resource 3309 * storage condition regardless. 3310 * 3311 * XXX Ignore what the actual error is, 3312 * XXX just call it a resource shortage 3313 * XXX so that it doesn't get confused 3314 * XXX what uvm_map_protect() itself would 3315 * XXX normally return. 3316 */ 3317 error = ENOMEM; 3318 } 3319 } 3320 } 3321 pmap_update(map->pmap); 3322 3323 out: 3324 vm_map_unlock(map); 3325 return error; 3326 } 3327 3328 /* 3329 * uvmspace_alloc: allocate a vmspace structure. 3330 * 3331 * - structure includes vm_map and pmap 3332 * - XXX: no locking on this structure 3333 * - refcnt set to 1, rest must be init'd by caller 3334 */ 3335 struct vmspace * 3336 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3337 boolean_t remove_holes) 3338 { 3339 struct vmspace *vm; 3340 3341 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3342 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3343 return (vm); 3344 } 3345 3346 /* 3347 * uvmspace_init: initialize a vmspace structure. 3348 * 3349 * - XXX: no locking on this structure 3350 * - refcnt set to 1, rest must be init'd by caller 3351 */ 3352 void 3353 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3354 boolean_t pageable, boolean_t remove_holes) 3355 { 3356 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3357 3358 if (pmap) 3359 pmap_reference(pmap); 3360 else 3361 pmap = pmap_create(); 3362 vm->vm_map.pmap = pmap; 3363 3364 uvm_map_setup(&vm->vm_map, min, max, 3365 (pageable ? 
VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3366 3367 vm->vm_refcnt = 1; 3368 3369 if (remove_holes) 3370 pmap_remove_holes(vm); 3371 } 3372 3373 /* 3374 * uvmspace_share: share a vmspace between two processes 3375 * 3376 * - XXX: no locking on vmspace 3377 * - used for vfork 3378 */ 3379 3380 struct vmspace * 3381 uvmspace_share(struct process *pr) 3382 { 3383 struct vmspace *vm = pr->ps_vmspace; 3384 3385 vm->vm_refcnt++; 3386 return vm; 3387 } 3388 3389 /* 3390 * uvmspace_exec: the process wants to exec a new program 3391 * 3392 * - XXX: no locking on vmspace 3393 */ 3394 3395 void 3396 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3397 { 3398 struct process *pr = p->p_p; 3399 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3400 struct vm_map *map = &ovm->vm_map; 3401 struct uvm_map_deadq dead_entries; 3402 3403 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3404 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3405 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3406 3407 pmap_unuse_final(p); /* before stack addresses go away */ 3408 TAILQ_INIT(&dead_entries); 3409 3410 /* see if more than one process is using this vmspace... */ 3411 if (ovm->vm_refcnt == 1) { 3412 /* 3413 * If pr is the only process using its vmspace then 3414 * we can safely recycle that vmspace for the program 3415 * that is being exec'd. 3416 */ 3417 3418 #ifdef SYSVSHM 3419 /* 3420 * SYSV SHM semantics require us to kill all segments on an exec 3421 */ 3422 if (ovm->vm_shm) 3423 shmexit(ovm); 3424 #endif 3425 3426 /* 3427 * POSIX 1003.1b -- "lock future mappings" is revoked 3428 * when a process execs another program image. 3429 */ 3430 vm_map_lock(map); 3431 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3432 3433 /* 3434 * now unmap the old program 3435 * 3436 * Instead of attempting to keep the map valid, we simply 3437 * nuke all entries and ask uvm_map_setup to reinitialize 3438 * the map to the new boundaries. 3439 * 3440 * uvm_unmap_remove will actually nuke all entries for us 3441 * (as in, not replace them with free-memory entries). 3442 */ 3443 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3444 &dead_entries, TRUE, FALSE); 3445 3446 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3447 3448 /* Nuke statistics and boundaries. */ 3449 memset(&ovm->vm_startcopy, 0, 3450 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3451 3452 3453 if (end & (vaddr_t)PAGE_MASK) { 3454 end += 1; 3455 if (end == 0) /* overflow */ 3456 end -= PAGE_SIZE; 3457 } 3458 3459 /* Setup new boundaries and populate map with entries. */ 3460 map->min_offset = start; 3461 map->max_offset = end; 3462 uvm_map_setup_entries(map); 3463 vm_map_unlock(map); 3464 3465 /* but keep MMU holes unavailable */ 3466 pmap_remove_holes(ovm); 3467 } else { 3468 /* 3469 * pr's vmspace is being shared, so we can't reuse 3470 * it for pr since it is still being used for others. 3471 * allocate a new vmspace for pr 3472 */ 3473 nvm = uvmspace_alloc(start, end, 3474 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3475 3476 /* install new vmspace and drop our ref to the old one. 
*/ 3477 pmap_deactivate(p); 3478 p->p_vmspace = pr->ps_vmspace = nvm; 3479 pmap_activate(p); 3480 3481 uvmspace_free(ovm); 3482 } 3483 3484 /* Release dead entries */ 3485 uvm_unmap_detach(&dead_entries, 0); 3486 } 3487 3488 /* 3489 * uvmspace_free: free a vmspace data structure 3490 * 3491 * - XXX: no locking on vmspace 3492 */ 3493 void 3494 uvmspace_free(struct vmspace *vm) 3495 { 3496 if (--vm->vm_refcnt == 0) { 3497 /* 3498 * lock the map, to wait out all other references to it. delete 3499 * all of the mappings and pages they hold, then call the pmap 3500 * module to reclaim anything left. 3501 */ 3502 #ifdef SYSVSHM 3503 /* Get rid of any SYSV shared memory segments. */ 3504 if (vm->vm_shm != NULL) 3505 shmexit(vm); 3506 #endif 3507 3508 uvm_map_teardown(&vm->vm_map); 3509 pool_put(&uvm_vmspace_pool, vm); 3510 } 3511 } 3512 3513 /* 3514 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3515 * srcmap to the address range [dstaddr, dstaddr + sz) in 3516 * dstmap. 3517 * 3518 * The whole address range in srcmap must be backed by an object 3519 * (no holes). 3520 * 3521 * If successful, the address ranges share memory and the destination 3522 * address range uses the protection flags in prot. 3523 * 3524 * This routine assumes that sz is a multiple of PAGE_SIZE and 3525 * that dstaddr and srcaddr are page-aligned. 3526 */ 3527 int 3528 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3529 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3530 { 3531 int ret = 0; 3532 vaddr_t unmap_end; 3533 vaddr_t dstva; 3534 vsize_t off, len, n = sz; 3535 struct vm_map_entry *first = NULL, *last = NULL; 3536 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3537 struct uvm_map_deadq dead; 3538 3539 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3540 return EINVAL; 3541 3542 TAILQ_INIT(&dead); 3543 vm_map_lock(dstmap); 3544 vm_map_lock_read(srcmap); 3545 3546 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3547 ret = ENOMEM; 3548 goto exit_unlock; 3549 } 3550 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3551 ret = EINVAL; 3552 goto exit_unlock; 3553 } 3554 3555 unmap_end = dstaddr; 3556 for (; src_entry != NULL; 3557 psrc_entry = src_entry, 3558 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3559 /* hole in address space, bail out */ 3560 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3561 break; 3562 if (src_entry->start >= srcaddr + sz) 3563 break; 3564 3565 if (UVM_ET_ISSUBMAP(src_entry)) 3566 panic("uvm_share: encountered a submap (illegal)"); 3567 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3568 UVM_ET_ISNEEDSCOPY(src_entry)) 3569 panic("uvm_share: non-copy_on_write map entries " 3570 "marked needs_copy (illegal)"); 3571 3572 dstva = dstaddr; 3573 if (src_entry->start > srcaddr) { 3574 dstva += src_entry->start - srcaddr; 3575 off = 0; 3576 } else 3577 off = srcaddr - src_entry->start; 3578 3579 if (n < src_entry->end - src_entry->start) 3580 len = n; 3581 else 3582 len = src_entry->end - src_entry->start; 3583 n -= len; 3584 3585 if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot, 3586 srcmap, src_entry, &dead) == NULL) 3587 break; 3588 3589 unmap_end = dstva + len; 3590 if (n == 0) 3591 goto exit_unlock; 3592 } 3593 3594 ret = EINVAL; 3595 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3596 3597 exit_unlock: 3598 vm_map_unlock_read(srcmap); 3599 vm_map_unlock(dstmap); 3600 uvm_unmap_detach(&dead, 0); 3601 3602 return ret; 3603 } 3604 3605 /* 3606 * Clone map entry into 
other map. 3607 * 3608 * Mapping will be placed at dstaddr, for the same length. 3609 * Space must be available. 3610 * Reference counters are incremented. 3611 */ 3612 struct vm_map_entry * 3613 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3614 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3615 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3616 int mapent_flags, int amap_share_flags) 3617 { 3618 struct vm_map_entry *new_entry, *first, *last; 3619 3620 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3621 3622 /* Create new entry (linked in on creation). Fill in first, last. */ 3623 first = last = NULL; 3624 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3625 panic("uvmspace_fork: no space in map for " 3626 "entry in empty map"); 3627 } 3628 new_entry = uvm_map_mkentry(dstmap, first, last, 3629 dstaddr, dstlen, mapent_flags, dead, NULL); 3630 if (new_entry == NULL) 3631 return NULL; 3632 /* old_entry -> new_entry */ 3633 new_entry->object = old_entry->object; 3634 new_entry->offset = old_entry->offset; 3635 new_entry->aref = old_entry->aref; 3636 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3637 new_entry->protection = prot; 3638 new_entry->max_protection = maxprot; 3639 new_entry->inheritance = old_entry->inheritance; 3640 new_entry->advice = old_entry->advice; 3641 3642 /* gain reference to object backing the map (can't be a submap). */ 3643 if (new_entry->aref.ar_amap) { 3644 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3645 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3646 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3647 amap_share_flags); 3648 } 3649 3650 if (UVM_ET_ISOBJ(new_entry) && 3651 new_entry->object.uvm_obj->pgops->pgo_reference) { 3652 new_entry->offset += off; 3653 new_entry->object.uvm_obj->pgops->pgo_reference 3654 (new_entry->object.uvm_obj); 3655 } 3656 3657 return new_entry; 3658 } 3659 3660 struct vm_map_entry * 3661 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3662 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3663 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3664 { 3665 /* 3666 * If old_entry refers to a copy-on-write region that has not yet been 3667 * written to (needs_copy flag is set), then we need to allocate a new 3668 * amap for old_entry. 3669 * 3670 * If we do not do this, and the process owning old_entry does a copy-on 3671 * write later, old_entry and new_entry will refer to different memory 3672 * regions, and the memory between the processes is no longer shared. 3673 * 3674 * [in other words, we need to clear needs_copy] 3675 */ 3676 3677 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3678 /* get our own amap, clears needs_copy */ 3679 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3680 0, 0); 3681 /* XXXCDC: WAITOK??? */ 3682 } 3683 3684 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3685 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3686 } 3687 3688 /* 3689 * share the mapping: this means we want the old and 3690 * new entries to share amaps and backing objects. 
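 * This is the MAP_INHERIT_SHARE case of the inheritance switch in
 * uvmspace_fork() below; uvm_mapent_share() clears any pending needs_copy
 * on the parent entry first, so parent and child keep referencing the
 * same amap instead of diverging on the next copy-on-write fault.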
3691 */ 3692 struct vm_map_entry * 3693 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3694 struct vm_map *old_map, 3695 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3696 { 3697 struct vm_map_entry *new_entry; 3698 3699 new_entry = uvm_mapent_share(new_map, old_entry->start, 3700 old_entry->end - old_entry->start, 0, old_entry->protection, 3701 old_entry->max_protection, old_map, old_entry, dead); 3702 3703 /* 3704 * pmap_copy the mappings: this routine is optional 3705 * but if it is there it will reduce the number of 3706 * page faults in the new proc. 3707 */ 3708 if (!UVM_ET_ISHOLE(new_entry)) 3709 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3710 (new_entry->end - new_entry->start), new_entry->start); 3711 3712 return (new_entry); 3713 } 3714 3715 /* 3716 * copy-on-write the mapping (using mmap's 3717 * MAP_PRIVATE semantics) 3718 * 3719 * allocate new_entry, adjust reference counts. 3720 * (note that new references are read-only). 3721 */ 3722 struct vm_map_entry * 3723 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3724 struct vm_map *old_map, 3725 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3726 { 3727 struct vm_map_entry *new_entry; 3728 boolean_t protect_child; 3729 3730 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3731 old_entry->end - old_entry->start, 0, old_entry->protection, 3732 old_entry->max_protection, old_entry, dead, 0, 0); 3733 3734 new_entry->etype |= 3735 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3736 3737 /* 3738 * the new entry will need an amap. it will either 3739 * need to be copied from the old entry or created 3740 * from scratch (if the old entry does not have an 3741 * amap). can we defer this process until later 3742 * (by setting "needs_copy") or do we need to copy 3743 * the amap now? 3744 * 3745 * we must copy the amap now if any of the following 3746 * conditions hold: 3747 * 1. the old entry has an amap and that amap is 3748 * being shared. this means that the old (parent) 3749 * process is sharing the amap with another 3750 * process. if we do not clear needs_copy here 3751 * we will end up in a situation where both the 3752 * parent and child process are referring to the 3753 * same amap with "needs_copy" set. if the 3754 * parent write-faults, the fault routine will 3755 * clear "needs_copy" in the parent by allocating 3756 * a new amap. this is wrong because the 3757 * parent is supposed to be sharing the old amap 3758 * and the new amap will break that. 3759 * 3760 * 2. if the old entry has an amap and a non-zero 3761 * wire count then we are going to have to call 3762 * amap_cow_now to avoid page faults in the 3763 * parent process. since amap_cow_now requires 3764 * "needs_copy" to be clear we might as well 3765 * clear it here as well. 3766 * 3767 */ 3768 if (old_entry->aref.ar_amap != NULL && 3769 ((amap_flags(old_entry->aref.ar_amap) & 3770 AMAP_SHARED) != 0 || 3771 VM_MAPENT_ISWIRED(old_entry))) { 3772 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3773 0, 0); 3774 /* XXXCDC: M_WAITOK ... ok? */ 3775 } 3776 3777 /* 3778 * if the parent's entry is wired down, then the 3779 * parent process does not want page faults on 3780 * access to that memory. this means that we 3781 * cannot do copy-on-write because we can't write 3782 * protect the old entry. in this case we 3783 * resolve all copy-on-write faults now, using 3784 * amap_cow_now. note that we have already 3785 * allocated any needed amap (above). 
3786 */ 3787 if (VM_MAPENT_ISWIRED(old_entry)) { 3788 /* 3789 * resolve all copy-on-write faults now 3790 * (note that there is nothing to do if 3791 * the old mapping does not have an amap). 3792 * XXX: is it worthwhile to bother with 3793 * pmap_copy in this case? 3794 */ 3795 if (old_entry->aref.ar_amap) 3796 amap_cow_now(new_map, new_entry); 3797 } else { 3798 if (old_entry->aref.ar_amap) { 3799 /* 3800 * setup mappings to trigger copy-on-write faults 3801 * we must write-protect the parent if it has 3802 * an amap and it is not already "needs_copy"... 3803 * if it is already "needs_copy" then the parent 3804 * has already been write-protected by a previous 3805 * fork operation. 3806 * 3807 * if we do not write-protect the parent, then 3808 * we must be sure to write-protect the child 3809 * after the pmap_copy() operation. 3810 * 3811 * XXX: pmap_copy should have some way of telling 3812 * us that it didn't do anything so we can avoid 3813 * calling pmap_protect needlessly. 3814 */ 3815 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3816 if (old_entry->max_protection & PROT_WRITE) { 3817 pmap_protect(old_map->pmap, 3818 old_entry->start, 3819 old_entry->end, 3820 old_entry->protection & 3821 ~PROT_WRITE); 3822 pmap_update(old_map->pmap); 3823 } 3824 old_entry->etype |= UVM_ET_NEEDSCOPY; 3825 } 3826 3827 /* parent must now be write-protected */ 3828 protect_child = FALSE; 3829 } else { 3830 /* 3831 * we only need to protect the child if the 3832 * parent has write access. 3833 */ 3834 if (old_entry->max_protection & PROT_WRITE) 3835 protect_child = TRUE; 3836 else 3837 protect_child = FALSE; 3838 } 3839 /* 3840 * copy the mappings 3841 * XXX: need a way to tell if this does anything 3842 */ 3843 if (!UVM_ET_ISHOLE(new_entry)) 3844 pmap_copy(new_map->pmap, old_map->pmap, 3845 new_entry->start, 3846 (old_entry->end - old_entry->start), 3847 old_entry->start); 3848 3849 /* protect the child's mappings if necessary */ 3850 if (protect_child) { 3851 pmap_protect(new_map->pmap, new_entry->start, 3852 new_entry->end, 3853 new_entry->protection & 3854 ~PROT_WRITE); 3855 } 3856 } 3857 3858 return (new_entry); 3859 } 3860 3861 /* 3862 * zero the mapping: the new entry will be zero initialized 3863 */ 3864 struct vm_map_entry * 3865 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3866 struct vm_map *old_map, 3867 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3868 { 3869 struct vm_map_entry *new_entry; 3870 3871 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3872 old_entry->end - old_entry->start, 0, old_entry->protection, 3873 old_entry->max_protection, old_entry, dead, 0, 0); 3874 3875 new_entry->etype |= 3876 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3877 3878 if (new_entry->aref.ar_amap) { 3879 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3880 atop(new_entry->end - new_entry->start), 0); 3881 new_entry->aref.ar_amap = NULL; 3882 new_entry->aref.ar_pageoff = 0; 3883 } 3884 3885 if (UVM_ET_ISOBJ(new_entry)) { 3886 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3887 new_entry->object.uvm_obj->pgops->pgo_detach( 3888 new_entry->object.uvm_obj); 3889 new_entry->object.uvm_obj = NULL; 3890 new_entry->etype &= ~UVM_ET_OBJ; 3891 } 3892 3893 return (new_entry); 3894 } 3895 3896 /* 3897 * uvmspace_fork: fork a process' main map 3898 * 3899 * => create a new vmspace for child process from parent. 3900 * => parent's map must not be locked. 
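 * => typical use on the fork path (sketch with hypothetical names, where
 *    parent and child are struct process pointers):
 *
 *	child->ps_vmspace = uvmspace_fork(parent);
 *
 *    The returned vmspace already holds its own reference (vm_refcnt 1).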
3901 */ 3902 struct vmspace * 3903 uvmspace_fork(struct process *pr) 3904 { 3905 struct vmspace *vm1 = pr->ps_vmspace; 3906 struct vmspace *vm2; 3907 struct vm_map *old_map = &vm1->vm_map; 3908 struct vm_map *new_map; 3909 struct vm_map_entry *old_entry, *new_entry; 3910 struct uvm_map_deadq dead; 3911 3912 vm_map_lock(old_map); 3913 3914 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3915 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3916 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3917 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3918 vm2->vm_dused = 0; /* Statistic managed by us. */ 3919 new_map = &vm2->vm_map; 3920 vm_map_lock(new_map); 3921 3922 /* go entry-by-entry */ 3923 TAILQ_INIT(&dead); 3924 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3925 if (old_entry->start == old_entry->end) 3926 continue; 3927 3928 /* first, some sanity checks on the old entry */ 3929 if (UVM_ET_ISSUBMAP(old_entry)) { 3930 panic("fork: encountered a submap during fork " 3931 "(illegal)"); 3932 } 3933 3934 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3935 UVM_ET_ISNEEDSCOPY(old_entry)) { 3936 panic("fork: non-copy_on_write map entry marked " 3937 "needs_copy (illegal)"); 3938 } 3939 3940 /* Apply inheritance. */ 3941 switch (old_entry->inheritance) { 3942 case MAP_INHERIT_SHARE: 3943 new_entry = uvm_mapent_forkshared(vm2, new_map, 3944 old_map, old_entry, &dead); 3945 break; 3946 case MAP_INHERIT_COPY: 3947 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3948 old_map, old_entry, &dead); 3949 break; 3950 case MAP_INHERIT_ZERO: 3951 new_entry = uvm_mapent_forkzero(vm2, new_map, 3952 old_map, old_entry, &dead); 3953 break; 3954 default: 3955 continue; 3956 } 3957 3958 /* Update process statistics. */ 3959 if (!UVM_ET_ISHOLE(new_entry)) 3960 new_map->size += new_entry->end - new_entry->start; 3961 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3962 vm2->vm_dused += uvmspace_dused( 3963 new_map, new_entry->start, new_entry->end); 3964 } 3965 } 3966 3967 vm_map_unlock(old_map); 3968 vm_map_unlock(new_map); 3969 3970 /* 3971 * This can actually happen, if multiple entries described a 3972 * space in which an entry was inherited. 3973 */ 3974 uvm_unmap_detach(&dead, 0); 3975 3976 #ifdef SYSVSHM 3977 if (vm1->vm_shm) 3978 shmfork(vm1, vm2); 3979 #endif 3980 3981 return vm2; 3982 } 3983 3984 /* 3985 * uvm_map_hint: return the beginning of the best area suitable for 3986 * creating a new mapping with "prot" protection. 3987 */ 3988 vaddr_t 3989 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3990 vaddr_t maxaddr) 3991 { 3992 vaddr_t addr; 3993 vaddr_t spacing; 3994 3995 #ifdef __i386__ 3996 /* 3997 * If executable skip first two pages, otherwise start 3998 * after data + heap region. 3999 */ 4000 if ((prot & PROT_EXEC) != 0 && 4001 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 4002 addr = (PAGE_SIZE*2) + 4003 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 4004 return (round_page(addr)); 4005 } 4006 #endif 4007 4008 #if defined (__LP64__) 4009 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4010 #else 4011 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4012 #endif 4013 4014 /* 4015 * Start malloc/mmap after the brk. 
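 * The hint is page aligned and randomized within up to "spacing"
 * bytes above the brk area; spacing is halved until the window
 * fits below maxaddr.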
4016 */ 4017 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 4018 addr = MAX(addr, minaddr); 4019 4020 if (addr < maxaddr) { 4021 while (spacing > maxaddr - addr) 4022 spacing >>= 1; 4023 } 4024 addr += arc4random() & spacing; 4025 return (round_page(addr)); 4026 } 4027 4028 /* 4029 * uvm_map_submap: punch down part of a map into a submap 4030 * 4031 * => only the kernel_map is allowed to be submapped 4032 * => the purpose of submapping is to break up the locking granularity 4033 * of a larger map 4034 * => the range specified must have been mapped previously with a uvm_map() 4035 * call [with uobj==NULL] to create a blank map entry in the main map. 4036 * [And it had better still be blank!] 4037 * => maps which contain submaps should never be copied or forked. 4038 * => to remove a submap, use uvm_unmap() on the main map 4039 * and then uvm_map_deallocate() the submap. 4040 * => main map must be unlocked. 4041 * => submap must have been init'd and have a zero reference count. 4042 * [need not be locked as we don't actually reference it] 4043 */ 4044 int 4045 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 4046 struct vm_map *submap) 4047 { 4048 struct vm_map_entry *entry; 4049 int result; 4050 4051 if (start > map->max_offset || end > map->max_offset || 4052 start < map->min_offset || end < map->min_offset) 4053 return EINVAL; 4054 4055 vm_map_lock(map); 4056 4057 if (uvm_map_lookup_entry(map, start, &entry)) { 4058 UVM_MAP_CLIP_START(map, entry, start); 4059 UVM_MAP_CLIP_END(map, entry, end); 4060 } else 4061 entry = NULL; 4062 4063 if (entry != NULL && 4064 entry->start == start && entry->end == end && 4065 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 4066 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 4067 entry->etype |= UVM_ET_SUBMAP; 4068 entry->object.sub_map = submap; 4069 entry->offset = 0; 4070 uvm_map_reference(submap); 4071 result = 0; 4072 } else 4073 result = EINVAL; 4074 4075 vm_map_unlock(map); 4076 return(result); 4077 } 4078 4079 /* 4080 * uvm_map_checkprot: check protection in map 4081 * 4082 * => must allow specific protection in a fully allocated region. 4083 * => map mut be read or write locked by caller. 4084 */ 4085 boolean_t 4086 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 4087 vm_prot_t protection) 4088 { 4089 struct vm_map_entry *entry; 4090 4091 if (start < map->min_offset || end > map->max_offset || start > end) 4092 return FALSE; 4093 if (start == end) 4094 return TRUE; 4095 4096 /* 4097 * Iterate entries. 4098 */ 4099 for (entry = uvm_map_entrybyaddr(&map->addr, start); 4100 entry != NULL && entry->start < end; 4101 entry = RBT_NEXT(uvm_map_addr, entry)) { 4102 /* Fail if a hole is found. */ 4103 if (UVM_ET_ISHOLE(entry) || 4104 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 4105 return FALSE; 4106 4107 /* Check protection. 
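 * Every entry overlapping the range must grant all of the requested
 * protection bits.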
*/ 4108 if ((entry->protection & protection) != protection) 4109 return FALSE; 4110 } 4111 return TRUE; 4112 } 4113 4114 /* 4115 * uvm_map_create: create map 4116 */ 4117 vm_map_t 4118 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4119 { 4120 vm_map_t map; 4121 4122 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4123 map->pmap = pmap; 4124 uvm_map_setup(map, min, max, flags); 4125 return (map); 4126 } 4127 4128 /* 4129 * uvm_map_deallocate: drop reference to a map 4130 * 4131 * => caller must not lock map 4132 * => we will zap map if ref count goes to zero 4133 */ 4134 void 4135 uvm_map_deallocate(vm_map_t map) 4136 { 4137 int c; 4138 struct uvm_map_deadq dead; 4139 4140 c = --map->ref_count; 4141 if (c > 0) { 4142 return; 4143 } 4144 4145 /* 4146 * all references gone. unmap and free. 4147 * 4148 * No lock required: we are only one to access this map. 4149 */ 4150 TAILQ_INIT(&dead); 4151 uvm_tree_sanity(map, __FILE__, __LINE__); 4152 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4153 TRUE, FALSE); 4154 pmap_destroy(map->pmap); 4155 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4156 free(map, M_VMMAP, sizeof *map); 4157 4158 uvm_unmap_detach(&dead, 0); 4159 } 4160 4161 /* 4162 * uvm_map_inherit: set inheritance code for range of addrs in map. 4163 * 4164 * => map must be unlocked 4165 * => note that the inherit code is used during a "fork". see fork 4166 * code for details. 4167 */ 4168 int 4169 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4170 vm_inherit_t new_inheritance) 4171 { 4172 struct vm_map_entry *entry; 4173 4174 switch (new_inheritance) { 4175 case MAP_INHERIT_NONE: 4176 case MAP_INHERIT_COPY: 4177 case MAP_INHERIT_SHARE: 4178 case MAP_INHERIT_ZERO: 4179 break; 4180 default: 4181 return (EINVAL); 4182 } 4183 4184 if (start > end) 4185 return EINVAL; 4186 start = MAX(start, map->min_offset); 4187 end = MIN(end, map->max_offset); 4188 if (start >= end) 4189 return 0; 4190 4191 vm_map_lock(map); 4192 4193 entry = uvm_map_entrybyaddr(&map->addr, start); 4194 if (entry->end > start) 4195 UVM_MAP_CLIP_START(map, entry, start); 4196 else 4197 entry = RBT_NEXT(uvm_map_addr, entry); 4198 4199 while (entry != NULL && entry->start < end) { 4200 UVM_MAP_CLIP_END(map, entry, end); 4201 entry->inheritance = new_inheritance; 4202 entry = RBT_NEXT(uvm_map_addr, entry); 4203 } 4204 4205 vm_map_unlock(map); 4206 return (0); 4207 } 4208 4209 /* 4210 * uvm_map_advice: set advice code for range of addrs in map. 4211 * 4212 * => map must be unlocked 4213 */ 4214 int 4215 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4216 { 4217 struct vm_map_entry *entry; 4218 4219 switch (new_advice) { 4220 case MADV_NORMAL: 4221 case MADV_RANDOM: 4222 case MADV_SEQUENTIAL: 4223 break; 4224 default: 4225 return (EINVAL); 4226 } 4227 4228 if (start > end) 4229 return EINVAL; 4230 start = MAX(start, map->min_offset); 4231 end = MIN(end, map->max_offset); 4232 if (start >= end) 4233 return 0; 4234 4235 vm_map_lock(map); 4236 4237 entry = uvm_map_entrybyaddr(&map->addr, start); 4238 if (entry != NULL && entry->end > start) 4239 UVM_MAP_CLIP_START(map, entry, start); 4240 else if (entry!= NULL) 4241 entry = RBT_NEXT(uvm_map_addr, entry); 4242 4243 /* 4244 * XXXJRT: disallow holes? 
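 * As it stands, addresses in the range that have no map entry are
 * silently skipped.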
4245 */ 4246 while (entry != NULL && entry->start < end) { 4247 UVM_MAP_CLIP_END(map, entry, end); 4248 entry->advice = new_advice; 4249 entry = RBT_NEXT(uvm_map_addr, entry); 4250 } 4251 4252 vm_map_unlock(map); 4253 return (0); 4254 } 4255 4256 /* 4257 * uvm_map_extract: extract a mapping from a map and put it somewhere 4258 * in the kernel_map, setting protection to max_prot. 4259 * 4260 * => map should be unlocked (we will write lock it and kernel_map) 4261 * => returns 0 on success, error code otherwise 4262 * => start must be page aligned 4263 * => len must be page sized 4264 * => flags: 4265 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4266 * Mappings are QREF's. 4267 */ 4268 int 4269 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4270 vaddr_t *dstaddrp, int flags) 4271 { 4272 struct uvm_map_deadq dead; 4273 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4274 vaddr_t dstaddr; 4275 vaddr_t end; 4276 vaddr_t cp_start; 4277 vsize_t cp_len, cp_off; 4278 int error; 4279 4280 TAILQ_INIT(&dead); 4281 end = start + len; 4282 4283 /* 4284 * Sanity check on the parameters. 4285 * Also, since the mapping may not contain gaps, error out if the 4286 * mapped area is not in source map. 4287 */ 4288 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4289 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4290 return EINVAL; 4291 if (start < srcmap->min_offset || end > srcmap->max_offset) 4292 return EINVAL; 4293 4294 /* Initialize dead entries. Handle len == 0 case. */ 4295 if (len == 0) 4296 return 0; 4297 4298 /* Acquire lock on srcmap. */ 4299 vm_map_lock(srcmap); 4300 4301 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4302 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4303 4304 /* Check that the range is contiguous. */ 4305 for (entry = first; entry != NULL && entry->end < end; 4306 entry = RBT_NEXT(uvm_map_addr, entry)) { 4307 if (VMMAP_FREE_END(entry) != entry->end || 4308 UVM_ET_ISHOLE(entry)) { 4309 error = EINVAL; 4310 goto fail; 4311 } 4312 } 4313 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4314 error = EINVAL; 4315 goto fail; 4316 } 4317 4318 /* 4319 * Handle need-copy flag. 4320 */ 4321 for (entry = first; entry != NULL && entry->start < end; 4322 entry = RBT_NEXT(uvm_map_addr, entry)) { 4323 if (UVM_ET_ISNEEDSCOPY(entry)) 4324 amap_copy(srcmap, entry, M_NOWAIT, 4325 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4326 if (UVM_ET_ISNEEDSCOPY(entry)) { 4327 /* 4328 * amap_copy failure 4329 */ 4330 error = ENOMEM; 4331 goto fail; 4332 } 4333 } 4334 4335 /* Lock destination map (kernel_map). */ 4336 vm_map_lock(kernel_map); 4337 4338 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4339 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4340 PROT_NONE, 0) != 0) { 4341 error = ENOMEM; 4342 goto fail2; 4343 } 4344 *dstaddrp = dstaddr; 4345 4346 /* 4347 * We now have srcmap and kernel_map locked. 4348 * dstaddr contains the destination offset in dstmap. 4349 */ 4350 /* step 1: start looping through map entries, performing extraction. */ 4351 for (entry = first; entry != NULL && entry->start < end; 4352 entry = RBT_NEXT(uvm_map_addr, entry)) { 4353 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4354 if (UVM_ET_ISHOLE(entry)) 4355 continue; 4356 4357 /* Calculate uvm_mapent_clone parameters. 
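 * The copied range is the source entry clamped to [start, end);
 * cp_off is the offset of the clamped start within that entry.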
*/ 4358 cp_start = entry->start; 4359 if (cp_start < start) { 4360 cp_off = start - cp_start; 4361 cp_start = start; 4362 } else 4363 cp_off = 0; 4364 cp_len = MIN(entry->end, end) - cp_start; 4365 4366 newentry = uvm_mapent_clone(kernel_map, 4367 cp_start - start + dstaddr, cp_len, cp_off, 4368 entry->protection, entry->max_protection, 4369 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4370 if (newentry == NULL) { 4371 error = ENOMEM; 4372 goto fail2_unmap; 4373 } 4374 kernel_map->size += cp_len; 4375 if (flags & UVM_EXTRACT_FIXPROT) 4376 newentry->protection = newentry->max_protection; 4377 4378 /* 4379 * Step 2: perform pmap copy. 4380 * (Doing this in the loop saves one RB traversal.) 4381 */ 4382 pmap_copy(kernel_map->pmap, srcmap->pmap, 4383 cp_start - start + dstaddr, cp_len, cp_start); 4384 } 4385 pmap_update(kernel_map->pmap); 4386 4387 error = 0; 4388 4389 /* Unmap copied entries on failure. */ 4390 fail2_unmap: 4391 if (error) { 4392 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4393 FALSE, TRUE); 4394 } 4395 4396 /* Release maps, release dead entries. */ 4397 fail2: 4398 vm_map_unlock(kernel_map); 4399 4400 fail: 4401 vm_map_unlock(srcmap); 4402 4403 uvm_unmap_detach(&dead, 0); 4404 4405 return error; 4406 } 4407 4408 /* 4409 * uvm_map_clean: clean out a map range 4410 * 4411 * => valid flags: 4412 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4413 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4414 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4415 * if (flags & PGO_FREE): any cached pages are freed after clean 4416 * => returns an error if any part of the specified range isn't mapped 4417 * => never a need to flush amap layer since the anonymous memory has 4418 * no permanent home, but may deactivate pages there 4419 * => called from sys_msync() and sys_madvise() 4420 * => caller must not write-lock map (read OK). 4421 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4422 */ 4423 4424 int 4425 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4426 { 4427 struct vm_map_entry *first, *entry; 4428 struct vm_amap *amap; 4429 struct vm_anon *anon; 4430 struct vm_page *pg; 4431 struct uvm_object *uobj; 4432 vaddr_t cp_start, cp_end; 4433 int refs; 4434 int error; 4435 boolean_t rv; 4436 4437 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4438 (PGO_FREE|PGO_DEACTIVATE)); 4439 4440 if (start > end || start < map->min_offset || end > map->max_offset) 4441 return EINVAL; 4442 4443 vm_map_lock_read(map); 4444 first = uvm_map_entrybyaddr(&map->addr, start); 4445 4446 /* Make a first pass to check for holes. */ 4447 for (entry = first; entry != NULL && entry->start < end; 4448 entry = RBT_NEXT(uvm_map_addr, entry)) { 4449 if (UVM_ET_ISSUBMAP(entry)) { 4450 vm_map_unlock_read(map); 4451 return EINVAL; 4452 } 4453 if (UVM_ET_ISSUBMAP(entry) || 4454 UVM_ET_ISHOLE(entry) || 4455 (entry->end < end && 4456 VMMAP_FREE_END(entry) != entry->end)) { 4457 vm_map_unlock_read(map); 4458 return EFAULT; 4459 } 4460 } 4461 4462 error = 0; 4463 for (entry = first; entry != NULL && entry->start < end; 4464 entry = RBT_NEXT(uvm_map_addr, entry)) { 4465 amap = entry->aref.ar_amap; /* top layer */ 4466 if (UVM_ET_ISOBJ(entry)) 4467 uobj = entry->object.uvm_obj; 4468 else 4469 uobj = NULL; 4470 4471 /* 4472 * No amap cleaning necessary if: 4473 * - there's no amap 4474 * - we're not deactivating or freeing pages. 
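 *   (PGO_CLEANIT by itself never touches the amap layer: anonymous
 *   memory has no permanent backing store to clean.)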
4475 */ 4476 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4477 goto flush_object; 4478 4479 cp_start = MAX(entry->start, start); 4480 cp_end = MIN(entry->end, end); 4481 4482 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4483 anon = amap_lookup(&entry->aref, 4484 cp_start - entry->start); 4485 if (anon == NULL) 4486 continue; 4487 4488 pg = anon->an_page; 4489 if (pg == NULL) { 4490 continue; 4491 } 4492 KASSERT(pg->pg_flags & PQ_ANON); 4493 4494 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4495 /* 4496 * XXX In these first 3 cases, we always just 4497 * XXX deactivate the page. We may want to 4498 * XXX handle the different cases more 4499 * XXX specifically, in the future. 4500 */ 4501 case PGO_CLEANIT|PGO_FREE: 4502 case PGO_CLEANIT|PGO_DEACTIVATE: 4503 case PGO_DEACTIVATE: 4504 deactivate_it: 4505 /* skip the page if it's wired */ 4506 if (pg->wire_count != 0) 4507 break; 4508 4509 uvm_lock_pageq(); 4510 4511 KASSERT(pg->uanon == anon); 4512 4513 /* zap all mappings for the page. */ 4514 pmap_page_protect(pg, PROT_NONE); 4515 4516 /* ...and deactivate the page. */ 4517 uvm_pagedeactivate(pg); 4518 4519 uvm_unlock_pageq(); 4520 break; 4521 case PGO_FREE: 4522 /* 4523 * If there are multiple references to 4524 * the amap, just deactivate the page. 4525 */ 4526 if (amap_refs(amap) > 1) 4527 goto deactivate_it; 4528 4529 /* XXX skip the page if it's wired */ 4530 if (pg->wire_count != 0) { 4531 break; 4532 } 4533 amap_unadd(&entry->aref, 4534 cp_start - entry->start); 4535 refs = --anon->an_ref; 4536 if (refs == 0) 4537 uvm_anfree(anon); 4538 break; 4539 default: 4540 panic("uvm_map_clean: weird flags"); 4541 } 4542 } 4543 4544 flush_object: 4545 cp_start = MAX(entry->start, start); 4546 cp_end = MIN(entry->end, end); 4547 4548 /* 4549 * flush pages if we've got a valid backing object. 4550 * 4551 * Don't PGO_FREE if we don't have write permission 4552 * and don't flush if this is a copy-on-write object 4553 * since we can't know our permissions on it. 4554 */ 4555 if (uobj != NULL && 4556 ((flags & PGO_FREE) == 0 || 4557 ((entry->max_protection & PROT_WRITE) != 0 && 4558 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4559 rv = uobj->pgops->pgo_flush(uobj, 4560 cp_start - entry->start + entry->offset, 4561 cp_end - entry->start + entry->offset, flags); 4562 4563 if (rv == FALSE) 4564 error = EFAULT; 4565 } 4566 } 4567 4568 vm_map_unlock_read(map); 4569 return error; 4570 } 4571 4572 /* 4573 * UVM_MAP_CLIP_END implementation 4574 */ 4575 void 4576 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4577 { 4578 struct vm_map_entry *tmp; 4579 4580 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4581 tmp = uvm_mapent_alloc(map, 0); 4582 4583 /* Invoke splitentry. */ 4584 uvm_map_splitentry(map, entry, tmp, addr); 4585 } 4586 4587 /* 4588 * UVM_MAP_CLIP_START implementation 4589 * 4590 * Clippers are required to not change the pointers to the entry they are 4591 * clipping on. 4592 * Since uvm_map_splitentry turns the original entry into the lowest 4593 * entry (address wise) we do a swap between the new entry and the original 4594 * entry, prior to calling uvm_map_splitentry. 4595 */ 4596 void 4597 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4598 { 4599 struct vm_map_entry *tmp; 4600 struct uvm_addr_state *free; 4601 4602 /* Unlink original. 
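 * The original entry is re-inserted as the high half of the split,
 * so the caller's pointer ends up referring to the range that
 * starts at addr.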
*/ 4603 free = uvm_map_uaddr_e(map, entry); 4604 uvm_mapent_free_remove(map, free, entry); 4605 uvm_mapent_addr_remove(map, entry); 4606 4607 /* Copy entry. */ 4608 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4609 tmp = uvm_mapent_alloc(map, 0); 4610 uvm_mapent_copy(entry, tmp); 4611 4612 /* Put new entry in place of original entry. */ 4613 uvm_mapent_addr_insert(map, tmp); 4614 uvm_mapent_free_insert(map, free, tmp); 4615 4616 /* Invoke splitentry. */ 4617 uvm_map_splitentry(map, tmp, entry, addr); 4618 } 4619 4620 /* 4621 * Boundary fixer. 4622 */ 4623 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4624 static __inline vaddr_t 4625 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4626 { 4627 return (min < bound && max > bound) ? bound : max; 4628 } 4629 4630 /* 4631 * Choose free list based on address at start of free space. 4632 * 4633 * The uvm_addr_state returned contains addr and is the first of: 4634 * - uaddr_exe 4635 * - uaddr_brk_stack 4636 * - uaddr_any 4637 */ 4638 struct uvm_addr_state* 4639 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4640 { 4641 struct uvm_addr_state *uaddr; 4642 int i; 4643 4644 /* Special case the first page, to prevent mmap from returning 0. */ 4645 if (addr < VMMAP_MIN_ADDR) 4646 return NULL; 4647 4648 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4649 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4650 if (addr >= uvm_maxkaddr) 4651 return NULL; 4652 } 4653 4654 /* Is the address inside the exe-only map? */ 4655 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4656 addr < map->uaddr_exe->uaddr_maxaddr) 4657 return map->uaddr_exe; 4658 4659 /* Check if the space falls inside brk/stack area. */ 4660 if ((addr >= map->b_start && addr < map->b_end) || 4661 (addr >= map->s_start && addr < map->s_end)) { 4662 if (map->uaddr_brk_stack != NULL && 4663 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4664 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4665 return map->uaddr_brk_stack; 4666 } else 4667 return NULL; 4668 } 4669 4670 /* 4671 * Check the other selectors. 4672 * 4673 * These selectors are only marked as the owner, if they have insert 4674 * functions. 4675 */ 4676 for (i = 0; i < nitems(map->uaddr_any); i++) { 4677 uaddr = map->uaddr_any[i]; 4678 if (uaddr == NULL) 4679 continue; 4680 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4681 continue; 4682 4683 if (addr >= uaddr->uaddr_minaddr && 4684 addr < uaddr->uaddr_maxaddr) 4685 return uaddr; 4686 } 4687 4688 return NULL; 4689 } 4690 4691 /* 4692 * Choose free list based on address at start of free space. 4693 * 4694 * The uvm_addr_state returned contains addr and is the first of: 4695 * - uaddr_exe 4696 * - uaddr_brk_stack 4697 * - uaddr_any 4698 */ 4699 struct uvm_addr_state* 4700 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4701 { 4702 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4703 } 4704 4705 /* 4706 * Returns the first free-memory boundary that is crossed by [min-max]. 4707 */ 4708 vsize_t 4709 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4710 { 4711 struct uvm_addr_state *uaddr; 4712 int i; 4713 4714 /* Never return first page. */ 4715 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4716 4717 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4718 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4719 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4720 4721 /* Check for exe-only boundaries. 
*/ 4722 if (map->uaddr_exe != NULL) { 4723 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4724 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4725 } 4726 4727 /* Check for exe-only boundaries. */ 4728 if (map->uaddr_brk_stack != NULL) { 4729 max = uvm_map_boundfix(min, max, 4730 map->uaddr_brk_stack->uaddr_minaddr); 4731 max = uvm_map_boundfix(min, max, 4732 map->uaddr_brk_stack->uaddr_maxaddr); 4733 } 4734 4735 /* Check other boundaries. */ 4736 for (i = 0; i < nitems(map->uaddr_any); i++) { 4737 uaddr = map->uaddr_any[i]; 4738 if (uaddr != NULL) { 4739 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4740 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4741 } 4742 } 4743 4744 /* Boundaries at stack and brk() area. */ 4745 max = uvm_map_boundfix(min, max, map->s_start); 4746 max = uvm_map_boundfix(min, max, map->s_end); 4747 max = uvm_map_boundfix(min, max, map->b_start); 4748 max = uvm_map_boundfix(min, max, map->b_end); 4749 4750 return max; 4751 } 4752 4753 /* 4754 * Update map allocation start and end addresses from proc vmspace. 4755 */ 4756 void 4757 uvm_map_vmspace_update(struct vm_map *map, 4758 struct uvm_map_deadq *dead, int flags) 4759 { 4760 struct vmspace *vm; 4761 vaddr_t b_start, b_end, s_start, s_end; 4762 4763 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4764 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4765 4766 /* 4767 * Derive actual allocation boundaries from vmspace. 4768 */ 4769 vm = (struct vmspace *)map; 4770 b_start = (vaddr_t)vm->vm_daddr; 4771 b_end = b_start + BRKSIZ; 4772 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4773 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4774 #ifdef DIAGNOSTIC 4775 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4776 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4777 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4778 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4779 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4780 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4781 vm, b_start, b_end, s_start, s_end); 4782 } 4783 #endif 4784 4785 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4786 map->s_start == s_start && map->s_end == s_end)) 4787 return; 4788 4789 uvm_map_freelist_update(map, dead, b_start, b_end, 4790 s_start, s_end, flags); 4791 } 4792 4793 /* 4794 * Grow kernel memory. 4795 * 4796 * This function is only called for kernel maps when an allocation fails. 4797 * 4798 * If the map has a gap that is large enough to accommodate alloc_sz, this 4799 * function will make sure map->free will include it. 4800 */ 4801 void 4802 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4803 vsize_t alloc_sz, int flags) 4804 { 4805 vsize_t sz; 4806 vaddr_t end; 4807 struct vm_map_entry *entry; 4808 4809 /* Kernel memory only. */ 4810 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4811 /* Destroy free list. */ 4812 uvm_map_freelist_update_clear(map, dead); 4813 4814 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 4815 if (map->flags & VM_MAP_GUARDPAGES) 4816 alloc_sz += PAGE_SIZE; 4817 4818 /* 4819 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4820 * 4821 * Don't handle the case where the multiplication overflows: 4822 * if that happens, the allocation is probably too big anyway. 4823 */ 4824 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4825 4826 /* 4827 * Walk forward until a gap large enough for alloc_sz shows up. 4828 * 4829 * We assume the kernel map has no boundaries. 
4830 * uvm_maxkaddr may be zero. 4831 */ 4832 end = MAX(uvm_maxkaddr, map->min_offset); 4833 entry = uvm_map_entrybyaddr(&map->addr, end); 4834 while (entry && entry->fspace < alloc_sz) 4835 entry = RBT_NEXT(uvm_map_addr, entry); 4836 if (entry) { 4837 end = MAX(VMMAP_FREE_START(entry), end); 4838 end += MIN(sz, map->max_offset - end); 4839 } else 4840 end = map->max_offset; 4841 4842 /* Reserve pmap entries. */ 4843 #ifdef PMAP_GROWKERNEL 4844 uvm_maxkaddr = pmap_growkernel(end); 4845 #else 4846 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4847 #endif 4848 4849 /* Rebuild free list. */ 4850 uvm_map_freelist_update_refill(map, flags); 4851 } 4852 4853 /* 4854 * Freelist update subfunction: unlink all entries from freelists. 4855 */ 4856 void 4857 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4858 { 4859 struct uvm_addr_state *free; 4860 struct vm_map_entry *entry, *prev, *next; 4861 4862 prev = NULL; 4863 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4864 entry = next) { 4865 next = RBT_NEXT(uvm_map_addr, entry); 4866 4867 free = uvm_map_uaddr_e(map, entry); 4868 uvm_mapent_free_remove(map, free, entry); 4869 4870 if (prev != NULL && entry->start == entry->end) { 4871 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4872 uvm_mapent_addr_remove(map, entry); 4873 DEAD_ENTRY_PUSH(dead, entry); 4874 } else 4875 prev = entry; 4876 } 4877 } 4878 4879 /* 4880 * Freelist update subfunction: refill the freelists with entries. 4881 */ 4882 void 4883 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4884 { 4885 struct vm_map_entry *entry; 4886 vaddr_t min, max; 4887 4888 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 4889 min = VMMAP_FREE_START(entry); 4890 max = VMMAP_FREE_END(entry); 4891 entry->fspace = 0; 4892 4893 entry = uvm_map_fix_space(map, entry, min, max, flags); 4894 } 4895 4896 uvm_tree_sanity(map, __FILE__, __LINE__); 4897 } 4898 4899 /* 4900 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4901 */ 4902 void 4903 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4904 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4905 { 4906 KDASSERT(b_end >= b_start && s_end >= s_start); 4907 4908 /* Clear all free lists. */ 4909 uvm_map_freelist_update_clear(map, dead); 4910 4911 /* Apply new bounds. */ 4912 map->b_start = b_start; 4913 map->b_end = b_end; 4914 map->s_start = s_start; 4915 map->s_end = s_end; 4916 4917 /* Refill free lists. */ 4918 uvm_map_freelist_update_refill(map, flags); 4919 } 4920 4921 /* 4922 * Assign a uvm_addr_state to the specified pointer in vm_map. 4923 * 4924 * May sleep. 4925 */ 4926 void 4927 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4928 struct uvm_addr_state *newval) 4929 { 4930 struct uvm_map_deadq dead; 4931 4932 /* Pointer which must be in this map. */ 4933 KASSERT(which != NULL); 4934 KASSERT((void*)map <= (void*)(which) && 4935 (void*)(which) < (void*)(map + 1)); 4936 4937 vm_map_lock(map); 4938 TAILQ_INIT(&dead); 4939 uvm_map_freelist_update_clear(map, &dead); 4940 4941 uvm_addr_destroy(*which); 4942 *which = newval; 4943 4944 uvm_map_freelist_update_refill(map, 0); 4945 vm_map_unlock(map); 4946 uvm_unmap_detach(&dead, 0); 4947 } 4948 4949 /* 4950 * Correct space insert. 4951 * 4952 * Entry must not be on any freelist. 
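 *
 * Registers the free space [min, max) in the map: the range is split
 * at every uvm_map_boundary() crossing and either extends "entry" or
 * is attached to newly allocated zero-size entries, which are then
 * placed on the matching uaddr free lists.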
4953 */ 4954 struct vm_map_entry* 4955 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4956 vaddr_t min, vaddr_t max, int flags) 4957 { 4958 struct uvm_addr_state *free, *entfree; 4959 vaddr_t lmax; 4960 4961 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4962 KDASSERT(min <= max); 4963 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4964 min == map->min_offset); 4965 4966 /* 4967 * During the function, entfree will always point at the uaddr state 4968 * for entry. 4969 */ 4970 entfree = (entry == NULL ? NULL : 4971 uvm_map_uaddr_e(map, entry)); 4972 4973 while (min != max) { 4974 /* Claim guard page for entry. */ 4975 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4976 VMMAP_FREE_END(entry) == entry->end && 4977 entry->start != entry->end) { 4978 if (max - min == 2 * PAGE_SIZE) { 4979 /* 4980 * If the free-space gap is exactly 2 pages, 4981 * we make the guard 2 pages instead of 1. 4982 * Because in a guarded map, an area needs 4983 * at least 2 pages to allocate from: 4984 * one page for the allocation and one for 4985 * the guard. 4986 */ 4987 entry->guard = 2 * PAGE_SIZE; 4988 min = max; 4989 } else { 4990 entry->guard = PAGE_SIZE; 4991 min += PAGE_SIZE; 4992 } 4993 continue; 4994 } 4995 4996 /* 4997 * Handle the case where entry has a 2-page guard, but the 4998 * space after entry is freed. 4999 */ 5000 if (entry != NULL && entry->fspace == 0 && 5001 entry->guard > PAGE_SIZE) { 5002 entry->guard = PAGE_SIZE; 5003 min = VMMAP_FREE_START(entry); 5004 } 5005 5006 lmax = uvm_map_boundary(map, min, max); 5007 free = uvm_map_uaddr(map, min); 5008 5009 /* 5010 * Entries are merged if they point at the same uvm_free(). 5011 * Exception to that rule: if min == uvm_maxkaddr, a new 5012 * entry is started regardless (otherwise the allocators 5013 * will get confused). 5014 */ 5015 if (entry != NULL && free == entfree && 5016 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5017 min == uvm_maxkaddr)) { 5018 KDASSERT(VMMAP_FREE_END(entry) == min); 5019 entry->fspace += lmax - min; 5020 } else { 5021 /* 5022 * Commit entry to free list: it'll not be added to 5023 * anymore. 5024 * We'll start a new entry and add to that entry 5025 * instead. 5026 */ 5027 if (entry != NULL) 5028 uvm_mapent_free_insert(map, entfree, entry); 5029 5030 /* New entry for new uaddr. */ 5031 entry = uvm_mapent_alloc(map, flags); 5032 KDASSERT(entry != NULL); 5033 entry->end = entry->start = min; 5034 entry->guard = 0; 5035 entry->fspace = lmax - min; 5036 entry->object.uvm_obj = NULL; 5037 entry->offset = 0; 5038 entry->etype = 0; 5039 entry->protection = entry->max_protection = 0; 5040 entry->inheritance = 0; 5041 entry->wired_count = 0; 5042 entry->advice = 0; 5043 entry->aref.ar_pageoff = 0; 5044 entry->aref.ar_amap = NULL; 5045 uvm_mapent_addr_insert(map, entry); 5046 5047 entfree = free; 5048 } 5049 5050 min = lmax; 5051 } 5052 /* Finally put entry on the uaddr state. */ 5053 if (entry != NULL) 5054 uvm_mapent_free_insert(map, entfree, entry); 5055 5056 return entry; 5057 } 5058 5059 /* 5060 * MQuery style of allocation. 5061 * 5062 * This allocator searches forward until sufficient space is found to map 5063 * the given size. 5064 * 5065 * XXX: factor in offset (via pmap_prefer) and protection? 
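 *
 * On success *addr_p is updated with the address that was found.
 * With UVM_FLAG_FIXED only the requested address itself is tried;
 * otherwise brk() and stack ranges are skipped while searching.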
5066 */ 5067 int 5068 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5069 int flags) 5070 { 5071 struct vm_map_entry *entry, *last; 5072 vaddr_t addr; 5073 vaddr_t tmp, pmap_align, pmap_offset; 5074 int error; 5075 5076 addr = *addr_p; 5077 vm_map_lock_read(map); 5078 5079 /* Configure pmap prefer. */ 5080 if (offset != UVM_UNKNOWN_OFFSET) { 5081 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5082 pmap_offset = PMAP_PREFER_OFFSET(offset); 5083 } else { 5084 pmap_align = PAGE_SIZE; 5085 pmap_offset = 0; 5086 } 5087 5088 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5089 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5090 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5091 if (tmp < addr) 5092 tmp += pmap_align; 5093 addr = tmp; 5094 } 5095 5096 /* First, check if the requested range is fully available. */ 5097 entry = uvm_map_entrybyaddr(&map->addr, addr); 5098 last = NULL; 5099 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5100 error = 0; 5101 goto out; 5102 } 5103 if (flags & UVM_FLAG_FIXED) { 5104 error = EINVAL; 5105 goto out; 5106 } 5107 5108 error = ENOMEM; /* Default error from here. */ 5109 5110 /* 5111 * At this point, the memory at <addr, sz> is not available. 5112 * The reasons are: 5113 * [1] it's outside the map, 5114 * [2] it starts in used memory (and therefore needs to move 5115 * toward the first free page in entry), 5116 * [3] it starts in free memory but bumps into used memory. 5117 * 5118 * Note that for case [2], the forward moving is handled by the 5119 * for loop below. 5120 */ 5121 if (entry == NULL) { 5122 /* [1] Outside the map. */ 5123 if (addr >= map->max_offset) 5124 goto out; 5125 else 5126 entry = RBT_MIN(uvm_map_addr, &map->addr); 5127 } else if (VMMAP_FREE_START(entry) <= addr) { 5128 /* [3] Bumped into used memory. */ 5129 entry = RBT_NEXT(uvm_map_addr, entry); 5130 } 5131 5132 /* Test if the next entry is sufficient for the allocation. */ 5133 for (; entry != NULL; 5134 entry = RBT_NEXT(uvm_map_addr, entry)) { 5135 if (entry->fspace == 0) 5136 continue; 5137 addr = VMMAP_FREE_START(entry); 5138 5139 restart: /* Restart address checks on address change. */ 5140 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5141 if (tmp < addr) 5142 tmp += pmap_align; 5143 addr = tmp; 5144 if (addr >= VMMAP_FREE_END(entry)) 5145 continue; 5146 5147 /* Skip brk() allocation addresses. */ 5148 if (addr + sz > map->b_start && addr < map->b_end) { 5149 if (VMMAP_FREE_END(entry) > map->b_end) { 5150 addr = map->b_end; 5151 goto restart; 5152 } else 5153 continue; 5154 } 5155 /* Skip stack allocation addresses. */ 5156 if (addr + sz > map->s_start && addr < map->s_end) { 5157 if (VMMAP_FREE_END(entry) > map->s_end) { 5158 addr = map->s_end; 5159 goto restart; 5160 } else 5161 continue; 5162 } 5163 5164 last = NULL; 5165 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5166 error = 0; 5167 goto out; 5168 } 5169 } 5170 5171 out: 5172 vm_map_unlock_read(map); 5173 if (error == 0) 5174 *addr_p = addr; 5175 return error; 5176 } 5177 5178 /* 5179 * Determine allocation bias. 5180 * 5181 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5182 * addresses, or 0 for no bias. 5183 * The bias mechanism is intended to avoid clashing with brk() and stack 5184 * areas. 
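 * For example, free space overlapping the brk() area is biased
 * towards high addresses, keeping new allocations at the far end of
 * the window the heap grows into.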
5185 */ 5186 int 5187 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5188 { 5189 vaddr_t start, end; 5190 5191 start = VMMAP_FREE_START(entry); 5192 end = VMMAP_FREE_END(entry); 5193 5194 /* Stay at the top of brk() area. */ 5195 if (end >= map->b_start && start < map->b_end) 5196 return 1; 5197 /* Stay at the far end of the stack area. */ 5198 if (end >= map->s_start && start < map->s_end) { 5199 #ifdef MACHINE_STACK_GROWS_UP 5200 return 1; 5201 #else 5202 return -1; 5203 #endif 5204 } 5205 5206 /* No bias, this area is meant for us. */ 5207 return 0; 5208 } 5209 5210 5211 boolean_t 5212 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5213 { 5214 boolean_t rv; 5215 5216 if (map->flags & VM_MAP_INTRSAFE) { 5217 rv = mtx_enter_try(&map->mtx); 5218 } else { 5219 mtx_enter(&map->flags_lock); 5220 if (map->flags & VM_MAP_BUSY) { 5221 mtx_leave(&map->flags_lock); 5222 return (FALSE); 5223 } 5224 mtx_leave(&map->flags_lock); 5225 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5226 /* check if the lock is busy and back out if we won the race */ 5227 if (rv) { 5228 mtx_enter(&map->flags_lock); 5229 if (map->flags & VM_MAP_BUSY) { 5230 rw_exit(&map->lock); 5231 rv = FALSE; 5232 } 5233 mtx_leave(&map->flags_lock); 5234 } 5235 } 5236 5237 if (rv) { 5238 map->timestamp++; 5239 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5240 uvm_tree_sanity(map, file, line); 5241 uvm_tree_size_chk(map, file, line); 5242 } 5243 5244 return (rv); 5245 } 5246 5247 void 5248 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5249 { 5250 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5251 do { 5252 mtx_enter(&map->flags_lock); 5253 tryagain: 5254 while (map->flags & VM_MAP_BUSY) { 5255 map->flags |= VM_MAP_WANTLOCK; 5256 msleep(&map->flags, &map->flags_lock, 5257 PVM, vmmapbsy, 0); 5258 } 5259 mtx_leave(&map->flags_lock); 5260 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5261 /* check if the lock is busy and back out if we won the race */ 5262 mtx_enter(&map->flags_lock); 5263 if (map->flags & VM_MAP_BUSY) { 5264 rw_exit(&map->lock); 5265 goto tryagain; 5266 } 5267 mtx_leave(&map->flags_lock); 5268 } else { 5269 mtx_enter(&map->mtx); 5270 } 5271 5272 map->timestamp++; 5273 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5274 uvm_tree_sanity(map, file, line); 5275 uvm_tree_size_chk(map, file, line); 5276 } 5277 5278 void 5279 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5280 { 5281 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5282 rw_enter_read(&map->lock); 5283 else 5284 mtx_enter(&map->mtx); 5285 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5286 uvm_tree_sanity(map, file, line); 5287 uvm_tree_size_chk(map, file, line); 5288 } 5289 5290 void 5291 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5292 { 5293 uvm_tree_sanity(map, file, line); 5294 uvm_tree_size_chk(map, file, line); 5295 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5296 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5297 rw_exit(&map->lock); 5298 else 5299 mtx_leave(&map->mtx); 5300 } 5301 5302 void 5303 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5304 { 5305 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5306 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5307 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5308 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5309 rw_exit_read(&map->lock); 5310 else 5311 mtx_leave(&map->mtx); 5312 } 5313 5314 void 5315 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 
5316 { 5317 uvm_tree_sanity(map, file, line); 5318 uvm_tree_size_chk(map, file, line); 5319 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5320 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5321 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5322 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5323 rw_enter(&map->lock, RW_DOWNGRADE); 5324 } 5325 5326 void 5327 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5328 { 5329 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5330 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5331 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5332 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5333 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5334 rw_exit_read(&map->lock); 5335 rw_enter_write(&map->lock); 5336 } 5337 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5338 uvm_tree_sanity(map, file, line); 5339 } 5340 5341 void 5342 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5343 { 5344 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5345 mtx_enter(&map->flags_lock); 5346 map->flags |= VM_MAP_BUSY; 5347 mtx_leave(&map->flags_lock); 5348 } 5349 5350 void 5351 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5352 { 5353 int oflags; 5354 5355 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5356 mtx_enter(&map->flags_lock); 5357 oflags = map->flags; 5358 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5359 mtx_leave(&map->flags_lock); 5360 if (oflags & VM_MAP_WANTLOCK) 5361 wakeup(&map->flags); 5362 } 5363 5364 #ifndef SMALL_KERNEL 5365 int 5366 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5367 size_t *lenp) 5368 { 5369 struct vm_map_entry *entry; 5370 vaddr_t start; 5371 int cnt, maxcnt, error = 0; 5372 5373 KASSERT(*lenp > 0); 5374 KASSERT((*lenp % sizeof(*kve)) == 0); 5375 cnt = 0; 5376 maxcnt = *lenp / sizeof(*kve); 5377 KASSERT(maxcnt > 0); 5378 5379 /* 5380 * Return only entries whose address is above the given base 5381 * address. This allows userland to iterate without knowing the 5382 * number of entries beforehand. 5383 */ 5384 start = (vaddr_t)kve[0].kve_start; 5385 5386 vm_map_lock(map); 5387 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5388 if (cnt == maxcnt) { 5389 error = ENOMEM; 5390 break; 5391 } 5392 if (start != 0 && entry->start < start) 5393 continue; 5394 kve->kve_start = entry->start; 5395 kve->kve_end = entry->end; 5396 kve->kve_guard = entry->guard; 5397 kve->kve_fspace = entry->fspace; 5398 kve->kve_fspace_augment = entry->fspace_augment; 5399 kve->kve_offset = entry->offset; 5400 kve->kve_wired_count = entry->wired_count; 5401 kve->kve_etype = entry->etype; 5402 kve->kve_protection = entry->protection; 5403 kve->kve_max_protection = entry->max_protection; 5404 kve->kve_advice = entry->advice; 5405 kve->kve_inheritance = entry->inheritance; 5406 kve->kve_flags = entry->flags; 5407 kve++; 5408 cnt++; 5409 } 5410 vm_map_unlock(map); 5411 5412 KASSERT(cnt <= maxcnt); 5413 5414 *lenp = sizeof(*kve) * cnt; 5415 return error; 5416 } 5417 #endif 5418 5419 5420 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5421 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5422 5423 5424 /* 5425 * MD code: vmspace allocator setup. 5426 */ 5427 5428 #ifdef __i386__ 5429 void 5430 uvm_map_setup_md(struct vm_map *map) 5431 { 5432 vaddr_t min, max; 5433 5434 min = map->min_offset; 5435 max = map->max_offset; 5436 5437 /* 5438 * Ensure the selectors will not try to manage page 0; 5439 * it's too special. 
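 * (cf. uvm_map_uaddr(): addresses below VMMAP_MIN_ADDR are never
 * handed out, so mmap cannot return 0.)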
5440 */ 5441 if (min < VMMAP_MIN_ADDR) 5442 min = VMMAP_MIN_ADDR; 5443 5444 #if 0 /* Cool stuff, not yet */ 5445 /* Executable code is special. */ 5446 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5447 /* Place normal allocations beyond executable mappings. */ 5448 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5449 #else /* Crappy stuff, for now */ 5450 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5451 #endif 5452 5453 #ifndef SMALL_KERNEL 5454 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5455 #endif /* !SMALL_KERNEL */ 5456 } 5457 #elif __LP64__ 5458 void 5459 uvm_map_setup_md(struct vm_map *map) 5460 { 5461 vaddr_t min, max; 5462 5463 min = map->min_offset; 5464 max = map->max_offset; 5465 5466 /* 5467 * Ensure the selectors will not try to manage page 0; 5468 * it's too special. 5469 */ 5470 if (min < VMMAP_MIN_ADDR) 5471 min = VMMAP_MIN_ADDR; 5472 5473 #if 0 /* Cool stuff, not yet */ 5474 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5475 #else /* Crappy stuff, for now */ 5476 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5477 #endif 5478 5479 #ifndef SMALL_KERNEL 5480 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5481 #endif /* !SMALL_KERNEL */ 5482 } 5483 #else /* non-i386, 32 bit */ 5484 void 5485 uvm_map_setup_md(struct vm_map *map) 5486 { 5487 vaddr_t min, max; 5488 5489 min = map->min_offset; 5490 max = map->max_offset; 5491 5492 /* 5493 * Ensure the selectors will not try to manage page 0; 5494 * it's too special. 5495 */ 5496 if (min < VMMAP_MIN_ADDR) 5497 min = VMMAP_MIN_ADDR; 5498 5499 #if 0 /* Cool stuff, not yet */ 5500 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5501 #else /* Crappy stuff, for now */ 5502 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5503 #endif 5504 5505 #ifndef SMALL_KERNEL 5506 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5507 #endif /* !SMALL_KERNEL */ 5508 } 5509 #endif 5510
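/*
 * Illustrative sketch (not part of this file's build): how the
 * inheritance codes dispatched in uvmspace_fork() are selected from
 * userland.  It assumes the usual mmap(2)/minherit(2)/fork(2)
 * interfaces; error handling is omitted.  After the fork the child
 * exits with status 0, because MAP_INHERIT_ZERO routes the entry
 * through uvm_mapent_forkzero() and the page is zero-filled anew,
 * while the parent still sees its own data.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 *		p[0] = 42;
 *		minherit(p, 4096, MAP_INHERIT_ZERO);
 *		if (fork() == 0)
 *			_exit(p[0]);
 *		return 0;
 *	}
 */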