1 /* $OpenBSD: uvm_map.c,v 1.240 2019/02/10 16:42:35 phessler Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/sysctl.h> 94 #include <sys/syslog.h> 95 96 #ifdef SYSVSHM 97 #include <sys/shm.h> 98 #endif 99 100 #include <uvm/uvm.h> 101 102 #ifdef DDB 103 #include <uvm/uvm_ddb.h> 104 #endif 105 106 #include <uvm/uvm_addr.h> 107 108 109 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 110 int uvm_mapent_isjoinable(struct vm_map*, 111 struct vm_map_entry*, struct vm_map_entry*); 112 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 113 struct vm_map_entry*, struct uvm_map_deadq*); 114 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 115 struct vm_map_entry*, struct uvm_map_deadq*); 116 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 117 struct vm_map_entry*, vaddr_t, vsize_t, int, 118 struct uvm_map_deadq*, struct vm_map_entry*); 119 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 120 void uvm_mapent_free(struct vm_map_entry*); 121 void uvm_unmap_kill_entry(struct vm_map*, 122 struct vm_map_entry*); 123 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 124 void uvm_mapent_mkfree(struct vm_map*, 125 struct vm_map_entry*, struct vm_map_entry**, 126 struct uvm_map_deadq*, boolean_t); 127 void uvm_map_pageable_pgon(struct vm_map*, 128 struct vm_map_entry*, struct vm_map_entry*, 129 vaddr_t, vaddr_t); 130 int uvm_map_pageable_wire(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry*, 132 vaddr_t, vaddr_t, int); 133 void uvm_map_setup_entries(struct vm_map*); 134 void uvm_map_setup_md(struct vm_map*); 135 void uvm_map_teardown(struct vm_map*); 136 void uvm_map_vmspace_update(struct vm_map*, 137 struct uvm_map_deadq*, int); 138 void uvm_map_kmem_grow(struct vm_map*, 139 struct uvm_map_deadq*, vsize_t, int); 140 void uvm_map_freelist_update_clear(struct vm_map*, 141 struct uvm_map_deadq*); 142 void uvm_map_freelist_update_refill(struct vm_map *, int); 143 void uvm_map_freelist_update(struct vm_map*, 144 struct uvm_map_deadq*, vaddr_t, vaddr_t, 145 vaddr_t, vaddr_t, int); 146 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 147 vaddr_t, vaddr_t, int); 148 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 149 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 150 int); 151 int uvm_map_findspace(struct vm_map*, 152 struct vm_map_entry**, struct vm_map_entry**, 153 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 154 vaddr_t); 155 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 156 void uvm_map_addr_augment(struct vm_map_entry*); 157 158 /* 159 * Tree management functions. 
160 */ 161 162 static __inline void uvm_mapent_copy(struct vm_map_entry*, 163 struct vm_map_entry*); 164 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 165 const struct vm_map_entry*); 166 void uvm_mapent_free_insert(struct vm_map*, 167 struct uvm_addr_state*, struct vm_map_entry*); 168 void uvm_mapent_free_remove(struct vm_map*, 169 struct uvm_addr_state*, struct vm_map_entry*); 170 void uvm_mapent_addr_insert(struct vm_map*, 171 struct vm_map_entry*); 172 void uvm_mapent_addr_remove(struct vm_map*, 173 struct vm_map_entry*); 174 void uvm_map_splitentry(struct vm_map*, 175 struct vm_map_entry*, struct vm_map_entry*, 176 vaddr_t); 177 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 178 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 179 180 /* 181 * uvm_vmspace_fork helper functions. 182 */ 183 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 184 vsize_t, vm_prot_t, vm_prot_t, 185 struct vm_map_entry*, struct uvm_map_deadq*, int, 186 int); 187 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 188 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 189 struct vm_map_entry*, struct uvm_map_deadq*); 190 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 191 struct vm_map*, struct vm_map_entry*, 192 struct uvm_map_deadq*); 193 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 194 struct vm_map*, struct vm_map_entry*, 195 struct uvm_map_deadq*); 196 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 197 struct vm_map*, struct vm_map_entry*, 198 struct uvm_map_deadq*); 199 200 /* 201 * Tree validation. 202 */ 203 #ifdef VMMAP_DEBUG 204 void uvm_tree_assert(struct vm_map*, int, char*, 205 char*, int); 206 #define UVM_ASSERT(map, cond, file, line) \ 207 uvm_tree_assert((map), (cond), #cond, (file), (line)) 208 void uvm_tree_sanity(struct vm_map*, char*, int); 209 void uvm_tree_size_chk(struct vm_map*, char*, int); 210 void vmspace_validate(struct vm_map*); 211 #else 212 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 213 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 214 #define vmspace_validate(_map) do {} while (0) 215 #endif 216 217 /* 218 * All architectures will have pmap_prefer. 219 */ 220 #ifndef PMAP_PREFER 221 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 222 #define PMAP_PREFER_OFFSET(off) 0 223 #define PMAP_PREFER(addr, off) (addr) 224 #endif 225 226 227 /* 228 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 229 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 230 * 231 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 232 * each time. 233 */ 234 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 235 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 236 #define VM_MAP_KSIZE_ALLOCMUL 4 237 /* 238 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 239 * ahead. 240 */ 241 #define FSPACE_DELTA 8 242 /* 243 * Put allocations adjecent to previous allocations when the free-space tree 244 * is larger than FSPACE_COMPACT entries. 245 * 246 * Alignment and PMAP_PREFER may still cause the entry to not be fully 247 * adjecent. Note that this strategy reduces memory fragmentation (by leaving 248 * a large space before or after the allocation). 249 */ 250 #define FSPACE_COMPACT 128 251 /* 252 * Make the address selection skip at most this many bytes from the start of 253 * the free space in which the allocation takes place. 
254 * 255 * The main idea behind a randomized address space is that an attacker cannot 256 * know where to target his attack. Therefore, the location of objects must be 257 * as random as possible. However, the goal is not to create the most sparse 258 * map that is possible. 259 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 260 * sizes, thereby reducing the sparseness. The biggest randomization comes 261 * from fragmentation, i.e. FSPACE_COMPACT. 262 */ 263 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 264 /* 265 * Allow for small gaps in the overflow areas. 266 * Gap size is in bytes and does not have to be a multiple of page-size. 267 */ 268 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 269 270 /* auto-allocate address lower bound */ 271 #define VMMAP_MIN_ADDR PAGE_SIZE 272 273 274 #ifdef DEADBEEF0 275 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 276 #else 277 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 278 #endif 279 280 #ifdef DEBUG 281 int uvm_map_printlocks = 0; 282 283 #define LPRINTF(_args) \ 284 do { \ 285 if (uvm_map_printlocks) \ 286 printf _args; \ 287 } while (0) 288 #else 289 #define LPRINTF(_args) do {} while (0) 290 #endif 291 292 static struct mutex uvm_kmapent_mtx; 293 static struct timeval uvm_kmapent_last_warn_time; 294 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 295 296 const char vmmapbsy[] = "vmmapbsy"; 297 298 /* 299 * pool for vmspace structures. 300 */ 301 struct pool uvm_vmspace_pool; 302 303 /* 304 * pool for dynamically-allocated map entries. 305 */ 306 struct pool uvm_map_entry_pool; 307 struct pool uvm_map_entry_kmem_pool; 308 309 /* 310 * This global represents the end of the kernel virtual address 311 * space. If we want to exceed this, we must grow the kernel 312 * virtual address space dynamically. 313 * 314 * Note, this variable is locked by kernel_map's lock. 315 */ 316 vaddr_t uvm_maxkaddr; 317 318 /* 319 * Locking predicate. 320 */ 321 #define UVM_MAP_REQ_WRITE(_map) \ 322 do { \ 323 if ((_map)->ref_count > 0) { \ 324 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 325 rw_assert_wrlock(&(_map)->lock); \ 326 else \ 327 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 328 } \ 329 } while (0) 330 331 /* 332 * Tree describing entries by address. 333 * 334 * Addresses are unique. 335 * Entries with start == end may only exist if they are the first entry 336 * (sorted by address) within a free-memory tree. 337 */ 338 339 static inline int 340 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 341 const struct vm_map_entry *e2) 342 { 343 return e1->start < e2->start ? -1 : e1->start > e2->start; 344 } 345 346 /* 347 * Copy mapentry. 348 */ 349 static __inline void 350 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 351 { 352 caddr_t csrc, cdst; 353 size_t sz; 354 355 csrc = (caddr_t)src; 356 cdst = (caddr_t)dst; 357 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 358 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 359 360 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 361 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 362 memcpy(cdst, csrc, sz); 363 } 364 365 /* 366 * Handle free-list insertion. 367 */ 368 void 369 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 370 struct vm_map_entry *entry) 371 { 372 const struct uvm_addr_functions *fun; 373 #ifdef VMMAP_DEBUG 374 vaddr_t min, max, bound; 375 #endif 376 377 #ifdef VMMAP_DEBUG 378 /* 379 * Boundary check. 
380 * Boundaries are folded if they go on the same free list. 381 */ 382 min = VMMAP_FREE_START(entry); 383 max = VMMAP_FREE_END(entry); 384 385 while (min < max) { 386 bound = uvm_map_boundary(map, min, max); 387 KASSERT(uvm_map_uaddr(map, min) == uaddr); 388 min = bound; 389 } 390 #endif 391 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 392 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 393 394 UVM_MAP_REQ_WRITE(map); 395 396 /* Actual insert: forward to uaddr pointer. */ 397 if (uaddr != NULL) { 398 fun = uaddr->uaddr_functions; 399 KDASSERT(fun != NULL); 400 if (fun->uaddr_free_insert != NULL) 401 (*fun->uaddr_free_insert)(map, uaddr, entry); 402 entry->etype |= UVM_ET_FREEMAPPED; 403 } 404 405 /* Update fspace augmentation. */ 406 uvm_map_addr_augment(entry); 407 } 408 409 /* 410 * Handle free-list removal. 411 */ 412 void 413 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 414 struct vm_map_entry *entry) 415 { 416 const struct uvm_addr_functions *fun; 417 418 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 419 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 420 UVM_MAP_REQ_WRITE(map); 421 422 if (uaddr != NULL) { 423 fun = uaddr->uaddr_functions; 424 if (fun->uaddr_free_remove != NULL) 425 (*fun->uaddr_free_remove)(map, uaddr, entry); 426 entry->etype &= ~UVM_ET_FREEMAPPED; 427 } 428 } 429 430 /* 431 * Handle address tree insertion. 432 */ 433 void 434 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 435 { 436 struct vm_map_entry *res; 437 438 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 439 panic("uvm_mapent_addr_insert: entry still in addr list"); 440 KDASSERT(entry->start <= entry->end); 441 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 442 (entry->end & (vaddr_t)PAGE_MASK) == 0); 443 444 UVM_MAP_REQ_WRITE(map); 445 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 446 if (res != NULL) { 447 panic("uvm_mapent_addr_insert: map %p entry %p " 448 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 449 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 450 map, entry, 451 entry->start, entry->end, entry->guard, entry->fspace, 452 res, res->start, res->end, res->guard, res->fspace); 453 } 454 } 455 456 /* 457 * Handle address tree removal. 458 */ 459 void 460 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 461 { 462 struct vm_map_entry *res; 463 464 UVM_MAP_REQ_WRITE(map); 465 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 466 if (res != entry) 467 panic("uvm_mapent_addr_remove"); 468 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 469 } 470 471 /* 472 * uvm_map_reference: add reference to a map 473 * 474 * XXX check map reference counter lock 475 */ 476 #define uvm_map_reference(_map) \ 477 do { \ 478 map->ref_count++; \ 479 } while (0) 480 481 /* 482 * Calculate the dused delta. 483 */ 484 vsize_t 485 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 486 { 487 struct vmspace *vm; 488 vsize_t sz; 489 vaddr_t lmax; 490 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 491 492 KASSERT(map->flags & VM_MAP_ISVMSPACE); 493 vm = (struct vmspace *)map; 494 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 495 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 496 497 sz = 0; 498 while (min != max) { 499 lmax = max; 500 if (min < stack_begin && lmax > stack_begin) 501 lmax = stack_begin; 502 else if (min < stack_end && lmax > stack_end) 503 lmax = stack_end; 504 505 if (min >= stack_begin && min < stack_end) { 506 /* nothing */ 507 } else 508 sz += lmax - min; 509 min = lmax; 510 } 511 512 return sz >> PAGE_SHIFT; 513 } 514 515 /* 516 * Find the entry describing the given address. 517 */ 518 struct vm_map_entry* 519 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 520 { 521 struct vm_map_entry *iter; 522 523 iter = RBT_ROOT(uvm_map_addr, atree); 524 while (iter != NULL) { 525 if (iter->start > addr) 526 iter = RBT_LEFT(uvm_map_addr, iter); 527 else if (VMMAP_FREE_END(iter) <= addr) 528 iter = RBT_RIGHT(uvm_map_addr, iter); 529 else 530 return iter; 531 } 532 return NULL; 533 } 534 535 /* 536 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 537 * 538 * Push dead entries into a linked list. 539 * Since the linked list abuses the address tree for storage, the entry 540 * may not be linked in a map. 541 * 542 * *head must be initialized to NULL before the first call to this macro. 543 * uvm_unmap_detach(*head, 0) will remove dead entries. 544 */ 545 static __inline void 546 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 547 { 548 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 549 } 550 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 551 dead_entry_push((_headptr), (_entry)) 552 553 /* 554 * Helper function for uvm_map_findspace_tree. 555 * 556 * Given allocation constraints and pmap constraints, finds the 557 * lowest and highest address in a range that can be used for the 558 * allocation. 559 * 560 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 561 * 562 * 563 * Big chunk of math with a seasoning of dragons. 564 */ 565 int 566 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 567 struct vm_map_entry *sel, vaddr_t align, 568 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 569 { 570 vaddr_t sel_min, sel_max; 571 #ifdef PMAP_PREFER 572 vaddr_t pmap_min, pmap_max; 573 #endif /* PMAP_PREFER */ 574 #ifdef DIAGNOSTIC 575 int bad; 576 #endif /* DIAGNOSTIC */ 577 578 sel_min = VMMAP_FREE_START(sel); 579 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 580 581 #ifdef PMAP_PREFER 582 583 /* 584 * There are two special cases, in which we can satisfy the align 585 * requirement and the pmap_prefer requirement. 586 * - when pmap_off == 0, we always select the largest of the two 587 * - when pmap_off % align == 0 and pmap_align > align, we simply 588 * satisfy the pmap_align requirement and automatically 589 * satisfy the align requirement. 590 */ 591 if (align > PAGE_SIZE && 592 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 593 /* 594 * Simple case: only use align. 595 */ 596 sel_min = roundup(sel_min, align); 597 sel_max &= ~(align - 1); 598 599 if (sel_min > sel_max) 600 return ENOMEM; 601 602 /* Correct for bias. 
*/ 603 if (sel_max - sel_min > FSPACE_BIASGAP) { 604 if (bias > 0) { 605 sel_min = sel_max - FSPACE_BIASGAP; 606 sel_min = roundup(sel_min, align); 607 } else if (bias < 0) { 608 sel_max = sel_min + FSPACE_BIASGAP; 609 sel_max &= ~(align - 1); 610 } 611 } 612 } else if (pmap_align != 0) { 613 /* 614 * Special case: satisfy both pmap_prefer and 615 * align argument. 616 */ 617 pmap_max = sel_max & ~(pmap_align - 1); 618 pmap_min = sel_min; 619 if (pmap_max < sel_min) 620 return ENOMEM; 621 622 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 623 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 624 pmap_min = pmap_max - FSPACE_BIASGAP; 625 /* Align pmap_min. */ 626 pmap_min &= ~(pmap_align - 1); 627 if (pmap_min < sel_min) 628 pmap_min += pmap_align; 629 if (pmap_min > pmap_max) 630 return ENOMEM; 631 632 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 633 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 634 pmap_max = (pmap_min + FSPACE_BIASGAP) & 635 ~(pmap_align - 1); 636 } 637 if (pmap_min > pmap_max) 638 return ENOMEM; 639 640 /* Apply pmap prefer offset. */ 641 pmap_max |= pmap_off; 642 if (pmap_max > sel_max) 643 pmap_max -= pmap_align; 644 pmap_min |= pmap_off; 645 if (pmap_min < sel_min) 646 pmap_min += pmap_align; 647 648 /* 649 * Fixup: it's possible that pmap_min and pmap_max 650 * cross eachother. In this case, try to find one 651 * address that is allowed. 652 * (This usually happens in biased case.) 653 */ 654 if (pmap_min > pmap_max) { 655 if (pmap_min < sel_max) 656 pmap_max = pmap_min; 657 else if (pmap_max > sel_min) 658 pmap_min = pmap_max; 659 else 660 return ENOMEM; 661 } 662 663 /* Internal validation. */ 664 KDASSERT(pmap_min <= pmap_max); 665 666 sel_min = pmap_min; 667 sel_max = pmap_max; 668 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 669 sel_min = sel_max - FSPACE_BIASGAP; 670 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 671 sel_max = sel_min + FSPACE_BIASGAP; 672 673 #else 674 675 if (align > PAGE_SIZE) { 676 sel_min = roundup(sel_min, align); 677 sel_max &= ~(align - 1); 678 if (sel_min > sel_max) 679 return ENOMEM; 680 681 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 682 if (bias > 0) { 683 sel_min = roundup(sel_max - FSPACE_BIASGAP, 684 align); 685 } else { 686 sel_max = (sel_min + FSPACE_BIASGAP) & 687 ~(align - 1); 688 } 689 } 690 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 691 sel_min = sel_max - FSPACE_BIASGAP; 692 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 693 sel_max = sel_min + FSPACE_BIASGAP; 694 695 #endif 696 697 if (sel_min > sel_max) 698 return ENOMEM; 699 700 #ifdef DIAGNOSTIC 701 bad = 0; 702 /* Lower boundary check. */ 703 if (sel_min < VMMAP_FREE_START(sel)) { 704 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 705 sel_min, VMMAP_FREE_START(sel)); 706 bad++; 707 } 708 /* Upper boundary check. */ 709 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 710 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 711 sel_max, 712 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 713 bad++; 714 } 715 /* Lower boundary alignment. */ 716 if (align != 0 && (sel_min & (align - 1)) != 0) { 717 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 718 sel_min, align); 719 bad++; 720 } 721 /* Upper boundary alignment. */ 722 if (align != 0 && (sel_max & (align - 1)) != 0) { 723 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 724 sel_max, align); 725 bad++; 726 } 727 /* Lower boundary PMAP_PREFER check. 
*/ 728 if (pmap_align != 0 && align == 0 && 729 (sel_min & (pmap_align - 1)) != pmap_off) { 730 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 731 sel_min, sel_min & (pmap_align - 1), pmap_off); 732 bad++; 733 } 734 /* Upper boundary PMAP_PREFER check. */ 735 if (pmap_align != 0 && align == 0 && 736 (sel_max & (pmap_align - 1)) != pmap_off) { 737 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 738 sel_max, sel_max & (pmap_align - 1), pmap_off); 739 bad++; 740 } 741 742 if (bad) { 743 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 744 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 745 "bias = %d, " 746 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 747 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 748 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 749 } 750 #endif /* DIAGNOSTIC */ 751 752 *min = sel_min; 753 *max = sel_max; 754 return 0; 755 } 756 757 /* 758 * Test if memory starting at addr with sz bytes is free. 759 * 760 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 761 * the space. 762 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 763 */ 764 int 765 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 766 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 767 vaddr_t addr, vsize_t sz) 768 { 769 struct uvm_addr_state *free; 770 struct uvm_map_addr *atree; 771 struct vm_map_entry *i, *i_end; 772 773 if (addr + sz < addr) 774 return 0; 775 776 /* 777 * Kernel memory above uvm_maxkaddr is considered unavailable. 778 */ 779 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 780 if (addr + sz > uvm_maxkaddr) 781 return 0; 782 } 783 784 atree = &map->addr; 785 786 /* 787 * Fill in first, last, so they point at the entries containing the 788 * first and last address of the range. 789 * Note that if they are not NULL, we don't perform the lookup. 790 */ 791 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 792 if (*start_ptr == NULL) { 793 *start_ptr = uvm_map_entrybyaddr(atree, addr); 794 if (*start_ptr == NULL) 795 return 0; 796 } else 797 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 798 if (*end_ptr == NULL) { 799 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 800 *end_ptr = *start_ptr; 801 else { 802 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 803 if (*end_ptr == NULL) 804 return 0; 805 } 806 } else 807 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 808 809 /* Validation. */ 810 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 811 KDASSERT((*start_ptr)->start <= addr && 812 VMMAP_FREE_END(*start_ptr) > addr && 813 (*end_ptr)->start < addr + sz && 814 VMMAP_FREE_END(*end_ptr) >= addr + sz); 815 816 /* 817 * Check the none of the entries intersects with <addr, addr+sz>. 818 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 819 * considered unavailable unless called by those allocators. 820 */ 821 i = *start_ptr; 822 i_end = RBT_NEXT(uvm_map_addr, *end_ptr); 823 for (; i != i_end; 824 i = RBT_NEXT(uvm_map_addr, i)) { 825 if (i->start != i->end && i->end > addr) 826 return 0; 827 828 /* 829 * uaddr_exe and uaddr_brk_stack may only be used 830 * by these allocators and the NULL uaddr (i.e. no 831 * uaddr). 832 * Reject if this requirement is not met. 
833 */ 834 if (uaddr != NULL) { 835 free = uvm_map_uaddr_e(map, i); 836 837 if (uaddr != free && free != NULL && 838 (free == map->uaddr_exe || 839 free == map->uaddr_brk_stack)) 840 return 0; 841 } 842 } 843 844 return -1; 845 } 846 847 /* 848 * Invoke each address selector until an address is found. 849 * Will not invoke uaddr_exe. 850 */ 851 int 852 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 853 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 854 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 855 { 856 struct uvm_addr_state *uaddr; 857 int i; 858 859 /* 860 * Allocation for sz bytes at any address, 861 * using the addr selectors in order. 862 */ 863 for (i = 0; i < nitems(map->uaddr_any); i++) { 864 uaddr = map->uaddr_any[i]; 865 866 if (uvm_addr_invoke(map, uaddr, first, last, 867 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 868 return 0; 869 } 870 871 /* Fall back to brk() and stack() address selectors. */ 872 uaddr = map->uaddr_brk_stack; 873 if (uvm_addr_invoke(map, uaddr, first, last, 874 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 875 return 0; 876 877 return ENOMEM; 878 } 879 880 /* Calculate entry augmentation value. */ 881 vsize_t 882 uvm_map_addr_augment_get(struct vm_map_entry *entry) 883 { 884 vsize_t augment; 885 struct vm_map_entry *left, *right; 886 887 augment = entry->fspace; 888 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 889 augment = MAX(augment, left->fspace_augment); 890 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 891 augment = MAX(augment, right->fspace_augment); 892 return augment; 893 } 894 895 /* 896 * Update augmentation data in entry. 897 */ 898 void 899 uvm_map_addr_augment(struct vm_map_entry *entry) 900 { 901 vsize_t augment; 902 903 while (entry != NULL) { 904 /* Calculate value for augmentation. */ 905 augment = uvm_map_addr_augment_get(entry); 906 907 /* 908 * Descend update. 909 * Once we find an entry that already has the correct value, 910 * stop, since it means all its parents will use the correct 911 * value too. 912 */ 913 if (entry->fspace_augment == augment) 914 return; 915 entry->fspace_augment = augment; 916 entry = RBT_PARENT(uvm_map_addr, entry); 917 } 918 } 919 920 /* 921 * uvm_mapanon: establish a valid mapping in map for an anon 922 * 923 * => *addr and sz must be a multiple of PAGE_SIZE. 924 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 925 * => map must be unlocked. 926 * 927 * => align: align vaddr, must be a power-of-2. 928 * Align is only a hint and will be ignored if the alignment fails. 929 */ 930 int 931 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 932 vsize_t align, unsigned int flags) 933 { 934 struct vm_map_entry *first, *last, *entry, *new; 935 struct uvm_map_deadq dead; 936 vm_prot_t prot; 937 vm_prot_t maxprot; 938 vm_inherit_t inherit; 939 int advice; 940 int error; 941 vaddr_t pmap_align, pmap_offset; 942 vaddr_t hint; 943 944 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 945 KASSERT(map != kernel_map); 946 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 947 948 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 949 splassert(IPL_NONE); 950 951 /* 952 * We use pmap_align and pmap_offset as alignment and offset variables. 953 * 954 * Because the align parameter takes precedence over pmap prefer, 955 * the pmap_align will need to be set to align, with pmap_offset = 0, 956 * if pmap_prefer will not align. 957 */ 958 pmap_align = MAX(align, PAGE_SIZE); 959 pmap_offset = 0; 960 961 /* Decode parameters. 
*/ 962 prot = UVM_PROTECTION(flags); 963 maxprot = UVM_MAXPROTECTION(flags); 964 advice = UVM_ADVICE(flags); 965 inherit = UVM_INHERIT(flags); 966 error = 0; 967 hint = trunc_page(*addr); 968 TAILQ_INIT(&dead); 969 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 970 KASSERT((align & (align - 1)) == 0); 971 972 /* Check protection. */ 973 if ((prot & maxprot) != prot) 974 return EACCES; 975 976 /* 977 * Before grabbing the lock, allocate a map entry for later 978 * use to ensure we don't wait for memory while holding the 979 * vm_map_lock. 980 */ 981 new = uvm_mapent_alloc(map, flags); 982 if (new == NULL) 983 return(ENOMEM); 984 985 if (flags & UVM_FLAG_TRYLOCK) { 986 if (vm_map_lock_try(map) == FALSE) { 987 error = EFAULT; 988 goto out; 989 } 990 } else 991 vm_map_lock(map); 992 993 first = last = NULL; 994 if (flags & UVM_FLAG_FIXED) { 995 /* 996 * Fixed location. 997 * 998 * Note: we ignore align, pmap_prefer. 999 * Fill in first, last and *addr. 1000 */ 1001 KASSERT((*addr & PAGE_MASK) == 0); 1002 1003 /* Check that the space is available. */ 1004 if (flags & UVM_FLAG_UNMAP) { 1005 if ((flags & UVM_FLAG_STACK) && 1006 !uvm_map_is_stack_remappable(map, *addr, sz)) { 1007 error = EINVAL; 1008 goto unlock; 1009 } 1010 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1011 } 1012 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1013 error = ENOMEM; 1014 goto unlock; 1015 } 1016 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1017 (align == 0 || (*addr & (align - 1)) == 0) && 1018 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1019 /* 1020 * Address used as hint. 1021 * 1022 * Note: we enforce the alignment restriction, 1023 * but ignore pmap_prefer. 1024 */ 1025 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1026 /* Run selection algorithm for executables. */ 1027 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1028 addr, sz, pmap_align, pmap_offset, prot, hint); 1029 1030 if (error != 0) 1031 goto unlock; 1032 } else { 1033 /* Update freelists from vmspace. */ 1034 uvm_map_vmspace_update(map, &dead, flags); 1035 1036 error = uvm_map_findspace(map, &first, &last, addr, sz, 1037 pmap_align, pmap_offset, prot, hint); 1038 1039 if (error != 0) 1040 goto unlock; 1041 } 1042 1043 /* Double-check if selected address doesn't cause overflow. */ 1044 if (*addr + sz < *addr) { 1045 error = ENOMEM; 1046 goto unlock; 1047 } 1048 1049 /* If we only want a query, return now. */ 1050 if (flags & UVM_FLAG_QUERY) { 1051 error = 0; 1052 goto unlock; 1053 } 1054 1055 /* 1056 * Create new entry. 1057 * first and last may be invalidated after this call. 
1058 */ 1059 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1060 new); 1061 if (entry == NULL) { 1062 error = ENOMEM; 1063 goto unlock; 1064 } 1065 new = NULL; 1066 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1067 entry->object.uvm_obj = NULL; 1068 entry->offset = 0; 1069 entry->protection = prot; 1070 entry->max_protection = maxprot; 1071 entry->inheritance = inherit; 1072 entry->wired_count = 0; 1073 entry->advice = advice; 1074 if (flags & UVM_FLAG_STACK) { 1075 entry->etype |= UVM_ET_STACK; 1076 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 1077 map->serial++; 1078 } 1079 if (flags & UVM_FLAG_COPYONW) { 1080 entry->etype |= UVM_ET_COPYONWRITE; 1081 if ((flags & UVM_FLAG_OVERLAY) == 0) 1082 entry->etype |= UVM_ET_NEEDSCOPY; 1083 } 1084 if (flags & UVM_FLAG_OVERLAY) { 1085 KERNEL_LOCK(); 1086 entry->aref.ar_pageoff = 0; 1087 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1088 KERNEL_UNLOCK(); 1089 } 1090 1091 /* Update map and process statistics. */ 1092 map->size += sz; 1093 ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz); 1094 1095 unlock: 1096 vm_map_unlock(map); 1097 1098 /* 1099 * Remove dead entries. 1100 * 1101 * Dead entries may be the result of merging. 1102 * uvm_map_mkentry may also create dead entries, when it attempts to 1103 * destroy free-space entries. 1104 */ 1105 uvm_unmap_detach(&dead, 0); 1106 out: 1107 if (new) 1108 uvm_mapent_free(new); 1109 return error; 1110 } 1111 1112 /* 1113 * uvm_map: establish a valid mapping in map 1114 * 1115 * => *addr and sz must be a multiple of PAGE_SIZE. 1116 * => map must be unlocked. 1117 * => <uobj,uoffset> value meanings (4 cases): 1118 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1119 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1120 * [3] <uobj,uoffset> == normal mapping 1121 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1122 * 1123 * case [4] is for kernel mappings where we don't know the offset until 1124 * we've found a virtual address. note that kernel object offsets are 1125 * always relative to vm_map_min(kernel_map). 1126 * 1127 * => align: align vaddr, must be a power-of-2. 1128 * Align is only a hint and will be ignored if the alignment fails. 1129 */ 1130 int 1131 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1132 struct uvm_object *uobj, voff_t uoffset, 1133 vsize_t align, unsigned int flags) 1134 { 1135 struct vm_map_entry *first, *last, *entry, *new; 1136 struct uvm_map_deadq dead; 1137 vm_prot_t prot; 1138 vm_prot_t maxprot; 1139 vm_inherit_t inherit; 1140 int advice; 1141 int error; 1142 vaddr_t pmap_align, pmap_offset; 1143 vaddr_t hint; 1144 1145 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1146 splassert(IPL_NONE); 1147 else 1148 splassert(IPL_VM); 1149 1150 /* 1151 * We use pmap_align and pmap_offset as alignment and offset variables. 1152 * 1153 * Because the align parameter takes precedence over pmap prefer, 1154 * the pmap_align will need to be set to align, with pmap_offset = 0, 1155 * if pmap_prefer will not align. 1156 */ 1157 if (uoffset == UVM_UNKNOWN_OFFSET) { 1158 pmap_align = MAX(align, PAGE_SIZE); 1159 pmap_offset = 0; 1160 } else { 1161 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1162 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1163 1164 if (align == 0 || 1165 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1166 /* pmap_offset satisfies align, no change. */ 1167 } else { 1168 /* Align takes precedence over pmap prefer. 
*/ 1169 pmap_align = align; 1170 pmap_offset = 0; 1171 } 1172 } 1173 1174 /* Decode parameters. */ 1175 prot = UVM_PROTECTION(flags); 1176 maxprot = UVM_MAXPROTECTION(flags); 1177 advice = UVM_ADVICE(flags); 1178 inherit = UVM_INHERIT(flags); 1179 error = 0; 1180 hint = trunc_page(*addr); 1181 TAILQ_INIT(&dead); 1182 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1183 KASSERT((align & (align - 1)) == 0); 1184 1185 /* Holes are incompatible with other types of mappings. */ 1186 if (flags & UVM_FLAG_HOLE) { 1187 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1188 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1189 } 1190 1191 /* Unset hint for kernel_map non-fixed allocations. */ 1192 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1193 hint = 0; 1194 1195 /* Check protection. */ 1196 if ((prot & maxprot) != prot) 1197 return EACCES; 1198 1199 if (map == kernel_map && 1200 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1201 panic("uvm_map: kernel map W^X violation requested"); 1202 1203 /* 1204 * Before grabbing the lock, allocate a map entry for later 1205 * use to ensure we don't wait for memory while holding the 1206 * vm_map_lock. 1207 */ 1208 new = uvm_mapent_alloc(map, flags); 1209 if (new == NULL) 1210 return(ENOMEM); 1211 1212 if (flags & UVM_FLAG_TRYLOCK) { 1213 if (vm_map_lock_try(map) == FALSE) { 1214 error = EFAULT; 1215 goto out; 1216 } 1217 } else { 1218 vm_map_lock(map); 1219 } 1220 1221 first = last = NULL; 1222 if (flags & UVM_FLAG_FIXED) { 1223 /* 1224 * Fixed location. 1225 * 1226 * Note: we ignore align, pmap_prefer. 1227 * Fill in first, last and *addr. 1228 */ 1229 KASSERT((*addr & PAGE_MASK) == 0); 1230 1231 /* 1232 * Grow pmap to include allocated address. 1233 * If the growth fails, the allocation will fail too. 1234 */ 1235 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1236 uvm_maxkaddr < (*addr + sz)) { 1237 uvm_map_kmem_grow(map, &dead, 1238 *addr + sz - uvm_maxkaddr, flags); 1239 } 1240 1241 /* Check that the space is available. */ 1242 if (flags & UVM_FLAG_UNMAP) 1243 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1244 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1245 error = ENOMEM; 1246 goto unlock; 1247 } 1248 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1249 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1250 (align == 0 || (*addr & (align - 1)) == 0) && 1251 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1252 /* 1253 * Address used as hint. 1254 * 1255 * Note: we enforce the alignment restriction, 1256 * but ignore pmap_prefer. 1257 */ 1258 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1259 /* Run selection algorithm for executables. */ 1260 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1261 addr, sz, pmap_align, pmap_offset, prot, hint); 1262 1263 /* Grow kernel memory and try again. */ 1264 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1265 uvm_map_kmem_grow(map, &dead, sz, flags); 1266 1267 error = uvm_addr_invoke(map, map->uaddr_exe, 1268 &first, &last, addr, sz, 1269 pmap_align, pmap_offset, prot, hint); 1270 } 1271 1272 if (error != 0) 1273 goto unlock; 1274 } else { 1275 /* Update freelists from vmspace. */ 1276 if (map->flags & VM_MAP_ISVMSPACE) 1277 uvm_map_vmspace_update(map, &dead, flags); 1278 1279 error = uvm_map_findspace(map, &first, &last, addr, sz, 1280 pmap_align, pmap_offset, prot, hint); 1281 1282 /* Grow kernel memory and try again. 
*/ 1283 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1284 uvm_map_kmem_grow(map, &dead, sz, flags); 1285 1286 error = uvm_map_findspace(map, &first, &last, addr, sz, 1287 pmap_align, pmap_offset, prot, hint); 1288 } 1289 1290 if (error != 0) 1291 goto unlock; 1292 } 1293 1294 /* Double-check if selected address doesn't cause overflow. */ 1295 if (*addr + sz < *addr) { 1296 error = ENOMEM; 1297 goto unlock; 1298 } 1299 1300 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1301 uvm_maxkaddr >= *addr + sz); 1302 1303 /* If we only want a query, return now. */ 1304 if (flags & UVM_FLAG_QUERY) { 1305 error = 0; 1306 goto unlock; 1307 } 1308 1309 if (uobj == NULL) 1310 uoffset = 0; 1311 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1312 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1313 uoffset = *addr - vm_map_min(kernel_map); 1314 } 1315 1316 /* 1317 * Create new entry. 1318 * first and last may be invalidated after this call. 1319 */ 1320 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1321 new); 1322 if (entry == NULL) { 1323 error = ENOMEM; 1324 goto unlock; 1325 } 1326 new = NULL; 1327 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1328 entry->object.uvm_obj = uobj; 1329 entry->offset = uoffset; 1330 entry->protection = prot; 1331 entry->max_protection = maxprot; 1332 entry->inheritance = inherit; 1333 entry->wired_count = 0; 1334 entry->advice = advice; 1335 if (flags & UVM_FLAG_STACK) { 1336 entry->etype |= UVM_ET_STACK; 1337 if (flags & UVM_FLAG_UNMAP) 1338 map->serial++; 1339 } 1340 if (uobj) 1341 entry->etype |= UVM_ET_OBJ; 1342 else if (flags & UVM_FLAG_HOLE) 1343 entry->etype |= UVM_ET_HOLE; 1344 if (flags & UVM_FLAG_NOFAULT) 1345 entry->etype |= UVM_ET_NOFAULT; 1346 if (flags & UVM_FLAG_WC) 1347 entry->etype |= UVM_ET_WC; 1348 if (flags & UVM_FLAG_COPYONW) { 1349 entry->etype |= UVM_ET_COPYONWRITE; 1350 if ((flags & UVM_FLAG_OVERLAY) == 0) 1351 entry->etype |= UVM_ET_NEEDSCOPY; 1352 } 1353 if (flags & UVM_FLAG_OVERLAY) { 1354 entry->aref.ar_pageoff = 0; 1355 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1356 } 1357 1358 /* Update map and process statistics. */ 1359 if (!(flags & UVM_FLAG_HOLE)) { 1360 map->size += sz; 1361 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1362 ((struct vmspace *)map)->vm_dused += 1363 uvmspace_dused(map, *addr, *addr + sz); 1364 } 1365 } 1366 1367 /* 1368 * Try to merge entry. 1369 * 1370 * Userland allocations are kept separated most of the time. 1371 * Forego the effort of merging what most of the time can't be merged 1372 * and only try the merge if it concerns a kernel entry. 1373 */ 1374 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1375 (map->flags & VM_MAP_ISVMSPACE) == 0) 1376 uvm_mapent_tryjoin(map, entry, &dead); 1377 1378 unlock: 1379 vm_map_unlock(map); 1380 1381 /* 1382 * Remove dead entries. 1383 * 1384 * Dead entries may be the result of merging. 1385 * uvm_map_mkentry may also create dead entries, when it attempts to 1386 * destroy free-space entries. 1387 */ 1388 if (map->flags & VM_MAP_INTRSAFE) 1389 uvm_unmap_detach_intrsafe(&dead); 1390 else 1391 uvm_unmap_detach(&dead, 0); 1392 out: 1393 if (new) 1394 uvm_mapent_free(new); 1395 return error; 1396 } 1397 1398 /* 1399 * True iff e1 and e2 can be joined together. 1400 */ 1401 int 1402 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1403 struct vm_map_entry *e2) 1404 { 1405 KDASSERT(e1 != NULL && e2 != NULL); 1406 1407 /* Must be the same entry type and not have free memory between. 
*/ 1408 if (e1->etype != e2->etype || e1->end != e2->start) 1409 return 0; 1410 1411 /* Submaps are never joined. */ 1412 if (UVM_ET_ISSUBMAP(e1)) 1413 return 0; 1414 1415 /* Never merge wired memory. */ 1416 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1417 return 0; 1418 1419 /* Protection, inheritance and advice must be equal. */ 1420 if (e1->protection != e2->protection || 1421 e1->max_protection != e2->max_protection || 1422 e1->inheritance != e2->inheritance || 1423 e1->advice != e2->advice) 1424 return 0; 1425 1426 /* If uvm_object: object itself and offsets within object must match. */ 1427 if (UVM_ET_ISOBJ(e1)) { 1428 if (e1->object.uvm_obj != e2->object.uvm_obj) 1429 return 0; 1430 if (e1->offset + (e1->end - e1->start) != e2->offset) 1431 return 0; 1432 } 1433 1434 /* 1435 * Cannot join shared amaps. 1436 * Note: no need to lock amap to look at refs, since we don't care 1437 * about its exact value. 1438 * If it is 1 (i.e. we have the only reference) it will stay there. 1439 */ 1440 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1441 return 0; 1442 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1443 return 0; 1444 1445 /* Apprently, e1 and e2 match. */ 1446 return 1; 1447 } 1448 1449 /* 1450 * Join support function. 1451 * 1452 * Returns the merged entry on succes. 1453 * Returns NULL if the merge failed. 1454 */ 1455 struct vm_map_entry* 1456 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1457 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1458 { 1459 struct uvm_addr_state *free; 1460 1461 /* 1462 * Merging is not supported for map entries that 1463 * contain an amap in e1. This should never happen 1464 * anyway, because only kernel entries are merged. 1465 * These do not contain amaps. 1466 * e2 contains no real information in its amap, 1467 * so it can be erased immediately. 1468 */ 1469 KASSERT(e1->aref.ar_amap == NULL); 1470 1471 /* 1472 * Don't drop obj reference: 1473 * uvm_unmap_detach will do this for us. 1474 */ 1475 free = uvm_map_uaddr_e(map, e1); 1476 uvm_mapent_free_remove(map, free, e1); 1477 1478 free = uvm_map_uaddr_e(map, e2); 1479 uvm_mapent_free_remove(map, free, e2); 1480 uvm_mapent_addr_remove(map, e2); 1481 e1->end = e2->end; 1482 e1->guard = e2->guard; 1483 e1->fspace = e2->fspace; 1484 uvm_mapent_free_insert(map, free, e1); 1485 1486 DEAD_ENTRY_PUSH(dead, e2); 1487 return e1; 1488 } 1489 1490 /* 1491 * Attempt forward and backward joining of entry. 1492 * 1493 * Returns entry after joins. 1494 * We are guaranteed that the amap of entry is either non-existent or 1495 * has never been used. 1496 */ 1497 struct vm_map_entry* 1498 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1499 struct uvm_map_deadq *dead) 1500 { 1501 struct vm_map_entry *other; 1502 struct vm_map_entry *merged; 1503 1504 /* Merge with previous entry. */ 1505 other = RBT_PREV(uvm_map_addr, entry); 1506 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1507 merged = uvm_mapent_merge(map, other, entry, dead); 1508 if (merged) 1509 entry = merged; 1510 } 1511 1512 /* 1513 * Merge with next entry. 1514 * 1515 * Because amap can only extend forward and the next entry 1516 * probably contains sensible info, only perform forward merging 1517 * in the absence of an amap. 
1518 */ 1519 other = RBT_NEXT(uvm_map_addr, entry); 1520 if (other && entry->aref.ar_amap == NULL && 1521 other->aref.ar_amap == NULL && 1522 uvm_mapent_isjoinable(map, entry, other)) { 1523 merged = uvm_mapent_merge(map, entry, other, dead); 1524 if (merged) 1525 entry = merged; 1526 } 1527 1528 return entry; 1529 } 1530 1531 /* 1532 * Kill entries that are no longer in a map. 1533 */ 1534 void 1535 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1536 { 1537 struct vm_map_entry *entry; 1538 int waitok = flags & UVM_PLA_WAITOK; 1539 1540 if (TAILQ_EMPTY(deadq)) 1541 return; 1542 1543 KERNEL_LOCK(); 1544 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1545 if (waitok) 1546 uvm_pause(); 1547 /* Drop reference to amap, if we've got one. */ 1548 if (entry->aref.ar_amap) 1549 amap_unref(entry->aref.ar_amap, 1550 entry->aref.ar_pageoff, 1551 atop(entry->end - entry->start), 1552 flags & AMAP_REFALL); 1553 1554 /* Drop reference to our backing object, if we've got one. */ 1555 if (UVM_ET_ISSUBMAP(entry)) { 1556 /* ... unlikely to happen, but play it safe */ 1557 uvm_map_deallocate(entry->object.sub_map); 1558 } else if (UVM_ET_ISOBJ(entry) && 1559 entry->object.uvm_obj->pgops->pgo_detach) { 1560 entry->object.uvm_obj->pgops->pgo_detach( 1561 entry->object.uvm_obj); 1562 } 1563 1564 /* Step to next. */ 1565 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1566 uvm_mapent_free(entry); 1567 } 1568 KERNEL_UNLOCK(); 1569 } 1570 1571 void 1572 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1573 { 1574 struct vm_map_entry *entry; 1575 1576 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1577 KASSERT(entry->aref.ar_amap == NULL); 1578 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1579 KASSERT(!UVM_ET_ISOBJ(entry)); 1580 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1581 uvm_mapent_free(entry); 1582 } 1583 } 1584 1585 /* 1586 * Create and insert new entry. 1587 * 1588 * Returned entry contains new addresses and is inserted properly in the tree. 1589 * first and last are (probably) no longer valid. 1590 */ 1591 struct vm_map_entry* 1592 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1593 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1594 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1595 { 1596 struct vm_map_entry *entry, *prev; 1597 struct uvm_addr_state *free; 1598 vaddr_t min, max; /* free space boundaries for new entry */ 1599 1600 KDASSERT(map != NULL); 1601 KDASSERT(first != NULL); 1602 KDASSERT(last != NULL); 1603 KDASSERT(dead != NULL); 1604 KDASSERT(sz > 0); 1605 KDASSERT(addr + sz > addr); 1606 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1607 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1608 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1609 uvm_tree_sanity(map, __FILE__, __LINE__); 1610 1611 min = addr + sz; 1612 max = VMMAP_FREE_END(last); 1613 1614 /* Initialize new entry. */ 1615 if (new == NULL) 1616 entry = uvm_mapent_alloc(map, flags); 1617 else 1618 entry = new; 1619 if (entry == NULL) 1620 return NULL; 1621 entry->offset = 0; 1622 entry->etype = 0; 1623 entry->wired_count = 0; 1624 entry->aref.ar_pageoff = 0; 1625 entry->aref.ar_amap = NULL; 1626 1627 entry->start = addr; 1628 entry->end = min; 1629 entry->guard = 0; 1630 entry->fspace = 0; 1631 1632 /* Reset free space in first. */ 1633 free = uvm_map_uaddr_e(map, first); 1634 uvm_mapent_free_remove(map, free, first); 1635 first->guard = 0; 1636 first->fspace = 0; 1637 1638 /* 1639 * Remove all entries that are fully replaced. 
1640 * We are iterating using last in reverse order. 1641 */ 1642 for (; first != last; last = prev) { 1643 prev = RBT_PREV(uvm_map_addr, last); 1644 1645 KDASSERT(last->start == last->end); 1646 free = uvm_map_uaddr_e(map, last); 1647 uvm_mapent_free_remove(map, free, last); 1648 uvm_mapent_addr_remove(map, last); 1649 DEAD_ENTRY_PUSH(dead, last); 1650 } 1651 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1652 if (first->start == addr) { 1653 uvm_mapent_addr_remove(map, first); 1654 DEAD_ENTRY_PUSH(dead, first); 1655 } else { 1656 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1657 addr, flags); 1658 } 1659 1660 /* Finally, link in entry. */ 1661 uvm_mapent_addr_insert(map, entry); 1662 uvm_map_fix_space(map, entry, min, max, flags); 1663 1664 uvm_tree_sanity(map, __FILE__, __LINE__); 1665 return entry; 1666 } 1667 1668 1669 /* 1670 * uvm_mapent_alloc: allocate a map entry 1671 */ 1672 struct vm_map_entry * 1673 uvm_mapent_alloc(struct vm_map *map, int flags) 1674 { 1675 struct vm_map_entry *me, *ne; 1676 int pool_flags; 1677 int i; 1678 1679 pool_flags = PR_WAITOK; 1680 if (flags & UVM_FLAG_TRYLOCK) 1681 pool_flags = PR_NOWAIT; 1682 1683 if (map->flags & VM_MAP_INTRSAFE || cold) { 1684 mtx_enter(&uvm_kmapent_mtx); 1685 if (SLIST_EMPTY(&uvm.kentry_free)) { 1686 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1687 &kd_nowait); 1688 if (ne == NULL) 1689 panic("uvm_mapent_alloc: cannot allocate map " 1690 "entry"); 1691 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1692 SLIST_INSERT_HEAD(&uvm.kentry_free, 1693 &ne[i], daddrs.addr_kentry); 1694 } 1695 if (ratecheck(&uvm_kmapent_last_warn_time, 1696 &uvm_kmapent_warn_rate)) 1697 printf("uvm_mapent_alloc: out of static " 1698 "map entries\n"); 1699 } 1700 me = SLIST_FIRST(&uvm.kentry_free); 1701 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1702 uvmexp.kmapent++; 1703 mtx_leave(&uvm_kmapent_mtx); 1704 me->flags = UVM_MAP_STATIC; 1705 } else if (map == kernel_map) { 1706 splassert(IPL_NONE); 1707 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1708 if (me == NULL) 1709 goto out; 1710 me->flags = UVM_MAP_KMEM; 1711 } else { 1712 splassert(IPL_NONE); 1713 me = pool_get(&uvm_map_entry_pool, pool_flags); 1714 if (me == NULL) 1715 goto out; 1716 me->flags = 0; 1717 } 1718 1719 if (me != NULL) { 1720 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1721 } 1722 1723 out: 1724 return(me); 1725 } 1726 1727 /* 1728 * uvm_mapent_free: free map entry 1729 * 1730 * => XXX: static pool for kernel map? 1731 */ 1732 void 1733 uvm_mapent_free(struct vm_map_entry *me) 1734 { 1735 if (me->flags & UVM_MAP_STATIC) { 1736 mtx_enter(&uvm_kmapent_mtx); 1737 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1738 uvmexp.kmapent--; 1739 mtx_leave(&uvm_kmapent_mtx); 1740 } else if (me->flags & UVM_MAP_KMEM) { 1741 splassert(IPL_NONE); 1742 pool_put(&uvm_map_entry_kmem_pool, me); 1743 } else { 1744 splassert(IPL_NONE); 1745 pool_put(&uvm_map_entry_pool, me); 1746 } 1747 } 1748 1749 /* 1750 * uvm_map_lookup_entry: find map entry at or before an address. 1751 * 1752 * => map must at least be read-locked by caller 1753 * => entry is returned in "entry" 1754 * => return value is true if address is in the returned entry 1755 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1756 * returned for those mappings. 
1757 */ 1758 boolean_t 1759 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1760 struct vm_map_entry **entry) 1761 { 1762 *entry = uvm_map_entrybyaddr(&map->addr, address); 1763 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1764 (*entry)->start <= address && (*entry)->end > address; 1765 } 1766 1767 /* 1768 * Inside a vm_map find the sp address and verify MAP_STACK, and also 1769 * remember low and high regions of that of region which is marked 1770 * with MAP_STACK. Return TRUE. 1771 * If sp isn't in a MAP_STACK region return FALSE. 1772 */ 1773 boolean_t 1774 uvm_map_check_stack_range(struct proc *p, vaddr_t sp) 1775 { 1776 vm_map_t map = &p->p_vmspace->vm_map; 1777 vm_map_entry_t entry; 1778 1779 if (sp < map->min_offset || sp >= map->max_offset) 1780 return(FALSE); 1781 1782 /* lock map */ 1783 vm_map_lock_read(map); 1784 1785 /* lookup */ 1786 if (!uvm_map_lookup_entry(map, trunc_page(sp), &entry)) { 1787 vm_map_unlock_read(map); 1788 return(FALSE); 1789 } 1790 1791 if ((entry->etype & UVM_ET_STACK) == 0) { 1792 vm_map_unlock_read(map); 1793 return (FALSE); 1794 } 1795 p->p_spstart = entry->start; 1796 p->p_spend = entry->end; 1797 p->p_spserial = map->serial; 1798 vm_map_unlock_read(map); 1799 return(TRUE); 1800 } 1801 1802 /* 1803 * Check whether the given address range can be converted to a MAP_STACK 1804 * mapping. 1805 * 1806 * Must be called with map locked. 1807 */ 1808 boolean_t 1809 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz) 1810 { 1811 vaddr_t end = addr + sz; 1812 struct vm_map_entry *first, *iter, *prev = NULL; 1813 1814 if (!uvm_map_lookup_entry(map, addr, &first)) { 1815 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1816 addr, end, map); 1817 return FALSE; 1818 } 1819 1820 /* 1821 * Check that the address range exists and is contiguous. 1822 */ 1823 for (iter = first; iter != NULL && iter->start < end; 1824 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1825 /* 1826 * Make sure that we do not have holes in the range. 1827 */ 1828 #if 0 1829 if (prev != NULL) { 1830 printf("prev->start 0x%lx, prev->end 0x%lx, " 1831 "iter->start 0x%lx, iter->end 0x%lx\n", 1832 prev->start, prev->end, iter->start, iter->end); 1833 } 1834 #endif 1835 1836 if (prev != NULL && prev->end != iter->start) { 1837 printf("map stack 0x%lx-0x%lx of map %p failed: " 1838 "hole in range\n", addr, end, map); 1839 return FALSE; 1840 } 1841 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1842 printf("map stack 0x%lx-0x%lx of map %p failed: " 1843 "hole in range\n", addr, end, map); 1844 return FALSE; 1845 } 1846 } 1847 1848 return TRUE; 1849 } 1850 1851 /* 1852 * Remap the middle-pages of an existing mapping as a stack range. 1853 * If there exists a previous contiguous mapping with the given range 1854 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1855 * mapping is dropped, and a new anon mapping is created and marked as 1856 * a stack. 1857 * 1858 * Must be called with map unlocked. 
1859 */ 1860 int 1861 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1862 { 1863 vm_map_t map = &p->p_vmspace->vm_map; 1864 vaddr_t start, end; 1865 int error; 1866 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1867 PROT_READ | PROT_WRITE | PROT_EXEC, 1868 MAP_INHERIT_COPY, MADV_NORMAL, 1869 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1870 UVM_FLAG_COPYONW); 1871 1872 start = round_page(addr); 1873 end = trunc_page(addr + sz); 1874 #ifdef MACHINE_STACK_GROWS_UP 1875 if (end == addr + sz) 1876 end -= PAGE_SIZE; 1877 #else 1878 if (start == addr) 1879 start += PAGE_SIZE; 1880 #endif 1881 1882 if (start < map->min_offset || end >= map->max_offset || end < start) 1883 return EINVAL; 1884 1885 error = uvm_mapanon(map, &start, end - start, 0, flags); 1886 if (error != 0) 1887 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1888 1889 return error; 1890 } 1891 1892 /* 1893 * uvm_map_pie: return a random load address for a PIE executable 1894 * properly aligned. 1895 */ 1896 #ifndef VM_PIE_MAX_ADDR 1897 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1898 #endif 1899 1900 #ifndef VM_PIE_MIN_ADDR 1901 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1902 #endif 1903 1904 #ifndef VM_PIE_MIN_ALIGN 1905 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1906 #endif 1907 1908 vaddr_t 1909 uvm_map_pie(vaddr_t align) 1910 { 1911 vaddr_t addr, space, min; 1912 1913 align = MAX(align, VM_PIE_MIN_ALIGN); 1914 1915 /* round up to next alignment */ 1916 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1917 1918 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1919 return (align); 1920 1921 space = (VM_PIE_MAX_ADDR - min) / align; 1922 space = MIN(space, (u_int32_t)-1); 1923 1924 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1925 addr += min; 1926 1927 return (addr); 1928 } 1929 1930 void 1931 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1932 { 1933 struct uvm_map_deadq dead; 1934 1935 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1936 (end & (vaddr_t)PAGE_MASK) == 0); 1937 TAILQ_INIT(&dead); 1938 vm_map_lock(map); 1939 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1940 vm_map_unlock(map); 1941 1942 if (map->flags & VM_MAP_INTRSAFE) 1943 uvm_unmap_detach_intrsafe(&dead); 1944 else 1945 uvm_unmap_detach(&dead, 0); 1946 } 1947 1948 /* 1949 * Mark entry as free. 1950 * 1951 * entry will be put on the dead list. 1952 * The free space will be merged into the previous or a new entry, 1953 * unless markfree is false. 1954 */ 1955 void 1956 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1957 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1958 boolean_t markfree) 1959 { 1960 struct uvm_addr_state *free; 1961 struct vm_map_entry *prev; 1962 vaddr_t addr; /* Start of freed range. */ 1963 vaddr_t end; /* End of freed range. */ 1964 1965 prev = *prev_ptr; 1966 if (prev == entry) 1967 *prev_ptr = prev = NULL; 1968 1969 if (prev == NULL || 1970 VMMAP_FREE_END(prev) != entry->start) 1971 prev = RBT_PREV(uvm_map_addr, entry); 1972 1973 /* Entry is describing only free memory and has nothing to drain into. 
*/ 1974 if (prev == NULL && entry->start == entry->end && markfree) { 1975 *prev_ptr = entry; 1976 return; 1977 } 1978 1979 addr = entry->start; 1980 end = VMMAP_FREE_END(entry); 1981 free = uvm_map_uaddr_e(map, entry); 1982 uvm_mapent_free_remove(map, free, entry); 1983 uvm_mapent_addr_remove(map, entry); 1984 DEAD_ENTRY_PUSH(dead, entry); 1985 1986 if (markfree) { 1987 if (prev) { 1988 free = uvm_map_uaddr_e(map, prev); 1989 uvm_mapent_free_remove(map, free, prev); 1990 } 1991 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1992 } 1993 } 1994 1995 /* 1996 * Unwire and release referenced amap and object from map entry. 1997 */ 1998 void 1999 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 2000 { 2001 /* Unwire removed map entry. */ 2002 if (VM_MAPENT_ISWIRED(entry)) { 2003 KERNEL_LOCK(); 2004 entry->wired_count = 0; 2005 uvm_fault_unwire_locked(map, entry->start, entry->end); 2006 KERNEL_UNLOCK(); 2007 } 2008 2009 /* Entry-type specific code. */ 2010 if (UVM_ET_ISHOLE(entry)) { 2011 /* Nothing to be done for holes. */ 2012 } else if (map->flags & VM_MAP_INTRSAFE) { 2013 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2014 uvm_km_pgremove_intrsafe(entry->start, entry->end); 2015 pmap_kremove(entry->start, entry->end - entry->start); 2016 } else if (UVM_ET_ISOBJ(entry) && 2017 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 2018 KASSERT(vm_map_pmap(map) == pmap_kernel()); 2019 /* 2020 * Note: kernel object mappings are currently used in 2021 * two ways: 2022 * [1] "normal" mappings of pages in the kernel object 2023 * [2] uvm_km_valloc'd allocations in which we 2024 * pmap_enter in some non-kernel-object page 2025 * (e.g. vmapbuf). 2026 * 2027 * for case [1], we need to remove the mapping from 2028 * the pmap and then remove the page from the kernel 2029 * object (because, once pages in a kernel object are 2030 * unmapped they are no longer needed, unlike, say, 2031 * a vnode where you might want the data to persist 2032 * until flushed out of a queue). 2033 * 2034 * for case [2], we need to remove the mapping from 2035 * the pmap. there shouldn't be any pages at the 2036 * specified offset in the kernel object [but it 2037 * doesn't hurt to call uvm_km_pgremove just to be 2038 * safe?] 2039 * 2040 * uvm_km_pgremove currently does the following: 2041 * for pages in the kernel object range: 2042 * - drops the swap slot 2043 * - uvm_pagefree the page 2044 * 2045 * note there is version of uvm_km_pgremove() that 2046 * is used for "intrsafe" objects. 2047 */ 2048 /* 2049 * remove mappings from pmap and drop the pages 2050 * from the object. offsets are always relative 2051 * to vm_map_min(kernel_map). 2052 */ 2053 pmap_remove(pmap_kernel(), entry->start, entry->end); 2054 uvm_km_pgremove(entry->object.uvm_obj, 2055 entry->start - vm_map_min(kernel_map), 2056 entry->end - vm_map_min(kernel_map)); 2057 2058 /* 2059 * null out kernel_object reference, we've just 2060 * dropped it 2061 */ 2062 entry->etype &= ~UVM_ET_OBJ; 2063 entry->object.uvm_obj = NULL; /* to be safe */ 2064 } else { 2065 /* remove mappings the standard way. */ 2066 pmap_remove(map->pmap, entry->start, entry->end); 2067 } 2068 } 2069 2070 /* 2071 * Remove all entries from start to end. 2072 * 2073 * If remove_holes, then remove ET_HOLE entries as well. 2074 * If markfree, entry will be properly marked free, otherwise, no replacement 2075 * entry will be put in the tree (corrupting the tree). 
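 *
 * A minimal usage sketch, mirroring what uvm_unmap() above does for the
 * non-INTRSAFE case (names and flags as used elsewhere in this file):
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);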
2076 */ 2077 void 2078 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 2079 struct uvm_map_deadq *dead, boolean_t remove_holes, 2080 boolean_t markfree) 2081 { 2082 struct vm_map_entry *prev_hint, *next, *entry; 2083 2084 start = MAX(start, map->min_offset); 2085 end = MIN(end, map->max_offset); 2086 if (start >= end) 2087 return; 2088 2089 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2090 splassert(IPL_NONE); 2091 else 2092 splassert(IPL_VM); 2093 2094 /* Find first affected entry. */ 2095 entry = uvm_map_entrybyaddr(&map->addr, start); 2096 KDASSERT(entry != NULL && entry->start <= start); 2097 if (entry->end <= start && markfree) 2098 entry = RBT_NEXT(uvm_map_addr, entry); 2099 else 2100 UVM_MAP_CLIP_START(map, entry, start); 2101 2102 /* 2103 * Iterate entries until we reach end address. 2104 * prev_hint hints where the freed space can be appended to. 2105 */ 2106 prev_hint = NULL; 2107 for (; entry != NULL && entry->start < end; entry = next) { 2108 KDASSERT(entry->start >= start); 2109 if (entry->end > end || !markfree) 2110 UVM_MAP_CLIP_END(map, entry, end); 2111 KDASSERT(entry->start >= start && entry->end <= end); 2112 next = RBT_NEXT(uvm_map_addr, entry); 2113 2114 /* Don't remove holes unless asked to do so. */ 2115 if (UVM_ET_ISHOLE(entry)) { 2116 if (!remove_holes) { 2117 prev_hint = entry; 2118 continue; 2119 } 2120 } 2121 2122 /* A stack has been removed.. */ 2123 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2124 map->serial++; 2125 2126 /* Kill entry. */ 2127 uvm_unmap_kill_entry(map, entry); 2128 2129 /* Update space usage. */ 2130 if ((map->flags & VM_MAP_ISVMSPACE) && 2131 entry->object.uvm_obj == NULL && 2132 !UVM_ET_ISHOLE(entry)) { 2133 ((struct vmspace *)map)->vm_dused -= 2134 uvmspace_dused(map, entry->start, entry->end); 2135 } 2136 if (!UVM_ET_ISHOLE(entry)) 2137 map->size -= entry->end - entry->start; 2138 2139 /* Actual removal of entry. */ 2140 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2141 } 2142 2143 pmap_update(vm_map_pmap(map)); 2144 2145 #ifdef VMMAP_DEBUG 2146 if (markfree) { 2147 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2148 entry != NULL && entry->start < end; 2149 entry = RBT_NEXT(uvm_map_addr, entry)) { 2150 KDASSERT(entry->end <= start || 2151 entry->start == entry->end || 2152 UVM_ET_ISHOLE(entry)); 2153 } 2154 } else { 2155 vaddr_t a; 2156 for (a = start; a < end; a += PAGE_SIZE) 2157 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2158 } 2159 #endif 2160 } 2161 2162 /* 2163 * Mark all entries from first until end (exclusive) as pageable. 2164 * 2165 * Lock must be exclusive on entry and will not be touched. 2166 */ 2167 void 2168 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2169 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2170 { 2171 struct vm_map_entry *iter; 2172 2173 for (iter = first; iter != end; 2174 iter = RBT_NEXT(uvm_map_addr, iter)) { 2175 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2176 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2177 continue; 2178 2179 iter->wired_count = 0; 2180 uvm_fault_unwire_locked(map, iter->start, iter->end); 2181 } 2182 } 2183 2184 /* 2185 * Mark all entries from first until end (exclusive) as wired. 2186 * 2187 * Lockflags determines the lock state on return from this function. 2188 * Lock must be exclusive on entry. 
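 * With UVM_LK_EXIT the map is handed back still locked (exclusive);
 * without it the lock is dropped before returning, on both the success
 * and the error path.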
2189 */ 2190 int 2191 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2192 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2193 int lockflags) 2194 { 2195 struct vm_map_entry *iter; 2196 #ifdef DIAGNOSTIC 2197 unsigned int timestamp_save; 2198 #endif 2199 int error; 2200 2201 /* 2202 * Wire pages in two passes: 2203 * 2204 * 1: holding the write lock, we create any anonymous maps that need 2205 * to be created. then we clip each map entry to the region to 2206 * be wired and increment its wiring count. 2207 * 2208 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2209 * in the pages for any newly wired area (wired_count == 1). 2210 * 2211 * downgrading to a read lock for uvm_fault_wire avoids a possible 2212 * deadlock with another thread that may have faulted on one of 2213 * the pages to be wired (it would mark the page busy, blocking 2214 * us, then in turn block on the map lock that we hold). 2215 * because we keep the read lock on the map, the copy-on-write 2216 * status of the entries we modify here cannot change. 2217 */ 2218 for (iter = first; iter != end; 2219 iter = RBT_NEXT(uvm_map_addr, iter)) { 2220 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2221 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2222 iter->protection == PROT_NONE) 2223 continue; 2224 2225 /* 2226 * Perform actions of vm_map_lookup that need the write lock. 2227 * - create an anonymous map for copy-on-write 2228 * - anonymous map for zero-fill 2229 * Skip submaps. 2230 */ 2231 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2232 UVM_ET_ISNEEDSCOPY(iter) && 2233 ((iter->protection & PROT_WRITE) || 2234 iter->object.uvm_obj == NULL)) { 2235 amap_copy(map, iter, M_WAITOK, 2236 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2237 iter->start, iter->end); 2238 } 2239 iter->wired_count++; 2240 } 2241 2242 /* 2243 * Pass 2. 2244 */ 2245 #ifdef DIAGNOSTIC 2246 timestamp_save = map->timestamp; 2247 #endif 2248 vm_map_busy(map); 2249 vm_map_downgrade(map); 2250 2251 error = 0; 2252 for (iter = first; error == 0 && iter != end; 2253 iter = RBT_NEXT(uvm_map_addr, iter)) { 2254 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2255 iter->protection == PROT_NONE) 2256 continue; 2257 2258 error = uvm_fault_wire(map, iter->start, iter->end, 2259 iter->protection); 2260 } 2261 2262 if (error) { 2263 /* 2264 * uvm_fault_wire failure 2265 * 2266 * Reacquire lock and undo our work. 2267 */ 2268 vm_map_upgrade(map); 2269 vm_map_unbusy(map); 2270 #ifdef DIAGNOSTIC 2271 if (timestamp_save != map->timestamp) 2272 panic("uvm_map_pageable_wire: stale map"); 2273 #endif 2274 2275 /* 2276 * first is no longer needed to restart loops. 2277 * Use it as iterator to unmap successful mappings. 2278 */ 2279 for (; first != iter; 2280 first = RBT_NEXT(uvm_map_addr, first)) { 2281 if (UVM_ET_ISHOLE(first) || 2282 first->start == first->end || 2283 first->protection == PROT_NONE) 2284 continue; 2285 2286 first->wired_count--; 2287 if (!VM_MAPENT_ISWIRED(first)) { 2288 uvm_fault_unwire_locked(map, 2289 iter->start, iter->end); 2290 } 2291 } 2292 2293 /* decrease counter in the rest of the entries */ 2294 for (; iter != end; 2295 iter = RBT_NEXT(uvm_map_addr, iter)) { 2296 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2297 iter->protection == PROT_NONE) 2298 continue; 2299 2300 iter->wired_count--; 2301 } 2302 2303 if ((lockflags & UVM_LK_EXIT) == 0) 2304 vm_map_unlock(map); 2305 return error; 2306 } 2307 2308 /* We are currently holding a read lock. 
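 * Depending on UVM_LK_EXIT we either drop it here, or upgrade back to
 * the exclusive lock the caller expects to still hold.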
*/ 2309 if ((lockflags & UVM_LK_EXIT) == 0) { 2310 vm_map_unbusy(map); 2311 vm_map_unlock_read(map); 2312 } else { 2313 vm_map_upgrade(map); 2314 vm_map_unbusy(map); 2315 #ifdef DIAGNOSTIC 2316 if (timestamp_save != map->timestamp) 2317 panic("uvm_map_pageable_wire: stale map"); 2318 #endif 2319 } 2320 return 0; 2321 } 2322 2323 /* 2324 * uvm_map_pageable: set pageability of a range in a map. 2325 * 2326 * Flags: 2327 * UVM_LK_ENTER: map is already locked by caller 2328 * UVM_LK_EXIT: don't unlock map on exit 2329 * 2330 * The full range must be in use (entries may not have fspace != 0). 2331 * UVM_ET_HOLE counts as unmapped. 2332 */ 2333 int 2334 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2335 boolean_t new_pageable, int lockflags) 2336 { 2337 struct vm_map_entry *first, *last, *tmp; 2338 int error; 2339 2340 start = trunc_page(start); 2341 end = round_page(end); 2342 2343 if (start > end) 2344 return EINVAL; 2345 if (start == end) 2346 return 0; /* nothing to do */ 2347 if (start < map->min_offset) 2348 return EFAULT; /* why? see first XXX below */ 2349 if (end > map->max_offset) 2350 return EINVAL; /* why? see second XXX below */ 2351 2352 KASSERT(map->flags & VM_MAP_PAGEABLE); 2353 if ((lockflags & UVM_LK_ENTER) == 0) 2354 vm_map_lock(map); 2355 2356 /* 2357 * Find first entry. 2358 * 2359 * Initial test on start is different, because of the different 2360 * error returned. Rest is tested further down. 2361 */ 2362 first = uvm_map_entrybyaddr(&map->addr, start); 2363 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2364 /* 2365 * XXX if the first address is not mapped, it is EFAULT? 2366 */ 2367 error = EFAULT; 2368 goto out; 2369 } 2370 2371 /* Check that the range has no holes. */ 2372 for (last = first; last != NULL && last->start < end; 2373 last = RBT_NEXT(uvm_map_addr, last)) { 2374 if (UVM_ET_ISHOLE(last) || 2375 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2376 /* 2377 * XXX unmapped memory in range, why is it EINVAL 2378 * instead of EFAULT? 2379 */ 2380 error = EINVAL; 2381 goto out; 2382 } 2383 } 2384 2385 /* 2386 * Last ended at the first entry after the range. 2387 * Move back one step. 2388 * 2389 * Note that last may be NULL. 2390 */ 2391 if (last == NULL) { 2392 last = RBT_MAX(uvm_map_addr, &map->addr); 2393 if (last->end < end) { 2394 error = EINVAL; 2395 goto out; 2396 } 2397 } else { 2398 KASSERT(last != first); 2399 last = RBT_PREV(uvm_map_addr, last); 2400 } 2401 2402 /* Wire/unwire pages here. */ 2403 if (new_pageable) { 2404 /* 2405 * Mark pageable. 2406 * entries that are not wired are untouched. 2407 */ 2408 if (VM_MAPENT_ISWIRED(first)) 2409 UVM_MAP_CLIP_START(map, first, start); 2410 /* 2411 * Split last at end. 2412 * Make tmp be the first entry after what is to be touched. 2413 * If last is not wired, don't touch it. 2414 */ 2415 if (VM_MAPENT_ISWIRED(last)) { 2416 UVM_MAP_CLIP_END(map, last, end); 2417 tmp = RBT_NEXT(uvm_map_addr, last); 2418 } else 2419 tmp = last; 2420 2421 uvm_map_pageable_pgon(map, first, tmp, start, end); 2422 error = 0; 2423 2424 out: 2425 if ((lockflags & UVM_LK_EXIT) == 0) 2426 vm_map_unlock(map); 2427 return error; 2428 } else { 2429 /* 2430 * Mark entries wired. 2431 * entries are always touched (because recovery needs this). 2432 */ 2433 if (!VM_MAPENT_ISWIRED(first)) 2434 UVM_MAP_CLIP_START(map, first, start); 2435 /* 2436 * Split last at end. 2437 * Make tmp be the first entry after what is to be touched. 2438 * If last is not wired, don't touch it. 
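 * (For the wiring case the test below is inverted: an entry that is
 * already wired is left unclipped and excluded from the wiring pass.)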
2439 */ 2440 if (!VM_MAPENT_ISWIRED(last)) { 2441 UVM_MAP_CLIP_END(map, last, end); 2442 tmp = RBT_NEXT(uvm_map_addr, last); 2443 } else 2444 tmp = last; 2445 2446 return uvm_map_pageable_wire(map, first, tmp, start, end, 2447 lockflags); 2448 } 2449 } 2450 2451 /* 2452 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2453 * all mapped regions. 2454 * 2455 * Map must not be locked. 2456 * If no flags are specified, all ragions are unwired. 2457 */ 2458 int 2459 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2460 { 2461 vsize_t size; 2462 struct vm_map_entry *iter; 2463 2464 KASSERT(map->flags & VM_MAP_PAGEABLE); 2465 vm_map_lock(map); 2466 2467 if (flags == 0) { 2468 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2469 NULL, map->min_offset, map->max_offset); 2470 2471 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2472 vm_map_unlock(map); 2473 return 0; 2474 } 2475 2476 if (flags & MCL_FUTURE) 2477 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2478 if (!(flags & MCL_CURRENT)) { 2479 vm_map_unlock(map); 2480 return 0; 2481 } 2482 2483 /* 2484 * Count number of pages in all non-wired entries. 2485 * If the number exceeds the limit, abort. 2486 */ 2487 size = 0; 2488 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2489 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2490 continue; 2491 2492 size += iter->end - iter->start; 2493 } 2494 2495 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2496 vm_map_unlock(map); 2497 return ENOMEM; 2498 } 2499 2500 /* XXX non-pmap_wired_count case must be handled by caller */ 2501 #ifdef pmap_wired_count 2502 if (limit != 0 && 2503 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2504 vm_map_unlock(map); 2505 return ENOMEM; 2506 } 2507 #endif 2508 2509 /* 2510 * uvm_map_pageable_wire will release lcok 2511 */ 2512 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2513 NULL, map->min_offset, map->max_offset, 0); 2514 } 2515 2516 /* 2517 * Initialize map. 2518 * 2519 * Allocates sufficient entries to describe the free memory in the map. 2520 */ 2521 void 2522 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2523 { 2524 int i; 2525 2526 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2527 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2528 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2529 2530 /* 2531 * Update parameters. 2532 * 2533 * This code handles (vaddr_t)-1 and other page mask ending addresses 2534 * properly. 2535 * We lose the top page if the full virtual address space is used. 2536 */ 2537 if (max & (vaddr_t)PAGE_MASK) { 2538 max += 1; 2539 if (max == 0) /* overflow */ 2540 max -= PAGE_SIZE; 2541 } 2542 2543 RBT_INIT(uvm_map_addr, &map->addr); 2544 map->uaddr_exe = NULL; 2545 for (i = 0; i < nitems(map->uaddr_any); ++i) 2546 map->uaddr_any[i] = NULL; 2547 map->uaddr_brk_stack = NULL; 2548 2549 map->size = 0; 2550 map->ref_count = 0; 2551 map->min_offset = min; 2552 map->max_offset = max; 2553 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2554 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2555 map->flags = flags; 2556 map->timestamp = 0; 2557 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2558 mtx_init(&map->mtx, IPL_VM); 2559 mtx_init(&map->flags_lock, IPL_VM); 2560 2561 /* Configure the allocators. */ 2562 if (flags & VM_MAP_ISVMSPACE) 2563 uvm_map_setup_md(map); 2564 else 2565 map->uaddr_any[3] = &uaddr_kbootstrap; 2566 2567 /* 2568 * Fill map entries. 
2569 * We do not need to write-lock the map here because only the current 2570 * thread sees it right now. Initialize ref_count to 0 above to avoid 2571 * bogus triggering of lock-not-held assertions. 2572 */ 2573 uvm_map_setup_entries(map); 2574 uvm_tree_sanity(map, __FILE__, __LINE__); 2575 map->ref_count = 1; 2576 } 2577 2578 /* 2579 * Destroy the map. 2580 * 2581 * This is the inverse operation to uvm_map_setup. 2582 */ 2583 void 2584 uvm_map_teardown(struct vm_map *map) 2585 { 2586 struct uvm_map_deadq dead_entries; 2587 struct vm_map_entry *entry, *tmp; 2588 #ifdef VMMAP_DEBUG 2589 size_t numq, numt; 2590 #endif 2591 int i; 2592 2593 KERNEL_ASSERT_LOCKED(); 2594 KERNEL_UNLOCK(); 2595 KERNEL_ASSERT_UNLOCKED(); 2596 2597 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2598 2599 /* Remove address selectors. */ 2600 uvm_addr_destroy(map->uaddr_exe); 2601 map->uaddr_exe = NULL; 2602 for (i = 0; i < nitems(map->uaddr_any); i++) { 2603 uvm_addr_destroy(map->uaddr_any[i]); 2604 map->uaddr_any[i] = NULL; 2605 } 2606 uvm_addr_destroy(map->uaddr_brk_stack); 2607 map->uaddr_brk_stack = NULL; 2608 2609 /* 2610 * Remove entries. 2611 * 2612 * The following is based on graph breadth-first search. 2613 * 2614 * In color terms: 2615 * - the dead_entries set contains all nodes that are reachable 2616 * (i.e. both the black and the grey nodes) 2617 * - any entry not in dead_entries is white 2618 * - any entry that appears in dead_entries before entry, 2619 * is black, the rest is grey. 2620 * The set [entry, end] is also referred to as the wavefront. 2621 * 2622 * Since the tree is always a fully connected graph, the breadth-first 2623 * search guarantees that each vmmap_entry is visited exactly once. 2624 * The vm_map is broken down in linear time. 2625 */ 2626 TAILQ_INIT(&dead_entries); 2627 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2628 DEAD_ENTRY_PUSH(&dead_entries, entry); 2629 while (entry != NULL) { 2630 sched_pause(yield); 2631 uvm_unmap_kill_entry(map, entry); 2632 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2633 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2634 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2635 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2636 /* Update wave-front. */ 2637 entry = TAILQ_NEXT(entry, dfree.deadq); 2638 } 2639 2640 #ifdef VMMAP_DEBUG 2641 numt = numq = 0; 2642 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2643 numt++; 2644 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2645 numq++; 2646 KASSERT(numt == numq); 2647 #endif 2648 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2649 2650 KERNEL_LOCK(); 2651 2652 pmap_destroy(map->pmap); 2653 map->pmap = NULL; 2654 } 2655 2656 /* 2657 * Populate map with free-memory entries. 2658 * 2659 * Map must be initialized and empty. 2660 */ 2661 void 2662 uvm_map_setup_entries(struct vm_map *map) 2663 { 2664 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2665 2666 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2667 } 2668 2669 /* 2670 * Split entry at given address. 2671 * 2672 * orig: entry that is to be split. 2673 * next: a newly allocated map entry that is not linked. 2674 * split: address at which the split is done. 
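 *
 * Two cases are handled below: if split lies at or beyond orig->end, only
 * the free space trailing orig moves into next (next then describes an
 * empty range plus that free space); otherwise orig is truncated at split
 * and next takes over [split, orig->end) together with its share of the
 * amap and object references.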
2675 */ 2676 void 2677 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2678 struct vm_map_entry *next, vaddr_t split) 2679 { 2680 struct uvm_addr_state *free, *free_before; 2681 vsize_t adj; 2682 2683 if ((split & PAGE_MASK) != 0) { 2684 panic("uvm_map_splitentry: split address 0x%lx " 2685 "not on page boundary!", split); 2686 } 2687 KDASSERT(map != NULL && orig != NULL && next != NULL); 2688 uvm_tree_sanity(map, __FILE__, __LINE__); 2689 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2690 2691 #ifdef VMMAP_DEBUG 2692 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2693 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2694 #endif /* VMMAP_DEBUG */ 2695 2696 /* 2697 * Free space will change, unlink from free space tree. 2698 */ 2699 free = uvm_map_uaddr_e(map, orig); 2700 uvm_mapent_free_remove(map, free, orig); 2701 2702 adj = split - orig->start; 2703 2704 uvm_mapent_copy(orig, next); 2705 if (split >= orig->end) { 2706 next->etype = 0; 2707 next->offset = 0; 2708 next->wired_count = 0; 2709 next->start = next->end = split; 2710 next->guard = 0; 2711 next->fspace = VMMAP_FREE_END(orig) - split; 2712 next->aref.ar_amap = NULL; 2713 next->aref.ar_pageoff = 0; 2714 orig->guard = MIN(orig->guard, split - orig->end); 2715 orig->fspace = split - VMMAP_FREE_START(orig); 2716 } else { 2717 orig->fspace = 0; 2718 orig->guard = 0; 2719 orig->end = next->start = split; 2720 2721 if (next->aref.ar_amap) { 2722 KERNEL_LOCK(); 2723 amap_splitref(&orig->aref, &next->aref, adj); 2724 KERNEL_UNLOCK(); 2725 } 2726 if (UVM_ET_ISSUBMAP(orig)) { 2727 uvm_map_reference(next->object.sub_map); 2728 next->offset += adj; 2729 } else if (UVM_ET_ISOBJ(orig)) { 2730 if (next->object.uvm_obj->pgops && 2731 next->object.uvm_obj->pgops->pgo_reference) { 2732 KERNEL_LOCK(); 2733 next->object.uvm_obj->pgops->pgo_reference( 2734 next->object.uvm_obj); 2735 KERNEL_UNLOCK(); 2736 } 2737 next->offset += adj; 2738 } 2739 } 2740 2741 /* 2742 * Link next into address tree. 2743 * Link orig and next into free-space tree. 2744 * 2745 * Don't insert 'next' into the addr tree until orig has been linked, 2746 * in case the free-list looks at adjecent entries in the addr tree 2747 * for its decisions. 2748 */ 2749 if (orig->fspace > 0) 2750 free_before = free; 2751 else 2752 free_before = uvm_map_uaddr_e(map, orig); 2753 uvm_mapent_free_insert(map, free_before, orig); 2754 uvm_mapent_addr_insert(map, next); 2755 uvm_mapent_free_insert(map, free, next); 2756 2757 uvm_tree_sanity(map, __FILE__, __LINE__); 2758 } 2759 2760 2761 #ifdef VMMAP_DEBUG 2762 2763 void 2764 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2765 char *file, int line) 2766 { 2767 char* map_special; 2768 2769 if (test) 2770 return; 2771 2772 if (map == kernel_map) 2773 map_special = " (kernel_map)"; 2774 else if (map == kmem_map) 2775 map_special = " (kmem_map)"; 2776 else 2777 map_special = ""; 2778 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2779 line, test_str); 2780 } 2781 2782 /* 2783 * Check that map is sane. 2784 */ 2785 void 2786 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2787 { 2788 struct vm_map_entry *iter; 2789 vaddr_t addr; 2790 vaddr_t min, max, bound; /* Bounds checker. */ 2791 struct uvm_addr_state *free; 2792 2793 addr = vm_map_min(map); 2794 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2795 /* 2796 * Valid start, end. 2797 * Catch overflow for end+fspace. 
2798 */ 2799 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2800 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2801 2802 /* May not be empty. */ 2803 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2804 file, line); 2805 2806 /* Addresses for entry must lie within map boundaries. */ 2807 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2808 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2809 2810 /* Tree may not have gaps. */ 2811 UVM_ASSERT(map, iter->start == addr, file, line); 2812 addr = VMMAP_FREE_END(iter); 2813 2814 /* 2815 * Free space may not cross boundaries, unless the same 2816 * free list is used on both sides of the border. 2817 */ 2818 min = VMMAP_FREE_START(iter); 2819 max = VMMAP_FREE_END(iter); 2820 2821 while (min < max && 2822 (bound = uvm_map_boundary(map, min, max)) != max) { 2823 UVM_ASSERT(map, 2824 uvm_map_uaddr(map, bound - 1) == 2825 uvm_map_uaddr(map, bound), 2826 file, line); 2827 min = bound; 2828 } 2829 2830 free = uvm_map_uaddr_e(map, iter); 2831 if (free) { 2832 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2833 file, line); 2834 } else { 2835 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2836 file, line); 2837 } 2838 } 2839 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2840 } 2841 2842 void 2843 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2844 { 2845 struct vm_map_entry *iter; 2846 vsize_t size; 2847 2848 size = 0; 2849 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2850 if (!UVM_ET_ISHOLE(iter)) 2851 size += iter->end - iter->start; 2852 } 2853 2854 if (map->size != size) 2855 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2856 UVM_ASSERT(map, map->size == size, file, line); 2857 2858 vmspace_validate(map); 2859 } 2860 2861 /* 2862 * This function validates the statistics on vmspace. 2863 */ 2864 void 2865 vmspace_validate(struct vm_map *map) 2866 { 2867 struct vmspace *vm; 2868 struct vm_map_entry *iter; 2869 vaddr_t imin, imax; 2870 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2871 vsize_t stack, heap; /* Measured sizes. */ 2872 2873 if (!(map->flags & VM_MAP_ISVMSPACE)) 2874 return; 2875 2876 vm = (struct vmspace *)map; 2877 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2878 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2879 2880 stack = heap = 0; 2881 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2882 imin = imax = iter->start; 2883 2884 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2885 continue; 2886 2887 /* 2888 * Update stack, heap. 2889 * Keep in mind that (theoretically) the entries of 2890 * userspace and stack may be joined. 2891 */ 2892 while (imin != iter->end) { 2893 /* 2894 * Set imax to the first boundary crossed between 2895 * imin and stack addresses. 
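 * E.g. an entry that starts below stack_begin and ends inside the
 * stack range is accounted in two steps: [start, stack_begin) as heap,
 * then [stack_begin, end) as stack.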
2896 */ 2897 imax = iter->end; 2898 if (imin < stack_begin && imax > stack_begin) 2899 imax = stack_begin; 2900 else if (imin < stack_end && imax > stack_end) 2901 imax = stack_end; 2902 2903 if (imin >= stack_begin && imin < stack_end) 2904 stack += imax - imin; 2905 else 2906 heap += imax - imin; 2907 imin = imax; 2908 } 2909 } 2910 2911 heap >>= PAGE_SHIFT; 2912 if (heap != vm->vm_dused) { 2913 printf("vmspace stack range: 0x%lx-0x%lx\n", 2914 stack_begin, stack_end); 2915 panic("vmspace_validate: vmspace.vm_dused invalid, " 2916 "expected %ld pgs, got %ld pgs in map %p", 2917 heap, vm->vm_dused, 2918 map); 2919 } 2920 } 2921 2922 #endif /* VMMAP_DEBUG */ 2923 2924 /* 2925 * uvm_map_init: init mapping system at boot time. note that we allocate 2926 * and init the static pool of structs vm_map_entry for the kernel here. 2927 */ 2928 void 2929 uvm_map_init(void) 2930 { 2931 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2932 int lcv; 2933 2934 /* now set up static pool of kernel map entries ... */ 2935 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2936 SLIST_INIT(&uvm.kentry_free); 2937 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2938 SLIST_INSERT_HEAD(&uvm.kentry_free, 2939 &kernel_map_entry[lcv], daddrs.addr_kentry); 2940 } 2941 2942 /* initialize the map-related pools. */ 2943 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 2944 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 2945 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 2946 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 2947 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 2948 IPL_VM, 0, "vmmpekpl", NULL); 2949 pool_sethiwat(&uvm_map_entry_pool, 8192); 2950 2951 uvm_addr_init(); 2952 } 2953 2954 #if defined(DDB) 2955 2956 /* 2957 * DDB hooks 2958 */ 2959 2960 /* 2961 * uvm_map_printit: actually prints the map 2962 */ 2963 void 2964 uvm_map_printit(struct vm_map *map, boolean_t full, 2965 int (*pr)(const char *, ...)) 2966 { 2967 struct vmspace *vm; 2968 struct vm_map_entry *entry; 2969 struct uvm_addr_state *free; 2970 int in_free, i; 2971 char buf[8]; 2972 2973 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2974 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2975 map->b_start, map->b_end); 2976 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2977 map->s_start, map->s_end); 2978 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2979 map->size, map->ref_count, map->timestamp, 2980 map->flags); 2981 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2982 pmap_resident_count(map->pmap)); 2983 2984 /* struct vmspace handling. */ 2985 if (map->flags & VM_MAP_ISVMSPACE) { 2986 vm = (struct vmspace *)map; 2987 2988 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2989 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2990 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2991 vm->vm_tsize, vm->vm_dsize); 2992 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2993 vm->vm_taddr, vm->vm_daddr); 2994 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2995 vm->vm_maxsaddr, vm->vm_minsaddr); 2996 } 2997 2998 if (!full) 2999 goto print_uaddr; 3000 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 3001 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 3002 entry, entry->start, entry->end, entry->object.uvm_obj, 3003 (long long)entry->offset, entry->aref.ar_amap, 3004 entry->aref.ar_pageoff); 3005 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, " 3006 "wc=%d, adv=%d\n", 3007 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 3008 (entry->etype & UVM_ET_COPYONWRITE) ? 
'T' : 'F', 3009 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 3010 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 3011 entry->protection, entry->max_protection, 3012 entry->inheritance, entry->wired_count, entry->advice); 3013 3014 free = uvm_map_uaddr_e(map, entry); 3015 in_free = (free != NULL); 3016 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 3017 "free=0x%lx-0x%lx\n", 3018 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 3019 in_free ? 'T' : 'F', 3020 entry->guard, 3021 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 3022 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 3023 (*pr)("\tfreemapped=%c, uaddr=%p\n", 3024 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 3025 if (free) { 3026 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 3027 free->uaddr_minaddr, free->uaddr_maxaddr, 3028 free->uaddr_functions->uaddr_name); 3029 } 3030 } 3031 3032 print_uaddr: 3033 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 3034 for (i = 0; i < nitems(map->uaddr_any); i++) { 3035 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 3036 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 3037 } 3038 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 3039 } 3040 3041 /* 3042 * uvm_object_printit: actually prints the object 3043 */ 3044 void 3045 uvm_object_printit(uobj, full, pr) 3046 struct uvm_object *uobj; 3047 boolean_t full; 3048 int (*pr)(const char *, ...); 3049 { 3050 struct vm_page *pg; 3051 int cnt = 0; 3052 3053 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 3054 uobj, uobj->pgops, uobj->uo_npages); 3055 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 3056 (*pr)("refs=<SYSTEM>\n"); 3057 else 3058 (*pr)("refs=%d\n", uobj->uo_refs); 3059 3060 if (!full) { 3061 return; 3062 } 3063 (*pr)(" PAGES <pg,offset>:\n "); 3064 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 3065 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 3066 if ((cnt % 3) == 2) { 3067 (*pr)("\n "); 3068 } 3069 cnt++; 3070 } 3071 if ((cnt % 3) != 2) { 3072 (*pr)("\n"); 3073 } 3074 } 3075 3076 /* 3077 * uvm_page_printit: actually print the page 3078 */ 3079 static const char page_flagbits[] = 3080 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 3081 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 3082 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 3083 3084 void 3085 uvm_page_printit(pg, full, pr) 3086 struct vm_page *pg; 3087 boolean_t full; 3088 int (*pr)(const char *, ...); 3089 { 3090 struct vm_page *tpg; 3091 struct uvm_object *uobj; 3092 struct pglist *pgl; 3093 3094 (*pr)("PAGE %p:\n", pg); 3095 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 3096 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 3097 (long long)pg->phys_addr); 3098 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 3099 pg->uobject, pg->uanon, (long long)pg->offset); 3100 #if defined(UVM_PAGE_TRKOWN) 3101 if (pg->pg_flags & PG_BUSY) 3102 (*pr)(" owning thread = %d, tag=%s", 3103 pg->owner, pg->owner_tag); 3104 else 3105 (*pr)(" page not busy, no owner"); 3106 #else 3107 (*pr)(" [page ownership tracking disabled]"); 3108 #endif 3109 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3110 3111 if (!full) 3112 return; 3113 3114 /* cross-verify object/anon */ 3115 if ((pg->pg_flags & PQ_FREE) == 0) { 3116 if (pg->pg_flags & PQ_ANON) { 3117 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3118 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3119 (pg->uanon) ? 
pg->uanon->an_page : NULL); 3120 else 3121 (*pr)(" anon backpointer is OK\n"); 3122 } else { 3123 uobj = pg->uobject; 3124 if (uobj) { 3125 (*pr)(" checking object list\n"); 3126 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3127 if (tpg == pg) { 3128 break; 3129 } 3130 } 3131 if (tpg) 3132 (*pr)(" page found on object list\n"); 3133 else 3134 (*pr)(" >>> PAGE NOT FOUND " 3135 "ON OBJECT LIST! <<<\n"); 3136 } 3137 } 3138 } 3139 3140 /* cross-verify page queue */ 3141 if (pg->pg_flags & PQ_FREE) { 3142 if (uvm_pmr_isfree(pg)) 3143 (*pr)(" page found in uvm_pmemrange\n"); 3144 else 3145 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3146 pgl = NULL; 3147 } else if (pg->pg_flags & PQ_INACTIVE) { 3148 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 3149 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3150 } else if (pg->pg_flags & PQ_ACTIVE) { 3151 pgl = &uvm.page_active; 3152 } else { 3153 pgl = NULL; 3154 } 3155 3156 if (pgl) { 3157 (*pr)(" checking pageq list\n"); 3158 TAILQ_FOREACH(tpg, pgl, pageq) { 3159 if (tpg == pg) { 3160 break; 3161 } 3162 } 3163 if (tpg) 3164 (*pr)(" page found on pageq list\n"); 3165 else 3166 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3167 } 3168 } 3169 #endif 3170 3171 /* 3172 * uvm_map_protect: change map protection 3173 * 3174 * => set_max means set max_protection. 3175 * => map must be unlocked. 3176 */ 3177 int 3178 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3179 vm_prot_t new_prot, boolean_t set_max) 3180 { 3181 struct vm_map_entry *first, *iter; 3182 vm_prot_t old_prot; 3183 vm_prot_t mask; 3184 int error; 3185 3186 if (start > end) 3187 return EINVAL; 3188 start = MAX(start, map->min_offset); 3189 end = MIN(end, map->max_offset); 3190 if (start >= end) 3191 return 0; 3192 3193 error = 0; 3194 vm_map_lock(map); 3195 3196 /* 3197 * Set up first and last. 3198 * - first will contain first entry at or after start. 3199 */ 3200 first = uvm_map_entrybyaddr(&map->addr, start); 3201 KDASSERT(first != NULL); 3202 if (first->end <= start) 3203 first = RBT_NEXT(uvm_map_addr, first); 3204 3205 /* First, check for protection violations. */ 3206 for (iter = first; iter != NULL && iter->start < end; 3207 iter = RBT_NEXT(uvm_map_addr, iter)) { 3208 /* Treat memory holes as free space. */ 3209 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3210 continue; 3211 3212 if (UVM_ET_ISSUBMAP(iter)) { 3213 error = EINVAL; 3214 goto out; 3215 } 3216 if ((new_prot & iter->max_protection) != new_prot) { 3217 error = EACCES; 3218 goto out; 3219 } 3220 if (map == kernel_map && 3221 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3222 panic("uvm_map_protect: kernel map W^X violation requested"); 3223 } 3224 3225 /* Fix protections. */ 3226 for (iter = first; iter != NULL && iter->start < end; 3227 iter = RBT_NEXT(uvm_map_addr, iter)) { 3228 /* Treat memory holes as free space. */ 3229 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3230 continue; 3231 3232 old_prot = iter->protection; 3233 3234 /* 3235 * Skip adapting protection iff old and new protection 3236 * are equal. 
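 * (For set_max this means the new maximum already covers the current
 * protection and max_protection is already equal to it.)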
3237 */ 3238 if (set_max) { 3239 if (old_prot == (new_prot & old_prot) && 3240 iter->max_protection == new_prot) 3241 continue; 3242 } else { 3243 if (old_prot == new_prot) 3244 continue; 3245 } 3246 3247 UVM_MAP_CLIP_START(map, iter, start); 3248 UVM_MAP_CLIP_END(map, iter, end); 3249 3250 if (set_max) { 3251 iter->max_protection = new_prot; 3252 iter->protection &= new_prot; 3253 } else 3254 iter->protection = new_prot; 3255 3256 /* 3257 * update physical map if necessary. worry about copy-on-write 3258 * here -- CHECK THIS XXX 3259 */ 3260 if (iter->protection != old_prot) { 3261 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3262 ~PROT_WRITE : PROT_MASK; 3263 3264 /* update pmap */ 3265 if ((iter->protection & mask) == PROT_NONE && 3266 VM_MAPENT_ISWIRED(iter)) { 3267 /* 3268 * TODO(ariane) this is stupid. wired_count 3269 * is 0 if not wired, otherwise anything 3270 * larger than 0 (incremented once each time 3271 * wire is called). 3272 * Mostly to be able to undo the damage on 3273 * failure. Not the actually be a wired 3274 * refcounter... 3275 * Originally: iter->wired_count--; 3276 * (don't we have to unwire this in the pmap 3277 * as well?) 3278 */ 3279 iter->wired_count = 0; 3280 } 3281 pmap_protect(map->pmap, iter->start, iter->end, 3282 iter->protection & mask); 3283 } 3284 3285 /* 3286 * If the map is configured to lock any future mappings, 3287 * wire this entry now if the old protection was PROT_NONE 3288 * and the new protection is not PROT_NONE. 3289 */ 3290 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3291 VM_MAPENT_ISWIRED(iter) == 0 && 3292 old_prot == PROT_NONE && 3293 new_prot != PROT_NONE) { 3294 if (uvm_map_pageable(map, iter->start, iter->end, 3295 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3296 /* 3297 * If locking the entry fails, remember the 3298 * error if it's the first one. Note we 3299 * still continue setting the protection in 3300 * the map, but it will return the resource 3301 * storage condition regardless. 3302 * 3303 * XXX Ignore what the actual error is, 3304 * XXX just call it a resource shortage 3305 * XXX so that it doesn't get confused 3306 * XXX what uvm_map_protect() itself would 3307 * XXX normally return. 3308 */ 3309 error = ENOMEM; 3310 } 3311 } 3312 } 3313 pmap_update(map->pmap); 3314 3315 out: 3316 vm_map_unlock(map); 3317 return error; 3318 } 3319 3320 /* 3321 * uvmspace_alloc: allocate a vmspace structure. 3322 * 3323 * - structure includes vm_map and pmap 3324 * - XXX: no locking on this structure 3325 * - refcnt set to 1, rest must be init'd by caller 3326 */ 3327 struct vmspace * 3328 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3329 boolean_t remove_holes) 3330 { 3331 struct vmspace *vm; 3332 3333 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3334 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3335 return (vm); 3336 } 3337 3338 /* 3339 * uvmspace_init: initialize a vmspace structure. 3340 * 3341 * - XXX: no locking on this structure 3342 * - refcnt set to 1, rest must be init'd by caller 3343 */ 3344 void 3345 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3346 boolean_t pageable, boolean_t remove_holes) 3347 { 3348 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3349 3350 if (pmap) 3351 pmap_reference(pmap); 3352 else 3353 pmap = pmap_create(); 3354 vm->vm_map.pmap = pmap; 3355 3356 uvm_map_setup(&vm->vm_map, min, max, 3357 (pageable ? 
VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3358 3359 vm->vm_refcnt = 1; 3360 3361 if (remove_holes) 3362 pmap_remove_holes(vm); 3363 } 3364 3365 /* 3366 * uvmspace_share: share a vmspace between two processes 3367 * 3368 * - XXX: no locking on vmspace 3369 * - used for vfork 3370 */ 3371 3372 struct vmspace * 3373 uvmspace_share(struct process *pr) 3374 { 3375 struct vmspace *vm = pr->ps_vmspace; 3376 3377 vm->vm_refcnt++; 3378 return vm; 3379 } 3380 3381 /* 3382 * uvmspace_exec: the process wants to exec a new program 3383 * 3384 * - XXX: no locking on vmspace 3385 */ 3386 3387 void 3388 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3389 { 3390 struct process *pr = p->p_p; 3391 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3392 struct vm_map *map = &ovm->vm_map; 3393 struct uvm_map_deadq dead_entries; 3394 3395 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3396 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3397 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3398 3399 pmap_unuse_final(p); /* before stack addresses go away */ 3400 TAILQ_INIT(&dead_entries); 3401 3402 /* see if more than one process is using this vmspace... */ 3403 if (ovm->vm_refcnt == 1) { 3404 /* 3405 * If pr is the only process using its vmspace then 3406 * we can safely recycle that vmspace for the program 3407 * that is being exec'd. 3408 */ 3409 3410 #ifdef SYSVSHM 3411 /* 3412 * SYSV SHM semantics require us to kill all segments on an exec 3413 */ 3414 if (ovm->vm_shm) 3415 shmexit(ovm); 3416 #endif 3417 3418 /* 3419 * POSIX 1003.1b -- "lock future mappings" is revoked 3420 * when a process execs another program image. 3421 */ 3422 vm_map_lock(map); 3423 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3424 3425 /* 3426 * now unmap the old program 3427 * 3428 * Instead of attempting to keep the map valid, we simply 3429 * nuke all entries and ask uvm_map_setup to reinitialize 3430 * the map to the new boundaries. 3431 * 3432 * uvm_unmap_remove will actually nuke all entries for us 3433 * (as in, not replace them with free-memory entries). 3434 */ 3435 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3436 &dead_entries, TRUE, FALSE); 3437 3438 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3439 3440 /* Nuke statistics and boundaries. */ 3441 memset(&ovm->vm_startcopy, 0, 3442 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3443 3444 3445 if (end & (vaddr_t)PAGE_MASK) { 3446 end += 1; 3447 if (end == 0) /* overflow */ 3448 end -= PAGE_SIZE; 3449 } 3450 3451 /* Setup new boundaries and populate map with entries. */ 3452 map->min_offset = start; 3453 map->max_offset = end; 3454 uvm_map_setup_entries(map); 3455 vm_map_unlock(map); 3456 3457 /* but keep MMU holes unavailable */ 3458 pmap_remove_holes(ovm); 3459 } else { 3460 /* 3461 * pr's vmspace is being shared, so we can't reuse 3462 * it for pr since it is still being used for others. 3463 * allocate a new vmspace for pr 3464 */ 3465 nvm = uvmspace_alloc(start, end, 3466 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3467 3468 /* install new vmspace and drop our ref to the old one. 
*/ 3469 pmap_deactivate(p); 3470 p->p_vmspace = pr->ps_vmspace = nvm; 3471 pmap_activate(p); 3472 3473 uvmspace_free(ovm); 3474 } 3475 3476 /* Release dead entries */ 3477 uvm_unmap_detach(&dead_entries, 0); 3478 } 3479 3480 /* 3481 * uvmspace_free: free a vmspace data structure 3482 * 3483 * - XXX: no locking on vmspace 3484 */ 3485 void 3486 uvmspace_free(struct vmspace *vm) 3487 { 3488 if (--vm->vm_refcnt == 0) { 3489 /* 3490 * lock the map, to wait out all other references to it. delete 3491 * all of the mappings and pages they hold, then call the pmap 3492 * module to reclaim anything left. 3493 */ 3494 #ifdef SYSVSHM 3495 /* Get rid of any SYSV shared memory segments. */ 3496 if (vm->vm_shm != NULL) 3497 shmexit(vm); 3498 #endif 3499 3500 uvm_map_teardown(&vm->vm_map); 3501 pool_put(&uvm_vmspace_pool, vm); 3502 } 3503 } 3504 3505 /* 3506 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3507 * srcmap to the address range [dstaddr, dstaddr + sz) in 3508 * dstmap. 3509 * 3510 * The whole address range in srcmap must be backed by an object 3511 * (no holes). 3512 * 3513 * If successful, the address ranges share memory and the destination 3514 * address range uses the protection flags in prot. 3515 * 3516 * This routine assumes that sz is a multiple of PAGE_SIZE and 3517 * that dstaddr and srcaddr are page-aligned. 3518 */ 3519 int 3520 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3521 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3522 { 3523 int ret = 0; 3524 vaddr_t unmap_end; 3525 vaddr_t dstva; 3526 vsize_t off, len, n = sz; 3527 struct vm_map_entry *first = NULL, *last = NULL; 3528 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3529 struct uvm_map_deadq dead; 3530 3531 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3532 return EINVAL; 3533 3534 TAILQ_INIT(&dead); 3535 vm_map_lock(dstmap); 3536 vm_map_lock_read(srcmap); 3537 3538 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3539 ret = ENOMEM; 3540 goto exit_unlock; 3541 } 3542 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3543 ret = EINVAL; 3544 goto exit_unlock; 3545 } 3546 3547 unmap_end = dstaddr; 3548 for (; src_entry != NULL; 3549 psrc_entry = src_entry, 3550 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3551 /* hole in address space, bail out */ 3552 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3553 break; 3554 if (src_entry->start >= srcaddr + sz) 3555 break; 3556 3557 if (UVM_ET_ISSUBMAP(src_entry)) 3558 panic("uvm_share: encountered a submap (illegal)"); 3559 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3560 UVM_ET_ISNEEDSCOPY(src_entry)) 3561 panic("uvm_share: non-copy_on_write map entries " 3562 "marked needs_copy (illegal)"); 3563 3564 dstva = dstaddr; 3565 if (src_entry->start > srcaddr) { 3566 dstva += src_entry->start - srcaddr; 3567 off = 0; 3568 } else 3569 off = srcaddr - src_entry->start; 3570 3571 if (n < src_entry->end - src_entry->start) 3572 len = n; 3573 else 3574 len = src_entry->end - src_entry->start; 3575 n -= len; 3576 3577 if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot, 3578 srcmap, src_entry, &dead) == NULL) 3579 break; 3580 3581 unmap_end = dstva + len; 3582 if (n == 0) 3583 goto exit_unlock; 3584 } 3585 3586 ret = EINVAL; 3587 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3588 3589 exit_unlock: 3590 vm_map_unlock_read(srcmap); 3591 vm_map_unlock(dstmap); 3592 uvm_unmap_detach(&dead, 0); 3593 3594 return ret; 3595 } 3596 3597 /* 3598 * Clone map entry into 
other map. 3599 * 3600 * Mapping will be placed at dstaddr, for the same length. 3601 * Space must be available. 3602 * Reference counters are incremented. 3603 */ 3604 struct vm_map_entry * 3605 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3606 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3607 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3608 int mapent_flags, int amap_share_flags) 3609 { 3610 struct vm_map_entry *new_entry, *first, *last; 3611 3612 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3613 3614 /* Create new entry (linked in on creation). Fill in first, last. */ 3615 first = last = NULL; 3616 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3617 panic("uvmspace_fork: no space in map for " 3618 "entry in empty map"); 3619 } 3620 new_entry = uvm_map_mkentry(dstmap, first, last, 3621 dstaddr, dstlen, mapent_flags, dead, NULL); 3622 if (new_entry == NULL) 3623 return NULL; 3624 /* old_entry -> new_entry */ 3625 new_entry->object = old_entry->object; 3626 new_entry->offset = old_entry->offset; 3627 new_entry->aref = old_entry->aref; 3628 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3629 new_entry->protection = prot; 3630 new_entry->max_protection = maxprot; 3631 new_entry->inheritance = old_entry->inheritance; 3632 new_entry->advice = old_entry->advice; 3633 3634 /* gain reference to object backing the map (can't be a submap). */ 3635 if (new_entry->aref.ar_amap) { 3636 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3637 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3638 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3639 amap_share_flags); 3640 } 3641 3642 if (UVM_ET_ISOBJ(new_entry) && 3643 new_entry->object.uvm_obj->pgops->pgo_reference) { 3644 new_entry->offset += off; 3645 new_entry->object.uvm_obj->pgops->pgo_reference 3646 (new_entry->object.uvm_obj); 3647 } 3648 3649 return new_entry; 3650 } 3651 3652 struct vm_map_entry * 3653 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3654 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3655 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3656 { 3657 /* 3658 * If old_entry refers to a copy-on-write region that has not yet been 3659 * written to (needs_copy flag is set), then we need to allocate a new 3660 * amap for old_entry. 3661 * 3662 * If we do not do this, and the process owning old_entry does a copy-on 3663 * write later, old_entry and new_entry will refer to different memory 3664 * regions, and the memory between the processes is no longer shared. 3665 * 3666 * [in other words, we need to clear needs_copy] 3667 */ 3668 3669 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3670 /* get our own amap, clears needs_copy */ 3671 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3672 0, 0); 3673 /* XXXCDC: WAITOK??? */ 3674 } 3675 3676 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3677 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3678 } 3679 3680 /* 3681 * share the mapping: this means we want the old and 3682 * new entries to share amaps and backing objects. 
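 *
 * This is the MAP_INHERIT_SHARE case of uvmspace_fork() below: the child
 * gains a reference to the parent's amap/object via uvm_mapent_share()
 * and uvm_mapent_clone() with AMAP_SHARED.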
3683 */ 3684 struct vm_map_entry * 3685 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3686 struct vm_map *old_map, 3687 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3688 { 3689 struct vm_map_entry *new_entry; 3690 3691 new_entry = uvm_mapent_share(new_map, old_entry->start, 3692 old_entry->end - old_entry->start, 0, old_entry->protection, 3693 old_entry->max_protection, old_map, old_entry, dead); 3694 3695 /* 3696 * pmap_copy the mappings: this routine is optional 3697 * but if it is there it will reduce the number of 3698 * page faults in the new proc. 3699 */ 3700 if (!UVM_ET_ISHOLE(new_entry)) 3701 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3702 (new_entry->end - new_entry->start), new_entry->start); 3703 3704 return (new_entry); 3705 } 3706 3707 /* 3708 * copy-on-write the mapping (using mmap's 3709 * MAP_PRIVATE semantics) 3710 * 3711 * allocate new_entry, adjust reference counts. 3712 * (note that new references are read-only). 3713 */ 3714 struct vm_map_entry * 3715 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3716 struct vm_map *old_map, 3717 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3718 { 3719 struct vm_map_entry *new_entry; 3720 boolean_t protect_child; 3721 3722 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3723 old_entry->end - old_entry->start, 0, old_entry->protection, 3724 old_entry->max_protection, old_entry, dead, 0, 0); 3725 3726 new_entry->etype |= 3727 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3728 3729 /* 3730 * the new entry will need an amap. it will either 3731 * need to be copied from the old entry or created 3732 * from scratch (if the old entry does not have an 3733 * amap). can we defer this process until later 3734 * (by setting "needs_copy") or do we need to copy 3735 * the amap now? 3736 * 3737 * we must copy the amap now if any of the following 3738 * conditions hold: 3739 * 1. the old entry has an amap and that amap is 3740 * being shared. this means that the old (parent) 3741 * process is sharing the amap with another 3742 * process. if we do not clear needs_copy here 3743 * we will end up in a situation where both the 3744 * parent and child process are referring to the 3745 * same amap with "needs_copy" set. if the 3746 * parent write-faults, the fault routine will 3747 * clear "needs_copy" in the parent by allocating 3748 * a new amap. this is wrong because the 3749 * parent is supposed to be sharing the old amap 3750 * and the new amap will break that. 3751 * 3752 * 2. if the old entry has an amap and a non-zero 3753 * wire count then we are going to have to call 3754 * amap_cow_now to avoid page faults in the 3755 * parent process. since amap_cow_now requires 3756 * "needs_copy" to be clear we might as well 3757 * clear it here as well. 3758 * 3759 */ 3760 if (old_entry->aref.ar_amap != NULL && 3761 ((amap_flags(old_entry->aref.ar_amap) & 3762 AMAP_SHARED) != 0 || 3763 VM_MAPENT_ISWIRED(old_entry))) { 3764 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3765 0, 0); 3766 /* XXXCDC: M_WAITOK ... ok? */ 3767 } 3768 3769 /* 3770 * if the parent's entry is wired down, then the 3771 * parent process does not want page faults on 3772 * access to that memory. this means that we 3773 * cannot do copy-on-write because we can't write 3774 * protect the old entry. in this case we 3775 * resolve all copy-on-write faults now, using 3776 * amap_cow_now. note that we have already 3777 * allocated any needed amap (above). 
3778 */ 3779 if (VM_MAPENT_ISWIRED(old_entry)) { 3780 /* 3781 * resolve all copy-on-write faults now 3782 * (note that there is nothing to do if 3783 * the old mapping does not have an amap). 3784 * XXX: is it worthwhile to bother with 3785 * pmap_copy in this case? 3786 */ 3787 if (old_entry->aref.ar_amap) 3788 amap_cow_now(new_map, new_entry); 3789 } else { 3790 if (old_entry->aref.ar_amap) { 3791 /* 3792 * setup mappings to trigger copy-on-write faults 3793 * we must write-protect the parent if it has 3794 * an amap and it is not already "needs_copy"... 3795 * if it is already "needs_copy" then the parent 3796 * has already been write-protected by a previous 3797 * fork operation. 3798 * 3799 * if we do not write-protect the parent, then 3800 * we must be sure to write-protect the child 3801 * after the pmap_copy() operation. 3802 * 3803 * XXX: pmap_copy should have some way of telling 3804 * us that it didn't do anything so we can avoid 3805 * calling pmap_protect needlessly. 3806 */ 3807 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3808 if (old_entry->max_protection & PROT_WRITE) { 3809 pmap_protect(old_map->pmap, 3810 old_entry->start, 3811 old_entry->end, 3812 old_entry->protection & 3813 ~PROT_WRITE); 3814 pmap_update(old_map->pmap); 3815 } 3816 old_entry->etype |= UVM_ET_NEEDSCOPY; 3817 } 3818 3819 /* parent must now be write-protected */ 3820 protect_child = FALSE; 3821 } else { 3822 /* 3823 * we only need to protect the child if the 3824 * parent has write access. 3825 */ 3826 if (old_entry->max_protection & PROT_WRITE) 3827 protect_child = TRUE; 3828 else 3829 protect_child = FALSE; 3830 } 3831 /* 3832 * copy the mappings 3833 * XXX: need a way to tell if this does anything 3834 */ 3835 if (!UVM_ET_ISHOLE(new_entry)) 3836 pmap_copy(new_map->pmap, old_map->pmap, 3837 new_entry->start, 3838 (old_entry->end - old_entry->start), 3839 old_entry->start); 3840 3841 /* protect the child's mappings if necessary */ 3842 if (protect_child) { 3843 pmap_protect(new_map->pmap, new_entry->start, 3844 new_entry->end, 3845 new_entry->protection & 3846 ~PROT_WRITE); 3847 } 3848 } 3849 3850 return (new_entry); 3851 } 3852 3853 /* 3854 * zero the mapping: the new entry will be zero initialized 3855 */ 3856 struct vm_map_entry * 3857 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3858 struct vm_map *old_map, 3859 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3860 { 3861 struct vm_map_entry *new_entry; 3862 3863 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3864 old_entry->end - old_entry->start, 0, old_entry->protection, 3865 old_entry->max_protection, old_entry, dead, 0, 0); 3866 3867 new_entry->etype |= 3868 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3869 3870 if (new_entry->aref.ar_amap) { 3871 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3872 atop(new_entry->end - new_entry->start), 0); 3873 new_entry->aref.ar_amap = NULL; 3874 new_entry->aref.ar_pageoff = 0; 3875 } 3876 3877 if (UVM_ET_ISOBJ(new_entry)) { 3878 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3879 new_entry->object.uvm_obj->pgops->pgo_detach( 3880 new_entry->object.uvm_obj); 3881 new_entry->object.uvm_obj = NULL; 3882 new_entry->etype &= ~UVM_ET_OBJ; 3883 } 3884 3885 return (new_entry); 3886 } 3887 3888 /* 3889 * uvmspace_fork: fork a process' main map 3890 * 3891 * => create a new vmspace for child process from parent. 3892 * => parent's map must not be locked. 
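 * => each parent entry is handled according to its inheritance:
 *	MAP_INHERIT_SHARE -> uvm_mapent_forkshared()
 *	MAP_INHERIT_COPY  -> uvm_mapent_forkcopy()
 *	MAP_INHERIT_ZERO  -> uvm_mapent_forkzero()
 *    anything else is skipped and simply does not appear in the child.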
3893 */ 3894 struct vmspace * 3895 uvmspace_fork(struct process *pr) 3896 { 3897 struct vmspace *vm1 = pr->ps_vmspace; 3898 struct vmspace *vm2; 3899 struct vm_map *old_map = &vm1->vm_map; 3900 struct vm_map *new_map; 3901 struct vm_map_entry *old_entry, *new_entry; 3902 struct uvm_map_deadq dead; 3903 3904 vm_map_lock(old_map); 3905 3906 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3907 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3908 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3909 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3910 vm2->vm_dused = 0; /* Statistic managed by us. */ 3911 new_map = &vm2->vm_map; 3912 vm_map_lock(new_map); 3913 3914 /* go entry-by-entry */ 3915 TAILQ_INIT(&dead); 3916 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3917 if (old_entry->start == old_entry->end) 3918 continue; 3919 3920 /* first, some sanity checks on the old entry */ 3921 if (UVM_ET_ISSUBMAP(old_entry)) { 3922 panic("fork: encountered a submap during fork " 3923 "(illegal)"); 3924 } 3925 3926 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3927 UVM_ET_ISNEEDSCOPY(old_entry)) { 3928 panic("fork: non-copy_on_write map entry marked " 3929 "needs_copy (illegal)"); 3930 } 3931 3932 /* Apply inheritance. */ 3933 switch (old_entry->inheritance) { 3934 case MAP_INHERIT_SHARE: 3935 new_entry = uvm_mapent_forkshared(vm2, new_map, 3936 old_map, old_entry, &dead); 3937 break; 3938 case MAP_INHERIT_COPY: 3939 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3940 old_map, old_entry, &dead); 3941 break; 3942 case MAP_INHERIT_ZERO: 3943 new_entry = uvm_mapent_forkzero(vm2, new_map, 3944 old_map, old_entry, &dead); 3945 break; 3946 default: 3947 continue; 3948 } 3949 3950 /* Update process statistics. */ 3951 if (!UVM_ET_ISHOLE(new_entry)) 3952 new_map->size += new_entry->end - new_entry->start; 3953 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3954 vm2->vm_dused += uvmspace_dused( 3955 new_map, new_entry->start, new_entry->end); 3956 } 3957 } 3958 3959 vm_map_unlock(old_map); 3960 vm_map_unlock(new_map); 3961 3962 /* 3963 * This can actually happen, if multiple entries described a 3964 * space in which an entry was inherited. 3965 */ 3966 uvm_unmap_detach(&dead, 0); 3967 3968 #ifdef SYSVSHM 3969 if (vm1->vm_shm) 3970 shmfork(vm1, vm2); 3971 #endif 3972 3973 return vm2; 3974 } 3975 3976 /* 3977 * uvm_map_hint: return the beginning of the best area suitable for 3978 * creating a new mapping with "prot" protection. 3979 */ 3980 vaddr_t 3981 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3982 vaddr_t maxaddr) 3983 { 3984 vaddr_t addr; 3985 vaddr_t spacing; 3986 3987 #ifdef __i386__ 3988 /* 3989 * If executable skip first two pages, otherwise start 3990 * after data + heap region. 3991 */ 3992 if ((prot & PROT_EXEC) != 0 && 3993 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3994 addr = (PAGE_SIZE*2) + 3995 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3996 return (round_page(addr)); 3997 } 3998 #endif 3999 4000 #if defined (__LP64__) 4001 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4002 #else 4003 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 4004 #endif 4005 4006 /* 4007 * Start malloc/mmap after the brk. 
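	 * The hint is then randomized by up to `spacing' bytes; spacing is
	 * halved until the randomization range fits between the hint and
	 * maxaddr, so the resulting layout is not predictable.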
	 */
	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
	addr = MAX(addr, minaddr);

	if (addr < maxaddr) {
		while (spacing > maxaddr - addr)
			spacing >>= 1;
	}
	addr += arc4random() & spacing;
	return (round_page(addr));
}

/*
 * uvm_map_submap: punch down part of a map into a submap
 *
 * => only the kernel_map is allowed to be submapped
 * => the purpose of submapping is to break up the locking granularity
 *    of a larger map
 * => the range specified must have been mapped previously with a uvm_map()
 *    call [with uobj==NULL] to create a blank map entry in the main map.
 *    [And it had better still be blank!]
 * => maps which contain submaps should never be copied or forked.
 * => to remove a submap, use uvm_unmap() on the main map
 *    and then uvm_map_deallocate() the submap.
 * => main map must be unlocked.
 * => submap must have been init'd and have a zero reference count.
 *    [need not be locked as we don't actually reference it]
 */
int
uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
    struct vm_map *submap)
{
	struct vm_map_entry *entry;
	int result;

	if (start > map->max_offset || end > map->max_offset ||
	    start < map->min_offset || end < map->min_offset)
		return EINVAL;

	vm_map_lock(map);

	if (uvm_map_lookup_entry(map, start, &entry)) {
		UVM_MAP_CLIP_START(map, entry, start);
		UVM_MAP_CLIP_END(map, entry, end);
	} else
		entry = NULL;

	if (entry != NULL &&
	    entry->start == start && entry->end == end &&
	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
		entry->etype |= UVM_ET_SUBMAP;
		entry->object.sub_map = submap;
		entry->offset = 0;
		uvm_map_reference(submap);
		result = 0;
	} else
		result = EINVAL;

	vm_map_unlock(map);
	return(result);
}

/*
 * uvm_map_checkprot: check protection in map
 *
 * => must allow specific protection in a fully allocated region.
 * => map must be read or write locked by caller.
 */
boolean_t
uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_prot_t protection)
{
	struct vm_map_entry *entry;

	if (start < map->min_offset || end > map->max_offset || start > end)
		return FALSE;
	if (start == end)
		return TRUE;

	/*
	 * Iterate entries.
	 */
	for (entry = uvm_map_entrybyaddr(&map->addr, start);
	    entry != NULL && entry->start < end;
	    entry = RBT_NEXT(uvm_map_addr, entry)) {
		/* Fail if a hole is found. */
		if (UVM_ET_ISHOLE(entry) ||
		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
			return FALSE;

		/* Check protection.
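		 * Every entry overlapping the range must grant at least
		 * the requested access bits.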
*/ 4100 if ((entry->protection & protection) != protection) 4101 return FALSE; 4102 } 4103 return TRUE; 4104 } 4105 4106 /* 4107 * uvm_map_create: create map 4108 */ 4109 vm_map_t 4110 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4111 { 4112 vm_map_t map; 4113 4114 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4115 map->pmap = pmap; 4116 uvm_map_setup(map, min, max, flags); 4117 return (map); 4118 } 4119 4120 /* 4121 * uvm_map_deallocate: drop reference to a map 4122 * 4123 * => caller must not lock map 4124 * => we will zap map if ref count goes to zero 4125 */ 4126 void 4127 uvm_map_deallocate(vm_map_t map) 4128 { 4129 int c; 4130 struct uvm_map_deadq dead; 4131 4132 c = --map->ref_count; 4133 if (c > 0) { 4134 return; 4135 } 4136 4137 /* 4138 * all references gone. unmap and free. 4139 * 4140 * No lock required: we are only one to access this map. 4141 */ 4142 TAILQ_INIT(&dead); 4143 uvm_tree_sanity(map, __FILE__, __LINE__); 4144 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4145 TRUE, FALSE); 4146 pmap_destroy(map->pmap); 4147 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4148 free(map, M_VMMAP, sizeof *map); 4149 4150 uvm_unmap_detach(&dead, 0); 4151 } 4152 4153 /* 4154 * uvm_map_inherit: set inheritance code for range of addrs in map. 4155 * 4156 * => map must be unlocked 4157 * => note that the inherit code is used during a "fork". see fork 4158 * code for details. 4159 */ 4160 int 4161 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4162 vm_inherit_t new_inheritance) 4163 { 4164 struct vm_map_entry *entry; 4165 4166 switch (new_inheritance) { 4167 case MAP_INHERIT_NONE: 4168 case MAP_INHERIT_COPY: 4169 case MAP_INHERIT_SHARE: 4170 case MAP_INHERIT_ZERO: 4171 break; 4172 default: 4173 return (EINVAL); 4174 } 4175 4176 if (start > end) 4177 return EINVAL; 4178 start = MAX(start, map->min_offset); 4179 end = MIN(end, map->max_offset); 4180 if (start >= end) 4181 return 0; 4182 4183 vm_map_lock(map); 4184 4185 entry = uvm_map_entrybyaddr(&map->addr, start); 4186 if (entry->end > start) 4187 UVM_MAP_CLIP_START(map, entry, start); 4188 else 4189 entry = RBT_NEXT(uvm_map_addr, entry); 4190 4191 while (entry != NULL && entry->start < end) { 4192 UVM_MAP_CLIP_END(map, entry, end); 4193 entry->inheritance = new_inheritance; 4194 entry = RBT_NEXT(uvm_map_addr, entry); 4195 } 4196 4197 vm_map_unlock(map); 4198 return (0); 4199 } 4200 4201 /* 4202 * uvm_map_advice: set advice code for range of addrs in map. 4203 * 4204 * => map must be unlocked 4205 */ 4206 int 4207 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4208 { 4209 struct vm_map_entry *entry; 4210 4211 switch (new_advice) { 4212 case MADV_NORMAL: 4213 case MADV_RANDOM: 4214 case MADV_SEQUENTIAL: 4215 break; 4216 default: 4217 return (EINVAL); 4218 } 4219 4220 if (start > end) 4221 return EINVAL; 4222 start = MAX(start, map->min_offset); 4223 end = MIN(end, map->max_offset); 4224 if (start >= end) 4225 return 0; 4226 4227 vm_map_lock(map); 4228 4229 entry = uvm_map_entrybyaddr(&map->addr, start); 4230 if (entry != NULL && entry->end > start) 4231 UVM_MAP_CLIP_START(map, entry, start); 4232 else if (entry!= NULL) 4233 entry = RBT_NEXT(uvm_map_addr, entry); 4234 4235 /* 4236 * XXXJRT: disallow holes? 
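	 * For now holes are tolerated: the new advice is simply applied
	 * to whatever entries exist within the range.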
4237 */ 4238 while (entry != NULL && entry->start < end) { 4239 UVM_MAP_CLIP_END(map, entry, end); 4240 entry->advice = new_advice; 4241 entry = RBT_NEXT(uvm_map_addr, entry); 4242 } 4243 4244 vm_map_unlock(map); 4245 return (0); 4246 } 4247 4248 /* 4249 * uvm_map_extract: extract a mapping from a map and put it somewhere 4250 * in the kernel_map, setting protection to max_prot. 4251 * 4252 * => map should be unlocked (we will write lock it and kernel_map) 4253 * => returns 0 on success, error code otherwise 4254 * => start must be page aligned 4255 * => len must be page sized 4256 * => flags: 4257 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4258 * Mappings are QREF's. 4259 */ 4260 int 4261 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4262 vaddr_t *dstaddrp, int flags) 4263 { 4264 struct uvm_map_deadq dead; 4265 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4266 vaddr_t dstaddr; 4267 vaddr_t end; 4268 vaddr_t cp_start; 4269 vsize_t cp_len, cp_off; 4270 int error; 4271 4272 TAILQ_INIT(&dead); 4273 end = start + len; 4274 4275 /* 4276 * Sanity check on the parameters. 4277 * Also, since the mapping may not contain gaps, error out if the 4278 * mapped area is not in source map. 4279 */ 4280 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4281 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4282 return EINVAL; 4283 if (start < srcmap->min_offset || end > srcmap->max_offset) 4284 return EINVAL; 4285 4286 /* Initialize dead entries. Handle len == 0 case. */ 4287 if (len == 0) 4288 return 0; 4289 4290 /* Acquire lock on srcmap. */ 4291 vm_map_lock(srcmap); 4292 4293 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4294 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4295 4296 /* Check that the range is contiguous. */ 4297 for (entry = first; entry != NULL && entry->end < end; 4298 entry = RBT_NEXT(uvm_map_addr, entry)) { 4299 if (VMMAP_FREE_END(entry) != entry->end || 4300 UVM_ET_ISHOLE(entry)) { 4301 error = EINVAL; 4302 goto fail; 4303 } 4304 } 4305 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4306 error = EINVAL; 4307 goto fail; 4308 } 4309 4310 /* 4311 * Handle need-copy flag. 4312 */ 4313 for (entry = first; entry != NULL && entry->start < end; 4314 entry = RBT_NEXT(uvm_map_addr, entry)) { 4315 if (UVM_ET_ISNEEDSCOPY(entry)) 4316 amap_copy(srcmap, entry, M_NOWAIT, 4317 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4318 if (UVM_ET_ISNEEDSCOPY(entry)) { 4319 /* 4320 * amap_copy failure 4321 */ 4322 error = ENOMEM; 4323 goto fail; 4324 } 4325 } 4326 4327 /* Lock destination map (kernel_map). */ 4328 vm_map_lock(kernel_map); 4329 4330 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4331 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4332 PROT_NONE, 0) != 0) { 4333 error = ENOMEM; 4334 goto fail2; 4335 } 4336 *dstaddrp = dstaddr; 4337 4338 /* 4339 * We now have srcmap and kernel_map locked. 4340 * dstaddr contains the destination offset in dstmap. 4341 */ 4342 /* step 1: start looping through map entries, performing extraction. */ 4343 for (entry = first; entry != NULL && entry->start < end; 4344 entry = RBT_NEXT(uvm_map_addr, entry)) { 4345 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4346 if (UVM_ET_ISHOLE(entry)) 4347 continue; 4348 4349 /* Calculate uvm_mapent_clone parameters. 
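		 * cp_start/cp_len describe the part of this entry that
		 * overlaps [start, end); cp_off is the offset of that part
		 * within the entry.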
*/ 4350 cp_start = entry->start; 4351 if (cp_start < start) { 4352 cp_off = start - cp_start; 4353 cp_start = start; 4354 } else 4355 cp_off = 0; 4356 cp_len = MIN(entry->end, end) - cp_start; 4357 4358 newentry = uvm_mapent_clone(kernel_map, 4359 cp_start - start + dstaddr, cp_len, cp_off, 4360 entry->protection, entry->max_protection, 4361 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4362 if (newentry == NULL) { 4363 error = ENOMEM; 4364 goto fail2_unmap; 4365 } 4366 kernel_map->size += cp_len; 4367 if (flags & UVM_EXTRACT_FIXPROT) 4368 newentry->protection = newentry->max_protection; 4369 4370 /* 4371 * Step 2: perform pmap copy. 4372 * (Doing this in the loop saves one RB traversal.) 4373 */ 4374 pmap_copy(kernel_map->pmap, srcmap->pmap, 4375 cp_start - start + dstaddr, cp_len, cp_start); 4376 } 4377 pmap_update(kernel_map->pmap); 4378 4379 error = 0; 4380 4381 /* Unmap copied entries on failure. */ 4382 fail2_unmap: 4383 if (error) { 4384 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4385 FALSE, TRUE); 4386 } 4387 4388 /* Release maps, release dead entries. */ 4389 fail2: 4390 vm_map_unlock(kernel_map); 4391 4392 fail: 4393 vm_map_unlock(srcmap); 4394 4395 uvm_unmap_detach(&dead, 0); 4396 4397 return error; 4398 } 4399 4400 /* 4401 * uvm_map_clean: clean out a map range 4402 * 4403 * => valid flags: 4404 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4405 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4406 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4407 * if (flags & PGO_FREE): any cached pages are freed after clean 4408 * => returns an error if any part of the specified range isn't mapped 4409 * => never a need to flush amap layer since the anonymous memory has 4410 * no permanent home, but may deactivate pages there 4411 * => called from sys_msync() and sys_madvise() 4412 * => caller must not write-lock map (read OK). 4413 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4414 */ 4415 4416 int 4417 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4418 { 4419 struct vm_map_entry *first, *entry; 4420 struct vm_amap *amap; 4421 struct vm_anon *anon; 4422 struct vm_page *pg; 4423 struct uvm_object *uobj; 4424 vaddr_t cp_start, cp_end; 4425 int refs; 4426 int error; 4427 boolean_t rv; 4428 4429 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4430 (PGO_FREE|PGO_DEACTIVATE)); 4431 4432 if (start > end || start < map->min_offset || end > map->max_offset) 4433 return EINVAL; 4434 4435 vm_map_lock_read(map); 4436 first = uvm_map_entrybyaddr(&map->addr, start); 4437 4438 /* Make a first pass to check for holes. */ 4439 for (entry = first; entry != NULL && entry->start < end; 4440 entry = RBT_NEXT(uvm_map_addr, entry)) { 4441 if (UVM_ET_ISSUBMAP(entry)) { 4442 vm_map_unlock_read(map); 4443 return EINVAL; 4444 } 4445 if (UVM_ET_ISSUBMAP(entry) || 4446 UVM_ET_ISHOLE(entry) || 4447 (entry->end < end && 4448 VMMAP_FREE_END(entry) != entry->end)) { 4449 vm_map_unlock_read(map); 4450 return EFAULT; 4451 } 4452 } 4453 4454 error = 0; 4455 for (entry = first; entry != NULL && entry->start < end; 4456 entry = RBT_NEXT(uvm_map_addr, entry)) { 4457 amap = entry->aref.ar_amap; /* top layer */ 4458 if (UVM_ET_ISOBJ(entry)) 4459 uobj = entry->object.uvm_obj; 4460 else 4461 uobj = NULL; 4462 4463 /* 4464 * No amap cleaning necessary if: 4465 * - there's no amap 4466 * - we're not deactivating or freeing pages. 
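		 * (PGO_CLEANIT on its own never touches the amap layer.)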
4467 */ 4468 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4469 goto flush_object; 4470 4471 cp_start = MAX(entry->start, start); 4472 cp_end = MIN(entry->end, end); 4473 4474 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4475 anon = amap_lookup(&entry->aref, 4476 cp_start - entry->start); 4477 if (anon == NULL) 4478 continue; 4479 4480 pg = anon->an_page; 4481 if (pg == NULL) { 4482 continue; 4483 } 4484 KASSERT(pg->pg_flags & PQ_ANON); 4485 4486 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4487 /* 4488 * XXX In these first 3 cases, we always just 4489 * XXX deactivate the page. We may want to 4490 * XXX handle the different cases more 4491 * XXX specifically, in the future. 4492 */ 4493 case PGO_CLEANIT|PGO_FREE: 4494 case PGO_CLEANIT|PGO_DEACTIVATE: 4495 case PGO_DEACTIVATE: 4496 deactivate_it: 4497 /* skip the page if it's wired */ 4498 if (pg->wire_count != 0) 4499 break; 4500 4501 uvm_lock_pageq(); 4502 4503 KASSERT(pg->uanon == anon); 4504 4505 /* zap all mappings for the page. */ 4506 pmap_page_protect(pg, PROT_NONE); 4507 4508 /* ...and deactivate the page. */ 4509 uvm_pagedeactivate(pg); 4510 4511 uvm_unlock_pageq(); 4512 break; 4513 case PGO_FREE: 4514 /* 4515 * If there are multiple references to 4516 * the amap, just deactivate the page. 4517 */ 4518 if (amap_refs(amap) > 1) 4519 goto deactivate_it; 4520 4521 /* XXX skip the page if it's wired */ 4522 if (pg->wire_count != 0) { 4523 break; 4524 } 4525 amap_unadd(&entry->aref, 4526 cp_start - entry->start); 4527 refs = --anon->an_ref; 4528 if (refs == 0) 4529 uvm_anfree(anon); 4530 break; 4531 default: 4532 panic("uvm_map_clean: weird flags"); 4533 } 4534 } 4535 4536 flush_object: 4537 cp_start = MAX(entry->start, start); 4538 cp_end = MIN(entry->end, end); 4539 4540 /* 4541 * flush pages if we've got a valid backing object. 4542 * 4543 * Don't PGO_FREE if we don't have write permission 4544 * and don't flush if this is a copy-on-write object 4545 * since we can't know our permissions on it. 4546 */ 4547 if (uobj != NULL && 4548 ((flags & PGO_FREE) == 0 || 4549 ((entry->max_protection & PROT_WRITE) != 0 && 4550 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4551 rv = uobj->pgops->pgo_flush(uobj, 4552 cp_start - entry->start + entry->offset, 4553 cp_end - entry->start + entry->offset, flags); 4554 4555 if (rv == FALSE) 4556 error = EFAULT; 4557 } 4558 } 4559 4560 vm_map_unlock_read(map); 4561 return error; 4562 } 4563 4564 /* 4565 * UVM_MAP_CLIP_END implementation 4566 */ 4567 void 4568 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4569 { 4570 struct vm_map_entry *tmp; 4571 4572 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4573 tmp = uvm_mapent_alloc(map, 0); 4574 4575 /* Invoke splitentry. */ 4576 uvm_map_splitentry(map, entry, tmp, addr); 4577 } 4578 4579 /* 4580 * UVM_MAP_CLIP_START implementation 4581 * 4582 * Clippers are required to not change the pointers to the entry they are 4583 * clipping on. 4584 * Since uvm_map_splitentry turns the original entry into the lowest 4585 * entry (address wise) we do a swap between the new entry and the original 4586 * entry, prior to calling uvm_map_splitentry. 4587 */ 4588 void 4589 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4590 { 4591 struct vm_map_entry *tmp; 4592 struct uvm_addr_state *free; 4593 4594 /* Unlink original. 
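	 * The entry is removed from both its free list and the address
	 * tree so that the copy created below can take its place.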
*/ 4595 free = uvm_map_uaddr_e(map, entry); 4596 uvm_mapent_free_remove(map, free, entry); 4597 uvm_mapent_addr_remove(map, entry); 4598 4599 /* Copy entry. */ 4600 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4601 tmp = uvm_mapent_alloc(map, 0); 4602 uvm_mapent_copy(entry, tmp); 4603 4604 /* Put new entry in place of original entry. */ 4605 uvm_mapent_addr_insert(map, tmp); 4606 uvm_mapent_free_insert(map, free, tmp); 4607 4608 /* Invoke splitentry. */ 4609 uvm_map_splitentry(map, tmp, entry, addr); 4610 } 4611 4612 /* 4613 * Boundary fixer. 4614 */ 4615 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4616 static __inline vaddr_t 4617 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4618 { 4619 return (min < bound && max > bound) ? bound : max; 4620 } 4621 4622 /* 4623 * Choose free list based on address at start of free space. 4624 * 4625 * The uvm_addr_state returned contains addr and is the first of: 4626 * - uaddr_exe 4627 * - uaddr_brk_stack 4628 * - uaddr_any 4629 */ 4630 struct uvm_addr_state* 4631 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4632 { 4633 struct uvm_addr_state *uaddr; 4634 int i; 4635 4636 /* Special case the first page, to prevent mmap from returning 0. */ 4637 if (addr < VMMAP_MIN_ADDR) 4638 return NULL; 4639 4640 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4641 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4642 if (addr >= uvm_maxkaddr) 4643 return NULL; 4644 } 4645 4646 /* Is the address inside the exe-only map? */ 4647 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4648 addr < map->uaddr_exe->uaddr_maxaddr) 4649 return map->uaddr_exe; 4650 4651 /* Check if the space falls inside brk/stack area. */ 4652 if ((addr >= map->b_start && addr < map->b_end) || 4653 (addr >= map->s_start && addr < map->s_end)) { 4654 if (map->uaddr_brk_stack != NULL && 4655 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4656 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4657 return map->uaddr_brk_stack; 4658 } else 4659 return NULL; 4660 } 4661 4662 /* 4663 * Check the other selectors. 4664 * 4665 * These selectors are only marked as the owner, if they have insert 4666 * functions. 4667 */ 4668 for (i = 0; i < nitems(map->uaddr_any); i++) { 4669 uaddr = map->uaddr_any[i]; 4670 if (uaddr == NULL) 4671 continue; 4672 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4673 continue; 4674 4675 if (addr >= uaddr->uaddr_minaddr && 4676 addr < uaddr->uaddr_maxaddr) 4677 return uaddr; 4678 } 4679 4680 return NULL; 4681 } 4682 4683 /* 4684 * Choose free list based on address at start of free space. 4685 * 4686 * The uvm_addr_state returned contains addr and is the first of: 4687 * - uaddr_exe 4688 * - uaddr_brk_stack 4689 * - uaddr_any 4690 */ 4691 struct uvm_addr_state* 4692 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4693 { 4694 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4695 } 4696 4697 /* 4698 * Returns the first free-memory boundary that is crossed by [min-max]. 4699 */ 4700 vsize_t 4701 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4702 { 4703 struct uvm_addr_state *uaddr; 4704 int i; 4705 4706 /* Never return first page. */ 4707 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4708 4709 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4710 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4711 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4712 4713 /* Check for exe-only boundaries. 
 */
	if (map->uaddr_exe != NULL) {
		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
	}

	/* Check for brk() and stack boundaries. */
	if (map->uaddr_brk_stack != NULL) {
		max = uvm_map_boundfix(min, max,
		    map->uaddr_brk_stack->uaddr_minaddr);
		max = uvm_map_boundfix(min, max,
		    map->uaddr_brk_stack->uaddr_maxaddr);
	}

	/* Check other boundaries. */
	for (i = 0; i < nitems(map->uaddr_any); i++) {
		uaddr = map->uaddr_any[i];
		if (uaddr != NULL) {
			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
		}
	}

	/* Boundaries at stack and brk() area. */
	max = uvm_map_boundfix(min, max, map->s_start);
	max = uvm_map_boundfix(min, max, map->s_end);
	max = uvm_map_boundfix(min, max, map->b_start);
	max = uvm_map_boundfix(min, max, map->b_end);

	return max;
}

/*
 * Update map allocation start and end addresses from proc vmspace.
 */
void
uvm_map_vmspace_update(struct vm_map *map,
    struct uvm_map_deadq *dead, int flags)
{
	struct vmspace *vm;
	vaddr_t b_start, b_end, s_start, s_end;

	KASSERT(map->flags & VM_MAP_ISVMSPACE);
	KASSERT(offsetof(struct vmspace, vm_map) == 0);

	/*
	 * Derive actual allocation boundaries from vmspace.
	 */
	vm = (struct vmspace *)map;
	b_start = (vaddr_t)vm->vm_daddr;
	b_end = b_start + BRKSIZ;
	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
#ifdef DIAGNOSTIC
	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
		    vm, b_start, b_end, s_start, s_end);
	}
#endif

	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
	    map->s_start == s_start && map->s_end == s_end))
		return;

	uvm_map_freelist_update(map, dead, b_start, b_end,
	    s_start, s_end, flags);
}

/*
 * Grow kernel memory.
 *
 * This function is only called for kernel maps when an allocation fails.
 *
 * If the map has a gap that is large enough to accommodate alloc_sz, this
 * function will make sure map->free will include it.
 */
void
uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
    vsize_t alloc_sz, int flags)
{
	vsize_t sz;
	vaddr_t end;
	struct vm_map_entry *entry;

	/* Kernel memory only. */
	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
	/* Destroy free list. */
	uvm_map_freelist_update_clear(map, dead);

	/* Include the guard page in the hard minimum requirement of alloc_sz. */
	if (map->flags & VM_MAP_GUARDPAGES)
		alloc_sz += PAGE_SIZE;

	/*
	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
	 *
	 * Don't handle the case where the multiplication overflows:
	 * if that happens, the allocation is probably too big anyway.
	 */
	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);

	/*
	 * Walk forward until a gap large enough for alloc_sz shows up.
	 *
	 * We assume the kernel map has no boundaries.
4822 * uvm_maxkaddr may be zero. 4823 */ 4824 end = MAX(uvm_maxkaddr, map->min_offset); 4825 entry = uvm_map_entrybyaddr(&map->addr, end); 4826 while (entry && entry->fspace < alloc_sz) 4827 entry = RBT_NEXT(uvm_map_addr, entry); 4828 if (entry) { 4829 end = MAX(VMMAP_FREE_START(entry), end); 4830 end += MIN(sz, map->max_offset - end); 4831 } else 4832 end = map->max_offset; 4833 4834 /* Reserve pmap entries. */ 4835 #ifdef PMAP_GROWKERNEL 4836 uvm_maxkaddr = pmap_growkernel(end); 4837 #else 4838 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4839 #endif 4840 4841 /* Rebuild free list. */ 4842 uvm_map_freelist_update_refill(map, flags); 4843 } 4844 4845 /* 4846 * Freelist update subfunction: unlink all entries from freelists. 4847 */ 4848 void 4849 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4850 { 4851 struct uvm_addr_state *free; 4852 struct vm_map_entry *entry, *prev, *next; 4853 4854 prev = NULL; 4855 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4856 entry = next) { 4857 next = RBT_NEXT(uvm_map_addr, entry); 4858 4859 free = uvm_map_uaddr_e(map, entry); 4860 uvm_mapent_free_remove(map, free, entry); 4861 4862 if (prev != NULL && entry->start == entry->end) { 4863 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4864 uvm_mapent_addr_remove(map, entry); 4865 DEAD_ENTRY_PUSH(dead, entry); 4866 } else 4867 prev = entry; 4868 } 4869 } 4870 4871 /* 4872 * Freelist update subfunction: refill the freelists with entries. 4873 */ 4874 void 4875 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4876 { 4877 struct vm_map_entry *entry; 4878 vaddr_t min, max; 4879 4880 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 4881 min = VMMAP_FREE_START(entry); 4882 max = VMMAP_FREE_END(entry); 4883 entry->fspace = 0; 4884 4885 entry = uvm_map_fix_space(map, entry, min, max, flags); 4886 } 4887 4888 uvm_tree_sanity(map, __FILE__, __LINE__); 4889 } 4890 4891 /* 4892 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4893 */ 4894 void 4895 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4896 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4897 { 4898 KDASSERT(b_end >= b_start && s_end >= s_start); 4899 4900 /* Clear all free lists. */ 4901 uvm_map_freelist_update_clear(map, dead); 4902 4903 /* Apply new bounds. */ 4904 map->b_start = b_start; 4905 map->b_end = b_end; 4906 map->s_start = s_start; 4907 map->s_end = s_end; 4908 4909 /* Refill free lists. */ 4910 uvm_map_freelist_update_refill(map, flags); 4911 } 4912 4913 /* 4914 * Assign a uvm_addr_state to the specified pointer in vm_map. 4915 * 4916 * May sleep. 4917 */ 4918 void 4919 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4920 struct uvm_addr_state *newval) 4921 { 4922 struct uvm_map_deadq dead; 4923 4924 /* Pointer which must be in this map. */ 4925 KASSERT(which != NULL); 4926 KASSERT((void*)map <= (void*)(which) && 4927 (void*)(which) < (void*)(map + 1)); 4928 4929 vm_map_lock(map); 4930 TAILQ_INIT(&dead); 4931 uvm_map_freelist_update_clear(map, &dead); 4932 4933 uvm_addr_destroy(*which); 4934 *which = newval; 4935 4936 uvm_map_freelist_update_refill(map, 0); 4937 vm_map_unlock(map); 4938 uvm_unmap_detach(&dead, 0); 4939 } 4940 4941 /* 4942 * Correct space insert. 4943 * 4944 * Entry must not be on any freelist. 
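 * The free space [min, max) is credited to `entry' by growing its fspace,
 * or to newly allocated entries whenever uvm_map_boundary() reports that a
 * different allocator owns the next part of the range.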
4945 */ 4946 struct vm_map_entry* 4947 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4948 vaddr_t min, vaddr_t max, int flags) 4949 { 4950 struct uvm_addr_state *free, *entfree; 4951 vaddr_t lmax; 4952 4953 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4954 KDASSERT(min <= max); 4955 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4956 min == map->min_offset); 4957 4958 /* 4959 * During the function, entfree will always point at the uaddr state 4960 * for entry. 4961 */ 4962 entfree = (entry == NULL ? NULL : 4963 uvm_map_uaddr_e(map, entry)); 4964 4965 while (min != max) { 4966 /* Claim guard page for entry. */ 4967 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4968 VMMAP_FREE_END(entry) == entry->end && 4969 entry->start != entry->end) { 4970 if (max - min == 2 * PAGE_SIZE) { 4971 /* 4972 * If the free-space gap is exactly 2 pages, 4973 * we make the guard 2 pages instead of 1. 4974 * Because in a guarded map, an area needs 4975 * at least 2 pages to allocate from: 4976 * one page for the allocation and one for 4977 * the guard. 4978 */ 4979 entry->guard = 2 * PAGE_SIZE; 4980 min = max; 4981 } else { 4982 entry->guard = PAGE_SIZE; 4983 min += PAGE_SIZE; 4984 } 4985 continue; 4986 } 4987 4988 /* 4989 * Handle the case where entry has a 2-page guard, but the 4990 * space after entry is freed. 4991 */ 4992 if (entry != NULL && entry->fspace == 0 && 4993 entry->guard > PAGE_SIZE) { 4994 entry->guard = PAGE_SIZE; 4995 min = VMMAP_FREE_START(entry); 4996 } 4997 4998 lmax = uvm_map_boundary(map, min, max); 4999 free = uvm_map_uaddr(map, min); 5000 5001 /* 5002 * Entries are merged if they point at the same uvm_free(). 5003 * Exception to that rule: if min == uvm_maxkaddr, a new 5004 * entry is started regardless (otherwise the allocators 5005 * will get confused). 5006 */ 5007 if (entry != NULL && free == entfree && 5008 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5009 min == uvm_maxkaddr)) { 5010 KDASSERT(VMMAP_FREE_END(entry) == min); 5011 entry->fspace += lmax - min; 5012 } else { 5013 /* 5014 * Commit entry to free list: it'll not be added to 5015 * anymore. 5016 * We'll start a new entry and add to that entry 5017 * instead. 5018 */ 5019 if (entry != NULL) 5020 uvm_mapent_free_insert(map, entfree, entry); 5021 5022 /* New entry for new uaddr. */ 5023 entry = uvm_mapent_alloc(map, flags); 5024 KDASSERT(entry != NULL); 5025 entry->end = entry->start = min; 5026 entry->guard = 0; 5027 entry->fspace = lmax - min; 5028 entry->object.uvm_obj = NULL; 5029 entry->offset = 0; 5030 entry->etype = 0; 5031 entry->protection = entry->max_protection = 0; 5032 entry->inheritance = 0; 5033 entry->wired_count = 0; 5034 entry->advice = 0; 5035 entry->aref.ar_pageoff = 0; 5036 entry->aref.ar_amap = NULL; 5037 uvm_mapent_addr_insert(map, entry); 5038 5039 entfree = free; 5040 } 5041 5042 min = lmax; 5043 } 5044 /* Finally put entry on the uaddr state. */ 5045 if (entry != NULL) 5046 uvm_mapent_free_insert(map, entfree, entry); 5047 5048 return entry; 5049 } 5050 5051 /* 5052 * MQuery style of allocation. 5053 * 5054 * This allocator searches forward until sufficient space is found to map 5055 * the given size. 5056 * 5057 * XXX: factor in offset (via pmap_prefer) and protection? 
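 * On success the address found is written back to *addr_p.  With
 * UVM_FLAG_FIXED the requested address is only verified and EINVAL is
 * returned when it is not available.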
5058 */ 5059 int 5060 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5061 int flags) 5062 { 5063 struct vm_map_entry *entry, *last; 5064 vaddr_t addr; 5065 vaddr_t tmp, pmap_align, pmap_offset; 5066 int error; 5067 5068 addr = *addr_p; 5069 vm_map_lock_read(map); 5070 5071 /* Configure pmap prefer. */ 5072 if (offset != UVM_UNKNOWN_OFFSET) { 5073 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5074 pmap_offset = PMAP_PREFER_OFFSET(offset); 5075 } else { 5076 pmap_align = PAGE_SIZE; 5077 pmap_offset = 0; 5078 } 5079 5080 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 5081 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5082 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5083 if (tmp < addr) 5084 tmp += pmap_align; 5085 addr = tmp; 5086 } 5087 5088 /* First, check if the requested range is fully available. */ 5089 entry = uvm_map_entrybyaddr(&map->addr, addr); 5090 last = NULL; 5091 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5092 error = 0; 5093 goto out; 5094 } 5095 if (flags & UVM_FLAG_FIXED) { 5096 error = EINVAL; 5097 goto out; 5098 } 5099 5100 error = ENOMEM; /* Default error from here. */ 5101 5102 /* 5103 * At this point, the memory at <addr, sz> is not available. 5104 * The reasons are: 5105 * [1] it's outside the map, 5106 * [2] it starts in used memory (and therefore needs to move 5107 * toward the first free page in entry), 5108 * [3] it starts in free memory but bumps into used memory. 5109 * 5110 * Note that for case [2], the forward moving is handled by the 5111 * for loop below. 5112 */ 5113 if (entry == NULL) { 5114 /* [1] Outside the map. */ 5115 if (addr >= map->max_offset) 5116 goto out; 5117 else 5118 entry = RBT_MIN(uvm_map_addr, &map->addr); 5119 } else if (VMMAP_FREE_START(entry) <= addr) { 5120 /* [3] Bumped into used memory. */ 5121 entry = RBT_NEXT(uvm_map_addr, entry); 5122 } 5123 5124 /* Test if the next entry is sufficient for the allocation. */ 5125 for (; entry != NULL; 5126 entry = RBT_NEXT(uvm_map_addr, entry)) { 5127 if (entry->fspace == 0) 5128 continue; 5129 addr = VMMAP_FREE_START(entry); 5130 5131 restart: /* Restart address checks on address change. */ 5132 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5133 if (tmp < addr) 5134 tmp += pmap_align; 5135 addr = tmp; 5136 if (addr >= VMMAP_FREE_END(entry)) 5137 continue; 5138 5139 /* Skip brk() allocation addresses. */ 5140 if (addr + sz > map->b_start && addr < map->b_end) { 5141 if (VMMAP_FREE_END(entry) > map->b_end) { 5142 addr = map->b_end; 5143 goto restart; 5144 } else 5145 continue; 5146 } 5147 /* Skip stack allocation addresses. */ 5148 if (addr + sz > map->s_start && addr < map->s_end) { 5149 if (VMMAP_FREE_END(entry) > map->s_end) { 5150 addr = map->s_end; 5151 goto restart; 5152 } else 5153 continue; 5154 } 5155 5156 last = NULL; 5157 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5158 error = 0; 5159 goto out; 5160 } 5161 } 5162 5163 out: 5164 vm_map_unlock_read(map); 5165 if (error == 0) 5166 *addr_p = addr; 5167 return error; 5168 } 5169 5170 /* 5171 * Determine allocation bias. 5172 * 5173 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5174 * addresses, or 0 for no bias. 5175 * The bias mechanism is intended to avoid clashing with brk() and stack 5176 * areas. 
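 * (On MACHINE_STACK_GROWS_UP platforms the stack area biases towards high
 * addresses instead of low ones.)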
5177 */ 5178 int 5179 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5180 { 5181 vaddr_t start, end; 5182 5183 start = VMMAP_FREE_START(entry); 5184 end = VMMAP_FREE_END(entry); 5185 5186 /* Stay at the top of brk() area. */ 5187 if (end >= map->b_start && start < map->b_end) 5188 return 1; 5189 /* Stay at the far end of the stack area. */ 5190 if (end >= map->s_start && start < map->s_end) { 5191 #ifdef MACHINE_STACK_GROWS_UP 5192 return 1; 5193 #else 5194 return -1; 5195 #endif 5196 } 5197 5198 /* No bias, this area is meant for us. */ 5199 return 0; 5200 } 5201 5202 5203 boolean_t 5204 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5205 { 5206 boolean_t rv; 5207 5208 if (map->flags & VM_MAP_INTRSAFE) { 5209 rv = _mtx_enter_try(&map->mtx LOCK_FL_ARGS); 5210 } else { 5211 mtx_enter(&map->flags_lock); 5212 if (map->flags & VM_MAP_BUSY) { 5213 mtx_leave(&map->flags_lock); 5214 return (FALSE); 5215 } 5216 mtx_leave(&map->flags_lock); 5217 rv = (_rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP LOCK_FL_ARGS) 5218 == 0); 5219 /* check if the lock is busy and back out if we won the race */ 5220 if (rv) { 5221 mtx_enter(&map->flags_lock); 5222 if (map->flags & VM_MAP_BUSY) { 5223 _rw_exit(&map->lock LOCK_FL_ARGS); 5224 rv = FALSE; 5225 } 5226 mtx_leave(&map->flags_lock); 5227 } 5228 } 5229 5230 if (rv) { 5231 map->timestamp++; 5232 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5233 uvm_tree_sanity(map, file, line); 5234 uvm_tree_size_chk(map, file, line); 5235 } 5236 5237 return (rv); 5238 } 5239 5240 void 5241 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5242 { 5243 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5244 do { 5245 mtx_enter(&map->flags_lock); 5246 tryagain: 5247 while (map->flags & VM_MAP_BUSY) { 5248 map->flags |= VM_MAP_WANTLOCK; 5249 msleep(&map->flags, &map->flags_lock, 5250 PVM, vmmapbsy, 0); 5251 } 5252 mtx_leave(&map->flags_lock); 5253 } while (_rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL 5254 LOCK_FL_ARGS) != 0); 5255 /* check if the lock is busy and back out if we won the race */ 5256 mtx_enter(&map->flags_lock); 5257 if (map->flags & VM_MAP_BUSY) { 5258 _rw_exit(&map->lock LOCK_FL_ARGS); 5259 goto tryagain; 5260 } 5261 mtx_leave(&map->flags_lock); 5262 } else { 5263 _mtx_enter(&map->mtx LOCK_FL_ARGS); 5264 } 5265 5266 map->timestamp++; 5267 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5268 uvm_tree_sanity(map, file, line); 5269 uvm_tree_size_chk(map, file, line); 5270 } 5271 5272 void 5273 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5274 { 5275 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5276 _rw_enter_read(&map->lock LOCK_FL_ARGS); 5277 else 5278 _mtx_enter(&map->mtx LOCK_FL_ARGS); 5279 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5280 uvm_tree_sanity(map, file, line); 5281 uvm_tree_size_chk(map, file, line); 5282 } 5283 5284 void 5285 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5286 { 5287 uvm_tree_sanity(map, file, line); 5288 uvm_tree_size_chk(map, file, line); 5289 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5290 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5291 _rw_exit(&map->lock LOCK_FL_ARGS); 5292 else 5293 _mtx_leave(&map->mtx LOCK_FL_ARGS); 5294 } 5295 5296 void 5297 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5298 { 5299 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5300 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5301 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5302 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5303 
_rw_exit_read(&map->lock LOCK_FL_ARGS); 5304 else 5305 _mtx_leave(&map->mtx LOCK_FL_ARGS); 5306 } 5307 5308 void 5309 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5310 { 5311 uvm_tree_sanity(map, file, line); 5312 uvm_tree_size_chk(map, file, line); 5313 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5314 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5315 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5316 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5317 _rw_enter(&map->lock, RW_DOWNGRADE LOCK_FL_ARGS); 5318 } 5319 5320 void 5321 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5322 { 5323 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5324 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5325 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5326 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5327 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5328 _rw_exit_read(&map->lock LOCK_FL_ARGS); 5329 _rw_enter_write(&map->lock LOCK_FL_ARGS); 5330 } 5331 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5332 uvm_tree_sanity(map, file, line); 5333 } 5334 5335 void 5336 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5337 { 5338 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5339 mtx_enter(&map->flags_lock); 5340 map->flags |= VM_MAP_BUSY; 5341 mtx_leave(&map->flags_lock); 5342 } 5343 5344 void 5345 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5346 { 5347 int oflags; 5348 5349 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5350 mtx_enter(&map->flags_lock); 5351 oflags = map->flags; 5352 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5353 mtx_leave(&map->flags_lock); 5354 if (oflags & VM_MAP_WANTLOCK) 5355 wakeup(&map->flags); 5356 } 5357 5358 #ifndef SMALL_KERNEL 5359 int 5360 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5361 size_t *lenp) 5362 { 5363 struct vm_map_entry *entry; 5364 vaddr_t start; 5365 int cnt, maxcnt, error = 0; 5366 5367 KASSERT(*lenp > 0); 5368 KASSERT((*lenp % sizeof(*kve)) == 0); 5369 cnt = 0; 5370 maxcnt = *lenp / sizeof(*kve); 5371 KASSERT(maxcnt > 0); 5372 5373 /* 5374 * Return only entries whose address is above the given base 5375 * address. This allows userland to iterate without knowing the 5376 * number of entries beforehand. 5377 */ 5378 start = (vaddr_t)kve[0].kve_start; 5379 5380 vm_map_lock(map); 5381 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5382 if (cnt == maxcnt) { 5383 error = ENOMEM; 5384 break; 5385 } 5386 if (start != 0 && entry->start < start) 5387 continue; 5388 kve->kve_start = entry->start; 5389 kve->kve_end = entry->end; 5390 kve->kve_guard = entry->guard; 5391 kve->kve_fspace = entry->fspace; 5392 kve->kve_fspace_augment = entry->fspace_augment; 5393 kve->kve_offset = entry->offset; 5394 kve->kve_wired_count = entry->wired_count; 5395 kve->kve_etype = entry->etype; 5396 kve->kve_protection = entry->protection; 5397 kve->kve_max_protection = entry->max_protection; 5398 kve->kve_advice = entry->advice; 5399 kve->kve_inheritance = entry->inheritance; 5400 kve->kve_flags = entry->flags; 5401 kve++; 5402 cnt++; 5403 } 5404 vm_map_unlock(map); 5405 5406 KASSERT(cnt <= maxcnt); 5407 5408 *lenp = sizeof(*kve) * cnt; 5409 return error; 5410 } 5411 #endif 5412 5413 5414 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5415 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5416 5417 5418 /* 5419 * MD code: vmspace allocator setup. 
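 * Each flavour currently installs a single uaddr_rnd allocator covering the
 * whole range, plus (unless SMALL_KERNEL) a dedicated brk/stack selector;
 * the preferred pivot/exe selectors remain disabled under "#if 0" for now.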
5420 */ 5421 5422 #ifdef __i386__ 5423 void 5424 uvm_map_setup_md(struct vm_map *map) 5425 { 5426 vaddr_t min, max; 5427 5428 min = map->min_offset; 5429 max = map->max_offset; 5430 5431 /* 5432 * Ensure the selectors will not try to manage page 0; 5433 * it's too special. 5434 */ 5435 if (min < VMMAP_MIN_ADDR) 5436 min = VMMAP_MIN_ADDR; 5437 5438 #if 0 /* Cool stuff, not yet */ 5439 /* Executable code is special. */ 5440 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5441 /* Place normal allocations beyond executable mappings. */ 5442 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5443 #else /* Crappy stuff, for now */ 5444 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5445 #endif 5446 5447 #ifndef SMALL_KERNEL 5448 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5449 #endif /* !SMALL_KERNEL */ 5450 } 5451 #elif __LP64__ 5452 void 5453 uvm_map_setup_md(struct vm_map *map) 5454 { 5455 vaddr_t min, max; 5456 5457 min = map->min_offset; 5458 max = map->max_offset; 5459 5460 /* 5461 * Ensure the selectors will not try to manage page 0; 5462 * it's too special. 5463 */ 5464 if (min < VMMAP_MIN_ADDR) 5465 min = VMMAP_MIN_ADDR; 5466 5467 #if 0 /* Cool stuff, not yet */ 5468 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5469 #else /* Crappy stuff, for now */ 5470 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5471 #endif 5472 5473 #ifndef SMALL_KERNEL 5474 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5475 #endif /* !SMALL_KERNEL */ 5476 } 5477 #else /* non-i386, 32 bit */ 5478 void 5479 uvm_map_setup_md(struct vm_map *map) 5480 { 5481 vaddr_t min, max; 5482 5483 min = map->min_offset; 5484 max = map->max_offset; 5485 5486 /* 5487 * Ensure the selectors will not try to manage page 0; 5488 * it's too special. 5489 */ 5490 if (min < VMMAP_MIN_ADDR) 5491 min = VMMAP_MIN_ADDR; 5492 5493 #if 0 /* Cool stuff, not yet */ 5494 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5495 #else /* Crappy stuff, for now */ 5496 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5497 #endif 5498 5499 #ifndef SMALL_KERNEL 5500 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5501 #endif /* !SMALL_KERNEL */ 5502 } 5503 #endif 5504