1 /* $OpenBSD: uvm_map.c,v 1.225 2016/09/16 02:35:42 dlg Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/sysctl.h> 94 95 #ifdef SYSVSHM 96 #include <sys/shm.h> 97 #endif 98 99 #include <uvm/uvm.h> 100 101 #ifdef DDB 102 #include <uvm/uvm_ddb.h> 103 #endif 104 105 #include <uvm/uvm_addr.h> 106 107 108 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 109 int uvm_mapent_isjoinable(struct vm_map*, 110 struct vm_map_entry*, struct vm_map_entry*); 111 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 112 struct vm_map_entry*, struct uvm_map_deadq*); 113 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 114 struct vm_map_entry*, struct uvm_map_deadq*); 115 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 116 struct vm_map_entry*, vaddr_t, vsize_t, int, 117 struct uvm_map_deadq*, struct vm_map_entry*); 118 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 119 void uvm_mapent_free(struct vm_map_entry*); 120 void uvm_unmap_kill_entry(struct vm_map*, 121 struct vm_map_entry*); 122 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 123 void uvm_mapent_mkfree(struct vm_map*, 124 struct vm_map_entry*, struct vm_map_entry**, 125 struct uvm_map_deadq*, boolean_t); 126 void uvm_map_pageable_pgon(struct vm_map*, 127 struct vm_map_entry*, struct vm_map_entry*, 128 vaddr_t, vaddr_t); 129 int uvm_map_pageable_wire(struct vm_map*, 130 struct vm_map_entry*, struct vm_map_entry*, 131 vaddr_t, vaddr_t, int); 132 void uvm_map_setup_entries(struct vm_map*); 133 void uvm_map_setup_md(struct vm_map*); 134 void uvm_map_teardown(struct vm_map*); 135 void uvm_map_vmspace_update(struct vm_map*, 136 struct uvm_map_deadq*, int); 137 void uvm_map_kmem_grow(struct vm_map*, 138 struct uvm_map_deadq*, vsize_t, int); 139 void uvm_map_freelist_update_clear(struct vm_map*, 140 struct uvm_map_deadq*); 141 void uvm_map_freelist_update_refill(struct vm_map *, int); 142 void uvm_map_freelist_update(struct vm_map*, 143 struct uvm_map_deadq*, vaddr_t, vaddr_t, 144 vaddr_t, vaddr_t, int); 145 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 146 vaddr_t, vaddr_t, int); 147 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 148 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 149 int); 150 int uvm_map_findspace(struct vm_map*, 151 struct vm_map_entry**, struct vm_map_entry**, 152 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 153 vaddr_t); 154 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 155 void uvm_map_addr_augment(struct vm_map_entry*); 156 157 /* 158 * Tree management functions. 
159 */ 160 161 static __inline void uvm_mapent_copy(struct vm_map_entry*, 162 struct vm_map_entry*); 163 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 164 const struct vm_map_entry*); 165 void uvm_mapent_free_insert(struct vm_map*, 166 struct uvm_addr_state*, struct vm_map_entry*); 167 void uvm_mapent_free_remove(struct vm_map*, 168 struct uvm_addr_state*, struct vm_map_entry*); 169 void uvm_mapent_addr_insert(struct vm_map*, 170 struct vm_map_entry*); 171 void uvm_mapent_addr_remove(struct vm_map*, 172 struct vm_map_entry*); 173 void uvm_map_splitentry(struct vm_map*, 174 struct vm_map_entry*, struct vm_map_entry*, 175 vaddr_t); 176 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 177 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 178 179 /* 180 * uvm_vmspace_fork helper functions. 181 */ 182 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 183 vsize_t, vm_prot_t, vm_prot_t, 184 struct vm_map_entry*, struct uvm_map_deadq*, int, 185 int); 186 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 187 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 188 struct vm_map_entry*, struct uvm_map_deadq*); 189 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 190 struct vm_map*, struct vm_map_entry*, 191 struct uvm_map_deadq*); 192 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 193 struct vm_map*, struct vm_map_entry*, 194 struct uvm_map_deadq*); 195 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 196 struct vm_map*, struct vm_map_entry*, 197 struct uvm_map_deadq*); 198 199 /* 200 * Tree validation. 201 */ 202 #ifdef VMMAP_DEBUG 203 void uvm_tree_assert(struct vm_map*, int, char*, 204 char*, int); 205 #define UVM_ASSERT(map, cond, file, line) \ 206 uvm_tree_assert((map), (cond), #cond, (file), (line)) 207 void uvm_tree_sanity(struct vm_map*, char*, int); 208 void uvm_tree_size_chk(struct vm_map*, char*, int); 209 void vmspace_validate(struct vm_map*); 210 #else 211 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 212 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 213 #define vmspace_validate(_map) do {} while (0) 214 #endif 215 216 /* 217 * All architectures will have pmap_prefer. 218 */ 219 #ifndef PMAP_PREFER 220 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 221 #define PMAP_PREFER_OFFSET(off) 0 222 #define PMAP_PREFER(addr, off) (addr) 223 #endif 224 225 226 /* 227 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 228 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 229 * 230 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 231 * each time. 232 */ 233 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 234 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 235 #define VM_MAP_KSIZE_ALLOCMUL 4 236 /* 237 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 238 * ahead. 239 */ 240 #define FSPACE_DELTA 8 241 /* 242 * Put allocations adjecent to previous allocations when the free-space tree 243 * is larger than FSPACE_COMPACT entries. 244 * 245 * Alignment and PMAP_PREFER may still cause the entry to not be fully 246 * adjecent. Note that this strategy reduces memory fragmentation (by leaving 247 * a large space before or after the allocation). 248 */ 249 #define FSPACE_COMPACT 128 250 /* 251 * Make the address selection skip at most this many bytes from the start of 252 * the free space in which the allocation takes place. 
253 * 254 * The main idea behind a randomized address space is that an attacker cannot 255 * know where to target his attack. Therefore, the location of objects must be 256 * as random as possible. However, the goal is not to create the most sparse 257 * map that is possible. 258 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 259 * sizes, thereby reducing the sparseness. The biggest randomization comes 260 * from fragmentation, i.e. FSPACE_COMPACT. 261 */ 262 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 263 /* 264 * Allow for small gaps in the overflow areas. 265 * Gap size is in bytes and does not have to be a multiple of page-size. 266 */ 267 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 268 269 /* auto-allocate address lower bound */ 270 #define VMMAP_MIN_ADDR PAGE_SIZE 271 272 273 #ifdef DEADBEEF0 274 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 275 #else 276 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 277 #endif 278 279 #ifdef DEBUG 280 int uvm_map_printlocks = 0; 281 282 #define LPRINTF(_args) \ 283 do { \ 284 if (uvm_map_printlocks) \ 285 printf _args; \ 286 } while (0) 287 #else 288 #define LPRINTF(_args) do {} while (0) 289 #endif 290 291 static struct mutex uvm_kmapent_mtx; 292 static struct timeval uvm_kmapent_last_warn_time; 293 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 294 295 const char vmmapbsy[] = "vmmapbsy"; 296 297 /* 298 * pool for vmspace structures. 299 */ 300 struct pool uvm_vmspace_pool; 301 302 /* 303 * pool for dynamically-allocated map entries. 304 */ 305 struct pool uvm_map_entry_pool; 306 struct pool uvm_map_entry_kmem_pool; 307 308 /* 309 * This global represents the end of the kernel virtual address 310 * space. If we want to exceed this, we must grow the kernel 311 * virtual address space dynamically. 312 * 313 * Note, this variable is locked by kernel_map's lock. 314 */ 315 vaddr_t uvm_maxkaddr; 316 317 /* 318 * Locking predicate. 319 */ 320 #define UVM_MAP_REQ_WRITE(_map) \ 321 do { \ 322 if ((_map)->ref_count > 0) { \ 323 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 324 rw_assert_wrlock(&(_map)->lock); \ 325 else \ 326 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 327 } \ 328 } while (0) 329 330 /* 331 * Tree describing entries by address. 332 * 333 * Addresses are unique. 334 * Entries with start == end may only exist if they are the first entry 335 * (sorted by address) within a free-memory tree. 336 */ 337 338 static inline int 339 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 340 const struct vm_map_entry *e2) 341 { 342 return e1->start < e2->start ? -1 : e1->start > e2->start; 343 } 344 345 /* 346 * Copy mapentry. 347 */ 348 static __inline void 349 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 350 { 351 caddr_t csrc, cdst; 352 size_t sz; 353 354 csrc = (caddr_t)src; 355 cdst = (caddr_t)dst; 356 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 357 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 358 359 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 360 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 361 memcpy(cdst, csrc, sz); 362 } 363 364 /* 365 * Handle free-list insertion. 366 */ 367 void 368 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 369 struct vm_map_entry *entry) 370 { 371 const struct uvm_addr_functions *fun; 372 #ifdef VMMAP_DEBUG 373 vaddr_t min, max, bound; 374 #endif 375 376 #ifdef VMMAP_DEBUG 377 /* 378 * Boundary check. 
379 * Boundaries are folded if they go on the same free list. 380 */ 381 min = VMMAP_FREE_START(entry); 382 max = VMMAP_FREE_END(entry); 383 384 while (min < max) { 385 bound = uvm_map_boundary(map, min, max); 386 KASSERT(uvm_map_uaddr(map, min) == uaddr); 387 min = bound; 388 } 389 #endif 390 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 391 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 392 393 UVM_MAP_REQ_WRITE(map); 394 395 /* Actual insert: forward to uaddr pointer. */ 396 if (uaddr != NULL) { 397 fun = uaddr->uaddr_functions; 398 KDASSERT(fun != NULL); 399 if (fun->uaddr_free_insert != NULL) 400 (*fun->uaddr_free_insert)(map, uaddr, entry); 401 entry->etype |= UVM_ET_FREEMAPPED; 402 } 403 404 /* Update fspace augmentation. */ 405 uvm_map_addr_augment(entry); 406 } 407 408 /* 409 * Handle free-list removal. 410 */ 411 void 412 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 413 struct vm_map_entry *entry) 414 { 415 const struct uvm_addr_functions *fun; 416 417 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 418 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 419 UVM_MAP_REQ_WRITE(map); 420 421 if (uaddr != NULL) { 422 fun = uaddr->uaddr_functions; 423 if (fun->uaddr_free_remove != NULL) 424 (*fun->uaddr_free_remove)(map, uaddr, entry); 425 entry->etype &= ~UVM_ET_FREEMAPPED; 426 } 427 } 428 429 /* 430 * Handle address tree insertion. 431 */ 432 void 433 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 434 { 435 struct vm_map_entry *res; 436 437 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 438 panic("uvm_mapent_addr_insert: entry still in addr list"); 439 KDASSERT(entry->start <= entry->end); 440 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 441 (entry->end & (vaddr_t)PAGE_MASK) == 0); 442 443 UVM_MAP_REQ_WRITE(map); 444 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 445 if (res != NULL) { 446 panic("uvm_mapent_addr_insert: map %p entry %p " 447 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 448 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 449 map, entry, 450 entry->start, entry->end, entry->guard, entry->fspace, 451 res, res->start, res->end, res->guard, res->fspace); 452 } 453 } 454 455 /* 456 * Handle address tree removal. 457 */ 458 void 459 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 460 { 461 struct vm_map_entry *res; 462 463 UVM_MAP_REQ_WRITE(map); 464 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 465 if (res != entry) 466 panic("uvm_mapent_addr_remove"); 467 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 468 } 469 470 /* 471 * uvm_map_reference: add reference to a map 472 * 473 * XXX check map reference counter lock 474 */ 475 #define uvm_map_reference(_map) \ 476 do { \ 477 map->ref_count++; \ 478 } while (0) 479 480 /* 481 * Calculate the dused delta. 482 */ 483 vsize_t 484 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 485 { 486 struct vmspace *vm; 487 vsize_t sz; 488 vaddr_t lmax; 489 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 490 491 KASSERT(map->flags & VM_MAP_ISVMSPACE); 492 vm = (struct vmspace *)map; 493 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 494 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 495 496 sz = 0; 497 while (min != max) { 498 lmax = max; 499 if (min < stack_begin && lmax > stack_begin) 500 lmax = stack_begin; 501 else if (min < stack_end && lmax > stack_end) 502 lmax = stack_end; 503 504 if (min >= stack_begin && min < stack_end) { 505 /* nothing */ 506 } else 507 sz += lmax - min; 508 min = lmax; 509 } 510 511 return sz >> PAGE_SHIFT; 512 } 513 514 /* 515 * Find the entry describing the given address. 516 */ 517 struct vm_map_entry* 518 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 519 { 520 struct vm_map_entry *iter; 521 522 iter = RBT_ROOT(uvm_map_addr, atree); 523 while (iter != NULL) { 524 if (iter->start > addr) 525 iter = RBT_LEFT(uvm_map_addr, iter); 526 else if (VMMAP_FREE_END(iter) <= addr) 527 iter = RBT_RIGHT(uvm_map_addr, iter); 528 else 529 return iter; 530 } 531 return NULL; 532 } 533 534 /* 535 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 536 * 537 * Push dead entries into a linked list. 538 * Since the linked list abuses the address tree for storage, the entry 539 * may not be linked in a map. 540 * 541 * *head must be initialized to NULL before the first call to this macro. 542 * uvm_unmap_detach(*head, 0) will remove dead entries. 543 */ 544 static __inline void 545 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 546 { 547 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 548 } 549 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 550 dead_entry_push((_headptr), (_entry)) 551 552 /* 553 * Helper function for uvm_map_findspace_tree. 554 * 555 * Given allocation constraints and pmap constraints, finds the 556 * lowest and highest address in a range that can be used for the 557 * allocation. 558 * 559 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 560 * 561 * 562 * Big chunk of math with a seasoning of dragons. 563 */ 564 int 565 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 566 struct vm_map_entry *sel, vaddr_t align, 567 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 568 { 569 vaddr_t sel_min, sel_max; 570 #ifdef PMAP_PREFER 571 vaddr_t pmap_min, pmap_max; 572 #endif /* PMAP_PREFER */ 573 #ifdef DIAGNOSTIC 574 int bad; 575 #endif /* DIAGNOSTIC */ 576 577 sel_min = VMMAP_FREE_START(sel); 578 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 579 580 #ifdef PMAP_PREFER 581 582 /* 583 * There are two special cases, in which we can satisfy the align 584 * requirement and the pmap_prefer requirement. 585 * - when pmap_off == 0, we always select the largest of the two 586 * - when pmap_off % align == 0 and pmap_align > align, we simply 587 * satisfy the pmap_align requirement and automatically 588 * satisfy the align requirement. 589 */ 590 if (align > PAGE_SIZE && 591 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 592 /* 593 * Simple case: only use align. 594 */ 595 sel_min = roundup(sel_min, align); 596 sel_max &= ~(align - 1); 597 598 if (sel_min > sel_max) 599 return ENOMEM; 600 601 /* Correct for bias. 
*/ 602 if (sel_max - sel_min > FSPACE_BIASGAP) { 603 if (bias > 0) { 604 sel_min = sel_max - FSPACE_BIASGAP; 605 sel_min = roundup(sel_min, align); 606 } else if (bias < 0) { 607 sel_max = sel_min + FSPACE_BIASGAP; 608 sel_max &= ~(align - 1); 609 } 610 } 611 } else if (pmap_align != 0) { 612 /* 613 * Special case: satisfy both pmap_prefer and 614 * align argument. 615 */ 616 pmap_max = sel_max & ~(pmap_align - 1); 617 pmap_min = sel_min; 618 if (pmap_max < sel_min) 619 return ENOMEM; 620 621 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 622 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 623 pmap_min = pmap_max - FSPACE_BIASGAP; 624 /* Align pmap_min. */ 625 pmap_min &= ~(pmap_align - 1); 626 if (pmap_min < sel_min) 627 pmap_min += pmap_align; 628 if (pmap_min > pmap_max) 629 return ENOMEM; 630 631 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 632 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 633 pmap_max = (pmap_min + FSPACE_BIASGAP) & 634 ~(pmap_align - 1); 635 } 636 if (pmap_min > pmap_max) 637 return ENOMEM; 638 639 /* Apply pmap prefer offset. */ 640 pmap_max |= pmap_off; 641 if (pmap_max > sel_max) 642 pmap_max -= pmap_align; 643 pmap_min |= pmap_off; 644 if (pmap_min < sel_min) 645 pmap_min += pmap_align; 646 647 /* 648 * Fixup: it's possible that pmap_min and pmap_max 649 * cross eachother. In this case, try to find one 650 * address that is allowed. 651 * (This usually happens in biased case.) 652 */ 653 if (pmap_min > pmap_max) { 654 if (pmap_min < sel_max) 655 pmap_max = pmap_min; 656 else if (pmap_max > sel_min) 657 pmap_min = pmap_max; 658 else 659 return ENOMEM; 660 } 661 662 /* Internal validation. */ 663 KDASSERT(pmap_min <= pmap_max); 664 665 sel_min = pmap_min; 666 sel_max = pmap_max; 667 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 668 sel_min = sel_max - FSPACE_BIASGAP; 669 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 670 sel_max = sel_min + FSPACE_BIASGAP; 671 672 #else 673 674 if (align > PAGE_SIZE) { 675 sel_min = roundup(sel_min, align); 676 sel_max &= ~(align - 1); 677 if (sel_min > sel_max) 678 return ENOMEM; 679 680 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 681 if (bias > 0) { 682 sel_min = roundup(sel_max - FSPACE_BIASGAP, 683 align); 684 } else { 685 sel_max = (sel_min + FSPACE_BIASGAP) & 686 ~(align - 1); 687 } 688 } 689 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 690 sel_min = sel_max - FSPACE_BIASGAP; 691 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 692 sel_max = sel_min + FSPACE_BIASGAP; 693 694 #endif 695 696 if (sel_min > sel_max) 697 return ENOMEM; 698 699 #ifdef DIAGNOSTIC 700 bad = 0; 701 /* Lower boundary check. */ 702 if (sel_min < VMMAP_FREE_START(sel)) { 703 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 704 sel_min, VMMAP_FREE_START(sel)); 705 bad++; 706 } 707 /* Upper boundary check. */ 708 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 709 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 710 sel_max, 711 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 712 bad++; 713 } 714 /* Lower boundary alignment. */ 715 if (align != 0 && (sel_min & (align - 1)) != 0) { 716 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 717 sel_min, align); 718 bad++; 719 } 720 /* Upper boundary alignment. */ 721 if (align != 0 && (sel_max & (align - 1)) != 0) { 722 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 723 sel_max, align); 724 bad++; 725 } 726 /* Lower boundary PMAP_PREFER check. 
*/ 727 if (pmap_align != 0 && align == 0 && 728 (sel_min & (pmap_align - 1)) != pmap_off) { 729 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 730 sel_min, sel_min & (pmap_align - 1), pmap_off); 731 bad++; 732 } 733 /* Upper boundary PMAP_PREFER check. */ 734 if (pmap_align != 0 && align == 0 && 735 (sel_max & (pmap_align - 1)) != pmap_off) { 736 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 737 sel_max, sel_max & (pmap_align - 1), pmap_off); 738 bad++; 739 } 740 741 if (bad) { 742 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 743 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 744 "bias = %d, " 745 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 746 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 747 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 748 } 749 #endif /* DIAGNOSTIC */ 750 751 *min = sel_min; 752 *max = sel_max; 753 return 0; 754 } 755 756 /* 757 * Test if memory starting at addr with sz bytes is free. 758 * 759 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 760 * the space. 761 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 762 */ 763 int 764 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 765 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 766 vaddr_t addr, vsize_t sz) 767 { 768 struct uvm_addr_state *free; 769 struct uvm_map_addr *atree; 770 struct vm_map_entry *i, *i_end; 771 772 if (addr + sz < addr) 773 return 0; 774 775 /* 776 * Kernel memory above uvm_maxkaddr is considered unavailable. 777 */ 778 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 779 if (addr + sz > uvm_maxkaddr) 780 return 0; 781 } 782 783 atree = &map->addr; 784 785 /* 786 * Fill in first, last, so they point at the entries containing the 787 * first and last address of the range. 788 * Note that if they are not NULL, we don't perform the lookup. 789 */ 790 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 791 if (*start_ptr == NULL) { 792 *start_ptr = uvm_map_entrybyaddr(atree, addr); 793 if (*start_ptr == NULL) 794 return 0; 795 } else 796 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 797 if (*end_ptr == NULL) { 798 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 799 *end_ptr = *start_ptr; 800 else { 801 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 802 if (*end_ptr == NULL) 803 return 0; 804 } 805 } else 806 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 807 808 /* Validation. */ 809 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 810 KDASSERT((*start_ptr)->start <= addr && 811 VMMAP_FREE_END(*start_ptr) > addr && 812 (*end_ptr)->start < addr + sz && 813 VMMAP_FREE_END(*end_ptr) >= addr + sz); 814 815 /* 816 * Check the none of the entries intersects with <addr, addr+sz>. 817 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 818 * considered unavailable unless called by those allocators. 819 */ 820 i = *start_ptr; 821 i_end = RBT_NEXT(uvm_map_addr, *end_ptr); 822 for (; i != i_end; 823 i = RBT_NEXT(uvm_map_addr, i)) { 824 if (i->start != i->end && i->end > addr) 825 return 0; 826 827 /* 828 * uaddr_exe and uaddr_brk_stack may only be used 829 * by these allocators and the NULL uaddr (i.e. no 830 * uaddr). 831 * Reject if this requirement is not met. 
832 */ 833 if (uaddr != NULL) { 834 free = uvm_map_uaddr_e(map, i); 835 836 if (uaddr != free && free != NULL && 837 (free == map->uaddr_exe || 838 free == map->uaddr_brk_stack)) 839 return 0; 840 } 841 } 842 843 return -1; 844 } 845 846 /* 847 * Invoke each address selector until an address is found. 848 * Will not invoke uaddr_exe. 849 */ 850 int 851 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 852 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 853 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 854 { 855 struct uvm_addr_state *uaddr; 856 int i; 857 858 /* 859 * Allocation for sz bytes at any address, 860 * using the addr selectors in order. 861 */ 862 for (i = 0; i < nitems(map->uaddr_any); i++) { 863 uaddr = map->uaddr_any[i]; 864 865 if (uvm_addr_invoke(map, uaddr, first, last, 866 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 867 return 0; 868 } 869 870 /* Fall back to brk() and stack() address selectors. */ 871 uaddr = map->uaddr_brk_stack; 872 if (uvm_addr_invoke(map, uaddr, first, last, 873 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 874 return 0; 875 876 return ENOMEM; 877 } 878 879 /* Calculate entry augmentation value. */ 880 vsize_t 881 uvm_map_addr_augment_get(struct vm_map_entry *entry) 882 { 883 vsize_t augment; 884 struct vm_map_entry *left, *right; 885 886 augment = entry->fspace; 887 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 888 augment = MAX(augment, left->fspace_augment); 889 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 890 augment = MAX(augment, right->fspace_augment); 891 return augment; 892 } 893 894 /* 895 * Update augmentation data in entry. 896 */ 897 void 898 uvm_map_addr_augment(struct vm_map_entry *entry) 899 { 900 vsize_t augment; 901 902 while (entry != NULL) { 903 /* Calculate value for augmentation. */ 904 augment = uvm_map_addr_augment_get(entry); 905 906 /* 907 * Descend update. 908 * Once we find an entry that already has the correct value, 909 * stop, since it means all its parents will use the correct 910 * value too. 911 */ 912 if (entry->fspace_augment == augment) 913 return; 914 entry->fspace_augment = augment; 915 entry = RBT_PARENT(uvm_map_addr, entry); 916 } 917 } 918 919 /* 920 * uvm_mapanon: establish a valid mapping in map for an anon 921 * 922 * => *addr and sz must be a multiple of PAGE_SIZE. 923 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 924 * => map must be unlocked. 925 * 926 * => align: align vaddr, must be a power-of-2. 927 * Align is only a hint and will be ignored if the alignment fails. 928 */ 929 int 930 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 931 vsize_t align, unsigned int flags) 932 { 933 struct vm_map_entry *first, *last, *entry, *new; 934 struct uvm_map_deadq dead; 935 vm_prot_t prot; 936 vm_prot_t maxprot; 937 vm_inherit_t inherit; 938 int advice; 939 int error; 940 vaddr_t pmap_align, pmap_offset; 941 vaddr_t hint; 942 943 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 944 KASSERT(map != kernel_map); 945 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 946 947 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 948 splassert(IPL_NONE); 949 950 /* 951 * We use pmap_align and pmap_offset as alignment and offset variables. 952 * 953 * Because the align parameter takes precedence over pmap prefer, 954 * the pmap_align will need to be set to align, with pmap_offset = 0, 955 * if pmap_prefer will not align. 956 */ 957 pmap_align = MAX(align, PAGE_SIZE); 958 pmap_offset = 0; 959 960 /* Decode parameters. 
*/ 961 prot = UVM_PROTECTION(flags); 962 maxprot = UVM_MAXPROTECTION(flags); 963 advice = UVM_ADVICE(flags); 964 inherit = UVM_INHERIT(flags); 965 error = 0; 966 hint = trunc_page(*addr); 967 TAILQ_INIT(&dead); 968 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 969 KASSERT((align & (align - 1)) == 0); 970 971 /* Check protection. */ 972 if ((prot & maxprot) != prot) 973 return EACCES; 974 975 /* 976 * Before grabbing the lock, allocate a map entry for later 977 * use to ensure we don't wait for memory while holding the 978 * vm_map_lock. 979 */ 980 new = uvm_mapent_alloc(map, flags); 981 if (new == NULL) 982 return(ENOMEM); 983 984 if (flags & UVM_FLAG_TRYLOCK) { 985 if (vm_map_lock_try(map) == FALSE) { 986 error = EFAULT; 987 goto out; 988 } 989 } else 990 vm_map_lock(map); 991 992 first = last = NULL; 993 if (flags & UVM_FLAG_FIXED) { 994 /* 995 * Fixed location. 996 * 997 * Note: we ignore align, pmap_prefer. 998 * Fill in first, last and *addr. 999 */ 1000 KASSERT((*addr & PAGE_MASK) == 0); 1001 1002 /* Check that the space is available. */ 1003 if (flags & UVM_FLAG_UNMAP) 1004 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1005 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1006 error = ENOMEM; 1007 goto unlock; 1008 } 1009 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1010 (align == 0 || (*addr & (align - 1)) == 0) && 1011 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1012 /* 1013 * Address used as hint. 1014 * 1015 * Note: we enforce the alignment restriction, 1016 * but ignore pmap_prefer. 1017 */ 1018 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1019 /* Run selection algorithm for executables. */ 1020 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1021 addr, sz, pmap_align, pmap_offset, prot, hint); 1022 1023 if (error != 0) 1024 goto unlock; 1025 } else { 1026 /* Update freelists from vmspace. */ 1027 uvm_map_vmspace_update(map, &dead, flags); 1028 1029 error = uvm_map_findspace(map, &first, &last, addr, sz, 1030 pmap_align, pmap_offset, prot, hint); 1031 1032 if (error != 0) 1033 goto unlock; 1034 } 1035 1036 /* Double-check if selected address doesn't cause overflow. */ 1037 if (*addr + sz < *addr) { 1038 error = ENOMEM; 1039 goto unlock; 1040 } 1041 1042 /* If we only want a query, return now. */ 1043 if (flags & UVM_FLAG_QUERY) { 1044 error = 0; 1045 goto unlock; 1046 } 1047 1048 /* 1049 * Create new entry. 1050 * first and last may be invalidated after this call. 1051 */ 1052 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1053 new); 1054 if (entry == NULL) { 1055 error = ENOMEM; 1056 goto unlock; 1057 } 1058 new = NULL; 1059 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1060 entry->object.uvm_obj = NULL; 1061 entry->offset = 0; 1062 entry->protection = prot; 1063 entry->max_protection = maxprot; 1064 entry->inheritance = inherit; 1065 entry->wired_count = 0; 1066 entry->advice = advice; 1067 if (flags & UVM_FLAG_NOFAULT) 1068 entry->etype |= UVM_ET_NOFAULT; 1069 if (flags & UVM_FLAG_COPYONW) { 1070 entry->etype |= UVM_ET_COPYONWRITE; 1071 if ((flags & UVM_FLAG_OVERLAY) == 0) 1072 entry->etype |= UVM_ET_NEEDSCOPY; 1073 } 1074 if (flags & UVM_FLAG_OVERLAY) { 1075 KERNEL_LOCK(); 1076 entry->aref.ar_pageoff = 0; 1077 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1078 KERNEL_UNLOCK(); 1079 } 1080 1081 /* Update map and process statistics. 
*/ 1082 map->size += sz; 1083 ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz); 1084 1085 unlock: 1086 vm_map_unlock(map); 1087 1088 /* 1089 * Remove dead entries. 1090 * 1091 * Dead entries may be the result of merging. 1092 * uvm_map_mkentry may also create dead entries, when it attempts to 1093 * destroy free-space entries. 1094 */ 1095 uvm_unmap_detach(&dead, 0); 1096 out: 1097 if (new) 1098 uvm_mapent_free(new); 1099 return error; 1100 } 1101 1102 /* 1103 * uvm_map: establish a valid mapping in map 1104 * 1105 * => *addr and sz must be a multiple of PAGE_SIZE. 1106 * => map must be unlocked. 1107 * => <uobj,uoffset> value meanings (4 cases): 1108 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1109 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1110 * [3] <uobj,uoffset> == normal mapping 1111 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1112 * 1113 * case [4] is for kernel mappings where we don't know the offset until 1114 * we've found a virtual address. note that kernel object offsets are 1115 * always relative to vm_map_min(kernel_map). 1116 * 1117 * => align: align vaddr, must be a power-of-2. 1118 * Align is only a hint and will be ignored if the alignment fails. 1119 */ 1120 int 1121 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1122 struct uvm_object *uobj, voff_t uoffset, 1123 vsize_t align, unsigned int flags) 1124 { 1125 struct vm_map_entry *first, *last, *entry, *new; 1126 struct uvm_map_deadq dead; 1127 vm_prot_t prot; 1128 vm_prot_t maxprot; 1129 vm_inherit_t inherit; 1130 int advice; 1131 int error; 1132 vaddr_t pmap_align, pmap_offset; 1133 vaddr_t hint; 1134 1135 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1136 splassert(IPL_NONE); 1137 else 1138 splassert(IPL_VM); 1139 1140 /* 1141 * We use pmap_align and pmap_offset as alignment and offset variables. 1142 * 1143 * Because the align parameter takes precedence over pmap prefer, 1144 * the pmap_align will need to be set to align, with pmap_offset = 0, 1145 * if pmap_prefer will not align. 1146 */ 1147 if (uoffset == UVM_UNKNOWN_OFFSET) { 1148 pmap_align = MAX(align, PAGE_SIZE); 1149 pmap_offset = 0; 1150 } else { 1151 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1152 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1153 1154 if (align == 0 || 1155 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1156 /* pmap_offset satisfies align, no change. */ 1157 } else { 1158 /* Align takes precedence over pmap prefer. */ 1159 pmap_align = align; 1160 pmap_offset = 0; 1161 } 1162 } 1163 1164 /* Decode parameters. */ 1165 prot = UVM_PROTECTION(flags); 1166 maxprot = UVM_MAXPROTECTION(flags); 1167 advice = UVM_ADVICE(flags); 1168 inherit = UVM_INHERIT(flags); 1169 error = 0; 1170 hint = trunc_page(*addr); 1171 TAILQ_INIT(&dead); 1172 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1173 KASSERT((align & (align - 1)) == 0); 1174 1175 /* Holes are incompatible with other types of mappings. */ 1176 if (flags & UVM_FLAG_HOLE) { 1177 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1178 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1179 } 1180 1181 /* Unset hint for kernel_map non-fixed allocations. */ 1182 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1183 hint = 0; 1184 1185 /* Check protection. 
*/ 1186 if ((prot & maxprot) != prot) 1187 return EACCES; 1188 1189 if (map == kernel_map && 1190 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1191 panic("uvm_map: kernel map W^X violation requested"); 1192 1193 /* 1194 * Before grabbing the lock, allocate a map entry for later 1195 * use to ensure we don't wait for memory while holding the 1196 * vm_map_lock. 1197 */ 1198 new = uvm_mapent_alloc(map, flags); 1199 if (new == NULL) 1200 return(ENOMEM); 1201 1202 if (flags & UVM_FLAG_TRYLOCK) { 1203 if (vm_map_lock_try(map) == FALSE) { 1204 error = EFAULT; 1205 goto out; 1206 } 1207 } else { 1208 vm_map_lock(map); 1209 } 1210 1211 first = last = NULL; 1212 if (flags & UVM_FLAG_FIXED) { 1213 /* 1214 * Fixed location. 1215 * 1216 * Note: we ignore align, pmap_prefer. 1217 * Fill in first, last and *addr. 1218 */ 1219 KASSERT((*addr & PAGE_MASK) == 0); 1220 1221 /* 1222 * Grow pmap to include allocated address. 1223 * If the growth fails, the allocation will fail too. 1224 */ 1225 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1226 uvm_maxkaddr < (*addr + sz)) { 1227 uvm_map_kmem_grow(map, &dead, 1228 *addr + sz - uvm_maxkaddr, flags); 1229 } 1230 1231 /* Check that the space is available. */ 1232 if (flags & UVM_FLAG_UNMAP) 1233 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1234 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1235 error = ENOMEM; 1236 goto unlock; 1237 } 1238 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1239 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1240 (align == 0 || (*addr & (align - 1)) == 0) && 1241 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1242 /* 1243 * Address used as hint. 1244 * 1245 * Note: we enforce the alignment restriction, 1246 * but ignore pmap_prefer. 1247 */ 1248 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1249 /* Run selection algorithm for executables. */ 1250 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1251 addr, sz, pmap_align, pmap_offset, prot, hint); 1252 1253 /* Grow kernel memory and try again. */ 1254 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1255 uvm_map_kmem_grow(map, &dead, sz, flags); 1256 1257 error = uvm_addr_invoke(map, map->uaddr_exe, 1258 &first, &last, addr, sz, 1259 pmap_align, pmap_offset, prot, hint); 1260 } 1261 1262 if (error != 0) 1263 goto unlock; 1264 } else { 1265 /* Update freelists from vmspace. */ 1266 if (map->flags & VM_MAP_ISVMSPACE) 1267 uvm_map_vmspace_update(map, &dead, flags); 1268 1269 error = uvm_map_findspace(map, &first, &last, addr, sz, 1270 pmap_align, pmap_offset, prot, hint); 1271 1272 /* Grow kernel memory and try again. */ 1273 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1274 uvm_map_kmem_grow(map, &dead, sz, flags); 1275 1276 error = uvm_map_findspace(map, &first, &last, addr, sz, 1277 pmap_align, pmap_offset, prot, hint); 1278 } 1279 1280 if (error != 0) 1281 goto unlock; 1282 } 1283 1284 /* Double-check if selected address doesn't cause overflow. */ 1285 if (*addr + sz < *addr) { 1286 error = ENOMEM; 1287 goto unlock; 1288 } 1289 1290 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1291 uvm_maxkaddr >= *addr + sz); 1292 1293 /* If we only want a query, return now. 
*/ 1294 if (flags & UVM_FLAG_QUERY) { 1295 error = 0; 1296 goto unlock; 1297 } 1298 1299 if (uobj == NULL) 1300 uoffset = 0; 1301 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1302 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1303 uoffset = *addr - vm_map_min(kernel_map); 1304 } 1305 1306 /* 1307 * Create new entry. 1308 * first and last may be invalidated after this call. 1309 */ 1310 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1311 new); 1312 if (entry == NULL) { 1313 error = ENOMEM; 1314 goto unlock; 1315 } 1316 new = NULL; 1317 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1318 entry->object.uvm_obj = uobj; 1319 entry->offset = uoffset; 1320 entry->protection = prot; 1321 entry->max_protection = maxprot; 1322 entry->inheritance = inherit; 1323 entry->wired_count = 0; 1324 entry->advice = advice; 1325 if (uobj) 1326 entry->etype |= UVM_ET_OBJ; 1327 else if (flags & UVM_FLAG_HOLE) 1328 entry->etype |= UVM_ET_HOLE; 1329 if (flags & UVM_FLAG_NOFAULT) 1330 entry->etype |= UVM_ET_NOFAULT; 1331 if (flags & UVM_FLAG_COPYONW) { 1332 entry->etype |= UVM_ET_COPYONWRITE; 1333 if ((flags & UVM_FLAG_OVERLAY) == 0) 1334 entry->etype |= UVM_ET_NEEDSCOPY; 1335 } 1336 if (flags & UVM_FLAG_OVERLAY) { 1337 entry->aref.ar_pageoff = 0; 1338 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1339 } 1340 1341 /* Update map and process statistics. */ 1342 if (!(flags & UVM_FLAG_HOLE)) { 1343 map->size += sz; 1344 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1345 ((struct vmspace *)map)->vm_dused += 1346 uvmspace_dused(map, *addr, *addr + sz); 1347 } 1348 } 1349 1350 /* 1351 * Try to merge entry. 1352 * 1353 * Userland allocations are kept separated most of the time. 1354 * Forego the effort of merging what most of the time can't be merged 1355 * and only try the merge if it concerns a kernel entry. 1356 */ 1357 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1358 (map->flags & VM_MAP_ISVMSPACE) == 0) 1359 uvm_mapent_tryjoin(map, entry, &dead); 1360 1361 unlock: 1362 vm_map_unlock(map); 1363 1364 /* 1365 * Remove dead entries. 1366 * 1367 * Dead entries may be the result of merging. 1368 * uvm_map_mkentry may also create dead entries, when it attempts to 1369 * destroy free-space entries. 1370 */ 1371 if (map->flags & VM_MAP_INTRSAFE) 1372 uvm_unmap_detach_intrsafe(&dead); 1373 else 1374 uvm_unmap_detach(&dead, 0); 1375 out: 1376 if (new) 1377 uvm_mapent_free(new); 1378 return error; 1379 } 1380 1381 /* 1382 * True iff e1 and e2 can be joined together. 1383 */ 1384 int 1385 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1386 struct vm_map_entry *e2) 1387 { 1388 KDASSERT(e1 != NULL && e2 != NULL); 1389 1390 /* Must be the same entry type and not have free memory between. */ 1391 if (e1->etype != e2->etype || e1->end != e2->start) 1392 return 0; 1393 1394 /* Submaps are never joined. */ 1395 if (UVM_ET_ISSUBMAP(e1)) 1396 return 0; 1397 1398 /* Never merge wired memory. */ 1399 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1400 return 0; 1401 1402 /* Protection, inheritance and advice must be equal. */ 1403 if (e1->protection != e2->protection || 1404 e1->max_protection != e2->max_protection || 1405 e1->inheritance != e2->inheritance || 1406 e1->advice != e2->advice) 1407 return 0; 1408 1409 /* If uvm_object: object itself and offsets within object must match. 
*/ 1410 if (UVM_ET_ISOBJ(e1)) { 1411 if (e1->object.uvm_obj != e2->object.uvm_obj) 1412 return 0; 1413 if (e1->offset + (e1->end - e1->start) != e2->offset) 1414 return 0; 1415 } 1416 1417 /* 1418 * Cannot join shared amaps. 1419 * Note: no need to lock amap to look at refs, since we don't care 1420 * about its exact value. 1421 * If it is 1 (i.e. we have the only reference) it will stay there. 1422 */ 1423 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1424 return 0; 1425 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1426 return 0; 1427 1428 /* Apprently, e1 and e2 match. */ 1429 return 1; 1430 } 1431 1432 /* 1433 * Join support function. 1434 * 1435 * Returns the merged entry on succes. 1436 * Returns NULL if the merge failed. 1437 */ 1438 struct vm_map_entry* 1439 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1440 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1441 { 1442 struct uvm_addr_state *free; 1443 1444 /* 1445 * Merging is not supported for map entries that 1446 * contain an amap in e1. This should never happen 1447 * anyway, because only kernel entries are merged. 1448 * These do not contain amaps. 1449 * e2 contains no real information in its amap, 1450 * so it can be erased immediately. 1451 */ 1452 KASSERT(e1->aref.ar_amap == NULL); 1453 1454 /* 1455 * Don't drop obj reference: 1456 * uvm_unmap_detach will do this for us. 1457 */ 1458 free = uvm_map_uaddr_e(map, e1); 1459 uvm_mapent_free_remove(map, free, e1); 1460 1461 free = uvm_map_uaddr_e(map, e2); 1462 uvm_mapent_free_remove(map, free, e2); 1463 uvm_mapent_addr_remove(map, e2); 1464 e1->end = e2->end; 1465 e1->guard = e2->guard; 1466 e1->fspace = e2->fspace; 1467 uvm_mapent_free_insert(map, free, e1); 1468 1469 DEAD_ENTRY_PUSH(dead, e2); 1470 return e1; 1471 } 1472 1473 /* 1474 * Attempt forward and backward joining of entry. 1475 * 1476 * Returns entry after joins. 1477 * We are guaranteed that the amap of entry is either non-existant or 1478 * has never been used. 1479 */ 1480 struct vm_map_entry* 1481 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1482 struct uvm_map_deadq *dead) 1483 { 1484 struct vm_map_entry *other; 1485 struct vm_map_entry *merged; 1486 1487 /* Merge with previous entry. */ 1488 other = RBT_PREV(uvm_map_addr, entry); 1489 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1490 merged = uvm_mapent_merge(map, other, entry, dead); 1491 if (merged) 1492 entry = merged; 1493 } 1494 1495 /* 1496 * Merge with next entry. 1497 * 1498 * Because amap can only extend forward and the next entry 1499 * probably contains sensible info, only perform forward merging 1500 * in the absence of an amap. 1501 */ 1502 other = RBT_NEXT(uvm_map_addr, entry); 1503 if (other && entry->aref.ar_amap == NULL && 1504 other->aref.ar_amap == NULL && 1505 uvm_mapent_isjoinable(map, entry, other)) { 1506 merged = uvm_mapent_merge(map, entry, other, dead); 1507 if (merged) 1508 entry = merged; 1509 } 1510 1511 return entry; 1512 } 1513 1514 /* 1515 * Kill entries that are no longer in a map. 1516 */ 1517 void 1518 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1519 { 1520 struct vm_map_entry *entry; 1521 int waitok = flags & UVM_PLA_WAITOK; 1522 1523 if (TAILQ_EMPTY(deadq)) 1524 return; 1525 1526 KERNEL_LOCK(); 1527 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1528 if (waitok) 1529 uvm_pause(); 1530 /* Drop reference to amap, if we've got one. 
*/ 1531 if (entry->aref.ar_amap) 1532 amap_unref(entry->aref.ar_amap, 1533 entry->aref.ar_pageoff, 1534 atop(entry->end - entry->start), 1535 flags & AMAP_REFALL); 1536 1537 /* Drop reference to our backing object, if we've got one. */ 1538 if (UVM_ET_ISSUBMAP(entry)) { 1539 /* ... unlikely to happen, but play it safe */ 1540 uvm_map_deallocate(entry->object.sub_map); 1541 } else if (UVM_ET_ISOBJ(entry) && 1542 entry->object.uvm_obj->pgops->pgo_detach) { 1543 entry->object.uvm_obj->pgops->pgo_detach( 1544 entry->object.uvm_obj); 1545 } 1546 1547 /* Step to next. */ 1548 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1549 uvm_mapent_free(entry); 1550 } 1551 KERNEL_UNLOCK(); 1552 } 1553 1554 void 1555 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1556 { 1557 struct vm_map_entry *entry; 1558 1559 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1560 KASSERT(entry->aref.ar_amap == NULL); 1561 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1562 KASSERT(!UVM_ET_ISOBJ(entry)); 1563 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1564 uvm_mapent_free(entry); 1565 } 1566 } 1567 1568 /* 1569 * Create and insert new entry. 1570 * 1571 * Returned entry contains new addresses and is inserted properly in the tree. 1572 * first and last are (probably) no longer valid. 1573 */ 1574 struct vm_map_entry* 1575 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1576 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1577 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1578 { 1579 struct vm_map_entry *entry, *prev; 1580 struct uvm_addr_state *free; 1581 vaddr_t min, max; /* free space boundaries for new entry */ 1582 1583 KDASSERT(map != NULL); 1584 KDASSERT(first != NULL); 1585 KDASSERT(last != NULL); 1586 KDASSERT(dead != NULL); 1587 KDASSERT(sz > 0); 1588 KDASSERT(addr + sz > addr); 1589 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1590 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1591 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1592 uvm_tree_sanity(map, __FILE__, __LINE__); 1593 1594 min = addr + sz; 1595 max = VMMAP_FREE_END(last); 1596 1597 /* Initialize new entry. */ 1598 if (new == NULL) 1599 entry = uvm_mapent_alloc(map, flags); 1600 else 1601 entry = new; 1602 if (entry == NULL) 1603 return NULL; 1604 entry->offset = 0; 1605 entry->etype = 0; 1606 entry->wired_count = 0; 1607 entry->aref.ar_pageoff = 0; 1608 entry->aref.ar_amap = NULL; 1609 1610 entry->start = addr; 1611 entry->end = min; 1612 entry->guard = 0; 1613 entry->fspace = 0; 1614 1615 /* Reset free space in first. */ 1616 free = uvm_map_uaddr_e(map, first); 1617 uvm_mapent_free_remove(map, free, first); 1618 first->guard = 0; 1619 first->fspace = 0; 1620 1621 /* 1622 * Remove all entries that are fully replaced. 1623 * We are iterating using last in reverse order. 1624 */ 1625 for (; first != last; last = prev) { 1626 prev = RBT_PREV(uvm_map_addr, last); 1627 1628 KDASSERT(last->start == last->end); 1629 free = uvm_map_uaddr_e(map, last); 1630 uvm_mapent_free_remove(map, free, last); 1631 uvm_mapent_addr_remove(map, last); 1632 DEAD_ENTRY_PUSH(dead, last); 1633 } 1634 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1635 if (first->start == addr) { 1636 uvm_mapent_addr_remove(map, first); 1637 DEAD_ENTRY_PUSH(dead, first); 1638 } else { 1639 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1640 addr, flags); 1641 } 1642 1643 /* Finally, link in entry. 
*/ 1644 uvm_mapent_addr_insert(map, entry); 1645 uvm_map_fix_space(map, entry, min, max, flags); 1646 1647 uvm_tree_sanity(map, __FILE__, __LINE__); 1648 return entry; 1649 } 1650 1651 1652 /* 1653 * uvm_mapent_alloc: allocate a map entry 1654 */ 1655 struct vm_map_entry * 1656 uvm_mapent_alloc(struct vm_map *map, int flags) 1657 { 1658 struct vm_map_entry *me, *ne; 1659 int pool_flags; 1660 int i; 1661 1662 pool_flags = PR_WAITOK; 1663 if (flags & UVM_FLAG_TRYLOCK) 1664 pool_flags = PR_NOWAIT; 1665 1666 if (map->flags & VM_MAP_INTRSAFE || cold) { 1667 mtx_enter(&uvm_kmapent_mtx); 1668 if (SLIST_EMPTY(&uvm.kentry_free)) { 1669 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1670 &kd_nowait); 1671 if (ne == NULL) 1672 panic("uvm_mapent_alloc: cannot allocate map " 1673 "entry"); 1674 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1675 SLIST_INSERT_HEAD(&uvm.kentry_free, 1676 &ne[i], daddrs.addr_kentry); 1677 } 1678 if (ratecheck(&uvm_kmapent_last_warn_time, 1679 &uvm_kmapent_warn_rate)) 1680 printf("uvm_mapent_alloc: out of static " 1681 "map entries\n"); 1682 } 1683 me = SLIST_FIRST(&uvm.kentry_free); 1684 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1685 uvmexp.kmapent++; 1686 mtx_leave(&uvm_kmapent_mtx); 1687 me->flags = UVM_MAP_STATIC; 1688 } else if (map == kernel_map) { 1689 splassert(IPL_NONE); 1690 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1691 if (me == NULL) 1692 goto out; 1693 me->flags = UVM_MAP_KMEM; 1694 } else { 1695 splassert(IPL_NONE); 1696 me = pool_get(&uvm_map_entry_pool, pool_flags); 1697 if (me == NULL) 1698 goto out; 1699 me->flags = 0; 1700 } 1701 1702 if (me != NULL) { 1703 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1704 } 1705 1706 out: 1707 return(me); 1708 } 1709 1710 /* 1711 * uvm_mapent_free: free map entry 1712 * 1713 * => XXX: static pool for kernel map? 1714 */ 1715 void 1716 uvm_mapent_free(struct vm_map_entry *me) 1717 { 1718 if (me->flags & UVM_MAP_STATIC) { 1719 mtx_enter(&uvm_kmapent_mtx); 1720 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1721 uvmexp.kmapent--; 1722 mtx_leave(&uvm_kmapent_mtx); 1723 } else if (me->flags & UVM_MAP_KMEM) { 1724 splassert(IPL_NONE); 1725 pool_put(&uvm_map_entry_kmem_pool, me); 1726 } else { 1727 splassert(IPL_NONE); 1728 pool_put(&uvm_map_entry_pool, me); 1729 } 1730 } 1731 1732 /* 1733 * uvm_map_lookup_entry: find map entry at or before an address. 1734 * 1735 * => map must at least be read-locked by caller 1736 * => entry is returned in "entry" 1737 * => return value is true if address is in the returned entry 1738 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1739 * returned for those mappings. 1740 */ 1741 boolean_t 1742 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1743 struct vm_map_entry **entry) 1744 { 1745 *entry = uvm_map_entrybyaddr(&map->addr, address); 1746 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1747 (*entry)->start <= address && (*entry)->end > address; 1748 } 1749 1750 /* 1751 * uvm_map_pie: return a random load address for a PIE executable 1752 * properly aligned. 
1753 */ 1754 #ifndef VM_PIE_MAX_ADDR 1755 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1756 #endif 1757 1758 #ifndef VM_PIE_MIN_ADDR 1759 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1760 #endif 1761 1762 #ifndef VM_PIE_MIN_ALIGN 1763 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1764 #endif 1765 1766 vaddr_t 1767 uvm_map_pie(vaddr_t align) 1768 { 1769 vaddr_t addr, space, min; 1770 1771 align = MAX(align, VM_PIE_MIN_ALIGN); 1772 1773 /* round up to next alignment */ 1774 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1775 1776 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1777 return (align); 1778 1779 space = (VM_PIE_MAX_ADDR - min) / align; 1780 space = MIN(space, (u_int32_t)-1); 1781 1782 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1783 addr += min; 1784 1785 return (addr); 1786 } 1787 1788 void 1789 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1790 { 1791 struct uvm_map_deadq dead; 1792 1793 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1794 (end & (vaddr_t)PAGE_MASK) == 0); 1795 TAILQ_INIT(&dead); 1796 vm_map_lock(map); 1797 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1798 vm_map_unlock(map); 1799 1800 if (map->flags & VM_MAP_INTRSAFE) 1801 uvm_unmap_detach_intrsafe(&dead); 1802 else 1803 uvm_unmap_detach(&dead, 0); 1804 } 1805 1806 /* 1807 * Mark entry as free. 1808 * 1809 * entry will be put on the dead list. 1810 * The free space will be merged into the previous or a new entry, 1811 * unless markfree is false. 1812 */ 1813 void 1814 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1815 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1816 boolean_t markfree) 1817 { 1818 struct uvm_addr_state *free; 1819 struct vm_map_entry *prev; 1820 vaddr_t addr; /* Start of freed range. */ 1821 vaddr_t end; /* End of freed range. */ 1822 1823 prev = *prev_ptr; 1824 if (prev == entry) 1825 *prev_ptr = prev = NULL; 1826 1827 if (prev == NULL || 1828 VMMAP_FREE_END(prev) != entry->start) 1829 prev = RBT_PREV(uvm_map_addr, entry); 1830 1831 /* Entry is describing only free memory and has nothing to drain into. */ 1832 if (prev == NULL && entry->start == entry->end && markfree) { 1833 *prev_ptr = entry; 1834 return; 1835 } 1836 1837 addr = entry->start; 1838 end = VMMAP_FREE_END(entry); 1839 free = uvm_map_uaddr_e(map, entry); 1840 uvm_mapent_free_remove(map, free, entry); 1841 uvm_mapent_addr_remove(map, entry); 1842 DEAD_ENTRY_PUSH(dead, entry); 1843 1844 if (markfree) { 1845 if (prev) { 1846 free = uvm_map_uaddr_e(map, prev); 1847 uvm_mapent_free_remove(map, free, prev); 1848 } 1849 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1850 } 1851 } 1852 1853 /* 1854 * Unwire and release referenced amap and object from map entry. 1855 */ 1856 void 1857 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1858 { 1859 /* Unwire removed map entry. */ 1860 if (VM_MAPENT_ISWIRED(entry)) { 1861 KERNEL_LOCK(); 1862 entry->wired_count = 0; 1863 uvm_fault_unwire_locked(map, entry->start, entry->end); 1864 KERNEL_UNLOCK(); 1865 } 1866 1867 /* Entry-type specific code. */ 1868 if (UVM_ET_ISHOLE(entry)) { 1869 /* Nothing to be done for holes. 
*/ 1870 } else if (map->flags & VM_MAP_INTRSAFE) { 1871 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1872 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1873 pmap_kremove(entry->start, entry->end - entry->start); 1874 } else if (UVM_ET_ISOBJ(entry) && 1875 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1876 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1877 /* 1878 * Note: kernel object mappings are currently used in 1879 * two ways: 1880 * [1] "normal" mappings of pages in the kernel object 1881 * [2] uvm_km_valloc'd allocations in which we 1882 * pmap_enter in some non-kernel-object page 1883 * (e.g. vmapbuf). 1884 * 1885 * for case [1], we need to remove the mapping from 1886 * the pmap and then remove the page from the kernel 1887 * object (because, once pages in a kernel object are 1888 * unmapped they are no longer needed, unlike, say, 1889 * a vnode where you might want the data to persist 1890 * until flushed out of a queue). 1891 * 1892 * for case [2], we need to remove the mapping from 1893 * the pmap. there shouldn't be any pages at the 1894 * specified offset in the kernel object [but it 1895 * doesn't hurt to call uvm_km_pgremove just to be 1896 * safe?] 1897 * 1898 * uvm_km_pgremove currently does the following: 1899 * for pages in the kernel object range: 1900 * - drops the swap slot 1901 * - uvm_pagefree the page 1902 * 1903 * note there is version of uvm_km_pgremove() that 1904 * is used for "intrsafe" objects. 1905 */ 1906 /* 1907 * remove mappings from pmap and drop the pages 1908 * from the object. offsets are always relative 1909 * to vm_map_min(kernel_map). 1910 */ 1911 pmap_remove(pmap_kernel(), entry->start, entry->end); 1912 uvm_km_pgremove(entry->object.uvm_obj, 1913 entry->start - vm_map_min(kernel_map), 1914 entry->end - vm_map_min(kernel_map)); 1915 1916 /* 1917 * null out kernel_object reference, we've just 1918 * dropped it 1919 */ 1920 entry->etype &= ~UVM_ET_OBJ; 1921 entry->object.uvm_obj = NULL; /* to be safe */ 1922 } else { 1923 /* remove mappings the standard way. */ 1924 pmap_remove(map->pmap, entry->start, entry->end); 1925 } 1926 } 1927 1928 /* 1929 * Remove all entries from start to end. 1930 * 1931 * If remove_holes, then remove ET_HOLE entries as well. 1932 * If markfree, entry will be properly marked free, otherwise, no replacement 1933 * entry will be put in the tree (corrupting the tree). 1934 */ 1935 void 1936 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1937 struct uvm_map_deadq *dead, boolean_t remove_holes, 1938 boolean_t markfree) 1939 { 1940 struct vm_map_entry *prev_hint, *next, *entry; 1941 1942 start = MAX(start, map->min_offset); 1943 end = MIN(end, map->max_offset); 1944 if (start >= end) 1945 return; 1946 1947 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1948 splassert(IPL_NONE); 1949 else 1950 splassert(IPL_VM); 1951 1952 /* Find first affected entry. */ 1953 entry = uvm_map_entrybyaddr(&map->addr, start); 1954 KDASSERT(entry != NULL && entry->start <= start); 1955 if (entry->end <= start && markfree) 1956 entry = RBT_NEXT(uvm_map_addr, entry); 1957 else 1958 UVM_MAP_CLIP_START(map, entry, start); 1959 1960 /* 1961 * Iterate entries until we reach end address. 1962 * prev_hint hints where the freed space can be appended to. 
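	 * (prev_hint is handed to uvm_mapent_mkfree() below, so the space
	 * of consecutively removed entries is coalesced into one free range
	 * instead of leaving a separate free fragment per entry.)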
1963 */ 1964 prev_hint = NULL; 1965 for (; entry != NULL && entry->start < end; entry = next) { 1966 KDASSERT(entry->start >= start); 1967 if (entry->end > end || !markfree) 1968 UVM_MAP_CLIP_END(map, entry, end); 1969 KDASSERT(entry->start >= start && entry->end <= end); 1970 next = RBT_NEXT(uvm_map_addr, entry); 1971 1972 /* Don't remove holes unless asked to do so. */ 1973 if (UVM_ET_ISHOLE(entry)) { 1974 if (!remove_holes) { 1975 prev_hint = entry; 1976 continue; 1977 } 1978 } 1979 1980 /* Kill entry. */ 1981 uvm_unmap_kill_entry(map, entry); 1982 1983 /* Update space usage. */ 1984 if ((map->flags & VM_MAP_ISVMSPACE) && 1985 entry->object.uvm_obj == NULL && 1986 !UVM_ET_ISHOLE(entry)) { 1987 ((struct vmspace *)map)->vm_dused -= 1988 uvmspace_dused(map, entry->start, entry->end); 1989 } 1990 if (!UVM_ET_ISHOLE(entry)) 1991 map->size -= entry->end - entry->start; 1992 1993 /* Actual removal of entry. */ 1994 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 1995 } 1996 1997 pmap_update(vm_map_pmap(map)); 1998 1999 #ifdef VMMAP_DEBUG 2000 if (markfree) { 2001 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2002 entry != NULL && entry->start < end; 2003 entry = RBT_NEXT(uvm_map_addr, entry)) { 2004 KDASSERT(entry->end <= start || 2005 entry->start == entry->end || 2006 UVM_ET_ISHOLE(entry)); 2007 } 2008 } else { 2009 vaddr_t a; 2010 for (a = start; a < end; a += PAGE_SIZE) 2011 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2012 } 2013 #endif 2014 } 2015 2016 /* 2017 * Mark all entries from first until end (exclusive) as pageable. 2018 * 2019 * Lock must be exclusive on entry and will not be touched. 2020 */ 2021 void 2022 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2023 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2024 { 2025 struct vm_map_entry *iter; 2026 2027 for (iter = first; iter != end; 2028 iter = RBT_NEXT(uvm_map_addr, iter)) { 2029 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2030 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2031 continue; 2032 2033 iter->wired_count = 0; 2034 uvm_fault_unwire_locked(map, iter->start, iter->end); 2035 } 2036 } 2037 2038 /* 2039 * Mark all entries from first until end (exclusive) as wired. 2040 * 2041 * Lockflags determines the lock state on return from this function. 2042 * Lock must be exclusive on entry. 2043 */ 2044 int 2045 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2046 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2047 int lockflags) 2048 { 2049 struct vm_map_entry *iter; 2050 #ifdef DIAGNOSTIC 2051 unsigned int timestamp_save; 2052 #endif 2053 int error; 2054 2055 /* 2056 * Wire pages in two passes: 2057 * 2058 * 1: holding the write lock, we create any anonymous maps that need 2059 * to be created. then we clip each map entry to the region to 2060 * be wired and increment its wiring count. 2061 * 2062 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2063 * in the pages for any newly wired area (wired_count == 1). 2064 * 2065 * downgrading to a read lock for uvm_fault_wire avoids a possible 2066 * deadlock with another thread that may have faulted on one of 2067 * the pages to be wired (it would mark the page busy, blocking 2068 * us, then in turn block on the map lock that we hold). 2069 * because we keep the read lock on the map, the copy-on-write 2070 * status of the entries we modify here cannot change. 
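	 *
	 * For example (illustrative only), the wrappers below reach this
	 * function to wire a range, and uvm_map_pageable_pgon to unwire it:
	 *
	 *	error = uvm_map_pageable(map, start, end, FALSE, 0);	wire
	 *	error = uvm_map_pageable(map, start, end, TRUE, 0);	unwire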
2071	 */
2072	for (iter = first; iter != end;
2073	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2074		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2075		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2076		    iter->protection == PROT_NONE)
2077			continue;
2078
2079		/*
2080		 * Perform actions of vm_map_lookup that need the write lock.
2081		 * - create an anonymous map for copy-on-write
2082		 * - anonymous map for zero-fill
2083		 * Skip submaps.
2084		 */
2085		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2086		    UVM_ET_ISNEEDSCOPY(iter) &&
2087		    ((iter->protection & PROT_WRITE) ||
2088		    iter->object.uvm_obj == NULL)) {
2089			amap_copy(map, iter, M_WAITOK, TRUE,
2090			    iter->start, iter->end);
2091		}
2092		iter->wired_count++;
2093	}
2094
2095	/*
2096	 * Pass 2.
2097	 */
2098	#ifdef DIAGNOSTIC
2099	timestamp_save = map->timestamp;
2100	#endif
2101	vm_map_busy(map);
2102	vm_map_downgrade(map);
2103
2104	error = 0;
2105	for (iter = first; error == 0 && iter != end;
2106	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2107		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2108		    iter->protection == PROT_NONE)
2109			continue;
2110
2111		error = uvm_fault_wire(map, iter->start, iter->end,
2112		    iter->protection);
2113	}
2114
2115	if (error) {
2116		/*
2117		 * uvm_fault_wire failure
2118		 *
2119		 * Reacquire lock and undo our work.
2120		 */
2121		vm_map_upgrade(map);
2122		vm_map_unbusy(map);
2123	#ifdef DIAGNOSTIC
2124		if (timestamp_save != map->timestamp)
2125			panic("uvm_map_pageable_wire: stale map");
2126	#endif
2127
2128		/*
2129		 * first is no longer needed to restart loops.
2130		 * Use it as iterator to unmap successful mappings.
2131		 */
2132		for (; first != iter;
2133		    first = RBT_NEXT(uvm_map_addr, first)) {
2134			if (UVM_ET_ISHOLE(first) ||
2135			    first->start == first->end ||
2136			    first->protection == PROT_NONE)
2137				continue;
2138
2139			first->wired_count--;
2140			if (!VM_MAPENT_ISWIRED(first)) {
2141				uvm_fault_unwire_locked(map,
2142				    first->start, first->end);
2143			}
2144		}
2145
2146		/* decrease counter in the rest of the entries */
2147		for (; iter != end;
2148		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2149			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2150			    iter->protection == PROT_NONE)
2151				continue;
2152
2153			iter->wired_count--;
2154		}
2155
2156		if ((lockflags & UVM_LK_EXIT) == 0)
2157			vm_map_unlock(map);
2158		return error;
2159	}
2160
2161	/* We are currently holding a read lock. */
2162	if ((lockflags & UVM_LK_EXIT) == 0) {
2163		vm_map_unbusy(map);
2164		vm_map_unlock_read(map);
2165	} else {
2166		vm_map_upgrade(map);
2167		vm_map_unbusy(map);
2168	#ifdef DIAGNOSTIC
2169		if (timestamp_save != map->timestamp)
2170			panic("uvm_map_pageable_wire: stale map");
2171	#endif
2172	}
2173	return 0;
2174	}
2175
2176	/*
2177	 * uvm_map_pageable: set pageability of a range in a map.
2178	 *
2179	 * Flags:
2180	 * UVM_LK_ENTER: map is already locked by caller
2181	 * UVM_LK_EXIT: don't unlock map on exit
2182	 *
2183	 * The full range must be in use (entries may not have fspace != 0).
2184	 * UVM_ET_HOLE counts as unmapped.
2185	 */
2186	int
2187	uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2188	    boolean_t new_pageable, int lockflags)
2189	{
2190	struct vm_map_entry *first, *last, *tmp;
2191	int error;
2192
2193	start = trunc_page(start);
2194	end = round_page(end);
2195
2196	if (start > end)
2197		return EINVAL;
2198	if (start == end)
2199		return 0;	/* nothing to do */
2200	if (start < map->min_offset)
2201		return EFAULT;	/* why? see first XXX below */
2202	if (end > map->max_offset)
2203		return EINVAL;	/* why? see second XXX below */
2204
2205	KASSERT(map->flags & VM_MAP_PAGEABLE);
2206	if ((lockflags & UVM_LK_ENTER) == 0)
2207		vm_map_lock(map);
2208
2209	/*
2210	 * Find first entry.
2211	 *
2212	 * Initial test on start is different, because of the different
2213	 * error returned. Rest is tested further down.
2214	 */
2215	first = uvm_map_entrybyaddr(&map->addr, start);
2216	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2217		/*
2218		 * XXX if the first address is not mapped, it is EFAULT?
2219		 */
2220		error = EFAULT;
2221		goto out;
2222	}
2223
2224	/* Check that the range has no holes. */
2225	for (last = first; last != NULL && last->start < end;
2226	    last = RBT_NEXT(uvm_map_addr, last)) {
2227		if (UVM_ET_ISHOLE(last) ||
2228		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2229			/*
2230			 * XXX unmapped memory in range, why is it EINVAL
2231			 * instead of EFAULT?
2232			 */
2233			error = EINVAL;
2234			goto out;
2235		}
2236	}
2237
2238	/*
2239	 * Last ended at the first entry after the range.
2240	 * Move back one step.
2241	 *
2242	 * Note that last may be NULL.
2243	 */
2244	if (last == NULL) {
2245		last = RBT_MAX(uvm_map_addr, &map->addr);
2246		if (last->end < end) {
2247			error = EINVAL;
2248			goto out;
2249		}
2250	} else {
2251		KASSERT(last != first);
2252		last = RBT_PREV(uvm_map_addr, last);
2253	}
2254
2255	/* Wire/unwire pages here. */
2256	if (new_pageable) {
2257		/*
2258		 * Mark pageable.
2259		 * entries that are not wired are untouched.
2260		 */
2261		if (VM_MAPENT_ISWIRED(first))
2262			UVM_MAP_CLIP_START(map, first, start);
2263		/*
2264		 * Split last at end.
2265		 * Make tmp be the first entry after what is to be touched.
2266		 * If last is not wired, don't touch it.
2267		 */
2268		if (VM_MAPENT_ISWIRED(last)) {
2269			UVM_MAP_CLIP_END(map, last, end);
2270			tmp = RBT_NEXT(uvm_map_addr, last);
2271		} else
2272			tmp = last;
2273
2274		uvm_map_pageable_pgon(map, first, tmp, start, end);
2275		error = 0;
2276
2277	out:
2278		if ((lockflags & UVM_LK_EXIT) == 0)
2279			vm_map_unlock(map);
2280		return error;
2281	} else {
2282		/*
2283		 * Mark entries wired.
2284		 * entries are always touched (because recovery needs this).
2285		 */
2286		if (!VM_MAPENT_ISWIRED(first))
2287			UVM_MAP_CLIP_START(map, first, start);
2288		/*
2289		 * Split last at end.
2290		 * Make tmp be the first entry after what is to be touched.
2291		 * If last is already wired, don't touch it.
2292		 */
2293		if (!VM_MAPENT_ISWIRED(last)) {
2294			UVM_MAP_CLIP_END(map, last, end);
2295			tmp = RBT_NEXT(uvm_map_addr, last);
2296		} else
2297			tmp = last;
2298
2299		return uvm_map_pageable_wire(map, first, tmp, start, end,
2300		    lockflags);
2301	}
2302	}
2303
2304	/*
2305	 * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2306	 * all mapped regions.
2307	 *
2308	 * Map must not be locked.
2309	 * If no flags are specified, all regions are unwired.
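 *
 * The expected callers are mlockall(2) and munlockall(2) (an
 * assumption about the syscall plumbing): MCL_CURRENT wires what is
 * currently mapped, MCL_FUTURE sets VM_MAP_WIREFUTURE so later
 * mappings are wired as they are established, and flags == 0 unwires
 * everything.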
2310 */ 2311 int 2312 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2313 { 2314 vsize_t size; 2315 struct vm_map_entry *iter; 2316 2317 KASSERT(map->flags & VM_MAP_PAGEABLE); 2318 vm_map_lock(map); 2319 2320 if (flags == 0) { 2321 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2322 NULL, map->min_offset, map->max_offset); 2323 2324 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2325 vm_map_unlock(map); 2326 return 0; 2327 } 2328 2329 if (flags & MCL_FUTURE) 2330 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2331 if (!(flags & MCL_CURRENT)) { 2332 vm_map_unlock(map); 2333 return 0; 2334 } 2335 2336 /* 2337 * Count number of pages in all non-wired entries. 2338 * If the number exceeds the limit, abort. 2339 */ 2340 size = 0; 2341 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2342 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2343 continue; 2344 2345 size += iter->end - iter->start; 2346 } 2347 2348 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2349 vm_map_unlock(map); 2350 return ENOMEM; 2351 } 2352 2353 /* XXX non-pmap_wired_count case must be handled by caller */ 2354 #ifdef pmap_wired_count 2355 if (limit != 0 && 2356 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2357 vm_map_unlock(map); 2358 return ENOMEM; 2359 } 2360 #endif 2361 2362 /* 2363 * uvm_map_pageable_wire will release lcok 2364 */ 2365 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2366 NULL, map->min_offset, map->max_offset, 0); 2367 } 2368 2369 /* 2370 * Initialize map. 2371 * 2372 * Allocates sufficient entries to describe the free memory in the map. 2373 */ 2374 void 2375 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2376 { 2377 int i; 2378 2379 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2380 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2381 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2382 2383 /* 2384 * Update parameters. 2385 * 2386 * This code handles (vaddr_t)-1 and other page mask ending addresses 2387 * properly. 2388 * We lose the top page if the full virtual address space is used. 2389 */ 2390 if (max & (vaddr_t)PAGE_MASK) { 2391 max += 1; 2392 if (max == 0) /* overflow */ 2393 max -= PAGE_SIZE; 2394 } 2395 2396 RBT_INIT(uvm_map_addr, &map->addr); 2397 map->uaddr_exe = NULL; 2398 for (i = 0; i < nitems(map->uaddr_any); ++i) 2399 map->uaddr_any[i] = NULL; 2400 map->uaddr_brk_stack = NULL; 2401 2402 map->size = 0; 2403 map->ref_count = 0; 2404 map->min_offset = min; 2405 map->max_offset = max; 2406 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2407 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2408 map->flags = flags; 2409 map->timestamp = 0; 2410 rw_init(&map->lock, "vmmaplk"); 2411 mtx_init(&map->mtx, IPL_VM); 2412 mtx_init(&map->flags_lock, IPL_VM); 2413 2414 /* Configure the allocators. */ 2415 if (flags & VM_MAP_ISVMSPACE) 2416 uvm_map_setup_md(map); 2417 else 2418 map->uaddr_any[3] = &uaddr_kbootstrap; 2419 2420 /* 2421 * Fill map entries. 2422 * We do not need to write-lock the map here because only the current 2423 * thread sees it right now. Initialize ref_count to 0 above to avoid 2424 * bogus triggering of lock-not-held assertions. 2425 */ 2426 uvm_map_setup_entries(map); 2427 uvm_tree_sanity(map, __FILE__, __LINE__); 2428 map->ref_count = 1; 2429 } 2430 2431 /* 2432 * Destroy the map. 2433 * 2434 * This is the inverse operation to uvm_map_setup. 
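 *
 * Called from uvmspace_free() once the last reference to the vmspace
 * has been dropped.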
2435 */ 2436 void 2437 uvm_map_teardown(struct vm_map *map) 2438 { 2439 struct uvm_map_deadq dead_entries; 2440 struct vm_map_entry *entry, *tmp; 2441 #ifdef VMMAP_DEBUG 2442 size_t numq, numt; 2443 #endif 2444 int i; 2445 2446 KERNEL_ASSERT_LOCKED(); 2447 KERNEL_UNLOCK(); 2448 KERNEL_ASSERT_UNLOCKED(); 2449 2450 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2451 2452 /* Remove address selectors. */ 2453 uvm_addr_destroy(map->uaddr_exe); 2454 map->uaddr_exe = NULL; 2455 for (i = 0; i < nitems(map->uaddr_any); i++) { 2456 uvm_addr_destroy(map->uaddr_any[i]); 2457 map->uaddr_any[i] = NULL; 2458 } 2459 uvm_addr_destroy(map->uaddr_brk_stack); 2460 map->uaddr_brk_stack = NULL; 2461 2462 /* 2463 * Remove entries. 2464 * 2465 * The following is based on graph breadth-first search. 2466 * 2467 * In color terms: 2468 * - the dead_entries set contains all nodes that are reachable 2469 * (i.e. both the black and the grey nodes) 2470 * - any entry not in dead_entries is white 2471 * - any entry that appears in dead_entries before entry, 2472 * is black, the rest is grey. 2473 * The set [entry, end] is also referred to as the wavefront. 2474 * 2475 * Since the tree is always a fully connected graph, the breadth-first 2476 * search guarantees that each vmmap_entry is visited exactly once. 2477 * The vm_map is broken down in linear time. 2478 */ 2479 TAILQ_INIT(&dead_entries); 2480 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2481 DEAD_ENTRY_PUSH(&dead_entries, entry); 2482 while (entry != NULL) { 2483 sched_pause(); 2484 uvm_unmap_kill_entry(map, entry); 2485 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2486 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2487 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2488 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2489 /* Update wave-front. */ 2490 entry = TAILQ_NEXT(entry, dfree.deadq); 2491 } 2492 2493 #ifdef VMMAP_DEBUG 2494 numt = numq = 0; 2495 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2496 numt++; 2497 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2498 numq++; 2499 KASSERT(numt == numq); 2500 #endif 2501 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2502 2503 KERNEL_LOCK(); 2504 2505 pmap_destroy(map->pmap); 2506 map->pmap = NULL; 2507 } 2508 2509 /* 2510 * Populate map with free-memory entries. 2511 * 2512 * Map must be initialized and empty. 2513 */ 2514 void 2515 uvm_map_setup_entries(struct vm_map *map) 2516 { 2517 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2518 2519 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2520 } 2521 2522 /* 2523 * Split entry at given address. 2524 * 2525 * orig: entry that is to be split. 2526 * next: a newly allocated map entry that is not linked. 2527 * split: address at which the split is done. 2528 */ 2529 void 2530 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2531 struct vm_map_entry *next, vaddr_t split) 2532 { 2533 struct uvm_addr_state *free, *free_before; 2534 vsize_t adj; 2535 2536 if ((split & PAGE_MASK) != 0) { 2537 panic("uvm_map_splitentry: split address 0x%lx " 2538 "not on page boundary!", split); 2539 } 2540 KDASSERT(map != NULL && orig != NULL && next != NULL); 2541 uvm_tree_sanity(map, __FILE__, __LINE__); 2542 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2543 2544 #ifdef VMMAP_DEBUG 2545 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2546 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2547 #endif /* VMMAP_DEBUG */ 2548 2549 /* 2550 * Free space will change, unlink from free space tree. 
2551 */ 2552 free = uvm_map_uaddr_e(map, orig); 2553 uvm_mapent_free_remove(map, free, orig); 2554 2555 adj = split - orig->start; 2556 2557 uvm_mapent_copy(orig, next); 2558 if (split >= orig->end) { 2559 next->etype = 0; 2560 next->offset = 0; 2561 next->wired_count = 0; 2562 next->start = next->end = split; 2563 next->guard = 0; 2564 next->fspace = VMMAP_FREE_END(orig) - split; 2565 next->aref.ar_amap = NULL; 2566 next->aref.ar_pageoff = 0; 2567 orig->guard = MIN(orig->guard, split - orig->end); 2568 orig->fspace = split - VMMAP_FREE_START(orig); 2569 } else { 2570 orig->fspace = 0; 2571 orig->guard = 0; 2572 orig->end = next->start = split; 2573 2574 if (next->aref.ar_amap) { 2575 KERNEL_LOCK(); 2576 amap_splitref(&orig->aref, &next->aref, adj); 2577 KERNEL_UNLOCK(); 2578 } 2579 if (UVM_ET_ISSUBMAP(orig)) { 2580 uvm_map_reference(next->object.sub_map); 2581 next->offset += adj; 2582 } else if (UVM_ET_ISOBJ(orig)) { 2583 if (next->object.uvm_obj->pgops && 2584 next->object.uvm_obj->pgops->pgo_reference) { 2585 KERNEL_LOCK(); 2586 next->object.uvm_obj->pgops->pgo_reference( 2587 next->object.uvm_obj); 2588 KERNEL_UNLOCK(); 2589 } 2590 next->offset += adj; 2591 } 2592 } 2593 2594 /* 2595 * Link next into address tree. 2596 * Link orig and next into free-space tree. 2597 * 2598 * Don't insert 'next' into the addr tree until orig has been linked, 2599 * in case the free-list looks at adjecent entries in the addr tree 2600 * for its decisions. 2601 */ 2602 if (orig->fspace > 0) 2603 free_before = free; 2604 else 2605 free_before = uvm_map_uaddr_e(map, orig); 2606 uvm_mapent_free_insert(map, free_before, orig); 2607 uvm_mapent_addr_insert(map, next); 2608 uvm_mapent_free_insert(map, free, next); 2609 2610 uvm_tree_sanity(map, __FILE__, __LINE__); 2611 } 2612 2613 2614 #ifdef VMMAP_DEBUG 2615 2616 void 2617 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2618 char *file, int line) 2619 { 2620 char* map_special; 2621 2622 if (test) 2623 return; 2624 2625 if (map == kernel_map) 2626 map_special = " (kernel_map)"; 2627 else if (map == kmem_map) 2628 map_special = " (kmem_map)"; 2629 else 2630 map_special = ""; 2631 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2632 line, test_str); 2633 } 2634 2635 /* 2636 * Check that map is sane. 2637 */ 2638 void 2639 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2640 { 2641 struct vm_map_entry *iter; 2642 vaddr_t addr; 2643 vaddr_t min, max, bound; /* Bounds checker. */ 2644 struct uvm_addr_state *free; 2645 2646 addr = vm_map_min(map); 2647 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2648 /* 2649 * Valid start, end. 2650 * Catch overflow for end+fspace. 2651 */ 2652 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2653 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2654 2655 /* May not be empty. */ 2656 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2657 file, line); 2658 2659 /* Addresses for entry must lie within map boundaries. */ 2660 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2661 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2662 2663 /* Tree may not have gaps. */ 2664 UVM_ASSERT(map, iter->start == addr, file, line); 2665 addr = VMMAP_FREE_END(iter); 2666 2667 /* 2668 * Free space may not cross boundaries, unless the same 2669 * free list is used on both sides of the border. 
2670 */ 2671 min = VMMAP_FREE_START(iter); 2672 max = VMMAP_FREE_END(iter); 2673 2674 while (min < max && 2675 (bound = uvm_map_boundary(map, min, max)) != max) { 2676 UVM_ASSERT(map, 2677 uvm_map_uaddr(map, bound - 1) == 2678 uvm_map_uaddr(map, bound), 2679 file, line); 2680 min = bound; 2681 } 2682 2683 free = uvm_map_uaddr_e(map, iter); 2684 if (free) { 2685 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2686 file, line); 2687 } else { 2688 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2689 file, line); 2690 } 2691 } 2692 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2693 } 2694 2695 void 2696 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2697 { 2698 struct vm_map_entry *iter; 2699 vsize_t size; 2700 2701 size = 0; 2702 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2703 if (!UVM_ET_ISHOLE(iter)) 2704 size += iter->end - iter->start; 2705 } 2706 2707 if (map->size != size) 2708 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2709 UVM_ASSERT(map, map->size == size, file, line); 2710 2711 vmspace_validate(map); 2712 } 2713 2714 /* 2715 * This function validates the statistics on vmspace. 2716 */ 2717 void 2718 vmspace_validate(struct vm_map *map) 2719 { 2720 struct vmspace *vm; 2721 struct vm_map_entry *iter; 2722 vaddr_t imin, imax; 2723 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2724 vsize_t stack, heap; /* Measured sizes. */ 2725 2726 if (!(map->flags & VM_MAP_ISVMSPACE)) 2727 return; 2728 2729 vm = (struct vmspace *)map; 2730 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2731 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2732 2733 stack = heap = 0; 2734 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2735 imin = imax = iter->start; 2736 2737 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2738 continue; 2739 2740 /* 2741 * Update stack, heap. 2742 * Keep in mind that (theoretically) the entries of 2743 * userspace and stack may be joined. 2744 */ 2745 while (imin != iter->end) { 2746 /* 2747 * Set imax to the first boundary crossed between 2748 * imin and stack addresses. 2749 */ 2750 imax = iter->end; 2751 if (imin < stack_begin && imax > stack_begin) 2752 imax = stack_begin; 2753 else if (imin < stack_end && imax > stack_end) 2754 imax = stack_end; 2755 2756 if (imin >= stack_begin && imin < stack_end) 2757 stack += imax - imin; 2758 else 2759 heap += imax - imin; 2760 imin = imax; 2761 } 2762 } 2763 2764 heap >>= PAGE_SHIFT; 2765 if (heap != vm->vm_dused) { 2766 printf("vmspace stack range: 0x%lx-0x%lx\n", 2767 stack_begin, stack_end); 2768 panic("vmspace_validate: vmspace.vm_dused invalid, " 2769 "expected %ld pgs, got %ld pgs in map %p", 2770 heap, vm->vm_dused, 2771 map); 2772 } 2773 } 2774 2775 #endif /* VMMAP_DEBUG */ 2776 2777 /* 2778 * uvm_map_init: init mapping system at boot time. note that we allocate 2779 * and init the static pool of structs vm_map_entry for the kernel here. 2780 */ 2781 void 2782 uvm_map_init(void) 2783 { 2784 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2785 int lcv; 2786 2787 /* now set up static pool of kernel map entries ... */ 2788 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2789 SLIST_INIT(&uvm.kentry_free); 2790 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2791 SLIST_INSERT_HEAD(&uvm.kentry_free, 2792 &kernel_map_entry[lcv], daddrs.addr_kentry); 2793 } 2794 2795 /* initialize the map-related pools. 
*/ 2796 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 2797 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 2798 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 2799 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 2800 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 2801 IPL_NONE, 0, "vmmpekpl", NULL); 2802 pool_sethiwat(&uvm_map_entry_pool, 8192); 2803 2804 uvm_addr_init(); 2805 } 2806 2807 #if defined(DDB) 2808 2809 /* 2810 * DDB hooks 2811 */ 2812 2813 /* 2814 * uvm_map_printit: actually prints the map 2815 */ 2816 void 2817 uvm_map_printit(struct vm_map *map, boolean_t full, 2818 int (*pr)(const char *, ...)) 2819 { 2820 struct vmspace *vm; 2821 struct vm_map_entry *entry; 2822 struct uvm_addr_state *free; 2823 int in_free, i; 2824 char buf[8]; 2825 2826 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2827 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2828 map->b_start, map->b_end); 2829 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2830 map->s_start, map->s_end); 2831 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2832 map->size, map->ref_count, map->timestamp, 2833 map->flags); 2834 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2835 pmap_resident_count(map->pmap)); 2836 2837 /* struct vmspace handling. */ 2838 if (map->flags & VM_MAP_ISVMSPACE) { 2839 vm = (struct vmspace *)map; 2840 2841 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2842 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2843 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2844 vm->vm_tsize, vm->vm_dsize); 2845 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2846 vm->vm_taddr, vm->vm_daddr); 2847 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2848 vm->vm_maxsaddr, vm->vm_minsaddr); 2849 } 2850 2851 if (!full) 2852 goto print_uaddr; 2853 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 2854 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2855 entry, entry->start, entry->end, entry->object.uvm_obj, 2856 (long long)entry->offset, entry->aref.ar_amap, 2857 entry->aref.ar_pageoff); 2858 (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 2859 "wc=%d, adv=%d\n", 2860 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2861 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 2862 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2863 entry->protection, entry->max_protection, 2864 entry->inheritance, entry->wired_count, entry->advice); 2865 2866 free = uvm_map_uaddr_e(map, entry); 2867 in_free = (free != NULL); 2868 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2869 "free=0x%lx-0x%lx\n", 2870 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2871 in_free ? 'T' : 'F', 2872 entry->guard, 2873 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2874 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2875 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2876 (entry->etype & UVM_ET_FREEMAPPED) ? 
'T' : 'F', free); 2877 if (free) { 2878 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2879 free->uaddr_minaddr, free->uaddr_maxaddr, 2880 free->uaddr_functions->uaddr_name); 2881 } 2882 } 2883 2884 print_uaddr: 2885 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2886 for (i = 0; i < nitems(map->uaddr_any); i++) { 2887 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2888 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2889 } 2890 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2891 } 2892 2893 /* 2894 * uvm_object_printit: actually prints the object 2895 */ 2896 void 2897 uvm_object_printit(uobj, full, pr) 2898 struct uvm_object *uobj; 2899 boolean_t full; 2900 int (*pr)(const char *, ...); 2901 { 2902 struct vm_page *pg; 2903 int cnt = 0; 2904 2905 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 2906 uobj, uobj->pgops, uobj->uo_npages); 2907 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 2908 (*pr)("refs=<SYSTEM>\n"); 2909 else 2910 (*pr)("refs=%d\n", uobj->uo_refs); 2911 2912 if (!full) { 2913 return; 2914 } 2915 (*pr)(" PAGES <pg,offset>:\n "); 2916 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 2917 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 2918 if ((cnt % 3) == 2) { 2919 (*pr)("\n "); 2920 } 2921 cnt++; 2922 } 2923 if ((cnt % 3) != 2) { 2924 (*pr)("\n"); 2925 } 2926 } 2927 2928 /* 2929 * uvm_page_printit: actually print the page 2930 */ 2931 static const char page_flagbits[] = 2932 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 2933 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 2934 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 2935 2936 void 2937 uvm_page_printit(pg, full, pr) 2938 struct vm_page *pg; 2939 boolean_t full; 2940 int (*pr)(const char *, ...); 2941 { 2942 struct vm_page *tpg; 2943 struct uvm_object *uobj; 2944 struct pglist *pgl; 2945 2946 (*pr)("PAGE %p:\n", pg); 2947 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 2948 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 2949 (long long)pg->phys_addr); 2950 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 2951 pg->uobject, pg->uanon, (long long)pg->offset); 2952 #if defined(UVM_PAGE_TRKOWN) 2953 if (pg->pg_flags & PG_BUSY) 2954 (*pr)(" owning process = %d, tag=%s", 2955 pg->owner, pg->owner_tag); 2956 else 2957 (*pr)(" page not busy, no owner"); 2958 #else 2959 (*pr)(" [page ownership tracking disabled]"); 2960 #endif 2961 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 2962 2963 if (!full) 2964 return; 2965 2966 /* cross-verify object/anon */ 2967 if ((pg->pg_flags & PQ_FREE) == 0) { 2968 if (pg->pg_flags & PQ_ANON) { 2969 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2970 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2971 (pg->uanon) ? pg->uanon->an_page : NULL); 2972 else 2973 (*pr)(" anon backpointer is OK\n"); 2974 } else { 2975 uobj = pg->uobject; 2976 if (uobj) { 2977 (*pr)(" checking object list\n"); 2978 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 2979 if (tpg == pg) { 2980 break; 2981 } 2982 } 2983 if (tpg) 2984 (*pr)(" page found on object list\n"); 2985 else 2986 (*pr)(" >>> PAGE NOT FOUND " 2987 "ON OBJECT LIST! <<<\n"); 2988 } 2989 } 2990 } 2991 2992 /* cross-verify page queue */ 2993 if (pg->pg_flags & PQ_FREE) { 2994 if (uvm_pmr_isfree(pg)) 2995 (*pr)(" page found in uvm_pmemrange\n"); 2996 else 2997 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 2998 pgl = NULL; 2999 } else if (pg->pg_flags & PQ_INACTIVE) { 3000 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
3001 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3002 } else if (pg->pg_flags & PQ_ACTIVE) { 3003 pgl = &uvm.page_active; 3004 } else { 3005 pgl = NULL; 3006 } 3007 3008 if (pgl) { 3009 (*pr)(" checking pageq list\n"); 3010 TAILQ_FOREACH(tpg, pgl, pageq) { 3011 if (tpg == pg) { 3012 break; 3013 } 3014 } 3015 if (tpg) 3016 (*pr)(" page found on pageq list\n"); 3017 else 3018 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3019 } 3020 } 3021 #endif 3022 3023 /* 3024 * uvm_map_protect: change map protection 3025 * 3026 * => set_max means set max_protection. 3027 * => map must be unlocked. 3028 */ 3029 int 3030 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3031 vm_prot_t new_prot, boolean_t set_max) 3032 { 3033 struct vm_map_entry *first, *iter; 3034 vm_prot_t old_prot; 3035 vm_prot_t mask; 3036 int error; 3037 3038 if (start > end) 3039 return EINVAL; 3040 start = MAX(start, map->min_offset); 3041 end = MIN(end, map->max_offset); 3042 if (start >= end) 3043 return 0; 3044 3045 error = 0; 3046 vm_map_lock(map); 3047 3048 /* 3049 * Set up first and last. 3050 * - first will contain first entry at or after start. 3051 */ 3052 first = uvm_map_entrybyaddr(&map->addr, start); 3053 KDASSERT(first != NULL); 3054 if (first->end < start) 3055 first = RBT_NEXT(uvm_map_addr, first); 3056 3057 /* First, check for protection violations. */ 3058 for (iter = first; iter != NULL && iter->start < end; 3059 iter = RBT_NEXT(uvm_map_addr, iter)) { 3060 /* Treat memory holes as free space. */ 3061 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3062 continue; 3063 3064 if (UVM_ET_ISSUBMAP(iter)) { 3065 error = EINVAL; 3066 goto out; 3067 } 3068 if ((new_prot & iter->max_protection) != new_prot) { 3069 error = EACCES; 3070 goto out; 3071 } 3072 if (map == kernel_map && 3073 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3074 panic("uvm_map_protect: kernel map W^X violation requested"); 3075 } 3076 3077 /* Fix protections. */ 3078 for (iter = first; iter != NULL && iter->start < end; 3079 iter = RBT_NEXT(uvm_map_addr, iter)) { 3080 /* Treat memory holes as free space. */ 3081 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3082 continue; 3083 3084 old_prot = iter->protection; 3085 3086 /* 3087 * Skip adapting protection iff old and new protection 3088 * are equal. 3089 */ 3090 if (set_max) { 3091 if (old_prot == (new_prot & old_prot) && 3092 iter->max_protection == new_prot) 3093 continue; 3094 } else { 3095 if (old_prot == new_prot) 3096 continue; 3097 } 3098 3099 UVM_MAP_CLIP_START(map, iter, start); 3100 UVM_MAP_CLIP_END(map, iter, end); 3101 3102 if (set_max) { 3103 iter->max_protection = new_prot; 3104 iter->protection &= new_prot; 3105 } else 3106 iter->protection = new_prot; 3107 3108 /* 3109 * update physical map if necessary. worry about copy-on-write 3110 * here -- CHECK THIS XXX 3111 */ 3112 if (iter->protection != old_prot) { 3113 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3114 ~PROT_WRITE : PROT_MASK; 3115 3116 /* update pmap */ 3117 if ((iter->protection & mask) == PROT_NONE && 3118 VM_MAPENT_ISWIRED(iter)) { 3119 /* 3120 * TODO(ariane) this is stupid. wired_count 3121 * is 0 if not wired, otherwise anything 3122 * larger than 0 (incremented once each time 3123 * wire is called). 3124 * Mostly to be able to undo the damage on 3125 * failure. Not the actually be a wired 3126 * refcounter... 3127 * Originally: iter->wired_count--; 3128 * (don't we have to unwire this in the pmap 3129 * as well?) 
3130 */ 3131 iter->wired_count = 0; 3132 } 3133 pmap_protect(map->pmap, iter->start, iter->end, 3134 iter->protection & mask); 3135 } 3136 3137 /* 3138 * If the map is configured to lock any future mappings, 3139 * wire this entry now if the old protection was PROT_NONE 3140 * and the new protection is not PROT_NONE. 3141 */ 3142 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3143 VM_MAPENT_ISWIRED(iter) == 0 && 3144 old_prot == PROT_NONE && 3145 new_prot != PROT_NONE) { 3146 if (uvm_map_pageable(map, iter->start, iter->end, 3147 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3148 /* 3149 * If locking the entry fails, remember the 3150 * error if it's the first one. Note we 3151 * still continue setting the protection in 3152 * the map, but it will return the resource 3153 * storage condition regardless. 3154 * 3155 * XXX Ignore what the actual error is, 3156 * XXX just call it a resource shortage 3157 * XXX so that it doesn't get confused 3158 * XXX what uvm_map_protect() itself would 3159 * XXX normally return. 3160 */ 3161 error = ENOMEM; 3162 } 3163 } 3164 } 3165 pmap_update(map->pmap); 3166 3167 out: 3168 vm_map_unlock(map); 3169 return error; 3170 } 3171 3172 /* 3173 * uvmspace_alloc: allocate a vmspace structure. 3174 * 3175 * - structure includes vm_map and pmap 3176 * - XXX: no locking on this structure 3177 * - refcnt set to 1, rest must be init'd by caller 3178 */ 3179 struct vmspace * 3180 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3181 boolean_t remove_holes) 3182 { 3183 struct vmspace *vm; 3184 3185 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3186 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3187 return (vm); 3188 } 3189 3190 /* 3191 * uvmspace_init: initialize a vmspace structure. 3192 * 3193 * - XXX: no locking on this structure 3194 * - refcnt set to 1, rest must be init'd by caller 3195 */ 3196 void 3197 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3198 boolean_t pageable, boolean_t remove_holes) 3199 { 3200 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3201 3202 if (pmap) 3203 pmap_reference(pmap); 3204 else 3205 pmap = pmap_create(); 3206 vm->vm_map.pmap = pmap; 3207 3208 uvm_map_setup(&vm->vm_map, min, max, 3209 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3210 3211 vm->vm_refcnt = 1; 3212 3213 if (remove_holes) 3214 pmap_remove_holes(vm); 3215 } 3216 3217 /* 3218 * uvmspace_share: share a vmspace between two processes 3219 * 3220 * - XXX: no locking on vmspace 3221 * - used for vfork 3222 */ 3223 3224 struct vmspace * 3225 uvmspace_share(struct process *pr) 3226 { 3227 struct vmspace *vm = pr->ps_vmspace; 3228 3229 vm->vm_refcnt++; 3230 return vm; 3231 } 3232 3233 /* 3234 * uvmspace_exec: the process wants to exec a new program 3235 * 3236 * - XXX: no locking on vmspace 3237 */ 3238 3239 void 3240 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3241 { 3242 struct process *pr = p->p_p; 3243 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3244 struct vm_map *map = &ovm->vm_map; 3245 struct uvm_map_deadq dead_entries; 3246 3247 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3248 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3249 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3250 3251 pmap_unuse_final(p); /* before stack addresses go away */ 3252 TAILQ_INIT(&dead_entries); 3253 3254 /* see if more than one process is using this vmspace... 
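	 * If we are the only user we can recycle the vmspace in place;
	 * otherwise a fresh vmspace is allocated and installed (see the
	 * two branches below).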
*/ 3255 if (ovm->vm_refcnt == 1) { 3256 /* 3257 * If pr is the only process using its vmspace then 3258 * we can safely recycle that vmspace for the program 3259 * that is being exec'd. 3260 */ 3261 3262 #ifdef SYSVSHM 3263 /* 3264 * SYSV SHM semantics require us to kill all segments on an exec 3265 */ 3266 if (ovm->vm_shm) 3267 shmexit(ovm); 3268 #endif 3269 3270 /* 3271 * POSIX 1003.1b -- "lock future mappings" is revoked 3272 * when a process execs another program image. 3273 */ 3274 vm_map_lock(map); 3275 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3276 3277 /* 3278 * now unmap the old program 3279 * 3280 * Instead of attempting to keep the map valid, we simply 3281 * nuke all entries and ask uvm_map_setup to reinitialize 3282 * the map to the new boundaries. 3283 * 3284 * uvm_unmap_remove will actually nuke all entries for us 3285 * (as in, not replace them with free-memory entries). 3286 */ 3287 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3288 &dead_entries, TRUE, FALSE); 3289 3290 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3291 3292 /* Nuke statistics and boundaries. */ 3293 memset(&ovm->vm_startcopy, 0, 3294 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3295 3296 3297 if (end & (vaddr_t)PAGE_MASK) { 3298 end += 1; 3299 if (end == 0) /* overflow */ 3300 end -= PAGE_SIZE; 3301 } 3302 3303 /* Setup new boundaries and populate map with entries. */ 3304 map->min_offset = start; 3305 map->max_offset = end; 3306 uvm_map_setup_entries(map); 3307 vm_map_unlock(map); 3308 3309 /* but keep MMU holes unavailable */ 3310 pmap_remove_holes(ovm); 3311 } else { 3312 /* 3313 * pr's vmspace is being shared, so we can't reuse 3314 * it for pr since it is still being used for others. 3315 * allocate a new vmspace for pr 3316 */ 3317 nvm = uvmspace_alloc(start, end, 3318 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3319 3320 /* install new vmspace and drop our ref to the old one. */ 3321 pmap_deactivate(p); 3322 p->p_vmspace = pr->ps_vmspace = nvm; 3323 pmap_activate(p); 3324 3325 uvmspace_free(ovm); 3326 } 3327 3328 /* Release dead entries */ 3329 uvm_unmap_detach(&dead_entries, 0); 3330 } 3331 3332 /* 3333 * uvmspace_free: free a vmspace data structure 3334 * 3335 * - XXX: no locking on vmspace 3336 */ 3337 void 3338 uvmspace_free(struct vmspace *vm) 3339 { 3340 if (--vm->vm_refcnt == 0) { 3341 /* 3342 * lock the map, to wait out all other references to it. delete 3343 * all of the mappings and pages they hold, then call the pmap 3344 * module to reclaim anything left. 3345 */ 3346 #ifdef SYSVSHM 3347 /* Get rid of any SYSV shared memory segments. */ 3348 if (vm->vm_shm != NULL) 3349 shmexit(vm); 3350 #endif 3351 3352 uvm_map_teardown(&vm->vm_map); 3353 pool_put(&uvm_vmspace_pool, vm); 3354 } 3355 } 3356 3357 /* 3358 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3359 * srcmap to the address range [dstaddr, dstaddr + sz) in 3360 * dstmap. 3361 * 3362 * The whole address range in srcmap must be backed by an object 3363 * (no holes). 3364 * 3365 * If successful, the address ranges share memory and the destination 3366 * address range uses the protection flags in prot. 3367 * 3368 * This routine assumes that sz is a multiple of PAGE_SIZE and 3369 * that dstaddr and srcaddr are page-aligned. 
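 *
 * For example (illustrative only), sharing a page-aligned, fully
 * mapped buffer of sz bytes between two maps:
 *
 *	error = uvm_share(dstmap, dstaddr, PROT_READ | PROT_WRITE,
 *	    srcmap, srcaddr, sz);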
3370 */ 3371 int 3372 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3373 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3374 { 3375 int ret = 0; 3376 vaddr_t unmap_end; 3377 vaddr_t dstva; 3378 vsize_t off, len, n = sz; 3379 struct vm_map_entry *first = NULL, *last = NULL; 3380 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3381 struct uvm_map_deadq dead; 3382 3383 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3384 return EINVAL; 3385 3386 TAILQ_INIT(&dead); 3387 vm_map_lock(dstmap); 3388 vm_map_lock_read(srcmap); 3389 3390 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3391 ret = ENOMEM; 3392 goto exit_unlock; 3393 } 3394 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3395 ret = EINVAL; 3396 goto exit_unlock; 3397 } 3398 3399 unmap_end = dstaddr; 3400 for (; src_entry != NULL; 3401 psrc_entry = src_entry, 3402 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3403 /* hole in address space, bail out */ 3404 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3405 break; 3406 if (src_entry->start >= srcaddr + sz) 3407 break; 3408 3409 if (UVM_ET_ISSUBMAP(src_entry)) 3410 panic("uvm_share: encountered a submap (illegal)"); 3411 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3412 UVM_ET_ISNEEDSCOPY(src_entry)) 3413 panic("uvm_share: non-copy_on_write map entries " 3414 "marked needs_copy (illegal)"); 3415 3416 dstva = dstaddr; 3417 if (src_entry->start > srcaddr) { 3418 dstva += src_entry->start - srcaddr; 3419 off = 0; 3420 } else 3421 off = srcaddr - src_entry->start; 3422 3423 if (n < src_entry->end - src_entry->start) 3424 len = n; 3425 else 3426 len = src_entry->end - src_entry->start; 3427 n -= len; 3428 3429 if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot, 3430 srcmap, src_entry, &dead) == NULL) 3431 break; 3432 3433 unmap_end = dstva + len; 3434 if (n == 0) 3435 goto exit_unlock; 3436 } 3437 3438 ret = EINVAL; 3439 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3440 3441 exit_unlock: 3442 vm_map_unlock_read(srcmap); 3443 vm_map_unlock(dstmap); 3444 uvm_unmap_detach(&dead, 0); 3445 3446 return ret; 3447 } 3448 3449 /* 3450 * Clone map entry into other map. 3451 * 3452 * Mapping will be placed at dstaddr, for the same length. 3453 * Space must be available. 3454 * Reference counters are incremented. 3455 */ 3456 struct vm_map_entry * 3457 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3458 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3459 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3460 int mapent_flags, int amap_share_flags) 3461 { 3462 struct vm_map_entry *new_entry, *first, *last; 3463 3464 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3465 3466 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3467 first = last = NULL; 3468 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3469 panic("uvmspace_fork: no space in map for " 3470 "entry in empty map"); 3471 } 3472 new_entry = uvm_map_mkentry(dstmap, first, last, 3473 dstaddr, dstlen, mapent_flags, dead, NULL); 3474 if (new_entry == NULL) 3475 return NULL; 3476 /* old_entry -> new_entry */ 3477 new_entry->object = old_entry->object; 3478 new_entry->offset = old_entry->offset; 3479 new_entry->aref = old_entry->aref; 3480 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3481 new_entry->protection = prot; 3482 new_entry->max_protection = maxprot; 3483 new_entry->inheritance = old_entry->inheritance; 3484 new_entry->advice = old_entry->advice; 3485 3486 /* gain reference to object backing the map (can't be a submap). */ 3487 if (new_entry->aref.ar_amap) { 3488 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3489 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3490 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3491 amap_share_flags); 3492 } 3493 3494 if (UVM_ET_ISOBJ(new_entry) && 3495 new_entry->object.uvm_obj->pgops->pgo_reference) { 3496 new_entry->offset += off; 3497 new_entry->object.uvm_obj->pgops->pgo_reference 3498 (new_entry->object.uvm_obj); 3499 } 3500 3501 return new_entry; 3502 } 3503 3504 struct vm_map_entry * 3505 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3506 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3507 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3508 { 3509 /* 3510 * If old_entry refers to a copy-on-write region that has not yet been 3511 * written to (needs_copy flag is set), then we need to allocate a new 3512 * amap for old_entry. 3513 * 3514 * If we do not do this, and the process owning old_entry does a copy-on 3515 * write later, old_entry and new_entry will refer to different memory 3516 * regions, and the memory between the processes is no longer shared. 3517 * 3518 * [in other words, we need to clear needs_copy] 3519 */ 3520 3521 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3522 /* get our own amap, clears needs_copy */ 3523 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3524 0, 0); 3525 /* XXXCDC: WAITOK??? */ 3526 } 3527 3528 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3529 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3530 } 3531 3532 /* 3533 * share the mapping: this means we want the old and 3534 * new entries to share amaps and backing objects. 3535 */ 3536 struct vm_map_entry * 3537 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3538 struct vm_map *old_map, 3539 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3540 { 3541 struct vm_map_entry *new_entry; 3542 3543 new_entry = uvm_mapent_share(new_map, old_entry->start, 3544 old_entry->end - old_entry->start, 0, old_entry->protection, 3545 old_entry->max_protection, old_map, old_entry, dead); 3546 3547 /* 3548 * pmap_copy the mappings: this routine is optional 3549 * but if it is there it will reduce the number of 3550 * page faults in the new proc. 3551 */ 3552 if (!UVM_ET_ISHOLE(new_entry)) 3553 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3554 (new_entry->end - new_entry->start), new_entry->start); 3555 3556 return (new_entry); 3557 } 3558 3559 /* 3560 * copy-on-write the mapping (using mmap's 3561 * MAP_PRIVATE semantics) 3562 * 3563 * allocate new_entry, adjust reference counts. 3564 * (note that new references are read-only). 
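 *
 * The child therefore starts out referencing the parent's pages
 * read-only and obtains private copies lazily, unless the checks
 * below force an immediate amap copy.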
3565 */ 3566 struct vm_map_entry * 3567 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3568 struct vm_map *old_map, 3569 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3570 { 3571 struct vm_map_entry *new_entry; 3572 boolean_t protect_child; 3573 3574 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3575 old_entry->end - old_entry->start, 0, old_entry->protection, 3576 old_entry->max_protection, old_entry, dead, 0, 0); 3577 3578 new_entry->etype |= 3579 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3580 3581 /* 3582 * the new entry will need an amap. it will either 3583 * need to be copied from the old entry or created 3584 * from scratch (if the old entry does not have an 3585 * amap). can we defer this process until later 3586 * (by setting "needs_copy") or do we need to copy 3587 * the amap now? 3588 * 3589 * we must copy the amap now if any of the following 3590 * conditions hold: 3591 * 1. the old entry has an amap and that amap is 3592 * being shared. this means that the old (parent) 3593 * process is sharing the amap with another 3594 * process. if we do not clear needs_copy here 3595 * we will end up in a situation where both the 3596 * parent and child process are referring to the 3597 * same amap with "needs_copy" set. if the 3598 * parent write-faults, the fault routine will 3599 * clear "needs_copy" in the parent by allocating 3600 * a new amap. this is wrong because the 3601 * parent is supposed to be sharing the old amap 3602 * and the new amap will break that. 3603 * 3604 * 2. if the old entry has an amap and a non-zero 3605 * wire count then we are going to have to call 3606 * amap_cow_now to avoid page faults in the 3607 * parent process. since amap_cow_now requires 3608 * "needs_copy" to be clear we might as well 3609 * clear it here as well. 3610 * 3611 */ 3612 if (old_entry->aref.ar_amap != NULL && 3613 ((amap_flags(old_entry->aref.ar_amap) & 3614 AMAP_SHARED) != 0 || 3615 VM_MAPENT_ISWIRED(old_entry))) { 3616 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3617 0, 0); 3618 /* XXXCDC: M_WAITOK ... ok? */ 3619 } 3620 3621 /* 3622 * if the parent's entry is wired down, then the 3623 * parent process does not want page faults on 3624 * access to that memory. this means that we 3625 * cannot do copy-on-write because we can't write 3626 * protect the old entry. in this case we 3627 * resolve all copy-on-write faults now, using 3628 * amap_cow_now. note that we have already 3629 * allocated any needed amap (above). 3630 */ 3631 if (VM_MAPENT_ISWIRED(old_entry)) { 3632 /* 3633 * resolve all copy-on-write faults now 3634 * (note that there is nothing to do if 3635 * the old mapping does not have an amap). 3636 * XXX: is it worthwhile to bother with 3637 * pmap_copy in this case? 3638 */ 3639 if (old_entry->aref.ar_amap) 3640 amap_cow_now(new_map, new_entry); 3641 } else { 3642 if (old_entry->aref.ar_amap) { 3643 /* 3644 * setup mappings to trigger copy-on-write faults 3645 * we must write-protect the parent if it has 3646 * an amap and it is not already "needs_copy"... 3647 * if it is already "needs_copy" then the parent 3648 * has already been write-protected by a previous 3649 * fork operation. 3650 * 3651 * if we do not write-protect the parent, then 3652 * we must be sure to write-protect the child 3653 * after the pmap_copy() operation. 3654 * 3655 * XXX: pmap_copy should have some way of telling 3656 * us that it didn't do anything so we can avoid 3657 * calling pmap_protect needlessly. 
3658 */ 3659 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3660 if (old_entry->max_protection & PROT_WRITE) { 3661 pmap_protect(old_map->pmap, 3662 old_entry->start, 3663 old_entry->end, 3664 old_entry->protection & 3665 ~PROT_WRITE); 3666 pmap_update(old_map->pmap); 3667 } 3668 old_entry->etype |= UVM_ET_NEEDSCOPY; 3669 } 3670 3671 /* parent must now be write-protected */ 3672 protect_child = FALSE; 3673 } else { 3674 /* 3675 * we only need to protect the child if the 3676 * parent has write access. 3677 */ 3678 if (old_entry->max_protection & PROT_WRITE) 3679 protect_child = TRUE; 3680 else 3681 protect_child = FALSE; 3682 } 3683 /* 3684 * copy the mappings 3685 * XXX: need a way to tell if this does anything 3686 */ 3687 if (!UVM_ET_ISHOLE(new_entry)) 3688 pmap_copy(new_map->pmap, old_map->pmap, 3689 new_entry->start, 3690 (old_entry->end - old_entry->start), 3691 old_entry->start); 3692 3693 /* protect the child's mappings if necessary */ 3694 if (protect_child) { 3695 pmap_protect(new_map->pmap, new_entry->start, 3696 new_entry->end, 3697 new_entry->protection & 3698 ~PROT_WRITE); 3699 } 3700 } 3701 3702 return (new_entry); 3703 } 3704 3705 /* 3706 * zero the mapping: the new entry will be zero initialized 3707 */ 3708 struct vm_map_entry * 3709 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3710 struct vm_map *old_map, 3711 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3712 { 3713 struct vm_map_entry *new_entry; 3714 3715 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3716 old_entry->end - old_entry->start, 0, old_entry->protection, 3717 old_entry->max_protection, old_entry, dead, 0, 0); 3718 3719 new_entry->etype |= 3720 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3721 3722 if (new_entry->aref.ar_amap) { 3723 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3724 atop(new_entry->end - new_entry->start), 0); 3725 new_entry->aref.ar_amap = NULL; 3726 new_entry->aref.ar_pageoff = 0; 3727 } 3728 3729 if (UVM_ET_ISOBJ(new_entry)) { 3730 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3731 new_entry->object.uvm_obj->pgops->pgo_detach( 3732 new_entry->object.uvm_obj); 3733 new_entry->object.uvm_obj = NULL; 3734 new_entry->etype &= ~UVM_ET_OBJ; 3735 } 3736 3737 return (new_entry); 3738 } 3739 3740 /* 3741 * uvmspace_fork: fork a process' main map 3742 * 3743 * => create a new vmspace for child process from parent. 3744 * => parent's map must not be locked. 3745 */ 3746 struct vmspace * 3747 uvmspace_fork(struct process *pr) 3748 { 3749 struct vmspace *vm1 = pr->ps_vmspace; 3750 struct vmspace *vm2; 3751 struct vm_map *old_map = &vm1->vm_map; 3752 struct vm_map *new_map; 3753 struct vm_map_entry *old_entry, *new_entry; 3754 struct uvm_map_deadq dead; 3755 3756 vm_map_lock(old_map); 3757 3758 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3759 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3760 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3761 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3762 vm2->vm_dused = 0; /* Statistic managed by us. 
*/ 3763 new_map = &vm2->vm_map; 3764 vm_map_lock(new_map); 3765 3766 /* go entry-by-entry */ 3767 TAILQ_INIT(&dead); 3768 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3769 if (old_entry->start == old_entry->end) 3770 continue; 3771 3772 /* first, some sanity checks on the old entry */ 3773 if (UVM_ET_ISSUBMAP(old_entry)) { 3774 panic("fork: encountered a submap during fork " 3775 "(illegal)"); 3776 } 3777 3778 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3779 UVM_ET_ISNEEDSCOPY(old_entry)) { 3780 panic("fork: non-copy_on_write map entry marked " 3781 "needs_copy (illegal)"); 3782 } 3783 3784 /* Apply inheritance. */ 3785 switch (old_entry->inheritance) { 3786 case MAP_INHERIT_SHARE: 3787 new_entry = uvm_mapent_forkshared(vm2, new_map, 3788 old_map, old_entry, &dead); 3789 break; 3790 case MAP_INHERIT_COPY: 3791 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3792 old_map, old_entry, &dead); 3793 break; 3794 case MAP_INHERIT_ZERO: 3795 new_entry = uvm_mapent_forkzero(vm2, new_map, 3796 old_map, old_entry, &dead); 3797 break; 3798 default: 3799 continue; 3800 } 3801 3802 /* Update process statistics. */ 3803 if (!UVM_ET_ISHOLE(new_entry)) 3804 new_map->size += new_entry->end - new_entry->start; 3805 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3806 vm2->vm_dused += uvmspace_dused( 3807 new_map, new_entry->start, new_entry->end); 3808 } 3809 } 3810 3811 vm_map_unlock(old_map); 3812 vm_map_unlock(new_map); 3813 3814 /* 3815 * This can actually happen, if multiple entries described a 3816 * space in which an entry was inherited. 3817 */ 3818 uvm_unmap_detach(&dead, 0); 3819 3820 #ifdef SYSVSHM 3821 if (vm1->vm_shm) 3822 shmfork(vm1, vm2); 3823 #endif 3824 3825 return vm2; 3826 } 3827 3828 /* 3829 * uvm_map_hint: return the beginning of the best area suitable for 3830 * creating a new mapping with "prot" protection. 3831 */ 3832 vaddr_t 3833 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3834 vaddr_t maxaddr) 3835 { 3836 vaddr_t addr; 3837 vaddr_t spacing; 3838 3839 #ifdef __i386__ 3840 /* 3841 * If executable skip first two pages, otherwise start 3842 * after data + heap region. 3843 */ 3844 if ((prot & PROT_EXEC) != 0 && 3845 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3846 addr = (PAGE_SIZE*2) + 3847 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3848 return (round_page(addr)); 3849 } 3850 #endif 3851 3852 #if defined (__LP64__) 3853 spacing = (MIN((4UL * 1024 * 1024 * 1024), BRKSIZ) - 1); 3854 #else 3855 spacing = (MIN((256 * 1024 * 1024), BRKSIZ) - 1); 3856 #endif 3857 3858 addr = (vaddr_t)vm->vm_daddr; 3859 /* 3860 * Start malloc/mmap after the brk. 3861 * If the random spacing area has been used up, 3862 * the brk area becomes fair game for mmap as well. 3863 */ 3864 if (vm->vm_dused < spacing >> PAGE_SHIFT) 3865 addr += BRKSIZ; 3866 if (addr < maxaddr) { 3867 while (spacing > maxaddr - addr) 3868 spacing >>= 1; 3869 } 3870 addr += arc4random() & spacing; 3871 return (round_page(addr)); 3872 } 3873 3874 /* 3875 * uvm_map_submap: punch down part of a map into a submap 3876 * 3877 * => only the kernel_map is allowed to be submapped 3878 * => the purpose of submapping is to break up the locking granularity 3879 * of a larger map 3880 * => the range specified must have been mapped previously with a uvm_map() 3881 * call [with uobj==NULL] to create a blank map entry in the main map. 3882 * [And it had better still be blank!] 3883 * => maps which contain submaps should never be copied or forked. 
3884 * => to remove a submap, use uvm_unmap() on the main map
3885 * and then uvm_map_deallocate() the submap.
3886 * => main map must be unlocked.
3887 * => submap must have been init'd and have a zero reference count.
3888 * [need not be locked as we don't actually reference it]
3889 */
3890 int
3891 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3892 struct vm_map *submap)
3893 {
3894 struct vm_map_entry *entry;
3895 int result;
3896
3897 if (start > map->max_offset || end > map->max_offset ||
3898 start < map->min_offset || end < map->min_offset)
3899 return EINVAL;
3900
3901 vm_map_lock(map);
3902
3903 if (uvm_map_lookup_entry(map, start, &entry)) {
3904 UVM_MAP_CLIP_START(map, entry, start);
3905 UVM_MAP_CLIP_END(map, entry, end);
3906 } else
3907 entry = NULL;
3908
3909 if (entry != NULL &&
3910 entry->start == start && entry->end == end &&
3911 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
3912 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
3913 entry->etype |= UVM_ET_SUBMAP;
3914 entry->object.sub_map = submap;
3915 entry->offset = 0;
3916 uvm_map_reference(submap);
3917 result = 0;
3918 } else
3919 result = EINVAL;
3920
3921 vm_map_unlock(map);
3922 return(result);
3923 }
3924
3925 /*
3926 * uvm_map_checkprot: check protection in map
3927 *
3928 * => must allow specific protection in a fully allocated region.
3929 * => map must be read or write locked by caller.
3930 */
3931 boolean_t
3932 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3933 vm_prot_t protection)
3934 {
3935 struct vm_map_entry *entry;
3936
3937 if (start < map->min_offset || end > map->max_offset || start > end)
3938 return FALSE;
3939 if (start == end)
3940 return TRUE;
3941
3942 /*
3943 * Iterate entries.
3944 */
3945 for (entry = uvm_map_entrybyaddr(&map->addr, start);
3946 entry != NULL && entry->start < end;
3947 entry = RBT_NEXT(uvm_map_addr, entry)) {
3948 /* Fail if a hole is found. */
3949 if (UVM_ET_ISHOLE(entry) ||
3950 (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
3951 return FALSE;
3952
3953 /* Check protection. */
3954 if ((entry->protection & protection) != protection)
3955 return FALSE;
3956 }
3957 return TRUE;
3958 }
3959
3960 /*
3961 * uvm_map_create: create map
3962 */
3963 vm_map_t
3964 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
3965 {
3966 vm_map_t map;
3967
3968 map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
3969 map->pmap = pmap;
3970 uvm_map_setup(map, min, max, flags);
3971 return (map);
3972 }
3973
3974 /*
3975 * uvm_map_deallocate: drop reference to a map
3976 *
3977 * => caller must not lock map
3978 * => we will zap map if ref count goes to zero
3979 */
3980 void
3981 uvm_map_deallocate(vm_map_t map)
3982 {
3983 int c;
3984 struct uvm_map_deadq dead;
3985
3986 c = --map->ref_count;
3987 if (c > 0) {
3988 return;
3989 }
3990
3991 /*
3992 * all references gone. unmap and free.
3993 *
3994 * No lock required: we are the only one to access this map.
3995 */
3996 TAILQ_INIT(&dead);
3997 uvm_tree_sanity(map, __FILE__, __LINE__);
3998 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
3999 TRUE, FALSE);
4000 pmap_destroy(map->pmap);
4001 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4002 free(map, M_VMMAP, sizeof *map);
4003
4004 uvm_unmap_detach(&dead, 0);
4005 }
4006
4007 /*
4008 * uvm_map_inherit: set inheritance code for range of addrs in map.
4009 *
4010 * => map must be unlocked
4011 * => note that the inherit code is used during a "fork"; see fork
4012 * code for details.
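 *
 * A minimal usage sketch (hedged: the process pointer and range are
 * hypothetical, not code from this file), as a minherit(2)-style
 * request would issue it:
 *
 *	error = uvm_map_inherit(&p->p_vmspace->vm_map, start, start + len,
 *	    MAP_INHERIT_SHARE);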
4013 */ 4014 int 4015 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4016 vm_inherit_t new_inheritance) 4017 { 4018 struct vm_map_entry *entry; 4019 4020 switch (new_inheritance) { 4021 case MAP_INHERIT_NONE: 4022 case MAP_INHERIT_COPY: 4023 case MAP_INHERIT_SHARE: 4024 case MAP_INHERIT_ZERO: 4025 break; 4026 default: 4027 return (EINVAL); 4028 } 4029 4030 if (start > end) 4031 return EINVAL; 4032 start = MAX(start, map->min_offset); 4033 end = MIN(end, map->max_offset); 4034 if (start >= end) 4035 return 0; 4036 4037 vm_map_lock(map); 4038 4039 entry = uvm_map_entrybyaddr(&map->addr, start); 4040 if (entry->end > start) 4041 UVM_MAP_CLIP_START(map, entry, start); 4042 else 4043 entry = RBT_NEXT(uvm_map_addr, entry); 4044 4045 while (entry != NULL && entry->start < end) { 4046 UVM_MAP_CLIP_END(map, entry, end); 4047 entry->inheritance = new_inheritance; 4048 entry = RBT_NEXT(uvm_map_addr, entry); 4049 } 4050 4051 vm_map_unlock(map); 4052 return (0); 4053 } 4054 4055 /* 4056 * uvm_map_advice: set advice code for range of addrs in map. 4057 * 4058 * => map must be unlocked 4059 */ 4060 int 4061 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4062 { 4063 struct vm_map_entry *entry; 4064 4065 switch (new_advice) { 4066 case MADV_NORMAL: 4067 case MADV_RANDOM: 4068 case MADV_SEQUENTIAL: 4069 break; 4070 default: 4071 return (EINVAL); 4072 } 4073 4074 if (start > end) 4075 return EINVAL; 4076 start = MAX(start, map->min_offset); 4077 end = MIN(end, map->max_offset); 4078 if (start >= end) 4079 return 0; 4080 4081 vm_map_lock(map); 4082 4083 entry = uvm_map_entrybyaddr(&map->addr, start); 4084 if (entry != NULL && entry->end > start) 4085 UVM_MAP_CLIP_START(map, entry, start); 4086 else if (entry!= NULL) 4087 entry = RBT_NEXT(uvm_map_addr, entry); 4088 4089 /* 4090 * XXXJRT: disallow holes? 4091 */ 4092 while (entry != NULL && entry->start < end) { 4093 UVM_MAP_CLIP_END(map, entry, end); 4094 entry->advice = new_advice; 4095 entry = RBT_NEXT(uvm_map_addr, entry); 4096 } 4097 4098 vm_map_unlock(map); 4099 return (0); 4100 } 4101 4102 /* 4103 * uvm_map_extract: extract a mapping from a map and put it somewhere 4104 * in the kernel_map, setting protection to max_prot. 4105 * 4106 * => map should be unlocked (we will write lock it and kernel_map) 4107 * => returns 0 on success, error code otherwise 4108 * => start must be page aligned 4109 * => len must be page sized 4110 * => flags: 4111 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4112 * Mappings are QREF's. 4113 */ 4114 int 4115 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4116 vaddr_t *dstaddrp, int flags) 4117 { 4118 struct uvm_map_deadq dead; 4119 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4120 vaddr_t dstaddr; 4121 vaddr_t end; 4122 vaddr_t cp_start; 4123 vsize_t cp_len, cp_off; 4124 int error; 4125 4126 TAILQ_INIT(&dead); 4127 end = start + len; 4128 4129 /* 4130 * Sanity check on the parameters. 4131 * Also, since the mapping may not contain gaps, error out if the 4132 * mapped area is not in source map. 4133 */ 4134 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4135 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4136 return EINVAL; 4137 if (start < srcmap->min_offset || end > srcmap->max_offset) 4138 return EINVAL; 4139 4140 /* Initialize dead entries. Handle len == 0 case. */ 4141 if (len == 0) 4142 return 0; 4143 4144 /* Acquire lock on srcmap. */ 4145 vm_map_lock(srcmap); 4146 4147 /* Lock srcmap, lookup first and last entry in <start,len>. 
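 * (The srcmap lock was taken just above; what follows is only the
 * lookup of the first entry overlapping start and a contiguity check
 * of the range.)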
*/ 4148 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4149 4150 /* Check that the range is contiguous. */ 4151 for (entry = first; entry != NULL && entry->end < end; 4152 entry = RBT_NEXT(uvm_map_addr, entry)) { 4153 if (VMMAP_FREE_END(entry) != entry->end || 4154 UVM_ET_ISHOLE(entry)) { 4155 error = EINVAL; 4156 goto fail; 4157 } 4158 } 4159 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4160 error = EINVAL; 4161 goto fail; 4162 } 4163 4164 /* 4165 * Handle need-copy flag. 4166 * This may invalidate last, hence the re-initialization during the 4167 * loop. 4168 * 4169 * Also, perform clipping of last if not UVM_EXTRACT_QREF. 4170 */ 4171 for (entry = first; entry != NULL && entry->start < end; 4172 entry = RBT_NEXT(uvm_map_addr, entry)) { 4173 if (UVM_ET_ISNEEDSCOPY(entry)) 4174 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); 4175 if (UVM_ET_ISNEEDSCOPY(entry)) { 4176 /* 4177 * amap_copy failure 4178 */ 4179 error = ENOMEM; 4180 goto fail; 4181 } 4182 } 4183 4184 /* Lock destination map (kernel_map). */ 4185 vm_map_lock(kernel_map); 4186 4187 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4188 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4189 PROT_NONE, 0) != 0) { 4190 error = ENOMEM; 4191 goto fail2; 4192 } 4193 *dstaddrp = dstaddr; 4194 4195 /* 4196 * We now have srcmap and kernel_map locked. 4197 * dstaddr contains the destination offset in dstmap. 4198 */ 4199 /* step 1: start looping through map entries, performing extraction. */ 4200 for (entry = first; entry != NULL && entry->start < end; 4201 entry = RBT_NEXT(uvm_map_addr, entry)) { 4202 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4203 if (UVM_ET_ISHOLE(entry)) 4204 continue; 4205 4206 /* Calculate uvm_mapent_clone parameters. */ 4207 cp_start = entry->start; 4208 if (cp_start < start) { 4209 cp_off = start - cp_start; 4210 cp_start = start; 4211 } else 4212 cp_off = 0; 4213 cp_len = MIN(entry->end, end) - cp_start; 4214 4215 newentry = uvm_mapent_clone(kernel_map, 4216 cp_start - start + dstaddr, cp_len, cp_off, 4217 entry->protection, entry->max_protection, 4218 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4219 if (newentry == NULL) { 4220 error = ENOMEM; 4221 goto fail2_unmap; 4222 } 4223 kernel_map->size += cp_len; 4224 if (flags & UVM_EXTRACT_FIXPROT) 4225 newentry->protection = newentry->max_protection; 4226 4227 /* 4228 * Step 2: perform pmap copy. 4229 * (Doing this in the loop saves one RB traversal.) 4230 */ 4231 pmap_copy(kernel_map->pmap, srcmap->pmap, 4232 cp_start - start + dstaddr, cp_len, cp_start); 4233 } 4234 pmap_update(kernel_map->pmap); 4235 4236 error = 0; 4237 4238 /* Unmap copied entries on failure. */ 4239 fail2_unmap: 4240 if (error) { 4241 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4242 FALSE, TRUE); 4243 } 4244 4245 /* Release maps, release dead entries. 
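 * The error paths below fall through: fail2_unmap undoes the partial
 * copy, fail2 drops the kernel_map lock, fail drops the srcmap lock,
 * and the dead entries are only detached once both locks are released.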
*/ 4246 fail2: 4247 vm_map_unlock(kernel_map); 4248 4249 fail: 4250 vm_map_unlock(srcmap); 4251 4252 uvm_unmap_detach(&dead, 0); 4253 4254 return error; 4255 } 4256 4257 /* 4258 * uvm_map_clean: clean out a map range 4259 * 4260 * => valid flags: 4261 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4262 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4263 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4264 * if (flags & PGO_FREE): any cached pages are freed after clean 4265 * => returns an error if any part of the specified range isn't mapped 4266 * => never a need to flush amap layer since the anonymous memory has 4267 * no permanent home, but may deactivate pages there 4268 * => called from sys_msync() and sys_madvise() 4269 * => caller must not write-lock map (read OK). 4270 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4271 */ 4272 4273 int 4274 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4275 { 4276 struct vm_map_entry *first, *entry; 4277 struct vm_amap *amap; 4278 struct vm_anon *anon; 4279 struct vm_page *pg; 4280 struct uvm_object *uobj; 4281 vaddr_t cp_start, cp_end; 4282 int refs; 4283 int error; 4284 boolean_t rv; 4285 4286 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4287 (PGO_FREE|PGO_DEACTIVATE)); 4288 4289 if (start > end || start < map->min_offset || end > map->max_offset) 4290 return EINVAL; 4291 4292 vm_map_lock_read(map); 4293 first = uvm_map_entrybyaddr(&map->addr, start); 4294 4295 /* Make a first pass to check for holes. */ 4296 for (entry = first; entry != NULL && entry->start < end; 4297 entry = RBT_NEXT(uvm_map_addr, entry)) { 4298 if (UVM_ET_ISSUBMAP(entry)) { 4299 vm_map_unlock_read(map); 4300 return EINVAL; 4301 } 4302 if (UVM_ET_ISSUBMAP(entry) || 4303 UVM_ET_ISHOLE(entry) || 4304 (entry->end < end && 4305 VMMAP_FREE_END(entry) != entry->end)) { 4306 vm_map_unlock_read(map); 4307 return EFAULT; 4308 } 4309 } 4310 4311 error = 0; 4312 for (entry = first; entry != NULL && entry->start < end; 4313 entry = RBT_NEXT(uvm_map_addr, entry)) { 4314 amap = entry->aref.ar_amap; /* top layer */ 4315 if (UVM_ET_ISOBJ(entry)) 4316 uobj = entry->object.uvm_obj; 4317 else 4318 uobj = NULL; 4319 4320 /* 4321 * No amap cleaning necessary if: 4322 * - there's no amap 4323 * - we're not deactivating or freeing pages. 4324 */ 4325 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4326 goto flush_object; 4327 4328 cp_start = MAX(entry->start, start); 4329 cp_end = MIN(entry->end, end); 4330 4331 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4332 anon = amap_lookup(&entry->aref, 4333 cp_start - entry->start); 4334 if (anon == NULL) 4335 continue; 4336 4337 pg = anon->an_page; 4338 if (pg == NULL) { 4339 continue; 4340 } 4341 KASSERT(pg->pg_flags & PQ_ANON); 4342 4343 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4344 /* 4345 * XXX In these first 3 cases, we always just 4346 * XXX deactivate the page. We may want to 4347 * XXX handle the different cases more 4348 * XXX specifically, in the future. 4349 */ 4350 case PGO_CLEANIT|PGO_FREE: 4351 case PGO_CLEANIT|PGO_DEACTIVATE: 4352 case PGO_DEACTIVATE: 4353 deactivate_it: 4354 /* skip the page if it's wired */ 4355 if (pg->wire_count != 0) 4356 break; 4357 4358 uvm_lock_pageq(); 4359 4360 KASSERT(pg->uanon == anon); 4361 4362 /* zap all mappings for the page. */ 4363 pmap_page_protect(pg, PROT_NONE); 4364 4365 /* ...and deactivate the page. 
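 * (Deactivation puts the page on the inactive queue; the pagedaemon
 * may reclaim it later, but the contents remain valid until then.)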
*/ 4366 uvm_pagedeactivate(pg); 4367 4368 uvm_unlock_pageq(); 4369 break; 4370 case PGO_FREE: 4371 /* 4372 * If there are multiple references to 4373 * the amap, just deactivate the page. 4374 */ 4375 if (amap_refs(amap) > 1) 4376 goto deactivate_it; 4377 4378 /* XXX skip the page if it's wired */ 4379 if (pg->wire_count != 0) { 4380 break; 4381 } 4382 amap_unadd(&entry->aref, 4383 cp_start - entry->start); 4384 refs = --anon->an_ref; 4385 if (refs == 0) 4386 uvm_anfree(anon); 4387 break; 4388 default: 4389 panic("uvm_map_clean: weird flags"); 4390 } 4391 } 4392 4393 flush_object: 4394 cp_start = MAX(entry->start, start); 4395 cp_end = MIN(entry->end, end); 4396 4397 /* 4398 * flush pages if we've got a valid backing object. 4399 * 4400 * Don't PGO_FREE if we don't have write permission 4401 * and don't flush if this is a copy-on-write object 4402 * since we can't know our permissions on it. 4403 */ 4404 if (uobj != NULL && 4405 ((flags & PGO_FREE) == 0 || 4406 ((entry->max_protection & PROT_WRITE) != 0 && 4407 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4408 rv = uobj->pgops->pgo_flush(uobj, 4409 cp_start - entry->start + entry->offset, 4410 cp_end - entry->start + entry->offset, flags); 4411 4412 if (rv == FALSE) 4413 error = EFAULT; 4414 } 4415 } 4416 4417 vm_map_unlock_read(map); 4418 return error; 4419 } 4420 4421 /* 4422 * UVM_MAP_CLIP_END implementation 4423 */ 4424 void 4425 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4426 { 4427 struct vm_map_entry *tmp; 4428 4429 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4430 tmp = uvm_mapent_alloc(map, 0); 4431 4432 /* Invoke splitentry. */ 4433 uvm_map_splitentry(map, entry, tmp, addr); 4434 } 4435 4436 /* 4437 * UVM_MAP_CLIP_START implementation 4438 * 4439 * Clippers are required to not change the pointers to the entry they are 4440 * clipping on. 4441 * Since uvm_map_splitentry turns the original entry into the lowest 4442 * entry (address wise) we do a swap between the new entry and the original 4443 * entry, prior to calling uvm_map_splitentry. 4444 */ 4445 void 4446 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4447 { 4448 struct vm_map_entry *tmp; 4449 struct uvm_addr_state *free; 4450 4451 /* Unlink original. */ 4452 free = uvm_map_uaddr_e(map, entry); 4453 uvm_mapent_free_remove(map, free, entry); 4454 uvm_mapent_addr_remove(map, entry); 4455 4456 /* Copy entry. */ 4457 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4458 tmp = uvm_mapent_alloc(map, 0); 4459 uvm_mapent_copy(entry, tmp); 4460 4461 /* Put new entry in place of original entry. */ 4462 uvm_mapent_addr_insert(map, tmp); 4463 uvm_mapent_free_insert(map, free, tmp); 4464 4465 /* Invoke splitentry. */ 4466 uvm_map_splitentry(map, tmp, entry, addr); 4467 } 4468 4469 /* 4470 * Boundary fixer. 4471 */ 4472 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4473 static __inline vaddr_t 4474 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4475 { 4476 return (min < bound && max > bound) ? bound : max; 4477 } 4478 4479 /* 4480 * Choose free list based on address at start of free space. 4481 * 4482 * The uvm_addr_state returned contains addr and is the first of: 4483 * - uaddr_exe 4484 * - uaddr_brk_stack 4485 * - uaddr_any 4486 */ 4487 struct uvm_addr_state* 4488 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4489 { 4490 struct uvm_addr_state *uaddr; 4491 int i; 4492 4493 /* Special case the first page, to prevent mmap from returning 0. 
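 * Returning NULL from this function means the address is not managed
 * by any free list, so the allocators will never hand it out.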
*/ 4494 if (addr < VMMAP_MIN_ADDR) 4495 return NULL; 4496 4497 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4498 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4499 if (addr >= uvm_maxkaddr) 4500 return NULL; 4501 } 4502 4503 /* Is the address inside the exe-only map? */ 4504 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4505 addr < map->uaddr_exe->uaddr_maxaddr) 4506 return map->uaddr_exe; 4507 4508 /* Check if the space falls inside brk/stack area. */ 4509 if ((addr >= map->b_start && addr < map->b_end) || 4510 (addr >= map->s_start && addr < map->s_end)) { 4511 if (map->uaddr_brk_stack != NULL && 4512 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4513 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4514 return map->uaddr_brk_stack; 4515 } else 4516 return NULL; 4517 } 4518 4519 /* 4520 * Check the other selectors. 4521 * 4522 * These selectors are only marked as the owner, if they have insert 4523 * functions. 4524 */ 4525 for (i = 0; i < nitems(map->uaddr_any); i++) { 4526 uaddr = map->uaddr_any[i]; 4527 if (uaddr == NULL) 4528 continue; 4529 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4530 continue; 4531 4532 if (addr >= uaddr->uaddr_minaddr && 4533 addr < uaddr->uaddr_maxaddr) 4534 return uaddr; 4535 } 4536 4537 return NULL; 4538 } 4539 4540 /* 4541 * Choose free list based on address at start of free space. 4542 * 4543 * The uvm_addr_state returned contains addr and is the first of: 4544 * - uaddr_exe 4545 * - uaddr_brk_stack 4546 * - uaddr_any 4547 */ 4548 struct uvm_addr_state* 4549 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4550 { 4551 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4552 } 4553 4554 /* 4555 * Returns the first free-memory boundary that is crossed by [min-max]. 4556 */ 4557 vsize_t 4558 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4559 { 4560 struct uvm_addr_state *uaddr; 4561 int i; 4562 4563 /* Never return first page. */ 4564 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4565 4566 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4567 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4568 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4569 4570 /* Check for exe-only boundaries. */ 4571 if (map->uaddr_exe != NULL) { 4572 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4573 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4574 } 4575 4576 /* Check for exe-only boundaries. */ 4577 if (map->uaddr_brk_stack != NULL) { 4578 max = uvm_map_boundfix(min, max, 4579 map->uaddr_brk_stack->uaddr_minaddr); 4580 max = uvm_map_boundfix(min, max, 4581 map->uaddr_brk_stack->uaddr_maxaddr); 4582 } 4583 4584 /* Check other boundaries. */ 4585 for (i = 0; i < nitems(map->uaddr_any); i++) { 4586 uaddr = map->uaddr_any[i]; 4587 if (uaddr != NULL) { 4588 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4589 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4590 } 4591 } 4592 4593 /* Boundaries at stack and brk() area. */ 4594 max = uvm_map_boundfix(min, max, map->s_start); 4595 max = uvm_map_boundfix(min, max, map->s_end); 4596 max = uvm_map_boundfix(min, max, map->b_start); 4597 max = uvm_map_boundfix(min, max, map->b_end); 4598 4599 return max; 4600 } 4601 4602 /* 4603 * Update map allocation start and end addresses from proc vmspace. 
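 * In short (see the body below): the brk range becomes
 * [vm_daddr, vm_daddr + BRKSIZ) and the stack range spans from the
 * lower to the higher of vm_maxsaddr and vm_minsaddr; the free lists
 * are only rebuilt when one of those ranges actually changed.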
4604 */ 4605 void 4606 uvm_map_vmspace_update(struct vm_map *map, 4607 struct uvm_map_deadq *dead, int flags) 4608 { 4609 struct vmspace *vm; 4610 vaddr_t b_start, b_end, s_start, s_end; 4611 4612 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4613 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4614 4615 /* 4616 * Derive actual allocation boundaries from vmspace. 4617 */ 4618 vm = (struct vmspace *)map; 4619 b_start = (vaddr_t)vm->vm_daddr; 4620 b_end = b_start + BRKSIZ; 4621 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4622 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4623 #ifdef DIAGNOSTIC 4624 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4625 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4626 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4627 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4628 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4629 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4630 vm, b_start, b_end, s_start, s_end); 4631 } 4632 #endif 4633 4634 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4635 map->s_start == s_start && map->s_end == s_end)) 4636 return; 4637 4638 uvm_map_freelist_update(map, dead, b_start, b_end, 4639 s_start, s_end, flags); 4640 } 4641 4642 /* 4643 * Grow kernel memory. 4644 * 4645 * This function is only called for kernel maps when an allocation fails. 4646 * 4647 * If the map has a gap that is large enough to accommodate alloc_sz, this 4648 * function will make sure map->free will include it. 4649 */ 4650 void 4651 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4652 vsize_t alloc_sz, int flags) 4653 { 4654 vsize_t sz; 4655 vaddr_t end; 4656 struct vm_map_entry *entry; 4657 4658 /* Kernel memory only. */ 4659 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4660 /* Destroy free list. */ 4661 uvm_map_freelist_update_clear(map, dead); 4662 4663 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 4664 if (map->flags & VM_MAP_GUARDPAGES) 4665 alloc_sz += PAGE_SIZE; 4666 4667 /* 4668 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4669 * 4670 * Don't handle the case where the multiplication overflows: 4671 * if that happens, the allocation is probably too big anyway. 4672 */ 4673 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4674 4675 /* 4676 * Walk forward until a gap large enough for alloc_sz shows up. 4677 * 4678 * We assume the kernel map has no boundaries. 4679 * uvm_maxkaddr may be zero. 4680 */ 4681 end = MAX(uvm_maxkaddr, map->min_offset); 4682 entry = uvm_map_entrybyaddr(&map->addr, end); 4683 while (entry && entry->fspace < alloc_sz) 4684 entry = RBT_NEXT(uvm_map_addr, entry); 4685 if (entry) { 4686 end = MAX(VMMAP_FREE_START(entry), end); 4687 end += MIN(sz, map->max_offset - end); 4688 } else 4689 end = map->max_offset; 4690 4691 /* Reserve pmap entries. */ 4692 #ifdef PMAP_GROWKERNEL 4693 uvm_maxkaddr = pmap_growkernel(end); 4694 #else 4695 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4696 #endif 4697 4698 /* Rebuild free list. */ 4699 uvm_map_freelist_update_refill(map, flags); 4700 } 4701 4702 /* 4703 * Freelist update subfunction: unlink all entries from freelists. 
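 * Entries that carry no mapping (start == end) and have a predecessor
 * are unlinked from the address tree as well: their free space is
 * folded into the previous entry and they go onto the dead queue.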
4704 */ 4705 void 4706 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4707 { 4708 struct uvm_addr_state *free; 4709 struct vm_map_entry *entry, *prev, *next; 4710 4711 prev = NULL; 4712 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4713 entry = next) { 4714 next = RBT_NEXT(uvm_map_addr, entry); 4715 4716 free = uvm_map_uaddr_e(map, entry); 4717 uvm_mapent_free_remove(map, free, entry); 4718 4719 if (prev != NULL && entry->start == entry->end) { 4720 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4721 uvm_mapent_addr_remove(map, entry); 4722 DEAD_ENTRY_PUSH(dead, entry); 4723 } else 4724 prev = entry; 4725 } 4726 } 4727 4728 /* 4729 * Freelist update subfunction: refill the freelists with entries. 4730 */ 4731 void 4732 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4733 { 4734 struct vm_map_entry *entry; 4735 vaddr_t min, max; 4736 4737 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 4738 min = VMMAP_FREE_START(entry); 4739 max = VMMAP_FREE_END(entry); 4740 entry->fspace = 0; 4741 4742 entry = uvm_map_fix_space(map, entry, min, max, flags); 4743 } 4744 4745 uvm_tree_sanity(map, __FILE__, __LINE__); 4746 } 4747 4748 /* 4749 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4750 */ 4751 void 4752 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4753 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4754 { 4755 KDASSERT(b_end >= b_start && s_end >= s_start); 4756 4757 /* Clear all free lists. */ 4758 uvm_map_freelist_update_clear(map, dead); 4759 4760 /* Apply new bounds. */ 4761 map->b_start = b_start; 4762 map->b_end = b_end; 4763 map->s_start = s_start; 4764 map->s_end = s_end; 4765 4766 /* Refill free lists. */ 4767 uvm_map_freelist_update_refill(map, flags); 4768 } 4769 4770 /* 4771 * Assign a uvm_addr_state to the specified pointer in vm_map. 4772 * 4773 * May sleep. 4774 */ 4775 void 4776 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4777 struct uvm_addr_state *newval) 4778 { 4779 struct uvm_map_deadq dead; 4780 4781 /* Pointer which must be in this map. */ 4782 KASSERT(which != NULL); 4783 KASSERT((void*)map <= (void*)(which) && 4784 (void*)(which) < (void*)(map + 1)); 4785 4786 vm_map_lock(map); 4787 TAILQ_INIT(&dead); 4788 uvm_map_freelist_update_clear(map, &dead); 4789 4790 uvm_addr_destroy(*which); 4791 *which = newval; 4792 4793 uvm_map_freelist_update_refill(map, 0); 4794 vm_map_unlock(map); 4795 uvm_unmap_detach(&dead, 0); 4796 } 4797 4798 /* 4799 * Correct space insert. 4800 * 4801 * Entry must not be on any freelist. 4802 */ 4803 struct vm_map_entry* 4804 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4805 vaddr_t min, vaddr_t max, int flags) 4806 { 4807 struct uvm_addr_state *free, *entfree; 4808 vaddr_t lmax; 4809 4810 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4811 KDASSERT(min <= max); 4812 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4813 min == map->min_offset); 4814 4815 /* 4816 * During the function, entfree will always point at the uaddr state 4817 * for entry. 4818 */ 4819 entfree = (entry == NULL ? NULL : 4820 uvm_map_uaddr_e(map, entry)); 4821 4822 while (min != max) { 4823 /* Claim guard page for entry. 
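 * (Guard pages exist only in VM_MAP_GUARDPAGES maps; the guard sits in
 * the free space directly after the entry's mapped range and is never
 * counted as allocatable space.)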
*/ 4824 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4825 VMMAP_FREE_END(entry) == entry->end && 4826 entry->start != entry->end) { 4827 if (max - min == 2 * PAGE_SIZE) { 4828 /* 4829 * If the free-space gap is exactly 2 pages, 4830 * we make the guard 2 pages instead of 1. 4831 * Because in a guarded map, an area needs 4832 * at least 2 pages to allocate from: 4833 * one page for the allocation and one for 4834 * the guard. 4835 */ 4836 entry->guard = 2 * PAGE_SIZE; 4837 min = max; 4838 } else { 4839 entry->guard = PAGE_SIZE; 4840 min += PAGE_SIZE; 4841 } 4842 continue; 4843 } 4844 4845 /* 4846 * Handle the case where entry has a 2-page guard, but the 4847 * space after entry is freed. 4848 */ 4849 if (entry != NULL && entry->fspace == 0 && 4850 entry->guard > PAGE_SIZE) { 4851 entry->guard = PAGE_SIZE; 4852 min = VMMAP_FREE_START(entry); 4853 } 4854 4855 lmax = uvm_map_boundary(map, min, max); 4856 free = uvm_map_uaddr(map, min); 4857 4858 /* 4859 * Entries are merged if they point at the same uvm_free(). 4860 * Exception to that rule: if min == uvm_maxkaddr, a new 4861 * entry is started regardless (otherwise the allocators 4862 * will get confused). 4863 */ 4864 if (entry != NULL && free == entfree && 4865 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 4866 min == uvm_maxkaddr)) { 4867 KDASSERT(VMMAP_FREE_END(entry) == min); 4868 entry->fspace += lmax - min; 4869 } else { 4870 /* 4871 * Commit entry to free list: it'll not be added to 4872 * anymore. 4873 * We'll start a new entry and add to that entry 4874 * instead. 4875 */ 4876 if (entry != NULL) 4877 uvm_mapent_free_insert(map, entfree, entry); 4878 4879 /* New entry for new uaddr. */ 4880 entry = uvm_mapent_alloc(map, flags); 4881 KDASSERT(entry != NULL); 4882 entry->end = entry->start = min; 4883 entry->guard = 0; 4884 entry->fspace = lmax - min; 4885 entry->object.uvm_obj = NULL; 4886 entry->offset = 0; 4887 entry->etype = 0; 4888 entry->protection = entry->max_protection = 0; 4889 entry->inheritance = 0; 4890 entry->wired_count = 0; 4891 entry->advice = 0; 4892 entry->aref.ar_pageoff = 0; 4893 entry->aref.ar_amap = NULL; 4894 uvm_mapent_addr_insert(map, entry); 4895 4896 entfree = free; 4897 } 4898 4899 min = lmax; 4900 } 4901 /* Finally put entry on the uaddr state. */ 4902 if (entry != NULL) 4903 uvm_mapent_free_insert(map, entfree, entry); 4904 4905 return entry; 4906 } 4907 4908 /* 4909 * MQuery style of allocation. 4910 * 4911 * This allocator searches forward until sufficient space is found to map 4912 * the given size. 4913 * 4914 * XXX: factor in offset (via pmap_prefer) and protection? 4915 */ 4916 int 4917 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 4918 int flags) 4919 { 4920 struct vm_map_entry *entry, *last; 4921 vaddr_t addr; 4922 vaddr_t tmp, pmap_align, pmap_offset; 4923 int error; 4924 4925 addr = *addr_p; 4926 vm_map_lock_read(map); 4927 4928 /* Configure pmap prefer. */ 4929 if (offset != UVM_UNKNOWN_OFFSET) { 4930 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 4931 pmap_offset = PMAP_PREFER_OFFSET(offset); 4932 } else { 4933 pmap_align = PAGE_SIZE; 4934 pmap_offset = 0; 4935 } 4936 4937 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 4938 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 4939 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4940 if (tmp < addr) 4941 tmp += pmap_align; 4942 addr = tmp; 4943 } 4944 4945 /* First, check if the requested range is fully available. 
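 * A worked example of the alignment step above, with hypothetical
 * numbers: for addr = 0x12345000, pmap_align = 0x10000 and
 * pmap_offset = 0x3000, tmp is 0x12343000 < addr, so addr becomes
 * 0x12353000 -- the smallest address >= the original with the
 * preferred offset modulo the alignment.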
*/ 4946 entry = uvm_map_entrybyaddr(&map->addr, addr); 4947 last = NULL; 4948 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4949 error = 0; 4950 goto out; 4951 } 4952 if (flags & UVM_FLAG_FIXED) { 4953 error = EINVAL; 4954 goto out; 4955 } 4956 4957 error = ENOMEM; /* Default error from here. */ 4958 4959 /* 4960 * At this point, the memory at <addr, sz> is not available. 4961 * The reasons are: 4962 * [1] it's outside the map, 4963 * [2] it starts in used memory (and therefore needs to move 4964 * toward the first free page in entry), 4965 * [3] it starts in free memory but bumps into used memory. 4966 * 4967 * Note that for case [2], the forward moving is handled by the 4968 * for loop below. 4969 */ 4970 if (entry == NULL) { 4971 /* [1] Outside the map. */ 4972 if (addr >= map->max_offset) 4973 goto out; 4974 else 4975 entry = RBT_MIN(uvm_map_addr, &map->addr); 4976 } else if (VMMAP_FREE_START(entry) <= addr) { 4977 /* [3] Bumped into used memory. */ 4978 entry = RBT_NEXT(uvm_map_addr, entry); 4979 } 4980 4981 /* Test if the next entry is sufficient for the allocation. */ 4982 for (; entry != NULL; 4983 entry = RBT_NEXT(uvm_map_addr, entry)) { 4984 if (entry->fspace == 0) 4985 continue; 4986 addr = VMMAP_FREE_START(entry); 4987 4988 restart: /* Restart address checks on address change. */ 4989 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4990 if (tmp < addr) 4991 tmp += pmap_align; 4992 addr = tmp; 4993 if (addr >= VMMAP_FREE_END(entry)) 4994 continue; 4995 4996 /* Skip brk() allocation addresses. */ 4997 if (addr + sz > map->b_start && addr < map->b_end) { 4998 if (VMMAP_FREE_END(entry) > map->b_end) { 4999 addr = map->b_end; 5000 goto restart; 5001 } else 5002 continue; 5003 } 5004 /* Skip stack allocation addresses. */ 5005 if (addr + sz > map->s_start && addr < map->s_end) { 5006 if (VMMAP_FREE_END(entry) > map->s_end) { 5007 addr = map->s_end; 5008 goto restart; 5009 } else 5010 continue; 5011 } 5012 5013 last = NULL; 5014 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5015 error = 0; 5016 goto out; 5017 } 5018 } 5019 5020 out: 5021 vm_map_unlock_read(map); 5022 if (error == 0) 5023 *addr_p = addr; 5024 return error; 5025 } 5026 5027 /* 5028 * Determine allocation bias. 5029 * 5030 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5031 * addresses, or 0 for no bias. 5032 * The bias mechanism is intended to avoid clashing with brk() and stack 5033 * areas. 5034 */ 5035 int 5036 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5037 { 5038 vaddr_t start, end; 5039 5040 start = VMMAP_FREE_START(entry); 5041 end = VMMAP_FREE_END(entry); 5042 5043 /* Stay at the top of brk() area. */ 5044 if (end >= map->b_start && start < map->b_end) 5045 return 1; 5046 /* Stay at the far end of the stack area. */ 5047 if (end >= map->s_start && start < map->s_end) { 5048 #ifdef MACHINE_STACK_GROWS_UP 5049 return 1; 5050 #else 5051 return -1; 5052 #endif 5053 } 5054 5055 /* No bias, this area is meant for us. 
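 * (i.e. the free space overlaps neither the brk() reservation nor the
 * stack reservation, so no preference is needed.)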
*/ 5056 return 0; 5057 } 5058 5059 5060 boolean_t 5061 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5062 { 5063 boolean_t rv; 5064 5065 if (map->flags & VM_MAP_INTRSAFE) { 5066 rv = mtx_enter_try(&map->mtx); 5067 } else { 5068 mtx_enter(&map->flags_lock); 5069 if (map->flags & VM_MAP_BUSY) { 5070 mtx_leave(&map->flags_lock); 5071 return (FALSE); 5072 } 5073 mtx_leave(&map->flags_lock); 5074 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5075 /* check if the lock is busy and back out if we won the race */ 5076 if (rv) { 5077 mtx_enter(&map->flags_lock); 5078 if (map->flags & VM_MAP_BUSY) { 5079 rw_exit(&map->lock); 5080 rv = FALSE; 5081 } 5082 mtx_leave(&map->flags_lock); 5083 } 5084 } 5085 5086 if (rv) { 5087 map->timestamp++; 5088 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5089 uvm_tree_sanity(map, file, line); 5090 uvm_tree_size_chk(map, file, line); 5091 } 5092 5093 return (rv); 5094 } 5095 5096 void 5097 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5098 { 5099 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5100 do { 5101 mtx_enter(&map->flags_lock); 5102 tryagain: 5103 while (map->flags & VM_MAP_BUSY) { 5104 map->flags |= VM_MAP_WANTLOCK; 5105 msleep(&map->flags, &map->flags_lock, 5106 PVM, vmmapbsy, 0); 5107 } 5108 mtx_leave(&map->flags_lock); 5109 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5110 /* check if the lock is busy and back out if we won the race */ 5111 mtx_enter(&map->flags_lock); 5112 if (map->flags & VM_MAP_BUSY) { 5113 rw_exit(&map->lock); 5114 goto tryagain; 5115 } 5116 mtx_leave(&map->flags_lock); 5117 } else { 5118 mtx_enter(&map->mtx); 5119 } 5120 5121 map->timestamp++; 5122 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5123 uvm_tree_sanity(map, file, line); 5124 uvm_tree_size_chk(map, file, line); 5125 } 5126 5127 void 5128 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5129 { 5130 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5131 rw_enter_read(&map->lock); 5132 else 5133 mtx_enter(&map->mtx); 5134 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5135 uvm_tree_sanity(map, file, line); 5136 uvm_tree_size_chk(map, file, line); 5137 } 5138 5139 void 5140 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5141 { 5142 uvm_tree_sanity(map, file, line); 5143 uvm_tree_size_chk(map, file, line); 5144 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5145 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5146 rw_exit(&map->lock); 5147 else 5148 mtx_leave(&map->mtx); 5149 } 5150 5151 void 5152 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5153 { 5154 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5155 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5156 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5157 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5158 rw_exit_read(&map->lock); 5159 else 5160 mtx_leave(&map->mtx); 5161 } 5162 5163 void 5164 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5165 { 5166 uvm_tree_sanity(map, file, line); 5167 uvm_tree_size_chk(map, file, line); 5168 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5169 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5170 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5171 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5172 rw_enter(&map->lock, RW_DOWNGRADE); 5173 } 5174 5175 void 5176 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5177 { 5178 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5179 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5180 
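	/*
	 * Note: this upgrade is not atomic -- the read lock is dropped
	 * before the write lock is taken below, so the map may have
	 * changed by the time the write lock is held; callers must be
	 * prepared to revalidate what they looked up under the read lock.
	 */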
LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5181 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5182 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5183 rw_exit_read(&map->lock); 5184 rw_enter_write(&map->lock); 5185 } 5186 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5187 uvm_tree_sanity(map, file, line); 5188 } 5189 5190 void 5191 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5192 { 5193 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5194 mtx_enter(&map->flags_lock); 5195 map->flags |= VM_MAP_BUSY; 5196 mtx_leave(&map->flags_lock); 5197 } 5198 5199 void 5200 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5201 { 5202 int oflags; 5203 5204 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5205 mtx_enter(&map->flags_lock); 5206 oflags = map->flags; 5207 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5208 mtx_leave(&map->flags_lock); 5209 if (oflags & VM_MAP_WANTLOCK) 5210 wakeup(&map->flags); 5211 } 5212 5213 #ifndef SMALL_KERNEL 5214 int 5215 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5216 size_t *lenp) 5217 { 5218 struct vm_map_entry *entry; 5219 vaddr_t start; 5220 int cnt, maxcnt, error = 0; 5221 5222 KASSERT(*lenp > 0); 5223 KASSERT((*lenp % sizeof(*kve)) == 0); 5224 cnt = 0; 5225 maxcnt = *lenp / sizeof(*kve); 5226 KASSERT(maxcnt > 0); 5227 5228 /* 5229 * Return only entries whose address is above the given base 5230 * address. This allows userland to iterate without knowing the 5231 * number of entries beforehand. 5232 */ 5233 start = (vaddr_t)kve[0].kve_start; 5234 5235 vm_map_lock(map); 5236 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5237 if (cnt == maxcnt) { 5238 error = ENOMEM; 5239 break; 5240 } 5241 if (start != 0 && entry->start < start) 5242 continue; 5243 kve->kve_start = entry->start; 5244 kve->kve_end = entry->end; 5245 kve->kve_guard = entry->guard; 5246 kve->kve_fspace = entry->fspace; 5247 kve->kve_fspace_augment = entry->fspace_augment; 5248 kve->kve_offset = entry->offset; 5249 kve->kve_wired_count = entry->wired_count; 5250 kve->kve_etype = entry->etype; 5251 kve->kve_protection = entry->protection; 5252 kve->kve_max_protection = entry->max_protection; 5253 kve->kve_advice = entry->advice; 5254 kve->kve_inheritance = entry->inheritance; 5255 kve->kve_flags = entry->flags; 5256 kve++; 5257 cnt++; 5258 } 5259 vm_map_unlock(map); 5260 5261 KASSERT(cnt <= maxcnt); 5262 5263 *lenp = sizeof(*kve) * cnt; 5264 return error; 5265 } 5266 #endif 5267 5268 5269 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5270 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5271 5272 5273 /* 5274 * MD code: vmspace allocator setup. 5275 */ 5276 5277 #ifdef __i386__ 5278 void 5279 uvm_map_setup_md(struct vm_map *map) 5280 { 5281 vaddr_t min, max; 5282 5283 min = map->min_offset; 5284 max = map->max_offset; 5285 5286 /* 5287 * Ensure the selectors will not try to manage page 0; 5288 * it's too special. 5289 */ 5290 if (min < VMMAP_MIN_ADDR) 5291 min = VMMAP_MIN_ADDR; 5292 5293 #if 0 /* Cool stuff, not yet */ 5294 /* Hinted allocations. */ 5295 map->uaddr_any[1] = uaddr_hint_create(min, max, 1024 * 1024 * 1024); 5296 5297 /* Executable code is special. */ 5298 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5299 /* Place normal allocations beyond executable mappings. 
*/ 5300 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5301 #else /* Crappy stuff, for now */ 5302 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5303 #endif 5304 5305 #ifndef SMALL_KERNEL 5306 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5307 #endif /* !SMALL_KERNEL */ 5308 } 5309 #elif __LP64__ 5310 void 5311 uvm_map_setup_md(struct vm_map *map) 5312 { 5313 vaddr_t min, max; 5314 5315 min = map->min_offset; 5316 max = map->max_offset; 5317 5318 /* 5319 * Ensure the selectors will not try to manage page 0; 5320 * it's too special. 5321 */ 5322 if (min < VMMAP_MIN_ADDR) 5323 min = VMMAP_MIN_ADDR; 5324 5325 #if 0 /* Cool stuff, not yet */ 5326 /* Hinted allocations above 4GB */ 5327 map->uaddr_any[0] = 5328 uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024); 5329 /* Hinted allocations below 4GB */ 5330 map->uaddr_any[1] = uaddr_hint_create(min, 0x100000000ULL, 5331 1024 * 1024 * 1024); 5332 /* Normal allocations, always above 4GB */ 5333 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5334 #else /* Crappy stuff, for now */ 5335 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5336 #endif 5337 5338 #ifndef SMALL_KERNEL 5339 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5340 #endif /* !SMALL_KERNEL */ 5341 } 5342 #else /* non-i386, 32 bit */ 5343 void 5344 uvm_map_setup_md(struct vm_map *map) 5345 { 5346 vaddr_t min, max; 5347 5348 min = map->min_offset; 5349 max = map->max_offset; 5350 5351 /* 5352 * Ensure the selectors will not try to manage page 0; 5353 * it's too special. 5354 */ 5355 if (min < VMMAP_MIN_ADDR) 5356 min = VMMAP_MIN_ADDR; 5357 5358 #if 0 /* Cool stuff, not yet */ 5359 /* Hinted allocations. */ 5360 map->uaddr_any[1] = uaddr_hint_create(min, max, 1024 * 1024 * 1024); 5361 /* Normal allocations. */ 5362 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5363 #else /* Crappy stuff, for now */ 5364 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5365 #endif 5366 5367 #ifndef SMALL_KERNEL 5368 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5369 #endif /* !SMALL_KERNEL */ 5370 } 5371 #endif 5372