1 /* $OpenBSD: uvm_map.c,v 1.217 2016/06/17 10:48:25 dlg Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/sysctl.h> 94 95 #ifdef SYSVSHM 96 #include <sys/shm.h> 97 #endif 98 99 #include <uvm/uvm.h> 100 101 #ifdef DDB 102 #include <uvm/uvm_ddb.h> 103 #endif 104 105 #include <uvm/uvm_addr.h> 106 107 108 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 109 int uvm_mapent_isjoinable(struct vm_map*, 110 struct vm_map_entry*, struct vm_map_entry*); 111 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 112 struct vm_map_entry*, struct uvm_map_deadq*); 113 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 114 struct vm_map_entry*, struct uvm_map_deadq*); 115 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 116 struct vm_map_entry*, vaddr_t, vsize_t, int, 117 struct uvm_map_deadq*, struct vm_map_entry*); 118 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 119 void uvm_mapent_free(struct vm_map_entry*); 120 void uvm_unmap_kill_entry(struct vm_map*, 121 struct vm_map_entry*); 122 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 123 void uvm_mapent_mkfree(struct vm_map*, 124 struct vm_map_entry*, struct vm_map_entry**, 125 struct uvm_map_deadq*, boolean_t); 126 void uvm_map_pageable_pgon(struct vm_map*, 127 struct vm_map_entry*, struct vm_map_entry*, 128 vaddr_t, vaddr_t); 129 int uvm_map_pageable_wire(struct vm_map*, 130 struct vm_map_entry*, struct vm_map_entry*, 131 vaddr_t, vaddr_t, int); 132 void uvm_map_setup_entries(struct vm_map*); 133 void uvm_map_setup_md(struct vm_map*); 134 void uvm_map_teardown(struct vm_map*); 135 void uvm_map_vmspace_update(struct vm_map*, 136 struct uvm_map_deadq*, int); 137 void uvm_map_kmem_grow(struct vm_map*, 138 struct uvm_map_deadq*, vsize_t, int); 139 void uvm_map_freelist_update_clear(struct vm_map*, 140 struct uvm_map_deadq*); 141 void uvm_map_freelist_update_refill(struct vm_map *, int); 142 void uvm_map_freelist_update(struct vm_map*, 143 struct uvm_map_deadq*, vaddr_t, vaddr_t, 144 vaddr_t, vaddr_t, int); 145 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 146 vaddr_t, vaddr_t, int); 147 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 148 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 149 int); 150 int uvm_map_findspace(struct vm_map*, 151 struct vm_map_entry**, struct vm_map_entry**, 152 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 153 vaddr_t); 154 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 155 void uvm_map_addr_augment(struct vm_map_entry*); 156 157 /* 158 * Tree management functions. 
159 */ 160 161 static __inline void uvm_mapent_copy(struct vm_map_entry*, 162 struct vm_map_entry*); 163 static int uvm_mapentry_addrcmp(struct vm_map_entry*, 164 struct vm_map_entry*); 165 void uvm_mapent_free_insert(struct vm_map*, 166 struct uvm_addr_state*, struct vm_map_entry*); 167 void uvm_mapent_free_remove(struct vm_map*, 168 struct uvm_addr_state*, struct vm_map_entry*); 169 void uvm_mapent_addr_insert(struct vm_map*, 170 struct vm_map_entry*); 171 void uvm_mapent_addr_remove(struct vm_map*, 172 struct vm_map_entry*); 173 void uvm_map_splitentry(struct vm_map*, 174 struct vm_map_entry*, struct vm_map_entry*, 175 vaddr_t); 176 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 177 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 178 179 /* 180 * uvm_vmspace_fork helper functions. 181 */ 182 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 183 vsize_t, vm_prot_t, vm_prot_t, 184 struct vm_map_entry*, struct uvm_map_deadq*, int, 185 int); 186 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 187 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 188 struct vm_map_entry*, struct uvm_map_deadq*); 189 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 190 struct vm_map*, struct vm_map_entry*, 191 struct uvm_map_deadq*); 192 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 193 struct vm_map*, struct vm_map_entry*, 194 struct uvm_map_deadq*); 195 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 196 struct vm_map*, struct vm_map_entry*, 197 struct uvm_map_deadq*); 198 199 /* 200 * Tree validation. 201 */ 202 #ifdef VMMAP_DEBUG 203 void uvm_tree_assert(struct vm_map*, int, char*, 204 char*, int); 205 #define UVM_ASSERT(map, cond, file, line) \ 206 uvm_tree_assert((map), (cond), #cond, (file), (line)) 207 void uvm_tree_sanity(struct vm_map*, char*, int); 208 void uvm_tree_size_chk(struct vm_map*, char*, int); 209 void vmspace_validate(struct vm_map*); 210 #else 211 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 212 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 213 #define vmspace_validate(_map) do {} while (0) 214 #endif 215 216 /* 217 * All architectures will have pmap_prefer. 218 */ 219 #ifndef PMAP_PREFER 220 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 221 #define PMAP_PREFER_OFFSET(off) 0 222 #define PMAP_PREFER(addr, off) (addr) 223 #endif 224 225 226 /* 227 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 228 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 229 * 230 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 231 * each time. 232 */ 233 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 234 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 235 #define VM_MAP_KSIZE_ALLOCMUL 4 236 /* 237 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 238 * ahead. 239 */ 240 #define FSPACE_DELTA 8 241 /* 242 * Put allocations adjecent to previous allocations when the free-space tree 243 * is larger than FSPACE_COMPACT entries. 244 * 245 * Alignment and PMAP_PREFER may still cause the entry to not be fully 246 * adjecent. Note that this strategy reduces memory fragmentation (by leaving 247 * a large space before or after the allocation). 248 */ 249 #define FSPACE_COMPACT 128 250 /* 251 * Make the address selection skip at most this many bytes from the start of 252 * the free space in which the allocation takes place. 
253 * 254 * The main idea behind a randomized address space is that an attacker cannot 255 * know where to target his attack. Therefore, the location of objects must be 256 * as random as possible. However, the goal is not to create the most sparse 257 * map that is possible. 258 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 259 * sizes, thereby reducing the sparseness. The biggest randomization comes 260 * from fragmentation, i.e. FSPACE_COMPACT. 261 */ 262 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 263 /* 264 * Allow for small gaps in the overflow areas. 265 * Gap size is in bytes and does not have to be a multiple of page-size. 266 */ 267 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 268 269 /* auto-allocate address lower bound */ 270 #define VMMAP_MIN_ADDR PAGE_SIZE 271 272 273 #ifdef DEADBEEF0 274 #define UVMMAP_DEADBEEF ((void*)DEADBEEF0) 275 #else 276 #define UVMMAP_DEADBEEF ((void*)0xdeadd0d0) 277 #endif 278 279 #ifdef DEBUG 280 int uvm_map_printlocks = 0; 281 282 #define LPRINTF(_args) \ 283 do { \ 284 if (uvm_map_printlocks) \ 285 printf _args; \ 286 } while (0) 287 #else 288 #define LPRINTF(_args) do {} while (0) 289 #endif 290 291 static struct mutex uvm_kmapent_mtx; 292 static struct timeval uvm_kmapent_last_warn_time; 293 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 294 295 const char vmmapbsy[] = "vmmapbsy"; 296 297 /* 298 * pool for vmspace structures. 299 */ 300 struct pool uvm_vmspace_pool; 301 302 /* 303 * pool for dynamically-allocated map entries. 304 */ 305 struct pool uvm_map_entry_pool; 306 struct pool uvm_map_entry_kmem_pool; 307 308 /* 309 * This global represents the end of the kernel virtual address 310 * space. If we want to exceed this, we must grow the kernel 311 * virtual address space dynamically. 312 * 313 * Note, this variable is locked by kernel_map's lock. 314 */ 315 vaddr_t uvm_maxkaddr; 316 317 /* 318 * Locking predicate. 319 */ 320 #define UVM_MAP_REQ_WRITE(_map) \ 321 do { \ 322 if ((_map)->ref_count > 0) { \ 323 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 324 rw_assert_wrlock(&(_map)->lock); \ 325 else \ 326 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 327 } \ 328 } while (0) 329 330 /* 331 * Tree describing entries by address. 332 * 333 * Addresses are unique. 334 * Entries with start == end may only exist if they are the first entry 335 * (sorted by address) within a free-memory tree. 336 */ 337 338 static __inline int 339 uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2) 340 { 341 return e1->start < e2->start ? -1 : e1->start > e2->start; 342 } 343 344 /* 345 * Copy mapentry. 346 */ 347 static __inline void 348 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 349 { 350 caddr_t csrc, cdst; 351 size_t sz; 352 353 csrc = (caddr_t)src; 354 cdst = (caddr_t)dst; 355 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 356 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 357 358 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 359 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 360 memcpy(cdst, csrc, sz); 361 } 362 363 /* 364 * Handle free-list insertion. 365 */ 366 void 367 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 368 struct vm_map_entry *entry) 369 { 370 const struct uvm_addr_functions *fun; 371 #ifdef VMMAP_DEBUG 372 vaddr_t min, max, bound; 373 #endif 374 375 #ifdef VMMAP_DEBUG 376 /* 377 * Boundary check. 378 * Boundaries are folded if they go on the same free list. 
379 */ 380 min = VMMAP_FREE_START(entry); 381 max = VMMAP_FREE_END(entry); 382 383 while (min < max) { 384 bound = uvm_map_boundary(map, min, max); 385 KASSERT(uvm_map_uaddr(map, min) == uaddr); 386 min = bound; 387 } 388 #endif 389 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 390 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 391 392 UVM_MAP_REQ_WRITE(map); 393 394 /* Actual insert: forward to uaddr pointer. */ 395 if (uaddr != NULL) { 396 fun = uaddr->uaddr_functions; 397 KDASSERT(fun != NULL); 398 if (fun->uaddr_free_insert != NULL) 399 (*fun->uaddr_free_insert)(map, uaddr, entry); 400 entry->etype |= UVM_ET_FREEMAPPED; 401 } 402 403 /* Update fspace augmentation. */ 404 uvm_map_addr_augment(entry); 405 } 406 407 /* 408 * Handle free-list removal. 409 */ 410 void 411 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 412 struct vm_map_entry *entry) 413 { 414 const struct uvm_addr_functions *fun; 415 416 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 417 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 418 UVM_MAP_REQ_WRITE(map); 419 420 if (uaddr != NULL) { 421 fun = uaddr->uaddr_functions; 422 if (fun->uaddr_free_remove != NULL) 423 (*fun->uaddr_free_remove)(map, uaddr, entry); 424 entry->etype &= ~UVM_ET_FREEMAPPED; 425 } 426 } 427 428 /* 429 * Handle address tree insertion. 430 */ 431 void 432 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 433 { 434 struct vm_map_entry *res; 435 436 if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || 437 RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || 438 RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF) 439 panic("uvm_mapent_addr_insert: entry still in addr list"); 440 KDASSERT(entry->start <= entry->end); 441 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 442 (entry->end & (vaddr_t)PAGE_MASK) == 0); 443 444 UVM_MAP_REQ_WRITE(map); 445 res = RB_INSERT(uvm_map_addr, &map->addr, entry); 446 if (res != NULL) { 447 panic("uvm_mapent_addr_insert: map %p entry %p " 448 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 449 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 450 map, entry, 451 entry->start, entry->end, entry->guard, entry->fspace, 452 res, res->start, res->end, res->guard, res->fspace); 453 } 454 } 455 456 /* 457 * Handle address tree removal. 458 */ 459 void 460 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 461 { 462 struct vm_map_entry *res; 463 464 UVM_MAP_REQ_WRITE(map); 465 res = RB_REMOVE(uvm_map_addr, &map->addr, entry); 466 if (res != entry) 467 panic("uvm_mapent_addr_remove"); 468 RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) = 469 RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF; 470 } 471 472 /* 473 * uvm_map_reference: add reference to a map 474 * 475 * XXX check map reference counter lock 476 */ 477 #define uvm_map_reference(_map) \ 478 do { \ 479 map->ref_count++; \ 480 } while (0) 481 482 /* 483 * Calculate the dused delta. 484 */ 485 vsize_t 486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 487 { 488 struct vmspace *vm; 489 vsize_t sz; 490 vaddr_t lmax; 491 vaddr_t stack_begin, stack_end; /* Position of stack. 
*/ 492 493 KASSERT(map->flags & VM_MAP_ISVMSPACE); 494 vm = (struct vmspace *)map; 495 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 496 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 497 498 sz = 0; 499 while (min != max) { 500 lmax = max; 501 if (min < stack_begin && lmax > stack_begin) 502 lmax = stack_begin; 503 else if (min < stack_end && lmax > stack_end) 504 lmax = stack_end; 505 506 if (min >= stack_begin && min < stack_end) { 507 /* nothing */ 508 } else 509 sz += lmax - min; 510 min = lmax; 511 } 512 513 return sz >> PAGE_SHIFT; 514 } 515 516 /* 517 * Find the entry describing the given address. 518 */ 519 struct vm_map_entry* 520 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 521 { 522 struct vm_map_entry *iter; 523 524 iter = RB_ROOT(atree); 525 while (iter != NULL) { 526 if (iter->start > addr) 527 iter = RB_LEFT(iter, daddrs.addr_entry); 528 else if (VMMAP_FREE_END(iter) <= addr) 529 iter = RB_RIGHT(iter, daddrs.addr_entry); 530 else 531 return iter; 532 } 533 return NULL; 534 } 535 536 /* 537 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 538 * 539 * Push dead entries into a linked list. 540 * Since the linked list abuses the address tree for storage, the entry 541 * may not be linked in a map. 542 * 543 * *head must be initialized to NULL before the first call to this macro. 544 * uvm_unmap_detach(*head, 0) will remove dead entries. 545 */ 546 static __inline void 547 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 548 { 549 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 550 } 551 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 552 dead_entry_push((_headptr), (_entry)) 553 554 /* 555 * Helper function for uvm_map_findspace_tree. 556 * 557 * Given allocation constraints and pmap constraints, finds the 558 * lowest and highest address in a range that can be used for the 559 * allocation. 560 * 561 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 562 * 563 * 564 * Big chunk of math with a seasoning of dragons. 565 */ 566 int 567 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 568 struct vm_map_entry *sel, vaddr_t align, 569 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 570 { 571 vaddr_t sel_min, sel_max; 572 #ifdef PMAP_PREFER 573 vaddr_t pmap_min, pmap_max; 574 #endif /* PMAP_PREFER */ 575 #ifdef DIAGNOSTIC 576 int bad; 577 #endif /* DIAGNOSTIC */ 578 579 sel_min = VMMAP_FREE_START(sel); 580 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 581 582 #ifdef PMAP_PREFER 583 584 /* 585 * There are two special cases, in which we can satisfy the align 586 * requirement and the pmap_prefer requirement. 587 * - when pmap_off == 0, we always select the largest of the two 588 * - when pmap_off % align == 0 and pmap_align > align, we simply 589 * satisfy the pmap_align requirement and automatically 590 * satisfy the align requirement. 591 */ 592 if (align > PAGE_SIZE && 593 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 594 /* 595 * Simple case: only use align. 596 */ 597 sel_min = roundup(sel_min, align); 598 sel_max &= ~(align - 1); 599 600 if (sel_min > sel_max) 601 return ENOMEM; 602 603 /* Correct for bias. 
*/ 604 if (sel_max - sel_min > FSPACE_BIASGAP) { 605 if (bias > 0) { 606 sel_min = sel_max - FSPACE_BIASGAP; 607 sel_min = roundup(sel_min, align); 608 } else if (bias < 0) { 609 sel_max = sel_min + FSPACE_BIASGAP; 610 sel_max &= ~(align - 1); 611 } 612 } 613 } else if (pmap_align != 0) { 614 /* 615 * Special case: satisfy both pmap_prefer and 616 * align argument. 617 */ 618 pmap_max = sel_max & ~(pmap_align - 1); 619 pmap_min = sel_min; 620 if (pmap_max < sel_min) 621 return ENOMEM; 622 623 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 624 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 625 pmap_min = pmap_max - FSPACE_BIASGAP; 626 /* Align pmap_min. */ 627 pmap_min &= ~(pmap_align - 1); 628 if (pmap_min < sel_min) 629 pmap_min += pmap_align; 630 if (pmap_min > pmap_max) 631 return ENOMEM; 632 633 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 634 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 635 pmap_max = (pmap_min + FSPACE_BIASGAP) & 636 ~(pmap_align - 1); 637 } 638 if (pmap_min > pmap_max) 639 return ENOMEM; 640 641 /* Apply pmap prefer offset. */ 642 pmap_max |= pmap_off; 643 if (pmap_max > sel_max) 644 pmap_max -= pmap_align; 645 pmap_min |= pmap_off; 646 if (pmap_min < sel_min) 647 pmap_min += pmap_align; 648 649 /* 650 * Fixup: it's possible that pmap_min and pmap_max 651 * cross eachother. In this case, try to find one 652 * address that is allowed. 653 * (This usually happens in biased case.) 654 */ 655 if (pmap_min > pmap_max) { 656 if (pmap_min < sel_max) 657 pmap_max = pmap_min; 658 else if (pmap_max > sel_min) 659 pmap_min = pmap_max; 660 else 661 return ENOMEM; 662 } 663 664 /* Internal validation. */ 665 KDASSERT(pmap_min <= pmap_max); 666 667 sel_min = pmap_min; 668 sel_max = pmap_max; 669 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 670 sel_min = sel_max - FSPACE_BIASGAP; 671 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 672 sel_max = sel_min + FSPACE_BIASGAP; 673 674 #else 675 676 if (align > PAGE_SIZE) { 677 sel_min = roundup(sel_min, align); 678 sel_max &= ~(align - 1); 679 if (sel_min > sel_max) 680 return ENOMEM; 681 682 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 683 if (bias > 0) { 684 sel_min = roundup(sel_max - FSPACE_BIASGAP, 685 align); 686 } else { 687 sel_max = (sel_min + FSPACE_BIASGAP) & 688 ~(align - 1); 689 } 690 } 691 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 692 sel_min = sel_max - FSPACE_BIASGAP; 693 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 694 sel_max = sel_min + FSPACE_BIASGAP; 695 696 #endif 697 698 if (sel_min > sel_max) 699 return ENOMEM; 700 701 #ifdef DIAGNOSTIC 702 bad = 0; 703 /* Lower boundary check. */ 704 if (sel_min < VMMAP_FREE_START(sel)) { 705 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 706 sel_min, VMMAP_FREE_START(sel)); 707 bad++; 708 } 709 /* Upper boundary check. */ 710 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 711 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 712 sel_max, 713 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 714 bad++; 715 } 716 /* Lower boundary alignment. */ 717 if (align != 0 && (sel_min & (align - 1)) != 0) { 718 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 719 sel_min, align); 720 bad++; 721 } 722 /* Upper boundary alignment. */ 723 if (align != 0 && (sel_max & (align - 1)) != 0) { 724 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 725 sel_max, align); 726 bad++; 727 } 728 /* Lower boundary PMAP_PREFER check. 
*/ 729 if (pmap_align != 0 && align == 0 && 730 (sel_min & (pmap_align - 1)) != pmap_off) { 731 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 732 sel_min, sel_min & (pmap_align - 1), pmap_off); 733 bad++; 734 } 735 /* Upper boundary PMAP_PREFER check. */ 736 if (pmap_align != 0 && align == 0 && 737 (sel_max & (pmap_align - 1)) != pmap_off) { 738 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 739 sel_max, sel_max & (pmap_align - 1), pmap_off); 740 bad++; 741 } 742 743 if (bad) { 744 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 745 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 746 "bias = %d, " 747 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 748 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 749 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 750 } 751 #endif /* DIAGNOSTIC */ 752 753 *min = sel_min; 754 *max = sel_max; 755 return 0; 756 } 757 758 /* 759 * Test if memory starting at addr with sz bytes is free. 760 * 761 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 762 * the space. 763 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 764 */ 765 int 766 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 767 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 768 vaddr_t addr, vsize_t sz) 769 { 770 struct uvm_addr_state *free; 771 struct uvm_map_addr *atree; 772 struct vm_map_entry *i, *i_end; 773 774 /* 775 * Kernel memory above uvm_maxkaddr is considered unavailable. 776 */ 777 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 778 if (addr + sz > uvm_maxkaddr) 779 return 0; 780 } 781 782 atree = &map->addr; 783 784 /* 785 * Fill in first, last, so they point at the entries containing the 786 * first and last address of the range. 787 * Note that if they are not NULL, we don't perform the lookup. 788 */ 789 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 790 if (*start_ptr == NULL) { 791 *start_ptr = uvm_map_entrybyaddr(atree, addr); 792 if (*start_ptr == NULL) 793 return 0; 794 } else 795 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 796 if (*end_ptr == NULL) { 797 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 798 *end_ptr = *start_ptr; 799 else { 800 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 801 if (*end_ptr == NULL) 802 return 0; 803 } 804 } else 805 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 806 807 /* Validation. */ 808 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 809 KDASSERT((*start_ptr)->start <= addr && 810 VMMAP_FREE_END(*start_ptr) > addr && 811 (*end_ptr)->start < addr + sz && 812 VMMAP_FREE_END(*end_ptr) >= addr + sz); 813 814 /* 815 * Check the none of the entries intersects with <addr, addr+sz>. 816 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 817 * considered unavailable unless called by those allocators. 818 */ 819 i = *start_ptr; 820 i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr); 821 for (; i != i_end; 822 i = RB_NEXT(uvm_map_addr, atree, i)) { 823 if (i->start != i->end && i->end > addr) 824 return 0; 825 826 /* 827 * uaddr_exe and uaddr_brk_stack may only be used 828 * by these allocators and the NULL uaddr (i.e. no 829 * uaddr). 830 * Reject if this requirement is not met. 
831 */ 832 if (uaddr != NULL) { 833 free = uvm_map_uaddr_e(map, i); 834 835 if (uaddr != free && free != NULL && 836 (free == map->uaddr_exe || 837 free == map->uaddr_brk_stack)) 838 return 0; 839 } 840 } 841 842 return -1; 843 } 844 845 /* 846 * Invoke each address selector until an address is found. 847 * Will not invoke uaddr_exe. 848 */ 849 int 850 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 851 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 852 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 853 { 854 struct uvm_addr_state *uaddr; 855 int i; 856 857 /* 858 * Allocation for sz bytes at any address, 859 * using the addr selectors in order. 860 */ 861 for (i = 0; i < nitems(map->uaddr_any); i++) { 862 uaddr = map->uaddr_any[i]; 863 864 if (uvm_addr_invoke(map, uaddr, first, last, 865 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 866 return 0; 867 } 868 869 /* Fall back to brk() and stack() address selectors. */ 870 uaddr = map->uaddr_brk_stack; 871 if (uvm_addr_invoke(map, uaddr, first, last, 872 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 873 return 0; 874 875 return ENOMEM; 876 } 877 878 /* Calculate entry augmentation value. */ 879 vsize_t 880 uvm_map_addr_augment_get(struct vm_map_entry *entry) 881 { 882 vsize_t augment; 883 struct vm_map_entry *left, *right; 884 885 augment = entry->fspace; 886 if ((left = RB_LEFT(entry, daddrs.addr_entry)) != NULL) 887 augment = MAX(augment, left->fspace_augment); 888 if ((right = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) 889 augment = MAX(augment, right->fspace_augment); 890 return augment; 891 } 892 893 /* 894 * Update augmentation data in entry. 895 */ 896 void 897 uvm_map_addr_augment(struct vm_map_entry *entry) 898 { 899 vsize_t augment; 900 901 while (entry != NULL) { 902 /* Calculate value for augmentation. */ 903 augment = uvm_map_addr_augment_get(entry); 904 905 /* 906 * Descend update. 907 * Once we find an entry that already has the correct value, 908 * stop, since it means all its parents will use the correct 909 * value too. 910 */ 911 if (entry->fspace_augment == augment) 912 return; 913 entry->fspace_augment = augment; 914 entry = RB_PARENT(entry, daddrs.addr_entry); 915 } 916 } 917 918 /* 919 * uvm_mapanon: establish a valid mapping in map for an anon 920 * 921 * => *addr and sz must be a multiple of PAGE_SIZE. 922 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 923 * => map must be unlocked. 924 * 925 * => align: align vaddr, must be a power-of-2. 926 * Align is only a hint and will be ignored if the alignment fails. 927 */ 928 int 929 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 930 vsize_t align, unsigned int flags) 931 { 932 struct vm_map_entry *first, *last, *entry, *new; 933 struct uvm_map_deadq dead; 934 vm_prot_t prot; 935 vm_prot_t maxprot; 936 vm_inherit_t inherit; 937 int advice; 938 int error; 939 vaddr_t pmap_align, pmap_offset; 940 vaddr_t hint; 941 942 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 943 KASSERT(map != kernel_map); 944 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 945 946 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 947 splassert(IPL_NONE); 948 949 /* 950 * We use pmap_align and pmap_offset as alignment and offset variables. 951 * 952 * Because the align parameter takes precedence over pmap prefer, 953 * the pmap_align will need to be set to align, with pmap_offset = 0, 954 * if pmap_prefer will not align. 
955 */ 956 pmap_align = MAX(align, PAGE_SIZE); 957 pmap_offset = 0; 958 959 /* Decode parameters. */ 960 prot = UVM_PROTECTION(flags); 961 maxprot = UVM_MAXPROTECTION(flags); 962 advice = UVM_ADVICE(flags); 963 inherit = UVM_INHERIT(flags); 964 error = 0; 965 hint = trunc_page(*addr); 966 TAILQ_INIT(&dead); 967 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 968 KASSERT((align & (align - 1)) == 0); 969 970 /* Check protection. */ 971 if ((prot & maxprot) != prot) 972 return EACCES; 973 974 /* 975 * Before grabbing the lock, allocate a map entry for later 976 * use to ensure we don't wait for memory while holding the 977 * vm_map_lock. 978 */ 979 new = uvm_mapent_alloc(map, flags); 980 if (new == NULL) 981 return(ENOMEM); 982 983 if (flags & UVM_FLAG_TRYLOCK) { 984 if (vm_map_lock_try(map) == FALSE) { 985 error = EFAULT; 986 goto out; 987 } 988 } else 989 vm_map_lock(map); 990 991 first = last = NULL; 992 if (flags & UVM_FLAG_FIXED) { 993 /* 994 * Fixed location. 995 * 996 * Note: we ignore align, pmap_prefer. 997 * Fill in first, last and *addr. 998 */ 999 KASSERT((*addr & PAGE_MASK) == 0); 1000 1001 /* Check that the space is available. */ 1002 if (flags & UVM_FLAG_UNMAP) 1003 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1004 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1005 error = ENOMEM; 1006 goto unlock; 1007 } 1008 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1009 (align == 0 || (*addr & (align - 1)) == 0) && 1010 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1011 /* 1012 * Address used as hint. 1013 * 1014 * Note: we enforce the alignment restriction, 1015 * but ignore pmap_prefer. 1016 */ 1017 } else if ((maxprot & PROT_EXEC) != 0 && 1018 map->uaddr_exe != NULL) { 1019 /* Run selection algorithm for executables. */ 1020 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1021 addr, sz, pmap_align, pmap_offset, prot, hint); 1022 1023 if (error != 0) 1024 goto unlock; 1025 } else { 1026 /* Update freelists from vmspace. */ 1027 uvm_map_vmspace_update(map, &dead, flags); 1028 1029 error = uvm_map_findspace(map, &first, &last, addr, sz, 1030 pmap_align, pmap_offset, prot, hint); 1031 1032 if (error != 0) 1033 goto unlock; 1034 } 1035 1036 /* If we only want a query, return now. */ 1037 if (flags & UVM_FLAG_QUERY) { 1038 error = 0; 1039 goto unlock; 1040 } 1041 1042 /* 1043 * Create new entry. 1044 * first and last may be invalidated after this call. 1045 */ 1046 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1047 new); 1048 if (entry == NULL) { 1049 error = ENOMEM; 1050 goto unlock; 1051 } 1052 new = NULL; 1053 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1054 entry->object.uvm_obj = NULL; 1055 entry->offset = 0; 1056 entry->protection = prot; 1057 entry->max_protection = maxprot; 1058 entry->inheritance = inherit; 1059 entry->wired_count = 0; 1060 entry->advice = advice; 1061 if (flags & UVM_FLAG_NOFAULT) 1062 entry->etype |= UVM_ET_NOFAULT; 1063 if (flags & UVM_FLAG_COPYONW) { 1064 entry->etype |= UVM_ET_COPYONWRITE; 1065 if ((flags & UVM_FLAG_OVERLAY) == 0) 1066 entry->etype |= UVM_ET_NEEDSCOPY; 1067 } 1068 if (flags & UVM_FLAG_OVERLAY) { 1069 KERNEL_LOCK(); 1070 entry->aref.ar_pageoff = 0; 1071 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1072 KERNEL_UNLOCK(); 1073 } 1074 1075 /* Update map and process statistics. 
*/ 1076 map->size += sz; 1077 ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz); 1078 1079 unlock: 1080 vm_map_unlock(map); 1081 1082 /* 1083 * Remove dead entries. 1084 * 1085 * Dead entries may be the result of merging. 1086 * uvm_map_mkentry may also create dead entries, when it attempts to 1087 * destroy free-space entries. 1088 */ 1089 uvm_unmap_detach(&dead, 0); 1090 out: 1091 if (new) 1092 uvm_mapent_free(new); 1093 return error; 1094 } 1095 1096 /* 1097 * uvm_map: establish a valid mapping in map 1098 * 1099 * => *addr and sz must be a multiple of PAGE_SIZE. 1100 * => map must be unlocked. 1101 * => <uobj,uoffset> value meanings (4 cases): 1102 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1103 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1104 * [3] <uobj,uoffset> == normal mapping 1105 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1106 * 1107 * case [4] is for kernel mappings where we don't know the offset until 1108 * we've found a virtual address. note that kernel object offsets are 1109 * always relative to vm_map_min(kernel_map). 1110 * 1111 * => align: align vaddr, must be a power-of-2. 1112 * Align is only a hint and will be ignored if the alignment fails. 1113 */ 1114 int 1115 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1116 struct uvm_object *uobj, voff_t uoffset, 1117 vsize_t align, unsigned int flags) 1118 { 1119 struct vm_map_entry *first, *last, *entry, *new; 1120 struct uvm_map_deadq dead; 1121 vm_prot_t prot; 1122 vm_prot_t maxprot; 1123 vm_inherit_t inherit; 1124 int advice; 1125 int error; 1126 vaddr_t pmap_align, pmap_offset; 1127 vaddr_t hint; 1128 1129 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1130 splassert(IPL_NONE); 1131 else 1132 splassert(IPL_VM); 1133 1134 /* 1135 * We use pmap_align and pmap_offset as alignment and offset variables. 1136 * 1137 * Because the align parameter takes precedence over pmap prefer, 1138 * the pmap_align will need to be set to align, with pmap_offset = 0, 1139 * if pmap_prefer will not align. 1140 */ 1141 if (uoffset == UVM_UNKNOWN_OFFSET) { 1142 pmap_align = MAX(align, PAGE_SIZE); 1143 pmap_offset = 0; 1144 } else { 1145 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1146 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1147 1148 if (align == 0 || 1149 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1150 /* pmap_offset satisfies align, no change. */ 1151 } else { 1152 /* Align takes precedence over pmap prefer. */ 1153 pmap_align = align; 1154 pmap_offset = 0; 1155 } 1156 } 1157 1158 /* Decode parameters. */ 1159 prot = UVM_PROTECTION(flags); 1160 maxprot = UVM_MAXPROTECTION(flags); 1161 advice = UVM_ADVICE(flags); 1162 inherit = UVM_INHERIT(flags); 1163 error = 0; 1164 hint = trunc_page(*addr); 1165 TAILQ_INIT(&dead); 1166 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1167 KASSERT((align & (align - 1)) == 0); 1168 1169 /* Holes are incompatible with other types of mappings. */ 1170 if (flags & UVM_FLAG_HOLE) { 1171 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1172 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1173 } 1174 1175 /* Unset hint for kernel_map non-fixed allocations. */ 1176 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1177 hint = 0; 1178 1179 /* Check protection. 
*/ 1180 if ((prot & maxprot) != prot) 1181 return EACCES; 1182 1183 if (map == kernel_map && 1184 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1185 panic("uvm_map: kernel map W^X violation requested"); 1186 1187 /* 1188 * Before grabbing the lock, allocate a map entry for later 1189 * use to ensure we don't wait for memory while holding the 1190 * vm_map_lock. 1191 */ 1192 new = uvm_mapent_alloc(map, flags); 1193 if (new == NULL) 1194 return(ENOMEM); 1195 1196 if (flags & UVM_FLAG_TRYLOCK) { 1197 if (vm_map_lock_try(map) == FALSE) { 1198 error = EFAULT; 1199 goto out; 1200 } 1201 } else { 1202 vm_map_lock(map); 1203 } 1204 1205 first = last = NULL; 1206 if (flags & UVM_FLAG_FIXED) { 1207 /* 1208 * Fixed location. 1209 * 1210 * Note: we ignore align, pmap_prefer. 1211 * Fill in first, last and *addr. 1212 */ 1213 KASSERT((*addr & PAGE_MASK) == 0); 1214 1215 /* 1216 * Grow pmap to include allocated address. 1217 * If the growth fails, the allocation will fail too. 1218 */ 1219 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1220 uvm_maxkaddr < (*addr + sz)) { 1221 uvm_map_kmem_grow(map, &dead, 1222 *addr + sz - uvm_maxkaddr, flags); 1223 } 1224 1225 /* Check that the space is available. */ 1226 if (flags & UVM_FLAG_UNMAP) 1227 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1228 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1229 error = ENOMEM; 1230 goto unlock; 1231 } 1232 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1233 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1234 (align == 0 || (*addr & (align - 1)) == 0) && 1235 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1236 /* 1237 * Address used as hint. 1238 * 1239 * Note: we enforce the alignment restriction, 1240 * but ignore pmap_prefer. 1241 */ 1242 } else if ((maxprot & PROT_EXEC) != 0 && 1243 map->uaddr_exe != NULL) { 1244 /* Run selection algorithm for executables. */ 1245 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1246 addr, sz, pmap_align, pmap_offset, prot, hint); 1247 1248 /* Grow kernel memory and try again. */ 1249 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1250 uvm_map_kmem_grow(map, &dead, sz, flags); 1251 1252 error = uvm_addr_invoke(map, map->uaddr_exe, 1253 &first, &last, addr, sz, 1254 pmap_align, pmap_offset, prot, hint); 1255 } 1256 1257 if (error != 0) 1258 goto unlock; 1259 } else { 1260 /* Update freelists from vmspace. */ 1261 if (map->flags & VM_MAP_ISVMSPACE) 1262 uvm_map_vmspace_update(map, &dead, flags); 1263 1264 error = uvm_map_findspace(map, &first, &last, addr, sz, 1265 pmap_align, pmap_offset, prot, hint); 1266 1267 /* Grow kernel memory and try again. */ 1268 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1269 uvm_map_kmem_grow(map, &dead, sz, flags); 1270 1271 error = uvm_map_findspace(map, &first, &last, addr, sz, 1272 pmap_align, pmap_offset, prot, hint); 1273 } 1274 1275 if (error != 0) 1276 goto unlock; 1277 } 1278 1279 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1280 uvm_maxkaddr >= *addr + sz); 1281 1282 /* If we only want a query, return now. */ 1283 if (flags & UVM_FLAG_QUERY) { 1284 error = 0; 1285 goto unlock; 1286 } 1287 1288 if (uobj == NULL) 1289 uoffset = 0; 1290 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1291 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1292 uoffset = *addr - vm_map_min(kernel_map); 1293 } 1294 1295 /* 1296 * Create new entry. 1297 * first and last may be invalidated after this call. 
1298 */ 1299 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1300 new); 1301 if (entry == NULL) { 1302 error = ENOMEM; 1303 goto unlock; 1304 } 1305 new = NULL; 1306 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1307 entry->object.uvm_obj = uobj; 1308 entry->offset = uoffset; 1309 entry->protection = prot; 1310 entry->max_protection = maxprot; 1311 entry->inheritance = inherit; 1312 entry->wired_count = 0; 1313 entry->advice = advice; 1314 if (uobj) 1315 entry->etype |= UVM_ET_OBJ; 1316 else if (flags & UVM_FLAG_HOLE) 1317 entry->etype |= UVM_ET_HOLE; 1318 if (flags & UVM_FLAG_NOFAULT) 1319 entry->etype |= UVM_ET_NOFAULT; 1320 if (flags & UVM_FLAG_COPYONW) { 1321 entry->etype |= UVM_ET_COPYONWRITE; 1322 if ((flags & UVM_FLAG_OVERLAY) == 0) 1323 entry->etype |= UVM_ET_NEEDSCOPY; 1324 } 1325 if (flags & UVM_FLAG_OVERLAY) { 1326 entry->aref.ar_pageoff = 0; 1327 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1328 } 1329 1330 /* Update map and process statistics. */ 1331 if (!(flags & UVM_FLAG_HOLE)) { 1332 map->size += sz; 1333 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1334 ((struct vmspace *)map)->vm_dused += 1335 uvmspace_dused(map, *addr, *addr + sz); 1336 } 1337 } 1338 1339 /* 1340 * Try to merge entry. 1341 * 1342 * Userland allocations are kept separated most of the time. 1343 * Forego the effort of merging what most of the time can't be merged 1344 * and only try the merge if it concerns a kernel entry. 1345 */ 1346 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1347 (map->flags & VM_MAP_ISVMSPACE) == 0) 1348 uvm_mapent_tryjoin(map, entry, &dead); 1349 1350 unlock: 1351 vm_map_unlock(map); 1352 1353 /* 1354 * Remove dead entries. 1355 * 1356 * Dead entries may be the result of merging. 1357 * uvm_map_mkentry may also create dead entries, when it attempts to 1358 * destroy free-space entries. 1359 */ 1360 if (map->flags & VM_MAP_INTRSAFE) 1361 uvm_unmap_detach_intrsafe(&dead); 1362 else 1363 uvm_unmap_detach(&dead, 0); 1364 out: 1365 if (new) 1366 uvm_mapent_free(new); 1367 return error; 1368 } 1369 1370 /* 1371 * True iff e1 and e2 can be joined together. 1372 */ 1373 int 1374 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1375 struct vm_map_entry *e2) 1376 { 1377 KDASSERT(e1 != NULL && e2 != NULL); 1378 1379 /* Must be the same entry type and not have free memory between. */ 1380 if (e1->etype != e2->etype || e1->end != e2->start) 1381 return 0; 1382 1383 /* Submaps are never joined. */ 1384 if (UVM_ET_ISSUBMAP(e1)) 1385 return 0; 1386 1387 /* Never merge wired memory. */ 1388 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1389 return 0; 1390 1391 /* Protection, inheritance and advice must be equal. */ 1392 if (e1->protection != e2->protection || 1393 e1->max_protection != e2->max_protection || 1394 e1->inheritance != e2->inheritance || 1395 e1->advice != e2->advice) 1396 return 0; 1397 1398 /* If uvm_object: object itself and offsets within object must match. */ 1399 if (UVM_ET_ISOBJ(e1)) { 1400 if (e1->object.uvm_obj != e2->object.uvm_obj) 1401 return 0; 1402 if (e1->offset + (e1->end - e1->start) != e2->offset) 1403 return 0; 1404 } 1405 1406 /* 1407 * Cannot join shared amaps. 1408 * Note: no need to lock amap to look at refs, since we don't care 1409 * about its exact value. 1410 * If it is 1 (i.e. we have the only reference) it will stay there. 
1411 */ 1412 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1413 return 0; 1414 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1415 return 0; 1416 1417 /* Apprently, e1 and e2 match. */ 1418 return 1; 1419 } 1420 1421 /* 1422 * Join support function. 1423 * 1424 * Returns the merged entry on succes. 1425 * Returns NULL if the merge failed. 1426 */ 1427 struct vm_map_entry* 1428 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1429 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1430 { 1431 struct uvm_addr_state *free; 1432 1433 /* 1434 * Merging is not supported for map entries that 1435 * contain an amap in e1. This should never happen 1436 * anyway, because only kernel entries are merged. 1437 * These do not contain amaps. 1438 * e2 contains no real information in its amap, 1439 * so it can be erased immediately. 1440 */ 1441 KASSERT(e1->aref.ar_amap == NULL); 1442 1443 /* 1444 * Don't drop obj reference: 1445 * uvm_unmap_detach will do this for us. 1446 */ 1447 free = uvm_map_uaddr_e(map, e1); 1448 uvm_mapent_free_remove(map, free, e1); 1449 1450 free = uvm_map_uaddr_e(map, e2); 1451 uvm_mapent_free_remove(map, free, e2); 1452 uvm_mapent_addr_remove(map, e2); 1453 e1->end = e2->end; 1454 e1->guard = e2->guard; 1455 e1->fspace = e2->fspace; 1456 uvm_mapent_free_insert(map, free, e1); 1457 1458 DEAD_ENTRY_PUSH(dead, e2); 1459 return e1; 1460 } 1461 1462 /* 1463 * Attempt forward and backward joining of entry. 1464 * 1465 * Returns entry after joins. 1466 * We are guaranteed that the amap of entry is either non-existant or 1467 * has never been used. 1468 */ 1469 struct vm_map_entry* 1470 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1471 struct uvm_map_deadq *dead) 1472 { 1473 struct vm_map_entry *other; 1474 struct vm_map_entry *merged; 1475 1476 /* Merge with previous entry. */ 1477 other = RB_PREV(uvm_map_addr, &map->addr, entry); 1478 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1479 merged = uvm_mapent_merge(map, other, entry, dead); 1480 if (merged) 1481 entry = merged; 1482 } 1483 1484 /* 1485 * Merge with next entry. 1486 * 1487 * Because amap can only extend forward and the next entry 1488 * probably contains sensible info, only perform forward merging 1489 * in the absence of an amap. 1490 */ 1491 other = RB_NEXT(uvm_map_addr, &map->addr, entry); 1492 if (other && entry->aref.ar_amap == NULL && 1493 other->aref.ar_amap == NULL && 1494 uvm_mapent_isjoinable(map, entry, other)) { 1495 merged = uvm_mapent_merge(map, entry, other, dead); 1496 if (merged) 1497 entry = merged; 1498 } 1499 1500 return entry; 1501 } 1502 1503 /* 1504 * Kill entries that are no longer in a map. 1505 */ 1506 void 1507 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1508 { 1509 struct vm_map_entry *entry; 1510 int waitok = flags & UVM_PLA_WAITOK; 1511 1512 if (TAILQ_EMPTY(deadq)) 1513 return; 1514 1515 KERNEL_LOCK(); 1516 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1517 if (waitok) 1518 uvm_pause(); 1519 /* Drop reference to amap, if we've got one. */ 1520 if (entry->aref.ar_amap) 1521 amap_unref(entry->aref.ar_amap, 1522 entry->aref.ar_pageoff, 1523 atop(entry->end - entry->start), 1524 flags & AMAP_REFALL); 1525 1526 /* Drop reference to our backing object, if we've got one. */ 1527 if (UVM_ET_ISSUBMAP(entry)) { 1528 /* ... 
unlikely to happen, but play it safe */ 1529 uvm_map_deallocate(entry->object.sub_map); 1530 } else if (UVM_ET_ISOBJ(entry) && 1531 entry->object.uvm_obj->pgops->pgo_detach) { 1532 entry->object.uvm_obj->pgops->pgo_detach( 1533 entry->object.uvm_obj); 1534 } 1535 1536 /* Step to next. */ 1537 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1538 uvm_mapent_free(entry); 1539 } 1540 KERNEL_UNLOCK(); 1541 } 1542 1543 void 1544 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1545 { 1546 struct vm_map_entry *entry; 1547 1548 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1549 KASSERT(entry->aref.ar_amap == NULL); 1550 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1551 KASSERT(!UVM_ET_ISOBJ(entry)); 1552 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1553 uvm_mapent_free(entry); 1554 } 1555 } 1556 1557 /* 1558 * Create and insert new entry. 1559 * 1560 * Returned entry contains new addresses and is inserted properly in the tree. 1561 * first and last are (probably) no longer valid. 1562 */ 1563 struct vm_map_entry* 1564 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1565 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1566 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1567 { 1568 struct vm_map_entry *entry, *prev; 1569 struct uvm_addr_state *free; 1570 vaddr_t min, max; /* free space boundaries for new entry */ 1571 1572 KDASSERT(map != NULL); 1573 KDASSERT(first != NULL); 1574 KDASSERT(last != NULL); 1575 KDASSERT(dead != NULL); 1576 KDASSERT(sz > 0); 1577 KDASSERT(addr + sz > addr); 1578 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1579 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1580 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1581 uvm_tree_sanity(map, __FILE__, __LINE__); 1582 1583 min = addr + sz; 1584 max = VMMAP_FREE_END(last); 1585 1586 /* Initialize new entry. */ 1587 if (new == NULL) 1588 entry = uvm_mapent_alloc(map, flags); 1589 else 1590 entry = new; 1591 if (entry == NULL) 1592 return NULL; 1593 entry->offset = 0; 1594 entry->etype = 0; 1595 entry->wired_count = 0; 1596 entry->aref.ar_pageoff = 0; 1597 entry->aref.ar_amap = NULL; 1598 1599 entry->start = addr; 1600 entry->end = min; 1601 entry->guard = 0; 1602 entry->fspace = 0; 1603 1604 /* Reset free space in first. */ 1605 free = uvm_map_uaddr_e(map, first); 1606 uvm_mapent_free_remove(map, free, first); 1607 first->guard = 0; 1608 first->fspace = 0; 1609 1610 /* 1611 * Remove all entries that are fully replaced. 1612 * We are iterating using last in reverse order. 1613 */ 1614 for (; first != last; last = prev) { 1615 prev = RB_PREV(uvm_map_addr, &map->addr, last); 1616 1617 KDASSERT(last->start == last->end); 1618 free = uvm_map_uaddr_e(map, last); 1619 uvm_mapent_free_remove(map, free, last); 1620 uvm_mapent_addr_remove(map, last); 1621 DEAD_ENTRY_PUSH(dead, last); 1622 } 1623 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1624 if (first->start == addr) { 1625 uvm_mapent_addr_remove(map, first); 1626 DEAD_ENTRY_PUSH(dead, first); 1627 } else { 1628 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1629 addr, flags); 1630 } 1631 1632 /* Finally, link in entry. 
*/ 1633 uvm_mapent_addr_insert(map, entry); 1634 uvm_map_fix_space(map, entry, min, max, flags); 1635 1636 uvm_tree_sanity(map, __FILE__, __LINE__); 1637 return entry; 1638 } 1639 1640 1641 /* 1642 * uvm_mapent_alloc: allocate a map entry 1643 */ 1644 struct vm_map_entry * 1645 uvm_mapent_alloc(struct vm_map *map, int flags) 1646 { 1647 struct vm_map_entry *me, *ne; 1648 int pool_flags; 1649 int i; 1650 1651 pool_flags = PR_WAITOK; 1652 if (flags & UVM_FLAG_TRYLOCK) 1653 pool_flags = PR_NOWAIT; 1654 1655 if (map->flags & VM_MAP_INTRSAFE || cold) { 1656 mtx_enter(&uvm_kmapent_mtx); 1657 me = uvm.kentry_free; 1658 if (me == NULL) { 1659 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1660 &kd_nowait); 1661 if (ne == NULL) 1662 panic("uvm_mapent_alloc: cannot allocate map " 1663 "entry"); 1664 for (i = 0; 1665 i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; 1666 i++) 1667 RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1]; 1668 RB_LEFT(&ne[i], daddrs.addr_entry) = NULL; 1669 me = ne; 1670 if (ratecheck(&uvm_kmapent_last_warn_time, 1671 &uvm_kmapent_warn_rate)) 1672 printf("uvm_mapent_alloc: out of static " 1673 "map entries\n"); 1674 } 1675 uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry); 1676 uvmexp.kmapent++; 1677 mtx_leave(&uvm_kmapent_mtx); 1678 me->flags = UVM_MAP_STATIC; 1679 } else if (map == kernel_map) { 1680 splassert(IPL_NONE); 1681 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1682 if (me == NULL) 1683 goto out; 1684 me->flags = UVM_MAP_KMEM; 1685 } else { 1686 splassert(IPL_NONE); 1687 me = pool_get(&uvm_map_entry_pool, pool_flags); 1688 if (me == NULL) 1689 goto out; 1690 me->flags = 0; 1691 } 1692 1693 if (me != NULL) { 1694 RB_LEFT(me, daddrs.addr_entry) = 1695 RB_RIGHT(me, daddrs.addr_entry) = 1696 RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF; 1697 } 1698 1699 out: 1700 return(me); 1701 } 1702 1703 /* 1704 * uvm_mapent_free: free map entry 1705 * 1706 * => XXX: static pool for kernel map? 1707 */ 1708 void 1709 uvm_mapent_free(struct vm_map_entry *me) 1710 { 1711 if (me->flags & UVM_MAP_STATIC) { 1712 mtx_enter(&uvm_kmapent_mtx); 1713 RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free; 1714 uvm.kentry_free = me; 1715 uvmexp.kmapent--; 1716 mtx_leave(&uvm_kmapent_mtx); 1717 } else if (me->flags & UVM_MAP_KMEM) { 1718 splassert(IPL_NONE); 1719 pool_put(&uvm_map_entry_kmem_pool, me); 1720 } else { 1721 splassert(IPL_NONE); 1722 pool_put(&uvm_map_entry_pool, me); 1723 } 1724 } 1725 1726 /* 1727 * uvm_map_lookup_entry: find map entry at or before an address. 1728 * 1729 * => map must at least be read-locked by caller 1730 * => entry is returned in "entry" 1731 * => return value is true if address is in the returned entry 1732 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1733 * returned for those mappings. 1734 */ 1735 boolean_t 1736 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1737 struct vm_map_entry **entry) 1738 { 1739 *entry = uvm_map_entrybyaddr(&map->addr, address); 1740 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1741 (*entry)->start <= address && (*entry)->end > address; 1742 } 1743 1744 /* 1745 * uvm_map_pie: return a random load address for a PIE executable 1746 * properly aligned. 
1747 */ 1748 #ifndef VM_PIE_MAX_ADDR 1749 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1750 #endif 1751 1752 #ifndef VM_PIE_MIN_ADDR 1753 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1754 #endif 1755 1756 #ifndef VM_PIE_MIN_ALIGN 1757 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1758 #endif 1759 1760 vaddr_t 1761 uvm_map_pie(vaddr_t align) 1762 { 1763 vaddr_t addr, space, min; 1764 1765 align = MAX(align, VM_PIE_MIN_ALIGN); 1766 1767 /* round up to next alignment */ 1768 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1769 1770 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1771 return (align); 1772 1773 space = (VM_PIE_MAX_ADDR - min) / align; 1774 space = MIN(space, (u_int32_t)-1); 1775 1776 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1777 addr += min; 1778 1779 return (addr); 1780 } 1781 1782 void 1783 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1784 { 1785 struct uvm_map_deadq dead; 1786 1787 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1788 (end & (vaddr_t)PAGE_MASK) == 0); 1789 TAILQ_INIT(&dead); 1790 vm_map_lock(map); 1791 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1792 vm_map_unlock(map); 1793 1794 if (map->flags & VM_MAP_INTRSAFE) 1795 uvm_unmap_detach_intrsafe(&dead); 1796 else 1797 uvm_unmap_detach(&dead, 0); 1798 } 1799 1800 /* 1801 * Mark entry as free. 1802 * 1803 * entry will be put on the dead list. 1804 * The free space will be merged into the previous or a new entry, 1805 * unless markfree is false. 1806 */ 1807 void 1808 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1809 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1810 boolean_t markfree) 1811 { 1812 struct uvm_addr_state *free; 1813 struct vm_map_entry *prev; 1814 vaddr_t addr; /* Start of freed range. */ 1815 vaddr_t end; /* End of freed range. */ 1816 1817 prev = *prev_ptr; 1818 if (prev == entry) 1819 *prev_ptr = prev = NULL; 1820 1821 if (prev == NULL || 1822 VMMAP_FREE_END(prev) != entry->start) 1823 prev = RB_PREV(uvm_map_addr, &map->addr, entry); 1824 1825 /* Entry is describing only free memory and has nothing to drain into. */ 1826 if (prev == NULL && entry->start == entry->end && markfree) { 1827 *prev_ptr = entry; 1828 return; 1829 } 1830 1831 addr = entry->start; 1832 end = VMMAP_FREE_END(entry); 1833 free = uvm_map_uaddr_e(map, entry); 1834 uvm_mapent_free_remove(map, free, entry); 1835 uvm_mapent_addr_remove(map, entry); 1836 DEAD_ENTRY_PUSH(dead, entry); 1837 1838 if (markfree) { 1839 if (prev) { 1840 free = uvm_map_uaddr_e(map, prev); 1841 uvm_mapent_free_remove(map, free, prev); 1842 } 1843 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1844 } 1845 } 1846 1847 /* 1848 * Unwire and release referenced amap and object from map entry. 1849 */ 1850 void 1851 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1852 { 1853 /* Unwire removed map entry. */ 1854 if (VM_MAPENT_ISWIRED(entry)) { 1855 KERNEL_LOCK(); 1856 entry->wired_count = 0; 1857 uvm_fault_unwire_locked(map, entry->start, entry->end); 1858 KERNEL_UNLOCK(); 1859 } 1860 1861 /* Entry-type specific code. */ 1862 if (UVM_ET_ISHOLE(entry)) { 1863 /* Nothing to be done for holes. 
*/ 1864 } else if (map->flags & VM_MAP_INTRSAFE) { 1865 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1866 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1867 pmap_kremove(entry->start, entry->end - entry->start); 1868 } else if (UVM_ET_ISOBJ(entry) && 1869 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1870 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1871 /* 1872 * Note: kernel object mappings are currently used in 1873 * two ways: 1874 * [1] "normal" mappings of pages in the kernel object 1875 * [2] uvm_km_valloc'd allocations in which we 1876 * pmap_enter in some non-kernel-object page 1877 * (e.g. vmapbuf). 1878 * 1879 * for case [1], we need to remove the mapping from 1880 * the pmap and then remove the page from the kernel 1881 * object (because, once pages in a kernel object are 1882 * unmapped they are no longer needed, unlike, say, 1883 * a vnode where you might want the data to persist 1884 * until flushed out of a queue). 1885 * 1886 * for case [2], we need to remove the mapping from 1887 * the pmap. there shouldn't be any pages at the 1888 * specified offset in the kernel object [but it 1889 * doesn't hurt to call uvm_km_pgremove just to be 1890 * safe?] 1891 * 1892 * uvm_km_pgremove currently does the following: 1893 * for pages in the kernel object range: 1894 * - drops the swap slot 1895 * - uvm_pagefree the page 1896 * 1897 * note there is version of uvm_km_pgremove() that 1898 * is used for "intrsafe" objects. 1899 */ 1900 /* 1901 * remove mappings from pmap and drop the pages 1902 * from the object. offsets are always relative 1903 * to vm_map_min(kernel_map). 1904 */ 1905 pmap_remove(pmap_kernel(), entry->start, entry->end); 1906 uvm_km_pgremove(entry->object.uvm_obj, 1907 entry->start - vm_map_min(kernel_map), 1908 entry->end - vm_map_min(kernel_map)); 1909 1910 /* 1911 * null out kernel_object reference, we've just 1912 * dropped it 1913 */ 1914 entry->etype &= ~UVM_ET_OBJ; 1915 entry->object.uvm_obj = NULL; /* to be safe */ 1916 } else { 1917 /* remove mappings the standard way. */ 1918 pmap_remove(map->pmap, entry->start, entry->end); 1919 } 1920 } 1921 1922 /* 1923 * Remove all entries from start to end. 1924 * 1925 * If remove_holes, then remove ET_HOLE entries as well. 1926 * If markfree, entry will be properly marked free, otherwise, no replacement 1927 * entry will be put in the tree (corrupting the tree). 1928 */ 1929 void 1930 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1931 struct uvm_map_deadq *dead, boolean_t remove_holes, 1932 boolean_t markfree) 1933 { 1934 struct vm_map_entry *prev_hint, *next, *entry; 1935 1936 start = MAX(start, map->min_offset); 1937 end = MIN(end, map->max_offset); 1938 if (start >= end) 1939 return; 1940 1941 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1942 splassert(IPL_NONE); 1943 else 1944 splassert(IPL_VM); 1945 1946 /* Find first affected entry. */ 1947 entry = uvm_map_entrybyaddr(&map->addr, start); 1948 KDASSERT(entry != NULL && entry->start <= start); 1949 if (entry->end <= start && markfree) 1950 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 1951 else 1952 UVM_MAP_CLIP_START(map, entry, start); 1953 1954 /* 1955 * Iterate entries until we reach end address. 1956 * prev_hint hints where the freed space can be appended to. 
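 * Each entry in the range is clipped and killed with
 * uvm_unmap_kill_entry() (holes are skipped unless remove_holes is
 * set), its size is subtracted from the map statistics, and it is
 * handed to uvm_mapent_mkfree(), which pushes it onto the dead queue
 * so the caller can detach it once the map lock has been dropped.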
1957 */ 1958 prev_hint = NULL; 1959 for (; entry != NULL && entry->start < end; entry = next) { 1960 KDASSERT(entry->start >= start); 1961 if (entry->end > end || !markfree) 1962 UVM_MAP_CLIP_END(map, entry, end); 1963 KDASSERT(entry->start >= start && entry->end <= end); 1964 next = RB_NEXT(uvm_map_addr, &map->addr, entry); 1965 1966 /* Don't remove holes unless asked to do so. */ 1967 if (UVM_ET_ISHOLE(entry)) { 1968 if (!remove_holes) { 1969 prev_hint = entry; 1970 continue; 1971 } 1972 } 1973 1974 /* Kill entry. */ 1975 uvm_unmap_kill_entry(map, entry); 1976 1977 /* Update space usage. */ 1978 if ((map->flags & VM_MAP_ISVMSPACE) && 1979 entry->object.uvm_obj == NULL && 1980 !UVM_ET_ISHOLE(entry)) { 1981 ((struct vmspace *)map)->vm_dused -= 1982 uvmspace_dused(map, entry->start, entry->end); 1983 } 1984 if (!UVM_ET_ISHOLE(entry)) 1985 map->size -= entry->end - entry->start; 1986 1987 /* Actual removal of entry. */ 1988 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 1989 } 1990 1991 pmap_update(vm_map_pmap(map)); 1992 1993 #ifdef VMMAP_DEBUG 1994 if (markfree) { 1995 for (entry = uvm_map_entrybyaddr(&map->addr, start); 1996 entry != NULL && entry->start < end; 1997 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 1998 KDASSERT(entry->end <= start || 1999 entry->start == entry->end || 2000 UVM_ET_ISHOLE(entry)); 2001 } 2002 } else { 2003 vaddr_t a; 2004 for (a = start; a < end; a += PAGE_SIZE) 2005 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2006 } 2007 #endif 2008 } 2009 2010 /* 2011 * Mark all entries from first until end (exclusive) as pageable. 2012 * 2013 * Lock must be exclusive on entry and will not be touched. 2014 */ 2015 void 2016 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2017 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2018 { 2019 struct vm_map_entry *iter; 2020 2021 for (iter = first; iter != end; 2022 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2023 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2024 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2025 continue; 2026 2027 iter->wired_count = 0; 2028 uvm_fault_unwire_locked(map, iter->start, iter->end); 2029 } 2030 } 2031 2032 /* 2033 * Mark all entries from first until end (exclusive) as wired. 2034 * 2035 * Lockflags determines the lock state on return from this function. 2036 * Lock must be exclusive on entry. 2037 */ 2038 int 2039 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2040 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2041 int lockflags) 2042 { 2043 struct vm_map_entry *iter; 2044 #ifdef DIAGNOSTIC 2045 unsigned int timestamp_save; 2046 #endif 2047 int error; 2048 2049 /* 2050 * Wire pages in two passes: 2051 * 2052 * 1: holding the write lock, we create any anonymous maps that need 2053 * to be created. then we clip each map entry to the region to 2054 * be wired and increment its wiring count. 2055 * 2056 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2057 * in the pages for any newly wired area (wired_count == 1). 2058 * 2059 * downgrading to a read lock for uvm_fault_wire avoids a possible 2060 * deadlock with another thread that may have faulted on one of 2061 * the pages to be wired (it would mark the page busy, blocking 2062 * us, then in turn block on the map lock that we hold). 2063 * because we keep the read lock on the map, the copy-on-write 2064 * status of the entries we modify here cannot change. 
2065 */ 2066 for (iter = first; iter != end; 2067 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2068 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2069 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2070 iter->protection == PROT_NONE) 2071 continue; 2072 2073 /* 2074 * Perform actions of vm_map_lookup that need the write lock. 2075 * - create an anonymous map for copy-on-write 2076 * - anonymous map for zero-fill 2077 * Skip submaps. 2078 */ 2079 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2080 UVM_ET_ISNEEDSCOPY(iter) && 2081 ((iter->protection & PROT_WRITE) || 2082 iter->object.uvm_obj == NULL)) { 2083 amap_copy(map, iter, M_WAITOK, TRUE, 2084 iter->start, iter->end); 2085 } 2086 iter->wired_count++; 2087 } 2088 2089 /* 2090 * Pass 2. 2091 */ 2092 #ifdef DIAGNOSTIC 2093 timestamp_save = map->timestamp; 2094 #endif 2095 vm_map_busy(map); 2096 vm_map_downgrade(map); 2097 2098 error = 0; 2099 for (iter = first; error == 0 && iter != end; 2100 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2101 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2102 iter->protection == PROT_NONE) 2103 continue; 2104 2105 error = uvm_fault_wire(map, iter->start, iter->end, 2106 iter->protection); 2107 } 2108 2109 if (error) { 2110 /* 2111 * uvm_fault_wire failure 2112 * 2113 * Reacquire lock and undo our work. 2114 */ 2115 vm_map_upgrade(map); 2116 vm_map_unbusy(map); 2117 #ifdef DIAGNOSTIC 2118 if (timestamp_save != map->timestamp) 2119 panic("uvm_map_pageable_wire: stale map"); 2120 #endif 2121 2122 /* 2123 * first is no longer needed to restart loops. 2124 * Use it as iterator to unmap successful mappings. 2125 */ 2126 for (; first != iter; 2127 first = RB_NEXT(uvm_map_addr, &map->addr, first)) { 2128 if (UVM_ET_ISHOLE(first) || 2129 first->start == first->end || 2130 first->protection == PROT_NONE) 2131 continue; 2132 2133 first->wired_count--; 2134 if (!VM_MAPENT_ISWIRED(first)) { 2135 uvm_fault_unwire_locked(map, 2136 iter->start, iter->end); 2137 } 2138 } 2139 2140 /* decrease counter in the rest of the entries */ 2141 for (; iter != end; 2142 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2143 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2144 iter->protection == PROT_NONE) 2145 continue; 2146 2147 iter->wired_count--; 2148 } 2149 2150 if ((lockflags & UVM_LK_EXIT) == 0) 2151 vm_map_unlock(map); 2152 return error; 2153 } 2154 2155 /* We are currently holding a read lock. */ 2156 if ((lockflags & UVM_LK_EXIT) == 0) { 2157 vm_map_unbusy(map); 2158 vm_map_unlock_read(map); 2159 } else { 2160 vm_map_upgrade(map); 2161 vm_map_unbusy(map); 2162 #ifdef DIAGNOSTIC 2163 if (timestamp_save != map->timestamp) 2164 panic("uvm_map_pageable_wire: stale map"); 2165 #endif 2166 } 2167 return 0; 2168 } 2169 2170 /* 2171 * uvm_map_pageable: set pageability of a range in a map. 2172 * 2173 * Flags: 2174 * UVM_LK_ENTER: map is already locked by caller 2175 * UVM_LK_EXIT: don't unlock map on exit 2176 * 2177 * The full range must be in use (entries may not have fspace != 0). 2178 * UVM_ET_HOLE counts as unmapped. 2179 */ 2180 int 2181 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2182 boolean_t new_pageable, int lockflags) 2183 { 2184 struct vm_map_entry *first, *last, *tmp; 2185 int error; 2186 2187 start = trunc_page(start); 2188 end = round_page(end); 2189 2190 if (start > end) 2191 return EINVAL; 2192 if (start == end) 2193 return 0; /* nothing to do */ 2194 if (start < map->min_offset) 2195 return EFAULT; /* why? 
see first XXX below */ 2196 if (end > map->max_offset) 2197 return EINVAL; /* why? see second XXX below */ 2198 2199 KASSERT(map->flags & VM_MAP_PAGEABLE); 2200 if ((lockflags & UVM_LK_ENTER) == 0) 2201 vm_map_lock(map); 2202 2203 /* 2204 * Find first entry. 2205 * 2206 * Initial test on start is different, because of the different 2207 * error returned. Rest is tested further down. 2208 */ 2209 first = uvm_map_entrybyaddr(&map->addr, start); 2210 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2211 /* 2212 * XXX if the first address is not mapped, it is EFAULT? 2213 */ 2214 error = EFAULT; 2215 goto out; 2216 } 2217 2218 /* Check that the range has no holes. */ 2219 for (last = first; last != NULL && last->start < end; 2220 last = RB_NEXT(uvm_map_addr, &map->addr, last)) { 2221 if (UVM_ET_ISHOLE(last) || 2222 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2223 /* 2224 * XXX unmapped memory in range, why is it EINVAL 2225 * instead of EFAULT? 2226 */ 2227 error = EINVAL; 2228 goto out; 2229 } 2230 } 2231 2232 /* 2233 * Last ended at the first entry after the range. 2234 * Move back one step. 2235 * 2236 * Note that last may be NULL. 2237 */ 2238 if (last == NULL) { 2239 last = RB_MAX(uvm_map_addr, &map->addr); 2240 if (last->end < end) { 2241 error = EINVAL; 2242 goto out; 2243 } 2244 } else { 2245 KASSERT(last != first); 2246 last = RB_PREV(uvm_map_addr, &map->addr, last); 2247 } 2248 2249 /* Wire/unwire pages here. */ 2250 if (new_pageable) { 2251 /* 2252 * Mark pageable. 2253 * entries that are not wired are untouched. 2254 */ 2255 if (VM_MAPENT_ISWIRED(first)) 2256 UVM_MAP_CLIP_START(map, first, start); 2257 /* 2258 * Split last at end. 2259 * Make tmp be the first entry after what is to be touched. 2260 * If last is not wired, don't touch it. 2261 */ 2262 if (VM_MAPENT_ISWIRED(last)) { 2263 UVM_MAP_CLIP_END(map, last, end); 2264 tmp = RB_NEXT(uvm_map_addr, &map->addr, last); 2265 } else 2266 tmp = last; 2267 2268 uvm_map_pageable_pgon(map, first, tmp, start, end); 2269 error = 0; 2270 2271 out: 2272 if ((lockflags & UVM_LK_EXIT) == 0) 2273 vm_map_unlock(map); 2274 return error; 2275 } else { 2276 /* 2277 * Mark entries wired. 2278 * entries are always touched (because recovery needs this). 2279 */ 2280 if (!VM_MAPENT_ISWIRED(first)) 2281 UVM_MAP_CLIP_START(map, first, start); 2282 /* 2283 * Split last at end. 2284 * Make tmp be the first entry after what is to be touched. 2285 * If last is not wired, don't touch it. 2286 */ 2287 if (!VM_MAPENT_ISWIRED(last)) { 2288 UVM_MAP_CLIP_END(map, last, end); 2289 tmp = RB_NEXT(uvm_map_addr, &map->addr, last); 2290 } else 2291 tmp = last; 2292 2293 return uvm_map_pageable_wire(map, first, tmp, start, end, 2294 lockflags); 2295 } 2296 } 2297 2298 /* 2299 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2300 * all mapped regions. 2301 * 2302 * Map must not be locked. 2303 * If no flags are specified, all ragions are unwired. 
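 * MCL_CURRENT wires down everything that is currently mapped, while
 * MCL_FUTURE only sets VM_MAP_WIREFUTURE so that later mappings are
 * wired as they are established.  A non-zero limit is checked against
 * the resulting wired size (where pmap_wired_count is available) and
 * causes ENOMEM when it would be exceeded; this is the path normally
 * reached via mlockall(2).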
2304 */ 2305 int 2306 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2307 { 2308 vsize_t size; 2309 struct vm_map_entry *iter; 2310 2311 KASSERT(map->flags & VM_MAP_PAGEABLE); 2312 vm_map_lock(map); 2313 2314 if (flags == 0) { 2315 uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr), 2316 NULL, map->min_offset, map->max_offset); 2317 2318 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2319 vm_map_unlock(map); 2320 return 0; 2321 } 2322 2323 if (flags & MCL_FUTURE) 2324 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2325 if (!(flags & MCL_CURRENT)) { 2326 vm_map_unlock(map); 2327 return 0; 2328 } 2329 2330 /* 2331 * Count number of pages in all non-wired entries. 2332 * If the number exceeds the limit, abort. 2333 */ 2334 size = 0; 2335 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2336 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2337 continue; 2338 2339 size += iter->end - iter->start; 2340 } 2341 2342 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2343 vm_map_unlock(map); 2344 return ENOMEM; 2345 } 2346 2347 /* XXX non-pmap_wired_count case must be handled by caller */ 2348 #ifdef pmap_wired_count 2349 if (limit != 0 && 2350 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2351 vm_map_unlock(map); 2352 return ENOMEM; 2353 } 2354 #endif 2355 2356 /* 2357 * uvm_map_pageable_wire will release lcok 2358 */ 2359 return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr), 2360 NULL, map->min_offset, map->max_offset, 0); 2361 } 2362 2363 /* 2364 * Initialize map. 2365 * 2366 * Allocates sufficient entries to describe the free memory in the map. 2367 */ 2368 void 2369 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2370 { 2371 int i; 2372 2373 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2374 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2375 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2376 2377 /* 2378 * Update parameters. 2379 * 2380 * This code handles (vaddr_t)-1 and other page mask ending addresses 2381 * properly. 2382 * We lose the top page if the full virtual address space is used. 2383 */ 2384 if (max & (vaddr_t)PAGE_MASK) { 2385 max += 1; 2386 if (max == 0) /* overflow */ 2387 max -= PAGE_SIZE; 2388 } 2389 2390 RB_INIT(&map->addr); 2391 map->uaddr_exe = NULL; 2392 for (i = 0; i < nitems(map->uaddr_any); ++i) 2393 map->uaddr_any[i] = NULL; 2394 map->uaddr_brk_stack = NULL; 2395 2396 map->size = 0; 2397 map->ref_count = 0; 2398 map->min_offset = min; 2399 map->max_offset = max; 2400 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2401 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2402 map->flags = flags; 2403 map->timestamp = 0; 2404 rw_init(&map->lock, "vmmaplk"); 2405 mtx_init(&map->mtx, IPL_VM); 2406 mtx_init(&map->flags_lock, IPL_VM); 2407 2408 /* Configure the allocators. */ 2409 if (flags & VM_MAP_ISVMSPACE) 2410 uvm_map_setup_md(map); 2411 else 2412 map->uaddr_any[3] = &uaddr_kbootstrap; 2413 2414 /* 2415 * Fill map entries. 2416 * We do not need to write-lock the map here because only the current 2417 * thread sees it right now. Initialize ref_count to 0 above to avoid 2418 * bogus triggering of lock-not-held assertions. 2419 */ 2420 uvm_map_setup_entries(map); 2421 uvm_tree_sanity(map, __FILE__, __LINE__); 2422 map->ref_count = 1; 2423 } 2424 2425 /* 2426 * Destroy the map. 2427 * 2428 * This is the inverse operation to uvm_map_setup. 
2429 */ 2430 void 2431 uvm_map_teardown(struct vm_map *map) 2432 { 2433 struct uvm_map_deadq dead_entries; 2434 struct vm_map_entry *entry, *tmp; 2435 #ifdef VMMAP_DEBUG 2436 size_t numq, numt; 2437 #endif 2438 int i; 2439 2440 KERNEL_ASSERT_LOCKED(); 2441 KERNEL_UNLOCK(); 2442 KERNEL_ASSERT_UNLOCKED(); 2443 2444 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2445 2446 /* Remove address selectors. */ 2447 uvm_addr_destroy(map->uaddr_exe); 2448 map->uaddr_exe = NULL; 2449 for (i = 0; i < nitems(map->uaddr_any); i++) { 2450 uvm_addr_destroy(map->uaddr_any[i]); 2451 map->uaddr_any[i] = NULL; 2452 } 2453 uvm_addr_destroy(map->uaddr_brk_stack); 2454 map->uaddr_brk_stack = NULL; 2455 2456 /* 2457 * Remove entries. 2458 * 2459 * The following is based on graph breadth-first search. 2460 * 2461 * In color terms: 2462 * - the dead_entries set contains all nodes that are reachable 2463 * (i.e. both the black and the grey nodes) 2464 * - any entry not in dead_entries is white 2465 * - any entry that appears in dead_entries before entry, 2466 * is black, the rest is grey. 2467 * The set [entry, end] is also referred to as the wavefront. 2468 * 2469 * Since the tree is always a fully connected graph, the breadth-first 2470 * search guarantees that each vmmap_entry is visited exactly once. 2471 * The vm_map is broken down in linear time. 2472 */ 2473 TAILQ_INIT(&dead_entries); 2474 if ((entry = RB_ROOT(&map->addr)) != NULL) 2475 DEAD_ENTRY_PUSH(&dead_entries, entry); 2476 while (entry != NULL) { 2477 sched_pause(); 2478 uvm_unmap_kill_entry(map, entry); 2479 if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL) 2480 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2481 if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) 2482 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2483 /* Update wave-front. */ 2484 entry = TAILQ_NEXT(entry, dfree.deadq); 2485 } 2486 2487 #ifdef VMMAP_DEBUG 2488 numt = numq = 0; 2489 RB_FOREACH(entry, uvm_map_addr, &map->addr) 2490 numt++; 2491 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2492 numq++; 2493 KASSERT(numt == numq); 2494 #endif 2495 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2496 2497 KERNEL_LOCK(); 2498 2499 pmap_destroy(map->pmap); 2500 map->pmap = NULL; 2501 } 2502 2503 /* 2504 * Populate map with free-memory entries. 2505 * 2506 * Map must be initialized and empty. 2507 */ 2508 void 2509 uvm_map_setup_entries(struct vm_map *map) 2510 { 2511 KDASSERT(RB_EMPTY(&map->addr)); 2512 2513 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2514 } 2515 2516 /* 2517 * Split entry at given address. 2518 * 2519 * orig: entry that is to be split. 2520 * next: a newly allocated map entry that is not linked. 2521 * split: address at which the split is done. 2522 */ 2523 void 2524 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2525 struct vm_map_entry *next, vaddr_t split) 2526 { 2527 struct uvm_addr_state *free, *free_before; 2528 vsize_t adj; 2529 2530 if ((split & PAGE_MASK) != 0) { 2531 panic("uvm_map_splitentry: split address 0x%lx " 2532 "not on page boundary!", split); 2533 } 2534 KDASSERT(map != NULL && orig != NULL && next != NULL); 2535 uvm_tree_sanity(map, __FILE__, __LINE__); 2536 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2537 2538 #ifdef VMMAP_DEBUG 2539 KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig); 2540 KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next); 2541 #endif /* VMMAP_DEBUG */ 2542 2543 /* 2544 * Free space will change, unlink from free space tree. 
2545 */ 2546 free = uvm_map_uaddr_e(map, orig); 2547 uvm_mapent_free_remove(map, free, orig); 2548 2549 adj = split - orig->start; 2550 2551 uvm_mapent_copy(orig, next); 2552 if (split >= orig->end) { 2553 next->etype = 0; 2554 next->offset = 0; 2555 next->wired_count = 0; 2556 next->start = next->end = split; 2557 next->guard = 0; 2558 next->fspace = VMMAP_FREE_END(orig) - split; 2559 next->aref.ar_amap = NULL; 2560 next->aref.ar_pageoff = 0; 2561 orig->guard = MIN(orig->guard, split - orig->end); 2562 orig->fspace = split - VMMAP_FREE_START(orig); 2563 } else { 2564 orig->fspace = 0; 2565 orig->guard = 0; 2566 orig->end = next->start = split; 2567 2568 if (next->aref.ar_amap) { 2569 KERNEL_LOCK(); 2570 amap_splitref(&orig->aref, &next->aref, adj); 2571 KERNEL_UNLOCK(); 2572 } 2573 if (UVM_ET_ISSUBMAP(orig)) { 2574 uvm_map_reference(next->object.sub_map); 2575 next->offset += adj; 2576 } else if (UVM_ET_ISOBJ(orig)) { 2577 if (next->object.uvm_obj->pgops && 2578 next->object.uvm_obj->pgops->pgo_reference) { 2579 KERNEL_LOCK(); 2580 next->object.uvm_obj->pgops->pgo_reference( 2581 next->object.uvm_obj); 2582 KERNEL_UNLOCK(); 2583 } 2584 next->offset += adj; 2585 } 2586 } 2587 2588 /* 2589 * Link next into address tree. 2590 * Link orig and next into free-space tree. 2591 * 2592 * Don't insert 'next' into the addr tree until orig has been linked, 2593 * in case the free-list looks at adjecent entries in the addr tree 2594 * for its decisions. 2595 */ 2596 if (orig->fspace > 0) 2597 free_before = free; 2598 else 2599 free_before = uvm_map_uaddr_e(map, orig); 2600 uvm_mapent_free_insert(map, free_before, orig); 2601 uvm_mapent_addr_insert(map, next); 2602 uvm_mapent_free_insert(map, free, next); 2603 2604 uvm_tree_sanity(map, __FILE__, __LINE__); 2605 } 2606 2607 2608 #ifdef VMMAP_DEBUG 2609 2610 void 2611 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2612 char *file, int line) 2613 { 2614 char* map_special; 2615 2616 if (test) 2617 return; 2618 2619 if (map == kernel_map) 2620 map_special = " (kernel_map)"; 2621 else if (map == kmem_map) 2622 map_special = " (kmem_map)"; 2623 else 2624 map_special = ""; 2625 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2626 line, test_str); 2627 } 2628 2629 /* 2630 * Check that map is sane. 2631 */ 2632 void 2633 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2634 { 2635 struct vm_map_entry *iter; 2636 vaddr_t addr; 2637 vaddr_t min, max, bound; /* Bounds checker. */ 2638 struct uvm_addr_state *free; 2639 2640 addr = vm_map_min(map); 2641 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2642 /* 2643 * Valid start, end. 2644 * Catch overflow for end+fspace. 2645 */ 2646 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2647 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2648 2649 /* May not be empty. */ 2650 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2651 file, line); 2652 2653 /* Addresses for entry must lie within map boundaries. */ 2654 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2655 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2656 2657 /* Tree may not have gaps. */ 2658 UVM_ASSERT(map, iter->start == addr, file, line); 2659 addr = VMMAP_FREE_END(iter); 2660 2661 /* 2662 * Free space may not cross boundaries, unless the same 2663 * free list is used on both sides of the border. 
2664 */ 2665 min = VMMAP_FREE_START(iter); 2666 max = VMMAP_FREE_END(iter); 2667 2668 while (min < max && 2669 (bound = uvm_map_boundary(map, min, max)) != max) { 2670 UVM_ASSERT(map, 2671 uvm_map_uaddr(map, bound - 1) == 2672 uvm_map_uaddr(map, bound), 2673 file, line); 2674 min = bound; 2675 } 2676 2677 free = uvm_map_uaddr_e(map, iter); 2678 if (free) { 2679 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2680 file, line); 2681 } else { 2682 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2683 file, line); 2684 } 2685 } 2686 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2687 } 2688 2689 void 2690 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2691 { 2692 struct vm_map_entry *iter; 2693 vsize_t size; 2694 2695 size = 0; 2696 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2697 if (!UVM_ET_ISHOLE(iter)) 2698 size += iter->end - iter->start; 2699 } 2700 2701 if (map->size != size) 2702 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2703 UVM_ASSERT(map, map->size == size, file, line); 2704 2705 vmspace_validate(map); 2706 } 2707 2708 /* 2709 * This function validates the statistics on vmspace. 2710 */ 2711 void 2712 vmspace_validate(struct vm_map *map) 2713 { 2714 struct vmspace *vm; 2715 struct vm_map_entry *iter; 2716 vaddr_t imin, imax; 2717 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2718 vsize_t stack, heap; /* Measured sizes. */ 2719 2720 if (!(map->flags & VM_MAP_ISVMSPACE)) 2721 return; 2722 2723 vm = (struct vmspace *)map; 2724 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2725 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2726 2727 stack = heap = 0; 2728 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2729 imin = imax = iter->start; 2730 2731 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2732 continue; 2733 2734 /* 2735 * Update stack, heap. 2736 * Keep in mind that (theoretically) the entries of 2737 * userspace and stack may be joined. 2738 */ 2739 while (imin != iter->end) { 2740 /* 2741 * Set imax to the first boundary crossed between 2742 * imin and stack addresses. 2743 */ 2744 imax = iter->end; 2745 if (imin < stack_begin && imax > stack_begin) 2746 imax = stack_begin; 2747 else if (imin < stack_end && imax > stack_end) 2748 imax = stack_end; 2749 2750 if (imin >= stack_begin && imin < stack_end) 2751 stack += imax - imin; 2752 else 2753 heap += imax - imin; 2754 imin = imax; 2755 } 2756 } 2757 2758 heap >>= PAGE_SHIFT; 2759 if (heap != vm->vm_dused) { 2760 printf("vmspace stack range: 0x%lx-0x%lx\n", 2761 stack_begin, stack_end); 2762 panic("vmspace_validate: vmspace.vm_dused invalid, " 2763 "expected %ld pgs, got %ld pgs in map %p", 2764 heap, vm->vm_dused, 2765 map); 2766 } 2767 } 2768 2769 #endif /* VMMAP_DEBUG */ 2770 2771 /* 2772 * uvm_map_init: init mapping system at boot time. note that we allocate 2773 * and init the static pool of structs vm_map_entry for the kernel here. 2774 */ 2775 void 2776 uvm_map_init(void) 2777 { 2778 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2779 int lcv; 2780 2781 /* now set up static pool of kernel map entries ... */ 2782 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2783 uvm.kentry_free = NULL; 2784 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2785 RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) = 2786 uvm.kentry_free; 2787 uvm.kentry_free = &kernel_map_entry[lcv]; 2788 } 2789 2790 /* initialize the map-related pools. 
*/ 2791 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 2792 0, 0, PR_WAITOK, "vmsppl", NULL); 2793 pool_setipl(&uvm_vmspace_pool, IPL_NONE); 2794 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 2795 0, 0, PR_WAITOK, "vmmpepl", NULL); 2796 pool_setipl(&uvm_map_entry_pool, IPL_VM); 2797 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 2798 0, 0, 0, "vmmpekpl", NULL); 2799 pool_setipl(&uvm_map_entry_kmem_pool, IPL_NONE); 2800 pool_sethiwat(&uvm_map_entry_pool, 8192); 2801 2802 uvm_addr_init(); 2803 } 2804 2805 #if defined(DDB) 2806 2807 /* 2808 * DDB hooks 2809 */ 2810 2811 /* 2812 * uvm_map_printit: actually prints the map 2813 */ 2814 void 2815 uvm_map_printit(struct vm_map *map, boolean_t full, 2816 int (*pr)(const char *, ...)) 2817 { 2818 struct vmspace *vm; 2819 struct vm_map_entry *entry; 2820 struct uvm_addr_state *free; 2821 int in_free, i; 2822 char buf[8]; 2823 2824 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2825 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2826 map->b_start, map->b_end); 2827 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2828 map->s_start, map->s_end); 2829 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2830 map->size, map->ref_count, map->timestamp, 2831 map->flags); 2832 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2833 pmap_resident_count(map->pmap)); 2834 2835 /* struct vmspace handling. */ 2836 if (map->flags & VM_MAP_ISVMSPACE) { 2837 vm = (struct vmspace *)map; 2838 2839 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2840 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2841 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2842 vm->vm_tsize, vm->vm_dsize); 2843 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2844 vm->vm_taddr, vm->vm_daddr); 2845 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2846 vm->vm_maxsaddr, vm->vm_minsaddr); 2847 } 2848 2849 if (!full) 2850 goto print_uaddr; 2851 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 2852 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2853 entry, entry->start, entry->end, entry->object.uvm_obj, 2854 (long long)entry->offset, entry->aref.ar_amap, 2855 entry->aref.ar_pageoff); 2856 (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 2857 "wc=%d, adv=%d\n", 2858 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2859 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 2860 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2861 entry->protection, entry->max_protection, 2862 entry->inheritance, entry->wired_count, entry->advice); 2863 2864 free = uvm_map_uaddr_e(map, entry); 2865 in_free = (free != NULL); 2866 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2867 "free=0x%lx-0x%lx\n", 2868 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2869 in_free ? 'T' : 'F', 2870 entry->guard, 2871 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2872 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2873 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2874 (entry->etype & UVM_ET_FREEMAPPED) ? 
'T' : 'F', free); 2875 if (free) { 2876 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2877 free->uaddr_minaddr, free->uaddr_maxaddr, 2878 free->uaddr_functions->uaddr_name); 2879 } 2880 } 2881 2882 print_uaddr: 2883 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2884 for (i = 0; i < nitems(map->uaddr_any); i++) { 2885 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2886 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2887 } 2888 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2889 } 2890 2891 /* 2892 * uvm_object_printit: actually prints the object 2893 */ 2894 void 2895 uvm_object_printit(uobj, full, pr) 2896 struct uvm_object *uobj; 2897 boolean_t full; 2898 int (*pr)(const char *, ...); 2899 { 2900 struct vm_page *pg; 2901 int cnt = 0; 2902 2903 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 2904 uobj, uobj->pgops, uobj->uo_npages); 2905 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 2906 (*pr)("refs=<SYSTEM>\n"); 2907 else 2908 (*pr)("refs=%d\n", uobj->uo_refs); 2909 2910 if (!full) { 2911 return; 2912 } 2913 (*pr)(" PAGES <pg,offset>:\n "); 2914 RB_FOREACH(pg, uvm_objtree, &uobj->memt) { 2915 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 2916 if ((cnt % 3) == 2) { 2917 (*pr)("\n "); 2918 } 2919 cnt++; 2920 } 2921 if ((cnt % 3) != 2) { 2922 (*pr)("\n"); 2923 } 2924 } 2925 2926 /* 2927 * uvm_page_printit: actually print the page 2928 */ 2929 static const char page_flagbits[] = 2930 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 2931 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 2932 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 2933 2934 void 2935 uvm_page_printit(pg, full, pr) 2936 struct vm_page *pg; 2937 boolean_t full; 2938 int (*pr)(const char *, ...); 2939 { 2940 struct vm_page *tpg; 2941 struct uvm_object *uobj; 2942 struct pglist *pgl; 2943 2944 (*pr)("PAGE %p:\n", pg); 2945 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 2946 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 2947 (long long)pg->phys_addr); 2948 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 2949 pg->uobject, pg->uanon, (long long)pg->offset); 2950 #if defined(UVM_PAGE_TRKOWN) 2951 if (pg->pg_flags & PG_BUSY) 2952 (*pr)(" owning process = %d, tag=%s", 2953 pg->owner, pg->owner_tag); 2954 else 2955 (*pr)(" page not busy, no owner"); 2956 #else 2957 (*pr)(" [page ownership tracking disabled]"); 2958 #endif 2959 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 2960 2961 if (!full) 2962 return; 2963 2964 /* cross-verify object/anon */ 2965 if ((pg->pg_flags & PQ_FREE) == 0) { 2966 if (pg->pg_flags & PQ_ANON) { 2967 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2968 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2969 (pg->uanon) ? pg->uanon->an_page : NULL); 2970 else 2971 (*pr)(" anon backpointer is OK\n"); 2972 } else { 2973 uobj = pg->uobject; 2974 if (uobj) { 2975 (*pr)(" checking object list\n"); 2976 RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { 2977 if (tpg == pg) { 2978 break; 2979 } 2980 } 2981 if (tpg) 2982 (*pr)(" page found on object list\n"); 2983 else 2984 (*pr)(" >>> PAGE NOT FOUND " 2985 "ON OBJECT LIST! <<<\n"); 2986 } 2987 } 2988 } 2989 2990 /* cross-verify page queue */ 2991 if (pg->pg_flags & PQ_FREE) { 2992 if (uvm_pmr_isfree(pg)) 2993 (*pr)(" page found in uvm_pmemrange\n"); 2994 else 2995 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 2996 pgl = NULL; 2997 } else if (pg->pg_flags & PQ_INACTIVE) { 2998 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
2999 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3000 } else if (pg->pg_flags & PQ_ACTIVE) { 3001 pgl = &uvm.page_active; 3002 } else { 3003 pgl = NULL; 3004 } 3005 3006 if (pgl) { 3007 (*pr)(" checking pageq list\n"); 3008 TAILQ_FOREACH(tpg, pgl, pageq) { 3009 if (tpg == pg) { 3010 break; 3011 } 3012 } 3013 if (tpg) 3014 (*pr)(" page found on pageq list\n"); 3015 else 3016 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3017 } 3018 } 3019 #endif 3020 3021 /* 3022 * uvm_map_protect: change map protection 3023 * 3024 * => set_max means set max_protection. 3025 * => map must be unlocked. 3026 */ 3027 int 3028 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3029 vm_prot_t new_prot, boolean_t set_max) 3030 { 3031 struct vm_map_entry *first, *iter; 3032 vm_prot_t old_prot; 3033 vm_prot_t mask; 3034 int error; 3035 3036 if (start > end) 3037 return EINVAL; 3038 start = MAX(start, map->min_offset); 3039 end = MIN(end, map->max_offset); 3040 if (start >= end) 3041 return 0; 3042 3043 error = 0; 3044 vm_map_lock(map); 3045 3046 /* 3047 * Set up first and last. 3048 * - first will contain first entry at or after start. 3049 */ 3050 first = uvm_map_entrybyaddr(&map->addr, start); 3051 KDASSERT(first != NULL); 3052 if (first->end < start) 3053 first = RB_NEXT(uvm_map_addr, &map->addr, first); 3054 3055 /* First, check for protection violations. */ 3056 for (iter = first; iter != NULL && iter->start < end; 3057 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 3058 /* Treat memory holes as free space. */ 3059 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3060 continue; 3061 3062 if (UVM_ET_ISSUBMAP(iter)) { 3063 error = EINVAL; 3064 goto out; 3065 } 3066 if ((new_prot & iter->max_protection) != new_prot) { 3067 error = EACCES; 3068 goto out; 3069 } 3070 if (map == kernel_map && 3071 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3072 panic("uvm_map_protect: kernel map W^X violation requested"); 3073 } 3074 3075 /* Fix protections. */ 3076 for (iter = first; iter != NULL && iter->start < end; 3077 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 3078 /* Treat memory holes as free space. */ 3079 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3080 continue; 3081 3082 old_prot = iter->protection; 3083 3084 /* 3085 * Skip adapting protection iff old and new protection 3086 * are equal. 3087 */ 3088 if (set_max) { 3089 if (old_prot == (new_prot & old_prot) && 3090 iter->max_protection == new_prot) 3091 continue; 3092 } else { 3093 if (old_prot == new_prot) 3094 continue; 3095 } 3096 3097 UVM_MAP_CLIP_START(map, iter, start); 3098 UVM_MAP_CLIP_END(map, iter, end); 3099 3100 if (set_max) { 3101 iter->max_protection = new_prot; 3102 iter->protection &= new_prot; 3103 } else 3104 iter->protection = new_prot; 3105 3106 /* 3107 * update physical map if necessary. worry about copy-on-write 3108 * here -- CHECK THIS XXX 3109 */ 3110 if (iter->protection != old_prot) { 3111 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3112 ~PROT_WRITE : PROT_MASK; 3113 3114 /* update pmap */ 3115 if ((iter->protection & mask) == PROT_NONE && 3116 VM_MAPENT_ISWIRED(iter)) { 3117 /* 3118 * TODO(ariane) this is stupid. wired_count 3119 * is 0 if not wired, otherwise anything 3120 * larger than 0 (incremented once each time 3121 * wire is called). 3122 * Mostly to be able to undo the damage on 3123 * failure. Not the actually be a wired 3124 * refcounter... 3125 * Originally: iter->wired_count--; 3126 * (don't we have to unwire this in the pmap 3127 * as well?) 
3128 */ 3129 iter->wired_count = 0; 3130 } 3131 pmap_protect(map->pmap, iter->start, iter->end, 3132 iter->protection & mask); 3133 } 3134 3135 /* 3136 * If the map is configured to lock any future mappings, 3137 * wire this entry now if the old protection was PROT_NONE 3138 * and the new protection is not PROT_NONE. 3139 */ 3140 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3141 VM_MAPENT_ISWIRED(iter) == 0 && 3142 old_prot == PROT_NONE && 3143 new_prot != PROT_NONE) { 3144 if (uvm_map_pageable(map, iter->start, iter->end, 3145 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3146 /* 3147 * If locking the entry fails, remember the 3148 * error if it's the first one. Note we 3149 * still continue setting the protection in 3150 * the map, but it will return the resource 3151 * storage condition regardless. 3152 * 3153 * XXX Ignore what the actual error is, 3154 * XXX just call it a resource shortage 3155 * XXX so that it doesn't get confused 3156 * XXX what uvm_map_protect() itself would 3157 * XXX normally return. 3158 */ 3159 error = ENOMEM; 3160 } 3161 } 3162 } 3163 pmap_update(map->pmap); 3164 3165 out: 3166 vm_map_unlock(map); 3167 return error; 3168 } 3169 3170 /* 3171 * uvmspace_alloc: allocate a vmspace structure. 3172 * 3173 * - structure includes vm_map and pmap 3174 * - XXX: no locking on this structure 3175 * - refcnt set to 1, rest must be init'd by caller 3176 */ 3177 struct vmspace * 3178 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3179 boolean_t remove_holes) 3180 { 3181 struct vmspace *vm; 3182 3183 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3184 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3185 return (vm); 3186 } 3187 3188 /* 3189 * uvmspace_init: initialize a vmspace structure. 3190 * 3191 * - XXX: no locking on this structure 3192 * - refcnt set to 1, rest must be init'd by caller 3193 */ 3194 void 3195 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3196 boolean_t pageable, boolean_t remove_holes) 3197 { 3198 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3199 3200 if (pmap) 3201 pmap_reference(pmap); 3202 else 3203 pmap = pmap_create(); 3204 vm->vm_map.pmap = pmap; 3205 3206 uvm_map_setup(&vm->vm_map, min, max, 3207 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3208 3209 vm->vm_refcnt = 1; 3210 3211 if (remove_holes) 3212 pmap_remove_holes(vm); 3213 } 3214 3215 /* 3216 * uvmspace_share: share a vmspace between two processes 3217 * 3218 * - XXX: no locking on vmspace 3219 * - used for vfork 3220 */ 3221 3222 struct vmspace * 3223 uvmspace_share(struct process *pr) 3224 { 3225 struct vmspace *vm = pr->ps_vmspace; 3226 3227 vm->vm_refcnt++; 3228 return vm; 3229 } 3230 3231 /* 3232 * uvmspace_exec: the process wants to exec a new program 3233 * 3234 * - XXX: no locking on vmspace 3235 */ 3236 3237 void 3238 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3239 { 3240 struct process *pr = p->p_p; 3241 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3242 struct vm_map *map = &ovm->vm_map; 3243 struct uvm_map_deadq dead_entries; 3244 3245 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3246 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3247 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3248 3249 pmap_unuse_final(p); /* before stack addresses go away */ 3250 TAILQ_INIT(&dead_entries); 3251 3252 /* see if more than one process is using this vmspace... 
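 * If ours is the only reference (vm_refcnt == 1) the old vmspace is
 * recycled in place: all entries are removed and the map is given the
 * new boundaries.  Otherwise a fresh vmspace is allocated and
 * installed, and our reference to the old one is dropped.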
*/ 3253 if (ovm->vm_refcnt == 1) { 3254 /* 3255 * If pr is the only process using its vmspace then 3256 * we can safely recycle that vmspace for the program 3257 * that is being exec'd. 3258 */ 3259 3260 #ifdef SYSVSHM 3261 /* 3262 * SYSV SHM semantics require us to kill all segments on an exec 3263 */ 3264 if (ovm->vm_shm) 3265 shmexit(ovm); 3266 #endif 3267 3268 /* 3269 * POSIX 1003.1b -- "lock future mappings" is revoked 3270 * when a process execs another program image. 3271 */ 3272 vm_map_lock(map); 3273 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3274 3275 /* 3276 * now unmap the old program 3277 * 3278 * Instead of attempting to keep the map valid, we simply 3279 * nuke all entries and ask uvm_map_setup to reinitialize 3280 * the map to the new boundaries. 3281 * 3282 * uvm_unmap_remove will actually nuke all entries for us 3283 * (as in, not replace them with free-memory entries). 3284 */ 3285 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3286 &dead_entries, TRUE, FALSE); 3287 3288 KDASSERT(RB_EMPTY(&map->addr)); 3289 3290 /* Nuke statistics and boundaries. */ 3291 memset(&ovm->vm_startcopy, 0, 3292 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3293 3294 3295 if (end & (vaddr_t)PAGE_MASK) { 3296 end += 1; 3297 if (end == 0) /* overflow */ 3298 end -= PAGE_SIZE; 3299 } 3300 3301 /* Setup new boundaries and populate map with entries. */ 3302 map->min_offset = start; 3303 map->max_offset = end; 3304 uvm_map_setup_entries(map); 3305 vm_map_unlock(map); 3306 3307 /* but keep MMU holes unavailable */ 3308 pmap_remove_holes(ovm); 3309 } else { 3310 /* 3311 * pr's vmspace is being shared, so we can't reuse 3312 * it for pr since it is still being used for others. 3313 * allocate a new vmspace for pr 3314 */ 3315 nvm = uvmspace_alloc(start, end, 3316 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3317 3318 /* install new vmspace and drop our ref to the old one. */ 3319 pmap_deactivate(p); 3320 p->p_vmspace = pr->ps_vmspace = nvm; 3321 pmap_activate(p); 3322 3323 uvmspace_free(ovm); 3324 } 3325 3326 /* Release dead entries */ 3327 uvm_unmap_detach(&dead_entries, 0); 3328 } 3329 3330 /* 3331 * uvmspace_free: free a vmspace data structure 3332 * 3333 * - XXX: no locking on vmspace 3334 */ 3335 void 3336 uvmspace_free(struct vmspace *vm) 3337 { 3338 if (--vm->vm_refcnt == 0) { 3339 /* 3340 * lock the map, to wait out all other references to it. delete 3341 * all of the mappings and pages they hold, then call the pmap 3342 * module to reclaim anything left. 3343 */ 3344 #ifdef SYSVSHM 3345 /* Get rid of any SYSV shared memory segments. */ 3346 if (vm->vm_shm != NULL) 3347 shmexit(vm); 3348 #endif 3349 3350 uvm_map_teardown(&vm->vm_map); 3351 pool_put(&uvm_vmspace_pool, vm); 3352 } 3353 } 3354 3355 /* 3356 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3357 * srcmap to the address range [dstaddr, dstaddr + sz) in 3358 * dstmap. 3359 * 3360 * The whole address range in srcmap must be backed by an object 3361 * (no holes). 3362 * 3363 * If successful, the address ranges share memory and the destination 3364 * address range uses the protection flags in prot. 3365 * 3366 * This routine assumes that sz is a multiple of PAGE_SIZE and 3367 * that dstaddr and srcaddr are page-aligned. 
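 *
 * A (hypothetical) call would look like
 *	uvm_share(dstmap, dstaddr, PROT_READ | PROT_WRITE,
 *	    srcmap, srcaddr, sz);
 * which walks the source entries covering [srcaddr, srcaddr + sz),
 * shares each of them into dstmap via uvm_mapent_share() and, on a
 * hole or failure, unwinds the partial work with uvm_unmap_remove().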
3368 */ 3369 int 3370 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3371 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3372 { 3373 int ret = 0; 3374 vaddr_t unmap_end; 3375 vaddr_t dstva; 3376 vsize_t off, len, n = sz; 3377 struct vm_map_entry *first = NULL, *last = NULL; 3378 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3379 struct uvm_map_deadq dead; 3380 3381 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3382 return EINVAL; 3383 3384 TAILQ_INIT(&dead); 3385 vm_map_lock(dstmap); 3386 vm_map_lock_read(srcmap); 3387 3388 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3389 ret = ENOMEM; 3390 goto exit_unlock; 3391 } 3392 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3393 ret = EINVAL; 3394 goto exit_unlock; 3395 } 3396 3397 unmap_end = dstaddr; 3398 for (; src_entry != NULL; 3399 psrc_entry = src_entry, 3400 src_entry = RB_NEXT(uvm_map_addr, &srcmap->addr, src_entry)) { 3401 /* hole in address space, bail out */ 3402 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3403 break; 3404 if (src_entry->start >= srcaddr + sz) 3405 break; 3406 3407 if (UVM_ET_ISSUBMAP(src_entry)) 3408 panic("uvm_share: encountered a submap (illegal)"); 3409 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3410 UVM_ET_ISNEEDSCOPY(src_entry)) 3411 panic("uvm_share: non-copy_on_write map entries " 3412 "marked needs_copy (illegal)"); 3413 3414 dstva = dstaddr; 3415 if (src_entry->start > srcaddr) { 3416 dstva += src_entry->start - srcaddr; 3417 off = 0; 3418 } else 3419 off = srcaddr - src_entry->start; 3420 3421 if (n < src_entry->end - src_entry->start) 3422 len = n; 3423 else 3424 len = src_entry->end - src_entry->start; 3425 n -= len; 3426 3427 if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot, 3428 srcmap, src_entry, &dead) == NULL) 3429 break; 3430 3431 unmap_end = dstva + len; 3432 if (n == 0) 3433 goto exit_unlock; 3434 } 3435 3436 ret = EINVAL; 3437 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3438 3439 exit_unlock: 3440 vm_map_unlock_read(srcmap); 3441 vm_map_unlock(dstmap); 3442 uvm_unmap_detach(&dead, 0); 3443 3444 return ret; 3445 } 3446 3447 /* 3448 * Clone map entry into other map. 3449 * 3450 * Mapping will be placed at dstaddr, for the same length. 3451 * Space must be available. 3452 * Reference counters are incremented. 3453 */ 3454 struct vm_map_entry * 3455 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3456 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3457 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3458 int mapent_flags, int amap_share_flags) 3459 { 3460 struct vm_map_entry *new_entry, *first, *last; 3461 3462 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3463 3464 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3465 first = last = NULL; 3466 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3467 panic("uvmspace_fork: no space in map for " 3468 "entry in empty map"); 3469 } 3470 new_entry = uvm_map_mkentry(dstmap, first, last, 3471 dstaddr, dstlen, mapent_flags, dead, NULL); 3472 if (new_entry == NULL) 3473 return NULL; 3474 /* old_entry -> new_entry */ 3475 new_entry->object = old_entry->object; 3476 new_entry->offset = old_entry->offset; 3477 new_entry->aref = old_entry->aref; 3478 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3479 new_entry->protection = prot; 3480 new_entry->max_protection = maxprot; 3481 new_entry->inheritance = old_entry->inheritance; 3482 new_entry->advice = old_entry->advice; 3483 3484 /* gain reference to object backing the map (can't be a submap). */ 3485 if (new_entry->aref.ar_amap) { 3486 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3487 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3488 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3489 amap_share_flags); 3490 } 3491 3492 if (UVM_ET_ISOBJ(new_entry) && 3493 new_entry->object.uvm_obj->pgops->pgo_reference) { 3494 new_entry->offset += off; 3495 new_entry->object.uvm_obj->pgops->pgo_reference 3496 (new_entry->object.uvm_obj); 3497 } 3498 3499 return new_entry; 3500 } 3501 3502 struct vm_map_entry * 3503 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3504 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3505 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3506 { 3507 /* 3508 * If old_entry refers to a copy-on-write region that has not yet been 3509 * written to (needs_copy flag is set), then we need to allocate a new 3510 * amap for old_entry. 3511 * 3512 * If we do not do this, and the process owning old_entry does a copy-on 3513 * write later, old_entry and new_entry will refer to different memory 3514 * regions, and the memory between the processes is no longer shared. 3515 * 3516 * [in other words, we need to clear needs_copy] 3517 */ 3518 3519 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3520 /* get our own amap, clears needs_copy */ 3521 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3522 0, 0); 3523 /* XXXCDC: WAITOK??? */ 3524 } 3525 3526 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3527 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3528 } 3529 3530 /* 3531 * share the mapping: this means we want the old and 3532 * new entries to share amaps and backing objects. 3533 */ 3534 struct vm_map_entry * 3535 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3536 struct vm_map *old_map, 3537 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3538 { 3539 struct vm_map_entry *new_entry; 3540 3541 new_entry = uvm_mapent_share(new_map, old_entry->start, 3542 old_entry->end - old_entry->start, 0, old_entry->protection, 3543 old_entry->max_protection, old_map, old_entry, dead); 3544 3545 /* 3546 * pmap_copy the mappings: this routine is optional 3547 * but if it is there it will reduce the number of 3548 * page faults in the new proc. 3549 */ 3550 if (!UVM_ET_ISHOLE(new_entry)) 3551 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3552 (new_entry->end - new_entry->start), new_entry->start); 3553 3554 return (new_entry); 3555 } 3556 3557 /* 3558 * copy-on-write the mapping (using mmap's 3559 * MAP_PRIVATE semantics) 3560 * 3561 * allocate new_entry, adjust reference counts. 3562 * (note that new references are read-only). 
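 * The child's entry is cloned with UVM_ET_COPYONWRITE and
 * UVM_ET_NEEDSCOPY set; whether the amap is copied immediately or
 * deferred until the first write fault is decided below, based on
 * amap sharing and the wiring of the parent entry.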
3563 */ 3564 struct vm_map_entry * 3565 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3566 struct vm_map *old_map, 3567 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3568 { 3569 struct vm_map_entry *new_entry; 3570 boolean_t protect_child; 3571 3572 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3573 old_entry->end - old_entry->start, 0, old_entry->protection, 3574 old_entry->max_protection, old_entry, dead, 0, 0); 3575 3576 new_entry->etype |= 3577 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3578 3579 /* 3580 * the new entry will need an amap. it will either 3581 * need to be copied from the old entry or created 3582 * from scratch (if the old entry does not have an 3583 * amap). can we defer this process until later 3584 * (by setting "needs_copy") or do we need to copy 3585 * the amap now? 3586 * 3587 * we must copy the amap now if any of the following 3588 * conditions hold: 3589 * 1. the old entry has an amap and that amap is 3590 * being shared. this means that the old (parent) 3591 * process is sharing the amap with another 3592 * process. if we do not clear needs_copy here 3593 * we will end up in a situation where both the 3594 * parent and child process are referring to the 3595 * same amap with "needs_copy" set. if the 3596 * parent write-faults, the fault routine will 3597 * clear "needs_copy" in the parent by allocating 3598 * a new amap. this is wrong because the 3599 * parent is supposed to be sharing the old amap 3600 * and the new amap will break that. 3601 * 3602 * 2. if the old entry has an amap and a non-zero 3603 * wire count then we are going to have to call 3604 * amap_cow_now to avoid page faults in the 3605 * parent process. since amap_cow_now requires 3606 * "needs_copy" to be clear we might as well 3607 * clear it here as well. 3608 * 3609 */ 3610 if (old_entry->aref.ar_amap != NULL && 3611 ((amap_flags(old_entry->aref.ar_amap) & 3612 AMAP_SHARED) != 0 || 3613 VM_MAPENT_ISWIRED(old_entry))) { 3614 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3615 0, 0); 3616 /* XXXCDC: M_WAITOK ... ok? */ 3617 } 3618 3619 /* 3620 * if the parent's entry is wired down, then the 3621 * parent process does not want page faults on 3622 * access to that memory. this means that we 3623 * cannot do copy-on-write because we can't write 3624 * protect the old entry. in this case we 3625 * resolve all copy-on-write faults now, using 3626 * amap_cow_now. note that we have already 3627 * allocated any needed amap (above). 3628 */ 3629 if (VM_MAPENT_ISWIRED(old_entry)) { 3630 /* 3631 * resolve all copy-on-write faults now 3632 * (note that there is nothing to do if 3633 * the old mapping does not have an amap). 3634 * XXX: is it worthwhile to bother with 3635 * pmap_copy in this case? 3636 */ 3637 if (old_entry->aref.ar_amap) 3638 amap_cow_now(new_map, new_entry); 3639 } else { 3640 if (old_entry->aref.ar_amap) { 3641 /* 3642 * setup mappings to trigger copy-on-write faults 3643 * we must write-protect the parent if it has 3644 * an amap and it is not already "needs_copy"... 3645 * if it is already "needs_copy" then the parent 3646 * has already been write-protected by a previous 3647 * fork operation. 3648 * 3649 * if we do not write-protect the parent, then 3650 * we must be sure to write-protect the child 3651 * after the pmap_copy() operation. 3652 * 3653 * XXX: pmap_copy should have some way of telling 3654 * us that it didn't do anything so we can avoid 3655 * calling pmap_protect needlessly. 
3656 */ 3657 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3658 if (old_entry->max_protection & PROT_WRITE) { 3659 pmap_protect(old_map->pmap, 3660 old_entry->start, 3661 old_entry->end, 3662 old_entry->protection & 3663 ~PROT_WRITE); 3664 pmap_update(old_map->pmap); 3665 } 3666 old_entry->etype |= UVM_ET_NEEDSCOPY; 3667 } 3668 3669 /* parent must now be write-protected */ 3670 protect_child = FALSE; 3671 } else { 3672 /* 3673 * we only need to protect the child if the 3674 * parent has write access. 3675 */ 3676 if (old_entry->max_protection & PROT_WRITE) 3677 protect_child = TRUE; 3678 else 3679 protect_child = FALSE; 3680 } 3681 /* 3682 * copy the mappings 3683 * XXX: need a way to tell if this does anything 3684 */ 3685 if (!UVM_ET_ISHOLE(new_entry)) 3686 pmap_copy(new_map->pmap, old_map->pmap, 3687 new_entry->start, 3688 (old_entry->end - old_entry->start), 3689 old_entry->start); 3690 3691 /* protect the child's mappings if necessary */ 3692 if (protect_child) { 3693 pmap_protect(new_map->pmap, new_entry->start, 3694 new_entry->end, 3695 new_entry->protection & 3696 ~PROT_WRITE); 3697 } 3698 } 3699 3700 return (new_entry); 3701 } 3702 3703 /* 3704 * zero the mapping: the new entry will be zero initialized 3705 */ 3706 struct vm_map_entry * 3707 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3708 struct vm_map *old_map, 3709 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3710 { 3711 struct vm_map_entry *new_entry; 3712 3713 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3714 old_entry->end - old_entry->start, 0, old_entry->protection, 3715 old_entry->max_protection, old_entry, dead, 0, 0); 3716 3717 new_entry->etype |= 3718 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3719 3720 if (new_entry->aref.ar_amap) { 3721 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3722 atop(new_entry->end - new_entry->start), 0); 3723 new_entry->aref.ar_amap = NULL; 3724 new_entry->aref.ar_pageoff = 0; 3725 } 3726 3727 if (UVM_ET_ISOBJ(new_entry)) { 3728 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3729 new_entry->object.uvm_obj->pgops->pgo_detach( 3730 new_entry->object.uvm_obj); 3731 new_entry->object.uvm_obj = NULL; 3732 new_entry->etype &= ~UVM_ET_OBJ; 3733 } 3734 3735 return (new_entry); 3736 } 3737 3738 /* 3739 * uvmspace_fork: fork a process' main map 3740 * 3741 * => create a new vmspace for child process from parent. 3742 * => parent's map must not be locked. 3743 */ 3744 struct vmspace * 3745 uvmspace_fork(struct process *pr) 3746 { 3747 struct vmspace *vm1 = pr->ps_vmspace; 3748 struct vmspace *vm2; 3749 struct vm_map *old_map = &vm1->vm_map; 3750 struct vm_map *new_map; 3751 struct vm_map_entry *old_entry, *new_entry; 3752 struct uvm_map_deadq dead; 3753 3754 vm_map_lock(old_map); 3755 3756 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3757 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3758 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3759 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3760 vm2->vm_dused = 0; /* Statistic managed by us. 
*/ 3761 new_map = &vm2->vm_map; 3762 vm_map_lock(new_map); 3763 3764 /* go entry-by-entry */ 3765 TAILQ_INIT(&dead); 3766 RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3767 if (old_entry->start == old_entry->end) 3768 continue; 3769 3770 /* first, some sanity checks on the old entry */ 3771 if (UVM_ET_ISSUBMAP(old_entry)) { 3772 panic("fork: encountered a submap during fork " 3773 "(illegal)"); 3774 } 3775 3776 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3777 UVM_ET_ISNEEDSCOPY(old_entry)) { 3778 panic("fork: non-copy_on_write map entry marked " 3779 "needs_copy (illegal)"); 3780 } 3781 3782 /* Apply inheritance. */ 3783 switch (old_entry->inheritance) { 3784 case MAP_INHERIT_SHARE: 3785 new_entry = uvm_mapent_forkshared(vm2, new_map, 3786 old_map, old_entry, &dead); 3787 break; 3788 case MAP_INHERIT_COPY: 3789 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3790 old_map, old_entry, &dead); 3791 break; 3792 case MAP_INHERIT_ZERO: 3793 new_entry = uvm_mapent_forkzero(vm2, new_map, 3794 old_map, old_entry, &dead); 3795 break; 3796 default: 3797 continue; 3798 } 3799 3800 /* Update process statistics. */ 3801 if (!UVM_ET_ISHOLE(new_entry)) 3802 new_map->size += new_entry->end - new_entry->start; 3803 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3804 vm2->vm_dused += uvmspace_dused( 3805 new_map, new_entry->start, new_entry->end); 3806 } 3807 } 3808 3809 vm_map_unlock(old_map); 3810 vm_map_unlock(new_map); 3811 3812 /* 3813 * This can actually happen, if multiple entries described a 3814 * space in which an entry was inherited. 3815 */ 3816 uvm_unmap_detach(&dead, 0); 3817 3818 #ifdef SYSVSHM 3819 if (vm1->vm_shm) 3820 shmfork(vm1, vm2); 3821 #endif 3822 3823 return vm2; 3824 } 3825 3826 /* 3827 * uvm_map_hint: return the beginning of the best area suitable for 3828 * creating a new mapping with "prot" protection. 3829 */ 3830 vaddr_t 3831 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3832 vaddr_t maxaddr) 3833 { 3834 vaddr_t addr; 3835 vaddr_t spacing; 3836 3837 #ifdef __i386__ 3838 /* 3839 * If executable skip first two pages, otherwise start 3840 * after data + heap region. 3841 */ 3842 if ((prot & PROT_EXEC) != 0 && 3843 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3844 addr = (PAGE_SIZE*2) + 3845 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3846 return (round_page(addr)); 3847 } 3848 #endif 3849 3850 #if defined (__LP64__) 3851 spacing = (MIN((4UL * 1024 * 1024 * 1024), BRKSIZ) - 1); 3852 #else 3853 spacing = (MIN((256 * 1024 * 1024), BRKSIZ) - 1); 3854 #endif 3855 3856 addr = (vaddr_t)vm->vm_daddr; 3857 /* 3858 * Start malloc/mmap after the brk. 3859 * If the random spacing area has been used up, 3860 * the brk area becomes fair game for mmap as well. 3861 */ 3862 if (vm->vm_dused < spacing >> PAGE_SHIFT) 3863 addr += BRKSIZ; 3864 if (addr < maxaddr) { 3865 while (spacing > maxaddr - addr) 3866 spacing >>= 1; 3867 } 3868 addr += arc4random() & spacing; 3869 return (round_page(addr)); 3870 } 3871 3872 /* 3873 * uvm_map_submap: punch down part of a map into a submap 3874 * 3875 * => only the kernel_map is allowed to be submapped 3876 * => the purpose of submapping is to break up the locking granularity 3877 * of a larger map 3878 * => the range specified must have been mapped previously with a uvm_map() 3879 * call [with uobj==NULL] to create a blank map entry in the main map. 3880 * [And it had better still be blank!] 3881 * => maps which contain submaps should never be copied or forked. 
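 *    (uvmspace_fork() and uvm_share() above enforce this: both panic
 *    when they encounter a submap entry.)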
3882  * => to remove a submap, use uvm_unmap() on the main map
3883  *    and then uvm_map_deallocate() the submap.
3884  * => main map must be unlocked.
3885  * => submap must have been init'd and have a zero reference count.
3886  *    [need not be locked as we don't actually reference it]
3887  */
3888 int
3889 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3890     struct vm_map *submap)
3891 {
3892 	struct vm_map_entry *entry;
3893 	int result;
3894 
3895 	if (start > map->max_offset || end > map->max_offset ||
3896 	    start < map->min_offset || end < map->min_offset)
3897 		return EINVAL;
3898 
3899 	vm_map_lock(map);
3900 
3901 	if (uvm_map_lookup_entry(map, start, &entry)) {
3902 		UVM_MAP_CLIP_START(map, entry, start);
3903 		UVM_MAP_CLIP_END(map, entry, end);
3904 	} else
3905 		entry = NULL;
3906 
3907 	if (entry != NULL &&
3908 	    entry->start == start && entry->end == end &&
3909 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
3910 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
3911 		entry->etype |= UVM_ET_SUBMAP;
3912 		entry->object.sub_map = submap;
3913 		entry->offset = 0;
3914 		uvm_map_reference(submap);
3915 		result = 0;
3916 	} else
3917 		result = EINVAL;
3918 
3919 	vm_map_unlock(map);
3920 	return(result);
3921 }
3922 
3923 /*
3924  * uvm_map_checkprot: check protection in map
3925  *
3926  * => must allow specific protection in a fully allocated region.
3927  * => map must be read or write locked by caller.
3928  */
3929 boolean_t
3930 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3931     vm_prot_t protection)
3932 {
3933 	struct vm_map_entry *entry;
3934 
3935 	if (start < map->min_offset || end > map->max_offset || start > end)
3936 		return FALSE;
3937 	if (start == end)
3938 		return TRUE;
3939 
3940 	/*
3941 	 * Iterate entries.
3942 	 */
3943 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
3944 	    entry != NULL && entry->start < end;
3945 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
3946 		/* Fail if a hole is found. */
3947 		if (UVM_ET_ISHOLE(entry) ||
3948 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
3949 			return FALSE;
3950 
3951 		/* Check protection. */
3952 		if ((entry->protection & protection) != protection)
3953 			return FALSE;
3954 	}
3955 	return TRUE;
3956 }
3957 
3958 /*
3959  * uvm_map_create: create map
3960  */
3961 vm_map_t
3962 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
3963 {
3964 	vm_map_t map;
3965 
3966 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
3967 	map->pmap = pmap;
3968 	uvm_map_setup(map, min, max, flags);
3969 	return (map);
3970 }
3971 
3972 /*
3973  * uvm_map_deallocate: drop reference to a map
3974  *
3975  * => caller must not lock map
3976  * => we will zap map if ref count goes to zero
3977  */
3978 void
3979 uvm_map_deallocate(vm_map_t map)
3980 {
3981 	int c;
3982 	struct uvm_map_deadq dead;
3983 
3984 	c = --map->ref_count;
3985 	if (c > 0) {
3986 		return;
3987 	}
3988 
3989 	/*
3990 	 * all references gone.  unmap and free.
3991 	 *
3992 	 * No lock required: we are the only one to access this map.
3993 	 */
3994 	TAILQ_INIT(&dead);
3995 	uvm_tree_sanity(map, __FILE__, __LINE__);
3996 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
3997 	    TRUE, FALSE);
3998 	pmap_destroy(map->pmap);
3999 	KASSERT(RB_EMPTY(&map->addr));
4000 	free(map, M_VMMAP, sizeof *map);
4001 
4002 	uvm_unmap_detach(&dead, 0);
4003 }
4004 
4005 /*
4006  * uvm_map_inherit: set inheritance code for range of addrs in map.
4007  *
4008  * => map must be unlocked
4009  * => note that the inherit code is used during a "fork".  see fork
4010  *    code for details.
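 * => illustrative sketch (editor's addition, not from the original source;
 *    "scratch_start" and "scratch_end" are placeholder names): a region
 *    that should appear zero-filled in the child instead of being copied
 *    can be marked before the fork:
 *	uvm_map_inherit(&pr->ps_vmspace->vm_map, scratch_start,
 *	    scratch_end, MAP_INHERIT_ZERO);
 *    uvmspace_fork() above then takes the MAP_INHERIT_ZERO branch and
 *    clones such entries via uvm_mapent_forkzero().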
4011  */
4012 int
4013 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4014     vm_inherit_t new_inheritance)
4015 {
4016 	struct vm_map_entry *entry;
4017 
4018 	switch (new_inheritance) {
4019 	case MAP_INHERIT_NONE:
4020 	case MAP_INHERIT_COPY:
4021 	case MAP_INHERIT_SHARE:
4022 	case MAP_INHERIT_ZERO:
4023 		break;
4024 	default:
4025 		return (EINVAL);
4026 	}
4027 
4028 	if (start > end)
4029 		return EINVAL;
4030 	start = MAX(start, map->min_offset);
4031 	end = MIN(end, map->max_offset);
4032 	if (start >= end)
4033 		return 0;
4034 
4035 	vm_map_lock(map);
4036 
4037 	entry = uvm_map_entrybyaddr(&map->addr, start);
4038 	if (entry->end > start)
4039 		UVM_MAP_CLIP_START(map, entry, start);
4040 	else
4041 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4042 
4043 	while (entry != NULL && entry->start < end) {
4044 		UVM_MAP_CLIP_END(map, entry, end);
4045 		entry->inheritance = new_inheritance;
4046 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4047 	}
4048 
4049 	vm_map_unlock(map);
4050 	return (0);
4051 }
4052 
4053 /*
4054  * uvm_map_advice: set advice code for range of addrs in map.
4055  *
4056  * => map must be unlocked
4057  */
4058 int
4059 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4060 {
4061 	struct vm_map_entry *entry;
4062 
4063 	switch (new_advice) {
4064 	case MADV_NORMAL:
4065 	case MADV_RANDOM:
4066 	case MADV_SEQUENTIAL:
4067 		break;
4068 	default:
4069 		return (EINVAL);
4070 	}
4071 
4072 	if (start > end)
4073 		return EINVAL;
4074 	start = MAX(start, map->min_offset);
4075 	end = MIN(end, map->max_offset);
4076 	if (start >= end)
4077 		return 0;
4078 
4079 	vm_map_lock(map);
4080 
4081 	entry = uvm_map_entrybyaddr(&map->addr, start);
4082 	if (entry != NULL && entry->end > start)
4083 		UVM_MAP_CLIP_START(map, entry, start);
4084 	else if (entry != NULL)
4085 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4086 
4087 	/*
4088 	 * XXXJRT: disallow holes?
4089 	 */
4090 	while (entry != NULL && entry->start < end) {
4091 		UVM_MAP_CLIP_END(map, entry, end);
4092 		entry->advice = new_advice;
4093 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4094 	}
4095 
4096 	vm_map_unlock(map);
4097 	return (0);
4098 }
4099 
4100 /*
4101  * uvm_map_extract: extract a mapping from a map and put it somewhere
4102  * in the kernel_map, setting protection to max_prot.
4103  *
4104  * => map should be unlocked (we will write lock it and kernel_map)
4105  * => returns 0 on success, error code otherwise
4106  * => start must be page aligned
4107  * => len must be a multiple of PAGE_SIZE
4108  * => flags:
4109  *	UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4110  *	Mappings are QREF's.
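 * => illustrative sketch (editor's addition, not from the original source;
 *    "uaddr", "kva" and "sz" are placeholder names): a caller wanting a
 *    temporary kernel window onto part of a user map would roughly do:
 *	error = uvm_map_extract(&pr->ps_vmspace->vm_map, uaddr, sz,
 *	    &kva, UVM_EXTRACT_FIXPROT);
 *	(access the data through kva, then tear the window down with)
 *	uvm_unmap(kernel_map, kva, kva + sz);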
4111  */
4112 int
4113 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4114     vaddr_t *dstaddrp, int flags)
4115 {
4116 	struct uvm_map_deadq dead;
4117 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4118 	vaddr_t dstaddr;
4119 	vaddr_t end;
4120 	vaddr_t cp_start;
4121 	vsize_t cp_len, cp_off;
4122 	int error;
4123 
4124 	TAILQ_INIT(&dead);
4125 	end = start + len;
4126 
4127 	/*
4128 	 * Sanity check on the parameters.
4129 	 * Also, since the mapping may not contain gaps, error out if the
4130 	 * mapped area is not in source map.
4131 	 */
4132 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4133 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4134 		return EINVAL;
4135 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4136 		return EINVAL;
4137 
4138 	/* Handle the trivial len == 0 case. */
4139 	if (len == 0)
4140 		return 0;
4141 
4142 	/* Acquire lock on srcmap. */
4143 	vm_map_lock(srcmap);
4144 
4145 	/* Look up the first entry in <start, len>. */
4146 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4147 
4148 	/* Check that the range is contiguous. */
4149 	for (entry = first; entry != NULL && entry->end < end;
4150 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
4151 		if (VMMAP_FREE_END(entry) != entry->end ||
4152 		    UVM_ET_ISHOLE(entry)) {
4153 			error = EINVAL;
4154 			goto fail;
4155 		}
4156 	}
4157 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4158 		error = EINVAL;
4159 		goto fail;
4160 	}
4161 
4162 	/*
4163 	 * Handle the need-copy flag.
4164 	 *
4165 	 * Force amap_copy() on any entry that is still marked
4166 	 * needs-copy; if the copy cannot be made (amap allocation
4167 	 * fails), bail out with ENOMEM.
4168 	 */
4169 	for (entry = first; entry != NULL && entry->start < end;
4170 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
4171 		if (UVM_ET_ISNEEDSCOPY(entry))
4172 			amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
4173 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4174 			/*
4175 			 * amap_copy failure
4176 			 */
4177 			error = ENOMEM;
4178 			goto fail;
4179 		}
4180 	}
4181 
4182 	/* Lock destination map (kernel_map). */
4183 	vm_map_lock(kernel_map);
4184 
4185 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4186 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4187 	    PROT_NONE, 0) != 0) {
4188 		error = ENOMEM;
4189 		goto fail2;
4190 	}
4191 	*dstaddrp = dstaddr;
4192 
4193 	/*
4194 	 * We now have srcmap and kernel_map locked.
4195 	 * dstaddr contains the destination address in kernel_map.
4196 	 */
4197 	/* step 1: start looping through map entries, performing extraction. */
4198 	for (entry = first; entry != NULL && entry->start < end;
4199 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
4200 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4201 		if (UVM_ET_ISHOLE(entry))
4202 			continue;
4203 
4204 		/* Calculate uvm_mapent_clone parameters. */
4205 		cp_start = entry->start;
4206 		if (cp_start < start) {
4207 			cp_off = start - cp_start;
4208 			cp_start = start;
4209 		} else
4210 			cp_off = 0;
4211 		cp_len = MIN(entry->end, end) - cp_start;
4212 
4213 		newentry = uvm_mapent_clone(kernel_map,
4214 		    cp_start - start + dstaddr, cp_len, cp_off,
4215 		    entry->protection, entry->max_protection,
4216 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4217 		if (newentry == NULL) {
4218 			error = ENOMEM;
4219 			goto fail2_unmap;
4220 		}
4221 		kernel_map->size += cp_len;
4222 		if (flags & UVM_EXTRACT_FIXPROT)
4223 			newentry->protection = newentry->max_protection;
4224 
4225 		/*
4226 		 * Step 2: perform pmap copy.
4227 		 * (Doing this in the loop saves one RB traversal.)
4228 		 */
4229 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4230 		    cp_start - start + dstaddr, cp_len, cp_start);
4231 	}
4232 	pmap_update(kernel_map->pmap);
4233 
4234 	error = 0;
4235 
4236 	/* Unmap copied entries on failure. */
4237 fail2_unmap:
4238 	if (error) {
4239 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4240 		    FALSE, TRUE);
4241 	}
4242 
4243 	/* Release maps, release dead entries.
*/ 4244 fail2: 4245 vm_map_unlock(kernel_map); 4246 4247 fail: 4248 vm_map_unlock(srcmap); 4249 4250 uvm_unmap_detach(&dead, 0); 4251 4252 return error; 4253 } 4254 4255 /* 4256 * uvm_map_clean: clean out a map range 4257 * 4258 * => valid flags: 4259 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4260 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4261 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4262 * if (flags & PGO_FREE): any cached pages are freed after clean 4263 * => returns an error if any part of the specified range isn't mapped 4264 * => never a need to flush amap layer since the anonymous memory has 4265 * no permanent home, but may deactivate pages there 4266 * => called from sys_msync() and sys_madvise() 4267 * => caller must not write-lock map (read OK). 4268 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4269 */ 4270 4271 int 4272 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4273 { 4274 struct vm_map_entry *first, *entry; 4275 struct vm_amap *amap; 4276 struct vm_anon *anon; 4277 struct vm_page *pg; 4278 struct uvm_object *uobj; 4279 vaddr_t cp_start, cp_end; 4280 int refs; 4281 int error; 4282 boolean_t rv; 4283 4284 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4285 (PGO_FREE|PGO_DEACTIVATE)); 4286 4287 if (start > end || start < map->min_offset || end > map->max_offset) 4288 return EINVAL; 4289 4290 vm_map_lock_read(map); 4291 first = uvm_map_entrybyaddr(&map->addr, start); 4292 4293 /* Make a first pass to check for holes. */ 4294 for (entry = first; entry != NULL && entry->start < end; 4295 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4296 if (UVM_ET_ISSUBMAP(entry)) { 4297 vm_map_unlock_read(map); 4298 return EINVAL; 4299 } 4300 if (UVM_ET_ISSUBMAP(entry) || 4301 UVM_ET_ISHOLE(entry) || 4302 (entry->end < end && 4303 VMMAP_FREE_END(entry) != entry->end)) { 4304 vm_map_unlock_read(map); 4305 return EFAULT; 4306 } 4307 } 4308 4309 error = 0; 4310 for (entry = first; entry != NULL && entry->start < end; 4311 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4312 amap = entry->aref.ar_amap; /* top layer */ 4313 if (UVM_ET_ISOBJ(entry)) 4314 uobj = entry->object.uvm_obj; 4315 else 4316 uobj = NULL; 4317 4318 /* 4319 * No amap cleaning necessary if: 4320 * - there's no amap 4321 * - we're not deactivating or freeing pages. 4322 */ 4323 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4324 goto flush_object; 4325 4326 cp_start = MAX(entry->start, start); 4327 cp_end = MIN(entry->end, end); 4328 4329 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4330 anon = amap_lookup(&entry->aref, 4331 cp_start - entry->start); 4332 if (anon == NULL) 4333 continue; 4334 4335 pg = anon->an_page; 4336 if (pg == NULL) { 4337 continue; 4338 } 4339 KASSERT(pg->pg_flags & PQ_ANON); 4340 4341 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4342 /* 4343 * XXX In these first 3 cases, we always just 4344 * XXX deactivate the page. We may want to 4345 * XXX handle the different cases more 4346 * XXX specifically, in the future. 4347 */ 4348 case PGO_CLEANIT|PGO_FREE: 4349 case PGO_CLEANIT|PGO_DEACTIVATE: 4350 case PGO_DEACTIVATE: 4351 deactivate_it: 4352 /* skip the page if it's wired */ 4353 if (pg->wire_count != 0) 4354 break; 4355 4356 uvm_lock_pageq(); 4357 4358 KASSERT(pg->uanon == anon); 4359 4360 /* zap all mappings for the page. */ 4361 pmap_page_protect(pg, PROT_NONE); 4362 4363 /* ...and deactivate the page. 
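 * (the page moves to the inactive queue, where the pagedaemon may reclaim it later)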
*/ 4364 uvm_pagedeactivate(pg); 4365 4366 uvm_unlock_pageq(); 4367 break; 4368 case PGO_FREE: 4369 /* 4370 * If there are multiple references to 4371 * the amap, just deactivate the page. 4372 */ 4373 if (amap_refs(amap) > 1) 4374 goto deactivate_it; 4375 4376 /* XXX skip the page if it's wired */ 4377 if (pg->wire_count != 0) { 4378 break; 4379 } 4380 amap_unadd(&entry->aref, 4381 cp_start - entry->start); 4382 refs = --anon->an_ref; 4383 if (refs == 0) 4384 uvm_anfree(anon); 4385 break; 4386 default: 4387 panic("uvm_map_clean: weird flags"); 4388 } 4389 } 4390 4391 flush_object: 4392 cp_start = MAX(entry->start, start); 4393 cp_end = MIN(entry->end, end); 4394 4395 /* 4396 * flush pages if we've got a valid backing object. 4397 * 4398 * Don't PGO_FREE if we don't have write permission 4399 * and don't flush if this is a copy-on-write object 4400 * since we can't know our permissions on it. 4401 */ 4402 if (uobj != NULL && 4403 ((flags & PGO_FREE) == 0 || 4404 ((entry->max_protection & PROT_WRITE) != 0 && 4405 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4406 rv = uobj->pgops->pgo_flush(uobj, 4407 cp_start - entry->start + entry->offset, 4408 cp_end - entry->start + entry->offset, flags); 4409 4410 if (rv == FALSE) 4411 error = EFAULT; 4412 } 4413 } 4414 4415 vm_map_unlock_read(map); 4416 return error; 4417 } 4418 4419 /* 4420 * UVM_MAP_CLIP_END implementation 4421 */ 4422 void 4423 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4424 { 4425 struct vm_map_entry *tmp; 4426 4427 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4428 tmp = uvm_mapent_alloc(map, 0); 4429 4430 /* Invoke splitentry. */ 4431 uvm_map_splitentry(map, entry, tmp, addr); 4432 } 4433 4434 /* 4435 * UVM_MAP_CLIP_START implementation 4436 * 4437 * Clippers are required to not change the pointers to the entry they are 4438 * clipping on. 4439 * Since uvm_map_splitentry turns the original entry into the lowest 4440 * entry (address wise) we do a swap between the new entry and the original 4441 * entry, prior to calling uvm_map_splitentry. 4442 */ 4443 void 4444 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4445 { 4446 struct vm_map_entry *tmp; 4447 struct uvm_addr_state *free; 4448 4449 /* Unlink original. */ 4450 free = uvm_map_uaddr_e(map, entry); 4451 uvm_mapent_free_remove(map, free, entry); 4452 uvm_mapent_addr_remove(map, entry); 4453 4454 /* Copy entry. */ 4455 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4456 tmp = uvm_mapent_alloc(map, 0); 4457 uvm_mapent_copy(entry, tmp); 4458 4459 /* Put new entry in place of original entry. */ 4460 uvm_mapent_addr_insert(map, tmp); 4461 uvm_mapent_free_insert(map, free, tmp); 4462 4463 /* Invoke splitentry. */ 4464 uvm_map_splitentry(map, tmp, entry, addr); 4465 } 4466 4467 /* 4468 * Boundary fixer. 4469 */ 4470 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4471 static __inline vaddr_t 4472 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4473 { 4474 return (min < bound && max > bound) ? bound : max; 4475 } 4476 4477 /* 4478 * Choose free list based on address at start of free space. 4479 * 4480 * The uvm_addr_state returned contains addr and is the first of: 4481 * - uaddr_exe 4482 * - uaddr_brk_stack 4483 * - uaddr_any 4484 */ 4485 struct uvm_addr_state* 4486 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4487 { 4488 struct uvm_addr_state *uaddr; 4489 int i; 4490 4491 /* Special case the first page, to prevent mmap from returning 0. 
*/ 4492 if (addr < VMMAP_MIN_ADDR) 4493 return NULL; 4494 4495 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4496 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4497 if (addr >= uvm_maxkaddr) 4498 return NULL; 4499 } 4500 4501 /* Is the address inside the exe-only map? */ 4502 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4503 addr < map->uaddr_exe->uaddr_maxaddr) 4504 return map->uaddr_exe; 4505 4506 /* Check if the space falls inside brk/stack area. */ 4507 if ((addr >= map->b_start && addr < map->b_end) || 4508 (addr >= map->s_start && addr < map->s_end)) { 4509 if (map->uaddr_brk_stack != NULL && 4510 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4511 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4512 return map->uaddr_brk_stack; 4513 } else 4514 return NULL; 4515 } 4516 4517 /* 4518 * Check the other selectors. 4519 * 4520 * These selectors are only marked as the owner, if they have insert 4521 * functions. 4522 */ 4523 for (i = 0; i < nitems(map->uaddr_any); i++) { 4524 uaddr = map->uaddr_any[i]; 4525 if (uaddr == NULL) 4526 continue; 4527 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4528 continue; 4529 4530 if (addr >= uaddr->uaddr_minaddr && 4531 addr < uaddr->uaddr_maxaddr) 4532 return uaddr; 4533 } 4534 4535 return NULL; 4536 } 4537 4538 /* 4539 * Choose free list based on address at start of free space. 4540 * 4541 * The uvm_addr_state returned contains addr and is the first of: 4542 * - uaddr_exe 4543 * - uaddr_brk_stack 4544 * - uaddr_any 4545 */ 4546 struct uvm_addr_state* 4547 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4548 { 4549 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4550 } 4551 4552 /* 4553 * Returns the first free-memory boundary that is crossed by [min-max]. 4554 */ 4555 vsize_t 4556 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4557 { 4558 struct uvm_addr_state *uaddr; 4559 int i; 4560 4561 /* Never return first page. */ 4562 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4563 4564 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4565 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4566 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4567 4568 /* Check for exe-only boundaries. */ 4569 if (map->uaddr_exe != NULL) { 4570 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4571 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4572 } 4573 4574 /* Check for exe-only boundaries. */ 4575 if (map->uaddr_brk_stack != NULL) { 4576 max = uvm_map_boundfix(min, max, 4577 map->uaddr_brk_stack->uaddr_minaddr); 4578 max = uvm_map_boundfix(min, max, 4579 map->uaddr_brk_stack->uaddr_maxaddr); 4580 } 4581 4582 /* Check other boundaries. */ 4583 for (i = 0; i < nitems(map->uaddr_any); i++) { 4584 uaddr = map->uaddr_any[i]; 4585 if (uaddr != NULL) { 4586 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4587 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4588 } 4589 } 4590 4591 /* Boundaries at stack and brk() area. */ 4592 max = uvm_map_boundfix(min, max, map->s_start); 4593 max = uvm_map_boundfix(min, max, map->s_end); 4594 max = uvm_map_boundfix(min, max, map->b_start); 4595 max = uvm_map_boundfix(min, max, map->b_end); 4596 4597 return max; 4598 } 4599 4600 /* 4601 * Update map allocation start and end addresses from proc vmspace. 
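 * The free lists are rebuilt only when the brk or stack bounds have
 * actually changed.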
4602 */ 4603 void 4604 uvm_map_vmspace_update(struct vm_map *map, 4605 struct uvm_map_deadq *dead, int flags) 4606 { 4607 struct vmspace *vm; 4608 vaddr_t b_start, b_end, s_start, s_end; 4609 4610 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4611 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4612 4613 /* 4614 * Derive actual allocation boundaries from vmspace. 4615 */ 4616 vm = (struct vmspace *)map; 4617 b_start = (vaddr_t)vm->vm_daddr; 4618 b_end = b_start + BRKSIZ; 4619 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4620 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4621 #ifdef DIAGNOSTIC 4622 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4623 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4624 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4625 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4626 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4627 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4628 vm, b_start, b_end, s_start, s_end); 4629 } 4630 #endif 4631 4632 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4633 map->s_start == s_start && map->s_end == s_end)) 4634 return; 4635 4636 uvm_map_freelist_update(map, dead, b_start, b_end, 4637 s_start, s_end, flags); 4638 } 4639 4640 /* 4641 * Grow kernel memory. 4642 * 4643 * This function is only called for kernel maps when an allocation fails. 4644 * 4645 * If the map has a gap that is large enough to accommodate alloc_sz, this 4646 * function will make sure map->free will include it. 4647 */ 4648 void 4649 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4650 vsize_t alloc_sz, int flags) 4651 { 4652 vsize_t sz; 4653 vaddr_t end; 4654 struct vm_map_entry *entry; 4655 4656 /* Kernel memory only. */ 4657 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4658 /* Destroy free list. */ 4659 uvm_map_freelist_update_clear(map, dead); 4660 4661 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 4662 if (map->flags & VM_MAP_GUARDPAGES) 4663 alloc_sz += PAGE_SIZE; 4664 4665 /* 4666 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4667 * 4668 * Don't handle the case where the multiplication overflows: 4669 * if that happens, the allocation is probably too big anyway. 4670 */ 4671 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4672 4673 /* 4674 * Walk forward until a gap large enough for alloc_sz shows up. 4675 * 4676 * We assume the kernel map has no boundaries. 4677 * uvm_maxkaddr may be zero. 4678 */ 4679 end = MAX(uvm_maxkaddr, map->min_offset); 4680 entry = uvm_map_entrybyaddr(&map->addr, end); 4681 while (entry && entry->fspace < alloc_sz) 4682 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 4683 if (entry) { 4684 end = MAX(VMMAP_FREE_START(entry), end); 4685 end += MIN(sz, map->max_offset - end); 4686 } else 4687 end = map->max_offset; 4688 4689 /* Reserve pmap entries. */ 4690 #ifdef PMAP_GROWKERNEL 4691 uvm_maxkaddr = pmap_growkernel(end); 4692 #else 4693 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4694 #endif 4695 4696 /* Rebuild free list. */ 4697 uvm_map_freelist_update_refill(map, flags); 4698 } 4699 4700 /* 4701 * Freelist update subfunction: unlink all entries from freelists. 
4702 */ 4703 void 4704 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4705 { 4706 struct uvm_addr_state *free; 4707 struct vm_map_entry *entry, *prev, *next; 4708 4709 prev = NULL; 4710 for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL; 4711 entry = next) { 4712 next = RB_NEXT(uvm_map_addr, &map->addr, entry); 4713 4714 free = uvm_map_uaddr_e(map, entry); 4715 uvm_mapent_free_remove(map, free, entry); 4716 4717 if (prev != NULL && entry->start == entry->end) { 4718 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4719 uvm_mapent_addr_remove(map, entry); 4720 DEAD_ENTRY_PUSH(dead, entry); 4721 } else 4722 prev = entry; 4723 } 4724 } 4725 4726 /* 4727 * Freelist update subfunction: refill the freelists with entries. 4728 */ 4729 void 4730 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4731 { 4732 struct vm_map_entry *entry; 4733 vaddr_t min, max; 4734 4735 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 4736 min = VMMAP_FREE_START(entry); 4737 max = VMMAP_FREE_END(entry); 4738 entry->fspace = 0; 4739 4740 entry = uvm_map_fix_space(map, entry, min, max, flags); 4741 } 4742 4743 uvm_tree_sanity(map, __FILE__, __LINE__); 4744 } 4745 4746 /* 4747 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4748 */ 4749 void 4750 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4751 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4752 { 4753 KDASSERT(b_end >= b_start && s_end >= s_start); 4754 4755 /* Clear all free lists. */ 4756 uvm_map_freelist_update_clear(map, dead); 4757 4758 /* Apply new bounds. */ 4759 map->b_start = b_start; 4760 map->b_end = b_end; 4761 map->s_start = s_start; 4762 map->s_end = s_end; 4763 4764 /* Refill free lists. */ 4765 uvm_map_freelist_update_refill(map, flags); 4766 } 4767 4768 /* 4769 * Assign a uvm_addr_state to the specified pointer in vm_map. 4770 * 4771 * May sleep. 4772 */ 4773 void 4774 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4775 struct uvm_addr_state *newval) 4776 { 4777 struct uvm_map_deadq dead; 4778 4779 /* Pointer which must be in this map. */ 4780 KASSERT(which != NULL); 4781 KASSERT((void*)map <= (void*)(which) && 4782 (void*)(which) < (void*)(map + 1)); 4783 4784 vm_map_lock(map); 4785 TAILQ_INIT(&dead); 4786 uvm_map_freelist_update_clear(map, &dead); 4787 4788 uvm_addr_destroy(*which); 4789 *which = newval; 4790 4791 uvm_map_freelist_update_refill(map, 0); 4792 vm_map_unlock(map); 4793 uvm_unmap_detach(&dead, 0); 4794 } 4795 4796 /* 4797 * Correct space insert. 4798 * 4799 * Entry must not be on any freelist. 4800 */ 4801 struct vm_map_entry* 4802 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4803 vaddr_t min, vaddr_t max, int flags) 4804 { 4805 struct uvm_addr_state *free, *entfree; 4806 vaddr_t lmax; 4807 4808 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4809 KDASSERT(min <= max); 4810 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4811 min == map->min_offset); 4812 4813 /* 4814 * During the function, entfree will always point at the uaddr state 4815 * for entry. 4816 */ 4817 entfree = (entry == NULL ? NULL : 4818 uvm_map_uaddr_e(map, entry)); 4819 4820 while (min != max) { 4821 /* Claim guard page for entry. 
*/ 4822 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4823 VMMAP_FREE_END(entry) == entry->end && 4824 entry->start != entry->end) { 4825 if (max - min == 2 * PAGE_SIZE) { 4826 /* 4827 * If the free-space gap is exactly 2 pages, 4828 * we make the guard 2 pages instead of 1. 4829 * Because in a guarded map, an area needs 4830 * at least 2 pages to allocate from: 4831 * one page for the allocation and one for 4832 * the guard. 4833 */ 4834 entry->guard = 2 * PAGE_SIZE; 4835 min = max; 4836 } else { 4837 entry->guard = PAGE_SIZE; 4838 min += PAGE_SIZE; 4839 } 4840 continue; 4841 } 4842 4843 /* 4844 * Handle the case where entry has a 2-page guard, but the 4845 * space after entry is freed. 4846 */ 4847 if (entry != NULL && entry->fspace == 0 && 4848 entry->guard > PAGE_SIZE) { 4849 entry->guard = PAGE_SIZE; 4850 min = VMMAP_FREE_START(entry); 4851 } 4852 4853 lmax = uvm_map_boundary(map, min, max); 4854 free = uvm_map_uaddr(map, min); 4855 4856 /* 4857 * Entries are merged if they point at the same uvm_free(). 4858 * Exception to that rule: if min == uvm_maxkaddr, a new 4859 * entry is started regardless (otherwise the allocators 4860 * will get confused). 4861 */ 4862 if (entry != NULL && free == entfree && 4863 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 4864 min == uvm_maxkaddr)) { 4865 KDASSERT(VMMAP_FREE_END(entry) == min); 4866 entry->fspace += lmax - min; 4867 } else { 4868 /* 4869 * Commit entry to free list: it'll not be added to 4870 * anymore. 4871 * We'll start a new entry and add to that entry 4872 * instead. 4873 */ 4874 if (entry != NULL) 4875 uvm_mapent_free_insert(map, entfree, entry); 4876 4877 /* New entry for new uaddr. */ 4878 entry = uvm_mapent_alloc(map, flags); 4879 KDASSERT(entry != NULL); 4880 entry->end = entry->start = min; 4881 entry->guard = 0; 4882 entry->fspace = lmax - min; 4883 entry->object.uvm_obj = NULL; 4884 entry->offset = 0; 4885 entry->etype = 0; 4886 entry->protection = entry->max_protection = 0; 4887 entry->inheritance = 0; 4888 entry->wired_count = 0; 4889 entry->advice = 0; 4890 entry->aref.ar_pageoff = 0; 4891 entry->aref.ar_amap = NULL; 4892 uvm_mapent_addr_insert(map, entry); 4893 4894 entfree = free; 4895 } 4896 4897 min = lmax; 4898 } 4899 /* Finally put entry on the uaddr state. */ 4900 if (entry != NULL) 4901 uvm_mapent_free_insert(map, entfree, entry); 4902 4903 return entry; 4904 } 4905 4906 /* 4907 * MQuery style of allocation. 4908 * 4909 * This allocator searches forward until sufficient space is found to map 4910 * the given size. 4911 * 4912 * XXX: factor in offset (via pmap_prefer) and protection? 4913 */ 4914 int 4915 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 4916 int flags) 4917 { 4918 struct vm_map_entry *entry, *last; 4919 vaddr_t addr; 4920 vaddr_t tmp, pmap_align, pmap_offset; 4921 int error; 4922 4923 addr = *addr_p; 4924 vm_map_lock_read(map); 4925 4926 /* Configure pmap prefer. */ 4927 if (offset != UVM_UNKNOWN_OFFSET) { 4928 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 4929 pmap_offset = PMAP_PREFER_OFFSET(offset); 4930 } else { 4931 pmap_align = PAGE_SIZE; 4932 pmap_offset = 0; 4933 } 4934 4935 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 4936 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 4937 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4938 if (tmp < addr) 4939 tmp += pmap_align; 4940 addr = tmp; 4941 } 4942 4943 /* First, check if the requested range is fully available. 
*/ 4944 entry = uvm_map_entrybyaddr(&map->addr, addr); 4945 last = NULL; 4946 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4947 error = 0; 4948 goto out; 4949 } 4950 if (flags & UVM_FLAG_FIXED) { 4951 error = EINVAL; 4952 goto out; 4953 } 4954 4955 error = ENOMEM; /* Default error from here. */ 4956 4957 /* 4958 * At this point, the memory at <addr, sz> is not available. 4959 * The reasons are: 4960 * [1] it's outside the map, 4961 * [2] it starts in used memory (and therefore needs to move 4962 * toward the first free page in entry), 4963 * [3] it starts in free memory but bumps into used memory. 4964 * 4965 * Note that for case [2], the forward moving is handled by the 4966 * for loop below. 4967 */ 4968 if (entry == NULL) { 4969 /* [1] Outside the map. */ 4970 if (addr >= map->max_offset) 4971 goto out; 4972 else 4973 entry = RB_MIN(uvm_map_addr, &map->addr); 4974 } else if (VMMAP_FREE_START(entry) <= addr) { 4975 /* [3] Bumped into used memory. */ 4976 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 4977 } 4978 4979 /* Test if the next entry is sufficient for the allocation. */ 4980 for (; entry != NULL; 4981 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4982 if (entry->fspace == 0) 4983 continue; 4984 addr = VMMAP_FREE_START(entry); 4985 4986 restart: /* Restart address checks on address change. */ 4987 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4988 if (tmp < addr) 4989 tmp += pmap_align; 4990 addr = tmp; 4991 if (addr >= VMMAP_FREE_END(entry)) 4992 continue; 4993 4994 /* Skip brk() allocation addresses. */ 4995 if (addr + sz > map->b_start && addr < map->b_end) { 4996 if (VMMAP_FREE_END(entry) > map->b_end) { 4997 addr = map->b_end; 4998 goto restart; 4999 } else 5000 continue; 5001 } 5002 /* Skip stack allocation addresses. */ 5003 if (addr + sz > map->s_start && addr < map->s_end) { 5004 if (VMMAP_FREE_END(entry) > map->s_end) { 5005 addr = map->s_end; 5006 goto restart; 5007 } else 5008 continue; 5009 } 5010 5011 last = NULL; 5012 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5013 error = 0; 5014 goto out; 5015 } 5016 } 5017 5018 out: 5019 vm_map_unlock_read(map); 5020 if (error == 0) 5021 *addr_p = addr; 5022 return error; 5023 } 5024 5025 /* 5026 * Determine allocation bias. 5027 * 5028 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5029 * addresses, or 0 for no bias. 5030 * The bias mechanism is intended to avoid clashing with brk() and stack 5031 * areas. 5032 */ 5033 int 5034 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5035 { 5036 vaddr_t start, end; 5037 5038 start = VMMAP_FREE_START(entry); 5039 end = VMMAP_FREE_END(entry); 5040 5041 /* Stay at the top of brk() area. */ 5042 if (end >= map->b_start && start < map->b_end) 5043 return 1; 5044 /* Stay at the far end of the stack area. */ 5045 if (end >= map->s_start && start < map->s_end) { 5046 #ifdef MACHINE_STACK_GROWS_UP 5047 return 1; 5048 #else 5049 return -1; 5050 #endif 5051 } 5052 5053 /* No bias, this area is meant for us. 
*/ 5054 return 0; 5055 } 5056 5057 5058 boolean_t 5059 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5060 { 5061 boolean_t rv; 5062 5063 if (map->flags & VM_MAP_INTRSAFE) { 5064 rv = mtx_enter_try(&map->mtx); 5065 } else { 5066 mtx_enter(&map->flags_lock); 5067 if (map->flags & VM_MAP_BUSY) { 5068 mtx_leave(&map->flags_lock); 5069 return (FALSE); 5070 } 5071 mtx_leave(&map->flags_lock); 5072 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5073 /* check if the lock is busy and back out if we won the race */ 5074 if (rv) { 5075 mtx_enter(&map->flags_lock); 5076 if (map->flags & VM_MAP_BUSY) { 5077 rw_exit(&map->lock); 5078 rv = FALSE; 5079 } 5080 mtx_leave(&map->flags_lock); 5081 } 5082 } 5083 5084 if (rv) { 5085 map->timestamp++; 5086 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5087 uvm_tree_sanity(map, file, line); 5088 uvm_tree_size_chk(map, file, line); 5089 } 5090 5091 return (rv); 5092 } 5093 5094 void 5095 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5096 { 5097 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5098 do { 5099 mtx_enter(&map->flags_lock); 5100 tryagain: 5101 while (map->flags & VM_MAP_BUSY) { 5102 map->flags |= VM_MAP_WANTLOCK; 5103 msleep(&map->flags, &map->flags_lock, 5104 PVM, vmmapbsy, 0); 5105 } 5106 mtx_leave(&map->flags_lock); 5107 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5108 /* check if the lock is busy and back out if we won the race */ 5109 mtx_enter(&map->flags_lock); 5110 if (map->flags & VM_MAP_BUSY) { 5111 rw_exit(&map->lock); 5112 goto tryagain; 5113 } 5114 mtx_leave(&map->flags_lock); 5115 } else { 5116 mtx_enter(&map->mtx); 5117 } 5118 5119 map->timestamp++; 5120 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5121 uvm_tree_sanity(map, file, line); 5122 uvm_tree_size_chk(map, file, line); 5123 } 5124 5125 void 5126 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5127 { 5128 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5129 rw_enter_read(&map->lock); 5130 else 5131 mtx_enter(&map->mtx); 5132 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5133 uvm_tree_sanity(map, file, line); 5134 uvm_tree_size_chk(map, file, line); 5135 } 5136 5137 void 5138 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5139 { 5140 uvm_tree_sanity(map, file, line); 5141 uvm_tree_size_chk(map, file, line); 5142 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5143 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5144 rw_exit(&map->lock); 5145 else 5146 mtx_leave(&map->mtx); 5147 } 5148 5149 void 5150 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5151 { 5152 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5153 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5154 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5155 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5156 rw_exit_read(&map->lock); 5157 else 5158 mtx_leave(&map->mtx); 5159 } 5160 5161 void 5162 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5163 { 5164 uvm_tree_sanity(map, file, line); 5165 uvm_tree_size_chk(map, file, line); 5166 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5167 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5168 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5169 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5170 rw_enter(&map->lock, RW_DOWNGRADE); 5171 } 5172 5173 void 5174 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5175 { 5176 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5177 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5178 
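	/*
	 * Editor's note: the upgrade below is not atomic; the read lock is
	 * dropped before the write lock is taken, so the map may change in
	 * between unless the caller prevents it (e.g. by marking the map
	 * busy beforehand).
	 */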
LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5179 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5180 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5181 rw_exit_read(&map->lock); 5182 rw_enter_write(&map->lock); 5183 } 5184 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5185 uvm_tree_sanity(map, file, line); 5186 } 5187 5188 void 5189 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5190 { 5191 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5192 mtx_enter(&map->flags_lock); 5193 map->flags |= VM_MAP_BUSY; 5194 mtx_leave(&map->flags_lock); 5195 } 5196 5197 void 5198 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5199 { 5200 int oflags; 5201 5202 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5203 mtx_enter(&map->flags_lock); 5204 oflags = map->flags; 5205 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5206 mtx_leave(&map->flags_lock); 5207 if (oflags & VM_MAP_WANTLOCK) 5208 wakeup(&map->flags); 5209 } 5210 5211 #ifndef SMALL_KERNEL 5212 int 5213 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5214 size_t *lenp) 5215 { 5216 struct vm_map_entry *entry; 5217 vaddr_t start; 5218 int cnt, maxcnt, error = 0; 5219 5220 KASSERT(*lenp > 0); 5221 KASSERT((*lenp % sizeof(*kve)) == 0); 5222 cnt = 0; 5223 maxcnt = *lenp / sizeof(*kve); 5224 KASSERT(maxcnt > 0); 5225 5226 /* 5227 * Return only entries whose address is above the given base 5228 * address. This allows userland to iterate without knowing the 5229 * number of entries beforehand. 5230 */ 5231 start = (vaddr_t)kve[0].kve_start; 5232 5233 vm_map_lock(map); 5234 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 5235 if (cnt == maxcnt) { 5236 error = ENOMEM; 5237 break; 5238 } 5239 if (start != 0 && entry->start < start) 5240 continue; 5241 kve->kve_start = entry->start; 5242 kve->kve_end = entry->end; 5243 kve->kve_guard = entry->guard; 5244 kve->kve_fspace = entry->fspace; 5245 kve->kve_fspace_augment = entry->fspace_augment; 5246 kve->kve_offset = entry->offset; 5247 kve->kve_wired_count = entry->wired_count; 5248 kve->kve_etype = entry->etype; 5249 kve->kve_protection = entry->protection; 5250 kve->kve_max_protection = entry->max_protection; 5251 kve->kve_advice = entry->advice; 5252 kve->kve_inheritance = entry->inheritance; 5253 kve->kve_flags = entry->flags; 5254 kve++; 5255 cnt++; 5256 } 5257 vm_map_unlock(map); 5258 5259 KASSERT(cnt <= maxcnt); 5260 5261 *lenp = sizeof(*kve) * cnt; 5262 return error; 5263 } 5264 #endif 5265 5266 5267 #undef RB_AUGMENT 5268 #define RB_AUGMENT(x) uvm_map_addr_augment((x)) 5269 RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5270 uvm_mapentry_addrcmp); 5271 #undef RB_AUGMENT 5272 5273 5274 /* 5275 * MD code: vmspace allocator setup. 5276 */ 5277 5278 #ifdef __i386__ 5279 void 5280 uvm_map_setup_md(struct vm_map *map) 5281 { 5282 vaddr_t min, max; 5283 5284 min = map->min_offset; 5285 max = map->max_offset; 5286 5287 /* 5288 * Ensure the selectors will not try to manage page 0; 5289 * it's too special. 5290 */ 5291 if (min < VMMAP_MIN_ADDR) 5292 min = VMMAP_MIN_ADDR; 5293 5294 #if 0 /* Cool stuff, not yet */ 5295 /* Hinted allocations. */ 5296 map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, 5297 1024 * 1024 * 1024); 5298 5299 /* Executable code is special. */ 5300 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5301 /* Place normal allocations beyond executable mappings. 
*/ 5302 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5303 #else /* Crappy stuff, for now */ 5304 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5305 #endif 5306 5307 #ifndef SMALL_KERNEL 5308 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5309 #endif /* !SMALL_KERNEL */ 5310 } 5311 #elif __LP64__ 5312 void 5313 uvm_map_setup_md(struct vm_map *map) 5314 { 5315 vaddr_t min, max; 5316 5317 min = map->min_offset; 5318 max = map->max_offset; 5319 5320 /* 5321 * Ensure the selectors will not try to manage page 0; 5322 * it's too special. 5323 */ 5324 if (min < VMMAP_MIN_ADDR) 5325 min = VMMAP_MIN_ADDR; 5326 5327 #if 0 /* Cool stuff, not yet */ 5328 /* Hinted allocations above 4GB */ 5329 map->uaddr_any[0] = 5330 uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024); 5331 /* Hinted allocations below 4GB */ 5332 map->uaddr_any[1] = 5333 uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL, 5334 1024 * 1024 * 1024); 5335 /* Normal allocations, always above 4GB */ 5336 map->uaddr_any[3] = 5337 uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5338 #else /* Crappy stuff, for now */ 5339 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5340 #endif 5341 5342 #ifndef SMALL_KERNEL 5343 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5344 #endif /* !SMALL_KERNEL */ 5345 } 5346 #else /* non-i386, 32 bit */ 5347 void 5348 uvm_map_setup_md(struct vm_map *map) 5349 { 5350 vaddr_t min, max; 5351 5352 min = map->min_offset; 5353 max = map->max_offset; 5354 5355 /* 5356 * Ensure the selectors will not try to manage page 0; 5357 * it's too special. 5358 */ 5359 if (min < VMMAP_MIN_ADDR) 5360 min = VMMAP_MIN_ADDR; 5361 5362 #if 0 /* Cool stuff, not yet */ 5363 /* Hinted allocations. */ 5364 map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, 5365 1024 * 1024 * 1024); 5366 /* Normal allocations. */ 5367 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5368 #else /* Crappy stuff, for now */ 5369 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5370 #endif 5371 5372 #ifndef SMALL_KERNEL 5373 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5374 #endif /* !SMALL_KERNEL */ 5375 } 5376 #endif 5377
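
/*
 * Editor's illustrative sketch (not part of the original source and not
 * compiled): shows how a kernel consumer might use uvm_map_checkprot()
 * under the locking rule documented above (the map must be read- or
 * write-locked by the caller).  The helper name and its hypothetical
 * caller are assumptions, not existing kernel interfaces.
 */
#if 0
static int
example_range_accessible(struct process *pr, vaddr_t start, vaddr_t end,
    vm_prot_t prot)
{
	struct vm_map *map = &pr->ps_vmspace->vm_map;
	boolean_t ok;

	/* uvm_map_checkprot() requires the map to be locked by the caller. */
	vm_map_lock_read(map);
	ok = uvm_map_checkprot(map, start, end, prot);
	vm_map_unlock_read(map);

	return ok ? 0 : EFAULT;
}
#endif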