1 /* $OpenBSD: uvm_map.c,v 1.173 2014/07/13 15:33:28 pirofti Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/kernel.h> 94 95 #include <dev/rndvar.h> 96 97 #ifdef SYSVSHM 98 #include <sys/shm.h> 99 #endif 100 101 #include <uvm/uvm.h> 102 103 #ifdef DDB 104 #include <uvm/uvm_ddb.h> 105 #endif 106 107 #include <uvm/uvm_addr.h> 108 109 110 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 111 int uvm_mapent_isjoinable(struct vm_map*, 112 struct vm_map_entry*, struct vm_map_entry*); 113 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 114 struct vm_map_entry*, struct uvm_map_deadq*); 115 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 116 struct vm_map_entry*, struct uvm_map_deadq*); 117 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 118 struct vm_map_entry*, vaddr_t, vsize_t, int, 119 struct uvm_map_deadq*, struct vm_map_entry*); 120 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 121 void uvm_mapent_free(struct vm_map_entry*); 122 void uvm_unmap_kill_entry(struct vm_map*, 123 struct vm_map_entry*); 124 void uvm_mapent_mkfree(struct vm_map*, 125 struct vm_map_entry*, struct vm_map_entry**, 126 struct uvm_map_deadq*, boolean_t); 127 void uvm_map_pageable_pgon(struct vm_map*, 128 struct vm_map_entry*, struct vm_map_entry*, 129 vaddr_t, vaddr_t); 130 int uvm_map_pageable_wire(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry*, 132 vaddr_t, vaddr_t, int); 133 void uvm_map_setup_entries(struct vm_map*); 134 void uvm_map_setup_md(struct vm_map*); 135 void uvm_map_teardown(struct vm_map*); 136 void uvm_map_vmspace_update(struct vm_map*, 137 struct uvm_map_deadq*, int); 138 void uvm_map_kmem_grow(struct vm_map*, 139 struct uvm_map_deadq*, vsize_t, int); 140 void uvm_map_freelist_update_clear(struct vm_map*, 141 struct uvm_map_deadq*); 142 void uvm_map_freelist_update_refill(struct vm_map *, int); 143 void uvm_map_freelist_update(struct vm_map*, 144 struct uvm_map_deadq*, vaddr_t, vaddr_t, 145 vaddr_t, vaddr_t, int); 146 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 147 vaddr_t, vaddr_t, int); 148 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 149 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 150 int); 151 int uvm_map_findspace(struct vm_map*, 152 struct vm_map_entry**, struct vm_map_entry**, 153 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 154 vaddr_t); 155 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 156 void uvm_map_addr_augment(struct vm_map_entry*); 157 158 /* 159 * Tree management functions. 
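 *
 * As a rough illustration of the ordering implemented by the comparators
 * declared below (a sketch only; the entry values are invented, not taken
 * from a real map):
 *
 *	struct vm_map_entry a, b;
 *	a.start = 0x1000; a.fspace = 0x2000;
 *	b.start = 0x3000; b.fspace = 0x2000;
 *
 *	uvm_mapentry_addrcmp(&a, &b)	-> -1, a sorts before b by address
 *	uvm_mapentry_freecmp(&a, &b)	-> -1, equal fspace, so the address
 *					   comparison breaks the tie
 *
 * The address tree is keyed on the (unique) start address; the free-space
 * comparison orders by size first and falls back to the address comparison
 * to keep keys unique.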
160 */ 161 162 static __inline void uvm_mapent_copy(struct vm_map_entry*, 163 struct vm_map_entry*); 164 static int uvm_mapentry_addrcmp(struct vm_map_entry*, 165 struct vm_map_entry*); 166 static int uvm_mapentry_freecmp(struct vm_map_entry*, 167 struct vm_map_entry*); 168 void uvm_mapent_free_insert(struct vm_map*, 169 struct uvm_addr_state*, struct vm_map_entry*); 170 void uvm_mapent_free_remove(struct vm_map*, 171 struct uvm_addr_state*, struct vm_map_entry*); 172 void uvm_mapent_addr_insert(struct vm_map*, 173 struct vm_map_entry*); 174 void uvm_mapent_addr_remove(struct vm_map*, 175 struct vm_map_entry*); 176 void uvm_map_splitentry(struct vm_map*, 177 struct vm_map_entry*, struct vm_map_entry*, 178 vaddr_t); 179 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 180 int uvm_mapent_bias(struct vm_map*, struct vm_map_entry*); 181 182 /* 183 * uvm_vmspace_fork helper functions. 184 */ 185 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 186 vsize_t, struct vm_map_entry*, 187 struct uvm_map_deadq*, int, int); 188 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 189 struct vm_map*, struct vm_map_entry*, 190 struct uvm_map_deadq*); 191 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 192 struct vm_map*, struct vm_map_entry*, 193 struct uvm_map_deadq*); 194 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 195 struct vm_map*, struct vm_map_entry*, 196 struct uvm_map_deadq*); 197 198 /* 199 * Tree validation. 200 */ 201 #ifdef VMMAP_DEBUG 202 void uvm_tree_assert(struct vm_map*, int, char*, 203 char*, int); 204 #define UVM_ASSERT(map, cond, file, line) \ 205 uvm_tree_assert((map), (cond), #cond, (file), (line)) 206 void uvm_tree_sanity(struct vm_map*, char*, int); 207 void uvm_tree_size_chk(struct vm_map*, char*, int); 208 void vmspace_validate(struct vm_map*); 209 #else 210 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 211 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 212 #define vmspace_validate(_map) do {} while (0) 213 #endif 214 215 /* 216 * All architectures will have pmap_prefer. 217 */ 218 #ifndef PMAP_PREFER 219 #define PMAP_PREFER_ALIGN() (vaddr_t)PAGE_SIZE 220 #define PMAP_PREFER_OFFSET(off) 0 221 #define PMAP_PREFER(addr, off) (addr) 222 #endif 223 224 225 /* 226 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 227 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 228 * 229 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 230 * each time. 231 */ 232 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 233 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 234 #define VM_MAP_KSIZE_ALLOCMUL 4 235 /* 236 * When selecting a random free-space block, look at most FSPACE_DELTA blocks 237 * ahead. 238 */ 239 #define FSPACE_DELTA 8 240 /* 241 * Put allocations adjecent to previous allocations when the free-space tree 242 * is larger than FSPACE_COMPACT entries. 243 * 244 * Alignment and PMAP_PREFER may still cause the entry to not be fully 245 * adjecent. Note that this strategy reduces memory fragmentation (by leaving 246 * a large space before or after the allocation). 247 */ 248 #define FSPACE_COMPACT 128 249 /* 250 * Make the address selection skip at most this many bytes from the start of 251 * the free space in which the allocation takes place. 252 * 253 * The main idea behind a randomized address space is that an attacker cannot 254 * know where to target his attack. 
Therefore, the location of objects must be 255 * as random as possible. However, the goal is not to create the most sparse 256 * map that is possible. 257 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 258 * sizes, thereby reducing the sparseness. The biggest randomization comes 259 * from fragmentation, i.e. FSPACE_COMPACT. 260 */ 261 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 262 /* 263 * Allow for small gaps in the overflow areas. 264 * Gap size is in bytes and does not have to be a multiple of page-size. 265 */ 266 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 267 268 /* auto-allocate address lower bound */ 269 #define VMMAP_MIN_ADDR PAGE_SIZE 270 271 272 #ifdef DEADBEEF0 273 #define UVMMAP_DEADBEEF ((void*)DEADBEEF0) 274 #else 275 #define UVMMAP_DEADBEEF ((void*)0xdeadd0d0) 276 #endif 277 278 #ifdef DEBUG 279 int uvm_map_printlocks = 0; 280 281 #define LPRINTF(_args) \ 282 do { \ 283 if (uvm_map_printlocks) \ 284 printf _args; \ 285 } while (0) 286 #else 287 #define LPRINTF(_args) do {} while (0) 288 #endif 289 290 static struct timeval uvm_kmapent_last_warn_time; 291 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 292 293 const char vmmapbsy[] = "vmmapbsy"; 294 295 /* 296 * pool for vmspace structures. 297 */ 298 struct pool uvm_vmspace_pool; 299 300 /* 301 * pool for dynamically-allocated map entries. 302 */ 303 struct pool uvm_map_entry_pool; 304 struct pool uvm_map_entry_kmem_pool; 305 306 /* 307 * This global represents the end of the kernel virtual address 308 * space. If we want to exceed this, we must grow the kernel 309 * virtual address space dynamically. 310 * 311 * Note, this variable is locked by kernel_map's lock. 312 */ 313 vaddr_t uvm_maxkaddr; 314 315 /* 316 * Locking predicate. 317 */ 318 #define UVM_MAP_REQ_WRITE(_map) \ 319 do { \ 320 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 321 rw_assert_wrlock(&(_map)->lock); \ 322 } while (0) 323 324 /* 325 * Tree describing entries by address. 326 * 327 * Addresses are unique. 328 * Entries with start == end may only exist if they are the first entry 329 * (sorted by address) within a free-memory tree. 330 */ 331 332 static __inline int 333 uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2) 334 { 335 return e1->start < e2->start ? -1 : e1->start > e2->start; 336 } 337 338 /* 339 * Tree describing free memory. 340 * 341 * Free memory is indexed (so we can use array semantics in O(log N). 342 * Free memory is ordered by size (so we can reduce fragmentation). 343 * 344 * The address range in the tree can be limited, having part of the 345 * free memory not in the free-memory tree. Only free memory in the 346 * tree will be considered during 'any address' allocations. 347 */ 348 349 static __inline int 350 uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2) 351 { 352 int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace; 353 return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2); 354 } 355 356 /* 357 * Copy mapentry. 
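 *
 * The routine below copies only the byte range between the
 * uvm_map_entry_start_copy and uvm_map_entry_stop_copy markers, so the
 * tree linkage stored outside that range is left untouched.  A minimal
 * sketch of the same offsetof() idiom, using a made-up struct that is not
 * part of UVM:
 *
 *	struct demo {
 *		int	links;		(not copied)
 *		int	start_copy;	(first copied field)
 *		int	payload;
 *		int	stop_copy;	(first field past the copy)
 *	};
 *
 *	void
 *	demo_copy(struct demo *src, struct demo *dst)
 *	{
 *		size_t off = offsetof(struct demo, start_copy);
 *		size_t sz = offsetof(struct demo, stop_copy) - off;
 *
 *		memcpy((char *)dst + off, (char *)src + off, sz);
 *	}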
358 */ 359 static __inline void 360 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 361 { 362 caddr_t csrc, cdst; 363 size_t sz; 364 365 csrc = (caddr_t)src; 366 cdst = (caddr_t)dst; 367 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 368 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 369 370 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 371 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 372 memcpy(cdst, csrc, sz); 373 } 374 375 /* 376 * Handle free-list insertion. 377 */ 378 void 379 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 380 struct vm_map_entry *entry) 381 { 382 const struct uvm_addr_functions *fun; 383 #ifdef VMMAP_DEBUG 384 vaddr_t min, max, bound; 385 #endif 386 387 #ifdef VMMAP_DEBUG 388 /* 389 * Boundary check. 390 * Boundaries are folded if they go on the same free list. 391 */ 392 min = VMMAP_FREE_START(entry); 393 max = VMMAP_FREE_END(entry); 394 395 while (min < max) { 396 bound = uvm_map_boundary(map, min, max); 397 KASSERT(uvm_map_uaddr(map, min) == uaddr); 398 min = bound; 399 } 400 #endif 401 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 402 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 403 404 UVM_MAP_REQ_WRITE(map); 405 406 /* Actual insert: forward to uaddr pointer. */ 407 if (uaddr != NULL) { 408 fun = uaddr->uaddr_functions; 409 KDASSERT(fun != NULL); 410 if (fun->uaddr_free_insert != NULL) 411 (*fun->uaddr_free_insert)(map, uaddr, entry); 412 entry->etype |= UVM_ET_FREEMAPPED; 413 } 414 415 /* Update fspace augmentation. */ 416 uvm_map_addr_augment(entry); 417 } 418 419 /* 420 * Handle free-list removal. 421 */ 422 void 423 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 424 struct vm_map_entry *entry) 425 { 426 const struct uvm_addr_functions *fun; 427 428 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 429 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 430 UVM_MAP_REQ_WRITE(map); 431 432 if (uaddr != NULL) { 433 fun = uaddr->uaddr_functions; 434 if (fun->uaddr_free_remove != NULL) 435 (*fun->uaddr_free_remove)(map, uaddr, entry); 436 entry->etype &= ~UVM_ET_FREEMAPPED; 437 } 438 } 439 440 /* 441 * Handle address tree insertion. 442 */ 443 void 444 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 445 { 446 struct vm_map_entry *res; 447 448 if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || 449 RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF || 450 RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF) 451 panic("uvm_mapent_addr_insert: entry still in addr list"); 452 KDASSERT(entry->start <= entry->end); 453 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 454 (entry->end & (vaddr_t)PAGE_MASK) == 0); 455 456 UVM_MAP_REQ_WRITE(map); 457 res = RB_INSERT(uvm_map_addr, &map->addr, entry); 458 if (res != NULL) { 459 panic("uvm_mapent_addr_insert: map %p entry %p " 460 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 461 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 462 map, entry, 463 entry->start, entry->end, entry->guard, entry->fspace, 464 res, res->start, res->end, res->guard, res->fspace); 465 } 466 } 467 468 /* 469 * Handle address tree removal. 
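 *
 * Insertion and removal keep the RB linkage of an unlinked entry poisoned
 * with UVMMAP_DEADBEEF: uvm_mapent_addr_insert() panics unless all three
 * pointers still hold the poison value, and the removal below writes it
 * back.  A sketch of the resulting life cycle (names as used in this
 * file):
 *
 *	e = uvm_mapent_alloc(map, flags);	left/right/parent poisoned
 *	uvm_mapent_addr_insert(map, e);		poison checked, entry linked
 *	...
 *	uvm_mapent_addr_remove(map, e);		entry unlinked, re-poisoned
 *	uvm_mapent_free(e);
 *
 * so a double insert, or inserting an entry that is still linked
 * elsewhere, trips the check.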
470 */ 471 void 472 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 473 { 474 struct vm_map_entry *res; 475 476 UVM_MAP_REQ_WRITE(map); 477 res = RB_REMOVE(uvm_map_addr, &map->addr, entry); 478 if (res != entry) 479 panic("uvm_mapent_addr_remove"); 480 RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) = 481 RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF; 482 } 483 484 /* 485 * uvm_map_reference: add reference to a map 486 * 487 * XXX check map reference counter lock 488 */ 489 #define uvm_map_reference(_map) \ 490 do { \ 491 map->ref_count++; \ 492 } while (0) 493 494 /* 495 * Calculate the dused delta. 496 */ 497 vsize_t 498 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 499 { 500 struct vmspace *vm; 501 vsize_t sz; 502 vaddr_t lmax; 503 vaddr_t stack_begin, stack_end; /* Position of stack. */ 504 505 KASSERT(map->flags & VM_MAP_ISVMSPACE); 506 vm = (struct vmspace *)map; 507 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 508 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 509 510 sz = 0; 511 while (min != max) { 512 lmax = max; 513 if (min < stack_begin && lmax > stack_begin) 514 lmax = stack_begin; 515 else if (min < stack_end && lmax > stack_end) 516 lmax = stack_end; 517 518 if (min >= stack_begin && min < stack_end) { 519 /* nothing */ 520 } else 521 sz += lmax - min; 522 min = lmax; 523 } 524 525 return sz >> PAGE_SHIFT; 526 } 527 528 /* 529 * Find the entry describing the given address. 530 */ 531 struct vm_map_entry* 532 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 533 { 534 struct vm_map_entry *iter; 535 536 iter = RB_ROOT(atree); 537 while (iter != NULL) { 538 if (iter->start > addr) 539 iter = RB_LEFT(iter, daddrs.addr_entry); 540 else if (VMMAP_FREE_END(iter) <= addr) 541 iter = RB_RIGHT(iter, daddrs.addr_entry); 542 else 543 return iter; 544 } 545 return NULL; 546 } 547 548 /* 549 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 550 * 551 * Push dead entries into a linked list. 552 * Since the linked list abuses the address tree for storage, the entry 553 * may not be linked in a map. 554 * 555 * *head must be initialized to NULL before the first call to this macro. 556 * uvm_unmap_detach(*head, 0) will remove dead entries. 557 */ 558 static __inline void 559 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 560 { 561 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 562 } 563 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 564 dead_entry_push((_headptr), (_entry)) 565 566 /* 567 * Helper function for uvm_map_findspace_tree. 568 * 569 * Given allocation constraints and pmap constraints, finds the 570 * lowest and highest address in a range that can be used for the 571 * allocation. 572 * 573 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 574 * 575 * 576 * Big chunk of math with a seasoning of dragons. 577 */ 578 int 579 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 580 struct vm_map_entry *sel, vaddr_t align, 581 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 582 { 583 vaddr_t sel_min, sel_max; 584 #ifdef PMAP_PREFER 585 vaddr_t pmap_min, pmap_max; 586 #endif /* PMAP_PREFER */ 587 #ifdef DIAGNOSTIC 588 int bad; 589 #endif /* DIAGNOSTIC */ 590 591 sel_min = VMMAP_FREE_START(sel); 592 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? 
PAGE_SIZE : 0); 593 594 #ifdef PMAP_PREFER 595 596 /* 597 * There are two special cases, in which we can satisfy the align 598 * requirement and the pmap_prefer requirement. 599 * - when pmap_off == 0, we always select the largest of the two 600 * - when pmap_off % align == 0 and pmap_align > align, we simply 601 * satisfy the pmap_align requirement and automatically 602 * satisfy the align requirement. 603 */ 604 if (align > PAGE_SIZE && 605 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 606 /* 607 * Simple case: only use align. 608 */ 609 sel_min = roundup(sel_min, align); 610 sel_max &= ~(align - 1); 611 612 if (sel_min > sel_max) 613 return ENOMEM; 614 615 /* Correct for bias. */ 616 if (sel_max - sel_min > FSPACE_BIASGAP) { 617 if (bias > 0) { 618 sel_min = sel_max - FSPACE_BIASGAP; 619 sel_min = roundup(sel_min, align); 620 } else if (bias < 0) { 621 sel_max = sel_min + FSPACE_BIASGAP; 622 sel_max &= ~(align - 1); 623 } 624 } 625 } else if (pmap_align != 0) { 626 /* 627 * Special case: satisfy both pmap_prefer and 628 * align argument. 629 */ 630 pmap_max = sel_max & ~(pmap_align - 1); 631 pmap_min = sel_min; 632 if (pmap_max < sel_min) 633 return ENOMEM; 634 635 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 636 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 637 pmap_min = pmap_max - FSPACE_BIASGAP; 638 /* Align pmap_min. */ 639 pmap_min &= ~(pmap_align - 1); 640 if (pmap_min < sel_min) 641 pmap_min += pmap_align; 642 if (pmap_min > pmap_max) 643 return ENOMEM; 644 645 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 646 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 647 pmap_max = (pmap_min + FSPACE_BIASGAP) & 648 ~(pmap_align - 1); 649 } 650 if (pmap_min > pmap_max) 651 return ENOMEM; 652 653 /* Apply pmap prefer offset. */ 654 pmap_max |= pmap_off; 655 if (pmap_max > sel_max) 656 pmap_max -= pmap_align; 657 pmap_min |= pmap_off; 658 if (pmap_min < sel_min) 659 pmap_min += pmap_align; 660 661 /* 662 * Fixup: it's possible that pmap_min and pmap_max 663 * cross eachother. In this case, try to find one 664 * address that is allowed. 665 * (This usually happens in biased case.) 666 */ 667 if (pmap_min > pmap_max) { 668 if (pmap_min < sel_max) 669 pmap_max = pmap_min; 670 else if (pmap_max > sel_min) 671 pmap_min = pmap_max; 672 else 673 return ENOMEM; 674 } 675 676 /* Internal validation. */ 677 KDASSERT(pmap_min <= pmap_max); 678 679 sel_min = pmap_min; 680 sel_max = pmap_max; 681 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 682 sel_min = sel_max - FSPACE_BIASGAP; 683 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 684 sel_max = sel_min + FSPACE_BIASGAP; 685 686 #else 687 688 if (align > PAGE_SIZE) { 689 sel_min = roundup(sel_min, align); 690 sel_max &= ~(align - 1); 691 if (sel_min > sel_max) 692 return ENOMEM; 693 694 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 695 if (bias > 0) { 696 sel_min = roundup(sel_max - FSPACE_BIASGAP, 697 align); 698 } else { 699 sel_max = (sel_min + FSPACE_BIASGAP) & 700 ~(align - 1); 701 } 702 } 703 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 704 sel_min = sel_max - FSPACE_BIASGAP; 705 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 706 sel_max = sel_min + FSPACE_BIASGAP; 707 708 #endif 709 710 if (sel_min > sel_max) 711 return ENOMEM; 712 713 #ifdef DIAGNOSTIC 714 bad = 0; 715 /* Lower boundary check. 
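 *
 * To make the checks below concrete, a short walk-through of the plain
 * align path above, with invented numbers (no PMAP_PREFER, no bias):
 *
 *	free range [0x1000, 0x9000), sz = 0x2000, guardpg = 0, align = 0x4000
 *
 *	sel_min = 0x1000			start of the free range
 *	sel_max = 0x9000 - 0x2000 = 0x7000	last start that still fits sz
 *	sel_min = roundup(0x1000, 0x4000)	= 0x4000
 *	sel_max &= ~(0x4000 - 1)		= 0x4000
 *
 * leaving exactly one aligned candidate; had sel_min ended up above
 * sel_max, the function would return ENOMEM.  The FSPACE_BIASGAP clamping
 * only applies when a bias is set and the remaining window is larger than
 * the gap.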
*/ 716 if (sel_min < VMMAP_FREE_START(sel)) { 717 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 718 sel_min, VMMAP_FREE_START(sel)); 719 bad++; 720 } 721 /* Upper boundary check. */ 722 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 723 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 724 sel_max, 725 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 726 bad++; 727 } 728 /* Lower boundary alignment. */ 729 if (align != 0 && (sel_min & (align - 1)) != 0) { 730 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 731 sel_min, align); 732 bad++; 733 } 734 /* Upper boundary alignment. */ 735 if (align != 0 && (sel_max & (align - 1)) != 0) { 736 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 737 sel_max, align); 738 bad++; 739 } 740 /* Lower boundary PMAP_PREFER check. */ 741 if (pmap_align != 0 && align == 0 && 742 (sel_min & (pmap_align - 1)) != pmap_off) { 743 printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 744 sel_min, sel_min & (pmap_align - 1), pmap_off); 745 bad++; 746 } 747 /* Upper boundary PMAP_PREFER check. */ 748 if (pmap_align != 0 && align == 0 && 749 (sel_max & (pmap_align - 1)) != pmap_off) { 750 printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n", 751 sel_max, sel_max & (pmap_align - 1), pmap_off); 752 bad++; 753 } 754 755 if (bad) { 756 panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, " 757 "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, " 758 "bias = %d, " 759 "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)", 760 sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off, 761 bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel)); 762 } 763 #endif /* DIAGNOSTIC */ 764 765 *min = sel_min; 766 *max = sel_max; 767 return 0; 768 } 769 770 /* 771 * Test if memory starting at addr with sz bytes is free. 772 * 773 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 774 * the space. 775 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 776 */ 777 int 778 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 779 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 780 vaddr_t addr, vsize_t sz) 781 { 782 struct uvm_addr_state *free; 783 struct uvm_map_addr *atree; 784 struct vm_map_entry *i, *i_end; 785 786 /* 787 * Kernel memory above uvm_maxkaddr is considered unavailable. 788 */ 789 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 790 if (addr + sz > uvm_maxkaddr) 791 return 0; 792 } 793 794 atree = &map->addr; 795 796 /* 797 * Fill in first, last, so they point at the entries containing the 798 * first and last address of the range. 799 * Note that if they are not NULL, we don't perform the lookup. 800 */ 801 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 802 if (*start_ptr == NULL) { 803 *start_ptr = uvm_map_entrybyaddr(atree, addr); 804 if (*start_ptr == NULL) 805 return 0; 806 } else 807 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 808 if (*end_ptr == NULL) { 809 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 810 *end_ptr = *start_ptr; 811 else { 812 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 813 if (*end_ptr == NULL) 814 return 0; 815 } 816 } else 817 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 818 819 /* Validation. 
*/ 820 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 821 KDASSERT((*start_ptr)->start <= addr && 822 VMMAP_FREE_END(*start_ptr) > addr && 823 (*end_ptr)->start < addr + sz && 824 VMMAP_FREE_END(*end_ptr) >= addr + sz); 825 826 /* 827 * Check the none of the entries intersects with <addr, addr+sz>. 828 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 829 * considered unavailable unless called by those allocators. 830 */ 831 i = *start_ptr; 832 i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr); 833 for (; i != i_end; 834 i = RB_NEXT(uvm_map_addr, atree, i)) { 835 if (i->start != i->end && i->end > addr) 836 return 0; 837 838 /* 839 * uaddr_exe and uaddr_brk_stack may only be used 840 * by these allocators and the NULL uaddr (i.e. no 841 * uaddr). 842 * Reject if this requirement is not met. 843 */ 844 if (uaddr != NULL) { 845 free = uvm_map_uaddr_e(map, i); 846 847 if (uaddr != free && free != NULL && 848 (free == map->uaddr_exe || 849 free == map->uaddr_brk_stack)) 850 return 0; 851 } 852 } 853 854 return -1; 855 } 856 857 /* 858 * Invoke each address selector until an address is found. 859 * Will not invoke uaddr_exe. 860 */ 861 int 862 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 863 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 864 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 865 { 866 struct uvm_addr_state *uaddr; 867 int i; 868 869 /* 870 * Allocation for sz bytes at any address, 871 * using the addr selectors in order. 872 */ 873 for (i = 0; i < nitems(map->uaddr_any); i++) { 874 uaddr = map->uaddr_any[i]; 875 876 if (uvm_addr_invoke(map, uaddr, first, last, 877 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 878 return 0; 879 } 880 881 /* Fall back to brk() and stack() address selectors. */ 882 uaddr = map->uaddr_brk_stack; 883 if (uvm_addr_invoke(map, uaddr, first, last, 884 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 885 return 0; 886 887 return ENOMEM; 888 } 889 890 /* Calculate entry augmentation value. */ 891 vsize_t 892 uvm_map_addr_augment_get(struct vm_map_entry *entry) 893 { 894 vsize_t augment; 895 struct vm_map_entry *left, *right; 896 897 augment = entry->fspace; 898 if ((left = RB_LEFT(entry, daddrs.addr_entry)) != NULL) 899 augment = MAX(augment, left->fspace_augment); 900 if ((right = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) 901 augment = MAX(augment, right->fspace_augment); 902 return augment; 903 } 904 905 /* 906 * Update augmentation data in entry. 907 */ 908 void 909 uvm_map_addr_augment(struct vm_map_entry *entry) 910 { 911 vsize_t augment; 912 913 while (entry != NULL) { 914 /* Calculate value for augmentation. */ 915 augment = uvm_map_addr_augment_get(entry); 916 917 /* 918 * Descend update. 919 * Once we find an entry that already has the correct value, 920 * stop, since it means all its parents will use the correct 921 * value too. 922 */ 923 if (entry->fspace_augment == augment) 924 return; 925 entry->fspace_augment = augment; 926 entry = RB_PARENT(entry, daddrs.addr_entry); 927 } 928 } 929 930 /* 931 * uvm_map: establish a valid mapping in map 932 * 933 * => *addr and sz must be a multiple of PAGE_SIZE. 934 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 935 * => map must be unlocked. 
936 * => <uobj,uoffset> value meanings (4 cases): 937 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 938 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 939 * [3] <uobj,uoffset> == normal mapping 940 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 941 * 942 * case [4] is for kernel mappings where we don't know the offset until 943 * we've found a virtual address. note that kernel object offsets are 944 * always relative to vm_map_min(kernel_map). 945 * 946 * => align: align vaddr, must be a power-of-2. 947 * Align is only a hint and will be ignored if the alignment fails. 948 */ 949 int 950 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 951 struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags) 952 { 953 struct vm_map_entry *first, *last, *entry, *new; 954 struct uvm_map_deadq dead; 955 vm_prot_t prot; 956 vm_prot_t maxprot; 957 vm_inherit_t inherit; 958 int advice; 959 int error; 960 vaddr_t pmap_align, pmap_offset; 961 vaddr_t hint; 962 963 if ((map->flags & VM_MAP_INTRSAFE) == 0) 964 splassert(IPL_NONE); 965 else 966 splassert(IPL_VM); 967 968 /* 969 * We use pmap_align and pmap_offset as alignment and offset variables. 970 * 971 * Because the align parameter takes precedence over pmap prefer, 972 * the pmap_align will need to be set to align, with pmap_offset = 0, 973 * if pmap_prefer will not align. 974 */ 975 if (uoffset == UVM_UNKNOWN_OFFSET) { 976 pmap_align = MAX(align, PAGE_SIZE); 977 pmap_offset = 0; 978 } else { 979 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 980 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 981 982 if (align == 0 || 983 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 984 /* pmap_offset satisfies align, no change. */ 985 } else { 986 /* Align takes precedence over pmap prefer. */ 987 pmap_align = align; 988 pmap_offset = 0; 989 } 990 } 991 992 /* Decode parameters. */ 993 prot = UVM_PROTECTION(flags); 994 maxprot = UVM_MAXPROTECTION(flags); 995 advice = UVM_ADVICE(flags); 996 inherit = UVM_INHERIT(flags); 997 error = 0; 998 hint = trunc_page(*addr); 999 TAILQ_INIT(&dead); 1000 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1001 KASSERT((align & (align - 1)) == 0); 1002 1003 /* Holes are incompatible with other types of mappings. */ 1004 if (flags & UVM_FLAG_HOLE) { 1005 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1006 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1007 } 1008 1009 /* Unset hint for kernel_map non-fixed allocations. */ 1010 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1011 hint = 0; 1012 1013 /* Check protection. */ 1014 if ((prot & maxprot) != prot) 1015 return EACCES; 1016 1017 /* 1018 * Before grabbing the lock, allocate a map entry for later 1019 * use to ensure we don't wait for memory while holding the 1020 * vm_map_lock. 1021 */ 1022 new = uvm_mapent_alloc(map, flags); 1023 if (new == NULL) 1024 return(ENOMEM); 1025 1026 if (flags & UVM_FLAG_TRYLOCK) { 1027 if (vm_map_lock_try(map) == FALSE) { 1028 error = EFAULT; 1029 goto out; 1030 } 1031 } else 1032 vm_map_lock(map); 1033 1034 first = last = NULL; 1035 if (flags & UVM_FLAG_FIXED) { 1036 /* 1037 * Fixed location. 1038 * 1039 * Note: we ignore align, pmap_prefer. 1040 * Fill in first, last and *addr. 1041 */ 1042 KASSERT((*addr & PAGE_MASK) == 0); 1043 1044 /* 1045 * Grow pmap to include allocated address. 1046 * If the growth fails, the allocation will fail too. 
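 *
 * The growth request below is simply the shortfall.  With invented
 * numbers, uvm_maxkaddr = 0xd0000000 and a fixed mapping of sz = 0x4000
 * at *addr = 0xd0002000 asks uvm_map_kmem_grow() for at least
 *
 *	*addr + sz - uvm_maxkaddr = 0xd0006000 - 0xd0000000 = 0x6000
 *
 * bytes; how far the kernel map actually grows is decided by
 * uvm_map_kmem_grow() (see VM_MAP_KSIZE_DELTA and VM_MAP_KSIZE_ALLOCMUL
 * above).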
1047 */ 1048 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1049 uvm_maxkaddr < (*addr + sz)) { 1050 uvm_map_kmem_grow(map, &dead, 1051 *addr + sz - uvm_maxkaddr, flags); 1052 } 1053 1054 /* Check that the space is available. */ 1055 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1056 error = ENOMEM; 1057 goto unlock; 1058 } 1059 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1060 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1061 (align == 0 || (*addr & (align - 1)) == 0) && 1062 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1063 /* 1064 * Address used as hint. 1065 * 1066 * Note: we enforce the alignment restriction, 1067 * but ignore pmap_prefer. 1068 */ 1069 } else if ((maxprot & VM_PROT_EXECUTE) != 0 && 1070 map->uaddr_exe != NULL) { 1071 /* Run selection algorithm for executables. */ 1072 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1073 addr, sz, pmap_align, pmap_offset, prot, hint); 1074 1075 /* Grow kernel memory and try again. */ 1076 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1077 uvm_map_kmem_grow(map, &dead, sz, flags); 1078 1079 error = uvm_addr_invoke(map, map->uaddr_exe, 1080 &first, &last, addr, sz, 1081 pmap_align, pmap_offset, prot, hint); 1082 } 1083 1084 if (error != 0) 1085 goto unlock; 1086 } else { 1087 /* Update freelists from vmspace. */ 1088 if (map->flags & VM_MAP_ISVMSPACE) 1089 uvm_map_vmspace_update(map, &dead, flags); 1090 1091 error = uvm_map_findspace(map, &first, &last, addr, sz, 1092 pmap_align, pmap_offset, prot, hint); 1093 1094 /* Grow kernel memory and try again. */ 1095 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1096 uvm_map_kmem_grow(map, &dead, sz, flags); 1097 1098 error = uvm_map_findspace(map, &first, &last, addr, sz, 1099 pmap_align, pmap_offset, prot, hint); 1100 } 1101 1102 if (error != 0) 1103 goto unlock; 1104 } 1105 1106 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1107 uvm_maxkaddr >= *addr + sz); 1108 1109 /* If we only want a query, return now. */ 1110 if (flags & UVM_FLAG_QUERY) { 1111 error = 0; 1112 goto unlock; 1113 } 1114 1115 if (uobj == NULL) 1116 uoffset = 0; 1117 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1118 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1119 uoffset = *addr - vm_map_min(kernel_map); 1120 } 1121 1122 /* 1123 * Create new entry. 1124 * first and last may be invalidated after this call. 1125 */ 1126 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1127 new); 1128 if (entry == NULL) { 1129 error = ENOMEM; 1130 goto unlock; 1131 } 1132 new = NULL; 1133 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1134 entry->object.uvm_obj = uobj; 1135 entry->offset = uoffset; 1136 entry->protection = prot; 1137 entry->max_protection = maxprot; 1138 entry->inheritance = inherit; 1139 entry->wired_count = 0; 1140 entry->advice = advice; 1141 if (uobj) 1142 entry->etype |= UVM_ET_OBJ; 1143 else if (flags & UVM_FLAG_HOLE) 1144 entry->etype |= UVM_ET_HOLE; 1145 if (flags & UVM_FLAG_COPYONW) { 1146 entry->etype |= UVM_ET_COPYONWRITE; 1147 if ((flags & UVM_FLAG_OVERLAY) == 0) 1148 entry->etype |= UVM_ET_NEEDSCOPY; 1149 } 1150 if (flags & UVM_FLAG_OVERLAY) { 1151 entry->aref.ar_pageoff = 0; 1152 entry->aref.ar_amap = amap_alloc(sz, 1153 ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0), 1154 M_WAITOK); 1155 } 1156 1157 /* Update map and process statistics. 
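 *
 * For vmspace maps the accounting below charges vm_dused in pages via
 * uvmspace_dused(), which skips whatever part of the range overlaps the
 * stack window.  A small invented example, with PAGE_SIZE 4k and the
 * stack occupying [0x7000, 0x9000):
 *
 *	uvmspace_dused(map, 0x5000, 0xb000)
 *		counts [0x5000, 0x7000) and [0x9000, 0xb000)	0x4000 bytes
 *		returns 0x4000 >> PAGE_SHIFT			4 pages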
*/ 1158 if (!(flags & UVM_FLAG_HOLE)) { 1159 map->size += sz; 1160 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1161 ((struct vmspace *)map)->vm_dused += 1162 uvmspace_dused(map, *addr, *addr + sz); 1163 } 1164 } 1165 1166 /* 1167 * Try to merge entry. 1168 * 1169 * Userland allocations are kept separated most of the time. 1170 * Forego the effort of merging what most of the time can't be merged 1171 * and only try the merge if it concerns a kernel entry. 1172 */ 1173 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1174 (map->flags & VM_MAP_ISVMSPACE) == 0) 1175 uvm_mapent_tryjoin(map, entry, &dead); 1176 1177 unlock: 1178 vm_map_unlock(map); 1179 1180 /* 1181 * Remove dead entries. 1182 * 1183 * Dead entries may be the result of merging. 1184 * uvm_map_mkentry may also create dead entries, when it attempts to 1185 * destroy free-space entries. 1186 */ 1187 uvm_unmap_detach(&dead, 0); 1188 out: 1189 if (new) 1190 uvm_mapent_free(new); 1191 return error; 1192 } 1193 1194 /* 1195 * True iff e1 and e2 can be joined together. 1196 */ 1197 int 1198 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1199 struct vm_map_entry *e2) 1200 { 1201 KDASSERT(e1 != NULL && e2 != NULL); 1202 1203 /* Must be the same entry type and not have free memory between. */ 1204 if (e1->etype != e2->etype || e1->end != e2->start) 1205 return 0; 1206 1207 /* Submaps are never joined. */ 1208 if (UVM_ET_ISSUBMAP(e1)) 1209 return 0; 1210 1211 /* Never merge wired memory. */ 1212 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1213 return 0; 1214 1215 /* Protection, inheritance and advice must be equal. */ 1216 if (e1->protection != e2->protection || 1217 e1->max_protection != e2->max_protection || 1218 e1->inheritance != e2->inheritance || 1219 e1->advice != e2->advice) 1220 return 0; 1221 1222 /* If uvm_object: object itself and offsets within object must match. */ 1223 if (UVM_ET_ISOBJ(e1)) { 1224 if (e1->object.uvm_obj != e2->object.uvm_obj) 1225 return 0; 1226 if (e1->offset + (e1->end - e1->start) != e2->offset) 1227 return 0; 1228 } 1229 1230 /* 1231 * Cannot join shared amaps. 1232 * Note: no need to lock amap to look at refs, since we don't care 1233 * about its exact value. 1234 * If it is 1 (i.e. we have the only reference) it will stay there. 1235 */ 1236 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1237 return 0; 1238 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1239 return 0; 1240 1241 /* Apprently, e1 and e2 match. */ 1242 return 1; 1243 } 1244 1245 /* 1246 * Join support function. 1247 * 1248 * Returns the merged entry on succes. 1249 * Returns NULL if the merge failed. 1250 */ 1251 struct vm_map_entry* 1252 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1253 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1254 { 1255 struct uvm_addr_state *free; 1256 1257 /* 1258 * Amap of e1 must be extended to include e2. 1259 * e2 contains no real information in its amap, 1260 * so it can be erased immediately. 1261 */ 1262 if (e1->aref.ar_amap) { 1263 if (amap_extend(e1, e2->end - e2->start)) 1264 return NULL; 1265 } 1266 1267 /* 1268 * Don't drop obj reference: 1269 * uvm_unmap_detach will do this for us. 
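 *
 * e2 is only pushed onto the dead queue here; its object and amap
 * references are dropped later by uvm_unmap_detach(), normally after the
 * map lock has been released.  The caller-side pattern looks roughly like
 * this (a sketch, compare uvm_map() and uvm_unmap()):
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	... map operations push replaced entries onto &dead ...
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);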
1270 */ 1271 free = uvm_map_uaddr_e(map, e1); 1272 uvm_mapent_free_remove(map, free, e1); 1273 1274 free = uvm_map_uaddr_e(map, e2); 1275 uvm_mapent_free_remove(map, free, e2); 1276 uvm_mapent_addr_remove(map, e2); 1277 e1->end = e2->end; 1278 e1->guard = e2->guard; 1279 e1->fspace = e2->fspace; 1280 uvm_mapent_free_insert(map, free, e1); 1281 1282 DEAD_ENTRY_PUSH(dead, e2); 1283 return e1; 1284 } 1285 1286 /* 1287 * Attempt forward and backward joining of entry. 1288 * 1289 * Returns entry after joins. 1290 * We are guaranteed that the amap of entry is either non-existant or 1291 * has never been used. 1292 */ 1293 struct vm_map_entry* 1294 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1295 struct uvm_map_deadq *dead) 1296 { 1297 struct vm_map_entry *other; 1298 struct vm_map_entry *merged; 1299 1300 /* Merge with previous entry. */ 1301 other = RB_PREV(uvm_map_addr, &map->addr, entry); 1302 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1303 merged = uvm_mapent_merge(map, other, entry, dead); 1304 if (merged) 1305 entry = merged; 1306 } 1307 1308 /* 1309 * Merge with next entry. 1310 * 1311 * Because amap can only extend forward and the next entry 1312 * probably contains sensible info, only perform forward merging 1313 * in the absence of an amap. 1314 */ 1315 other = RB_NEXT(uvm_map_addr, &map->addr, entry); 1316 if (other && entry->aref.ar_amap == NULL && 1317 other->aref.ar_amap == NULL && 1318 uvm_mapent_isjoinable(map, entry, other)) { 1319 merged = uvm_mapent_merge(map, entry, other, dead); 1320 if (merged) 1321 entry = merged; 1322 } 1323 1324 return entry; 1325 } 1326 1327 /* 1328 * Kill entries that are no longer in a map. 1329 */ 1330 void 1331 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1332 { 1333 struct vm_map_entry *entry; 1334 int waitok; 1335 1336 waitok = flags & UVM_PLA_WAITOK; 1337 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1338 if (waitok) 1339 uvm_pause(); 1340 /* Drop reference to amap, if we've got one. */ 1341 if (entry->aref.ar_amap) 1342 amap_unref(entry->aref.ar_amap, 1343 entry->aref.ar_pageoff, 1344 atop(entry->end - entry->start), 1345 flags & AMAP_REFALL); 1346 1347 /* Drop reference to our backing object, if we've got one. */ 1348 if (UVM_ET_ISSUBMAP(entry)) { 1349 /* ... unlikely to happen, but play it safe */ 1350 uvm_map_deallocate(entry->object.sub_map); 1351 } else if (UVM_ET_ISOBJ(entry) && 1352 entry->object.uvm_obj->pgops->pgo_detach) { 1353 entry->object.uvm_obj->pgops->pgo_detach( 1354 entry->object.uvm_obj); 1355 } 1356 1357 /* Step to next. */ 1358 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1359 uvm_mapent_free(entry); 1360 } 1361 } 1362 1363 /* 1364 * Create and insert new entry. 1365 * 1366 * Returned entry contains new addresses and is inserted properly in the tree. 1367 * first and last are (probably) no longer valid. 
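 *
 * A sketch of the expected calling pattern (error handling left out; see
 * uvm_map() for the real thing):
 *
 *	first = last = NULL;
 *	if (!uvm_map_isavail(map, NULL, &first, &last, addr, sz))
 *		return ENOMEM;
 *	entry = uvm_map_mkentry(map, first, last, addr, sz, flags,
 *	    &dead, NULL);
 *
 * Passing NULL for the preallocated entry makes uvm_map_mkentry()
 * allocate one itself; afterwards first and last may refer to entries
 * that were moved to the dead queue and must not be dereferenced.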
1368 */ 1369 struct vm_map_entry* 1370 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1371 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1372 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1373 { 1374 struct vm_map_entry *entry, *prev; 1375 struct uvm_addr_state *free; 1376 vaddr_t min, max; /* free space boundaries for new entry */ 1377 1378 KDASSERT(map != NULL); 1379 KDASSERT(first != NULL); 1380 KDASSERT(last != NULL); 1381 KDASSERT(dead != NULL); 1382 KDASSERT(sz > 0); 1383 KDASSERT(addr + sz > addr); 1384 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1385 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1386 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1387 uvm_tree_sanity(map, __FILE__, __LINE__); 1388 1389 min = addr + sz; 1390 max = VMMAP_FREE_END(last); 1391 1392 /* Initialize new entry. */ 1393 if (new == NULL) 1394 entry = uvm_mapent_alloc(map, flags); 1395 else 1396 entry = new; 1397 if (entry == NULL) 1398 return NULL; 1399 entry->offset = 0; 1400 entry->etype = 0; 1401 entry->wired_count = 0; 1402 entry->aref.ar_pageoff = 0; 1403 entry->aref.ar_amap = NULL; 1404 1405 entry->start = addr; 1406 entry->end = min; 1407 entry->guard = 0; 1408 entry->fspace = 0; 1409 1410 /* Reset free space in first. */ 1411 free = uvm_map_uaddr_e(map, first); 1412 uvm_mapent_free_remove(map, free, first); 1413 first->guard = 0; 1414 first->fspace = 0; 1415 1416 /* 1417 * Remove all entries that are fully replaced. 1418 * We are iterating using last in reverse order. 1419 */ 1420 for (; first != last; last = prev) { 1421 prev = RB_PREV(uvm_map_addr, &map->addr, last); 1422 1423 KDASSERT(last->start == last->end); 1424 free = uvm_map_uaddr_e(map, last); 1425 uvm_mapent_free_remove(map, free, last); 1426 uvm_mapent_addr_remove(map, last); 1427 DEAD_ENTRY_PUSH(dead, last); 1428 } 1429 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1430 if (first->start == addr) { 1431 uvm_mapent_addr_remove(map, first); 1432 DEAD_ENTRY_PUSH(dead, first); 1433 } else { 1434 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1435 addr, flags); 1436 } 1437 1438 /* Finally, link in entry. 
*/ 1439 uvm_mapent_addr_insert(map, entry); 1440 uvm_map_fix_space(map, entry, min, max, flags); 1441 1442 uvm_tree_sanity(map, __FILE__, __LINE__); 1443 return entry; 1444 } 1445 1446 /* 1447 * uvm_mapent_alloc: allocate a map entry 1448 */ 1449 struct vm_map_entry * 1450 uvm_mapent_alloc(struct vm_map *map, int flags) 1451 { 1452 struct vm_map_entry *me, *ne; 1453 int s, i; 1454 int pool_flags; 1455 1456 pool_flags = PR_WAITOK; 1457 if (flags & UVM_FLAG_TRYLOCK) 1458 pool_flags = PR_NOWAIT; 1459 1460 if (map->flags & VM_MAP_INTRSAFE || cold) { 1461 s = splvm(); 1462 me = uvm.kentry_free; 1463 if (me == NULL) { 1464 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1465 &kd_nowait); 1466 if (ne == NULL) 1467 panic("uvm_mapent_alloc: cannot allocate map " 1468 "entry"); 1469 for (i = 0; 1470 i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1; 1471 i++) 1472 RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1]; 1473 RB_LEFT(&ne[i], daddrs.addr_entry) = NULL; 1474 me = ne; 1475 if (ratecheck(&uvm_kmapent_last_warn_time, 1476 &uvm_kmapent_warn_rate)) 1477 printf("uvm_mapent_alloc: out of static " 1478 "map entries\n"); 1479 } 1480 uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry); 1481 uvmexp.kmapent++; 1482 splx(s); 1483 me->flags = UVM_MAP_STATIC; 1484 } else if (map == kernel_map) { 1485 splassert(IPL_NONE); 1486 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1487 if (me == NULL) 1488 goto out; 1489 me->flags = UVM_MAP_KMEM; 1490 } else { 1491 splassert(IPL_NONE); 1492 me = pool_get(&uvm_map_entry_pool, pool_flags); 1493 if (me == NULL) 1494 goto out; 1495 me->flags = 0; 1496 } 1497 1498 if (me != NULL) { 1499 RB_LEFT(me, daddrs.addr_entry) = 1500 RB_RIGHT(me, daddrs.addr_entry) = 1501 RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF; 1502 } 1503 1504 out: 1505 return(me); 1506 } 1507 1508 /* 1509 * uvm_mapent_free: free map entry 1510 * 1511 * => XXX: static pool for kernel map? 1512 */ 1513 void 1514 uvm_mapent_free(struct vm_map_entry *me) 1515 { 1516 int s; 1517 1518 if (me->flags & UVM_MAP_STATIC) { 1519 s = splvm(); 1520 RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free; 1521 uvm.kentry_free = me; 1522 uvmexp.kmapent--; 1523 splx(s); 1524 } else if (me->flags & UVM_MAP_KMEM) { 1525 splassert(IPL_NONE); 1526 pool_put(&uvm_map_entry_kmem_pool, me); 1527 } else { 1528 splassert(IPL_NONE); 1529 pool_put(&uvm_map_entry_pool, me); 1530 } 1531 } 1532 1533 /* 1534 * uvm_map_lookup_entry: find map entry at or before an address. 1535 * 1536 * => map must at least be read-locked by caller 1537 * => entry is returned in "entry" 1538 * => return value is true if address is in the returned entry 1539 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1540 * returned for those mappings. 1541 */ 1542 boolean_t 1543 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1544 struct vm_map_entry **entry) 1545 { 1546 *entry = uvm_map_entrybyaddr(&map->addr, address); 1547 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1548 (*entry)->start <= address && (*entry)->end > address; 1549 } 1550 1551 /* 1552 * uvm_map_pie: return a random load address for a PIE executable 1553 * properly aligned. 
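 *
 * A worked example with invented parameters (PAGE_SIZE 4k,
 * VM_PIE_MIN_ADDR 0x1000, VM_PIE_MAX_ADDR 0x100000, align 0x10000):
 *
 *	align = MAX(0x10000, VM_PIE_MIN_ALIGN)		= 0x10000
 *	min   = roundup(0x1000, 0x10000)		= 0x10000
 *	space = (0x100000 - 0x10000) / 0x10000		= 15
 *	addr  = arc4random_uniform(15) * 0x10000 + min
 *
 * so the result is one of 15 equally likely, properly aligned load
 * addresses below VM_PIE_MAX_ADDR.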
1554 */ 1555 #ifndef VM_PIE_MAX_ADDR 1556 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1557 #endif 1558 1559 #ifndef VM_PIE_MIN_ADDR 1560 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1561 #endif 1562 1563 #ifndef VM_PIE_MIN_ALIGN 1564 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1565 #endif 1566 1567 vaddr_t 1568 uvm_map_pie(vaddr_t align) 1569 { 1570 vaddr_t addr, space, min; 1571 1572 align = MAX(align, VM_PIE_MIN_ALIGN); 1573 1574 /* round up to next alignment */ 1575 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1576 1577 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1578 return (align); 1579 1580 space = (VM_PIE_MAX_ADDR - min) / align; 1581 space = MIN(space, (u_int32_t)-1); 1582 1583 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1584 addr += min; 1585 1586 return (addr); 1587 } 1588 1589 void 1590 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1591 { 1592 struct uvm_map_deadq dead; 1593 1594 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1595 (end & (vaddr_t)PAGE_MASK) == 0); 1596 TAILQ_INIT(&dead); 1597 vm_map_lock(map); 1598 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1599 vm_map_unlock(map); 1600 1601 uvm_unmap_detach(&dead, 0); 1602 } 1603 1604 /* 1605 * Mark entry as free. 1606 * 1607 * entry will be put on the dead list. 1608 * The free space will be merged into the previous or a new entry, 1609 * unless markfree is false. 1610 */ 1611 void 1612 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1613 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1614 boolean_t markfree) 1615 { 1616 struct uvm_addr_state *free; 1617 struct vm_map_entry *prev; 1618 vaddr_t addr; /* Start of freed range. */ 1619 vaddr_t end; /* End of freed range. */ 1620 1621 prev = *prev_ptr; 1622 if (prev == entry) 1623 *prev_ptr = prev = NULL; 1624 1625 if (prev == NULL || 1626 VMMAP_FREE_END(prev) != entry->start) 1627 prev = RB_PREV(uvm_map_addr, &map->addr, entry); 1628 1629 /* Entry is describing only free memory and has nothing to drain into. */ 1630 if (prev == NULL && entry->start == entry->end && markfree) { 1631 *prev_ptr = entry; 1632 return; 1633 } 1634 1635 addr = entry->start; 1636 end = VMMAP_FREE_END(entry); 1637 free = uvm_map_uaddr_e(map, entry); 1638 uvm_mapent_free_remove(map, free, entry); 1639 uvm_mapent_addr_remove(map, entry); 1640 DEAD_ENTRY_PUSH(dead, entry); 1641 1642 if (markfree) { 1643 if (prev) { 1644 free = uvm_map_uaddr_e(map, prev); 1645 uvm_mapent_free_remove(map, free, prev); 1646 } 1647 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1648 } 1649 } 1650 1651 /* 1652 * Unwire and release referenced amap and object from map entry. 1653 */ 1654 void 1655 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1656 { 1657 /* Unwire removed map entry. */ 1658 if (VM_MAPENT_ISWIRED(entry)) { 1659 entry->wired_count = 0; 1660 uvm_fault_unwire_locked(map, entry->start, entry->end); 1661 } 1662 1663 /* Entry-type specific code. */ 1664 if (UVM_ET_ISHOLE(entry)) { 1665 /* Nothing to be done for holes. 
*/ 1666 } else if (map->flags & VM_MAP_INTRSAFE) { 1667 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1668 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1669 pmap_kremove(entry->start, entry->end - entry->start); 1670 } else if (UVM_ET_ISOBJ(entry) && 1671 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1672 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1673 /* 1674 * Note: kernel object mappings are currently used in 1675 * two ways: 1676 * [1] "normal" mappings of pages in the kernel object 1677 * [2] uvm_km_valloc'd allocations in which we 1678 * pmap_enter in some non-kernel-object page 1679 * (e.g. vmapbuf). 1680 * 1681 * for case [1], we need to remove the mapping from 1682 * the pmap and then remove the page from the kernel 1683 * object (because, once pages in a kernel object are 1684 * unmapped they are no longer needed, unlike, say, 1685 * a vnode where you might want the data to persist 1686 * until flushed out of a queue). 1687 * 1688 * for case [2], we need to remove the mapping from 1689 * the pmap. there shouldn't be any pages at the 1690 * specified offset in the kernel object [but it 1691 * doesn't hurt to call uvm_km_pgremove just to be 1692 * safe?] 1693 * 1694 * uvm_km_pgremove currently does the following: 1695 * for pages in the kernel object range: 1696 * - drops the swap slot 1697 * - uvm_pagefree the page 1698 * 1699 * note there is version of uvm_km_pgremove() that 1700 * is used for "intrsafe" objects. 1701 */ 1702 /* 1703 * remove mappings from pmap and drop the pages 1704 * from the object. offsets are always relative 1705 * to vm_map_min(kernel_map). 1706 */ 1707 pmap_remove(pmap_kernel(), entry->start, entry->end); 1708 uvm_km_pgremove(entry->object.uvm_obj, 1709 entry->start - vm_map_min(kernel_map), 1710 entry->end - vm_map_min(kernel_map)); 1711 1712 /* 1713 * null out kernel_object reference, we've just 1714 * dropped it 1715 */ 1716 entry->etype &= ~UVM_ET_OBJ; 1717 entry->object.uvm_obj = NULL; /* to be safe */ 1718 } else { 1719 /* remove mappings the standard way. */ 1720 pmap_remove(map->pmap, entry->start, entry->end); 1721 } 1722 } 1723 1724 /* 1725 * Remove all entries from start to end. 1726 * 1727 * If remove_holes, then remove ET_HOLE entries as well. 1728 * If markfree, entry will be properly marked free, otherwise, no replacement 1729 * entry will be put in the tree (corrupting the tree). 1730 */ 1731 void 1732 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1733 struct uvm_map_deadq *dead, boolean_t remove_holes, 1734 boolean_t markfree) 1735 { 1736 struct vm_map_entry *prev_hint, *next, *entry; 1737 1738 start = MAX(start, map->min_offset); 1739 end = MIN(end, map->max_offset); 1740 if (start >= end) 1741 return; 1742 1743 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1744 splassert(IPL_NONE); 1745 else 1746 splassert(IPL_VM); 1747 1748 /* Find first affected entry. */ 1749 entry = uvm_map_entrybyaddr(&map->addr, start); 1750 KDASSERT(entry != NULL && entry->start <= start); 1751 if (entry->end <= start && markfree) 1752 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 1753 else 1754 UVM_MAP_CLIP_START(map, entry, start); 1755 1756 /* 1757 * Iterate entries until we reach end address. 1758 * prev_hint hints where the freed space can be appended to. 
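 *
 * The clipping above and inside the loop below makes the doomed entries
 * line up exactly with [start, end).  A small invented example, unmapping
 * [0x4000, 0x6000) from a single entry covering [0x2000, 0x8000):
 *
 *	UVM_MAP_CLIP_START(map, entry, 0x4000)	splits off [0x2000, 0x4000)
 *	UVM_MAP_CLIP_END(map, entry, 0x6000)	splits off [0x6000, 0x8000)
 *	uvm_unmap_kill_entry(map, entry)	tears down [0x4000, 0x6000)
 *	uvm_mapent_mkfree(...)			returns that range as free
 *
 * and the two split-off pieces stay mapped.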
1759 */ 1760 prev_hint = NULL; 1761 for (; entry != NULL && entry->start < end; entry = next) { 1762 KDASSERT(entry->start >= start); 1763 if (entry->end > end || !markfree) 1764 UVM_MAP_CLIP_END(map, entry, end); 1765 KDASSERT(entry->start >= start && entry->end <= end); 1766 next = RB_NEXT(uvm_map_addr, &map->addr, entry); 1767 1768 /* Don't remove holes unless asked to do so. */ 1769 if (UVM_ET_ISHOLE(entry)) { 1770 if (!remove_holes) { 1771 prev_hint = entry; 1772 continue; 1773 } 1774 } 1775 1776 /* Kill entry. */ 1777 uvm_unmap_kill_entry(map, entry); 1778 1779 /* Update space usage. */ 1780 if ((map->flags & VM_MAP_ISVMSPACE) && 1781 entry->object.uvm_obj == NULL && 1782 !UVM_ET_ISHOLE(entry)) { 1783 ((struct vmspace *)map)->vm_dused -= 1784 uvmspace_dused(map, entry->start, entry->end); 1785 } 1786 if (!UVM_ET_ISHOLE(entry)) 1787 map->size -= entry->end - entry->start; 1788 1789 /* Actual removal of entry. */ 1790 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 1791 } 1792 1793 pmap_update(vm_map_pmap(map)); 1794 1795 #ifdef VMMAP_DEBUG 1796 if (markfree) { 1797 for (entry = uvm_map_entrybyaddr(&map->addr, start); 1798 entry != NULL && entry->start < end; 1799 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 1800 KDASSERT(entry->end <= start || 1801 entry->start == entry->end || 1802 UVM_ET_ISHOLE(entry)); 1803 } 1804 } else { 1805 vaddr_t a; 1806 for (a = start; a < end; a += PAGE_SIZE) 1807 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 1808 } 1809 #endif 1810 } 1811 1812 /* 1813 * Mark all entries from first until end (exclusive) as pageable. 1814 * 1815 * Lock must be exclusive on entry and will not be touched. 1816 */ 1817 void 1818 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 1819 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 1820 { 1821 struct vm_map_entry *iter; 1822 1823 for (iter = first; iter != end; 1824 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1825 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 1826 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 1827 continue; 1828 1829 iter->wired_count = 0; 1830 uvm_fault_unwire_locked(map, iter->start, iter->end); 1831 } 1832 } 1833 1834 /* 1835 * Mark all entries from first until end (exclusive) as wired. 1836 * 1837 * Lockflags determines the lock state on return from this function. 1838 * Lock must be exclusive on entry. 1839 */ 1840 int 1841 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 1842 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 1843 int lockflags) 1844 { 1845 struct vm_map_entry *iter; 1846 #ifdef DIAGNOSTIC 1847 unsigned int timestamp_save; 1848 #endif 1849 int error; 1850 1851 /* 1852 * Wire pages in two passes: 1853 * 1854 * 1: holding the write lock, we create any anonymous maps that need 1855 * to be created. then we clip each map entry to the region to 1856 * be wired and increment its wiring count. 1857 * 1858 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 1859 * in the pages for any newly wired area (wired_count == 1). 1860 * 1861 * downgrading to a read lock for uvm_fault_wire avoids a possible 1862 * deadlock with another thread that may have faulted on one of 1863 * the pages to be wired (it would mark the page busy, blocking 1864 * us, then in turn block on the map lock that we hold). 1865 * because we keep the read lock on the map, the copy-on-write 1866 * status of the entries we modify here cannot change. 
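 *
 * In outline (a sketch of the code that follows, nothing extra):
 *
 *	pass 1 (write lock held):
 *		for each entry in [first, end):
 *			amap_copy() if needs-copy and writable or zero-fill
 *			iter->wired_count++;
 *	pass 2 (map marked busy, lock downgraded to read):
 *		for each entry:
 *			uvm_fault_wire(map, iter->start, iter->end, ...);
 *	on failure:
 *		upgrade the lock again, undo the wired_count increments and
 *		unwire whatever pass 2 already managed to wire.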
*/ 1868 for (iter = first; iter != end; 1869 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1870 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 1871 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 1872 iter->protection == VM_PROT_NONE) 1873 continue; 1874 1875 /* 1876 * Perform actions of vm_map_lookup that need the write lock. 1877 * - create an anonymous map for copy-on-write 1878 * - create an anonymous map for zero-fill 1879 * Skip submaps. 1880 */ 1881 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 1882 UVM_ET_ISNEEDSCOPY(iter) && 1883 ((iter->protection & VM_PROT_WRITE) || 1884 iter->object.uvm_obj == NULL)) { 1885 amap_copy(map, iter, M_WAITOK, TRUE, 1886 iter->start, iter->end); 1887 } 1888 iter->wired_count++; 1889 } 1890 1891 /* 1892 * Pass 2. 1893 */ 1894 #ifdef DIAGNOSTIC 1895 timestamp_save = map->timestamp; 1896 #endif 1897 vm_map_busy(map); 1898 vm_map_downgrade(map); 1899 1900 error = 0; 1901 for (iter = first; error == 0 && iter != end; 1902 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1903 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 1904 iter->protection == VM_PROT_NONE) 1905 continue; 1906 1907 error = uvm_fault_wire(map, iter->start, iter->end, 1908 iter->protection); 1909 } 1910 1911 if (error) { 1912 /* 1913 * uvm_fault_wire failure 1914 * 1915 * Reacquire lock and undo our work. 1916 */ 1917 vm_map_upgrade(map); 1918 vm_map_unbusy(map); 1919 #ifdef DIAGNOSTIC 1920 if (timestamp_save != map->timestamp) 1921 panic("uvm_map_pageable_wire: stale map"); 1922 #endif 1923 1924 /* 1925 * first is no longer needed to restart loops. 1926 * Use it as iterator to unwire the successfully wired mappings. 1927 */ 1928 for (; first != iter; 1929 first = RB_NEXT(uvm_map_addr, &map->addr, first)) { 1930 if (UVM_ET_ISHOLE(first) || 1931 first->start == first->end || 1932 first->protection == VM_PROT_NONE) 1933 continue; 1934 1935 first->wired_count--; 1936 if (!VM_MAPENT_ISWIRED(first)) { 1937 uvm_fault_unwire_locked(map, 1938 first->start, first->end); 1939 } 1940 } 1941 1942 /* decrease counter in the rest of the entries */ 1943 for (; iter != end; 1944 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 1945 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 1946 iter->protection == VM_PROT_NONE) 1947 continue; 1948 1949 iter->wired_count--; 1950 } 1951 1952 if ((lockflags & UVM_LK_EXIT) == 0) 1953 vm_map_unlock(map); 1954 return error; 1955 } 1956 1957 /* We are currently holding a read lock. */ 1958 if ((lockflags & UVM_LK_EXIT) == 0) { 1959 vm_map_unbusy(map); 1960 vm_map_unlock_read(map); 1961 } else { 1962 vm_map_upgrade(map); 1963 vm_map_unbusy(map); 1964 #ifdef DIAGNOSTIC 1965 if (timestamp_save != map->timestamp) 1966 panic("uvm_map_pageable_wire: stale map"); 1967 #endif 1968 } 1969 return 0; 1970 } 1971 1972 /* 1973 * uvm_map_pageable: set pageability of a range in a map. 1974 * 1975 * Flags: 1976 * UVM_LK_ENTER: map is already locked by caller 1977 * UVM_LK_EXIT: don't unlock map on exit 1978 * 1979 * The full range must be in use (entries may not have fspace != 0). 1980 * UVM_ET_HOLE counts as unmapped. 1981 */ 1982 int 1983 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 1984 boolean_t new_pageable, int lockflags) 1985 { 1986 struct vm_map_entry *first, *last, *tmp; 1987 int error; 1988 1989 start = trunc_page(start); 1990 end = round_page(end); 1991 1992 if (start > end) 1993 return EINVAL; 1994 if (start == end) 1995 return 0; /* nothing to do */ 1996 if (start < map->min_offset) 1997 return EFAULT; /* why?
see first XXX below */ 1998 if (end > map->max_offset) 1999 return EINVAL; /* why? see second XXX below */ 2000 2001 KASSERT(map->flags & VM_MAP_PAGEABLE); 2002 if ((lockflags & UVM_LK_ENTER) == 0) 2003 vm_map_lock(map); 2004 2005 /* 2006 * Find first entry. 2007 * 2008 * Initial test on start is different, because of the different 2009 * error returned. Rest is tested further down. 2010 */ 2011 first = uvm_map_entrybyaddr(&map->addr, start); 2012 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2013 /* 2014 * XXX if the first address is not mapped, it is EFAULT? 2015 */ 2016 error = EFAULT; 2017 goto out; 2018 } 2019 2020 /* Check that the range has no holes. */ 2021 for (last = first; last != NULL && last->start < end; 2022 last = RB_NEXT(uvm_map_addr, &map->addr, last)) { 2023 if (UVM_ET_ISHOLE(last) || 2024 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2025 /* 2026 * XXX unmapped memory in range, why is it EINVAL 2027 * instead of EFAULT? 2028 */ 2029 error = EINVAL; 2030 goto out; 2031 } 2032 } 2033 2034 /* 2035 * Last ended at the first entry after the range. 2036 * Move back one step. 2037 * 2038 * Note that last may be NULL. 2039 */ 2040 if (last == NULL) { 2041 last = RB_MAX(uvm_map_addr, &map->addr); 2042 if (last->end < end) { 2043 error = EINVAL; 2044 goto out; 2045 } 2046 } else { 2047 KASSERT(last != first); 2048 last = RB_PREV(uvm_map_addr, &map->addr, last); 2049 } 2050 2051 /* Wire/unwire pages here. */ 2052 if (new_pageable) { 2053 /* 2054 * Mark pageable. 2055 * entries that are not wired are untouched. 2056 */ 2057 if (VM_MAPENT_ISWIRED(first)) 2058 UVM_MAP_CLIP_START(map, first, start); 2059 /* 2060 * Split last at end. 2061 * Make tmp be the first entry after what is to be touched. 2062 * If last is not wired, don't touch it. 2063 */ 2064 if (VM_MAPENT_ISWIRED(last)) { 2065 UVM_MAP_CLIP_END(map, last, end); 2066 tmp = RB_NEXT(uvm_map_addr, &map->addr, last); 2067 } else 2068 tmp = last; 2069 2070 uvm_map_pageable_pgon(map, first, tmp, start, end); 2071 error = 0; 2072 2073 out: 2074 if ((lockflags & UVM_LK_EXIT) == 0) 2075 vm_map_unlock(map); 2076 return error; 2077 } else { 2078 /* 2079 * Mark entries wired. 2080 * entries are always touched (because recovery needs this). 2081 */ 2082 if (!VM_MAPENT_ISWIRED(first)) 2083 UVM_MAP_CLIP_START(map, first, start); 2084 /* 2085 * Split last at end. 2086 * Make tmp be the first entry after what is to be touched. 2087 * If last is not wired, don't touch it. 2088 */ 2089 if (!VM_MAPENT_ISWIRED(last)) { 2090 UVM_MAP_CLIP_END(map, last, end); 2091 tmp = RB_NEXT(uvm_map_addr, &map->addr, last); 2092 } else 2093 tmp = last; 2094 2095 return uvm_map_pageable_wire(map, first, tmp, start, end, 2096 lockflags); 2097 } 2098 } 2099 2100 /* 2101 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2102 * all mapped regions. 2103 * 2104 * Map must not be locked. 2105 * If no flags are specified, all ragions are unwired. 
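 *
 * For example (a sketch; memlock_limit here stands for the caller's
 * RLIMIT_MEMLOCK soft limit), mlockall(MCL_CURRENT | MCL_FUTURE) ends up
 * roughly as:
 *
 *	uvm_map_pageable_all(&p->p_vmspace->vm_map,
 *	    MCL_CURRENT | MCL_FUTURE, memlock_limit);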
2106 */ 2107 int 2108 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2109 { 2110 vsize_t size; 2111 struct vm_map_entry *iter; 2112 2113 KASSERT(map->flags & VM_MAP_PAGEABLE); 2114 vm_map_lock(map); 2115 2116 if (flags == 0) { 2117 uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr), 2118 NULL, map->min_offset, map->max_offset); 2119 2120 atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE); 2121 vm_map_unlock(map); 2122 return 0; 2123 } 2124 2125 if (flags & MCL_FUTURE) 2126 atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE); 2127 if (!(flags & MCL_CURRENT)) { 2128 vm_map_unlock(map); 2129 return 0; 2130 } 2131 2132 /* 2133 * Count number of pages in all non-wired entries. 2134 * If the number exceeds the limit, abort. 2135 */ 2136 size = 0; 2137 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2138 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2139 continue; 2140 2141 size += iter->end - iter->start; 2142 } 2143 2144 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2145 vm_map_unlock(map); 2146 return ENOMEM; 2147 } 2148 2149 /* XXX non-pmap_wired_count case must be handled by caller */ 2150 #ifdef pmap_wired_count 2151 if (limit != 0 && 2152 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2153 vm_map_unlock(map); 2154 return ENOMEM; 2155 } 2156 #endif 2157 2158 /* 2159 * uvm_map_pageable_wire will release lcok 2160 */ 2161 return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr), 2162 NULL, map->min_offset, map->max_offset, 0); 2163 } 2164 2165 /* 2166 * Initialize map. 2167 * 2168 * Allocates sufficient entries to describe the free memory in the map. 2169 */ 2170 void 2171 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2172 { 2173 int i; 2174 2175 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2176 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2177 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2178 2179 /* 2180 * Update parameters. 2181 * 2182 * This code handles (vaddr_t)-1 and other page mask ending addresses 2183 * properly. 2184 * We lose the top page if the full virtual address space is used. 2185 */ 2186 if (max & (vaddr_t)PAGE_MASK) { 2187 max += 1; 2188 if (max == 0) /* overflow */ 2189 max -= PAGE_SIZE; 2190 } 2191 2192 RB_INIT(&map->addr); 2193 map->uaddr_exe = NULL; 2194 for (i = 0; i < nitems(map->uaddr_any); ++i) 2195 map->uaddr_any[i] = NULL; 2196 map->uaddr_brk_stack = NULL; 2197 2198 map->size = 0; 2199 map->ref_count = 1; 2200 map->min_offset = min; 2201 map->max_offset = max; 2202 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2203 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2204 map->flags = flags; 2205 map->timestamp = 0; 2206 rw_init(&map->lock, "vmmaplk"); 2207 2208 /* Configure the allocators. */ 2209 if (flags & VM_MAP_ISVMSPACE) 2210 uvm_map_setup_md(map); 2211 else 2212 map->uaddr_any[3] = &uaddr_kbootstrap; 2213 2214 /* 2215 * Fill map entries. 2216 * This requires a write-locked map (because of diagnostic assertions 2217 * in insert code). 2218 */ 2219 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 2220 if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0) 2221 panic("uvm_map_setup: rw_enter failed on new map"); 2222 } 2223 uvm_map_setup_entries(map); 2224 uvm_tree_sanity(map, __FILE__, __LINE__); 2225 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2226 rw_exit(&map->lock); 2227 } 2228 2229 /* 2230 * Destroy the map. 2231 * 2232 * This is the inverse operation to uvm_map_setup. 
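 *
 * Called from uvmspace_free() (below) once the last reference to a
 * vmspace has been dropped; nobody else may be using the map at that
 * point.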
2233 */ 2234 void 2235 uvm_map_teardown(struct vm_map *map) 2236 { 2237 struct uvm_map_deadq dead_entries; 2238 int i, waitok = 0; 2239 struct vm_map_entry *entry, *tmp; 2240 #ifdef VMMAP_DEBUG 2241 size_t numq, numt; 2242 #endif 2243 2244 if ((map->flags & VM_MAP_INTRSAFE) == 0) 2245 waitok = 1; 2246 if (waitok) { 2247 if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0) 2248 panic("uvm_map_teardown: rw_enter failed on free map"); 2249 } 2250 2251 /* Remove address selectors. */ 2252 uvm_addr_destroy(map->uaddr_exe); 2253 map->uaddr_exe = NULL; 2254 for (i = 0; i < nitems(map->uaddr_any); i++) { 2255 uvm_addr_destroy(map->uaddr_any[i]); 2256 map->uaddr_any[i] = NULL; 2257 } 2258 uvm_addr_destroy(map->uaddr_brk_stack); 2259 map->uaddr_brk_stack = NULL; 2260 2261 /* 2262 * Remove entries. 2263 * 2264 * The following is based on graph breadth-first search. 2265 * 2266 * In color terms: 2267 * - the dead_entries set contains all nodes that are reachable 2268 * (i.e. both the black and the grey nodes) 2269 * - any entry not in dead_entries is white 2270 * - any entry that appears in dead_entries before entry, 2271 * is black, the rest is grey. 2272 * The set [entry, end] is also referred to as the wavefront. 2273 * 2274 * Since the tree is always a fully connected graph, the breadth-first 2275 * search guarantees that each vmmap_entry is visited exactly once. 2276 * The vm_map is broken down in linear time. 2277 */ 2278 TAILQ_INIT(&dead_entries); 2279 if ((entry = RB_ROOT(&map->addr)) != NULL) 2280 DEAD_ENTRY_PUSH(&dead_entries, entry); 2281 while (entry != NULL) { 2282 if (waitok) 2283 uvm_pause(); 2284 uvm_unmap_kill_entry(map, entry); 2285 if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL) 2286 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2287 if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL) 2288 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2289 /* Update wave-front. */ 2290 entry = TAILQ_NEXT(entry, dfree.deadq); 2291 } 2292 2293 if (waitok) 2294 rw_exit(&map->lock); 2295 2296 #ifdef VMMAP_DEBUG 2297 numt = numq = 0; 2298 RB_FOREACH(entry, uvm_map_addr, &map->addr) 2299 numt++; 2300 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2301 numq++; 2302 KASSERT(numt == numq); 2303 #endif 2304 uvm_unmap_detach(&dead_entries, waitok ? UVM_PLA_WAITOK : 0); 2305 pmap_destroy(map->pmap); 2306 map->pmap = NULL; 2307 } 2308 2309 /* 2310 * Populate map with free-memory entries. 2311 * 2312 * Map must be initialized and empty. 2313 */ 2314 void 2315 uvm_map_setup_entries(struct vm_map *map) 2316 { 2317 KDASSERT(RB_EMPTY(&map->addr)); 2318 2319 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2320 } 2321 2322 /* 2323 * Split entry at given address. 2324 * 2325 * orig: entry that is to be split. 2326 * next: a newly allocated map entry that is not linked. 2327 * split: address at which the split is done. 
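 *
 * Roughly (a sketch, for a split address S inside the mapped part of
 * orig):
 *
 *	before:  orig: [orig->start .......... orig->end) + free space
 *	after:   orig: [orig->start ... S)
 *	         next:                  [S ... orig->end) + free space
 *
 * If S lies in the free space behind orig, next instead becomes an empty
 * entry at S that takes over the free space from S onwards.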
2328 */ 2329 void 2330 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2331 struct vm_map_entry *next, vaddr_t split) 2332 { 2333 struct uvm_addr_state *free, *free_before; 2334 vsize_t adj; 2335 2336 if ((split & PAGE_MASK) != 0) { 2337 panic("uvm_map_splitentry: split address 0x%lx " 2338 "not on page boundary!", split); 2339 } 2340 KDASSERT(map != NULL && orig != NULL && next != NULL); 2341 uvm_tree_sanity(map, __FILE__, __LINE__); 2342 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2343 2344 #ifdef VMMAP_DEBUG 2345 KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig); 2346 KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next); 2347 #endif /* VMMAP_DEBUG */ 2348 2349 /* 2350 * Free space will change, unlink from free space tree. 2351 */ 2352 free = uvm_map_uaddr_e(map, orig); 2353 uvm_mapent_free_remove(map, free, orig); 2354 2355 adj = split - orig->start; 2356 2357 uvm_mapent_copy(orig, next); 2358 if (split >= orig->end) { 2359 next->etype = 0; 2360 next->offset = 0; 2361 next->wired_count = 0; 2362 next->start = next->end = split; 2363 next->guard = 0; 2364 next->fspace = VMMAP_FREE_END(orig) - split; 2365 next->aref.ar_amap = NULL; 2366 next->aref.ar_pageoff = 0; 2367 orig->guard = MIN(orig->guard, split - orig->end); 2368 orig->fspace = split - VMMAP_FREE_START(orig); 2369 } else { 2370 orig->fspace = 0; 2371 orig->guard = 0; 2372 orig->end = next->start = split; 2373 2374 if (next->aref.ar_amap) 2375 amap_splitref(&orig->aref, &next->aref, adj); 2376 if (UVM_ET_ISSUBMAP(orig)) { 2377 uvm_map_reference(next->object.sub_map); 2378 next->offset += adj; 2379 } else if (UVM_ET_ISOBJ(orig)) { 2380 if (next->object.uvm_obj->pgops && 2381 next->object.uvm_obj->pgops->pgo_reference) { 2382 next->object.uvm_obj->pgops->pgo_reference( 2383 next->object.uvm_obj); 2384 } 2385 next->offset += adj; 2386 } 2387 } 2388 2389 /* 2390 * Link next into address tree. 2391 * Link orig and next into free-space tree. 2392 * 2393 * Don't insert 'next' into the addr tree until orig has been linked, 2394 * in case the free-list looks at adjecent entries in the addr tree 2395 * for its decisions. 2396 */ 2397 if (orig->fspace > 0) 2398 free_before = free; 2399 else 2400 free_before = uvm_map_uaddr_e(map, orig); 2401 uvm_mapent_free_insert(map, free_before, orig); 2402 uvm_mapent_addr_insert(map, next); 2403 uvm_mapent_free_insert(map, free, next); 2404 2405 uvm_tree_sanity(map, __FILE__, __LINE__); 2406 } 2407 2408 2409 #ifdef VMMAP_DEBUG 2410 2411 void 2412 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2413 char *file, int line) 2414 { 2415 char* map_special; 2416 2417 if (test) 2418 return; 2419 2420 if (map == kernel_map) 2421 map_special = " (kernel_map)"; 2422 else if (map == kmem_map) 2423 map_special = " (kmem_map)"; 2424 else 2425 map_special = ""; 2426 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2427 line, test_str); 2428 } 2429 2430 /* 2431 * Check that map is sane. 2432 */ 2433 void 2434 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2435 { 2436 struct vm_map_entry *iter; 2437 vaddr_t addr; 2438 vaddr_t min, max, bound; /* Bounds checker. */ 2439 struct uvm_addr_state *free; 2440 2441 addr = vm_map_min(map); 2442 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2443 /* 2444 * Valid start, end. 2445 * Catch overflow for end+fspace. 2446 */ 2447 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2448 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2449 2450 /* May not be empty. 
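		 * (Each entry, together with its free space, spans at
		 * least one byte: iter->start < VMMAP_FREE_END(iter).)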
*/ 2451 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2452 file, line); 2453 2454 /* Addresses for entry must lie within map boundaries. */ 2455 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2456 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2457 2458 /* Tree may not have gaps. */ 2459 UVM_ASSERT(map, iter->start == addr, file, line); 2460 addr = VMMAP_FREE_END(iter); 2461 2462 /* 2463 * Free space may not cross boundaries, unless the same 2464 * free list is used on both sides of the border. 2465 */ 2466 min = VMMAP_FREE_START(iter); 2467 max = VMMAP_FREE_END(iter); 2468 2469 while (min < max && 2470 (bound = uvm_map_boundary(map, min, max)) != max) { 2471 UVM_ASSERT(map, 2472 uvm_map_uaddr(map, bound - 1) == 2473 uvm_map_uaddr(map, bound), 2474 file, line); 2475 min = bound; 2476 } 2477 2478 free = uvm_map_uaddr_e(map, iter); 2479 if (free) { 2480 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2481 file, line); 2482 } else { 2483 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2484 file, line); 2485 } 2486 } 2487 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2488 } 2489 2490 void 2491 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2492 { 2493 struct vm_map_entry *iter; 2494 vsize_t size; 2495 2496 size = 0; 2497 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2498 if (!UVM_ET_ISHOLE(iter)) 2499 size += iter->end - iter->start; 2500 } 2501 2502 if (map->size != size) 2503 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2504 UVM_ASSERT(map, map->size == size, file, line); 2505 2506 vmspace_validate(map); 2507 } 2508 2509 /* 2510 * This function validates the statistics on vmspace. 2511 */ 2512 void 2513 vmspace_validate(struct vm_map *map) 2514 { 2515 struct vmspace *vm; 2516 struct vm_map_entry *iter; 2517 vaddr_t imin, imax; 2518 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2519 vsize_t stack, heap; /* Measured sizes. */ 2520 2521 if (!(map->flags & VM_MAP_ISVMSPACE)) 2522 return; 2523 2524 vm = (struct vmspace *)map; 2525 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2526 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2527 2528 stack = heap = 0; 2529 RB_FOREACH(iter, uvm_map_addr, &map->addr) { 2530 imin = imax = iter->start; 2531 2532 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2533 continue; 2534 2535 /* 2536 * Update stack, heap. 2537 * Keep in mind that (theoretically) the entries of 2538 * userspace and stack may be joined. 2539 */ 2540 while (imin != iter->end) { 2541 /* 2542 * Set imax to the first boundary crossed between 2543 * imin and stack addresses. 2544 */ 2545 imax = iter->end; 2546 if (imin < stack_begin && imax > stack_begin) 2547 imax = stack_begin; 2548 else if (imin < stack_end && imax > stack_end) 2549 imax = stack_end; 2550 2551 if (imin >= stack_begin && imin < stack_end) 2552 stack += imax - imin; 2553 else 2554 heap += imax - imin; 2555 imin = imax; 2556 } 2557 } 2558 2559 heap >>= PAGE_SHIFT; 2560 if (heap != vm->vm_dused) { 2561 printf("vmspace stack range: 0x%lx-0x%lx\n", 2562 stack_begin, stack_end); 2563 panic("vmspace_validate: vmspace.vm_dused invalid, " 2564 "expected %ld pgs, got %ld pgs in map %p", 2565 heap, vm->vm_dused, 2566 map); 2567 } 2568 } 2569 2570 #endif /* VMMAP_DEBUG */ 2571 2572 /* 2573 * uvm_map_init: init mapping system at boot time. note that we allocate 2574 * and init the static pool of structs vm_map_entry for the kernel here. 
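 *
 * The static kernel_map_entry[] array is meant to let interrupt-safe
 * maps allocate entries without sleeping and without going through the
 * regular pools (see uvm_mapent_alloc()).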
2575 */ 2576 void 2577 uvm_map_init(void) 2578 { 2579 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2580 int lcv; 2581 2582 /* now set up static pool of kernel map entries ... */ 2583 uvm.kentry_free = NULL; 2584 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2585 RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) = 2586 uvm.kentry_free; 2587 uvm.kentry_free = &kernel_map_entry[lcv]; 2588 } 2589 2590 /* initialize the map-related pools. */ 2591 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 2592 0, 0, 0, "vmsppl", &pool_allocator_nointr); 2593 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 2594 0, 0, 0, "vmmpepl", &pool_allocator_nointr); 2595 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 2596 0, 0, 0, "vmmpekpl", NULL); 2597 pool_sethiwat(&uvm_map_entry_pool, 8192); 2598 2599 uvm_addr_init(); 2600 } 2601 2602 #if defined(DDB) 2603 2604 /* 2605 * DDB hooks 2606 */ 2607 2608 /* 2609 * uvm_map_printit: actually prints the map 2610 */ 2611 void 2612 uvm_map_printit(struct vm_map *map, boolean_t full, 2613 int (*pr)(const char *, ...)) 2614 { 2615 struct vmspace *vm; 2616 struct vm_map_entry *entry; 2617 struct uvm_addr_state *free; 2618 int in_free, i; 2619 char buf[8]; 2620 2621 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2622 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2623 map->b_start, map->b_end); 2624 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2625 map->s_start, map->s_end); 2626 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2627 map->size, map->ref_count, map->timestamp, 2628 map->flags); 2629 #ifdef pmap_resident_count 2630 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2631 pmap_resident_count(map->pmap)); 2632 #else 2633 /* XXXCDC: this should be required ... */ 2634 (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); 2635 #endif 2636 2637 /* struct vmspace handling. */ 2638 if (map->flags & VM_MAP_ISVMSPACE) { 2639 vm = (struct vmspace *)map; 2640 2641 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2642 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2643 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2644 vm->vm_tsize, vm->vm_dsize); 2645 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2646 vm->vm_taddr, vm->vm_daddr); 2647 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2648 vm->vm_maxsaddr, vm->vm_minsaddr); 2649 } 2650 2651 if (!full) 2652 goto print_uaddr; 2653 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 2654 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2655 entry, entry->start, entry->end, entry->object.uvm_obj, 2656 (long long)entry->offset, entry->aref.ar_amap, 2657 entry->aref.ar_pageoff); 2658 (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 2659 "wc=%d, adv=%d\n", 2660 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2661 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 2662 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2663 entry->protection, entry->max_protection, 2664 entry->inheritance, entry->wired_count, entry->advice); 2665 2666 free = uvm_map_uaddr_e(map, entry); 2667 in_free = (free != NULL); 2668 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2669 "free=0x%lx-0x%lx\n", 2670 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2671 in_free ? 'T' : 'F', 2672 entry->guard, 2673 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2674 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2675 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2676 (entry->etype & UVM_ET_FREEMAPPED) ? 
'T' : 'F', free); 2677 if (free) { 2678 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2679 free->uaddr_minaddr, free->uaddr_maxaddr, 2680 free->uaddr_functions->uaddr_name); 2681 } 2682 } 2683 2684 print_uaddr: 2685 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2686 for (i = 0; i < nitems(map->uaddr_any); i++) { 2687 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2688 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2689 } 2690 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2691 } 2692 2693 /* 2694 * uvm_object_printit: actually prints the object 2695 */ 2696 void 2697 uvm_object_printit(uobj, full, pr) 2698 struct uvm_object *uobj; 2699 boolean_t full; 2700 int (*pr)(const char *, ...); 2701 { 2702 struct vm_page *pg; 2703 int cnt = 0; 2704 2705 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 2706 uobj, uobj->pgops, uobj->uo_npages); 2707 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 2708 (*pr)("refs=<SYSTEM>\n"); 2709 else 2710 (*pr)("refs=%d\n", uobj->uo_refs); 2711 2712 if (!full) { 2713 return; 2714 } 2715 (*pr)(" PAGES <pg,offset>:\n "); 2716 RB_FOREACH(pg, uvm_objtree, &uobj->memt) { 2717 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 2718 if ((cnt % 3) == 2) { 2719 (*pr)("\n "); 2720 } 2721 cnt++; 2722 } 2723 if ((cnt % 3) != 2) { 2724 (*pr)("\n"); 2725 } 2726 } 2727 2728 /* 2729 * uvm_page_printit: actually print the page 2730 */ 2731 static const char page_flagbits[] = 2732 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 2733 "\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0" 2734 "\31PMAP1\32PMAP2\33PMAP3"; 2735 2736 void 2737 uvm_page_printit(pg, full, pr) 2738 struct vm_page *pg; 2739 boolean_t full; 2740 int (*pr)(const char *, ...); 2741 { 2742 struct vm_page *tpg; 2743 struct uvm_object *uobj; 2744 struct pglist *pgl; 2745 2746 (*pr)("PAGE %p:\n", pg); 2747 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 2748 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 2749 (long long)pg->phys_addr); 2750 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", 2751 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); 2752 #if defined(UVM_PAGE_TRKOWN) 2753 if (pg->pg_flags & PG_BUSY) 2754 (*pr)(" owning process = %d, tag=%s", 2755 pg->owner, pg->owner_tag); 2756 else 2757 (*pr)(" page not busy, no owner"); 2758 #else 2759 (*pr)(" [page ownership tracking disabled]"); 2760 #endif 2761 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 2762 2763 if (!full) 2764 return; 2765 2766 /* cross-verify object/anon */ 2767 if ((pg->pg_flags & PQ_FREE) == 0) { 2768 if (pg->pg_flags & PQ_ANON) { 2769 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2770 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2771 (pg->uanon) ? pg->uanon->an_page : NULL); 2772 else 2773 (*pr)(" anon backpointer is OK\n"); 2774 } else { 2775 uobj = pg->uobject; 2776 if (uobj) { 2777 (*pr)(" checking object list\n"); 2778 RB_FOREACH(tpg, uvm_objtree, &uobj->memt) { 2779 if (tpg == pg) { 2780 break; 2781 } 2782 } 2783 if (tpg) 2784 (*pr)(" page found on object list\n"); 2785 else 2786 (*pr)(" >>> PAGE NOT FOUND " 2787 "ON OBJECT LIST! <<<\n"); 2788 } 2789 } 2790 } 2791 2792 /* cross-verify page queue */ 2793 if (pg->pg_flags & PQ_FREE) { 2794 if (uvm_pmr_isfree(pg)) 2795 (*pr)(" page found in uvm_pmemrange\n"); 2796 else 2797 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 2798 pgl = NULL; 2799 } else if (pg->pg_flags & PQ_INACTIVE) { 2800 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
2801 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 2802 } else if (pg->pg_flags & PQ_ACTIVE) { 2803 pgl = &uvm.page_active; 2804 } else { 2805 pgl = NULL; 2806 } 2807 2808 if (pgl) { 2809 (*pr)(" checking pageq list\n"); 2810 TAILQ_FOREACH(tpg, pgl, pageq) { 2811 if (tpg == pg) { 2812 break; 2813 } 2814 } 2815 if (tpg) 2816 (*pr)(" page found on pageq list\n"); 2817 else 2818 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 2819 } 2820 } 2821 #endif 2822 2823 /* 2824 * uvm_map_protect: change map protection 2825 * 2826 * => set_max means set max_protection. 2827 * => map must be unlocked. 2828 */ 2829 int 2830 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 2831 vm_prot_t new_prot, boolean_t set_max) 2832 { 2833 struct vm_map_entry *first, *iter; 2834 vm_prot_t old_prot; 2835 vm_prot_t mask; 2836 int error; 2837 2838 if (start > end) 2839 return EINVAL; 2840 start = MAX(start, map->min_offset); 2841 end = MIN(end, map->max_offset); 2842 if (start >= end) 2843 return 0; 2844 2845 error = 0; 2846 vm_map_lock(map); 2847 2848 /* 2849 * Set up first and last. 2850 * - first will contain first entry at or after start. 2851 */ 2852 first = uvm_map_entrybyaddr(&map->addr, start); 2853 KDASSERT(first != NULL); 2854 if (first->end < start) 2855 first = RB_NEXT(uvm_map_addr, &map->addr, first); 2856 2857 /* First, check for protection violations. */ 2858 for (iter = first; iter != NULL && iter->start < end; 2859 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2860 /* Treat memory holes as free space. */ 2861 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 2862 continue; 2863 2864 if (UVM_ET_ISSUBMAP(iter)) { 2865 error = EINVAL; 2866 goto out; 2867 } 2868 if ((new_prot & iter->max_protection) != new_prot) { 2869 error = EACCES; 2870 goto out; 2871 } 2872 } 2873 2874 /* Fix protections. */ 2875 for (iter = first; iter != NULL && iter->start < end; 2876 iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) { 2877 /* Treat memory holes as free space. */ 2878 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 2879 continue; 2880 2881 old_prot = iter->protection; 2882 2883 /* 2884 * Skip adapting protection iff old and new protection 2885 * are equal. 2886 */ 2887 if (set_max) { 2888 if (old_prot == (new_prot & old_prot) && 2889 iter->max_protection == new_prot) 2890 continue; 2891 } else { 2892 if (old_prot == new_prot) 2893 continue; 2894 } 2895 2896 UVM_MAP_CLIP_START(map, iter, start); 2897 UVM_MAP_CLIP_END(map, iter, end); 2898 2899 if (set_max) { 2900 iter->max_protection = new_prot; 2901 iter->protection &= new_prot; 2902 } else 2903 iter->protection = new_prot; 2904 2905 /* 2906 * update physical map if necessary. worry about copy-on-write 2907 * here -- CHECK THIS XXX 2908 */ 2909 if (iter->protection != old_prot) { 2910 mask = UVM_ET_ISCOPYONWRITE(iter) ? 2911 ~VM_PROT_WRITE : VM_PROT_ALL; 2912 2913 /* update pmap */ 2914 if ((iter->protection & mask) == PROT_NONE && 2915 VM_MAPENT_ISWIRED(iter)) { 2916 /* 2917 * TODO(ariane) this is stupid. wired_count 2918 * is 0 if not wired, otherwise anything 2919 * larger than 0 (incremented once each time 2920 * wire is called). 2921 * Mostly to be able to undo the damage on 2922 * failure. Not the actually be a wired 2923 * refcounter... 2924 * Originally: iter->wired_count--; 2925 * (don't we have to unwire this in the pmap 2926 * as well?) 
2927 */ 2928 iter->wired_count = 0; 2929 } 2930 pmap_protect(map->pmap, iter->start, iter->end, 2931 iter->protection & mask); 2932 } 2933 2934 /* 2935 * If the map is configured to lock any future mappings, 2936 * wire this entry now if the old protection was VM_PROT_NONE 2937 * and the new protection is not VM_PROT_NONE. 2938 */ 2939 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 2940 VM_MAPENT_ISWIRED(iter) == 0 && 2941 old_prot == VM_PROT_NONE && 2942 new_prot != VM_PROT_NONE) { 2943 if (uvm_map_pageable(map, iter->start, iter->end, 2944 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 2945 /* 2946 * If locking the entry fails, remember the 2947 * error if it's the first one. Note we 2948 * still continue setting the protection in 2949 * the map, but it will return the resource 2950 * storage condition regardless. 2951 * 2952 * XXX Ignore what the actual error is, 2953 * XXX just call it a resource shortage 2954 * XXX so that it doesn't get confused 2955 * XXX what uvm_map_protect() itself would 2956 * XXX normally return. 2957 */ 2958 error = ENOMEM; 2959 } 2960 } 2961 } 2962 pmap_update(map->pmap); 2963 2964 out: 2965 vm_map_unlock(map); 2966 return error; 2967 } 2968 2969 /* 2970 * uvmspace_alloc: allocate a vmspace structure. 2971 * 2972 * - structure includes vm_map and pmap 2973 * - XXX: no locking on this structure 2974 * - refcnt set to 1, rest must be init'd by caller 2975 */ 2976 struct vmspace * 2977 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 2978 boolean_t remove_holes) 2979 { 2980 struct vmspace *vm; 2981 2982 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 2983 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 2984 return (vm); 2985 } 2986 2987 /* 2988 * uvmspace_init: initialize a vmspace structure. 2989 * 2990 * - XXX: no locking on this structure 2991 * - refcnt set to 1, rest must be init'd by caller 2992 */ 2993 void 2994 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 2995 boolean_t pageable, boolean_t remove_holes) 2996 { 2997 if (pmap) 2998 pmap_reference(pmap); 2999 else 3000 pmap = pmap_create(); 3001 vm->vm_map.pmap = pmap; 3002 3003 uvm_map_setup(&vm->vm_map, min, max, 3004 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3005 3006 vm->vm_refcnt = 1; 3007 3008 if (remove_holes) 3009 pmap_remove_holes(&vm->vm_map); 3010 } 3011 3012 /* 3013 * uvmspace_share: share a vmspace between two processes 3014 * 3015 * - XXX: no locking on vmspace 3016 * - used for vfork 3017 */ 3018 3019 struct vmspace * 3020 uvmspace_share(struct process *pr) 3021 { 3022 struct vmspace *vm = pr->ps_vmspace; 3023 3024 vm->vm_refcnt++; 3025 return vm; 3026 } 3027 3028 /* 3029 * uvmspace_exec: the process wants to exec a new program 3030 * 3031 * - XXX: no locking on vmspace 3032 */ 3033 3034 void 3035 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3036 { 3037 struct process *pr = p->p_p; 3038 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3039 struct vm_map *map = &ovm->vm_map; 3040 struct uvm_map_deadq dead_entries; 3041 3042 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3043 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3044 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3045 3046 pmap_unuse_final(p); /* before stack addresses go away */ 3047 TAILQ_INIT(&dead_entries); 3048 3049 /* see if more than one process is using this vmspace... */ 3050 if (ovm->vm_refcnt == 1) { 3051 /* 3052 * If pr is the only process using its vmspace then 3053 * we can safely recycle that vmspace for the program 3054 * that is being exec'd. 
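		 * (If the vmspace is shared, the else branch below
		 * allocates a fresh vmspace instead and drops our
		 * reference to the old one via uvmspace_free().)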
3055 */ 3056 3057 #ifdef SYSVSHM 3058 /* 3059 * SYSV SHM semantics require us to kill all segments on an exec 3060 */ 3061 if (ovm->vm_shm) 3062 shmexit(ovm); 3063 #endif 3064 3065 /* 3066 * POSIX 1003.1b -- "lock future mappings" is revoked 3067 * when a process execs another program image. 3068 */ 3069 vm_map_lock(map); 3070 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3071 3072 /* 3073 * now unmap the old program 3074 * 3075 * Instead of attempting to keep the map valid, we simply 3076 * nuke all entries and ask uvm_map_setup to reinitialize 3077 * the map to the new boundaries. 3078 * 3079 * uvm_unmap_remove will actually nuke all entries for us 3080 * (as in, not replace them with free-memory entries). 3081 */ 3082 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3083 &dead_entries, TRUE, FALSE); 3084 3085 KDASSERT(RB_EMPTY(&map->addr)); 3086 3087 /* Nuke statistics and boundaries. */ 3088 bzero(&ovm->vm_startcopy, 3089 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3090 3091 3092 if (end & (vaddr_t)PAGE_MASK) { 3093 end += 1; 3094 if (end == 0) /* overflow */ 3095 end -= PAGE_SIZE; 3096 } 3097 3098 /* Setup new boundaries and populate map with entries. */ 3099 map->min_offset = start; 3100 map->max_offset = end; 3101 uvm_map_setup_entries(map); 3102 vm_map_unlock(map); 3103 3104 /* but keep MMU holes unavailable */ 3105 pmap_remove_holes(map); 3106 } else { 3107 /* 3108 * pr's vmspace is being shared, so we can't reuse 3109 * it for pr since it is still being used for others. 3110 * allocate a new vmspace for pr 3111 */ 3112 nvm = uvmspace_alloc(start, end, 3113 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3114 3115 /* install new vmspace and drop our ref to the old one. */ 3116 pmap_deactivate(p); 3117 p->p_vmspace = pr->ps_vmspace = nvm; 3118 pmap_activate(p); 3119 3120 uvmspace_free(ovm); 3121 } 3122 3123 /* Release dead entries */ 3124 uvm_unmap_detach(&dead_entries, 0); 3125 } 3126 3127 /* 3128 * uvmspace_free: free a vmspace data structure 3129 * 3130 * - XXX: no locking on vmspace 3131 */ 3132 void 3133 uvmspace_free(struct vmspace *vm) 3134 { 3135 if (--vm->vm_refcnt == 0) { 3136 /* 3137 * lock the map, to wait out all other references to it. delete 3138 * all of the mappings and pages they hold, then call the pmap 3139 * module to reclaim anything left. 3140 */ 3141 #ifdef SYSVSHM 3142 /* Get rid of any SYSV shared memory segments. */ 3143 if (vm->vm_shm != NULL) 3144 shmexit(vm); 3145 #endif 3146 3147 uvm_map_teardown(&vm->vm_map); 3148 pool_put(&uvm_vmspace_pool, vm); 3149 } 3150 } 3151 3152 /* 3153 * Clone map entry into other map. 3154 * 3155 * Mapping will be placed at dstaddr, for the same length. 3156 * Space must be available. 3157 * Reference counters are incremented. 3158 */ 3159 struct vm_map_entry * 3160 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3161 vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3162 int mapent_flags, int amap_share_flags) 3163 { 3164 struct vm_map_entry *new_entry, *first, *last; 3165 3166 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3167 3168 /* Create new entry (linked in on creation). Fill in first, last. 
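	 * uvm_map_isavail() checks that the destination range is free and
	 * fills first and last with the entries bounding it;
	 * uvm_map_mkentry() uses those to link the new entry in place.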
*/ 3169 first = last = NULL; 3170 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3171 panic("uvmspace_fork: no space in map for " 3172 "entry in empty map"); 3173 } 3174 new_entry = uvm_map_mkentry(dstmap, first, last, 3175 dstaddr, dstlen, mapent_flags, dead, NULL); 3176 if (new_entry == NULL) 3177 return NULL; 3178 /* old_entry -> new_entry */ 3179 new_entry->object = old_entry->object; 3180 new_entry->offset = old_entry->offset; 3181 new_entry->aref = old_entry->aref; 3182 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3183 new_entry->protection = old_entry->protection; 3184 new_entry->max_protection = old_entry->max_protection; 3185 new_entry->inheritance = old_entry->inheritance; 3186 new_entry->advice = old_entry->advice; 3187 3188 /* gain reference to object backing the map (can't be a submap). */ 3189 if (new_entry->aref.ar_amap) { 3190 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3191 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3192 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3193 amap_share_flags); 3194 } 3195 3196 if (UVM_ET_ISOBJ(new_entry) && 3197 new_entry->object.uvm_obj->pgops->pgo_reference) { 3198 new_entry->offset += off; 3199 new_entry->object.uvm_obj->pgops->pgo_reference 3200 (new_entry->object.uvm_obj); 3201 } 3202 3203 return new_entry; 3204 } 3205 3206 /* 3207 * share the mapping: this means we want the old and 3208 * new entries to share amaps and backing objects. 3209 */ 3210 struct vm_map_entry * 3211 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3212 struct vm_map *old_map, 3213 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3214 { 3215 struct vm_map_entry *new_entry; 3216 3217 /* 3218 * if the old_entry needs a new amap (due to prev fork) 3219 * then we need to allocate it now so that we have 3220 * something we own to share with the new_entry. [in 3221 * other words, we need to clear needs_copy] 3222 */ 3223 3224 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3225 /* get our own amap, clears needs_copy */ 3226 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3227 0, 0); 3228 /* XXXCDC: WAITOK??? */ 3229 } 3230 3231 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3232 old_entry->end - old_entry->start, 0, old_entry, 3233 dead, 0, AMAP_SHARED); 3234 3235 /* 3236 * pmap_copy the mappings: this routine is optional 3237 * but if it is there it will reduce the number of 3238 * page faults in the new proc. 3239 */ 3240 if (!UVM_ET_ISHOLE(new_entry)) 3241 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3242 (new_entry->end - new_entry->start), new_entry->start); 3243 3244 return (new_entry); 3245 } 3246 3247 /* 3248 * copy-on-write the mapping (using mmap's 3249 * MAP_PRIVATE semantics) 3250 * 3251 * allocate new_entry, adjust reference counts. 3252 * (note that new references are read-only). 3253 */ 3254 struct vm_map_entry * 3255 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3256 struct vm_map *old_map, 3257 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3258 { 3259 struct vm_map_entry *new_entry; 3260 boolean_t protect_child; 3261 3262 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3263 old_entry->end - old_entry->start, 0, old_entry, 3264 dead, 0, 0); 3265 3266 new_entry->etype |= 3267 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3268 3269 /* 3270 * the new entry will need an amap. it will either 3271 * need to be copied from the old entry or created 3272 * from scratch (if the old entry does not have an 3273 * amap). 
can we defer this process until later 3274 * (by setting "needs_copy") or do we need to copy 3275 * the amap now? 3276 * 3277 * we must copy the amap now if any of the following 3278 * conditions hold: 3279 * 1. the old entry has an amap and that amap is 3280 * being shared. this means that the old (parent) 3281 * process is sharing the amap with another 3282 * process. if we do not clear needs_copy here 3283 * we will end up in a situation where both the 3284 * parent and child process are referring to the 3285 * same amap with "needs_copy" set. if the 3286 * parent write-faults, the fault routine will 3287 * clear "needs_copy" in the parent by allocating 3288 * a new amap. this is wrong because the 3289 * parent is supposed to be sharing the old amap 3290 * and the new amap will break that. 3291 * 3292 * 2. if the old entry has an amap and a non-zero 3293 * wire count then we are going to have to call 3294 * amap_cow_now to avoid page faults in the 3295 * parent process. since amap_cow_now requires 3296 * "needs_copy" to be clear we might as well 3297 * clear it here as well. 3298 * 3299 */ 3300 if (old_entry->aref.ar_amap != NULL && 3301 ((amap_flags(old_entry->aref.ar_amap) & 3302 AMAP_SHARED) != 0 || 3303 VM_MAPENT_ISWIRED(old_entry))) { 3304 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3305 0, 0); 3306 /* XXXCDC: M_WAITOK ... ok? */ 3307 } 3308 3309 /* 3310 * if the parent's entry is wired down, then the 3311 * parent process does not want page faults on 3312 * access to that memory. this means that we 3313 * cannot do copy-on-write because we can't write 3314 * protect the old entry. in this case we 3315 * resolve all copy-on-write faults now, using 3316 * amap_cow_now. note that we have already 3317 * allocated any needed amap (above). 3318 */ 3319 if (VM_MAPENT_ISWIRED(old_entry)) { 3320 /* 3321 * resolve all copy-on-write faults now 3322 * (note that there is nothing to do if 3323 * the old mapping does not have an amap). 3324 * XXX: is it worthwhile to bother with 3325 * pmap_copy in this case? 3326 */ 3327 if (old_entry->aref.ar_amap) 3328 amap_cow_now(new_map, new_entry); 3329 } else { 3330 if (old_entry->aref.ar_amap) { 3331 /* 3332 * setup mappings to trigger copy-on-write faults 3333 * we must write-protect the parent if it has 3334 * an amap and it is not already "needs_copy"... 3335 * if it is already "needs_copy" then the parent 3336 * has already been write-protected by a previous 3337 * fork operation. 3338 * 3339 * if we do not write-protect the parent, then 3340 * we must be sure to write-protect the child 3341 * after the pmap_copy() operation. 3342 * 3343 * XXX: pmap_copy should have some way of telling 3344 * us that it didn't do anything so we can avoid 3345 * calling pmap_protect needlessly. 3346 */ 3347 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3348 if (old_entry->max_protection & 3349 VM_PROT_WRITE) { 3350 pmap_protect(old_map->pmap, 3351 old_entry->start, 3352 old_entry->end, 3353 old_entry->protection & 3354 ~VM_PROT_WRITE); 3355 pmap_update(old_map->pmap); 3356 } 3357 old_entry->etype |= UVM_ET_NEEDSCOPY; 3358 } 3359 3360 /* parent must now be write-protected */ 3361 protect_child = FALSE; 3362 } else { 3363 /* 3364 * we only need to protect the child if the 3365 * parent has write access. 
3366 */ 3367 if (old_entry->max_protection & VM_PROT_WRITE) 3368 protect_child = TRUE; 3369 else 3370 protect_child = FALSE; 3371 } 3372 /* 3373 * copy the mappings 3374 * XXX: need a way to tell if this does anything 3375 */ 3376 if (!UVM_ET_ISHOLE(new_entry)) 3377 pmap_copy(new_map->pmap, old_map->pmap, 3378 new_entry->start, 3379 (old_entry->end - old_entry->start), 3380 old_entry->start); 3381 3382 /* protect the child's mappings if necessary */ 3383 if (protect_child) { 3384 pmap_protect(new_map->pmap, new_entry->start, 3385 new_entry->end, 3386 new_entry->protection & 3387 ~VM_PROT_WRITE); 3388 } 3389 } 3390 3391 return (new_entry); 3392 } 3393 3394 /* 3395 * zero the mapping: the new entry will be zero initialized 3396 */ 3397 struct vm_map_entry * 3398 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3399 struct vm_map *old_map, 3400 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3401 { 3402 struct vm_map_entry *new_entry; 3403 3404 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3405 old_entry->end - old_entry->start, 0, old_entry, 3406 dead, 0, 0); 3407 3408 new_entry->etype |= 3409 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3410 3411 if (new_entry->aref.ar_amap) { 3412 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3413 atop(new_entry->end - new_entry->start), 0); 3414 new_entry->aref.ar_amap = NULL; 3415 new_entry->aref.ar_pageoff = 0; 3416 } 3417 3418 if (UVM_ET_ISOBJ(new_entry)) { 3419 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3420 new_entry->object.uvm_obj->pgops->pgo_detach( 3421 new_entry->object.uvm_obj); 3422 new_entry->object.uvm_obj = NULL; 3423 new_entry->etype &= ~UVM_ET_OBJ; 3424 } 3425 3426 return (new_entry); 3427 } 3428 3429 /* 3430 * uvmspace_fork: fork a process' main map 3431 * 3432 * => create a new vmspace for child process from parent. 3433 * => parent's map must not be locked. 3434 */ 3435 struct vmspace * 3436 uvmspace_fork(struct process *pr) 3437 { 3438 struct vmspace *vm1 = pr->ps_vmspace; 3439 struct vmspace *vm2; 3440 struct vm_map *old_map = &vm1->vm_map; 3441 struct vm_map *new_map; 3442 struct vm_map_entry *old_entry, *new_entry; 3443 struct uvm_map_deadq dead; 3444 3445 vm_map_lock(old_map); 3446 3447 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3448 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3449 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3450 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3451 vm2->vm_dused = 0; /* Statistic managed by us. */ 3452 new_map = &vm2->vm_map; 3453 vm_map_lock(new_map); 3454 3455 /* go entry-by-entry */ 3456 TAILQ_INIT(&dead); 3457 RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3458 if (old_entry->start == old_entry->end) 3459 continue; 3460 3461 /* first, some sanity checks on the old entry */ 3462 if (UVM_ET_ISSUBMAP(old_entry)) { 3463 panic("fork: encountered a submap during fork " 3464 "(illegal)"); 3465 } 3466 3467 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3468 UVM_ET_ISNEEDSCOPY(old_entry)) { 3469 panic("fork: non-copy_on_write map entry marked " 3470 "needs_copy (illegal)"); 3471 } 3472 3473 /* Apply inheritance. 
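		 *
		 * MAP_INHERIT_SHARE -> uvm_mapent_forkshared(): child
		 *	shares the amap and backing object with the parent.
		 * MAP_INHERIT_COPY  -> uvm_mapent_forkcopy(): copy-on-write.
		 * MAP_INHERIT_ZERO  -> uvm_mapent_forkzero(): fresh
		 *	zero-fill mapping.
		 * Anything else (e.g. MAP_INHERIT_NONE) is skipped.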
*/ 3474 switch (old_entry->inheritance) { 3475 case MAP_INHERIT_SHARE: 3476 new_entry = uvm_mapent_forkshared(vm2, new_map, 3477 old_map, old_entry, &dead); 3478 break; 3479 case MAP_INHERIT_COPY: 3480 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3481 old_map, old_entry, &dead); 3482 break; 3483 case MAP_INHERIT_ZERO: 3484 new_entry = uvm_mapent_forkzero(vm2, new_map, 3485 old_map, old_entry, &dead); 3486 break; 3487 default: 3488 continue; 3489 } 3490 3491 /* Update process statistics. */ 3492 if (!UVM_ET_ISHOLE(new_entry)) 3493 new_map->size += new_entry->end - new_entry->start; 3494 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3495 vm2->vm_dused += uvmspace_dused( 3496 new_map, new_entry->start, new_entry->end); 3497 } 3498 } 3499 3500 vm_map_unlock(old_map); 3501 vm_map_unlock(new_map); 3502 3503 /* 3504 * This can actually happen, if multiple entries described a 3505 * space in which an entry was inherited. 3506 */ 3507 uvm_unmap_detach(&dead, 0); 3508 3509 #ifdef SYSVSHM 3510 if (vm1->vm_shm) 3511 shmfork(vm1, vm2); 3512 #endif 3513 3514 #ifdef PMAP_FORK 3515 pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap); 3516 #endif 3517 3518 return vm2; 3519 } 3520 3521 /* 3522 * uvm_map_hint: return the beginning of the best area suitable for 3523 * creating a new mapping with "prot" protection. 3524 */ 3525 vaddr_t 3526 uvm_map_hint(struct vmspace *vm, vm_prot_t prot) 3527 { 3528 vaddr_t addr; 3529 vaddr_t spacing; 3530 3531 #ifdef __i386__ 3532 /* 3533 * If executable skip first two pages, otherwise start 3534 * after data + heap region. 3535 */ 3536 if ((prot & VM_PROT_EXECUTE) != 0 && 3537 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3538 addr = (PAGE_SIZE*2) + 3539 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3540 return (round_page(addr)); 3541 } 3542 #endif 3543 3544 #if defined (__LP64__) && !defined (__mips64__) 3545 spacing = (MIN((4UL * 1024 * 1024 * 1024), BRKSIZ) - 1); 3546 #else 3547 spacing = (MIN((256 * 1024 * 1024), BRKSIZ) - 1); 3548 #endif 3549 3550 addr = (vaddr_t)vm->vm_daddr; 3551 /* 3552 * Start malloc/mmap after the brk. 3553 * If the random spacing area has been used up, 3554 * the brk area becomes fair game for mmap as well. 3555 */ 3556 if (vm->vm_dused < spacing >> PAGE_SHIFT) 3557 addr += BRKSIZ; 3558 #if !defined(__vax__) 3559 addr += arc4random() & spacing; 3560 #endif 3561 return (round_page(addr)); 3562 } 3563 3564 /* 3565 * uvm_map_submap: punch down part of a map into a submap 3566 * 3567 * => only the kernel_map is allowed to be submapped 3568 * => the purpose of submapping is to break up the locking granularity 3569 * of a larger map 3570 * => the range specified must have been mapped previously with a uvm_map() 3571 * call [with uobj==NULL] to create a blank map entry in the main map. 3572 * [And it had better still be blank!] 3573 * => maps which contain submaps should never be copied or forked. 3574 * => to remove a submap, use uvm_unmap() on the main map 3575 * and then uvm_map_deallocate() the submap. 3576 * => main map must be unlocked. 3577 * => submap must have been init'd and have a zero reference count. 
3578 * [need not be locked as we don't actually reference it] 3579 */ 3580 int 3581 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3582 struct vm_map *submap) 3583 { 3584 struct vm_map_entry *entry; 3585 int result; 3586 3587 if (start > map->max_offset || end > map->max_offset || 3588 start < map->min_offset || end < map->min_offset) 3589 return EINVAL; 3590 3591 vm_map_lock(map); 3592 3593 if (uvm_map_lookup_entry(map, start, &entry)) { 3594 UVM_MAP_CLIP_START(map, entry, start); 3595 UVM_MAP_CLIP_END(map, entry, end); 3596 } else 3597 entry = NULL; 3598 3599 if (entry != NULL && 3600 entry->start == start && entry->end == end && 3601 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 3602 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 3603 entry->etype |= UVM_ET_SUBMAP; 3604 entry->object.sub_map = submap; 3605 entry->offset = 0; 3606 uvm_map_reference(submap); 3607 result = 0; 3608 } else 3609 result = EINVAL; 3610 3611 vm_map_unlock(map); 3612 return(result); 3613 } 3614 3615 /* 3616 * uvm_map_checkprot: check protection in map 3617 * 3618 * => must allow specific protection in a fully allocated region. 3619 * => map mut be read or write locked by caller. 3620 */ 3621 boolean_t 3622 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3623 vm_prot_t protection) 3624 { 3625 struct vm_map_entry *entry; 3626 3627 if (start < map->min_offset || end > map->max_offset || start > end) 3628 return FALSE; 3629 if (start == end) 3630 return TRUE; 3631 3632 /* 3633 * Iterate entries. 3634 */ 3635 for (entry = uvm_map_entrybyaddr(&map->addr, start); 3636 entry != NULL && entry->start < end; 3637 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3638 /* Fail if a hole is found. */ 3639 if (UVM_ET_ISHOLE(entry) || 3640 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 3641 return FALSE; 3642 3643 /* Check protection. */ 3644 if ((entry->protection & protection) != protection) 3645 return FALSE; 3646 } 3647 return TRUE; 3648 } 3649 3650 /* 3651 * uvm_map_create: create map 3652 */ 3653 vm_map_t 3654 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 3655 { 3656 vm_map_t result; 3657 3658 result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK); 3659 result->pmap = pmap; 3660 uvm_map_setup(result, min, max, flags); 3661 return(result); 3662 } 3663 3664 /* 3665 * uvm_map_deallocate: drop reference to a map 3666 * 3667 * => caller must not lock map 3668 * => we will zap map if ref count goes to zero 3669 */ 3670 void 3671 uvm_map_deallocate(vm_map_t map) 3672 { 3673 int c; 3674 struct uvm_map_deadq dead; 3675 3676 c = --map->ref_count; 3677 if (c > 0) { 3678 return; 3679 } 3680 3681 /* 3682 * all references gone. unmap and free. 3683 * 3684 * No lock required: we are only one to access this map. 3685 */ 3686 TAILQ_INIT(&dead); 3687 uvm_tree_sanity(map, __FILE__, __LINE__); 3688 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 3689 TRUE, FALSE); 3690 pmap_destroy(map->pmap); 3691 KASSERT(RB_EMPTY(&map->addr)); 3692 free(map, M_VMMAP, 0); 3693 3694 uvm_unmap_detach(&dead, 0); 3695 } 3696 3697 /* 3698 * uvm_map_inherit: set inheritance code for range of addrs in map. 3699 * 3700 * => map must be unlocked 3701 * => note that the inherit code is used during a "fork". see fork 3702 * code for details. 
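 *
 * For example (a sketch; addr and len are the caller's arguments),
 * minherit(2) reaches this function roughly as:
 *
 *	uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + len,
 *	    MAP_INHERIT_SHARE);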
3703 */ 3704 int 3705 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 3706 vm_inherit_t new_inheritance) 3707 { 3708 struct vm_map_entry *entry; 3709 3710 switch (new_inheritance) { 3711 case MAP_INHERIT_NONE: 3712 case MAP_INHERIT_COPY: 3713 case MAP_INHERIT_SHARE: 3714 case MAP_INHERIT_ZERO: 3715 break; 3716 default: 3717 return (EINVAL); 3718 } 3719 3720 if (start > end) 3721 return EINVAL; 3722 start = MAX(start, map->min_offset); 3723 end = MIN(end, map->max_offset); 3724 if (start >= end) 3725 return 0; 3726 3727 vm_map_lock(map); 3728 3729 entry = uvm_map_entrybyaddr(&map->addr, start); 3730 if (entry->end > start) 3731 UVM_MAP_CLIP_START(map, entry, start); 3732 else 3733 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3734 3735 while (entry != NULL && entry->start < end) { 3736 UVM_MAP_CLIP_END(map, entry, end); 3737 entry->inheritance = new_inheritance; 3738 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3739 } 3740 3741 vm_map_unlock(map); 3742 return (0); 3743 } 3744 3745 /* 3746 * uvm_map_advice: set advice code for range of addrs in map. 3747 * 3748 * => map must be unlocked 3749 */ 3750 int 3751 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 3752 { 3753 struct vm_map_entry *entry; 3754 3755 switch (new_advice) { 3756 case MADV_NORMAL: 3757 case MADV_RANDOM: 3758 case MADV_SEQUENTIAL: 3759 break; 3760 default: 3761 return (EINVAL); 3762 } 3763 3764 if (start > end) 3765 return EINVAL; 3766 start = MAX(start, map->min_offset); 3767 end = MIN(end, map->max_offset); 3768 if (start >= end) 3769 return 0; 3770 3771 vm_map_lock(map); 3772 3773 entry = uvm_map_entrybyaddr(&map->addr, start); 3774 if (entry != NULL && entry->end > start) 3775 UVM_MAP_CLIP_START(map, entry, start); 3776 else if (entry!= NULL) 3777 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3778 3779 /* 3780 * XXXJRT: disallow holes? 3781 */ 3782 while (entry != NULL && entry->start < end) { 3783 UVM_MAP_CLIP_END(map, entry, end); 3784 entry->advice = new_advice; 3785 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 3786 } 3787 3788 vm_map_unlock(map); 3789 return (0); 3790 } 3791 3792 /* 3793 * uvm_map_extract: extract a mapping from a map and put it somewhere 3794 * in the kernel_map, setting protection to max_prot. 3795 * 3796 * => map should be unlocked (we will write lock it and kernel_map) 3797 * => returns 0 on success, error code otherwise 3798 * => start must be page aligned 3799 * => len must be page sized 3800 * => flags: 3801 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 3802 * Mappings are QREF's. 3803 */ 3804 int 3805 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 3806 vaddr_t *dstaddrp, int flags) 3807 { 3808 struct uvm_map_deadq dead; 3809 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 3810 vaddr_t dstaddr; 3811 vaddr_t end; 3812 vaddr_t cp_start; 3813 vsize_t cp_len, cp_off; 3814 int error; 3815 3816 TAILQ_INIT(&dead); 3817 end = start + len; 3818 3819 /* 3820 * Sanity check on the parameters. 3821 * Also, since the mapping may not contain gaps, error out if the 3822 * mapped area is not in source map. 3823 */ 3824 if ((start & (vaddr_t)PAGE_MASK) != 0 || 3825 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 3826 return EINVAL; 3827 if (start < srcmap->min_offset || end > srcmap->max_offset) 3828 return EINVAL; 3829 3830 /* Initialize dead entries. Handle len == 0 case. */ 3831 if (len == 0) 3832 return 0; 3833 3834 /* Acquire lock on srcmap. 
*/ 3835 vm_map_lock(srcmap); 3836 3837 /* Lock srcmap, lookup first and last entry in <start,len>. */ 3838 first = uvm_map_entrybyaddr(&srcmap->addr, start); 3839 3840 /* Check that the range is contiguous. */ 3841 for (entry = first; entry != NULL && entry->end < end; 3842 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3843 if (VMMAP_FREE_END(entry) != entry->end || 3844 UVM_ET_ISHOLE(entry)) { 3845 error = EINVAL; 3846 goto fail; 3847 } 3848 } 3849 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 3850 error = EINVAL; 3851 goto fail; 3852 } 3853 3854 /* 3855 * Handle need-copy flag. 3856 * This may invalidate last, hence the re-initialization during the 3857 * loop. 3858 * 3859 * Also, perform clipping of last if not UVM_EXTRACT_QREF. 3860 */ 3861 for (entry = first; entry != NULL && entry->start < end; 3862 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3863 if (UVM_ET_ISNEEDSCOPY(entry)) 3864 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); 3865 if (UVM_ET_ISNEEDSCOPY(entry)) { 3866 /* 3867 * amap_copy failure 3868 */ 3869 error = ENOMEM; 3870 goto fail; 3871 } 3872 } 3873 3874 /* Lock destination map (kernel_map). */ 3875 vm_map_lock(kernel_map); 3876 3877 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 3878 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 3879 VM_PROT_NONE, 0) != 0) { 3880 error = ENOMEM; 3881 goto fail2; 3882 } 3883 *dstaddrp = dstaddr; 3884 3885 /* 3886 * We now have srcmap and kernel_map locked. 3887 * dstaddr contains the destination offset in dstmap. 3888 */ 3889 /* step 1: start looping through map entries, performing extraction. */ 3890 for (entry = first; entry != NULL && entry->start < end; 3891 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3892 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 3893 if (UVM_ET_ISHOLE(entry)) 3894 continue; 3895 3896 /* Calculate uvm_mapent_clone parameters. */ 3897 cp_start = entry->start; 3898 if (cp_start < start) { 3899 cp_off = start - cp_start; 3900 cp_start = start; 3901 } else 3902 cp_off = 0; 3903 cp_len = MIN(entry->end, end) - cp_start; 3904 3905 newentry = uvm_mapent_clone(kernel_map, 3906 cp_start - start + dstaddr, cp_len, cp_off, 3907 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 3908 if (newentry == NULL) { 3909 error = ENOMEM; 3910 goto fail2_unmap; 3911 } 3912 kernel_map->size += cp_len; 3913 if (flags & UVM_EXTRACT_FIXPROT) 3914 newentry->protection = newentry->max_protection; 3915 3916 /* 3917 * Step 2: perform pmap copy. 3918 * (Doing this in the loop saves one RB traversal.) 3919 */ 3920 pmap_copy(kernel_map->pmap, srcmap->pmap, 3921 cp_start - start + dstaddr, cp_len, cp_start); 3922 } 3923 pmap_update(kernel_map->pmap); 3924 3925 error = 0; 3926 3927 /* Unmap copied entries on failure. */ 3928 fail2_unmap: 3929 if (error) { 3930 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 3931 FALSE, TRUE); 3932 } 3933 3934 /* Release maps, release dead entries. 
*/ 3935 fail2: 3936 vm_map_unlock(kernel_map); 3937 3938 fail: 3939 vm_map_unlock(srcmap); 3940 3941 uvm_unmap_detach(&dead, 0); 3942 3943 return error; 3944 } 3945 3946 /* 3947 * uvm_map_clean: clean out a map range 3948 * 3949 * => valid flags: 3950 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 3951 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 3952 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 3953 * if (flags & PGO_FREE): any cached pages are freed after clean 3954 * => returns an error if any part of the specified range isn't mapped 3955 * => never a need to flush amap layer since the anonymous memory has 3956 * no permanent home, but may deactivate pages there 3957 * => called from sys_msync() and sys_madvise() 3958 * => caller must not write-lock map (read OK). 3959 * => we may sleep while cleaning if SYNCIO [with map read-locked] 3960 */ 3961 int amap_clean_works = 1; /* XXX for now, just in case... */ 3962 3963 int 3964 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 3965 { 3966 struct vm_map_entry *first, *entry; 3967 struct vm_amap *amap; 3968 struct vm_anon *anon; 3969 struct vm_page *pg; 3970 struct uvm_object *uobj; 3971 vaddr_t cp_start, cp_end; 3972 int refs; 3973 int error; 3974 boolean_t rv; 3975 3976 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 3977 (PGO_FREE|PGO_DEACTIVATE)); 3978 3979 if (start > end || start < map->min_offset || end > map->max_offset) 3980 return EINVAL; 3981 3982 vm_map_lock_read(map); 3983 first = uvm_map_entrybyaddr(&map->addr, start); 3984 3985 /* Make a first pass to check for holes. */ 3986 for (entry = first; entry->start < end; 3987 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 3988 if (UVM_ET_ISSUBMAP(entry)) { 3989 vm_map_unlock_read(map); 3990 return EINVAL; 3991 } 3992 if (UVM_ET_ISSUBMAP(entry) || 3993 UVM_ET_ISHOLE(entry) || 3994 (entry->end < end && 3995 VMMAP_FREE_END(entry) != entry->end)) { 3996 vm_map_unlock_read(map); 3997 return EFAULT; 3998 } 3999 } 4000 4001 error = 0; 4002 for (entry = first; entry != NULL && entry->start < end; 4003 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4004 amap = entry->aref.ar_amap; /* top layer */ 4005 if (UVM_ET_ISOBJ(entry)) 4006 uobj = entry->object.uvm_obj; 4007 else 4008 uobj = NULL; 4009 4010 /* 4011 * No amap cleaning necessary if: 4012 * - there's no amap 4013 * - we're not deactivating or freeing pages. 4014 */ 4015 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4016 goto flush_object; 4017 if (!amap_clean_works) 4018 goto flush_object; 4019 4020 cp_start = MAX(entry->start, start); 4021 cp_end = MIN(entry->end, end); 4022 4023 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4024 anon = amap_lookup(&entry->aref, 4025 cp_start - entry->start); 4026 if (anon == NULL) 4027 continue; 4028 4029 pg = anon->an_page; 4030 if (pg == NULL) { 4031 continue; 4032 } 4033 4034 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4035 /* 4036 * XXX In these first 3 cases, we always just 4037 * XXX deactivate the page. We may want to 4038 * XXX handle the different cases more 4039 * XXX specifically, in the future. 
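 * The shared deactivate path below skips loaned and wired pages, then,
 * with the page queues locked, checks that the page is really owned by
 * the anon (PQ_ANON), removes all of its mappings with
 * pmap_page_protect(pg, VM_PROT_NONE) and finally deactivates it.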
4040 */
4041 case PGO_CLEANIT|PGO_FREE:
4042 case PGO_CLEANIT|PGO_DEACTIVATE:
4043 case PGO_DEACTIVATE:
4044 deactivate_it:
4045 /* skip the page if it's loaned or wired */
4046 if (pg->loan_count != 0 ||
4047 pg->wire_count != 0) {
4048 break;
4049 }
4050
4051 uvm_lock_pageq();
4052
4053 /*
4054 * skip the page if it's not actually owned
4055 * by the anon (may simply be loaned to the
4056 * anon).
4057 */
4058 if ((pg->pg_flags & PQ_ANON) == 0) {
4059 KASSERT(pg->uobject == NULL);
4060 uvm_unlock_pageq();
4061 break;
4062 }
4063 KASSERT(pg->uanon == anon);
4064
4065 /* zap all mappings for the page. */
4066 pmap_page_protect(pg, VM_PROT_NONE);
4067
4068 /* ...and deactivate the page. */
4069 uvm_pagedeactivate(pg);
4070
4071 uvm_unlock_pageq();
4072 break;
4073 case PGO_FREE:
4074 /*
4075 * If there are multiple references to
4076 * the amap, just deactivate the page.
4077 */
4078 if (amap_refs(amap) > 1)
4079 goto deactivate_it;
4080
4081 /* XXX skip the page if it's wired */
4082 if (pg->wire_count != 0) {
4083 break;
4084 }
4085 amap_unadd(&entry->aref,
4086 cp_start - entry->start);
4087 refs = --anon->an_ref;
4088 if (refs == 0)
4089 uvm_anfree(anon);
4090 break;
4091 default:
4092 panic("uvm_map_clean: weird flags");
4093 }
4094 }
4095
4096 flush_object:
4097 cp_start = MAX(entry->start, start);
4098 cp_end = MIN(entry->end, end);
4099
4100 /*
4101 * flush pages if we've got a valid backing object.
4102 *
4103 * Don't PGO_FREE if we don't have write permission
4104 * and don't flush if this is a copy-on-write object
4105 * since we can't know our permissions on it.
4106 */
4107 if (uobj != NULL &&
4108 ((flags & PGO_FREE) == 0 ||
4109 ((entry->max_protection & VM_PROT_WRITE) != 0 &&
4110 (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4111 rv = uobj->pgops->pgo_flush(uobj,
4112 cp_start - entry->start + entry->offset,
4113 cp_end - entry->start + entry->offset, flags);
4114
4115 if (rv == FALSE)
4116 error = EFAULT;
4117 }
4118 }
4119
4120 vm_map_unlock_read(map);
4121 return error;
4122 }
4123
4124 /*
4125 * UVM_MAP_CLIP_END implementation
4126 */
4127 void
4128 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4129 {
4130 struct vm_map_entry *tmp;
4131
4132 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4133 tmp = uvm_mapent_alloc(map, 0);
4134
4135 /* Invoke splitentry. */
4136 uvm_map_splitentry(map, entry, tmp, addr);
4137 }
4138
4139 /*
4140 * UVM_MAP_CLIP_START implementation
4141 *
4142 * Clippers are required to not change the pointers to the entry they are
4143 * clipping on.
4144 * Since uvm_map_splitentry turns the original entry into the lowest
4145 * entry (address wise) we do a swap between the new entry and the original
4146 * entry, prior to calling uvm_map_splitentry.
4147 */
4148 void
4149 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4150 {
4151 struct vm_map_entry *tmp;
4152 struct uvm_addr_state *free;
4153
4154 /* Unlink original. */
4155 free = uvm_map_uaddr_e(map, entry);
4156 uvm_mapent_free_remove(map, free, entry);
4157 uvm_mapent_addr_remove(map, entry);
4158
4159 /* Copy entry. */
4160 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4161 tmp = uvm_mapent_alloc(map, 0);
4162 uvm_mapent_copy(entry, tmp);
4163
4164 /* Put new entry in place of original entry. */
4165 uvm_mapent_addr_insert(map, tmp);
4166 uvm_mapent_free_insert(map, free, tmp);
4167
4168 /* Invoke splitentry.
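 * Note the argument order is swapped compared to uvm_map_clip_end():
 * tmp (the copy inserted above) keeps the range below addr, while the
 * caller's entry pointer ends up describing the range starting at addr,
 * which is what UVM_MAP_CLIP_START requires.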
*/ 4169 uvm_map_splitentry(map, tmp, entry, addr); 4170 } 4171 4172 /* 4173 * Boundary fixer. 4174 */ 4175 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4176 static __inline vaddr_t 4177 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4178 { 4179 return (min < bound && max > bound) ? bound : max; 4180 } 4181 4182 /* 4183 * Choose free list based on address at start of free space. 4184 * 4185 * The uvm_addr_state returned contains addr and is the first of: 4186 * - uaddr_exe 4187 * - uaddr_brk_stack 4188 * - uaddr_any 4189 */ 4190 struct uvm_addr_state* 4191 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4192 { 4193 struct uvm_addr_state *uaddr; 4194 int i; 4195 4196 /* Special case the first page, to prevent mmap from returning 0. */ 4197 if (addr < VMMAP_MIN_ADDR) 4198 return NULL; 4199 4200 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4201 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4202 if (addr >= uvm_maxkaddr) 4203 return NULL; 4204 } 4205 4206 /* Is the address inside the exe-only map? */ 4207 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4208 addr < map->uaddr_exe->uaddr_maxaddr) 4209 return map->uaddr_exe; 4210 4211 /* Check if the space falls inside brk/stack area. */ 4212 if ((addr >= map->b_start && addr < map->b_end) || 4213 (addr >= map->s_start && addr < map->s_end)) { 4214 if (map->uaddr_brk_stack != NULL && 4215 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4216 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4217 return map->uaddr_brk_stack; 4218 } else 4219 return NULL; 4220 } 4221 4222 /* 4223 * Check the other selectors. 4224 * 4225 * These selectors are only marked as the owner, if they have insert 4226 * functions. 4227 */ 4228 for (i = 0; i < nitems(map->uaddr_any); i++) { 4229 uaddr = map->uaddr_any[i]; 4230 if (uaddr == NULL) 4231 continue; 4232 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4233 continue; 4234 4235 if (addr >= uaddr->uaddr_minaddr && 4236 addr < uaddr->uaddr_maxaddr) 4237 return uaddr; 4238 } 4239 4240 return NULL; 4241 } 4242 4243 /* 4244 * Choose free list based on address at start of free space. 4245 * 4246 * The uvm_addr_state returned contains addr and is the first of: 4247 * - uaddr_exe 4248 * - uaddr_brk_stack 4249 * - uaddr_any 4250 */ 4251 struct uvm_addr_state* 4252 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4253 { 4254 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4255 } 4256 4257 /* 4258 * Returns the first free-memory boundary that is crossed by [min-max]. 4259 */ 4260 vsize_t 4261 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4262 { 4263 struct uvm_addr_state *uaddr; 4264 int i; 4265 4266 /* Never return first page. */ 4267 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4268 4269 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4270 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4271 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4272 4273 /* Check for exe-only boundaries. */ 4274 if (map->uaddr_exe != NULL) { 4275 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4276 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4277 } 4278 4279 /* Check for exe-only boundaries. */ 4280 if (map->uaddr_brk_stack != NULL) { 4281 max = uvm_map_boundfix(min, max, 4282 map->uaddr_brk_stack->uaddr_minaddr); 4283 max = uvm_map_boundfix(min, max, 4284 map->uaddr_brk_stack->uaddr_maxaddr); 4285 } 4286 4287 /* Check other boundaries. 
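 * The smallest boundary that survives all of these clippings is what
 * gets returned; uvm_map_fix_space() uses it (as lmax) to split free
 * space into chunks that each lie within a single address selector.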
*/
4288 for (i = 0; i < nitems(map->uaddr_any); i++) {
4289 uaddr = map->uaddr_any[i];
4290 if (uaddr != NULL) {
4291 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4292 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4293 }
4294 }
4295
4296 /* Boundaries at stack and brk() area. */
4297 max = uvm_map_boundfix(min, max, map->s_start);
4298 max = uvm_map_boundfix(min, max, map->s_end);
4299 max = uvm_map_boundfix(min, max, map->b_start);
4300 max = uvm_map_boundfix(min, max, map->b_end);
4301
4302 return max;
4303 }
4304
4305 /*
4306 * Update map allocation start and end addresses from proc vmspace.
4307 */
4308 void
4309 uvm_map_vmspace_update(struct vm_map *map,
4310 struct uvm_map_deadq *dead, int flags)
4311 {
4312 struct vmspace *vm;
4313 vaddr_t b_start, b_end, s_start, s_end;
4314
4315 KASSERT(map->flags & VM_MAP_ISVMSPACE);
4316 KASSERT(offsetof(struct vmspace, vm_map) == 0);
4317
4318 /*
4319 * Derive actual allocation boundaries from vmspace.
4320 */
4321 vm = (struct vmspace *)map;
4322 b_start = (vaddr_t)vm->vm_daddr;
4323 b_end = b_start + BRKSIZ;
4324 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4325 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4326 #ifdef DIAGNOSTIC
4327 if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4328 (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4329 (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4330 (s_end & (vaddr_t)PAGE_MASK) != 0) {
4331 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4332 "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4333 vm, b_start, b_end, s_start, s_end);
4334 }
4335 #endif
4336
4337 if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4338 map->s_start == s_start && map->s_end == s_end))
4339 return;
4340
4341 uvm_map_freelist_update(map, dead, b_start, b_end,
4342 s_start, s_end, flags);
4343 }
4344
4345 /*
4346 * Grow kernel memory.
4347 *
4348 * This function is only called for kernel maps when an allocation fails.
4349 *
4350 * If the map has a gap that is large enough to accommodate alloc_sz, this
4351 * function will make sure map->free will include it.
4352 */
4353 void
4354 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4355 vsize_t alloc_sz, int flags)
4356 {
4357 vsize_t sz;
4358 vaddr_t end;
4359 struct vm_map_entry *entry;
4360
4361 /* Kernel memory only. */
4362 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4363 /* Destroy free list. */
4364 uvm_map_freelist_update_clear(map, dead);
4365
4366 /* Include the guard page in the hard minimum requirement of alloc_sz. */
4367 if (map->flags & VM_MAP_GUARDPAGES)
4368 alloc_sz += PAGE_SIZE;
4369
4370 /*
4371 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4372 *
4373 * Don't handle the case where the multiplication overflows:
4374 * if that happens, the allocation is probably too big anyway.
4375 */
4376 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4377
4378 /*
4379 * Walk forward until a gap large enough for alloc_sz shows up.
4380 *
4381 * We assume the kernel map has no boundaries.
4382 * uvm_maxkaddr may be zero.
4383 */
4384 end = MAX(uvm_maxkaddr, map->min_offset);
4385 entry = uvm_map_entrybyaddr(&map->addr, end);
4386 while (entry && entry->fspace < alloc_sz)
4387 entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4388 if (entry) {
4389 end = MAX(VMMAP_FREE_START(entry), end);
4390 end += MIN(sz, map->max_offset - end);
4391 } else
4392 end = map->max_offset;
4393
4394 /* Reserve pmap entries.
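 * uvm_maxkaddr is advanced to the end address chosen above; where
 * PMAP_GROWKERNEL is available the pmap is asked to back the new range
 * via pmap_growkernel(), otherwise the address is simply recorded.
 * The freelists are rebuilt afterwards so the grown range becomes
 * visible to the allocators.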
*/ 4395 #ifdef PMAP_GROWKERNEL 4396 uvm_maxkaddr = pmap_growkernel(end); 4397 #else 4398 uvm_maxkaddr = end; 4399 #endif 4400 4401 /* Rebuild free list. */ 4402 uvm_map_freelist_update_refill(map, flags); 4403 } 4404 4405 /* 4406 * Freelist update subfunction: unlink all entries from freelists. 4407 */ 4408 void 4409 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4410 { 4411 struct uvm_addr_state *free; 4412 struct vm_map_entry *entry, *prev, *next; 4413 4414 prev = NULL; 4415 for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL; 4416 entry = next) { 4417 next = RB_NEXT(uvm_map_addr, &map->addr, entry); 4418 4419 free = uvm_map_uaddr_e(map, entry); 4420 uvm_mapent_free_remove(map, free, entry); 4421 4422 if (prev != NULL && entry->start == entry->end) { 4423 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4424 uvm_mapent_addr_remove(map, entry); 4425 DEAD_ENTRY_PUSH(dead, entry); 4426 } else 4427 prev = entry; 4428 } 4429 } 4430 4431 /* 4432 * Freelist update subfunction: refill the freelists with entries. 4433 */ 4434 void 4435 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4436 { 4437 struct vm_map_entry *entry; 4438 vaddr_t min, max; 4439 4440 RB_FOREACH(entry, uvm_map_addr, &map->addr) { 4441 min = VMMAP_FREE_START(entry); 4442 max = VMMAP_FREE_END(entry); 4443 entry->fspace = 0; 4444 4445 entry = uvm_map_fix_space(map, entry, min, max, flags); 4446 } 4447 4448 uvm_tree_sanity(map, __FILE__, __LINE__); 4449 } 4450 4451 /* 4452 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4453 */ 4454 void 4455 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4456 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4457 { 4458 KDASSERT(b_end >= b_start && s_end >= s_start); 4459 4460 /* Clear all free lists. */ 4461 uvm_map_freelist_update_clear(map, dead); 4462 4463 /* Apply new bounds. */ 4464 map->b_start = b_start; 4465 map->b_end = b_end; 4466 map->s_start = s_start; 4467 map->s_end = s_end; 4468 4469 /* Refill free lists. */ 4470 uvm_map_freelist_update_refill(map, flags); 4471 } 4472 4473 /* 4474 * Assign a uvm_addr_state to the specified pointer in vm_map. 4475 * 4476 * May sleep. 4477 */ 4478 void 4479 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4480 struct uvm_addr_state *newval) 4481 { 4482 struct uvm_map_deadq dead; 4483 4484 /* Pointer which must be in this map. */ 4485 KASSERT(which != NULL); 4486 KASSERT((void*)map <= (void*)(which) && 4487 (void*)(which) < (void*)(map + 1)); 4488 4489 vm_map_lock(map); 4490 TAILQ_INIT(&dead); 4491 uvm_map_freelist_update_clear(map, &dead); 4492 4493 uvm_addr_destroy(*which); 4494 *which = newval; 4495 4496 uvm_map_freelist_update_refill(map, 0); 4497 vm_map_unlock(map); 4498 uvm_unmap_detach(&dead, 0); 4499 } 4500 4501 /* 4502 * Correct space insert. 4503 * 4504 * Entry must not be on any freelist. 4505 */ 4506 struct vm_map_entry* 4507 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4508 vaddr_t min, vaddr_t max, int flags) 4509 { 4510 struct uvm_addr_state *free, *entfree; 4511 vaddr_t lmax; 4512 4513 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4514 KDASSERT(min <= max); 4515 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4516 min == map->min_offset); 4517 4518 /* 4519 * During the function, entfree will always point at the uaddr state 4520 * for entry. 4521 */ 4522 entfree = (entry == NULL ? 
NULL : 4523 uvm_map_uaddr_e(map, entry)); 4524 4525 while (min != max) { 4526 /* Claim guard page for entry. */ 4527 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4528 VMMAP_FREE_END(entry) == entry->end && 4529 entry->start != entry->end) { 4530 if (max - min == 2 * PAGE_SIZE) { 4531 /* 4532 * If the free-space gap is exactly 2 pages, 4533 * we make the guard 2 pages instead of 1. 4534 * Because in a guarded map, an area needs 4535 * at least 2 pages to allocate from: 4536 * one page for the allocation and one for 4537 * the guard. 4538 */ 4539 entry->guard = 2 * PAGE_SIZE; 4540 min = max; 4541 } else { 4542 entry->guard = PAGE_SIZE; 4543 min += PAGE_SIZE; 4544 } 4545 continue; 4546 } 4547 4548 /* 4549 * Handle the case where entry has a 2-page guard, but the 4550 * space after entry is freed. 4551 */ 4552 if (entry != NULL && entry->fspace == 0 && 4553 entry->guard > PAGE_SIZE) { 4554 entry->guard = PAGE_SIZE; 4555 min = VMMAP_FREE_START(entry); 4556 } 4557 4558 lmax = uvm_map_boundary(map, min, max); 4559 free = uvm_map_uaddr(map, min); 4560 4561 /* 4562 * Entries are merged if they point at the same uvm_free(). 4563 * Exception to that rule: if min == uvm_maxkaddr, a new 4564 * entry is started regardless (otherwise the allocators 4565 * will get confused). 4566 */ 4567 if (entry != NULL && free == entfree && 4568 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 4569 min == uvm_maxkaddr)) { 4570 KDASSERT(VMMAP_FREE_END(entry) == min); 4571 entry->fspace += lmax - min; 4572 } else { 4573 /* 4574 * Commit entry to free list: it'll not be added to 4575 * anymore. 4576 * We'll start a new entry and add to that entry 4577 * instead. 4578 */ 4579 if (entry != NULL) 4580 uvm_mapent_free_insert(map, entfree, entry); 4581 4582 /* New entry for new uaddr. */ 4583 entry = uvm_mapent_alloc(map, flags); 4584 KDASSERT(entry != NULL); 4585 entry->end = entry->start = min; 4586 entry->guard = 0; 4587 entry->fspace = lmax - min; 4588 entry->object.uvm_obj = NULL; 4589 entry->offset = 0; 4590 entry->etype = 0; 4591 entry->protection = entry->max_protection = 0; 4592 entry->inheritance = 0; 4593 entry->wired_count = 0; 4594 entry->advice = 0; 4595 entry->aref.ar_pageoff = 0; 4596 entry->aref.ar_amap = NULL; 4597 uvm_mapent_addr_insert(map, entry); 4598 4599 entfree = free; 4600 } 4601 4602 min = lmax; 4603 } 4604 /* Finally put entry on the uaddr state. */ 4605 if (entry != NULL) 4606 uvm_mapent_free_insert(map, entfree, entry); 4607 4608 return entry; 4609 } 4610 4611 /* 4612 * MQuery style of allocation. 4613 * 4614 * This allocator searches forward until sufficient space is found to map 4615 * the given size. 4616 * 4617 * XXX: factor in offset (via pmap_prefer) and protection? 4618 */ 4619 int 4620 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 4621 int flags) 4622 { 4623 struct vm_map_entry *entry, *last; 4624 vaddr_t addr; 4625 vaddr_t tmp, pmap_align, pmap_offset; 4626 int error; 4627 4628 addr = *addr_p; 4629 vm_map_lock_read(map); 4630 4631 /* Configure pmap prefer. */ 4632 if (offset != UVM_UNKNOWN_OFFSET) { 4633 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 4634 pmap_offset = PMAP_PREFER_OFFSET(offset); 4635 } else { 4636 pmap_align = PAGE_SIZE; 4637 pmap_offset = 0; 4638 } 4639 4640 /* Align address to pmap_prefer unless FLAG_FIXED is set. 
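 * A worked example with purely hypothetical numbers: for
 * pmap_align == 0x10000 and pmap_offset == 0x3000, an addr of
 * 0x12345000 is masked to 0x12340000, OR'd with the offset to give
 * 0x12343000 and, since that lies below addr, bumped by pmap_align to
 * 0x12353000: the first address at or above addr with the preferred
 * alignment and offset.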
*/ 4641 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 4642 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4643 if (tmp < addr) 4644 tmp += pmap_align; 4645 addr = tmp; 4646 } 4647 4648 /* First, check if the requested range is fully available. */ 4649 entry = uvm_map_entrybyaddr(&map->addr, addr); 4650 last = NULL; 4651 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4652 error = 0; 4653 goto out; 4654 } 4655 if (flags & UVM_FLAG_FIXED) { 4656 error = EINVAL; 4657 goto out; 4658 } 4659 4660 error = ENOMEM; /* Default error from here. */ 4661 4662 /* 4663 * At this point, the memory at <addr, sz> is not available. 4664 * The reasons are: 4665 * [1] it's outside the map, 4666 * [2] it starts in used memory (and therefore needs to move 4667 * toward the first free page in entry), 4668 * [3] it starts in free memory but bumps into used memory. 4669 * 4670 * Note that for case [2], the forward moving is handled by the 4671 * for loop below. 4672 */ 4673 if (entry == NULL) { 4674 /* [1] Outside the map. */ 4675 if (addr >= map->max_offset) 4676 goto out; 4677 else 4678 entry = RB_MIN(uvm_map_addr, &map->addr); 4679 } else if (VMMAP_FREE_START(entry) <= addr) { 4680 /* [3] Bumped into used memory. */ 4681 entry = RB_NEXT(uvm_map_addr, &map->addr, entry); 4682 } 4683 4684 /* Test if the next entry is sufficient for the allocation. */ 4685 for (; entry != NULL; 4686 entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) { 4687 if (entry->fspace == 0) 4688 continue; 4689 addr = VMMAP_FREE_START(entry); 4690 4691 restart: /* Restart address checks on address change. */ 4692 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4693 if (tmp < addr) 4694 tmp += pmap_align; 4695 addr = tmp; 4696 if (addr >= VMMAP_FREE_END(entry)) 4697 continue; 4698 4699 /* Skip brk() allocation addresses. */ 4700 if (addr + sz > map->b_start && addr < map->b_end) { 4701 if (VMMAP_FREE_END(entry) > map->b_end) { 4702 addr = map->b_end; 4703 goto restart; 4704 } else 4705 continue; 4706 } 4707 /* Skip stack allocation addresses. */ 4708 if (addr + sz > map->s_start && addr < map->s_end) { 4709 if (VMMAP_FREE_END(entry) > map->s_end) { 4710 addr = map->s_end; 4711 goto restart; 4712 } else 4713 continue; 4714 } 4715 4716 last = NULL; 4717 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4718 error = 0; 4719 goto out; 4720 } 4721 } 4722 4723 out: 4724 vm_map_unlock_read(map); 4725 if (error == 0) 4726 *addr_p = addr; 4727 return error; 4728 } 4729 4730 /* 4731 * Determine allocation bias. 4732 * 4733 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 4734 * addresses, or 0 for no bias. 4735 * The bias mechanism is intended to avoid clashing with brk() and stack 4736 * areas. 4737 */ 4738 int 4739 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 4740 { 4741 vaddr_t start, end; 4742 4743 start = VMMAP_FREE_START(entry); 4744 end = VMMAP_FREE_END(entry); 4745 4746 /* Stay at the top of brk() area. */ 4747 if (end >= map->b_start && start < map->b_end) 4748 return 1; 4749 /* Stay at the far end of the stack area. */ 4750 if (end >= map->s_start && start < map->s_end) { 4751 #ifdef MACHINE_STACK_GROWS_UP 4752 return 1; 4753 #else 4754 return -1; 4755 #endif 4756 } 4757 4758 /* No bias, this area is meant for us. 
*/ 4759 return 0; 4760 } 4761 4762 4763 boolean_t 4764 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 4765 { 4766 boolean_t rv; 4767 4768 if (map->flags & VM_MAP_INTRSAFE) { 4769 rv = TRUE; 4770 } else { 4771 if (map->flags & VM_MAP_BUSY) { 4772 return (FALSE); 4773 } 4774 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 4775 } 4776 4777 if (rv) { 4778 map->timestamp++; 4779 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4780 uvm_tree_sanity(map, file, line); 4781 uvm_tree_size_chk(map, file, line); 4782 } 4783 4784 return (rv); 4785 } 4786 4787 void 4788 vm_map_lock_ln(struct vm_map *map, char *file, int line) 4789 { 4790 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 4791 do { 4792 while (map->flags & VM_MAP_BUSY) { 4793 map->flags |= VM_MAP_WANTLOCK; 4794 tsleep(&map->flags, PVM, (char *)vmmapbsy, 0); 4795 } 4796 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 4797 } 4798 4799 map->timestamp++; 4800 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4801 uvm_tree_sanity(map, file, line); 4802 uvm_tree_size_chk(map, file, line); 4803 } 4804 4805 void 4806 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 4807 { 4808 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4809 rw_enter_read(&map->lock); 4810 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4811 uvm_tree_sanity(map, file, line); 4812 uvm_tree_size_chk(map, file, line); 4813 } 4814 4815 void 4816 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 4817 { 4818 uvm_tree_sanity(map, file, line); 4819 uvm_tree_size_chk(map, file, line); 4820 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4821 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4822 rw_exit(&map->lock); 4823 } 4824 4825 void 4826 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 4827 { 4828 /* XXX: RO */ uvm_tree_sanity(map, file, line); 4829 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 4830 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4831 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4832 rw_exit_read(&map->lock); 4833 } 4834 4835 void 4836 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 4837 { 4838 uvm_tree_sanity(map, file, line); 4839 uvm_tree_size_chk(map, file, line); 4840 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4841 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4842 if ((map->flags & VM_MAP_INTRSAFE) == 0) 4843 rw_enter(&map->lock, RW_DOWNGRADE); 4844 } 4845 4846 void 4847 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 4848 { 4849 /* XXX: RO */ uvm_tree_sanity(map, file, line); 4850 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 4851 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 4852 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 4853 rw_exit_read(&map->lock); 4854 rw_enter_write(&map->lock); 4855 } 4856 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 4857 uvm_tree_sanity(map, file, line); 4858 } 4859 4860 void 4861 vm_map_busy_ln(struct vm_map *map, char *file, int line) 4862 { 4863 map->flags |= VM_MAP_BUSY; 4864 } 4865 4866 void 4867 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 4868 { 4869 int oflags; 4870 4871 oflags = map->flags; 4872 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 4873 if (oflags & VM_MAP_WANTLOCK) 4874 wakeup(&map->flags); 4875 } 4876 4877 4878 #undef RB_AUGMENT 4879 #define RB_AUGMENT(x) uvm_map_addr_augment((x)) 4880 RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 4881 uvm_mapentry_addrcmp); 4882 #undef RB_AUGMENT 4883 4884 4885 /* 4886 * MD 
code: vmspace allocator setup. 4887 */ 4888 4889 #ifdef __i386__ 4890 void 4891 uvm_map_setup_md(struct vm_map *map) 4892 { 4893 vaddr_t min, max; 4894 4895 min = map->min_offset; 4896 max = map->max_offset; 4897 4898 /* 4899 * Ensure the selectors will not try to manage page 0; 4900 * it's too special. 4901 */ 4902 if (min < VMMAP_MIN_ADDR) 4903 min = VMMAP_MIN_ADDR; 4904 4905 #if 0 /* Cool stuff, not yet */ 4906 /* Hinted allocations. */ 4907 map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, 4908 1024 * 1024 * 1024); 4909 4910 /* Executable code is special. */ 4911 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 4912 /* Place normal allocations beyond executable mappings. */ 4913 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 4914 #else /* Crappy stuff, for now */ 4915 map->uaddr_any[0] = uaddr_rnd_create(min, max); 4916 #endif 4917 4918 #ifndef SMALL_KERNEL 4919 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 4920 #endif /* !SMALL_KERNEL */ 4921 } 4922 #elif __LP64__ 4923 void 4924 uvm_map_setup_md(struct vm_map *map) 4925 { 4926 vaddr_t min, max; 4927 4928 min = map->min_offset; 4929 max = map->max_offset; 4930 4931 /* 4932 * Ensure the selectors will not try to manage page 0; 4933 * it's too special. 4934 */ 4935 if (min < VMMAP_MIN_ADDR) 4936 min = VMMAP_MIN_ADDR; 4937 4938 #if 0 /* Cool stuff, not yet */ 4939 /* Hinted allocations above 4GB */ 4940 map->uaddr_any[0] = 4941 uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024); 4942 /* Hinted allocations below 4GB */ 4943 map->uaddr_any[1] = 4944 uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL, 4945 1024 * 1024 * 1024); 4946 /* Normal allocations, always above 4GB */ 4947 map->uaddr_any[3] = 4948 uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 4949 #else /* Crappy stuff, for now */ 4950 map->uaddr_any[0] = uaddr_rnd_create(min, max); 4951 #endif 4952 4953 #ifndef SMALL_KERNEL 4954 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 4955 #endif /* !SMALL_KERNEL */ 4956 } 4957 #else /* non-i386, 32 bit */ 4958 void 4959 uvm_map_setup_md(struct vm_map *map) 4960 { 4961 vaddr_t min, max; 4962 4963 min = map->min_offset; 4964 max = map->max_offset; 4965 4966 /* 4967 * Ensure the selectors will not try to manage page 0; 4968 * it's too special. 4969 */ 4970 if (min < VMMAP_MIN_ADDR) 4971 min = VMMAP_MIN_ADDR; 4972 4973 #if 0 /* Cool stuff, not yet */ 4974 /* Hinted allocations. */ 4975 map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max, 4976 1024 * 1024 * 1024); 4977 /* Normal allocations. */ 4978 map->uaddr_any[3] = uaddr_pivot_create(min, max); 4979 #else /* Crappy stuff, for now */ 4980 map->uaddr_any[0] = uaddr_rnd_create(min, max); 4981 #endif 4982 4983 #ifndef SMALL_KERNEL 4984 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 4985 #endif /* !SMALL_KERNEL */ 4986 } 4987 #endif 4988