1 /* $OpenBSD: uvm_map.c,v 1.233 2017/11/30 00:36:10 guenther Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/mman.h> 90 #include <sys/proc.h> 91 #include <sys/malloc.h> 92 #include <sys/pool.h> 93 #include <sys/sysctl.h> 94 95 #ifdef SYSVSHM 96 #include <sys/shm.h> 97 #endif 98 99 #include <uvm/uvm.h> 100 101 #ifdef DDB 102 #include <uvm/uvm_ddb.h> 103 #endif 104 105 #include <uvm/uvm_addr.h> 106 107 108 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 109 int uvm_mapent_isjoinable(struct vm_map*, 110 struct vm_map_entry*, struct vm_map_entry*); 111 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 112 struct vm_map_entry*, struct uvm_map_deadq*); 113 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 114 struct vm_map_entry*, struct uvm_map_deadq*); 115 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 116 struct vm_map_entry*, vaddr_t, vsize_t, int, 117 struct uvm_map_deadq*, struct vm_map_entry*); 118 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 119 void uvm_mapent_free(struct vm_map_entry*); 120 void uvm_unmap_kill_entry(struct vm_map*, 121 struct vm_map_entry*); 122 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 123 void uvm_mapent_mkfree(struct vm_map*, 124 struct vm_map_entry*, struct vm_map_entry**, 125 struct uvm_map_deadq*, boolean_t); 126 void uvm_map_pageable_pgon(struct vm_map*, 127 struct vm_map_entry*, struct vm_map_entry*, 128 vaddr_t, vaddr_t); 129 int uvm_map_pageable_wire(struct vm_map*, 130 struct vm_map_entry*, struct vm_map_entry*, 131 vaddr_t, vaddr_t, int); 132 void uvm_map_setup_entries(struct vm_map*); 133 void uvm_map_setup_md(struct vm_map*); 134 void uvm_map_teardown(struct vm_map*); 135 void uvm_map_vmspace_update(struct vm_map*, 136 struct uvm_map_deadq*, int); 137 void uvm_map_kmem_grow(struct vm_map*, 138 struct uvm_map_deadq*, vsize_t, int); 139 void uvm_map_freelist_update_clear(struct vm_map*, 140 struct uvm_map_deadq*); 141 void uvm_map_freelist_update_refill(struct vm_map *, int); 142 void uvm_map_freelist_update(struct vm_map*, 143 struct uvm_map_deadq*, vaddr_t, vaddr_t, 144 vaddr_t, vaddr_t, int); 145 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 146 vaddr_t, vaddr_t, int); 147 int uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int, 148 struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t, 149 int); 150 int uvm_map_findspace(struct vm_map*, 151 struct vm_map_entry**, struct vm_map_entry**, 152 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 153 vaddr_t); 154 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 155 void uvm_map_addr_augment(struct vm_map_entry*); 156 157 /* 158 * Tree management functions. 
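 * (Prototypes only: free-list insertion/removal, address-tree
 * insertion/removal, entry splitting and the boundary/bias helpers;
 * the definitions follow further down in this file.)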
 */

static __inline void	 uvm_mapent_copy(struct vm_map_entry*,
			    struct vm_map_entry*);
static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
			    const struct vm_map_entry*);
void			 uvm_mapent_free_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_free_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_addr_insert(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_mapent_addr_remove(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_map_splitentry(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t);
vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);

/*
 * uvm_vmspace_fork helper functions.
 */
struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t,
			    struct vm_map_entry*, struct uvm_map_deadq*, int,
			    int);
struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);

/*
 * Tree validation.
 */
#ifdef VMMAP_DEBUG
void			 uvm_tree_assert(struct vm_map*, int, char*,
			    char*, int);
#define UVM_ASSERT(map, cond, file, line)				\
	uvm_tree_assert((map), (cond), #cond, (file), (line))
void			 uvm_tree_sanity(struct vm_map*, char*, int);
void			 uvm_tree_size_chk(struct vm_map*, char*, int);
void			 vmspace_validate(struct vm_map*);
#else
#define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
#define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
#define vmspace_validate(_map)				do {} while (0)
#endif

/*
 * All architectures will have pmap_prefer.
 */
#ifndef PMAP_PREFER
#define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
#define PMAP_PREFER_OFFSET(off)	0
#define PMAP_PREFER(addr, off)	(addr)
#endif

/*
 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
 *
 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size
 * each time.
 */
#define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_ALLOCMUL	4
/*
 * When selecting a random free-space block, look at most FSPACE_DELTA blocks
 * ahead.
 */
#define FSPACE_DELTA		8
/*
 * Put allocations adjacent to previous allocations when the free-space tree
 * is larger than FSPACE_COMPACT entries.
 *
 * Alignment and PMAP_PREFER may still cause the entry to not be fully
 * adjacent. Note that this strategy reduces memory fragmentation (by leaving
 * a large space before or after the allocation).
 */
#define FSPACE_COMPACT		128
/*
 * Make the address selection skip at most this many bytes from the start of
 * the free space in which the allocation takes place.
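 *
 * (Illustrative sketch, numbers not from this file: for a 1 GB free
 * span and a single-page allocation, the random skip from the start
 * of the span is capped at FSPACE_MAXOFF, i.e. 32 MB, instead of
 * ranging over the whole gigabyte.)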
253 * 254 * The main idea behind a randomized address space is that an attacker cannot 255 * know where to target his attack. Therefore, the location of objects must be 256 * as random as possible. However, the goal is not to create the most sparse 257 * map that is possible. 258 * FSPACE_MAXOFF pushes the considered range in bytes down to less insane 259 * sizes, thereby reducing the sparseness. The biggest randomization comes 260 * from fragmentation, i.e. FSPACE_COMPACT. 261 */ 262 #define FSPACE_MAXOFF ((vaddr_t)32 * 1024 * 1024) 263 /* 264 * Allow for small gaps in the overflow areas. 265 * Gap size is in bytes and does not have to be a multiple of page-size. 266 */ 267 #define FSPACE_BIASGAP ((vaddr_t)32 * 1024) 268 269 /* auto-allocate address lower bound */ 270 #define VMMAP_MIN_ADDR PAGE_SIZE 271 272 273 #ifdef DEADBEEF0 274 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 275 #else 276 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 277 #endif 278 279 #ifdef DEBUG 280 int uvm_map_printlocks = 0; 281 282 #define LPRINTF(_args) \ 283 do { \ 284 if (uvm_map_printlocks) \ 285 printf _args; \ 286 } while (0) 287 #else 288 #define LPRINTF(_args) do {} while (0) 289 #endif 290 291 static struct mutex uvm_kmapent_mtx; 292 static struct timeval uvm_kmapent_last_warn_time; 293 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 294 295 const char vmmapbsy[] = "vmmapbsy"; 296 297 /* 298 * pool for vmspace structures. 299 */ 300 struct pool uvm_vmspace_pool; 301 302 /* 303 * pool for dynamically-allocated map entries. 304 */ 305 struct pool uvm_map_entry_pool; 306 struct pool uvm_map_entry_kmem_pool; 307 308 /* 309 * This global represents the end of the kernel virtual address 310 * space. If we want to exceed this, we must grow the kernel 311 * virtual address space dynamically. 312 * 313 * Note, this variable is locked by kernel_map's lock. 314 */ 315 vaddr_t uvm_maxkaddr; 316 317 /* 318 * Locking predicate. 319 */ 320 #define UVM_MAP_REQ_WRITE(_map) \ 321 do { \ 322 if ((_map)->ref_count > 0) { \ 323 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 324 rw_assert_wrlock(&(_map)->lock); \ 325 else \ 326 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 327 } \ 328 } while (0) 329 330 /* 331 * Tree describing entries by address. 332 * 333 * Addresses are unique. 334 * Entries with start == end may only exist if they are the first entry 335 * (sorted by address) within a free-memory tree. 336 */ 337 338 static inline int 339 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 340 const struct vm_map_entry *e2) 341 { 342 return e1->start < e2->start ? -1 : e1->start > e2->start; 343 } 344 345 /* 346 * Copy mapentry. 347 */ 348 static __inline void 349 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 350 { 351 caddr_t csrc, cdst; 352 size_t sz; 353 354 csrc = (caddr_t)src; 355 cdst = (caddr_t)dst; 356 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 357 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 358 359 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 360 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 361 memcpy(cdst, csrc, sz); 362 } 363 364 /* 365 * Handle free-list insertion. 366 */ 367 void 368 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 369 struct vm_map_entry *entry) 370 { 371 const struct uvm_addr_functions *fun; 372 #ifdef VMMAP_DEBUG 373 vaddr_t min, max, bound; 374 #endif 375 376 #ifdef VMMAP_DEBUG 377 /* 378 * Boundary check. 
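 * (The debug loop below walks the free range in uvm_map_boundary()
 * steps and asserts that every piece resolves to the same uaddr, so
 * an entry's free space never spans two free lists.)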
379 * Boundaries are folded if they go on the same free list. 380 */ 381 min = VMMAP_FREE_START(entry); 382 max = VMMAP_FREE_END(entry); 383 384 while (min < max) { 385 bound = uvm_map_boundary(map, min, max); 386 KASSERT(uvm_map_uaddr(map, min) == uaddr); 387 min = bound; 388 } 389 #endif 390 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 391 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 392 393 UVM_MAP_REQ_WRITE(map); 394 395 /* Actual insert: forward to uaddr pointer. */ 396 if (uaddr != NULL) { 397 fun = uaddr->uaddr_functions; 398 KDASSERT(fun != NULL); 399 if (fun->uaddr_free_insert != NULL) 400 (*fun->uaddr_free_insert)(map, uaddr, entry); 401 entry->etype |= UVM_ET_FREEMAPPED; 402 } 403 404 /* Update fspace augmentation. */ 405 uvm_map_addr_augment(entry); 406 } 407 408 /* 409 * Handle free-list removal. 410 */ 411 void 412 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 413 struct vm_map_entry *entry) 414 { 415 const struct uvm_addr_functions *fun; 416 417 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 418 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 419 UVM_MAP_REQ_WRITE(map); 420 421 if (uaddr != NULL) { 422 fun = uaddr->uaddr_functions; 423 if (fun->uaddr_free_remove != NULL) 424 (*fun->uaddr_free_remove)(map, uaddr, entry); 425 entry->etype &= ~UVM_ET_FREEMAPPED; 426 } 427 } 428 429 /* 430 * Handle address tree insertion. 431 */ 432 void 433 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 434 { 435 struct vm_map_entry *res; 436 437 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 438 panic("uvm_mapent_addr_insert: entry still in addr list"); 439 KDASSERT(entry->start <= entry->end); 440 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 441 (entry->end & (vaddr_t)PAGE_MASK) == 0); 442 443 UVM_MAP_REQ_WRITE(map); 444 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 445 if (res != NULL) { 446 panic("uvm_mapent_addr_insert: map %p entry %p " 447 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 448 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 449 map, entry, 450 entry->start, entry->end, entry->guard, entry->fspace, 451 res, res->start, res->end, res->guard, res->fspace); 452 } 453 } 454 455 /* 456 * Handle address tree removal. 457 */ 458 void 459 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 460 { 461 struct vm_map_entry *res; 462 463 UVM_MAP_REQ_WRITE(map); 464 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 465 if (res != entry) 466 panic("uvm_mapent_addr_remove"); 467 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 468 } 469 470 /* 471 * uvm_map_reference: add reference to a map 472 * 473 * XXX check map reference counter lock 474 */ 475 #define uvm_map_reference(_map) \ 476 do { \ 477 map->ref_count++; \ 478 } while (0) 479 480 /* 481 * Calculate the dused delta. 482 */ 483 vsize_t 484 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 485 { 486 struct vmspace *vm; 487 vsize_t sz; 488 vaddr_t lmax; 489 vaddr_t stack_begin, stack_end; /* Position of stack. 
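 * Illustrative example of the computation below, with hypothetical
 * addresses: for stack_begin = 0x1000000, stack_end = 0x1800000,
 * min = 0xff0000 and max = 0x1810000, only the 0x10000 bytes below
 * the stack and the 0x10000 bytes above it are charged; the stack
 * range itself is skipped, so the result is 0x20000 >> PAGE_SHIFT
 * pages.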
*/ 490 491 KASSERT(map->flags & VM_MAP_ISVMSPACE); 492 vm = (struct vmspace *)map; 493 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 494 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 495 496 sz = 0; 497 while (min != max) { 498 lmax = max; 499 if (min < stack_begin && lmax > stack_begin) 500 lmax = stack_begin; 501 else if (min < stack_end && lmax > stack_end) 502 lmax = stack_end; 503 504 if (min >= stack_begin && min < stack_end) { 505 /* nothing */ 506 } else 507 sz += lmax - min; 508 min = lmax; 509 } 510 511 return sz >> PAGE_SHIFT; 512 } 513 514 /* 515 * Find the entry describing the given address. 516 */ 517 struct vm_map_entry* 518 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 519 { 520 struct vm_map_entry *iter; 521 522 iter = RBT_ROOT(uvm_map_addr, atree); 523 while (iter != NULL) { 524 if (iter->start > addr) 525 iter = RBT_LEFT(uvm_map_addr, iter); 526 else if (VMMAP_FREE_END(iter) <= addr) 527 iter = RBT_RIGHT(uvm_map_addr, iter); 528 else 529 return iter; 530 } 531 return NULL; 532 } 533 534 /* 535 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 536 * 537 * Push dead entries into a linked list. 538 * Since the linked list abuses the address tree for storage, the entry 539 * may not be linked in a map. 540 * 541 * *head must be initialized to NULL before the first call to this macro. 542 * uvm_unmap_detach(*head, 0) will remove dead entries. 543 */ 544 static __inline void 545 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 546 { 547 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 548 } 549 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 550 dead_entry_push((_headptr), (_entry)) 551 552 /* 553 * Helper function for uvm_map_findspace_tree. 554 * 555 * Given allocation constraints and pmap constraints, finds the 556 * lowest and highest address in a range that can be used for the 557 * allocation. 558 * 559 * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs. 560 * 561 * 562 * Big chunk of math with a seasoning of dragons. 563 */ 564 int 565 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg, 566 struct vm_map_entry *sel, vaddr_t align, 567 vaddr_t pmap_align, vaddr_t pmap_off, int bias) 568 { 569 vaddr_t sel_min, sel_max; 570 #ifdef PMAP_PREFER 571 vaddr_t pmap_min, pmap_max; 572 #endif /* PMAP_PREFER */ 573 #ifdef DIAGNOSTIC 574 int bad; 575 #endif /* DIAGNOSTIC */ 576 577 sel_min = VMMAP_FREE_START(sel); 578 sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0); 579 580 #ifdef PMAP_PREFER 581 582 /* 583 * There are two special cases, in which we can satisfy the align 584 * requirement and the pmap_prefer requirement. 585 * - when pmap_off == 0, we always select the largest of the two 586 * - when pmap_off % align == 0 and pmap_align > align, we simply 587 * satisfy the pmap_align requirement and automatically 588 * satisfy the align requirement. 589 */ 590 if (align > PAGE_SIZE && 591 !(pmap_align > align && (pmap_off & (align - 1)) == 0)) { 592 /* 593 * Simple case: only use align. 594 */ 595 sel_min = roundup(sel_min, align); 596 sel_max &= ~(align - 1); 597 598 if (sel_min > sel_max) 599 return ENOMEM; 600 601 /* Correct for bias. 
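 * (Sketch with hypothetical window values: for sel_min = 0x1000,
 * sel_max = 0x100000 and FSPACE_BIASGAP = 0x8000, bias > 0 clamps the
 * window to [0x100000 - 0x8000, 0x100000] and rounds sel_min up to the
 * alignment, while bias < 0 clamps it to [0x1000, 0x1000 + 0x8000] and
 * rounds sel_max down, keeping the allocation near the preferred end
 * of the gap.)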
*/ 602 if (sel_max - sel_min > FSPACE_BIASGAP) { 603 if (bias > 0) { 604 sel_min = sel_max - FSPACE_BIASGAP; 605 sel_min = roundup(sel_min, align); 606 } else if (bias < 0) { 607 sel_max = sel_min + FSPACE_BIASGAP; 608 sel_max &= ~(align - 1); 609 } 610 } 611 } else if (pmap_align != 0) { 612 /* 613 * Special case: satisfy both pmap_prefer and 614 * align argument. 615 */ 616 pmap_max = sel_max & ~(pmap_align - 1); 617 pmap_min = sel_min; 618 if (pmap_max < sel_min) 619 return ENOMEM; 620 621 /* Adjust pmap_min for BIASGAP for top-addr bias. */ 622 if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP) 623 pmap_min = pmap_max - FSPACE_BIASGAP; 624 /* Align pmap_min. */ 625 pmap_min &= ~(pmap_align - 1); 626 if (pmap_min < sel_min) 627 pmap_min += pmap_align; 628 if (pmap_min > pmap_max) 629 return ENOMEM; 630 631 /* Adjust pmap_max for BIASGAP for bottom-addr bias. */ 632 if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) { 633 pmap_max = (pmap_min + FSPACE_BIASGAP) & 634 ~(pmap_align - 1); 635 } 636 if (pmap_min > pmap_max) 637 return ENOMEM; 638 639 /* Apply pmap prefer offset. */ 640 pmap_max |= pmap_off; 641 if (pmap_max > sel_max) 642 pmap_max -= pmap_align; 643 pmap_min |= pmap_off; 644 if (pmap_min < sel_min) 645 pmap_min += pmap_align; 646 647 /* 648 * Fixup: it's possible that pmap_min and pmap_max 649 * cross eachother. In this case, try to find one 650 * address that is allowed. 651 * (This usually happens in biased case.) 652 */ 653 if (pmap_min > pmap_max) { 654 if (pmap_min < sel_max) 655 pmap_max = pmap_min; 656 else if (pmap_max > sel_min) 657 pmap_min = pmap_max; 658 else 659 return ENOMEM; 660 } 661 662 /* Internal validation. */ 663 KDASSERT(pmap_min <= pmap_max); 664 665 sel_min = pmap_min; 666 sel_max = pmap_max; 667 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 668 sel_min = sel_max - FSPACE_BIASGAP; 669 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 670 sel_max = sel_min + FSPACE_BIASGAP; 671 672 #else 673 674 if (align > PAGE_SIZE) { 675 sel_min = roundup(sel_min, align); 676 sel_max &= ~(align - 1); 677 if (sel_min > sel_max) 678 return ENOMEM; 679 680 if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) { 681 if (bias > 0) { 682 sel_min = roundup(sel_max - FSPACE_BIASGAP, 683 align); 684 } else { 685 sel_max = (sel_min + FSPACE_BIASGAP) & 686 ~(align - 1); 687 } 688 } 689 } else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP) 690 sel_min = sel_max - FSPACE_BIASGAP; 691 else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP) 692 sel_max = sel_min + FSPACE_BIASGAP; 693 694 #endif 695 696 if (sel_min > sel_max) 697 return ENOMEM; 698 699 #ifdef DIAGNOSTIC 700 bad = 0; 701 /* Lower boundary check. */ 702 if (sel_min < VMMAP_FREE_START(sel)) { 703 printf("sel_min: 0x%lx, but should be at least 0x%lx\n", 704 sel_min, VMMAP_FREE_START(sel)); 705 bad++; 706 } 707 /* Upper boundary check. */ 708 if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) { 709 printf("sel_max: 0x%lx, but should be at most 0x%lx\n", 710 sel_max, 711 VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)); 712 bad++; 713 } 714 /* Lower boundary alignment. */ 715 if (align != 0 && (sel_min & (align - 1)) != 0) { 716 printf("sel_min: 0x%lx, not aligned to 0x%lx\n", 717 sel_min, align); 718 bad++; 719 } 720 /* Upper boundary alignment. */ 721 if (align != 0 && (sel_max & (align - 1)) != 0) { 722 printf("sel_max: 0x%lx, not aligned to 0x%lx\n", 723 sel_max, align); 724 bad++; 725 } 726 /* Lower boundary PMAP_PREFER check. 
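 * (The invariant, written out for illustration: when align == 0 and
 * pmap_align != 0, (sel_min & (pmap_align - 1)) must equal pmap_off;
 * e.g. pmap_align = 0x10000 with pmap_off = 0x3000 admits 0x13000 or
 * 0x23000, but not 0x14000.)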
 */
	if (pmap_align != 0 && align == 0 &&
	    (sel_min & (pmap_align - 1)) != pmap_off) {
		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
		    sel_min, sel_min & (pmap_align - 1), pmap_off);
		bad++;
	}
	/* Upper boundary PMAP_PREFER check. */
	if (pmap_align != 0 && align == 0 &&
	    (sel_max & (pmap_align - 1)) != pmap_off) {
		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
		    sel_max, sel_max & (pmap_align - 1), pmap_off);
		bad++;
	}

	if (bad) {
		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
		    "bias = %d, "
		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
	}
#endif /* DIAGNOSTIC */

	*min = sel_min;
	*max = sel_max;
	return 0;
}

/*
 * Test if memory starting at addr with sz bytes is free.
 *
 * Fills in *start_ptr and *end_ptr to be the first and last entry describing
 * the space.
 * If *start_ptr and *end_ptr are prefilled, they must already be correct.
 */
int
uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
    vaddr_t addr, vsize_t sz)
{
	struct uvm_addr_state *free;
	struct uvm_map_addr *atree;
	struct vm_map_entry *i, *i_end;

	if (addr + sz < addr)
		return 0;

	/*
	 * Kernel memory above uvm_maxkaddr is considered unavailable.
	 */
	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
		if (addr + sz > uvm_maxkaddr)
			return 0;
	}

	atree = &map->addr;

	/*
	 * Fill in first, last, so they point at the entries containing the
	 * first and last address of the range.
	 * Note that if they are not NULL, we don't perform the lookup.
	 */
	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
	if (*start_ptr == NULL) {
		*start_ptr = uvm_map_entrybyaddr(atree, addr);
		if (*start_ptr == NULL)
			return 0;
	} else
		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
	if (*end_ptr == NULL) {
		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
			*end_ptr = *start_ptr;
		else {
			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
			if (*end_ptr == NULL)
				return 0;
		}
	} else
		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));

	/* Validation. */
	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
	KDASSERT((*start_ptr)->start <= addr &&
	    VMMAP_FREE_END(*start_ptr) > addr &&
	    (*end_ptr)->start < addr + sz &&
	    VMMAP_FREE_END(*end_ptr) >= addr + sz);

	/*
	 * Check that none of the entries intersects with <addr, addr+sz>.
	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
	 * considered unavailable unless called by those allocators.
	 */
	i = *start_ptr;
	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
	for (; i != i_end;
	    i = RBT_NEXT(uvm_map_addr, i)) {
		if (i->start != i->end && i->end > addr)
			return 0;

		/*
		 * uaddr_exe and uaddr_brk_stack may only be used
		 * by these allocators and the NULL uaddr (i.e. no
		 * uaddr).
		 * Reject if this requirement is not met.
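		 * (Illustrative reading: space owned by uaddr_exe or
		 * uaddr_brk_stack is reported as unavailable to any other
		 * allocator, while a caller passing uaddr == NULL, e.g. a
		 * fixed-address availability check, or the owning allocator
		 * itself may still use it.)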
832 */ 833 if (uaddr != NULL) { 834 free = uvm_map_uaddr_e(map, i); 835 836 if (uaddr != free && free != NULL && 837 (free == map->uaddr_exe || 838 free == map->uaddr_brk_stack)) 839 return 0; 840 } 841 } 842 843 return -1; 844 } 845 846 /* 847 * Invoke each address selector until an address is found. 848 * Will not invoke uaddr_exe. 849 */ 850 int 851 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 852 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 853 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 854 { 855 struct uvm_addr_state *uaddr; 856 int i; 857 858 /* 859 * Allocation for sz bytes at any address, 860 * using the addr selectors in order. 861 */ 862 for (i = 0; i < nitems(map->uaddr_any); i++) { 863 uaddr = map->uaddr_any[i]; 864 865 if (uvm_addr_invoke(map, uaddr, first, last, 866 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 867 return 0; 868 } 869 870 /* Fall back to brk() and stack() address selectors. */ 871 uaddr = map->uaddr_brk_stack; 872 if (uvm_addr_invoke(map, uaddr, first, last, 873 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 874 return 0; 875 876 return ENOMEM; 877 } 878 879 /* Calculate entry augmentation value. */ 880 vsize_t 881 uvm_map_addr_augment_get(struct vm_map_entry *entry) 882 { 883 vsize_t augment; 884 struct vm_map_entry *left, *right; 885 886 augment = entry->fspace; 887 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 888 augment = MAX(augment, left->fspace_augment); 889 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 890 augment = MAX(augment, right->fspace_augment); 891 return augment; 892 } 893 894 /* 895 * Update augmentation data in entry. 896 */ 897 void 898 uvm_map_addr_augment(struct vm_map_entry *entry) 899 { 900 vsize_t augment; 901 902 while (entry != NULL) { 903 /* Calculate value for augmentation. */ 904 augment = uvm_map_addr_augment_get(entry); 905 906 /* 907 * Descend update. 908 * Once we find an entry that already has the correct value, 909 * stop, since it means all its parents will use the correct 910 * value too. 911 */ 912 if (entry->fspace_augment == augment) 913 return; 914 entry->fspace_augment = augment; 915 entry = RBT_PARENT(uvm_map_addr, entry); 916 } 917 } 918 919 /* 920 * uvm_mapanon: establish a valid mapping in map for an anon 921 * 922 * => *addr and sz must be a multiple of PAGE_SIZE. 923 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 924 * => map must be unlocked. 925 * 926 * => align: align vaddr, must be a power-of-2. 927 * Align is only a hint and will be ignored if the alignment fails. 928 */ 929 int 930 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 931 vsize_t align, unsigned int flags) 932 { 933 struct vm_map_entry *first, *last, *entry, *new; 934 struct uvm_map_deadq dead; 935 vm_prot_t prot; 936 vm_prot_t maxprot; 937 vm_inherit_t inherit; 938 int advice; 939 int error; 940 vaddr_t pmap_align, pmap_offset; 941 vaddr_t hint; 942 943 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 944 KASSERT(map != kernel_map); 945 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 946 947 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 948 splassert(IPL_NONE); 949 950 /* 951 * We use pmap_align and pmap_offset as alignment and offset variables. 952 * 953 * Because the align parameter takes precedence over pmap prefer, 954 * the pmap_align will need to be set to align, with pmap_offset = 0, 955 * if pmap_prefer will not align. 956 */ 957 pmap_align = MAX(align, PAGE_SIZE); 958 pmap_offset = 0; 959 960 /* Decode parameters. 
*/ 961 prot = UVM_PROTECTION(flags); 962 maxprot = UVM_MAXPROTECTION(flags); 963 advice = UVM_ADVICE(flags); 964 inherit = UVM_INHERIT(flags); 965 error = 0; 966 hint = trunc_page(*addr); 967 TAILQ_INIT(&dead); 968 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 969 KASSERT((align & (align - 1)) == 0); 970 971 /* Check protection. */ 972 if ((prot & maxprot) != prot) 973 return EACCES; 974 975 /* 976 * Before grabbing the lock, allocate a map entry for later 977 * use to ensure we don't wait for memory while holding the 978 * vm_map_lock. 979 */ 980 new = uvm_mapent_alloc(map, flags); 981 if (new == NULL) 982 return(ENOMEM); 983 984 if (flags & UVM_FLAG_TRYLOCK) { 985 if (vm_map_lock_try(map) == FALSE) { 986 error = EFAULT; 987 goto out; 988 } 989 } else 990 vm_map_lock(map); 991 992 first = last = NULL; 993 if (flags & UVM_FLAG_FIXED) { 994 /* 995 * Fixed location. 996 * 997 * Note: we ignore align, pmap_prefer. 998 * Fill in first, last and *addr. 999 */ 1000 KASSERT((*addr & PAGE_MASK) == 0); 1001 1002 /* Check that the space is available. */ 1003 if (flags & UVM_FLAG_UNMAP) 1004 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1005 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1006 error = ENOMEM; 1007 goto unlock; 1008 } 1009 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1010 (align == 0 || (*addr & (align - 1)) == 0) && 1011 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1012 /* 1013 * Address used as hint. 1014 * 1015 * Note: we enforce the alignment restriction, 1016 * but ignore pmap_prefer. 1017 */ 1018 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1019 /* Run selection algorithm for executables. */ 1020 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1021 addr, sz, pmap_align, pmap_offset, prot, hint); 1022 1023 if (error != 0) 1024 goto unlock; 1025 } else { 1026 /* Update freelists from vmspace. */ 1027 uvm_map_vmspace_update(map, &dead, flags); 1028 1029 error = uvm_map_findspace(map, &first, &last, addr, sz, 1030 pmap_align, pmap_offset, prot, hint); 1031 1032 if (error != 0) 1033 goto unlock; 1034 } 1035 1036 /* Double-check if selected address doesn't cause overflow. */ 1037 if (*addr + sz < *addr) { 1038 error = ENOMEM; 1039 goto unlock; 1040 } 1041 1042 /* If we only want a query, return now. */ 1043 if (flags & UVM_FLAG_QUERY) { 1044 error = 0; 1045 goto unlock; 1046 } 1047 1048 /* 1049 * Create new entry. 1050 * first and last may be invalidated after this call. 1051 */ 1052 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1053 new); 1054 if (entry == NULL) { 1055 error = ENOMEM; 1056 goto unlock; 1057 } 1058 new = NULL; 1059 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1060 entry->object.uvm_obj = NULL; 1061 entry->offset = 0; 1062 entry->protection = prot; 1063 entry->max_protection = maxprot; 1064 entry->inheritance = inherit; 1065 entry->wired_count = 0; 1066 entry->advice = advice; 1067 if (flags & UVM_FLAG_COPYONW) { 1068 entry->etype |= UVM_ET_COPYONWRITE; 1069 if ((flags & UVM_FLAG_OVERLAY) == 0) 1070 entry->etype |= UVM_ET_NEEDSCOPY; 1071 } 1072 if (flags & UVM_FLAG_OVERLAY) { 1073 KERNEL_LOCK(); 1074 entry->aref.ar_pageoff = 0; 1075 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1076 KERNEL_UNLOCK(); 1077 } 1078 1079 /* Update map and process statistics. */ 1080 map->size += sz; 1081 ((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz); 1082 1083 unlock: 1084 vm_map_unlock(map); 1085 1086 /* 1087 * Remove dead entries. 
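 * (The pattern used here: entries that were replaced or merged while
 * the map was locked are queued on &dead and only torn down by
 * uvm_unmap_detach(&dead, 0) after vm_map_unlock(), so amap and
 * object teardown happens without the map lock held.)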
1088 * 1089 * Dead entries may be the result of merging. 1090 * uvm_map_mkentry may also create dead entries, when it attempts to 1091 * destroy free-space entries. 1092 */ 1093 uvm_unmap_detach(&dead, 0); 1094 out: 1095 if (new) 1096 uvm_mapent_free(new); 1097 return error; 1098 } 1099 1100 /* 1101 * uvm_map: establish a valid mapping in map 1102 * 1103 * => *addr and sz must be a multiple of PAGE_SIZE. 1104 * => map must be unlocked. 1105 * => <uobj,uoffset> value meanings (4 cases): 1106 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 1107 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 1108 * [3] <uobj,uoffset> == normal mapping 1109 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 1110 * 1111 * case [4] is for kernel mappings where we don't know the offset until 1112 * we've found a virtual address. note that kernel object offsets are 1113 * always relative to vm_map_min(kernel_map). 1114 * 1115 * => align: align vaddr, must be a power-of-2. 1116 * Align is only a hint and will be ignored if the alignment fails. 1117 */ 1118 int 1119 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 1120 struct uvm_object *uobj, voff_t uoffset, 1121 vsize_t align, unsigned int flags) 1122 { 1123 struct vm_map_entry *first, *last, *entry, *new; 1124 struct uvm_map_deadq dead; 1125 vm_prot_t prot; 1126 vm_prot_t maxprot; 1127 vm_inherit_t inherit; 1128 int advice; 1129 int error; 1130 vaddr_t pmap_align, pmap_offset; 1131 vaddr_t hint; 1132 1133 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1134 splassert(IPL_NONE); 1135 else 1136 splassert(IPL_VM); 1137 1138 /* 1139 * We use pmap_align and pmap_offset as alignment and offset variables. 1140 * 1141 * Because the align parameter takes precedence over pmap prefer, 1142 * the pmap_align will need to be set to align, with pmap_offset = 0, 1143 * if pmap_prefer will not align. 1144 */ 1145 if (uoffset == UVM_UNKNOWN_OFFSET) { 1146 pmap_align = MAX(align, PAGE_SIZE); 1147 pmap_offset = 0; 1148 } else { 1149 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 1150 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 1151 1152 if (align == 0 || 1153 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 1154 /* pmap_offset satisfies align, no change. */ 1155 } else { 1156 /* Align takes precedence over pmap prefer. */ 1157 pmap_align = align; 1158 pmap_offset = 0; 1159 } 1160 } 1161 1162 /* Decode parameters. */ 1163 prot = UVM_PROTECTION(flags); 1164 maxprot = UVM_MAXPROTECTION(flags); 1165 advice = UVM_ADVICE(flags); 1166 inherit = UVM_INHERIT(flags); 1167 error = 0; 1168 hint = trunc_page(*addr); 1169 TAILQ_INIT(&dead); 1170 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 1171 KASSERT((align & (align - 1)) == 0); 1172 1173 /* Holes are incompatible with other types of mappings. */ 1174 if (flags & UVM_FLAG_HOLE) { 1175 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 1176 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 1177 } 1178 1179 /* Unset hint for kernel_map non-fixed allocations. */ 1180 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 1181 hint = 0; 1182 1183 /* Check protection. */ 1184 if ((prot & maxprot) != prot) 1185 return EACCES; 1186 1187 if (map == kernel_map && 1188 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1189 panic("uvm_map: kernel map W^X violation requested"); 1190 1191 /* 1192 * Before grabbing the lock, allocate a map entry for later 1193 * use to ensure we don't wait for memory while holding the 1194 * vm_map_lock. 
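 * (The pre-allocated entry is handed to uvm_map_mkentry() once an
 * address has been chosen; if it ends up unused, it is released via
 * uvm_mapent_free(new) at the "out:" label, so nothing leaks on the
 * error paths.)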
1195 */ 1196 new = uvm_mapent_alloc(map, flags); 1197 if (new == NULL) 1198 return(ENOMEM); 1199 1200 if (flags & UVM_FLAG_TRYLOCK) { 1201 if (vm_map_lock_try(map) == FALSE) { 1202 error = EFAULT; 1203 goto out; 1204 } 1205 } else { 1206 vm_map_lock(map); 1207 } 1208 1209 first = last = NULL; 1210 if (flags & UVM_FLAG_FIXED) { 1211 /* 1212 * Fixed location. 1213 * 1214 * Note: we ignore align, pmap_prefer. 1215 * Fill in first, last and *addr. 1216 */ 1217 KASSERT((*addr & PAGE_MASK) == 0); 1218 1219 /* 1220 * Grow pmap to include allocated address. 1221 * If the growth fails, the allocation will fail too. 1222 */ 1223 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1224 uvm_maxkaddr < (*addr + sz)) { 1225 uvm_map_kmem_grow(map, &dead, 1226 *addr + sz - uvm_maxkaddr, flags); 1227 } 1228 1229 /* Check that the space is available. */ 1230 if (flags & UVM_FLAG_UNMAP) 1231 uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE); 1232 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1233 error = ENOMEM; 1234 goto unlock; 1235 } 1236 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1237 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1238 (align == 0 || (*addr & (align - 1)) == 0) && 1239 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1240 /* 1241 * Address used as hint. 1242 * 1243 * Note: we enforce the alignment restriction, 1244 * but ignore pmap_prefer. 1245 */ 1246 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1247 /* Run selection algorithm for executables. */ 1248 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1249 addr, sz, pmap_align, pmap_offset, prot, hint); 1250 1251 /* Grow kernel memory and try again. */ 1252 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1253 uvm_map_kmem_grow(map, &dead, sz, flags); 1254 1255 error = uvm_addr_invoke(map, map->uaddr_exe, 1256 &first, &last, addr, sz, 1257 pmap_align, pmap_offset, prot, hint); 1258 } 1259 1260 if (error != 0) 1261 goto unlock; 1262 } else { 1263 /* Update freelists from vmspace. */ 1264 if (map->flags & VM_MAP_ISVMSPACE) 1265 uvm_map_vmspace_update(map, &dead, flags); 1266 1267 error = uvm_map_findspace(map, &first, &last, addr, sz, 1268 pmap_align, pmap_offset, prot, hint); 1269 1270 /* Grow kernel memory and try again. */ 1271 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1272 uvm_map_kmem_grow(map, &dead, sz, flags); 1273 1274 error = uvm_map_findspace(map, &first, &last, addr, sz, 1275 pmap_align, pmap_offset, prot, hint); 1276 } 1277 1278 if (error != 0) 1279 goto unlock; 1280 } 1281 1282 /* Double-check if selected address doesn't cause overflow. */ 1283 if (*addr + sz < *addr) { 1284 error = ENOMEM; 1285 goto unlock; 1286 } 1287 1288 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1289 uvm_maxkaddr >= *addr + sz); 1290 1291 /* If we only want a query, return now. */ 1292 if (flags & UVM_FLAG_QUERY) { 1293 error = 0; 1294 goto unlock; 1295 } 1296 1297 if (uobj == NULL) 1298 uoffset = 0; 1299 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1300 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1301 uoffset = *addr - vm_map_min(kernel_map); 1302 } 1303 1304 /* 1305 * Create new entry. 1306 * first and last may be invalidated after this call. 
1307 */ 1308 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1309 new); 1310 if (entry == NULL) { 1311 error = ENOMEM; 1312 goto unlock; 1313 } 1314 new = NULL; 1315 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1316 entry->object.uvm_obj = uobj; 1317 entry->offset = uoffset; 1318 entry->protection = prot; 1319 entry->max_protection = maxprot; 1320 entry->inheritance = inherit; 1321 entry->wired_count = 0; 1322 entry->advice = advice; 1323 if (uobj) 1324 entry->etype |= UVM_ET_OBJ; 1325 else if (flags & UVM_FLAG_HOLE) 1326 entry->etype |= UVM_ET_HOLE; 1327 if (flags & UVM_FLAG_NOFAULT) 1328 entry->etype |= UVM_ET_NOFAULT; 1329 if (flags & UVM_FLAG_COPYONW) { 1330 entry->etype |= UVM_ET_COPYONWRITE; 1331 if ((flags & UVM_FLAG_OVERLAY) == 0) 1332 entry->etype |= UVM_ET_NEEDSCOPY; 1333 } 1334 if (flags & UVM_FLAG_OVERLAY) { 1335 entry->aref.ar_pageoff = 0; 1336 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1337 } 1338 1339 /* Update map and process statistics. */ 1340 if (!(flags & UVM_FLAG_HOLE)) { 1341 map->size += sz; 1342 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) { 1343 ((struct vmspace *)map)->vm_dused += 1344 uvmspace_dused(map, *addr, *addr + sz); 1345 } 1346 } 1347 1348 /* 1349 * Try to merge entry. 1350 * 1351 * Userland allocations are kept separated most of the time. 1352 * Forego the effort of merging what most of the time can't be merged 1353 * and only try the merge if it concerns a kernel entry. 1354 */ 1355 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1356 (map->flags & VM_MAP_ISVMSPACE) == 0) 1357 uvm_mapent_tryjoin(map, entry, &dead); 1358 1359 unlock: 1360 vm_map_unlock(map); 1361 1362 /* 1363 * Remove dead entries. 1364 * 1365 * Dead entries may be the result of merging. 1366 * uvm_map_mkentry may also create dead entries, when it attempts to 1367 * destroy free-space entries. 1368 */ 1369 if (map->flags & VM_MAP_INTRSAFE) 1370 uvm_unmap_detach_intrsafe(&dead); 1371 else 1372 uvm_unmap_detach(&dead, 0); 1373 out: 1374 if (new) 1375 uvm_mapent_free(new); 1376 return error; 1377 } 1378 1379 /* 1380 * True iff e1 and e2 can be joined together. 1381 */ 1382 int 1383 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1384 struct vm_map_entry *e2) 1385 { 1386 KDASSERT(e1 != NULL && e2 != NULL); 1387 1388 /* Must be the same entry type and not have free memory between. */ 1389 if (e1->etype != e2->etype || e1->end != e2->start) 1390 return 0; 1391 1392 /* Submaps are never joined. */ 1393 if (UVM_ET_ISSUBMAP(e1)) 1394 return 0; 1395 1396 /* Never merge wired memory. */ 1397 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1398 return 0; 1399 1400 /* Protection, inheritance and advice must be equal. */ 1401 if (e1->protection != e2->protection || 1402 e1->max_protection != e2->max_protection || 1403 e1->inheritance != e2->inheritance || 1404 e1->advice != e2->advice) 1405 return 0; 1406 1407 /* If uvm_object: object itself and offsets within object must match. */ 1408 if (UVM_ET_ISOBJ(e1)) { 1409 if (e1->object.uvm_obj != e2->object.uvm_obj) 1410 return 0; 1411 if (e1->offset + (e1->end - e1->start) != e2->offset) 1412 return 0; 1413 } 1414 1415 /* 1416 * Cannot join shared amaps. 1417 * Note: no need to lock amap to look at refs, since we don't care 1418 * about its exact value. 1419 * If it is 1 (i.e. we have the only reference) it will stay there. 
1420 */ 1421 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1422 return 0; 1423 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1424 return 0; 1425 1426 /* Apprently, e1 and e2 match. */ 1427 return 1; 1428 } 1429 1430 /* 1431 * Join support function. 1432 * 1433 * Returns the merged entry on succes. 1434 * Returns NULL if the merge failed. 1435 */ 1436 struct vm_map_entry* 1437 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1438 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1439 { 1440 struct uvm_addr_state *free; 1441 1442 /* 1443 * Merging is not supported for map entries that 1444 * contain an amap in e1. This should never happen 1445 * anyway, because only kernel entries are merged. 1446 * These do not contain amaps. 1447 * e2 contains no real information in its amap, 1448 * so it can be erased immediately. 1449 */ 1450 KASSERT(e1->aref.ar_amap == NULL); 1451 1452 /* 1453 * Don't drop obj reference: 1454 * uvm_unmap_detach will do this for us. 1455 */ 1456 free = uvm_map_uaddr_e(map, e1); 1457 uvm_mapent_free_remove(map, free, e1); 1458 1459 free = uvm_map_uaddr_e(map, e2); 1460 uvm_mapent_free_remove(map, free, e2); 1461 uvm_mapent_addr_remove(map, e2); 1462 e1->end = e2->end; 1463 e1->guard = e2->guard; 1464 e1->fspace = e2->fspace; 1465 uvm_mapent_free_insert(map, free, e1); 1466 1467 DEAD_ENTRY_PUSH(dead, e2); 1468 return e1; 1469 } 1470 1471 /* 1472 * Attempt forward and backward joining of entry. 1473 * 1474 * Returns entry after joins. 1475 * We are guaranteed that the amap of entry is either non-existant or 1476 * has never been used. 1477 */ 1478 struct vm_map_entry* 1479 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1480 struct uvm_map_deadq *dead) 1481 { 1482 struct vm_map_entry *other; 1483 struct vm_map_entry *merged; 1484 1485 /* Merge with previous entry. */ 1486 other = RBT_PREV(uvm_map_addr, entry); 1487 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1488 merged = uvm_mapent_merge(map, other, entry, dead); 1489 if (merged) 1490 entry = merged; 1491 } 1492 1493 /* 1494 * Merge with next entry. 1495 * 1496 * Because amap can only extend forward and the next entry 1497 * probably contains sensible info, only perform forward merging 1498 * in the absence of an amap. 1499 */ 1500 other = RBT_NEXT(uvm_map_addr, entry); 1501 if (other && entry->aref.ar_amap == NULL && 1502 other->aref.ar_amap == NULL && 1503 uvm_mapent_isjoinable(map, entry, other)) { 1504 merged = uvm_mapent_merge(map, entry, other, dead); 1505 if (merged) 1506 entry = merged; 1507 } 1508 1509 return entry; 1510 } 1511 1512 /* 1513 * Kill entries that are no longer in a map. 1514 */ 1515 void 1516 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1517 { 1518 struct vm_map_entry *entry; 1519 int waitok = flags & UVM_PLA_WAITOK; 1520 1521 if (TAILQ_EMPTY(deadq)) 1522 return; 1523 1524 KERNEL_LOCK(); 1525 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1526 if (waitok) 1527 uvm_pause(); 1528 /* Drop reference to amap, if we've got one. */ 1529 if (entry->aref.ar_amap) 1530 amap_unref(entry->aref.ar_amap, 1531 entry->aref.ar_pageoff, 1532 atop(entry->end - entry->start), 1533 flags & AMAP_REFALL); 1534 1535 /* Drop reference to our backing object, if we've got one. */ 1536 if (UVM_ET_ISSUBMAP(entry)) { 1537 /* ... 
unlikely to happen, but play it safe */ 1538 uvm_map_deallocate(entry->object.sub_map); 1539 } else if (UVM_ET_ISOBJ(entry) && 1540 entry->object.uvm_obj->pgops->pgo_detach) { 1541 entry->object.uvm_obj->pgops->pgo_detach( 1542 entry->object.uvm_obj); 1543 } 1544 1545 /* Step to next. */ 1546 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1547 uvm_mapent_free(entry); 1548 } 1549 KERNEL_UNLOCK(); 1550 } 1551 1552 void 1553 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1554 { 1555 struct vm_map_entry *entry; 1556 1557 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1558 KASSERT(entry->aref.ar_amap == NULL); 1559 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1560 KASSERT(!UVM_ET_ISOBJ(entry)); 1561 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1562 uvm_mapent_free(entry); 1563 } 1564 } 1565 1566 /* 1567 * Create and insert new entry. 1568 * 1569 * Returned entry contains new addresses and is inserted properly in the tree. 1570 * first and last are (probably) no longer valid. 1571 */ 1572 struct vm_map_entry* 1573 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1574 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1575 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1576 { 1577 struct vm_map_entry *entry, *prev; 1578 struct uvm_addr_state *free; 1579 vaddr_t min, max; /* free space boundaries for new entry */ 1580 1581 KDASSERT(map != NULL); 1582 KDASSERT(first != NULL); 1583 KDASSERT(last != NULL); 1584 KDASSERT(dead != NULL); 1585 KDASSERT(sz > 0); 1586 KDASSERT(addr + sz > addr); 1587 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1588 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1589 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1590 uvm_tree_sanity(map, __FILE__, __LINE__); 1591 1592 min = addr + sz; 1593 max = VMMAP_FREE_END(last); 1594 1595 /* Initialize new entry. */ 1596 if (new == NULL) 1597 entry = uvm_mapent_alloc(map, flags); 1598 else 1599 entry = new; 1600 if (entry == NULL) 1601 return NULL; 1602 entry->offset = 0; 1603 entry->etype = 0; 1604 entry->wired_count = 0; 1605 entry->aref.ar_pageoff = 0; 1606 entry->aref.ar_amap = NULL; 1607 1608 entry->start = addr; 1609 entry->end = min; 1610 entry->guard = 0; 1611 entry->fspace = 0; 1612 1613 /* Reset free space in first. */ 1614 free = uvm_map_uaddr_e(map, first); 1615 uvm_mapent_free_remove(map, free, first); 1616 first->guard = 0; 1617 first->fspace = 0; 1618 1619 /* 1620 * Remove all entries that are fully replaced. 1621 * We are iterating using last in reverse order. 1622 */ 1623 for (; first != last; last = prev) { 1624 prev = RBT_PREV(uvm_map_addr, last); 1625 1626 KDASSERT(last->start == last->end); 1627 free = uvm_map_uaddr_e(map, last); 1628 uvm_mapent_free_remove(map, free, last); 1629 uvm_mapent_addr_remove(map, last); 1630 DEAD_ENTRY_PUSH(dead, last); 1631 } 1632 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1633 if (first->start == addr) { 1634 uvm_mapent_addr_remove(map, first); 1635 DEAD_ENTRY_PUSH(dead, first); 1636 } else { 1637 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1638 addr, flags); 1639 } 1640 1641 /* Finally, link in entry. 
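 * The new entry goes into the address tree and the remaining free
 * space [min, max) behind it is re-established via uvm_map_fix_space().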
*/ 1642 uvm_mapent_addr_insert(map, entry); 1643 uvm_map_fix_space(map, entry, min, max, flags); 1644 1645 uvm_tree_sanity(map, __FILE__, __LINE__); 1646 return entry; 1647 } 1648 1649 1650 /* 1651 * uvm_mapent_alloc: allocate a map entry 1652 */ 1653 struct vm_map_entry * 1654 uvm_mapent_alloc(struct vm_map *map, int flags) 1655 { 1656 struct vm_map_entry *me, *ne; 1657 int pool_flags; 1658 int i; 1659 1660 pool_flags = PR_WAITOK; 1661 if (flags & UVM_FLAG_TRYLOCK) 1662 pool_flags = PR_NOWAIT; 1663 1664 if (map->flags & VM_MAP_INTRSAFE || cold) { 1665 mtx_enter(&uvm_kmapent_mtx); 1666 if (SLIST_EMPTY(&uvm.kentry_free)) { 1667 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1668 &kd_nowait); 1669 if (ne == NULL) 1670 panic("uvm_mapent_alloc: cannot allocate map " 1671 "entry"); 1672 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1673 SLIST_INSERT_HEAD(&uvm.kentry_free, 1674 &ne[i], daddrs.addr_kentry); 1675 } 1676 if (ratecheck(&uvm_kmapent_last_warn_time, 1677 &uvm_kmapent_warn_rate)) 1678 printf("uvm_mapent_alloc: out of static " 1679 "map entries\n"); 1680 } 1681 me = SLIST_FIRST(&uvm.kentry_free); 1682 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1683 uvmexp.kmapent++; 1684 mtx_leave(&uvm_kmapent_mtx); 1685 me->flags = UVM_MAP_STATIC; 1686 } else if (map == kernel_map) { 1687 splassert(IPL_NONE); 1688 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1689 if (me == NULL) 1690 goto out; 1691 me->flags = UVM_MAP_KMEM; 1692 } else { 1693 splassert(IPL_NONE); 1694 me = pool_get(&uvm_map_entry_pool, pool_flags); 1695 if (me == NULL) 1696 goto out; 1697 me->flags = 0; 1698 } 1699 1700 if (me != NULL) { 1701 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1702 } 1703 1704 out: 1705 return(me); 1706 } 1707 1708 /* 1709 * uvm_mapent_free: free map entry 1710 * 1711 * => XXX: static pool for kernel map? 1712 */ 1713 void 1714 uvm_mapent_free(struct vm_map_entry *me) 1715 { 1716 if (me->flags & UVM_MAP_STATIC) { 1717 mtx_enter(&uvm_kmapent_mtx); 1718 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1719 uvmexp.kmapent--; 1720 mtx_leave(&uvm_kmapent_mtx); 1721 } else if (me->flags & UVM_MAP_KMEM) { 1722 splassert(IPL_NONE); 1723 pool_put(&uvm_map_entry_kmem_pool, me); 1724 } else { 1725 splassert(IPL_NONE); 1726 pool_put(&uvm_map_entry_pool, me); 1727 } 1728 } 1729 1730 /* 1731 * uvm_map_lookup_entry: find map entry at or before an address. 1732 * 1733 * => map must at least be read-locked by caller 1734 * => entry is returned in "entry" 1735 * => return value is true if address is in the returned entry 1736 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1737 * returned for those mappings. 1738 */ 1739 boolean_t 1740 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1741 struct vm_map_entry **entry) 1742 { 1743 *entry = uvm_map_entrybyaddr(&map->addr, address); 1744 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1745 (*entry)->start <= address && (*entry)->end > address; 1746 } 1747 1748 /* 1749 * uvm_map_pie: return a random load address for a PIE executable 1750 * properly aligned. 
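 * (Worked example with hypothetical platform constants: for
 * VM_PIE_MIN_ADDR = 0x1000, VM_PIE_MAX_ADDR = 0x10000000 and
 * align = 0x10000, min rounds up to 0x10000, space becomes 0xfff
 * slots, and the result is min + arc4random_uniform(space) * align,
 * i.e. a uniformly chosen 64 KB aligned address below the limit.)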
1751 */ 1752 #ifndef VM_PIE_MAX_ADDR 1753 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1754 #endif 1755 1756 #ifndef VM_PIE_MIN_ADDR 1757 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1758 #endif 1759 1760 #ifndef VM_PIE_MIN_ALIGN 1761 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1762 #endif 1763 1764 vaddr_t 1765 uvm_map_pie(vaddr_t align) 1766 { 1767 vaddr_t addr, space, min; 1768 1769 align = MAX(align, VM_PIE_MIN_ALIGN); 1770 1771 /* round up to next alignment */ 1772 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1773 1774 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1775 return (align); 1776 1777 space = (VM_PIE_MAX_ADDR - min) / align; 1778 space = MIN(space, (u_int32_t)-1); 1779 1780 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1781 addr += min; 1782 1783 return (addr); 1784 } 1785 1786 void 1787 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1788 { 1789 struct uvm_map_deadq dead; 1790 1791 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1792 (end & (vaddr_t)PAGE_MASK) == 0); 1793 TAILQ_INIT(&dead); 1794 vm_map_lock(map); 1795 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE); 1796 vm_map_unlock(map); 1797 1798 if (map->flags & VM_MAP_INTRSAFE) 1799 uvm_unmap_detach_intrsafe(&dead); 1800 else 1801 uvm_unmap_detach(&dead, 0); 1802 } 1803 1804 /* 1805 * Mark entry as free. 1806 * 1807 * entry will be put on the dead list. 1808 * The free space will be merged into the previous or a new entry, 1809 * unless markfree is false. 1810 */ 1811 void 1812 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1813 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1814 boolean_t markfree) 1815 { 1816 struct uvm_addr_state *free; 1817 struct vm_map_entry *prev; 1818 vaddr_t addr; /* Start of freed range. */ 1819 vaddr_t end; /* End of freed range. */ 1820 1821 prev = *prev_ptr; 1822 if (prev == entry) 1823 *prev_ptr = prev = NULL; 1824 1825 if (prev == NULL || 1826 VMMAP_FREE_END(prev) != entry->start) 1827 prev = RBT_PREV(uvm_map_addr, entry); 1828 1829 /* Entry is describing only free memory and has nothing to drain into. */ 1830 if (prev == NULL && entry->start == entry->end && markfree) { 1831 *prev_ptr = entry; 1832 return; 1833 } 1834 1835 addr = entry->start; 1836 end = VMMAP_FREE_END(entry); 1837 free = uvm_map_uaddr_e(map, entry); 1838 uvm_mapent_free_remove(map, free, entry); 1839 uvm_mapent_addr_remove(map, entry); 1840 DEAD_ENTRY_PUSH(dead, entry); 1841 1842 if (markfree) { 1843 if (prev) { 1844 free = uvm_map_uaddr_e(map, prev); 1845 uvm_mapent_free_remove(map, free, prev); 1846 } 1847 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1848 } 1849 } 1850 1851 /* 1852 * Unwire and release referenced amap and object from map entry. 1853 */ 1854 void 1855 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1856 { 1857 /* Unwire removed map entry. */ 1858 if (VM_MAPENT_ISWIRED(entry)) { 1859 KERNEL_LOCK(); 1860 entry->wired_count = 0; 1861 uvm_fault_unwire_locked(map, entry->start, entry->end); 1862 KERNEL_UNLOCK(); 1863 } 1864 1865 /* Entry-type specific code. */ 1866 if (UVM_ET_ISHOLE(entry)) { 1867 /* Nothing to be done for holes. 
*/ 1868 } else if (map->flags & VM_MAP_INTRSAFE) { 1869 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1870 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1871 pmap_kremove(entry->start, entry->end - entry->start); 1872 } else if (UVM_ET_ISOBJ(entry) && 1873 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1874 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1875 /* 1876 * Note: kernel object mappings are currently used in 1877 * two ways: 1878 * [1] "normal" mappings of pages in the kernel object 1879 * [2] uvm_km_valloc'd allocations in which we 1880 * pmap_enter in some non-kernel-object page 1881 * (e.g. vmapbuf). 1882 * 1883 * for case [1], we need to remove the mapping from 1884 * the pmap and then remove the page from the kernel 1885 * object (because, once pages in a kernel object are 1886 * unmapped they are no longer needed, unlike, say, 1887 * a vnode where you might want the data to persist 1888 * until flushed out of a queue). 1889 * 1890 * for case [2], we need to remove the mapping from 1891 * the pmap. there shouldn't be any pages at the 1892 * specified offset in the kernel object [but it 1893 * doesn't hurt to call uvm_km_pgremove just to be 1894 * safe?] 1895 * 1896 * uvm_km_pgremove currently does the following: 1897 * for pages in the kernel object range: 1898 * - drops the swap slot 1899 * - uvm_pagefree the page 1900 * 1901 * note there is version of uvm_km_pgremove() that 1902 * is used for "intrsafe" objects. 1903 */ 1904 /* 1905 * remove mappings from pmap and drop the pages 1906 * from the object. offsets are always relative 1907 * to vm_map_min(kernel_map). 1908 */ 1909 pmap_remove(pmap_kernel(), entry->start, entry->end); 1910 uvm_km_pgremove(entry->object.uvm_obj, 1911 entry->start - vm_map_min(kernel_map), 1912 entry->end - vm_map_min(kernel_map)); 1913 1914 /* 1915 * null out kernel_object reference, we've just 1916 * dropped it 1917 */ 1918 entry->etype &= ~UVM_ET_OBJ; 1919 entry->object.uvm_obj = NULL; /* to be safe */ 1920 } else { 1921 /* remove mappings the standard way. */ 1922 pmap_remove(map->pmap, entry->start, entry->end); 1923 } 1924 } 1925 1926 /* 1927 * Remove all entries from start to end. 1928 * 1929 * If remove_holes, then remove ET_HOLE entries as well. 1930 * If markfree, entry will be properly marked free, otherwise, no replacement 1931 * entry will be put in the tree (corrupting the tree). 1932 */ 1933 void 1934 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1935 struct uvm_map_deadq *dead, boolean_t remove_holes, 1936 boolean_t markfree) 1937 { 1938 struct vm_map_entry *prev_hint, *next, *entry; 1939 1940 start = MAX(start, map->min_offset); 1941 end = MIN(end, map->max_offset); 1942 if (start >= end) 1943 return; 1944 1945 if ((map->flags & VM_MAP_INTRSAFE) == 0) 1946 splassert(IPL_NONE); 1947 else 1948 splassert(IPL_VM); 1949 1950 /* Find first affected entry. */ 1951 entry = uvm_map_entrybyaddr(&map->addr, start); 1952 KDASSERT(entry != NULL && entry->start <= start); 1953 if (entry->end <= start && markfree) 1954 entry = RBT_NEXT(uvm_map_addr, entry); 1955 else 1956 UVM_MAP_CLIP_START(map, entry, start); 1957 1958 /* 1959 * Iterate entries until we reach end address. 1960 * prev_hint hints where the freed space can be appended to. 
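 *
 * (For reference, uvm_unmap() above shows the usual calling pattern:
 * take the map lock, call uvm_unmap_remove() with markfree=TRUE,
 * unlock, then release the dead entries with uvm_unmap_detach() or
 * uvm_unmap_detach_intrsafe().)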
1961 */ 1962 prev_hint = NULL; 1963 for (; entry != NULL && entry->start < end; entry = next) { 1964 KDASSERT(entry->start >= start); 1965 if (entry->end > end || !markfree) 1966 UVM_MAP_CLIP_END(map, entry, end); 1967 KDASSERT(entry->start >= start && entry->end <= end); 1968 next = RBT_NEXT(uvm_map_addr, entry); 1969 1970 /* Don't remove holes unless asked to do so. */ 1971 if (UVM_ET_ISHOLE(entry)) { 1972 if (!remove_holes) { 1973 prev_hint = entry; 1974 continue; 1975 } 1976 } 1977 1978 /* Kill entry. */ 1979 uvm_unmap_kill_entry(map, entry); 1980 1981 /* Update space usage. */ 1982 if ((map->flags & VM_MAP_ISVMSPACE) && 1983 entry->object.uvm_obj == NULL && 1984 !UVM_ET_ISHOLE(entry)) { 1985 ((struct vmspace *)map)->vm_dused -= 1986 uvmspace_dused(map, entry->start, entry->end); 1987 } 1988 if (!UVM_ET_ISHOLE(entry)) 1989 map->size -= entry->end - entry->start; 1990 1991 /* Actual removal of entry. */ 1992 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 1993 } 1994 1995 pmap_update(vm_map_pmap(map)); 1996 1997 #ifdef VMMAP_DEBUG 1998 if (markfree) { 1999 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2000 entry != NULL && entry->start < end; 2001 entry = RBT_NEXT(uvm_map_addr, entry)) { 2002 KDASSERT(entry->end <= start || 2003 entry->start == entry->end || 2004 UVM_ET_ISHOLE(entry)); 2005 } 2006 } else { 2007 vaddr_t a; 2008 for (a = start; a < end; a += PAGE_SIZE) 2009 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2010 } 2011 #endif 2012 } 2013 2014 /* 2015 * Mark all entries from first until end (exclusive) as pageable. 2016 * 2017 * Lock must be exclusive on entry and will not be touched. 2018 */ 2019 void 2020 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2021 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2022 { 2023 struct vm_map_entry *iter; 2024 2025 for (iter = first; iter != end; 2026 iter = RBT_NEXT(uvm_map_addr, iter)) { 2027 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2028 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2029 continue; 2030 2031 iter->wired_count = 0; 2032 uvm_fault_unwire_locked(map, iter->start, iter->end); 2033 } 2034 } 2035 2036 /* 2037 * Mark all entries from first until end (exclusive) as wired. 2038 * 2039 * Lockflags determines the lock state on return from this function. 2040 * Lock must be exclusive on entry. 2041 */ 2042 int 2043 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2044 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2045 int lockflags) 2046 { 2047 struct vm_map_entry *iter; 2048 #ifdef DIAGNOSTIC 2049 unsigned int timestamp_save; 2050 #endif 2051 int error; 2052 2053 /* 2054 * Wire pages in two passes: 2055 * 2056 * 1: holding the write lock, we create any anonymous maps that need 2057 * to be created. then we clip each map entry to the region to 2058 * be wired and increment its wiring count. 2059 * 2060 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault 2061 * in the pages for any newly wired area (wired_count == 1). 2062 * 2063 * downgrading to a read lock for uvm_fault_wire avoids a possible 2064 * deadlock with another thread that may have faulted on one of 2065 * the pages to be wired (it would mark the page busy, blocking 2066 * us, then in turn block on the map lock that we hold). 2067 * because we keep the read lock on the map, the copy-on-write 2068 * status of the entries we modify here cannot change. 
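 *
 * (VM_MAPENT_ISWIRED() simply tests wired_count != 0; the count is
 * bumped once per entry in pass 1, and the error path below only
 * unwires an entry once its count has dropped back to zero.)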
2069 */
2070 for (iter = first; iter != end;
2071 iter = RBT_NEXT(uvm_map_addr, iter)) {
2072 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2073 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2074 iter->protection == PROT_NONE)
2075 continue;
2076
2077 /*
2078 * Perform actions of vm_map_lookup that need the write lock.
2079 * - create an anonymous map for copy-on-write
2080 * - anonymous map for zero-fill
2081 * Skip submaps.
2082 */
2083 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2084 UVM_ET_ISNEEDSCOPY(iter) &&
2085 ((iter->protection & PROT_WRITE) ||
2086 iter->object.uvm_obj == NULL)) {
2087 amap_copy(map, iter, M_WAITOK, TRUE,
2088 iter->start, iter->end);
2089 }
2090 iter->wired_count++;
2091 }
2092
2093 /*
2094 * Pass 2.
2095 */
2096 #ifdef DIAGNOSTIC
2097 timestamp_save = map->timestamp;
2098 #endif
2099 vm_map_busy(map);
2100 vm_map_downgrade(map);
2101
2102 error = 0;
2103 for (iter = first; error == 0 && iter != end;
2104 iter = RBT_NEXT(uvm_map_addr, iter)) {
2105 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2106 iter->protection == PROT_NONE)
2107 continue;
2108
2109 error = uvm_fault_wire(map, iter->start, iter->end,
2110 iter->protection);
2111 }
2112
2113 if (error) {
2114 /*
2115 * uvm_fault_wire failure
2116 *
2117 * Reacquire lock and undo our work.
2118 */
2119 vm_map_upgrade(map);
2120 vm_map_unbusy(map);
2121 #ifdef DIAGNOSTIC
2122 if (timestamp_save != map->timestamp)
2123 panic("uvm_map_pageable_wire: stale map");
2124 #endif
2125
2126 /*
2127 * first is no longer needed to restart loops.
2128 * Use it as iterator to unwire the entries that were successfully wired.
2129 */
2130 for (; first != iter;
2131 first = RBT_NEXT(uvm_map_addr, first)) {
2132 if (UVM_ET_ISHOLE(first) ||
2133 first->start == first->end ||
2134 first->protection == PROT_NONE)
2135 continue;
2136
2137 first->wired_count--;
2138 if (!VM_MAPENT_ISWIRED(first)) {
2139 uvm_fault_unwire_locked(map,
2140 first->start, first->end);
2141 }
2142 }
2143
2144 /* decrease counter in the rest of the entries */
2145 for (; iter != end;
2146 iter = RBT_NEXT(uvm_map_addr, iter)) {
2147 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2148 iter->protection == PROT_NONE)
2149 continue;
2150
2151 iter->wired_count--;
2152 }
2153
2154 if ((lockflags & UVM_LK_EXIT) == 0)
2155 vm_map_unlock(map);
2156 return error;
2157 }
2158
2159 /* We are currently holding a read lock. */
2160 if ((lockflags & UVM_LK_EXIT) == 0) {
2161 vm_map_unbusy(map);
2162 vm_map_unlock_read(map);
2163 } else {
2164 vm_map_upgrade(map);
2165 vm_map_unbusy(map);
2166 #ifdef DIAGNOSTIC
2167 if (timestamp_save != map->timestamp)
2168 panic("uvm_map_pageable_wire: stale map");
2169 #endif
2170 }
2171 return 0;
2172 }
2173
2174 /*
2175 * uvm_map_pageable: set pageability of a range in a map.
2176 *
2177 * Flags:
2178 * UVM_LK_ENTER: map is already locked by caller
2179 * UVM_LK_EXIT: don't unlock map on exit
2180 *
2181 * The full range must be in use (entries may not have fspace != 0).
2182 * UVM_ET_HOLE counts as unmapped.
2183 */
2184 int
2185 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2186 boolean_t new_pageable, int lockflags)
2187 {
2188 struct vm_map_entry *first, *last, *tmp;
2189 int error;
2190
2191 start = trunc_page(start);
2192 end = round_page(end);
2193
2194 if (start > end)
2195 return EINVAL;
2196 if (start == end)
2197 return 0; /* nothing to do */
2198 if (start < map->min_offset)
2199 return EFAULT; /* why?
see first XXX below */ 2200 if (end > map->max_offset) 2201 return EINVAL; /* why? see second XXX below */ 2202 2203 KASSERT(map->flags & VM_MAP_PAGEABLE); 2204 if ((lockflags & UVM_LK_ENTER) == 0) 2205 vm_map_lock(map); 2206 2207 /* 2208 * Find first entry. 2209 * 2210 * Initial test on start is different, because of the different 2211 * error returned. Rest is tested further down. 2212 */ 2213 first = uvm_map_entrybyaddr(&map->addr, start); 2214 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2215 /* 2216 * XXX if the first address is not mapped, it is EFAULT? 2217 */ 2218 error = EFAULT; 2219 goto out; 2220 } 2221 2222 /* Check that the range has no holes. */ 2223 for (last = first; last != NULL && last->start < end; 2224 last = RBT_NEXT(uvm_map_addr, last)) { 2225 if (UVM_ET_ISHOLE(last) || 2226 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2227 /* 2228 * XXX unmapped memory in range, why is it EINVAL 2229 * instead of EFAULT? 2230 */ 2231 error = EINVAL; 2232 goto out; 2233 } 2234 } 2235 2236 /* 2237 * Last ended at the first entry after the range. 2238 * Move back one step. 2239 * 2240 * Note that last may be NULL. 2241 */ 2242 if (last == NULL) { 2243 last = RBT_MAX(uvm_map_addr, &map->addr); 2244 if (last->end < end) { 2245 error = EINVAL; 2246 goto out; 2247 } 2248 } else { 2249 KASSERT(last != first); 2250 last = RBT_PREV(uvm_map_addr, last); 2251 } 2252 2253 /* Wire/unwire pages here. */ 2254 if (new_pageable) { 2255 /* 2256 * Mark pageable. 2257 * entries that are not wired are untouched. 2258 */ 2259 if (VM_MAPENT_ISWIRED(first)) 2260 UVM_MAP_CLIP_START(map, first, start); 2261 /* 2262 * Split last at end. 2263 * Make tmp be the first entry after what is to be touched. 2264 * If last is not wired, don't touch it. 2265 */ 2266 if (VM_MAPENT_ISWIRED(last)) { 2267 UVM_MAP_CLIP_END(map, last, end); 2268 tmp = RBT_NEXT(uvm_map_addr, last); 2269 } else 2270 tmp = last; 2271 2272 uvm_map_pageable_pgon(map, first, tmp, start, end); 2273 error = 0; 2274 2275 out: 2276 if ((lockflags & UVM_LK_EXIT) == 0) 2277 vm_map_unlock(map); 2278 return error; 2279 } else { 2280 /* 2281 * Mark entries wired. 2282 * entries are always touched (because recovery needs this). 2283 */ 2284 if (!VM_MAPENT_ISWIRED(first)) 2285 UVM_MAP_CLIP_START(map, first, start); 2286 /* 2287 * Split last at end. 2288 * Make tmp be the first entry after what is to be touched. 2289 * If last is not wired, don't touch it. 2290 */ 2291 if (!VM_MAPENT_ISWIRED(last)) { 2292 UVM_MAP_CLIP_END(map, last, end); 2293 tmp = RBT_NEXT(uvm_map_addr, last); 2294 } else 2295 tmp = last; 2296 2297 return uvm_map_pageable_wire(map, first, tmp, start, end, 2298 lockflags); 2299 } 2300 } 2301 2302 /* 2303 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2304 * all mapped regions. 2305 * 2306 * Map must not be locked. 2307 * If no flags are specified, all ragions are unwired. 
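 *
 * (Sketch of intended use: mlockall(2)-style callers pass MCL_CURRENT
 * and/or MCL_FUTURE; a non-zero limit caps the number of bytes this
 * pmap may have wired.)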
2308 */ 2309 int 2310 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2311 { 2312 vsize_t size; 2313 struct vm_map_entry *iter; 2314 2315 KASSERT(map->flags & VM_MAP_PAGEABLE); 2316 vm_map_lock(map); 2317 2318 if (flags == 0) { 2319 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2320 NULL, map->min_offset, map->max_offset); 2321 2322 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2323 vm_map_unlock(map); 2324 return 0; 2325 } 2326 2327 if (flags & MCL_FUTURE) 2328 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2329 if (!(flags & MCL_CURRENT)) { 2330 vm_map_unlock(map); 2331 return 0; 2332 } 2333 2334 /* 2335 * Count number of pages in all non-wired entries. 2336 * If the number exceeds the limit, abort. 2337 */ 2338 size = 0; 2339 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2340 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2341 continue; 2342 2343 size += iter->end - iter->start; 2344 } 2345 2346 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2347 vm_map_unlock(map); 2348 return ENOMEM; 2349 } 2350 2351 /* XXX non-pmap_wired_count case must be handled by caller */ 2352 #ifdef pmap_wired_count 2353 if (limit != 0 && 2354 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2355 vm_map_unlock(map); 2356 return ENOMEM; 2357 } 2358 #endif 2359 2360 /* 2361 * uvm_map_pageable_wire will release lcok 2362 */ 2363 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2364 NULL, map->min_offset, map->max_offset, 0); 2365 } 2366 2367 /* 2368 * Initialize map. 2369 * 2370 * Allocates sufficient entries to describe the free memory in the map. 2371 */ 2372 void 2373 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags) 2374 { 2375 int i; 2376 2377 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2378 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2379 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2380 2381 /* 2382 * Update parameters. 2383 * 2384 * This code handles (vaddr_t)-1 and other page mask ending addresses 2385 * properly. 2386 * We lose the top page if the full virtual address space is used. 2387 */ 2388 if (max & (vaddr_t)PAGE_MASK) { 2389 max += 1; 2390 if (max == 0) /* overflow */ 2391 max -= PAGE_SIZE; 2392 } 2393 2394 RBT_INIT(uvm_map_addr, &map->addr); 2395 map->uaddr_exe = NULL; 2396 for (i = 0; i < nitems(map->uaddr_any); ++i) 2397 map->uaddr_any[i] = NULL; 2398 map->uaddr_brk_stack = NULL; 2399 2400 map->size = 0; 2401 map->ref_count = 0; 2402 map->min_offset = min; 2403 map->max_offset = max; 2404 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2405 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2406 map->flags = flags; 2407 map->timestamp = 0; 2408 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2409 mtx_init(&map->mtx, IPL_VM); 2410 mtx_init(&map->flags_lock, IPL_VM); 2411 2412 /* Configure the allocators. */ 2413 if (flags & VM_MAP_ISVMSPACE) 2414 uvm_map_setup_md(map); 2415 else 2416 map->uaddr_any[3] = &uaddr_kbootstrap; 2417 2418 /* 2419 * Fill map entries. 2420 * We do not need to write-lock the map here because only the current 2421 * thread sees it right now. Initialize ref_count to 0 above to avoid 2422 * bogus triggering of lock-not-held assertions. 2423 */ 2424 uvm_map_setup_entries(map); 2425 uvm_tree_sanity(map, __FILE__, __LINE__); 2426 map->ref_count = 1; 2427 } 2428 2429 /* 2430 * Destroy the map. 2431 * 2432 * This is the inverse operation to uvm_map_setup. 
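 *
 * (Called from uvmspace_free() below once the last reference to the
 * vmspace is dropped.)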
2433 */ 2434 void 2435 uvm_map_teardown(struct vm_map *map) 2436 { 2437 struct uvm_map_deadq dead_entries; 2438 struct vm_map_entry *entry, *tmp; 2439 #ifdef VMMAP_DEBUG 2440 size_t numq, numt; 2441 #endif 2442 int i; 2443 2444 KERNEL_ASSERT_LOCKED(); 2445 KERNEL_UNLOCK(); 2446 KERNEL_ASSERT_UNLOCKED(); 2447 2448 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2449 2450 /* Remove address selectors. */ 2451 uvm_addr_destroy(map->uaddr_exe); 2452 map->uaddr_exe = NULL; 2453 for (i = 0; i < nitems(map->uaddr_any); i++) { 2454 uvm_addr_destroy(map->uaddr_any[i]); 2455 map->uaddr_any[i] = NULL; 2456 } 2457 uvm_addr_destroy(map->uaddr_brk_stack); 2458 map->uaddr_brk_stack = NULL; 2459 2460 /* 2461 * Remove entries. 2462 * 2463 * The following is based on graph breadth-first search. 2464 * 2465 * In color terms: 2466 * - the dead_entries set contains all nodes that are reachable 2467 * (i.e. both the black and the grey nodes) 2468 * - any entry not in dead_entries is white 2469 * - any entry that appears in dead_entries before entry, 2470 * is black, the rest is grey. 2471 * The set [entry, end] is also referred to as the wavefront. 2472 * 2473 * Since the tree is always a fully connected graph, the breadth-first 2474 * search guarantees that each vmmap_entry is visited exactly once. 2475 * The vm_map is broken down in linear time. 2476 */ 2477 TAILQ_INIT(&dead_entries); 2478 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2479 DEAD_ENTRY_PUSH(&dead_entries, entry); 2480 while (entry != NULL) { 2481 sched_pause(yield); 2482 uvm_unmap_kill_entry(map, entry); 2483 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2484 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2485 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2486 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2487 /* Update wave-front. */ 2488 entry = TAILQ_NEXT(entry, dfree.deadq); 2489 } 2490 2491 #ifdef VMMAP_DEBUG 2492 numt = numq = 0; 2493 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2494 numt++; 2495 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2496 numq++; 2497 KASSERT(numt == numq); 2498 #endif 2499 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2500 2501 KERNEL_LOCK(); 2502 2503 pmap_destroy(map->pmap); 2504 map->pmap = NULL; 2505 } 2506 2507 /* 2508 * Populate map with free-memory entries. 2509 * 2510 * Map must be initialized and empty. 2511 */ 2512 void 2513 uvm_map_setup_entries(struct vm_map *map) 2514 { 2515 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2516 2517 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2518 } 2519 2520 /* 2521 * Split entry at given address. 2522 * 2523 * orig: entry that is to be split. 2524 * next: a newly allocated map entry that is not linked. 2525 * split: address at which the split is done. 2526 */ 2527 void 2528 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2529 struct vm_map_entry *next, vaddr_t split) 2530 { 2531 struct uvm_addr_state *free, *free_before; 2532 vsize_t adj; 2533 2534 if ((split & PAGE_MASK) != 0) { 2535 panic("uvm_map_splitentry: split address 0x%lx " 2536 "not on page boundary!", split); 2537 } 2538 KDASSERT(map != NULL && orig != NULL && next != NULL); 2539 uvm_tree_sanity(map, __FILE__, __LINE__); 2540 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2541 2542 #ifdef VMMAP_DEBUG 2543 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2544 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2545 #endif /* VMMAP_DEBUG */ 2546 2547 /* 2548 * Free space will change, unlink from free space tree. 
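 *
 * (Illustrative example for the common case where split falls inside
 * the mapped range: splitting an entry covering [A, C) at address B
 * leaves orig covering [A, B) and next covering [B, C), with object
 * and amap offsets in next advanced by adj = B - A as computed below.)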
2549 */ 2550 free = uvm_map_uaddr_e(map, orig); 2551 uvm_mapent_free_remove(map, free, orig); 2552 2553 adj = split - orig->start; 2554 2555 uvm_mapent_copy(orig, next); 2556 if (split >= orig->end) { 2557 next->etype = 0; 2558 next->offset = 0; 2559 next->wired_count = 0; 2560 next->start = next->end = split; 2561 next->guard = 0; 2562 next->fspace = VMMAP_FREE_END(orig) - split; 2563 next->aref.ar_amap = NULL; 2564 next->aref.ar_pageoff = 0; 2565 orig->guard = MIN(orig->guard, split - orig->end); 2566 orig->fspace = split - VMMAP_FREE_START(orig); 2567 } else { 2568 orig->fspace = 0; 2569 orig->guard = 0; 2570 orig->end = next->start = split; 2571 2572 if (next->aref.ar_amap) { 2573 KERNEL_LOCK(); 2574 amap_splitref(&orig->aref, &next->aref, adj); 2575 KERNEL_UNLOCK(); 2576 } 2577 if (UVM_ET_ISSUBMAP(orig)) { 2578 uvm_map_reference(next->object.sub_map); 2579 next->offset += adj; 2580 } else if (UVM_ET_ISOBJ(orig)) { 2581 if (next->object.uvm_obj->pgops && 2582 next->object.uvm_obj->pgops->pgo_reference) { 2583 KERNEL_LOCK(); 2584 next->object.uvm_obj->pgops->pgo_reference( 2585 next->object.uvm_obj); 2586 KERNEL_UNLOCK(); 2587 } 2588 next->offset += adj; 2589 } 2590 } 2591 2592 /* 2593 * Link next into address tree. 2594 * Link orig and next into free-space tree. 2595 * 2596 * Don't insert 'next' into the addr tree until orig has been linked, 2597 * in case the free-list looks at adjecent entries in the addr tree 2598 * for its decisions. 2599 */ 2600 if (orig->fspace > 0) 2601 free_before = free; 2602 else 2603 free_before = uvm_map_uaddr_e(map, orig); 2604 uvm_mapent_free_insert(map, free_before, orig); 2605 uvm_mapent_addr_insert(map, next); 2606 uvm_mapent_free_insert(map, free, next); 2607 2608 uvm_tree_sanity(map, __FILE__, __LINE__); 2609 } 2610 2611 2612 #ifdef VMMAP_DEBUG 2613 2614 void 2615 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2616 char *file, int line) 2617 { 2618 char* map_special; 2619 2620 if (test) 2621 return; 2622 2623 if (map == kernel_map) 2624 map_special = " (kernel_map)"; 2625 else if (map == kmem_map) 2626 map_special = " (kmem_map)"; 2627 else 2628 map_special = ""; 2629 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2630 line, test_str); 2631 } 2632 2633 /* 2634 * Check that map is sane. 2635 */ 2636 void 2637 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2638 { 2639 struct vm_map_entry *iter; 2640 vaddr_t addr; 2641 vaddr_t min, max, bound; /* Bounds checker. */ 2642 struct uvm_addr_state *free; 2643 2644 addr = vm_map_min(map); 2645 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2646 /* 2647 * Valid start, end. 2648 * Catch overflow for end+fspace. 2649 */ 2650 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2651 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2652 2653 /* May not be empty. */ 2654 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2655 file, line); 2656 2657 /* Addresses for entry must lie within map boundaries. */ 2658 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2659 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2660 2661 /* Tree may not have gaps. */ 2662 UVM_ASSERT(map, iter->start == addr, file, line); 2663 addr = VMMAP_FREE_END(iter); 2664 2665 /* 2666 * Free space may not cross boundaries, unless the same 2667 * free list is used on both sides of the border. 
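 *
 * (The loop below walks every uvm_map_boundary() crossing inside
 * [min, max) and asserts that uvm_map_uaddr() resolves to the same
 * free list on both sides of each boundary.)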
2668 */ 2669 min = VMMAP_FREE_START(iter); 2670 max = VMMAP_FREE_END(iter); 2671 2672 while (min < max && 2673 (bound = uvm_map_boundary(map, min, max)) != max) { 2674 UVM_ASSERT(map, 2675 uvm_map_uaddr(map, bound - 1) == 2676 uvm_map_uaddr(map, bound), 2677 file, line); 2678 min = bound; 2679 } 2680 2681 free = uvm_map_uaddr_e(map, iter); 2682 if (free) { 2683 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2684 file, line); 2685 } else { 2686 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2687 file, line); 2688 } 2689 } 2690 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2691 } 2692 2693 void 2694 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2695 { 2696 struct vm_map_entry *iter; 2697 vsize_t size; 2698 2699 size = 0; 2700 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2701 if (!UVM_ET_ISHOLE(iter)) 2702 size += iter->end - iter->start; 2703 } 2704 2705 if (map->size != size) 2706 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2707 UVM_ASSERT(map, map->size == size, file, line); 2708 2709 vmspace_validate(map); 2710 } 2711 2712 /* 2713 * This function validates the statistics on vmspace. 2714 */ 2715 void 2716 vmspace_validate(struct vm_map *map) 2717 { 2718 struct vmspace *vm; 2719 struct vm_map_entry *iter; 2720 vaddr_t imin, imax; 2721 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2722 vsize_t stack, heap; /* Measured sizes. */ 2723 2724 if (!(map->flags & VM_MAP_ISVMSPACE)) 2725 return; 2726 2727 vm = (struct vmspace *)map; 2728 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2729 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2730 2731 stack = heap = 0; 2732 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2733 imin = imax = iter->start; 2734 2735 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL) 2736 continue; 2737 2738 /* 2739 * Update stack, heap. 2740 * Keep in mind that (theoretically) the entries of 2741 * userspace and stack may be joined. 2742 */ 2743 while (imin != iter->end) { 2744 /* 2745 * Set imax to the first boundary crossed between 2746 * imin and stack addresses. 2747 */ 2748 imax = iter->end; 2749 if (imin < stack_begin && imax > stack_begin) 2750 imax = stack_begin; 2751 else if (imin < stack_end && imax > stack_end) 2752 imax = stack_end; 2753 2754 if (imin >= stack_begin && imin < stack_end) 2755 stack += imax - imin; 2756 else 2757 heap += imax - imin; 2758 imin = imax; 2759 } 2760 } 2761 2762 heap >>= PAGE_SHIFT; 2763 if (heap != vm->vm_dused) { 2764 printf("vmspace stack range: 0x%lx-0x%lx\n", 2765 stack_begin, stack_end); 2766 panic("vmspace_validate: vmspace.vm_dused invalid, " 2767 "expected %ld pgs, got %ld pgs in map %p", 2768 heap, vm->vm_dused, 2769 map); 2770 } 2771 } 2772 2773 #endif /* VMMAP_DEBUG */ 2774 2775 /* 2776 * uvm_map_init: init mapping system at boot time. note that we allocate 2777 * and init the static pool of structs vm_map_entry for the kernel here. 2778 */ 2779 void 2780 uvm_map_init(void) 2781 { 2782 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2783 int lcv; 2784 2785 /* now set up static pool of kernel map entries ... */ 2786 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2787 SLIST_INIT(&uvm.kentry_free); 2788 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2789 SLIST_INSERT_HEAD(&uvm.kentry_free, 2790 &kernel_map_entry[lcv], daddrs.addr_kentry); 2791 } 2792 2793 /* initialize the map-related pools. 
*/ 2794 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 2795 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 2796 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 2797 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 2798 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 2799 IPL_VM, 0, "vmmpekpl", NULL); 2800 pool_sethiwat(&uvm_map_entry_pool, 8192); 2801 2802 uvm_addr_init(); 2803 } 2804 2805 #if defined(DDB) 2806 2807 /* 2808 * DDB hooks 2809 */ 2810 2811 /* 2812 * uvm_map_printit: actually prints the map 2813 */ 2814 void 2815 uvm_map_printit(struct vm_map *map, boolean_t full, 2816 int (*pr)(const char *, ...)) 2817 { 2818 struct vmspace *vm; 2819 struct vm_map_entry *entry; 2820 struct uvm_addr_state *free; 2821 int in_free, i; 2822 char buf[8]; 2823 2824 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2825 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2826 map->b_start, map->b_end); 2827 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2828 map->s_start, map->s_end); 2829 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2830 map->size, map->ref_count, map->timestamp, 2831 map->flags); 2832 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2833 pmap_resident_count(map->pmap)); 2834 2835 /* struct vmspace handling. */ 2836 if (map->flags & VM_MAP_ISVMSPACE) { 2837 vm = (struct vmspace *)map; 2838 2839 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2840 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2841 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2842 vm->vm_tsize, vm->vm_dsize); 2843 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2844 vm->vm_taddr, vm->vm_daddr); 2845 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2846 vm->vm_maxsaddr, vm->vm_minsaddr); 2847 } 2848 2849 if (!full) 2850 goto print_uaddr; 2851 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 2852 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2853 entry, entry->start, entry->end, entry->object.uvm_obj, 2854 (long long)entry->offset, entry->aref.ar_amap, 2855 entry->aref.ar_pageoff); 2856 (*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, " 2857 "wc=%d, adv=%d\n", 2858 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2859 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', 2860 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2861 entry->protection, entry->max_protection, 2862 entry->inheritance, entry->wired_count, entry->advice); 2863 2864 free = uvm_map_uaddr_e(map, entry); 2865 in_free = (free != NULL); 2866 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2867 "free=0x%lx-0x%lx\n", 2868 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2869 in_free ? 'T' : 'F', 2870 entry->guard, 2871 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2872 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2873 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2874 (entry->etype & UVM_ET_FREEMAPPED) ? 
'T' : 'F', free); 2875 if (free) { 2876 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2877 free->uaddr_minaddr, free->uaddr_maxaddr, 2878 free->uaddr_functions->uaddr_name); 2879 } 2880 } 2881 2882 print_uaddr: 2883 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2884 for (i = 0; i < nitems(map->uaddr_any); i++) { 2885 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2886 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2887 } 2888 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2889 } 2890 2891 /* 2892 * uvm_object_printit: actually prints the object 2893 */ 2894 void 2895 uvm_object_printit(uobj, full, pr) 2896 struct uvm_object *uobj; 2897 boolean_t full; 2898 int (*pr)(const char *, ...); 2899 { 2900 struct vm_page *pg; 2901 int cnt = 0; 2902 2903 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 2904 uobj, uobj->pgops, uobj->uo_npages); 2905 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 2906 (*pr)("refs=<SYSTEM>\n"); 2907 else 2908 (*pr)("refs=%d\n", uobj->uo_refs); 2909 2910 if (!full) { 2911 return; 2912 } 2913 (*pr)(" PAGES <pg,offset>:\n "); 2914 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 2915 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 2916 if ((cnt % 3) == 2) { 2917 (*pr)("\n "); 2918 } 2919 cnt++; 2920 } 2921 if ((cnt % 3) != 2) { 2922 (*pr)("\n"); 2923 } 2924 } 2925 2926 /* 2927 * uvm_page_printit: actually print the page 2928 */ 2929 static const char page_flagbits[] = 2930 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 2931 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 2932 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 2933 2934 void 2935 uvm_page_printit(pg, full, pr) 2936 struct vm_page *pg; 2937 boolean_t full; 2938 int (*pr)(const char *, ...); 2939 { 2940 struct vm_page *tpg; 2941 struct uvm_object *uobj; 2942 struct pglist *pgl; 2943 2944 (*pr)("PAGE %p:\n", pg); 2945 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 2946 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 2947 (long long)pg->phys_addr); 2948 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 2949 pg->uobject, pg->uanon, (long long)pg->offset); 2950 #if defined(UVM_PAGE_TRKOWN) 2951 if (pg->pg_flags & PG_BUSY) 2952 (*pr)(" owning thread = %d, tag=%s", 2953 pg->owner, pg->owner_tag); 2954 else 2955 (*pr)(" page not busy, no owner"); 2956 #else 2957 (*pr)(" [page ownership tracking disabled]"); 2958 #endif 2959 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 2960 2961 if (!full) 2962 return; 2963 2964 /* cross-verify object/anon */ 2965 if ((pg->pg_flags & PQ_FREE) == 0) { 2966 if (pg->pg_flags & PQ_ANON) { 2967 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2968 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2969 (pg->uanon) ? pg->uanon->an_page : NULL); 2970 else 2971 (*pr)(" anon backpointer is OK\n"); 2972 } else { 2973 uobj = pg->uobject; 2974 if (uobj) { 2975 (*pr)(" checking object list\n"); 2976 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 2977 if (tpg == pg) { 2978 break; 2979 } 2980 } 2981 if (tpg) 2982 (*pr)(" page found on object list\n"); 2983 else 2984 (*pr)(" >>> PAGE NOT FOUND " 2985 "ON OBJECT LIST! <<<\n"); 2986 } 2987 } 2988 } 2989 2990 /* cross-verify page queue */ 2991 if (pg->pg_flags & PQ_FREE) { 2992 if (uvm_pmr_isfree(pg)) 2993 (*pr)(" page found in uvm_pmemrange\n"); 2994 else 2995 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 2996 pgl = NULL; 2997 } else if (pg->pg_flags & PQ_INACTIVE) { 2998 pgl = (pg->pg_flags & PQ_SWAPBACKED) ? 
2999 &uvm.page_inactive_swp : &uvm.page_inactive_obj; 3000 } else if (pg->pg_flags & PQ_ACTIVE) { 3001 pgl = &uvm.page_active; 3002 } else { 3003 pgl = NULL; 3004 } 3005 3006 if (pgl) { 3007 (*pr)(" checking pageq list\n"); 3008 TAILQ_FOREACH(tpg, pgl, pageq) { 3009 if (tpg == pg) { 3010 break; 3011 } 3012 } 3013 if (tpg) 3014 (*pr)(" page found on pageq list\n"); 3015 else 3016 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3017 } 3018 } 3019 #endif 3020 3021 /* 3022 * uvm_map_protect: change map protection 3023 * 3024 * => set_max means set max_protection. 3025 * => map must be unlocked. 3026 */ 3027 int 3028 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3029 vm_prot_t new_prot, boolean_t set_max) 3030 { 3031 struct vm_map_entry *first, *iter; 3032 vm_prot_t old_prot; 3033 vm_prot_t mask; 3034 int error; 3035 3036 if (start > end) 3037 return EINVAL; 3038 start = MAX(start, map->min_offset); 3039 end = MIN(end, map->max_offset); 3040 if (start >= end) 3041 return 0; 3042 3043 error = 0; 3044 vm_map_lock(map); 3045 3046 /* 3047 * Set up first and last. 3048 * - first will contain first entry at or after start. 3049 */ 3050 first = uvm_map_entrybyaddr(&map->addr, start); 3051 KDASSERT(first != NULL); 3052 if (first->end < start) 3053 first = RBT_NEXT(uvm_map_addr, first); 3054 3055 /* First, check for protection violations. */ 3056 for (iter = first; iter != NULL && iter->start < end; 3057 iter = RBT_NEXT(uvm_map_addr, iter)) { 3058 /* Treat memory holes as free space. */ 3059 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3060 continue; 3061 3062 if (UVM_ET_ISSUBMAP(iter)) { 3063 error = EINVAL; 3064 goto out; 3065 } 3066 if ((new_prot & iter->max_protection) != new_prot) { 3067 error = EACCES; 3068 goto out; 3069 } 3070 if (map == kernel_map && 3071 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3072 panic("uvm_map_protect: kernel map W^X violation requested"); 3073 } 3074 3075 /* Fix protections. */ 3076 for (iter = first; iter != NULL && iter->start < end; 3077 iter = RBT_NEXT(uvm_map_addr, iter)) { 3078 /* Treat memory holes as free space. */ 3079 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3080 continue; 3081 3082 old_prot = iter->protection; 3083 3084 /* 3085 * Skip adapting protection iff old and new protection 3086 * are equal. 3087 */ 3088 if (set_max) { 3089 if (old_prot == (new_prot & old_prot) && 3090 iter->max_protection == new_prot) 3091 continue; 3092 } else { 3093 if (old_prot == new_prot) 3094 continue; 3095 } 3096 3097 UVM_MAP_CLIP_START(map, iter, start); 3098 UVM_MAP_CLIP_END(map, iter, end); 3099 3100 if (set_max) { 3101 iter->max_protection = new_prot; 3102 iter->protection &= new_prot; 3103 } else 3104 iter->protection = new_prot; 3105 3106 /* 3107 * update physical map if necessary. worry about copy-on-write 3108 * here -- CHECK THIS XXX 3109 */ 3110 if (iter->protection != old_prot) { 3111 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3112 ~PROT_WRITE : PROT_MASK; 3113 3114 /* update pmap */ 3115 if ((iter->protection & mask) == PROT_NONE && 3116 VM_MAPENT_ISWIRED(iter)) { 3117 /* 3118 * TODO(ariane) this is stupid. wired_count 3119 * is 0 if not wired, otherwise anything 3120 * larger than 0 (incremented once each time 3121 * wire is called). 3122 * Mostly to be able to undo the damage on 3123 * failure. Not the actually be a wired 3124 * refcounter... 3125 * Originally: iter->wired_count--; 3126 * (don't we have to unwire this in the pmap 3127 * as well?) 
3128 */ 3129 iter->wired_count = 0; 3130 } 3131 pmap_protect(map->pmap, iter->start, iter->end, 3132 iter->protection & mask); 3133 } 3134 3135 /* 3136 * If the map is configured to lock any future mappings, 3137 * wire this entry now if the old protection was PROT_NONE 3138 * and the new protection is not PROT_NONE. 3139 */ 3140 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3141 VM_MAPENT_ISWIRED(iter) == 0 && 3142 old_prot == PROT_NONE && 3143 new_prot != PROT_NONE) { 3144 if (uvm_map_pageable(map, iter->start, iter->end, 3145 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3146 /* 3147 * If locking the entry fails, remember the 3148 * error if it's the first one. Note we 3149 * still continue setting the protection in 3150 * the map, but it will return the resource 3151 * storage condition regardless. 3152 * 3153 * XXX Ignore what the actual error is, 3154 * XXX just call it a resource shortage 3155 * XXX so that it doesn't get confused 3156 * XXX what uvm_map_protect() itself would 3157 * XXX normally return. 3158 */ 3159 error = ENOMEM; 3160 } 3161 } 3162 } 3163 pmap_update(map->pmap); 3164 3165 out: 3166 vm_map_unlock(map); 3167 return error; 3168 } 3169 3170 /* 3171 * uvmspace_alloc: allocate a vmspace structure. 3172 * 3173 * - structure includes vm_map and pmap 3174 * - XXX: no locking on this structure 3175 * - refcnt set to 1, rest must be init'd by caller 3176 */ 3177 struct vmspace * 3178 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3179 boolean_t remove_holes) 3180 { 3181 struct vmspace *vm; 3182 3183 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3184 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3185 return (vm); 3186 } 3187 3188 /* 3189 * uvmspace_init: initialize a vmspace structure. 3190 * 3191 * - XXX: no locking on this structure 3192 * - refcnt set to 1, rest must be init'd by caller 3193 */ 3194 void 3195 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3196 boolean_t pageable, boolean_t remove_holes) 3197 { 3198 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3199 3200 if (pmap) 3201 pmap_reference(pmap); 3202 else 3203 pmap = pmap_create(); 3204 vm->vm_map.pmap = pmap; 3205 3206 uvm_map_setup(&vm->vm_map, min, max, 3207 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3208 3209 vm->vm_refcnt = 1; 3210 3211 if (remove_holes) 3212 pmap_remove_holes(vm); 3213 } 3214 3215 /* 3216 * uvmspace_share: share a vmspace between two processes 3217 * 3218 * - XXX: no locking on vmspace 3219 * - used for vfork 3220 */ 3221 3222 struct vmspace * 3223 uvmspace_share(struct process *pr) 3224 { 3225 struct vmspace *vm = pr->ps_vmspace; 3226 3227 vm->vm_refcnt++; 3228 return vm; 3229 } 3230 3231 /* 3232 * uvmspace_exec: the process wants to exec a new program 3233 * 3234 * - XXX: no locking on vmspace 3235 */ 3236 3237 void 3238 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3239 { 3240 struct process *pr = p->p_p; 3241 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3242 struct vm_map *map = &ovm->vm_map; 3243 struct uvm_map_deadq dead_entries; 3244 3245 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3246 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3247 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3248 3249 pmap_unuse_final(p); /* before stack addresses go away */ 3250 TAILQ_INIT(&dead_entries); 3251 3252 /* see if more than one process is using this vmspace... 
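 * (vm_refcnt == 1 means we are the sole user and may recycle the map
 * in place; otherwise a fresh vmspace is allocated further down.)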
*/ 3253 if (ovm->vm_refcnt == 1) { 3254 /* 3255 * If pr is the only process using its vmspace then 3256 * we can safely recycle that vmspace for the program 3257 * that is being exec'd. 3258 */ 3259 3260 #ifdef SYSVSHM 3261 /* 3262 * SYSV SHM semantics require us to kill all segments on an exec 3263 */ 3264 if (ovm->vm_shm) 3265 shmexit(ovm); 3266 #endif 3267 3268 /* 3269 * POSIX 1003.1b -- "lock future mappings" is revoked 3270 * when a process execs another program image. 3271 */ 3272 vm_map_lock(map); 3273 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 3274 3275 /* 3276 * now unmap the old program 3277 * 3278 * Instead of attempting to keep the map valid, we simply 3279 * nuke all entries and ask uvm_map_setup to reinitialize 3280 * the map to the new boundaries. 3281 * 3282 * uvm_unmap_remove will actually nuke all entries for us 3283 * (as in, not replace them with free-memory entries). 3284 */ 3285 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3286 &dead_entries, TRUE, FALSE); 3287 3288 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3289 3290 /* Nuke statistics and boundaries. */ 3291 memset(&ovm->vm_startcopy, 0, 3292 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3293 3294 3295 if (end & (vaddr_t)PAGE_MASK) { 3296 end += 1; 3297 if (end == 0) /* overflow */ 3298 end -= PAGE_SIZE; 3299 } 3300 3301 /* Setup new boundaries and populate map with entries. */ 3302 map->min_offset = start; 3303 map->max_offset = end; 3304 uvm_map_setup_entries(map); 3305 vm_map_unlock(map); 3306 3307 /* but keep MMU holes unavailable */ 3308 pmap_remove_holes(ovm); 3309 } else { 3310 /* 3311 * pr's vmspace is being shared, so we can't reuse 3312 * it for pr since it is still being used for others. 3313 * allocate a new vmspace for pr 3314 */ 3315 nvm = uvmspace_alloc(start, end, 3316 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3317 3318 /* install new vmspace and drop our ref to the old one. */ 3319 pmap_deactivate(p); 3320 p->p_vmspace = pr->ps_vmspace = nvm; 3321 pmap_activate(p); 3322 3323 uvmspace_free(ovm); 3324 } 3325 3326 /* Release dead entries */ 3327 uvm_unmap_detach(&dead_entries, 0); 3328 } 3329 3330 /* 3331 * uvmspace_free: free a vmspace data structure 3332 * 3333 * - XXX: no locking on vmspace 3334 */ 3335 void 3336 uvmspace_free(struct vmspace *vm) 3337 { 3338 if (--vm->vm_refcnt == 0) { 3339 /* 3340 * lock the map, to wait out all other references to it. delete 3341 * all of the mappings and pages they hold, then call the pmap 3342 * module to reclaim anything left. 3343 */ 3344 #ifdef SYSVSHM 3345 /* Get rid of any SYSV shared memory segments. */ 3346 if (vm->vm_shm != NULL) 3347 shmexit(vm); 3348 #endif 3349 3350 uvm_map_teardown(&vm->vm_map); 3351 pool_put(&uvm_vmspace_pool, vm); 3352 } 3353 } 3354 3355 /* 3356 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3357 * srcmap to the address range [dstaddr, dstaddr + sz) in 3358 * dstmap. 3359 * 3360 * The whole address range in srcmap must be backed by an object 3361 * (no holes). 3362 * 3363 * If successful, the address ranges share memory and the destination 3364 * address range uses the protection flags in prot. 3365 * 3366 * This routine assumes that sz is a multiple of PAGE_SIZE and 3367 * that dstaddr and srcaddr are page-aligned. 
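 *
 * (A hypothetical call, for illustration only:
 *	uvm_share(dstmap, dva, PROT_READ, srcmap, sva, 3 * PAGE_SIZE)
 * maps the three pages backing [sva, sva + 3 * PAGE_SIZE) in srcmap at
 * dva in dstmap, read-only, sharing the underlying amaps and objects
 * rather than copying them; dva and sva are assumed page-aligned and
 * the source range hole-free.)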
3368 */ 3369 int 3370 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3371 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3372 { 3373 int ret = 0; 3374 vaddr_t unmap_end; 3375 vaddr_t dstva; 3376 vsize_t off, len, n = sz; 3377 struct vm_map_entry *first = NULL, *last = NULL; 3378 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3379 struct uvm_map_deadq dead; 3380 3381 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3382 return EINVAL; 3383 3384 TAILQ_INIT(&dead); 3385 vm_map_lock(dstmap); 3386 vm_map_lock_read(srcmap); 3387 3388 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3389 ret = ENOMEM; 3390 goto exit_unlock; 3391 } 3392 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3393 ret = EINVAL; 3394 goto exit_unlock; 3395 } 3396 3397 unmap_end = dstaddr; 3398 for (; src_entry != NULL; 3399 psrc_entry = src_entry, 3400 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3401 /* hole in address space, bail out */ 3402 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3403 break; 3404 if (src_entry->start >= srcaddr + sz) 3405 break; 3406 3407 if (UVM_ET_ISSUBMAP(src_entry)) 3408 panic("uvm_share: encountered a submap (illegal)"); 3409 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3410 UVM_ET_ISNEEDSCOPY(src_entry)) 3411 panic("uvm_share: non-copy_on_write map entries " 3412 "marked needs_copy (illegal)"); 3413 3414 dstva = dstaddr; 3415 if (src_entry->start > srcaddr) { 3416 dstva += src_entry->start - srcaddr; 3417 off = 0; 3418 } else 3419 off = srcaddr - src_entry->start; 3420 3421 if (n < src_entry->end - src_entry->start) 3422 len = n; 3423 else 3424 len = src_entry->end - src_entry->start; 3425 n -= len; 3426 3427 if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot, 3428 srcmap, src_entry, &dead) == NULL) 3429 break; 3430 3431 unmap_end = dstva + len; 3432 if (n == 0) 3433 goto exit_unlock; 3434 } 3435 3436 ret = EINVAL; 3437 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE); 3438 3439 exit_unlock: 3440 vm_map_unlock_read(srcmap); 3441 vm_map_unlock(dstmap); 3442 uvm_unmap_detach(&dead, 0); 3443 3444 return ret; 3445 } 3446 3447 /* 3448 * Clone map entry into other map. 3449 * 3450 * Mapping will be placed at dstaddr, for the same length. 3451 * Space must be available. 3452 * Reference counters are incremented. 3453 */ 3454 struct vm_map_entry * 3455 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3456 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3457 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3458 int mapent_flags, int amap_share_flags) 3459 { 3460 struct vm_map_entry *new_entry, *first, *last; 3461 3462 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3463 3464 /* Create new entry (linked in on creation). Fill in first, last. 
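 * (uvm_map_isavail() also fills in first and last for the range, which
 * uvm_map_mkentry() below uses to link the new entry in place.)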
*/ 3465 first = last = NULL; 3466 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3467 panic("uvmspace_fork: no space in map for " 3468 "entry in empty map"); 3469 } 3470 new_entry = uvm_map_mkentry(dstmap, first, last, 3471 dstaddr, dstlen, mapent_flags, dead, NULL); 3472 if (new_entry == NULL) 3473 return NULL; 3474 /* old_entry -> new_entry */ 3475 new_entry->object = old_entry->object; 3476 new_entry->offset = old_entry->offset; 3477 new_entry->aref = old_entry->aref; 3478 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3479 new_entry->protection = prot; 3480 new_entry->max_protection = maxprot; 3481 new_entry->inheritance = old_entry->inheritance; 3482 new_entry->advice = old_entry->advice; 3483 3484 /* gain reference to object backing the map (can't be a submap). */ 3485 if (new_entry->aref.ar_amap) { 3486 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3487 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3488 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3489 amap_share_flags); 3490 } 3491 3492 if (UVM_ET_ISOBJ(new_entry) && 3493 new_entry->object.uvm_obj->pgops->pgo_reference) { 3494 new_entry->offset += off; 3495 new_entry->object.uvm_obj->pgops->pgo_reference 3496 (new_entry->object.uvm_obj); 3497 } 3498 3499 return new_entry; 3500 } 3501 3502 struct vm_map_entry * 3503 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3504 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3505 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3506 { 3507 /* 3508 * If old_entry refers to a copy-on-write region that has not yet been 3509 * written to (needs_copy flag is set), then we need to allocate a new 3510 * amap for old_entry. 3511 * 3512 * If we do not do this, and the process owning old_entry does a copy-on 3513 * write later, old_entry and new_entry will refer to different memory 3514 * regions, and the memory between the processes is no longer shared. 3515 * 3516 * [in other words, we need to clear needs_copy] 3517 */ 3518 3519 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3520 /* get our own amap, clears needs_copy */ 3521 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 3522 0, 0); 3523 /* XXXCDC: WAITOK??? */ 3524 } 3525 3526 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3527 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3528 } 3529 3530 /* 3531 * share the mapping: this means we want the old and 3532 * new entries to share amaps and backing objects. 3533 */ 3534 struct vm_map_entry * 3535 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3536 struct vm_map *old_map, 3537 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3538 { 3539 struct vm_map_entry *new_entry; 3540 3541 new_entry = uvm_mapent_share(new_map, old_entry->start, 3542 old_entry->end - old_entry->start, 0, old_entry->protection, 3543 old_entry->max_protection, old_map, old_entry, dead); 3544 3545 /* 3546 * pmap_copy the mappings: this routine is optional 3547 * but if it is there it will reduce the number of 3548 * page faults in the new proc. 3549 */ 3550 if (!UVM_ET_ISHOLE(new_entry)) 3551 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, 3552 (new_entry->end - new_entry->start), new_entry->start); 3553 3554 return (new_entry); 3555 } 3556 3557 /* 3558 * copy-on-write the mapping (using mmap's 3559 * MAP_PRIVATE semantics) 3560 * 3561 * allocate new_entry, adjust reference counts. 3562 * (note that new references are read-only). 
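 *
 * (This is the MAP_INHERIT_COPY case of uvmspace_fork() below: parent
 * and child initially share the underlying pages, and either side gets
 * a private copy of a page only when it first writes to it.)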
3563 */ 3564 struct vm_map_entry * 3565 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3566 struct vm_map *old_map, 3567 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3568 { 3569 struct vm_map_entry *new_entry; 3570 boolean_t protect_child; 3571 3572 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3573 old_entry->end - old_entry->start, 0, old_entry->protection, 3574 old_entry->max_protection, old_entry, dead, 0, 0); 3575 3576 new_entry->etype |= 3577 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3578 3579 /* 3580 * the new entry will need an amap. it will either 3581 * need to be copied from the old entry or created 3582 * from scratch (if the old entry does not have an 3583 * amap). can we defer this process until later 3584 * (by setting "needs_copy") or do we need to copy 3585 * the amap now? 3586 * 3587 * we must copy the amap now if any of the following 3588 * conditions hold: 3589 * 1. the old entry has an amap and that amap is 3590 * being shared. this means that the old (parent) 3591 * process is sharing the amap with another 3592 * process. if we do not clear needs_copy here 3593 * we will end up in a situation where both the 3594 * parent and child process are referring to the 3595 * same amap with "needs_copy" set. if the 3596 * parent write-faults, the fault routine will 3597 * clear "needs_copy" in the parent by allocating 3598 * a new amap. this is wrong because the 3599 * parent is supposed to be sharing the old amap 3600 * and the new amap will break that. 3601 * 3602 * 2. if the old entry has an amap and a non-zero 3603 * wire count then we are going to have to call 3604 * amap_cow_now to avoid page faults in the 3605 * parent process. since amap_cow_now requires 3606 * "needs_copy" to be clear we might as well 3607 * clear it here as well. 3608 * 3609 */ 3610 if (old_entry->aref.ar_amap != NULL && 3611 ((amap_flags(old_entry->aref.ar_amap) & 3612 AMAP_SHARED) != 0 || 3613 VM_MAPENT_ISWIRED(old_entry))) { 3614 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3615 0, 0); 3616 /* XXXCDC: M_WAITOK ... ok? */ 3617 } 3618 3619 /* 3620 * if the parent's entry is wired down, then the 3621 * parent process does not want page faults on 3622 * access to that memory. this means that we 3623 * cannot do copy-on-write because we can't write 3624 * protect the old entry. in this case we 3625 * resolve all copy-on-write faults now, using 3626 * amap_cow_now. note that we have already 3627 * allocated any needed amap (above). 3628 */ 3629 if (VM_MAPENT_ISWIRED(old_entry)) { 3630 /* 3631 * resolve all copy-on-write faults now 3632 * (note that there is nothing to do if 3633 * the old mapping does not have an amap). 3634 * XXX: is it worthwhile to bother with 3635 * pmap_copy in this case? 3636 */ 3637 if (old_entry->aref.ar_amap) 3638 amap_cow_now(new_map, new_entry); 3639 } else { 3640 if (old_entry->aref.ar_amap) { 3641 /* 3642 * setup mappings to trigger copy-on-write faults 3643 * we must write-protect the parent if it has 3644 * an amap and it is not already "needs_copy"... 3645 * if it is already "needs_copy" then the parent 3646 * has already been write-protected by a previous 3647 * fork operation. 3648 * 3649 * if we do not write-protect the parent, then 3650 * we must be sure to write-protect the child 3651 * after the pmap_copy() operation. 3652 * 3653 * XXX: pmap_copy should have some way of telling 3654 * us that it didn't do anything so we can avoid 3655 * calling pmap_protect needlessly. 
3656 */ 3657 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3658 if (old_entry->max_protection & PROT_WRITE) { 3659 pmap_protect(old_map->pmap, 3660 old_entry->start, 3661 old_entry->end, 3662 old_entry->protection & 3663 ~PROT_WRITE); 3664 pmap_update(old_map->pmap); 3665 } 3666 old_entry->etype |= UVM_ET_NEEDSCOPY; 3667 } 3668 3669 /* parent must now be write-protected */ 3670 protect_child = FALSE; 3671 } else { 3672 /* 3673 * we only need to protect the child if the 3674 * parent has write access. 3675 */ 3676 if (old_entry->max_protection & PROT_WRITE) 3677 protect_child = TRUE; 3678 else 3679 protect_child = FALSE; 3680 } 3681 /* 3682 * copy the mappings 3683 * XXX: need a way to tell if this does anything 3684 */ 3685 if (!UVM_ET_ISHOLE(new_entry)) 3686 pmap_copy(new_map->pmap, old_map->pmap, 3687 new_entry->start, 3688 (old_entry->end - old_entry->start), 3689 old_entry->start); 3690 3691 /* protect the child's mappings if necessary */ 3692 if (protect_child) { 3693 pmap_protect(new_map->pmap, new_entry->start, 3694 new_entry->end, 3695 new_entry->protection & 3696 ~PROT_WRITE); 3697 } 3698 } 3699 3700 return (new_entry); 3701 } 3702 3703 /* 3704 * zero the mapping: the new entry will be zero initialized 3705 */ 3706 struct vm_map_entry * 3707 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3708 struct vm_map *old_map, 3709 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3710 { 3711 struct vm_map_entry *new_entry; 3712 3713 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3714 old_entry->end - old_entry->start, 0, old_entry->protection, 3715 old_entry->max_protection, old_entry, dead, 0, 0); 3716 3717 new_entry->etype |= 3718 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3719 3720 if (new_entry->aref.ar_amap) { 3721 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3722 atop(new_entry->end - new_entry->start), 0); 3723 new_entry->aref.ar_amap = NULL; 3724 new_entry->aref.ar_pageoff = 0; 3725 } 3726 3727 if (UVM_ET_ISOBJ(new_entry)) { 3728 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3729 new_entry->object.uvm_obj->pgops->pgo_detach( 3730 new_entry->object.uvm_obj); 3731 new_entry->object.uvm_obj = NULL; 3732 new_entry->etype &= ~UVM_ET_OBJ; 3733 } 3734 3735 return (new_entry); 3736 } 3737 3738 /* 3739 * uvmspace_fork: fork a process' main map 3740 * 3741 * => create a new vmspace for child process from parent. 3742 * => parent's map must not be locked. 3743 */ 3744 struct vmspace * 3745 uvmspace_fork(struct process *pr) 3746 { 3747 struct vmspace *vm1 = pr->ps_vmspace; 3748 struct vmspace *vm2; 3749 struct vm_map *old_map = &vm1->vm_map; 3750 struct vm_map *new_map; 3751 struct vm_map_entry *old_entry, *new_entry; 3752 struct uvm_map_deadq dead; 3753 3754 vm_map_lock(old_map); 3755 3756 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3757 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3758 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3759 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3760 vm2->vm_dused = 0; /* Statistic managed by us. 
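 * (it is recomputed below as the parent's entries are cloned, so
 * whatever the memcpy above may have copied here is deliberately
 * discarded)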
*/ 3761 new_map = &vm2->vm_map; 3762 vm_map_lock(new_map); 3763 3764 /* go entry-by-entry */ 3765 TAILQ_INIT(&dead); 3766 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3767 if (old_entry->start == old_entry->end) 3768 continue; 3769 3770 /* first, some sanity checks on the old entry */ 3771 if (UVM_ET_ISSUBMAP(old_entry)) { 3772 panic("fork: encountered a submap during fork " 3773 "(illegal)"); 3774 } 3775 3776 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3777 UVM_ET_ISNEEDSCOPY(old_entry)) { 3778 panic("fork: non-copy_on_write map entry marked " 3779 "needs_copy (illegal)"); 3780 } 3781 3782 /* Apply inheritance. */ 3783 switch (old_entry->inheritance) { 3784 case MAP_INHERIT_SHARE: 3785 new_entry = uvm_mapent_forkshared(vm2, new_map, 3786 old_map, old_entry, &dead); 3787 break; 3788 case MAP_INHERIT_COPY: 3789 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3790 old_map, old_entry, &dead); 3791 break; 3792 case MAP_INHERIT_ZERO: 3793 new_entry = uvm_mapent_forkzero(vm2, new_map, 3794 old_map, old_entry, &dead); 3795 break; 3796 default: 3797 continue; 3798 } 3799 3800 /* Update process statistics. */ 3801 if (!UVM_ET_ISHOLE(new_entry)) 3802 new_map->size += new_entry->end - new_entry->start; 3803 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) { 3804 vm2->vm_dused += uvmspace_dused( 3805 new_map, new_entry->start, new_entry->end); 3806 } 3807 } 3808 3809 vm_map_unlock(old_map); 3810 vm_map_unlock(new_map); 3811 3812 /* 3813 * This can actually happen, if multiple entries described a 3814 * space in which an entry was inherited. 3815 */ 3816 uvm_unmap_detach(&dead, 0); 3817 3818 #ifdef SYSVSHM 3819 if (vm1->vm_shm) 3820 shmfork(vm1, vm2); 3821 #endif 3822 3823 return vm2; 3824 } 3825 3826 /* 3827 * uvm_map_hint: return the beginning of the best area suitable for 3828 * creating a new mapping with "prot" protection. 3829 */ 3830 vaddr_t 3831 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3832 vaddr_t maxaddr) 3833 { 3834 vaddr_t addr; 3835 vaddr_t spacing; 3836 3837 #ifdef __i386__ 3838 /* 3839 * If executable skip first two pages, otherwise start 3840 * after data + heap region. 3841 */ 3842 if ((prot & PROT_EXEC) != 0 && 3843 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3844 addr = (PAGE_SIZE*2) + 3845 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3846 return (round_page(addr)); 3847 } 3848 #endif 3849 3850 #if defined (__LP64__) 3851 spacing = (MIN((4UL * 1024 * 1024 * 1024), BRKSIZ) - 1); 3852 #else 3853 spacing = (MIN((256 * 1024 * 1024), BRKSIZ) - 1); 3854 #endif 3855 3856 addr = (vaddr_t)vm->vm_daddr; 3857 /* 3858 * Start malloc/mmap after the brk. 3859 * If the random spacing area has been used up, 3860 * the brk area becomes fair game for mmap as well. 3861 */ 3862 if (vm->vm_dused < spacing >> PAGE_SHIFT) 3863 addr += BRKSIZ; 3864 if (addr < maxaddr) { 3865 while (spacing > maxaddr - addr) 3866 spacing >>= 1; 3867 } 3868 addr += arc4random() & spacing; 3869 return (round_page(addr)); 3870 } 3871 3872 /* 3873 * uvm_map_submap: punch down part of a map into a submap 3874 * 3875 * => only the kernel_map is allowed to be submapped 3876 * => the purpose of submapping is to break up the locking granularity 3877 * of a larger map 3878 * => the range specified must have been mapped previously with a uvm_map() 3879 * call [with uobj==NULL] to create a blank map entry in the main map. 3880 * [And it had better still be blank!] 3881 * => maps which contain submaps should never be copied or forked. 
3882 * => to remove a submap, use uvm_unmap() on the main map 3883 * and then uvm_map_deallocate() the submap. 3884 * => main map must be unlocked. 3885 * => submap must have been init'd and have a zero reference count. 3886 * [need not be locked as we don't actually reference it] 3887 */ 3888 int 3889 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3890 struct vm_map *submap) 3891 { 3892 struct vm_map_entry *entry; 3893 int result; 3894 3895 if (start > map->max_offset || end > map->max_offset || 3896 start < map->min_offset || end < map->min_offset) 3897 return EINVAL; 3898 3899 vm_map_lock(map); 3900 3901 if (uvm_map_lookup_entry(map, start, &entry)) { 3902 UVM_MAP_CLIP_START(map, entry, start); 3903 UVM_MAP_CLIP_END(map, entry, end); 3904 } else 3905 entry = NULL; 3906 3907 if (entry != NULL && 3908 entry->start == start && entry->end == end && 3909 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 3910 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 3911 entry->etype |= UVM_ET_SUBMAP; 3912 entry->object.sub_map = submap; 3913 entry->offset = 0; 3914 uvm_map_reference(submap); 3915 result = 0; 3916 } else 3917 result = EINVAL; 3918 3919 vm_map_unlock(map); 3920 return(result); 3921 } 3922 3923 /* 3924 * uvm_map_checkprot: check protection in map 3925 * 3926 * => must allow specific protection in a fully allocated region. 3927 * => map must be read or write locked by caller. 3928 */ 3929 boolean_t 3930 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 3931 vm_prot_t protection) 3932 { 3933 struct vm_map_entry *entry; 3934 3935 if (start < map->min_offset || end > map->max_offset || start > end) 3936 return FALSE; 3937 if (start == end) 3938 return TRUE; 3939 3940 /* 3941 * Iterate entries. 3942 */ 3943 for (entry = uvm_map_entrybyaddr(&map->addr, start); 3944 entry != NULL && entry->start < end; 3945 entry = RBT_NEXT(uvm_map_addr, entry)) { 3946 /* Fail if a hole is found. */ 3947 if (UVM_ET_ISHOLE(entry) || 3948 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 3949 return FALSE; 3950 3951 /* Check protection. */ 3952 if ((entry->protection & protection) != protection) 3953 return FALSE; 3954 } 3955 return TRUE; 3956 } 3957 3958 /* 3959 * uvm_map_create: create map 3960 */ 3961 vm_map_t 3962 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 3963 { 3964 vm_map_t map; 3965 3966 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 3967 map->pmap = pmap; 3968 uvm_map_setup(map, min, max, flags); 3969 return (map); 3970 } 3971 3972 /* 3973 * uvm_map_deallocate: drop reference to a map 3974 * 3975 * => caller must not lock map 3976 * => we will zap map if ref count goes to zero 3977 */ 3978 void 3979 uvm_map_deallocate(vm_map_t map) 3980 { 3981 int c; 3982 struct uvm_map_deadq dead; 3983 3984 c = --map->ref_count; 3985 if (c > 0) { 3986 return; 3987 } 3988 3989 /* 3990 * all references gone. unmap and free. 3991 * 3992 * No lock required: we are the only one to access this map. 3993 */ 3994 TAILQ_INIT(&dead); 3995 uvm_tree_sanity(map, __FILE__, __LINE__); 3996 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 3997 TRUE, FALSE); 3998 pmap_destroy(map->pmap); 3999 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4000 free(map, M_VMMAP, sizeof *map); 4001 4002 uvm_unmap_detach(&dead, 0); 4003 } 4004 4005 /* 4006 * uvm_map_inherit: set inheritance code for range of addrs in map. 4007 * 4008 * => map must be unlocked 4009 * => note that the inherit code is used during a "fork". see fork 4010 * code for details.
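 * => accepted values are MAP_INHERIT_NONE, MAP_INHERIT_COPY,
 *    MAP_INHERIT_SHARE and MAP_INHERIT_ZERO; at fork time these select
 *    (respectively) skipping the entry, uvm_mapent_forkcopy(),
 *    uvm_mapent_forkshared() and uvm_mapent_forkzero() above.
 * => illustrative sketch only (hypothetical caller, not taken from
 *    this file): share a range of the current process with its
 *    future children:
 *	uvm_map_inherit(&p->p_vmspace->vm_map, start, end,
 *	    MAP_INHERIT_SHARE);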
4011 */ 4012 int 4013 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4014 vm_inherit_t new_inheritance) 4015 { 4016 struct vm_map_entry *entry; 4017 4018 switch (new_inheritance) { 4019 case MAP_INHERIT_NONE: 4020 case MAP_INHERIT_COPY: 4021 case MAP_INHERIT_SHARE: 4022 case MAP_INHERIT_ZERO: 4023 break; 4024 default: 4025 return (EINVAL); 4026 } 4027 4028 if (start > end) 4029 return EINVAL; 4030 start = MAX(start, map->min_offset); 4031 end = MIN(end, map->max_offset); 4032 if (start >= end) 4033 return 0; 4034 4035 vm_map_lock(map); 4036 4037 entry = uvm_map_entrybyaddr(&map->addr, start); 4038 if (entry->end > start) 4039 UVM_MAP_CLIP_START(map, entry, start); 4040 else 4041 entry = RBT_NEXT(uvm_map_addr, entry); 4042 4043 while (entry != NULL && entry->start < end) { 4044 UVM_MAP_CLIP_END(map, entry, end); 4045 entry->inheritance = new_inheritance; 4046 entry = RBT_NEXT(uvm_map_addr, entry); 4047 } 4048 4049 vm_map_unlock(map); 4050 return (0); 4051 } 4052 4053 /* 4054 * uvm_map_advice: set advice code for range of addrs in map. 4055 * 4056 * => map must be unlocked 4057 */ 4058 int 4059 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4060 { 4061 struct vm_map_entry *entry; 4062 4063 switch (new_advice) { 4064 case MADV_NORMAL: 4065 case MADV_RANDOM: 4066 case MADV_SEQUENTIAL: 4067 break; 4068 default: 4069 return (EINVAL); 4070 } 4071 4072 if (start > end) 4073 return EINVAL; 4074 start = MAX(start, map->min_offset); 4075 end = MIN(end, map->max_offset); 4076 if (start >= end) 4077 return 0; 4078 4079 vm_map_lock(map); 4080 4081 entry = uvm_map_entrybyaddr(&map->addr, start); 4082 if (entry != NULL && entry->end > start) 4083 UVM_MAP_CLIP_START(map, entry, start); 4084 else if (entry!= NULL) 4085 entry = RBT_NEXT(uvm_map_addr, entry); 4086 4087 /* 4088 * XXXJRT: disallow holes? 4089 */ 4090 while (entry != NULL && entry->start < end) { 4091 UVM_MAP_CLIP_END(map, entry, end); 4092 entry->advice = new_advice; 4093 entry = RBT_NEXT(uvm_map_addr, entry); 4094 } 4095 4096 vm_map_unlock(map); 4097 return (0); 4098 } 4099 4100 /* 4101 * uvm_map_extract: extract a mapping from a map and put it somewhere 4102 * in the kernel_map, setting protection to max_prot. 4103 * 4104 * => map should be unlocked (we will write lock it and kernel_map) 4105 * => returns 0 on success, error code otherwise 4106 * => start must be page aligned 4107 * => len must be page sized 4108 * => flags: 4109 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4110 * Mappings are QREF's. 4111 */ 4112 int 4113 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4114 vaddr_t *dstaddrp, int flags) 4115 { 4116 struct uvm_map_deadq dead; 4117 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4118 vaddr_t dstaddr; 4119 vaddr_t end; 4120 vaddr_t cp_start; 4121 vsize_t cp_len, cp_off; 4122 int error; 4123 4124 TAILQ_INIT(&dead); 4125 end = start + len; 4126 4127 /* 4128 * Sanity check on the parameters. 4129 * Also, since the mapping may not contain gaps, error out if the 4130 * mapped area is not in source map. 4131 */ 4132 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4133 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4134 return EINVAL; 4135 if (start < srcmap->min_offset || end > srcmap->max_offset) 4136 return EINVAL; 4137 4138 /* Initialize dead entries. Handle len == 0 case. */ 4139 if (len == 0) 4140 return 0; 4141 4142 /* Acquire lock on srcmap. */ 4143 vm_map_lock(srcmap); 4144 4145 /* Lock srcmap, lookup first and last entry in <start,len>. 
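 * The extraction below makes three passes over <start,end>: a
 * contiguity/hole check, amap_copy() for entries still marked
 * needs-copy, and finally uvm_mapent_clone() plus pmap_copy() of
 * each entry into kernel_map at dstaddr.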
*/ 4146 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4147 4148 /* Check that the range is contiguous. */ 4149 for (entry = first; entry != NULL && entry->end < end; 4150 entry = RBT_NEXT(uvm_map_addr, entry)) { 4151 if (VMMAP_FREE_END(entry) != entry->end || 4152 UVM_ET_ISHOLE(entry)) { 4153 error = EINVAL; 4154 goto fail; 4155 } 4156 } 4157 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4158 error = EINVAL; 4159 goto fail; 4160 } 4161 4162 /* 4163 * Handle need-copy flag. 4164 */ 4165 for (entry = first; entry != NULL && entry->start < end; 4166 entry = RBT_NEXT(uvm_map_addr, entry)) { 4167 if (UVM_ET_ISNEEDSCOPY(entry)) 4168 amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); 4169 if (UVM_ET_ISNEEDSCOPY(entry)) { 4170 /* 4171 * amap_copy failure 4172 */ 4173 error = ENOMEM; 4174 goto fail; 4175 } 4176 } 4177 4178 /* Lock destination map (kernel_map). */ 4179 vm_map_lock(kernel_map); 4180 4181 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4182 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4183 PROT_NONE, 0) != 0) { 4184 error = ENOMEM; 4185 goto fail2; 4186 } 4187 *dstaddrp = dstaddr; 4188 4189 /* 4190 * We now have srcmap and kernel_map locked. 4191 * dstaddr contains the destination offset in dstmap. 4192 */ 4193 /* step 1: start looping through map entries, performing extraction. */ 4194 for (entry = first; entry != NULL && entry->start < end; 4195 entry = RBT_NEXT(uvm_map_addr, entry)) { 4196 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4197 if (UVM_ET_ISHOLE(entry)) 4198 continue; 4199 4200 /* Calculate uvm_mapent_clone parameters. */ 4201 cp_start = entry->start; 4202 if (cp_start < start) { 4203 cp_off = start - cp_start; 4204 cp_start = start; 4205 } else 4206 cp_off = 0; 4207 cp_len = MIN(entry->end, end) - cp_start; 4208 4209 newentry = uvm_mapent_clone(kernel_map, 4210 cp_start - start + dstaddr, cp_len, cp_off, 4211 entry->protection, entry->max_protection, 4212 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4213 if (newentry == NULL) { 4214 error = ENOMEM; 4215 goto fail2_unmap; 4216 } 4217 kernel_map->size += cp_len; 4218 if (flags & UVM_EXTRACT_FIXPROT) 4219 newentry->protection = newentry->max_protection; 4220 4221 /* 4222 * Step 2: perform pmap copy. 4223 * (Doing this in the loop saves one RB traversal.) 4224 */ 4225 pmap_copy(kernel_map->pmap, srcmap->pmap, 4226 cp_start - start + dstaddr, cp_len, cp_start); 4227 } 4228 pmap_update(kernel_map->pmap); 4229 4230 error = 0; 4231 4232 /* Unmap copied entries on failure. */ 4233 fail2_unmap: 4234 if (error) { 4235 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4236 FALSE, TRUE); 4237 } 4238 4239 /* Release maps, release dead entries. */ 4240 fail2: 4241 vm_map_unlock(kernel_map); 4242 4243 fail: 4244 vm_map_unlock(srcmap); 4245 4246 uvm_unmap_detach(&dead, 0); 4247 4248 return error; 4249 } 4250 4251 /* 4252 * uvm_map_clean: clean out a map range 4253 * 4254 * => valid flags: 4255 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4256 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4257 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4258 * if (flags & PGO_FREE): any cached pages are freed after clean 4259 * => returns an error if any part of the specified range isn't mapped 4260 * => never a need to flush amap layer since the anonymous memory has 4261 * no permanent home, but may deactivate pages there 4262 * => called from sys_msync() and sys_madvise() 4263 * => caller must not write-lock map (read OK). 
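 * => PGO_FREE and PGO_DEACTIVATE are mutually exclusive (enforced by
 *	the KASSERT below)
 * => illustrative sketch only (hypothetical caller): a synchronous
 *	"clean everything in [start, end)" request would look roughly
 *	like
 *	uvm_map_clean(map, start, end, PGO_CLEANIT | PGO_SYNCIO);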
4264 * => we may sleep while cleaning if SYNCIO [with map read-locked] 4265 */ 4266 4267 int 4268 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4269 { 4270 struct vm_map_entry *first, *entry; 4271 struct vm_amap *amap; 4272 struct vm_anon *anon; 4273 struct vm_page *pg; 4274 struct uvm_object *uobj; 4275 vaddr_t cp_start, cp_end; 4276 int refs; 4277 int error; 4278 boolean_t rv; 4279 4280 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4281 (PGO_FREE|PGO_DEACTIVATE)); 4282 4283 if (start > end || start < map->min_offset || end > map->max_offset) 4284 return EINVAL; 4285 4286 vm_map_lock_read(map); 4287 first = uvm_map_entrybyaddr(&map->addr, start); 4288 4289 /* Make a first pass to check for holes. */ 4290 for (entry = first; entry != NULL && entry->start < end; 4291 entry = RBT_NEXT(uvm_map_addr, entry)) { 4292 if (UVM_ET_ISSUBMAP(entry)) { 4293 vm_map_unlock_read(map); 4294 return EINVAL; 4295 } 4296 if (UVM_ET_ISSUBMAP(entry) || 4297 UVM_ET_ISHOLE(entry) || 4298 (entry->end < end && 4299 VMMAP_FREE_END(entry) != entry->end)) { 4300 vm_map_unlock_read(map); 4301 return EFAULT; 4302 } 4303 } 4304 4305 error = 0; 4306 for (entry = first; entry != NULL && entry->start < end; 4307 entry = RBT_NEXT(uvm_map_addr, entry)) { 4308 amap = entry->aref.ar_amap; /* top layer */ 4309 if (UVM_ET_ISOBJ(entry)) 4310 uobj = entry->object.uvm_obj; 4311 else 4312 uobj = NULL; 4313 4314 /* 4315 * No amap cleaning necessary if: 4316 * - there's no amap 4317 * - we're not deactivating or freeing pages. 4318 */ 4319 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4320 goto flush_object; 4321 4322 cp_start = MAX(entry->start, start); 4323 cp_end = MIN(entry->end, end); 4324 4325 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4326 anon = amap_lookup(&entry->aref, 4327 cp_start - entry->start); 4328 if (anon == NULL) 4329 continue; 4330 4331 pg = anon->an_page; 4332 if (pg == NULL) { 4333 continue; 4334 } 4335 KASSERT(pg->pg_flags & PQ_ANON); 4336 4337 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4338 /* 4339 * XXX In these first 3 cases, we always just 4340 * XXX deactivate the page. We may want to 4341 * XXX handle the different cases more 4342 * XXX specifically, in the future. 4343 */ 4344 case PGO_CLEANIT|PGO_FREE: 4345 case PGO_CLEANIT|PGO_DEACTIVATE: 4346 case PGO_DEACTIVATE: 4347 deactivate_it: 4348 /* skip the page if it's wired */ 4349 if (pg->wire_count != 0) 4350 break; 4351 4352 uvm_lock_pageq(); 4353 4354 KASSERT(pg->uanon == anon); 4355 4356 /* zap all mappings for the page. */ 4357 pmap_page_protect(pg, PROT_NONE); 4358 4359 /* ...and deactivate the page. */ 4360 uvm_pagedeactivate(pg); 4361 4362 uvm_unlock_pageq(); 4363 break; 4364 case PGO_FREE: 4365 /* 4366 * If there are multiple references to 4367 * the amap, just deactivate the page. 4368 */ 4369 if (amap_refs(amap) > 1) 4370 goto deactivate_it; 4371 4372 /* XXX skip the page if it's wired */ 4373 if (pg->wire_count != 0) { 4374 break; 4375 } 4376 amap_unadd(&entry->aref, 4377 cp_start - entry->start); 4378 refs = --anon->an_ref; 4379 if (refs == 0) 4380 uvm_anfree(anon); 4381 break; 4382 default: 4383 panic("uvm_map_clean: weird flags"); 4384 } 4385 } 4386 4387 flush_object: 4388 cp_start = MAX(entry->start, start); 4389 cp_end = MIN(entry->end, end); 4390 4391 /* 4392 * flush pages if we've got a valid backing object. 
4393 * 4394 * Don't PGO_FREE if we don't have write permission 4395 * and don't flush if this is a copy-on-write object 4396 * since we can't know our permissions on it. 4397 */ 4398 if (uobj != NULL && 4399 ((flags & PGO_FREE) == 0 || 4400 ((entry->max_protection & PROT_WRITE) != 0 && 4401 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4402 rv = uobj->pgops->pgo_flush(uobj, 4403 cp_start - entry->start + entry->offset, 4404 cp_end - entry->start + entry->offset, flags); 4405 4406 if (rv == FALSE) 4407 error = EFAULT; 4408 } 4409 } 4410 4411 vm_map_unlock_read(map); 4412 return error; 4413 } 4414 4415 /* 4416 * UVM_MAP_CLIP_END implementation 4417 */ 4418 void 4419 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4420 { 4421 struct vm_map_entry *tmp; 4422 4423 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4424 tmp = uvm_mapent_alloc(map, 0); 4425 4426 /* Invoke splitentry. */ 4427 uvm_map_splitentry(map, entry, tmp, addr); 4428 } 4429 4430 /* 4431 * UVM_MAP_CLIP_START implementation 4432 * 4433 * Clippers are required to not change the pointers to the entry they are 4434 * clipping on. 4435 * Since uvm_map_splitentry turns the original entry into the lowest 4436 * entry (address wise) we do a swap between the new entry and the original 4437 * entry, prior to calling uvm_map_splitentry. 4438 */ 4439 void 4440 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4441 { 4442 struct vm_map_entry *tmp; 4443 struct uvm_addr_state *free; 4444 4445 /* Unlink original. */ 4446 free = uvm_map_uaddr_e(map, entry); 4447 uvm_mapent_free_remove(map, free, entry); 4448 uvm_mapent_addr_remove(map, entry); 4449 4450 /* Copy entry. */ 4451 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4452 tmp = uvm_mapent_alloc(map, 0); 4453 uvm_mapent_copy(entry, tmp); 4454 4455 /* Put new entry in place of original entry. */ 4456 uvm_mapent_addr_insert(map, tmp); 4457 uvm_mapent_free_insert(map, free, tmp); 4458 4459 /* Invoke splitentry. */ 4460 uvm_map_splitentry(map, tmp, entry, addr); 4461 } 4462 4463 /* 4464 * Boundary fixer. 4465 */ 4466 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4467 static __inline vaddr_t 4468 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4469 { 4470 return (min < bound && max > bound) ? bound : max; 4471 } 4472 4473 /* 4474 * Choose free list based on address at start of free space. 4475 * 4476 * The uvm_addr_state returned contains addr and is the first of: 4477 * - uaddr_exe 4478 * - uaddr_brk_stack 4479 * - uaddr_any 4480 */ 4481 struct uvm_addr_state* 4482 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4483 { 4484 struct uvm_addr_state *uaddr; 4485 int i; 4486 4487 /* Special case the first page, to prevent mmap from returning 0. */ 4488 if (addr < VMMAP_MIN_ADDR) 4489 return NULL; 4490 4491 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4492 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4493 if (addr >= uvm_maxkaddr) 4494 return NULL; 4495 } 4496 4497 /* Is the address inside the exe-only map? */ 4498 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4499 addr < map->uaddr_exe->uaddr_maxaddr) 4500 return map->uaddr_exe; 4501 4502 /* Check if the space falls inside brk/stack area. 
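 * Addresses inside the brk/stack range that the brk/stack selector
 * does not cover get no owner (NULL): such free space sits on no
 * free list and is therefore skipped by the address selectors.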
*/ 4503 if ((addr >= map->b_start && addr < map->b_end) || 4504 (addr >= map->s_start && addr < map->s_end)) { 4505 if (map->uaddr_brk_stack != NULL && 4506 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4507 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4508 return map->uaddr_brk_stack; 4509 } else 4510 return NULL; 4511 } 4512 4513 /* 4514 * Check the other selectors. 4515 * 4516 * These selectors are only marked as the owner if they have insert 4517 * functions. 4518 */ 4519 for (i = 0; i < nitems(map->uaddr_any); i++) { 4520 uaddr = map->uaddr_any[i]; 4521 if (uaddr == NULL) 4522 continue; 4523 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4524 continue; 4525 4526 if (addr >= uaddr->uaddr_minaddr && 4527 addr < uaddr->uaddr_maxaddr) 4528 return uaddr; 4529 } 4530 4531 return NULL; 4532 } 4533 4534 /* 4535 * Choose free list based on address at start of free space. 4536 * 4537 * The uvm_addr_state returned contains addr and is the first of: 4538 * - uaddr_exe 4539 * - uaddr_brk_stack 4540 * - uaddr_any 4541 */ 4542 struct uvm_addr_state* 4543 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4544 { 4545 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4546 } 4547 4548 /* 4549 * Returns the first free-memory boundary that is crossed by [min-max]. 4550 */ 4551 vsize_t 4552 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4553 { 4554 struct uvm_addr_state *uaddr; 4555 int i; 4556 4557 /* Never return first page. */ 4558 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4559 4560 /* Treat uvm_maxkaddr specially if the map is a kernel map. */ 4561 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4562 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4563 4564 /* Check for exe-only boundaries. */ 4565 if (map->uaddr_exe != NULL) { 4566 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4567 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4568 } 4569 4570 /* Check for brk/stack boundaries. */ 4571 if (map->uaddr_brk_stack != NULL) { 4572 max = uvm_map_boundfix(min, max, 4573 map->uaddr_brk_stack->uaddr_minaddr); 4574 max = uvm_map_boundfix(min, max, 4575 map->uaddr_brk_stack->uaddr_maxaddr); 4576 } 4577 4578 /* Check other boundaries. */ 4579 for (i = 0; i < nitems(map->uaddr_any); i++) { 4580 uaddr = map->uaddr_any[i]; 4581 if (uaddr != NULL) { 4582 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4583 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4584 } 4585 } 4586 4587 /* Boundaries at stack and brk() area. */ 4588 max = uvm_map_boundfix(min, max, map->s_start); 4589 max = uvm_map_boundfix(min, max, map->s_end); 4590 max = uvm_map_boundfix(min, max, map->b_start); 4591 max = uvm_map_boundfix(min, max, map->b_end); 4592 4593 return max; 4594 } 4595 4596 /* 4597 * Update map allocation start and end addresses from proc vmspace. 4598 */ 4599 void 4600 uvm_map_vmspace_update(struct vm_map *map, 4601 struct uvm_map_deadq *dead, int flags) 4602 { 4603 struct vmspace *vm; 4604 vaddr_t b_start, b_end, s_start, s_end; 4605 4606 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4607 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4608 4609 /* 4610 * Derive actual allocation boundaries from vmspace.
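 * b_start/b_end cover the brk() (data) area starting at vm_daddr and
 * spanning BRKSIZ; s_start/s_end cover the stack area between
 * vm_maxsaddr and vm_minsaddr, normalized so that s_start <= s_end.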
4611 */ 4612 vm = (struct vmspace *)map; 4613 b_start = (vaddr_t)vm->vm_daddr; 4614 b_end = b_start + BRKSIZ; 4615 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4616 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4617 #ifdef DIAGNOSTIC 4618 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4619 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4620 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4621 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4622 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4623 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4624 vm, b_start, b_end, s_start, s_end); 4625 } 4626 #endif 4627 4628 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4629 map->s_start == s_start && map->s_end == s_end)) 4630 return; 4631 4632 uvm_map_freelist_update(map, dead, b_start, b_end, 4633 s_start, s_end, flags); 4634 } 4635 4636 /* 4637 * Grow kernel memory. 4638 * 4639 * This function is only called for kernel maps when an allocation fails. 4640 * 4641 * If the map has a gap that is large enough to accommodate alloc_sz, this 4642 * function will make sure map->free will include it. 4643 */ 4644 void 4645 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4646 vsize_t alloc_sz, int flags) 4647 { 4648 vsize_t sz; 4649 vaddr_t end; 4650 struct vm_map_entry *entry; 4651 4652 /* Kernel memory only. */ 4653 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4654 /* Destroy free list. */ 4655 uvm_map_freelist_update_clear(map, dead); 4656 4657 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 4658 if (map->flags & VM_MAP_GUARDPAGES) 4659 alloc_sz += PAGE_SIZE; 4660 4661 /* 4662 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4663 * 4664 * Don't handle the case where the multiplication overflows: 4665 * if that happens, the allocation is probably too big anyway. 4666 */ 4667 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4668 4669 /* 4670 * Walk forward until a gap large enough for alloc_sz shows up. 4671 * 4672 * We assume the kernel map has no boundaries. 4673 * uvm_maxkaddr may be zero. 4674 */ 4675 end = MAX(uvm_maxkaddr, map->min_offset); 4676 entry = uvm_map_entrybyaddr(&map->addr, end); 4677 while (entry && entry->fspace < alloc_sz) 4678 entry = RBT_NEXT(uvm_map_addr, entry); 4679 if (entry) { 4680 end = MAX(VMMAP_FREE_START(entry), end); 4681 end += MIN(sz, map->max_offset - end); 4682 } else 4683 end = map->max_offset; 4684 4685 /* Reserve pmap entries. */ 4686 #ifdef PMAP_GROWKERNEL 4687 uvm_maxkaddr = pmap_growkernel(end); 4688 #else 4689 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4690 #endif 4691 4692 /* Rebuild free list. */ 4693 uvm_map_freelist_update_refill(map, flags); 4694 } 4695 4696 /* 4697 * Freelist update subfunction: unlink all entries from freelists. 
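 * Entries that carry no mapping (start == end) are merged into the
 * free space of the preceding entry and pushed onto the dead queue;
 * all other entries merely lose their free-list membership.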
4698 */ 4699 void 4700 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4701 { 4702 struct uvm_addr_state *free; 4703 struct vm_map_entry *entry, *prev, *next; 4704 4705 prev = NULL; 4706 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4707 entry = next) { 4708 next = RBT_NEXT(uvm_map_addr, entry); 4709 4710 free = uvm_map_uaddr_e(map, entry); 4711 uvm_mapent_free_remove(map, free, entry); 4712 4713 if (prev != NULL && entry->start == entry->end) { 4714 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4715 uvm_mapent_addr_remove(map, entry); 4716 DEAD_ENTRY_PUSH(dead, entry); 4717 } else 4718 prev = entry; 4719 } 4720 } 4721 4722 /* 4723 * Freelist update subfunction: refill the freelists with entries. 4724 */ 4725 void 4726 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4727 { 4728 struct vm_map_entry *entry; 4729 vaddr_t min, max; 4730 4731 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 4732 min = VMMAP_FREE_START(entry); 4733 max = VMMAP_FREE_END(entry); 4734 entry->fspace = 0; 4735 4736 entry = uvm_map_fix_space(map, entry, min, max, flags); 4737 } 4738 4739 uvm_tree_sanity(map, __FILE__, __LINE__); 4740 } 4741 4742 /* 4743 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4744 */ 4745 void 4746 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4747 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4748 { 4749 KDASSERT(b_end >= b_start && s_end >= s_start); 4750 4751 /* Clear all free lists. */ 4752 uvm_map_freelist_update_clear(map, dead); 4753 4754 /* Apply new bounds. */ 4755 map->b_start = b_start; 4756 map->b_end = b_end; 4757 map->s_start = s_start; 4758 map->s_end = s_end; 4759 4760 /* Refill free lists. */ 4761 uvm_map_freelist_update_refill(map, flags); 4762 } 4763 4764 /* 4765 * Assign a uvm_addr_state to the specified pointer in vm_map. 4766 * 4767 * May sleep. 4768 */ 4769 void 4770 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4771 struct uvm_addr_state *newval) 4772 { 4773 struct uvm_map_deadq dead; 4774 4775 /* Pointer which must be in this map. */ 4776 KASSERT(which != NULL); 4777 KASSERT((void*)map <= (void*)(which) && 4778 (void*)(which) < (void*)(map + 1)); 4779 4780 vm_map_lock(map); 4781 TAILQ_INIT(&dead); 4782 uvm_map_freelist_update_clear(map, &dead); 4783 4784 uvm_addr_destroy(*which); 4785 *which = newval; 4786 4787 uvm_map_freelist_update_refill(map, 0); 4788 vm_map_unlock(map); 4789 uvm_unmap_detach(&dead, 0); 4790 } 4791 4792 /* 4793 * Correct space insert. 4794 * 4795 * Entry must not be on any freelist. 4796 */ 4797 struct vm_map_entry* 4798 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 4799 vaddr_t min, vaddr_t max, int flags) 4800 { 4801 struct uvm_addr_state *free, *entfree; 4802 vaddr_t lmax; 4803 4804 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 4805 KDASSERT(min <= max); 4806 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 4807 min == map->min_offset); 4808 4809 /* 4810 * During the function, entfree will always point at the uaddr state 4811 * for entry. 4812 */ 4813 entfree = (entry == NULL ? NULL : 4814 uvm_map_uaddr_e(map, entry)); 4815 4816 while (min != max) { 4817 /* Claim guard page for entry. 
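 * In maps with VM_MAP_GUARDPAGES set, the first page of free space
 * following an allocated entry is reserved as a guard and excluded
 * from the entry's usable fspace; if the gap is exactly two pages the
 * whole gap becomes guard, since a guarded allocation needs room for
 * both the mapping and its own guard page.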
*/ 4818 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 4819 VMMAP_FREE_END(entry) == entry->end && 4820 entry->start != entry->end) { 4821 if (max - min == 2 * PAGE_SIZE) { 4822 /* 4823 * If the free-space gap is exactly 2 pages, 4824 * we make the guard 2 pages instead of 1. 4825 * Because in a guarded map, an area needs 4826 * at least 2 pages to allocate from: 4827 * one page for the allocation and one for 4828 * the guard. 4829 */ 4830 entry->guard = 2 * PAGE_SIZE; 4831 min = max; 4832 } else { 4833 entry->guard = PAGE_SIZE; 4834 min += PAGE_SIZE; 4835 } 4836 continue; 4837 } 4838 4839 /* 4840 * Handle the case where entry has a 2-page guard, but the 4841 * space after entry is freed. 4842 */ 4843 if (entry != NULL && entry->fspace == 0 && 4844 entry->guard > PAGE_SIZE) { 4845 entry->guard = PAGE_SIZE; 4846 min = VMMAP_FREE_START(entry); 4847 } 4848 4849 lmax = uvm_map_boundary(map, min, max); 4850 free = uvm_map_uaddr(map, min); 4851 4852 /* 4853 * Entries are merged if they point at the same uvm_free(). 4854 * Exception to that rule: if min == uvm_maxkaddr, a new 4855 * entry is started regardless (otherwise the allocators 4856 * will get confused). 4857 */ 4858 if (entry != NULL && free == entfree && 4859 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 4860 min == uvm_maxkaddr)) { 4861 KDASSERT(VMMAP_FREE_END(entry) == min); 4862 entry->fspace += lmax - min; 4863 } else { 4864 /* 4865 * Commit entry to free list: it'll not be added to 4866 * anymore. 4867 * We'll start a new entry and add to that entry 4868 * instead. 4869 */ 4870 if (entry != NULL) 4871 uvm_mapent_free_insert(map, entfree, entry); 4872 4873 /* New entry for new uaddr. */ 4874 entry = uvm_mapent_alloc(map, flags); 4875 KDASSERT(entry != NULL); 4876 entry->end = entry->start = min; 4877 entry->guard = 0; 4878 entry->fspace = lmax - min; 4879 entry->object.uvm_obj = NULL; 4880 entry->offset = 0; 4881 entry->etype = 0; 4882 entry->protection = entry->max_protection = 0; 4883 entry->inheritance = 0; 4884 entry->wired_count = 0; 4885 entry->advice = 0; 4886 entry->aref.ar_pageoff = 0; 4887 entry->aref.ar_amap = NULL; 4888 uvm_mapent_addr_insert(map, entry); 4889 4890 entfree = free; 4891 } 4892 4893 min = lmax; 4894 } 4895 /* Finally put entry on the uaddr state. */ 4896 if (entry != NULL) 4897 uvm_mapent_free_insert(map, entfree, entry); 4898 4899 return entry; 4900 } 4901 4902 /* 4903 * MQuery style of allocation. 4904 * 4905 * This allocator searches forward until sufficient space is found to map 4906 * the given size. 4907 * 4908 * XXX: factor in offset (via pmap_prefer) and protection? 4909 */ 4910 int 4911 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 4912 int flags) 4913 { 4914 struct vm_map_entry *entry, *last; 4915 vaddr_t addr; 4916 vaddr_t tmp, pmap_align, pmap_offset; 4917 int error; 4918 4919 addr = *addr_p; 4920 vm_map_lock_read(map); 4921 4922 /* Configure pmap prefer. */ 4923 if (offset != UVM_UNKNOWN_OFFSET) { 4924 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 4925 pmap_offset = PMAP_PREFER_OFFSET(offset); 4926 } else { 4927 pmap_align = PAGE_SIZE; 4928 pmap_offset = 0; 4929 } 4930 4931 /* Align address to pmap_prefer unless FLAG_FIXED is set. */ 4932 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 4933 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4934 if (tmp < addr) 4935 tmp += pmap_align; 4936 addr = tmp; 4937 } 4938 4939 /* First, check if the requested range is fully available. 
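 * If it is, the (possibly pmap_prefer-aligned) address is returned
 * unchanged; with UVM_FLAG_FIXED no forward search is attempted and
 * EINVAL is returned instead.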
*/ 4940 entry = uvm_map_entrybyaddr(&map->addr, addr); 4941 last = NULL; 4942 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 4943 error = 0; 4944 goto out; 4945 } 4946 if (flags & UVM_FLAG_FIXED) { 4947 error = EINVAL; 4948 goto out; 4949 } 4950 4951 error = ENOMEM; /* Default error from here. */ 4952 4953 /* 4954 * At this point, the memory at <addr, sz> is not available. 4955 * The reasons are: 4956 * [1] it's outside the map, 4957 * [2] it starts in used memory (and therefore needs to move 4958 * toward the first free page in entry), 4959 * [3] it starts in free memory but bumps into used memory. 4960 * 4961 * Note that for case [2], the forward moving is handled by the 4962 * for loop below. 4963 */ 4964 if (entry == NULL) { 4965 /* [1] Outside the map. */ 4966 if (addr >= map->max_offset) 4967 goto out; 4968 else 4969 entry = RBT_MIN(uvm_map_addr, &map->addr); 4970 } else if (VMMAP_FREE_START(entry) <= addr) { 4971 /* [3] Bumped into used memory. */ 4972 entry = RBT_NEXT(uvm_map_addr, entry); 4973 } 4974 4975 /* Test if the next entry is sufficient for the allocation. */ 4976 for (; entry != NULL; 4977 entry = RBT_NEXT(uvm_map_addr, entry)) { 4978 if (entry->fspace == 0) 4979 continue; 4980 addr = VMMAP_FREE_START(entry); 4981 4982 restart: /* Restart address checks on address change. */ 4983 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 4984 if (tmp < addr) 4985 tmp += pmap_align; 4986 addr = tmp; 4987 if (addr >= VMMAP_FREE_END(entry)) 4988 continue; 4989 4990 /* Skip brk() allocation addresses. */ 4991 if (addr + sz > map->b_start && addr < map->b_end) { 4992 if (VMMAP_FREE_END(entry) > map->b_end) { 4993 addr = map->b_end; 4994 goto restart; 4995 } else 4996 continue; 4997 } 4998 /* Skip stack allocation addresses. */ 4999 if (addr + sz > map->s_start && addr < map->s_end) { 5000 if (VMMAP_FREE_END(entry) > map->s_end) { 5001 addr = map->s_end; 5002 goto restart; 5003 } else 5004 continue; 5005 } 5006 5007 last = NULL; 5008 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5009 error = 0; 5010 goto out; 5011 } 5012 } 5013 5014 out: 5015 vm_map_unlock_read(map); 5016 if (error == 0) 5017 *addr_p = addr; 5018 return error; 5019 } 5020 5021 /* 5022 * Determine allocation bias. 5023 * 5024 * Returns 1 if we should bias to high addresses, -1 for a bias towards low 5025 * addresses, or 0 for no bias. 5026 * The bias mechanism is intended to avoid clashing with brk() and stack 5027 * areas. 5028 */ 5029 int 5030 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry) 5031 { 5032 vaddr_t start, end; 5033 5034 start = VMMAP_FREE_START(entry); 5035 end = VMMAP_FREE_END(entry); 5036 5037 /* Stay at the top of brk() area. */ 5038 if (end >= map->b_start && start < map->b_end) 5039 return 1; 5040 /* Stay at the far end of the stack area. */ 5041 if (end >= map->s_start && start < map->s_end) { 5042 #ifdef MACHINE_STACK_GROWS_UP 5043 return 1; 5044 #else 5045 return -1; 5046 #endif 5047 } 5048 5049 /* No bias, this area is meant for us. 
*/ 5050 return 0; 5051 } 5052 5053 5054 boolean_t 5055 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5056 { 5057 boolean_t rv; 5058 5059 if (map->flags & VM_MAP_INTRSAFE) { 5060 rv = _mtx_enter_try(&map->mtx LOCK_FL_ARGS); 5061 } else { 5062 mtx_enter(&map->flags_lock); 5063 if (map->flags & VM_MAP_BUSY) { 5064 mtx_leave(&map->flags_lock); 5065 return (FALSE); 5066 } 5067 mtx_leave(&map->flags_lock); 5068 rv = (_rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP LOCK_FL_ARGS) 5069 == 0); 5070 /* check if the lock is busy and back out if we won the race */ 5071 if (rv) { 5072 mtx_enter(&map->flags_lock); 5073 if (map->flags & VM_MAP_BUSY) { 5074 _rw_exit(&map->lock LOCK_FL_ARGS); 5075 rv = FALSE; 5076 } 5077 mtx_leave(&map->flags_lock); 5078 } 5079 } 5080 5081 if (rv) { 5082 map->timestamp++; 5083 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5084 uvm_tree_sanity(map, file, line); 5085 uvm_tree_size_chk(map, file, line); 5086 } 5087 5088 return (rv); 5089 } 5090 5091 void 5092 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5093 { 5094 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5095 do { 5096 mtx_enter(&map->flags_lock); 5097 tryagain: 5098 while (map->flags & VM_MAP_BUSY) { 5099 map->flags |= VM_MAP_WANTLOCK; 5100 msleep(&map->flags, &map->flags_lock, 5101 PVM, vmmapbsy, 0); 5102 } 5103 mtx_leave(&map->flags_lock); 5104 } while (_rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL 5105 LOCK_FL_ARGS) != 0); 5106 /* check if the lock is busy and back out if we won the race */ 5107 mtx_enter(&map->flags_lock); 5108 if (map->flags & VM_MAP_BUSY) { 5109 _rw_exit(&map->lock LOCK_FL_ARGS); 5110 goto tryagain; 5111 } 5112 mtx_leave(&map->flags_lock); 5113 } else { 5114 _mtx_enter(&map->mtx LOCK_FL_ARGS); 5115 } 5116 5117 map->timestamp++; 5118 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5119 uvm_tree_sanity(map, file, line); 5120 uvm_tree_size_chk(map, file, line); 5121 } 5122 5123 void 5124 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5125 { 5126 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5127 _rw_enter_read(&map->lock LOCK_FL_ARGS); 5128 else 5129 _mtx_enter(&map->mtx LOCK_FL_ARGS); 5130 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5131 uvm_tree_sanity(map, file, line); 5132 uvm_tree_size_chk(map, file, line); 5133 } 5134 5135 void 5136 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5137 { 5138 uvm_tree_sanity(map, file, line); 5139 uvm_tree_size_chk(map, file, line); 5140 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5141 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5142 _rw_exit(&map->lock LOCK_FL_ARGS); 5143 else 5144 _mtx_leave(&map->mtx LOCK_FL_ARGS); 5145 } 5146 5147 void 5148 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5149 { 5150 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5151 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5152 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5153 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5154 _rw_exit_read(&map->lock LOCK_FL_ARGS); 5155 else 5156 _mtx_leave(&map->mtx LOCK_FL_ARGS); 5157 } 5158 5159 void 5160 vm_map_downgrade_ln(struct vm_map *map, char *file, int line) 5161 { 5162 uvm_tree_sanity(map, file, line); 5163 uvm_tree_size_chk(map, file, line); 5164 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5165 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5166 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5167 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5168 _rw_enter(&map->lock, RW_DOWNGRADE LOCK_FL_ARGS); 5169 } 5170 
5171 void 5172 vm_map_upgrade_ln(struct vm_map *map, char *file, int line) 5173 { 5174 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5175 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5176 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5177 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5178 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5179 _rw_exit_read(&map->lock LOCK_FL_ARGS); 5180 _rw_enter_write(&map->lock LOCK_FL_ARGS); 5181 } 5182 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5183 uvm_tree_sanity(map, file, line); 5184 } 5185 5186 void 5187 vm_map_busy_ln(struct vm_map *map, char *file, int line) 5188 { 5189 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5190 mtx_enter(&map->flags_lock); 5191 map->flags |= VM_MAP_BUSY; 5192 mtx_leave(&map->flags_lock); 5193 } 5194 5195 void 5196 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5197 { 5198 int oflags; 5199 5200 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5201 mtx_enter(&map->flags_lock); 5202 oflags = map->flags; 5203 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5204 mtx_leave(&map->flags_lock); 5205 if (oflags & VM_MAP_WANTLOCK) 5206 wakeup(&map->flags); 5207 } 5208 5209 #ifndef SMALL_KERNEL 5210 int 5211 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5212 size_t *lenp) 5213 { 5214 struct vm_map_entry *entry; 5215 vaddr_t start; 5216 int cnt, maxcnt, error = 0; 5217 5218 KASSERT(*lenp > 0); 5219 KASSERT((*lenp % sizeof(*kve)) == 0); 5220 cnt = 0; 5221 maxcnt = *lenp / sizeof(*kve); 5222 KASSERT(maxcnt > 0); 5223 5224 /* 5225 * Return only entries whose address is above the given base 5226 * address. This allows userland to iterate without knowing the 5227 * number of entries beforehand. 5228 */ 5229 start = (vaddr_t)kve[0].kve_start; 5230 5231 vm_map_lock(map); 5232 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5233 if (cnt == maxcnt) { 5234 error = ENOMEM; 5235 break; 5236 } 5237 if (start != 0 && entry->start < start) 5238 continue; 5239 kve->kve_start = entry->start; 5240 kve->kve_end = entry->end; 5241 kve->kve_guard = entry->guard; 5242 kve->kve_fspace = entry->fspace; 5243 kve->kve_fspace_augment = entry->fspace_augment; 5244 kve->kve_offset = entry->offset; 5245 kve->kve_wired_count = entry->wired_count; 5246 kve->kve_etype = entry->etype; 5247 kve->kve_protection = entry->protection; 5248 kve->kve_max_protection = entry->max_protection; 5249 kve->kve_advice = entry->advice; 5250 kve->kve_inheritance = entry->inheritance; 5251 kve->kve_flags = entry->flags; 5252 kve++; 5253 cnt++; 5254 } 5255 vm_map_unlock(map); 5256 5257 KASSERT(cnt <= maxcnt); 5258 5259 *lenp = sizeof(*kve) * cnt; 5260 return error; 5261 } 5262 #endif 5263 5264 5265 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5266 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5267 5268 5269 /* 5270 * MD code: vmspace allocator setup. 5271 */ 5272 5273 #ifdef __i386__ 5274 void 5275 uvm_map_setup_md(struct vm_map *map) 5276 { 5277 vaddr_t min, max; 5278 5279 min = map->min_offset; 5280 max = map->max_offset; 5281 5282 /* 5283 * Ensure the selectors will not try to manage page 0; 5284 * it's too special. 5285 */ 5286 if (min < VMMAP_MIN_ADDR) 5287 min = VMMAP_MIN_ADDR; 5288 5289 #if 0 /* Cool stuff, not yet */ 5290 /* Executable code is special. */ 5291 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5292 /* Place normal allocations beyond executable mappings. 
*/ 5293 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5294 #else /* Crappy stuff, for now */ 5295 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5296 #endif 5297 5298 #ifndef SMALL_KERNEL 5299 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5300 #endif /* !SMALL_KERNEL */ 5301 } 5302 #elif __LP64__ 5303 void 5304 uvm_map_setup_md(struct vm_map *map) 5305 { 5306 vaddr_t min, max; 5307 5308 min = map->min_offset; 5309 max = map->max_offset; 5310 5311 /* 5312 * Ensure the selectors will not try to manage page 0; 5313 * it's too special. 5314 */ 5315 if (min < VMMAP_MIN_ADDR) 5316 min = VMMAP_MIN_ADDR; 5317 5318 #if 0 /* Cool stuff, not yet */ 5319 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5320 #else /* Crappy stuff, for now */ 5321 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5322 #endif 5323 5324 #ifndef SMALL_KERNEL 5325 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5326 #endif /* !SMALL_KERNEL */ 5327 } 5328 #else /* non-i386, 32 bit */ 5329 void 5330 uvm_map_setup_md(struct vm_map *map) 5331 { 5332 vaddr_t min, max; 5333 5334 min = map->min_offset; 5335 max = map->max_offset; 5336 5337 /* 5338 * Ensure the selectors will not try to manage page 0; 5339 * it's too special. 5340 */ 5341 if (min < VMMAP_MIN_ADDR) 5342 min = VMMAP_MIN_ADDR; 5343 5344 #if 0 /* Cool stuff, not yet */ 5345 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5346 #else /* Crappy stuff, for now */ 5347 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5348 #endif 5349 5350 #ifndef SMALL_KERNEL 5351 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5352 #endif /* !SMALL_KERNEL */ 5353 } 5354 #endif 5355