/*	$NetBSD: uvm_mmap.c,v 1.153 2015/08/04 18:28:10 maxv Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.153 2015/08/04 18:28:10 maxv Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>

#if defined(PAX_ASLR) || defined(PAX_MPROTECT)
#include <sys/pax.h>
#endif /* PAX_ASLR || PAX_MPROTECT */

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	vaddr_t vm_max_address = VM_MAXUSER_ADDRESS;
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return (error);
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY) {
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
		/*
		 * Ancient i386 kernels did not obey PROT_EXEC, and
		 * ld.so did not turn it on.  We take care of this on
		 * amd64 in compat32.
		 */
		prot |= PROT_EXEC;
#endif
	}
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos    -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return (ENOMEM);
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		error = range_test(addr, size, true);
		if (error) {
			return error;
		}

	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {

		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size);

		if (addr == 0 ||
		    !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return (EBADF);

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

#ifdef PAX_MPROTECT
	pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

#ifdef PAX_ASLR
	pax_aslr_mmap(l, &addr, orig_addr, flags);
#endif /* PAX_ASLR */

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return (0);

	error = range_test(addr, size, false);
	if (error)
		return error;

	map = &p->p_vmspace->vm_map;

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	vm_map_lock(map);
#if 0
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return (EINVAL);

	default:
		return (EINVAL);
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size
	 * accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size
	 * accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that
	 * the alignment is at least a page-sized quantity.  If the
	 * request was for a fixed mapping, make sure the supplied address
	 * adheres to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return(EINVAL);
		align = 1L << align;
		if (align < PAGE_SIZE)
			return(EINVAL);
		if (align >= vm_map_max(map))
			return(ENOMEM);
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return(EINVAL);
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	    curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return (0);
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return (0);
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
{

	if (p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM,
	    uobj, off, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL,
	    NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
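
/*
 * Usage note (illustrative sketch only, not part of this file): a kernel
 * caller that wants an anonymous, pageable, read/write region in a
 * process's address space can use uvm_mmap_anon() above, for example:
 *
 *	void *va = NULL;	(NULL lets the emulation pick a default address)
 *	int error = uvm_mmap_anon(p, &va, len);
 *	if (error == 0)
 *		"va" now refers to "len" bytes of private zero-fill memory
 *		in p's map, with VM_PROT_READ|VM_PROT_WRITE protection.
 *
 * uvm_mmap_dev() is used the same way by device/framebuffer code, with a
 * dev_t and offset instead of anonymous memory.
 */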