/*	$NetBSD: uvm_mmap.c,v 1.154 2015/11/26 13:15:34 martin Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.154 2015/11/26 13:15:34 martin Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>

#if defined(PAX_ASLR) || defined(PAX_MPROTECT)
#include <sys/pax.h>
#endif /* PAX_ASLR || PAX_MPROTECT */

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	vaddr_t vm_max_address = VM_MAXUSER_ADDRESS;
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return (error);
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	/*
	 * Fix up the old, deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY) {
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
		/*
		 * Ancient i386 kernels did not obey PROT_EXEC, and ld.so
		 * did not turn it on.  We take care of this on amd64 in
		 * compat32.
		 */
		prot |= PROT_EXEC;
#endif
	}
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return (ENOMEM);
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		error = range_test(addr, size, true);
		if (error) {
			return error;
		}

	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {

		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 ||
		    !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return (EBADF);

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

#ifdef PAX_MPROTECT
	pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

#ifdef PAX_ASLR
	pax_aslr_mmap(l, &addr, orig_addr, flags);
#endif /* PAX_ASLR */

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return (error);
}
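
/*
 * Illustrative example (not part of the original source): with 4KB pages,
 * a non-MAP_FIXED call such as
 *
 *	mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x1234)
 *
 * is handled above by computing pageoff = 0x234, rounding the file
 * position down to 0x1000 and the mapping size up to one page, and
 * finally returning addr + pageoff, so the pointer handed back to the
 * caller has the same offset within its page as the requested file
 * position.
 */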

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return (0);

	error = range_test(addr, size, false);
	if (error)
		return error;

	map = &p->p_vmspace->vm_map;

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	vm_map_lock(map);
#if 0
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return (EINVAL);

	default:
		return (EINVAL);
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(addr, size, false);
	if (error)
		return error;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that the
	 * alignment is at least a page-sized quantity.  If the request
	 * was for a fixed mapping, make sure the supplied address
	 * adheres to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return(EINVAL);
		align = 1L << align;
		if (align < PAGE_SIZE)
			return(EINVAL);
		if (align >= vm_map_max(map))
			return(ENOMEM);
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return(EINVAL);
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return (0);
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return (0);
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM,
	    uobj, off, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL,
	    NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
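
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * driver that wants to hand a process a mapping of its device could call
 * uvm_mmap_dev() roughly as follows, where "dev" and "len" stand in for
 * hypothetical driver state:
 *
 *	void *uaddr = NULL;	-- NULL lets UVM choose the address
 *	int error = uvm_mmap_dev(curproc, &uaddr, len, dev, 0);
 *
 * On success, uaddr refers to a MAP_SHARED, read/write mapping of the
 * device starting at offset 0.  uvm_mmap_anon() is the analogous helper
 * for an anonymous MAP_PRIVATE mapping.
 */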