/*	$NetBSD: uvm_mmap.c,v 1.159 2016/06/01 12:14:08 pgoyette Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.159 2016/06/01 12:14:08 pgoyette Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}
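
/*
 * Illustrative sketch (editor's example, not compiled as part of this
 * file): range_test() maps out-of-range requests to different errors
 * depending on the caller.  Assuming a map whose maximum address is
 * VM_MAXUSER_ADDRESS, a caller might see:
 *
 *	range_test(map, VM_MAXUSER_ADDRESS, PAGE_SIZE, true);       -> EFBIG
 *	range_test(map, VM_MAXUSER_ADDRESS, PAGE_SIZE, false);      -> EINVAL
 *	range_test(map, (vaddr_t)-PAGE_SIZE, 2 * PAGE_SIZE, true);  -> EOVERFLOW
 *
 * i.e. mmap(2) distinguishes "past the end of the map" (EFBIG) from
 * "wraps around the address space" (EOVERFLOW), while the other callers
 * fold both cases into EINVAL.
 */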

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return (error);
}
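
/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * the vec argument receives one status byte per page, non-zero meaning
 * "resident".  A caller might count resident pages in a mapping like so:
 *
 *	char *vec = malloc(npages);
 *	if (mincore(addr, npages * pagesize, vec) == 0) {
 *		size_t resident = 0;
 *		for (size_t i = 0; i < npages; i++)
 *			if (vec[i] != 0)
 *				resident++;
 *	}
 *
 * Note that, as implemented above, pages of device mappings are always
 * reported as resident.
 */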

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY) {
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
		/*
		 * Ancient kernels did not obey PROT_EXEC (on i386 at least)
		 * and ld.so did not turn it on.  We take care of this on
		 * amd64 in compat32.
		 */
		prot |= PROT_EXEC;
#endif
	}
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return (ENOMEM);
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}

	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {

		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 ||
		    !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return (EBADF);

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return (error);
}
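
/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * as the header comment above notes, the file offset need not be page
 * aligned.  Mapping a slice that starts in the middle of a page returns a
 * pointer adjusted up by the page offset, exactly as computed above
 * (trunc_page plus pageoff):
 *
 *	off_t off = 100;	// not a multiple of the page size
 *	void *va = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, off);
 *	// on success, va points at byte 100 of the file; the mapping
 *	// itself begins at the page boundary below va.
 */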

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return error;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}
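
/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * MS_SYNC waits for the write-back (PGO_SYNCIO above), MS_ASYNC queues it,
 * and MS_INVALIDATE additionally frees the cached pages (PGO_FREE):
 *
 *	if (msync(addr, len, MS_SYNC) == -1)
 *		err(1, "msync");
 *
 * Passing both MS_SYNC and MS_ASYNC, or no recognized flag at all, fails
 * with EINVAL, as checked above.
 */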

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return (0);

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return error;

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	vm_map_lock(map);
#if 0
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}
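
/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * minherit(2) controls what a child created by fork(2) sees for a range.
 * For instance, a region holding secrets can be withheld from children:
 *
 *	if (minherit(addr, len, MAP_INHERIT_NONE) == -1)
 *		err(1, "minherit");
 *	// after fork(), the child has no mapping at addr..addr+len
 */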

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return (EINVAL);

	default:
		return (EINVAL);
	}

	return error;
}
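
/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * advising the kernel about access patterns after mapping a large file:
 *
 *	void *va = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
 *	(void)madvise(va, sb.st_size, MADV_SEQUENTIAL); // read front to back
 *	...
 *	(void)madvise(va, sb.st_size, MADV_DONTNEED);   // done with the data
 *
 * As implemented above, MADV_DONTNEED only deactivates the pages; it does
 * not discard their contents (MADV_FREE does).
 */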

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return error;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}
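
/*
 * Illustrative userland sketch (editor's example, not part of this file):
 * a latency-sensitive process can wire everything it has mapped, and
 * everything it will map later, subject to RLIMIT_MEMLOCK:
 *
 *	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *		err(1, "mlockall");
 *
 * MCL_FUTURE sets VM_MAP_WIREFUTURE on the map, which is what makes
 * uvm_mmap() below wire newly created mappings.
 */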

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that the
	 * alignment is at least a page-sized quantity.  If the request
	 * was for a fixed mapping, make sure the supplied address adheres
	 * to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return(EINVAL);
		align = 1L << align;
		if (align < PAGE_SIZE)
			return(EINVAL);
		if (align >= vm_map_max(map))
			return(ENOMEM);
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return(EINVAL);
			align = 0;
		}
	}
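
	/*
	 * Illustrative sketch (editor's example, not part of this file):
	 * the alignment request checked above is encoded in the mmap(2)
	 * flags with the MAP_ALIGNED() macro, whose argument is log2 of
	 * the alignment.  For example, a userland caller asking for a
	 * 1 MB aligned anonymous mapping might do:
	 *
	 *	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_PRIVATE | MAP_ALIGNED(20), -1, 0);
	 */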

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	    curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return (0);
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return (0);
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM,
	    uobj, off, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL,
	    NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
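
/*
 * Illustrative kernel-side sketch (editor's example, not part of this
 * file): uvm_mmap_dev() and uvm_mmap_anon() are convenience wrappers for
 * kernel code that needs to place a mapping in a user process.  A
 * hypothetical driver mapping one page of its device into the calling
 * process could do something like:
 *
 *	void *va = NULL;	// NULL: let UVM pick the address
 *	int error = uvm_mmap_dev(curproc, &va, PAGE_SIZE,
 *	    some_devt, some_page_aligned_offset);
 *	if (error == 0)
 *		// va now refers to a shared, read/write device mapping
 *
 * where some_devt and some_page_aligned_offset are placeholders supplied
 * by the driver.
 */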