/*	$NetBSD: uvm_mmap.c,v 1.162 2016/08/09 12:17:04 kre Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.162 2016/08/09 12:17:04 kre Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
        vaddr_t vm_min_address = vm_map_min(map);
        vaddr_t vm_max_address = vm_map_max(map);
        vaddr_t eaddr = addr + size;
        int res = 0;

        if (addr < vm_min_address)
                return EINVAL;
        if (eaddr > vm_max_address)
                return ismmap ? EFBIG : EINVAL;
        if (addr > eaddr) /* no wrapping! */
                return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
        res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

        return res;
}
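
/*
 * Illustrative example (editorial addition, not part of the original
 * code): the "no wrapping" check above catches address-space wraparound
 * before the range is ever handed to the map.  On a 32-bit platform,
 * addr = 0xfffff000 with size = 0x2000 gives eaddr = addr + size, which
 * wraps to 0x00000fff; addr > eaddr is then true and mmap(2) fails with
 * EOVERFLOW (other callers of range_test() get EINVAL).
 */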

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
        /* {
                syscallarg(intptr_t) incr;
        } */

        return ENOSYS;
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
        /* {
                syscallarg(int) incr;
        } */

        return ENOSYS;
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(char *) vec;
        } */
        struct proc *p = l->l_proc;
        struct vm_page *pg;
        char *vec, pgi;
        struct uvm_object *uobj;
        struct vm_amap *amap;
        struct vm_anon *anon;
        struct vm_map_entry *entry;
        vaddr_t start, end, lim;
        struct vm_map *map;
        vsize_t len;
        int error = 0, npgs;

        map = &p->p_vmspace->vm_map;

        start = (vaddr_t)SCARG(uap, addr);
        len = SCARG(uap, len);
        vec = SCARG(uap, vec);

        if (start & PAGE_MASK)
                return EINVAL;
        len = round_page(len);
        end = start + len;
        if (end <= start)
                return EINVAL;

        /*
         * Lock down vec, so our returned status isn't outdated by
         * storing the status byte for a page.
         */

        npgs = len >> PAGE_SHIFT;
        error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
        if (error) {
                return error;
        }
        vm_map_lock_read(map);

        if (uvm_map_lookup_entry(map, start, &entry) == false) {
                error = ENOMEM;
                goto out;
        }

        for (/* nothing */;
             entry != &map->header && entry->start < end;
             entry = entry->next) {
                KASSERT(!UVM_ET_ISSUBMAP(entry));
                KASSERT(start >= entry->start);

                /* Make sure there are no holes. */
                if (entry->end < end &&
                    (entry->next == &map->header ||
                     entry->next->start > entry->end)) {
                        error = ENOMEM;
                        goto out;
                }

                lim = end < entry->end ? end : entry->end;

                /*
                 * Special case for objects with no "real" pages.  Those
                 * are always considered resident (mapped devices).
                 */

                if (UVM_ET_ISOBJ(entry)) {
                        KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
                        if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
                                for (/* nothing */; start < lim;
                                     start += PAGE_SIZE, vec++)
                                        subyte(vec, 1);
                                continue;
                        }
                }

                amap = entry->aref.ar_amap;	/* upper layer */
                uobj = entry->object.uvm_obj;	/* lower layer */

                if (amap != NULL)
                        amap_lock(amap);
                if (uobj != NULL)
                        mutex_enter(uobj->vmobjlock);

                for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
                        pgi = 0;
                        if (amap != NULL) {
                                /* Check the upper layer first. */
                                anon = amap_lookup(&entry->aref,
                                    start - entry->start);
                                /* Don't need to lock anon here. */
                                if (anon != NULL && anon->an_page != NULL) {

                                        /*
                                         * Anon has the page for this entry
                                         * offset.
                                         */

                                        pgi = 1;
                                }
                        }
                        if (uobj != NULL && pgi == 0) {
                                /* Check the lower layer. */
                                pg = uvm_pagelookup(uobj,
                                    entry->offset + (start - entry->start));
                                if (pg != NULL) {

                                        /*
                                         * Object has the page for this entry
                                         * offset.
                                         */

                                        pgi = 1;
                                }
                        }
                        (void) subyte(vec, pgi);
                }
                if (uobj != NULL)
                        mutex_exit(uobj->vmobjlock);
                if (amap != NULL)
                        amap_unlock(amap);
        }

 out:
        vm_map_unlock_read(map);
        uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
        return error;
}
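
/*
 * Illustrative usage sketch (editorial addition, not compiled here):
 * the status vector passed to mincore(2) must hold one byte per page of
 * the request, which is why npgs above is len >> PAGE_SHIFT.  Assuming
 * addr is page-aligned:
 *
 *	char vec[16];
 *	size_t pgsz = sysconf(_SC_PAGESIZE);
 *
 *	if (mincore(addr, 16 * pgsz, vec) == 0) {
 *		// vec[i] != 0  => page i of the range is resident
 *		// (device mappings are always reported as resident)
 *	}
 */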

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
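
/*
 * Worked example (editorial addition, assuming a 4 KiB page size): a
 * non-fixed request such as
 *
 *	mmap((void *)0x20001234, 100, PROT_READ, MAP_PRIVATE, fd, 0x5234);
 *
 * has pageoff = 0x234, so the file offset is truncated to 0x5000, the
 * length is rounded up to one page, and the mapping is established at a
 * page-aligned address chosen by the kernel; the value returned to the
 * caller is that address plus 0x234, preserving the offset-within-page
 * relationship.  With MAP_FIXED the same arithmetic is applied to addr,
 * and the call fails with EINVAL unless addr and the file offset share
 * the same page offset.
 */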

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
                syscallarg(int) flags;
                syscallarg(int) fd;
                syscallarg(long) pad;
                syscallarg(off_t) pos;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        off_t pos;
        vsize_t size, pageoff, newsize;
        vm_prot_t prot, maxprot;
        int flags, fd, advice;
        vaddr_t defaddr;
        struct file *fp = NULL;
        struct uvm_object *uobj;
        int error;
#ifdef PAX_ASLR
        vaddr_t orig_addr;
#endif /* PAX_ASLR */

        /*
         * first, extract syscall args from the uap.
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;
        flags = SCARG(uap, flags);
        fd = SCARG(uap, fd);
        pos = SCARG(uap, pos);

#ifdef PAX_ASLR
        orig_addr = addr;
#endif /* PAX_ASLR */

        /*
         * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
         * validate the flags.
         */
        if (flags & MAP_COPY) {
                flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
                /*
                 * Ancient kernels, at least on i386, did not obey
                 * PROT_EXEC, and ld.so did not turn it on.  We take care
                 * of this on amd64 in compat32.
                 */
                prot |= PROT_EXEC;
#endif
        }
        if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
                return EINVAL;

        /*
         * align file position and save offset.  adjust size.
         */

        pageoff = (pos & PAGE_MASK);
        pos -= pageoff;
        newsize = size + pageoff;		/* add offset */
        newsize = (vsize_t)round_page(newsize);	/* round up */

        if (newsize < size)
                return ENOMEM;
        size = newsize;

        /*
         * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
         */
        if (flags & MAP_FIXED) {
                /* ensure address and file offset are aligned properly */
                addr -= pageoff;
                if (addr & PAGE_MASK)
                        return EINVAL;

                error = range_test(&p->p_vmspace->vm_map, addr, size, true);
                if (error) {
                        return error;
                }
        } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
                /*
                 * not fixed: make sure we skip over the largest
                 * possible heap for non-topdown mapping arrangements.
                 * we will refine our guess later (e.g. to account for
                 * VAC, etc)
                 */

                defaddr = p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, size,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

                if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
                        addr = MAX(addr, defaddr);
                else
                        addr = MIN(addr, defaddr);
        }

        /*
         * check for file mappings (i.e. not anonymous) and verify file.
         */

        advice = UVM_ADV_NORMAL;
        if ((flags & MAP_ANON) == 0) {
                if ((fp = fd_getfile(fd)) == NULL)
                        return EBADF;

                if (fp->f_ops->fo_mmap == NULL) {
                        error = ENODEV;
                        goto out;
                }
                error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
                    &advice, &uobj, &maxprot);
                if (error) {
                        goto out;
                }
                if (uobj == NULL) {
                        flags |= MAP_ANON;
                        fd_putfile(fd);
                        fp = NULL;
                        goto is_anon;
                }
        } else {		/* MAP_ANON case */
                /*
                 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
                 */
                if (fd != -1)
                        return EINVAL;

 is_anon:		/* label for SunOS style /dev/zero */
                uobj = NULL;
                maxprot = VM_PROT_ALL;
                pos = 0;
        }

        PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

        pax_aslr_mmap(l, &addr, orig_addr, flags);

        /*
         * now let kernel internal function uvm_mmap do the work.
         */

        error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
            flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

        /* remember to add offset */
        *retval = (register_t)(addr + pageoff);

 out:
        if (fp != NULL)
                fd_putfile(fd);

        return error;
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) flags;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        struct vm_map *map;
        int error, flags, uvmflags;
        bool rv;

        /*
         * extract syscall args from the uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        flags = SCARG(uap, flags);

        /* sanity check flags */
        if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
            (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
            (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
                return EINVAL;
        if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
                flags |= MS_SYNC;

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        /*
         * get map
         */
        map = &p->p_vmspace->vm_map;

        error = range_test(map, addr, size, false);
        if (error)
                return ENOMEM;

        /*
         * XXXCDC: do we really need this semantic?
         *
         * XXX Gak!  If size is zero we are supposed to sync "all modified
         * pages within the region containing addr".  Unfortunately, we
         * don't really keep track of individual mmaps so we approximate
         * by flushing the range of the map entry containing addr.
         * This can be incorrect if the region splits or is coalesced
         * with a neighbor.
         */

        if (size == 0) {
                struct vm_map_entry *entry;

                vm_map_lock_read(map);
                rv = uvm_map_lookup_entry(map, addr, &entry);
                if (rv == true) {
                        addr = entry->start;
                        size = entry->end - entry->start;
                }
                vm_map_unlock_read(map);
                if (rv == false)
                        return EINVAL;
        }

        /*
         * translate MS_ flags into PGO_ flags
         */

        uvmflags = PGO_CLEANIT;
        if (flags & MS_INVALIDATE)
                uvmflags |= PGO_FREE;
        if (flags & MS_SYNC)
                uvmflags |= PGO_SYNCIO;

        error = uvm_map_clean(map, addr, addr+size, uvmflags);
        return error;
}
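
/*
 * Editorial note on the common prologue used by msync above and by the
 * munmap/mprotect/minherit/madvise/mlock/munlock handlers below: the
 * user-supplied range is grown to full page boundaries before it is
 * passed to the map.  Assuming a 4 KiB page size, a request covering
 * (addr = 0x20001234, len = 0x100) becomes
 *
 *	pageoff = 0x234;
 *	addr    = 0x20001000;		// addr -= pageoff
 *	size    = round_page(0x334);	// == 0x1000, one full page
 *
 * so the operation always applies to the whole pages containing the
 * requested bytes.
 */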

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        struct vm_map *map;
        struct vm_map_entry *dead_entries;
        int error;

        /*
         * get syscall args.
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        if (size == 0)
                return 0;

        map = &p->p_vmspace->vm_map;

        error = range_test(map, addr, size, false);
        if (error)
                return EINVAL;

        vm_map_lock(map);
#if 0
        /*
         * interesting system call semantic: make sure entire range is
         * allocated before allowing an unmap.
         */
        if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
                vm_map_unlock(map);
                return EINVAL;
        }
#endif
        uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
        vm_map_unlock(map);
        if (dead_entries != NULL)
                uvm_unmap_detach(dead_entries, 0);
        return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) prot;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        vm_prot_t prot;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        prot = SCARG(uap, prot) & VM_PROT_ALL;

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = round_page(size);

        error = range_test(&p->p_vmspace->vm_map, addr, size, false);
        if (error)
                return EINVAL;

        error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
            false);
        return error;
}
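
/*
 * Editorial note: masking the user-supplied protection with VM_PROT_ALL
 * (here and in sys_mmap above) relies on the mmap(2) PROT_* values and
 * the kernel VM_PROT_* values being numerically identical, i.e.
 * PROT_READ == VM_PROT_READ (0x01), PROT_WRITE == VM_PROT_WRITE (0x02)
 * and PROT_EXEC == VM_PROT_EXECUTE (0x04); any unknown bits are simply
 * discarded rather than rejected.
 */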

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(int) len;
                syscallarg(int) inherit;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        vm_inherit_t inherit;
        int error;

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        inherit = SCARG(uap, inherit);

        /*
         * align the address to a page boundary and adjust the size accordingly.
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(&p->p_vmspace->vm_map, addr, size, false);
        if (error)
                return EINVAL;

        error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
            inherit);
        return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(void *) addr;
                syscallarg(size_t) len;
                syscallarg(int) behav;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        int advice, error;

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);
        advice = SCARG(uap, behav);

        /*
         * align the address to a page boundary, and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(&p->p_vmspace->vm_map, addr, size, false);
        if (error)
                return EINVAL;

        switch (advice) {
        case MADV_NORMAL:
        case MADV_RANDOM:
        case MADV_SEQUENTIAL:
                error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
                    advice);
                break;

        case MADV_WILLNEED:

                /*
                 * Activate all these pages, pre-faulting them in if
                 * necessary.
                 */
                error = uvm_map_willneed(&p->p_vmspace->vm_map,
                    addr, addr + size);
                break;

        case MADV_DONTNEED:

                /*
                 * Deactivate all these pages.  We don't need them
                 * any more.  We don't, however, toss the data in
                 * the pages.
                 */

                error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    PGO_DEACTIVATE);
                break;

        case MADV_FREE:

                /*
                 * These pages contain no valid data, and may be
                 * garbage-collected.  Toss all resources, including
                 * any swap space in use.
                 */

                error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
                    PGO_FREE);
                break;

        case MADV_SPACEAVAIL:

                /*
                 * XXXMRG What is this?  I think it's:
                 *
                 *	Ensure that we have allocated backing-store
                 *	for these pages.
                 *
                 * This is going to require changes to the page daemon,
                 * as it will free swap space allocated to pages in core.
                 * There's also what to do for device/file/anonymous memory.
                 */

                return EINVAL;

        default:
                return EINVAL;
        }

        return error;
}
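
/*
 * Editorial summary of the two destructive advice values handled above:
 * MADV_DONTNEED only deactivates the pages (PGO_DEACTIVATE), so their
 * contents survive and are faulted back in on the next access, while
 * MADV_FREE discards the pages and any swap backing them (PGO_FREE), so
 * a later access sees zero-fill or the original file data.  A cache that
 * can cheaply rebuild its contents would use MADV_FREE; one that merely
 * wants to hint "not needed soon" would use MADV_DONTNEED.
 */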

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
        /* {
                syscallarg(const void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        /*
         * align the address to a page boundary and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(&p->p_vmspace->vm_map, addr, size, false);
        if (error)
                return ENOMEM;

        if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
                return EAGAIN;

        if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
                return EAGAIN;

        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
            0);
        if (error == EFAULT)
                error = ENOMEM;
        return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(const void *) addr;
                syscallarg(size_t) len;
        } */
        struct proc *p = l->l_proc;
        vaddr_t addr;
        vsize_t size, pageoff;
        int error;

        /*
         * extract syscall args from uap
         */

        addr = (vaddr_t)SCARG(uap, addr);
        size = (vsize_t)SCARG(uap, len);

        /*
         * align the address to a page boundary, and adjust the size accordingly
         */

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vsize_t)round_page(size);

        error = range_test(&p->p_vmspace->vm_map, addr, size, false);
        if (error)
                return ENOMEM;

        error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
            0);
        if (error)
                return ENOMEM;

        return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
        /* {
                syscallarg(int) flags;
        } */
        struct proc *p = l->l_proc;
        int error, flags;

        flags = SCARG(uap, flags);

        if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
                return EINVAL;

        error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}
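
/*
 * Editorial worked example for the two mlock(2) admission checks above
 * (illustrative numbers, 4 KiB pages): locking 64 MiB wires
 * atop(size) = 16384 pages.  The request is refused with EAGAIN if that
 * would push the system-wide wired page count past uvmexp.wiredmax, or
 * if the process would exceed its RLIMIT_MEMLOCK limit; e.g. with
 * 32 MiB already wired and a 64 MiB soft limit, a further 64 MiB
 * request fails because 64 MiB + 32 MiB > 64 MiB.
 */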

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
        struct proc *p = l->l_proc;

        (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
        return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
        vaddr_t align = 0;
        int error;
        uvm_flag_t uvmflag = 0;

        /*
         * check params
         */

        if (size == 0)
                return 0;
        if (foff & PAGE_MASK)
                return EINVAL;
        if ((prot & maxprot) != prot)
                return EINVAL;

        /*
         * for non-fixed mappings, round off the suggested address.
         * for fixed mappings, check alignment and zap old mappings.
         */

        if ((flags & MAP_FIXED) == 0) {
                *addr = round_page(*addr);
        } else {
                if (*addr & PAGE_MASK)
                        return EINVAL;
                uvmflag |= UVM_FLAG_FIXED;
                (void) uvm_unmap(map, *addr, *addr + size);
        }

        /*
         * Try to see if any requested alignment can even be attempted.
         * Make sure we can express the alignment (asking for a >= 4GB
         * alignment on an ILP32 architecture makes no sense) and that
         * the alignment is at least a page-sized quantity.  If the
         * request was for a fixed mapping, make sure the supplied
         * address adheres to the requested alignment.
         */
        align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
        if (align) {
                if (align >= sizeof(vaddr_t) * NBBY)
                        return EINVAL;
                align = 1L << align;
                if (align < PAGE_SIZE)
                        return EINVAL;
                if (align >= vm_map_max(map))
                        return ENOMEM;
                if (flags & MAP_FIXED) {
                        if ((*addr & (align-1)) != 0)
                                return EINVAL;
                        align = 0;
                }
        }
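
        /*
         * Editorial note: the alignment request is carried in the mmap
         * flags as a log2 value, e.g. with the <sys/mman.h> encoding
         * MAP_ALIGNED(16) asks for a 2^16 = 64 KiB aligned mapping,
         * which the code above turns into align = 1 << 16 after
         * range-checking the exponent.
         */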

        /*
         * check resource limits
         */

        if (!VM_MAP_IS_KERNEL(map) &&
            (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
             curproc->p_rlimit[RLIMIT_AS].rlim_cur))
                return ENOMEM;

        /*
         * handle anon vs. non-anon mappings.  for non-anon mappings attach
         * to underlying vm object.
         */

        if (flags & MAP_ANON) {
                KASSERT(uobj == NULL);
                foff = UVM_UNKNOWN_OFFSET;
                if ((flags & MAP_SHARED) == 0)
                        /* XXX: defer amap create */
                        uvmflag |= UVM_FLAG_COPYONW;
                else
                        /* shared: create amap now */
                        uvmflag |= UVM_FLAG_OVERLAY;

        } else {
                KASSERT(uobj != NULL);
                if ((flags & MAP_SHARED) == 0) {
                        uvmflag |= UVM_FLAG_COPYONW;
                }
        }

        uvmflag = UVM_MAPFLAG(prot, maxprot,
            (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
            uvmflag);
        error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
        if (error) {
                if (uobj)
                        uobj->pgops->pgo_detach(uobj);
                return error;
        }

        /*
         * POSIX 1003.1b -- if our address space was configured
         * to lock all future mappings, wire the one we just made.
         *
         * Also handle the MAP_WIRED flag here.
         */

        if (prot == VM_PROT_NONE) {

                /*
                 * No more work to do in this case.
                 */

                return 0;
        }
        if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
                vm_map_lock(map);
                if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
                    (locklimit != 0 &&
                     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
                     locklimit)) {
                        vm_map_unlock(map);
                        uvm_unmap(map, *addr, *addr + size);
                        return ENOMEM;
                }

                /*
                 * uvm_map_pageable() always returns the map unlocked.
                 */

                error = uvm_map_pageable(map, *addr, *addr + size,
                    false, UVM_LK_ENTER);
                if (error) {
                        uvm_unmap(map, *addr, *addr + size);
                        return error;
                }
                return 0;
        }
        return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

        if (topdown)
                return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
        else
                return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
        struct uvm_object *uobj;
        int error, flags, prot;

        flags = MAP_SHARED;
        prot = VM_PROT_READ | VM_PROT_WRITE;
        if (*addrp)
                flags |= MAP_FIXED;
        else
                *addrp = (void *)p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, len,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

        uobj = udv_attach(dev, prot, off, len);
        if (uobj == NULL)
                return EINVAL;

        error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
            (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
        int error, flags, prot;

        flags = MAP_PRIVATE | MAP_ANON;
        prot = VM_PROT_READ | VM_PROT_WRITE;
        if (*addrp)
                flags |= MAP_FIXED;
        else
                *addrp = (void *)p->p_emul->e_vm_default_addr(p,
                    (vaddr_t)p->p_vmspace->vm_daddr, len,
                    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

        error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
            (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
            p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
        return error;
}
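
/*
 * Editorial usage sketch (hypothetical caller, not part of this file):
 * kernel code that wants pageable anonymous space in the current
 * process, or a device mapping at a kernel-chosen address, could use
 * the helpers above roughly as follows:
 *
 *	void *va = NULL;
 *	int error = uvm_mmap_anon(curproc, &va, 4 * PAGE_SIZE);
 *	// on success, va points at a private, zero-filled, read/write
 *	// mapping in curproc's address space
 *
 *	void *dva = NULL;
 *	error = uvm_mmap_dev(curproc, &dva, PAGE_SIZE, dev, 0);
 *
 * Passing a non-NULL *addrp instead requests a MAP_FIXED mapping at
 * that address.
 */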