/*	$NetBSD: uvm_mmap.c,v 1.167 2017/10/27 12:01:08 utkarsh009 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.167 2017/10/27 12:01:08 utkarsh009 Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

/*
 * range_test: check that [addr, addr + size) lies within the map and does
 * not wrap.  mmap callers get slightly different error codes than the rest.
 */
static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return ENOSYS;
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return ENOSYS;
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

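/*
 * Userland usage sketch (illustrative only, not part of the kernel build):
 * mincore(2) returns one status byte per page of the queried range, with
 * bit 0 set for resident pages.  A hypothetical caller could check the
 * residency of a freshly touched anonymous buffer like this:
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		long pagesz = sysconf(_SC_PAGESIZE);
 *		size_t len = 4 * pagesz;
 *		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *		char vec[4];
 *
 *		if (buf == MAP_FAILED)
 *			err(1, "mmap");
 *		buf[0] = 1;
 *		if (mincore(buf, len, vec) == -1)
 *			err(1, "mincore");
 *		for (int i = 0; i < 4; i++)
 *			printf("page %d: %s\n", i,
 *			    (vec[i] & 1) ? "resident" : "not resident");
 *		return 0;
 *	}
 */
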
/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if the address isn't page aligned, the mapping starts at
 *      trunc_page(addr) and the return value is adjusted up by the page
 *      offset.
 */

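/*
 * Userland usage sketch (illustrative only): mapping a file at an offset
 * that is not page aligned.  As described above, the mapping is made from
 * trunc_page(offset) and the returned pointer is adjusted by the in-page
 * offset, so "p" below points exactly at byte "off" of the file.  The file
 * name and offset are made up for the example.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("example.dat", O_RDONLY);
 *		off_t off = 100;
 *		size_t len = 4096;
 *		char *p;
 *
 *		if (fd == -1)
 *			err(1, "open");
 *		p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, off);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		printf("%.20s\n", p);
 *		return 0;
 *	}
 */
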
int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot, extraprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return ENOMEM;
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:	/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
	if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
		error = EACCES;
		goto out;
	}
	if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
		goto out;

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

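/*
 * Userland usage sketch (illustrative only): synchronously flushing
 * modified pages of a shared file mapping back to the underlying file.
 * The header layout and helper are hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <string.h>
 *
 *	void
 *	flush_header(int fd)
 *	{
 *		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, 0);
 *
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		memcpy(p, "HDR1", 4);
 *		if (msync(p, 4096, MS_SYNC) == -1)
 *			err(1, "msync");
 *		munmap(p, 4096);
 *	}
 */
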
int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return 0;

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return EINVAL;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

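/*
 * Userland usage sketch (illustrative only): with PaX MPROTECT enabled, a
 * mapping can only be mprotect(2)ed to protections it reserved at mmap(2)
 * time via PROT_MPROTECT().  A hypothetical JIT-style buffer could be set
 * up like this:
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	void *
 *	alloc_code_buffer(size_t len)
 *	{
 *		void *p = mmap(NULL, len,
 *		    PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC),
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		return p;
 *	}
 *
 * and, after emitting code into it, made executable with:
 *
 *	if (mprotect(p, len, PROT_READ | PROT_EXEC) == -1)
 *		err(1, "mprotect");
 */
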
int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_protect_user(l, addr, addr + size, prot);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

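/*
 * Userland usage sketch (illustrative only): telling the VM system that a
 * large scratch buffer will be read sequentially, and that its contents
 * can be discarded once processing is done.  The buffer and helper are
 * hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	void
 *	process_scratch(char *buf, size_t len)
 *	{
 *		if (madvise(buf, len, MADV_SEQUENTIAL) == -1)
 *			err(1, "madvise");
 *
 *		... stream through the buffer ...
 *
 *		if (madvise(buf, len, MADV_FREE) == -1)
 *			err(1, "madvise");
 *	}
 */
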
/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

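/*
 * Userland usage sketch (illustrative only): wiring a small buffer that
 * holds key material so it is never written to swap.  The amount that can
 * be locked is bounded by RLIMIT_MEMLOCK and by the global wired-page
 * limit checked below.  The helpers are hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *
 *	void *
 *	alloc_secret(size_t len)
 *	{
 *		void *p = malloc(len);
 *
 *		if (p == NULL)
 *			err(1, "malloc");
 *		if (mlock(p, len) == -1)
 *			err(1, "mlock");
 *		return p;
 *	}
 *
 *	void
 *	free_secret(void *p, size_t len)
 *	{
 *		memset(p, 0, len);
 *		munlock(p, len);
 *		free(p);
 *	}
 */
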
int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error)
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}

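/*
 * Kernel-internal usage sketch (illustrative only): most kernel code does
 * not call uvm_mmap() directly but goes through the wrappers at the bottom
 * of this file.  A hypothetical driver wanting an anonymous pageable
 * buffer in a user process, or a user mapping of a character device, could
 * do something like:
 *
 *	void *uva = NULL;
 *	error = uvm_mmap_anon(p, &uva, len);
 *
 *	void *devva = NULL;
 *	error = uvm_mmap_dev(p, &devva, len, dev, offset);
 *
 * Passing a non-NULL *addrp turns the request into a MAP_FIXED mapping at
 * that address (see the wrappers below).
 */
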
/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that the
	 * alignment is at least a page-sized quantity.  If the request
	 * was for a fixed mapping, make sure the supplied address adheres
	 * to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}

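/*
 * uvm_default_mapaddr: return the default start address for a new mapping,
 * using the topdown or bottom-up layout of the process as appropriate.
 * Callers normally reach it through p->p_emul->e_vm_default_addr.
 */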
vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}