/*	$NetBSD: uvm_mmap.c,v 1.169 2017/12/19 18:34:47 kamil Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.169 2017/12/19 18:34:47 kamil Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

/*
 * range_test: check that [addr, addr + size) lies within the map and
 * does not wrap around the address space.
 */
static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr) /* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * sys_mincore: determine if pages are in core or not.
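 *
 * From userland (an illustrative sketch, not part of this file), the
 * caller supplies one status byte per page and each byte is set non-zero
 * when the corresponding page is resident, e.g.
 *
 *	char vec[npages];
 *	mincore(addr, npages * sysconf(_SC_PAGESIZE), vec);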
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
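 *
 * For example (a sketch, assuming 4KB pages): a request to map 100 bytes
 * at file offset 0x1234 is turned into a mapping that starts at file
 * offset 0x1000, and the value returned to the caller is the mapped
 * address plus the page offset 0x234, so it points at offset 0x1234.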
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot, extraprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return ENOMEM;
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
	if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
		error = EACCES;
		goto out;
	}
	if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
		goto out;

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
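	 * (uvm_mmap() below also applies the RLIMIT_AS check and, for
	 * MAP_WIRED mappings or address spaces with future-wiring enabled,
	 * wires the new mapping against the RLIMIT_MEMLOCK limit passed
	 * in here.)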
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
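	 *
	 * e.g. (a worked example, assuming 4KB pages) munmap(0x20234, 0x100)
	 * becomes a request covering [0x20000, 0x21000): pageoff = 0x234,
	 * addr = 0x20000, size = round_page(0x100 + 0x234) = 0x1000.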
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return 0;

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return EINVAL;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_protect_user(l, addr, addr + size, prot);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
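 *
 * For illustration: madvise(buf, len, MADV_DONTNEED) deactivates the
 * pages backing buf without discarding their contents, while MADV_FREE
 * also throws away the data and any swap space in use (see the handling
 * of each case below).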
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true,
	    0);
	if (error)
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that the
	 * alignment is at least a page-sized quantity.  If the request
	 * was for a fixed mapping, make sure the supplied address adheres
	 * to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}
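
/*
 * Usage note (an illustrative sketch, not code used by this file): a
 * kernel caller that wants an anonymous, read/write mapping in a process
 * can do
 *
 *	void *va = NULL;
 *	error = uvm_mmap_anon(p, &va, len);
 *
 * Leaving va NULL requests default placement via e_vm_default_addr();
 * uvm_mmap_dev() works the same way but maps a device obtained with
 * udv_attach() instead of anonymous memory.
 */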