/*	$NetBSD: uvm_mmap.c,v 1.175 2020/02/23 15:46:43 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.175 2020/02/23 15:46:43 ad Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(const struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr)		/* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * align the address to a page boundary, and adjust the size accordingly
 */
static int
round_and_check(const struct vm_map *map, vaddr_t *addr, vsize_t *size)
{
	const vsize_t pageoff = (vsize_t)(*addr & PAGE_MASK);

	*addr -= pageoff;

	if (*size != 0) {
		*size += pageoff;
		*size = (vsize_t)round_page(*size);
	} else if (*addr + *size < *addr) {
		return ENOMEM;
	}

	return range_test(map, *addr, *size, false);
}
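
/*
 * Worked example (illustrative only, assuming 4 KiB pages, so
 * PAGE_MASK == 0xfff): a request with *addr = 0x20001234 and
 * *size = 0x100 is rewritten to *addr = 0x20001000 and
 * *size = round_page(0x100 + 0x234) = 0x1000, i.e. the whole page
 * containing the original range.  The rounded range is then passed to
 * range_test() with ismmap false, so out-of-range requests from these
 * callers produce EINVAL rather than mmap's EFBIG/EOVERFLOW.
 */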

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0;
	size_t npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					ustore_char(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap, RW_READER);
		if (uobj != NULL)
			rw_enter(uobj->vmobjlock, RW_READER);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) ustore_char(vec, pgi);
		}
		if (uobj != NULL)
			rw_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
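
/*
 * Illustrative userland view (a sketch, not part of this file): for
 * each page of [addr, addr + len) the corresponding byte of "vec" is
 * set to 1 if the page is resident in the anon layer, the object
 * layer, or a device mapping, and to 0 otherwise, e.g.
 *
 *	char vec[npages];
 *	if (mincore(addr, len, vec) == 0 && vec[i])
 *		... page i was resident at the time of the call ...
 */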

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot, extraprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

	if (newsize < size)
		return ENOMEM;
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:		/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
	if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
		error = EACCES;
		goto out;
	}
	if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
		goto out;

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}
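
/*
 * Example of the unaligned-offset handling above (illustrative,
 * assuming 4 KiB pages): mmap(NULL, 0x200, prot, MAP_SHARED, fd, 0x1080)
 * computes pageoff = 0x080, maps one full page backed by file offset
 * 0x1000, and returns addr + 0x080, so the returned pointer refers to
 * file offset 0x1080 as requested.
 */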

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	if (round_and_check(map, &addr, &size))
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr+size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	map = &p->p_vmspace->vm_map;

	if (round_and_check(map, &addr, &size))
		return EINVAL;

	if (size == 0)
		return 0;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return EINVAL;

	error = uvm_map_protect_user(l, addr, addr + size, prot);
	return error;
}
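
/*
 * Note (a summary of behaviour implemented in uvm_map.c, not here):
 * the protection change is bounded by each entry's maximum protection,
 * which was established at mmap time and may have been restricted
 * further by PAX MPROTECT, so uvm_map_protect_user() is expected to
 * fail attempts to exceed it with EACCES.
 */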

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return ENOMEM;

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
	    0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}
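
/*
 * Illustrative limit arithmetic (assuming 4 KiB pages): with
 * RLIMIT_MEMLOCK set to 64 KiB, a process that already has 60 KiB of
 * wired mappings can mlock() at most one more page; a two-page request
 * would push the total to 68 KiB and fail with EAGAIN.  The global
 * uvmexp.wiredmax check above is applied first, independently of the
 * per-process limit.
 */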

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
		return ENOMEM;

	if (uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true, 0))
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that
	 * the alignment is at least a page-sized quantity.  If the
	 * request was for a fixed mapping, make sure the supplied address
	 * adheres to the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1UL << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}
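
	/*
	 * Illustrative example of the encoding handled above: the
	 * MAP_ALIGNMENT_MASK bits of "flags" carry log2 of the requested
	 * alignment (cf. MAP_ALIGNED(n) in <sys/mman.h>), so e.g.
	 * MAP_ALIGNED(21) asks for a 2 MiB-aligned mapping.  Values below
	 * log2(PAGE_SIZE) or at or above the number of address bits are
	 * rejected by the checks above.
	 */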

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}