/*	$NetBSD: uvm_mmap.c,v 1.48 2001/01/08 01:35:03 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *	Washington University, University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>

#include <miscfs/specfs/specdev.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>


/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sbrk_args /* {
		syscallarg(intptr_t) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
#if 0
	struct sys_sstk_args /* {
		syscallarg(int) incr;
	} */ *uap = v;
#endif

	return (ENOSYS);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	if (uvm_useracc(vec, npgs, B_WRITE) == FALSE)
		return (EFAULT);

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	uvm_vslock(p, vec, npgs, VM_PROT_WRITE);

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
#ifdef DIAGNOSTIC
		if (UVM_ET_ISSUBMAP(entry))
			panic("mincore: user map has submap");
		if (start < entry->start)
			panic("mincore: hole");
#endif
		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
#ifdef DIAGNOSTIC
			if (UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj))
				panic("mincore: user map has kernel object");
#endif
			if (entry->object.uvm_obj->pgops->pgo_releasepg
			    == NULL) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->u.an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			if (uobj != NULL && pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			(void) subyte(vec, pgi);
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	return (error);
}
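
/*
 * Illustrative example: sys_mincore() stores one status byte per page
 * into the user-supplied vector, so for a page-aligned, three-page
 * range where only the middle page is resident, vec[] reads back as
 * { 0, 1, 0 }.  Pages of device mappings (objects with no "real"
 * pages) are always reported as resident.
 */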

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY)
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos  -= pageoff;
	size += pageoff;			/* add offset */
	size = (vsize_t) round_page(size);	/* round up */
	if ((ssize_t) size < 0)
		return (EINVAL);		/* don't allow wrap */
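
	/*
	 * Worked example (illustrative, assuming 4KB pages): pos = 0x12345
	 * and len = 0x100 give pageoff = 0x345, pos = 0x12000 and
	 * size = round_page(0x100 + 0x345) = 0x1000; the address returned
	 * at the end of this function is bumped up by the same 0x345 so
	 * that it refers to the byte at the originally requested offset.
	 */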

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);	/* no wrapping! */

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */

		if (addr < round_page((vaddr_t)p->p_vmspace->vm_daddr +
		    MAXDSIZ))
			addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
			    MAXDSIZ);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	if ((flags & MAP_ANON) == 0) {

		if (fd < 0 || fd >= fdp->fd_nfiles)
			return(EBADF);		/* failed range check? */
		fp = fdp->fd_ofiles[fd];	/* convert to file pointer */
		if (fp == NULL)
			return(EBADF);

		if (fp->f_type != DTYPE_VNODE)
			return (ENODEV);	/* only mmap vnodes! */
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK)
			return (ENODEV); /* only REG/CHR/BLK support mmap */

		if (vp->v_type == VREG && (pos + size) < pos)
			return (EOVERFLOW);		/* no offset wrapping */

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
#if defined(DIAGNOSTIC)
			printf("WARNING: converted MAP_PRIVATE device mapping "
			    "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
			    p->p_comm);
#endif
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ)
			return (EACCES);

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable, append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					return (error);
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EPERM);
			}
			else if (prot & PROT_WRITE)
				return (EACCES);
		} else {
			/* MAP_PRIVATE mappings can always be written to */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return (EINVAL);

 is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * XXX (in)sanity check.  We don't do proper datasize checking
	 * XXX for anonymous (or private writable) mmap().  However, we
	 * XXX know that if we're trying to allocate more than the amount
	 * XXX remaining under our current data size limit, _that_ should
	 * XXX be disallowed.
	 */
	if ((flags & MAP_ANON) != 0 ||
	    ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
		if (size >
		    (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dsize))) {
			return (ENOMEM);
		}
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

	return (error);
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys___msync13_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + size < addr)
		return (EINVAL);

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */

	/*
	 * doit!
	 */
	rv = uvm_map_clean(map, addr, addr+size, uvmflags);

	/*
	 * and return...
	 */
	switch (rv) {
	case KERN_SUCCESS:
		return(0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	case KERN_PAGES_LOCKED:	/* XXXCDC: uvm doesn't return this */
		return (EBUSY);
	default:
		return (EINVAL);
	}
	/*NOTREACHED*/
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
646 */ 647 648 addr = (vaddr_t) SCARG(uap, addr); 649 size = (vsize_t) SCARG(uap, len); 650 651 /* 652 * align the address to a page boundary, and adjust the size accordingly 653 */ 654 655 pageoff = (addr & PAGE_MASK); 656 addr -= pageoff; 657 size += pageoff; 658 size = (vsize_t) round_page(size); 659 660 if ((int)size < 0) 661 return (EINVAL); 662 if (size == 0) 663 return (0); 664 665 /* 666 * Check for illegal addresses. Watch out for address wrap... 667 * Note that VM_*_ADDRESS are not constants due to casts (argh). 668 */ 669 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) 670 return (EINVAL); 671 if (vm_min_address > 0 && addr < vm_min_address) 672 return (EINVAL); 673 if (addr > addr + size) 674 return (EINVAL); 675 map = &p->p_vmspace->vm_map; 676 677 678 vm_map_lock(map); /* lock map so we can checkprot */ 679 680 /* 681 * interesting system call semantic: make sure entire range is 682 * allocated before allowing an unmap. 683 */ 684 685 if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) { 686 vm_map_unlock(map); 687 return (EINVAL); 688 } 689 690 /* 691 * doit! 692 */ 693 (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries); 694 695 vm_map_unlock(map); /* and unlock */ 696 697 if (dead_entries != NULL) 698 uvm_unmap_detach(dead_entries, 0); 699 700 return (0); 701 } 702 703 /* 704 * sys_mprotect: the mprotect system call 705 */ 706 707 int 708 sys_mprotect(p, v, retval) 709 struct proc *p; 710 void *v; 711 register_t *retval; 712 { 713 struct sys_mprotect_args /* { 714 syscallarg(caddr_t) addr; 715 syscallarg(int) len; 716 syscallarg(int) prot; 717 } */ *uap = v; 718 vaddr_t addr; 719 vsize_t size, pageoff; 720 vm_prot_t prot; 721 int rv; 722 723 /* 724 * extract syscall args from uap 725 */ 726 727 addr = (vaddr_t)SCARG(uap, addr); 728 size = (vsize_t)SCARG(uap, len); 729 prot = SCARG(uap, prot) & VM_PROT_ALL; 730 731 /* 732 * align the address to a page boundary, and adjust the size accordingly 733 */ 734 pageoff = (addr & PAGE_MASK); 735 addr -= pageoff; 736 size += pageoff; 737 size = (vsize_t) round_page(size); 738 if ((int)size < 0) 739 return (EINVAL); 740 741 /* 742 * doit 743 */ 744 745 rv = uvm_map_protect(&p->p_vmspace->vm_map, 746 addr, addr+size, prot, FALSE); 747 748 if (rv == KERN_SUCCESS) 749 return (0); 750 if (rv == KERN_PROTECTION_FAILURE) 751 return (EACCES); 752 return (EINVAL); 753 } 754 755 /* 756 * sys_minherit: the minherit system call 757 */ 758 759 int 760 sys_minherit(p, v, retval) 761 struct proc *p; 762 void *v; 763 register_t *retval; 764 { 765 struct sys_minherit_args /* { 766 syscallarg(caddr_t) addr; 767 syscallarg(int) len; 768 syscallarg(int) inherit; 769 } */ *uap = v; 770 vaddr_t addr; 771 vsize_t size, pageoff; 772 vm_inherit_t inherit; 773 774 addr = (vaddr_t)SCARG(uap, addr); 775 size = (vsize_t)SCARG(uap, len); 776 inherit = SCARG(uap, inherit); 777 /* 778 * align the address to a page boundary, and adjust the size accordingly 779 */ 780 781 pageoff = (addr & PAGE_MASK); 782 addr -= pageoff; 783 size += pageoff; 784 size = (vsize_t) round_page(size); 785 786 if ((int)size < 0) 787 return (EINVAL); 788 789 switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, 790 inherit)) { 791 case KERN_SUCCESS: 792 return (0); 793 case KERN_PROTECTION_FAILURE: 794 return (EACCES); 795 } 796 return (EINVAL); 797 } 798 799 /* 800 * sys_madvise: give advice about memory usage. 
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, rv;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	if ((ssize_t)size <= 0)
		return (EINVAL);

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		rv = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:
		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		/*
		 * XXX IMPLEMENT ME.
		 * Should invent a "weak" mode for uvm_fault()
		 * which would only do the PGO_LOCKED pgo_get().
		 */
		return (0);

	case MADV_DONTNEED:
		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */
		rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:
		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */
		rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:
		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */
		return (EINVAL);

	default:
		return (EINVAL);
	}

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_NO_SPACE:
		return (EAGAIN);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_FAILURE:
		return (EIO);
	}

	return (EINVAL);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
	    0);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t) round_page(size);

	/* disallow wrap-around. */
	if (addr + (int)size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
	    0);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mlockall_args /* {
		syscallarg(int) flags;
	} */ *uap = v;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);
#endif

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	switch (error) {
	case KERN_SUCCESS:
		error = 0;
		break;

	case KERN_NO_SPACE:	/* XXX overloaded */
		error = ENOMEM;
		break;

	default:
		/*
		 * "Some or all of the memory could not be locked when
		 * the call was made."
		 */
		error = EAGAIN;
	}

	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
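
/*
 * Example (illustrative sketch only): an anonymous, copy-on-write
 * mapping such as sys_mmap() builds for MAP_ANON|MAP_PRIVATE would be
 * requested roughly as
 *
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
 *	    VM_PROT_READ|VM_PROT_WRITE, VM_PROT_ALL, MAP_ANON|MAP_PRIVATE,
 *	    NULL, 0, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 *
 * i.e. handle is NULL, and for MAP_ANON the passed foff is replaced
 * internally with UVM_UNKNOWN_OFFSET.
 */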

int
uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
	vm_map_t map;
	vaddr_t *addr;
	vsize_t size;
	vm_prot_t prot, maxprot;
	int flags;
	caddr_t handle;		/* XXX: VNODE? */
	voff_t foff;
	vsize_t locklimit;
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int retval;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {

		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			   maxprot : (maxprot & ~VM_PROT_WRITE));

			/* XXX for now, attach doesn't gain a ref */
			VREF(vp);
		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ? maxprot :
			    (maxprot & ~VM_PROT_WRITE), foff, size);
			/*
			 * XXX Some devices don't like to be mapped with
			 * XXX PROT_EXEC, but we don't really have a
			 * XXX better way of handling this, right now
			 */
			if (uobj == NULL && (prot & PROT_EXEC) == 0) {
				maxprot &= ~VM_PROT_EXECUTE;
				uobj = udv_attach((void *) &vp->v_rdev,
				    (flags & MAP_SHARED) ? maxprot :
				    (maxprot & ~VM_PROT_WRITE), foff, size);
			}
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);

	/*
	 * do it!
	 */

	retval = uvm_map(map, addr, size, uobj, foff, 0, uvmflag);

	if (retval == KERN_SUCCESS) {
		/*
		 * POSIX 1003.1b -- if our address space was configured
		 * to lock all future mappings, wire the one we just made.
		 */
		if (prot == VM_PROT_NONE) {
			/*
			 * No more work to do in this case.
			 */
			return (0);
		}

		vm_map_lock(map);

		if (map->flags & VM_MAP_WIREFUTURE) {
			if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
#ifdef pmap_wired_count
			    || (locklimit != 0 && (size +
			    ptoa(pmap_wired_count(vm_map_pmap(map)))) >
			    locklimit)
#endif
			    ) {
				retval = KERN_RESOURCE_SHORTAGE;
				vm_map_unlock(map);
				/* unmap the region! */
				(void) uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			/*
			 * uvm_map_pageable() always returns the map
			 * unlocked.
			 */
			retval = uvm_map_pageable(map, *addr, *addr + size,
			    FALSE, UVM_LK_ENTER);
			if (retval != KERN_SUCCESS) {
				/* unmap the region! */
				(void) uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			return (0);
		}

		vm_map_unlock(map);

		return (0);
	}

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

 bad:
	switch (retval) {
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return(ENOMEM);
	case KERN_RESOURCE_SHORTAGE:
		return (EAGAIN);
	case KERN_PROTECTION_FAILURE:
		return(EACCES);
	}
	return(EINVAL);
}