/*	$OpenBSD: uvm_mmap.c,v 1.87 2011/07/09 05:31:26 matthew Exp $	*/
/*	$NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *	Washington University, University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c    8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/specdev.h>

#include <machine/exec.h>	/* for __LDPGSZ */

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>

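/*
 * Illustrative note for the ALIGN_ADDR() macro below (assuming a 4KB
 * page size, i.e. PAGE_MASK == 0xfff): for addr == 0x1003 and
 * size == 0x2000, pageoff becomes 0x3, addr is truncated to 0x1000,
 * size grows to 0x2003 and is then rounded up to 0x3000.  If either
 * adjustment would wrap past SIZE_MAX, the enclosing function returns
 * EINVAL instead.
 */
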
/*
 * Page align addr and size, returning EINVAL on wraparound.
 */
#define ALIGN_ADDR(addr, size, pageoff)	do {				\
	pageoff = (addr & PAGE_MASK);					\
	if (pageoff != 0) {						\
		if (size > SIZE_MAX - pageoff)				\
			return (EINVAL);	/* wraparound */	\
		addr -= pageoff;					\
		size += pageoff;					\
	}								\
	if (size != 0) {						\
		size = (vsize_t)round_page(size);			\
		if (size == 0)						\
			return (EINVAL);	/* wraparound */	\
	}								\
} while (0)

/*
 * sys_mquery: provide mapping hints to applications that do fixed mappings
 *
 * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
 *	don't care about PMAP_PREFER or such)
 * addr: hint where we'd like to place the mapping.
 * size: size of the mapping
 * fd: fd of the file we want to map
 * off: offset within the file
 */

int
sys_mquery(struct proc *p, void *v, register_t *retval)
{
	struct sys_mquery_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	struct file *fp;
	struct uvm_object *uobj;
	voff_t uoff;
	int error;
	vaddr_t vaddr;
	int flags = 0;
	vsize_t size;
	vm_prot_t prot;
	int fd;

	vaddr = (vaddr_t) SCARG(uap, addr);
	prot = SCARG(uap, prot);
	size = (vsize_t) SCARG(uap, len);
	fd = SCARG(uap, fd);

	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);

	if (SCARG(uap, flags) & MAP_FIXED)
		flags |= UVM_FLAG_FIXED;

	if (fd >= 0) {
		if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
			return (error);
		uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
		uoff = SCARG(uap, pos);
	} else {
		fp = NULL;
		uobj = NULL;
		uoff = 0;
	}

	if (vaddr == 0)
		vaddr = uvm_map_hint(p, prot);

	/* prevent a user requested address from falling in heap space */
	if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
	    (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) {
		if (flags & UVM_FLAG_FIXED) {
			error = EINVAL;
			goto done;
		}
		vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ);
	}
	vm_map_lock(&p->p_vmspace->vm_map);

again:
	if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size,
	    &vaddr, uobj, uoff, 0, flags) == NULL) {
		if (flags & UVM_FLAG_FIXED)
			error = EINVAL;
		else
			error = ENOMEM;
	} else {
		/* prevent a returned address from falling in heap space */
		if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr)
		    && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ)) {
			vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
			    BRKSIZ);
			goto again;
		}
		error = 0;
		*retval = (register_t)(vaddr);
	}
	vm_map_unlock(&p->p_vmspace->vm_map);
done:
	if (fp != NULL)
		FRELE(fp);
	return (error);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct proc *p, void *v, register_t *retval)
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, *pgi, *pgs;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len, npgs;
	int error = 0;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	/*
	 * < art> Anyone trying to mincore more than 4GB of address space is
	 *	clearly insane.
	 */
	if (npgs >= (0xffffffff >> PAGE_SHIFT))
		return (E2BIG);
	pgs = malloc(sizeof(*pgs) * npgs, M_TEMP, M_WAITOK | M_CANFAIL);
	if (pgs == NULL)
		return (ENOMEM);
	pgi = pgs;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0) {
		free(pgs, M_TEMP);
		return (error);
	}

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (entry->object.uvm_obj->pgops->pgo_fault != NULL) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, pgi++)
					*pgi = 1;
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, pgi++) {
			*pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					*pgi = 1;
				}
			}

			if (uobj != NULL && *pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					*pgi = 1;
				}
			}
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
	}

out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	/* now the map is unlocked we can copyout without fear. */
	if (error == 0)
		copyout(pgs, vec, npgs * sizeof(char));
	free(pgs, M_TEMP);
	return (error);
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct proc *p, void *v, register_t *retval)
{
	struct sys_mmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct filedesc *fdp = p->p_fd;
	struct file *fp = NULL;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot);
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);
	if ((flags & MAP_FLAGMASK) != flags)
		return (EINVAL);
	if (flags & MAP_COPY)
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */
	ALIGN_ADDR(pos, size, pageoff);

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* adjust address by the same amount as we did the offset */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);	/* not page aligned */

		if (addr > SIZE_MAX - size)
			return (EINVAL);	/* no wrapping! */
		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr == 0)
			addr = uvm_map_hint(p, prot);
		else if (!(flags & MAP_TRYFIXED) &&
		    addr < (vaddr_t)p->p_vmspace->vm_daddr)
			addr = uvm_map_hint(p, prot);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */
	if ((flags & MAP_ANON) == 0) {

		if ((fp = fd_getfile(fdp, fd)) == NULL)
			return (EBADF);

		FREF(fp);

		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;		/* only mmap vnodes! */
			goto out;
		}
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK) {
			error = ENODEV; /* only REG/CHR/BLK support mmap */
			goto out;
		}

		if (vp->v_type == VREG && (pos + size) < pos) {
			error = EINVAL;		/* no offset wrapping */
			goto out;
		}

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			FRELE(fp);
			fp = NULL;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ) {
			error = EACCES;
			goto out;
		}

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable, append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					goto out;
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE) {
					error = EPERM;
					goto out;
				}
			} else if (prot & PROT_WRITE) {
				error = EACCES;
				goto out;
			}
		} else {
			/* MAP_PRIVATE mappings can always be written to */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1) {
			error = EINVAL;
			goto out;
		}

is_anon:	/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	if ((flags & MAP_ANON) != 0 ||
	    ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
		if (size >
		    (p->p_rlimit[RLIMIT_DATA].rlim_cur - ptoa(p->p_vmspace->vm_dused))) {
			error = ENOMEM;
			goto out;
		}
	}

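	/*
	 * A note on the check above: anonymous mappings and writable
	 * private mappings are charged against the data size limit, so
	 * the request fails with ENOMEM if it does not fit in what is
	 * left of RLIMIT_DATA beyond the pages already accounted in
	 * vm_dused.
	 */
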
	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
	if (error == ENOMEM && !(flags & (MAP_FIXED | MAP_TRYFIXED))) {
		/* once more, with feeling */
		addr = uvm_map_hint1(p, prot, 0);
		error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot,
		    maxprot, flags, handle, pos,
		    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);
	}

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

out:
	if (fp)
		FRELE(fp);
	return (error);
}

/*
 * sys_msync: the msync system call (a front-end for flush)
 */

int
sys_msync(struct proc *p, void *v, register_t *retval)
{
	struct sys_msync_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	return (uvm_map_clean(map, addr, addr+size, uvmflags));
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct proc *p, void *v, register_t *retval)
{
	struct sys_munmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (addr > SIZE_MAX - size)
		return (EINVAL);
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;


	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!
	 */
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, p, FALSE);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct proc *p, void *v, register_t *retval)
{
	struct sys_mprotect_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot);

	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
	    prot, FALSE));
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct proc *p, void *v, register_t *retval)
{
	struct sys_minherit_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit));
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct proc *p, void *v, register_t *retval)
{
	struct sys_madvise_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
		    addr + size, advice);
		break;

	case MADV_WILLNEED:
		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		/*
		 * XXX IMPLEMENT ME.
		 * Should invent a "weak" mode for uvm_fault()
		 * which would only do the PGO_LOCKED pgo_get().
		 */
		return (0);

	case MADV_DONTNEED:
		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */
		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:
		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */
		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:
		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */
		return (EINVAL);

	default:
		return (EINVAL);
	}

	return (error);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct proc *p, void *v, register_t *retval)
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p, 0)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
	    0);
	return (error == 0 ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct proc *p, void *v, register_t *retval)
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

#ifndef pmap_wired_count
	if ((error = suser(p, 0)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
	    0);
	return (error == 0 ? 0 : ENOMEM);
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct proc *p, void *v, register_t *retval)
{
	struct sys_mlockall_args /* {
		syscallarg(int) flags;
	} */ *uap = v;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p, 0)) != 0)
		return (error);
#endif

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	if (error != 0 && error != ENOMEM)
		return (EAGAIN);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct proc *p, void *v, register_t *retval)
{

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */

int
uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, caddr_t handle, voff_t foff,
    vsize_t locklimit, struct proc *p)
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int error;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;
	vsize_t align = 0;	/* userland page size */

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		uvm_unmap_p(map, *addr, *addr + size, p);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
			align = __LDPGSZ;
		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));

#ifndef UBC
			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}
#else
			/* XXX for now, attach doesn't gain a ref */
			vref(vp);
#endif
		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ? maxprot :
			    (maxprot & ~VM_PROT_WRITE), foff, size);
			/*
			 * XXX Some devices don't like to be mapped with
			 * XXX PROT_EXEC, but we don't really have a
			 * XXX better way of handling this, right now
			 */
			if (uobj == NULL && (prot & PROT_EXEC) == 0) {
				maxprot &= ~VM_PROT_EXECUTE;
				uobj = udv_attach((void *) &vp->v_rdev,
				    (flags & MAP_SHARED) ? maxprot :
				    (maxprot & ~VM_PROT_WRITE), foff, size);
			}
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);

	error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p);

	if (error == 0) {
		/*
		 * POSIX 1003.1b -- if our address space was configured
		 * to lock all future mappings, wire the one we just made.
		 */
		if (prot == VM_PROT_NONE) {
			/*
			 * No more work to do in this case.
			 */
			return (0);
		}

		vm_map_lock(map);

		if (map->flags & VM_MAP_WIREFUTURE) {
			if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
#ifdef pmap_wired_count
			    || (locklimit != 0 && (size +
			    ptoa(pmap_wired_count(vm_map_pmap(map)))) >
			    locklimit)
#endif
			) {
				error = ENOMEM;
				vm_map_unlock(map);
				/* unmap the region! */
				uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			/*
			 * uvm_map_pageable() always returns the map
			 * unlocked.
			 */
			error = uvm_map_pageable(map, *addr, *addr + size,
			    FALSE, UVM_LK_ENTER);
			if (error != 0) {
				/* unmap the region! */
				uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			return (0);
		}

		vm_map_unlock(map);

		return (0);
	}

	/*
	 * errors: first detach from the uobj, if any.
	 */

	if (uobj)
		uobj->pgops->pgo_detach(uobj);

bad:
	return (error);
}