/*	$OpenBSD: uvm_mmap.c,v 1.98 2014/07/12 18:44:01 tedu Exp $	*/
/*	$NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Charles D. Cranor,
 *      Washington University, University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/specdev.h>

#include <machine/exec.h>	/* for __LDPGSZ */

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>
#include <uvm/uvm_vnode.h>

/*
 * Page align addr and size, returning EINVAL on wraparound.
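 *
 * For example, assuming a 4 KiB page size (so PAGE_MASK == 0xfff),
 * ALIGN_ADDR(0x1003, 0x2000, pageoff) leaves pageoff == 3,
 * addr == 0x1000 and size == 0x3000 (0x2003 rounded up to whole pages).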
 */
#define ALIGN_ADDR(addr, size, pageoff)	do {				\
	pageoff = (addr & PAGE_MASK);					\
	if (pageoff != 0) {						\
		if (size > SIZE_MAX - pageoff)				\
			return (EINVAL);	/* wraparound */	\
		addr -= pageoff;					\
		size += pageoff;					\
	}								\
	if (size != 0) {						\
		size = (vsize_t)round_page(size);			\
		if (size == 0)						\
			return (EINVAL);	/* wraparound */	\
	}								\
} while (0)

/*
 * sys_mquery: provide mapping hints to applications that do fixed mappings
 *
 * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
 *	don't care about PMAP_PREFER or such)
 * addr: hint where we'd like to place the mapping.
 * size: size of the mapping
 * fd: fd of the file we want to map
 * off: offset within the file
 */
int
sys_mquery(struct proc *p, void *v, register_t *retval)
{
	struct sys_mquery_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	struct file *fp;
	struct uvm_object *uobj;
	voff_t uoff;
	int error;
	vaddr_t vaddr;
	int flags = 0;
	vsize_t size;
	vm_prot_t prot;
	int fd;

	vaddr = (vaddr_t) SCARG(uap, addr);
	prot = SCARG(uap, prot);
	size = (vsize_t) SCARG(uap, len);
	fd = SCARG(uap, fd);

	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);

	if (SCARG(uap, flags) & MAP_FIXED)
		flags |= UVM_FLAG_FIXED;

	if (fd >= 0) {
		if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
			return (error);
		uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
		uoff = SCARG(uap, pos);
	} else {
		fp = NULL;
		uobj = NULL;
		uoff = UVM_UNKNOWN_OFFSET;
	}

	if (vaddr == 0)
		vaddr = uvm_map_hint(p->p_vmspace, prot);

	error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff,
	    flags);
	if (error == 0)
		*retval = (register_t)(vaddr);

	if (fp != NULL)
		FRELE(fp, p);
	return (error);
}

/*
 * sys_mincore: determine if pages are in core or not.
 */
/* ARGSUSED */
int
sys_mincore(struct proc *p, void *v, register_t *retval)
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, *pgi, *pgs;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry, next;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len, npgs;
	int error = 0;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	/*
	 * < art> Anyone trying to mincore more than 4GB of address space is
	 *	clearly insane.
	 */
	if (npgs >= (0xffffffff >> PAGE_SHIFT))
		return (E2BIG);
	pgs = malloc(sizeof(*pgs) * npgs, M_TEMP, M_WAITOK | M_CANFAIL);
	if (pgs == NULL)
		return (ENOMEM);
	pgi = pgs;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
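	 * (uvm_vslock() wires the pages backing vec; the copyout() of the
	 * status bytes happens below, once the map has been unlocked again.)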
	 */
	if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0) {
		free(pgs, M_TEMP, 0);
		return (error);
	}

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	    entry != NULL && entry->start < end;
	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		next = RB_NEXT(uvm_map_addr, &map->addr, entry);
		if (entry->end < end &&
		    (next == NULL ||
		    next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (entry->object.uvm_obj->pgops->pgo_fault != NULL) {
				for (/* nothing */; start < lim;
				    start += PAGE_SIZE, pgi++)
					*pgi = 1;
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		for (/* nothing */; start < lim; start += PAGE_SIZE, pgi++) {
			*pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					*pgi = 1;
				}
			}

			if (uobj != NULL && *pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					*pgi = 1;
				}
			}
		}
	}

out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	/* now the map is unlocked we can copyout without fear. */
	if (error == 0)
		copyout(pgs, vec, npgs * sizeof(char));
	free(pgs, M_TEMP, 0);
	return (error);
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */
int
sys_mmap(struct proc *p, void *v, register_t *retval)
{
	struct sys_mmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct filedesc *fdp = p->p_fd;
	struct file *fp = NULL;
	struct vnode *vp;
	caddr_t handle;
	int error;

	/* first, extract syscall args from the uap. */
	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot);
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
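	 * The checks below require prot to be a subset of PROT_READ,
	 * PROT_WRITE and PROT_EXEC, reject unknown flag bits, refuse
	 * MAP_SHARED combined with MAP_PRIVATE, insist on MAP_FIXED when
	 * __MAP_NOREPLACE is given, and reject zero-length mappings.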
	 */
	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);
	if ((flags & MAP_FLAGMASK) != flags)
		return (EINVAL);
	if (flags & MAP_OLDCOPY)
		flags = (flags & ~MAP_OLDCOPY) | MAP_PRIVATE;
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);
	if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
		return (EINVAL);
	if (size == 0)
		return (EINVAL);

	/* align file position and save offset.  adjust size. */
	ALIGN_ADDR(pos, size, pageoff);

	/* now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */
	if (flags & MAP_FIXED) {
		/* adjust address by the same amount as we did the offset */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);	/* not page aligned */

		if (addr > SIZE_MAX - size)
			return (EINVAL);	/* no wrapping! */
		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);

	}

	/* check for file mappings (i.e. not anonymous) and verify file. */
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fdp, fd)) == NULL)
			return (EBADF);

		FREF(fp);

		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;		/* only mmap vnodes! */
			goto out;
		}
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK) {
			error = ENODEV; /* only REG/CHR/BLK support mmap */
			goto out;
		}

		if (vp->v_type == VREG && (pos + size) < pos) {
			error = EINVAL;		/* no offset wrapping */
			goto out;
		}

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			FRELE(fp, p);
			fp = NULL;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/* now check protection */
		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ) {
			error = EACCES;
			goto out;
		}

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable, append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
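			 *
			 * e.g. an append-only file may still be mapped
			 * MAP_SHARED with PROT_READ, but a shared PROT_WRITE
			 * mapping of it is refused with EPERM.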
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					goto out;
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE) {
					error = EPERM;
					goto out;
				}
			} else if (prot & PROT_WRITE) {
				error = EACCES;
				goto out;
			}
		} else {
			/* MAP_PRIVATE mappings can always write to their private copy */
			maxprot |= VM_PROT_WRITE;
		}

		/* set handle to vnode */
		handle = (caddr_t)vp;
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1) {
			error = EINVAL;
			goto out;
		}

is_anon:	/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	if ((flags & MAP_ANON) != 0 ||
	    ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
		if (size >
		    (p->p_rlimit[RLIMIT_DATA].rlim_cur - ptoa(p->p_vmspace->vm_dused))) {
			error = ENOMEM;
			goto out;
		}
	}

	/* now let kernel internal function uvm_mmap do the work. */
	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

out:
	if (fp)
		FRELE(fp, p);
	return (error);
}

/*
 * sys_msync: the msync system call (a front-end for flush)
 */

int
sys_msync(struct proc *p, void *v, register_t *retval)
{
	struct sys_msync_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int flags, uvmflags;

	/* extract syscall args from the uap */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/* align the address to a page boundary and adjust the size accordingly */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);	/* disallow wrap-around. */

	/* get map */
	map = &p->p_vmspace->vm_map;

	/* translate MS_ flags into PGO_ flags */
	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;	/* XXXCDC: force sync for now! */

	return (uvm_map_clean(map, addr, addr+size, uvmflags));
}

/*
 * sys_munmap: unmap a user's memory
 */
int
sys_munmap(struct proc *p, void *v, register_t *retval)
{
	struct sys_munmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct uvm_map_deadq dead_entries;

	/* get syscall args... */
	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/* align address to a page boundary, and adjust size accordingly */
	ALIGN_ADDR(addr, size, pageoff);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
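	 * the whole range [addr, addr + size) has to lie between
	 * vm_min_address and VM_MAXUSER_ADDRESS.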
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (addr > SIZE_MAX - size)
		return (EINVAL);
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;

	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	TAILQ_INIT(&dead_entries);
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, FALSE, TRUE);

	vm_map_unlock(map);	/* and unlock */

	uvm_unmap_detach(&dead_entries, 0);

	return (0);
}

/*
 * sys_mprotect: the mprotect system call
 */
int
sys_mprotect(struct proc *p, void *v, register_t *retval)
{
	struct sys_mprotect_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot);

	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);	/* disallow wrap-around. */

	return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
	    prot, FALSE));
}

/*
 * sys_minherit: the minherit system call
 */
int
sys_minherit(struct proc *p, void *v, register_t *retval)
{
	struct sys_minherit_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) inherit;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);	/* disallow wrap-around. */

	return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit));
}

/*
 * sys_madvise: give advice about memory usage.
 */
/* ARGSUSED */
int
sys_madvise(struct proc *p, void *v, register_t *retval)
{
	struct sys_madvise_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);	/* disallow wrap-around. */

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
		    addr + size, advice);
		break;

	case MADV_WILLNEED:
		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		/*
		 * XXX IMPLEMENT ME.
		 * Should invent a "weak" mode for uvm_fault()
		 * which would only do the PGO_LOCKED pgo_get().
		 */
		return (0);

	case MADV_DONTNEED:
		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */
		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:
		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */
		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:
		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */
		return (EINVAL);

	default:
		return (EINVAL);
	}

	return (error);
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct proc *p, void *v, register_t *retval)
{
	struct sys_mlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/* extract syscall args from uap */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/* align address to a page boundary and adjust size accordingly */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);	/* disallow wrap-around. */

	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return (EAGAIN);

#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if ((error = suser(p, 0)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
	    0);
	return (error == 0 ? 0 : ENOMEM);
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct proc *p, void *v, register_t *retval)
{
	struct sys_munlock_args /* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/* extract syscall args from uap */
	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/* align address to a page boundary, and adjust size accordingly */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);	/* disallow wrap-around. */

#ifndef pmap_wired_count
	if ((error = suser(p, 0)) != 0)
		return (error);
#endif

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
	    0);
	return (error == 0 ? 0 : ENOMEM);
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
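 *
 * MCL_CURRENT wires everything that is currently mapped; MCL_FUTURE also
 * marks the map so that future mappings are wired as they are created
 * (see the VM_MAP_WIREFUTURE handling in uvm_mmap() below).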
 */
int
sys_mlockall(struct proc *p, void *v, register_t *retval)
{
	struct sys_mlockall_args /* {
		syscallarg(int) flags;
	} */ *uap = v;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 ||
	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return (EINVAL);

#ifndef pmap_wired_count
	if ((error = suser(p, 0)) != 0)
		return (error);
#endif

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	if (error != 0 && error != ENOMEM)
		return (EAGAIN);
	return (error);
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */
int
sys_munlockall(struct proc *p, void *v, register_t *retval)
{

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return (0);
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap, exec, and sysv shm
 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
 *	sysv shm uses "named anonymous memory")
 * - caller must page-align the file offset
 */
int
uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, caddr_t handle, voff_t foff,
    vsize_t locklimit, struct proc *p)
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int error;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;
	vsize_t align = 0;	/* userland page size */

	/* check params */
	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */
	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);

		uvmflag |= UVM_FLAG_FIXED;
		if ((flags & __MAP_NOREPLACE) == 0)
			uvm_unmap(map, *addr, *addr + size);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */
	if (flags & MAP_ANON) {
		if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
			align = __LDPGSZ;
		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;
	} else {
		vp = (struct vnode *)handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			uobj = uvn_attach(vp, (flags & MAP_SHARED) ?
			    maxprot : (maxprot & ~VM_PROT_WRITE));

			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 *
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}
		} else {
			uobj = udv_attach(vp->v_rdev,
			    (flags & MAP_SHARED) ? maxprot :
			    (maxprot & ~VM_PROT_WRITE), foff, size);
			/*
			 * XXX Some devices don't like to be mapped with
			 * XXX PROT_EXEC, but we don't really have a
			 * XXX better way of handling this, right now
			 */
			if (uobj == NULL && (prot & PROT_EXEC) == 0) {
				maxprot &= ~VM_PROT_EXECUTE;
				uobj = udv_attach(vp->v_rdev,
				    (flags & MAP_SHARED) ? maxprot :
				    (maxprot & ~VM_PROT_WRITE), foff, size);
			}
			advice = UVM_ADV_RANDOM;
		}

		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/* set up mapping flags */
	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
	    advice, uvmflag);

	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);

	if (error == 0) {
		/*
		 * POSIX 1003.1b -- if our address space was configured
		 * to lock all future mappings, wire the one we just made.
		 */
		if (prot == VM_PROT_NONE) {
			/*
			 * No more work to do in this case.
			 */
			return (0);
		}

		vm_map_lock(map);

		if (map->flags & VM_MAP_WIREFUTURE) {
			if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
#ifdef pmap_wired_count
			    || (locklimit != 0 && (size +
			    ptoa(pmap_wired_count(vm_map_pmap(map)))) >
			    locklimit)
#endif
			) {
				error = ENOMEM;
				vm_map_unlock(map);
				/* unmap the region! */
				uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			/*
			 * uvm_map_pageable() always returns the map
			 * unlocked.
			 */
			error = uvm_map_pageable(map, *addr, *addr + size,
			    FALSE, UVM_LK_ENTER);
			if (error != 0) {
				/* unmap the region! */
				uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			return (0);
		}

		vm_map_unlock(map);

		return (0);
	}

	/* errors: first detach from the uobj, if any. */
	if (uobj)
		uobj->pgops->pgo_detach(uobj);

bad:
	return (error);
}