1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/buf.h> 45 #include <sys/conf.h> 46 #include <sys/sysent.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/mountctl.h> 50 #include <sys/sysproto.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/linker.h> 56 #include <sys/stat.h> 57 #include <sys/unistd.h> 58 #include <sys/vnode.h> 59 #include <sys/proc.h> 60 #include <sys/priv.h> 61 #include <sys/jail.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/dirent.h> 65 #include <sys/extattr.h> 66 #include <sys/spinlock.h> 67 #include <sys/kern_syscall.h> 68 #include <sys/objcache.h> 69 #include <sys/sysctl.h> 70 71 #include <sys/buf2.h> 72 #include <sys/file2.h> 73 #include <sys/spinlock2.h> 74 #include <sys/mplock2.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 80 #include <machine/limits.h> 81 #include <machine/stdarg.h> 82 83 #include <vfs/union/union.h> 84 85 static void mount_warning(struct mount *mp, const char *ctl, ...) 
86 __printflike(2, 3); 87 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 88 static int checkvp_chdir (struct vnode *vn, struct thread *td); 89 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 90 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 91 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 92 static int getutimes (const struct timeval *, struct timespec *); 93 static int setfown (struct vnode *, uid_t, gid_t); 94 static int setfmode (struct vnode *, int); 95 static int setfflags (struct vnode *, int); 96 static int setutimes (struct vnode *, struct vattr *, 97 const struct timespec *, int); 98 static int usermount = 0; /* if 1, non-root can mount fs. */ 99 100 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 101 102 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 103 "Allow non-root users to mount filesystems"); 104 105 /* 106 * Virtual File System System Calls 107 */ 108 109 /* 110 * Mount a file system. 111 * 112 * mount_args(char *type, char *path, int flags, caddr_t data) 113 * 114 * MPALMOSTSAFE 115 */ 116 int 117 sys_mount(struct mount_args *uap) 118 { 119 struct thread *td = curthread; 120 struct vnode *vp; 121 struct nchandle nch; 122 struct mount *mp, *nullmp; 123 struct vfsconf *vfsp; 124 int error, flag = 0, flag2 = 0; 125 int hasmount; 126 struct vattr va; 127 struct nlookupdata nd; 128 char fstypename[MFSNAMELEN]; 129 struct ucred *cred; 130 131 get_mplock(); 132 cred = td->td_ucred; 133 if (jailed(cred)) { 134 error = EPERM; 135 goto done; 136 } 137 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 138 goto done; 139 140 /* 141 * Do not allow NFS export by non-root users. 
142 */ 143 if (uap->flags & MNT_EXPORTED) { 144 error = priv_check(td, PRIV_ROOT); 145 if (error) 146 goto done; 147 } 148 /* 149 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 150 */ 151 if (priv_check(td, PRIV_ROOT)) 152 uap->flags |= MNT_NOSUID | MNT_NODEV; 153 154 /* 155 * Lookup the requested path and extract the nch and vnode. 156 */ 157 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 158 if (error == 0) { 159 if ((error = nlookup(&nd)) == 0) { 160 if (nd.nl_nch.ncp->nc_vp == NULL) 161 error = ENOENT; 162 } 163 } 164 if (error) { 165 nlookup_done(&nd); 166 goto done; 167 } 168 169 /* 170 * If the target filesystem is resolved via a nullfs mount, then 171 * nd.nl_nch.mount will be pointing to the nullfs mount structure 172 * instead of the target file system. We need it in case we are 173 * doing an update. 174 */ 175 nullmp = nd.nl_nch.mount; 176 177 /* 178 * Extract the locked+refd ncp and cleanup the nd structure 179 */ 180 nch = nd.nl_nch; 181 cache_zero(&nd.nl_nch); 182 nlookup_done(&nd); 183 184 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 185 hasmount = 1; 186 else 187 hasmount = 0; 188 189 190 /* 191 * now we have the locked ref'd nch and unreferenced vnode. 192 */ 193 vp = nch.ncp->nc_vp; 194 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 195 cache_put(&nch); 196 goto done; 197 } 198 cache_unlock(&nch); 199 200 /* 201 * Extract the file system type. We need to know this early, to take 202 * appropriate actions if we are dealing with a nullfs. 
203 */ 204 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 205 cache_drop(&nch); 206 vput(vp); 207 goto done; 208 } 209 210 /* 211 * Now we have an unlocked ref'd nch and a locked ref'd vp 212 */ 213 if (uap->flags & MNT_UPDATE) { 214 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 215 cache_drop(&nch); 216 vput(vp); 217 error = EINVAL; 218 goto done; 219 } 220 221 if (strncmp(fstypename, "null", 5) == 0) { 222 KKASSERT(nullmp); 223 mp = nullmp; 224 } else { 225 mp = vp->v_mount; 226 } 227 228 flag = mp->mnt_flag; 229 flag2 = mp->mnt_kern_flag; 230 /* 231 * We only allow the filesystem to be reloaded if it 232 * is currently mounted read-only. 233 */ 234 if ((uap->flags & MNT_RELOAD) && 235 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 236 cache_drop(&nch); 237 vput(vp); 238 error = EOPNOTSUPP; /* Needs translation */ 239 goto done; 240 } 241 /* 242 * Only root, or the user that did the original mount is 243 * permitted to update it. 244 */ 245 if (mp->mnt_stat.f_owner != cred->cr_uid && 246 (error = priv_check(td, PRIV_ROOT))) { 247 cache_drop(&nch); 248 vput(vp); 249 goto done; 250 } 251 if (vfs_busy(mp, LK_NOWAIT)) { 252 cache_drop(&nch); 253 vput(vp); 254 error = EBUSY; 255 goto done; 256 } 257 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 258 cache_drop(&nch); 259 vfs_unbusy(mp); 260 vput(vp); 261 error = EBUSY; 262 goto done; 263 } 264 vsetflags(vp, VMOUNT); 265 mp->mnt_flag |= 266 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 267 vn_unlock(vp); 268 goto update; 269 } 270 /* 271 * If the user is not root, ensure that they own the directory 272 * onto which we are attempting to mount. 
273 */ 274 if ((error = VOP_GETATTR(vp, &va)) || 275 (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) { 276 cache_drop(&nch); 277 vput(vp); 278 goto done; 279 } 280 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 281 cache_drop(&nch); 282 vput(vp); 283 goto done; 284 } 285 if (vp->v_type != VDIR) { 286 cache_drop(&nch); 287 vput(vp); 288 error = ENOTDIR; 289 goto done; 290 } 291 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 292 cache_drop(&nch); 293 vput(vp); 294 error = EPERM; 295 goto done; 296 } 297 vfsp = vfsconf_find_by_name(fstypename); 298 if (vfsp == NULL) { 299 linker_file_t lf; 300 301 /* Only load modules for root (very important!) */ 302 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 303 cache_drop(&nch); 304 vput(vp); 305 goto done; 306 } 307 error = linker_load_file(fstypename, &lf); 308 if (error || lf == NULL) { 309 cache_drop(&nch); 310 vput(vp); 311 if (lf == NULL) 312 error = ENODEV; 313 goto done; 314 } 315 lf->userrefs++; 316 /* lookup again, see if the VFS was loaded */ 317 vfsp = vfsconf_find_by_name(fstypename); 318 if (vfsp == NULL) { 319 lf->userrefs--; 320 linker_file_unload(lf); 321 cache_drop(&nch); 322 vput(vp); 323 error = ENODEV; 324 goto done; 325 } 326 } 327 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 328 cache_drop(&nch); 329 vput(vp); 330 error = EBUSY; 331 goto done; 332 } 333 vsetflags(vp, VMOUNT); 334 335 /* 336 * Allocate and initialize the filesystem. 337 */ 338 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 339 mount_init(mp); 340 vfs_busy(mp, LK_NOWAIT); 341 mp->mnt_op = vfsp->vfc_vfsops; 342 mp->mnt_vfc = vfsp; 343 vfsp->vfc_refcount++; 344 mp->mnt_stat.f_type = vfsp->vfc_typenum; 345 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 346 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 347 mp->mnt_stat.f_owner = cred->cr_uid; 348 vn_unlock(vp); 349 update: 350 /* 351 * Set the mount level flags. 
352 */ 353 if (uap->flags & MNT_RDONLY) 354 mp->mnt_flag |= MNT_RDONLY; 355 else if (mp->mnt_flag & MNT_RDONLY) 356 mp->mnt_kern_flag |= MNTK_WANTRDWR; 357 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 358 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 359 MNT_NOSYMFOLLOW | MNT_IGNORE | 360 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 361 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 362 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 363 MNT_NOSYMFOLLOW | MNT_IGNORE | 364 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 365 /* 366 * Mount the filesystem. 367 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 368 * get. 369 */ 370 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 371 if (mp->mnt_flag & MNT_UPDATE) { 372 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 373 mp->mnt_flag &= ~MNT_RDONLY; 374 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 375 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 376 if (error) { 377 mp->mnt_flag = flag; 378 mp->mnt_kern_flag = flag2; 379 } 380 vfs_unbusy(mp); 381 vclrflags(vp, VMOUNT); 382 vrele(vp); 383 cache_drop(&nch); 384 goto done; 385 } 386 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 387 /* 388 * Put the new filesystem on the mount list after root. The mount 389 * point gets its own mnt_ncmountpt (unless the VFS already set one 390 * up) which represents the root of the mount. The lookup code 391 * detects the mount point going forward and checks the root of 392 * the mount going backwards. 393 * 394 * It is not necessary to invalidate or purge the vnode underneath 395 * because elements under the mount will be given their own glue 396 * namecache record. 
397 */ 398 if (!error) { 399 if (mp->mnt_ncmountpt.ncp == NULL) { 400 /* 401 * allocate, then unlock, but leave the ref intact 402 */ 403 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 404 cache_unlock(&mp->mnt_ncmountpt); 405 } 406 mp->mnt_ncmounton = nch; /* inherits ref */ 407 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 408 409 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 410 vclrflags(vp, VMOUNT); 411 mountlist_insert(mp, MNTINS_LAST); 412 vn_unlock(vp); 413 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 414 error = vfs_allocate_syncvnode(mp); 415 vfs_unbusy(mp); 416 error = VFS_START(mp, 0); 417 vrele(vp); 418 } else { 419 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 420 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 421 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 422 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 423 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 424 vclrflags(vp, VMOUNT); 425 mp->mnt_vfc->vfc_refcount--; 426 vfs_unbusy(mp); 427 kfree(mp, M_MOUNT); 428 cache_drop(&nch); 429 vput(vp); 430 } 431 done: 432 rel_mplock(); 433 return (error); 434 } 435 436 /* 437 * Scan all active processes to see if any of them have a current 438 * or root directory onto which the new filesystem has just been 439 * mounted. If so, replace them with the new mount point. 440 * 441 * The passed ncp is ref'd and locked (from the mount code) and 442 * must be associated with the vnode representing the root of the 443 * mount point. 
444 */ 445 struct checkdirs_info { 446 struct nchandle old_nch; 447 struct nchandle new_nch; 448 struct vnode *old_vp; 449 struct vnode *new_vp; 450 }; 451 452 static int checkdirs_callback(struct proc *p, void *data); 453 454 static void 455 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 456 { 457 struct checkdirs_info info; 458 struct vnode *olddp; 459 struct vnode *newdp; 460 struct mount *mp; 461 462 /* 463 * If the old mount point's vnode has a usecount of 1, it is not 464 * being held as a descriptor anywhere. 465 */ 466 olddp = old_nch->ncp->nc_vp; 467 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 468 return; 469 470 /* 471 * Force the root vnode of the new mount point to be resolved 472 * so we can update any matching processes. 473 */ 474 mp = new_nch->mount; 475 if (VFS_ROOT(mp, &newdp)) 476 panic("mount: lost mount"); 477 cache_setunresolved(new_nch); 478 cache_setvp(new_nch, newdp); 479 480 /* 481 * Special handling of the root node 482 */ 483 if (rootvnode == olddp) { 484 vref(newdp); 485 vfs_cache_setroot(newdp, cache_hold(new_nch)); 486 } 487 488 /* 489 * Pass newdp separately so the callback does not have to access 490 * it via new_nch->ncp->nc_vp. 491 */ 492 info.old_nch = *old_nch; 493 info.new_nch = *new_nch; 494 info.new_vp = newdp; 495 allproc_scan(checkdirs_callback, &info); 496 vput(newdp); 497 } 498 499 /* 500 * NOTE: callback is not MP safe because the scanned process's filedesc 501 * structure can be ripped out from under us, amoung other things. 502 */ 503 static int 504 checkdirs_callback(struct proc *p, void *data) 505 { 506 struct checkdirs_info *info = data; 507 struct filedesc *fdp; 508 struct nchandle ncdrop1; 509 struct nchandle ncdrop2; 510 struct vnode *vprele1; 511 struct vnode *vprele2; 512 513 if ((fdp = p->p_fd) != NULL) { 514 cache_zero(&ncdrop1); 515 cache_zero(&ncdrop2); 516 vprele1 = NULL; 517 vprele2 = NULL; 518 519 /* 520 * MPUNSAFE - XXX fdp can be pulled out from under a 521 * foreign process. 
522 * 523 * A shared filedesc is ok, we don't have to copy it 524 * because we are making this change globally. 525 */ 526 spin_lock(&fdp->fd_spin); 527 if (fdp->fd_ncdir.mount == info->old_nch.mount && 528 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 529 vprele1 = fdp->fd_cdir; 530 vref(info->new_vp); 531 fdp->fd_cdir = info->new_vp; 532 ncdrop1 = fdp->fd_ncdir; 533 cache_copy(&info->new_nch, &fdp->fd_ncdir); 534 } 535 if (fdp->fd_nrdir.mount == info->old_nch.mount && 536 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 537 vprele2 = fdp->fd_rdir; 538 vref(info->new_vp); 539 fdp->fd_rdir = info->new_vp; 540 ncdrop2 = fdp->fd_nrdir; 541 cache_copy(&info->new_nch, &fdp->fd_nrdir); 542 } 543 spin_unlock(&fdp->fd_spin); 544 if (ncdrop1.ncp) 545 cache_drop(&ncdrop1); 546 if (ncdrop2.ncp) 547 cache_drop(&ncdrop2); 548 if (vprele1) 549 vrele(vprele1); 550 if (vprele2) 551 vrele(vprele2); 552 } 553 return(0); 554 } 555 556 /* 557 * Unmount a file system. 558 * 559 * Note: unmount takes a path to the vnode mounted on as argument, 560 * not special file (as before). 561 * 562 * umount_args(char *path, int flags) 563 * 564 * MPALMOSTSAFE 565 */ 566 int 567 sys_unmount(struct unmount_args *uap) 568 { 569 struct thread *td = curthread; 570 struct proc *p __debugvar = td->td_proc; 571 struct mount *mp = NULL; 572 struct nlookupdata nd; 573 int error; 574 575 KKASSERT(p); 576 get_mplock(); 577 if (td->td_ucred->cr_prison != NULL) { 578 error = EPERM; 579 goto done; 580 } 581 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 582 goto done; 583 584 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 585 if (error == 0) 586 error = nlookup(&nd); 587 if (error) 588 goto out; 589 590 mp = nd.nl_nch.mount; 591 592 /* 593 * Only root, or the user that did the original mount is 594 * permitted to unmount this filesystem. 
595 */ 596 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 597 (error = priv_check(td, PRIV_ROOT))) 598 goto out; 599 600 /* 601 * Don't allow unmounting the root file system. 602 */ 603 if (mp->mnt_flag & MNT_ROOTFS) { 604 error = EINVAL; 605 goto out; 606 } 607 608 /* 609 * Must be the root of the filesystem 610 */ 611 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 612 error = EINVAL; 613 goto out; 614 } 615 616 out: 617 nlookup_done(&nd); 618 if (error == 0) 619 error = dounmount(mp, uap->flags); 620 done: 621 rel_mplock(); 622 return (error); 623 } 624 625 /* 626 * Do the actual file system unmount. 627 */ 628 static int 629 dounmount_interlock(struct mount *mp) 630 { 631 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 632 return (EBUSY); 633 mp->mnt_kern_flag |= MNTK_UNMOUNT; 634 return(0); 635 } 636 637 static int 638 unmount_allproc_cb(struct proc *p, void *arg) 639 { 640 struct mount *mp; 641 642 if (p->p_textnch.ncp == NULL) 643 return 0; 644 645 mp = (struct mount *)arg; 646 if (p->p_textnch.mount == mp) 647 cache_drop(&p->p_textnch); 648 649 return 0; 650 } 651 652 int 653 dounmount(struct mount *mp, int flags) 654 { 655 struct namecache *ncp; 656 struct nchandle nch; 657 struct vnode *vp; 658 int error; 659 int async_flag; 660 int lflags; 661 int freeok = 1; 662 663 /* 664 * Exclusive access for unmounting purposes 665 */ 666 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 667 return (error); 668 669 /* 670 * Allow filesystems to detect that a forced unmount is in progress. 671 */ 672 if (flags & MNT_FORCE) 673 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 674 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 
0 : LK_NOWAIT); 675 error = lockmgr(&mp->mnt_lock, lflags); 676 if (error) { 677 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 678 if (mp->mnt_kern_flag & MNTK_MWAIT) 679 wakeup(mp); 680 return (error); 681 } 682 683 if (mp->mnt_flag & MNT_EXPUBLIC) 684 vfs_setpublicfs(NULL, NULL, NULL); 685 686 vfs_msync(mp, MNT_WAIT); 687 async_flag = mp->mnt_flag & MNT_ASYNC; 688 mp->mnt_flag &=~ MNT_ASYNC; 689 690 /* 691 * If this filesystem isn't aliasing other filesystems, 692 * try to invalidate any remaining namecache entries and 693 * check the count afterwords. 694 */ 695 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 696 cache_lock(&mp->mnt_ncmountpt); 697 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 698 cache_unlock(&mp->mnt_ncmountpt); 699 700 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 701 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 702 allproc_scan(&unmount_allproc_cb, mp); 703 } 704 705 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 706 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 707 708 if ((flags & MNT_FORCE) == 0) { 709 error = EBUSY; 710 mount_warning(mp, "Cannot unmount: " 711 "%d namecache " 712 "references still " 713 "present", 714 ncp->nc_refs - 1); 715 } else { 716 mount_warning(mp, "Forced unmount: " 717 "%d namecache " 718 "references still " 719 "present", 720 ncp->nc_refs - 1); 721 freeok = 0; 722 } 723 } 724 } 725 726 /* 727 * nchandle records ref the mount structure. Expect a count of 1 728 * (our mount->mnt_ncmountpt). 729 */ 730 if (mp->mnt_refs != 1) { 731 if ((flags & MNT_FORCE) == 0) { 732 mount_warning(mp, "Cannot unmount: " 733 "%d process references still " 734 "present", mp->mnt_refs); 735 error = EBUSY; 736 } else { 737 mount_warning(mp, "Forced unmount: " 738 "%d process references still " 739 "present", mp->mnt_refs); 740 freeok = 0; 741 } 742 } 743 744 /* 745 * Decomission our special mnt_syncer vnode. This also stops 746 * the vnlru code. 
If we are unable to unmount we recommission 747 * the vnode. 748 */ 749 if (error == 0) { 750 if ((vp = mp->mnt_syncer) != NULL) { 751 mp->mnt_syncer = NULL; 752 vrele(vp); 753 } 754 if (((mp->mnt_flag & MNT_RDONLY) || 755 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 756 (flags & MNT_FORCE)) { 757 error = VFS_UNMOUNT(mp, flags); 758 } 759 } 760 if (error) { 761 if (mp->mnt_syncer == NULL) 762 vfs_allocate_syncvnode(mp); 763 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 764 mp->mnt_flag |= async_flag; 765 lockmgr(&mp->mnt_lock, LK_RELEASE); 766 if (mp->mnt_kern_flag & MNTK_MWAIT) 767 wakeup(mp); 768 return (error); 769 } 770 /* 771 * Clean up any journals still associated with the mount after 772 * filesystem activity has ceased. 773 */ 774 journal_remove_all_journals(mp, 775 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 776 777 mountlist_remove(mp); 778 779 /* 780 * Remove any installed vnode ops here so the individual VFSs don't 781 * have to. 782 */ 783 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 784 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 785 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 786 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 787 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 788 789 if (mp->mnt_ncmountpt.ncp != NULL) { 790 nch = mp->mnt_ncmountpt; 791 cache_zero(&mp->mnt_ncmountpt); 792 cache_clrmountpt(&nch); 793 cache_drop(&nch); 794 } 795 if (mp->mnt_ncmounton.ncp != NULL) { 796 nch = mp->mnt_ncmounton; 797 cache_zero(&mp->mnt_ncmounton); 798 cache_clrmountpt(&nch); 799 cache_drop(&nch); 800 } 801 802 mp->mnt_vfc->vfc_refcount--; 803 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 804 panic("unmount: dangling vnode"); 805 lockmgr(&mp->mnt_lock, LK_RELEASE); 806 if (mp->mnt_kern_flag & MNTK_MWAIT) 807 wakeup(mp); 808 if (freeok) 809 kfree(mp, M_MOUNT); 810 return (0); 811 } 812 813 static 814 void 815 mount_warning(struct mount *mp, const char *ctl, ...) 
816 { 817 char *ptr; 818 char *buf; 819 __va_list va; 820 821 __va_start(va, ctl); 822 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf, 0) == 0) { 823 kprintf("unmount(%s): ", ptr); 824 kvprintf(ctl, va); 825 kprintf("\n"); 826 kfree(buf, M_TEMP); 827 } else { 828 kprintf("unmount(%p", mp); 829 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 830 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 831 kprintf("): "); 832 kvprintf(ctl, va); 833 kprintf("\n"); 834 } 835 __va_end(va); 836 } 837 838 /* 839 * Shim cache_fullpath() to handle the case where a process is chrooted into 840 * a subdirectory of a mount. In this case if the root mount matches the 841 * process root directory's mount we have to specify the process's root 842 * directory instead of the mount point, because the mount point might 843 * be above the root directory. 844 */ 845 static 846 int 847 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 848 { 849 struct nchandle *nch; 850 851 if (p && p->p_fd->fd_nrdir.mount == mp) 852 nch = &p->p_fd->fd_nrdir; 853 else 854 nch = &mp->mnt_ncmountpt; 855 return(cache_fullpath(p, nch, rb, fb, 0)); 856 } 857 858 /* 859 * Sync each mounted filesystem. 860 */ 861 862 #ifdef DEBUG 863 static int syncprt = 0; 864 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 865 #endif /* DEBUG */ 866 867 static int sync_callback(struct mount *mp, void *data); 868 869 int 870 sys_sync(struct sync_args *uap) 871 { 872 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 873 #ifdef DEBUG 874 /* 875 * print out buffer pool stat information on each sync() call. 
876 */ 877 if (syncprt) 878 vfs_bufstats(); 879 #endif /* DEBUG */ 880 return (0); 881 } 882 883 static 884 int 885 sync_callback(struct mount *mp, void *data __unused) 886 { 887 int asyncflag; 888 889 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 890 asyncflag = mp->mnt_flag & MNT_ASYNC; 891 mp->mnt_flag &= ~MNT_ASYNC; 892 vfs_msync(mp, MNT_NOWAIT); 893 VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY); 894 mp->mnt_flag |= asyncflag; 895 } 896 return(0); 897 } 898 899 /* XXX PRISON: could be per prison flag */ 900 static int prison_quotas; 901 #if 0 902 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 903 #endif 904 905 /* 906 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 907 * 908 * Change filesystem quotas. 909 * 910 * MPALMOSTSAFE 911 */ 912 int 913 sys_quotactl(struct quotactl_args *uap) 914 { 915 struct nlookupdata nd; 916 struct thread *td; 917 struct mount *mp; 918 int error; 919 920 get_mplock(); 921 td = curthread; 922 if (td->td_ucred->cr_prison && !prison_quotas) { 923 error = EPERM; 924 goto done; 925 } 926 927 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 928 if (error == 0) 929 error = nlookup(&nd); 930 if (error == 0) { 931 mp = nd.nl_nch.mount; 932 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 933 uap->arg, nd.nl_cred); 934 } 935 nlookup_done(&nd); 936 done: 937 rel_mplock(); 938 return (error); 939 } 940 941 /* 942 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 943 * void *buf, int buflen) 944 * 945 * This function operates on a mount point and executes the specified 946 * operation using the specified control data, and possibly returns data. 947 * 948 * The actual number of bytes stored in the result buffer is returned, 0 949 * if none, otherwise an error is returned. 
950 * 951 * MPALMOSTSAFE 952 */ 953 int 954 sys_mountctl(struct mountctl_args *uap) 955 { 956 struct thread *td = curthread; 957 struct proc *p = td->td_proc; 958 struct file *fp; 959 void *ctl = NULL; 960 void *buf = NULL; 961 char *path = NULL; 962 int error; 963 964 /* 965 * Sanity and permissions checks. We must be root. 966 */ 967 KKASSERT(p); 968 if (td->td_ucred->cr_prison != NULL) 969 return (EPERM); 970 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 971 (error = priv_check(td, PRIV_ROOT)) != 0) 972 return (error); 973 974 /* 975 * Argument length checks 976 */ 977 if (uap->ctllen < 0 || uap->ctllen > 1024) 978 return (EINVAL); 979 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 980 return (EINVAL); 981 if (uap->path == NULL) 982 return (EINVAL); 983 984 /* 985 * Allocate the necessary buffers and copyin data 986 */ 987 path = objcache_get(namei_oc, M_WAITOK); 988 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 989 if (error) 990 goto done; 991 992 if (uap->ctllen) { 993 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 994 error = copyin(uap->ctl, ctl, uap->ctllen); 995 if (error) 996 goto done; 997 } 998 if (uap->buflen) 999 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1000 1001 /* 1002 * Validate the descriptor 1003 */ 1004 if (uap->fd >= 0) { 1005 fp = holdfp(p->p_fd, uap->fd, -1); 1006 if (fp == NULL) { 1007 error = EBADF; 1008 goto done; 1009 } 1010 } else { 1011 fp = NULL; 1012 } 1013 1014 /* 1015 * Execute the internal kernel function and clean up. 
1016 */ 1017 get_mplock(); 1018 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1019 rel_mplock(); 1020 if (fp) 1021 fdrop(fp); 1022 if (error == 0 && uap->sysmsg_result > 0) 1023 error = copyout(buf, uap->buf, uap->sysmsg_result); 1024 done: 1025 if (path) 1026 objcache_put(namei_oc, path); 1027 if (ctl) 1028 kfree(ctl, M_TEMP); 1029 if (buf) 1030 kfree(buf, M_TEMP); 1031 return (error); 1032 } 1033 1034 /* 1035 * Execute a mount control operation by resolving the path to a mount point 1036 * and calling vop_mountctl(). 1037 * 1038 * Use the mount point from the nch instead of the vnode so nullfs mounts 1039 * can properly spike the VOP. 1040 */ 1041 int 1042 kern_mountctl(const char *path, int op, struct file *fp, 1043 const void *ctl, int ctllen, 1044 void *buf, int buflen, int *res) 1045 { 1046 struct vnode *vp; 1047 struct mount *mp; 1048 struct nlookupdata nd; 1049 int error; 1050 1051 *res = 0; 1052 vp = NULL; 1053 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1054 if (error == 0) 1055 error = nlookup(&nd); 1056 if (error == 0) 1057 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1058 mp = nd.nl_nch.mount; 1059 nlookup_done(&nd); 1060 if (error) 1061 return (error); 1062 vn_unlock(vp); 1063 1064 /* 1065 * Must be the root of the filesystem 1066 */ 1067 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1068 vrele(vp); 1069 return (EINVAL); 1070 } 1071 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1072 buf, buflen, res); 1073 vrele(vp); 1074 return (error); 1075 } 1076 1077 int 1078 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1079 { 1080 struct thread *td = curthread; 1081 struct proc *p = td->td_proc; 1082 struct mount *mp; 1083 struct statfs *sp; 1084 char *fullpath, *freepath; 1085 int error; 1086 1087 if ((error = nlookup(nd)) != 0) 1088 return (error); 1089 mp = nd->nl_nch.mount; 1090 sp = &mp->mnt_stat; 1091 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1092 return (error); 1093 1094 error = mount_path(p, mp, &fullpath, &freepath); 1095 if (error) 1096 return(error); 1097 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1098 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1099 kfree(freepath, M_TEMP); 1100 1101 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1102 bcopy(sp, buf, sizeof(*buf)); 1103 /* Only root should have access to the fsid's. */ 1104 if (priv_check(td, PRIV_ROOT)) 1105 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1106 return (0); 1107 } 1108 1109 /* 1110 * statfs_args(char *path, struct statfs *buf) 1111 * 1112 * Get filesystem statistics. 1113 */ 1114 int 1115 sys_statfs(struct statfs_args *uap) 1116 { 1117 struct nlookupdata nd; 1118 struct statfs buf; 1119 int error; 1120 1121 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1122 if (error == 0) 1123 error = kern_statfs(&nd, &buf); 1124 nlookup_done(&nd); 1125 if (error == 0) 1126 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1127 return (error); 1128 } 1129 1130 int 1131 kern_fstatfs(int fd, struct statfs *buf) 1132 { 1133 struct thread *td = curthread; 1134 struct proc *p = td->td_proc; 1135 struct file *fp; 1136 struct mount *mp; 1137 struct statfs *sp; 1138 char *fullpath, *freepath; 1139 int error; 1140 1141 KKASSERT(p); 1142 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1143 return (error); 1144 mp = ((struct vnode *)fp->f_data)->v_mount; 1145 if (mp == NULL) { 1146 error = EBADF; 1147 goto done; 1148 } 1149 if (fp->f_cred == NULL) { 1150 error = EINVAL; 1151 goto done; 1152 } 1153 sp = &mp->mnt_stat; 1154 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1155 goto done; 1156 1157 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1158 goto done; 1159 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1160 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1161 kfree(freepath, M_TEMP); 1162 1163 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1164 bcopy(sp, buf, 
sizeof(*buf)); 1165 1166 /* Only root should have access to the fsid's. */ 1167 if (priv_check(td, PRIV_ROOT)) 1168 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1169 error = 0; 1170 done: 1171 fdrop(fp); 1172 return (error); 1173 } 1174 1175 /* 1176 * fstatfs_args(int fd, struct statfs *buf) 1177 * 1178 * Get filesystem statistics. 1179 */ 1180 int 1181 sys_fstatfs(struct fstatfs_args *uap) 1182 { 1183 struct statfs buf; 1184 int error; 1185 1186 error = kern_fstatfs(uap->fd, &buf); 1187 1188 if (error == 0) 1189 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1190 return (error); 1191 } 1192 1193 int 1194 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1195 { 1196 struct mount *mp; 1197 struct statvfs *sp; 1198 int error; 1199 1200 if ((error = nlookup(nd)) != 0) 1201 return (error); 1202 mp = nd->nl_nch.mount; 1203 sp = &mp->mnt_vstat; 1204 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1205 return (error); 1206 1207 sp->f_flag = 0; 1208 if (mp->mnt_flag & MNT_RDONLY) 1209 sp->f_flag |= ST_RDONLY; 1210 if (mp->mnt_flag & MNT_NOSUID) 1211 sp->f_flag |= ST_NOSUID; 1212 bcopy(sp, buf, sizeof(*buf)); 1213 return (0); 1214 } 1215 1216 /* 1217 * statfs_args(char *path, struct statfs *buf) 1218 * 1219 * Get filesystem statistics. 
1220 */ 1221 int 1222 sys_statvfs(struct statvfs_args *uap) 1223 { 1224 struct nlookupdata nd; 1225 struct statvfs buf; 1226 int error; 1227 1228 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1229 if (error == 0) 1230 error = kern_statvfs(&nd, &buf); 1231 nlookup_done(&nd); 1232 if (error == 0) 1233 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1234 return (error); 1235 } 1236 1237 int 1238 kern_fstatvfs(int fd, struct statvfs *buf) 1239 { 1240 struct thread *td = curthread; 1241 struct proc *p = td->td_proc; 1242 struct file *fp; 1243 struct mount *mp; 1244 struct statvfs *sp; 1245 int error; 1246 1247 KKASSERT(p); 1248 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1249 return (error); 1250 mp = ((struct vnode *)fp->f_data)->v_mount; 1251 if (mp == NULL) { 1252 error = EBADF; 1253 goto done; 1254 } 1255 if (fp->f_cred == NULL) { 1256 error = EINVAL; 1257 goto done; 1258 } 1259 sp = &mp->mnt_vstat; 1260 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1261 goto done; 1262 1263 sp->f_flag = 0; 1264 if (mp->mnt_flag & MNT_RDONLY) 1265 sp->f_flag |= ST_RDONLY; 1266 if (mp->mnt_flag & MNT_NOSUID) 1267 sp->f_flag |= ST_NOSUID; 1268 1269 bcopy(sp, buf, sizeof(*buf)); 1270 error = 0; 1271 done: 1272 fdrop(fp); 1273 return (error); 1274 } 1275 1276 /* 1277 * fstatfs_args(int fd, struct statfs *buf) 1278 * 1279 * Get filesystem statistics. 1280 */ 1281 int 1282 sys_fstatvfs(struct fstatvfs_args *uap) 1283 { 1284 struct statvfs buf; 1285 int error; 1286 1287 error = kern_fstatvfs(uap->fd, &buf); 1288 1289 if (error == 0) 1290 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1291 return (error); 1292 } 1293 1294 /* 1295 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1296 * 1297 * Get statistics on all filesystems. 
1298 */ 1299 1300 struct getfsstat_info { 1301 struct statfs *sfsp; 1302 long count; 1303 long maxcount; 1304 int error; 1305 int flags; 1306 struct thread *td; 1307 }; 1308 1309 static int getfsstat_callback(struct mount *, void *); 1310 1311 int 1312 sys_getfsstat(struct getfsstat_args *uap) 1313 { 1314 struct thread *td = curthread; 1315 struct getfsstat_info info; 1316 1317 bzero(&info, sizeof(info)); 1318 1319 info.maxcount = uap->bufsize / sizeof(struct statfs); 1320 info.sfsp = uap->buf; 1321 info.count = 0; 1322 info.flags = uap->flags; 1323 info.td = td; 1324 1325 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1326 if (info.sfsp && info.count > info.maxcount) 1327 uap->sysmsg_result = info.maxcount; 1328 else 1329 uap->sysmsg_result = info.count; 1330 return (info.error); 1331 } 1332 1333 static int 1334 getfsstat_callback(struct mount *mp, void *data) 1335 { 1336 struct getfsstat_info *info = data; 1337 struct statfs *sp; 1338 char *freepath; 1339 char *fullpath; 1340 int error; 1341 1342 if (info->sfsp && info->count < info->maxcount) { 1343 if (info->td->td_proc && 1344 !chroot_visible_mnt(mp, info->td->td_proc)) { 1345 return(0); 1346 } 1347 sp = &mp->mnt_stat; 1348 1349 /* 1350 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1351 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1352 * overrides MNT_WAIT. 
1353 */ 1354 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1355 (info->flags & MNT_WAIT)) && 1356 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1357 return(0); 1358 } 1359 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1360 1361 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1362 if (error) { 1363 info->error = error; 1364 return(-1); 1365 } 1366 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1367 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1368 kfree(freepath, M_TEMP); 1369 1370 error = copyout(sp, info->sfsp, sizeof(*sp)); 1371 if (error) { 1372 info->error = error; 1373 return (-1); 1374 } 1375 ++info->sfsp; 1376 } 1377 info->count++; 1378 return(0); 1379 } 1380 1381 /* 1382 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1383 long bufsize, int flags) 1384 * 1385 * Get statistics on all filesystems. 1386 */ 1387 1388 struct getvfsstat_info { 1389 struct statfs *sfsp; 1390 struct statvfs *vsfsp; 1391 long count; 1392 long maxcount; 1393 int error; 1394 int flags; 1395 struct thread *td; 1396 }; 1397 1398 static int getvfsstat_callback(struct mount *, void *); 1399 1400 int 1401 sys_getvfsstat(struct getvfsstat_args *uap) 1402 { 1403 struct thread *td = curthread; 1404 struct getvfsstat_info info; 1405 1406 bzero(&info, sizeof(info)); 1407 1408 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1409 info.sfsp = uap->buf; 1410 info.vsfsp = uap->vbuf; 1411 info.count = 0; 1412 info.flags = uap->flags; 1413 info.td = td; 1414 1415 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1416 if (info.vsfsp && info.count > info.maxcount) 1417 uap->sysmsg_result = info.maxcount; 1418 else 1419 uap->sysmsg_result = info.count; 1420 return (info.error); 1421 } 1422 1423 static int 1424 getvfsstat_callback(struct mount *mp, void *data) 1425 { 1426 struct getvfsstat_info *info = data; 1427 struct statfs *sp; 1428 struct statvfs *vsp; 1429 char *freepath; 1430 char *fullpath; 1431 int error; 1432 1433 if 
(info->vsfsp && info->count < info->maxcount) { 1434 if (info->td->td_proc && 1435 !chroot_visible_mnt(mp, info->td->td_proc)) { 1436 return(0); 1437 } 1438 sp = &mp->mnt_stat; 1439 vsp = &mp->mnt_vstat; 1440 1441 /* 1442 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1443 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1444 * overrides MNT_WAIT. 1445 */ 1446 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1447 (info->flags & MNT_WAIT)) && 1448 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1449 return(0); 1450 } 1451 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1452 1453 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1454 (info->flags & MNT_WAIT)) && 1455 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1456 return(0); 1457 } 1458 vsp->f_flag = 0; 1459 if (mp->mnt_flag & MNT_RDONLY) 1460 vsp->f_flag |= ST_RDONLY; 1461 if (mp->mnt_flag & MNT_NOSUID) 1462 vsp->f_flag |= ST_NOSUID; 1463 1464 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1465 if (error) { 1466 info->error = error; 1467 return(-1); 1468 } 1469 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1470 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1471 kfree(freepath, M_TEMP); 1472 1473 error = copyout(sp, info->sfsp, sizeof(*sp)); 1474 if (error == 0) 1475 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1476 if (error) { 1477 info->error = error; 1478 return (-1); 1479 } 1480 ++info->sfsp; 1481 ++info->vsfsp; 1482 } 1483 info->count++; 1484 return(0); 1485 } 1486 1487 1488 /* 1489 * fchdir_args(int fd) 1490 * 1491 * Change current working directory to a given file descriptor. 
1492 */ 1493 int 1494 sys_fchdir(struct fchdir_args *uap) 1495 { 1496 struct thread *td = curthread; 1497 struct proc *p = td->td_proc; 1498 struct filedesc *fdp = p->p_fd; 1499 struct vnode *vp, *ovp; 1500 struct mount *mp; 1501 struct file *fp; 1502 struct nchandle nch, onch, tnch; 1503 int error; 1504 1505 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1506 return (error); 1507 lwkt_gettoken(&p->p_token); 1508 vp = (struct vnode *)fp->f_data; 1509 vref(vp); 1510 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1511 if (fp->f_nchandle.ncp == NULL) 1512 error = ENOTDIR; 1513 else 1514 error = checkvp_chdir(vp, td); 1515 if (error) { 1516 vput(vp); 1517 goto done; 1518 } 1519 cache_copy(&fp->f_nchandle, &nch); 1520 1521 /* 1522 * If the ncp has become a mount point, traverse through 1523 * the mount point. 1524 */ 1525 1526 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1527 (mp = cache_findmount(&nch)) != NULL 1528 ) { 1529 error = nlookup_mp(mp, &tnch); 1530 if (error == 0) { 1531 cache_unlock(&tnch); /* leave ref intact */ 1532 vput(vp); 1533 vp = tnch.ncp->nc_vp; 1534 error = vget(vp, LK_SHARED); 1535 KKASSERT(error == 0); 1536 cache_drop(&nch); 1537 nch = tnch; 1538 } 1539 } 1540 if (error == 0) { 1541 ovp = fdp->fd_cdir; 1542 onch = fdp->fd_ncdir; 1543 vn_unlock(vp); /* leave ref intact */ 1544 fdp->fd_cdir = vp; 1545 fdp->fd_ncdir = nch; 1546 cache_drop(&onch); 1547 vrele(ovp); 1548 } else { 1549 cache_drop(&nch); 1550 vput(vp); 1551 } 1552 fdrop(fp); 1553 done: 1554 lwkt_reltoken(&p->p_token); 1555 return (error); 1556 } 1557 1558 int 1559 kern_chdir(struct nlookupdata *nd) 1560 { 1561 struct thread *td = curthread; 1562 struct proc *p = td->td_proc; 1563 struct filedesc *fdp = p->p_fd; 1564 struct vnode *vp, *ovp; 1565 struct nchandle onch; 1566 int error; 1567 1568 if ((error = nlookup(nd)) != 0) 1569 return (error); 1570 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1571 return (ENOENT); 1572 if ((error = vget(vp, LK_SHARED)) != 0) 1573 return (error); 1574 
1575 lwkt_gettoken(&p->p_token); 1576 error = checkvp_chdir(vp, td); 1577 vn_unlock(vp); 1578 if (error == 0) { 1579 ovp = fdp->fd_cdir; 1580 onch = fdp->fd_ncdir; 1581 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1582 fdp->fd_ncdir = nd->nl_nch; 1583 fdp->fd_cdir = vp; 1584 cache_drop(&onch); 1585 vrele(ovp); 1586 cache_zero(&nd->nl_nch); 1587 } else { 1588 vrele(vp); 1589 } 1590 lwkt_reltoken(&p->p_token); 1591 return (error); 1592 } 1593 1594 /* 1595 * chdir_args(char *path) 1596 * 1597 * Change current working directory (``.''). 1598 */ 1599 int 1600 sys_chdir(struct chdir_args *uap) 1601 { 1602 struct nlookupdata nd; 1603 int error; 1604 1605 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1606 if (error == 0) 1607 error = kern_chdir(&nd); 1608 nlookup_done(&nd); 1609 return (error); 1610 } 1611 1612 /* 1613 * Helper function for raised chroot(2) security function: Refuse if 1614 * any filedescriptors are open directories. 1615 */ 1616 static int 1617 chroot_refuse_vdir_fds(struct filedesc *fdp) 1618 { 1619 struct vnode *vp; 1620 struct file *fp; 1621 int error; 1622 int fd; 1623 1624 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1625 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1626 continue; 1627 vp = (struct vnode *)fp->f_data; 1628 if (vp->v_type != VDIR) { 1629 fdrop(fp); 1630 continue; 1631 } 1632 fdrop(fp); 1633 return(EPERM); 1634 } 1635 return (0); 1636 } 1637 1638 /* 1639 * This sysctl determines if we will allow a process to chroot(2) if it 1640 * has a directory open: 1641 * 0: disallowed for all processes. 1642 * 1: allowed for processes that were not already chroot(2)'ed. 1643 * 2: allowed for all processes. 1644 */ 1645 1646 static int chroot_allow_open_directories = 1; 1647 1648 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1649 &chroot_allow_open_directories, 0, ""); 1650 1651 /* 1652 * chroot to the specified namecache entry. We obtain the vp from the 1653 * namecache data. 
The passed ncp must be locked and referenced and will 1654 * remain locked and referenced on return. 1655 */ 1656 int 1657 kern_chroot(struct nchandle *nch) 1658 { 1659 struct thread *td = curthread; 1660 struct proc *p = td->td_proc; 1661 struct filedesc *fdp = p->p_fd; 1662 struct vnode *vp; 1663 int error; 1664 1665 /* 1666 * Only privileged user can chroot 1667 */ 1668 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1669 if (error) 1670 return (error); 1671 1672 /* 1673 * Disallow open directory descriptors (fchdir() breakouts). 1674 */ 1675 if (chroot_allow_open_directories == 0 || 1676 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1677 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1678 return (error); 1679 } 1680 if ((vp = nch->ncp->nc_vp) == NULL) 1681 return (ENOENT); 1682 1683 if ((error = vget(vp, LK_SHARED)) != 0) 1684 return (error); 1685 1686 /* 1687 * Check the validity of vp as a directory to change to and 1688 * associate it with rdir/jdir. 1689 */ 1690 error = checkvp_chdir(vp, td); 1691 vn_unlock(vp); /* leave reference intact */ 1692 if (error == 0) { 1693 vrele(fdp->fd_rdir); 1694 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1695 cache_drop(&fdp->fd_nrdir); 1696 cache_copy(nch, &fdp->fd_nrdir); 1697 if (fdp->fd_jdir == NULL) { 1698 fdp->fd_jdir = vp; 1699 vref(fdp->fd_jdir); 1700 cache_copy(nch, &fdp->fd_njdir); 1701 } 1702 } else { 1703 vrele(vp); 1704 } 1705 return (error); 1706 } 1707 1708 /* 1709 * chroot_args(char *path) 1710 * 1711 * Change notion of root (``/'') directory. 
1712 */ 1713 int 1714 sys_chroot(struct chroot_args *uap) 1715 { 1716 struct thread *td __debugvar = curthread; 1717 struct nlookupdata nd; 1718 int error; 1719 1720 KKASSERT(td->td_proc); 1721 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1722 if (error == 0) { 1723 nd.nl_flags |= NLC_EXEC; 1724 error = nlookup(&nd); 1725 if (error == 0) 1726 error = kern_chroot(&nd.nl_nch); 1727 } 1728 nlookup_done(&nd); 1729 return(error); 1730 } 1731 1732 int 1733 sys_chroot_kernel(struct chroot_kernel_args *uap) 1734 { 1735 struct thread *td = curthread; 1736 struct nlookupdata nd; 1737 struct nchandle *nch; 1738 struct vnode *vp; 1739 int error; 1740 1741 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1742 if (error) 1743 goto error_nond; 1744 1745 error = nlookup(&nd); 1746 if (error) 1747 goto error_out; 1748 1749 nch = &nd.nl_nch; 1750 1751 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1752 if (error) 1753 goto error_out; 1754 1755 if ((vp = nch->ncp->nc_vp) == NULL) { 1756 error = ENOENT; 1757 goto error_out; 1758 } 1759 1760 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1761 goto error_out; 1762 1763 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1764 get_mplock(); 1765 vfs_cache_setroot(vp, cache_hold(nch)); 1766 rel_mplock(); 1767 1768 error_out: 1769 nlookup_done(&nd); 1770 error_nond: 1771 return(error); 1772 } 1773 1774 /* 1775 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1776 * determine whether it is legal to chdir to the vnode. The vnode's state 1777 * is not changed by this call. 
1778 */ 1779 int 1780 checkvp_chdir(struct vnode *vp, struct thread *td) 1781 { 1782 int error; 1783 1784 if (vp->v_type != VDIR) 1785 error = ENOTDIR; 1786 else 1787 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1788 return (error); 1789 } 1790 1791 int 1792 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1793 { 1794 struct thread *td = curthread; 1795 struct proc *p = td->td_proc; 1796 struct lwp *lp = td->td_lwp; 1797 struct filedesc *fdp = p->p_fd; 1798 int cmode, flags; 1799 struct file *nfp; 1800 struct file *fp; 1801 struct vnode *vp; 1802 int type, indx, error; 1803 struct flock lf; 1804 1805 if ((oflags & O_ACCMODE) == O_ACCMODE) 1806 return (EINVAL); 1807 flags = FFLAGS(oflags); 1808 error = falloc(lp, &nfp, NULL); 1809 if (error) 1810 return (error); 1811 fp = nfp; 1812 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1813 1814 /* 1815 * XXX p_dupfd is a real mess. It allows a device to return a 1816 * file descriptor to be duplicated rather then doing the open 1817 * itself. 1818 */ 1819 lp->lwp_dupfd = -1; 1820 1821 /* 1822 * Call vn_open() to do the lookup and assign the vnode to the 1823 * file pointer. vn_open() does not change the ref count on fp 1824 * and the vnode, on success, will be inherited by the file pointer 1825 * and unlocked. 1826 */ 1827 nd->nl_flags |= NLC_LOCKVP; 1828 error = vn_open(nd, fp, flags, cmode); 1829 nlookup_done(nd); 1830 if (error) { 1831 /* 1832 * handle special fdopen() case. bleh. dupfdopen() is 1833 * responsible for dropping the old contents of ofiles[indx] 1834 * if it succeeds. 1835 * 1836 * Note that fsetfd() will add a ref to fp which represents 1837 * the fd_files[] assignment. We must still drop our 1838 * reference. 
1839 */ 1840 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1841 if (fdalloc(p, 0, &indx) == 0) { 1842 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error); 1843 if (error == 0) { 1844 *res = indx; 1845 fdrop(fp); /* our ref */ 1846 return (0); 1847 } 1848 fsetfd(fdp, NULL, indx); 1849 } 1850 } 1851 fdrop(fp); /* our ref */ 1852 if (error == ERESTART) 1853 error = EINTR; 1854 return (error); 1855 } 1856 1857 /* 1858 * ref the vnode for ourselves so it can't be ripped out from under 1859 * is. XXX need an ND flag to request that the vnode be returned 1860 * anyway. 1861 * 1862 * Reserve a file descriptor but do not assign it until the open 1863 * succeeds. 1864 */ 1865 vp = (struct vnode *)fp->f_data; 1866 vref(vp); 1867 if ((error = fdalloc(p, 0, &indx)) != 0) { 1868 fdrop(fp); 1869 vrele(vp); 1870 return (error); 1871 } 1872 1873 /* 1874 * If no error occurs the vp will have been assigned to the file 1875 * pointer. 1876 */ 1877 lp->lwp_dupfd = 0; 1878 1879 if (flags & (O_EXLOCK | O_SHLOCK)) { 1880 lf.l_whence = SEEK_SET; 1881 lf.l_start = 0; 1882 lf.l_len = 0; 1883 if (flags & O_EXLOCK) 1884 lf.l_type = F_WRLCK; 1885 else 1886 lf.l_type = F_RDLCK; 1887 if (flags & FNONBLOCK) 1888 type = 0; 1889 else 1890 type = F_WAIT; 1891 1892 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1893 /* 1894 * lock request failed. Clean up the reserved 1895 * descriptor. 1896 */ 1897 vrele(vp); 1898 fsetfd(fdp, NULL, indx); 1899 fdrop(fp); 1900 return (error); 1901 } 1902 fp->f_flag |= FHASLOCK; 1903 } 1904 #if 0 1905 /* 1906 * Assert that all regular file vnodes were created with a object. 1907 */ 1908 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1909 ("open: regular file has no backing object after vn_open")); 1910 #endif 1911 1912 vrele(vp); 1913 1914 /* 1915 * release our private reference, leaving the one associated with the 1916 * descriptor table intact. 
1917 */ 1918 fsetfd(fdp, fp, indx); 1919 fdrop(fp); 1920 *res = indx; 1921 return (0); 1922 } 1923 1924 /* 1925 * open_args(char *path, int flags, int mode) 1926 * 1927 * Check permissions, allocate an open file structure, 1928 * and call the device open routine if any. 1929 */ 1930 int 1931 sys_open(struct open_args *uap) 1932 { 1933 struct nlookupdata nd; 1934 int error; 1935 1936 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1937 if (error == 0) { 1938 error = kern_open(&nd, uap->flags, 1939 uap->mode, &uap->sysmsg_result); 1940 } 1941 nlookup_done(&nd); 1942 return (error); 1943 } 1944 1945 /* 1946 * openat_args(int fd, char *path, int flags, int mode) 1947 */ 1948 int 1949 sys_openat(struct openat_args *uap) 1950 { 1951 struct nlookupdata nd; 1952 int error; 1953 struct file *fp; 1954 1955 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 1956 if (error == 0) { 1957 error = kern_open(&nd, uap->flags, uap->mode, 1958 &uap->sysmsg_result); 1959 } 1960 nlookup_done_at(&nd, fp); 1961 return (error); 1962 } 1963 1964 int 1965 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1966 { 1967 struct thread *td = curthread; 1968 struct proc *p = td->td_proc; 1969 struct vnode *vp; 1970 struct vattr vattr; 1971 int error; 1972 int whiteout = 0; 1973 1974 KKASSERT(p); 1975 1976 VATTR_NULL(&vattr); 1977 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1978 vattr.va_rmajor = rmajor; 1979 vattr.va_rminor = rminor; 1980 1981 switch (mode & S_IFMT) { 1982 case S_IFMT: /* used by badsect to flag bad sectors */ 1983 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 1984 vattr.va_type = VBAD; 1985 break; 1986 case S_IFCHR: 1987 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1988 vattr.va_type = VCHR; 1989 break; 1990 case S_IFBLK: 1991 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1992 vattr.va_type = VBLK; 1993 break; 1994 case S_IFWHT: 1995 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 1996 whiteout 
= 1; 1997 break; 1998 case S_IFDIR: /* special directories support for HAMMER */ 1999 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2000 vattr.va_type = VDIR; 2001 break; 2002 default: 2003 error = EINVAL; 2004 break; 2005 } 2006 2007 if (error) 2008 return (error); 2009 2010 bwillinode(1); 2011 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2012 if ((error = nlookup(nd)) != 0) 2013 return (error); 2014 if (nd->nl_nch.ncp->nc_vp) 2015 return (EEXIST); 2016 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2017 return (error); 2018 2019 if (whiteout) { 2020 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2021 nd->nl_cred, NAMEI_CREATE); 2022 } else { 2023 vp = NULL; 2024 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2025 &vp, nd->nl_cred, &vattr); 2026 if (error == 0) 2027 vput(vp); 2028 } 2029 return (error); 2030 } 2031 2032 /* 2033 * mknod_args(char *path, int mode, int dev) 2034 * 2035 * Create a special file. 2036 */ 2037 int 2038 sys_mknod(struct mknod_args *uap) 2039 { 2040 struct nlookupdata nd; 2041 int error; 2042 2043 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2044 if (error == 0) { 2045 error = kern_mknod(&nd, uap->mode, 2046 umajor(uap->dev), uminor(uap->dev)); 2047 } 2048 nlookup_done(&nd); 2049 return (error); 2050 } 2051 2052 /* 2053 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2054 * 2055 * Create a special file. The path is relative to the directory associated 2056 * with fd. 
2057 */ 2058 int 2059 sys_mknodat(struct mknodat_args *uap) 2060 { 2061 struct nlookupdata nd; 2062 struct file *fp; 2063 int error; 2064 2065 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2066 if (error == 0) { 2067 error = kern_mknod(&nd, uap->mode, 2068 umajor(uap->dev), uminor(uap->dev)); 2069 } 2070 nlookup_done_at(&nd, fp); 2071 return (error); 2072 } 2073 2074 int 2075 kern_mkfifo(struct nlookupdata *nd, int mode) 2076 { 2077 struct thread *td = curthread; 2078 struct proc *p = td->td_proc; 2079 struct vattr vattr; 2080 struct vnode *vp; 2081 int error; 2082 2083 bwillinode(1); 2084 2085 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2086 if ((error = nlookup(nd)) != 0) 2087 return (error); 2088 if (nd->nl_nch.ncp->nc_vp) 2089 return (EEXIST); 2090 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2091 return (error); 2092 2093 VATTR_NULL(&vattr); 2094 vattr.va_type = VFIFO; 2095 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2096 vp = NULL; 2097 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2098 if (error == 0) 2099 vput(vp); 2100 return (error); 2101 } 2102 2103 /* 2104 * mkfifo_args(char *path, int mode) 2105 * 2106 * Create a named pipe. 2107 */ 2108 int 2109 sys_mkfifo(struct mkfifo_args *uap) 2110 { 2111 struct nlookupdata nd; 2112 int error; 2113 2114 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2115 if (error == 0) 2116 error = kern_mkfifo(&nd, uap->mode); 2117 nlookup_done(&nd); 2118 return (error); 2119 } 2120 2121 /* 2122 * mkfifoat_args(int fd, char *path, mode_t mode) 2123 * 2124 * Create a named pipe. The path is relative to the directory associated 2125 * with fd. 
2126 */ 2127 int 2128 sys_mkfifoat(struct mkfifoat_args *uap) 2129 { 2130 struct nlookupdata nd; 2131 struct file *fp; 2132 int error; 2133 2134 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2135 if (error == 0) 2136 error = kern_mkfifo(&nd, uap->mode); 2137 nlookup_done_at(&nd, fp); 2138 return (error); 2139 } 2140 2141 static int hardlink_check_uid = 0; 2142 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2143 &hardlink_check_uid, 0, 2144 "Unprivileged processes cannot create hard links to files owned by other " 2145 "users"); 2146 static int hardlink_check_gid = 0; 2147 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2148 &hardlink_check_gid, 0, 2149 "Unprivileged processes cannot create hard links to files owned by other " 2150 "groups"); 2151 2152 static int 2153 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2154 { 2155 struct vattr va; 2156 int error; 2157 2158 /* 2159 * Shortcut if disabled 2160 */ 2161 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2162 return (0); 2163 2164 /* 2165 * Privileged user can always hardlink 2166 */ 2167 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2168 return (0); 2169 2170 /* 2171 * Otherwise only if the originating file is owned by the 2172 * same user or group. Note that any group is allowed if 2173 * the file is owned by the caller. 2174 */ 2175 error = VOP_GETATTR(vp, &va); 2176 if (error != 0) 2177 return (error); 2178 2179 if (hardlink_check_uid) { 2180 if (cred->cr_uid != va.va_uid) 2181 return (EPERM); 2182 } 2183 2184 if (hardlink_check_gid) { 2185 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2186 return (EPERM); 2187 } 2188 2189 return (0); 2190 } 2191 2192 int 2193 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2194 { 2195 struct thread *td = curthread; 2196 struct vnode *vp; 2197 int error; 2198 2199 /* 2200 * Lookup the source and obtained a locked vnode. 
2201 * 2202 * You may only hardlink a file which you have write permission 2203 * on or which you own. 2204 * 2205 * XXX relookup on vget failure / race ? 2206 */ 2207 bwillinode(1); 2208 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2209 if ((error = nlookup(nd)) != 0) 2210 return (error); 2211 vp = nd->nl_nch.ncp->nc_vp; 2212 KKASSERT(vp != NULL); 2213 if (vp->v_type == VDIR) 2214 return (EPERM); /* POSIX */ 2215 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2216 return (error); 2217 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2218 return (error); 2219 2220 /* 2221 * Unlock the source so we can lookup the target without deadlocking 2222 * (XXX vp is locked already, possible other deadlock?). The target 2223 * must not exist. 2224 */ 2225 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2226 nd->nl_flags &= ~NLC_NCPISLOCKED; 2227 cache_unlock(&nd->nl_nch); 2228 vn_unlock(vp); 2229 2230 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2231 if ((error = nlookup(linknd)) != 0) { 2232 vrele(vp); 2233 return (error); 2234 } 2235 if (linknd->nl_nch.ncp->nc_vp) { 2236 vrele(vp); 2237 return (EEXIST); 2238 } 2239 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 2240 vrele(vp); 2241 return (error); 2242 } 2243 2244 /* 2245 * Finally run the new API VOP. 2246 */ 2247 error = can_hardlink(vp, td, td->td_ucred); 2248 if (error == 0) { 2249 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2250 vp, linknd->nl_cred); 2251 } 2252 vput(vp); 2253 return (error); 2254 } 2255 2256 /* 2257 * link_args(char *path, char *link) 2258 * 2259 * Make a hard file link. 
2260 */ 2261 int 2262 sys_link(struct link_args *uap) 2263 { 2264 struct nlookupdata nd, linknd; 2265 int error; 2266 2267 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2268 if (error == 0) { 2269 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2270 if (error == 0) 2271 error = kern_link(&nd, &linknd); 2272 nlookup_done(&linknd); 2273 } 2274 nlookup_done(&nd); 2275 return (error); 2276 } 2277 2278 int 2279 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2280 { 2281 struct vattr vattr; 2282 struct vnode *vp; 2283 struct vnode *dvp; 2284 int error; 2285 2286 bwillinode(1); 2287 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2288 if ((error = nlookup(nd)) != 0) 2289 return (error); 2290 if (nd->nl_nch.ncp->nc_vp) 2291 return (EEXIST); 2292 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2293 return (error); 2294 dvp = nd->nl_dvp; 2295 VATTR_NULL(&vattr); 2296 vattr.va_mode = mode; 2297 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2298 if (error == 0) 2299 vput(vp); 2300 return (error); 2301 } 2302 2303 /* 2304 * symlink(char *path, char *link) 2305 * 2306 * Make a symbolic link. 2307 */ 2308 int 2309 sys_symlink(struct symlink_args *uap) 2310 { 2311 struct thread *td = curthread; 2312 struct nlookupdata nd; 2313 char *path; 2314 int error; 2315 int mode; 2316 2317 path = objcache_get(namei_oc, M_WAITOK); 2318 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2319 if (error == 0) { 2320 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2321 if (error == 0) { 2322 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2323 error = kern_symlink(&nd, path, mode); 2324 } 2325 nlookup_done(&nd); 2326 } 2327 objcache_put(namei_oc, path); 2328 return (error); 2329 } 2330 2331 /* 2332 * symlinkat_args(char *path1, int fd, char *path2) 2333 * 2334 * Make a symbolic link. The path2 argument is relative to the directory 2335 * associated with fd. 
2336 */ 2337 int 2338 sys_symlinkat(struct symlinkat_args *uap) 2339 { 2340 struct thread *td = curthread; 2341 struct nlookupdata nd; 2342 struct file *fp; 2343 char *path1; 2344 int error; 2345 int mode; 2346 2347 path1 = objcache_get(namei_oc, M_WAITOK); 2348 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2349 if (error == 0) { 2350 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2351 UIO_USERSPACE, 0); 2352 if (error == 0) { 2353 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2354 error = kern_symlink(&nd, path1, mode); 2355 } 2356 nlookup_done_at(&nd, fp); 2357 } 2358 objcache_put(namei_oc, path1); 2359 return (error); 2360 } 2361 2362 /* 2363 * undelete_args(char *path) 2364 * 2365 * Delete a whiteout from the filesystem. 2366 */ 2367 int 2368 sys_undelete(struct undelete_args *uap) 2369 { 2370 struct nlookupdata nd; 2371 int error; 2372 2373 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2374 bwillinode(1); 2375 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2376 if (error == 0) 2377 error = nlookup(&nd); 2378 if (error == 0) 2379 error = ncp_writechk(&nd.nl_nch); 2380 if (error == 0) { 2381 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2382 NAMEI_DELETE); 2383 } 2384 nlookup_done(&nd); 2385 return (error); 2386 } 2387 2388 int 2389 kern_unlink(struct nlookupdata *nd) 2390 { 2391 int error; 2392 2393 bwillinode(1); 2394 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2395 if ((error = nlookup(nd)) != 0) 2396 return (error); 2397 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2398 return (error); 2399 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2400 return (error); 2401 } 2402 2403 /* 2404 * unlink_args(char *path) 2405 * 2406 * Delete a name from the filesystem. 
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
 */
int
sys_unlinkat(struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	/* AT_REMOVEDIR is the only flag accepted */
	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Reposition the seek offset of descriptor fd according to (offset, whence)
 * and return the resulting offset in *res.  The file's spinlock is held
 * across the offset update so concurrent lseek/read/write offset updates
 * do not interleave.
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	/* seeking is only meaningful on vnode-backed descriptors */
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	/*
	 * Each case exits the switch with fp->f_spin held; the lock is
	 * released after *res is loaded below.
	 */
	switch (whence) {
	case L_INCR:
		spin_lock(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/*
		 * SEEK_END needs the current file size; if VOP_GETATTR
		 * fails the computed offset is discarded below because
		 * error != 0.
		 */
		error = VOP_GETATTR(vp, &vattr);
		spin_lock(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
	 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	/* on error the (unchanged) current offset is still reported */
	*res = fp->f_offset;
	spin_unlock(&fp->f_spin);
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask of *_OK
 * access bits.  flags is a bitmask of AT_* flags.
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	if (flags & ~AT_EACCESS)
		return (EINVAL);
	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (amode) {
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		/* write access additionally requires a writable vnode */
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Retrieve the attributes of the file resolved by nd into *st.
 * Retries the lookup if the underlying file handle turns out to be stale.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct fstatat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->sb, sizeof(*uap->sb));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * pathconf_Args(char *path, int name)
 *
 * Get configurable pathname variables.
 */
int
sys_pathconf(struct pathconf_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg);
		vput(vp);
	}
	return (error);
}

/*
 * XXX: daver
 * kern_readlink isn't properly split yet.  There is a copyin burried
 * in VOP_READLINK().
2758 */ 2759 int 2760 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2761 { 2762 struct thread *td = curthread; 2763 struct vnode *vp; 2764 struct iovec aiov; 2765 struct uio auio; 2766 int error; 2767 2768 if ((error = nlookup(nd)) != 0) 2769 return (error); 2770 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2771 if (error) 2772 return (error); 2773 if (vp->v_type != VLNK) { 2774 error = EINVAL; 2775 } else { 2776 aiov.iov_base = buf; 2777 aiov.iov_len = count; 2778 auio.uio_iov = &aiov; 2779 auio.uio_iovcnt = 1; 2780 auio.uio_offset = 0; 2781 auio.uio_rw = UIO_READ; 2782 auio.uio_segflg = UIO_USERSPACE; 2783 auio.uio_td = td; 2784 auio.uio_resid = count; 2785 error = VOP_READLINK(vp, &auio, td->td_ucred); 2786 } 2787 vput(vp); 2788 *res = count - auio.uio_resid; 2789 return (error); 2790 } 2791 2792 /* 2793 * readlink_args(char *path, char *buf, int count) 2794 * 2795 * Return target name of a symbolic link. 2796 */ 2797 int 2798 sys_readlink(struct readlink_args *uap) 2799 { 2800 struct nlookupdata nd; 2801 int error; 2802 2803 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2804 if (error == 0) { 2805 error = kern_readlink(&nd, uap->buf, uap->count, 2806 &uap->sysmsg_result); 2807 } 2808 nlookup_done(&nd); 2809 return (error); 2810 } 2811 2812 /* 2813 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2814 * 2815 * Return target name of a symbolic link. The path is relative to the 2816 * directory associated with fd. 
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
				      &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Set the fs flags word on vp via VOP_SETATTR using the current
 * thread's credentials.
 */
static int
setfflags(struct vnode *vp, int flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(char *path, int flags)
 *
 * Change flags of a file given a path name.
 */
int
sys_chflags(struct chflags_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	/* refuse early if the mount is read-only */
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}

/*
 * lchflags(char *path, int flags)
 *
 * Change flags of a file given a path name, but don't follow symlinks.
 */
int
sys_lchflags(struct lchflags_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	/* identical to sys_chflags() except NLC_FOLLOW is omitted */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}

/*
 * fchflags_args(int fd, int flags)
 *
 * Change flags of a file given a file descriptor.
 */
int
sys_fchflags(struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* check for a read-only mount if a namecache entry is attached */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * Set the file mode bits on vp (masked by ALLPERMS) via VOP_SETATTR
 * using the current thread's credentials.
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return error;
}

/*
 * Common backend for chmod-family syscalls: resolve nd, verify the
 * mount is writable, and apply the new mode.
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 */
int
sys_lchmod(struct lchmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
 */
int
sys_fchmod(struct fchmod_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* check for a read-only mount if a namecache entry is attached */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfmode((struct vnode *)fp->f_data, uap->mode);
	fdrop(fp);
	return (error);
}

/*
 * fchmodat_args(char *path, int mode)
 *
 * Change mode of a file pointed to by fd/path.
 */
int
sys_fchmodat(struct fchmodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Set the owner/group on vp via VOP_SETATTR using the current thread's
 * credentials.
 */
static int
setfown(struct vnode *vp, uid_t uid, gid_t gid)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_uid = uid;
		vattr.va_gid = gid;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return error;
}

/*
 * Common backend for chown-family syscalls: resolve nd, verify the
 * mount is writable, and apply the new ownership.
 */
int
kern_chown(struct nlookupdata *nd, int uid, int gid)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfown(vp, uid, gid);
	vrele(vp);
	return (error);
}

/*
 * chown(char *path, int uid, int gid)
 *
 * Set ownership given a path name.
 */
int
sys_chown(struct chown_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchown_args(char *path, int uid, int gid)
 *
 * Set ownership given a path name, do not cross symlinks.
 */
int
sys_lchown(struct lchown_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchown_args(int fd, int uid, int gid)
 *
 * Set ownership given a file descriptor.
 */
int
sys_fchown(struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* check for a read-only mount if a namecache entry is attached */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
	fdrop(fp);
	return (error);
}

/*
 * fchownat(int fd, char *path, int uid, int gid, int flags)
 *
 * Set ownership of file pointed to by fd/path.
 */
int
sys_fchownat(struct fchownat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Convert a utimes() style timeval pair into a timespec pair.  A NULL
 * tvp means "set both timestamps to the current time".
 */
static int
getutimes(const struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];

	if (tvp == NULL) {
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Apply access/modification times ts[0]/ts[1] to vp.  nullflag is set
 * when the caller passed a NULL timeval (i.e. "touch to now"), which
 * relaxes the permission check in the fs via VA_UTIMES_NULL.
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * Common backend for utimes/lutimes: resolve nd and set the file times.
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if ((error = getutimes(tptr, ts)) != 0)
		return (error);

	/*
	 * NOTE: utimes() succeeds for the owner even if the file
	 * is not user-writable.
	 */
	nd->nl_flags |= NLC_OWN | NLC_WRITE;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, ts, (tptr == NULL));
			vput(vp);
		}
	}
	vrele(vp);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_lutimes(struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	/* same as sys_utimes() but without NLC_FOLLOW */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct timespec ts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	error = getutimes(tptr, ts);
	if (error)
		return (error);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = VOP_GETATTR(vp, &vattr);
			if (error == 0) {
				/* permission check against the open creds */
				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
						   fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, ts,
						  (tptr == NULL));
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);

	return (error);
}

/*
 * Common backend for truncate(): resolve nd, verify writability, and
 * set the file size to length.  Negative lengths and directories are
 * rejected.
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
	} else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 */
int
sys_truncate(struct truncate_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_truncate(&nd, uap->length);
	nlookup_done(&nd);
	return error;
}

/*
 * Common backend for ftruncate(): the descriptor must be open for
 * writing and must not be append-only.
 */
int
kern_ftruncate(int fd, off_t length)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int error;

	if (length < 0)
		return(EINVAL);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp) {
		error = ncp_writechk(&fp->f_nchandle);
		if (error)
			goto done;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EINVAL;
		goto done;
	}
	if (fp->f_flag & FAPPENDONLY) {	/* inode was set s/uapnd */
		error = EINVAL;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
	} else if ((error = vn_writechk(vp, NULL)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	}
	vn_unlock(vp);
done:
	fdrop(fp);
	return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* flush dirty VM pages backing the vnode before the fs sync */
	if ((obj = vp->v_object) != NULL)
		vm_object_page_clean(obj, 0, 0, 0);
	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Common backend for rename(): resolve both paths, sanity check the
 * namecache topology, and issue VOP_NRENAME (or VOP_NREMOVE when the
 * source and target are hardlinks to the same file).
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * make sure the parent directories linkages are the same
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by travsersing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	if (error == 0) {
		error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_rename(&fromnd, &tond);
		nlookup_done(&tond);
	}
	nlookup_done(&fromnd);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_renameat(struct renameat_args *uap)
{
	struct nlookupdata oldnd, newnd;
	struct file *oldfp, *newfp;
	int error;

	error = nlookup_init_at(&oldnd, &oldfp, uap->oldfd, uap->old,
				UIO_USERSPACE, 0);
	if (error == 0) {
		error = nlookup_init_at(&newnd, &newfp, uap->newfd, uap->new,
					UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_rename(&oldnd, &newnd);
		nlookup_done_at(&newnd, newfp);
	}
	nlookup_done_at(&oldnd, oldfp);
	return (error);
}

/*
 * Common backend for mkdir-family syscalls: resolve nd as a
 * to-be-created directory entry and issue VOP_NMKDIR.  The new
 * directory's mode is masked by the process umask.
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
int
sys_mkdir(struct mkdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkdirat(struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common backend for rmdir-family syscalls: resolve nd for deletion
 * and issue VOP_NRMDIR.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Read directory entries from descriptor fd into buf (at most count
 * bytes).  *basep (if non-NULL) receives the seek offset at which the
 * read began; *res receives the number of bytes transferred.
 * direction selects user vs. system space for buf.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
unionread:
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;
	/*
	 * Nothing was transferred: possibly the end of a union stack
	 * layer; let the union hook switch vnodes and retry.
	 */
	if (count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				goto done;
		}
#if 0
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			vref(vp);
			fp->f_data = vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
#endif
	}

	/*
	 * WARNING!  *basep may not be wide enough to accomodate the
	 * seek offset.   XXX should we hack this to return the upper 32 bits
	 * for offsets greater then 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int conut, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
	long base;
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
				   &uap->sysmsg_result, UIO_USERSPACE);

	if (error == 0 && uap->basep)
		error = copyout(&base, uap->basep, sizeof(*uap->basep));
	return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 */
int
sys_getdents(struct getdents_args *uap)
{
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
				   &uap->sysmsg_result, UIO_USERSPACE);

	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 */
int
sys_umask(struct umask_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;

	fdp = p->p_fd;
	uap->sysmsg_result = fdp->fd_cmask;
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
3964 */ 3965 int 3966 sys_revoke(struct revoke_args *uap) 3967 { 3968 struct nlookupdata nd; 3969 struct vattr vattr; 3970 struct vnode *vp; 3971 struct ucred *cred; 3972 int error; 3973 3974 vp = NULL; 3975 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3976 if (error == 0) 3977 error = nlookup(&nd); 3978 if (error == 0) 3979 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3980 cred = crhold(nd.nl_cred); 3981 nlookup_done(&nd); 3982 if (error == 0) { 3983 if (error == 0) 3984 error = VOP_GETATTR(vp, &vattr); 3985 if (error == 0 && cred->cr_uid != vattr.va_uid) 3986 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 3987 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 3988 if (vcount(vp) > 0) 3989 error = vrevoke(vp, cred); 3990 } else if (error == 0) { 3991 error = vrevoke(vp, cred); 3992 } 3993 vrele(vp); 3994 } 3995 if (cred) 3996 crfree(cred); 3997 return (error); 3998 } 3999 4000 /* 4001 * getfh_args(char *fname, fhandle_t *fhp) 4002 * 4003 * Get (NFS) file handle 4004 * 4005 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4006 * mount. This allows nullfs mounts to be explicitly exported. 4007 * 4008 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4009 * 4010 * nullfs mounts of subdirectories are not safe. That is, it will 4011 * work, but you do not really have protection against access to 4012 * the related parent directories. 
4013 */ 4014 int 4015 sys_getfh(struct getfh_args *uap) 4016 { 4017 struct thread *td = curthread; 4018 struct nlookupdata nd; 4019 fhandle_t fh; 4020 struct vnode *vp; 4021 struct mount *mp; 4022 int error; 4023 4024 /* 4025 * Must be super user 4026 */ 4027 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4028 return (error); 4029 4030 vp = NULL; 4031 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4032 if (error == 0) 4033 error = nlookup(&nd); 4034 if (error == 0) 4035 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4036 mp = nd.nl_nch.mount; 4037 nlookup_done(&nd); 4038 if (error == 0) { 4039 bzero(&fh, sizeof(fh)); 4040 fh.fh_fsid = mp->mnt_stat.f_fsid; 4041 error = VFS_VPTOFH(vp, &fh.fh_fid); 4042 vput(vp); 4043 if (error == 0) 4044 error = copyout(&fh, uap->fhp, sizeof(fh)); 4045 } 4046 return (error); 4047 } 4048 4049 /* 4050 * fhopen_args(const struct fhandle *u_fhp, int flags) 4051 * 4052 * syscall for the rpc.lockd to use to translate a NFS file handle into 4053 * an open descriptor. 4054 * 4055 * warning: do not remove the priv_check() call or this becomes one giant 4056 * security hole. 4057 */ 4058 int 4059 sys_fhopen(struct fhopen_args *uap) 4060 { 4061 struct thread *td = curthread; 4062 struct filedesc *fdp = td->td_proc->p_fd; 4063 struct mount *mp; 4064 struct vnode *vp; 4065 struct fhandle fhp; 4066 struct vattr vat; 4067 struct vattr *vap = &vat; 4068 struct flock lf; 4069 int fmode, mode, error, type; 4070 struct file *nfp; 4071 struct file *fp; 4072 int indx; 4073 4074 /* 4075 * Must be super user 4076 */ 4077 error = priv_check(td, PRIV_ROOT); 4078 if (error) 4079 return (error); 4080 4081 fmode = FFLAGS(uap->flags); 4082 4083 /* 4084 * Why not allow a non-read/write open for our lockd? 
4085 */ 4086 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4087 return (EINVAL); 4088 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4089 if (error) 4090 return(error); 4091 4092 /* 4093 * Find the mount point 4094 */ 4095 mp = vfs_getvfs(&fhp.fh_fsid); 4096 if (mp == NULL) { 4097 error = ESTALE; 4098 goto done; 4099 } 4100 /* now give me my vnode, it gets returned to me locked */ 4101 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4102 if (error) 4103 goto done; 4104 /* 4105 * from now on we have to make sure not 4106 * to forget about the vnode 4107 * any error that causes an abort must vput(vp) 4108 * just set error = err and 'goto bad;'. 4109 */ 4110 4111 /* 4112 * from vn_open 4113 */ 4114 if (vp->v_type == VLNK) { 4115 error = EMLINK; 4116 goto bad; 4117 } 4118 if (vp->v_type == VSOCK) { 4119 error = EOPNOTSUPP; 4120 goto bad; 4121 } 4122 mode = 0; 4123 if (fmode & (FWRITE | O_TRUNC)) { 4124 if (vp->v_type == VDIR) { 4125 error = EISDIR; 4126 goto bad; 4127 } 4128 error = vn_writechk(vp, NULL); 4129 if (error) 4130 goto bad; 4131 mode |= VWRITE; 4132 } 4133 if (fmode & FREAD) 4134 mode |= VREAD; 4135 if (mode) { 4136 error = VOP_ACCESS(vp, mode, td->td_ucred); 4137 if (error) 4138 goto bad; 4139 } 4140 if (fmode & O_TRUNC) { 4141 vn_unlock(vp); /* XXX */ 4142 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4143 VATTR_NULL(vap); 4144 vap->va_size = 0; 4145 error = VOP_SETATTR(vp, vap, td->td_ucred); 4146 if (error) 4147 goto bad; 4148 } 4149 4150 /* 4151 * VOP_OPEN needs the file pointer so it can potentially override 4152 * it. 4153 * 4154 * WARNING! no f_nchandle will be associated when fhopen()ing a 4155 * directory. XXX 4156 */ 4157 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4158 goto bad; 4159 fp = nfp; 4160 4161 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4162 if (error) { 4163 /* 4164 * setting f_ops this way prevents VOP_CLOSE from being 4165 * called or fdrop() releasing the vp from v_data. 
Since 4166 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4167 */ 4168 fp->f_ops = &badfileops; 4169 fp->f_data = NULL; 4170 goto bad_drop; 4171 } 4172 4173 /* 4174 * The fp is given its own reference, we still have our ref and lock. 4175 * 4176 * Assert that all regular files must be created with a VM object. 4177 */ 4178 if (vp->v_type == VREG && vp->v_object == NULL) { 4179 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4180 goto bad_drop; 4181 } 4182 4183 /* 4184 * The open was successful. Handle any locking requirements. 4185 */ 4186 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4187 lf.l_whence = SEEK_SET; 4188 lf.l_start = 0; 4189 lf.l_len = 0; 4190 if (fmode & O_EXLOCK) 4191 lf.l_type = F_WRLCK; 4192 else 4193 lf.l_type = F_RDLCK; 4194 if (fmode & FNONBLOCK) 4195 type = 0; 4196 else 4197 type = F_WAIT; 4198 vn_unlock(vp); 4199 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4200 /* 4201 * release our private reference. 4202 */ 4203 fsetfd(fdp, NULL, indx); 4204 fdrop(fp); 4205 vrele(vp); 4206 goto done; 4207 } 4208 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4209 fp->f_flag |= FHASLOCK; 4210 } 4211 4212 /* 4213 * Clean up. Associate the file pointer with the previously 4214 * reserved descriptor and return it. 
4215 */ 4216 vput(vp); 4217 fsetfd(fdp, fp, indx); 4218 fdrop(fp); 4219 uap->sysmsg_result = indx; 4220 return (0); 4221 4222 bad_drop: 4223 fsetfd(fdp, NULL, indx); 4224 fdrop(fp); 4225 bad: 4226 vput(vp); 4227 done: 4228 return (error); 4229 } 4230 4231 /* 4232 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4233 */ 4234 int 4235 sys_fhstat(struct fhstat_args *uap) 4236 { 4237 struct thread *td = curthread; 4238 struct stat sb; 4239 fhandle_t fh; 4240 struct mount *mp; 4241 struct vnode *vp; 4242 int error; 4243 4244 /* 4245 * Must be super user 4246 */ 4247 error = priv_check(td, PRIV_ROOT); 4248 if (error) 4249 return (error); 4250 4251 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4252 if (error) 4253 return (error); 4254 4255 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4256 error = ESTALE; 4257 if (error == 0) { 4258 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4259 error = vn_stat(vp, &sb, td->td_ucred); 4260 vput(vp); 4261 } 4262 } 4263 if (error == 0) 4264 error = copyout(&sb, uap->sb, sizeof(sb)); 4265 return (error); 4266 } 4267 4268 /* 4269 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4270 */ 4271 int 4272 sys_fhstatfs(struct fhstatfs_args *uap) 4273 { 4274 struct thread *td = curthread; 4275 struct proc *p = td->td_proc; 4276 struct statfs *sp; 4277 struct mount *mp; 4278 struct vnode *vp; 4279 struct statfs sb; 4280 char *fullpath, *freepath; 4281 fhandle_t fh; 4282 int error; 4283 4284 /* 4285 * Must be super user 4286 */ 4287 if ((error = priv_check(td, PRIV_ROOT))) 4288 return (error); 4289 4290 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4291 return (error); 4292 4293 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4294 error = ESTALE; 4295 goto done; 4296 } 4297 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4298 error = ESTALE; 4299 goto done; 4300 } 4301 4302 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4303 goto done; 4304 mp = vp->v_mount; 4305 sp = &mp->mnt_stat; 4306 
vput(vp); 4307 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4308 goto done; 4309 4310 error = mount_path(p, mp, &fullpath, &freepath); 4311 if (error) 4312 goto done; 4313 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4314 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4315 kfree(freepath, M_TEMP); 4316 4317 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4318 if (priv_check(td, PRIV_ROOT)) { 4319 bcopy(sp, &sb, sizeof(sb)); 4320 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4321 sp = &sb; 4322 } 4323 error = copyout(sp, uap->buf, sizeof(*sp)); 4324 done: 4325 return (error); 4326 } 4327 4328 /* 4329 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4330 */ 4331 int 4332 sys_fhstatvfs(struct fhstatvfs_args *uap) 4333 { 4334 struct thread *td = curthread; 4335 struct proc *p = td->td_proc; 4336 struct statvfs *sp; 4337 struct mount *mp; 4338 struct vnode *vp; 4339 fhandle_t fh; 4340 int error; 4341 4342 /* 4343 * Must be super user 4344 */ 4345 if ((error = priv_check(td, PRIV_ROOT))) 4346 return (error); 4347 4348 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4349 return (error); 4350 4351 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4352 error = ESTALE; 4353 goto done; 4354 } 4355 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4356 error = ESTALE; 4357 goto done; 4358 } 4359 4360 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4361 goto done; 4362 mp = vp->v_mount; 4363 sp = &mp->mnt_vstat; 4364 vput(vp); 4365 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4366 goto done; 4367 4368 sp->f_flag = 0; 4369 if (mp->mnt_flag & MNT_RDONLY) 4370 sp->f_flag |= ST_RDONLY; 4371 if (mp->mnt_flag & MNT_NOSUID) 4372 sp->f_flag |= ST_NOSUID; 4373 error = copyout(sp, uap->buf, sizeof(*sp)); 4374 done: 4375 return (error); 4376 } 4377 4378 4379 /* 4380 * Syscall to push extended attribute configuration information into the 4381 * VFS. 
 * Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;

	/* Resolve the optional backing file (may be NULL for some cmds). */
	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	/*
	 * Copy in the name purely for validation; per the comment above,
	 * the userspace pointer (uap->attrname) is what is handed to the
	 * VFS op.
	 */
	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	/* Resolve the path to a mount and dispatch the control operation. */
	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
4437 */ 4438 int 4439 sys_extattr_set_file(struct extattr_set_file_args *uap) 4440 { 4441 char attrname[EXTATTR_MAXNAMELEN]; 4442 struct nlookupdata nd; 4443 struct vnode *vp; 4444 struct uio auio; 4445 struct iovec aiov; 4446 int error; 4447 4448 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4449 if (error) 4450 return (error); 4451 4452 vp = NULL; 4453 4454 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4455 if (error == 0) 4456 error = nlookup(&nd); 4457 if (error == 0) 4458 error = ncp_writechk(&nd.nl_nch); 4459 if (error == 0) 4460 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4461 if (error) { 4462 nlookup_done(&nd); 4463 return (error); 4464 } 4465 4466 bzero(&auio, sizeof(auio)); 4467 aiov.iov_base = uap->data; 4468 aiov.iov_len = uap->nbytes; 4469 auio.uio_iov = &aiov; 4470 auio.uio_iovcnt = 1; 4471 auio.uio_offset = 0; 4472 auio.uio_resid = uap->nbytes; 4473 auio.uio_rw = UIO_WRITE; 4474 auio.uio_td = curthread; 4475 4476 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4477 &auio, nd.nl_cred); 4478 4479 vput(vp); 4480 nlookup_done(&nd); 4481 return (error); 4482 } 4483 4484 /* 4485 * Syscall to get a named extended attribute on a file or directory. 
4486 */ 4487 int 4488 sys_extattr_get_file(struct extattr_get_file_args *uap) 4489 { 4490 char attrname[EXTATTR_MAXNAMELEN]; 4491 struct nlookupdata nd; 4492 struct uio auio; 4493 struct iovec aiov; 4494 struct vnode *vp; 4495 int error; 4496 4497 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4498 if (error) 4499 return (error); 4500 4501 vp = NULL; 4502 4503 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4504 if (error == 0) 4505 error = nlookup(&nd); 4506 if (error == 0) 4507 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4508 if (error) { 4509 nlookup_done(&nd); 4510 return (error); 4511 } 4512 4513 bzero(&auio, sizeof(auio)); 4514 aiov.iov_base = uap->data; 4515 aiov.iov_len = uap->nbytes; 4516 auio.uio_iov = &aiov; 4517 auio.uio_iovcnt = 1; 4518 auio.uio_offset = 0; 4519 auio.uio_resid = uap->nbytes; 4520 auio.uio_rw = UIO_READ; 4521 auio.uio_td = curthread; 4522 4523 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4524 &auio, nd.nl_cred); 4525 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4526 4527 vput(vp); 4528 nlookup_done(&nd); 4529 return(error); 4530 } 4531 4532 /* 4533 * Syscall to delete a named extended attribute from a file or directory. 4534 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4535 */ 4536 int 4537 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4538 { 4539 char attrname[EXTATTR_MAXNAMELEN]; 4540 struct nlookupdata nd; 4541 struct vnode *vp; 4542 int error; 4543 4544 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4545 if (error) 4546 return(error); 4547 4548 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4549 if (error == 0) 4550 error = nlookup(&nd); 4551 if (error == 0) 4552 error = ncp_writechk(&nd.nl_nch); 4553 if (error == 0) { 4554 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4555 if (error == 0) { 4556 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4557 attrname, NULL, nd.nl_cred); 4558 vput(vp); 4559 } 4560 } 4561 nlookup_done(&nd); 4562 return(error); 4563 } 4564 4565 /* 4566 * Determine if the mount is visible to the process. 4567 */ 4568 static int 4569 chroot_visible_mnt(struct mount *mp, struct proc *p) 4570 { 4571 struct nchandle nch; 4572 4573 /* 4574 * Traverse from the mount point upwards. If we hit the process 4575 * root then the mount point is visible to the process. 4576 */ 4577 nch = mp->mnt_ncmountpt; 4578 while (nch.ncp) { 4579 if (nch.mount == p->p_fd->fd_nrdir.mount && 4580 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4581 return(1); 4582 } 4583 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4584 nch = nch.mount->mnt_ncmounton; 4585 } else { 4586 nch.ncp = nch.ncp->nc_parent; 4587 } 4588 } 4589 4590 /* 4591 * If the mount point is not visible to the process, but the 4592 * process root is in a subdirectory of the mount, return 4593 * TRUE anyway. 4594 */ 4595 if (p->p_fd->fd_nrdir.mount == mp) 4596 return(1); 4597 4598 return(0); 4599 } 4600 4601