1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/buf.h> 45 #include <sys/conf.h> 46 #include <sys/sysent.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/mountctl.h> 50 #include <sys/sysproto.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/linker.h> 56 #include <sys/stat.h> 57 #include <sys/unistd.h> 58 #include <sys/vnode.h> 59 #include <sys/proc.h> 60 #include <sys/priv.h> 61 #include <sys/jail.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/dirent.h> 65 #include <sys/extattr.h> 66 #include <sys/spinlock.h> 67 #include <sys/kern_syscall.h> 68 #include <sys/objcache.h> 69 #include <sys/sysctl.h> 70 71 #include <sys/buf2.h> 72 #include <sys/file2.h> 73 #include <sys/spinlock2.h> 74 #include <sys/mplock2.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 80 #include <machine/limits.h> 81 #include <machine/stdarg.h> 82 83 #include <vfs/union/union.h> 84 85 static void mount_warning(struct mount *mp, const char *ctl, ...); 86 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 87 static int checkvp_chdir (struct vnode *vn, struct thread *td); 88 static void checkdirs (struct 
nchandle *old_nch, struct nchandle *new_nch); 89 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 90 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 91 static int getutimes (const struct timeval *, struct timespec *); 92 static int setfown (struct vnode *, uid_t, gid_t); 93 static int setfmode (struct vnode *, int); 94 static int setfflags (struct vnode *, int); 95 static int setutimes (struct vnode *, struct vattr *, 96 const struct timespec *, int); 97 static int usermount = 0; /* if 1, non-root can mount fs. */ 98 99 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 100 101 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 102 103 /* 104 * Virtual File System System Calls 105 */ 106 107 /* 108 * Mount a file system. 109 * 110 * mount_args(char *type, char *path, int flags, caddr_t data) 111 * 112 * MPALMOSTSAFE 113 */ 114 int 115 sys_mount(struct mount_args *uap) 116 { 117 struct thread *td = curthread; 118 struct vnode *vp; 119 struct nchandle nch; 120 struct mount *mp, *nullmp; 121 struct vfsconf *vfsp; 122 int error, flag = 0, flag2 = 0; 123 int hasmount; 124 struct vattr va; 125 struct nlookupdata nd; 126 char fstypename[MFSNAMELEN]; 127 struct ucred *cred; 128 129 get_mplock(); 130 cred = td->td_ucred; 131 if (jailed(cred)) { 132 error = EPERM; 133 goto done; 134 } 135 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 136 goto done; 137 138 /* 139 * Do not allow NFS export by non-root users. 140 */ 141 if (uap->flags & MNT_EXPORTED) { 142 error = priv_check(td, PRIV_ROOT); 143 if (error) 144 goto done; 145 } 146 /* 147 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 148 */ 149 if (priv_check(td, PRIV_ROOT)) 150 uap->flags |= MNT_NOSUID | MNT_NODEV; 151 152 /* 153 * Lookup the requested path and extract the nch and vnode. 
154 */ 155 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 156 if (error == 0) { 157 if ((error = nlookup(&nd)) == 0) { 158 if (nd.nl_nch.ncp->nc_vp == NULL) 159 error = ENOENT; 160 } 161 } 162 if (error) { 163 nlookup_done(&nd); 164 goto done; 165 } 166 167 /* 168 * If the target filesystem is resolved via a nullfs mount, then 169 * nd.nl_nch.mount will be pointing to the nullfs mount structure 170 * instead of the target file system. We need it in case we are 171 * doing an update. 172 */ 173 nullmp = nd.nl_nch.mount; 174 175 /* 176 * Extract the locked+refd ncp and cleanup the nd structure 177 */ 178 nch = nd.nl_nch; 179 cache_zero(&nd.nl_nch); 180 nlookup_done(&nd); 181 182 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 183 hasmount = 1; 184 else 185 hasmount = 0; 186 187 188 /* 189 * now we have the locked ref'd nch and unreferenced vnode. 190 */ 191 vp = nch.ncp->nc_vp; 192 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 193 cache_put(&nch); 194 goto done; 195 } 196 cache_unlock(&nch); 197 198 /* 199 * Extract the file system type. We need to know this early, to take 200 * appropriate actions if we are dealing with a nullfs. 201 */ 202 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 203 cache_drop(&nch); 204 vput(vp); 205 goto done; 206 } 207 208 /* 209 * Now we have an unlocked ref'd nch and a locked ref'd vp 210 */ 211 if (uap->flags & MNT_UPDATE) { 212 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 213 cache_drop(&nch); 214 vput(vp); 215 error = EINVAL; 216 goto done; 217 } 218 219 if (strncmp(fstypename, "null", 5) == 0) { 220 KKASSERT(nullmp); 221 mp = nullmp; 222 } else { 223 mp = vp->v_mount; 224 } 225 226 flag = mp->mnt_flag; 227 flag2 = mp->mnt_kern_flag; 228 /* 229 * We only allow the filesystem to be reloaded if it 230 * is currently mounted read-only. 
231 */ 232 if ((uap->flags & MNT_RELOAD) && 233 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 234 cache_drop(&nch); 235 vput(vp); 236 error = EOPNOTSUPP; /* Needs translation */ 237 goto done; 238 } 239 /* 240 * Only root, or the user that did the original mount is 241 * permitted to update it. 242 */ 243 if (mp->mnt_stat.f_owner != cred->cr_uid && 244 (error = priv_check(td, PRIV_ROOT))) { 245 cache_drop(&nch); 246 vput(vp); 247 goto done; 248 } 249 if (vfs_busy(mp, LK_NOWAIT)) { 250 cache_drop(&nch); 251 vput(vp); 252 error = EBUSY; 253 goto done; 254 } 255 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 256 cache_drop(&nch); 257 vfs_unbusy(mp); 258 vput(vp); 259 error = EBUSY; 260 goto done; 261 } 262 vsetflags(vp, VMOUNT); 263 mp->mnt_flag |= 264 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 265 vn_unlock(vp); 266 goto update; 267 } 268 /* 269 * If the user is not root, ensure that they own the directory 270 * onto which we are attempting to mount. 271 */ 272 if ((error = VOP_GETATTR(vp, &va)) || 273 (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) { 274 cache_drop(&nch); 275 vput(vp); 276 goto done; 277 } 278 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 279 cache_drop(&nch); 280 vput(vp); 281 goto done; 282 } 283 if (vp->v_type != VDIR) { 284 cache_drop(&nch); 285 vput(vp); 286 error = ENOTDIR; 287 goto done; 288 } 289 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 290 cache_drop(&nch); 291 vput(vp); 292 error = EPERM; 293 goto done; 294 } 295 vfsp = vfsconf_find_by_name(fstypename); 296 if (vfsp == NULL) { 297 linker_file_t lf; 298 299 /* Only load modules for root (very important!) 
*/ 300 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 301 cache_drop(&nch); 302 vput(vp); 303 goto done; 304 } 305 error = linker_load_file(fstypename, &lf); 306 if (error || lf == NULL) { 307 cache_drop(&nch); 308 vput(vp); 309 if (lf == NULL) 310 error = ENODEV; 311 goto done; 312 } 313 lf->userrefs++; 314 /* lookup again, see if the VFS was loaded */ 315 vfsp = vfsconf_find_by_name(fstypename); 316 if (vfsp == NULL) { 317 lf->userrefs--; 318 linker_file_unload(lf); 319 cache_drop(&nch); 320 vput(vp); 321 error = ENODEV; 322 goto done; 323 } 324 } 325 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 326 cache_drop(&nch); 327 vput(vp); 328 error = EBUSY; 329 goto done; 330 } 331 vsetflags(vp, VMOUNT); 332 333 /* 334 * Allocate and initialize the filesystem. 335 */ 336 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 337 mount_init(mp); 338 vfs_busy(mp, LK_NOWAIT); 339 mp->mnt_op = vfsp->vfc_vfsops; 340 mp->mnt_vfc = vfsp; 341 vfsp->vfc_refcount++; 342 mp->mnt_stat.f_type = vfsp->vfc_typenum; 343 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 344 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 345 mp->mnt_stat.f_owner = cred->cr_uid; 346 vn_unlock(vp); 347 update: 348 /* 349 * Set the mount level flags. 350 */ 351 if (uap->flags & MNT_RDONLY) 352 mp->mnt_flag |= MNT_RDONLY; 353 else if (mp->mnt_flag & MNT_RDONLY) 354 mp->mnt_kern_flag |= MNTK_WANTRDWR; 355 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 356 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 357 MNT_NOSYMFOLLOW | MNT_IGNORE | 358 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 359 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 360 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 361 MNT_NOSYMFOLLOW | MNT_IGNORE | 362 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 363 /* 364 * Mount the filesystem. 365 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 366 * get. 
367 */ 368 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 369 if (mp->mnt_flag & MNT_UPDATE) { 370 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 371 mp->mnt_flag &= ~MNT_RDONLY; 372 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 373 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 374 if (error) { 375 mp->mnt_flag = flag; 376 mp->mnt_kern_flag = flag2; 377 } 378 vfs_unbusy(mp); 379 vclrflags(vp, VMOUNT); 380 vrele(vp); 381 cache_drop(&nch); 382 goto done; 383 } 384 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 385 /* 386 * Put the new filesystem on the mount list after root. The mount 387 * point gets its own mnt_ncmountpt (unless the VFS already set one 388 * up) which represents the root of the mount. The lookup code 389 * detects the mount point going forward and checks the root of 390 * the mount going backwards. 391 * 392 * It is not necessary to invalidate or purge the vnode underneath 393 * because elements under the mount will be given their own glue 394 * namecache record. 395 */ 396 if (!error) { 397 if (mp->mnt_ncmountpt.ncp == NULL) { 398 /* 399 * allocate, then unlock, but leave the ref intact 400 */ 401 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 402 cache_unlock(&mp->mnt_ncmountpt); 403 } 404 mp->mnt_ncmounton = nch; /* inherits ref */ 405 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 406 407 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) 
*/ 408 vclrflags(vp, VMOUNT); 409 mountlist_insert(mp, MNTINS_LAST); 410 vn_unlock(vp); 411 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 412 error = vfs_allocate_syncvnode(mp); 413 vfs_unbusy(mp); 414 error = VFS_START(mp, 0); 415 vrele(vp); 416 } else { 417 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 418 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 419 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 420 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 421 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 422 vclrflags(vp, VMOUNT); 423 mp->mnt_vfc->vfc_refcount--; 424 vfs_unbusy(mp); 425 kfree(mp, M_MOUNT); 426 cache_drop(&nch); 427 vput(vp); 428 } 429 done: 430 rel_mplock(); 431 return (error); 432 } 433 434 /* 435 * Scan all active processes to see if any of them have a current 436 * or root directory onto which the new filesystem has just been 437 * mounted. If so, replace them with the new mount point. 438 * 439 * The passed ncp is ref'd and locked (from the mount code) and 440 * must be associated with the vnode representing the root of the 441 * mount point. 442 */ 443 struct checkdirs_info { 444 struct nchandle old_nch; 445 struct nchandle new_nch; 446 struct vnode *old_vp; 447 struct vnode *new_vp; 448 }; 449 450 static int checkdirs_callback(struct proc *p, void *data); 451 452 static void 453 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 454 { 455 struct checkdirs_info info; 456 struct vnode *olddp; 457 struct vnode *newdp; 458 struct mount *mp; 459 460 /* 461 * If the old mount point's vnode has a usecount of 1, it is not 462 * being held as a descriptor anywhere. 463 */ 464 olddp = old_nch->ncp->nc_vp; 465 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 466 return; 467 468 /* 469 * Force the root vnode of the new mount point to be resolved 470 * so we can update any matching processes. 
471 */ 472 mp = new_nch->mount; 473 if (VFS_ROOT(mp, &newdp)) 474 panic("mount: lost mount"); 475 cache_setunresolved(new_nch); 476 cache_setvp(new_nch, newdp); 477 478 /* 479 * Special handling of the root node 480 */ 481 if (rootvnode == olddp) { 482 vref(newdp); 483 vfs_cache_setroot(newdp, cache_hold(new_nch)); 484 } 485 486 /* 487 * Pass newdp separately so the callback does not have to access 488 * it via new_nch->ncp->nc_vp. 489 */ 490 info.old_nch = *old_nch; 491 info.new_nch = *new_nch; 492 info.new_vp = newdp; 493 allproc_scan(checkdirs_callback, &info); 494 vput(newdp); 495 } 496 497 /* 498 * NOTE: callback is not MP safe because the scanned process's filedesc 499 * structure can be ripped out from under us, amoung other things. 500 */ 501 static int 502 checkdirs_callback(struct proc *p, void *data) 503 { 504 struct checkdirs_info *info = data; 505 struct filedesc *fdp; 506 struct nchandle ncdrop1; 507 struct nchandle ncdrop2; 508 struct vnode *vprele1; 509 struct vnode *vprele2; 510 511 if ((fdp = p->p_fd) != NULL) { 512 cache_zero(&ncdrop1); 513 cache_zero(&ncdrop2); 514 vprele1 = NULL; 515 vprele2 = NULL; 516 517 /* 518 * MPUNSAFE - XXX fdp can be pulled out from under a 519 * foreign process. 520 * 521 * A shared filedesc is ok, we don't have to copy it 522 * because we are making this change globally. 
523 */ 524 spin_lock_wr(&fdp->fd_spin); 525 if (fdp->fd_ncdir.mount == info->old_nch.mount && 526 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 527 vprele1 = fdp->fd_cdir; 528 vref(info->new_vp); 529 fdp->fd_cdir = info->new_vp; 530 ncdrop1 = fdp->fd_ncdir; 531 cache_copy(&info->new_nch, &fdp->fd_ncdir); 532 } 533 if (fdp->fd_nrdir.mount == info->old_nch.mount && 534 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 535 vprele2 = fdp->fd_rdir; 536 vref(info->new_vp); 537 fdp->fd_rdir = info->new_vp; 538 ncdrop2 = fdp->fd_nrdir; 539 cache_copy(&info->new_nch, &fdp->fd_nrdir); 540 } 541 spin_unlock_wr(&fdp->fd_spin); 542 if (ncdrop1.ncp) 543 cache_drop(&ncdrop1); 544 if (ncdrop2.ncp) 545 cache_drop(&ncdrop2); 546 if (vprele1) 547 vrele(vprele1); 548 if (vprele2) 549 vrele(vprele2); 550 } 551 return(0); 552 } 553 554 /* 555 * Unmount a file system. 556 * 557 * Note: unmount takes a path to the vnode mounted on as argument, 558 * not special file (as before). 559 * 560 * umount_args(char *path, int flags) 561 * 562 * MPALMOSTSAFE 563 */ 564 int 565 sys_unmount(struct unmount_args *uap) 566 { 567 struct thread *td = curthread; 568 struct proc *p __debugvar = td->td_proc; 569 struct mount *mp = NULL; 570 struct nlookupdata nd; 571 int error; 572 573 KKASSERT(p); 574 get_mplock(); 575 if (td->td_ucred->cr_prison != NULL) { 576 error = EPERM; 577 goto done; 578 } 579 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 580 goto done; 581 582 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 583 if (error == 0) 584 error = nlookup(&nd); 585 if (error) 586 goto out; 587 588 mp = nd.nl_nch.mount; 589 590 /* 591 * Only root, or the user that did the original mount is 592 * permitted to unmount this filesystem. 593 */ 594 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 595 (error = priv_check(td, PRIV_ROOT))) 596 goto out; 597 598 /* 599 * Don't allow unmounting the root file system. 
600 */ 601 if (mp->mnt_flag & MNT_ROOTFS) { 602 error = EINVAL; 603 goto out; 604 } 605 606 /* 607 * Must be the root of the filesystem 608 */ 609 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 610 error = EINVAL; 611 goto out; 612 } 613 614 out: 615 nlookup_done(&nd); 616 if (error == 0) 617 error = dounmount(mp, uap->flags); 618 done: 619 rel_mplock(); 620 return (error); 621 } 622 623 /* 624 * Do the actual file system unmount. 625 */ 626 static int 627 dounmount_interlock(struct mount *mp) 628 { 629 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 630 return (EBUSY); 631 mp->mnt_kern_flag |= MNTK_UNMOUNT; 632 return(0); 633 } 634 635 static int 636 unmount_allproc_cb(struct proc *p, void *arg) 637 { 638 struct mount *mp; 639 640 if (p->p_textnch.ncp == NULL) 641 return 0; 642 643 mp = (struct mount *)arg; 644 if (p->p_textnch.mount == mp) 645 cache_drop(&p->p_textnch); 646 647 return 0; 648 } 649 650 int 651 dounmount(struct mount *mp, int flags) 652 { 653 struct namecache *ncp; 654 struct nchandle nch; 655 struct vnode *vp; 656 int error; 657 int async_flag; 658 int lflags; 659 int freeok = 1; 660 661 /* 662 * Exclusive access for unmounting purposes 663 */ 664 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 665 return (error); 666 667 /* 668 * Allow filesystems to detect that a forced unmount is in progress. 669 */ 670 if (flags & MNT_FORCE) 671 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 672 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 
0 : LK_NOWAIT); 673 error = lockmgr(&mp->mnt_lock, lflags); 674 if (error) { 675 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 676 if (mp->mnt_kern_flag & MNTK_MWAIT) 677 wakeup(mp); 678 return (error); 679 } 680 681 if (mp->mnt_flag & MNT_EXPUBLIC) 682 vfs_setpublicfs(NULL, NULL, NULL); 683 684 vfs_msync(mp, MNT_WAIT); 685 async_flag = mp->mnt_flag & MNT_ASYNC; 686 mp->mnt_flag &=~ MNT_ASYNC; 687 688 /* 689 * If this filesystem isn't aliasing other filesystems, 690 * try to invalidate any remaining namecache entries and 691 * check the count afterwords. 692 */ 693 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 694 cache_lock(&mp->mnt_ncmountpt); 695 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 696 cache_unlock(&mp->mnt_ncmountpt); 697 698 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 699 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 700 allproc_scan(&unmount_allproc_cb, mp); 701 } 702 703 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 704 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 705 706 if ((flags & MNT_FORCE) == 0) { 707 error = EBUSY; 708 mount_warning(mp, "Cannot unmount: " 709 "%d namecache " 710 "references still " 711 "present", 712 ncp->nc_refs - 1); 713 } else { 714 mount_warning(mp, "Forced unmount: " 715 "%d namecache " 716 "references still " 717 "present", 718 ncp->nc_refs - 1); 719 freeok = 0; 720 } 721 } 722 } 723 724 /* 725 * nchandle records ref the mount structure. Expect a count of 1 726 * (our mount->mnt_ncmountpt). 727 */ 728 if (mp->mnt_refs != 1) { 729 if ((flags & MNT_FORCE) == 0) { 730 mount_warning(mp, "Cannot unmount: " 731 "%d process references still " 732 "present", mp->mnt_refs); 733 error = EBUSY; 734 } else { 735 mount_warning(mp, "Forced unmount: " 736 "%d process references still " 737 "present", mp->mnt_refs); 738 freeok = 0; 739 } 740 } 741 742 /* 743 * Decomission our special mnt_syncer vnode. This also stops 744 * the vnlru code. 
If we are unable to unmount we recommission 745 * the vnode. 746 */ 747 if (error == 0) { 748 if ((vp = mp->mnt_syncer) != NULL) { 749 mp->mnt_syncer = NULL; 750 vrele(vp); 751 } 752 if (((mp->mnt_flag & MNT_RDONLY) || 753 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 754 (flags & MNT_FORCE)) { 755 error = VFS_UNMOUNT(mp, flags); 756 } 757 } 758 if (error) { 759 if (mp->mnt_syncer == NULL) 760 vfs_allocate_syncvnode(mp); 761 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 762 mp->mnt_flag |= async_flag; 763 lockmgr(&mp->mnt_lock, LK_RELEASE); 764 if (mp->mnt_kern_flag & MNTK_MWAIT) 765 wakeup(mp); 766 return (error); 767 } 768 /* 769 * Clean up any journals still associated with the mount after 770 * filesystem activity has ceased. 771 */ 772 journal_remove_all_journals(mp, 773 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 774 775 mountlist_remove(mp); 776 777 /* 778 * Remove any installed vnode ops here so the individual VFSs don't 779 * have to. 780 */ 781 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 782 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 783 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 784 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 785 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 786 787 if (mp->mnt_ncmountpt.ncp != NULL) { 788 nch = mp->mnt_ncmountpt; 789 cache_zero(&mp->mnt_ncmountpt); 790 cache_clrmountpt(&nch); 791 cache_drop(&nch); 792 } 793 if (mp->mnt_ncmounton.ncp != NULL) { 794 nch = mp->mnt_ncmounton; 795 cache_zero(&mp->mnt_ncmounton); 796 cache_clrmountpt(&nch); 797 cache_drop(&nch); 798 } 799 800 mp->mnt_vfc->vfc_refcount--; 801 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 802 panic("unmount: dangling vnode"); 803 lockmgr(&mp->mnt_lock, LK_RELEASE); 804 if (mp->mnt_kern_flag & MNTK_MWAIT) 805 wakeup(mp); 806 if (freeok) 807 kfree(mp, M_MOUNT); 808 return (0); 809 } 810 811 static 812 void 813 mount_warning(struct mount *mp, const char *ctl, ...) 
814 { 815 char *ptr; 816 char *buf; 817 __va_list va; 818 819 __va_start(va, ctl); 820 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf, 0) == 0) { 821 kprintf("unmount(%s): ", ptr); 822 kvprintf(ctl, va); 823 kprintf("\n"); 824 kfree(buf, M_TEMP); 825 } else { 826 kprintf("unmount(%p", mp); 827 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 828 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 829 kprintf("): "); 830 kvprintf(ctl, va); 831 kprintf("\n"); 832 } 833 __va_end(va); 834 } 835 836 /* 837 * Shim cache_fullpath() to handle the case where a process is chrooted into 838 * a subdirectory of a mount. In this case if the root mount matches the 839 * process root directory's mount we have to specify the process's root 840 * directory instead of the mount point, because the mount point might 841 * be above the root directory. 842 */ 843 static 844 int 845 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 846 { 847 struct nchandle *nch; 848 849 if (p && p->p_fd->fd_nrdir.mount == mp) 850 nch = &p->p_fd->fd_nrdir; 851 else 852 nch = &mp->mnt_ncmountpt; 853 return(cache_fullpath(p, nch, rb, fb, 0)); 854 } 855 856 /* 857 * Sync each mounted filesystem. 858 */ 859 860 #ifdef DEBUG 861 static int syncprt = 0; 862 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 863 #endif /* DEBUG */ 864 865 static int sync_callback(struct mount *mp, void *data); 866 867 /* 868 * MPALMOSTSAFE 869 */ 870 int 871 sys_sync(struct sync_args *uap) 872 { 873 get_mplock(); 874 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 875 #ifdef DEBUG 876 /* 877 * print out buffer pool stat information on each sync() call. 
878 */ 879 if (syncprt) 880 vfs_bufstats(); 881 #endif /* DEBUG */ 882 rel_mplock(); 883 return (0); 884 } 885 886 static 887 int 888 sync_callback(struct mount *mp, void *data __unused) 889 { 890 int asyncflag; 891 892 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 893 asyncflag = mp->mnt_flag & MNT_ASYNC; 894 mp->mnt_flag &= ~MNT_ASYNC; 895 vfs_msync(mp, MNT_NOWAIT); 896 VFS_SYNC(mp, MNT_NOWAIT); 897 mp->mnt_flag |= asyncflag; 898 } 899 return(0); 900 } 901 902 /* XXX PRISON: could be per prison flag */ 903 static int prison_quotas; 904 #if 0 905 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 906 #endif 907 908 /* 909 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 910 * 911 * Change filesystem quotas. 912 * 913 * MPALMOSTSAFE 914 */ 915 int 916 sys_quotactl(struct quotactl_args *uap) 917 { 918 struct nlookupdata nd; 919 struct thread *td; 920 struct proc *p; 921 struct mount *mp; 922 int error; 923 924 get_mplock(); 925 td = curthread; 926 p = td->td_proc; 927 if (td->td_ucred->cr_prison && !prison_quotas) { 928 error = EPERM; 929 goto done; 930 } 931 932 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 933 if (error == 0) 934 error = nlookup(&nd); 935 if (error == 0) { 936 mp = nd.nl_nch.mount; 937 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 938 uap->arg, nd.nl_cred); 939 } 940 nlookup_done(&nd); 941 done: 942 rel_mplock(); 943 return (error); 944 } 945 946 /* 947 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 948 * void *buf, int buflen) 949 * 950 * This function operates on a mount point and executes the specified 951 * operation using the specified control data, and possibly returns data. 952 * 953 * The actual number of bytes stored in the result buffer is returned, 0 954 * if none, otherwise an error is returned. 
955 * 956 * MPALMOSTSAFE 957 */ 958 int 959 sys_mountctl(struct mountctl_args *uap) 960 { 961 struct thread *td = curthread; 962 struct proc *p = td->td_proc; 963 struct file *fp; 964 void *ctl = NULL; 965 void *buf = NULL; 966 char *path = NULL; 967 int error; 968 969 /* 970 * Sanity and permissions checks. We must be root. 971 */ 972 KKASSERT(p); 973 if (td->td_ucred->cr_prison != NULL) 974 return (EPERM); 975 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 976 (error = priv_check(td, PRIV_ROOT)) != 0) 977 return (error); 978 979 /* 980 * Argument length checks 981 */ 982 if (uap->ctllen < 0 || uap->ctllen > 1024) 983 return (EINVAL); 984 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 985 return (EINVAL); 986 if (uap->path == NULL) 987 return (EINVAL); 988 989 /* 990 * Allocate the necessary buffers and copyin data 991 */ 992 path = objcache_get(namei_oc, M_WAITOK); 993 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 994 if (error) 995 goto done; 996 997 if (uap->ctllen) { 998 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 999 error = copyin(uap->ctl, ctl, uap->ctllen); 1000 if (error) 1001 goto done; 1002 } 1003 if (uap->buflen) 1004 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1005 1006 /* 1007 * Validate the descriptor 1008 */ 1009 if (uap->fd >= 0) { 1010 fp = holdfp(p->p_fd, uap->fd, -1); 1011 if (fp == NULL) { 1012 error = EBADF; 1013 goto done; 1014 } 1015 } else { 1016 fp = NULL; 1017 } 1018 1019 /* 1020 * Execute the internal kernel function and clean up. 
1021 */ 1022 get_mplock(); 1023 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1024 rel_mplock(); 1025 if (fp) 1026 fdrop(fp); 1027 if (error == 0 && uap->sysmsg_result > 0) 1028 error = copyout(buf, uap->buf, uap->sysmsg_result); 1029 done: 1030 if (path) 1031 objcache_put(namei_oc, path); 1032 if (ctl) 1033 kfree(ctl, M_TEMP); 1034 if (buf) 1035 kfree(buf, M_TEMP); 1036 return (error); 1037 } 1038 1039 /* 1040 * Execute a mount control operation by resolving the path to a mount point 1041 * and calling vop_mountctl(). 1042 * 1043 * Use the mount point from the nch instead of the vnode so nullfs mounts 1044 * can properly spike the VOP. 1045 */ 1046 int 1047 kern_mountctl(const char *path, int op, struct file *fp, 1048 const void *ctl, int ctllen, 1049 void *buf, int buflen, int *res) 1050 { 1051 struct vnode *vp; 1052 struct mount *mp; 1053 struct nlookupdata nd; 1054 int error; 1055 1056 *res = 0; 1057 vp = NULL; 1058 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1059 if (error == 0) 1060 error = nlookup(&nd); 1061 if (error == 0) 1062 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1063 mp = nd.nl_nch.mount; 1064 nlookup_done(&nd); 1065 if (error) 1066 return (error); 1067 vn_unlock(vp); 1068 1069 /* 1070 * Must be the root of the filesystem 1071 */ 1072 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1073 vrele(vp); 1074 return (EINVAL); 1075 } 1076 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1077 buf, buflen, res); 1078 vrele(vp); 1079 return (error); 1080 } 1081 1082 int 1083 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1084 { 1085 struct thread *td = curthread; 1086 struct proc *p = td->td_proc; 1087 struct mount *mp; 1088 struct statfs *sp; 1089 char *fullpath, *freepath; 1090 int error; 1091 1092 if ((error = nlookup(nd)) != 0) 1093 return (error); 1094 mp = nd->nl_nch.mount; 1095 sp = &mp->mnt_stat; 1096 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1097 return (error); 1098 1099 error = mount_path(p, mp, &fullpath, &freepath); 1100 if (error) 1101 return(error); 1102 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1103 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1104 kfree(freepath, M_TEMP); 1105 1106 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1107 bcopy(sp, buf, sizeof(*buf)); 1108 /* Only root should have access to the fsid's. */ 1109 if (priv_check(td, PRIV_ROOT)) 1110 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1111 return (0); 1112 } 1113 1114 /* 1115 * statfs_args(char *path, struct statfs *buf) 1116 * 1117 * Get filesystem statistics. 1118 * 1119 * MPALMOSTSAFE 1120 */ 1121 int 1122 sys_statfs(struct statfs_args *uap) 1123 { 1124 struct nlookupdata nd; 1125 struct statfs buf; 1126 int error; 1127 1128 get_mplock(); 1129 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1130 if (error == 0) 1131 error = kern_statfs(&nd, &buf); 1132 nlookup_done(&nd); 1133 if (error == 0) 1134 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1135 rel_mplock(); 1136 return (error); 1137 } 1138 1139 /* 1140 * MPALMOSTSAFE 1141 */ 1142 int 1143 kern_fstatfs(int fd, struct statfs *buf) 1144 { 1145 struct thread *td = curthread; 1146 struct proc *p = td->td_proc; 1147 struct file *fp; 1148 struct mount *mp; 1149 struct statfs *sp; 1150 char *fullpath, *freepath; 1151 int error; 1152 1153 KKASSERT(p); 1154 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1155 return (error); 1156 get_mplock(); 1157 mp = ((struct vnode *)fp->f_data)->v_mount; 1158 if (mp == NULL) { 1159 error = EBADF; 1160 goto done; 1161 } 1162 if (fp->f_cred == NULL) { 1163 error = EINVAL; 1164 goto done; 1165 } 1166 sp = &mp->mnt_stat; 1167 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1168 goto done; 1169 1170 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1171 goto done; 1172 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1173 strlcpy(sp->f_mntonname, fullpath, 
sizeof(sp->f_mntonname)); 1174 kfree(freepath, M_TEMP); 1175 1176 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1177 bcopy(sp, buf, sizeof(*buf)); 1178 1179 /* Only root should have access to the fsid's. */ 1180 if (priv_check(td, PRIV_ROOT)) 1181 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1182 error = 0; 1183 done: 1184 rel_mplock(); 1185 fdrop(fp); 1186 return (error); 1187 } 1188 1189 /* 1190 * fstatfs_args(int fd, struct statfs *buf) 1191 * 1192 * Get filesystem statistics. 1193 * 1194 * MPSAFE 1195 */ 1196 int 1197 sys_fstatfs(struct fstatfs_args *uap) 1198 { 1199 struct statfs buf; 1200 int error; 1201 1202 error = kern_fstatfs(uap->fd, &buf); 1203 1204 if (error == 0) 1205 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1206 return (error); 1207 } 1208 1209 int 1210 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1211 { 1212 struct mount *mp; 1213 struct statvfs *sp; 1214 int error; 1215 1216 if ((error = nlookup(nd)) != 0) 1217 return (error); 1218 mp = nd->nl_nch.mount; 1219 sp = &mp->mnt_vstat; 1220 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1221 return (error); 1222 1223 sp->f_flag = 0; 1224 if (mp->mnt_flag & MNT_RDONLY) 1225 sp->f_flag |= ST_RDONLY; 1226 if (mp->mnt_flag & MNT_NOSUID) 1227 sp->f_flag |= ST_NOSUID; 1228 bcopy(sp, buf, sizeof(*buf)); 1229 return (0); 1230 } 1231 1232 /* 1233 * statfs_args(char *path, struct statfs *buf) 1234 * 1235 * Get filesystem statistics. 
1236 * 1237 * MPALMOSTSAFE 1238 */ 1239 int 1240 sys_statvfs(struct statvfs_args *uap) 1241 { 1242 struct nlookupdata nd; 1243 struct statvfs buf; 1244 int error; 1245 1246 get_mplock(); 1247 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1248 if (error == 0) 1249 error = kern_statvfs(&nd, &buf); 1250 nlookup_done(&nd); 1251 if (error == 0) 1252 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1253 rel_mplock(); 1254 return (error); 1255 } 1256 1257 int 1258 kern_fstatvfs(int fd, struct statvfs *buf) 1259 { 1260 struct thread *td = curthread; 1261 struct proc *p = td->td_proc; 1262 struct file *fp; 1263 struct mount *mp; 1264 struct statvfs *sp; 1265 int error; 1266 1267 KKASSERT(p); 1268 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1269 return (error); 1270 mp = ((struct vnode *)fp->f_data)->v_mount; 1271 if (mp == NULL) { 1272 error = EBADF; 1273 goto done; 1274 } 1275 if (fp->f_cred == NULL) { 1276 error = EINVAL; 1277 goto done; 1278 } 1279 sp = &mp->mnt_vstat; 1280 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1281 goto done; 1282 1283 sp->f_flag = 0; 1284 if (mp->mnt_flag & MNT_RDONLY) 1285 sp->f_flag |= ST_RDONLY; 1286 if (mp->mnt_flag & MNT_NOSUID) 1287 sp->f_flag |= ST_NOSUID; 1288 1289 bcopy(sp, buf, sizeof(*buf)); 1290 error = 0; 1291 done: 1292 fdrop(fp); 1293 return (error); 1294 } 1295 1296 /* 1297 * fstatfs_args(int fd, struct statfs *buf) 1298 * 1299 * Get filesystem statistics. 1300 * 1301 * MPALMOSTSAFE 1302 */ 1303 int 1304 sys_fstatvfs(struct fstatvfs_args *uap) 1305 { 1306 struct statvfs buf; 1307 int error; 1308 1309 get_mplock(); 1310 error = kern_fstatvfs(uap->fd, &buf); 1311 rel_mplock(); 1312 1313 if (error == 0) 1314 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1315 return (error); 1316 } 1317 1318 /* 1319 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1320 * 1321 * Get statistics on all filesystems. 
 */

/*
 * State shared between sys_getfsstat() and its mountlist_scan()
 * callback.
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* user-space output cursor (copyout target) */
	long count;		/* mounts counted so far */
	long maxcount;		/* capacity of the user buffer, in entries */
	int error;		/* error to return from the system call */
	int flags;		/* caller supplied MNT_* flags */
	struct thread *td;	/* calling thread */
};

static int getfsstat_callback(struct mount *, void *);

/*
 * System call entry point.  Scans the mount list forward and lets
 * getfsstat_callback() copy one struct statfs per mount to uap->buf.
 *
 * The result is the number of mounts counted, clamped to the number of
 * entries that fit in the buffer when a buffer was supplied and more
 * mounts were counted than fit.  With a NULL buffer the callback only
 * counts.  Returns info.error, which the callback sets when mount_path()
 * or copyout() fails.
 *
 * MPALMOSTSAFE
 */
int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	get_mplock();
	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	rel_mplock();
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * mountlist_scan() callback for sys_getfsstat().  data points at the
 * struct getfsstat_info.
 *
 * While a buffer is present and not yet full, the mount's statfs is
 * (optionally) refreshed, its f_flags and f_mntonname are rewritten and
 * the structure is copied out to the current cursor.  Mounts that are
 * not visible from the caller's chroot, or whose VFS_STATFS() fails,
 * are skipped without being counted.
 *
 * Returns 0 to go on with the next mount, or -1 after recording the
 * failure in info->error (presumably this stops the scan -- confirm
 * against mountlist_scan()).
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * NOTE(review): as written, VFS_STATFS() is called when
		 * neither MNT_LAZY nor MNT_NOWAIT is set, or whenever
		 * MNT_WAIT is set, which looks like MNT_WAIT taking
		 * precedence rather than the other way around -- confirm
		 * against the flag definitions.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		/* advance the user-space cursor to the next slot */
		++info->sfsp;
	}
	/* counted even when nothing was copied (no buffer, or buffer full) */
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long vbufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/*
 * State shared between sys_getvfsstat() and its mountlist_scan()
 * callback.
 */
struct getvfsstat_info {
	struct statfs *sfsp;	/* user-space statfs output cursor */
	struct statvfs *vsfsp;	/* user-space statvfs output cursor */
	long count;		/* mounts counted so far */
	long maxcount;		/* capacity of the statvfs buffer, in entries */
	int error;		/* error to return from the system call */
	int flags;		/* caller supplied MNT_* flags */
	struct thread *td;	/* calling thread */
};

static int getvfsstat_callback(struct mount *, void *);

/*
 * System call entry point.  Same scheme as sys_getfsstat(), but each
 * mount yields both a struct statfs (to uap->buf) and a struct statvfs
 * (to uap->vbuf).  The entry limit is derived from uap->vbufsize only,
 * and only the statvfs pointer is tested for NULL.
 *
 * MPALMOSTSAFE
 */
int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	get_mplock();
	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	rel_mplock();
	return (info.error);
}

/*
 * mountlist_scan() callback for sys_getvfsstat().  data points at the
 * struct getvfsstat_info.
 *
 * Mirrors getfsstat_callback(): mounts invisible from the caller's
 * chroot, or whose VFS_STATFS()/VFS_STATVFS() refresh fails, are
 * skipped without being counted.  Otherwise both cached structures are
 * updated and copied out, and both user-space cursors advance together.
 *
 * Returns 0 to continue, or -1 after storing the failure in
 * info->error.
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * NOTE(review): the expression refreshes whenever
		 * MNT_WAIT is set, regardless of the other two flags --
		 * confirm which behaviour is intended.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		/* same refresh rule, applied to the statvfs cache */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		/* statfs first; statvfs only if the first copyout worked */
		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
1526 * 1527 * MPALMOSTSAFE 1528 */ 1529 int 1530 sys_fchdir(struct fchdir_args *uap) 1531 { 1532 struct thread *td = curthread; 1533 struct proc *p = td->td_proc; 1534 struct filedesc *fdp = p->p_fd; 1535 struct vnode *vp, *ovp; 1536 struct mount *mp; 1537 struct file *fp; 1538 struct nchandle nch, onch, tnch; 1539 int error; 1540 1541 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1542 return (error); 1543 get_mplock(); 1544 vp = (struct vnode *)fp->f_data; 1545 vref(vp); 1546 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1547 if (fp->f_nchandle.ncp == NULL) 1548 error = ENOTDIR; 1549 else 1550 error = checkvp_chdir(vp, td); 1551 if (error) { 1552 vput(vp); 1553 goto done; 1554 } 1555 cache_copy(&fp->f_nchandle, &nch); 1556 1557 /* 1558 * If the ncp has become a mount point, traverse through 1559 * the mount point. 1560 */ 1561 1562 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1563 (mp = cache_findmount(&nch)) != NULL 1564 ) { 1565 error = nlookup_mp(mp, &tnch); 1566 if (error == 0) { 1567 cache_unlock(&tnch); /* leave ref intact */ 1568 vput(vp); 1569 vp = tnch.ncp->nc_vp; 1570 error = vget(vp, LK_SHARED); 1571 KKASSERT(error == 0); 1572 cache_drop(&nch); 1573 nch = tnch; 1574 } 1575 } 1576 if (error == 0) { 1577 ovp = fdp->fd_cdir; 1578 onch = fdp->fd_ncdir; 1579 vn_unlock(vp); /* leave ref intact */ 1580 fdp->fd_cdir = vp; 1581 fdp->fd_ncdir = nch; 1582 cache_drop(&onch); 1583 vrele(ovp); 1584 } else { 1585 cache_drop(&nch); 1586 vput(vp); 1587 } 1588 fdrop(fp); 1589 done: 1590 rel_mplock(); 1591 return (error); 1592 } 1593 1594 int 1595 kern_chdir(struct nlookupdata *nd) 1596 { 1597 struct thread *td = curthread; 1598 struct proc *p = td->td_proc; 1599 struct filedesc *fdp = p->p_fd; 1600 struct vnode *vp, *ovp; 1601 struct nchandle onch; 1602 int error; 1603 1604 if ((error = nlookup(nd)) != 0) 1605 return (error); 1606 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1607 return (ENOENT); 1608 if ((error = vget(vp, LK_SHARED)) != 0) 1609 return (error); 1610 
1611 error = checkvp_chdir(vp, td); 1612 vn_unlock(vp); 1613 if (error == 0) { 1614 ovp = fdp->fd_cdir; 1615 onch = fdp->fd_ncdir; 1616 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1617 fdp->fd_ncdir = nd->nl_nch; 1618 fdp->fd_cdir = vp; 1619 cache_drop(&onch); 1620 vrele(ovp); 1621 cache_zero(&nd->nl_nch); 1622 } else { 1623 vrele(vp); 1624 } 1625 return (error); 1626 } 1627 1628 /* 1629 * chdir_args(char *path) 1630 * 1631 * Change current working directory (``.''). 1632 * 1633 * MPALMOSTSAFE 1634 */ 1635 int 1636 sys_chdir(struct chdir_args *uap) 1637 { 1638 struct nlookupdata nd; 1639 int error; 1640 1641 get_mplock(); 1642 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1643 if (error == 0) 1644 error = kern_chdir(&nd); 1645 nlookup_done(&nd); 1646 rel_mplock(); 1647 return (error); 1648 } 1649 1650 /* 1651 * Helper function for raised chroot(2) security function: Refuse if 1652 * any filedescriptors are open directories. 1653 */ 1654 static int 1655 chroot_refuse_vdir_fds(struct filedesc *fdp) 1656 { 1657 struct vnode *vp; 1658 struct file *fp; 1659 int error; 1660 int fd; 1661 1662 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1663 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1664 continue; 1665 vp = (struct vnode *)fp->f_data; 1666 if (vp->v_type != VDIR) { 1667 fdrop(fp); 1668 continue; 1669 } 1670 fdrop(fp); 1671 return(EPERM); 1672 } 1673 return (0); 1674 } 1675 1676 /* 1677 * This sysctl determines if we will allow a process to chroot(2) if it 1678 * has a directory open: 1679 * 0: disallowed for all processes. 1680 * 1: allowed for processes that were not already chroot(2)'ed. 1681 * 2: allowed for all processes. 1682 */ 1683 1684 static int chroot_allow_open_directories = 1; 1685 1686 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1687 &chroot_allow_open_directories, 0, ""); 1688 1689 /* 1690 * chroot to the specified namecache entry. We obtain the vp from the 1691 * namecache data. 
 * The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 *
 * Steps, in order: privilege check (PRIV_VFS_CHROOT), the optional
 * "no open directory descriptors" policy controlled by
 * chroot_allow_open_directories, then validation of the target vnode
 * with checkvp_chdir().  On success the process root (fd_rdir/fd_nrdir)
 * is replaced, and the jail directory (fd_jdir/fd_njdir) is set as well
 * if it was still NULL.
 *
 * Returns 0, ENOENT when the namecache entry has no vnode, EPERM from
 * chroot_refuse_vdir_fds(), or the error from the privilege check,
 * vget() or checkvp_chdir().
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 *
	 * The scan runs when the sysctl is 0, or when it is 1 and the
	 * process root is not rootvnode (i.e. it is already chrooted).
	 */
	if (chroot_allow_open_directories == 0 ||
	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and 
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		if (fdp->fd_jdir == NULL) {
			/* fd_jdir needs its own vnode reference */
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 *
 * System call entry point: looks up uap->path (following symlinks, with
 * NLC_EXEC added to the lookup flags) and passes the resulting
 * namecache handle to kern_chroot(), all under the MP lock.
 *
 * MPALMOSTSAFE
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	rel_mplock();
	return(error);
}

/*
 * Looks up uap->path and, for a caller holding PRIV_VFS_CHROOT, passes
 * the resolved vnode and a held namecache handle to
 * vfs_cache_setroot(), logging the change with kprintf().
 *
 * Returns 0, ENOENT when the resolved entry has no vnode, or the error
 * from nlookup_init(), nlookup(), the privilege check or cache_vref().
 * Note that the privilege check happens after the lookup, and that
 * nlookup_done() is skipped when nlookup_init() itself failed.
 */
int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	/*
	 * NOTE(review): uap->path was given to nlookup_init() as a
	 * UIO_USERSPACE pointer, yet it is formatted with %s here --
	 * confirm that dereferencing it directly is safe.
	 */
	kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	rel_mplock();
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
/*
 * Returns ENOTDIR when vp is not a directory, otherwise the result of
 * VOP_EACCESS() for VEXEC using the thread's credential.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open the file described by the initialized lookup nd and install it
 * in the calling process's descriptor table.
 *
 *	oflags	open(2) style flags; an access mode equal to O_ACCMODE
 *		is rejected with EINVAL.
 *	mode	creation mode; masked with the process umask and
 *		ALLPERMS, with S_ISTXT cleared.
 *	res	receives the new descriptor number on success.
 *
 * nlookup_done(nd) is called here, right after vn_open().  Returns 0 or
 * an errno value; ERESTART from vn_open() is converted to EINTR.
 *
 * MPSAFE
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX lwp_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather than doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the 
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				/* dup failed: release the reserved slot */
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	/*
	 * O_EXLOCK / O_SHLOCK: take a whole-file advisory lock (start 0,
	 * length 0), write lock for O_EXLOCK and read lock otherwise.
	 * F_WAIT is requested unless FNONBLOCK is set.
	 */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	/* drop the private vnode reference taken with vref() above */
	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	return (0);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * System call entry point; the new descriptor is returned through
 * uap->sysmsg_result.  nlookup_done() is called here as well as inside
 * kern_open(), so it is evidently expected to tolerate a second call.
 *
 * MPALMOSTSAFE
 */
int
sys_open(struct open_args *uap)
{
	CACHE_MPLOCK_DECLARE;
	struct nlookupdata nd;
	int error;

	CACHE_GETMPLOCK1();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	CACHE_RELMPLOCK();
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 *
 * Like sys_open(), but the lookup is set up with nlookup_init_at() from
 * uap->fd and uap->path and torn down with nlookup_done_at(), which is
 * also handed the file pointer produced by nlookup_init_at().
 *
 * MPALMOSTSAFE
 */
int
sys_openat(struct openat_args *uap)
{
	CACHE_MPLOCK_DECLARE;
	struct nlookupdata nd;
	int error;
	struct file *fp;

	CACHE_GETMPLOCK1();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode, 
					&uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	CACHE_RELMPLOCK();
	return (error);
}

/*
 * Create a special node (or a whiteout) at the location described by
 * the initialized lookup nd.
 *
 *	mode	 file type in the S_IFMT bits plus permission bits; the
 *		 permissions are masked with ALLPERMS and the umask.
 *	rmajor,
 *	rminor	 device numbers stored in the new node's attributes.
 *
 * Each supported type requires its own privilege; any other type
 * yields EINVAL.  Returns EEXIST when the name already resolves to a
 * vnode, or the error from the privilege check, nlookup(),
 * ncp_writechk() or the VOP.  nlookup_done() is left to the caller.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		/* whiteouts go through VOP_NWHITEOUT(); va_type is unused */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		/* the new vnode is not needed here; release it */
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
2091 * 2092 * MPALMOSTSAFE 2093 */ 2094 int 2095 sys_mknod(struct mknod_args *uap) 2096 { 2097 struct nlookupdata nd; 2098 int error; 2099 2100 get_mplock(); 2101 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2102 if (error == 0) { 2103 error = kern_mknod(&nd, uap->mode, 2104 umajor(uap->dev), uminor(uap->dev)); 2105 } 2106 nlookup_done(&nd); 2107 rel_mplock(); 2108 return (error); 2109 } 2110 2111 int 2112 kern_mkfifo(struct nlookupdata *nd, int mode) 2113 { 2114 struct thread *td = curthread; 2115 struct proc *p = td->td_proc; 2116 struct vattr vattr; 2117 struct vnode *vp; 2118 int error; 2119 2120 bwillinode(1); 2121 2122 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2123 if ((error = nlookup(nd)) != 0) 2124 return (error); 2125 if (nd->nl_nch.ncp->nc_vp) 2126 return (EEXIST); 2127 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2128 return (error); 2129 2130 VATTR_NULL(&vattr); 2131 vattr.va_type = VFIFO; 2132 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2133 vp = NULL; 2134 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2135 if (error == 0) 2136 vput(vp); 2137 return (error); 2138 } 2139 2140 /* 2141 * mkfifo_args(char *path, int mode) 2142 * 2143 * Create a named pipe. 
 *
 * System call entry point: initializes a lookup for uap->path and lets
 * kern_mkfifo() create the FIFO, all under the MP lock.
 *
 * MPALMOSTSAFE
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * Optional hard link restrictions, both off by default and consulted by
 * can_hardlink() below.
 */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0, 
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether cred may create a hard link to vp under the
 * hardlink_check_uid / hardlink_check_gid policy.
 *
 * Returns 0 when linking is allowed, EPERM when a policy check fails,
 * or the error from VOP_GETATTR().  The td argument is not used in the
 * body.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);
	
	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}
	
	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Create a hard link: nd describes the existing file, linknd the new
 * name.  Both lookups must have been initialized by the caller, which
 * is also responsible for nlookup_done() on both.
 *
 * Returns EPERM when the source is a directory or can_hardlink()
 * refuses, EEXIST when the target name already resolves to a vnode, or
 * the error from the lookups, ncp_writechk(), vget(), vn_lock() or
 * VOP_NLINK().
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	/* the vnode lock is dropped too; the reference from vget() stays */
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	/* re-lock the source vnode for the link operation */
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
2281 * 2282 * MPALMOSTSAFE 2283 */ 2284 int 2285 sys_link(struct link_args *uap) 2286 { 2287 struct nlookupdata nd, linknd; 2288 int error; 2289 2290 get_mplock(); 2291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2292 if (error == 0) { 2293 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2294 if (error == 0) 2295 error = kern_link(&nd, &linknd); 2296 nlookup_done(&linknd); 2297 } 2298 nlookup_done(&nd); 2299 rel_mplock(); 2300 return (error); 2301 } 2302 2303 int 2304 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2305 { 2306 struct vattr vattr; 2307 struct vnode *vp; 2308 struct vnode *dvp; 2309 int error; 2310 2311 bwillinode(1); 2312 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2313 if ((error = nlookup(nd)) != 0) 2314 return (error); 2315 if (nd->nl_nch.ncp->nc_vp) 2316 return (EEXIST); 2317 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2318 return (error); 2319 dvp = nd->nl_dvp; 2320 VATTR_NULL(&vattr); 2321 vattr.va_mode = mode; 2322 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2323 if (error == 0) 2324 vput(vp); 2325 return (error); 2326 } 2327 2328 /* 2329 * symlink(char *path, char *link) 2330 * 2331 * Make a symbolic link. 2332 * 2333 * MPALMOSTSAFE 2334 */ 2335 int 2336 sys_symlink(struct symlink_args *uap) 2337 { 2338 struct thread *td = curthread; 2339 struct nlookupdata nd; 2340 char *path; 2341 int error; 2342 int mode; 2343 2344 path = objcache_get(namei_oc, M_WAITOK); 2345 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2346 if (error == 0) { 2347 get_mplock(); 2348 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2349 if (error == 0) { 2350 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2351 error = kern_symlink(&nd, path, mode); 2352 } 2353 nlookup_done(&nd); 2354 rel_mplock(); 2355 } 2356 objcache_put(namei_oc, path); 2357 return (error); 2358 } 2359 2360 /* 2361 * undelete_args(char *path) 2362 * 2363 * Delete a whiteout from the filesystem. 
2364 * 2365 * MPALMOSTSAFE 2366 */ 2367 int 2368 sys_undelete(struct undelete_args *uap) 2369 { 2370 struct nlookupdata nd; 2371 int error; 2372 2373 get_mplock(); 2374 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2375 bwillinode(1); 2376 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2377 if (error == 0) 2378 error = nlookup(&nd); 2379 if (error == 0) 2380 error = ncp_writechk(&nd.nl_nch); 2381 if (error == 0) { 2382 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2383 NAMEI_DELETE); 2384 } 2385 nlookup_done(&nd); 2386 rel_mplock(); 2387 return (error); 2388 } 2389 2390 int 2391 kern_unlink(struct nlookupdata *nd) 2392 { 2393 int error; 2394 2395 bwillinode(1); 2396 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2397 if ((error = nlookup(nd)) != 0) 2398 return (error); 2399 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2400 return (error); 2401 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2402 return (error); 2403 } 2404 2405 /* 2406 * unlink_args(char *path) 2407 * 2408 * Delete a name from the filesystem. 2409 * 2410 * MPALMOSTSAFE 2411 */ 2412 int 2413 sys_unlink(struct unlink_args *uap) 2414 { 2415 struct nlookupdata nd; 2416 int error; 2417 2418 get_mplock(); 2419 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2420 if (error == 0) 2421 error = kern_unlink(&nd); 2422 nlookup_done(&nd); 2423 rel_mplock(); 2424 return (error); 2425 } 2426 2427 2428 /* 2429 * unlinkat_args(int fd, char *path, int flags) 2430 * 2431 * Delete the file or directory entry pointed to by fd/path. 
 *
 * Any flag other than AT_REMOVEDIR yields EINVAL.  With AT_REMOVEDIR
 * the entry is removed through kern_rmdir(), otherwise through
 * kern_unlink().
 *
 * MPALMOSTSAFE
 */
int
sys_unlinkat(struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

/*
 * Reposition the offset of file descriptor fd.
 *
 *	whence	L_SET (absolute), L_INCR (relative to the current
 *		offset) or L_XTND (relative to the file size reported
 *		by VOP_GETATTR()); anything else yields EINVAL.
 *	res	receives the file's offset as it stands when the
 *		function finishes the locked section, i.e. the new
 *		offset on success and the unchanged one on failure.
 *		It is not written on the EBADF / ESPIPE paths.
 *
 * Returns EBADF for an invalid descriptor, ESPIPE when the file is not
 * of type DTYPE_VNODE, EINVAL for a bad whence or a negative result on
 * a regular file or directory, or the error from VOP_GETATTR().
 *
 * Every arm of the switch acquires fp->f_spin before leaving the
 * switch; the lock is released after *res has been stored.
 *
 * MPALMOSTSAFE
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	switch (whence) {
	case L_INCR:
		/* f_offset is read under the spinlock */
		spin_lock_wr(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/*
		 * The attribute fetch runs under the MP lock and before
		 * the spinlock is taken.  new_offset is computed even if
		 * the fetch failed, but is only stored when error == 0.
		 */
		get_mplock();
		error = VOP_GETATTR(vp, &vattr);
		rel_mplock();
		spin_lock_wr(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock_wr(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock_wr(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
	 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	*res = fp->f_offset;
	spin_unlock_wr(&fp->f_spin);
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 *
 * Thin wrapper around kern_lseek(); the resulting offset is returned
 * through uap->sysmsg_offset.
 *
 * MPSAFE
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask of *_OK
 * access bits.  flags is a bitmask of AT_* flags.
 *
 * Only AT_EACCESS is accepted in flags; anything else yields EINVAL.
 * nd must be an initialized lookup; nlookup_done() is left to the
 * caller.  Returns 0 when access is granted (or, for amode == 0, when
 * the file exists), otherwise the error from the lookup, cache_vget(),
 * vn_writechk() or VOP_ACCESS_FLAGS().
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	if (flags & ~AT_EACCESS)
		return (EINVAL);
	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* amode == 0 means only check for existence. */
	if (amode) {
		/* translate the *_OK bits into VREAD/VWRITE/VEXEC */
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		/* vn_writechk() is consulted only when write access is asked */
		if ((mode & VWRITE) == 0 || 
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 *
 * uap->flags carries the *_OK access bits and is passed to
 * kern_access() as amode; the AT_* flags argument is always 0 here.
 *
 * MPALMOSTSAFE
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 *
 * Same as sys_access(), but the lookup is set up with
 * nlookup_init_at() from uap->fd and uap->path, and uap->flags is
 * forwarded as the AT_* flags.
 *
 * MPALMOSTSAFE
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}


/*
 * Resolve the initialized lookup nd and fill *st (kernel memory) via
 * vn_stat().
 *
 * Returns ENOENT when the resolved entry has no vnode, or the error
 * from nlookup(), vget(), vn_stat() or cache_resolve().  On ESTALE the
 * namecache entry is re-resolved and the stat is retried.
 * nlookup_done() is left to the caller.
 *
 * MPSAFE
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;
	thread_t td;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	/* td is assigned here but not otherwise used in this function */
	td = curthread;
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 *
 * The struct stat is filled in kernel memory by kern_stat() and copied
 * out to uap->ub on success.
 *
 * MPSAFE
 */
int
sys_stat(struct stat_args *uap)
{
	CACHE_MPLOCK_DECLARE;
	struct nlookupdata nd;
	struct stat st;
	int error;

	CACHE_GETMPLOCK1();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	CACHE_RELMPLOCK();
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 *
 * Identical to sys_stat() except that the lookup is initialized
 * without NLC_FOLLOW.
 *
 * MPALMOSTSAFE
 */
int
sys_lstat(struct lstat_args *uap)
{
	CACHE_MPLOCK_DECLARE;
	struct nlookupdata nd;
	struct stat st;
	int error;

	CACHE_GETMPLOCK1();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	CACHE_RELMPLOCK();
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 *
 * MPALMOSTSAFE
 */
int
sys_fstatat(struct fstatat_args *uap)
{
	CACHE_MPLOCK_DECLARE;
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ?
0 : NLC_FOLLOW; 2759 2760 CACHE_GETMPLOCK1(); 2761 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2762 UIO_USERSPACE, flags); 2763 if (error == 0) { 2764 error = kern_stat(&nd, &st); 2765 if (error == 0) 2766 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2767 } 2768 nlookup_done_at(&nd, fp); 2769 CACHE_RELMPLOCK(); 2770 return (error); 2771 } 2772 2773 /* 2774 * pathconf_Args(char *path, int name) 2775 * 2776 * Get configurable pathname variables. 2777 * 2778 * MPALMOSTSAFE 2779 */ 2780 int 2781 sys_pathconf(struct pathconf_args *uap) 2782 { 2783 struct nlookupdata nd; 2784 struct vnode *vp; 2785 int error; 2786 2787 vp = NULL; 2788 get_mplock(); 2789 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2790 if (error == 0) 2791 error = nlookup(&nd); 2792 if (error == 0) 2793 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2794 nlookup_done(&nd); 2795 if (error == 0) { 2796 error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg); 2797 vput(vp); 2798 } 2799 rel_mplock(); 2800 return (error); 2801 } 2802 2803 /* 2804 * XXX: daver 2805 * kern_readlink isn't properly split yet. There is a copyin burried 2806 * in VOP_READLINK(). 
2807 */ 2808 int 2809 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2810 { 2811 struct thread *td = curthread; 2812 struct vnode *vp; 2813 struct iovec aiov; 2814 struct uio auio; 2815 int error; 2816 2817 if ((error = nlookup(nd)) != 0) 2818 return (error); 2819 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2820 if (error) 2821 return (error); 2822 if (vp->v_type != VLNK) { 2823 error = EINVAL; 2824 } else { 2825 aiov.iov_base = buf; 2826 aiov.iov_len = count; 2827 auio.uio_iov = &aiov; 2828 auio.uio_iovcnt = 1; 2829 auio.uio_offset = 0; 2830 auio.uio_rw = UIO_READ; 2831 auio.uio_segflg = UIO_USERSPACE; 2832 auio.uio_td = td; 2833 auio.uio_resid = count; 2834 error = VOP_READLINK(vp, &auio, td->td_ucred); 2835 } 2836 vput(vp); 2837 *res = count - auio.uio_resid; 2838 return (error); 2839 } 2840 2841 /* 2842 * readlink_args(char *path, char *buf, int count) 2843 * 2844 * Return target name of a symbolic link. 2845 * 2846 * MPALMOSTSAFE 2847 */ 2848 int 2849 sys_readlink(struct readlink_args *uap) 2850 { 2851 struct nlookupdata nd; 2852 int error; 2853 2854 get_mplock(); 2855 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2856 if (error == 0) { 2857 error = kern_readlink(&nd, uap->buf, uap->count, 2858 &uap->sysmsg_result); 2859 } 2860 nlookup_done(&nd); 2861 rel_mplock(); 2862 return (error); 2863 } 2864 2865 static int 2866 setfflags(struct vnode *vp, int flags) 2867 { 2868 struct thread *td = curthread; 2869 int error; 2870 struct vattr vattr; 2871 2872 /* 2873 * Prevent non-root users from setting flags on devices. When 2874 * a device is reused, users can retain ownership of the device 2875 * if they are allowed to set flags and programs assume that 2876 * chown can't fail when done as root. 
2877 */ 2878 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2879 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2880 return (error); 2881 2882 /* 2883 * note: vget is required for any operation that might mod the vnode 2884 * so VINACTIVE is properly cleared. 2885 */ 2886 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2887 VATTR_NULL(&vattr); 2888 vattr.va_flags = flags; 2889 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2890 vput(vp); 2891 } 2892 return (error); 2893 } 2894 2895 /* 2896 * chflags(char *path, int flags) 2897 * 2898 * Change flags of a file given a path name. 2899 * 2900 * MPALMOSTSAFE 2901 */ 2902 int 2903 sys_chflags(struct chflags_args *uap) 2904 { 2905 struct nlookupdata nd; 2906 struct vnode *vp; 2907 int error; 2908 2909 vp = NULL; 2910 get_mplock(); 2911 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2912 if (error == 0) 2913 error = nlookup(&nd); 2914 if (error == 0) 2915 error = ncp_writechk(&nd.nl_nch); 2916 if (error == 0) 2917 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2918 nlookup_done(&nd); 2919 if (error == 0) { 2920 error = setfflags(vp, uap->flags); 2921 vrele(vp); 2922 } 2923 rel_mplock(); 2924 return (error); 2925 } 2926 2927 /* 2928 * lchflags(char *path, int flags) 2929 * 2930 * Change flags of a file given a path name, but don't follow symlinks. 
2931 * 2932 * MPALMOSTSAFE 2933 */ 2934 int 2935 sys_lchflags(struct lchflags_args *uap) 2936 { 2937 struct nlookupdata nd; 2938 struct vnode *vp; 2939 int error; 2940 2941 vp = NULL; 2942 get_mplock(); 2943 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2944 if (error == 0) 2945 error = nlookup(&nd); 2946 if (error == 0) 2947 error = ncp_writechk(&nd.nl_nch); 2948 if (error == 0) 2949 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2950 nlookup_done(&nd); 2951 if (error == 0) { 2952 error = setfflags(vp, uap->flags); 2953 vrele(vp); 2954 } 2955 rel_mplock(); 2956 return (error); 2957 } 2958 2959 /* 2960 * fchflags_args(int fd, int flags) 2961 * 2962 * Change flags of a file given a file descriptor. 2963 * 2964 * MPALMOSTSAFE 2965 */ 2966 int 2967 sys_fchflags(struct fchflags_args *uap) 2968 { 2969 struct thread *td = curthread; 2970 struct proc *p = td->td_proc; 2971 struct file *fp; 2972 int error; 2973 2974 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2975 return (error); 2976 get_mplock(); 2977 if (fp->f_nchandle.ncp) 2978 error = ncp_writechk(&fp->f_nchandle); 2979 if (error == 0) 2980 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2981 rel_mplock(); 2982 fdrop(fp); 2983 return (error); 2984 } 2985 2986 static int 2987 setfmode(struct vnode *vp, int mode) 2988 { 2989 struct thread *td = curthread; 2990 int error; 2991 struct vattr vattr; 2992 2993 /* 2994 * note: vget is required for any operation that might mod the vnode 2995 * so VINACTIVE is properly cleared. 
2996 */ 2997 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2998 VATTR_NULL(&vattr); 2999 vattr.va_mode = mode & ALLPERMS; 3000 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3001 vput(vp); 3002 } 3003 return error; 3004 } 3005 3006 int 3007 kern_chmod(struct nlookupdata *nd, int mode) 3008 { 3009 struct vnode *vp; 3010 int error; 3011 3012 if ((error = nlookup(nd)) != 0) 3013 return (error); 3014 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3015 return (error); 3016 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3017 error = setfmode(vp, mode); 3018 vrele(vp); 3019 return (error); 3020 } 3021 3022 /* 3023 * chmod_args(char *path, int mode) 3024 * 3025 * Change mode of a file given path name. 3026 * 3027 * MPALMOSTSAFE 3028 */ 3029 int 3030 sys_chmod(struct chmod_args *uap) 3031 { 3032 struct nlookupdata nd; 3033 int error; 3034 3035 get_mplock(); 3036 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3037 if (error == 0) 3038 error = kern_chmod(&nd, uap->mode); 3039 nlookup_done(&nd); 3040 rel_mplock(); 3041 return (error); 3042 } 3043 3044 /* 3045 * lchmod_args(char *path, int mode) 3046 * 3047 * Change mode of a file given path name (don't follow links.) 3048 * 3049 * MPALMOSTSAFE 3050 */ 3051 int 3052 sys_lchmod(struct lchmod_args *uap) 3053 { 3054 struct nlookupdata nd; 3055 int error; 3056 3057 get_mplock(); 3058 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3059 if (error == 0) 3060 error = kern_chmod(&nd, uap->mode); 3061 nlookup_done(&nd); 3062 rel_mplock(); 3063 return (error); 3064 } 3065 3066 /* 3067 * fchmod_args(int fd, int mode) 3068 * 3069 * Change mode of a file given a file descriptor. 
3070 * 3071 * MPALMOSTSAFE 3072 */ 3073 int 3074 sys_fchmod(struct fchmod_args *uap) 3075 { 3076 struct thread *td = curthread; 3077 struct proc *p = td->td_proc; 3078 struct file *fp; 3079 int error; 3080 3081 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3082 return (error); 3083 get_mplock(); 3084 if (fp->f_nchandle.ncp) 3085 error = ncp_writechk(&fp->f_nchandle); 3086 if (error == 0) 3087 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3088 rel_mplock(); 3089 fdrop(fp); 3090 return (error); 3091 } 3092 3093 /* 3094 * fchmodat_args(char *path, int mode) 3095 * 3096 * Change mode of a file pointed to by fd/path. 3097 * 3098 * MPALMOSTSAFE 3099 */ 3100 int 3101 sys_fchmodat(struct fchmodat_args *uap) 3102 { 3103 struct nlookupdata nd; 3104 struct file *fp; 3105 int error; 3106 int flags; 3107 3108 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3109 return (EINVAL); 3110 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3111 3112 get_mplock(); 3113 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3114 UIO_USERSPACE, flags); 3115 if (error == 0) 3116 error = kern_chmod(&nd, uap->mode); 3117 nlookup_done_at(&nd, fp); 3118 rel_mplock(); 3119 return (error); 3120 } 3121 3122 static int 3123 setfown(struct vnode *vp, uid_t uid, gid_t gid) 3124 { 3125 struct thread *td = curthread; 3126 int error; 3127 struct vattr vattr; 3128 3129 /* 3130 * note: vget is required for any operation that might mod the vnode 3131 * so VINACTIVE is properly cleared. 
3132 */ 3133 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3134 VATTR_NULL(&vattr); 3135 vattr.va_uid = uid; 3136 vattr.va_gid = gid; 3137 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3138 vput(vp); 3139 } 3140 return error; 3141 } 3142 3143 int 3144 kern_chown(struct nlookupdata *nd, int uid, int gid) 3145 { 3146 struct vnode *vp; 3147 int error; 3148 3149 if ((error = nlookup(nd)) != 0) 3150 return (error); 3151 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3152 return (error); 3153 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3154 error = setfown(vp, uid, gid); 3155 vrele(vp); 3156 return (error); 3157 } 3158 3159 /* 3160 * chown(char *path, int uid, int gid) 3161 * 3162 * Set ownership given a path name. 3163 * 3164 * MPALMOSTSAFE 3165 */ 3166 int 3167 sys_chown(struct chown_args *uap) 3168 { 3169 struct nlookupdata nd; 3170 int error; 3171 3172 get_mplock(); 3173 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3174 if (error == 0) 3175 error = kern_chown(&nd, uap->uid, uap->gid); 3176 nlookup_done(&nd); 3177 rel_mplock(); 3178 return (error); 3179 } 3180 3181 /* 3182 * lchown_args(char *path, int uid, int gid) 3183 * 3184 * Set ownership given a path name, do not cross symlinks. 3185 * 3186 * MPALMOSTSAFE 3187 */ 3188 int 3189 sys_lchown(struct lchown_args *uap) 3190 { 3191 struct nlookupdata nd; 3192 int error; 3193 3194 get_mplock(); 3195 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3196 if (error == 0) 3197 error = kern_chown(&nd, uap->uid, uap->gid); 3198 nlookup_done(&nd); 3199 rel_mplock(); 3200 return (error); 3201 } 3202 3203 /* 3204 * fchown_args(int fd, int uid, int gid) 3205 * 3206 * Set ownership given a file descriptor. 
3207 * 3208 * MPALMOSTSAFE 3209 */ 3210 int 3211 sys_fchown(struct fchown_args *uap) 3212 { 3213 struct thread *td = curthread; 3214 struct proc *p = td->td_proc; 3215 struct file *fp; 3216 int error; 3217 3218 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3219 return (error); 3220 get_mplock(); 3221 if (fp->f_nchandle.ncp) 3222 error = ncp_writechk(&fp->f_nchandle); 3223 if (error == 0) 3224 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 3225 rel_mplock(); 3226 fdrop(fp); 3227 return (error); 3228 } 3229 3230 /* 3231 * fchownat(int fd, char *path, int uid, int gid, int flags) 3232 * 3233 * Set ownership of file pointed to by fd/path. 3234 * 3235 * MPALMOSTSAFE 3236 */ 3237 int 3238 sys_fchownat(struct fchownat_args *uap) 3239 { 3240 struct nlookupdata nd; 3241 struct file *fp; 3242 int error; 3243 int flags; 3244 3245 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3246 return (EINVAL); 3247 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3248 3249 get_mplock(); 3250 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3251 UIO_USERSPACE, flags); 3252 if (error == 0) 3253 error = kern_chown(&nd, uap->uid, uap->gid); 3254 nlookup_done_at(&nd, fp); 3255 rel_mplock(); 3256 return (error); 3257 } 3258 3259 3260 static int 3261 getutimes(const struct timeval *tvp, struct timespec *tsp) 3262 { 3263 struct timeval tv[2]; 3264 3265 if (tvp == NULL) { 3266 microtime(&tv[0]); 3267 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3268 tsp[1] = tsp[0]; 3269 } else { 3270 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3271 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3272 } 3273 return 0; 3274 } 3275 3276 static int 3277 setutimes(struct vnode *vp, struct vattr *vattr, 3278 const struct timespec *ts, int nullflag) 3279 { 3280 struct thread *td = curthread; 3281 int error; 3282 3283 VATTR_NULL(vattr); 3284 vattr->va_atime = ts[0]; 3285 vattr->va_mtime = ts[1]; 3286 if (nullflag) 3287 vattr->va_vaflags |= VA_UTIMES_NULL; 3288 error = VOP_SETATTR(vp, vattr, td->td_ucred); 
3289 3290 return error; 3291 } 3292 3293 int 3294 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3295 { 3296 struct timespec ts[2]; 3297 struct vnode *vp; 3298 struct vattr vattr; 3299 int error; 3300 3301 if ((error = getutimes(tptr, ts)) != 0) 3302 return (error); 3303 3304 /* 3305 * NOTE: utimes() succeeds for the owner even if the file 3306 * is not user-writable. 3307 */ 3308 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3309 3310 if ((error = nlookup(nd)) != 0) 3311 return (error); 3312 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3313 return (error); 3314 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3315 return (error); 3316 3317 /* 3318 * note: vget is required for any operation that might mod the vnode 3319 * so VINACTIVE is properly cleared. 3320 */ 3321 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3322 error = vget(vp, LK_EXCLUSIVE); 3323 if (error == 0) { 3324 error = setutimes(vp, &vattr, ts, (tptr == NULL)); 3325 vput(vp); 3326 } 3327 } 3328 vrele(vp); 3329 return (error); 3330 } 3331 3332 /* 3333 * utimes_args(char *path, struct timeval *tptr) 3334 * 3335 * Set the access and modification times of a file. 3336 * 3337 * MPALMOSTSAFE 3338 */ 3339 int 3340 sys_utimes(struct utimes_args *uap) 3341 { 3342 struct timeval tv[2]; 3343 struct nlookupdata nd; 3344 int error; 3345 3346 if (uap->tptr) { 3347 error = copyin(uap->tptr, tv, sizeof(tv)); 3348 if (error) 3349 return (error); 3350 } 3351 get_mplock(); 3352 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3353 if (error == 0) 3354 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3355 nlookup_done(&nd); 3356 rel_mplock(); 3357 return (error); 3358 } 3359 3360 /* 3361 * lutimes_args(char *path, struct timeval *tptr) 3362 * 3363 * Set the access and modification times of a file. 
3364 * 3365 * MPALMOSTSAFE 3366 */ 3367 int 3368 sys_lutimes(struct lutimes_args *uap) 3369 { 3370 struct timeval tv[2]; 3371 struct nlookupdata nd; 3372 int error; 3373 3374 if (uap->tptr) { 3375 error = copyin(uap->tptr, tv, sizeof(tv)); 3376 if (error) 3377 return (error); 3378 } 3379 get_mplock(); 3380 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3381 if (error == 0) 3382 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3383 nlookup_done(&nd); 3384 rel_mplock(); 3385 return (error); 3386 } 3387 3388 /* 3389 * Set utimes on a file descriptor. The creds used to open the 3390 * file are used to determine whether the operation is allowed 3391 * or not. 3392 */ 3393 int 3394 kern_futimes(int fd, struct timeval *tptr) 3395 { 3396 struct thread *td = curthread; 3397 struct proc *p = td->td_proc; 3398 struct timespec ts[2]; 3399 struct file *fp; 3400 struct vnode *vp; 3401 struct vattr vattr; 3402 int error; 3403 3404 error = getutimes(tptr, ts); 3405 if (error) 3406 return (error); 3407 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3408 return (error); 3409 if (fp->f_nchandle.ncp) 3410 error = ncp_writechk(&fp->f_nchandle); 3411 if (error == 0) { 3412 vp = fp->f_data; 3413 error = vget(vp, LK_EXCLUSIVE); 3414 if (error == 0) { 3415 error = VOP_GETATTR(vp, &vattr); 3416 if (error == 0) { 3417 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3418 fp->f_cred); 3419 } 3420 if (error == 0) { 3421 error = setutimes(vp, &vattr, ts, 3422 (tptr == NULL)); 3423 } 3424 vput(vp); 3425 } 3426 } 3427 fdrop(fp); 3428 return (error); 3429 } 3430 3431 /* 3432 * futimes_args(int fd, struct timeval *tptr) 3433 * 3434 * Set the access and modification times of a file. 
3435 * 3436 * MPALMOSTSAFE 3437 */ 3438 int 3439 sys_futimes(struct futimes_args *uap) 3440 { 3441 struct timeval tv[2]; 3442 int error; 3443 3444 if (uap->tptr) { 3445 error = copyin(uap->tptr, tv, sizeof(tv)); 3446 if (error) 3447 return (error); 3448 } 3449 get_mplock(); 3450 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3451 rel_mplock(); 3452 3453 return (error); 3454 } 3455 3456 int 3457 kern_truncate(struct nlookupdata *nd, off_t length) 3458 { 3459 struct vnode *vp; 3460 struct vattr vattr; 3461 int error; 3462 3463 if (length < 0) 3464 return(EINVAL); 3465 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3466 if ((error = nlookup(nd)) != 0) 3467 return (error); 3468 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3469 return (error); 3470 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3471 return (error); 3472 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3473 vrele(vp); 3474 return (error); 3475 } 3476 if (vp->v_type == VDIR) { 3477 error = EISDIR; 3478 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3479 VATTR_NULL(&vattr); 3480 vattr.va_size = length; 3481 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3482 } 3483 vput(vp); 3484 return (error); 3485 } 3486 3487 /* 3488 * truncate(char *path, int pad, off_t length) 3489 * 3490 * Truncate a file given its path name. 
3491 * 3492 * MPALMOSTSAFE 3493 */ 3494 int 3495 sys_truncate(struct truncate_args *uap) 3496 { 3497 struct nlookupdata nd; 3498 int error; 3499 3500 get_mplock(); 3501 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3502 if (error == 0) 3503 error = kern_truncate(&nd, uap->length); 3504 nlookup_done(&nd); 3505 rel_mplock(); 3506 return error; 3507 } 3508 3509 int 3510 kern_ftruncate(int fd, off_t length) 3511 { 3512 struct thread *td = curthread; 3513 struct proc *p = td->td_proc; 3514 struct vattr vattr; 3515 struct vnode *vp; 3516 struct file *fp; 3517 int error; 3518 3519 if (length < 0) 3520 return(EINVAL); 3521 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3522 return (error); 3523 if (fp->f_nchandle.ncp) { 3524 error = ncp_writechk(&fp->f_nchandle); 3525 if (error) 3526 goto done; 3527 } 3528 if ((fp->f_flag & FWRITE) == 0) { 3529 error = EINVAL; 3530 goto done; 3531 } 3532 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3533 error = EINVAL; 3534 goto done; 3535 } 3536 vp = (struct vnode *)fp->f_data; 3537 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3538 if (vp->v_type == VDIR) { 3539 error = EISDIR; 3540 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3541 VATTR_NULL(&vattr); 3542 vattr.va_size = length; 3543 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3544 } 3545 vn_unlock(vp); 3546 done: 3547 fdrop(fp); 3548 return (error); 3549 } 3550 3551 /* 3552 * ftruncate_args(int fd, int pad, off_t length) 3553 * 3554 * Truncate a file given a file descriptor. 3555 * 3556 * MPALMOSTSAFE 3557 */ 3558 int 3559 sys_ftruncate(struct ftruncate_args *uap) 3560 { 3561 int error; 3562 3563 get_mplock(); 3564 error = kern_ftruncate(uap->fd, uap->length); 3565 rel_mplock(); 3566 3567 return (error); 3568 } 3569 3570 /* 3571 * fsync(int fd) 3572 * 3573 * Sync an open file. 
3574 * 3575 * MPALMOSTSAFE 3576 */ 3577 int 3578 sys_fsync(struct fsync_args *uap) 3579 { 3580 struct thread *td = curthread; 3581 struct proc *p = td->td_proc; 3582 struct vnode *vp; 3583 struct file *fp; 3584 vm_object_t obj; 3585 int error; 3586 3587 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3588 return (error); 3589 get_mplock(); 3590 vp = (struct vnode *)fp->f_data; 3591 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3592 if ((obj = vp->v_object) != NULL) 3593 vm_object_page_clean(obj, 0, 0, 0); 3594 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3595 if (error == 0 && vp->v_mount) 3596 error = buf_fsync(vp); 3597 vn_unlock(vp); 3598 rel_mplock(); 3599 fdrop(fp); 3600 3601 return (error); 3602 } 3603 3604 int 3605 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3606 { 3607 struct nchandle fnchd; 3608 struct nchandle tnchd; 3609 struct namecache *ncp; 3610 struct vnode *fdvp; 3611 struct vnode *tdvp; 3612 struct mount *mp; 3613 int error; 3614 3615 bwillinode(1); 3616 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3617 if ((error = nlookup(fromnd)) != 0) 3618 return (error); 3619 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3620 return (ENOENT); 3621 fnchd.mount = fromnd->nl_nch.mount; 3622 cache_hold(&fnchd); 3623 3624 /* 3625 * unlock the source nch so we can lookup the target nch without 3626 * deadlocking. The target may or may not exist so we do not check 3627 * for a target vp like kern_mkdir() and other creation functions do. 3628 * 3629 * The source and target directories are ref'd and rechecked after 3630 * everything is relocked to determine if the source or target file 3631 * has been renamed. 
3632 */ 3633 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3634 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3635 cache_unlock(&fromnd->nl_nch); 3636 3637 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3638 if ((error = nlookup(tond)) != 0) { 3639 cache_drop(&fnchd); 3640 return (error); 3641 } 3642 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3643 cache_drop(&fnchd); 3644 return (ENOENT); 3645 } 3646 tnchd.mount = tond->nl_nch.mount; 3647 cache_hold(&tnchd); 3648 3649 /* 3650 * If the source and target are the same there is nothing to do 3651 */ 3652 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3653 cache_drop(&fnchd); 3654 cache_drop(&tnchd); 3655 return (0); 3656 } 3657 3658 /* 3659 * Mount points cannot be renamed or overwritten 3660 */ 3661 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3662 NCF_ISMOUNTPT 3663 ) { 3664 cache_drop(&fnchd); 3665 cache_drop(&tnchd); 3666 return (EINVAL); 3667 } 3668 3669 /* 3670 * Relock the source ncp. cache_relock() will deal with any 3671 * deadlocks against the already-locked tond and will also 3672 * make sure both are resolved. 3673 * 3674 * NOTE AFTER RELOCKING: The source or target ncp may have become 3675 * invalid while they were unlocked, nc_vp and nc_mount could 3676 * be NULL. 3677 */ 3678 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3679 &tond->nl_nch, tond->nl_cred); 3680 fromnd->nl_flags |= NLC_NCPISLOCKED; 3681 3682 /* 3683 * make sure the parent directories linkages are the same 3684 */ 3685 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3686 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3687 cache_drop(&fnchd); 3688 cache_drop(&tnchd); 3689 return (ENOENT); 3690 } 3691 3692 /* 3693 * Both the source and target must be within the same filesystem and 3694 * in the same filesystem as their parent directories within the 3695 * namecache topology. 3696 * 3697 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 
3698 */ 3699 mp = fnchd.mount; 3700 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3701 mp != tond->nl_nch.mount) { 3702 cache_drop(&fnchd); 3703 cache_drop(&tnchd); 3704 return (EXDEV); 3705 } 3706 3707 /* 3708 * Make sure the mount point is writable 3709 */ 3710 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3711 cache_drop(&fnchd); 3712 cache_drop(&tnchd); 3713 return (error); 3714 } 3715 3716 /* 3717 * If the target exists and either the source or target is a directory, 3718 * then both must be directories. 3719 * 3720 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3721 * have become NULL. 3722 */ 3723 if (tond->nl_nch.ncp->nc_vp) { 3724 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3725 error = ENOENT; 3726 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3727 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3728 error = ENOTDIR; 3729 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3730 error = EISDIR; 3731 } 3732 } 3733 3734 /* 3735 * You cannot rename a source into itself or a subdirectory of itself. 3736 * We check this by travsersing the target directory upwards looking 3737 * for a match against the source. 3738 * 3739 * XXX MPSAFE 3740 */ 3741 if (error == 0) { 3742 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3743 if (fromnd->nl_nch.ncp == ncp) { 3744 error = EINVAL; 3745 break; 3746 } 3747 } 3748 } 3749 3750 cache_drop(&fnchd); 3751 cache_drop(&tnchd); 3752 3753 /* 3754 * Even though the namespaces are different, they may still represent 3755 * hardlinks to the same file. The filesystem might have a hard time 3756 * with this so we issue a NREMOVE of the source instead of a NRENAME 3757 * when we detect the situation. 
3758 */ 3759 if (error == 0) { 3760 fdvp = fromnd->nl_dvp; 3761 tdvp = tond->nl_dvp; 3762 if (fdvp == NULL || tdvp == NULL) { 3763 error = EPERM; 3764 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3765 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3766 fromnd->nl_cred); 3767 } else { 3768 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3769 fdvp, tdvp, tond->nl_cred); 3770 } 3771 } 3772 return (error); 3773 } 3774 3775 /* 3776 * rename_args(char *from, char *to) 3777 * 3778 * Rename files. Source and destination must either both be directories, 3779 * or both not be directories. If target is a directory, it must be empty. 3780 * 3781 * MPALMOSTSAFE 3782 */ 3783 int 3784 sys_rename(struct rename_args *uap) 3785 { 3786 struct nlookupdata fromnd, tond; 3787 int error; 3788 3789 get_mplock(); 3790 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3791 if (error == 0) { 3792 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3793 if (error == 0) 3794 error = kern_rename(&fromnd, &tond); 3795 nlookup_done(&tond); 3796 } 3797 nlookup_done(&fromnd); 3798 rel_mplock(); 3799 return (error); 3800 } 3801 3802 /* 3803 * renameat_args(int oldfd, char *old, int newfd, char *new) 3804 * 3805 * Rename files using paths relative to the directories associated with 3806 * oldfd and newfd. Source and destination must either both be directories, 3807 * or both not be directories. If target is a directory, it must be empty. 
3808 * 3809 * MPALMOSTSAFE 3810 */ 3811 int 3812 sys_renameat(struct renameat_args *uap) 3813 { 3814 struct nlookupdata oldnd, newnd; 3815 struct file *oldfp, *newfp; 3816 int error; 3817 3818 get_mplock(); 3819 error = nlookup_init_at(&oldnd, &oldfp, uap->oldfd, uap->old, 3820 UIO_USERSPACE, 0); 3821 if (error == 0) { 3822 error = nlookup_init_at(&newnd, &newfp, uap->newfd, uap->new, 3823 UIO_USERSPACE, 0); 3824 if (error == 0) 3825 error = kern_rename(&oldnd, &newnd); 3826 nlookup_done_at(&newnd, newfp); 3827 } 3828 nlookup_done_at(&oldnd, oldfp); 3829 rel_mplock(); 3830 return (error); 3831 } 3832 3833 int 3834 kern_mkdir(struct nlookupdata *nd, int mode) 3835 { 3836 struct thread *td = curthread; 3837 struct proc *p = td->td_proc; 3838 struct vnode *vp; 3839 struct vattr vattr; 3840 int error; 3841 3842 bwillinode(1); 3843 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3844 if ((error = nlookup(nd)) != 0) 3845 return (error); 3846 3847 if (nd->nl_nch.ncp->nc_vp) 3848 return (EEXIST); 3849 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3850 return (error); 3851 VATTR_NULL(&vattr); 3852 vattr.va_type = VDIR; 3853 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3854 3855 vp = NULL; 3856 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 3857 if (error == 0) 3858 vput(vp); 3859 return (error); 3860 } 3861 3862 /* 3863 * mkdir_args(char *path, int mode) 3864 * 3865 * Make a directory file. 
3866 * 3867 * MPALMOSTSAFE 3868 */ 3869 int 3870 sys_mkdir(struct mkdir_args *uap) 3871 { 3872 struct nlookupdata nd; 3873 int error; 3874 3875 get_mplock(); 3876 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3877 if (error == 0) 3878 error = kern_mkdir(&nd, uap->mode); 3879 nlookup_done(&nd); 3880 rel_mplock(); 3881 return (error); 3882 } 3883 3884 int 3885 kern_rmdir(struct nlookupdata *nd) 3886 { 3887 int error; 3888 3889 bwillinode(1); 3890 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3891 if ((error = nlookup(nd)) != 0) 3892 return (error); 3893 3894 /* 3895 * Do not allow directories representing mount points to be 3896 * deleted, even if empty. Check write perms on mount point 3897 * in case the vnode is aliased (aka nullfs). 3898 */ 3899 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3900 return (EINVAL); 3901 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3902 return (error); 3903 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3904 return (error); 3905 } 3906 3907 /* 3908 * rmdir_args(char *path) 3909 * 3910 * Remove a directory file. 
3911 * 3912 * MPALMOSTSAFE 3913 */ 3914 int 3915 sys_rmdir(struct rmdir_args *uap) 3916 { 3917 struct nlookupdata nd; 3918 int error; 3919 3920 get_mplock(); 3921 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3922 if (error == 0) 3923 error = kern_rmdir(&nd); 3924 nlookup_done(&nd); 3925 rel_mplock(); 3926 return (error); 3927 } 3928 3929 int 3930 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3931 enum uio_seg direction) 3932 { 3933 struct thread *td = curthread; 3934 struct proc *p = td->td_proc; 3935 struct vnode *vp; 3936 struct file *fp; 3937 struct uio auio; 3938 struct iovec aiov; 3939 off_t loff; 3940 int error, eofflag; 3941 3942 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3943 return (error); 3944 if ((fp->f_flag & FREAD) == 0) { 3945 error = EBADF; 3946 goto done; 3947 } 3948 vp = (struct vnode *)fp->f_data; 3949 unionread: 3950 if (vp->v_type != VDIR) { 3951 error = EINVAL; 3952 goto done; 3953 } 3954 aiov.iov_base = buf; 3955 aiov.iov_len = count; 3956 auio.uio_iov = &aiov; 3957 auio.uio_iovcnt = 1; 3958 auio.uio_rw = UIO_READ; 3959 auio.uio_segflg = direction; 3960 auio.uio_td = td; 3961 auio.uio_resid = count; 3962 loff = auio.uio_offset = fp->f_offset; 3963 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3964 fp->f_offset = auio.uio_offset; 3965 if (error) 3966 goto done; 3967 if (count == auio.uio_resid) { 3968 if (union_dircheckp) { 3969 error = union_dircheckp(td, &vp, fp); 3970 if (error == -1) 3971 goto unionread; 3972 if (error) 3973 goto done; 3974 } 3975 #if 0 3976 if ((vp->v_flag & VROOT) && 3977 (vp->v_mount->mnt_flag & MNT_UNION)) { 3978 struct vnode *tvp = vp; 3979 vp = vp->v_mount->mnt_vnodecovered; 3980 vref(vp); 3981 fp->f_data = vp; 3982 fp->f_offset = 0; 3983 vrele(tvp); 3984 goto unionread; 3985 } 3986 #endif 3987 } 3988 3989 /* 3990 * WARNING! *basep may not be wide enough to accomodate the 3991 * seek offset. 
XXX should we hack this to return the upper 32 bits 3992 * for offsets greater then 4G? 3993 */ 3994 if (basep) { 3995 *basep = (long)loff; 3996 } 3997 *res = count - auio.uio_resid; 3998 done: 3999 fdrop(fp); 4000 return (error); 4001 } 4002 4003 /* 4004 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4005 * 4006 * Read a block of directory entries in a file system independent format. 4007 * 4008 * MPALMOSTSAFE 4009 */ 4010 int 4011 sys_getdirentries(struct getdirentries_args *uap) 4012 { 4013 long base; 4014 int error; 4015 4016 get_mplock(); 4017 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4018 &uap->sysmsg_result, UIO_USERSPACE); 4019 rel_mplock(); 4020 4021 if (error == 0 && uap->basep) 4022 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4023 return (error); 4024 } 4025 4026 /* 4027 * getdents_args(int fd, char *buf, size_t count) 4028 * 4029 * MPALMOSTSAFE 4030 */ 4031 int 4032 sys_getdents(struct getdents_args *uap) 4033 { 4034 int error; 4035 4036 get_mplock(); 4037 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4038 &uap->sysmsg_result, UIO_USERSPACE); 4039 rel_mplock(); 4040 4041 return (error); 4042 } 4043 4044 /* 4045 * Set the mode mask for creation of filesystem nodes. 4046 * 4047 * umask(int newmask) 4048 * 4049 * MPSAFE 4050 */ 4051 int 4052 sys_umask(struct umask_args *uap) 4053 { 4054 struct thread *td = curthread; 4055 struct proc *p = td->td_proc; 4056 struct filedesc *fdp; 4057 4058 fdp = p->p_fd; 4059 uap->sysmsg_result = fdp->fd_cmask; 4060 fdp->fd_cmask = uap->newmask & ALLPERMS; 4061 return (0); 4062 } 4063 4064 /* 4065 * revoke(char *path) 4066 * 4067 * Void all references to file by ripping underlying filesystem 4068 * away from vnode. 
4069 * 4070 * MPALMOSTSAFE 4071 */ 4072 int 4073 sys_revoke(struct revoke_args *uap) 4074 { 4075 struct nlookupdata nd; 4076 struct vattr vattr; 4077 struct vnode *vp; 4078 struct ucred *cred; 4079 int error; 4080 4081 vp = NULL; 4082 get_mplock(); 4083 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4084 if (error == 0) 4085 error = nlookup(&nd); 4086 if (error == 0) 4087 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4088 cred = crhold(nd.nl_cred); 4089 nlookup_done(&nd); 4090 if (error == 0) { 4091 if (error == 0) 4092 error = VOP_GETATTR(vp, &vattr); 4093 if (error == 0 && cred->cr_uid != vattr.va_uid) 4094 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4095 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4096 if (vcount(vp) > 0) 4097 error = vrevoke(vp, cred); 4098 } else if (error == 0) { 4099 error = vrevoke(vp, cred); 4100 } 4101 vrele(vp); 4102 } 4103 if (cred) 4104 crfree(cred); 4105 rel_mplock(); 4106 return (error); 4107 } 4108 4109 /* 4110 * getfh_args(char *fname, fhandle_t *fhp) 4111 * 4112 * Get (NFS) file handle 4113 * 4114 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4115 * mount. This allows nullfs mounts to be explicitly exported. 4116 * 4117 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4118 * 4119 * nullfs mounts of subdirectories are not safe. That is, it will 4120 * work, but you do not really have protection against access to 4121 * the related parent directories. 
4122 * 4123 * MPALMOSTSAFE 4124 */ 4125 int 4126 sys_getfh(struct getfh_args *uap) 4127 { 4128 struct thread *td = curthread; 4129 struct nlookupdata nd; 4130 fhandle_t fh; 4131 struct vnode *vp; 4132 struct mount *mp; 4133 int error; 4134 4135 /* 4136 * Must be super user 4137 */ 4138 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4139 return (error); 4140 4141 vp = NULL; 4142 get_mplock(); 4143 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4144 if (error == 0) 4145 error = nlookup(&nd); 4146 if (error == 0) 4147 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4148 mp = nd.nl_nch.mount; 4149 nlookup_done(&nd); 4150 if (error == 0) { 4151 bzero(&fh, sizeof(fh)); 4152 fh.fh_fsid = mp->mnt_stat.f_fsid; 4153 error = VFS_VPTOFH(vp, &fh.fh_fid); 4154 vput(vp); 4155 if (error == 0) 4156 error = copyout(&fh, uap->fhp, sizeof(fh)); 4157 } 4158 rel_mplock(); 4159 return (error); 4160 } 4161 4162 /* 4163 * fhopen_args(const struct fhandle *u_fhp, int flags) 4164 * 4165 * syscall for the rpc.lockd to use to translate a NFS file handle into 4166 * an open descriptor. 4167 * 4168 * warning: do not remove the priv_check() call or this becomes one giant 4169 * security hole. 4170 * 4171 * MPALMOSTSAFE 4172 */ 4173 int 4174 sys_fhopen(struct fhopen_args *uap) 4175 { 4176 struct thread *td = curthread; 4177 struct filedesc *fdp = td->td_proc->p_fd; 4178 struct mount *mp; 4179 struct vnode *vp; 4180 struct fhandle fhp; 4181 struct vattr vat; 4182 struct vattr *vap = &vat; 4183 struct flock lf; 4184 int fmode, mode, error, type; 4185 struct file *nfp; 4186 struct file *fp; 4187 int indx; 4188 4189 /* 4190 * Must be super user 4191 */ 4192 error = priv_check(td, PRIV_ROOT); 4193 if (error) 4194 return (error); 4195 4196 fmode = FFLAGS(uap->flags); 4197 4198 /* 4199 * Why not allow a non-read/write open for our lockd? 
4200 */ 4201 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4202 return (EINVAL); 4203 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4204 if (error) 4205 return(error); 4206 4207 /* 4208 * Find the mount point 4209 */ 4210 get_mplock(); 4211 mp = vfs_getvfs(&fhp.fh_fsid); 4212 if (mp == NULL) { 4213 error = ESTALE; 4214 goto done; 4215 } 4216 /* now give me my vnode, it gets returned to me locked */ 4217 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4218 if (error) 4219 goto done; 4220 /* 4221 * from now on we have to make sure not 4222 * to forget about the vnode 4223 * any error that causes an abort must vput(vp) 4224 * just set error = err and 'goto bad;'. 4225 */ 4226 4227 /* 4228 * from vn_open 4229 */ 4230 if (vp->v_type == VLNK) { 4231 error = EMLINK; 4232 goto bad; 4233 } 4234 if (vp->v_type == VSOCK) { 4235 error = EOPNOTSUPP; 4236 goto bad; 4237 } 4238 mode = 0; 4239 if (fmode & (FWRITE | O_TRUNC)) { 4240 if (vp->v_type == VDIR) { 4241 error = EISDIR; 4242 goto bad; 4243 } 4244 error = vn_writechk(vp, NULL); 4245 if (error) 4246 goto bad; 4247 mode |= VWRITE; 4248 } 4249 if (fmode & FREAD) 4250 mode |= VREAD; 4251 if (mode) { 4252 error = VOP_ACCESS(vp, mode, td->td_ucred); 4253 if (error) 4254 goto bad; 4255 } 4256 if (fmode & O_TRUNC) { 4257 vn_unlock(vp); /* XXX */ 4258 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4259 VATTR_NULL(vap); 4260 vap->va_size = 0; 4261 error = VOP_SETATTR(vp, vap, td->td_ucred); 4262 if (error) 4263 goto bad; 4264 } 4265 4266 /* 4267 * VOP_OPEN needs the file pointer so it can potentially override 4268 * it. 4269 * 4270 * WARNING! no f_nchandle will be associated when fhopen()ing a 4271 * directory. XXX 4272 */ 4273 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4274 goto bad; 4275 fp = nfp; 4276 4277 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4278 if (error) { 4279 /* 4280 * setting f_ops this way prevents VOP_CLOSE from being 4281 * called or fdrop() releasing the vp from v_data. 
Since 4282 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4283 */ 4284 fp->f_ops = &badfileops; 4285 fp->f_data = NULL; 4286 goto bad_drop; 4287 } 4288 4289 /* 4290 * The fp is given its own reference, we still have our ref and lock. 4291 * 4292 * Assert that all regular files must be created with a VM object. 4293 */ 4294 if (vp->v_type == VREG && vp->v_object == NULL) { 4295 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4296 goto bad_drop; 4297 } 4298 4299 /* 4300 * The open was successful. Handle any locking requirements. 4301 */ 4302 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4303 lf.l_whence = SEEK_SET; 4304 lf.l_start = 0; 4305 lf.l_len = 0; 4306 if (fmode & O_EXLOCK) 4307 lf.l_type = F_WRLCK; 4308 else 4309 lf.l_type = F_RDLCK; 4310 if (fmode & FNONBLOCK) 4311 type = 0; 4312 else 4313 type = F_WAIT; 4314 vn_unlock(vp); 4315 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4316 /* 4317 * release our private reference. 4318 */ 4319 fsetfd(fdp, NULL, indx); 4320 fdrop(fp); 4321 vrele(vp); 4322 goto done; 4323 } 4324 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4325 fp->f_flag |= FHASLOCK; 4326 } 4327 4328 /* 4329 * Clean up. Associate the file pointer with the previously 4330 * reserved descriptor and return it. 
4331 */ 4332 vput(vp); 4333 rel_mplock(); 4334 fsetfd(fdp, fp, indx); 4335 fdrop(fp); 4336 uap->sysmsg_result = indx; 4337 return (0); 4338 4339 bad_drop: 4340 fsetfd(fdp, NULL, indx); 4341 fdrop(fp); 4342 bad: 4343 vput(vp); 4344 done: 4345 rel_mplock(); 4346 return (error); 4347 } 4348 4349 /* 4350 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4351 * 4352 * MPALMOSTSAFE 4353 */ 4354 int 4355 sys_fhstat(struct fhstat_args *uap) 4356 { 4357 struct thread *td = curthread; 4358 struct stat sb; 4359 fhandle_t fh; 4360 struct mount *mp; 4361 struct vnode *vp; 4362 int error; 4363 4364 /* 4365 * Must be super user 4366 */ 4367 error = priv_check(td, PRIV_ROOT); 4368 if (error) 4369 return (error); 4370 4371 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4372 if (error) 4373 return (error); 4374 4375 get_mplock(); 4376 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4377 error = ESTALE; 4378 if (error == 0) { 4379 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4380 error = vn_stat(vp, &sb, td->td_ucred); 4381 vput(vp); 4382 } 4383 } 4384 rel_mplock(); 4385 if (error == 0) 4386 error = copyout(&sb, uap->sb, sizeof(sb)); 4387 return (error); 4388 } 4389 4390 /* 4391 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4392 * 4393 * MPALMOSTSAFE 4394 */ 4395 int 4396 sys_fhstatfs(struct fhstatfs_args *uap) 4397 { 4398 struct thread *td = curthread; 4399 struct proc *p = td->td_proc; 4400 struct statfs *sp; 4401 struct mount *mp; 4402 struct vnode *vp; 4403 struct statfs sb; 4404 char *fullpath, *freepath; 4405 fhandle_t fh; 4406 int error; 4407 4408 /* 4409 * Must be super user 4410 */ 4411 if ((error = priv_check(td, PRIV_ROOT))) 4412 return (error); 4413 4414 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4415 return (error); 4416 4417 get_mplock(); 4418 4419 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4420 error = ESTALE; 4421 goto done; 4422 } 4423 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4424 error = ESTALE; 4425 goto 
done; 4426 } 4427 4428 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4429 goto done; 4430 mp = vp->v_mount; 4431 sp = &mp->mnt_stat; 4432 vput(vp); 4433 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4434 goto done; 4435 4436 error = mount_path(p, mp, &fullpath, &freepath); 4437 if (error) 4438 goto done; 4439 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4440 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4441 kfree(freepath, M_TEMP); 4442 4443 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4444 if (priv_check(td, PRIV_ROOT)) { 4445 bcopy(sp, &sb, sizeof(sb)); 4446 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4447 sp = &sb; 4448 } 4449 error = copyout(sp, uap->buf, sizeof(*sp)); 4450 done: 4451 rel_mplock(); 4452 return (error); 4453 } 4454 4455 /* 4456 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4457 * 4458 * MPALMOSTSAFE 4459 */ 4460 int 4461 sys_fhstatvfs(struct fhstatvfs_args *uap) 4462 { 4463 struct thread *td = curthread; 4464 struct proc *p = td->td_proc; 4465 struct statvfs *sp; 4466 struct mount *mp; 4467 struct vnode *vp; 4468 fhandle_t fh; 4469 int error; 4470 4471 /* 4472 * Must be super user 4473 */ 4474 if ((error = priv_check(td, PRIV_ROOT))) 4475 return (error); 4476 4477 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4478 return (error); 4479 4480 get_mplock(); 4481 4482 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4483 error = ESTALE; 4484 goto done; 4485 } 4486 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4487 error = ESTALE; 4488 goto done; 4489 } 4490 4491 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4492 goto done; 4493 mp = vp->v_mount; 4494 sp = &mp->mnt_vstat; 4495 vput(vp); 4496 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4497 goto done; 4498 4499 sp->f_flag = 0; 4500 if (mp->mnt_flag & MNT_RDONLY) 4501 sp->f_flag |= ST_RDONLY; 4502 if (mp->mnt_flag & MNT_NOSUID) 4503 sp->f_flag |= ST_NOSUID; 4504 error = copyout(sp, uap->buf, sizeof(*sp)); 4505 done: 4506 
rel_mplock(); 4507 return (error); 4508 } 4509 4510 4511 /* 4512 * Syscall to push extended attribute configuration information into the 4513 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4514 * a command (int cmd), and attribute name and misc data. For now, the 4515 * attribute name is left in userspace for consumption by the VFS_op. 4516 * It will probably be changed to be copied into sysspace by the 4517 * syscall in the future, once issues with various consumers of the 4518 * attribute code have raised their hands. 4519 * 4520 * Currently this is used only by UFS Extended Attributes. 4521 * 4522 * MPALMOSTSAFE 4523 */ 4524 int 4525 sys_extattrctl(struct extattrctl_args *uap) 4526 { 4527 struct nlookupdata nd; 4528 struct vnode *vp; 4529 char attrname[EXTATTR_MAXNAMELEN]; 4530 int error; 4531 size_t size; 4532 4533 get_mplock(); 4534 4535 attrname[0] = 0; 4536 vp = NULL; 4537 error = 0; 4538 4539 if (error == 0 && uap->filename) { 4540 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4541 NLC_FOLLOW); 4542 if (error == 0) 4543 error = nlookup(&nd); 4544 if (error == 0) 4545 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4546 nlookup_done(&nd); 4547 } 4548 4549 if (error == 0 && uap->attrname) { 4550 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4551 &size); 4552 } 4553 4554 if (error == 0) { 4555 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4556 if (error == 0) 4557 error = nlookup(&nd); 4558 if (error == 0) 4559 error = ncp_writechk(&nd.nl_nch); 4560 if (error == 0) { 4561 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4562 uap->attrnamespace, 4563 uap->attrname, nd.nl_cred); 4564 } 4565 nlookup_done(&nd); 4566 } 4567 4568 rel_mplock(); 4569 4570 return (error); 4571 } 4572 4573 /* 4574 * Syscall to get a named extended attribute on a file or directory. 
4575 * 4576 * MPALMOSTSAFE 4577 */ 4578 int 4579 sys_extattr_set_file(struct extattr_set_file_args *uap) 4580 { 4581 char attrname[EXTATTR_MAXNAMELEN]; 4582 struct nlookupdata nd; 4583 struct vnode *vp; 4584 struct uio auio; 4585 struct iovec aiov; 4586 int error; 4587 4588 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4589 if (error) 4590 return (error); 4591 4592 vp = NULL; 4593 get_mplock(); 4594 4595 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4596 if (error == 0) 4597 error = nlookup(&nd); 4598 if (error == 0) 4599 error = ncp_writechk(&nd.nl_nch); 4600 if (error == 0) 4601 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4602 if (error) { 4603 nlookup_done(&nd); 4604 rel_mplock(); 4605 return (error); 4606 } 4607 4608 bzero(&auio, sizeof(auio)); 4609 aiov.iov_base = uap->data; 4610 aiov.iov_len = uap->nbytes; 4611 auio.uio_iov = &aiov; 4612 auio.uio_iovcnt = 1; 4613 auio.uio_offset = 0; 4614 auio.uio_resid = uap->nbytes; 4615 auio.uio_rw = UIO_WRITE; 4616 auio.uio_td = curthread; 4617 4618 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4619 &auio, nd.nl_cred); 4620 4621 vput(vp); 4622 nlookup_done(&nd); 4623 rel_mplock(); 4624 return (error); 4625 } 4626 4627 /* 4628 * Syscall to get a named extended attribute on a file or directory. 
4629 * 4630 * MPALMOSTSAFE 4631 */ 4632 int 4633 sys_extattr_get_file(struct extattr_get_file_args *uap) 4634 { 4635 char attrname[EXTATTR_MAXNAMELEN]; 4636 struct nlookupdata nd; 4637 struct uio auio; 4638 struct iovec aiov; 4639 struct vnode *vp; 4640 int error; 4641 4642 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4643 if (error) 4644 return (error); 4645 4646 vp = NULL; 4647 get_mplock(); 4648 4649 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4650 if (error == 0) 4651 error = nlookup(&nd); 4652 if (error == 0) 4653 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4654 if (error) { 4655 nlookup_done(&nd); 4656 rel_mplock(); 4657 return (error); 4658 } 4659 4660 bzero(&auio, sizeof(auio)); 4661 aiov.iov_base = uap->data; 4662 aiov.iov_len = uap->nbytes; 4663 auio.uio_iov = &aiov; 4664 auio.uio_iovcnt = 1; 4665 auio.uio_offset = 0; 4666 auio.uio_resid = uap->nbytes; 4667 auio.uio_rw = UIO_READ; 4668 auio.uio_td = curthread; 4669 4670 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4671 &auio, nd.nl_cred); 4672 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4673 4674 vput(vp); 4675 nlookup_done(&nd); 4676 rel_mplock(); 4677 return(error); 4678 } 4679 4680 /* 4681 * Syscall to delete a named extended attribute from a file or directory. 4682 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4683 * 4684 * MPALMOSTSAFE 4685 */ 4686 int 4687 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4688 { 4689 char attrname[EXTATTR_MAXNAMELEN]; 4690 struct nlookupdata nd; 4691 struct vnode *vp; 4692 int error; 4693 4694 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4695 if (error) 4696 return(error); 4697 4698 get_mplock(); 4699 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4700 if (error == 0) 4701 error = nlookup(&nd); 4702 if (error == 0) 4703 error = ncp_writechk(&nd.nl_nch); 4704 if (error == 0) { 4705 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4706 if (error == 0) { 4707 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4708 attrname, NULL, nd.nl_cred); 4709 vput(vp); 4710 } 4711 } 4712 nlookup_done(&nd); 4713 rel_mplock(); 4714 return(error); 4715 } 4716 4717 /* 4718 * Determine if the mount is visible to the process. 4719 */ 4720 static int 4721 chroot_visible_mnt(struct mount *mp, struct proc *p) 4722 { 4723 struct nchandle nch; 4724 4725 /* 4726 * Traverse from the mount point upwards. If we hit the process 4727 * root then the mount point is visible to the process. 4728 */ 4729 nch = mp->mnt_ncmountpt; 4730 while (nch.ncp) { 4731 if (nch.mount == p->p_fd->fd_nrdir.mount && 4732 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4733 return(1); 4734 } 4735 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4736 nch = nch.mount->mnt_ncmounton; 4737 } else { 4738 nch.ncp = nch.ncp->nc_parent; 4739 } 4740 } 4741 4742 /* 4743 * If the mount point is not visible to the process, but the 4744 * process root is in a subdirectory of the mount, return 4745 * TRUE anyway. 4746 */ 4747 if (p->p_fd->fd_nrdir.mount == mp) 4748 return(1); 4749 4750 return(0); 4751 } 4752 4753