/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/jail.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>
#include <sys/sysctl.h>

#include <sys/buf2.h>
#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <machine/limits.h>
#include <machine/stdarg.h>

#include <vfs/union/union.h>

static void mount_warning(struct mount *mp, const char *ctl, ...);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);
static int	usermount = 0;	/* if 1, non-root can mount fs. */

int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	get_mplock();
	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

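	/*
	 * The code below follows the nlookup_init()/nlookup()/nlookup_done()
	 * pattern used for all path resolution in this file; a minimal
	 * sketch of the idiom (illustrative only):
	 *
	 *	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW);
	 *	if (error == 0)
	 *		error = nlookup(&nd);
	 *	...consume nd.nl_nch...
	 *	nlookup_done(&nd);
	 */
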
	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system.  We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
		hasmount = 1;
	else
		hasmount = 0;

	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type.  We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		vsetflags(vp, VMOUNT);
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		vn_unlock(vp);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
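
	/*
	 * Resolve the requested filesystem type.  If it is not registered
	 * we attempt to auto-load a VFS module of the same name below;
	 * only root may trigger the load.
	 */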
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}
	vsetflags(vp, VMOUNT);

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	vn_unlock(vp);
update:
	/*
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		vfs_unbusy(mp);
		vclrflags(vp, VMOUNT);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;

		/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
		vclrflags(vp, VMOUNT);
		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		vclrflags(vp, VMOUNT);
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	rel_mplock();
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * The passed ncp is ref'd and locked (from the mount code) and
 * must be associated with the vnode representing the root of the
 * mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;
	struct nchandle new_nch;
	struct vnode *old_vp;
	struct vnode *new_vp;
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || olddp->v_sysref.refcnt == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

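		/*
		 * Deferred-release pattern: the ncdrop1/ncdrop2 and
		 * vprele1/vprele2 holders collect references while fd_spin
		 * is held; the potentially-blocking cache_drop() and
		 * vrele() calls are issued only after the spinlock is
		 * released below.
		 */
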
		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock_wr(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock_wr(&fdp->fd_spin);
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not the special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	int error;

	KKASSERT(p);
	get_mplock();
	if (td->td_ucred->cr_prison != NULL) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, PRIV_ROOT)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	nlookup_done(&nd);
	if (error == 0)
		error = dounmount(mp, uap->flags);
done:
	rel_mplock();
	return (error);
}

/*
 * Do the actual file system unmount.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;

	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		return (error);

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
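
	/*
	 * Acquire the mount lock.  A normal unmount must not block here
	 * (LK_NOWAIT); only a forced unmount is allowed to wait for the
	 * lock.
	 */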
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 */
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 */
	if (error == 0) {
		if ((vp = mp->mnt_syncer) != NULL) {
			mp->mnt_syncer = NULL;
			vrele(vp);
		}
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	if (freeok)
		kfree(mp, M_MOUNT);
	return (0);
}

static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * MPALMOSTSAFE
 */
int
sys_sync(struct sync_args *uap)
{
	get_mplock();
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	rel_mplock();
	return (0);
}

static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}
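
/*
 * The mountlist_scan() + callback idiom above is used by sync(2) and by
 * the getfsstat-family calls below: the callback runs once per mounted
 * filesystem with the caller's cookie as its second argument.  Judging
 * from getfsstat_callback() below, a negative callback return aborts the
 * scan early (it returns -1 once copyout fails).
 */
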
/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct proc *p;
	struct mount *mp;
	int error;

	get_mplock();
	td = curthread;
	p = td->td_proc;
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	rel_mplock();
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	get_mplock();
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &uap->sysmsg_result);
	rel_mplock();
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}
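
/*
 * For reference, the argument contract enforced by sys_mountctl() above:
 * a path is mandatory, ctl data is limited to 1KB and the result buffer
 * to 16KB, the descriptor is optional (fd == -1 means none), and any op
 * other than MOUNTCTL_MOUNTFLAGS requires root.
 */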

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);
	vn_unlock(vp);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	return (error);
}

int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 *
 * MPALMOSTSAFE
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	get_mplock();
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	rel_mplock();
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 *
 * MPSAFE
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 *
 * MPALMOSTSAFE
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	rel_mplock();
	return (error);
}

int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 *
 * MPALMOSTSAFE
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	get_mplock();
	error = kern_fstatvfs(uap->fd, &buf);
	rel_mplock();

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}
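
/*
 * Note on the getfsstat-family interfaces below: the callbacks bump
 * info->count even when no user buffer is supplied, so passing a NULL
 * buffer simply counts the visible mounts.  Illustrative userland idiom
 * (a sketch, not code from this file):
 *
 *	n = getfsstat(NULL, 0, MNT_NOWAIT);	(returns the mount count)
 */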

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

/*
 * MPALMOSTSAFE
 */
int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	get_mplock();
	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	rel_mplock();
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getvfsstat_info {
	struct statfs *sfsp;
	struct statvfs *vsfsp;
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

/*
 * MPALMOSTSAFE
 */
int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	get_mplock();
	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	rel_mplock();
	return (info.error);
}

static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}

/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 *
 * MPALMOSTSAFE
 */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	get_mplock();
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
	}
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		vn_unlock(vp);		/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	rel_mplock();
	return (error);
}

int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		cache_drop(&onch);
		vrele(ovp);
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 *
 * MPALMOSTSAFE
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}
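
/*
 * The helper below backs the chroot(2) hardening controlled by the
 * kern.chroot_allow_open_directories sysctl.  The classic breakout it
 * guards against (an illustrative sketch, not code from this file):
 *
 *	fd = open(".", O_RDONLY);	(directory outside the jail)
 *	chroot("/jail");
 *	fchdir(fd);			(cwd now outside the new root)
 *
 * after which repeated chdir("..") walks to the real root.
 */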

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(fdp, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 *
 * MPALMOSTSAFE
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	rel_mplock();
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * MPSAFE
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather than doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd,
						  flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
1891 */ 1892 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1893 ("open: regular file has no backing object after vn_open")); 1894 #endif 1895 1896 vrele(vp); 1897 1898 /* 1899 * release our private reference, leaving the one associated with the 1900 * descriptor table intact. 1901 */ 1902 fsetfd(fdp, fp, indx); 1903 fdrop(fp); 1904 *res = indx; 1905 return (0); 1906 } 1907 1908 /* 1909 * open_args(char *path, int flags, int mode) 1910 * 1911 * Check permissions, allocate an open file structure, 1912 * and call the device open routine if any. 1913 * 1914 * MPALMOSTSAFE 1915 */ 1916 int 1917 sys_open(struct open_args *uap) 1918 { 1919 CACHE_MPLOCK_DECLARE; 1920 struct nlookupdata nd; 1921 int error; 1922 1923 CACHE_GETMPLOCK1(); 1924 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1925 if (error == 0) { 1926 error = kern_open(&nd, uap->flags, 1927 uap->mode, &uap->sysmsg_result); 1928 } 1929 nlookup_done(&nd); 1930 CACHE_RELMPLOCK(); 1931 return (error); 1932 } 1933 1934 /* 1935 * openat_args(int fd, char *path, int flags, int mode) 1936 * 1937 * MPALMOSTSAFE 1938 */ 1939 int 1940 sys_openat(struct openat_args *uap) 1941 { 1942 CACHE_MPLOCK_DECLARE; 1943 struct nlookupdata nd; 1944 int error; 1945 struct file *fp; 1946 1947 CACHE_GETMPLOCK1(); 1948 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 1949 if (error == 0) { 1950 error = kern_open(&nd, uap->flags, uap->mode, 1951 &uap->sysmsg_result); 1952 } 1953 nlookup_done_at(&nd, fp); 1954 CACHE_RELMPLOCK(); 1955 return (error); 1956 } 1957 1958 int 1959 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1960 { 1961 struct thread *td = curthread; 1962 struct proc *p = td->td_proc; 1963 struct vnode *vp; 1964 struct vattr vattr; 1965 int error; 1966 int whiteout = 0; 1967 1968 KKASSERT(p); 1969 1970 VATTR_NULL(&vattr); 1971 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1972 vattr.va_rmajor = rmajor; 1973 vattr.va_rminor = rminor; 1974 1975 switch (mode & S_IFMT) { 1976 case S_IFMT: /* used by badsect to flag bad sectors */ 1977 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 1978 vattr.va_type = VBAD; 1979 break; 1980 case S_IFCHR: 1981 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1982 vattr.va_type = VCHR; 1983 break; 1984 case S_IFBLK: 1985 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1986 vattr.va_type = VBLK; 1987 break; 1988 case S_IFWHT: 1989 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 1990 whiteout = 1; 1991 break; 1992 case S_IFDIR: /* special directories support for HAMMER */ 1993 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 1994 vattr.va_type = VDIR; 1995 break; 1996 default: 1997 error = EINVAL; 1998 break; 1999 } 2000 2001 if (error) 2002 return (error); 2003 2004 bwillinode(1); 2005 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2006 if ((error = nlookup(nd)) != 0) 2007 return (error); 2008 if (nd->nl_nch.ncp->nc_vp) 2009 return (EEXIST); 2010 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2011 return (error); 2012 2013 if (whiteout) { 2014 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2015 nd->nl_cred, NAMEI_CREATE); 2016 } else { 2017 vp = NULL; 2018 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2019 &vp, nd->nl_cred, &vattr); 2020 if (error == 0) 2021 vput(vp); 2022 } 2023 return (error); 2024 } 2025 2026 /* 2027 * mknod_args(char *path, int mode, int dev) 2028 * 2029 * Create a special file. 

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 *
 * MPALMOSTSAFE
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 *
 * MPALMOSTSAFE
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}
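
/*
 * Example of the policy implemented by can_hardlink() above (an
 * illustrative note, not code):
 *
 *	sysctl security.hardlink_check_uid=1
 *
 * makes link(2) by an unprivileged user against a file owned by someone
 * else fail with EPERM; both checks default to off (0).
 */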

int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 *
 * MPALMOSTSAFE
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 *
 * MPALMOSTSAFE
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		get_mplock();
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
		rel_mplock();
	}
	objcache_put(namei_oc, path);
	return (error);
}
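
/*
 * Note the argument order in sys_symlink() above: uap->path is the link
 * contents (copied in as a string) and uap->link names the symlink to
 * create.  Illustrative sketch (hypothetical paths):
 *
 *	symlink("../shared/etc", "/usr/local/etc");
 *
 * creates /usr/local/etc containing the text "../shared/etc", with the
 * mode masked by the process umask (ACCESSPERMS & ~fd_cmask).
 */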
2303 * 2304 * MPALMOSTSAFE 2305 */ 2306 int 2307 sys_undelete(struct undelete_args *uap) 2308 { 2309 struct nlookupdata nd; 2310 int error; 2311 2312 get_mplock(); 2313 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2314 bwillinode(1); 2315 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2316 if (error == 0) 2317 error = nlookup(&nd); 2318 if (error == 0) 2319 error = ncp_writechk(&nd.nl_nch); 2320 if (error == 0) { 2321 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2322 NAMEI_DELETE); 2323 } 2324 nlookup_done(&nd); 2325 rel_mplock(); 2326 return (error); 2327 } 2328 2329 int 2330 kern_unlink(struct nlookupdata *nd) 2331 { 2332 int error; 2333 2334 bwillinode(1); 2335 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2336 if ((error = nlookup(nd)) != 0) 2337 return (error); 2338 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2339 return (error); 2340 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2341 return (error); 2342 } 2343 2344 /* 2345 * unlink_args(char *path) 2346 * 2347 * Delete a name from the filesystem. 2348 * 2349 * MPALMOSTSAFE 2350 */ 2351 int 2352 sys_unlink(struct unlink_args *uap) 2353 { 2354 struct nlookupdata nd; 2355 int error; 2356 2357 get_mplock(); 2358 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2359 if (error == 0) 2360 error = kern_unlink(&nd); 2361 nlookup_done(&nd); 2362 rel_mplock(); 2363 return (error); 2364 } 2365 2366 2367 /* 2368 * unlinkat_args(int fd, char *path, int flags) 2369 * 2370 * Delete the file or directory entry pointed to by fd/path. 2371 * 2372 * MPALMOSTSAFE 2373 */ 2374 int 2375 sys_unlinkat(struct unlinkat_args *uap) 2376 { 2377 struct nlookupdata nd; 2378 struct file *fp; 2379 int error; 2380 2381 if (uap->flags & ~AT_REMOVEDIR) 2382 return (EINVAL); 2383 2384 get_mplock(); 2385 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2386 if (error == 0) { 2387 if (uap->flags & AT_REMOVEDIR) 2388 error = kern_rmdir(&nd); 2389 else 2390 error = kern_unlink(&nd); 2391 } 2392 nlookup_done_at(&nd, fp); 2393 rel_mplock(); 2394 return (error); 2395 } 2396 2397 /* 2398 * MPALMOSTSAFE 2399 */ 2400 int 2401 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2402 { 2403 struct thread *td = curthread; 2404 struct proc *p = td->td_proc; 2405 struct file *fp; 2406 struct vnode *vp; 2407 struct vattr vattr; 2408 off_t new_offset; 2409 int error; 2410 2411 fp = holdfp(p->p_fd, fd, -1); 2412 if (fp == NULL) 2413 return (EBADF); 2414 if (fp->f_type != DTYPE_VNODE) { 2415 error = ESPIPE; 2416 goto done; 2417 } 2418 vp = (struct vnode *)fp->f_data; 2419 2420 switch (whence) { 2421 case L_INCR: 2422 spin_lock_wr(&fp->f_spin); 2423 new_offset = fp->f_offset + offset; 2424 error = 0; 2425 break; 2426 case L_XTND: 2427 get_mplock(); 2428 error = VOP_GETATTR(vp, &vattr); 2429 rel_mplock(); 2430 spin_lock_wr(&fp->f_spin); 2431 new_offset = offset + vattr.va_size; 2432 break; 2433 case L_SET: 2434 new_offset = offset; 2435 error = 0; 2436 spin_lock_wr(&fp->f_spin); 2437 break; 2438 default: 2439 new_offset = 0; 2440 error = EINVAL; 2441 spin_lock_wr(&fp->f_spin); 2442 break; 2443 } 2444 2445 /* 2446 * Validate the seek position. Negative offsets are not allowed 2447 * for regular files or directories. 2448 * 2449 * Normally we would also not want to allow negative offsets for 2450 * character and block-special devices. However kvm addresses 2451 * on 64 bit architectures might appear to be negative and must 2452 * be allowed. 
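 *
 * The kernel whence codes L_SET, L_INCR and L_XTND correspond to
 * the userland SEEK_SET, SEEK_CUR and SEEK_END. Illustrative
 * userland sketch (hypothetical descriptor, no error handling):
 *
 *	off_t size = lseek(fd, 0, SEEK_END);
 *	lseek(fd, 0, SEEK_SET);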
2453 */ 2454 if (error == 0) { 2455 if (new_offset < 0 && 2456 (vp->v_type == VREG || vp->v_type == VDIR)) { 2457 error = EINVAL; 2458 } else { 2459 fp->f_offset = new_offset; 2460 } 2461 } 2462 *res = fp->f_offset; 2463 spin_unlock_wr(&fp->f_spin); 2464 done: 2465 fdrop(fp); 2466 return (error); 2467 } 2468 2469 /* 2470 * lseek_args(int fd, int pad, off_t offset, int whence) 2471 * 2472 * Reposition read/write file offset. 2473 * 2474 * MPSAFE 2475 */ 2476 int 2477 sys_lseek(struct lseek_args *uap) 2478 { 2479 int error; 2480 2481 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2482 &uap->sysmsg_offset); 2483 2484 return (error); 2485 } 2486 2487 /* 2488 * Check whether the current process can access the given file. amode is a 2489 * bitmask of *_OK access bits. flags is a bitmask of AT_* flags. 2490 */ 2491 int 2492 kern_access(struct nlookupdata *nd, int amode, int flags) 2493 { 2494 struct vnode *vp; 2495 int error, mode; 2496 2497 if (flags & ~AT_EACCESS) 2498 return (EINVAL); 2499 if ((error = nlookup(nd)) != 0) 2500 return (error); 2501 retry: 2502 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2503 if (error) 2504 return (error); 2505 2506 /* amode == 0 means only check for existence. */ 2507 if (amode) { 2508 mode = 0; 2509 if (amode & R_OK) 2510 mode |= VREAD; 2511 if (amode & W_OK) 2512 mode |= VWRITE; 2513 if (amode & X_OK) 2514 mode |= VEXEC; 2515 if ((mode & VWRITE) == 0 || 2516 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2517 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2518 2519 /* 2520 * If the file handle is stale we have to re-resolve the 2521 * entry. This is a hack at the moment. 2522 */ 2523 if (error == ESTALE) { 2524 vput(vp); 2525 cache_setunresolved(&nd->nl_nch); 2526 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2527 if (error == 0) { 2528 vp = NULL; 2529 goto retry; 2530 } 2531 return(error); 2532 } 2533 } 2534 vput(vp); 2535 return (error); 2536 } 2537 2538 /* 2539 * access_args(char *path, int flags) 2540 * 2541 * Check access permissions. 2542 * 2543 * MPALMOSTSAFE 2544 */ 2545 int 2546 sys_access(struct access_args *uap) 2547 { 2548 struct nlookupdata nd; 2549 int error; 2550 2551 get_mplock(); 2552 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2553 if (error == 0) 2554 error = kern_access(&nd, uap->flags, 0); 2555 nlookup_done(&nd); 2556 rel_mplock(); 2557 return (error); 2558 } 2559 2560 2561 /* 2562 * faccessat_args(int fd, char *path, int amode, int flags) 2563 * 2564 * Check access permissions. 2565 * 2566 * MPALMOSTSAFE 2567 */ 2568 int 2569 sys_faccessat(struct faccessat_args *uap) 2570 { 2571 struct nlookupdata nd; 2572 struct file *fp; 2573 int error; 2574 2575 get_mplock(); 2576 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2577 NLC_FOLLOW); 2578 if (error == 0) 2579 error = kern_access(&nd, uap->amode, uap->flags); 2580 nlookup_done_at(&nd, fp); 2581 rel_mplock(); 2582 return (error); 2583 } 2584 2585 2586 /* 2587 * MPSAFE 2588 */ 2589 int 2590 kern_stat(struct nlookupdata *nd, struct stat *st) 2591 { 2592 int error; 2593 struct vnode *vp; 2594 thread_t td; 2595 2596 if ((error = nlookup(nd)) != 0) 2597 return (error); 2598 again: 2599 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2600 return (ENOENT); 2601 2602 td = curthread; 2603 if ((error = vget(vp, LK_SHARED)) != 0) 2604 return (error); 2605 error = vn_stat(vp, st, nd->nl_cred); 2606 2607 /* 2608 * If the file handle is stale we have to re-resolve the entry. This 2609 * is a hack at the moment.
2610 */ 2611 if (error == ESTALE) { 2612 vput(vp); 2613 cache_setunresolved(&nd->nl_nch); 2614 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2615 if (error == 0) 2616 goto again; 2617 } else { 2618 vput(vp); 2619 } 2620 return (error); 2621 } 2622 2623 /* 2624 * stat_args(char *path, struct stat *ub) 2625 * 2626 * Get file status; this version follows links. 2627 * 2628 * MPSAFE 2629 */ 2630 int 2631 sys_stat(struct stat_args *uap) 2632 { 2633 CACHE_MPLOCK_DECLARE; 2634 struct nlookupdata nd; 2635 struct stat st; 2636 int error; 2637 2638 CACHE_GETMPLOCK1(); 2639 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2640 if (error == 0) { 2641 error = kern_stat(&nd, &st); 2642 if (error == 0) 2643 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2644 } 2645 nlookup_done(&nd); 2646 CACHE_RELMPLOCK(); 2647 return (error); 2648 } 2649 2650 /* 2651 * lstat_args(char *path, struct stat *ub) 2652 * 2653 * Get file status; this version does not follow links. 2654 * 2655 * MPALMOSTSAFE 2656 */ 2657 int 2658 sys_lstat(struct lstat_args *uap) 2659 { 2660 CACHE_MPLOCK_DECLARE; 2661 struct nlookupdata nd; 2662 struct stat st; 2663 int error; 2664 2665 CACHE_GETMPLOCK1(); 2666 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2667 if (error == 0) { 2668 error = kern_stat(&nd, &st); 2669 if (error == 0) 2670 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2671 } 2672 nlookup_done(&nd); 2673 CACHE_RELMPLOCK(); 2674 return (error); 2675 } 2676 2677 /* 2678 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2679 * 2680 * Get status of file pointed to by fd/path. 2681 * 2682 * MPALMOSTSAFE 2683 */ 2684 int 2685 sys_fstatat(struct fstatat_args *uap) 2686 { 2687 CACHE_MPLOCK_DECLARE; 2688 struct nlookupdata nd; 2689 struct stat st; 2690 int error; 2691 int flags; 2692 struct file *fp; 2693 2694 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2695 return (EINVAL); 2696 2697 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2698 2699 CACHE_GETMPLOCK1(); 2700 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2701 UIO_USERSPACE, flags); 2702 if (error == 0) { 2703 error = kern_stat(&nd, &st); 2704 if (error == 0) 2705 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2706 } 2707 nlookup_done_at(&nd, fp); 2708 CACHE_RELMPLOCK(); 2709 return (error); 2710 } 2711 2712 /* 2713 * pathconf_args(char *path, int name) 2714 * 2715 * Get configurable pathname variables. 2716 * 2717 * MPALMOSTSAFE 2718 */ 2719 int 2720 sys_pathconf(struct pathconf_args *uap) 2721 { 2722 struct nlookupdata nd; 2723 struct vnode *vp; 2724 int error; 2725 2726 vp = NULL; 2727 get_mplock(); 2728 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2729 if (error == 0) 2730 error = nlookup(&nd); 2731 if (error == 0) 2732 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2733 nlookup_done(&nd); 2734 if (error == 0) { 2735 error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg); 2736 vput(vp); 2737 } 2738 rel_mplock(); 2739 return (error); 2740 } 2741 2742 /* 2743 * XXX: daver 2744 * kern_readlink isn't properly split yet. There is a copyin buried 2745 * in VOP_READLINK().
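 *
 * Illustrative userland usage (a sketch; note that readlink(2)
 * does not NUL-terminate the buffer, and the path is hypothetical):
 *
 *	char lbuf[PATH_MAX];
 *	ssize_t n = readlink("/some/symlink", lbuf, sizeof(lbuf) - 1);
 *	if (n >= 0)
 *		lbuf[n] = '\0';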
2746 */ 2747 int 2748 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2749 { 2750 struct thread *td = curthread; 2751 struct vnode *vp; 2752 struct iovec aiov; 2753 struct uio auio; 2754 int error; 2755 2756 if ((error = nlookup(nd)) != 0) 2757 return (error); 2758 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2759 if (error) 2760 return (error); 2761 if (vp->v_type != VLNK) { 2762 error = EINVAL; 2763 auio.uio_resid = count; /* auio is otherwise uninitialized; forces *res to 0 below */ 2764 } else { 2765 aiov.iov_base = buf; 2766 aiov.iov_len = count; 2767 auio.uio_iov = &aiov; 2768 auio.uio_iovcnt = 1; 2769 auio.uio_offset = 0; 2770 auio.uio_rw = UIO_READ; 2771 auio.uio_segflg = UIO_USERSPACE; 2772 auio.uio_td = td; 2773 auio.uio_resid = count; 2774 error = VOP_READLINK(vp, &auio, td->td_ucred); 2775 } 2776 vput(vp); 2777 *res = count - auio.uio_resid; 2778 return (error); 2779 } 2780 /* 2781 * readlink_args(char *path, char *buf, int count) 2782 * 2783 * Return target name of a symbolic link. 2784 * 2785 * MPALMOSTSAFE 2786 */ 2787 int 2788 sys_readlink(struct readlink_args *uap) 2789 { 2790 struct nlookupdata nd; 2791 int error; 2792 2793 get_mplock(); 2794 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2795 if (error == 0) { 2796 error = kern_readlink(&nd, uap->buf, uap->count, 2797 &uap->sysmsg_result); 2798 } 2799 nlookup_done(&nd); 2800 rel_mplock(); 2801 return (error); 2802 } 2803 2804 static int 2805 setfflags(struct vnode *vp, int flags) 2806 { 2807 struct thread *td = curthread; 2808 int error; 2809 struct vattr vattr; 2810 2811 /* 2812 * Prevent non-root users from setting flags on devices. When 2813 * a device is reused, users can retain ownership of the device 2814 * if they are allowed to set flags and programs assume that 2815 * chown can't fail when done as root. 2816 */ 2817 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2818 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2819 return (error); 2820 2821 /* 2822 * note: vget is required for any operation that might mod the vnode 2823 * so VINACTIVE is properly cleared. 2824 */ 2825 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2826 VATTR_NULL(&vattr); 2827 vattr.va_flags = flags; 2828 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2829 vput(vp); 2830 } 2831 return (error); 2832 } 2833 2834 /* 2835 * chflags(char *path, int flags) 2836 * 2837 * Change flags of a file given a path name. 2838 * 2839 * MPALMOSTSAFE 2840 */ 2841 int 2842 sys_chflags(struct chflags_args *uap) 2843 { 2844 struct nlookupdata nd; 2845 struct vnode *vp; 2846 int error; 2847 2848 vp = NULL; 2849 get_mplock(); 2850 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2851 if (error == 0) 2852 error = nlookup(&nd); 2853 if (error == 0) 2854 error = ncp_writechk(&nd.nl_nch); 2855 if (error == 0) 2856 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2857 nlookup_done(&nd); 2858 if (error == 0) { 2859 error = setfflags(vp, uap->flags); 2860 vrele(vp); 2861 } 2862 rel_mplock(); 2863 return (error); 2864 } 2865 2866 /* 2867 * lchflags(char *path, int flags) 2868 * 2869 * Change flags of a file given a path name, but don't follow symlinks.
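 *
 * Illustrative userland sketch (file flags such as UF_NODUMP come
 * from <sys/stat.h>; the path is hypothetical):
 *
 *	if (lchflags("/tmp/somelink", UF_NODUMP) != 0)
 *		err(1, "lchflags");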
2870 * 2871 * MPALMOSTSAFE 2872 */ 2873 int 2874 sys_lchflags(struct lchflags_args *uap) 2875 { 2876 struct nlookupdata nd; 2877 struct vnode *vp; 2878 int error; 2879 2880 vp = NULL; 2881 get_mplock(); 2882 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2883 if (error == 0) 2884 error = nlookup(&nd); 2885 if (error == 0) 2886 error = ncp_writechk(&nd.nl_nch); 2887 if (error == 0) 2888 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2889 nlookup_done(&nd); 2890 if (error == 0) { 2891 error = setfflags(vp, uap->flags); 2892 vrele(vp); 2893 } 2894 rel_mplock(); 2895 return (error); 2896 } 2897 2898 /* 2899 * fchflags_args(int fd, int flags) 2900 * 2901 * Change flags of a file given a file descriptor. 2902 * 2903 * MPALMOSTSAFE 2904 */ 2905 int 2906 sys_fchflags(struct fchflags_args *uap) 2907 { 2908 struct thread *td = curthread; 2909 struct proc *p = td->td_proc; 2910 struct file *fp; 2911 int error; 2912 2913 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2914 return (error); 2915 get_mplock(); 2916 if (fp->f_nchandle.ncp) 2917 error = ncp_writechk(&fp->f_nchandle); 2918 if (error == 0) 2919 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2920 rel_mplock(); 2921 fdrop(fp); 2922 return (error); 2923 } 2924 2925 static int 2926 setfmode(struct vnode *vp, int mode) 2927 { 2928 struct thread *td = curthread; 2929 int error; 2930 struct vattr vattr; 2931 2932 /* 2933 * note: vget is required for any operation that might mod the vnode 2934 * so VINACTIVE is properly cleared. 2935 */ 2936 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2937 VATTR_NULL(&vattr); 2938 vattr.va_mode = mode & ALLPERMS; 2939 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2940 vput(vp); 2941 } 2942 return error; 2943 } 2944 2945 int 2946 kern_chmod(struct nlookupdata *nd, int mode) 2947 { 2948 struct vnode *vp; 2949 int error; 2950 2951 if ((error = nlookup(nd)) != 0) 2952 return (error); 2953 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2954 return (error); 2955 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2956 error = setfmode(vp, mode); 2957 vrele(vp); 2958 return (error); 2959 } 2960 2961 /* 2962 * chmod_args(char *path, int mode) 2963 * 2964 * Change mode of a file given path name. 2965 * 2966 * MPALMOSTSAFE 2967 */ 2968 int 2969 sys_chmod(struct chmod_args *uap) 2970 { 2971 struct nlookupdata nd; 2972 int error; 2973 2974 get_mplock(); 2975 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2976 if (error == 0) 2977 error = kern_chmod(&nd, uap->mode); 2978 nlookup_done(&nd); 2979 rel_mplock(); 2980 return (error); 2981 } 2982 2983 /* 2984 * lchmod_args(char *path, int mode) 2985 * 2986 * Change mode of a file given path name (don't follow links.) 2987 * 2988 * MPALMOSTSAFE 2989 */ 2990 int 2991 sys_lchmod(struct lchmod_args *uap) 2992 { 2993 struct nlookupdata nd; 2994 int error; 2995 2996 get_mplock(); 2997 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2998 if (error == 0) 2999 error = kern_chmod(&nd, uap->mode); 3000 nlookup_done(&nd); 3001 rel_mplock(); 3002 return (error); 3003 } 3004 3005 /* 3006 * fchmod_args(int fd, int mode) 3007 * 3008 * Change mode of a file given a file descriptor. 
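 *
 * Illustrative userland sketch (hypothetical path, minimal error
 * handling):
 *
 *	int fd = open("/tmp/afile", O_RDWR);
 *	if (fd >= 0 && fchmod(fd, 0600) != 0)
 *		err(1, "fchmod");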
3009 * 3010 * MPALMOSTSAFE 3011 */ 3012 int 3013 sys_fchmod(struct fchmod_args *uap) 3014 { 3015 struct thread *td = curthread; 3016 struct proc *p = td->td_proc; 3017 struct file *fp; 3018 int error; 3019 3020 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3021 return (error); 3022 get_mplock(); 3023 if (fp->f_nchandle.ncp) 3024 error = ncp_writechk(&fp->f_nchandle); 3025 if (error == 0) 3026 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3027 rel_mplock(); 3028 fdrop(fp); 3029 return (error); 3030 } 3031 3032 /* 3033 * fchmodat_args(int fd, char *path, int mode, int flags) 3034 * 3035 * Change mode of a file pointed to by fd/path. 3036 * 3037 * MPALMOSTSAFE 3038 */ 3039 int 3040 sys_fchmodat(struct fchmodat_args *uap) 3041 { 3042 struct nlookupdata nd; 3043 struct file *fp; 3044 int error; 3045 int flags; 3046 3047 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3048 return (EINVAL); 3049 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3050 3051 get_mplock(); 3052 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3053 UIO_USERSPACE, flags); 3054 if (error == 0) 3055 error = kern_chmod(&nd, uap->mode); 3056 nlookup_done_at(&nd, fp); 3057 rel_mplock(); 3058 return (error); 3059 } 3060 3061 static int 3062 setfown(struct vnode *vp, uid_t uid, gid_t gid) 3063 { 3064 struct thread *td = curthread; 3065 int error; 3066 struct vattr vattr; 3067 3068 /* 3069 * note: vget is required for any operation that might mod the vnode 3070 * so VINACTIVE is properly cleared. 3071 */ 3072 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3073 VATTR_NULL(&vattr); 3074 vattr.va_uid = uid; 3075 vattr.va_gid = gid; 3076 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3077 vput(vp); 3078 } 3079 return error; 3080 } 3081 3082 int 3083 kern_chown(struct nlookupdata *nd, int uid, int gid) 3084 { 3085 struct vnode *vp; 3086 int error; 3087 3088 if ((error = nlookup(nd)) != 0) 3089 return (error); 3090 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3091 return (error); 3092 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3093 error = setfown(vp, uid, gid); 3094 vrele(vp); 3095 return (error); 3096 } 3097 3098 /* 3099 * chown(char *path, int uid, int gid) 3100 * 3101 * Set ownership given a path name. 3102 * 3103 * MPALMOSTSAFE 3104 */ 3105 int 3106 sys_chown(struct chown_args *uap) 3107 { 3108 struct nlookupdata nd; 3109 int error; 3110 3111 get_mplock(); 3112 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3113 if (error == 0) 3114 error = kern_chown(&nd, uap->uid, uap->gid); 3115 nlookup_done(&nd); 3116 rel_mplock(); 3117 return (error); 3118 } 3119 3120 /* 3121 * lchown_args(char *path, int uid, int gid) 3122 * 3123 * Set ownership given a path name, do not cross symlinks. 3124 * 3125 * MPALMOSTSAFE 3126 */ 3127 int 3128 sys_lchown(struct lchown_args *uap) 3129 { 3130 struct nlookupdata nd; 3131 int error; 3132 3133 get_mplock(); 3134 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3135 if (error == 0) 3136 error = kern_chown(&nd, uap->uid, uap->gid); 3137 nlookup_done(&nd); 3138 rel_mplock(); 3139 return (error); 3140 } 3141 3142 /* 3143 * fchown_args(int fd, int uid, int gid) 3144 * 3145 * Set ownership given a file descriptor.
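 *
 * Illustrative userland sketch; an id of -1 leaves that id
 * unchanged (the uid below is hypothetical):
 *
 *	if (fchown(fd, 1001, -1) != 0)
 *		err(1, "fchown");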
3146 * 3147 * MPALMOSTSAFE 3148 */ 3149 int 3150 sys_fchown(struct fchown_args *uap) 3151 { 3152 struct thread *td = curthread; 3153 struct proc *p = td->td_proc; 3154 struct file *fp; 3155 int error; 3156 3157 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3158 return (error); 3159 get_mplock(); 3160 if (fp->f_nchandle.ncp) 3161 error = ncp_writechk(&fp->f_nchandle); 3162 if (error == 0) 3163 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 3164 rel_mplock(); 3165 fdrop(fp); 3166 return (error); 3167 } 3168 3169 /* 3170 * fchownat(int fd, char *path, int uid, int gid, int flags) 3171 * 3172 * Set ownership of file pointed to by fd/path. 3173 * 3174 * MPALMOSTSAFE 3175 */ 3176 int 3177 sys_fchownat(struct fchownat_args *uap) 3178 { 3179 struct nlookupdata nd; 3180 struct file *fp; 3181 int error; 3182 int flags; 3183 3184 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3185 return (EINVAL); 3186 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3187 3188 get_mplock(); 3189 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3190 UIO_USERSPACE, flags); 3191 if (error == 0) 3192 error = kern_chown(&nd, uap->uid, uap->gid); 3193 nlookup_done_at(&nd, fp); 3194 rel_mplock(); 3195 return (error); 3196 } 3197 3198 3199 static int 3200 getutimes(const struct timeval *tvp, struct timespec *tsp) 3201 { 3202 struct timeval tv[2]; 3203 3204 if (tvp == NULL) { 3205 microtime(&tv[0]); 3206 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3207 tsp[1] = tsp[0]; 3208 } else { 3209 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3210 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3211 } 3212 return 0; 3213 } 3214 3215 static int 3216 setutimes(struct vnode *vp, struct vattr *vattr, 3217 const struct timespec *ts, int nullflag) 3218 { 3219 struct thread *td = curthread; 3220 int error; 3221 3222 VATTR_NULL(vattr); 3223 vattr->va_atime = ts[0]; 3224 vattr->va_mtime = ts[1]; 3225 if (nullflag) 3226 vattr->va_vaflags |= VA_UTIMES_NULL; 3227 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3228 3229 return error; 3230 } 3231 3232 int 3233 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3234 { 3235 struct timespec ts[2]; 3236 struct vnode *vp; 3237 struct vattr vattr; 3238 int error; 3239 3240 if ((error = getutimes(tptr, ts)) != 0) 3241 return (error); 3242 3243 /* 3244 * NOTE: utimes() succeeds for the owner even if the file 3245 * is not user-writable. 3246 */ 3247 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3248 3249 if ((error = nlookup(nd)) != 0) 3250 return (error); 3251 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3252 return (error); 3253 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3254 return (error); 3255 3256 /* 3257 * note: vget is required for any operation that might mod the vnode 3258 * so VINACTIVE is properly cleared. 3259 */ 3260 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3261 error = vget(vp, LK_EXCLUSIVE); 3262 if (error == 0) { 3263 error = setutimes(vp, &vattr, ts, (tptr == NULL)); 3264 vput(vp); 3265 } 3266 } 3267 vrele(vp); 3268 return (error); 3269 } 3270 3271 /* 3272 * utimes_args(char *path, struct timeval *tptr) 3273 * 3274 * Set the access and modification times of a file. 
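 *
 * Passing a NULL tptr sets both times to the current time, as
 * getutimes() above implements. Illustrative userland sketch
 * (tv[0] is the access time, tv[1] the modification time; the
 * path is hypothetical):
 *
 *	struct timeval tv[2];
 *	gettimeofday(&tv[0], NULL);
 *	tv[1] = tv[0];
 *	utimes("/tmp/afile", tv);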
3275 * 3276 * MPALMOSTSAFE 3277 */ 3278 int 3279 sys_utimes(struct utimes_args *uap) 3280 { 3281 struct timeval tv[2]; 3282 struct nlookupdata nd; 3283 int error; 3284 3285 if (uap->tptr) { 3286 error = copyin(uap->tptr, tv, sizeof(tv)); 3287 if (error) 3288 return (error); 3289 } 3290 get_mplock(); 3291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3292 if (error == 0) 3293 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3294 nlookup_done(&nd); 3295 rel_mplock(); 3296 return (error); 3297 } 3298 3299 /* 3300 * lutimes_args(char *path, struct timeval *tptr) 3301 * 3302 * Set the access and modification times of a file. 3303 * 3304 * MPALMOSTSAFE 3305 */ 3306 int 3307 sys_lutimes(struct lutimes_args *uap) 3308 { 3309 struct timeval tv[2]; 3310 struct nlookupdata nd; 3311 int error; 3312 3313 if (uap->tptr) { 3314 error = copyin(uap->tptr, tv, sizeof(tv)); 3315 if (error) 3316 return (error); 3317 } 3318 get_mplock(); 3319 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3320 if (error == 0) 3321 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3322 nlookup_done(&nd); 3323 rel_mplock(); 3324 return (error); 3325 } 3326 3327 /* 3328 * Set utimes on a file descriptor. The creds used to open the 3329 * file are used to determine whether the operation is allowed 3330 * or not. 3331 */ 3332 int 3333 kern_futimes(int fd, struct timeval *tptr) 3334 { 3335 struct thread *td = curthread; 3336 struct proc *p = td->td_proc; 3337 struct timespec ts[2]; 3338 struct file *fp; 3339 struct vnode *vp; 3340 struct vattr vattr; 3341 int error; 3342 3343 error = getutimes(tptr, ts); 3344 if (error) 3345 return (error); 3346 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3347 return (error); 3348 if (fp->f_nchandle.ncp) 3349 error = ncp_writechk(&fp->f_nchandle); 3350 if (error == 0) { 3351 vp = fp->f_data; 3352 error = vget(vp, LK_EXCLUSIVE); 3353 if (error == 0) { 3354 error = VOP_GETATTR(vp, &vattr); 3355 if (error == 0) { 3356 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3357 fp->f_cred); 3358 } 3359 if (error == 0) { 3360 error = setutimes(vp, &vattr, ts, 3361 (tptr == NULL)); 3362 } 3363 vput(vp); 3364 } 3365 } 3366 fdrop(fp); 3367 return (error); 3368 } 3369 3370 /* 3371 * futimes_args(int fd, struct timeval *tptr) 3372 * 3373 * Set the access and modification times of a file. 3374 * 3375 * MPALMOSTSAFE 3376 */ 3377 int 3378 sys_futimes(struct futimes_args *uap) 3379 { 3380 struct timeval tv[2]; 3381 int error; 3382 3383 if (uap->tptr) { 3384 error = copyin(uap->tptr, tv, sizeof(tv)); 3385 if (error) 3386 return (error); 3387 } 3388 get_mplock(); 3389 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3390 rel_mplock(); 3391 3392 return (error); 3393 } 3394 3395 int 3396 kern_truncate(struct nlookupdata *nd, off_t length) 3397 { 3398 struct vnode *vp; 3399 struct vattr vattr; 3400 int error; 3401 3402 if (length < 0) 3403 return(EINVAL); 3404 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3405 if ((error = nlookup(nd)) != 0) 3406 return (error); 3407 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3408 return (error); 3409 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3410 return (error); 3411 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3412 vrele(vp); 3413 return (error); 3414 } 3415 if (vp->v_type == VDIR) { 3416 error = EISDIR; 3417 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3418 VATTR_NULL(&vattr); 3419 vattr.va_size = length; 3420 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3421 } 3422 vput(vp); 3423 return (error); 3424 } 3425 3426 /* 3427 * truncate(char *path, int pad, off_t length) 3428 * 3429 * Truncate a file given its path name. 3430 * 3431 * MPALMOSTSAFE 3432 */ 3433 int 3434 sys_truncate(struct truncate_args *uap) 3435 { 3436 struct nlookupdata nd; 3437 int error; 3438 3439 get_mplock(); 3440 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3441 if (error == 0) 3442 error = kern_truncate(&nd, uap->length); 3443 nlookup_done(&nd); 3444 rel_mplock(); 3445 return error; 3446 } 3447 3448 int 3449 kern_ftruncate(int fd, off_t length) 3450 { 3451 struct thread *td = curthread; 3452 struct proc *p = td->td_proc; 3453 struct vattr vattr; 3454 struct vnode *vp; 3455 struct file *fp; 3456 int error; 3457 3458 if (length < 0) 3459 return(EINVAL); 3460 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3461 return (error); 3462 if (fp->f_nchandle.ncp) { 3463 error = ncp_writechk(&fp->f_nchandle); 3464 if (error) 3465 goto done; 3466 } 3467 if ((fp->f_flag & FWRITE) == 0) { 3468 error = EINVAL; 3469 goto done; 3470 } 3471 if (fp->f_flag & FAPPENDONLY) { /* inode was set append-only */ 3472 error = EINVAL; 3473 goto done; 3474 } 3475 vp = (struct vnode *)fp->f_data; 3476 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3477 if (vp->v_type == VDIR) { 3478 error = EISDIR; 3479 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3480 VATTR_NULL(&vattr); 3481 vattr.va_size = length; 3482 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3483 } 3484 vn_unlock(vp); 3485 done: 3486 fdrop(fp); 3487 return (error); 3488 } 3489 3490 /* 3491 * ftruncate_args(int fd, int pad, off_t length) 3492 * 3493 * Truncate a file given a file descriptor. 3494 * 3495 * MPALMOSTSAFE 3496 */ 3497 int 3498 sys_ftruncate(struct ftruncate_args *uap) 3499 { 3500 int error; 3501 3502 get_mplock(); 3503 error = kern_ftruncate(uap->fd, uap->length); 3504 rel_mplock(); 3505 3506 return (error); 3507 } 3508 3509 /* 3510 * fsync(int fd) 3511 * 3512 * Sync an open file.
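 *
 * Illustrative userland sketch (force previously written data out
 * to stable storage before relying on it):
 *
 *	write(fd, buf, len);
 *	if (fsync(fd) != 0)
 *		err(1, "fsync");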
3513 * 3514 * MPALMOSTSAFE 3515 */ 3516 int 3517 sys_fsync(struct fsync_args *uap) 3518 { 3519 struct thread *td = curthread; 3520 struct proc *p = td->td_proc; 3521 struct vnode *vp; 3522 struct file *fp; 3523 vm_object_t obj; 3524 int error; 3525 3526 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3527 return (error); 3528 get_mplock(); 3529 vp = (struct vnode *)fp->f_data; 3530 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3531 if ((obj = vp->v_object) != NULL) 3532 vm_object_page_clean(obj, 0, 0, 0); 3533 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3534 if (error == 0 && vp->v_mount) 3535 error = buf_fsync(vp); 3536 vn_unlock(vp); 3537 rel_mplock(); 3538 fdrop(fp); 3539 3540 return (error); 3541 } 3542 3543 int 3544 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3545 { 3546 struct nchandle fnchd; 3547 struct nchandle tnchd; 3548 struct namecache *ncp; 3549 struct vnode *fdvp; 3550 struct vnode *tdvp; 3551 struct mount *mp; 3552 int error; 3553 3554 bwillinode(1); 3555 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3556 if ((error = nlookup(fromnd)) != 0) 3557 return (error); 3558 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3559 return (ENOENT); 3560 fnchd.mount = fromnd->nl_nch.mount; 3561 cache_hold(&fnchd); 3562 3563 /* 3564 * unlock the source nch so we can lookup the target nch without 3565 * deadlocking. The target may or may not exist so we do not check 3566 * for a target vp like kern_mkdir() and other creation functions do. 3567 * 3568 * The source and target directories are ref'd and rechecked after 3569 * everything is relocked to determine if the source or target file 3570 * has been renamed. 3571 */ 3572 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3573 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3574 cache_unlock(&fromnd->nl_nch); 3575 3576 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3577 if ((error = nlookup(tond)) != 0) { 3578 cache_drop(&fnchd); 3579 return (error); 3580 } 3581 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3582 cache_drop(&fnchd); 3583 return (ENOENT); 3584 } 3585 tnchd.mount = tond->nl_nch.mount; 3586 cache_hold(&tnchd); 3587 3588 /* 3589 * If the source and target are the same there is nothing to do 3590 */ 3591 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3592 cache_drop(&fnchd); 3593 cache_drop(&tnchd); 3594 return (0); 3595 } 3596 3597 /* 3598 * Mount points cannot be renamed or overwritten 3599 */ 3600 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3601 NCF_ISMOUNTPT 3602 ) { 3603 cache_drop(&fnchd); 3604 cache_drop(&tnchd); 3605 return (EINVAL); 3606 } 3607 3608 /* 3609 * Relock the source ncp. cache_relock() will deal with any 3610 * deadlocks against the already-locked tond and will also 3611 * make sure both are resolved. 3612 * 3613 * NOTE AFTER RELOCKING: The source or target ncp may have become 3614 * invalid while they were unlocked, nc_vp and nc_mount could 3615 * be NULL. 3616 */ 3617 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3618 &tond->nl_nch, tond->nl_cred); 3619 fromnd->nl_flags |= NLC_NCPISLOCKED; 3620 3621 /* 3622 * make sure the parent directories linkages are the same 3623 */ 3624 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3625 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3626 cache_drop(&fnchd); 3627 cache_drop(&tnchd); 3628 return (ENOENT); 3629 } 3630 3631 /* 3632 * Both the source and target must be within the same filesystem and 3633 * in the same filesystem as their parent directories within the 3634 * namecache topology. 
3635 * 3636 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3637 */ 3638 mp = fnchd.mount; 3639 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3640 mp != tond->nl_nch.mount) { 3641 cache_drop(&fnchd); 3642 cache_drop(&tnchd); 3643 return (EXDEV); 3644 } 3645 3646 /* 3647 * Make sure the mount point is writable 3648 */ 3649 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3650 cache_drop(&fnchd); 3651 cache_drop(&tnchd); 3652 return (error); 3653 } 3654 3655 /* 3656 * If the target exists and either the source or target is a directory, 3657 * then both must be directories. 3658 * 3659 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might 3660 * have become NULL. 3661 */ 3662 if (tond->nl_nch.ncp->nc_vp) { 3663 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3664 error = ENOENT; 3665 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3666 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3667 error = ENOTDIR; 3668 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3669 error = EISDIR; 3670 } 3671 } 3672 3673 /* 3674 * You cannot rename a source into itself or a subdirectory of itself. 3675 * We check this by traversing the target directory upwards looking 3676 * for a match against the source. 3677 * 3678 * XXX MPSAFE 3679 */ 3680 if (error == 0) { 3681 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3682 if (fromnd->nl_nch.ncp == ncp) { 3683 error = EINVAL; 3684 break; 3685 } 3686 } 3687 } 3688 3689 cache_drop(&fnchd); 3690 cache_drop(&tnchd); 3691 3692 /* 3693 * Even though the namespaces are different, they may still represent 3694 * hardlinks to the same file. The filesystem might have a hard time 3695 * with this so we issue a NREMOVE of the source instead of a NRENAME 3696 * when we detect the situation. 3697 */ 3698 if (error == 0) { 3699 fdvp = fromnd->nl_dvp; 3700 tdvp = tond->nl_dvp; 3701 if (fdvp == NULL || tdvp == NULL) { 3702 error = EPERM; 3703 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3704 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3705 fromnd->nl_cred); 3706 } else { 3707 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3708 fdvp, tdvp, tond->nl_cred); 3709 } 3710 } 3711 return (error); 3712 } 3713 3714 /* 3715 * rename_args(char *from, char *to) 3716 * 3717 * Rename files. Source and destination must either both be directories, 3718 * or both not be directories. If target is a directory, it must be empty.
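 *
 * Because the rename is atomic with respect to other processes, a
 * common userland idiom is safe file replacement: write and fsync
 * a temporary file, then rename it over the original (hypothetical
 * paths):
 *
 *	if (rename("/tmp/cfg.new", "/tmp/cfg") != 0)
 *		err(1, "rename");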
3719 * 3720 * MPALMOSTSAFE 3721 */ 3722 int 3723 sys_rename(struct rename_args *uap) 3724 { 3725 struct nlookupdata fromnd, tond; 3726 int error; 3727 3728 get_mplock(); 3729 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3730 if (error == 0) { 3731 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3732 if (error == 0) 3733 error = kern_rename(&fromnd, &tond); 3734 nlookup_done(&tond); 3735 } 3736 nlookup_done(&fromnd); 3737 rel_mplock(); 3738 return (error); 3739 } 3740 3741 int 3742 kern_mkdir(struct nlookupdata *nd, int mode) 3743 { 3744 struct thread *td = curthread; 3745 struct proc *p = td->td_proc; 3746 struct vnode *vp; 3747 struct vattr vattr; 3748 int error; 3749 3750 bwillinode(1); 3751 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3752 if ((error = nlookup(nd)) != 0) 3753 return (error); 3754 3755 if (nd->nl_nch.ncp->nc_vp) 3756 return (EEXIST); 3757 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3758 return (error); 3759 VATTR_NULL(&vattr); 3760 vattr.va_type = VDIR; 3761 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3762 3763 vp = NULL; 3764 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 3765 if (error == 0) 3766 vput(vp); 3767 return (error); 3768 } 3769 3770 /* 3771 * mkdir_args(char *path, int mode) 3772 * 3773 * Make a directory file. 3774 * 3775 * MPALMOSTSAFE 3776 */ 3777 int 3778 sys_mkdir(struct mkdir_args *uap) 3779 { 3780 struct nlookupdata nd; 3781 int error; 3782 3783 get_mplock(); 3784 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3785 if (error == 0) 3786 error = kern_mkdir(&nd, uap->mode); 3787 nlookup_done(&nd); 3788 rel_mplock(); 3789 return (error); 3790 } 3791 3792 int 3793 kern_rmdir(struct nlookupdata *nd) 3794 { 3795 int error; 3796 3797 bwillinode(1); 3798 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3799 if ((error = nlookup(nd)) != 0) 3800 return (error); 3801 3802 /* 3803 * Do not allow directories representing mount points to be 3804 * deleted, even if empty. Check write perms on mount point 3805 * in case the vnode is aliased (aka nullfs). 3806 */ 3807 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3808 return (EINVAL); 3809 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3810 return (error); 3811 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3812 return (error); 3813 } 3814 3815 /* 3816 * rmdir_args(char *path) 3817 * 3818 * Remove a directory file. 
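 *
 * Illustrative userland sketch; the directory must be empty and,
 * as kern_rmdir() enforces, must not be a mount point (hypothetical
 * path):
 *
 *	if (rmdir("/tmp/scratchdir") != 0)
 *		err(1, "rmdir");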
3819 * 3820 * MPALMOSTSAFE 3821 */ 3822 int 3823 sys_rmdir(struct rmdir_args *uap) 3824 { 3825 struct nlookupdata nd; 3826 int error; 3827 3828 get_mplock(); 3829 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3830 if (error == 0) 3831 error = kern_rmdir(&nd); 3832 nlookup_done(&nd); 3833 rel_mplock(); 3834 return (error); 3835 } 3836 3837 int 3838 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3839 enum uio_seg direction) 3840 { 3841 struct thread *td = curthread; 3842 struct proc *p = td->td_proc; 3843 struct vnode *vp; 3844 struct file *fp; 3845 struct uio auio; 3846 struct iovec aiov; 3847 off_t loff; 3848 int error, eofflag; 3849 3850 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3851 return (error); 3852 if ((fp->f_flag & FREAD) == 0) { 3853 error = EBADF; 3854 goto done; 3855 } 3856 vp = (struct vnode *)fp->f_data; 3857 unionread: 3858 if (vp->v_type != VDIR) { 3859 error = EINVAL; 3860 goto done; 3861 } 3862 aiov.iov_base = buf; 3863 aiov.iov_len = count; 3864 auio.uio_iov = &aiov; 3865 auio.uio_iovcnt = 1; 3866 auio.uio_rw = UIO_READ; 3867 auio.uio_segflg = direction; 3868 auio.uio_td = td; 3869 auio.uio_resid = count; 3870 loff = auio.uio_offset = fp->f_offset; 3871 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3872 fp->f_offset = auio.uio_offset; 3873 if (error) 3874 goto done; 3875 if (count == auio.uio_resid) { 3876 if (union_dircheckp) { 3877 error = union_dircheckp(td, &vp, fp); 3878 if (error == -1) 3879 goto unionread; 3880 if (error) 3881 goto done; 3882 } 3883 #if 0 3884 if ((vp->v_flag & VROOT) && 3885 (vp->v_mount->mnt_flag & MNT_UNION)) { 3886 struct vnode *tvp = vp; 3887 vp = vp->v_mount->mnt_vnodecovered; 3888 vref(vp); 3889 fp->f_data = vp; 3890 fp->f_offset = 0; 3891 vrele(tvp); 3892 goto unionread; 3893 } 3894 #endif 3895 } 3896 3897 /* 3898 * WARNING! *basep may not be wide enough to accommodate the 3899 * seek offset. XXX should we hack this to return the upper 32 bits 3900 * for offsets greater than 4G? 3901 */ 3902 if (basep) { 3903 *basep = (long)loff; 3904 } 3905 *res = count - auio.uio_resid; 3906 done: 3907 fdrop(fp); 3908 return (error); 3909 } 3910 3911 /* 3912 * getdirentries_args(int fd, char *buf, u_int count, long *basep) 3913 * 3914 * Read a block of directory entries in a file system independent format. 3915 * 3916 * MPALMOSTSAFE 3917 */ 3918 int 3919 sys_getdirentries(struct getdirentries_args *uap) 3920 { 3921 long base; 3922 int error; 3923 3924 get_mplock(); 3925 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3926 &uap->sysmsg_result, UIO_USERSPACE); 3927 rel_mplock(); 3928 3929 if (error == 0 && uap->basep) 3930 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3931 return (error); 3932 } 3933 3934 /* 3935 * getdents_args(int fd, char *buf, size_t count) 3936 * 3937 * MPALMOSTSAFE 3938 */ 3939 int 3940 sys_getdents(struct getdents_args *uap) 3941 { 3942 int error; 3943 3944 get_mplock(); 3945 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3946 &uap->sysmsg_result, UIO_USERSPACE); 3947 rel_mplock(); 3948 3949 return (error); 3950 } 3951 3952 /* 3953 * Set the mode mask for creation of filesystem nodes.
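 *
 * Illustrative userland sketch; umask() returns the previous mask,
 * and a mask of 022 turns a requested 0666 into 0644:
 *
 *	mode_t omask = umask(022);
 *	int fd = open("/tmp/afile", O_CREAT | O_WRONLY, 0666);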
3954 * 3955 * umask(int newmask) 3956 * 3957 * MPSAFE 3958 */ 3959 int 3960 sys_umask(struct umask_args *uap) 3961 { 3962 struct thread *td = curthread; 3963 struct proc *p = td->td_proc; 3964 struct filedesc *fdp; 3965 3966 fdp = p->p_fd; 3967 uap->sysmsg_result = fdp->fd_cmask; 3968 fdp->fd_cmask = uap->newmask & ALLPERMS; 3969 return (0); 3970 } 3971 3972 /* 3973 * revoke(char *path) 3974 * 3975 * Void all references to file by ripping underlying filesystem 3976 * away from vnode. 3977 * 3978 * MPALMOSTSAFE 3979 */ 3980 int 3981 sys_revoke(struct revoke_args *uap) 3982 { 3983 struct nlookupdata nd; 3984 struct vattr vattr; 3985 struct vnode *vp; 3986 struct ucred *cred; 3987 int error; 3988 3989 vp = NULL; 3990 get_mplock(); 3991 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3992 if (error == 0) 3993 error = nlookup(&nd); 3994 if (error == 0) 3995 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3996 cred = crhold(nd.nl_cred); 3997 nlookup_done(&nd); 3998 if (error == 0) { 3999 if (error == 0) 4000 error = VOP_GETATTR(vp, &vattr); 4001 if (error == 0 && cred->cr_uid != vattr.va_uid) 4002 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4003 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4004 if (vcount(vp) > 0) 4005 error = vrevoke(vp, cred); 4006 } else if (error == 0) { 4007 error = vrevoke(vp, cred); 4008 } 4009 vrele(vp); 4010 } 4011 if (cred) 4012 crfree(cred); 4013 rel_mplock(); 4014 return (error); 4015 } 4016 4017 /* 4018 * getfh_args(char *fname, fhandle_t *fhp) 4019 * 4020 * Get (NFS) file handle 4021 * 4022 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4023 * mount. This allows nullfs mounts to be explicitly exported. 4024 * 4025 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4026 * 4027 * nullfs mounts of subdirectories are not safe. That is, it will 4028 * work, but you do not really have protection against access to 4029 * the related parent directories. 4030 * 4031 * MPALMOSTSAFE 4032 */ 4033 int 4034 sys_getfh(struct getfh_args *uap) 4035 { 4036 struct thread *td = curthread; 4037 struct nlookupdata nd; 4038 fhandle_t fh; 4039 struct vnode *vp; 4040 struct mount *mp; 4041 int error; 4042 4043 /* 4044 * Must be super user 4045 */ 4046 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4047 return (error); 4048 4049 vp = NULL; 4050 get_mplock(); 4051 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4052 if (error == 0) 4053 error = nlookup(&nd); 4054 if (error == 0) 4055 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4056 mp = nd.nl_nch.mount; 4057 nlookup_done(&nd); 4058 if (error == 0) { 4059 bzero(&fh, sizeof(fh)); 4060 fh.fh_fsid = mp->mnt_stat.f_fsid; 4061 error = VFS_VPTOFH(vp, &fh.fh_fid); 4062 vput(vp); 4063 if (error == 0) 4064 error = copyout(&fh, uap->fhp, sizeof(fh)); 4065 } 4066 rel_mplock(); 4067 return (error); 4068 } 4069 4070 /* 4071 * fhopen_args(const struct fhandle *u_fhp, int flags) 4072 * 4073 * syscall for the rpc.lockd to use to translate a NFS file handle into 4074 * an open descriptor. 4075 * 4076 * warning: do not remove the priv_check() call or this becomes one giant 4077 * security hole. 
4078 * 4079 * MPALMOSTSAFE 4080 */ 4081 int 4082 sys_fhopen(struct fhopen_args *uap) 4083 { 4084 struct thread *td = curthread; 4085 struct filedesc *fdp = td->td_proc->p_fd; 4086 struct mount *mp; 4087 struct vnode *vp; 4088 struct fhandle fhp; 4089 struct vattr vat; 4090 struct vattr *vap = &vat; 4091 struct flock lf; 4092 int fmode, mode, error, type; 4093 struct file *nfp; 4094 struct file *fp; 4095 int indx; 4096 4097 /* 4098 * Must be super user 4099 */ 4100 error = priv_check(td, PRIV_ROOT); 4101 if (error) 4102 return (error); 4103 4104 fmode = FFLAGS(uap->flags); 4105 4106 /* 4107 * Why not allow a non-read/write open for our lockd? 4108 */ 4109 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4110 return (EINVAL); 4111 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4112 if (error) 4113 return(error); 4114 4115 /* 4116 * Find the mount point 4117 */ 4118 get_mplock(); 4119 mp = vfs_getvfs(&fhp.fh_fsid); 4120 if (mp == NULL) { 4121 error = ESTALE; 4122 goto done; 4123 } 4124 /* now give me my vnode, it gets returned to me locked */ 4125 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4126 if (error) 4127 goto done; 4128 /* 4129 * from now on we have to make sure not 4130 * to forget about the vnode 4131 * any error that causes an abort must vput(vp) 4132 * just set error = err and 'goto bad;'. 4133 */ 4134 4135 /* 4136 * from vn_open 4137 */ 4138 if (vp->v_type == VLNK) { 4139 error = EMLINK; 4140 goto bad; 4141 } 4142 if (vp->v_type == VSOCK) { 4143 error = EOPNOTSUPP; 4144 goto bad; 4145 } 4146 mode = 0; 4147 if (fmode & (FWRITE | O_TRUNC)) { 4148 if (vp->v_type == VDIR) { 4149 error = EISDIR; 4150 goto bad; 4151 } 4152 error = vn_writechk(vp, NULL); 4153 if (error) 4154 goto bad; 4155 mode |= VWRITE; 4156 } 4157 if (fmode & FREAD) 4158 mode |= VREAD; 4159 if (mode) { 4160 error = VOP_ACCESS(vp, mode, td->td_ucred); 4161 if (error) 4162 goto bad; 4163 } 4164 if (fmode & O_TRUNC) { 4165 vn_unlock(vp); /* XXX */ 4166 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4167 VATTR_NULL(vap); 4168 vap->va_size = 0; 4169 error = VOP_SETATTR(vp, vap, td->td_ucred); 4170 if (error) 4171 goto bad; 4172 } 4173 4174 /* 4175 * VOP_OPEN needs the file pointer so it can potentially override 4176 * it. 4177 * 4178 * WARNING! no f_nchandle will be associated when fhopen()ing a 4179 * directory. XXX 4180 */ 4181 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4182 goto bad; 4183 fp = nfp; 4184 4185 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4186 if (error) { 4187 /* 4188 * setting f_ops this way prevents VOP_CLOSE from being 4189 * called or fdrop() releasing the vp from v_data. Since 4190 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4191 */ 4192 fp->f_ops = &badfileops; 4193 fp->f_data = NULL; 4194 goto bad_drop; 4195 } 4196 4197 /* 4198 * The fp is given its own reference, we still have our ref and lock. 4199 * 4200 * Assert that all regular files must be created with a VM object. 4201 */ 4202 if (vp->v_type == VREG && vp->v_object == NULL) { 4203 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4204 goto bad_drop; 4205 } 4206 4207 /* 4208 * The open was successful. Handle any locking requirements. 
4209 */ 4210 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4211 lf.l_whence = SEEK_SET; 4212 lf.l_start = 0; 4213 lf.l_len = 0; 4214 if (fmode & O_EXLOCK) 4215 lf.l_type = F_WRLCK; 4216 else 4217 lf.l_type = F_RDLCK; 4218 if (fmode & FNONBLOCK) 4219 type = 0; 4220 else 4221 type = F_WAIT; 4222 vn_unlock(vp); 4223 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4224 /* 4225 * release our private reference. 4226 */ 4227 fsetfd(fdp, NULL, indx); 4228 fdrop(fp); 4229 vrele(vp); 4230 goto done; 4231 } 4232 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4233 fp->f_flag |= FHASLOCK; 4234 } 4235 4236 /* 4237 * Clean up. Associate the file pointer with the previously 4238 * reserved descriptor and return it. 4239 */ 4240 vput(vp); 4241 rel_mplock(); 4242 fsetfd(fdp, fp, indx); 4243 fdrop(fp); 4244 uap->sysmsg_result = indx; 4245 return (0); 4246 4247 bad_drop: 4248 fsetfd(fdp, NULL, indx); 4249 fdrop(fp); 4250 bad: 4251 vput(vp); 4252 done: 4253 rel_mplock(); 4254 return (error); 4255 } 4256 4257 /* 4258 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4259 * 4260 * MPALMOSTSAFE 4261 */ 4262 int 4263 sys_fhstat(struct fhstat_args *uap) 4264 { 4265 struct thread *td = curthread; 4266 struct stat sb; 4267 fhandle_t fh; 4268 struct mount *mp; 4269 struct vnode *vp; 4270 int error; 4271 4272 /* 4273 * Must be super user 4274 */ 4275 error = priv_check(td, PRIV_ROOT); 4276 if (error) 4277 return (error); 4278 4279 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4280 if (error) 4281 return (error); 4282 4283 get_mplock(); 4284 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4285 error = ESTALE; 4286 if (error == 0) { 4287 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4288 error = vn_stat(vp, &sb, td->td_ucred); 4289 vput(vp); 4290 } 4291 } 4292 rel_mplock(); 4293 if (error == 0) 4294 error = copyout(&sb, uap->sb, sizeof(sb)); 4295 return (error); 4296 } 4297 4298 /* 4299 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4300 * 4301 * MPALMOSTSAFE 4302 */ 4303 int 4304 sys_fhstatfs(struct fhstatfs_args *uap) 4305 { 4306 struct thread *td = curthread; 4307 struct proc *p = td->td_proc; 4308 struct statfs *sp; 4309 struct mount *mp; 4310 struct vnode *vp; 4311 struct statfs sb; 4312 char *fullpath, *freepath; 4313 fhandle_t fh; 4314 int error; 4315 4316 /* 4317 * Must be super user 4318 */ 4319 if ((error = priv_check(td, PRIV_ROOT))) 4320 return (error); 4321 4322 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4323 return (error); 4324 4325 get_mplock(); 4326 4327 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4328 error = ESTALE; 4329 goto done; 4330 } 4331 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4332 error = ESTALE; 4333 goto done; 4334 } 4335 4336 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4337 goto done; 4338 mp = vp->v_mount; 4339 sp = &mp->mnt_stat; 4340 vput(vp); 4341 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4342 goto done; 4343 4344 error = mount_path(p, mp, &fullpath, &freepath); 4345 if (error) 4346 goto done; 4347 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4348 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4349 kfree(freepath, M_TEMP); 4350 4351 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4352 if (priv_check(td, PRIV_ROOT)) { 4353 bcopy(sp, &sb, sizeof(sb)); 4354 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4355 sp = &sb; 4356 } 4357 error = copyout(sp, uap->buf, sizeof(*sp)); 4358 done: 4359 rel_mplock(); 4360 return (error); 4361 } 4362 4363 /* 4364 * fhstatvfs_args(struct fhandle *u_fhp, 
struct statvfs *buf) 4365 * 4366 * MPALMOSTSAFE 4367 */ 4368 int 4369 sys_fhstatvfs(struct fhstatvfs_args *uap) 4370 { 4371 struct thread *td = curthread; 4372 struct proc *p = td->td_proc; 4373 struct statvfs *sp; 4374 struct mount *mp; 4375 struct vnode *vp; 4376 fhandle_t fh; 4377 int error; 4378 4379 /* 4380 * Must be super user 4381 */ 4382 if ((error = priv_check(td, PRIV_ROOT))) 4383 return (error); 4384 4385 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4386 return (error); 4387 4388 get_mplock(); 4389 4390 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4391 error = ESTALE; 4392 goto done; 4393 } 4394 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4395 error = ESTALE; 4396 goto done; 4397 } 4398 4399 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4400 goto done; 4401 mp = vp->v_mount; 4402 sp = &mp->mnt_vstat; 4403 vput(vp); 4404 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4405 goto done; 4406 4407 sp->f_flag = 0; 4408 if (mp->mnt_flag & MNT_RDONLY) 4409 sp->f_flag |= ST_RDONLY; 4410 if (mp->mnt_flag & MNT_NOSUID) 4411 sp->f_flag |= ST_NOSUID; 4412 error = copyout(sp, uap->buf, sizeof(*sp)); 4413 done: 4414 rel_mplock(); 4415 return (error); 4416 } 4417 4418 4419 /* 4420 * Syscall to push extended attribute configuration information into the 4421 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4422 * a command (int cmd), and attribute name and misc data. For now, the 4423 * attribute name is left in userspace for consumption by the VFS_op. 4424 * It will probably be changed to be copied into sysspace by the 4425 * syscall in the future, once issues with various consumers of the 4426 * attribute code have raised their hands. 4427 * 4428 * Currently this is used only by UFS Extended Attributes. 4429 * 4430 * MPALMOSTSAFE 4431 */ 4432 int 4433 sys_extattrctl(struct extattrctl_args *uap) 4434 { 4435 struct nlookupdata nd; 4436 struct vnode *vp; 4437 char attrname[EXTATTR_MAXNAMELEN]; 4438 int error; 4439 size_t size; 4440 4441 get_mplock(); 4442 4443 attrname[0] = 0; 4444 vp = NULL; 4445 error = 0; 4446 4447 if (error == 0 && uap->filename) { 4448 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4449 NLC_FOLLOW); 4450 if (error == 0) 4451 error = nlookup(&nd); 4452 if (error == 0) 4453 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4454 nlookup_done(&nd); 4455 } 4456 4457 if (error == 0 && uap->attrname) { 4458 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4459 &size); 4460 } 4461 4462 if (error == 0) { 4463 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4464 if (error == 0) 4465 error = nlookup(&nd); 4466 if (error == 0) 4467 error = ncp_writechk(&nd.nl_nch); 4468 if (error == 0) { 4469 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4470 uap->attrnamespace, 4471 uap->attrname, nd.nl_cred); 4472 } 4473 nlookup_done(&nd); 4474 } 4475 4476 rel_mplock(); 4477 4478 return (error); 4479 } 4480 4481 /* 4482 * Syscall to set a named extended attribute on a file or directory.
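 *
 * Illustrative userland sketch (the namespace constant is real, the
 * attribute name and path are hypothetical):
 *
 *	const char *val = "blue";
 *	extattr_set_file("/tmp/afile", EXTATTR_NAMESPACE_USER,
 *	    "color", val, strlen(val));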
4483 * 4484 * MPALMOSTSAFE 4485 */ 4486 int 4487 sys_extattr_set_file(struct extattr_set_file_args *uap) 4488 { 4489 char attrname[EXTATTR_MAXNAMELEN]; 4490 struct nlookupdata nd; 4491 struct vnode *vp; 4492 struct uio auio; 4493 struct iovec aiov; 4494 int error; 4495 4496 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4497 if (error) 4498 return (error); 4499 4500 vp = NULL; 4501 get_mplock(); 4502 4503 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4504 if (error == 0) 4505 error = nlookup(&nd); 4506 if (error == 0) 4507 error = ncp_writechk(&nd.nl_nch); 4508 if (error == 0) 4509 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4510 if (error) { 4511 nlookup_done(&nd); 4512 rel_mplock(); 4513 return (error); 4514 } 4515 4516 bzero(&auio, sizeof(auio)); 4517 aiov.iov_base = uap->data; 4518 aiov.iov_len = uap->nbytes; 4519 auio.uio_iov = &aiov; 4520 auio.uio_iovcnt = 1; 4521 auio.uio_offset = 0; 4522 auio.uio_resid = uap->nbytes; 4523 auio.uio_rw = UIO_WRITE; 4524 auio.uio_td = curthread; 4525 4526 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4527 &auio, nd.nl_cred); 4528 4529 vput(vp); 4530 nlookup_done(&nd); 4531 rel_mplock(); 4532 return (error); 4533 } 4534 4535 /* 4536 * Syscall to get a named extended attribute on a file or directory. 4537 * 4538 * MPALMOSTSAFE 4539 */ 4540 int 4541 sys_extattr_get_file(struct extattr_get_file_args *uap) 4542 { 4543 char attrname[EXTATTR_MAXNAMELEN]; 4544 struct nlookupdata nd; 4545 struct uio auio; 4546 struct iovec aiov; 4547 struct vnode *vp; 4548 int error; 4549 4550 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4551 if (error) 4552 return (error); 4553 4554 vp = NULL; 4555 get_mplock(); 4556 4557 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4558 if (error == 0) 4559 error = nlookup(&nd); 4560 if (error == 0) 4561 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4562 if (error) { 4563 nlookup_done(&nd); 4564 rel_mplock(); 4565 return (error); 4566 } 4567 4568 bzero(&auio, sizeof(auio)); 4569 aiov.iov_base = uap->data; 4570 aiov.iov_len = uap->nbytes; 4571 auio.uio_iov = &aiov; 4572 auio.uio_iovcnt = 1; 4573 auio.uio_offset = 0; 4574 auio.uio_resid = uap->nbytes; 4575 auio.uio_rw = UIO_READ; 4576 auio.uio_td = curthread; 4577 4578 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4579 &auio, nd.nl_cred); 4580 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4581 4582 vput(vp); 4583 nlookup_done(&nd); 4584 rel_mplock(); 4585 return(error); 4586 } 4587 4588 /* 4589 * Syscall to delete a named extended attribute from a file or directory. 4590 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
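 *
 * Illustrative userland sketch (hypothetical attribute and path):
 *
 *	if (extattr_delete_file("/tmp/afile", EXTATTR_NAMESPACE_USER,
 *	    "color") != 0)
 *		err(1, "extattr_delete_file");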
4591 * 4592 * MPALMOSTSAFE 4593 */ 4594 int 4595 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4596 { 4597 char attrname[EXTATTR_MAXNAMELEN]; 4598 struct nlookupdata nd; 4599 struct vnode *vp; 4600 int error; 4601 4602 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4603 if (error) 4604 return(error); 4605 4606 get_mplock(); 4607 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4608 if (error == 0) 4609 error = nlookup(&nd); 4610 if (error == 0) 4611 error = ncp_writechk(&nd.nl_nch); 4612 if (error == 0) { 4613 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4614 if (error == 0) { 4615 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4616 attrname, NULL, nd.nl_cred); 4617 vput(vp); 4618 } 4619 } 4620 nlookup_done(&nd); 4621 rel_mplock(); 4622 return(error); 4623 } 4624 4625 /* 4626 * Determine if the mount is visible to the process. 4627 */ 4628 static int 4629 chroot_visible_mnt(struct mount *mp, struct proc *p) 4630 { 4631 struct nchandle nch; 4632 4633 /* 4634 * Traverse from the mount point upwards. If we hit the process 4635 * root then the mount point is visible to the process. 4636 */ 4637 nch = mp->mnt_ncmountpt; 4638 while (nch.ncp) { 4639 if (nch.mount == p->p_fd->fd_nrdir.mount && 4640 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4641 return(1); 4642 } 4643 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4644 nch = nch.mount->mnt_ncmounton; 4645 } else { 4646 nch.ncp = nch.ncp->nc_parent; 4647 } 4648 } 4649 4650 /* 4651 * If the mount point is not visible to the process, but the 4652 * process root is in a subdirectory of the mount, return 4653 * TRUE anyway. 4654 */ 4655 if (p->p_fd->fd_nrdir.mount == mp) 4656 return(1); 4657 4658 return(0); 4659 } 4660 4661