1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 #include <sys/mplock2.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 76 #include <machine/limits.h> 77 #include <machine/stdarg.h> 78 79 #include <vfs/union/union.h> 80 81 static void mount_warning(struct mount *mp, const char *ctl, ...) 
	__printflike(2, 3);
static int	mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

/* if 1, non-root users may mount filesystems (sysctl vfs.usermount) */
static int	usermount = 0;

/*
 * Hook for the union filesystem's directory read check.
 * NOTE(review): assigned elsewhere (presumably by the union fs module
 * when loaded) -- confirm against vfs/union.
 */
int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Performs permission checks (jail, vfs.usermount, directory ownership),
 * resolves the mount point path, then either updates an existing mount
 * (MNT_UPDATE) or allocates and initializes a new struct mount and calls
 * VFS_MOUNT() on it.  On a new-mount failure the partially constructed
 * mount is torn down again.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2 save mount flags for rollback */
	int hasmount;			/* 1 if something is already mounted here */
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	/* Jailed processes may never mount */
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Check whether something is already mounted on this directory;
	 * stacking another mount on top is rejected further below.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	/* keep the nch ref but release its lock; vp is now locked+refd */
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* updates must target the root vnode of an existing mount */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a nullfs update operate on the nullfs mount saved
		 * above, not on the underlying filesystem's mount.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* save flags so a failed VFS_MOUNT() can be rolled back */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	/* flush dirty buffers on the covered vnode before mounting over it */
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* the underlying filesystem may forbid stacked mounts */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	/* refuse to stack a new mount on an already-mounted-on directory */
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* restore the pre-update flags saved above */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_ismounting(mp);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		/* NOTE: a vfs_allocate_syncvnode() error is overwritten here */
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		/* VFS_MOUNT failed: tear the new mount back down */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* the covered (mounted-on) nch */
	struct nchandle new_nch;	/* root nch of the new mount */
	struct vnode *old_vp;		/* NOTE(review): never set by checkdirs() */
	struct vnode *new_vp;		/* root vnode of the new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || olddp->v_sysref.refcnt == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	/* re-lock in nch-before-vnode order to bind newdp into the nch */
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		/*
		 * Stage releases outside the spinlock: record what to drop
		 * while holding fd_spin, perform the drops afterwards.
		 */
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		/* current directory sits on the covered mount point? */
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		/* root directory sits on the covered mount point? */
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		/* now release the displaced references */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	int error;

	KKASSERT(p);
	get_mplock();
	/* jailed processes may never unmount */
	if (td->td_ucred->cr_prison != NULL) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, PRIV_ROOT)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	/*
	 * NOTE(review): nlookup_done() releases the lookup's references
	 * before dounmount() runs on mp -- confirm something else keeps
	 * the mount from being ripped out in this window.
	 */
	nlookup_done(&nd);
	if (error == 0)
		error = dounmount(mp, uap->flags);
done:
	rel_mplock();
	return (error);
}

/*
 * Do the actual file system unmount.
 */
/*
 * mountlist_interlock() helper: atomically claim exclusive unmount
 * rights by setting MNTK_UNMOUNT, or fail with EBUSY if another
 * unmount is already in progress.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() helper: drop a process's cached text (executable)
 * nchandle if it references the mount being unmounted, releasing
 * namecache refs that would otherwise hold up the unmount.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * Core unmount: interlock against concurrent unmounts, drain namecache
 * and mount references, sync, call VFS_UNMOUNT(), and tear down the
 * mount structure.  With MNT_FORCE most "still busy" conditions are
 * reported as warnings instead of failing (but then freeok is cleared
 * and the struct mount is intentionally leaked rather than freed).
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;		/* saved MNT_ASYNC, restored on failure */
	int lflags;
	int freeok = 1;		/* 0: refs remain, do not kfree(mp) */
	int retry;

	lwkt_gettoken(&mp->mnt_token);
	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* non-forced unmounts time out instead of blocking indefinitely */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* back out the interlock and wake any waiters */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * Still referenced: drop process text references and
		 * re-check.
		 */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		vrele(vp);
	}
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		VFS_SYNC(mp, MNT_WAIT);

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0) {
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/* detach the namecache glue on both sides of the mount */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * If refs > 1 cycle and wait, just in case someone tried
	 * to busy the mount after we decided to do the unmount.
	 */
	if (freeok) {
		while (mp->mnt_refs > 1) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
		}
		lwkt_reltoken(&mp->mnt_token);
		kfree(mp, M_MOUNT);
		mp = NULL;
	}
	error = 0;
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * kprintf() an "unmount(<path>): <fmt...>" style diagnostic, resolving
 * the mount point's full path when possible and falling back to the
 * mount pointer / last path component otherwise.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * sync() system call: push dirty data on every mounted filesystem.
 */
int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}

/*
 * Per-mount sync helper: temporarily clear MNT_ASYNC so the sync is
 * not deferred, msync and VFS_SYNC the mount, then restore the flag.
 * Read-only mounts are skipped.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	get_mplock();
	td = curthread;
	/* jailed processes need the (disabled) prison_quotas knob */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	rel_mplock();
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 * (MOUNTCTL_MOUNTFLAGS is the one op allowed without root.)
	 */
	KKASSERT(p);
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		/* +1 so string-typed control data is always terminated */
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	get_mplock();
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	rel_mplock();
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
		const void *ctl, int ctllen,
		void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);
	vn_unlock(vp);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	return (error);
}

/*
 * Fill *buf with statfs information for the mount reached by a prepared
 * (but not yet run) nlookup.  f_mntonname is rewritten relative to the
 * caller's root via mount_path(), and non-root callers get a zeroed fsid.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statfs information for the mount backing descriptor fd.
 * Same post-processing as kern_statfs() (f_mntonname rewrite, fsid
 * hidden from non-root).
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statvfs information for the mount reached by a prepared
 * nlookup.  Only ST_RDONLY/ST_NOSUID are reflected into f_flag.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * kern_fstatvfs() - return statvfs information for the mount underlying
 * an open file descriptor.  The fp hold acquired via holdvnode() is
 * released on all paths through the done: label.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	/*
	 * Prefer mount info from overlays (see kern_fstatfs) so that
	 * null-mounted paths inside a chroot work.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;	/* next userland slot; NULL if only counting */
	long count;		/* mounts visited (may exceed maxcount) */
	long maxcount;		/* capacity of the user buffer, in entries */
	int error;		/* first hard error encountered, if any */
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* If the buffer filled up, report only what fit */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getfsstat().  Returns 0 to continue the
 * mountlist scan (including on soft per-mount failures, which simply
 * skip the entry) and -1 to abort the scan on a hard error.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* Hide mounts not visible from the process's chroot */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getvfsstat_info {
	struct statfs *sfsp;	/* next statfs slot in userland */
	struct statvfs *vsfsp;	/* next statvfs slot in userland */
	long count;		/* mounts visited (may exceed maxcount) */
	long maxcount;		/* capacity of the user buffers, in entries */
	int error;		/* first hard error encountered, if any */
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	/* If the buffers filled up, report only what fit */
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getvfsstat().  Copies out both the statfs
 * and statvfs records for each visible mount.  Returns 0 to continue
 * the scan, -1 to abort on a hard error.
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		/* Hide mounts not visible from the process's chroot */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
1556 */ 1557 int 1558 sys_fchdir(struct fchdir_args *uap) 1559 { 1560 struct thread *td = curthread; 1561 struct proc *p = td->td_proc; 1562 struct filedesc *fdp = p->p_fd; 1563 struct vnode *vp, *ovp; 1564 struct mount *mp; 1565 struct file *fp; 1566 struct nchandle nch, onch, tnch; 1567 int error; 1568 1569 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1570 return (error); 1571 lwkt_gettoken(&p->p_token); 1572 vp = (struct vnode *)fp->f_data; 1573 vref(vp); 1574 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1575 if (fp->f_nchandle.ncp == NULL) 1576 error = ENOTDIR; 1577 else 1578 error = checkvp_chdir(vp, td); 1579 if (error) { 1580 vput(vp); 1581 goto done; 1582 } 1583 cache_copy(&fp->f_nchandle, &nch); 1584 1585 /* 1586 * If the ncp has become a mount point, traverse through 1587 * the mount point. 1588 */ 1589 1590 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1591 (mp = cache_findmount(&nch)) != NULL 1592 ) { 1593 error = nlookup_mp(mp, &tnch); 1594 if (error == 0) { 1595 cache_unlock(&tnch); /* leave ref intact */ 1596 vput(vp); 1597 vp = tnch.ncp->nc_vp; 1598 error = vget(vp, LK_SHARED); 1599 KKASSERT(error == 0); 1600 cache_drop(&nch); 1601 nch = tnch; 1602 } 1603 cache_dropmount(mp); 1604 } 1605 if (error == 0) { 1606 ovp = fdp->fd_cdir; 1607 onch = fdp->fd_ncdir; 1608 vn_unlock(vp); /* leave ref intact */ 1609 fdp->fd_cdir = vp; 1610 fdp->fd_ncdir = nch; 1611 cache_drop(&onch); 1612 vrele(ovp); 1613 } else { 1614 cache_drop(&nch); 1615 vput(vp); 1616 } 1617 fdrop(fp); 1618 done: 1619 lwkt_reltoken(&p->p_token); 1620 return (error); 1621 } 1622 1623 int 1624 kern_chdir(struct nlookupdata *nd) 1625 { 1626 struct thread *td = curthread; 1627 struct proc *p = td->td_proc; 1628 struct filedesc *fdp = p->p_fd; 1629 struct vnode *vp, *ovp; 1630 struct nchandle onch; 1631 int error; 1632 1633 if ((error = nlookup(nd)) != 0) 1634 return (error); 1635 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1636 return (ENOENT); 1637 if ((error = vget(vp, LK_SHARED)) != 0) 
1638 return (error); 1639 1640 lwkt_gettoken(&p->p_token); 1641 error = checkvp_chdir(vp, td); 1642 vn_unlock(vp); 1643 if (error == 0) { 1644 ovp = fdp->fd_cdir; 1645 onch = fdp->fd_ncdir; 1646 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1647 fdp->fd_ncdir = nd->nl_nch; 1648 fdp->fd_cdir = vp; 1649 cache_drop(&onch); 1650 vrele(ovp); 1651 cache_zero(&nd->nl_nch); 1652 } else { 1653 vrele(vp); 1654 } 1655 lwkt_reltoken(&p->p_token); 1656 return (error); 1657 } 1658 1659 /* 1660 * chdir_args(char *path) 1661 * 1662 * Change current working directory (``.''). 1663 */ 1664 int 1665 sys_chdir(struct chdir_args *uap) 1666 { 1667 struct nlookupdata nd; 1668 int error; 1669 1670 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1671 if (error == 0) 1672 error = kern_chdir(&nd); 1673 nlookup_done(&nd); 1674 return (error); 1675 } 1676 1677 /* 1678 * Helper function for raised chroot(2) security function: Refuse if 1679 * any filedescriptors are open directories. 1680 */ 1681 static int 1682 chroot_refuse_vdir_fds(struct filedesc *fdp) 1683 { 1684 struct vnode *vp; 1685 struct file *fp; 1686 int error; 1687 int fd; 1688 1689 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1690 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1691 continue; 1692 vp = (struct vnode *)fp->f_data; 1693 if (vp->v_type != VDIR) { 1694 fdrop(fp); 1695 continue; 1696 } 1697 fdrop(fp); 1698 return(EPERM); 1699 } 1700 return (0); 1701 } 1702 1703 /* 1704 * This sysctl determines if we will allow a process to chroot(2) if it 1705 * has a directory open: 1706 * 0: disallowed for all processes. 1707 * 1: allowed for processes that were not already chroot(2)'ed. 1708 * 2: allowed for all processes. 1709 */ 1710 1711 static int chroot_allow_open_directories = 1; 1712 1713 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1714 &chroot_allow_open_directories, 0, ""); 1715 1716 /* 1717 * chroot to the specified namecache entry. 
We obtain the vp from the 1718 * namecache data. The passed ncp must be locked and referenced and will 1719 * remain locked and referenced on return. 1720 */ 1721 int 1722 kern_chroot(struct nchandle *nch) 1723 { 1724 struct thread *td = curthread; 1725 struct proc *p = td->td_proc; 1726 struct filedesc *fdp = p->p_fd; 1727 struct vnode *vp; 1728 int error; 1729 1730 /* 1731 * Only privileged user can chroot 1732 */ 1733 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1734 if (error) 1735 return (error); 1736 1737 /* 1738 * Disallow open directory descriptors (fchdir() breakouts). 1739 */ 1740 if (chroot_allow_open_directories == 0 || 1741 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1742 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1743 return (error); 1744 } 1745 if ((vp = nch->ncp->nc_vp) == NULL) 1746 return (ENOENT); 1747 1748 if ((error = vget(vp, LK_SHARED)) != 0) 1749 return (error); 1750 1751 /* 1752 * Check the validity of vp as a directory to change to and 1753 * associate it with rdir/jdir. 1754 */ 1755 error = checkvp_chdir(vp, td); 1756 vn_unlock(vp); /* leave reference intact */ 1757 if (error == 0) { 1758 vrele(fdp->fd_rdir); 1759 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1760 cache_drop(&fdp->fd_nrdir); 1761 cache_copy(nch, &fdp->fd_nrdir); 1762 if (fdp->fd_jdir == NULL) { 1763 fdp->fd_jdir = vp; 1764 vref(fdp->fd_jdir); 1765 cache_copy(nch, &fdp->fd_njdir); 1766 } 1767 } else { 1768 vrele(vp); 1769 } 1770 return (error); 1771 } 1772 1773 /* 1774 * chroot_args(char *path) 1775 * 1776 * Change notion of root (``/'') directory. 
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		/* require search permission on the target directory */
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * sys_chroot_kernel() - set the kernel-wide root directory (rootnch /
 * rootvnode) to the given path via vfs_cache_setroot().  The vnode
 * reference from cache_vref() and the nch hold from cache_hold() are
 * consumed by vfs_cache_setroot().
 */
int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
	get_mplock();
	vfs_cache_setroot(vp, cache_hold(nch));
	rel_mplock();

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * kern_open() - open the path described by *nd with the given open
 * flags and creation mode, allocate a descriptor for it and return the
 * descriptor number in *res.  Handles the special device fdopen()
 * dup case and O_EXLOCK/O_SHLOCK advisory locking at open time.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	/* O_RDONLY|O_WRONLY together is invalid */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd,
						  flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK,
					 &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	if (oflags & O_CLOEXEC)
		error = fsetfdflags(fdp, *res, UF_EXCLOSE);
	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				  &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * kern_mknod() - create a special file (device node, whiteout, fifo-like
 * entry, or HAMMER special directory) at the path described by *nd.
 * Privilege checks depend on the requested file type.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mknodat(struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * kern_mkfifo() - create a named pipe (VFIFO node) at the path
 * described by *nd, with the given mode filtered through the
 * process's umask.
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* target must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkfifoat_args(int fd, char *path, mode_t mode)
 *
 * Create a named pipe.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * can_hardlink() - policy check for hard link creation, controlled by
 * the security.hardlink_check_uid/gid sysctls.  Returns 0 if the
 * caller may link to vp, EPERM otherwise.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * kern_link() - create a hard link.  *nd describes the existing source
 * file and *linknd the link to create.  The source must not be a
 * directory and the target must not exist.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 *
 * Make a hard file link.  The path1 argument is relative to the directory
 * associated with fd1, and similarly the path2 argument is relative to
 * the directory associated with fd2.
 */
int
sys_linkat(struct linkat_args *uap)
{
	struct nlookupdata nd, linknd;
	struct file *fp1, *fp2;
	int error;

	error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
	    (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
	if (error == 0) {
		error = nlookup_init_at(&linknd, &fp2, uap->fd2,
		    uap->path2, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done_at(&linknd, fp2);
	}
	nlookup_done_at(&nd, fp1);
	return (error);
}

/*
 * kern_symlink() - create a symbolic link at the path described by *nd
 * whose contents are the string 'path', with the given mode.
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* target must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* copy the link contents in via a namei-sized scratch buffer */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
 */
int
sys_symlinkat(struct symlinkat_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct file *fp;
	char *path1;
	int error;
	int mode;

	path1 = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
		    UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path1, mode);
		}
		nlookup_done_at(&nd, fp);
	}
	objcache_put(namei_oc, path1);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
int
sys_undelete(struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	bwillinode(1);
	/*
	 * NOTE(review): nd.nl_flags is modified before the init error is
	 * checked; presumably nlookup_init() leaves nd in a state where
	 * this is harmless on failure — confirm against nlookup(9).
	 */
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * kern_unlink() - remove the directory entry described by *nd after
 * verifying the containing mount is writable.
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
 */
int
sys_unlinkat(struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	/* AT_REMOVEDIR is the only flag accepted */
	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Reposition the seek offset of a file descriptor.  *res is set to the
 * resulting file offset (even on error it reflects fp->f_offset).
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		/* seeking is meaningless on pipes, sockets, etc */
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	/*
	 * Each case acquires fp->f_spin so the f_offset read/update
	 * below is atomic with respect to other seekers.  L_XTND must
	 * issue VOP_GETATTR() before taking the spinlock.
	 */
	switch (whence) {
	case L_INCR:
		spin_lock(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		error = VOP_GETATTR(vp, &vattr);
		spin_lock(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
	 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	*res = fp->f_offset;
	spin_unlock(&fp->f_spin);
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask of *_OK
 * access bits.  flags is a bitmask of AT_* flags.
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	if (flags & ~AT_EACCESS)
		return (EINVAL);
	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (amode) {
		/* translate the access bits into VOP_ACCESS mode bits */
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	return (error);
}


/*
 * eaccess_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_eaccess(struct eaccess_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, AT_EACCESS);
	nlookup_done(&nd);
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Common stat implementation based on a previously initialized
 * nlookupdata.  Fills in *st on success.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	/* flags == 0: do not follow a trailing symlink */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct fstatat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->sb, sizeof(*uap->sb));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common pathconf code shared by sys_pathconf() and sys_lpathconf().
 * flags selects NLC_FOLLOW (or not) for the trailing symlink.
 */
static int
kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, name, sysmsg_regp);
		vput(vp);
	}
	return (error);
}

/*
 * pathconf_args(char *path, int name)
 *
 * Get configurable pathname variables.
2866 */ 2867 int 2868 sys_pathconf(struct pathconf_args *uap) 2869 { 2870 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2871 &uap->sysmsg_reg)); 2872 } 2873 2874 /* 2875 * lpathconf_Args(char *path, int name) 2876 * 2877 * Get configurable pathname variables, but don't follow symlinks. 2878 */ 2879 int 2880 sys_lpathconf(struct lpathconf_args *uap) 2881 { 2882 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2883 } 2884 2885 /* 2886 * XXX: daver 2887 * kern_readlink isn't properly split yet. There is a copyin burried 2888 * in VOP_READLINK(). 2889 */ 2890 int 2891 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2892 { 2893 struct thread *td = curthread; 2894 struct vnode *vp; 2895 struct iovec aiov; 2896 struct uio auio; 2897 int error; 2898 2899 if ((error = nlookup(nd)) != 0) 2900 return (error); 2901 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2902 if (error) 2903 return (error); 2904 if (vp->v_type != VLNK) { 2905 error = EINVAL; 2906 } else { 2907 aiov.iov_base = buf; 2908 aiov.iov_len = count; 2909 auio.uio_iov = &aiov; 2910 auio.uio_iovcnt = 1; 2911 auio.uio_offset = 0; 2912 auio.uio_rw = UIO_READ; 2913 auio.uio_segflg = UIO_USERSPACE; 2914 auio.uio_td = td; 2915 auio.uio_resid = count; 2916 error = VOP_READLINK(vp, &auio, td->td_ucred); 2917 } 2918 vput(vp); 2919 *res = count - auio.uio_resid; 2920 return (error); 2921 } 2922 2923 /* 2924 * readlink_args(char *path, char *buf, int count) 2925 * 2926 * Return target name of a symbolic link. 
 */
int
sys_readlink(struct readlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	/* flags == 0: a trailing symlink is the object, do not follow it */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->count,
				      &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
 *
 * Return target name of a symbolic link.  The path is relative to the
 * directory associated with fd.
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
				      &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Set the fs-specific flags on a vnode via VOP_SETATTR().
 */
static int
setfflags(struct vnode *vp, int flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(char *path, int flags)
 *
 * Change flags of a file given a path name.
2999 */ 3000 int 3001 sys_chflags(struct chflags_args *uap) 3002 { 3003 struct nlookupdata nd; 3004 struct vnode *vp; 3005 int error; 3006 3007 vp = NULL; 3008 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3009 if (error == 0) 3010 error = nlookup(&nd); 3011 if (error == 0) 3012 error = ncp_writechk(&nd.nl_nch); 3013 if (error == 0) 3014 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3015 nlookup_done(&nd); 3016 if (error == 0) { 3017 error = setfflags(vp, uap->flags); 3018 vrele(vp); 3019 } 3020 return (error); 3021 } 3022 3023 /* 3024 * lchflags(char *path, int flags) 3025 * 3026 * Change flags of a file given a path name, but don't follow symlinks. 3027 */ 3028 int 3029 sys_lchflags(struct lchflags_args *uap) 3030 { 3031 struct nlookupdata nd; 3032 struct vnode *vp; 3033 int error; 3034 3035 vp = NULL; 3036 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3037 if (error == 0) 3038 error = nlookup(&nd); 3039 if (error == 0) 3040 error = ncp_writechk(&nd.nl_nch); 3041 if (error == 0) 3042 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3043 nlookup_done(&nd); 3044 if (error == 0) { 3045 error = setfflags(vp, uap->flags); 3046 vrele(vp); 3047 } 3048 return (error); 3049 } 3050 3051 /* 3052 * fchflags_args(int fd, int flags) 3053 * 3054 * Change flags of a file given a file descriptor. 
 */
int
sys_fchflags(struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* check mount writability via the cached namecache handle, if any */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * Set the permission bits on a vnode via VOP_SETATTR().
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return error;
}

/*
 * Common chmod implementation based on a previously initialized
 * nlookupdata.
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
3132 */ 3133 int 3134 sys_lchmod(struct lchmod_args *uap) 3135 { 3136 struct nlookupdata nd; 3137 int error; 3138 3139 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3140 if (error == 0) 3141 error = kern_chmod(&nd, uap->mode); 3142 nlookup_done(&nd); 3143 return (error); 3144 } 3145 3146 /* 3147 * fchmod_args(int fd, int mode) 3148 * 3149 * Change mode of a file given a file descriptor. 3150 */ 3151 int 3152 sys_fchmod(struct fchmod_args *uap) 3153 { 3154 struct thread *td = curthread; 3155 struct proc *p = td->td_proc; 3156 struct file *fp; 3157 int error; 3158 3159 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3160 return (error); 3161 if (fp->f_nchandle.ncp) 3162 error = ncp_writechk(&fp->f_nchandle); 3163 if (error == 0) 3164 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3165 fdrop(fp); 3166 return (error); 3167 } 3168 3169 /* 3170 * fchmodat_args(char *path, int mode) 3171 * 3172 * Change mode of a file pointed to by fd/path. 3173 */ 3174 int 3175 sys_fchmodat(struct fchmodat_args *uap) 3176 { 3177 struct nlookupdata nd; 3178 struct file *fp; 3179 int error; 3180 int flags; 3181 3182 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3183 return (EINVAL); 3184 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3185 3186 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3187 UIO_USERSPACE, flags); 3188 if (error == 0) 3189 error = kern_chmod(&nd, uap->mode); 3190 nlookup_done_at(&nd, fp); 3191 return (error); 3192 } 3193 3194 static int 3195 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3196 { 3197 struct thread *td = curthread; 3198 int error; 3199 struct vattr vattr; 3200 uid_t o_uid; 3201 gid_t o_gid; 3202 uint64_t size; 3203 3204 /* 3205 * note: vget is required for any operation that might mod the vnode 3206 * so VINACTIVE is properly cleared. 
3207 */ 3208 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3209 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3210 return error; 3211 o_uid = vattr.va_uid; 3212 o_gid = vattr.va_gid; 3213 size = vattr.va_size; 3214 3215 VATTR_NULL(&vattr); 3216 vattr.va_uid = uid; 3217 vattr.va_gid = gid; 3218 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3219 vput(vp); 3220 } 3221 3222 if (error == 0) { 3223 if (uid == -1) 3224 uid = o_uid; 3225 if (gid == -1) 3226 gid = o_gid; 3227 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3228 VFS_ACCOUNT(mp, uid, gid, size); 3229 } 3230 3231 return error; 3232 } 3233 3234 int 3235 kern_chown(struct nlookupdata *nd, int uid, int gid) 3236 { 3237 struct vnode *vp; 3238 int error; 3239 3240 if ((error = nlookup(nd)) != 0) 3241 return (error); 3242 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3243 return (error); 3244 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3245 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3246 vrele(vp); 3247 return (error); 3248 } 3249 3250 /* 3251 * chown(char *path, int uid, int gid) 3252 * 3253 * Set ownership given a path name. 3254 */ 3255 int 3256 sys_chown(struct chown_args *uap) 3257 { 3258 struct nlookupdata nd; 3259 int error; 3260 3261 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3262 if (error == 0) 3263 error = kern_chown(&nd, uap->uid, uap->gid); 3264 nlookup_done(&nd); 3265 return (error); 3266 } 3267 3268 /* 3269 * lchown_args(char *path, int uid, int gid) 3270 * 3271 * Set ownership given a path name, do not cross symlinks. 3272 */ 3273 int 3274 sys_lchown(struct lchown_args *uap) 3275 { 3276 struct nlookupdata nd; 3277 int error; 3278 3279 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3280 if (error == 0) 3281 error = kern_chown(&nd, uap->uid, uap->gid); 3282 nlookup_done(&nd); 3283 return (error); 3284 } 3285 3286 /* 3287 * fchown_args(int fd, int uid, int gid) 3288 * 3289 * Set ownership given a file descriptor. 
 */
int
sys_fchown(struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfown(p->p_fd->fd_ncdir.mount,
			(struct vnode *)fp->f_data, uap->uid, uap->gid);
	fdrop(fp);
	return (error);
}

/*
 * fchownat(int fd, char *path, int uid, int gid, int flags)
 *
 * Set ownership of file pointed to by fd/path.
 */
int
sys_fchownat(struct fchownat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Convert a user-supplied timeval pair into a timespec pair.  A NULL
 * tvp means "use the current time" for both access and modification.
 */
static int
getutimes(const struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];

	if (tvp == NULL) {
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Apply access/modification times to a vnode via VOP_SETATTR().
 * nullflag indicates the caller passed a NULL timeval (current time),
 * which relaxes the permission check in the filesystem.
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * Common utimes implementation based on a previously initialized
 * nlookupdata.  tptr may be NULL to set the current time.
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if ((error = getutimes(tptr, ts)) != 0)
		return (error);

	/*
	 * NOTE: utimes() succeeds for the owner even if the file
	 * is not user-writable.
	 */
	nd->nl_flags |= NLC_OWN | NLC_WRITE;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, ts, (tptr == NULL));
			vput(vp);
		}
	}
	vrele(vp);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_lutimes(struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	/* flags == 0: operate on the symlink itself, do not follow */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct timespec ts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	error = getutimes(tptr, ts);
	if (error)
		return (error);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			/* access check against the opening credentials */
			error = VOP_GETATTR(vp, &vattr);
			if (error == 0) {
				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
						   fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, ts,
						  (tptr == NULL));
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);

	return (error);
}

/*
 * Common truncate implementation based on a previously initialized
 * nlookupdata.  Truncating a directory returns EISDIR.
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}
	/* remember the old owner/size so quota accounting can be adjusted */
	if (vfs_quota_enabled) {
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
	}
done:
	/* vput() releases both the lock and the cache_vref reference */
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
3570 */ 3571 int 3572 sys_truncate(struct truncate_args *uap) 3573 { 3574 struct nlookupdata nd; 3575 int error; 3576 3577 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3578 if (error == 0) 3579 error = kern_truncate(&nd, uap->length); 3580 nlookup_done(&nd); 3581 return error; 3582 } 3583 3584 int 3585 kern_ftruncate(int fd, off_t length) 3586 { 3587 struct thread *td = curthread; 3588 struct proc *p = td->td_proc; 3589 struct vattr vattr; 3590 struct vnode *vp; 3591 struct file *fp; 3592 int error; 3593 uid_t uid = 0; 3594 gid_t gid = 0; 3595 uint64_t old_size = 0; 3596 struct mount *mp; 3597 3598 if (length < 0) 3599 return(EINVAL); 3600 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3601 return (error); 3602 if (fp->f_nchandle.ncp) { 3603 error = ncp_writechk(&fp->f_nchandle); 3604 if (error) 3605 goto done; 3606 } 3607 if ((fp->f_flag & FWRITE) == 0) { 3608 error = EINVAL; 3609 goto done; 3610 } 3611 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3612 error = EINVAL; 3613 goto done; 3614 } 3615 vp = (struct vnode *)fp->f_data; 3616 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3617 if (vp->v_type == VDIR) { 3618 error = EISDIR; 3619 goto done; 3620 } 3621 3622 if (vfs_quota_enabled) { 3623 error = VOP_GETATTR(vp, &vattr); 3624 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3625 uid = vattr.va_uid; 3626 gid = vattr.va_gid; 3627 old_size = vattr.va_size; 3628 } 3629 3630 if ((error = vn_writechk(vp, NULL)) == 0) { 3631 VATTR_NULL(&vattr); 3632 vattr.va_size = length; 3633 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3634 mp = vq_vptomp(vp); 3635 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3636 } 3637 vn_unlock(vp); 3638 done: 3639 fdrop(fp); 3640 return (error); 3641 } 3642 3643 /* 3644 * ftruncate_args(int fd, int pad, off_t length) 3645 * 3646 * Truncate a file given a file descriptor. 
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* flush dirty VM pages first unless the mount opts out via MNTK_NOMSYNC */
	if ((obj = vp->v_object) != NULL) {
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Common rename implementation.  Both fromnd and tond must have been
 * initialized by the caller, who is also responsible for the matching
 * nlookup_done() calls.  May return EAGAIN, in which case the caller
 * must re-initialize the lookups and retry (see sys_rename()).
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If either fromnd or tond are marked destroyed a ripout occured
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * make sure the parent directories linkages are the same
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by travsersing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	/* kern_rename() returns EAGAIN on a namecache ripout; retry */
	do {
		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&fromnd, &tond);
			nlookup_done(&tond);
		}
		nlookup_done(&fromnd);
	} while (error == EAGAIN);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.
 * Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_renameat(struct renameat_args *uap)
{
        struct nlookupdata oldnd, newnd;
        struct file *oldfp, *newfp;
        int error;

        /* retry on EAGAIN, see sys_rename() */
        do {
                error = nlookup_init_at(&oldnd, &oldfp,
                                        uap->oldfd, uap->old,
                                        UIO_USERSPACE, 0);
                if (error == 0) {
                        error = nlookup_init_at(&newnd, &newfp,
                                                uap->newfd, uap->new,
                                                UIO_USERSPACE, 0);
                        if (error == 0)
                                error = kern_rename(&oldnd, &newnd);
                        nlookup_done_at(&newnd, newfp);
                }
                nlookup_done_at(&oldnd, oldfp);
        } while (error == EAGAIN);
        return (error);
}

/*
 * Create a directory at the path held in the nlookup data, applying the
 * process's umask to the requested mode.  The caller is responsible for
 * calling nlookup_done() on nd.
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct vnode *vp;
        struct vattr vattr;
        int error;

        bwillinode(1);
        nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
        if ((error = nlookup(nd)) != 0)
                return (error);

        /* target name must not already exist */
        if (nd->nl_nch.ncp->nc_vp)
                return (EEXIST);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        VATTR_NULL(&vattr);
        vattr.va_type = VDIR;
        vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

        vp = NULL;
        error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
        if (error == 0)
                vput(vp);
        return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
int
sys_mkdir(struct mkdir_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_mkdir(&nd, uap->mode);
        nlookup_done(&nd);
        return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkdirat(struct mkdirat_args *uap)
{
        struct nlookupdata nd;
        struct file *fp;
        int error;

        error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_mkdir(&nd, uap->mode);
        nlookup_done_at(&nd, fp);
        return (error);
}

/*
 * Remove the directory at the path held in the nlookup data.  The caller
 * is responsible for calling nlookup_done() on nd.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
        int error;

        bwillinode(1);
        nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
        if ((error = nlookup(nd)) != 0)
                return (error);

        /*
         * Do not allow directories representing mount points to be
         * deleted, even if empty.  Check write perms on mount point
         * in case the vnode is aliased (aka nullfs).
         */
        if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
                return (EBUSY);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
        return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct rmdir_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_rmdir(&nd);
        nlookup_done(&nd);
        return (error);
}

/*
 * Read directory entries from the open descriptor fd into buf (count
 * bytes max).  On success *res is set to the number of bytes produced
 * and, if basep is non-NULL, *basep receives the seek offset of the
 * block read.  direction selects user vs system space for buf.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
                   enum uio_seg direction)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct vnode *vp;
        struct file *fp;
        struct uio auio;
        struct iovec aiov;
        off_t loff;
        int error, eofflag;

        if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
                return (error);
        if ((fp->f_flag & FREAD) == 0) {
                error = EBADF;
                goto done;
        }
        vp = (struct vnode *)fp->f_data;
unionread:
        if (vp->v_type != VDIR) {
                error = EINVAL;
                goto done;
        }
        aiov.iov_base = buf;
        aiov.iov_len = count;
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = direction;
        auio.uio_td = td;
        auio.uio_resid = count;
        loff = auio.uio_offset = fp->f_offset;
        error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
        fp->f_offset = auio.uio_offset;
        if (error)
                goto done;

        /*
         * Nothing was transferred.  If a union check hook is installed
         * it may redirect us to the covered vnode (returns -1 to loop).
         */
        if (count == auio.uio_resid) {
                if (union_dircheckp) {
                        error = union_dircheckp(td, &vp, fp);
                        if (error == -1)
                                goto unionread;
                        if (error)
                                goto done;
                }
#if 0
                if ((vp->v_flag & VROOT) &&
                    (vp->v_mount->mnt_flag & MNT_UNION)) {
                        struct vnode *tvp = vp;
                        vp = vp->v_mount->mnt_vnodecovered;
                        vref(vp);
                        fp->f_data = vp;
                        fp->f_offset = 0;
                        vrele(tvp);
                        goto unionread;
                }
#endif
        }

        /*
         * WARNING!  *basep may not be wide enough to accommodate the
         * seek offset.   XXX should we hack this to return the upper 32 bits
         * for offsets greater than 4G?
         */
        if (basep) {
                *basep = (long)loff;
        }
        *res = count - auio.uio_resid;
done:
        fdrop(fp);
        return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
        long base;
        int error;

        error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
                                   &uap->sysmsg_result, UIO_USERSPACE);

        if (error == 0 && uap->basep)
                error = copyout(&base, uap->basep, sizeof(*uap->basep));
        return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 *
 * Like getdirentries() but without the seek-offset return.
 */
int
sys_getdents(struct getdents_args *uap)
{
        int error;

        error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
                                   &uap->sysmsg_result, UIO_USERSPACE);

        return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 */
int
sys_umask(struct umask_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct filedesc *fdp;

        fdp = p->p_fd;
        /* return the previous mask, as required by umask(2) semantics */
        uap->sysmsg_result = fdp->fd_cmask;
        fdp->fd_cmask = uap->newmask & ALLPERMS;
        return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
4173 */ 4174 int 4175 sys_revoke(struct revoke_args *uap) 4176 { 4177 struct nlookupdata nd; 4178 struct vattr vattr; 4179 struct vnode *vp; 4180 struct ucred *cred; 4181 int error; 4182 4183 vp = NULL; 4184 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4185 if (error == 0) 4186 error = nlookup(&nd); 4187 if (error == 0) 4188 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4189 cred = crhold(nd.nl_cred); 4190 nlookup_done(&nd); 4191 if (error == 0) { 4192 if (error == 0) 4193 error = VOP_GETATTR(vp, &vattr); 4194 if (error == 0 && cred->cr_uid != vattr.va_uid) 4195 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4196 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4197 if (vcount(vp) > 0) 4198 error = vrevoke(vp, cred); 4199 } else if (error == 0) { 4200 error = vrevoke(vp, cred); 4201 } 4202 vrele(vp); 4203 } 4204 if (cred) 4205 crfree(cred); 4206 return (error); 4207 } 4208 4209 /* 4210 * getfh_args(char *fname, fhandle_t *fhp) 4211 * 4212 * Get (NFS) file handle 4213 * 4214 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4215 * mount. This allows nullfs mounts to be explicitly exported. 4216 * 4217 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4218 * 4219 * nullfs mounts of subdirectories are not safe. That is, it will 4220 * work, but you do not really have protection against access to 4221 * the related parent directories. 
4222 */ 4223 int 4224 sys_getfh(struct getfh_args *uap) 4225 { 4226 struct thread *td = curthread; 4227 struct nlookupdata nd; 4228 fhandle_t fh; 4229 struct vnode *vp; 4230 struct mount *mp; 4231 int error; 4232 4233 /* 4234 * Must be super user 4235 */ 4236 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4237 return (error); 4238 4239 vp = NULL; 4240 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4241 if (error == 0) 4242 error = nlookup(&nd); 4243 if (error == 0) 4244 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4245 mp = nd.nl_nch.mount; 4246 nlookup_done(&nd); 4247 if (error == 0) { 4248 bzero(&fh, sizeof(fh)); 4249 fh.fh_fsid = mp->mnt_stat.f_fsid; 4250 error = VFS_VPTOFH(vp, &fh.fh_fid); 4251 vput(vp); 4252 if (error == 0) 4253 error = copyout(&fh, uap->fhp, sizeof(fh)); 4254 } 4255 return (error); 4256 } 4257 4258 /* 4259 * fhopen_args(const struct fhandle *u_fhp, int flags) 4260 * 4261 * syscall for the rpc.lockd to use to translate a NFS file handle into 4262 * an open descriptor. 4263 * 4264 * warning: do not remove the priv_check() call or this becomes one giant 4265 * security hole. 4266 */ 4267 int 4268 sys_fhopen(struct fhopen_args *uap) 4269 { 4270 struct thread *td = curthread; 4271 struct filedesc *fdp = td->td_proc->p_fd; 4272 struct mount *mp; 4273 struct vnode *vp; 4274 struct fhandle fhp; 4275 struct vattr vat; 4276 struct vattr *vap = &vat; 4277 struct flock lf; 4278 int fmode, mode, error = 0, type; 4279 struct file *nfp; 4280 struct file *fp; 4281 int indx; 4282 4283 /* 4284 * Must be super user 4285 */ 4286 error = priv_check(td, PRIV_ROOT); 4287 if (error) 4288 return (error); 4289 4290 fmode = FFLAGS(uap->flags); 4291 4292 /* 4293 * Why not allow a non-read/write open for our lockd? 
4294 */ 4295 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4296 return (EINVAL); 4297 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4298 if (error) 4299 return(error); 4300 4301 /* 4302 * Find the mount point 4303 */ 4304 mp = vfs_getvfs(&fhp.fh_fsid); 4305 if (mp == NULL) { 4306 error = ESTALE; 4307 goto done; 4308 } 4309 /* now give me my vnode, it gets returned to me locked */ 4310 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4311 if (error) 4312 goto done; 4313 /* 4314 * from now on we have to make sure not 4315 * to forget about the vnode 4316 * any error that causes an abort must vput(vp) 4317 * just set error = err and 'goto bad;'. 4318 */ 4319 4320 /* 4321 * from vn_open 4322 */ 4323 if (vp->v_type == VLNK) { 4324 error = EMLINK; 4325 goto bad; 4326 } 4327 if (vp->v_type == VSOCK) { 4328 error = EOPNOTSUPP; 4329 goto bad; 4330 } 4331 mode = 0; 4332 if (fmode & (FWRITE | O_TRUNC)) { 4333 if (vp->v_type == VDIR) { 4334 error = EISDIR; 4335 goto bad; 4336 } 4337 error = vn_writechk(vp, NULL); 4338 if (error) 4339 goto bad; 4340 mode |= VWRITE; 4341 } 4342 if (fmode & FREAD) 4343 mode |= VREAD; 4344 if (mode) { 4345 error = VOP_ACCESS(vp, mode, td->td_ucred); 4346 if (error) 4347 goto bad; 4348 } 4349 if (fmode & O_TRUNC) { 4350 vn_unlock(vp); /* XXX */ 4351 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4352 VATTR_NULL(vap); 4353 vap->va_size = 0; 4354 error = VOP_SETATTR(vp, vap, td->td_ucred); 4355 if (error) 4356 goto bad; 4357 } 4358 4359 /* 4360 * VOP_OPEN needs the file pointer so it can potentially override 4361 * it. 4362 * 4363 * WARNING! no f_nchandle will be associated when fhopen()ing a 4364 * directory. XXX 4365 */ 4366 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4367 goto bad; 4368 fp = nfp; 4369 4370 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4371 if (error) { 4372 /* 4373 * setting f_ops this way prevents VOP_CLOSE from being 4374 * called or fdrop() releasing the vp from v_data. 
Since 4375 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4376 */ 4377 fp->f_ops = &badfileops; 4378 fp->f_data = NULL; 4379 goto bad_drop; 4380 } 4381 4382 /* 4383 * The fp is given its own reference, we still have our ref and lock. 4384 * 4385 * Assert that all regular files must be created with a VM object. 4386 */ 4387 if (vp->v_type == VREG && vp->v_object == NULL) { 4388 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4389 goto bad_drop; 4390 } 4391 4392 /* 4393 * The open was successful. Handle any locking requirements. 4394 */ 4395 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4396 lf.l_whence = SEEK_SET; 4397 lf.l_start = 0; 4398 lf.l_len = 0; 4399 if (fmode & O_EXLOCK) 4400 lf.l_type = F_WRLCK; 4401 else 4402 lf.l_type = F_RDLCK; 4403 if (fmode & FNONBLOCK) 4404 type = 0; 4405 else 4406 type = F_WAIT; 4407 vn_unlock(vp); 4408 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4409 /* 4410 * release our private reference. 4411 */ 4412 fsetfd(fdp, NULL, indx); 4413 fdrop(fp); 4414 vrele(vp); 4415 goto done; 4416 } 4417 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4418 fp->f_flag |= FHASLOCK; 4419 } 4420 4421 /* 4422 * Clean up. Associate the file pointer with the previously 4423 * reserved descriptor and return it. 
 */
        vput(vp);
        fsetfd(fdp, fp, indx);
        fdrop(fp);
        uap->sysmsg_result = indx;
        if (uap->flags & O_CLOEXEC)
                error = fsetfdflags(fdp, indx, UF_EXCLOSE);
        return (error);

bad_drop:
        /* release the reserved descriptor slot and our fp reference */
        fsetfd(fdp, NULL, indx);
        fdrop(fp);
bad:
        /* vp is held locked from VFS_FHTOVP() on this path */
        vput(vp);
done:
        return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 */
int
sys_fhstat(struct fhstat_args *uap)
{
        struct thread *td = curthread;
        struct stat sb;
        fhandle_t fh;
        struct mount *mp;
        struct vnode *vp;
        int error;

        /*
         * Must be super user
         */
        error = priv_check(td, PRIV_ROOT);
        if (error)
                return (error);

        error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
        if (error)
                return (error);

        if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
                error = ESTALE;
        if (error == 0) {
                if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
                        error = vn_stat(vp, &sb, td->td_ucred);
                        vput(vp);
                }
        }
        if (error == 0)
                error = copyout(&sb, uap->sb, sizeof(sb));
        return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct statfs *sp;
        struct mount *mp;
        struct vnode *vp;
        struct statfs sb;
        char *fullpath, *freepath;
        fhandle_t fh;
        int error;

        /*
         * Must be super user
         */
        if ((error = priv_check(td, PRIV_ROOT)))
                return (error);

        if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
                return (error);

        if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
                error = ESTALE;
                goto done;
        }
        /* mounts outside a chrooted process's view are treated as stale */
        if (p != NULL && !chroot_visible_mnt(mp, p)) {
                error = ESTALE;
                goto done;
        }

        if ((error = VFS_FHTOVP(mp, NULL,
                                &fh.fh_fid, &vp)) != 0)
                goto done;
        mp = vp->v_mount;
        sp = &mp->mnt_stat;
        vput(vp);
        if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
                goto done;

        /* rewrite f_mntonname relative to the caller's root */
        error = mount_path(p, mp, &fullpath, &freepath);
        if (error)
                goto done;
        bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
        strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
        kfree(freepath, M_TEMP);

        sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
        /* hide the fsid from unprivileged callers via a local copy */
        if (priv_check(td, PRIV_ROOT)) {
                bcopy(sp, &sb, sizeof(sb));
                sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
                sp = &sb;
        }
        error = copyout(sp, uap->buf, sizeof(*sp));
done:
        return (error);
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 */
int
sys_fhstatvfs(struct fhstatvfs_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct statvfs *sp;
        struct mount *mp;
        struct vnode *vp;
        fhandle_t fh;
        int error;

        /*
         * Must be super user
         */
        if ((error = priv_check(td, PRIV_ROOT)))
                return (error);

        if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
                return (error);

        if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
                error = ESTALE;
                goto done;
        }
        /* mounts outside a chrooted process's view are treated as stale */
        if (p != NULL && !chroot_visible_mnt(mp, p)) {
                error = ESTALE;
                goto done;
        }

        if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
                goto done;
        mp = vp->v_mount;
        sp = &mp->mnt_vstat;
        vput(vp);
        if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
                goto done;

        /* translate mount flags into statvfs f_flag bits */
        sp->f_flag = 0;
        if (mp->mnt_flag & MNT_RDONLY)
                sp->f_flag |= ST_RDONLY;
        if (mp->mnt_flag & MNT_NOSUID)
                sp->f_flag |= ST_NOSUID;
        error = copyout(sp, uap->buf, sizeof(*sp));
done:
        return (error);
}


/*
 * Syscall to push extended attribute configuration information into the
VFS. Accepts a path, which it converts to a mountpoint, as well as 4593 * a command (int cmd), and attribute name and misc data. For now, the 4594 * attribute name is left in userspace for consumption by the VFS_op. 4595 * It will probably be changed to be copied into sysspace by the 4596 * syscall in the future, once issues with various consumers of the 4597 * attribute code have raised their hands. 4598 * 4599 * Currently this is used only by UFS Extended Attributes. 4600 */ 4601 int 4602 sys_extattrctl(struct extattrctl_args *uap) 4603 { 4604 struct nlookupdata nd; 4605 struct vnode *vp; 4606 char attrname[EXTATTR_MAXNAMELEN]; 4607 int error; 4608 size_t size; 4609 4610 attrname[0] = 0; 4611 vp = NULL; 4612 error = 0; 4613 4614 if (error == 0 && uap->filename) { 4615 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4616 NLC_FOLLOW); 4617 if (error == 0) 4618 error = nlookup(&nd); 4619 if (error == 0) 4620 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4621 nlookup_done(&nd); 4622 } 4623 4624 if (error == 0 && uap->attrname) { 4625 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4626 &size); 4627 } 4628 4629 if (error == 0) { 4630 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4631 if (error == 0) 4632 error = nlookup(&nd); 4633 if (error == 0) 4634 error = ncp_writechk(&nd.nl_nch); 4635 if (error == 0) { 4636 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4637 uap->attrnamespace, 4638 uap->attrname, nd.nl_cred); 4639 } 4640 nlookup_done(&nd); 4641 } 4642 4643 return (error); 4644 } 4645 4646 /* 4647 * Syscall to get a named extended attribute on a file or directory. 
4648 */ 4649 int 4650 sys_extattr_set_file(struct extattr_set_file_args *uap) 4651 { 4652 char attrname[EXTATTR_MAXNAMELEN]; 4653 struct nlookupdata nd; 4654 struct vnode *vp; 4655 struct uio auio; 4656 struct iovec aiov; 4657 int error; 4658 4659 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4660 if (error) 4661 return (error); 4662 4663 vp = NULL; 4664 4665 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4666 if (error == 0) 4667 error = nlookup(&nd); 4668 if (error == 0) 4669 error = ncp_writechk(&nd.nl_nch); 4670 if (error == 0) 4671 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4672 if (error) { 4673 nlookup_done(&nd); 4674 return (error); 4675 } 4676 4677 bzero(&auio, sizeof(auio)); 4678 aiov.iov_base = uap->data; 4679 aiov.iov_len = uap->nbytes; 4680 auio.uio_iov = &aiov; 4681 auio.uio_iovcnt = 1; 4682 auio.uio_offset = 0; 4683 auio.uio_resid = uap->nbytes; 4684 auio.uio_rw = UIO_WRITE; 4685 auio.uio_td = curthread; 4686 4687 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4688 &auio, nd.nl_cred); 4689 4690 vput(vp); 4691 nlookup_done(&nd); 4692 return (error); 4693 } 4694 4695 /* 4696 * Syscall to get a named extended attribute on a file or directory. 
4697 */ 4698 int 4699 sys_extattr_get_file(struct extattr_get_file_args *uap) 4700 { 4701 char attrname[EXTATTR_MAXNAMELEN]; 4702 struct nlookupdata nd; 4703 struct uio auio; 4704 struct iovec aiov; 4705 struct vnode *vp; 4706 int error; 4707 4708 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4709 if (error) 4710 return (error); 4711 4712 vp = NULL; 4713 4714 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4715 if (error == 0) 4716 error = nlookup(&nd); 4717 if (error == 0) 4718 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4719 if (error) { 4720 nlookup_done(&nd); 4721 return (error); 4722 } 4723 4724 bzero(&auio, sizeof(auio)); 4725 aiov.iov_base = uap->data; 4726 aiov.iov_len = uap->nbytes; 4727 auio.uio_iov = &aiov; 4728 auio.uio_iovcnt = 1; 4729 auio.uio_offset = 0; 4730 auio.uio_resid = uap->nbytes; 4731 auio.uio_rw = UIO_READ; 4732 auio.uio_td = curthread; 4733 4734 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4735 &auio, nd.nl_cred); 4736 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4737 4738 vput(vp); 4739 nlookup_done(&nd); 4740 return(error); 4741 } 4742 4743 /* 4744 * Syscall to delete a named extended attribute from a file or directory. 4745 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
4746 */ 4747 int 4748 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4749 { 4750 char attrname[EXTATTR_MAXNAMELEN]; 4751 struct nlookupdata nd; 4752 struct vnode *vp; 4753 int error; 4754 4755 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4756 if (error) 4757 return(error); 4758 4759 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4760 if (error == 0) 4761 error = nlookup(&nd); 4762 if (error == 0) 4763 error = ncp_writechk(&nd.nl_nch); 4764 if (error == 0) { 4765 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4766 if (error == 0) { 4767 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4768 attrname, NULL, nd.nl_cred); 4769 vput(vp); 4770 } 4771 } 4772 nlookup_done(&nd); 4773 return(error); 4774 } 4775 4776 /* 4777 * Determine if the mount is visible to the process. 4778 */ 4779 static int 4780 chroot_visible_mnt(struct mount *mp, struct proc *p) 4781 { 4782 struct nchandle nch; 4783 4784 /* 4785 * Traverse from the mount point upwards. If we hit the process 4786 * root then the mount point is visible to the process. 4787 */ 4788 nch = mp->mnt_ncmountpt; 4789 while (nch.ncp) { 4790 if (nch.mount == p->p_fd->fd_nrdir.mount && 4791 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4792 return(1); 4793 } 4794 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4795 nch = nch.mount->mnt_ncmounton; 4796 } else { 4797 nch.ncp = nch.ncp->nc_parent; 4798 } 4799 } 4800 4801 /* 4802 * If the mount point is not visible to the process, but the 4803 * process root is in a subdirectory of the mount, return 4804 * TRUE anyway. 4805 */ 4806 if (p->p_fd->fd_nrdir.mount == mp) 4807 return(1); 4808 4809 return(0); 4810 } 4811 4812