/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/jail.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>
#include <sys/sysctl.h>

#include <sys/buf2.h>
#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <machine/limits.h>
#include <machine/stdarg.h>

#include <vfs/union/union.h>

static void mount_warning(struct mount *mp, const char *ctl, ...)
		__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

static int	usermount = 0;	/* if 1, non-root can mount fs. */

int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* Never allow mounts from within a jail */
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Determine whether something is already mounted on this
	 * directory; cache_findmount() returns a temporary ref which
	 * we immediately drop, we only need the boolean.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* remember current flags so they can be restored on error */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;

		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* restore the flags saved before the update attempt */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_ismounting(mp);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		/*
		 * NOTE(review): VFS_START's return overwrites any
		 * vfs_allocate_syncvnode() error above — appears
		 * intentional (mount already succeeded), confirm.
		 */
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		/* mount failed: tear down the partially set up mount */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
/*
 * Context passed to checkdirs_callback() via allproc_scan().
 */
struct checkdirs_info {
	struct nchandle old_nch;
	struct nchandle new_nch;
	struct vnode *old_vp;
	struct vnode *new_vp;
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);

		/* drop the old refs outside the spinlock */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/*
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	int error;

	KKASSERT(p);
	get_mplock();

	/* Never allow unmounts from within a jail */
	if (td->td_ucred->cr_prison != NULL) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, PRIV_ROOT)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	nlookup_done(&nd);
	if (error == 0)
		error = dounmount(mp, uap->flags);
done:
	rel_mplock();
	return (error);
}

/*
 * Do the actual file system unmount.
 */
/*
 * mountlist_interlock() callback: atomically claim the mount for
 * unmounting, failing with EBUSY if an unmount is already in progress.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: release a process's text nchandle if it
 * references the mount being unmounted.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * Do the actual unmount of *mp.  Returns 0 on success or an errno.
 * With MNT_FORCE the unmount proceeds even when references remain,
 * in which case the mount structure is intentionally leaked
 * (freeok = 0) rather than freed out from under the lingering refs.
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int retry;

	lwkt_gettoken(&mp->mnt_token);
	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* failed to get the lock, back out the interlock flags */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		VFS_SYNC(mp, MNT_WAIT);

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0) {
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * If refs > 1 cycle and wait, just in case someone tried
	 * to busy the mount after we decided to do the unmount.
	 */
	if (freeok) {
		while (mp->mnt_refs > 1) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
		}
		lwkt_reltoken(&mp->mnt_token);
		kfree(mp, M_MOUNT);
		mp = NULL;
	}
	error = 0;
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Emit a kprintf() warning about mount point mp, prefixing it with the
 * mount's full path when it can be resolved, or the mount pointer and
 * last path component otherwise.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}

/*
 * mountlist_scan() callback: asynchronously sync one writable mount,
 * temporarily clearing MNT_ASYNC so the sync is not deferred further.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	get_mplock();
	td = curthread;
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				    uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	rel_mplock();
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	get_mplock();
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	rel_mplock();
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);
	vn_unlock(vp);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	return (error);
}

/*
 * Resolve *nd and copy the mount's statfs data into *buf, with the
 * f_mntonname path adjusted for chroot via mount_path().  Non-root
 * callers get the fsid zeroed out.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * statfs by file descriptor; like kern_statfs() but resolves the mount
 * from the open file.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Resolve *nd and copy the mount's statvfs data into *buf, deriving
 * f_flag (ST_RDONLY/ST_NOSUID) from the mount flags.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * statvfs by file descriptor; like kern_statvfs() but resolves the
 * mount from the open file (preferring overlay mount info, see
 * kern_fstatfs()).
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/*
 * Per-scan state shared with getfsstat_callback().
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user buffer slot, NULL = count only */
	long count;		/* mounts visited (visible ones) */
	long maxcount;		/* capacity of the user buffer in entries */
	int error;		/* first error encountered, if any */
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;	/* calling thread, for cred/chroot checks */
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* clamp the reported count to what actually fit in the buffer */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * mountlist_scan() callback: copy one mount's statfs to userland.
 * Returning -1 records the error and stops the scan; returning 0
 * continues (NOTE(review): presumed mountlist_scan contract — confirm).
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* skip mounts not visible from the process's chroot */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/*
 * Per-scan state shared with getvfsstat_callback().  Like
 * getfsstat_info but carries parallel statfs and statvfs buffers.
 */
struct getvfsstat_info {
	struct statfs *sfsp;	/* next statfs slot in user buffer */
	struct statvfs *vsfsp;	/* next statvfs slot, NULL = count only */
	long count;
	long maxcount;		/* capacity in entries, from vbufsize */
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * mountlist_scan() callback: copy one mount's statfs AND statvfs to
 * the two parallel user buffers.  Same abort/continue protocol as
 * getfsstat_callback().
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		/* skip mounts not visible from the process's chroot */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	lwkt_gettoken(&p->p_token);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* a descriptor with no namecache handle cannot become the cwd */
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
		cache_dropmount(mp);
	}
	if (error == 0) {
		/* swap in the new cwd, then release the old one */
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		vn_unlock(vp);		/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * Change the current directory to the directory resolved by nd.  On
 * success the nch reference is transferred into fd_ncdir (nd->nl_nch
 * is zeroed) and the vnode reference into fd_cdir.
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	lwkt_gettoken(&p->p_token);
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		cache_drop(&onch);
		vrele(ovp);
		/* ownership of the nch moved to fdp, prevent double drop */
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(fdp, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.
We obtain the vp from the 1719 * namecache data. The passed ncp must be locked and referenced and will 1720 * remain locked and referenced on return. 1721 */ 1722 int 1723 kern_chroot(struct nchandle *nch) 1724 { 1725 struct thread *td = curthread; 1726 struct proc *p = td->td_proc; 1727 struct filedesc *fdp = p->p_fd; 1728 struct vnode *vp; 1729 int error; 1730 1731 /* 1732 * Only privileged user can chroot 1733 */ 1734 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1735 if (error) 1736 return (error); 1737 1738 /* 1739 * Disallow open directory descriptors (fchdir() breakouts). 1740 */ 1741 if (chroot_allow_open_directories == 0 || 1742 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1743 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1744 return (error); 1745 } 1746 if ((vp = nch->ncp->nc_vp) == NULL) 1747 return (ENOENT); 1748 1749 if ((error = vget(vp, LK_SHARED)) != 0) 1750 return (error); 1751 1752 /* 1753 * Check the validity of vp as a directory to change to and 1754 * associate it with rdir/jdir. 1755 */ 1756 error = checkvp_chdir(vp, td); 1757 vn_unlock(vp); /* leave reference intact */ 1758 if (error == 0) { 1759 vrele(fdp->fd_rdir); 1760 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1761 cache_drop(&fdp->fd_nrdir); 1762 cache_copy(nch, &fdp->fd_nrdir); 1763 if (fdp->fd_jdir == NULL) { 1764 fdp->fd_jdir = vp; 1765 vref(fdp->fd_jdir); 1766 cache_copy(nch, &fdp->fd_njdir); 1767 } 1768 } else { 1769 vrele(vp); 1770 } 1771 return (error); 1772 } 1773 1774 /* 1775 * chroot_args(char *path) 1776 * 1777 * Change notion of root (``/'') directory. 
1778 */ 1779 int 1780 sys_chroot(struct chroot_args *uap) 1781 { 1782 struct thread *td __debugvar = curthread; 1783 struct nlookupdata nd; 1784 int error; 1785 1786 KKASSERT(td->td_proc); 1787 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1788 if (error == 0) { 1789 nd.nl_flags |= NLC_EXEC; 1790 error = nlookup(&nd); 1791 if (error == 0) 1792 error = kern_chroot(&nd.nl_nch); 1793 } 1794 nlookup_done(&nd); 1795 return(error); 1796 } 1797 1798 int 1799 sys_chroot_kernel(struct chroot_kernel_args *uap) 1800 { 1801 struct thread *td = curthread; 1802 struct nlookupdata nd; 1803 struct nchandle *nch; 1804 struct vnode *vp; 1805 int error; 1806 1807 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1808 if (error) 1809 goto error_nond; 1810 1811 error = nlookup(&nd); 1812 if (error) 1813 goto error_out; 1814 1815 nch = &nd.nl_nch; 1816 1817 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1818 if (error) 1819 goto error_out; 1820 1821 if ((vp = nch->ncp->nc_vp) == NULL) { 1822 error = ENOENT; 1823 goto error_out; 1824 } 1825 1826 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1827 goto error_out; 1828 1829 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1830 get_mplock(); 1831 vfs_cache_setroot(vp, cache_hold(nch)); 1832 rel_mplock(); 1833 1834 error_out: 1835 nlookup_done(&nd); 1836 error_nond: 1837 return(error); 1838 } 1839 1840 /* 1841 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1842 * determine whether it is legal to chdir to the vnode. The vnode's state 1843 * is not changed by this call. 
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open the file described by nd with the given open flags (oflags,
 * userland O_* format) and creation mode, assign a file descriptor and
 * return it in *res.  The caller is responsible for nlookup_done(nd)
 * (vn_open/nlookup_done are called here on its behalf).
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	/* O_RDONLY|O_WRONLY|O_RDWR all at once is malformed */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	if (oflags & O_CLOEXEC)
		error = fsetfdflags(fdp, *res, UF_EXCLOSE);
	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				    &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a special file (device node, whiteout, or special directory)
 * at the path described by nd.  mode selects the file type (S_IFMT
 * bits) and permissions; rmajor/rminor are the device numbers for
 * block/character nodes.  Each type requires a specific privilege.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
2125 */ 2126 int 2127 sys_mknodat(struct mknodat_args *uap) 2128 { 2129 struct nlookupdata nd; 2130 struct file *fp; 2131 int error; 2132 2133 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2134 if (error == 0) { 2135 error = kern_mknod(&nd, uap->mode, 2136 umajor(uap->dev), uminor(uap->dev)); 2137 } 2138 nlookup_done_at(&nd, fp); 2139 return (error); 2140 } 2141 2142 int 2143 kern_mkfifo(struct nlookupdata *nd, int mode) 2144 { 2145 struct thread *td = curthread; 2146 struct proc *p = td->td_proc; 2147 struct vattr vattr; 2148 struct vnode *vp; 2149 int error; 2150 2151 bwillinode(1); 2152 2153 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2154 if ((error = nlookup(nd)) != 0) 2155 return (error); 2156 if (nd->nl_nch.ncp->nc_vp) 2157 return (EEXIST); 2158 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2159 return (error); 2160 2161 VATTR_NULL(&vattr); 2162 vattr.va_type = VFIFO; 2163 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2164 vp = NULL; 2165 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2166 if (error == 0) 2167 vput(vp); 2168 return (error); 2169 } 2170 2171 /* 2172 * mkfifo_args(char *path, int mode) 2173 * 2174 * Create a named pipe. 2175 */ 2176 int 2177 sys_mkfifo(struct mkfifo_args *uap) 2178 { 2179 struct nlookupdata nd; 2180 int error; 2181 2182 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2183 if (error == 0) 2184 error = kern_mkfifo(&nd, uap->mode); 2185 nlookup_done(&nd); 2186 return (error); 2187 } 2188 2189 /* 2190 * mkfifoat_args(int fd, char *path, mode_t mode) 2191 * 2192 * Create a named pipe. The path is relative to the directory associated 2193 * with fd. 
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Check whether the caller may hardlink vp under the optional
 * security.hardlink_check_{uid,gid} restrictions.  Returns 0 if
 * allowed, EPERM if refused, or a VOP_GETATTR() error.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Create a hard link: nd resolves the existing source file, linknd the
 * new link name (which must not exist).  Both lookups are the caller's
 * to clean up via nlookup_done().
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	/* relock the source vnode for the VOP */
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
2328 */ 2329 int 2330 sys_link(struct link_args *uap) 2331 { 2332 struct nlookupdata nd, linknd; 2333 int error; 2334 2335 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2336 if (error == 0) { 2337 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2338 if (error == 0) 2339 error = kern_link(&nd, &linknd); 2340 nlookup_done(&linknd); 2341 } 2342 nlookup_done(&nd); 2343 return (error); 2344 } 2345 2346 /* 2347 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2348 * 2349 * Make a hard file link. The path1 argument is relative to the directory 2350 * associated with fd1, and similarly the path2 argument is relative to 2351 * the directory associated with fd2. 2352 */ 2353 int 2354 sys_linkat(struct linkat_args *uap) 2355 { 2356 struct nlookupdata nd, linknd; 2357 struct file *fp1, *fp2; 2358 int error; 2359 2360 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2361 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2362 if (error == 0) { 2363 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2364 uap->path2, UIO_USERSPACE, 0); 2365 if (error == 0) 2366 error = kern_link(&nd, &linknd); 2367 nlookup_done_at(&linknd, fp2); 2368 } 2369 nlookup_done_at(&nd, fp1); 2370 return (error); 2371 } 2372 2373 int 2374 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2375 { 2376 struct vattr vattr; 2377 struct vnode *vp; 2378 struct vnode *dvp; 2379 int error; 2380 2381 bwillinode(1); 2382 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2383 if ((error = nlookup(nd)) != 0) 2384 return (error); 2385 if (nd->nl_nch.ncp->nc_vp) 2386 return (EEXIST); 2387 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2388 return (error); 2389 dvp = nd->nl_dvp; 2390 VATTR_NULL(&vattr); 2391 vattr.va_mode = mode; 2392 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2393 if (error == 0) 2394 vput(vp); 2395 return (error); 2396 } 2397 2398 /* 2399 * symlink(char *path, char *link) 2400 * 2401 * Make a symbolic 
link. 2402 */ 2403 int 2404 sys_symlink(struct symlink_args *uap) 2405 { 2406 struct thread *td = curthread; 2407 struct nlookupdata nd; 2408 char *path; 2409 int error; 2410 int mode; 2411 2412 path = objcache_get(namei_oc, M_WAITOK); 2413 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2414 if (error == 0) { 2415 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2416 if (error == 0) { 2417 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2418 error = kern_symlink(&nd, path, mode); 2419 } 2420 nlookup_done(&nd); 2421 } 2422 objcache_put(namei_oc, path); 2423 return (error); 2424 } 2425 2426 /* 2427 * symlinkat_args(char *path1, int fd, char *path2) 2428 * 2429 * Make a symbolic link. The path2 argument is relative to the directory 2430 * associated with fd. 2431 */ 2432 int 2433 sys_symlinkat(struct symlinkat_args *uap) 2434 { 2435 struct thread *td = curthread; 2436 struct nlookupdata nd; 2437 struct file *fp; 2438 char *path1; 2439 int error; 2440 int mode; 2441 2442 path1 = objcache_get(namei_oc, M_WAITOK); 2443 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2444 if (error == 0) { 2445 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2446 UIO_USERSPACE, 0); 2447 if (error == 0) { 2448 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2449 error = kern_symlink(&nd, path1, mode); 2450 } 2451 nlookup_done_at(&nd, fp); 2452 } 2453 objcache_put(namei_oc, path1); 2454 return (error); 2455 } 2456 2457 /* 2458 * undelete_args(char *path) 2459 * 2460 * Delete a whiteout from the filesystem. 
2461 */ 2462 int 2463 sys_undelete(struct undelete_args *uap) 2464 { 2465 struct nlookupdata nd; 2466 int error; 2467 2468 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2469 bwillinode(1); 2470 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2471 if (error == 0) 2472 error = nlookup(&nd); 2473 if (error == 0) 2474 error = ncp_writechk(&nd.nl_nch); 2475 if (error == 0) { 2476 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2477 NAMEI_DELETE); 2478 } 2479 nlookup_done(&nd); 2480 return (error); 2481 } 2482 2483 int 2484 kern_unlink(struct nlookupdata *nd) 2485 { 2486 int error; 2487 2488 bwillinode(1); 2489 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2490 if ((error = nlookup(nd)) != 0) 2491 return (error); 2492 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2493 return (error); 2494 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2495 return (error); 2496 } 2497 2498 /* 2499 * unlink_args(char *path) 2500 * 2501 * Delete a name from the filesystem. 2502 */ 2503 int 2504 sys_unlink(struct unlink_args *uap) 2505 { 2506 struct nlookupdata nd; 2507 int error; 2508 2509 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2510 if (error == 0) 2511 error = kern_unlink(&nd); 2512 nlookup_done(&nd); 2513 return (error); 2514 } 2515 2516 2517 /* 2518 * unlinkat_args(int fd, char *path, int flags) 2519 * 2520 * Delete the file or directory entry pointed to by fd/path. 
 */
int
sys_unlinkat(struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	/* AT_REMOVEDIR is the only flag accepted */
	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Reposition the seek offset of fd.  whence is one of L_INCR (relative),
 * L_XTND (relative to EOF) or L_SET (absolute).  On return *res holds the
 * file's current offset (the old offset if the seek was rejected).
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	/* seeking is only meaningful on vnode-backed descriptors */
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	/*
	 * Every branch acquires f_spin so the offset update and the
	 * read-back of f_offset below occur atomically with respect to
	 * other seekers.  L_XTND must issue VOP_GETATTR() before taking
	 * the spinlock.
	 */
	switch (whence) {
	case L_INCR:
		spin_lock(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		error = VOP_GETATTR(vp, &vattr);
		spin_lock(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
	 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	/* report the (possibly unchanged) offset even on error */
	*res = fp->f_offset;
	spin_unlock(&fp->f_spin);
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask of *_OK
 * access bits.  flags is a bitmask of AT_* flags.
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	/* AT_EACCESS is the only flag accepted */
	if (flags & ~AT_EACCESS)
		return (EINVAL);
	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (amode) {
		/* translate user *_OK bits to kernel V* permission bits */
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		/* write access additionally requires a vn_writechk() pass */
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	return (error);
}


/*
 * eaccess_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_eaccess(struct eaccess_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, AT_EACCESS);
	nlookup_done(&nd);
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Common stat worker: resolve nd and fill *st via vn_stat().  Re-resolves
 * and retries once per ESTALE returned by the filesystem.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	/* re-read nc_vp each pass; a re-resolve may have changed it */
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	/* no NLC_FOLLOW: stat the link itself */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct fstatat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	/* AT_SYMLINK_NOFOLLOW is the only flag accepted */
	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->sb, sizeof(*uap->sb));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common worker for pathconf/lpathconf: resolve path with the given
 * nlookup flags and query the vnode via VOP_PATHCONF().
 */
static int
kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, name, sysmsg_regp);
		vput(vp);
	}
	return (error);
}

/*
 * pathconf_Args(char *path, int name)
 *
 * Get configurable pathname variables.
2867 */ 2868 int 2869 sys_pathconf(struct pathconf_args *uap) 2870 { 2871 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 2872 &uap->sysmsg_reg)); 2873 } 2874 2875 /* 2876 * lpathconf_Args(char *path, int name) 2877 * 2878 * Get configurable pathname variables, but don't follow symlinks. 2879 */ 2880 int 2881 sys_lpathconf(struct lpathconf_args *uap) 2882 { 2883 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 2884 } 2885 2886 /* 2887 * XXX: daver 2888 * kern_readlink isn't properly split yet. There is a copyin burried 2889 * in VOP_READLINK(). 2890 */ 2891 int 2892 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2893 { 2894 struct thread *td = curthread; 2895 struct vnode *vp; 2896 struct iovec aiov; 2897 struct uio auio; 2898 int error; 2899 2900 if ((error = nlookup(nd)) != 0) 2901 return (error); 2902 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2903 if (error) 2904 return (error); 2905 if (vp->v_type != VLNK) { 2906 error = EINVAL; 2907 } else { 2908 aiov.iov_base = buf; 2909 aiov.iov_len = count; 2910 auio.uio_iov = &aiov; 2911 auio.uio_iovcnt = 1; 2912 auio.uio_offset = 0; 2913 auio.uio_rw = UIO_READ; 2914 auio.uio_segflg = UIO_USERSPACE; 2915 auio.uio_td = td; 2916 auio.uio_resid = count; 2917 error = VOP_READLINK(vp, &auio, td->td_ucred); 2918 } 2919 vput(vp); 2920 *res = count - auio.uio_resid; 2921 return (error); 2922 } 2923 2924 /* 2925 * readlink_args(char *path, char *buf, int count) 2926 * 2927 * Return target name of a symbolic link. 
 */
int
sys_readlink(struct readlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	/* no NLC_FOLLOW: we want the link itself, not its target */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->count,
		    &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
 *
 * Return target name of a symbolic link.  The path is relative to the
 * directory associated with fd.
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
		    &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Set the file flags (va_flags) on vp.  Shared worker for chflags(),
 * lchflags() and fchflags().
 */
static int
setfflags(struct vnode *vp, int flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(char *path, int flags)
 *
 * Change flags of a file given a path name.
3000 */ 3001 int 3002 sys_chflags(struct chflags_args *uap) 3003 { 3004 struct nlookupdata nd; 3005 struct vnode *vp; 3006 int error; 3007 3008 vp = NULL; 3009 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3010 if (error == 0) 3011 error = nlookup(&nd); 3012 if (error == 0) 3013 error = ncp_writechk(&nd.nl_nch); 3014 if (error == 0) 3015 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3016 nlookup_done(&nd); 3017 if (error == 0) { 3018 error = setfflags(vp, uap->flags); 3019 vrele(vp); 3020 } 3021 return (error); 3022 } 3023 3024 /* 3025 * lchflags(char *path, int flags) 3026 * 3027 * Change flags of a file given a path name, but don't follow symlinks. 3028 */ 3029 int 3030 sys_lchflags(struct lchflags_args *uap) 3031 { 3032 struct nlookupdata nd; 3033 struct vnode *vp; 3034 int error; 3035 3036 vp = NULL; 3037 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3038 if (error == 0) 3039 error = nlookup(&nd); 3040 if (error == 0) 3041 error = ncp_writechk(&nd.nl_nch); 3042 if (error == 0) 3043 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3044 nlookup_done(&nd); 3045 if (error == 0) { 3046 error = setfflags(vp, uap->flags); 3047 vrele(vp); 3048 } 3049 return (error); 3050 } 3051 3052 /* 3053 * fchflags_args(int fd, int flags) 3054 * 3055 * Change flags of a file given a file descriptor. 
 */
int
sys_fchflags(struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* verify the mount is writable if we have a namecache handle */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * Set the file mode (permission bits) on vp.  Shared worker for the
 * chmod() family.
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return error;
}

/*
 * Resolve nd and change the mode of the resulting file.  Caller owns the
 * nlookupdata lifecycle.
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
3133 */ 3134 int 3135 sys_lchmod(struct lchmod_args *uap) 3136 { 3137 struct nlookupdata nd; 3138 int error; 3139 3140 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3141 if (error == 0) 3142 error = kern_chmod(&nd, uap->mode); 3143 nlookup_done(&nd); 3144 return (error); 3145 } 3146 3147 /* 3148 * fchmod_args(int fd, int mode) 3149 * 3150 * Change mode of a file given a file descriptor. 3151 */ 3152 int 3153 sys_fchmod(struct fchmod_args *uap) 3154 { 3155 struct thread *td = curthread; 3156 struct proc *p = td->td_proc; 3157 struct file *fp; 3158 int error; 3159 3160 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3161 return (error); 3162 if (fp->f_nchandle.ncp) 3163 error = ncp_writechk(&fp->f_nchandle); 3164 if (error == 0) 3165 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3166 fdrop(fp); 3167 return (error); 3168 } 3169 3170 /* 3171 * fchmodat_args(char *path, int mode) 3172 * 3173 * Change mode of a file pointed to by fd/path. 3174 */ 3175 int 3176 sys_fchmodat(struct fchmodat_args *uap) 3177 { 3178 struct nlookupdata nd; 3179 struct file *fp; 3180 int error; 3181 int flags; 3182 3183 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3184 return (EINVAL); 3185 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3186 3187 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3188 UIO_USERSPACE, flags); 3189 if (error == 0) 3190 error = kern_chmod(&nd, uap->mode); 3191 nlookup_done_at(&nd, fp); 3192 return (error); 3193 } 3194 3195 static int 3196 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3197 { 3198 struct thread *td = curthread; 3199 int error; 3200 struct vattr vattr; 3201 uid_t o_uid; 3202 gid_t o_gid; 3203 uint64_t size; 3204 3205 /* 3206 * note: vget is required for any operation that might mod the vnode 3207 * so VINACTIVE is properly cleared. 
3208 */ 3209 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3210 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3211 return error; 3212 o_uid = vattr.va_uid; 3213 o_gid = vattr.va_gid; 3214 size = vattr.va_size; 3215 3216 VATTR_NULL(&vattr); 3217 vattr.va_uid = uid; 3218 vattr.va_gid = gid; 3219 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3220 vput(vp); 3221 } 3222 3223 if (error == 0) { 3224 if (uid == -1) 3225 uid = o_uid; 3226 if (gid == -1) 3227 gid = o_gid; 3228 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3229 VFS_ACCOUNT(mp, uid, gid, size); 3230 } 3231 3232 return error; 3233 } 3234 3235 int 3236 kern_chown(struct nlookupdata *nd, int uid, int gid) 3237 { 3238 struct vnode *vp; 3239 int error; 3240 3241 if ((error = nlookup(nd)) != 0) 3242 return (error); 3243 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3244 return (error); 3245 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3246 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3247 vrele(vp); 3248 return (error); 3249 } 3250 3251 /* 3252 * chown(char *path, int uid, int gid) 3253 * 3254 * Set ownership given a path name. 3255 */ 3256 int 3257 sys_chown(struct chown_args *uap) 3258 { 3259 struct nlookupdata nd; 3260 int error; 3261 3262 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3263 if (error == 0) 3264 error = kern_chown(&nd, uap->uid, uap->gid); 3265 nlookup_done(&nd); 3266 return (error); 3267 } 3268 3269 /* 3270 * lchown_args(char *path, int uid, int gid) 3271 * 3272 * Set ownership given a path name, do not cross symlinks. 3273 */ 3274 int 3275 sys_lchown(struct lchown_args *uap) 3276 { 3277 struct nlookupdata nd; 3278 int error; 3279 3280 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3281 if (error == 0) 3282 error = kern_chown(&nd, uap->uid, uap->gid); 3283 nlookup_done(&nd); 3284 return (error); 3285 } 3286 3287 /* 3288 * fchown_args(int fd, int uid, int gid) 3289 * 3290 * Set ownership given a file descriptor. 
 */
int
sys_fchown(struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* verify the mount is writable if we have a namecache handle */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfown(p->p_fd->fd_ncdir.mount,
			(struct vnode *)fp->f_data, uap->uid, uap->gid);
	fdrop(fp);
	return (error);
}

/*
 * fchownat(int fd, char *path, int uid, int gid, int flags)
 *
 * Set ownership of file pointed to by fd/path.
 */
int
sys_fchownat(struct fchownat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	/* AT_SYMLINK_NOFOLLOW is the only flag accepted */
	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Convert an optional timeval pair to a timespec pair.  A NULL tvp
 * means "now": both timespecs are set to the current time.
 */
static int
getutimes(const struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];

	if (tvp == NULL) {
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Apply ts[0]/ts[1] as atime/mtime on vp.  nullflag indicates the caller
 * passed a NULL timeval (VA_UTIMES_NULL relaxes the permission check in
 * the filesystem).  vp must already be locked by the caller.
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * Resolve nd and set the access/modification times of the resulting
 * file.  tptr == NULL means "set both to now".
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if ((error = getutimes(tptr, ts)) != 0)
		return (error);

	/*
	 * NOTE: utimes() succeeds for the owner even if the file
	 * is not user-writable.
	 */
	nd->nl_flags |= NLC_OWN | NLC_WRITE;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, ts, (tptr == NULL));
			vput(vp);
		}
	}
	vrele(vp);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	/* copy in the caller's times before starting the lookup */
	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_lutimes(struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	/* no NLC_FOLLOW: operate on the link itself */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct timespec ts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	error = getutimes(tptr, ts);
	if (error)
		return (error);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	/* verify the mount is writable if we have a namecache handle */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			/* permission check against the opening creds */
			error = VOP_GETATTR(vp, &vattr);
			if (error == 0) {
				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
						   fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, ts,
						  (tptr == NULL));
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);

	return (error);
}

/*
 * Resolve nd and truncate (or extend) the resulting file to length,
 * updating quota accounting for the size change.
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}
	/* capture the old owner/size so the accounting delta is correct */
	if (vfs_quota_enabled) {
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
	}
done:
	/* vput() both unlocks and drops the reference from cache_vref() */
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
3571 */ 3572 int 3573 sys_truncate(struct truncate_args *uap) 3574 { 3575 struct nlookupdata nd; 3576 int error; 3577 3578 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3579 if (error == 0) 3580 error = kern_truncate(&nd, uap->length); 3581 nlookup_done(&nd); 3582 return error; 3583 } 3584 3585 int 3586 kern_ftruncate(int fd, off_t length) 3587 { 3588 struct thread *td = curthread; 3589 struct proc *p = td->td_proc; 3590 struct vattr vattr; 3591 struct vnode *vp; 3592 struct file *fp; 3593 int error; 3594 uid_t uid = 0; 3595 gid_t gid = 0; 3596 uint64_t old_size = 0; 3597 struct mount *mp; 3598 3599 if (length < 0) 3600 return(EINVAL); 3601 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3602 return (error); 3603 if (fp->f_nchandle.ncp) { 3604 error = ncp_writechk(&fp->f_nchandle); 3605 if (error) 3606 goto done; 3607 } 3608 if ((fp->f_flag & FWRITE) == 0) { 3609 error = EINVAL; 3610 goto done; 3611 } 3612 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3613 error = EINVAL; 3614 goto done; 3615 } 3616 vp = (struct vnode *)fp->f_data; 3617 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3618 if (vp->v_type == VDIR) { 3619 error = EISDIR; 3620 goto done; 3621 } 3622 3623 if (vfs_quota_enabled) { 3624 error = VOP_GETATTR(vp, &vattr); 3625 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3626 uid = vattr.va_uid; 3627 gid = vattr.va_gid; 3628 old_size = vattr.va_size; 3629 } 3630 3631 if ((error = vn_writechk(vp, NULL)) == 0) { 3632 VATTR_NULL(&vattr); 3633 vattr.va_size = length; 3634 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3635 mp = vq_vptomp(vp); 3636 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3637 } 3638 vn_unlock(vp); 3639 done: 3640 fdrop(fp); 3641 return (error); 3642 } 3643 3644 /* 3645 * ftruncate_args(int fd, int pad, off_t length) 3646 * 3647 * Truncate a file given a file descriptor. 
3648 */ 3649 int 3650 sys_ftruncate(struct ftruncate_args *uap) 3651 { 3652 int error; 3653 3654 error = kern_ftruncate(uap->fd, uap->length); 3655 3656 return (error); 3657 } 3658 3659 /* 3660 * fsync(int fd) 3661 * 3662 * Sync an open file. 3663 */ 3664 int 3665 sys_fsync(struct fsync_args *uap) 3666 { 3667 struct thread *td = curthread; 3668 struct proc *p = td->td_proc; 3669 struct vnode *vp; 3670 struct file *fp; 3671 vm_object_t obj; 3672 int error; 3673 3674 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3675 return (error); 3676 vp = (struct vnode *)fp->f_data; 3677 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3678 if ((obj = vp->v_object) != NULL) { 3679 if (vp->v_mount == NULL || 3680 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3681 vm_object_page_clean(obj, 0, 0, 0); 3682 } 3683 } 3684 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3685 if (error == 0 && vp->v_mount) 3686 error = buf_fsync(vp); 3687 vn_unlock(vp); 3688 fdrop(fp); 3689 3690 return (error); 3691 } 3692 3693 int 3694 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3695 { 3696 struct nchandle fnchd; 3697 struct nchandle tnchd; 3698 struct namecache *ncp; 3699 struct vnode *fdvp; 3700 struct vnode *tdvp; 3701 struct mount *mp; 3702 int error; 3703 3704 bwillinode(1); 3705 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3706 if ((error = nlookup(fromnd)) != 0) 3707 return (error); 3708 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3709 return (ENOENT); 3710 fnchd.mount = fromnd->nl_nch.mount; 3711 cache_hold(&fnchd); 3712 3713 /* 3714 * unlock the source nch so we can lookup the target nch without 3715 * deadlocking. The target may or may not exist so we do not check 3716 * for a target vp like kern_mkdir() and other creation functions do. 3717 * 3718 * The source and target directories are ref'd and rechecked after 3719 * everything is relocked to determine if the source or target file 3720 * has been renamed. 
3721 */ 3722 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3723 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3724 cache_unlock(&fromnd->nl_nch); 3725 3726 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3727 if ((error = nlookup(tond)) != 0) { 3728 cache_drop(&fnchd); 3729 return (error); 3730 } 3731 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3732 cache_drop(&fnchd); 3733 return (ENOENT); 3734 } 3735 tnchd.mount = tond->nl_nch.mount; 3736 cache_hold(&tnchd); 3737 3738 /* 3739 * If the source and target are the same there is nothing to do 3740 */ 3741 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3742 cache_drop(&fnchd); 3743 cache_drop(&tnchd); 3744 return (0); 3745 } 3746 3747 /* 3748 * Mount points cannot be renamed or overwritten 3749 */ 3750 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3751 NCF_ISMOUNTPT 3752 ) { 3753 cache_drop(&fnchd); 3754 cache_drop(&tnchd); 3755 return (EINVAL); 3756 } 3757 3758 /* 3759 * Relock the source ncp. cache_relock() will deal with any 3760 * deadlocks against the already-locked tond and will also 3761 * make sure both are resolved. 3762 * 3763 * NOTE AFTER RELOCKING: The source or target ncp may have become 3764 * invalid while they were unlocked, nc_vp and nc_mount could 3765 * be NULL. 3766 */ 3767 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3768 &tond->nl_nch, tond->nl_cred); 3769 fromnd->nl_flags |= NLC_NCPISLOCKED; 3770 3771 /* 3772 * If either fromnd or tond are marked destroyed a ripout occured 3773 * out from under us and we must retry. 
3774 */ 3775 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 3776 fromnd->nl_nch.ncp->nc_vp == NULL || 3777 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 3778 kprintf("kern_rename: retry due to ripout on: " 3779 "\"%s\" -> \"%s\"\n", 3780 fromnd->nl_nch.ncp->nc_name, 3781 tond->nl_nch.ncp->nc_name); 3782 cache_drop(&fnchd); 3783 cache_drop(&tnchd); 3784 return (EAGAIN); 3785 } 3786 3787 /* 3788 * make sure the parent directories linkages are the same 3789 */ 3790 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3791 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3792 cache_drop(&fnchd); 3793 cache_drop(&tnchd); 3794 return (ENOENT); 3795 } 3796 3797 /* 3798 * Both the source and target must be within the same filesystem and 3799 * in the same filesystem as their parent directories within the 3800 * namecache topology. 3801 * 3802 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3803 */ 3804 mp = fnchd.mount; 3805 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3806 mp != tond->nl_nch.mount) { 3807 cache_drop(&fnchd); 3808 cache_drop(&tnchd); 3809 return (EXDEV); 3810 } 3811 3812 /* 3813 * Make sure the mount point is writable 3814 */ 3815 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3816 cache_drop(&fnchd); 3817 cache_drop(&tnchd); 3818 return (error); 3819 } 3820 3821 /* 3822 * If the target exists and either the source or target is a directory, 3823 * then both must be directories. 3824 * 3825 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3826 * have become NULL. 3827 */ 3828 if (tond->nl_nch.ncp->nc_vp) { 3829 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3830 error = ENOENT; 3831 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3832 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3833 error = ENOTDIR; 3834 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3835 error = EISDIR; 3836 } 3837 } 3838 3839 /* 3840 * You cannot rename a source into itself or a subdirectory of itself. 
3841 * We check this by traversing the target directory upwards looking 3842 * for a match against the source. 3843 * 3844 * XXX MPSAFE 3845 */ 3846 if (error == 0) { 3847 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3848 if (fromnd->nl_nch.ncp == ncp) { 3849 error = EINVAL; 3850 break; 3851 } 3852 } 3853 } 3854 3855 cache_drop(&fnchd); 3856 cache_drop(&tnchd); 3857 3858 /* 3859 * Even though the namespaces are different, they may still represent 3860 * hardlinks to the same file. The filesystem might have a hard time 3861 * with this so we issue a NREMOVE of the source instead of a NRENAME 3862 * when we detect the situation. 3863 */ 3864 if (error == 0) { 3865 fdvp = fromnd->nl_dvp; 3866 tdvp = tond->nl_dvp; 3867 if (fdvp == NULL || tdvp == NULL) { 3868 error = EPERM; 3869 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3870 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3871 fromnd->nl_cred); 3872 } else { 3873 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3874 fdvp, tdvp, tond->nl_cred); 3875 } 3876 } 3877 return (error); 3878 } 3879 3880 /* 3881 * rename_args(char *from, char *to) 3882 * 3883 * Rename files. Source and destination must either both be directories, 3884 * or both not be directories. If target is a directory, it must be empty. 3885 */ 3886 int 3887 sys_rename(struct rename_args *uap) 3888 { 3889 struct nlookupdata fromnd, tond; 3890 int error; 3891 3892 do { 3893 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3894 if (error == 0) { 3895 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3896 if (error == 0) 3897 error = kern_rename(&fromnd, &tond); 3898 nlookup_done(&tond); 3899 } 3900 nlookup_done(&fromnd); 3901 } while (error == EAGAIN); 3902 return (error); 3903 } 3904 3905 /* 3906 * renameat_args(int oldfd, char *old, int newfd, char *new) 3907 * 3908 * Rename files using paths relative to the directories associated with 3909 * oldfd and newfd.
Source and destination must either both be directories, 3910 * or both not be directories. If target is a directory, it must be empty. 3911 */ 3912 int 3913 sys_renameat(struct renameat_args *uap) 3914 { 3915 struct nlookupdata oldnd, newnd; 3916 struct file *oldfp, *newfp; 3917 int error; 3918 3919 do { 3920 error = nlookup_init_at(&oldnd, &oldfp, 3921 uap->oldfd, uap->old, 3922 UIO_USERSPACE, 0); 3923 if (error == 0) { 3924 error = nlookup_init_at(&newnd, &newfp, 3925 uap->newfd, uap->new, 3926 UIO_USERSPACE, 0); 3927 if (error == 0) 3928 error = kern_rename(&oldnd, &newnd); 3929 nlookup_done_at(&newnd, newfp); 3930 } 3931 nlookup_done_at(&oldnd, oldfp); 3932 } while (error == EAGAIN); 3933 return (error); 3934 } 3935 3936 int 3937 kern_mkdir(struct nlookupdata *nd, int mode) 3938 { 3939 struct thread *td = curthread; 3940 struct proc *p = td->td_proc; 3941 struct vnode *vp; 3942 struct vattr vattr; 3943 int error; 3944 3945 bwillinode(1); 3946 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3947 if ((error = nlookup(nd)) != 0) 3948 return (error); 3949 3950 if (nd->nl_nch.ncp->nc_vp) 3951 return (EEXIST); 3952 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3953 return (error); 3954 VATTR_NULL(&vattr); 3955 vattr.va_type = VDIR; 3956 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; /* apply the per-process umask */ 3957 3958 vp = NULL; 3959 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 3960 if (error == 0) 3961 vput(vp); 3962 return (error); 3963 } 3964 3965 /* 3966 * mkdir_args(char *path, int mode) 3967 * 3968 * Make a directory file. 3969 */ 3970 int 3971 sys_mkdir(struct mkdir_args *uap) 3972 { 3973 struct nlookupdata nd; 3974 int error; 3975 3976 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3977 if (error == 0) 3978 error = kern_mkdir(&nd, uap->mode); 3979 nlookup_done(&nd); 3980 return (error); 3981 } 3982 3983 /* 3984 * mkdirat_args(int fd, char *path, mode_t mode) 3985 * 3986 * Make a directory file.
The path is relative to the directory associated 3987 * with fd. 3988 */ 3989 int 3990 sys_mkdirat(struct mkdirat_args *uap) 3991 { 3992 struct nlookupdata nd; 3993 struct file *fp; 3994 int error; 3995 3996 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3997 if (error == 0) 3998 error = kern_mkdir(&nd, uap->mode); 3999 nlookup_done_at(&nd, fp); 4000 return (error); 4001 } 4002 4003 int 4004 kern_rmdir(struct nlookupdata *nd) 4005 { 4006 int error; 4007 4008 bwillinode(1); 4009 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4010 if ((error = nlookup(nd)) != 0) 4011 return (error); 4012 4013 /* 4014 * Do not allow directories representing mount points to be 4015 * deleted, even if empty. Check write perms on mount point 4016 * in case the vnode is aliased (aka nullfs). 4017 */ 4018 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4019 return (EBUSY); 4020 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4021 return (error); 4022 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); /* nl_dvp was referenced via NLC_REFDVP above */ 4023 return (error); 4024 } 4025 4026 /* 4027 * rmdir_args(char *path) 4028 * 4029 * Remove a directory file.
4030 */ 4031 int 4032 sys_rmdir(struct rmdir_args *uap) 4033 { 4034 struct nlookupdata nd; 4035 int error; 4036 4037 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4038 if (error == 0) 4039 error = kern_rmdir(&nd); 4040 nlookup_done(&nd); 4041 return (error); 4042 } 4043 4044 int 4045 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4046 enum uio_seg direction) 4047 { 4048 struct thread *td = curthread; 4049 struct proc *p = td->td_proc; 4050 struct vnode *vp; 4051 struct file *fp; 4052 struct uio auio; 4053 struct iovec aiov; 4054 off_t loff; 4055 int error, eofflag; 4056 4057 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4058 return (error); 4059 if ((fp->f_flag & FREAD) == 0) { 4060 error = EBADF; 4061 goto done; 4062 } 4063 vp = (struct vnode *)fp->f_data; 4064 unionread: 4065 if (vp->v_type != VDIR) { 4066 error = EINVAL; 4067 goto done; 4068 } 4069 aiov.iov_base = buf; 4070 aiov.iov_len = count; 4071 auio.uio_iov = &aiov; 4072 auio.uio_iovcnt = 1; 4073 auio.uio_rw = UIO_READ; 4074 auio.uio_segflg = direction; 4075 auio.uio_td = td; 4076 auio.uio_resid = count; 4077 loff = auio.uio_offset = fp->f_offset; 4078 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4079 fp->f_offset = auio.uio_offset; 4080 if (error) 4081 goto done; 4082 if (count == auio.uio_resid) { 4083 if (union_dircheckp) { 4084 error = union_dircheckp(td, &vp, fp); 4085 if (error == -1) 4086 goto unionread; 4087 if (error) 4088 goto done; 4089 } 4090 #if 0 4091 if ((vp->v_flag & VROOT) && 4092 (vp->v_mount->mnt_flag & MNT_UNION)) { 4093 struct vnode *tvp = vp; 4094 vp = vp->v_mount->mnt_vnodecovered; 4095 vref(vp); 4096 fp->f_data = vp; 4097 fp->f_offset = 0; 4098 vrele(tvp); 4099 goto unionread; 4100 } 4101 #endif 4102 } 4103 4104 /* 4105 * WARNING! *basep may not be wide enough to accommodate the 4106 * seek offset. XXX should we hack this to return the upper 32 bits 4107 * for offsets greater than 4G?
4108 */ 4109 if (basep) { 4110 *basep = (long)loff; 4111 } 4112 *res = count - auio.uio_resid; 4113 done: 4114 fdrop(fp); 4115 return (error); 4116 } 4117 4118 /* 4119 * getdirentries_args(int fd, char *buf, u_int count, long *basep) 4120 * 4121 * Read a block of directory entries in a file system independent format. 4122 */ 4123 int 4124 sys_getdirentries(struct getdirentries_args *uap) 4125 { 4126 long base; 4127 int error; 4128 4129 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4130 &uap->sysmsg_result, UIO_USERSPACE); 4131 4132 if (error == 0 && uap->basep) 4133 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4134 return (error); 4135 } 4136 4137 /* 4138 * getdents_args(int fd, char *buf, size_t count) 4139 */ 4140 int 4141 sys_getdents(struct getdents_args *uap) 4142 { 4143 int error; 4144 4145 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4146 &uap->sysmsg_result, UIO_USERSPACE); 4147 4148 return (error); 4149 } 4150 4151 /* 4152 * Set the mode mask for creation of filesystem nodes. 4153 * 4154 * umask(int newmask) 4155 */ 4156 int 4157 sys_umask(struct umask_args *uap) 4158 { 4159 struct thread *td = curthread; 4160 struct proc *p = td->td_proc; 4161 struct filedesc *fdp; 4162 4163 fdp = p->p_fd; 4164 uap->sysmsg_result = fdp->fd_cmask; 4165 fdp->fd_cmask = uap->newmask & ALLPERMS; 4166 return (0); 4167 } 4168 4169 /* 4170 * revoke(char *path) 4171 * 4172 * Void all references to file by ripping underlying filesystem 4173 * away from vnode.
4174 */ 4175 int 4176 sys_revoke(struct revoke_args *uap) 4177 { 4178 struct nlookupdata nd; 4179 struct vattr vattr; 4180 struct vnode *vp; 4181 struct ucred *cred; 4182 int error; 4183 4184 vp = NULL; 4185 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4186 if (error == 0) 4187 error = nlookup(&nd); 4188 if (error == 0) 4189 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4190 cred = crhold(nd.nl_cred); /* NOTE(review): runs even when the lookup failed -- assumes nl_cred is valid here; confirm */ 4191 nlookup_done(&nd); 4192 if (error == 0) { 4193 if (error == 0) /* NOTE(review): redundant check -- already inside an error == 0 branch */ 4194 error = VOP_GETATTR(vp, &vattr); 4195 if (error == 0 && cred->cr_uid != vattr.va_uid) 4196 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4197 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4198 if (vcount(vp) > 0) 4199 error = vrevoke(vp, cred); 4200 } else if (error == 0) { 4201 error = vrevoke(vp, cred); 4202 } 4203 vrele(vp); 4204 } 4205 if (cred) 4206 crfree(cred); 4207 return (error); 4208 } 4209 4210 /* 4211 * getfh_args(char *fname, fhandle_t *fhp) 4212 * 4213 * Get (NFS) file handle 4214 * 4215 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4216 * mount. This allows nullfs mounts to be explicitly exported. 4217 * 4218 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4219 * 4220 * nullfs mounts of subdirectories are not safe. That is, it will 4221 * work, but you do not really have protection against access to 4222 * the related parent directories.
4223 */ 4224 int 4225 sys_getfh(struct getfh_args *uap) 4226 { 4227 struct thread *td = curthread; 4228 struct nlookupdata nd; 4229 fhandle_t fh; 4230 struct vnode *vp; 4231 struct mount *mp; 4232 int error; 4233 4234 /* 4235 * Must be super user 4236 */ 4237 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4238 return (error); 4239 4240 vp = NULL; 4241 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4242 if (error == 0) 4243 error = nlookup(&nd); 4244 if (error == 0) 4245 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4246 mp = nd.nl_nch.mount; /* save the mount before nlookup_done() releases the nchandle */ 4247 nlookup_done(&nd); 4248 if (error == 0) { 4249 bzero(&fh, sizeof(fh)); 4250 fh.fh_fsid = mp->mnt_stat.f_fsid; 4251 error = VFS_VPTOFH(vp, &fh.fh_fid); 4252 vput(vp); 4253 if (error == 0) 4254 error = copyout(&fh, uap->fhp, sizeof(fh)); 4255 } 4256 return (error); 4257 } 4258 4259 /* 4260 * fhopen_args(const struct fhandle *u_fhp, int flags) 4261 * 4262 * syscall for the rpc.lockd to use to translate a NFS file handle into 4263 * an open descriptor. 4264 * 4265 * warning: do not remove the priv_check() call or this becomes one giant 4266 * security hole. 4267 */ 4268 int 4269 sys_fhopen(struct fhopen_args *uap) 4270 { 4271 struct thread *td = curthread; 4272 struct filedesc *fdp = td->td_proc->p_fd; 4273 struct mount *mp; 4274 struct vnode *vp; 4275 struct fhandle fhp; 4276 struct vattr vat; 4277 struct vattr *vap = &vat; 4278 struct flock lf; 4279 int fmode, mode, error = 0, type; 4280 struct file *nfp; 4281 struct file *fp; 4282 int indx; 4283 4284 /* 4285 * Must be super user 4286 */ 4287 error = priv_check(td, PRIV_ROOT); 4288 if (error) 4289 return (error); 4290 4291 fmode = FFLAGS(uap->flags); 4292 4293 /* 4294 * Why not allow a non-read/write open for our lockd?
4295 */ 4296 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4297 return (EINVAL); 4298 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4299 if (error) 4300 return(error); 4301 4302 /* 4303 * Find the mount point 4304 */ 4305 mp = vfs_getvfs(&fhp.fh_fsid); 4306 if (mp == NULL) { 4307 error = ESTALE; 4308 goto done; 4309 } 4310 /* now give me my vnode, it gets returned to me locked */ 4311 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4312 if (error) 4313 goto done; 4314 /* 4315 * from now on we have to make sure not 4316 * to forget about the vnode 4317 * any error that causes an abort must vput(vp) 4318 * just set error = err and 'goto bad;'. 4319 */ 4320 4321 /* 4322 * from vn_open 4323 */ 4324 if (vp->v_type == VLNK) { 4325 error = EMLINK; 4326 goto bad; 4327 } 4328 if (vp->v_type == VSOCK) { 4329 error = EOPNOTSUPP; 4330 goto bad; 4331 } 4332 mode = 0; /* translate FREAD/FWRITE/O_TRUNC open flags into VOP_ACCESS bits */ 4333 if (fmode & (FWRITE | O_TRUNC)) { 4334 if (vp->v_type == VDIR) { 4335 error = EISDIR; 4336 goto bad; 4337 } 4338 error = vn_writechk(vp, NULL); 4339 if (error) 4340 goto bad; 4341 mode |= VWRITE; 4342 } 4343 if (fmode & FREAD) 4344 mode |= VREAD; 4345 if (mode) { 4346 error = VOP_ACCESS(vp, mode, td->td_ucred); 4347 if (error) 4348 goto bad; 4349 } 4350 if (fmode & O_TRUNC) { 4351 vn_unlock(vp); /* XXX */ 4352 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4353 VATTR_NULL(vap); 4354 vap->va_size = 0; 4355 error = VOP_SETATTR(vp, vap, td->td_ucred); 4356 if (error) 4357 goto bad; 4358 } 4359 4360 /* 4361 * VOP_OPEN needs the file pointer so it can potentially override 4362 * it. 4363 * 4364 * WARNING! no f_nchandle will be associated when fhopen()ing a 4365 * directory. XXX 4366 */ 4367 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4368 goto bad; 4369 fp = nfp; 4370 4371 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4372 if (error) { 4373 /* 4374 * setting f_ops this way prevents VOP_CLOSE from being 4375 * called or fdrop() releasing the vp from v_data.
Since 4376 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4377 */ 4378 fp->f_ops = &badfileops; 4379 fp->f_data = NULL; 4380 goto bad_drop; 4381 } 4382 4383 /* 4384 * The fp is given its own reference, we still have our ref and lock. 4385 * 4386 * Assert that all regular files must be created with a VM object. 4387 */ 4388 if (vp->v_type == VREG && vp->v_object == NULL) { 4389 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4390 goto bad_drop; /* NOTE(review): error is still 0 here, so this path returns success without installing the descriptor -- confirm intended */ 4391 } 4392 4393 /* 4394 * The open was successful. Handle any locking requirements. 4395 */ 4396 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4397 lf.l_whence = SEEK_SET; 4398 lf.l_start = 0; 4399 lf.l_len = 0; 4400 if (fmode & O_EXLOCK) 4401 lf.l_type = F_WRLCK; 4402 else 4403 lf.l_type = F_RDLCK; 4404 if (fmode & FNONBLOCK) 4405 type = 0; 4406 else 4407 type = F_WAIT; 4408 vn_unlock(vp); 4409 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4410 /* 4411 * release our private reference. 4412 */ 4413 fsetfd(fdp, NULL, indx); 4414 fdrop(fp); 4415 vrele(vp); 4416 goto done; 4417 } 4418 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4419 fp->f_flag |= FHASLOCK; 4420 } 4421 4422 /* 4423 * Clean up. Associate the file pointer with the previously 4424 * reserved descriptor and return it.
4425 */ 4426 vput(vp); 4427 fsetfd(fdp, fp, indx); 4428 fdrop(fp); 4429 uap->sysmsg_result = indx; 4430 if (uap->flags & O_CLOEXEC) /* honor O_CLOEXEC by marking the new descriptor close-on-exec */ 4431 error = fsetfdflags(fdp, indx, UF_EXCLOSE); 4432 return (error); 4433 4434 bad_drop: 4435 fsetfd(fdp, NULL, indx); 4436 fdrop(fp); 4437 bad: 4438 vput(vp); 4439 done: 4440 return (error); 4441 } 4442 4443 /* 4444 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4445 */ 4446 int 4447 sys_fhstat(struct fhstat_args *uap) 4448 { 4449 struct thread *td = curthread; 4450 struct stat sb; 4451 fhandle_t fh; 4452 struct mount *mp; 4453 struct vnode *vp; 4454 int error; 4455 4456 /* 4457 * Must be super user 4458 */ 4459 error = priv_check(td, PRIV_ROOT); 4460 if (error) 4461 return (error); 4462 4463 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4464 if (error) 4465 return (error); 4466 4467 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4468 error = ESTALE; 4469 if (error == 0) { 4470 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4471 error = vn_stat(vp, &sb, td->td_ucred); 4472 vput(vp); 4473 } 4474 } 4475 if (error == 0) 4476 error = copyout(&sb, uap->sb, sizeof(sb)); 4477 return (error); 4478 } 4479 4480 /* 4481 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4482 */ 4483 int 4484 sys_fhstatfs(struct fhstatfs_args *uap) 4485 { 4486 struct thread *td = curthread; 4487 struct proc *p = td->td_proc; 4488 struct statfs *sp; 4489 struct mount *mp; 4490 struct vnode *vp; 4491 struct statfs sb; 4492 char *fullpath, *freepath; 4493 fhandle_t fh; 4494 int error; 4495 4496 /* 4497 * Must be super user 4498 */ 4499 if ((error = priv_check(td, PRIV_ROOT))) 4500 return (error); 4501 4502 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4503 return (error); 4504 4505 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4506 error = ESTALE; 4507 goto done; 4508 } 4509 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4510 error = ESTALE; 4511 goto done; 4512 } 4513 4514 if ((error = VFS_FHTOVP(mp, NULL,
&fh.fh_fid, &vp)) != 0) 4515 goto done; 4516 mp = vp->v_mount; 4517 sp = &mp->mnt_stat; 4518 vput(vp); 4519 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4520 goto done; 4521 4522 error = mount_path(p, mp, &fullpath, &freepath); 4523 if (error) 4524 goto done; 4525 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4526 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4527 kfree(freepath, M_TEMP); 4528 4529 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4530 if (priv_check(td, PRIV_ROOT)) { /* non-root callers get a copy with the fsid scrubbed */ 4531 bcopy(sp, &sb, sizeof(sb)); 4532 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4533 sp = &sb; 4534 } 4535 error = copyout(sp, uap->buf, sizeof(*sp)); 4536 done: 4537 return (error); 4538 } 4539 4540 /* 4541 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4542 */ 4543 int 4544 sys_fhstatvfs(struct fhstatvfs_args *uap) 4545 { 4546 struct thread *td = curthread; 4547 struct proc *p = td->td_proc; 4548 struct statvfs *sp; 4549 struct mount *mp; 4550 struct vnode *vp; 4551 fhandle_t fh; 4552 int error; 4553 4554 /* 4555 * Must be super user 4556 */ 4557 if ((error = priv_check(td, PRIV_ROOT))) 4558 return (error); 4559 4560 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4561 return (error); 4562 4563 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4564 error = ESTALE; 4565 goto done; 4566 } 4567 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4568 error = ESTALE; 4569 goto done; 4570 } 4571 4572 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4573 goto done; 4574 mp = vp->v_mount; 4575 sp = &mp->mnt_vstat; 4576 vput(vp); 4577 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4578 goto done; 4579 4580 sp->f_flag = 0; 4581 if (mp->mnt_flag & MNT_RDONLY) 4582 sp->f_flag |= ST_RDONLY; 4583 if (mp->mnt_flag & MNT_NOSUID) 4584 sp->f_flag |= ST_NOSUID; 4585 error = copyout(sp, uap->buf, sizeof(*sp)); 4586 done: 4587 return (error); 4588 } 4589 4590 4591 /* 4592 * Syscall to push extended attribute configuration information into the 4593 *
VFS. Accepts a path, which it converts to a mountpoint, as well as 4594 * a command (int cmd), and attribute name and misc data. For now, the 4595 * attribute name is left in userspace for consumption by the VFS_op. 4596 * It will probably be changed to be copied into sysspace by the 4597 * syscall in the future, once issues with various consumers of the 4598 * attribute code have raised their hands. 4599 * 4600 * Currently this is used only by UFS Extended Attributes. 4601 */ 4602 int 4603 sys_extattrctl(struct extattrctl_args *uap) 4604 { 4605 struct nlookupdata nd; 4606 struct vnode *vp; 4607 char attrname[EXTATTR_MAXNAMELEN]; 4608 int error; 4609 size_t size; 4610 4611 attrname[0] = 0; 4612 vp = NULL; 4613 error = 0; 4614 4615 if (error == 0 && uap->filename) { 4616 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4617 NLC_FOLLOW); 4618 if (error == 0) 4619 error = nlookup(&nd); 4620 if (error == 0) 4621 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4622 nlookup_done(&nd); 4623 } 4624 4625 if (error == 0 && uap->attrname) { 4626 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4627 &size); 4628 } 4629 4630 if (error == 0) { 4631 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4632 if (error == 0) 4633 error = nlookup(&nd); 4634 if (error == 0) 4635 error = ncp_writechk(&nd.nl_nch); 4636 if (error == 0) { 4637 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4638 uap->attrnamespace, 4639 uap->attrname, nd.nl_cred); /* NOTE: the userspace name is passed on purpose (see comment above); the copied-in attrname is otherwise unused */ 4640 } 4641 nlookup_done(&nd); 4642 } 4643 4644 return (error); 4645 } 4646 4647 /* 4648 * Syscall to set a named extended attribute on a file or directory.
4649 */ 4650 int 4651 sys_extattr_set_file(struct extattr_set_file_args *uap) 4652 { 4653 char attrname[EXTATTR_MAXNAMELEN]; 4654 struct nlookupdata nd; 4655 struct vnode *vp; 4656 struct uio auio; 4657 struct iovec aiov; 4658 int error; 4659 4660 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4661 if (error) 4662 return (error); 4663 4664 vp = NULL; 4665 4666 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4667 if (error == 0) 4668 error = nlookup(&nd); 4669 if (error == 0) 4670 error = ncp_writechk(&nd.nl_nch); 4671 if (error == 0) 4672 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4673 if (error) { 4674 nlookup_done(&nd); 4675 return (error); 4676 } 4677 /* build a single-segment write uio over the user-supplied data buffer */ 4678 bzero(&auio, sizeof(auio)); 4679 aiov.iov_base = uap->data; 4680 aiov.iov_len = uap->nbytes; 4681 auio.uio_iov = &aiov; 4682 auio.uio_iovcnt = 1; 4683 auio.uio_offset = 0; 4684 auio.uio_resid = uap->nbytes; 4685 auio.uio_rw = UIO_WRITE; 4686 auio.uio_td = curthread; 4687 4688 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4689 &auio, nd.nl_cred); 4690 4691 vput(vp); 4692 nlookup_done(&nd); 4693 return (error); 4694 } 4695 4696 /* 4697 * Syscall to get a named extended attribute on a file or directory.
4698 */ 4699 int 4700 sys_extattr_get_file(struct extattr_get_file_args *uap) 4701 { 4702 char attrname[EXTATTR_MAXNAMELEN]; 4703 struct nlookupdata nd; 4704 struct uio auio; 4705 struct iovec aiov; 4706 struct vnode *vp; 4707 int error; 4708 4709 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4710 if (error) 4711 return (error); 4712 4713 vp = NULL; 4714 4715 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4716 if (error == 0) 4717 error = nlookup(&nd); 4718 if (error == 0) 4719 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4720 if (error) { 4721 nlookup_done(&nd); 4722 return (error); 4723 } 4724 4725 bzero(&auio, sizeof(auio)); 4726 aiov.iov_base = uap->data; 4727 aiov.iov_len = uap->nbytes; 4728 auio.uio_iov = &aiov; 4729 auio.uio_iovcnt = 1; 4730 auio.uio_offset = 0; 4731 auio.uio_resid = uap->nbytes; 4732 auio.uio_rw = UIO_READ; 4733 auio.uio_td = curthread; 4734 4735 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4736 &auio, nd.nl_cred); 4737 uap->sysmsg_result = uap->nbytes - auio.uio_resid; /* report the number of bytes actually transferred */ 4738 4739 vput(vp); 4740 nlookup_done(&nd); 4741 return(error); 4742 } 4743 4744 /* 4745 * Syscall to delete a named extended attribute from a file or directory. 4746 * Accepts attribute name. The real work happens in VOP_SETEXTATTR().
4747 */ 4748 int 4749 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4750 { 4751 char attrname[EXTATTR_MAXNAMELEN]; 4752 struct nlookupdata nd; 4753 struct vnode *vp; 4754 int error; 4755 4756 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4757 if (error) 4758 return(error); 4759 4760 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4761 if (error == 0) 4762 error = nlookup(&nd); 4763 if (error == 0) 4764 error = ncp_writechk(&nd.nl_nch); 4765 if (error == 0) { 4766 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4767 if (error == 0) { 4768 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4769 attrname, NULL, nd.nl_cred); /* NULL uio requests deletion of the attribute */ 4770 vput(vp); 4771 } 4772 } 4773 nlookup_done(&nd); 4774 return(error); 4775 } 4776 4777 /* 4778 * Determine if the mount is visible to the process. 4779 */ 4780 static int 4781 chroot_visible_mnt(struct mount *mp, struct proc *p) 4782 { 4783 struct nchandle nch; 4784 4785 /* 4786 * Traverse from the mount point upwards. If we hit the process 4787 * root then the mount point is visible to the process. 4788 */ 4789 nch = mp->mnt_ncmountpt; 4790 while (nch.ncp) { 4791 if (nch.mount == p->p_fd->fd_nrdir.mount && 4792 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4793 return(1); 4794 } 4795 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4796 nch = nch.mount->mnt_ncmounton; 4797 } else { 4798 nch.ncp = nch.ncp->nc_parent; 4799 } 4800 } 4801 4802 /* 4803 * If the mount point is not visible to the process, but the 4804 * process root is in a subdirectory of the mount, return 4805 * TRUE anyway. 4806 */ 4807 if (p->p_fd->fd_nrdir.mount == mp) 4808 return(1); 4809 4810 return(0); 4811 } 4812 4813