1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/spinlock.h> 66 #include <sys/kern_syscall.h> 67 #include <sys/objcache.h> 68 #include <sys/sysctl.h> 69 70 #include <sys/buf2.h> 71 #include <sys/file2.h> 72 #include <sys/spinlock2.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 78 #include <machine/limits.h> 79 #include <machine/stdarg.h> 80 81 #include <vfs/union/union.h> 82 83 static void mount_warning(struct mount *mp, const char *ctl, ...); 84 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 85 static int checkvp_chdir (struct vnode *vn, struct thread *td); 86 static void checkdirs 
(struct nchandle *old_nch, struct nchandle *new_nch); 87 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 88 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 89 static int getutimes (const struct timeval *, struct timespec *); 90 static int setfown (struct vnode *, uid_t, gid_t); 91 static int setfmode (struct vnode *, int); 92 static int setfflags (struct vnode *, int); 93 static int setutimes (struct vnode *, const struct timespec *, int); 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 96 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 97 98 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 99 100 /* 101 * Virtual File System System Calls 102 */ 103 104 /* 105 * Mount a file system. 106 */ 107 /* 108 * mount_args(char *type, char *path, int flags, caddr_t data) 109 */ 110 /* ARGSUSED */ 111 int 112 sys_mount(struct mount_args *uap) 113 { 114 struct thread *td = curthread; 115 struct proc *p = td->td_proc; 116 struct vnode *vp; 117 struct nchandle nch; 118 struct mount *mp; 119 struct vfsconf *vfsp; 120 int error, flag = 0, flag2 = 0; 121 int hasmount; 122 struct vattr va; 123 struct nlookupdata nd; 124 char fstypename[MFSNAMELEN]; 125 struct ucred *cred = p->p_ucred; 126 127 KKASSERT(p); 128 if (cred->cr_prison != NULL) 129 return (EPERM); 130 if (usermount == 0 && (error = suser(td))) 131 return (error); 132 /* 133 * Do not allow NFS export by non-root users. 134 */ 135 if (uap->flags & MNT_EXPORTED) { 136 error = suser(td); 137 if (error) 138 return (error); 139 } 140 /* 141 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 142 */ 143 if (suser(td)) 144 uap->flags |= MNT_NOSUID | MNT_NODEV; 145 146 /* 147 * Lookup the requested path and extract the nch and vnode. 
148 */ 149 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 150 if (error == 0) { 151 if ((error = nlookup(&nd)) == 0) { 152 if (nd.nl_nch.ncp->nc_vp == NULL) 153 error = ENOENT; 154 } 155 } 156 if (error) { 157 nlookup_done(&nd); 158 return (error); 159 } 160 161 /* 162 * Extract the locked+refd ncp and cleanup the nd structure 163 */ 164 nch = nd.nl_nch; 165 cache_zero(&nd.nl_nch); 166 nlookup_done(&nd); 167 168 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 169 hasmount = 1; 170 else 171 hasmount = 0; 172 173 174 /* 175 * now we have the locked ref'd nch and unreferenced vnode. 176 */ 177 vp = nch.ncp->nc_vp; 178 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 179 cache_put(&nch); 180 return (error); 181 } 182 cache_unlock(&nch); 183 184 /* 185 * Now we have an unlocked ref'd nch and a locked ref'd vp 186 */ 187 if (uap->flags & MNT_UPDATE) { 188 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 189 cache_drop(&nch); 190 vput(vp); 191 return (EINVAL); 192 } 193 mp = vp->v_mount; 194 flag = mp->mnt_flag; 195 flag2 = mp->mnt_kern_flag; 196 /* 197 * We only allow the filesystem to be reloaded if it 198 * is currently mounted read-only. 199 */ 200 if ((uap->flags & MNT_RELOAD) && 201 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 202 cache_drop(&nch); 203 vput(vp); 204 return (EOPNOTSUPP); /* Needs translation */ 205 } 206 /* 207 * Only root, or the user that did the original mount is 208 * permitted to update it. 
209 */ 210 if (mp->mnt_stat.f_owner != cred->cr_uid && 211 (error = suser(td))) { 212 cache_drop(&nch); 213 vput(vp); 214 return (error); 215 } 216 if (vfs_busy(mp, LK_NOWAIT)) { 217 cache_drop(&nch); 218 vput(vp); 219 return (EBUSY); 220 } 221 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 222 cache_drop(&nch); 223 vfs_unbusy(mp); 224 vput(vp); 225 return (EBUSY); 226 } 227 vp->v_flag |= VMOUNT; 228 mp->mnt_flag |= 229 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 230 vn_unlock(vp); 231 goto update; 232 } 233 /* 234 * If the user is not root, ensure that they own the directory 235 * onto which we are attempting to mount. 236 */ 237 if ((error = VOP_GETATTR(vp, &va)) || 238 (va.va_uid != cred->cr_uid && (error = suser(td)))) { 239 cache_drop(&nch); 240 vput(vp); 241 return (error); 242 } 243 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 244 cache_drop(&nch); 245 vput(vp); 246 return (error); 247 } 248 if (vp->v_type != VDIR) { 249 cache_drop(&nch); 250 vput(vp); 251 return (ENOTDIR); 252 } 253 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 254 cache_drop(&nch); 255 vput(vp); 256 return (EPERM); 257 } 258 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 259 cache_drop(&nch); 260 vput(vp); 261 return (error); 262 } 263 vfsp = vfsconf_find_by_name(fstypename); 264 if (vfsp == NULL) { 265 linker_file_t lf; 266 267 /* Only load modules for root (very important!) 
*/ 268 if ((error = suser(td)) != 0) { 269 cache_drop(&nch); 270 vput(vp); 271 return error; 272 } 273 error = linker_load_file(fstypename, &lf); 274 if (error || lf == NULL) { 275 cache_drop(&nch); 276 vput(vp); 277 if (lf == NULL) 278 error = ENODEV; 279 return error; 280 } 281 lf->userrefs++; 282 /* lookup again, see if the VFS was loaded */ 283 vfsp = vfsconf_find_by_name(fstypename); 284 if (vfsp == NULL) { 285 lf->userrefs--; 286 linker_file_unload(lf); 287 cache_drop(&nch); 288 vput(vp); 289 return (ENODEV); 290 } 291 } 292 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 293 cache_drop(&nch); 294 vput(vp); 295 return (EBUSY); 296 } 297 vp->v_flag |= VMOUNT; 298 299 /* 300 * Allocate and initialize the filesystem. 301 */ 302 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 303 TAILQ_INIT(&mp->mnt_nvnodelist); 304 TAILQ_INIT(&mp->mnt_reservedvnlist); 305 TAILQ_INIT(&mp->mnt_jlist); 306 mp->mnt_nvnodelistsize = 0; 307 lockinit(&mp->mnt_lock, "vfslock", 0, 0); 308 vfs_busy(mp, LK_NOWAIT); 309 mp->mnt_op = vfsp->vfc_vfsops; 310 mp->mnt_vfc = vfsp; 311 vfsp->vfc_refcount++; 312 mp->mnt_stat.f_type = vfsp->vfc_typenum; 313 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 314 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 315 mp->mnt_stat.f_owner = cred->cr_uid; 316 mp->mnt_iosize_max = DFLTPHYS; 317 vn_unlock(vp); 318 update: 319 /* 320 * Set the mount level flags. 
321 */ 322 if (uap->flags & MNT_RDONLY) 323 mp->mnt_flag |= MNT_RDONLY; 324 else if (mp->mnt_flag & MNT_RDONLY) 325 mp->mnt_kern_flag |= MNTK_WANTRDWR; 326 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 327 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 328 MNT_NOSYMFOLLOW | MNT_IGNORE | 329 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 330 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 331 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 332 MNT_NOSYMFOLLOW | MNT_IGNORE | 333 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 334 /* 335 * Mount the filesystem. 336 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 337 * get. 338 */ 339 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 340 if (mp->mnt_flag & MNT_UPDATE) { 341 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 342 mp->mnt_flag &= ~MNT_RDONLY; 343 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 344 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 345 if (error) { 346 mp->mnt_flag = flag; 347 mp->mnt_kern_flag = flag2; 348 } 349 vfs_unbusy(mp); 350 vp->v_flag &= ~VMOUNT; 351 vrele(vp); 352 cache_drop(&nch); 353 return (error); 354 } 355 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 356 /* 357 * Put the new filesystem on the mount list after root. The mount 358 * point gets its own mnt_ncmountpt (unless the VFS already set one 359 * up) which represents the root of the mount. The lookup code 360 * detects the mount point going forward and checks the root of 361 * the mount going backwards. 362 * 363 * It is not necessary to invalidate or purge the vnode underneath 364 * because elements under the mount will be given their own glue 365 * namecache record. 
366 */ 367 if (!error) { 368 if (mp->mnt_ncmountpt.ncp == NULL) { 369 /* 370 * allocate, then unlock, but leave the ref intact 371 */ 372 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 373 cache_unlock(&mp->mnt_ncmountpt); 374 } 375 mp->mnt_ncmounton = nch; /* inherits ref */ 376 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 377 378 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 379 vp->v_flag &= ~VMOUNT; 380 mountlist_insert(mp, MNTINS_LAST); 381 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 382 vn_unlock(vp); 383 error = vfs_allocate_syncvnode(mp); 384 vfs_unbusy(mp); 385 error = VFS_START(mp, 0); 386 vrele(vp); 387 } else { 388 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 389 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 390 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 391 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 392 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 393 vp->v_flag &= ~VMOUNT; 394 mp->mnt_vfc->vfc_refcount--; 395 vfs_unbusy(mp); 396 kfree(mp, M_MOUNT); 397 cache_drop(&nch); 398 vput(vp); 399 } 400 return (error); 401 } 402 403 /* 404 * Scan all active processes to see if any of them have a current 405 * or root directory onto which the new filesystem has just been 406 * mounted. If so, replace them with the new mount point. 407 * 408 * The passed ncp is ref'd and locked (from the mount code) and 409 * must be associated with the vnode representing the root of the 410 * mount point. 
411 */ 412 struct checkdirs_info { 413 struct nchandle old_nch; 414 struct nchandle new_nch; 415 struct vnode *old_vp; 416 struct vnode *new_vp; 417 }; 418 419 static int checkdirs_callback(struct proc *p, void *data); 420 421 static void 422 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 423 { 424 struct checkdirs_info info; 425 struct vnode *olddp; 426 struct vnode *newdp; 427 struct mount *mp; 428 429 /* 430 * If the old mount point's vnode has a usecount of 1, it is not 431 * being held as a descriptor anywhere. 432 */ 433 olddp = old_nch->ncp->nc_vp; 434 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 435 return; 436 437 /* 438 * Force the root vnode of the new mount point to be resolved 439 * so we can update any matching processes. 440 */ 441 mp = new_nch->mount; 442 if (VFS_ROOT(mp, &newdp)) 443 panic("mount: lost mount"); 444 cache_setunresolved(new_nch); 445 cache_setvp(new_nch, newdp); 446 447 /* 448 * Special handling of the root node 449 */ 450 if (rootvnode == olddp) { 451 vref(newdp); 452 vfs_cache_setroot(newdp, cache_hold(new_nch)); 453 } 454 455 /* 456 * Pass newdp separately so the callback does not have to access 457 * it via new_nch->ncp->nc_vp. 458 */ 459 info.old_nch = *old_nch; 460 info.new_nch = *new_nch; 461 info.new_vp = newdp; 462 allproc_scan(checkdirs_callback, &info); 463 vput(newdp); 464 } 465 466 /* 467 * NOTE: callback is not MP safe because the scanned process's filedesc 468 * structure can be ripped out from under us, amoung other things. 469 */ 470 static int 471 checkdirs_callback(struct proc *p, void *data) 472 { 473 struct checkdirs_info *info = data; 474 struct filedesc *fdp; 475 struct nchandle ncdrop1; 476 struct nchandle ncdrop2; 477 struct vnode *vprele1; 478 struct vnode *vprele2; 479 480 if ((fdp = p->p_fd) != NULL) { 481 cache_zero(&ncdrop1); 482 cache_zero(&ncdrop2); 483 vprele1 = NULL; 484 vprele2 = NULL; 485 486 /* 487 * MPUNSAFE - XXX fdp can be pulled out from under a 488 * foreign process. 
489 * 490 * A shared filedesc is ok, we don't have to copy it 491 * because we are making this change globally. 492 */ 493 spin_lock_wr(&fdp->fd_spin); 494 if (fdp->fd_ncdir.mount == info->old_nch.mount && 495 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 496 vprele1 = fdp->fd_cdir; 497 vref(info->new_vp); 498 fdp->fd_cdir = info->new_vp; 499 ncdrop1 = fdp->fd_ncdir; 500 cache_copy(&info->new_nch, &fdp->fd_ncdir); 501 } 502 if (fdp->fd_nrdir.mount == info->old_nch.mount && 503 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 504 vprele2 = fdp->fd_rdir; 505 vref(info->new_vp); 506 fdp->fd_rdir = info->new_vp; 507 ncdrop2 = fdp->fd_nrdir; 508 cache_copy(&info->new_nch, &fdp->fd_nrdir); 509 } 510 spin_unlock_wr(&fdp->fd_spin); 511 if (ncdrop1.ncp) 512 cache_drop(&ncdrop1); 513 if (ncdrop2.ncp) 514 cache_drop(&ncdrop2); 515 if (vprele1) 516 vrele(vprele1); 517 if (vprele2) 518 vrele(vprele2); 519 } 520 return(0); 521 } 522 523 /* 524 * Unmount a file system. 525 * 526 * Note: unmount takes a path to the vnode mounted on as argument, 527 * not special file (as before). 528 */ 529 /* 530 * umount_args(char *path, int flags) 531 */ 532 /* ARGSUSED */ 533 int 534 sys_unmount(struct unmount_args *uap) 535 { 536 struct thread *td = curthread; 537 struct proc *p = td->td_proc; 538 struct mount *mp = NULL; 539 int error; 540 struct nlookupdata nd; 541 542 KKASSERT(p); 543 if (p->p_ucred->cr_prison != NULL) 544 return (EPERM); 545 if (usermount == 0 && (error = suser(td))) 546 return (error); 547 548 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 549 if (error == 0) 550 error = nlookup(&nd); 551 if (error) 552 goto out; 553 554 mp = nd.nl_nch.mount; 555 556 /* 557 * Only root, or the user that did the original mount is 558 * permitted to unmount this filesystem. 559 */ 560 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 561 (error = suser(td))) 562 goto out; 563 564 /* 565 * Don't allow unmounting the root file system. 
566 */ 567 if (mp->mnt_flag & MNT_ROOTFS) { 568 error = EINVAL; 569 goto out; 570 } 571 572 /* 573 * Must be the root of the filesystem 574 */ 575 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 576 error = EINVAL; 577 goto out; 578 } 579 580 out: 581 nlookup_done(&nd); 582 if (error) 583 return (error); 584 return (dounmount(mp, uap->flags)); 585 } 586 587 /* 588 * Do the actual file system unmount. 589 */ 590 static int 591 dounmount_interlock(struct mount *mp) 592 { 593 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 594 return (EBUSY); 595 mp->mnt_kern_flag |= MNTK_UNMOUNT; 596 return(0); 597 } 598 599 int 600 dounmount(struct mount *mp, int flags) 601 { 602 struct namecache *ncp; 603 struct nchandle nch; 604 struct vnode *vp; 605 int error; 606 int async_flag; 607 int lflags; 608 int freeok = 1; 609 610 /* 611 * Exclusive access for unmounting purposes 612 */ 613 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 614 return (error); 615 616 /* 617 * Allow filesystems to detect that a forced unmount is in progress. 618 */ 619 if (flags & MNT_FORCE) 620 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 621 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT); 622 error = lockmgr(&mp->mnt_lock, lflags); 623 if (error) { 624 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 625 if (mp->mnt_kern_flag & MNTK_MWAIT) 626 wakeup(mp); 627 return (error); 628 } 629 630 if (mp->mnt_flag & MNT_EXPUBLIC) 631 vfs_setpublicfs(NULL, NULL, NULL); 632 633 vfs_msync(mp, MNT_WAIT); 634 async_flag = mp->mnt_flag & MNT_ASYNC; 635 mp->mnt_flag &=~ MNT_ASYNC; 636 637 /* 638 * If this filesystem isn't aliasing other filesystems, 639 * try to invalidate any remaining namecache entries and 640 * check the count afterwords. 
641 */ 642 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 643 cache_lock(&mp->mnt_ncmountpt); 644 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 645 cache_unlock(&mp->mnt_ncmountpt); 646 647 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 648 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 649 650 if ((flags & MNT_FORCE) == 0) { 651 error = EBUSY; 652 mount_warning(mp, "Cannot unmount: " 653 "%d namecache " 654 "references still " 655 "present", 656 ncp->nc_refs - 1); 657 } else { 658 mount_warning(mp, "Forced unmount: " 659 "%d namecache " 660 "references still " 661 "present", 662 ncp->nc_refs - 1); 663 freeok = 0; 664 } 665 } 666 } 667 668 /* 669 * nchandle records ref the mount structure. Expect a count of 1 670 * (our mount->mnt_ncmountpt). 671 */ 672 if (mp->mnt_refs != 1) { 673 if ((flags & MNT_FORCE) == 0) { 674 mount_warning(mp, "Cannot unmount: " 675 "%d process references still " 676 "present", mp->mnt_refs); 677 error = EBUSY; 678 } else { 679 mount_warning(mp, "Forced unmount: " 680 "%d process references still " 681 "present", mp->mnt_refs); 682 freeok = 0; 683 } 684 } 685 686 /* 687 * Decomission our special mnt_syncer vnode. This also stops 688 * the vnlru code. If we are unable to unmount we recommission 689 * the vnode. 690 */ 691 if (error == 0) { 692 if ((vp = mp->mnt_syncer) != NULL) { 693 mp->mnt_syncer = NULL; 694 vrele(vp); 695 } 696 if (((mp->mnt_flag & MNT_RDONLY) || 697 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 698 (flags & MNT_FORCE)) { 699 error = VFS_UNMOUNT(mp, flags); 700 } 701 } 702 if (error) { 703 if (mp->mnt_syncer == NULL) 704 vfs_allocate_syncvnode(mp); 705 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 706 mp->mnt_flag |= async_flag; 707 lockmgr(&mp->mnt_lock, LK_RELEASE); 708 if (mp->mnt_kern_flag & MNTK_MWAIT) 709 wakeup(mp); 710 return (error); 711 } 712 /* 713 * Clean up any journals still associated with the mount after 714 * filesystem activity has ceased. 
715 */ 716 journal_remove_all_journals(mp, 717 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 718 719 mountlist_remove(mp); 720 721 /* 722 * Remove any installed vnode ops here so the individual VFSs don't 723 * have to. 724 */ 725 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 726 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 727 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 728 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 729 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 730 731 if (mp->mnt_ncmountpt.ncp != NULL) { 732 nch = mp->mnt_ncmountpt; 733 cache_zero(&mp->mnt_ncmountpt); 734 cache_clrmountpt(&nch); 735 cache_drop(&nch); 736 } 737 if (mp->mnt_ncmounton.ncp != NULL) { 738 nch = mp->mnt_ncmounton; 739 cache_zero(&mp->mnt_ncmounton); 740 cache_clrmountpt(&nch); 741 cache_drop(&nch); 742 } 743 744 mp->mnt_vfc->vfc_refcount--; 745 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 746 panic("unmount: dangling vnode"); 747 lockmgr(&mp->mnt_lock, LK_RELEASE); 748 if (mp->mnt_kern_flag & MNTK_MWAIT) 749 wakeup(mp); 750 if (freeok) 751 kfree(mp, M_MOUNT); 752 return (0); 753 } 754 755 static 756 void 757 mount_warning(struct mount *mp, const char *ctl, ...) 758 { 759 char *ptr; 760 char *buf; 761 __va_list va; 762 763 __va_start(va, ctl); 764 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) { 765 kprintf("unmount(%s): ", ptr); 766 kvprintf(ctl, va); 767 kprintf("\n"); 768 kfree(buf, M_TEMP); 769 } else { 770 kprintf("unmount(%p", mp); 771 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 772 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 773 kprintf("): "); 774 kvprintf(ctl, va); 775 kprintf("\n"); 776 } 777 __va_end(va); 778 } 779 780 /* 781 * Shim cache_fullpath() to handle the case where a process is chrooted into 782 * a subdirectory of a mount. 
In this case if the root mount matches the 783 * process root directory's mount we have to specify the process's root 784 * directory instead of the mount point, because the mount point might 785 * be above the root directory. 786 */ 787 static 788 int 789 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 790 { 791 struct nchandle *nch; 792 793 if (p && p->p_fd->fd_nrdir.mount == mp) 794 nch = &p->p_fd->fd_nrdir; 795 else 796 nch = &mp->mnt_ncmountpt; 797 return(cache_fullpath(p, nch, rb, fb)); 798 } 799 800 /* 801 * Sync each mounted filesystem. 802 */ 803 804 #ifdef DEBUG 805 static int syncprt = 0; 806 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 807 #endif /* DEBUG */ 808 809 static int sync_callback(struct mount *mp, void *data); 810 811 /* ARGSUSED */ 812 int 813 sys_sync(struct sync_args *uap) 814 { 815 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 816 #ifdef DEBUG 817 /* 818 * print out buffer pool stat information on each sync() call. 819 */ 820 if (syncprt) 821 vfs_bufstats(); 822 #endif /* DEBUG */ 823 return (0); 824 } 825 826 static 827 int 828 sync_callback(struct mount *mp, void *data __unused) 829 { 830 int asyncflag; 831 832 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 833 asyncflag = mp->mnt_flag & MNT_ASYNC; 834 mp->mnt_flag &= ~MNT_ASYNC; 835 vfs_msync(mp, MNT_NOWAIT); 836 VFS_SYNC(mp, MNT_NOWAIT); 837 mp->mnt_flag |= asyncflag; 838 } 839 return(0); 840 } 841 842 /* XXX PRISON: could be per prison flag */ 843 static int prison_quotas; 844 #if 0 845 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 846 #endif 847 848 /* 849 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 850 * 851 * Change filesystem quotas. 
852 */ 853 /* ARGSUSED */ 854 int 855 sys_quotactl(struct quotactl_args *uap) 856 { 857 struct nlookupdata nd; 858 struct thread *td; 859 struct proc *p; 860 struct mount *mp; 861 int error; 862 863 td = curthread; 864 p = td->td_proc; 865 if (p->p_ucred->cr_prison && !prison_quotas) 866 return (EPERM); 867 868 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 869 if (error == 0) 870 error = nlookup(&nd); 871 if (error == 0) { 872 mp = nd.nl_nch.mount; 873 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 874 uap->arg, nd.nl_cred); 875 } 876 nlookup_done(&nd); 877 return (error); 878 } 879 880 /* 881 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 882 * void *buf, int buflen) 883 * 884 * This function operates on a mount point and executes the specified 885 * operation using the specified control data, and possibly returns data. 886 * 887 * The actual number of bytes stored in the result buffer is returned, 0 888 * if none, otherwise an error is returned. 889 */ 890 /* ARGSUSED */ 891 int 892 sys_mountctl(struct mountctl_args *uap) 893 { 894 struct thread *td = curthread; 895 struct proc *p = td->td_proc; 896 struct file *fp; 897 void *ctl = NULL; 898 void *buf = NULL; 899 char *path = NULL; 900 int error; 901 902 /* 903 * Sanity and permissions checks. We must be root. 
904 */ 905 KKASSERT(p); 906 if (p->p_ucred->cr_prison != NULL) 907 return (EPERM); 908 if ((error = suser(td)) != 0) 909 return (error); 910 911 /* 912 * Argument length checks 913 */ 914 if (uap->ctllen < 0 || uap->ctllen > 1024) 915 return (EINVAL); 916 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 917 return (EINVAL); 918 if (uap->path == NULL) 919 return (EINVAL); 920 921 /* 922 * Allocate the necessary buffers and copyin data 923 */ 924 path = objcache_get(namei_oc, M_WAITOK); 925 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 926 if (error) 927 goto done; 928 929 if (uap->ctllen) { 930 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 931 error = copyin(uap->ctl, ctl, uap->ctllen); 932 if (error) 933 goto done; 934 } 935 if (uap->buflen) 936 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 937 938 /* 939 * Validate the descriptor 940 */ 941 if (uap->fd >= 0) { 942 fp = holdfp(p->p_fd, uap->fd, -1); 943 if (fp == NULL) { 944 error = EBADF; 945 goto done; 946 } 947 } else { 948 fp = NULL; 949 } 950 951 /* 952 * Execute the internal kernel function and clean up. 953 */ 954 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 955 if (fp) 956 fdrop(fp); 957 if (error == 0 && uap->sysmsg_result > 0) 958 error = copyout(buf, uap->buf, uap->sysmsg_result); 959 done: 960 if (path) 961 objcache_put(namei_oc, path); 962 if (ctl) 963 kfree(ctl, M_TEMP); 964 if (buf) 965 kfree(buf, M_TEMP); 966 return (error); 967 } 968 969 /* 970 * Execute a mount control operation by resolving the path to a mount point 971 * and calling vop_mountctl(). 972 * 973 * Use the mount point from the nch instead of the vnode so nullfs mounts 974 * can properly spike the VOP. 
975 */ 976 int 977 kern_mountctl(const char *path, int op, struct file *fp, 978 const void *ctl, int ctllen, 979 void *buf, int buflen, int *res) 980 { 981 struct vnode *vp; 982 struct mount *mp; 983 struct nlookupdata nd; 984 int error; 985 986 *res = 0; 987 vp = NULL; 988 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 989 if (error == 0) 990 error = nlookup(&nd); 991 if (error == 0) 992 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 993 mp = nd.nl_nch.mount; 994 nlookup_done(&nd); 995 if (error) 996 return (error); 997 998 /* 999 * Must be the root of the filesystem 1000 */ 1001 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1002 vput(vp); 1003 return (EINVAL); 1004 } 1005 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 1006 buf, buflen, res); 1007 vput(vp); 1008 return (error); 1009 } 1010 1011 int 1012 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1013 { 1014 struct thread *td = curthread; 1015 struct proc *p = td->td_proc; 1016 struct mount *mp; 1017 struct statfs *sp; 1018 char *fullpath, *freepath; 1019 int error; 1020 1021 if ((error = nlookup(nd)) != 0) 1022 return (error); 1023 mp = nd->nl_nch.mount; 1024 sp = &mp->mnt_stat; 1025 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1026 return (error); 1027 1028 error = mount_path(p, mp, &fullpath, &freepath); 1029 if (error) 1030 return(error); 1031 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1032 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1033 kfree(freepath, M_TEMP); 1034 1035 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1036 bcopy(sp, buf, sizeof(*buf)); 1037 /* Only root should have access to the fsid's. */ 1038 if (suser(td)) 1039 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1040 return (0); 1041 } 1042 1043 /* 1044 * statfs_args(char *path, struct statfs *buf) 1045 * 1046 * Get filesystem statistics. 
1047 */ 1048 int 1049 sys_statfs(struct statfs_args *uap) 1050 { 1051 struct nlookupdata nd; 1052 struct statfs buf; 1053 int error; 1054 1055 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1056 if (error == 0) 1057 error = kern_statfs(&nd, &buf); 1058 nlookup_done(&nd); 1059 if (error == 0) 1060 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1061 return (error); 1062 } 1063 1064 int 1065 kern_fstatfs(int fd, struct statfs *buf) 1066 { 1067 struct thread *td = curthread; 1068 struct proc *p = td->td_proc; 1069 struct file *fp; 1070 struct mount *mp; 1071 struct statfs *sp; 1072 char *fullpath, *freepath; 1073 int error; 1074 1075 KKASSERT(p); 1076 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1077 return (error); 1078 mp = ((struct vnode *)fp->f_data)->v_mount; 1079 if (mp == NULL) { 1080 error = EBADF; 1081 goto done; 1082 } 1083 if (fp->f_cred == NULL) { 1084 error = EINVAL; 1085 goto done; 1086 } 1087 sp = &mp->mnt_stat; 1088 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1089 goto done; 1090 1091 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1092 goto done; 1093 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1094 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1095 kfree(freepath, M_TEMP); 1096 1097 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1098 bcopy(sp, buf, sizeof(*buf)); 1099 1100 /* Only root should have access to the fsid's. */ 1101 if (suser(td)) 1102 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1103 error = 0; 1104 done: 1105 fdrop(fp); 1106 return (error); 1107 } 1108 1109 /* 1110 * fstatfs_args(int fd, struct statfs *buf) 1111 * 1112 * Get filesystem statistics. 
1113 */ 1114 int 1115 sys_fstatfs(struct fstatfs_args *uap) 1116 { 1117 struct statfs buf; 1118 int error; 1119 1120 error = kern_fstatfs(uap->fd, &buf); 1121 1122 if (error == 0) 1123 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1124 return (error); 1125 } 1126 1127 int 1128 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1129 { 1130 struct mount *mp; 1131 struct statvfs *sp; 1132 int error; 1133 1134 if ((error = nlookup(nd)) != 0) 1135 return (error); 1136 mp = nd->nl_nch.mount; 1137 sp = &mp->mnt_vstat; 1138 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1139 return (error); 1140 1141 sp->f_flag = 0; 1142 if (mp->mnt_flag & MNT_RDONLY) 1143 sp->f_flag |= ST_RDONLY; 1144 if (mp->mnt_flag & MNT_NOSUID) 1145 sp->f_flag |= ST_NOSUID; 1146 bcopy(sp, buf, sizeof(*buf)); 1147 return (0); 1148 } 1149 1150 /* 1151 * statfs_args(char *path, struct statfs *buf) 1152 * 1153 * Get filesystem statistics. 1154 */ 1155 int 1156 sys_statvfs(struct statvfs_args *uap) 1157 { 1158 struct nlookupdata nd; 1159 struct statvfs buf; 1160 int error; 1161 1162 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1163 if (error == 0) 1164 error = kern_statvfs(&nd, &buf); 1165 nlookup_done(&nd); 1166 if (error == 0) 1167 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1168 return (error); 1169 } 1170 1171 int 1172 kern_fstatvfs(int fd, struct statvfs *buf) 1173 { 1174 struct thread *td = curthread; 1175 struct proc *p = td->td_proc; 1176 struct file *fp; 1177 struct mount *mp; 1178 struct statvfs *sp; 1179 int error; 1180 1181 KKASSERT(p); 1182 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1183 return (error); 1184 mp = ((struct vnode *)fp->f_data)->v_mount; 1185 if (mp == NULL) { 1186 error = EBADF; 1187 goto done; 1188 } 1189 if (fp->f_cred == NULL) { 1190 error = EINVAL; 1191 goto done; 1192 } 1193 sp = &mp->mnt_vstat; 1194 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1195 goto done; 1196 1197 sp->f_flag = 0; 1198 if (mp->mnt_flag & 
MNT_RDONLY) 1199 sp->f_flag |= ST_RDONLY; 1200 if (mp->mnt_flag & MNT_NOSUID) 1201 sp->f_flag |= ST_NOSUID; 1202 1203 bcopy(sp, buf, sizeof(*buf)); 1204 error = 0; 1205 done: 1206 fdrop(fp); 1207 return (error); 1208 } 1209 1210 /* 1211 * fstatfs_args(int fd, struct statfs *buf) 1212 * 1213 * Get filesystem statistics. 1214 */ 1215 int 1216 sys_fstatvfs(struct fstatvfs_args *uap) 1217 { 1218 struct statvfs buf; 1219 int error; 1220 1221 error = kern_fstatvfs(uap->fd, &buf); 1222 1223 if (error == 0) 1224 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1225 return (error); 1226 } 1227 1228 /* 1229 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1230 * 1231 * Get statistics on all filesystems. 1232 */ 1233 1234 struct getfsstat_info { 1235 struct statfs *sfsp; 1236 long count; 1237 long maxcount; 1238 int error; 1239 int flags; 1240 struct proc *p; 1241 }; 1242 1243 static int getfsstat_callback(struct mount *, void *); 1244 1245 /* ARGSUSED */ 1246 int 1247 sys_getfsstat(struct getfsstat_args *uap) 1248 { 1249 struct thread *td = curthread; 1250 struct proc *p = td->td_proc; 1251 struct getfsstat_info info; 1252 1253 bzero(&info, sizeof(info)); 1254 1255 info.maxcount = uap->bufsize / sizeof(struct statfs); 1256 info.sfsp = uap->buf; 1257 info.count = 0; 1258 info.flags = uap->flags; 1259 info.p = p; 1260 1261 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1262 if (info.sfsp && info.count > info.maxcount) 1263 uap->sysmsg_result = info.maxcount; 1264 else 1265 uap->sysmsg_result = info.count; 1266 return (info.error); 1267 } 1268 1269 static int 1270 getfsstat_callback(struct mount *mp, void *data) 1271 { 1272 struct getfsstat_info *info = data; 1273 struct statfs *sp; 1274 char *freepath; 1275 char *fullpath; 1276 int error; 1277 1278 if (info->sfsp && info->count < info->maxcount) { 1279 if (info->p && !chroot_visible_mnt(mp, info->p)) 1280 return(0); 1281 sp = &mp->mnt_stat; 1282 1283 /* 1284 * If MNT_NOWAIT or MNT_LAZY is 
specified, do not 1285 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1286 * overrides MNT_WAIT. 1287 */ 1288 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1289 (info->flags & MNT_WAIT)) && 1290 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1291 return(0); 1292 } 1293 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1294 1295 error = mount_path(info->p, mp, &fullpath, &freepath); 1296 if (error) { 1297 info->error = error; 1298 return(-1); 1299 } 1300 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1301 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1302 kfree(freepath, M_TEMP); 1303 1304 error = copyout(sp, info->sfsp, sizeof(*sp)); 1305 if (error) { 1306 info->error = error; 1307 return (-1); 1308 } 1309 ++info->sfsp; 1310 } 1311 info->count++; 1312 return(0); 1313 } 1314 1315 /* 1316 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1317 long bufsize, int flags) 1318 * 1319 * Get statistics on all filesystems. 1320 */ 1321 1322 struct getvfsstat_info { 1323 struct statfs *sfsp; 1324 struct statvfs *vsfsp; 1325 long count; 1326 long maxcount; 1327 int error; 1328 int flags; 1329 struct proc *p; 1330 }; 1331 1332 static int getvfsstat_callback(struct mount *, void *); 1333 1334 /* ARGSUSED */ 1335 int 1336 sys_getvfsstat(struct getvfsstat_args *uap) 1337 { 1338 struct thread *td = curthread; 1339 struct proc *p = td->td_proc; 1340 struct getvfsstat_info info; 1341 1342 bzero(&info, sizeof(info)); 1343 1344 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1345 info.sfsp = uap->buf; 1346 info.vsfsp = uap->vbuf; 1347 info.count = 0; 1348 info.flags = uap->flags; 1349 info.p = p; 1350 1351 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1352 if (info.vsfsp && info.count > info.maxcount) 1353 uap->sysmsg_result = info.maxcount; 1354 else 1355 uap->sysmsg_result = info.count; 1356 return (info.error); 1357 } 1358 1359 static int 1360 getvfsstat_callback(struct mount *mp, void *data) 1361 { 1362 struct 
getvfsstat_info *info = data; 1363 struct statfs *sp; 1364 struct statvfs *vsp; 1365 char *freepath; 1366 char *fullpath; 1367 int error; 1368 1369 if (info->vsfsp && info->count < info->maxcount) { 1370 if (info->p && !chroot_visible_mnt(mp, info->p)) 1371 return(0); 1372 sp = &mp->mnt_stat; 1373 vsp = &mp->mnt_vstat; 1374 1375 /* 1376 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1377 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1378 * overrides MNT_WAIT. 1379 */ 1380 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1381 (info->flags & MNT_WAIT)) && 1382 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1383 return(0); 1384 } 1385 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1386 1387 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1388 (info->flags & MNT_WAIT)) && 1389 (error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) { 1390 return(0); 1391 } 1392 vsp->f_flag = 0; 1393 if (mp->mnt_flag & MNT_RDONLY) 1394 vsp->f_flag |= ST_RDONLY; 1395 if (mp->mnt_flag & MNT_NOSUID) 1396 vsp->f_flag |= ST_NOSUID; 1397 1398 error = mount_path(info->p, mp, &fullpath, &freepath); 1399 if (error) { 1400 info->error = error; 1401 return(-1); 1402 } 1403 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1404 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1405 kfree(freepath, M_TEMP); 1406 1407 error = copyout(sp, info->sfsp, sizeof(*sp)); 1408 if (error == 0) 1409 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1410 if (error) { 1411 info->error = error; 1412 return (-1); 1413 } 1414 ++info->sfsp; 1415 ++info->vsfsp; 1416 } 1417 info->count++; 1418 return(0); 1419 } 1420 1421 1422 /* 1423 * fchdir_args(int fd) 1424 * 1425 * Change current working directory to a given file descriptor. 
1426 */ 1427 /* ARGSUSED */ 1428 int 1429 sys_fchdir(struct fchdir_args *uap) 1430 { 1431 struct thread *td = curthread; 1432 struct proc *p = td->td_proc; 1433 struct filedesc *fdp = p->p_fd; 1434 struct vnode *vp, *ovp; 1435 struct mount *mp; 1436 struct file *fp; 1437 struct nchandle nch, onch, tnch; 1438 int error; 1439 1440 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1441 return (error); 1442 vp = (struct vnode *)fp->f_data; 1443 vref(vp); 1444 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1445 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) 1446 error = ENOTDIR; 1447 else 1448 error = VOP_ACCESS(vp, VEXEC, p->p_ucred); 1449 if (error) { 1450 vput(vp); 1451 fdrop(fp); 1452 return (error); 1453 } 1454 cache_copy(&fp->f_nchandle, &nch); 1455 1456 /* 1457 * If the ncp has become a mount point, traverse through 1458 * the mount point. 1459 */ 1460 1461 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1462 (mp = cache_findmount(&nch)) != NULL 1463 ) { 1464 error = nlookup_mp(mp, &tnch); 1465 if (error == 0) { 1466 cache_unlock(&tnch); /* leave ref intact */ 1467 vput(vp); 1468 vp = tnch.ncp->nc_vp; 1469 error = vget(vp, LK_SHARED); 1470 KKASSERT(error == 0); 1471 cache_drop(&nch); 1472 nch = tnch; 1473 } 1474 } 1475 if (error == 0) { 1476 ovp = fdp->fd_cdir; 1477 onch = fdp->fd_ncdir; 1478 vn_unlock(vp); /* leave ref intact */ 1479 fdp->fd_cdir = vp; 1480 fdp->fd_ncdir = nch; 1481 cache_drop(&onch); 1482 vrele(ovp); 1483 } else { 1484 cache_drop(&nch); 1485 vput(vp); 1486 } 1487 fdrop(fp); 1488 return (error); 1489 } 1490 1491 int 1492 kern_chdir(struct nlookupdata *nd) 1493 { 1494 struct thread *td = curthread; 1495 struct proc *p = td->td_proc; 1496 struct filedesc *fdp = p->p_fd; 1497 struct vnode *vp, *ovp; 1498 struct nchandle onch; 1499 int error; 1500 1501 if ((error = nlookup(nd)) != 0) 1502 return (error); 1503 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1504 return (ENOENT); 1505 if ((error = vget(vp, LK_SHARED)) != 0) 1506 return (error); 1507 
1508 error = checkvp_chdir(vp, td); 1509 vn_unlock(vp); 1510 if (error == 0) { 1511 ovp = fdp->fd_cdir; 1512 onch = fdp->fd_ncdir; 1513 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1514 fdp->fd_ncdir = nd->nl_nch; 1515 fdp->fd_cdir = vp; 1516 cache_drop(&onch); 1517 vrele(ovp); 1518 cache_zero(&nd->nl_nch); 1519 } else { 1520 vrele(vp); 1521 } 1522 return (error); 1523 } 1524 1525 /* 1526 * chdir_args(char *path) 1527 * 1528 * Change current working directory (``.''). 1529 */ 1530 int 1531 sys_chdir(struct chdir_args *uap) 1532 { 1533 struct nlookupdata nd; 1534 int error; 1535 1536 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1537 if (error == 0) 1538 error = kern_chdir(&nd); 1539 nlookup_done(&nd); 1540 return (error); 1541 } 1542 1543 /* 1544 * Helper function for raised chroot(2) security function: Refuse if 1545 * any filedescriptors are open directories. 1546 */ 1547 static int 1548 chroot_refuse_vdir_fds(struct filedesc *fdp) 1549 { 1550 struct vnode *vp; 1551 struct file *fp; 1552 int error; 1553 int fd; 1554 1555 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1556 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1557 continue; 1558 vp = (struct vnode *)fp->f_data; 1559 if (vp->v_type != VDIR) { 1560 fdrop(fp); 1561 continue; 1562 } 1563 fdrop(fp); 1564 return(EPERM); 1565 } 1566 return (0); 1567 } 1568 1569 /* 1570 * This sysctl determines if we will allow a process to chroot(2) if it 1571 * has a directory open: 1572 * 0: disallowed for all processes. 1573 * 1: allowed for processes that were not already chroot(2)'ed. 1574 * 2: allowed for all processes. 1575 */ 1576 1577 static int chroot_allow_open_directories = 1; 1578 1579 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1580 &chroot_allow_open_directories, 0, ""); 1581 1582 /* 1583 * chroot to the specified namecache entry. We obtain the vp from the 1584 * namecache data. 
The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only root can chroot
	 */
	error = suser_cred(p->p_ucred, PRISON_ROOT);
	if (error != 0)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		error = chroot_refuse_vdir_fds(fdp);
		if (error != 0)
			return (error);
	}

	vp = nch->ncp->nc_vp;
	if (vp == NULL)
		return (ENOENT);

	error = vget(vp, LK_SHARED);
	if (error != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error) {
		vrele(vp);
		return (error);
	}

	/* Swap in the new root; the vget() reference is kept by fd_rdir. */
	vrele(fdp->fd_rdir);
	fdp->fd_rdir = vp;
	cache_drop(&fdp->fd_nrdir);
	cache_copy(nch, &fdp->fd_nrdir);

	/* The jail directory is only set when there is none yet. */
	if (fdp->fd_jdir == NULL) {
		fdp->fd_jdir = vp;
		vref(fdp->fd_jdir);
		cache_copy(nch, &fdp->fd_njdir);
	}
	return (0);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
1642 */ 1643 /* ARGSUSED */ 1644 int 1645 sys_chroot(struct chroot_args *uap) 1646 { 1647 struct thread *td = curthread; 1648 struct nlookupdata nd; 1649 int error; 1650 1651 KKASSERT(td->td_proc); 1652 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1653 if (error) { 1654 nlookup_done(&nd); 1655 return(error); 1656 } 1657 error = nlookup(&nd); 1658 if (error == 0) 1659 error = kern_chroot(&nd.nl_nch); 1660 nlookup_done(&nd); 1661 return(error); 1662 } 1663 1664 /* 1665 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1666 * determine whether it is legal to chdir to the vnode. The vnode's state 1667 * is not changed by this call. 1668 */ 1669 int 1670 checkvp_chdir(struct vnode *vp, struct thread *td) 1671 { 1672 int error; 1673 1674 if (vp->v_type != VDIR) 1675 error = ENOTDIR; 1676 else 1677 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred); 1678 return (error); 1679 } 1680 1681 int 1682 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1683 { 1684 struct thread *td = curthread; 1685 struct proc *p = td->td_proc; 1686 struct lwp *lp = td->td_lwp; 1687 struct filedesc *fdp = p->p_fd; 1688 int cmode, flags; 1689 struct file *nfp; 1690 struct file *fp; 1691 struct vnode *vp; 1692 int type, indx, error; 1693 struct flock lf; 1694 1695 if ((oflags & O_ACCMODE) == O_ACCMODE) 1696 return (EINVAL); 1697 flags = FFLAGS(oflags); 1698 error = falloc(p, &nfp, NULL); 1699 if (error) 1700 return (error); 1701 fp = nfp; 1702 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1703 1704 /* 1705 * XXX p_dupfd is a real mess. It allows a device to return a 1706 * file descriptor to be duplicated rather then doing the open 1707 * itself. 1708 */ 1709 lp->lwp_dupfd = -1; 1710 1711 /* 1712 * Call vn_open() to do the lookup and assign the vnode to the 1713 * file pointer. vn_open() does not change the ref count on fp 1714 * and the vnode, on success, will be inherited by the file pointer 1715 * and unlocked. 
1716 */ 1717 nd->nl_flags |= NLC_LOCKVP; 1718 error = vn_open(nd, fp, flags, cmode); 1719 nlookup_done(nd); 1720 if (error) { 1721 /* 1722 * handle special fdopen() case. bleh. dupfdopen() is 1723 * responsible for dropping the old contents of ofiles[indx] 1724 * if it succeeds. 1725 * 1726 * Note that fsetfd() will add a ref to fp which represents 1727 * the fd_files[] assignment. We must still drop our 1728 * reference. 1729 */ 1730 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1731 if (fdalloc(p, 0, &indx) == 0) { 1732 error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error); 1733 if (error == 0) { 1734 *res = indx; 1735 fdrop(fp); /* our ref */ 1736 return (0); 1737 } 1738 fsetfd(p, NULL, indx); 1739 } 1740 } 1741 fdrop(fp); /* our ref */ 1742 if (error == ERESTART) 1743 error = EINTR; 1744 return (error); 1745 } 1746 1747 /* 1748 * ref the vnode for ourselves so it can't be ripped out from under 1749 * is. XXX need an ND flag to request that the vnode be returned 1750 * anyway. 1751 * 1752 * Reserve a file descriptor but do not assign it until the open 1753 * succeeds. 1754 */ 1755 vp = (struct vnode *)fp->f_data; 1756 vref(vp); 1757 if ((error = fdalloc(p, 0, &indx)) != 0) { 1758 fdrop(fp); 1759 vrele(vp); 1760 return (error); 1761 } 1762 1763 /* 1764 * If no error occurs the vp will have been assigned to the file 1765 * pointer. 1766 */ 1767 lp->lwp_dupfd = 0; 1768 1769 if (flags & (O_EXLOCK | O_SHLOCK)) { 1770 lf.l_whence = SEEK_SET; 1771 lf.l_start = 0; 1772 lf.l_len = 0; 1773 if (flags & O_EXLOCK) 1774 lf.l_type = F_WRLCK; 1775 else 1776 lf.l_type = F_RDLCK; 1777 if (flags & FNONBLOCK) 1778 type = 0; 1779 else 1780 type = F_WAIT; 1781 1782 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1783 /* 1784 * lock request failed. Clean up the reserved 1785 * descriptor. 
1786 */ 1787 vrele(vp); 1788 fsetfd(p, NULL, indx); 1789 fdrop(fp); 1790 return (error); 1791 } 1792 fp->f_flag |= FHASLOCK; 1793 } 1794 #if 0 1795 /* 1796 * Assert that all regular file vnodes were created with a object. 1797 */ 1798 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1799 ("open: regular file has no backing object after vn_open")); 1800 #endif 1801 1802 vrele(vp); 1803 1804 /* 1805 * release our private reference, leaving the one associated with the 1806 * descriptor table intact. 1807 */ 1808 fsetfd(p, fp, indx); 1809 fdrop(fp); 1810 *res = indx; 1811 return (0); 1812 } 1813 1814 /* 1815 * open_args(char *path, int flags, int mode) 1816 * 1817 * Check permissions, allocate an open file structure, 1818 * and call the device open routine if any. 1819 */ 1820 int 1821 sys_open(struct open_args *uap) 1822 { 1823 struct nlookupdata nd; 1824 int error; 1825 1826 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1827 if (error == 0) { 1828 error = kern_open(&nd, uap->flags, 1829 uap->mode, &uap->sysmsg_result); 1830 } 1831 nlookup_done(&nd); 1832 return (error); 1833 } 1834 1835 int 1836 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1837 { 1838 struct thread *td = curthread; 1839 struct proc *p = td->td_proc; 1840 struct vnode *vp; 1841 struct vattr vattr; 1842 int error; 1843 int whiteout = 0; 1844 1845 KKASSERT(p); 1846 1847 switch (mode & S_IFMT) { 1848 case S_IFCHR: 1849 case S_IFBLK: 1850 error = suser(td); 1851 break; 1852 default: 1853 error = suser_cred(p->p_ucred, PRISON_ROOT); 1854 break; 1855 } 1856 if (error) 1857 return (error); 1858 1859 bwillinode(1); 1860 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 1861 if ((error = nlookup(nd)) != 0) 1862 return (error); 1863 if (nd->nl_nch.ncp->nc_vp) 1864 return (EEXIST); 1865 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1866 return (error); 1867 1868 VATTR_NULL(&vattr); 1869 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1870 vattr.va_rmajor = rmajor; 
1871 vattr.va_rminor = rminor; 1872 whiteout = 0; 1873 1874 switch (mode & S_IFMT) { 1875 case S_IFMT: /* used by badsect to flag bad sectors */ 1876 vattr.va_type = VBAD; 1877 break; 1878 case S_IFCHR: 1879 vattr.va_type = VCHR; 1880 break; 1881 case S_IFBLK: 1882 vattr.va_type = VBLK; 1883 break; 1884 case S_IFWHT: 1885 whiteout = 1; 1886 break; 1887 case S_IFDIR: 1888 /* special directories support for HAMMER */ 1889 vattr.va_type = VDIR; 1890 break; 1891 default: 1892 error = EINVAL; 1893 break; 1894 } 1895 if (error == 0) { 1896 if (whiteout) { 1897 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 1898 nd->nl_cred, NAMEI_CREATE); 1899 } else { 1900 vp = NULL; 1901 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 1902 &vp, nd->nl_cred, &vattr); 1903 if (error == 0) 1904 vput(vp); 1905 } 1906 } 1907 return (error); 1908 } 1909 1910 /* 1911 * mknod_args(char *path, int mode, int dev) 1912 * 1913 * Create a special file. 1914 */ 1915 int 1916 sys_mknod(struct mknod_args *uap) 1917 { 1918 struct nlookupdata nd; 1919 int error; 1920 1921 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1922 if (error == 0) { 1923 error = kern_mknod(&nd, uap->mode, 1924 umajor(uap->dev), uminor(uap->dev)); 1925 } 1926 nlookup_done(&nd); 1927 return (error); 1928 } 1929 1930 int 1931 kern_mkfifo(struct nlookupdata *nd, int mode) 1932 { 1933 struct thread *td = curthread; 1934 struct proc *p = td->td_proc; 1935 struct vattr vattr; 1936 struct vnode *vp; 1937 int error; 1938 1939 bwillinode(1); 1940 1941 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 1942 if ((error = nlookup(nd)) != 0) 1943 return (error); 1944 if (nd->nl_nch.ncp->nc_vp) 1945 return (EEXIST); 1946 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1947 return (error); 1948 1949 VATTR_NULL(&vattr); 1950 vattr.va_type = VFIFO; 1951 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1952 vp = NULL; 1953 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 1954 if (error == 0) 1955 vput(vp); 1956 return 
(error); 1957 } 1958 1959 /* 1960 * mkfifo_args(char *path, int mode) 1961 * 1962 * Create a named pipe. 1963 */ 1964 int 1965 sys_mkfifo(struct mkfifo_args *uap) 1966 { 1967 struct nlookupdata nd; 1968 int error; 1969 1970 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1971 if (error == 0) 1972 error = kern_mkfifo(&nd, uap->mode); 1973 nlookup_done(&nd); 1974 return (error); 1975 } 1976 1977 static int hardlink_check_uid = 0; 1978 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1979 &hardlink_check_uid, 0, 1980 "Unprivileged processes cannot create hard links to files owned by other " 1981 "users"); 1982 static int hardlink_check_gid = 0; 1983 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1984 &hardlink_check_gid, 0, 1985 "Unprivileged processes cannot create hard links to files owned by other " 1986 "groups"); 1987 1988 static int 1989 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 1990 { 1991 struct vattr va; 1992 int error; 1993 1994 /* 1995 * Shortcut if disabled 1996 */ 1997 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 1998 return (0); 1999 2000 /* 2001 * root cred can always hardlink 2002 */ 2003 if (suser_cred(cred, PRISON_ROOT) == 0) 2004 return (0); 2005 2006 /* 2007 * Otherwise only if the originating file is owned by the 2008 * same user or group. Note that any group is allowed if 2009 * the file is owned by the caller. 
2010 */ 2011 error = VOP_GETATTR(vp, &va); 2012 if (error != 0) 2013 return (error); 2014 2015 if (hardlink_check_uid) { 2016 if (cred->cr_uid != va.va_uid) 2017 return (EPERM); 2018 } 2019 2020 if (hardlink_check_gid) { 2021 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2022 return (EPERM); 2023 } 2024 2025 return (0); 2026 } 2027 2028 int 2029 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2030 { 2031 struct thread *td = curthread; 2032 struct vnode *vp; 2033 int error; 2034 2035 /* 2036 * Lookup the source and obtained a locked vnode. 2037 * 2038 * XXX relookup on vget failure / race ? 2039 */ 2040 bwillinode(1); 2041 if ((error = nlookup(nd)) != 0) 2042 return (error); 2043 vp = nd->nl_nch.ncp->nc_vp; 2044 KKASSERT(vp != NULL); 2045 if (vp->v_type == VDIR) 2046 return (EPERM); /* POSIX */ 2047 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2048 return (error); 2049 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2050 return (error); 2051 2052 /* 2053 * Unlock the source so we can lookup the target without deadlocking 2054 * (XXX vp is locked already, possible other deadlock?). The target 2055 * must not exist. 2056 */ 2057 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2058 nd->nl_flags &= ~NLC_NCPISLOCKED; 2059 cache_unlock(&nd->nl_nch); 2060 2061 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2062 if ((error = nlookup(linknd)) != 0) { 2063 vput(vp); 2064 return (error); 2065 } 2066 if (linknd->nl_nch.ncp->nc_vp) { 2067 vput(vp); 2068 return (EEXIST); 2069 } 2070 2071 /* 2072 * Finally run the new API VOP. 2073 */ 2074 error = can_hardlink(vp, td, td->td_proc->p_ucred); 2075 if (error == 0) { 2076 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2077 vp, linknd->nl_cred); 2078 } 2079 vput(vp); 2080 return (error); 2081 } 2082 2083 /* 2084 * link_args(char *path, char *link) 2085 * 2086 * Make a hard file link. 
2087 */ 2088 int 2089 sys_link(struct link_args *uap) 2090 { 2091 struct nlookupdata nd, linknd; 2092 int error; 2093 2094 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2095 if (error == 0) { 2096 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2097 if (error == 0) 2098 error = kern_link(&nd, &linknd); 2099 nlookup_done(&linknd); 2100 } 2101 nlookup_done(&nd); 2102 return (error); 2103 } 2104 2105 int 2106 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2107 { 2108 struct vattr vattr; 2109 struct vnode *vp; 2110 struct vnode *dvp; 2111 int error; 2112 2113 bwillinode(1); 2114 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2115 if ((error = nlookup(nd)) != 0) 2116 return (error); 2117 if (nd->nl_nch.ncp->nc_vp) 2118 return (EEXIST); 2119 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2120 return (error); 2121 dvp = nd->nl_dvp; 2122 VATTR_NULL(&vattr); 2123 vattr.va_mode = mode; 2124 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2125 if (error == 0) 2126 vput(vp); 2127 return (error); 2128 } 2129 2130 /* 2131 * symlink(char *path, char *link) 2132 * 2133 * Make a symbolic link. 2134 */ 2135 int 2136 sys_symlink(struct symlink_args *uap) 2137 { 2138 struct thread *td = curthread; 2139 struct nlookupdata nd; 2140 char *path; 2141 int error; 2142 int mode; 2143 2144 path = objcache_get(namei_oc, M_WAITOK); 2145 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2146 if (error == 0) { 2147 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2148 if (error == 0) { 2149 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2150 error = kern_symlink(&nd, path, mode); 2151 } 2152 nlookup_done(&nd); 2153 } 2154 objcache_put(namei_oc, path); 2155 return (error); 2156 } 2157 2158 /* 2159 * undelete_args(char *path) 2160 * 2161 * Delete a whiteout from the filesystem. 
2162 */ 2163 /* ARGSUSED */ 2164 int 2165 sys_undelete(struct undelete_args *uap) 2166 { 2167 struct nlookupdata nd; 2168 int error; 2169 2170 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2171 bwillinode(1); 2172 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2173 if (error == 0) 2174 error = nlookup(&nd); 2175 if (error == 0) 2176 error = ncp_writechk(&nd.nl_nch); 2177 if (error == 0) { 2178 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2179 NAMEI_DELETE); 2180 } 2181 nlookup_done(&nd); 2182 return (error); 2183 } 2184 2185 int 2186 kern_unlink(struct nlookupdata *nd) 2187 { 2188 int error; 2189 2190 bwillinode(1); 2191 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2192 if ((error = nlookup(nd)) != 0) 2193 return (error); 2194 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2195 return (error); 2196 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2197 return (error); 2198 } 2199 2200 /* 2201 * unlink_args(char *path) 2202 * 2203 * Delete a name from the filesystem. 2204 */ 2205 int 2206 sys_unlink(struct unlink_args *uap) 2207 { 2208 struct nlookupdata nd; 2209 int error; 2210 2211 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2212 if (error == 0) 2213 error = kern_unlink(&nd); 2214 nlookup_done(&nd); 2215 return (error); 2216 } 2217 2218 int 2219 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2220 { 2221 struct thread *td = curthread; 2222 struct proc *p = td->td_proc; 2223 struct file *fp; 2224 struct vnode *vp; 2225 struct vattr vattr; 2226 off_t new_offset; 2227 int error; 2228 2229 fp = holdfp(p->p_fd, fd, -1); 2230 if (fp == NULL) 2231 return (EBADF); 2232 if (fp->f_type != DTYPE_VNODE) { 2233 error = ESPIPE; 2234 goto done; 2235 } 2236 vp = (struct vnode *)fp->f_data; 2237 2238 switch (whence) { 2239 case L_INCR: 2240 new_offset = fp->f_offset + offset; 2241 error = 0; 2242 break; 2243 case L_XTND: 2244 error = VOP_GETATTR(vp, &vattr); 2245 new_offset = offset + vattr.va_size; 2246 break; 2247 case L_SET: 2248 
new_offset = offset; 2249 error = 0; 2250 break; 2251 default: 2252 new_offset = 0; 2253 error = EINVAL; 2254 break; 2255 } 2256 2257 /* 2258 * Validate the seek position. Negative offsets are not allowed 2259 * for regular files, block specials, or directories. 2260 */ 2261 if (error == 0) { 2262 if (new_offset < 0 && 2263 (vp->v_type == VREG || vp->v_type == VDIR || 2264 vp->v_type == VCHR || vp->v_type == VBLK)) { 2265 error = EINVAL; 2266 } else { 2267 fp->f_offset = new_offset; 2268 } 2269 } 2270 *res = fp->f_offset; 2271 done: 2272 fdrop(fp); 2273 return (error); 2274 } 2275 2276 /* 2277 * lseek_args(int fd, int pad, off_t offset, int whence) 2278 * 2279 * Reposition read/write file offset. 2280 */ 2281 int 2282 sys_lseek(struct lseek_args *uap) 2283 { 2284 int error; 2285 2286 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2287 &uap->sysmsg_offset); 2288 2289 return (error); 2290 } 2291 2292 int 2293 kern_access(struct nlookupdata *nd, int aflags) 2294 { 2295 struct vnode *vp; 2296 int error, flags; 2297 2298 if ((error = nlookup(nd)) != 0) 2299 return (error); 2300 retry: 2301 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2302 if (error) 2303 return (error); 2304 2305 /* Flags == 0 means only check for existence. */ 2306 if (aflags) { 2307 flags = 0; 2308 if (aflags & R_OK) 2309 flags |= VREAD; 2310 if (aflags & W_OK) 2311 flags |= VWRITE; 2312 if (aflags & X_OK) 2313 flags |= VEXEC; 2314 if ((flags & VWRITE) == 0 || 2315 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2316 error = VOP_ACCESS(vp, flags, nd->nl_cred); 2317 2318 /* 2319 * If the file handle is stale we have to re-resolve the 2320 * entry. This is a hack at the moment. 
2321 */ 2322 if (error == ESTALE) { 2323 vput(vp); 2324 cache_setunresolved(&nd->nl_nch); 2325 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2326 if (error == 0) { 2327 vp = NULL; 2328 goto retry; 2329 } 2330 return(error); 2331 } 2332 } 2333 vput(vp); 2334 return (error); 2335 } 2336 2337 /* 2338 * access_args(char *path, int flags) 2339 * 2340 * Check access permissions. 2341 */ 2342 int 2343 sys_access(struct access_args *uap) 2344 { 2345 struct nlookupdata nd; 2346 int error; 2347 2348 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2349 if (error == 0) 2350 error = kern_access(&nd, uap->flags); 2351 nlookup_done(&nd); 2352 return (error); 2353 } 2354 2355 int 2356 kern_stat(struct nlookupdata *nd, struct stat *st) 2357 { 2358 int error; 2359 struct vnode *vp; 2360 thread_t td; 2361 2362 if ((error = nlookup(nd)) != 0) 2363 return (error); 2364 again: 2365 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2366 return (ENOENT); 2367 2368 td = curthread; 2369 if ((error = vget(vp, LK_SHARED)) != 0) 2370 return (error); 2371 error = vn_stat(vp, st, nd->nl_cred); 2372 2373 /* 2374 * If the file handle is stale we have to re-resolve the entry. This 2375 * is a hack at the moment. 2376 */ 2377 if (error == ESTALE) { 2378 vput(vp); 2379 cache_setunresolved(&nd->nl_nch); 2380 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2381 if (error == 0) 2382 goto again; 2383 } else { 2384 vput(vp); 2385 } 2386 return (error); 2387 } 2388 2389 /* 2390 * stat_args(char *path, struct stat *ub) 2391 * 2392 * Get file status; this version follows links. 
2393 */ 2394 int 2395 sys_stat(struct stat_args *uap) 2396 { 2397 struct nlookupdata nd; 2398 struct stat st; 2399 int error; 2400 2401 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2402 if (error == 0) { 2403 error = kern_stat(&nd, &st); 2404 if (error == 0) 2405 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2406 } 2407 nlookup_done(&nd); 2408 return (error); 2409 } 2410 2411 /* 2412 * lstat_args(char *path, struct stat *ub) 2413 * 2414 * Get file status; this version does not follow links. 2415 */ 2416 int 2417 sys_lstat(struct lstat_args *uap) 2418 { 2419 struct nlookupdata nd; 2420 struct stat st; 2421 int error; 2422 2423 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2424 if (error == 0) { 2425 error = kern_stat(&nd, &st); 2426 if (error == 0) 2427 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2428 } 2429 nlookup_done(&nd); 2430 return (error); 2431 } 2432 2433 /* 2434 * pathconf_Args(char *path, int name) 2435 * 2436 * Get configurable pathname variables. 2437 */ 2438 /* ARGSUSED */ 2439 int 2440 sys_pathconf(struct pathconf_args *uap) 2441 { 2442 struct nlookupdata nd; 2443 struct vnode *vp; 2444 int error; 2445 2446 vp = NULL; 2447 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2448 if (error == 0) 2449 error = nlookup(&nd); 2450 if (error == 0) 2451 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2452 nlookup_done(&nd); 2453 if (error == 0) { 2454 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 2455 vput(vp); 2456 } 2457 return (error); 2458 } 2459 2460 /* 2461 * XXX: daver 2462 * kern_readlink isn't properly split yet. There is a copyin burried 2463 * in VOP_READLINK(). 
2464 */ 2465 int 2466 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2467 { 2468 struct thread *td = curthread; 2469 struct proc *p = td->td_proc; 2470 struct vnode *vp; 2471 struct iovec aiov; 2472 struct uio auio; 2473 int error; 2474 2475 if ((error = nlookup(nd)) != 0) 2476 return (error); 2477 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2478 if (error) 2479 return (error); 2480 if (vp->v_type != VLNK) { 2481 error = EINVAL; 2482 } else { 2483 aiov.iov_base = buf; 2484 aiov.iov_len = count; 2485 auio.uio_iov = &aiov; 2486 auio.uio_iovcnt = 1; 2487 auio.uio_offset = 0; 2488 auio.uio_rw = UIO_READ; 2489 auio.uio_segflg = UIO_USERSPACE; 2490 auio.uio_td = td; 2491 auio.uio_resid = count; 2492 error = VOP_READLINK(vp, &auio, p->p_ucred); 2493 } 2494 vput(vp); 2495 *res = count - auio.uio_resid; 2496 return (error); 2497 } 2498 2499 /* 2500 * readlink_args(char *path, char *buf, int count) 2501 * 2502 * Return target name of a symbolic link. 2503 */ 2504 int 2505 sys_readlink(struct readlink_args *uap) 2506 { 2507 struct nlookupdata nd; 2508 int error; 2509 2510 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2511 if (error == 0) { 2512 error = kern_readlink(&nd, uap->buf, uap->count, 2513 &uap->sysmsg_result); 2514 } 2515 nlookup_done(&nd); 2516 return (error); 2517 } 2518 2519 static int 2520 setfflags(struct vnode *vp, int flags) 2521 { 2522 struct thread *td = curthread; 2523 struct proc *p = td->td_proc; 2524 int error; 2525 struct vattr vattr; 2526 2527 /* 2528 * Prevent non-root users from setting flags on devices. When 2529 * a device is reused, users can retain ownership of the device 2530 * if they are allowed to set flags and programs assume that 2531 * chown can't fail when done as root. 
2532 */ 2533 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2534 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2535 return (error); 2536 2537 /* 2538 * note: vget is required for any operation that might mod the vnode 2539 * so VINACTIVE is properly cleared. 2540 */ 2541 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2542 VATTR_NULL(&vattr); 2543 vattr.va_flags = flags; 2544 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2545 vput(vp); 2546 } 2547 return (error); 2548 } 2549 2550 /* 2551 * chflags(char *path, int flags) 2552 * 2553 * Change flags of a file given a path name. 2554 */ 2555 /* ARGSUSED */ 2556 int 2557 sys_chflags(struct chflags_args *uap) 2558 { 2559 struct nlookupdata nd; 2560 struct vnode *vp; 2561 int error; 2562 2563 vp = NULL; 2564 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2565 /* XXX Add NLC flag indicating modifying operation? */ 2566 if (error == 0) 2567 error = nlookup(&nd); 2568 if (error == 0) 2569 error = ncp_writechk(&nd.nl_nch); 2570 if (error == 0) 2571 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2572 nlookup_done(&nd); 2573 if (error == 0) { 2574 error = setfflags(vp, uap->flags); 2575 vrele(vp); 2576 } 2577 return (error); 2578 } 2579 2580 /* 2581 * lchflags(char *path, int flags) 2582 * 2583 * Change flags of a file given a path name, but don't follow symlinks. 2584 */ 2585 /* ARGSUSED */ 2586 int 2587 sys_lchflags(struct lchflags_args *uap) 2588 { 2589 struct nlookupdata nd; 2590 struct vnode *vp; 2591 int error; 2592 2593 vp = NULL; 2594 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2595 /* XXX Add NLC flag indicating modifying operation? 
*/ 2596 if (error == 0) 2597 error = nlookup(&nd); 2598 if (error == 0) 2599 error = ncp_writechk(&nd.nl_nch); 2600 if (error == 0) 2601 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2602 nlookup_done(&nd); 2603 if (error == 0) { 2604 error = setfflags(vp, uap->flags); 2605 vrele(vp); 2606 } 2607 return (error); 2608 } 2609 2610 /* 2611 * fchflags_args(int fd, int flags) 2612 * 2613 * Change flags of a file given a file descriptor. 2614 */ 2615 /* ARGSUSED */ 2616 int 2617 sys_fchflags(struct fchflags_args *uap) 2618 { 2619 struct thread *td = curthread; 2620 struct proc *p = td->td_proc; 2621 struct file *fp; 2622 int error; 2623 2624 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2625 return (error); 2626 if (fp->f_nchandle.ncp) 2627 error = ncp_writechk(&fp->f_nchandle); 2628 if (error == 0) 2629 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2630 fdrop(fp); 2631 return (error); 2632 } 2633 2634 static int 2635 setfmode(struct vnode *vp, int mode) 2636 { 2637 struct thread *td = curthread; 2638 struct proc *p = td->td_proc; 2639 int error; 2640 struct vattr vattr; 2641 2642 /* 2643 * note: vget is required for any operation that might mod the vnode 2644 * so VINACTIVE is properly cleared. 2645 */ 2646 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2647 VATTR_NULL(&vattr); 2648 vattr.va_mode = mode & ALLPERMS; 2649 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2650 vput(vp); 2651 } 2652 return error; 2653 } 2654 2655 int 2656 kern_chmod(struct nlookupdata *nd, int mode) 2657 { 2658 struct vnode *vp; 2659 int error; 2660 2661 /* XXX Add NLC flag indicating modifying operation? 
*/ 2662 if ((error = nlookup(nd)) != 0) 2663 return (error); 2664 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2665 return (error); 2666 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2667 error = setfmode(vp, mode); 2668 vrele(vp); 2669 return (error); 2670 } 2671 2672 /* 2673 * chmod_args(char *path, int mode) 2674 * 2675 * Change mode of a file given path name. 2676 */ 2677 /* ARGSUSED */ 2678 int 2679 sys_chmod(struct chmod_args *uap) 2680 { 2681 struct nlookupdata nd; 2682 int error; 2683 2684 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2685 if (error == 0) 2686 error = kern_chmod(&nd, uap->mode); 2687 nlookup_done(&nd); 2688 return (error); 2689 } 2690 2691 /* 2692 * lchmod_args(char *path, int mode) 2693 * 2694 * Change mode of a file given path name (don't follow links.) 2695 */ 2696 /* ARGSUSED */ 2697 int 2698 sys_lchmod(struct lchmod_args *uap) 2699 { 2700 struct nlookupdata nd; 2701 int error; 2702 2703 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2704 if (error == 0) 2705 error = kern_chmod(&nd, uap->mode); 2706 nlookup_done(&nd); 2707 return (error); 2708 } 2709 2710 /* 2711 * fchmod_args(int fd, int mode) 2712 * 2713 * Change mode of a file given a file descriptor. 
2714 */ 2715 /* ARGSUSED */ 2716 int 2717 sys_fchmod(struct fchmod_args *uap) 2718 { 2719 struct thread *td = curthread; 2720 struct proc *p = td->td_proc; 2721 struct file *fp; 2722 int error; 2723 2724 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2725 return (error); 2726 if (fp->f_nchandle.ncp) 2727 error = ncp_writechk(&fp->f_nchandle); 2728 if (error == 0) 2729 error = setfmode((struct vnode *)fp->f_data, uap->mode); 2730 fdrop(fp); 2731 return (error); 2732 } 2733 2734 static int 2735 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2736 { 2737 struct thread *td = curthread; 2738 struct proc *p = td->td_proc; 2739 int error; 2740 struct vattr vattr; 2741 2742 /* 2743 * note: vget is required for any operation that might mod the vnode 2744 * so VINACTIVE is properly cleared. 2745 */ 2746 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2747 VATTR_NULL(&vattr); 2748 vattr.va_uid = uid; 2749 vattr.va_gid = gid; 2750 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2751 vput(vp); 2752 } 2753 return error; 2754 } 2755 2756 int 2757 kern_chown(struct nlookupdata *nd, int uid, int gid) 2758 { 2759 struct vnode *vp; 2760 int error; 2761 2762 /* XXX Add NLC flag indicating modifying operation? */ 2763 if ((error = nlookup(nd)) != 0) 2764 return (error); 2765 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2766 return (error); 2767 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2768 error = setfown(vp, uid, gid); 2769 vrele(vp); 2770 return (error); 2771 } 2772 2773 /* 2774 * chown(char *path, int uid, int gid) 2775 * 2776 * Set ownership given a path name. 
2777 */ 2778 int 2779 sys_chown(struct chown_args *uap) 2780 { 2781 struct nlookupdata nd; 2782 int error; 2783 2784 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2785 if (error == 0) 2786 error = kern_chown(&nd, uap->uid, uap->gid); 2787 nlookup_done(&nd); 2788 return (error); 2789 } 2790 2791 /* 2792 * lchown_args(char *path, int uid, int gid) 2793 * 2794 * Set ownership given a path name, do not cross symlinks. 2795 */ 2796 int 2797 sys_lchown(struct lchown_args *uap) 2798 { 2799 struct nlookupdata nd; 2800 int error; 2801 2802 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2803 if (error == 0) 2804 error = kern_chown(&nd, uap->uid, uap->gid); 2805 nlookup_done(&nd); 2806 return (error); 2807 } 2808 2809 /* 2810 * fchown_args(int fd, int uid, int gid) 2811 * 2812 * Set ownership given a file descriptor. 2813 */ 2814 /* ARGSUSED */ 2815 int 2816 sys_fchown(struct fchown_args *uap) 2817 { 2818 struct thread *td = curthread; 2819 struct proc *p = td->td_proc; 2820 struct file *fp; 2821 int error; 2822 2823 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2824 return (error); 2825 if (fp->f_nchandle.ncp) 2826 error = ncp_writechk(&fp->f_nchandle); 2827 if (error == 0) 2828 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 2829 fdrop(fp); 2830 return (error); 2831 } 2832 2833 static int 2834 getutimes(const struct timeval *tvp, struct timespec *tsp) 2835 { 2836 struct timeval tv[2]; 2837 2838 if (tvp == NULL) { 2839 microtime(&tv[0]); 2840 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2841 tsp[1] = tsp[0]; 2842 } else { 2843 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2844 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2845 } 2846 return 0; 2847 } 2848 2849 static int 2850 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2851 { 2852 struct thread *td = curthread; 2853 struct proc *p = td->td_proc; 2854 int error; 2855 struct vattr vattr; 2856 2857 /* 2858 * note: vget is required for any operation that might mod the vnode 
2859 * so VINACTIVE is properly cleared. 2860 */ 2861 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2862 VATTR_NULL(&vattr); 2863 vattr.va_atime = ts[0]; 2864 vattr.va_mtime = ts[1]; 2865 if (nullflag) 2866 vattr.va_vaflags |= VA_UTIMES_NULL; 2867 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2868 vput(vp); 2869 } 2870 return error; 2871 } 2872 2873 int 2874 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2875 { 2876 struct timespec ts[2]; 2877 struct vnode *vp; 2878 int error; 2879 2880 if ((error = getutimes(tptr, ts)) != 0) 2881 return (error); 2882 /* XXX Add NLC flag indicating modifying operation? */ 2883 if ((error = nlookup(nd)) != 0) 2884 return (error); 2885 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2886 return (error); 2887 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2888 return (error); 2889 error = setutimes(vp, ts, tptr == NULL); 2890 vrele(vp); 2891 return (error); 2892 } 2893 2894 /* 2895 * utimes_args(char *path, struct timeval *tptr) 2896 * 2897 * Set the access and modification times of a file. 2898 */ 2899 int 2900 sys_utimes(struct utimes_args *uap) 2901 { 2902 struct timeval tv[2]; 2903 struct nlookupdata nd; 2904 int error; 2905 2906 if (uap->tptr) { 2907 error = copyin(uap->tptr, tv, sizeof(tv)); 2908 if (error) 2909 return (error); 2910 } 2911 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2912 if (error == 0) 2913 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2914 nlookup_done(&nd); 2915 return (error); 2916 } 2917 2918 /* 2919 * lutimes_args(char *path, struct timeval *tptr) 2920 * 2921 * Set the access and modification times of a file. 
2922 */ 2923 int 2924 sys_lutimes(struct lutimes_args *uap) 2925 { 2926 struct timeval tv[2]; 2927 struct nlookupdata nd; 2928 int error; 2929 2930 if (uap->tptr) { 2931 error = copyin(uap->tptr, tv, sizeof(tv)); 2932 if (error) 2933 return (error); 2934 } 2935 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2936 if (error == 0) 2937 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2938 nlookup_done(&nd); 2939 return (error); 2940 } 2941 2942 int 2943 kern_futimes(int fd, struct timeval *tptr) 2944 { 2945 struct thread *td = curthread; 2946 struct proc *p = td->td_proc; 2947 struct timespec ts[2]; 2948 struct file *fp; 2949 int error; 2950 2951 error = getutimes(tptr, ts); 2952 if (error) 2953 return (error); 2954 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2955 return (error); 2956 if (fp->f_nchandle.ncp) 2957 error = ncp_writechk(&fp->f_nchandle); 2958 if (error == 0) 2959 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2960 fdrop(fp); 2961 return (error); 2962 } 2963 2964 /* 2965 * futimes_args(int fd, struct timeval *tptr) 2966 * 2967 * Set the access and modification times of a file. 2968 */ 2969 int 2970 sys_futimes(struct futimes_args *uap) 2971 { 2972 struct timeval tv[2]; 2973 int error; 2974 2975 if (uap->tptr) { 2976 error = copyin(uap->tptr, tv, sizeof(tv)); 2977 if (error) 2978 return (error); 2979 } 2980 2981 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2982 2983 return (error); 2984 } 2985 2986 int 2987 kern_truncate(struct nlookupdata *nd, off_t length) 2988 { 2989 struct vnode *vp; 2990 struct vattr vattr; 2991 int error; 2992 2993 if (length < 0) 2994 return(EINVAL); 2995 /* XXX Add NLC flag indicating modifying operation? 
*/ 2996 if ((error = nlookup(nd)) != 0) 2997 return (error); 2998 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2999 return (error); 3000 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3001 return (error); 3002 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3003 vrele(vp); 3004 return (error); 3005 } 3006 if (vp->v_type == VDIR) { 3007 error = EISDIR; 3008 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 && 3009 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) { 3010 VATTR_NULL(&vattr); 3011 vattr.va_size = length; 3012 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3013 } 3014 vput(vp); 3015 return (error); 3016 } 3017 3018 /* 3019 * truncate(char *path, int pad, off_t length) 3020 * 3021 * Truncate a file given its path name. 3022 */ 3023 int 3024 sys_truncate(struct truncate_args *uap) 3025 { 3026 struct nlookupdata nd; 3027 int error; 3028 3029 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3030 if (error == 0) 3031 error = kern_truncate(&nd, uap->length); 3032 nlookup_done(&nd); 3033 return error; 3034 } 3035 3036 int 3037 kern_ftruncate(int fd, off_t length) 3038 { 3039 struct thread *td = curthread; 3040 struct proc *p = td->td_proc; 3041 struct vattr vattr; 3042 struct vnode *vp; 3043 struct file *fp; 3044 int error; 3045 3046 if (length < 0) 3047 return(EINVAL); 3048 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3049 return (error); 3050 if (fp->f_nchandle.ncp) { 3051 error = ncp_writechk(&fp->f_nchandle); 3052 if (error) 3053 goto done; 3054 } 3055 if ((fp->f_flag & FWRITE) == 0) { 3056 error = EINVAL; 3057 goto done; 3058 } 3059 vp = (struct vnode *)fp->f_data; 3060 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3061 if (vp->v_type == VDIR) { 3062 error = EISDIR; 3063 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3064 VATTR_NULL(&vattr); 3065 vattr.va_size = length; 3066 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3067 } 3068 vn_unlock(vp); 3069 done: 3070 fdrop(fp); 3071 return (error); 3072 } 3073 
3074 /* 3075 * ftruncate_args(int fd, int pad, off_t length) 3076 * 3077 * Truncate a file given a file descriptor. 3078 */ 3079 int 3080 sys_ftruncate(struct ftruncate_args *uap) 3081 { 3082 int error; 3083 3084 error = kern_ftruncate(uap->fd, uap->length); 3085 3086 return (error); 3087 } 3088 3089 /* 3090 * fsync(int fd) 3091 * 3092 * Sync an open file. 3093 */ 3094 /* ARGSUSED */ 3095 int 3096 sys_fsync(struct fsync_args *uap) 3097 { 3098 struct thread *td = curthread; 3099 struct proc *p = td->td_proc; 3100 struct vnode *vp; 3101 struct file *fp; 3102 vm_object_t obj; 3103 int error; 3104 3105 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3106 return (error); 3107 vp = (struct vnode *)fp->f_data; 3108 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3109 if ((obj = vp->v_object) != NULL) 3110 vm_object_page_clean(obj, 0, 0, 0); 3111 if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount) 3112 error = buf_fsync(vp); 3113 vn_unlock(vp); 3114 fdrop(fp); 3115 return (error); 3116 } 3117 3118 int 3119 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3120 { 3121 struct nchandle fnchd; 3122 struct nchandle tnchd; 3123 struct namecache *ncp; 3124 struct vnode *fdvp; 3125 struct vnode *tdvp; 3126 struct mount *mp; 3127 int error; 3128 3129 bwillinode(1); 3130 fromnd->nl_flags |= NLC_REFDVP; 3131 if ((error = nlookup(fromnd)) != 0) 3132 return (error); 3133 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3134 return (ENOENT); 3135 fnchd.mount = fromnd->nl_nch.mount; 3136 cache_hold(&fnchd); 3137 3138 /* 3139 * unlock the source nch so we can lookup the target nch without 3140 * deadlocking. The target may or may not exist so we do not check 3141 * for a target vp like kern_mkdir() and other creation functions do. 3142 * 3143 * The source and target directories are ref'd and rechecked after 3144 * everything is relocked to determine if the source or target file 3145 * has been renamed. 
3146 */ 3147 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3148 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3149 cache_unlock(&fromnd->nl_nch); 3150 3151 tond->nl_flags |= NLC_CREATE | NLC_REFDVP; 3152 if ((error = nlookup(tond)) != 0) { 3153 cache_drop(&fnchd); 3154 return (error); 3155 } 3156 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3157 cache_drop(&fnchd); 3158 return (ENOENT); 3159 } 3160 tnchd.mount = tond->nl_nch.mount; 3161 cache_hold(&tnchd); 3162 3163 /* 3164 * If the source and target are the same there is nothing to do 3165 */ 3166 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3167 cache_drop(&fnchd); 3168 cache_drop(&tnchd); 3169 return (0); 3170 } 3171 3172 /* 3173 * Mount points cannot be renamed or overwritten 3174 */ 3175 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3176 NCF_ISMOUNTPT 3177 ) { 3178 cache_drop(&fnchd); 3179 cache_drop(&tnchd); 3180 return (EINVAL); 3181 } 3182 3183 /* 3184 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 3185 * may have become invalid while it was unlocked, nc_vp and nc_mount 3186 * could be NULL. 
3187 */ 3188 if (cache_lock_nonblock(&fromnd->nl_nch) == 0) { 3189 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3190 } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) { 3191 cache_lock(&fromnd->nl_nch); 3192 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3193 } else { 3194 cache_unlock(&tond->nl_nch); 3195 cache_lock(&fromnd->nl_nch); 3196 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3197 cache_lock(&tond->nl_nch); 3198 cache_resolve(&tond->nl_nch, tond->nl_cred); 3199 } 3200 fromnd->nl_flags |= NLC_NCPISLOCKED; 3201 3202 /* 3203 * make sure the parent directories linkages are the same 3204 */ 3205 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3206 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3207 cache_drop(&fnchd); 3208 cache_drop(&tnchd); 3209 return (ENOENT); 3210 } 3211 3212 /* 3213 * Both the source and target must be within the same filesystem and 3214 * in the same filesystem as their parent directories within the 3215 * namecache topology. 3216 * 3217 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3218 */ 3219 mp = fnchd.mount; 3220 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3221 mp != tond->nl_nch.mount) { 3222 cache_drop(&fnchd); 3223 cache_drop(&tnchd); 3224 return (EXDEV); 3225 } 3226 3227 /* 3228 * Make sure the mount point is writable 3229 */ 3230 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3231 cache_drop(&fnchd); 3232 cache_drop(&tnchd); 3233 return (error); 3234 } 3235 3236 /* 3237 * If the target exists and either the source or target is a directory, 3238 * then both must be directories. 3239 * 3240 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3241 * have become NULL. 
3242 */ 3243 if (tond->nl_nch.ncp->nc_vp) { 3244 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3245 error = ENOENT; 3246 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3247 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3248 error = ENOTDIR; 3249 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3250 error = EISDIR; 3251 } 3252 } 3253 3254 /* 3255 * You cannot rename a source into itself or a subdirectory of itself. 3256 * We check this by travsersing the target directory upwards looking 3257 * for a match against the source. 3258 */ 3259 if (error == 0) { 3260 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3261 if (fromnd->nl_nch.ncp == ncp) { 3262 error = EINVAL; 3263 break; 3264 } 3265 } 3266 } 3267 3268 cache_drop(&fnchd); 3269 cache_drop(&tnchd); 3270 3271 /* 3272 * Even though the namespaces are different, they may still represent 3273 * hardlinks to the same file. The filesystem might have a hard time 3274 * with this so we issue a NREMOVE of the source instead of a NRENAME 3275 * when we detect the situation. 3276 */ 3277 if (error == 0) { 3278 fdvp = fromnd->nl_dvp; 3279 tdvp = tond->nl_dvp; 3280 if (fdvp == NULL || tdvp == NULL) { 3281 error = EPERM; 3282 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3283 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3284 fromnd->nl_cred); 3285 } else { 3286 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3287 fdvp, tdvp, tond->nl_cred); 3288 } 3289 } 3290 return (error); 3291 } 3292 3293 /* 3294 * rename_args(char *from, char *to) 3295 * 3296 * Rename files. Source and destination must either both be directories, 3297 * or both not be directories. If target is a directory, it must be empty. 
3298 */ 3299 int 3300 sys_rename(struct rename_args *uap) 3301 { 3302 struct nlookupdata fromnd, tond; 3303 int error; 3304 3305 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3306 if (error == 0) { 3307 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3308 if (error == 0) 3309 error = kern_rename(&fromnd, &tond); 3310 nlookup_done(&tond); 3311 } 3312 nlookup_done(&fromnd); 3313 return (error); 3314 } 3315 3316 int 3317 kern_mkdir(struct nlookupdata *nd, int mode) 3318 { 3319 struct thread *td = curthread; 3320 struct proc *p = td->td_proc; 3321 struct vnode *vp; 3322 struct vattr vattr; 3323 int error; 3324 3325 bwillinode(1); 3326 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3327 if ((error = nlookup(nd)) != 0) 3328 return (error); 3329 3330 if (nd->nl_nch.ncp->nc_vp) 3331 return (EEXIST); 3332 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3333 return (error); 3334 VATTR_NULL(&vattr); 3335 vattr.va_type = VDIR; 3336 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3337 3338 vp = NULL; 3339 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr); 3340 if (error == 0) 3341 vput(vp); 3342 return (error); 3343 } 3344 3345 /* 3346 * mkdir_args(char *path, int mode) 3347 * 3348 * Make a directory file. 3349 */ 3350 /* ARGSUSED */ 3351 int 3352 sys_mkdir(struct mkdir_args *uap) 3353 { 3354 struct nlookupdata nd; 3355 int error; 3356 3357 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3358 if (error == 0) 3359 error = kern_mkdir(&nd, uap->mode); 3360 nlookup_done(&nd); 3361 return (error); 3362 } 3363 3364 int 3365 kern_rmdir(struct nlookupdata *nd) 3366 { 3367 int error; 3368 3369 bwillinode(1); 3370 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3371 if ((error = nlookup(nd)) != 0) 3372 return (error); 3373 3374 /* 3375 * Do not allow directories representing mount points to be 3376 * deleted, even if empty. Check write perms on mount point 3377 * in case the vnode is aliased (aka nullfs). 
3378 */ 3379 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3380 return (EINVAL); 3381 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3382 return (error); 3383 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3384 return (error); 3385 } 3386 3387 /* 3388 * rmdir_args(char *path) 3389 * 3390 * Remove a directory file. 3391 */ 3392 /* ARGSUSED */ 3393 int 3394 sys_rmdir(struct rmdir_args *uap) 3395 { 3396 struct nlookupdata nd; 3397 int error; 3398 3399 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3400 if (error == 0) 3401 error = kern_rmdir(&nd); 3402 nlookup_done(&nd); 3403 return (error); 3404 } 3405 3406 int 3407 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3408 enum uio_seg direction) 3409 { 3410 struct thread *td = curthread; 3411 struct proc *p = td->td_proc; 3412 struct vnode *vp; 3413 struct file *fp; 3414 struct uio auio; 3415 struct iovec aiov; 3416 off_t loff; 3417 int error, eofflag; 3418 3419 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3420 return (error); 3421 if ((fp->f_flag & FREAD) == 0) { 3422 error = EBADF; 3423 goto done; 3424 } 3425 vp = (struct vnode *)fp->f_data; 3426 unionread: 3427 if (vp->v_type != VDIR) { 3428 error = EINVAL; 3429 goto done; 3430 } 3431 aiov.iov_base = buf; 3432 aiov.iov_len = count; 3433 auio.uio_iov = &aiov; 3434 auio.uio_iovcnt = 1; 3435 auio.uio_rw = UIO_READ; 3436 auio.uio_segflg = direction; 3437 auio.uio_td = td; 3438 auio.uio_resid = count; 3439 loff = auio.uio_offset = fp->f_offset; 3440 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3441 fp->f_offset = auio.uio_offset; 3442 if (error) 3443 goto done; 3444 if (count == auio.uio_resid) { 3445 if (union_dircheckp) { 3446 error = union_dircheckp(td, &vp, fp); 3447 if (error == -1) 3448 goto unionread; 3449 if (error) 3450 goto done; 3451 } 3452 #if 0 3453 if ((vp->v_flag & VROOT) && 3454 (vp->v_mount->mnt_flag & MNT_UNION)) { 3455 struct vnode *tvp = vp; 3456 vp = vp->v_mount->mnt_vnodecovered; 
3457 vref(vp); 3458 fp->f_data = vp; 3459 fp->f_offset = 0; 3460 vrele(tvp); 3461 goto unionread; 3462 } 3463 #endif 3464 } 3465 3466 /* 3467 * WARNING! *basep may not be wide enough to accomodate the 3468 * seek offset. XXX should we hack this to return the upper 32 bits 3469 * for offsets greater then 4G? 3470 */ 3471 if (basep) { 3472 *basep = (long)loff; 3473 } 3474 *res = count - auio.uio_resid; 3475 done: 3476 fdrop(fp); 3477 return (error); 3478 } 3479 3480 /* 3481 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 3482 * 3483 * Read a block of directory entries in a file system independent format. 3484 */ 3485 int 3486 sys_getdirentries(struct getdirentries_args *uap) 3487 { 3488 long base; 3489 int error; 3490 3491 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3492 &uap->sysmsg_result, UIO_USERSPACE); 3493 3494 if (error == 0 && uap->basep) 3495 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3496 return (error); 3497 } 3498 3499 /* 3500 * getdents_args(int fd, char *buf, size_t count) 3501 */ 3502 int 3503 sys_getdents(struct getdents_args *uap) 3504 { 3505 int error; 3506 3507 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3508 &uap->sysmsg_result, UIO_USERSPACE); 3509 3510 return (error); 3511 } 3512 3513 /* 3514 * umask(int newmask) 3515 * 3516 * Set the mode mask for creation of filesystem nodes. 3517 * 3518 * MP SAFE 3519 */ 3520 int 3521 sys_umask(struct umask_args *uap) 3522 { 3523 struct thread *td = curthread; 3524 struct proc *p = td->td_proc; 3525 struct filedesc *fdp; 3526 3527 fdp = p->p_fd; 3528 uap->sysmsg_result = fdp->fd_cmask; 3529 fdp->fd_cmask = uap->newmask & ALLPERMS; 3530 return (0); 3531 } 3532 3533 /* 3534 * revoke(char *path) 3535 * 3536 * Void all references to file by ripping underlying filesystem 3537 * away from vnode. 
3538 */ 3539 /* ARGSUSED */ 3540 int 3541 sys_revoke(struct revoke_args *uap) 3542 { 3543 struct nlookupdata nd; 3544 struct vattr vattr; 3545 struct vnode *vp; 3546 struct ucred *cred; 3547 int error; 3548 3549 vp = NULL; 3550 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3551 if (error == 0) 3552 error = nlookup(&nd); 3553 if (error == 0) 3554 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3555 cred = crhold(nd.nl_cred); 3556 nlookup_done(&nd); 3557 if (error == 0) { 3558 if (vp->v_type != VCHR && vp->v_type != VBLK) 3559 error = EINVAL; 3560 if (error == 0) 3561 error = VOP_GETATTR(vp, &vattr); 3562 if (error == 0 && cred->cr_uid != vattr.va_uid) 3563 error = suser_cred(cred, PRISON_ROOT); 3564 if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) { 3565 error = 0; 3566 vx_lock(vp); 3567 VOP_REVOKE(vp, REVOKEALL); 3568 vx_unlock(vp); 3569 } 3570 vrele(vp); 3571 } 3572 if (cred) 3573 crfree(cred); 3574 return (error); 3575 } 3576 3577 /* 3578 * getfh_args(char *fname, fhandle_t *fhp) 3579 * 3580 * Get (NFS) file handle 3581 * 3582 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 3583 * mount. This allows nullfs mounts to be explicitly exported. 3584 * 3585 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 3586 * 3587 * nullfs mounts of subdirectories are not safe. That is, it will 3588 * work, but you do not really have protection against access to 3589 * the related parent directories. 
3590 */ 3591 int 3592 sys_getfh(struct getfh_args *uap) 3593 { 3594 struct thread *td = curthread; 3595 struct nlookupdata nd; 3596 fhandle_t fh; 3597 struct vnode *vp; 3598 struct mount *mp; 3599 int error; 3600 3601 /* 3602 * Must be super user 3603 */ 3604 if ((error = suser(td)) != 0) 3605 return (error); 3606 3607 vp = NULL; 3608 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3609 if (error == 0) 3610 error = nlookup(&nd); 3611 if (error == 0) 3612 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3613 mp = nd.nl_nch.mount; 3614 nlookup_done(&nd); 3615 if (error == 0) { 3616 bzero(&fh, sizeof(fh)); 3617 fh.fh_fsid = mp->mnt_stat.f_fsid; 3618 error = VFS_VPTOFH(vp, &fh.fh_fid); 3619 vput(vp); 3620 if (error == 0) 3621 error = copyout(&fh, uap->fhp, sizeof(fh)); 3622 } 3623 return (error); 3624 } 3625 3626 /* 3627 * fhopen_args(const struct fhandle *u_fhp, int flags) 3628 * 3629 * syscall for the rpc.lockd to use to translate a NFS file handle into 3630 * an open descriptor. 3631 * 3632 * warning: do not remove the suser() call or this becomes one giant 3633 * security hole. 3634 */ 3635 int 3636 sys_fhopen(struct fhopen_args *uap) 3637 { 3638 struct thread *td = curthread; 3639 struct proc *p = td->td_proc; 3640 struct mount *mp; 3641 struct vnode *vp; 3642 struct fhandle fhp; 3643 struct vattr vat; 3644 struct vattr *vap = &vat; 3645 struct flock lf; 3646 int fmode, mode, error, type; 3647 struct file *nfp; 3648 struct file *fp; 3649 int indx; 3650 3651 /* 3652 * Must be super user 3653 */ 3654 error = suser(td); 3655 if (error) 3656 return (error); 3657 3658 fmode = FFLAGS(uap->flags); 3659 /* why not allow a non-read/write open for our lockd? 
*/ 3660 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3661 return (EINVAL); 3662 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3663 if (error) 3664 return(error); 3665 /* find the mount point */ 3666 mp = vfs_getvfs(&fhp.fh_fsid); 3667 if (mp == NULL) 3668 return (ESTALE); 3669 /* now give me my vnode, it gets returned to me locked */ 3670 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 3671 if (error) 3672 return (error); 3673 /* 3674 * from now on we have to make sure not 3675 * to forget about the vnode 3676 * any error that causes an abort must vput(vp) 3677 * just set error = err and 'goto bad;'. 3678 */ 3679 3680 /* 3681 * from vn_open 3682 */ 3683 if (vp->v_type == VLNK) { 3684 error = EMLINK; 3685 goto bad; 3686 } 3687 if (vp->v_type == VSOCK) { 3688 error = EOPNOTSUPP; 3689 goto bad; 3690 } 3691 mode = 0; 3692 if (fmode & (FWRITE | O_TRUNC)) { 3693 if (vp->v_type == VDIR) { 3694 error = EISDIR; 3695 goto bad; 3696 } 3697 error = vn_writechk(vp, NULL); 3698 if (error) 3699 goto bad; 3700 mode |= VWRITE; 3701 } 3702 if (fmode & FREAD) 3703 mode |= VREAD; 3704 if (mode) { 3705 error = VOP_ACCESS(vp, mode, p->p_ucred); 3706 if (error) 3707 goto bad; 3708 } 3709 if (fmode & O_TRUNC) { 3710 vn_unlock(vp); /* XXX */ 3711 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 3712 VATTR_NULL(vap); 3713 vap->va_size = 0; 3714 error = VOP_SETATTR(vp, vap, p->p_ucred); 3715 if (error) 3716 goto bad; 3717 } 3718 3719 /* 3720 * VOP_OPEN needs the file pointer so it can potentially override 3721 * it. 3722 * 3723 * WARNING! no f_nchandle will be associated when fhopen()ing a 3724 * directory. XXX 3725 */ 3726 if ((error = falloc(p, &nfp, &indx)) != 0) 3727 goto bad; 3728 fp = nfp; 3729 3730 error = VOP_OPEN(vp, fmode, p->p_ucred, fp); 3731 if (error) { 3732 /* 3733 * setting f_ops this way prevents VOP_CLOSE from being 3734 * called or fdrop() releasing the vp from v_data. Since 3735 * the VOP_OPEN failed we don't want to VOP_CLOSE. 
3736 */ 3737 fp->f_ops = &badfileops; 3738 fp->f_data = NULL; 3739 goto bad_drop; 3740 } 3741 3742 /* 3743 * The fp is given its own reference, we still have our ref and lock. 3744 * 3745 * Assert that all regular files must be created with a VM object. 3746 */ 3747 if (vp->v_type == VREG && vp->v_object == NULL) { 3748 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 3749 goto bad_drop; 3750 } 3751 3752 /* 3753 * The open was successful. Handle any locking requirements. 3754 */ 3755 if (fmode & (O_EXLOCK | O_SHLOCK)) { 3756 lf.l_whence = SEEK_SET; 3757 lf.l_start = 0; 3758 lf.l_len = 0; 3759 if (fmode & O_EXLOCK) 3760 lf.l_type = F_WRLCK; 3761 else 3762 lf.l_type = F_RDLCK; 3763 if (fmode & FNONBLOCK) 3764 type = 0; 3765 else 3766 type = F_WAIT; 3767 vn_unlock(vp); 3768 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 3769 /* 3770 * release our private reference. 3771 */ 3772 fsetfd(p, NULL, indx); 3773 fdrop(fp); 3774 vrele(vp); 3775 return (error); 3776 } 3777 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3778 fp->f_flag |= FHASLOCK; 3779 } 3780 3781 /* 3782 * Clean up. Associate the file pointer with the previously 3783 * reserved descriptor and return it. 
3784 */ 3785 vput(vp); 3786 fsetfd(p, fp, indx); 3787 fdrop(fp); 3788 uap->sysmsg_result = indx; 3789 return (0); 3790 3791 bad_drop: 3792 fsetfd(p, NULL, indx); 3793 fdrop(fp); 3794 bad: 3795 vput(vp); 3796 return (error); 3797 } 3798 3799 /* 3800 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 3801 */ 3802 int 3803 sys_fhstat(struct fhstat_args *uap) 3804 { 3805 struct thread *td = curthread; 3806 struct stat sb; 3807 fhandle_t fh; 3808 struct mount *mp; 3809 struct vnode *vp; 3810 int error; 3811 3812 /* 3813 * Must be super user 3814 */ 3815 error = suser(td); 3816 if (error) 3817 return (error); 3818 3819 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 3820 if (error) 3821 return (error); 3822 3823 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3824 return (ESTALE); 3825 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3826 return (error); 3827 error = vn_stat(vp, &sb, td->td_proc->p_ucred); 3828 vput(vp); 3829 if (error) 3830 return (error); 3831 error = copyout(&sb, uap->sb, sizeof(sb)); 3832 return (error); 3833 } 3834 3835 /* 3836 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 3837 */ 3838 int 3839 sys_fhstatfs(struct fhstatfs_args *uap) 3840 { 3841 struct thread *td = curthread; 3842 struct proc *p = td->td_proc; 3843 struct statfs *sp; 3844 struct mount *mp; 3845 struct vnode *vp; 3846 struct statfs sb; 3847 char *fullpath, *freepath; 3848 fhandle_t fh; 3849 int error; 3850 3851 /* 3852 * Must be super user 3853 */ 3854 if ((error = suser(td))) 3855 return (error); 3856 3857 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3858 return (error); 3859 3860 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3861 return (ESTALE); 3862 3863 if (p != NULL && !chroot_visible_mnt(mp, p)) 3864 return (ESTALE); 3865 3866 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3867 return (error); 3868 mp = vp->v_mount; 3869 sp = &mp->mnt_stat; 3870 vput(vp); 3871 if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0) 3872 return (error); 3873 
3874 error = mount_path(p, mp, &fullpath, &freepath); 3875 if (error) 3876 return(error); 3877 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3878 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3879 kfree(freepath, M_TEMP); 3880 3881 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3882 if (suser(td)) { 3883 bcopy(sp, &sb, sizeof(sb)); 3884 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3885 sp = &sb; 3886 } 3887 return (copyout(sp, uap->buf, sizeof(*sp))); 3888 } 3889 3890 /* 3891 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 3892 */ 3893 int 3894 sys_fhstatvfs(struct fhstatvfs_args *uap) 3895 { 3896 struct thread *td = curthread; 3897 struct proc *p = td->td_proc; 3898 struct statvfs *sp; 3899 struct mount *mp; 3900 struct vnode *vp; 3901 fhandle_t fh; 3902 int error; 3903 3904 /* 3905 * Must be super user 3906 */ 3907 if ((error = suser(td))) 3908 return (error); 3909 3910 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3911 return (error); 3912 3913 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3914 return (ESTALE); 3915 3916 if (p != NULL && !chroot_visible_mnt(mp, p)) 3917 return (ESTALE); 3918 3919 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3920 return (error); 3921 mp = vp->v_mount; 3922 sp = &mp->mnt_vstat; 3923 vput(vp); 3924 if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0) 3925 return (error); 3926 3927 sp->f_flag = 0; 3928 if (mp->mnt_flag & MNT_RDONLY) 3929 sp->f_flag |= ST_RDONLY; 3930 if (mp->mnt_flag & MNT_NOSUID) 3931 sp->f_flag |= ST_NOSUID; 3932 3933 return (copyout(sp, uap->buf, sizeof(*sp))); 3934 } 3935 3936 3937 /* 3938 * Syscall to push extended attribute configuration information into the 3939 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3940 * a command (int cmd), and attribute name and misc data. For now, the 3941 * attribute name is left in userspace for consumption by the VFS_op. 
3942 * It will probably be changed to be copied into sysspace by the 3943 * syscall in the future, once issues with various consumers of the 3944 * attribute code have raised their hands. 3945 * 3946 * Currently this is used only by UFS Extended Attributes. 3947 */ 3948 int 3949 sys_extattrctl(struct extattrctl_args *uap) 3950 { 3951 struct nlookupdata nd; 3952 struct mount *mp; 3953 struct vnode *vp; 3954 int error; 3955 3956 vp = NULL; 3957 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3958 if (error == 0) 3959 error = nlookup(&nd); 3960 if (error == 0) { 3961 mp = nd.nl_nch.mount; 3962 error = VFS_EXTATTRCTL(mp, uap->cmd, 3963 uap->attrname, uap->arg, 3964 nd.nl_cred); 3965 } 3966 nlookup_done(&nd); 3967 return (error); 3968 } 3969 3970 /* 3971 * Syscall to set a named extended attribute on a file or directory. 3972 * Accepts attribute name, and a uio structure pointing to the data to set. 3973 * The uio is consumed in the style of writev(). The real work happens 3974 * in VOP_SETEXTATTR(). 
3975 */ 3976 int 3977 sys_extattr_set_file(struct extattr_set_file_args *uap) 3978 { 3979 char attrname[EXTATTR_MAXNAMELEN]; 3980 struct iovec aiov[UIO_SMALLIOV]; 3981 struct iovec *needfree; 3982 struct nlookupdata nd; 3983 struct iovec *iov; 3984 struct vnode *vp; 3985 struct uio auio; 3986 u_int iovlen; 3987 u_int cnt; 3988 int error; 3989 int i; 3990 3991 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3992 if (error) 3993 return (error); 3994 3995 vp = NULL; 3996 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3997 if (error == 0) 3998 error = nlookup(&nd); 3999 if (error == 0) 4000 error = ncp_writechk(&nd.nl_nch); 4001 if (error == 0) 4002 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4003 if (error) { 4004 nlookup_done(&nd); 4005 return (error); 4006 } 4007 4008 needfree = NULL; 4009 iovlen = uap->iovcnt * sizeof(struct iovec); 4010 if (uap->iovcnt > UIO_SMALLIOV) { 4011 if (uap->iovcnt > UIO_MAXIOV) { 4012 error = EINVAL; 4013 goto done; 4014 } 4015 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4016 needfree = iov; 4017 } else { 4018 iov = aiov; 4019 } 4020 auio.uio_iov = iov; 4021 auio.uio_iovcnt = uap->iovcnt; 4022 auio.uio_rw = UIO_WRITE; 4023 auio.uio_segflg = UIO_USERSPACE; 4024 auio.uio_td = nd.nl_td; 4025 auio.uio_offset = 0; 4026 if ((error = copyin(uap->iovp, iov, iovlen))) 4027 goto done; 4028 auio.uio_resid = 0; 4029 for (i = 0; i < uap->iovcnt; i++) { 4030 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4031 error = EINVAL; 4032 goto done; 4033 } 4034 auio.uio_resid += iov->iov_len; 4035 iov++; 4036 } 4037 cnt = auio.uio_resid; 4038 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4039 cnt -= auio.uio_resid; 4040 uap->sysmsg_result = cnt; 4041 done: 4042 vput(vp); 4043 nlookup_done(&nd); 4044 if (needfree) 4045 FREE(needfree, M_IOV); 4046 return (error); 4047 } 4048 4049 /* 4050 * Syscall to get a named extended attribute on a file or directory. 
4051 * Accepts attribute name, and a uio structure pointing to a buffer for the 4052 * data. The uio is consumed in the style of readv(). The real work 4053 * happens in VOP_GETEXTATTR(); 4054 */ 4055 int 4056 sys_extattr_get_file(struct extattr_get_file_args *uap) 4057 { 4058 char attrname[EXTATTR_MAXNAMELEN]; 4059 struct iovec aiov[UIO_SMALLIOV]; 4060 struct iovec *needfree; 4061 struct nlookupdata nd; 4062 struct iovec *iov; 4063 struct vnode *vp; 4064 struct uio auio; 4065 u_int iovlen; 4066 u_int cnt; 4067 int error; 4068 int i; 4069 4070 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4071 if (error) 4072 return (error); 4073 4074 vp = NULL; 4075 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4076 if (error == 0) 4077 error = nlookup(&nd); 4078 if (error == 0) 4079 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4080 if (error) { 4081 nlookup_done(&nd); 4082 return (error); 4083 } 4084 4085 iovlen = uap->iovcnt * sizeof (struct iovec); 4086 needfree = NULL; 4087 if (uap->iovcnt > UIO_SMALLIOV) { 4088 if (uap->iovcnt > UIO_MAXIOV) { 4089 error = EINVAL; 4090 goto done; 4091 } 4092 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4093 needfree = iov; 4094 } else { 4095 iov = aiov; 4096 } 4097 auio.uio_iov = iov; 4098 auio.uio_iovcnt = uap->iovcnt; 4099 auio.uio_rw = UIO_READ; 4100 auio.uio_segflg = UIO_USERSPACE; 4101 auio.uio_td = nd.nl_td; 4102 auio.uio_offset = 0; 4103 if ((error = copyin(uap->iovp, iov, iovlen))) 4104 goto done; 4105 auio.uio_resid = 0; 4106 for (i = 0; i < uap->iovcnt; i++) { 4107 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4108 error = EINVAL; 4109 goto done; 4110 } 4111 auio.uio_resid += iov->iov_len; 4112 iov++; 4113 } 4114 cnt = auio.uio_resid; 4115 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4116 cnt -= auio.uio_resid; 4117 uap->sysmsg_result = cnt; 4118 done: 4119 vput(vp); 4120 nlookup_done(&nd); 4121 if (needfree) 4122 FREE(needfree, M_IOV); 4123 return(error); 4124 
} 4125 4126 /* 4127 * Syscall to delete a named extended attribute from a file or directory. 4128 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 4129 */ 4130 int 4131 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4132 { 4133 char attrname[EXTATTR_MAXNAMELEN]; 4134 struct nlookupdata nd; 4135 struct vnode *vp; 4136 int error; 4137 4138 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4139 if (error) 4140 return(error); 4141 4142 vp = NULL; 4143 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4144 if (error == 0) 4145 error = nlookup(&nd); 4146 if (error == 0) 4147 error = ncp_writechk(&nd.nl_nch); 4148 if (error == 0) 4149 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4150 if (error) { 4151 nlookup_done(&nd); 4152 return (error); 4153 } 4154 4155 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred); 4156 vput(vp); 4157 nlookup_done(&nd); 4158 return(error); 4159 } 4160 4161 /* 4162 * Determine if the mount is visible to the process. 4163 */ 4164 static int 4165 chroot_visible_mnt(struct mount *mp, struct proc *p) 4166 { 4167 struct nchandle nch; 4168 4169 /* 4170 * Traverse from the mount point upwards. If we hit the process 4171 * root then the mount point is visible to the process. 4172 */ 4173 nch = mp->mnt_ncmountpt; 4174 while (nch.ncp) { 4175 if (nch.mount == p->p_fd->fd_nrdir.mount && 4176 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4177 return(1); 4178 } 4179 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4180 nch = nch.mount->mnt_ncmounton; 4181 } else { 4182 nch.ncp = nch.ncp->nc_parent; 4183 } 4184 } 4185 4186 /* 4187 * If the mount point is not visible to the process, but the 4188 * process root is in a subdirectory of the mount, return 4189 * TRUE anyway. 4190 */ 4191 if (p->p_fd->fd_nrdir.mount == mp) 4192 return(1); 4193 4194 return(0); 4195 } 4196 4197