1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.121 2007/09/10 15:08:43 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/spinlock.h> 66 #include <sys/kern_syscall.h> 67 #include <sys/objcache.h> 68 #include <sys/sysctl.h> 69 #include <sys/file2.h> 70 #include <sys/spinlock2.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_object.h> 74 #include <vm/vm_page.h> 75 76 #include <machine/limits.h> 77 #include <machine/stdarg.h> 78 79 #include <vfs/union/union.h> 80 81 static void mount_warning(struct mount *mp, const char *ctl, ...); 82 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 83 static int checkvp_chdir (struct vnode *vn, struct thread *td); 84 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 85 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 86 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 87 static int getutimes (const struct timeval *, struct timespec *); 88 static int setfown (struct vnode *, uid_t, gid_t); 89 static int setfmode (struct vnode *, int); 90 static int setfflags (struct vnode *, int); 91 static int setutimes (struct vnode *, const struct timespec *, int); 92 static int usermount = 0; /* if 1, non-root can mount fs. */ 93 94 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 95 96 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 97 98 /* 99 * Virtual File System System Calls 100 */ 101 102 /* 103 * Mount a file system. 104 */ 105 /* 106 * mount_args(char *type, char *path, int flags, caddr_t data) 107 */ 108 /* ARGSUSED */ 109 int 110 sys_mount(struct mount_args *uap) 111 { 112 struct thread *td = curthread; 113 struct proc *p = td->td_proc; 114 struct vnode *vp; 115 struct nchandle nch; 116 struct mount *mp; 117 struct vfsconf *vfsp; 118 int error, flag = 0, flag2 = 0; 119 int hasmount; 120 struct vattr va; 121 struct nlookupdata nd; 122 char fstypename[MFSNAMELEN]; 123 struct ucred *cred = p->p_ucred; 124 125 KKASSERT(p); 126 if (cred->cr_prison != NULL) 127 return (EPERM); 128 if (usermount == 0 && (error = suser(td))) 129 return (error); 130 /* 131 * Do not allow NFS export by non-root users. 132 */ 133 if (uap->flags & MNT_EXPORTED) { 134 error = suser(td); 135 if (error) 136 return (error); 137 } 138 /* 139 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 140 */ 141 if (suser(td)) 142 uap->flags |= MNT_NOSUID | MNT_NODEV; 143 144 /* 145 * Lookup the requested path and extract the nch and vnode. 146 */ 147 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 148 if (error == 0) { 149 if ((error = nlookup(&nd)) == 0) { 150 if (nd.nl_nch.ncp->nc_vp == NULL) 151 error = ENOENT; 152 } 153 } 154 if (error) { 155 nlookup_done(&nd); 156 return (error); 157 } 158 159 /* 160 * Extract the locked+refd ncp and cleanup the nd structure 161 */ 162 nch = nd.nl_nch; 163 cache_zero(&nd.nl_nch); 164 nlookup_done(&nd); 165 166 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 167 hasmount = 1; 168 else 169 hasmount = 0; 170 171 172 /* 173 * now we have the locked ref'd nch and unreferenced vnode. 174 */ 175 vp = nch.ncp->nc_vp; 176 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 177 cache_put(&nch); 178 return (error); 179 } 180 cache_unlock(&nch); 181 182 /* 183 * Now we have an unlocked ref'd nch and a locked ref'd vp 184 */ 185 if (uap->flags & MNT_UPDATE) { 186 if ((vp->v_flag & VROOT) == 0) { 187 cache_drop(&nch); 188 vput(vp); 189 return (EINVAL); 190 } 191 mp = vp->v_mount; 192 flag = mp->mnt_flag; 193 flag2 = mp->mnt_kern_flag; 194 /* 195 * We only allow the filesystem to be reloaded if it 196 * is currently mounted read-only. 197 */ 198 if ((uap->flags & MNT_RELOAD) && 199 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 200 cache_drop(&nch); 201 vput(vp); 202 return (EOPNOTSUPP); /* Needs translation */ 203 } 204 /* 205 * Only root, or the user that did the original mount is 206 * permitted to update it. 207 */ 208 if (mp->mnt_stat.f_owner != cred->cr_uid && 209 (error = suser(td))) { 210 cache_drop(&nch); 211 vput(vp); 212 return (error); 213 } 214 if (vfs_busy(mp, LK_NOWAIT)) { 215 cache_drop(&nch); 216 vput(vp); 217 return (EBUSY); 218 } 219 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 220 cache_drop(&nch); 221 vfs_unbusy(mp); 222 vput(vp); 223 return (EBUSY); 224 } 225 vp->v_flag |= VMOUNT; 226 mp->mnt_flag |= 227 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 228 vn_unlock(vp); 229 goto update; 230 } 231 /* 232 * If the user is not root, ensure that they own the directory 233 * onto which we are attempting to mount. 234 */ 235 if ((error = VOP_GETATTR(vp, &va)) || 236 (va.va_uid != cred->cr_uid && (error = suser(td)))) { 237 cache_drop(&nch); 238 vput(vp); 239 return (error); 240 } 241 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 242 cache_drop(&nch); 243 vput(vp); 244 return (error); 245 } 246 if (vp->v_type != VDIR) { 247 cache_drop(&nch); 248 vput(vp); 249 return (ENOTDIR); 250 } 251 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 252 cache_drop(&nch); 253 vput(vp); 254 return (EPERM); 255 } 256 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 257 cache_drop(&nch); 258 vput(vp); 259 return (error); 260 } 261 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 262 if (!strcmp(vfsp->vfc_name, fstypename)) 263 break; 264 } 265 if (vfsp == NULL) { 266 linker_file_t lf; 267 268 /* Only load modules for root (very important!) */ 269 if ((error = suser(td)) != 0) { 270 cache_drop(&nch); 271 vput(vp); 272 return error; 273 } 274 error = linker_load_file(fstypename, &lf); 275 if (error || lf == NULL) { 276 cache_drop(&nch); 277 vput(vp); 278 if (lf == NULL) 279 error = ENODEV; 280 return error; 281 } 282 lf->userrefs++; 283 /* lookup again, see if the VFS was loaded */ 284 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 285 if (!strcmp(vfsp->vfc_name, fstypename)) 286 break; 287 } 288 if (vfsp == NULL) { 289 lf->userrefs--; 290 linker_file_unload(lf); 291 cache_drop(&nch); 292 vput(vp); 293 return (ENODEV); 294 } 295 } 296 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 297 cache_drop(&nch); 298 vput(vp); 299 return (EBUSY); 300 } 301 vp->v_flag |= VMOUNT; 302 303 /* 304 * Allocate and initialize the filesystem. 305 */ 306 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 307 TAILQ_INIT(&mp->mnt_nvnodelist); 308 TAILQ_INIT(&mp->mnt_reservedvnlist); 309 TAILQ_INIT(&mp->mnt_jlist); 310 mp->mnt_nvnodelistsize = 0; 311 lockinit(&mp->mnt_lock, "vfslock", 0, 0); 312 vfs_busy(mp, LK_NOWAIT); 313 mp->mnt_op = vfsp->vfc_vfsops; 314 mp->mnt_vfc = vfsp; 315 vfsp->vfc_refcount++; 316 mp->mnt_stat.f_type = vfsp->vfc_typenum; 317 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 318 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 319 mp->mnt_stat.f_owner = cred->cr_uid; 320 mp->mnt_iosize_max = DFLTPHYS; 321 vn_unlock(vp); 322 update: 323 /* 324 * Set the mount level flags. 325 */ 326 if (uap->flags & MNT_RDONLY) 327 mp->mnt_flag |= MNT_RDONLY; 328 else if (mp->mnt_flag & MNT_RDONLY) 329 mp->mnt_kern_flag |= MNTK_WANTRDWR; 330 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 331 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 332 MNT_NOSYMFOLLOW | MNT_IGNORE | 333 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 334 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 335 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 336 MNT_NOSYMFOLLOW | MNT_IGNORE | 337 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 338 /* 339 * Mount the filesystem. 340 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 341 * get. 342 */ 343 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 344 if (mp->mnt_flag & MNT_UPDATE) { 345 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 346 mp->mnt_flag &= ~MNT_RDONLY; 347 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 348 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 349 if (error) { 350 mp->mnt_flag = flag; 351 mp->mnt_kern_flag = flag2; 352 } 353 vfs_unbusy(mp); 354 vp->v_flag &= ~VMOUNT; 355 vrele(vp); 356 cache_drop(&nch); 357 return (error); 358 } 359 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 360 /* 361 * Put the new filesystem on the mount list after root. The mount 362 * point gets its own mnt_ncmountpt (unless the VFS already set one 363 * up) which represents the root of the mount. The lookup code 364 * detects the mount point going forward and checks the root of 365 * the mount going backwards. 366 * 367 * It is not necessary to invalidate or purge the vnode underneath 368 * because elements under the mount will be given their own glue 369 * namecache record. 370 */ 371 if (!error) { 372 if (mp->mnt_ncmountpt.ncp == NULL) { 373 /* 374 * allocate, then unlock, but leave the ref intact 375 */ 376 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 377 cache_unlock(&mp->mnt_ncmountpt); 378 } 379 mp->mnt_ncmounton = nch; /* inherits ref */ 380 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 381 382 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 383 vp->v_flag &= ~VMOUNT; 384 mountlist_insert(mp, MNTINS_LAST); 385 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 386 vn_unlock(vp); 387 error = vfs_allocate_syncvnode(mp); 388 vfs_unbusy(mp); 389 error = VFS_START(mp, 0); 390 vrele(vp); 391 } else { 392 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 393 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 394 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 395 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 396 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 397 vp->v_flag &= ~VMOUNT; 398 mp->mnt_vfc->vfc_refcount--; 399 vfs_unbusy(mp); 400 kfree(mp, M_MOUNT); 401 cache_drop(&nch); 402 vput(vp); 403 } 404 return (error); 405 } 406 407 /* 408 * Scan all active processes to see if any of them have a current 409 * or root directory onto which the new filesystem has just been 410 * mounted. If so, replace them with the new mount point. 411 * 412 * The passed ncp is ref'd and locked (from the mount code) and 413 * must be associated with the vnode representing the root of the 414 * mount point. 415 */ 416 struct checkdirs_info { 417 struct nchandle old_nch; 418 struct nchandle new_nch; 419 struct vnode *old_vp; 420 struct vnode *new_vp; 421 }; 422 423 static int checkdirs_callback(struct proc *p, void *data); 424 425 static void 426 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 427 { 428 struct checkdirs_info info; 429 struct vnode *olddp; 430 struct vnode *newdp; 431 struct mount *mp; 432 433 /* 434 * If the old mount point's vnode has a usecount of 1, it is not 435 * being held as a descriptor anywhere. 436 */ 437 olddp = old_nch->ncp->nc_vp; 438 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 439 return; 440 441 /* 442 * Force the root vnode of the new mount point to be resolved 443 * so we can update any matching processes. 444 */ 445 mp = new_nch->mount; 446 if (VFS_ROOT(mp, &newdp)) 447 panic("mount: lost mount"); 448 cache_setunresolved(new_nch); 449 cache_setvp(new_nch, newdp); 450 451 /* 452 * Special handling of the root node 453 */ 454 if (rootvnode == olddp) { 455 vref(newdp); 456 vfs_cache_setroot(newdp, cache_hold(new_nch)); 457 } 458 459 /* 460 * Pass newdp separately so the callback does not have to access 461 * it via new_nch->ncp->nc_vp. 462 */ 463 info.old_nch = *old_nch; 464 info.new_nch = *new_nch; 465 info.new_vp = newdp; 466 allproc_scan(checkdirs_callback, &info); 467 vput(newdp); 468 } 469 470 /* 471 * NOTE: callback is not MP safe because the scanned process's filedesc 472 * structure can be ripped out from under us, amoung other things. 473 */ 474 static int 475 checkdirs_callback(struct proc *p, void *data) 476 { 477 struct checkdirs_info *info = data; 478 struct filedesc *fdp; 479 struct nchandle ncdrop1; 480 struct nchandle ncdrop2; 481 struct vnode *vprele1; 482 struct vnode *vprele2; 483 484 if ((fdp = p->p_fd) != NULL) { 485 cache_zero(&ncdrop1); 486 cache_zero(&ncdrop2); 487 vprele1 = NULL; 488 vprele2 = NULL; 489 490 /* 491 * MPUNSAFE - XXX fdp can be pulled out from under a 492 * foreign process. 493 * 494 * A shared filedesc is ok, we don't have to copy it 495 * because we are making this change globally. 496 */ 497 spin_lock_wr(&fdp->fd_spin); 498 if (fdp->fd_ncdir.mount == info->old_nch.mount && 499 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 500 vprele1 = fdp->fd_cdir; 501 vref(info->new_vp); 502 fdp->fd_cdir = info->new_vp; 503 ncdrop1 = fdp->fd_ncdir; 504 cache_copy(&info->new_nch, &fdp->fd_ncdir); 505 } 506 if (fdp->fd_nrdir.mount == info->old_nch.mount && 507 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 508 vprele2 = fdp->fd_rdir; 509 vref(info->new_vp); 510 fdp->fd_rdir = info->new_vp; 511 ncdrop2 = fdp->fd_nrdir; 512 cache_copy(&info->new_nch, &fdp->fd_nrdir); 513 } 514 spin_unlock_wr(&fdp->fd_spin); 515 if (ncdrop1.ncp) 516 cache_drop(&ncdrop1); 517 if (ncdrop2.ncp) 518 cache_drop(&ncdrop2); 519 if (vprele1) 520 vrele(vprele1); 521 if (vprele2) 522 vrele(vprele2); 523 } 524 return(0); 525 } 526 527 /* 528 * Unmount a file system. 529 * 530 * Note: unmount takes a path to the vnode mounted on as argument, 531 * not special file (as before). 532 */ 533 /* 534 * umount_args(char *path, int flags) 535 */ 536 /* ARGSUSED */ 537 int 538 sys_unmount(struct unmount_args *uap) 539 { 540 struct thread *td = curthread; 541 struct proc *p = td->td_proc; 542 struct mount *mp = NULL; 543 int error; 544 struct nlookupdata nd; 545 546 KKASSERT(p); 547 if (p->p_ucred->cr_prison != NULL) 548 return (EPERM); 549 if (usermount == 0 && (error = suser(td))) 550 return (error); 551 552 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 553 if (error == 0) 554 error = nlookup(&nd); 555 if (error) 556 goto out; 557 558 mp = nd.nl_nch.mount; 559 560 /* 561 * Only root, or the user that did the original mount is 562 * permitted to unmount this filesystem. 563 */ 564 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 565 (error = suser(td))) 566 goto out; 567 568 /* 569 * Don't allow unmounting the root file system. 570 */ 571 if (mp->mnt_flag & MNT_ROOTFS) { 572 error = EINVAL; 573 goto out; 574 } 575 576 /* 577 * Must be the root of the filesystem 578 */ 579 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 580 error = EINVAL; 581 goto out; 582 } 583 584 out: 585 nlookup_done(&nd); 586 if (error) 587 return (error); 588 return (dounmount(mp, uap->flags)); 589 } 590 591 /* 592 * Do the actual file system unmount. 593 */ 594 static int 595 dounmount_interlock(struct mount *mp) 596 { 597 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 598 return (EBUSY); 599 mp->mnt_kern_flag |= MNTK_UNMOUNT; 600 return(0); 601 } 602 603 int 604 dounmount(struct mount *mp, int flags) 605 { 606 struct namecache *ncp; 607 struct nchandle nch; 608 int error; 609 int async_flag; 610 int lflags; 611 int freeok = 1; 612 613 /* 614 * Exclusive access for unmounting purposes 615 */ 616 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 617 return (error); 618 619 /* 620 * Allow filesystems to detect that a forced unmount is in progress. 621 */ 622 if (flags & MNT_FORCE) 623 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 624 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT); 625 error = lockmgr(&mp->mnt_lock, lflags); 626 if (error) { 627 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 628 if (mp->mnt_kern_flag & MNTK_MWAIT) 629 wakeup(mp); 630 return (error); 631 } 632 633 if (mp->mnt_flag & MNT_EXPUBLIC) 634 vfs_setpublicfs(NULL, NULL, NULL); 635 636 vfs_msync(mp, MNT_WAIT); 637 async_flag = mp->mnt_flag & MNT_ASYNC; 638 mp->mnt_flag &=~ MNT_ASYNC; 639 640 /* 641 * If this filesystem isn't aliasing other filesystems, 642 * try to invalidate any remaining namecache entries and 643 * check the count afterwords. 644 */ 645 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 646 cache_lock(&mp->mnt_ncmountpt); 647 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 648 cache_unlock(&mp->mnt_ncmountpt); 649 650 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 651 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 652 653 if ((flags & MNT_FORCE) == 0) { 654 error = EBUSY; 655 mount_warning(mp, "Cannot unmount: " 656 "%d namecache " 657 "references still " 658 "present", 659 ncp->nc_refs - 1); 660 } else { 661 mount_warning(mp, "Forced unmount: " 662 "%d namecache " 663 "references still " 664 "present", 665 ncp->nc_refs - 1); 666 freeok = 0; 667 } 668 } 669 } 670 671 /* 672 * nchandle records ref the mount structure. Expect a count of 1 673 * (our mount->mnt_ncmountpt). 674 */ 675 if (mp->mnt_refs != 1) { 676 if ((flags & MNT_FORCE) == 0) { 677 mount_warning(mp, "Cannot unmount: " 678 "%d process references still " 679 "present", mp->mnt_refs); 680 error = EBUSY; 681 } else { 682 mount_warning(mp, "Forced unmount: " 683 "%d process references still " 684 "present", mp->mnt_refs); 685 freeok = 0; 686 } 687 } 688 689 if (error == 0) { 690 if (mp->mnt_syncer != NULL) 691 vrele(mp->mnt_syncer); 692 if (((mp->mnt_flag & MNT_RDONLY) || 693 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 694 (flags & MNT_FORCE)) { 695 error = VFS_UNMOUNT(mp, flags); 696 } 697 } 698 if (error) { 699 if (mp->mnt_syncer == NULL) 700 vfs_allocate_syncvnode(mp); 701 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 702 mp->mnt_flag |= async_flag; 703 lockmgr(&mp->mnt_lock, LK_RELEASE); 704 if (mp->mnt_kern_flag & MNTK_MWAIT) 705 wakeup(mp); 706 return (error); 707 } 708 /* 709 * Clean up any journals still associated with the mount after 710 * filesystem activity has ceased. 711 */ 712 journal_remove_all_journals(mp, 713 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 714 715 mountlist_remove(mp); 716 717 /* 718 * Remove any installed vnode ops here so the individual VFSs don't 719 * have to. 720 */ 721 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 722 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 723 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 724 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 725 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 726 727 if (mp->mnt_ncmountpt.ncp != NULL) { 728 nch = mp->mnt_ncmountpt; 729 cache_zero(&mp->mnt_ncmountpt); 730 cache_clrmountpt(&nch); 731 cache_drop(&nch); 732 } 733 if (mp->mnt_ncmounton.ncp != NULL) { 734 nch = mp->mnt_ncmounton; 735 cache_zero(&mp->mnt_ncmounton); 736 cache_clrmountpt(&nch); 737 cache_drop(&nch); 738 } 739 740 mp->mnt_vfc->vfc_refcount--; 741 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 742 panic("unmount: dangling vnode"); 743 lockmgr(&mp->mnt_lock, LK_RELEASE); 744 if (mp->mnt_kern_flag & MNTK_MWAIT) 745 wakeup(mp); 746 if (freeok) 747 kfree(mp, M_MOUNT); 748 return (0); 749 } 750 751 static 752 void 753 mount_warning(struct mount *mp, const char *ctl, ...) 754 { 755 char *ptr; 756 char *buf; 757 __va_list va; 758 759 __va_start(va, ctl); 760 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) { 761 kprintf("unmount(%s): ", ptr); 762 kvprintf(ctl, va); 763 kprintf("\n"); 764 kfree(buf, M_TEMP); 765 } else { 766 kprintf("unmount(%p", mp); 767 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 768 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 769 kprintf("): "); 770 kvprintf(ctl, va); 771 kprintf("\n"); 772 } 773 __va_end(va); 774 } 775 776 /* 777 * Shim cache_fullpath() to handle the case where a process is chrooted into 778 * a subdirectory of a mount. In this case if the root mount matches the 779 * process root directory's mount we have to specify the process's root 780 * directory instead of the mount point, because the mount point might 781 * be above the root directory. 782 */ 783 static 784 int 785 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 786 { 787 struct nchandle *nch; 788 789 if (p && p->p_fd->fd_nrdir.mount == mp) 790 nch = &p->p_fd->fd_nrdir; 791 else 792 nch = &mp->mnt_ncmountpt; 793 return(cache_fullpath(p, nch, rb, fb)); 794 } 795 796 /* 797 * Sync each mounted filesystem. 798 */ 799 800 #ifdef DEBUG 801 static int syncprt = 0; 802 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 803 #endif /* DEBUG */ 804 805 static int sync_callback(struct mount *mp, void *data); 806 807 /* ARGSUSED */ 808 int 809 sys_sync(struct sync_args *uap) 810 { 811 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 812 #ifdef DEBUG 813 /* 814 * print out buffer pool stat information on each sync() call. 815 */ 816 if (syncprt) 817 vfs_bufstats(); 818 #endif /* DEBUG */ 819 return (0); 820 } 821 822 static 823 int 824 sync_callback(struct mount *mp, void *data __unused) 825 { 826 int asyncflag; 827 828 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 829 asyncflag = mp->mnt_flag & MNT_ASYNC; 830 mp->mnt_flag &= ~MNT_ASYNC; 831 vfs_msync(mp, MNT_NOWAIT); 832 VFS_SYNC(mp, MNT_NOWAIT); 833 mp->mnt_flag |= asyncflag; 834 } 835 return(0); 836 } 837 838 /* XXX PRISON: could be per prison flag */ 839 static int prison_quotas; 840 #if 0 841 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 842 #endif 843 844 /* 845 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 846 * 847 * Change filesystem quotas. 848 */ 849 /* ARGSUSED */ 850 int 851 sys_quotactl(struct quotactl_args *uap) 852 { 853 struct nlookupdata nd; 854 struct thread *td; 855 struct proc *p; 856 struct mount *mp; 857 int error; 858 859 td = curthread; 860 p = td->td_proc; 861 if (p->p_ucred->cr_prison && !prison_quotas) 862 return (EPERM); 863 864 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 865 if (error == 0) 866 error = nlookup(&nd); 867 if (error == 0) { 868 mp = nd.nl_nch.mount; 869 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 870 uap->arg, nd.nl_cred); 871 } 872 nlookup_done(&nd); 873 return (error); 874 } 875 876 /* 877 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 878 * void *buf, int buflen) 879 * 880 * This function operates on a mount point and executes the specified 881 * operation using the specified control data, and possibly returns data. 882 * 883 * The actual number of bytes stored in the result buffer is returned, 0 884 * if none, otherwise an error is returned. 885 */ 886 /* ARGSUSED */ 887 int 888 sys_mountctl(struct mountctl_args *uap) 889 { 890 struct thread *td = curthread; 891 struct proc *p = td->td_proc; 892 struct file *fp; 893 void *ctl = NULL; 894 void *buf = NULL; 895 char *path = NULL; 896 int error; 897 898 /* 899 * Sanity and permissions checks. We must be root. 900 */ 901 KKASSERT(p); 902 if (p->p_ucred->cr_prison != NULL) 903 return (EPERM); 904 if ((error = suser(td)) != 0) 905 return (error); 906 907 /* 908 * Argument length checks 909 */ 910 if (uap->ctllen < 0 || uap->ctllen > 1024) 911 return (EINVAL); 912 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 913 return (EINVAL); 914 if (uap->path == NULL) 915 return (EINVAL); 916 917 /* 918 * Allocate the necessary buffers and copyin data 919 */ 920 path = objcache_get(namei_oc, M_WAITOK); 921 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 922 if (error) 923 goto done; 924 925 if (uap->ctllen) { 926 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 927 error = copyin(uap->ctl, ctl, uap->ctllen); 928 if (error) 929 goto done; 930 } 931 if (uap->buflen) 932 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 933 934 /* 935 * Validate the descriptor 936 */ 937 if (uap->fd >= 0) { 938 fp = holdfp(p->p_fd, uap->fd, -1); 939 if (fp == NULL) { 940 error = EBADF; 941 goto done; 942 } 943 } else { 944 fp = NULL; 945 } 946 947 /* 948 * Execute the internal kernel function and clean up. 949 */ 950 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 951 if (fp) 952 fdrop(fp); 953 if (error == 0 && uap->sysmsg_result > 0) 954 error = copyout(buf, uap->buf, uap->sysmsg_result); 955 done: 956 if (path) 957 objcache_put(namei_oc, path); 958 if (ctl) 959 kfree(ctl, M_TEMP); 960 if (buf) 961 kfree(buf, M_TEMP); 962 return (error); 963 } 964 965 /* 966 * Execute a mount control operation by resolving the path to a mount point 967 * and calling vop_mountctl(). 968 */ 969 int 970 kern_mountctl(const char *path, int op, struct file *fp, 971 const void *ctl, int ctllen, 972 void *buf, int buflen, int *res) 973 { 974 struct vnode *vp; 975 struct mount *mp; 976 struct nlookupdata nd; 977 int error; 978 979 *res = 0; 980 vp = NULL; 981 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 982 if (error == 0) 983 error = nlookup(&nd); 984 if (error == 0) 985 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 986 nlookup_done(&nd); 987 if (error) 988 return (error); 989 990 mp = vp->v_mount; 991 992 /* 993 * Must be the root of the filesystem 994 */ 995 if ((vp->v_flag & VROOT) == 0) { 996 vput(vp); 997 return (EINVAL); 998 } 999 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 1000 buf, buflen, res); 1001 vput(vp); 1002 return (error); 1003 } 1004 1005 int 1006 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1007 { 1008 struct thread *td = curthread; 1009 struct proc *p = td->td_proc; 1010 struct mount *mp; 1011 struct statfs *sp; 1012 char *fullpath, *freepath; 1013 int error; 1014 1015 if ((error = nlookup(nd)) != 0) 1016 return (error); 1017 mp = nd->nl_nch.mount; 1018 sp = &mp->mnt_stat; 1019 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1020 return (error); 1021 1022 error = mount_path(p, mp, &fullpath, &freepath); 1023 if (error) 1024 return(error); 1025 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1026 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1027 kfree(freepath, M_TEMP); 1028 1029 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1030 bcopy(sp, buf, sizeof(*buf)); 1031 /* Only root should have access to the fsid's. */ 1032 if (suser(td)) 1033 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1034 return (0); 1035 } 1036 1037 /* 1038 * statfs_args(char *path, struct statfs *buf) 1039 * 1040 * Get filesystem statistics. 1041 */ 1042 int 1043 sys_statfs(struct statfs_args *uap) 1044 { 1045 struct nlookupdata nd; 1046 struct statfs buf; 1047 int error; 1048 1049 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1050 if (error == 0) 1051 error = kern_statfs(&nd, &buf); 1052 nlookup_done(&nd); 1053 if (error == 0) 1054 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1055 return (error); 1056 } 1057 1058 int 1059 kern_fstatfs(int fd, struct statfs *buf) 1060 { 1061 struct thread *td = curthread; 1062 struct proc *p = td->td_proc; 1063 struct file *fp; 1064 struct mount *mp; 1065 struct statfs *sp; 1066 char *fullpath, *freepath; 1067 int error; 1068 1069 KKASSERT(p); 1070 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1071 return (error); 1072 mp = ((struct vnode *)fp->f_data)->v_mount; 1073 if (mp == NULL) { 1074 error = EBADF; 1075 goto done; 1076 } 1077 if (fp->f_cred == NULL) { 1078 error = EINVAL; 1079 goto done; 1080 } 1081 sp = &mp->mnt_stat; 1082 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1083 goto done; 1084 1085 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1086 goto done; 1087 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1088 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1089 kfree(freepath, M_TEMP); 1090 1091 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1092 bcopy(sp, buf, sizeof(*buf)); 1093 1094 /* Only root should have access to the fsid's. */ 1095 if (suser(td)) 1096 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1097 error = 0; 1098 done: 1099 fdrop(fp); 1100 return (error); 1101 } 1102 1103 /* 1104 * fstatfs_args(int fd, struct statfs *buf) 1105 * 1106 * Get filesystem statistics. 1107 */ 1108 int 1109 sys_fstatfs(struct fstatfs_args *uap) 1110 { 1111 struct statfs buf; 1112 int error; 1113 1114 error = kern_fstatfs(uap->fd, &buf); 1115 1116 if (error == 0) 1117 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1118 return (error); 1119 } 1120 1121 /* 1122 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1123 * 1124 * Get statistics on all filesystems. 1125 */ 1126 1127 struct getfsstat_info { 1128 struct statfs *sfsp; 1129 long count; 1130 long maxcount; 1131 int error; 1132 int flags; 1133 struct proc *p; 1134 }; 1135 1136 static int getfsstat_callback(struct mount *, void *); 1137 1138 /* ARGSUSED */ 1139 int 1140 sys_getfsstat(struct getfsstat_args *uap) 1141 { 1142 struct thread *td = curthread; 1143 struct proc *p = td->td_proc; 1144 struct getfsstat_info info; 1145 1146 bzero(&info, sizeof(info)); 1147 1148 info.maxcount = uap->bufsize / sizeof(struct statfs); 1149 info.sfsp = uap->buf; 1150 info.count = 0; 1151 info.flags = uap->flags; 1152 info.p = p; 1153 1154 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1155 if (info.sfsp && info.count > info.maxcount) 1156 uap->sysmsg_result = info.maxcount; 1157 else 1158 uap->sysmsg_result = info.count; 1159 return (info.error); 1160 } 1161 1162 static int 1163 getfsstat_callback(struct mount *mp, void *data) 1164 { 1165 struct getfsstat_info *info = data; 1166 struct statfs *sp; 1167 char *freepath; 1168 char *fullpath; 1169 int error; 1170 1171 if (info->sfsp && info->count < info->maxcount) { 1172 if (info->p && !chroot_visible_mnt(mp, info->p)) 1173 return(0); 1174 sp = &mp->mnt_stat; 1175 1176 /* 1177 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1178 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1179 * overrides MNT_WAIT. 1180 */ 1181 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1182 (info->flags & MNT_WAIT)) && 1183 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1184 return(0); 1185 } 1186 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1187 1188 error = mount_path(info->p, mp, &fullpath, &freepath); 1189 if (error) { 1190 info->error = error; 1191 return(-1); 1192 } 1193 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1194 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1195 kfree(freepath, M_TEMP); 1196 1197 error = copyout(sp, info->sfsp, sizeof(*sp)); 1198 if (error) { 1199 info->error = error; 1200 return (-1); 1201 } 1202 ++info->sfsp; 1203 } 1204 info->count++; 1205 return(0); 1206 } 1207 1208 /* 1209 * fchdir_args(int fd) 1210 * 1211 * Change current working directory to a given file descriptor. 1212 */ 1213 /* ARGSUSED */ 1214 int 1215 sys_fchdir(struct fchdir_args *uap) 1216 { 1217 struct thread *td = curthread; 1218 struct proc *p = td->td_proc; 1219 struct filedesc *fdp = p->p_fd; 1220 struct vnode *vp, *ovp; 1221 struct mount *mp; 1222 struct file *fp; 1223 struct nchandle nch, onch, tnch; 1224 int error; 1225 1226 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1227 return (error); 1228 vp = (struct vnode *)fp->f_data; 1229 vref(vp); 1230 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1231 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) 1232 error = ENOTDIR; 1233 else 1234 error = VOP_ACCESS(vp, VEXEC, p->p_ucred); 1235 if (error) { 1236 vput(vp); 1237 fdrop(fp); 1238 return (error); 1239 } 1240 cache_copy(&fp->f_nchandle, &nch); 1241 1242 /* 1243 * If the ncp has become a mount point, traverse through 1244 * the mount point. 1245 */ 1246 1247 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1248 (mp = cache_findmount(&nch)) != NULL 1249 ) { 1250 error = nlookup_mp(mp, &tnch); 1251 if (error == 0) { 1252 cache_unlock(&tnch); /* leave ref intact */ 1253 vput(vp); 1254 vp = tnch.ncp->nc_vp; 1255 error = vget(vp, LK_SHARED); 1256 KKASSERT(error == 0); 1257 cache_drop(&nch); 1258 nch = tnch; 1259 } 1260 } 1261 if (error == 0) { 1262 ovp = fdp->fd_cdir; 1263 onch = fdp->fd_ncdir; 1264 vn_unlock(vp); /* leave ref intact */ 1265 fdp->fd_cdir = vp; 1266 fdp->fd_ncdir = nch; 1267 cache_drop(&onch); 1268 vrele(ovp); 1269 } else { 1270 cache_drop(&nch); 1271 vput(vp); 1272 } 1273 fdrop(fp); 1274 return (error); 1275 } 1276 1277 int 1278 kern_chdir(struct nlookupdata *nd) 1279 { 1280 struct thread *td = curthread; 1281 struct proc *p = td->td_proc; 1282 struct filedesc *fdp = p->p_fd; 1283 struct vnode *vp, *ovp; 1284 struct nchandle onch; 1285 int error; 1286 1287 if ((error = nlookup(nd)) != 0) 1288 return (error); 1289 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1290 return (ENOENT); 1291 if ((error = vget(vp, LK_SHARED)) != 0) 1292 return (error); 1293 1294 error = checkvp_chdir(vp, td); 1295 vn_unlock(vp); 1296 if (error == 0) { 1297 ovp = fdp->fd_cdir; 1298 onch = fdp->fd_ncdir; 1299 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1300 fdp->fd_ncdir = nd->nl_nch; 1301 fdp->fd_cdir = vp; 1302 cache_drop(&onch); 1303 vrele(ovp); 1304 cache_zero(&nd->nl_nch); 1305 } else { 1306 vrele(vp); 1307 } 1308 return (error); 1309 } 1310 1311 /* 1312 * chdir_args(char *path) 1313 * 1314 * Change current working directory (``.''). 1315 */ 1316 int 1317 sys_chdir(struct chdir_args *uap) 1318 { 1319 struct nlookupdata nd; 1320 int error; 1321 1322 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1323 if (error == 0) 1324 error = kern_chdir(&nd); 1325 nlookup_done(&nd); 1326 return (error); 1327 } 1328 1329 /* 1330 * Helper function for raised chroot(2) security function: Refuse if 1331 * any filedescriptors are open directories. 1332 */ 1333 static int 1334 chroot_refuse_vdir_fds(struct filedesc *fdp) 1335 { 1336 struct vnode *vp; 1337 struct file *fp; 1338 int error; 1339 int fd; 1340 1341 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1342 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1343 continue; 1344 vp = (struct vnode *)fp->f_data; 1345 if (vp->v_type != VDIR) { 1346 fdrop(fp); 1347 continue; 1348 } 1349 fdrop(fp); 1350 return(EPERM); 1351 } 1352 return (0); 1353 } 1354 1355 /* 1356 * This sysctl determines if we will allow a process to chroot(2) if it 1357 * has a directory open: 1358 * 0: disallowed for all processes. 1359 * 1: allowed for processes that were not already chroot(2)'ed. 1360 * 2: allowed for all processes. 1361 */ 1362 1363 static int chroot_allow_open_directories = 1; 1364 1365 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1366 &chroot_allow_open_directories, 0, ""); 1367 1368 /* 1369 * chroot to the specified namecache entry. We obtain the vp from the 1370 * namecache data. The passed ncp must be locked and referenced and will 1371 * remain locked and referenced on return. 1372 */ 1373 int 1374 kern_chroot(struct nchandle *nch) 1375 { 1376 struct thread *td = curthread; 1377 struct proc *p = td->td_proc; 1378 struct filedesc *fdp = p->p_fd; 1379 struct vnode *vp; 1380 int error; 1381 1382 /* 1383 * Only root can chroot 1384 */ 1385 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0) 1386 return (error); 1387 1388 /* 1389 * Disallow open directory descriptors (fchdir() breakouts). 1390 */ 1391 if (chroot_allow_open_directories == 0 || 1392 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1393 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1394 return (error); 1395 } 1396 if ((vp = nch->ncp->nc_vp) == NULL) 1397 return (ENOENT); 1398 1399 if ((error = vget(vp, LK_SHARED)) != 0) 1400 return (error); 1401 1402 /* 1403 * Check the validity of vp as a directory to change to and 1404 * associate it with rdir/jdir. 1405 */ 1406 error = checkvp_chdir(vp, td); 1407 vn_unlock(vp); /* leave reference intact */ 1408 if (error == 0) { 1409 vrele(fdp->fd_rdir); 1410 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1411 cache_drop(&fdp->fd_nrdir); 1412 cache_copy(nch, &fdp->fd_nrdir); 1413 if (fdp->fd_jdir == NULL) { 1414 fdp->fd_jdir = vp; 1415 vref(fdp->fd_jdir); 1416 cache_copy(nch, &fdp->fd_njdir); 1417 } 1418 } else { 1419 vrele(vp); 1420 } 1421 return (error); 1422 } 1423 1424 /* 1425 * chroot_args(char *path) 1426 * 1427 * Change notion of root (``/'') directory. 1428 */ 1429 /* ARGSUSED */ 1430 int 1431 sys_chroot(struct chroot_args *uap) 1432 { 1433 struct thread *td = curthread; 1434 struct nlookupdata nd; 1435 int error; 1436 1437 KKASSERT(td->td_proc); 1438 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1439 if (error) { 1440 nlookup_done(&nd); 1441 return(error); 1442 } 1443 error = nlookup(&nd); 1444 if (error == 0) 1445 error = kern_chroot(&nd.nl_nch); 1446 nlookup_done(&nd); 1447 return(error); 1448 } 1449 1450 /* 1451 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1452 * determine whether it is legal to chdir to the vnode. The vnode's state 1453 * is not changed by this call. 1454 */ 1455 int 1456 checkvp_chdir(struct vnode *vp, struct thread *td) 1457 { 1458 int error; 1459 1460 if (vp->v_type != VDIR) 1461 error = ENOTDIR; 1462 else 1463 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred); 1464 return (error); 1465 } 1466 1467 int 1468 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1469 { 1470 struct thread *td = curthread; 1471 struct proc *p = td->td_proc; 1472 struct lwp *lp = td->td_lwp; 1473 struct filedesc *fdp = p->p_fd; 1474 int cmode, flags; 1475 struct file *nfp; 1476 struct file *fp; 1477 struct vnode *vp; 1478 int type, indx, error; 1479 struct flock lf; 1480 1481 if ((oflags & O_ACCMODE) == O_ACCMODE) 1482 return (EINVAL); 1483 flags = FFLAGS(oflags); 1484 error = falloc(p, &nfp, NULL); 1485 if (error) 1486 return (error); 1487 fp = nfp; 1488 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1489 1490 /* 1491 * XXX p_dupfd is a real mess. It allows a device to return a 1492 * file descriptor to be duplicated rather then doing the open 1493 * itself. 1494 */ 1495 lp->lwp_dupfd = -1; 1496 1497 /* 1498 * Call vn_open() to do the lookup and assign the vnode to the 1499 * file pointer. vn_open() does not change the ref count on fp 1500 * and the vnode, on success, will be inherited by the file pointer 1501 * and unlocked. 1502 */ 1503 nd->nl_flags |= NLC_LOCKVP; 1504 error = vn_open(nd, fp, flags, cmode); 1505 nlookup_done(nd); 1506 if (error) { 1507 /* 1508 * handle special fdopen() case. bleh. dupfdopen() is 1509 * responsible for dropping the old contents of ofiles[indx] 1510 * if it succeeds. 1511 * 1512 * Note that fsetfd() will add a ref to fp which represents 1513 * the fd_files[] assignment. We must still drop our 1514 * reference. 1515 */ 1516 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1517 if (fdalloc(p, 0, &indx) == 0) { 1518 error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error); 1519 if (error == 0) { 1520 *res = indx; 1521 fdrop(fp); /* our ref */ 1522 return (0); 1523 } 1524 fsetfd(p, NULL, indx); 1525 } 1526 } 1527 fdrop(fp); /* our ref */ 1528 if (error == ERESTART) 1529 error = EINTR; 1530 return (error); 1531 } 1532 1533 /* 1534 * ref the vnode for ourselves so it can't be ripped out from under 1535 * is. XXX need an ND flag to request that the vnode be returned 1536 * anyway. 1537 * 1538 * Reserve a file descriptor but do not assign it until the open 1539 * succeeds. 1540 */ 1541 vp = (struct vnode *)fp->f_data; 1542 vref(vp); 1543 if ((error = fdalloc(p, 0, &indx)) != 0) { 1544 fdrop(fp); 1545 vrele(vp); 1546 return (error); 1547 } 1548 1549 /* 1550 * If no error occurs the vp will have been assigned to the file 1551 * pointer. 1552 */ 1553 lp->lwp_dupfd = 0; 1554 1555 if (flags & (O_EXLOCK | O_SHLOCK)) { 1556 lf.l_whence = SEEK_SET; 1557 lf.l_start = 0; 1558 lf.l_len = 0; 1559 if (flags & O_EXLOCK) 1560 lf.l_type = F_WRLCK; 1561 else 1562 lf.l_type = F_RDLCK; 1563 if (flags & FNONBLOCK) 1564 type = 0; 1565 else 1566 type = F_WAIT; 1567 1568 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1569 /* 1570 * lock request failed. Clean up the reserved 1571 * descriptor. 1572 */ 1573 vrele(vp); 1574 fsetfd(p, NULL, indx); 1575 fdrop(fp); 1576 return (error); 1577 } 1578 fp->f_flag |= FHASLOCK; 1579 } 1580 #if 0 1581 /* 1582 * Assert that all regular file vnodes were created with a object. 1583 */ 1584 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1585 ("open: regular file has no backing object after vn_open")); 1586 #endif 1587 1588 vrele(vp); 1589 1590 /* 1591 * release our private reference, leaving the one associated with the 1592 * descriptor table intact. 1593 */ 1594 fsetfd(p, fp, indx); 1595 fdrop(fp); 1596 *res = indx; 1597 return (0); 1598 } 1599 1600 /* 1601 * open_args(char *path, int flags, int mode) 1602 * 1603 * Check permissions, allocate an open file structure, 1604 * and call the device open routine if any. 1605 */ 1606 int 1607 sys_open(struct open_args *uap) 1608 { 1609 struct nlookupdata nd; 1610 int error; 1611 1612 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1613 if (error == 0) { 1614 error = kern_open(&nd, uap->flags, 1615 uap->mode, &uap->sysmsg_result); 1616 } 1617 nlookup_done(&nd); 1618 return (error); 1619 } 1620 1621 int 1622 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1623 { 1624 struct thread *td = curthread; 1625 struct proc *p = td->td_proc; 1626 struct vnode *vp; 1627 struct vnode *dvp; 1628 struct vattr vattr; 1629 int error; 1630 int whiteout = 0; 1631 1632 KKASSERT(p); 1633 1634 switch (mode & S_IFMT) { 1635 case S_IFCHR: 1636 case S_IFBLK: 1637 error = suser(td); 1638 break; 1639 default: 1640 error = suser_cred(p->p_ucred, PRISON_ROOT); 1641 break; 1642 } 1643 if (error) 1644 return (error); 1645 1646 bwillwrite(); 1647 nd->nl_flags |= NLC_CREATE; 1648 if ((error = nlookup(nd)) != 0) 1649 return (error); 1650 if (nd->nl_nch.ncp->nc_vp) 1651 return (EEXIST); 1652 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1653 return (error); 1654 if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) 1655 return (EPERM); 1656 /* vhold(dvp); - DVP can't go away */ 1657 1658 VATTR_NULL(&vattr); 1659 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1660 vattr.va_rmajor = rmajor; 1661 vattr.va_rminor = rminor; 1662 whiteout = 0; 1663 1664 switch (mode & S_IFMT) { 1665 case S_IFMT: /* used by badsect to flag bad sectors */ 1666 vattr.va_type = VBAD; 1667 break; 1668 case S_IFCHR: 1669 vattr.va_type = VCHR; 1670 break; 1671 case S_IFBLK: 1672 vattr.va_type = VBLK; 1673 break; 1674 case S_IFWHT: 1675 whiteout = 1; 1676 break; 1677 default: 1678 error = EINVAL; 1679 break; 1680 } 1681 if (error == 0) { 1682 if (whiteout) { 1683 error = VOP_NWHITEOUT(&nd->nl_nch, dvp, nd->nl_cred, NAMEI_CREATE); 1684 } else { 1685 vp = NULL; 1686 error = VOP_NMKNOD(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr); 1687 if (error == 0) 1688 vput(vp); 1689 } 1690 } 1691 /* vdrop(dvp); */ 1692 return (error); 1693 } 1694 1695 /* 1696 * mknod_args(char *path, int mode, int dev) 1697 * 1698 * Create a special file. 1699 */ 1700 int 1701 sys_mknod(struct mknod_args *uap) 1702 { 1703 struct nlookupdata nd; 1704 int error; 1705 1706 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1707 if (error == 0) { 1708 error = kern_mknod(&nd, uap->mode, 1709 umajor(uap->dev), uminor(uap->dev)); 1710 } 1711 nlookup_done(&nd); 1712 return (error); 1713 } 1714 1715 int 1716 kern_mkfifo(struct nlookupdata *nd, int mode) 1717 { 1718 struct thread *td = curthread; 1719 struct proc *p = td->td_proc; 1720 struct vattr vattr; 1721 struct vnode *vp; 1722 struct vnode *dvp; 1723 int error; 1724 1725 bwillwrite(); 1726 1727 nd->nl_flags |= NLC_CREATE; 1728 if ((error = nlookup(nd)) != 0) 1729 return (error); 1730 if (nd->nl_nch.ncp->nc_vp) 1731 return (EEXIST); 1732 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1733 return (error); 1734 if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) 1735 return (EPERM); 1736 /* vhold(dvp); - DVP can't go away */ 1737 1738 VATTR_NULL(&vattr); 1739 vattr.va_type = VFIFO; 1740 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1741 vp = NULL; 1742 error = VOP_NMKNOD(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr); 1743 /* vdrop(dvp); */ 1744 if (error == 0) 1745 vput(vp); 1746 return (error); 1747 } 1748 1749 /* 1750 * mkfifo_args(char *path, int mode) 1751 * 1752 * Create a named pipe. 1753 */ 1754 int 1755 sys_mkfifo(struct mkfifo_args *uap) 1756 { 1757 struct nlookupdata nd; 1758 int error; 1759 1760 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1761 if (error == 0) 1762 error = kern_mkfifo(&nd, uap->mode); 1763 nlookup_done(&nd); 1764 return (error); 1765 } 1766 1767 static int hardlink_check_uid = 0; 1768 SYSCTL_INT(_kern, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1769 &hardlink_check_uid, 0, 1770 "Unprivileged processes cannot create hard links to files owned by other " 1771 "users"); 1772 static int hardlink_check_gid = 0; 1773 SYSCTL_INT(_kern, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1774 &hardlink_check_gid, 0, 1775 "Unprivileged processes cannot create hard links to files owned by other " 1776 "groups"); 1777 1778 static int 1779 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 1780 { 1781 struct vattr va; 1782 int error; 1783 1784 /* 1785 * Shortcut if disabled 1786 */ 1787 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 1788 return (0); 1789 1790 /* 1791 * root cred can always hardlink 1792 */ 1793 if (suser_cred(cred, PRISON_ROOT) == 0) 1794 return (0); 1795 1796 /* 1797 * Otherwise only if the originating file is owned by the 1798 * same user or group. Note that any group is allowed if 1799 * the file is owned by the caller. 1800 */ 1801 error = VOP_GETATTR(vp, &va); 1802 if (error != 0) 1803 return (error); 1804 1805 if (hardlink_check_uid) { 1806 if (cred->cr_uid != va.va_uid) 1807 return (EPERM); 1808 } 1809 1810 if (hardlink_check_gid) { 1811 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 1812 return (EPERM); 1813 } 1814 1815 return (0); 1816 } 1817 1818 int 1819 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 1820 { 1821 struct thread *td = curthread; 1822 struct vnode *vp; 1823 struct vnode *dvp; 1824 int error; 1825 1826 /* 1827 * Lookup the source and obtained a locked vnode. 1828 * 1829 * XXX relookup on vget failure / race ? 1830 */ 1831 bwillwrite(); 1832 if ((error = nlookup(nd)) != 0) 1833 return (error); 1834 vp = nd->nl_nch.ncp->nc_vp; 1835 KKASSERT(vp != NULL); 1836 if (vp->v_type == VDIR) 1837 return (EPERM); /* POSIX */ 1838 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1839 return (error); 1840 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 1841 return (error); 1842 1843 /* 1844 * Unlock the source so we can lookup the target without deadlocking 1845 * (XXX vp is locked already, possible other deadlock?). The target 1846 * must not exist. 1847 */ 1848 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 1849 nd->nl_flags &= ~NLC_NCPISLOCKED; 1850 cache_unlock(&nd->nl_nch); 1851 1852 linknd->nl_flags |= NLC_CREATE; 1853 if ((error = nlookup(linknd)) != 0) { 1854 vput(vp); 1855 return (error); 1856 } 1857 if (linknd->nl_nch.ncp->nc_vp) { 1858 vput(vp); 1859 return (EEXIST); 1860 } 1861 if ((dvp = linknd->nl_nch.ncp->nc_parent->nc_vp) == NULL) { 1862 vput(vp); 1863 return (EPERM); 1864 } 1865 /* vhold(dvp); - dvp can't go away */ 1866 1867 /* 1868 * Finally run the new API VOP. 1869 */ 1870 error = can_hardlink(vp, td, td->td_proc->p_ucred); 1871 if (error == 0) 1872 error = VOP_NLINK(&linknd->nl_nch, dvp, vp, linknd->nl_cred); 1873 /* vdrop(dvp); */ 1874 vput(vp); 1875 return (error); 1876 } 1877 1878 /* 1879 * link_args(char *path, char *link) 1880 * 1881 * Make a hard file link. 1882 */ 1883 int 1884 sys_link(struct link_args *uap) 1885 { 1886 struct nlookupdata nd, linknd; 1887 int error; 1888 1889 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1890 if (error == 0) { 1891 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 1892 if (error == 0) 1893 error = kern_link(&nd, &linknd); 1894 nlookup_done(&linknd); 1895 } 1896 nlookup_done(&nd); 1897 return (error); 1898 } 1899 1900 int 1901 kern_symlink(struct nlookupdata *nd, char *path, int mode) 1902 { 1903 struct vattr vattr; 1904 struct vnode *vp; 1905 struct vnode *dvp; 1906 int error; 1907 1908 bwillwrite(); 1909 nd->nl_flags |= NLC_CREATE; 1910 if ((error = nlookup(nd)) != 0) 1911 return (error); 1912 if (nd->nl_nch.ncp->nc_vp) 1913 return (EEXIST); 1914 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1915 return (error); 1916 if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) 1917 return (EPERM); 1918 /* vhold(dvp); - dvp can't go away */ 1919 VATTR_NULL(&vattr); 1920 vattr.va_mode = mode; 1921 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 1922 /* vdrop(dvp); */ 1923 if (error == 0) 1924 vput(vp); 1925 return (error); 1926 } 1927 1928 /* 1929 * symlink(char *path, char *link) 1930 * 1931 * Make a symbolic link. 1932 */ 1933 int 1934 sys_symlink(struct symlink_args *uap) 1935 { 1936 struct thread *td = curthread; 1937 struct nlookupdata nd; 1938 char *path; 1939 int error; 1940 int mode; 1941 1942 path = objcache_get(namei_oc, M_WAITOK); 1943 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1944 if (error == 0) { 1945 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 1946 if (error == 0) { 1947 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 1948 error = kern_symlink(&nd, path, mode); 1949 } 1950 nlookup_done(&nd); 1951 } 1952 objcache_put(namei_oc, path); 1953 return (error); 1954 } 1955 1956 /* 1957 * undelete_args(char *path) 1958 * 1959 * Delete a whiteout from the filesystem. 1960 */ 1961 /* ARGSUSED */ 1962 int 1963 sys_undelete(struct undelete_args *uap) 1964 { 1965 struct nlookupdata nd; 1966 struct vnode *dvp; 1967 int error; 1968 1969 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1970 bwillwrite(); 1971 nd.nl_flags |= NLC_DELETE; 1972 if (error == 0) 1973 error = nlookup(&nd); 1974 if (error == 0) 1975 error = ncp_writechk(&nd.nl_nch); 1976 dvp = NULL; 1977 if (error == 0) { 1978 if ((dvp = nd.nl_nch.ncp->nc_parent->nc_vp) == NULL) 1979 error = EPERM; 1980 } 1981 if (error == 0) { 1982 /* vhold(dvp); - dvp can't go away */ 1983 error = VOP_NWHITEOUT(&nd.nl_nch, dvp, nd.nl_cred, NAMEI_DELETE); 1984 /* vdrop(dvp); */ 1985 } 1986 nlookup_done(&nd); 1987 return (error); 1988 } 1989 1990 int 1991 kern_unlink(struct nlookupdata *nd) 1992 { 1993 struct vnode *dvp; 1994 int error; 1995 1996 bwillwrite(); 1997 nd->nl_flags |= NLC_DELETE; 1998 if ((error = nlookup(nd)) != 0) 1999 return (error); 2000 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2001 return (error); 2002 if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) 2003 return (EPERM); 2004 /* vhold(dvp); - dvp can't go away */ 2005 error = VOP_NREMOVE(&nd->nl_nch, dvp, nd->nl_cred); 2006 /* vdrop(dvp); */ 2007 return (error); 2008 } 2009 2010 /* 2011 * unlink_args(char *path) 2012 * 2013 * Delete a name from the filesystem. 2014 */ 2015 int 2016 sys_unlink(struct unlink_args *uap) 2017 { 2018 struct nlookupdata nd; 2019 int error; 2020 2021 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2022 if (error == 0) 2023 error = kern_unlink(&nd); 2024 nlookup_done(&nd); 2025 return (error); 2026 } 2027 2028 int 2029 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2030 { 2031 struct thread *td = curthread; 2032 struct proc *p = td->td_proc; 2033 struct file *fp; 2034 struct vattr vattr; 2035 int error; 2036 2037 fp = holdfp(p->p_fd, fd, -1); 2038 if (fp == NULL) 2039 return (EBADF); 2040 if (fp->f_type != DTYPE_VNODE) { 2041 error = ESPIPE; 2042 goto done; 2043 } 2044 2045 switch (whence) { 2046 case L_INCR: 2047 fp->f_offset += offset; 2048 error = 0; 2049 break; 2050 case L_XTND: 2051 error = VOP_GETATTR((struct vnode *)fp->f_data, &vattr); 2052 if (error == 0) 2053 fp->f_offset = offset + vattr.va_size; 2054 break; 2055 case L_SET: 2056 fp->f_offset = offset; 2057 error = 0; 2058 break; 2059 default: 2060 error = EINVAL; 2061 break; 2062 } 2063 *res = fp->f_offset; 2064 done: 2065 fdrop(fp); 2066 return (error); 2067 } 2068 2069 /* 2070 * lseek_args(int fd, int pad, off_t offset, int whence) 2071 * 2072 * Reposition read/write file offset. 2073 */ 2074 int 2075 sys_lseek(struct lseek_args *uap) 2076 { 2077 int error; 2078 2079 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2080 &uap->sysmsg_offset); 2081 2082 return (error); 2083 } 2084 2085 int 2086 kern_access(struct nlookupdata *nd, int aflags) 2087 { 2088 struct vnode *vp; 2089 int error, flags; 2090 2091 if ((error = nlookup(nd)) != 0) 2092 return (error); 2093 retry: 2094 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2095 if (error) 2096 return (error); 2097 2098 /* Flags == 0 means only check for existence. */ 2099 if (aflags) { 2100 flags = 0; 2101 if (aflags & R_OK) 2102 flags |= VREAD; 2103 if (aflags & W_OK) 2104 flags |= VWRITE; 2105 if (aflags & X_OK) 2106 flags |= VEXEC; 2107 if ((flags & VWRITE) == 0 || 2108 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2109 error = VOP_ACCESS(vp, flags, nd->nl_cred); 2110 2111 /* 2112 * If the file handle is stale we have to re-resolve the 2113 * entry. This is a hack at the moment. 2114 */ 2115 if (error == ESTALE) { 2116 vput(vp); 2117 cache_setunresolved(&nd->nl_nch); 2118 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2119 if (error == 0) { 2120 vp = NULL; 2121 goto retry; 2122 } 2123 return(error); 2124 } 2125 } 2126 vput(vp); 2127 return (error); 2128 } 2129 2130 /* 2131 * access_args(char *path, int flags) 2132 * 2133 * Check access permissions. 2134 */ 2135 int 2136 sys_access(struct access_args *uap) 2137 { 2138 struct nlookupdata nd; 2139 int error; 2140 2141 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2142 if (error == 0) 2143 error = kern_access(&nd, uap->flags); 2144 nlookup_done(&nd); 2145 return (error); 2146 } 2147 2148 int 2149 kern_stat(struct nlookupdata *nd, struct stat *st) 2150 { 2151 int error; 2152 struct vnode *vp; 2153 thread_t td; 2154 2155 if ((error = nlookup(nd)) != 0) 2156 return (error); 2157 again: 2158 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2159 return (ENOENT); 2160 2161 td = curthread; 2162 if ((error = vget(vp, LK_SHARED)) != 0) 2163 return (error); 2164 error = vn_stat(vp, st, nd->nl_cred); 2165 2166 /* 2167 * If the file handle is stale we have to re-resolve the entry. This 2168 * is a hack at the moment. 2169 */ 2170 if (error == ESTALE) { 2171 vput(vp); 2172 cache_setunresolved(&nd->nl_nch); 2173 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2174 if (error == 0) 2175 goto again; 2176 } else { 2177 vput(vp); 2178 } 2179 return (error); 2180 } 2181 2182 /* 2183 * stat_args(char *path, struct stat *ub) 2184 * 2185 * Get file status; this version follows links. 2186 */ 2187 int 2188 sys_stat(struct stat_args *uap) 2189 { 2190 struct nlookupdata nd; 2191 struct stat st; 2192 int error; 2193 2194 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2195 if (error == 0) { 2196 error = kern_stat(&nd, &st); 2197 if (error == 0) 2198 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2199 } 2200 nlookup_done(&nd); 2201 return (error); 2202 } 2203 2204 /* 2205 * lstat_args(char *path, struct stat *ub) 2206 * 2207 * Get file status; this version does not follow links. 2208 */ 2209 int 2210 sys_lstat(struct lstat_args *uap) 2211 { 2212 struct nlookupdata nd; 2213 struct stat st; 2214 int error; 2215 2216 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2217 if (error == 0) { 2218 error = kern_stat(&nd, &st); 2219 if (error == 0) 2220 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2221 } 2222 nlookup_done(&nd); 2223 return (error); 2224 } 2225 2226 /* 2227 * pathconf_Args(char *path, int name) 2228 * 2229 * Get configurable pathname variables. 2230 */ 2231 /* ARGSUSED */ 2232 int 2233 sys_pathconf(struct pathconf_args *uap) 2234 { 2235 struct nlookupdata nd; 2236 struct vnode *vp; 2237 int error; 2238 2239 vp = NULL; 2240 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2241 if (error == 0) 2242 error = nlookup(&nd); 2243 if (error == 0) 2244 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2245 nlookup_done(&nd); 2246 if (error == 0) { 2247 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 2248 vput(vp); 2249 } 2250 return (error); 2251 } 2252 2253 /* 2254 * XXX: daver 2255 * kern_readlink isn't properly split yet. There is a copyin burried 2256 * in VOP_READLINK(). 2257 */ 2258 int 2259 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2260 { 2261 struct thread *td = curthread; 2262 struct proc *p = td->td_proc; 2263 struct vnode *vp; 2264 struct iovec aiov; 2265 struct uio auio; 2266 int error; 2267 2268 if ((error = nlookup(nd)) != 0) 2269 return (error); 2270 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2271 if (error) 2272 return (error); 2273 if (vp->v_type != VLNK) { 2274 error = EINVAL; 2275 } else { 2276 aiov.iov_base = buf; 2277 aiov.iov_len = count; 2278 auio.uio_iov = &aiov; 2279 auio.uio_iovcnt = 1; 2280 auio.uio_offset = 0; 2281 auio.uio_rw = UIO_READ; 2282 auio.uio_segflg = UIO_USERSPACE; 2283 auio.uio_td = td; 2284 auio.uio_resid = count; 2285 error = VOP_READLINK(vp, &auio, p->p_ucred); 2286 } 2287 vput(vp); 2288 *res = count - auio.uio_resid; 2289 return (error); 2290 } 2291 2292 /* 2293 * readlink_args(char *path, char *buf, int count) 2294 * 2295 * Return target name of a symbolic link. 2296 */ 2297 int 2298 sys_readlink(struct readlink_args *uap) 2299 { 2300 struct nlookupdata nd; 2301 int error; 2302 2303 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2304 if (error == 0) { 2305 error = kern_readlink(&nd, uap->buf, uap->count, 2306 &uap->sysmsg_result); 2307 } 2308 nlookup_done(&nd); 2309 return (error); 2310 } 2311 2312 static int 2313 setfflags(struct vnode *vp, int flags) 2314 { 2315 struct thread *td = curthread; 2316 struct proc *p = td->td_proc; 2317 int error; 2318 struct vattr vattr; 2319 2320 /* 2321 * Prevent non-root users from setting flags on devices. When 2322 * a device is reused, users can retain ownership of the device 2323 * if they are allowed to set flags and programs assume that 2324 * chown can't fail when done as root. 2325 */ 2326 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2327 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2328 return (error); 2329 2330 /* 2331 * note: vget is required for any operation that might mod the vnode 2332 * so VINACTIVE is properly cleared. 2333 */ 2334 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2335 VATTR_NULL(&vattr); 2336 vattr.va_flags = flags; 2337 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2338 vput(vp); 2339 } 2340 return (error); 2341 } 2342 2343 /* 2344 * chflags(char *path, int flags) 2345 * 2346 * Change flags of a file given a path name. 2347 */ 2348 /* ARGSUSED */ 2349 int 2350 sys_chflags(struct chflags_args *uap) 2351 { 2352 struct nlookupdata nd; 2353 struct vnode *vp; 2354 int error; 2355 2356 vp = NULL; 2357 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2358 /* XXX Add NLC flag indicating modifying operation? */ 2359 if (error == 0) 2360 error = nlookup(&nd); 2361 if (error == 0) 2362 error = ncp_writechk(&nd.nl_nch); 2363 if (error == 0) 2364 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2365 nlookup_done(&nd); 2366 if (error == 0) { 2367 error = setfflags(vp, uap->flags); 2368 vrele(vp); 2369 } 2370 return (error); 2371 } 2372 2373 /* 2374 * fchflags_args(int fd, int flags) 2375 * 2376 * Change flags of a file given a file descriptor. 2377 */ 2378 /* ARGSUSED */ 2379 int 2380 sys_fchflags(struct fchflags_args *uap) 2381 { 2382 struct thread *td = curthread; 2383 struct proc *p = td->td_proc; 2384 struct file *fp; 2385 int error; 2386 2387 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2388 return (error); 2389 if (fp->f_nchandle.ncp) 2390 error = ncp_writechk(&fp->f_nchandle); 2391 if (error == 0) 2392 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2393 fdrop(fp); 2394 return (error); 2395 } 2396 2397 static int 2398 setfmode(struct vnode *vp, int mode) 2399 { 2400 struct thread *td = curthread; 2401 struct proc *p = td->td_proc; 2402 int error; 2403 struct vattr vattr; 2404 2405 /* 2406 * note: vget is required for any operation that might mod the vnode 2407 * so VINACTIVE is properly cleared. 2408 */ 2409 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2410 VATTR_NULL(&vattr); 2411 vattr.va_mode = mode & ALLPERMS; 2412 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2413 vput(vp); 2414 } 2415 return error; 2416 } 2417 2418 int 2419 kern_chmod(struct nlookupdata *nd, int mode) 2420 { 2421 struct vnode *vp; 2422 int error; 2423 2424 /* XXX Add NLC flag indicating modifying operation? */ 2425 if ((error = nlookup(nd)) != 0) 2426 return (error); 2427 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2428 return (error); 2429 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2430 error = setfmode(vp, mode); 2431 vrele(vp); 2432 return (error); 2433 } 2434 2435 /* 2436 * chmod_args(char *path, int mode) 2437 * 2438 * Change mode of a file given path name. 2439 */ 2440 /* ARGSUSED */ 2441 int 2442 sys_chmod(struct chmod_args *uap) 2443 { 2444 struct nlookupdata nd; 2445 int error; 2446 2447 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2448 if (error == 0) 2449 error = kern_chmod(&nd, uap->mode); 2450 nlookup_done(&nd); 2451 return (error); 2452 } 2453 2454 /* 2455 * lchmod_args(char *path, int mode) 2456 * 2457 * Change mode of a file given path name (don't follow links.) 2458 */ 2459 /* ARGSUSED */ 2460 int 2461 sys_lchmod(struct lchmod_args *uap) 2462 { 2463 struct nlookupdata nd; 2464 int error; 2465 2466 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2467 if (error == 0) 2468 error = kern_chmod(&nd, uap->mode); 2469 nlookup_done(&nd); 2470 return (error); 2471 } 2472 2473 /* 2474 * fchmod_args(int fd, int mode) 2475 * 2476 * Change mode of a file given a file descriptor. 2477 */ 2478 /* ARGSUSED */ 2479 int 2480 sys_fchmod(struct fchmod_args *uap) 2481 { 2482 struct thread *td = curthread; 2483 struct proc *p = td->td_proc; 2484 struct file *fp; 2485 int error; 2486 2487 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2488 return (error); 2489 if (fp->f_nchandle.ncp) 2490 error = ncp_writechk(&fp->f_nchandle); 2491 if (error == 0) 2492 error = setfmode((struct vnode *)fp->f_data, uap->mode); 2493 fdrop(fp); 2494 return (error); 2495 } 2496 2497 static int 2498 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2499 { 2500 struct thread *td = curthread; 2501 struct proc *p = td->td_proc; 2502 int error; 2503 struct vattr vattr; 2504 2505 /* 2506 * note: vget is required for any operation that might mod the vnode 2507 * so VINACTIVE is properly cleared. 2508 */ 2509 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2510 VATTR_NULL(&vattr); 2511 vattr.va_uid = uid; 2512 vattr.va_gid = gid; 2513 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2514 vput(vp); 2515 } 2516 return error; 2517 } 2518 2519 int 2520 kern_chown(struct nlookupdata *nd, int uid, int gid) 2521 { 2522 struct vnode *vp; 2523 int error; 2524 2525 /* XXX Add NLC flag indicating modifying operation? */ 2526 if ((error = nlookup(nd)) != 0) 2527 return (error); 2528 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2529 return (error); 2530 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2531 error = setfown(vp, uid, gid); 2532 vrele(vp); 2533 return (error); 2534 } 2535 2536 /* 2537 * chown(char *path, int uid, int gid) 2538 * 2539 * Set ownership given a path name. 2540 */ 2541 int 2542 sys_chown(struct chown_args *uap) 2543 { 2544 struct nlookupdata nd; 2545 int error; 2546 2547 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2548 if (error == 0) 2549 error = kern_chown(&nd, uap->uid, uap->gid); 2550 nlookup_done(&nd); 2551 return (error); 2552 } 2553 2554 /* 2555 * lchown_args(char *path, int uid, int gid) 2556 * 2557 * Set ownership given a path name, do not cross symlinks. 2558 */ 2559 int 2560 sys_lchown(struct lchown_args *uap) 2561 { 2562 struct nlookupdata nd; 2563 int error; 2564 2565 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2566 if (error == 0) 2567 error = kern_chown(&nd, uap->uid, uap->gid); 2568 nlookup_done(&nd); 2569 return (error); 2570 } 2571 2572 /* 2573 * fchown_args(int fd, int uid, int gid) 2574 * 2575 * Set ownership given a file descriptor. 2576 */ 2577 /* ARGSUSED */ 2578 int 2579 sys_fchown(struct fchown_args *uap) 2580 { 2581 struct thread *td = curthread; 2582 struct proc *p = td->td_proc; 2583 struct file *fp; 2584 int error; 2585 2586 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2587 return (error); 2588 if (fp->f_nchandle.ncp) 2589 error = ncp_writechk(&fp->f_nchandle); 2590 if (error == 0) 2591 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 2592 fdrop(fp); 2593 return (error); 2594 } 2595 2596 static int 2597 getutimes(const struct timeval *tvp, struct timespec *tsp) 2598 { 2599 struct timeval tv[2]; 2600 2601 if (tvp == NULL) { 2602 microtime(&tv[0]); 2603 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2604 tsp[1] = tsp[0]; 2605 } else { 2606 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2607 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2608 } 2609 return 0; 2610 } 2611 2612 static int 2613 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2614 { 2615 struct thread *td = curthread; 2616 struct proc *p = td->td_proc; 2617 int error; 2618 struct vattr vattr; 2619 2620 /* 2621 * note: vget is required for any operation that might mod the vnode 2622 * so VINACTIVE is properly cleared. 2623 */ 2624 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2625 VATTR_NULL(&vattr); 2626 vattr.va_atime = ts[0]; 2627 vattr.va_mtime = ts[1]; 2628 if (nullflag) 2629 vattr.va_vaflags |= VA_UTIMES_NULL; 2630 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2631 vput(vp); 2632 } 2633 return error; 2634 } 2635 2636 int 2637 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2638 { 2639 struct timespec ts[2]; 2640 struct vnode *vp; 2641 int error; 2642 2643 if ((error = getutimes(tptr, ts)) != 0) 2644 return (error); 2645 /* XXX Add NLC flag indicating modifying operation? */ 2646 if ((error = nlookup(nd)) != 0) 2647 return (error); 2648 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2649 return (error); 2650 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2651 return (error); 2652 error = setutimes(vp, ts, tptr == NULL); 2653 vrele(vp); 2654 return (error); 2655 } 2656 2657 /* 2658 * utimes_args(char *path, struct timeval *tptr) 2659 * 2660 * Set the access and modification times of a file. 2661 */ 2662 int 2663 sys_utimes(struct utimes_args *uap) 2664 { 2665 struct timeval tv[2]; 2666 struct nlookupdata nd; 2667 int error; 2668 2669 if (uap->tptr) { 2670 error = copyin(uap->tptr, tv, sizeof(tv)); 2671 if (error) 2672 return (error); 2673 } 2674 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2675 if (error == 0) 2676 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2677 nlookup_done(&nd); 2678 return (error); 2679 } 2680 2681 /* 2682 * lutimes_args(char *path, struct timeval *tptr) 2683 * 2684 * Set the access and modification times of a file. 2685 */ 2686 int 2687 sys_lutimes(struct lutimes_args *uap) 2688 { 2689 struct timeval tv[2]; 2690 struct nlookupdata nd; 2691 int error; 2692 2693 if (uap->tptr) { 2694 error = copyin(uap->tptr, tv, sizeof(tv)); 2695 if (error) 2696 return (error); 2697 } 2698 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2699 if (error == 0) 2700 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2701 nlookup_done(&nd); 2702 return (error); 2703 } 2704 2705 int 2706 kern_futimes(int fd, struct timeval *tptr) 2707 { 2708 struct thread *td = curthread; 2709 struct proc *p = td->td_proc; 2710 struct timespec ts[2]; 2711 struct file *fp; 2712 int error; 2713 2714 error = getutimes(tptr, ts); 2715 if (error) 2716 return (error); 2717 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2718 return (error); 2719 if (fp->f_nchandle.ncp) 2720 error = ncp_writechk(&fp->f_nchandle); 2721 if (error == 0) 2722 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2723 fdrop(fp); 2724 return (error); 2725 } 2726 2727 /* 2728 * futimes_args(int fd, struct timeval *tptr) 2729 * 2730 * Set the access and modification times of a file. 2731 */ 2732 int 2733 sys_futimes(struct futimes_args *uap) 2734 { 2735 struct timeval tv[2]; 2736 int error; 2737 2738 if (uap->tptr) { 2739 error = copyin(uap->tptr, tv, sizeof(tv)); 2740 if (error) 2741 return (error); 2742 } 2743 2744 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2745 2746 return (error); 2747 } 2748 2749 int 2750 kern_truncate(struct nlookupdata *nd, off_t length) 2751 { 2752 struct vnode *vp; 2753 struct vattr vattr; 2754 int error; 2755 2756 if (length < 0) 2757 return(EINVAL); 2758 /* XXX Add NLC flag indicating modifying operation? */ 2759 if ((error = nlookup(nd)) != 0) 2760 return (error); 2761 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2762 return (error); 2763 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2764 return (error); 2765 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 2766 vrele(vp); 2767 return (error); 2768 } 2769 if (vp->v_type == VDIR) { 2770 error = EISDIR; 2771 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 && 2772 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) { 2773 VATTR_NULL(&vattr); 2774 vattr.va_size = length; 2775 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 2776 } 2777 vput(vp); 2778 return (error); 2779 } 2780 2781 /* 2782 * truncate(char *path, int pad, off_t length) 2783 * 2784 * Truncate a file given its path name. 2785 */ 2786 int 2787 sys_truncate(struct truncate_args *uap) 2788 { 2789 struct nlookupdata nd; 2790 int error; 2791 2792 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2793 if (error == 0) 2794 error = kern_truncate(&nd, uap->length); 2795 nlookup_done(&nd); 2796 return error; 2797 } 2798 2799 int 2800 kern_ftruncate(int fd, off_t length) 2801 { 2802 struct thread *td = curthread; 2803 struct proc *p = td->td_proc; 2804 struct vattr vattr; 2805 struct vnode *vp; 2806 struct file *fp; 2807 int error; 2808 2809 if (length < 0) 2810 return(EINVAL); 2811 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2812 return (error); 2813 if (fp->f_nchandle.ncp) { 2814 error = ncp_writechk(&fp->f_nchandle); 2815 if (error) 2816 goto done; 2817 } 2818 if ((fp->f_flag & FWRITE) == 0) { 2819 error = EINVAL; 2820 goto done; 2821 } 2822 vp = (struct vnode *)fp->f_data; 2823 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2824 if (vp->v_type == VDIR) { 2825 error = EISDIR; 2826 } else if ((error = vn_writechk(vp, NULL)) == 0) { 2827 VATTR_NULL(&vattr); 2828 vattr.va_size = length; 2829 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 2830 } 2831 vn_unlock(vp); 2832 done: 2833 fdrop(fp); 2834 return (error); 2835 } 2836 2837 /* 2838 * ftruncate_args(int fd, int pad, off_t length) 2839 * 2840 * Truncate a file given a file descriptor. 2841 */ 2842 int 2843 sys_ftruncate(struct ftruncate_args *uap) 2844 { 2845 int error; 2846 2847 error = kern_ftruncate(uap->fd, uap->length); 2848 2849 return (error); 2850 } 2851 2852 /* 2853 * fsync(int fd) 2854 * 2855 * Sync an open file. 2856 */ 2857 /* ARGSUSED */ 2858 int 2859 sys_fsync(struct fsync_args *uap) 2860 { 2861 struct thread *td = curthread; 2862 struct proc *p = td->td_proc; 2863 struct vnode *vp; 2864 struct file *fp; 2865 vm_object_t obj; 2866 int error; 2867 2868 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2869 return (error); 2870 vp = (struct vnode *)fp->f_data; 2871 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2872 if ((obj = vp->v_object) != NULL) 2873 vm_object_page_clean(obj, 0, 0, 0); 2874 if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && 2875 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && 2876 bioops.io_fsync) { 2877 error = (*bioops.io_fsync)(vp); 2878 } 2879 vn_unlock(vp); 2880 fdrop(fp); 2881 return (error); 2882 } 2883 2884 int 2885 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 2886 { 2887 struct nchandle fnchd; 2888 struct nchandle tnchd; 2889 struct namecache *ncp; 2890 struct vnode *fdvp; 2891 struct vnode *tdvp; 2892 struct mount *mp; 2893 int error; 2894 2895 bwillwrite(); 2896 if ((error = nlookup(fromnd)) != 0) 2897 return (error); 2898 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 2899 return (ENOENT); 2900 fnchd.mount = fromnd->nl_nch.mount; 2901 cache_hold(&fnchd); 2902 2903 /* 2904 * unlock the source nch so we can lookup the target nch without 2905 * deadlocking. The target may or may not exist so we do not check 2906 * for a target vp like kern_mkdir() and other creation functions do. 2907 * 2908 * The source and target directories are ref'd and rechecked after 2909 * everything is relocked to determine if the source or target file 2910 * has been renamed. 2911 */ 2912 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 2913 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 2914 cache_unlock(&fromnd->nl_nch); 2915 2916 tond->nl_flags |= NLC_CREATE; 2917 if ((error = nlookup(tond)) != 0) { 2918 cache_drop(&fnchd); 2919 return (error); 2920 } 2921 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 2922 cache_drop(&fnchd); 2923 return (ENOENT); 2924 } 2925 tnchd.mount = tond->nl_nch.mount; 2926 cache_hold(&tnchd); 2927 2928 /* 2929 * If the source and target are the same there is nothing to do 2930 */ 2931 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 2932 cache_drop(&fnchd); 2933 cache_drop(&tnchd); 2934 return (0); 2935 } 2936 2937 /* 2938 * Mount points cannot be renamed or overwritten 2939 */ 2940 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 2941 NCF_ISMOUNTPT 2942 ) { 2943 cache_drop(&fnchd); 2944 cache_drop(&tnchd); 2945 return (EINVAL); 2946 } 2947 2948 /* 2949 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 2950 * may have become invalid while it was unlocked, nc_vp and nc_mount 2951 * could be NULL. 2952 */ 2953 if (cache_lock_nonblock(&fromnd->nl_nch) == 0) { 2954 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 2955 } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) { 2956 cache_lock(&fromnd->nl_nch); 2957 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 2958 } else { 2959 cache_unlock(&tond->nl_nch); 2960 cache_lock(&fromnd->nl_nch); 2961 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 2962 cache_lock(&tond->nl_nch); 2963 cache_resolve(&tond->nl_nch, tond->nl_cred); 2964 } 2965 fromnd->nl_flags |= NLC_NCPISLOCKED; 2966 2967 /* 2968 * make sure the parent directories linkages are the same 2969 */ 2970 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 2971 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 2972 cache_drop(&fnchd); 2973 cache_drop(&tnchd); 2974 return (ENOENT); 2975 } 2976 2977 /* 2978 * Both the source and target must be within the same filesystem and 2979 * in the same filesystem as their parent directories within the 2980 * namecache topology. 2981 * 2982 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 2983 */ 2984 mp = fnchd.mount; 2985 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 2986 mp != tond->nl_nch.mount) { 2987 cache_drop(&fnchd); 2988 cache_drop(&tnchd); 2989 return (EXDEV); 2990 } 2991 2992 /* 2993 * Make sure the mount point is writable 2994 */ 2995 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 2996 cache_drop(&fnchd); 2997 cache_drop(&tnchd); 2998 return (error); 2999 } 3000 3001 /* 3002 * If the target exists and either the source or target is a directory, 3003 * then both must be directories. 3004 * 3005 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3006 * have become NULL. 3007 */ 3008 if (tond->nl_nch.ncp->nc_vp) { 3009 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3010 error = ENOENT; 3011 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3012 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3013 error = ENOTDIR; 3014 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3015 error = EISDIR; 3016 } 3017 } 3018 3019 /* 3020 * You cannot rename a source into itself or a subdirectory of itself. 3021 * We check this by travsersing the target directory upwards looking 3022 * for a match against the source. 3023 */ 3024 if (error == 0) { 3025 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3026 if (fromnd->nl_nch.ncp == ncp) { 3027 error = EINVAL; 3028 break; 3029 } 3030 } 3031 } 3032 3033 cache_drop(&fnchd); 3034 cache_drop(&tnchd); 3035 3036 /* 3037 * Even though the namespaces are different, they may still represent 3038 * hardlinks to the same file. The filesystem might have a hard time 3039 * with this so we issue a NREMOVE of the source instead of a NRENAME 3040 * when we detect the situation. 3041 */ 3042 if (error == 0) { 3043 fdvp = fromnd->nl_nch.ncp->nc_parent->nc_vp; 3044 tdvp = tond->nl_nch.ncp->nc_parent->nc_vp; 3045 if (fdvp == NULL || tdvp == NULL) { 3046 error = EPERM; 3047 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3048 /* vhold(fdvp); - dvp can't go away */ 3049 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3050 fromnd->nl_cred); 3051 /* vdrop(fdvp); */ 3052 } else { 3053 /* vhold(fdvp); - dvp can't go away */ 3054 /* vhold(tdvp); - dvp can't go away */ 3055 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3056 fdvp, tdvp, tond->nl_cred); 3057 /* vdrop(fdvp); */ 3058 /* vdrop(tdvp); */ 3059 } 3060 } 3061 return (error); 3062 } 3063 3064 /* 3065 * rename_args(char *from, char *to) 3066 * 3067 * Rename files. Source and destination must either both be directories, 3068 * or both not be directories. If target is a directory, it must be empty. 3069 */ 3070 int 3071 sys_rename(struct rename_args *uap) 3072 { 3073 struct nlookupdata fromnd, tond; 3074 int error; 3075 3076 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3077 if (error == 0) { 3078 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3079 if (error == 0) 3080 error = kern_rename(&fromnd, &tond); 3081 nlookup_done(&tond); 3082 } 3083 nlookup_done(&fromnd); 3084 return (error); 3085 } 3086 3087 int 3088 kern_mkdir(struct nlookupdata *nd, int mode) 3089 { 3090 struct thread *td = curthread; 3091 struct proc *p = td->td_proc; 3092 struct vnode *vp; 3093 struct vnode *dvp; 3094 struct vattr vattr; 3095 int error; 3096 3097 bwillwrite(); 3098 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE; 3099 if ((error = nlookup(nd)) != 0) 3100 return (error); 3101 3102 if (nd->nl_nch.ncp->nc_vp) 3103 return (EEXIST); 3104 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3105 return (error); 3106 if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) 3107 return (EPERM); 3108 /* vhold(dvp); - dvp can't go away */ 3109 VATTR_NULL(&vattr); 3110 vattr.va_type = VDIR; 3111 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3112 3113 vp = NULL; 3114 error = VOP_NMKDIR(&nd->nl_nch, dvp, &vp, p->p_ucred, &vattr); 3115 /* vdrop(dvp); */ 3116 if (error == 0) 3117 vput(vp); 3118 return (error); 3119 } 3120 3121 /* 3122 * mkdir_args(char *path, int mode) 3123 * 3124 * Make a directory file. 3125 */ 3126 /* ARGSUSED */ 3127 int 3128 sys_mkdir(struct mkdir_args *uap) 3129 { 3130 struct nlookupdata nd; 3131 int error; 3132 3133 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3134 if (error == 0) 3135 error = kern_mkdir(&nd, uap->mode); 3136 nlookup_done(&nd); 3137 return (error); 3138 } 3139 3140 int 3141 kern_rmdir(struct nlookupdata *nd) 3142 { 3143 struct vnode *dvp; 3144 int error; 3145 3146 bwillwrite(); 3147 nd->nl_flags |= NLC_DELETE; 3148 if ((error = nlookup(nd)) != 0) 3149 return (error); 3150 3151 /* 3152 * Do not allow directories representing mount points to be 3153 * deleted, even if empty. Check write perms on mount point 3154 * in case the vnode is aliased (aka nullfs). 3155 */ 3156 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3157 return (EINVAL); 3158 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3159 return (error); 3160 if ((dvp = nd->nl_nch.ncp->nc_parent->nc_vp) == NULL) 3161 return (EPERM); 3162 /* vhold(dvp); - dvp can't go away */ 3163 error = VOP_NRMDIR(&nd->nl_nch, dvp, nd->nl_cred); 3164 /* vdrop(dvp); */ 3165 return (error); 3166 } 3167 3168 /* 3169 * rmdir_args(char *path) 3170 * 3171 * Remove a directory file. 3172 */ 3173 /* ARGSUSED */ 3174 int 3175 sys_rmdir(struct rmdir_args *uap) 3176 { 3177 struct nlookupdata nd; 3178 int error; 3179 3180 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3181 if (error == 0) 3182 error = kern_rmdir(&nd); 3183 nlookup_done(&nd); 3184 return (error); 3185 } 3186 3187 int 3188 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3189 enum uio_seg direction) 3190 { 3191 struct thread *td = curthread; 3192 struct proc *p = td->td_proc; 3193 struct vnode *vp; 3194 struct file *fp; 3195 struct uio auio; 3196 struct iovec aiov; 3197 long loff; 3198 int error, eofflag; 3199 3200 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3201 return (error); 3202 if ((fp->f_flag & FREAD) == 0) { 3203 error = EBADF; 3204 goto done; 3205 } 3206 vp = (struct vnode *)fp->f_data; 3207 unionread: 3208 if (vp->v_type != VDIR) { 3209 error = EINVAL; 3210 goto done; 3211 } 3212 aiov.iov_base = buf; 3213 aiov.iov_len = count; 3214 auio.uio_iov = &aiov; 3215 auio.uio_iovcnt = 1; 3216 auio.uio_rw = UIO_READ; 3217 auio.uio_segflg = direction; 3218 auio.uio_td = td; 3219 auio.uio_resid = count; 3220 loff = auio.uio_offset = fp->f_offset; 3221 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3222 fp->f_offset = auio.uio_offset; 3223 if (error) 3224 goto done; 3225 if (count == auio.uio_resid) { 3226 if (union_dircheckp) { 3227 error = union_dircheckp(td, &vp, fp); 3228 if (error == -1) 3229 goto unionread; 3230 if (error) 3231 goto done; 3232 } 3233 #if 0 3234 if ((vp->v_flag & VROOT) && 3235 (vp->v_mount->mnt_flag & MNT_UNION)) { 3236 struct vnode *tvp = vp; 3237 vp = vp->v_mount->mnt_vnodecovered; 3238 vref(vp); 3239 fp->f_data = vp; 3240 fp->f_offset = 0; 3241 vrele(tvp); 3242 goto unionread; 3243 } 3244 #endif 3245 } 3246 if (basep) { 3247 *basep = loff; 3248 } 3249 *res = count - auio.uio_resid; 3250 done: 3251 fdrop(fp); 3252 return (error); 3253 } 3254 3255 /* 3256 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 3257 * 3258 * Read a block of directory entries in a file system independent format. 3259 */ 3260 int 3261 sys_getdirentries(struct getdirentries_args *uap) 3262 { 3263 long base; 3264 int error; 3265 3266 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3267 &uap->sysmsg_result, UIO_USERSPACE); 3268 3269 if (error == 0) 3270 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3271 return (error); 3272 } 3273 3274 /* 3275 * getdents_args(int fd, char *buf, size_t count) 3276 */ 3277 int 3278 sys_getdents(struct getdents_args *uap) 3279 { 3280 int error; 3281 3282 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3283 &uap->sysmsg_result, UIO_USERSPACE); 3284 3285 return (error); 3286 } 3287 3288 /* 3289 * umask(int newmask) 3290 * 3291 * Set the mode mask for creation of filesystem nodes. 3292 * 3293 * MP SAFE 3294 */ 3295 int 3296 sys_umask(struct umask_args *uap) 3297 { 3298 struct thread *td = curthread; 3299 struct proc *p = td->td_proc; 3300 struct filedesc *fdp; 3301 3302 fdp = p->p_fd; 3303 uap->sysmsg_result = fdp->fd_cmask; 3304 fdp->fd_cmask = uap->newmask & ALLPERMS; 3305 return (0); 3306 } 3307 3308 /* 3309 * revoke(char *path) 3310 * 3311 * Void all references to file by ripping underlying filesystem 3312 * away from vnode. 3313 */ 3314 /* ARGSUSED */ 3315 int 3316 sys_revoke(struct revoke_args *uap) 3317 { 3318 struct nlookupdata nd; 3319 struct vattr vattr; 3320 struct vnode *vp; 3321 struct ucred *cred; 3322 int error; 3323 3324 vp = NULL; 3325 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3326 if (error == 0) 3327 error = nlookup(&nd); 3328 if (error == 0) 3329 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3330 cred = crhold(nd.nl_cred); 3331 nlookup_done(&nd); 3332 if (error == 0) { 3333 if (vp->v_type != VCHR && vp->v_type != VBLK) 3334 error = EINVAL; 3335 if (error == 0) 3336 error = VOP_GETATTR(vp, &vattr); 3337 if (error == 0 && cred->cr_uid != vattr.va_uid) 3338 error = suser_cred(cred, PRISON_ROOT); 3339 if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) { 3340 error = 0; 3341 vx_lock(vp); 3342 VOP_REVOKE(vp, REVOKEALL); 3343 vx_unlock(vp); 3344 } 3345 vrele(vp); 3346 } 3347 if (cred) 3348 crfree(cred); 3349 return (error); 3350 } 3351 3352 /* 3353 * getfh_args(char *fname, fhandle_t *fhp) 3354 * 3355 * Get (NFS) file handle 3356 */ 3357 int 3358 sys_getfh(struct getfh_args *uap) 3359 { 3360 struct thread *td = curthread; 3361 struct nlookupdata nd; 3362 fhandle_t fh; 3363 struct vnode *vp; 3364 int error; 3365 3366 /* 3367 * Must be super user 3368 */ 3369 if ((error = suser(td)) != 0) 3370 return (error); 3371 3372 vp = NULL; 3373 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3374 if (error == 0) 3375 error = nlookup(&nd); 3376 if (error == 0) 3377 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3378 nlookup_done(&nd); 3379 if (error == 0) { 3380 bzero(&fh, sizeof(fh)); 3381 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3382 error = VFS_VPTOFH(vp, &fh.fh_fid); 3383 vput(vp); 3384 if (error == 0) 3385 error = copyout(&fh, uap->fhp, sizeof(fh)); 3386 } 3387 return (error); 3388 } 3389 3390 /* 3391 * fhopen_args(const struct fhandle *u_fhp, int flags) 3392 * 3393 * syscall for the rpc.lockd to use to translate a NFS file handle into 3394 * an open descriptor. 3395 * 3396 * warning: do not remove the suser() call or this becomes one giant 3397 * security hole. 3398 */ 3399 int 3400 sys_fhopen(struct fhopen_args *uap) 3401 { 3402 struct thread *td = curthread; 3403 struct proc *p = td->td_proc; 3404 struct mount *mp; 3405 struct vnode *vp; 3406 struct fhandle fhp; 3407 struct vattr vat; 3408 struct vattr *vap = &vat; 3409 struct flock lf; 3410 int fmode, mode, error, type; 3411 struct file *nfp; 3412 struct file *fp; 3413 int indx; 3414 3415 /* 3416 * Must be super user 3417 */ 3418 error = suser(td); 3419 if (error) 3420 return (error); 3421 3422 fmode = FFLAGS(uap->flags); 3423 /* why not allow a non-read/write open for our lockd? */ 3424 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3425 return (EINVAL); 3426 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3427 if (error) 3428 return(error); 3429 /* find the mount point */ 3430 mp = vfs_getvfs(&fhp.fh_fsid); 3431 if (mp == NULL) 3432 return (ESTALE); 3433 /* now give me my vnode, it gets returned to me locked */ 3434 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); 3435 if (error) 3436 return (error); 3437 /* 3438 * from now on we have to make sure not 3439 * to forget about the vnode 3440 * any error that causes an abort must vput(vp) 3441 * just set error = err and 'goto bad;'. 3442 */ 3443 3444 /* 3445 * from vn_open 3446 */ 3447 if (vp->v_type == VLNK) { 3448 error = EMLINK; 3449 goto bad; 3450 } 3451 if (vp->v_type == VSOCK) { 3452 error = EOPNOTSUPP; 3453 goto bad; 3454 } 3455 mode = 0; 3456 if (fmode & (FWRITE | O_TRUNC)) { 3457 if (vp->v_type == VDIR) { 3458 error = EISDIR; 3459 goto bad; 3460 } 3461 error = vn_writechk(vp, NULL); 3462 if (error) 3463 goto bad; 3464 mode |= VWRITE; 3465 } 3466 if (fmode & FREAD) 3467 mode |= VREAD; 3468 if (mode) { 3469 error = VOP_ACCESS(vp, mode, p->p_ucred); 3470 if (error) 3471 goto bad; 3472 } 3473 if (fmode & O_TRUNC) { 3474 vn_unlock(vp); /* XXX */ 3475 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 3476 VATTR_NULL(vap); 3477 vap->va_size = 0; 3478 error = VOP_SETATTR(vp, vap, p->p_ucred); 3479 if (error) 3480 goto bad; 3481 } 3482 3483 /* 3484 * VOP_OPEN needs the file pointer so it can potentially override 3485 * it. 3486 * 3487 * WARNING! no f_nchandle will be associated when fhopen()ing a 3488 * directory. XXX 3489 */ 3490 if ((error = falloc(p, &nfp, &indx)) != 0) 3491 goto bad; 3492 fp = nfp; 3493 3494 error = VOP_OPEN(vp, fmode, p->p_ucred, fp); 3495 if (error) { 3496 /* 3497 * setting f_ops this way prevents VOP_CLOSE from being 3498 * called or fdrop() releasing the vp from v_data. Since 3499 * the VOP_OPEN failed we don't want to VOP_CLOSE. 3500 */ 3501 fp->f_ops = &badfileops; 3502 fp->f_data = NULL; 3503 goto bad_drop; 3504 } 3505 3506 /* 3507 * The fp is given its own reference, we still have our ref and lock. 3508 * 3509 * Assert that all regular files must be created with a VM object. 3510 */ 3511 if (vp->v_type == VREG && vp->v_object == NULL) { 3512 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 3513 goto bad_drop; 3514 } 3515 3516 /* 3517 * The open was successful. Handle any locking requirements. 3518 */ 3519 if (fmode & (O_EXLOCK | O_SHLOCK)) { 3520 lf.l_whence = SEEK_SET; 3521 lf.l_start = 0; 3522 lf.l_len = 0; 3523 if (fmode & O_EXLOCK) 3524 lf.l_type = F_WRLCK; 3525 else 3526 lf.l_type = F_RDLCK; 3527 if (fmode & FNONBLOCK) 3528 type = 0; 3529 else 3530 type = F_WAIT; 3531 vn_unlock(vp); 3532 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 3533 /* 3534 * release our private reference. 3535 */ 3536 fsetfd(p, NULL, indx); 3537 fdrop(fp); 3538 vrele(vp); 3539 return (error); 3540 } 3541 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3542 fp->f_flag |= FHASLOCK; 3543 } 3544 3545 /* 3546 * Clean up. Associate the file pointer with the previously 3547 * reserved descriptor and return it. 3548 */ 3549 vput(vp); 3550 fsetfd(p, fp, indx); 3551 fdrop(fp); 3552 uap->sysmsg_result = indx; 3553 return (0); 3554 3555 bad_drop: 3556 fsetfd(p, NULL, indx); 3557 fdrop(fp); 3558 bad: 3559 vput(vp); 3560 return (error); 3561 } 3562 3563 /* 3564 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 3565 */ 3566 int 3567 sys_fhstat(struct fhstat_args *uap) 3568 { 3569 struct thread *td = curthread; 3570 struct stat sb; 3571 fhandle_t fh; 3572 struct mount *mp; 3573 struct vnode *vp; 3574 int error; 3575 3576 /* 3577 * Must be super user 3578 */ 3579 error = suser(td); 3580 if (error) 3581 return (error); 3582 3583 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 3584 if (error) 3585 return (error); 3586 3587 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3588 return (ESTALE); 3589 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3590 return (error); 3591 error = vn_stat(vp, &sb, td->td_proc->p_ucred); 3592 vput(vp); 3593 if (error) 3594 return (error); 3595 error = copyout(&sb, uap->sb, sizeof(sb)); 3596 return (error); 3597 } 3598 3599 /* 3600 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 3601 */ 3602 int 3603 sys_fhstatfs(struct fhstatfs_args *uap) 3604 { 3605 struct thread *td = curthread; 3606 struct proc *p = td->td_proc; 3607 struct statfs *sp; 3608 struct mount *mp; 3609 struct vnode *vp; 3610 struct statfs sb; 3611 char *fullpath, *freepath; 3612 fhandle_t fh; 3613 int error; 3614 3615 /* 3616 * Must be super user 3617 */ 3618 if ((error = suser(td))) 3619 return (error); 3620 3621 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3622 return (error); 3623 3624 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3625 return (ESTALE); 3626 3627 if (p != NULL && !chroot_visible_mnt(mp, p)) 3628 return (ESTALE); 3629 3630 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3631 return (error); 3632 mp = vp->v_mount; 3633 sp = &mp->mnt_stat; 3634 vput(vp); 3635 if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0) 3636 return (error); 3637 3638 error = mount_path(p, mp, &fullpath, &freepath); 3639 if (error) 3640 return(error); 3641 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3642 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3643 kfree(freepath, M_TEMP); 3644 3645 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3646 if (suser(td)) { 3647 bcopy(sp, &sb, sizeof(sb)); 3648 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3649 sp = &sb; 3650 } 3651 return (copyout(sp, uap->buf, sizeof(*sp))); 3652 } 3653 3654 /* 3655 * Syscall to push extended attribute configuration information into the 3656 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3657 * a command (int cmd), and attribute name and misc data. For now, the 3658 * attribute name is left in userspace for consumption by the VFS_op. 3659 * It will probably be changed to be copied into sysspace by the 3660 * syscall in the future, once issues with various consumers of the 3661 * attribute code have raised their hands. 3662 * 3663 * Currently this is used only by UFS Extended Attributes. 3664 */ 3665 int 3666 sys_extattrctl(struct extattrctl_args *uap) 3667 { 3668 struct nlookupdata nd; 3669 struct mount *mp; 3670 struct vnode *vp; 3671 int error; 3672 3673 vp = NULL; 3674 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3675 if (error == 0) 3676 error = nlookup(&nd); 3677 if (error == 0) { 3678 mp = nd.nl_nch.mount; 3679 error = VFS_EXTATTRCTL(mp, uap->cmd, 3680 uap->attrname, uap->arg, 3681 nd.nl_cred); 3682 } 3683 nlookup_done(&nd); 3684 return (error); 3685 } 3686 3687 /* 3688 * Syscall to set a named extended attribute on a file or directory. 3689 * Accepts attribute name, and a uio structure pointing to the data to set. 3690 * The uio is consumed in the style of writev(). The real work happens 3691 * in VOP_SETEXTATTR(). 3692 */ 3693 int 3694 sys_extattr_set_file(struct extattr_set_file_args *uap) 3695 { 3696 char attrname[EXTATTR_MAXNAMELEN]; 3697 struct iovec aiov[UIO_SMALLIOV]; 3698 struct iovec *needfree; 3699 struct nlookupdata nd; 3700 struct iovec *iov; 3701 struct vnode *vp; 3702 struct uio auio; 3703 u_int iovlen; 3704 u_int cnt; 3705 int error; 3706 int i; 3707 3708 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3709 if (error) 3710 return (error); 3711 3712 vp = NULL; 3713 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3714 if (error == 0) 3715 error = nlookup(&nd); 3716 if (error == 0) 3717 error = ncp_writechk(&nd.nl_nch); 3718 if (error == 0) 3719 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3720 if (error) { 3721 nlookup_done(&nd); 3722 return (error); 3723 } 3724 3725 needfree = NULL; 3726 iovlen = uap->iovcnt * sizeof(struct iovec); 3727 if (uap->iovcnt > UIO_SMALLIOV) { 3728 if (uap->iovcnt > UIO_MAXIOV) { 3729 error = EINVAL; 3730 goto done; 3731 } 3732 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3733 needfree = iov; 3734 } else { 3735 iov = aiov; 3736 } 3737 auio.uio_iov = iov; 3738 auio.uio_iovcnt = uap->iovcnt; 3739 auio.uio_rw = UIO_WRITE; 3740 auio.uio_segflg = UIO_USERSPACE; 3741 auio.uio_td = nd.nl_td; 3742 auio.uio_offset = 0; 3743 if ((error = copyin(uap->iovp, iov, iovlen))) 3744 goto done; 3745 auio.uio_resid = 0; 3746 for (i = 0; i < uap->iovcnt; i++) { 3747 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3748 error = EINVAL; 3749 goto done; 3750 } 3751 auio.uio_resid += iov->iov_len; 3752 iov++; 3753 } 3754 cnt = auio.uio_resid; 3755 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred); 3756 cnt -= auio.uio_resid; 3757 uap->sysmsg_result = cnt; 3758 done: 3759 vput(vp); 3760 nlookup_done(&nd); 3761 if (needfree) 3762 FREE(needfree, M_IOV); 3763 return (error); 3764 } 3765 3766 /* 3767 * Syscall to get a named extended attribute on a file or directory. 3768 * Accepts attribute name, and a uio structure pointing to a buffer for the 3769 * data. The uio is consumed in the style of readv(). The real work 3770 * happens in VOP_GETEXTATTR(); 3771 */ 3772 int 3773 sys_extattr_get_file(struct extattr_get_file_args *uap) 3774 { 3775 char attrname[EXTATTR_MAXNAMELEN]; 3776 struct iovec aiov[UIO_SMALLIOV]; 3777 struct iovec *needfree; 3778 struct nlookupdata nd; 3779 struct iovec *iov; 3780 struct vnode *vp; 3781 struct uio auio; 3782 u_int iovlen; 3783 u_int cnt; 3784 int error; 3785 int i; 3786 3787 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3788 if (error) 3789 return (error); 3790 3791 vp = NULL; 3792 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3793 if (error == 0) 3794 error = nlookup(&nd); 3795 if (error == 0) 3796 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3797 if (error) { 3798 nlookup_done(&nd); 3799 return (error); 3800 } 3801 3802 iovlen = uap->iovcnt * sizeof (struct iovec); 3803 needfree = NULL; 3804 if (uap->iovcnt > UIO_SMALLIOV) { 3805 if (uap->iovcnt > UIO_MAXIOV) { 3806 error = EINVAL; 3807 goto done; 3808 } 3809 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3810 needfree = iov; 3811 } else { 3812 iov = aiov; 3813 } 3814 auio.uio_iov = iov; 3815 auio.uio_iovcnt = uap->iovcnt; 3816 auio.uio_rw = UIO_READ; 3817 auio.uio_segflg = UIO_USERSPACE; 3818 auio.uio_td = nd.nl_td; 3819 auio.uio_offset = 0; 3820 if ((error = copyin(uap->iovp, iov, iovlen))) 3821 goto done; 3822 auio.uio_resid = 0; 3823 for (i = 0; i < uap->iovcnt; i++) { 3824 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3825 error = EINVAL; 3826 goto done; 3827 } 3828 auio.uio_resid += iov->iov_len; 3829 iov++; 3830 } 3831 cnt = auio.uio_resid; 3832 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred); 3833 cnt -= auio.uio_resid; 3834 uap->sysmsg_result = cnt; 3835 done: 3836 vput(vp); 3837 nlookup_done(&nd); 3838 if (needfree) 3839 FREE(needfree, M_IOV); 3840 return(error); 3841 } 3842 3843 /* 3844 * Syscall to delete a named extended attribute from a file or directory. 3845 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 3846 */ 3847 int 3848 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 3849 { 3850 char attrname[EXTATTR_MAXNAMELEN]; 3851 struct nlookupdata nd; 3852 struct vnode *vp; 3853 int error; 3854 3855 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3856 if (error) 3857 return(error); 3858 3859 vp = NULL; 3860 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3861 if (error == 0) 3862 error = nlookup(&nd); 3863 if (error == 0) 3864 error = ncp_writechk(&nd.nl_nch); 3865 if (error == 0) 3866 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3867 if (error) { 3868 nlookup_done(&nd); 3869 return (error); 3870 } 3871 3872 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred); 3873 vput(vp); 3874 nlookup_done(&nd); 3875 return(error); 3876 } 3877 3878 /* 3879 * Determine if the mount is visible to the process. 3880 */ 3881 static int 3882 chroot_visible_mnt(struct mount *mp, struct proc *p) 3883 { 3884 struct nchandle nch; 3885 3886 /* 3887 * Traverse from the mount point upwards. If we hit the process 3888 * root then the mount point is visible to the process. 3889 */ 3890 nch = mp->mnt_ncmountpt; 3891 while (nch.ncp) { 3892 if (nch.mount == p->p_fd->fd_nrdir.mount && 3893 nch.ncp == p->p_fd->fd_nrdir.ncp) { 3894 return(1); 3895 } 3896 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 3897 nch = nch.mount->mnt_ncmounton; 3898 } else { 3899 nch.ncp = nch.ncp->nc_parent; 3900 } 3901 } 3902 3903 /* 3904 * If the mount point is not visible to the process, but the 3905 * process root is in a subdirectory of the mount, return 3906 * TRUE anyway. 3907 */ 3908 if (p->p_fd->fd_nrdir.mount == mp) 3909 return(1); 3910 3911 return(0); 3912 } 3913 3914