1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/spinlock.h> 66 #include <sys/kern_syscall.h> 67 #include <sys/objcache.h> 68 #include <sys/sysctl.h> 69 70 #include <sys/buf2.h> 71 #include <sys/file2.h> 72 #include <sys/spinlock2.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 78 #include <machine/limits.h> 79 #include <machine/stdarg.h> 80 81 #include <vfs/union/union.h> 82 83 static void mount_warning(struct mount *mp, const char *ctl, ...); 84 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 85 static int checkvp_chdir (struct vnode *vn, struct thread *td); 86 static void checkdirs 
(struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Resolves uap->path to a directory vnode and either updates an existing
 * mount (MNT_UPDATE) or allocates and attaches a new struct mount for the
 * filesystem named by uap->type.  Returns 0 on success or an errno.
 */
/* ARGSUSED */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred = p->p_ucred;

	KKASSERT(p);
	/* mounting is never allowed from within a jail */
	if (cred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = suser(td)))
		return (error);
	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = suser(td);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (suser(td))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/* remember whether something is already mounted on this ncp */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
		hasmount = 1;
	else
		hasmount = 0;


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		return (error);
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		/* save flags so they can be restored if VFS_MOUNT fails */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = suser(td))) {
			cache_drop(&nch);
			vput(vp);
			return (error);
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			return (EBUSY);
		}
		if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			return (EBUSY);
		}
		vp->v_flag |= VMOUNT;
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		vn_unlock(vp);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid && (error = suser(td)))) {
		cache_drop(&nch);
		vput(vp);
		return (error);
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		return (error);
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		return (ENOTDIR);
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		return (EPERM);
	}
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		return (error);
	}
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	}
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = suser(td)) != 0) {
			cache_drop(&nch);
			vput(vp);
			return error;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			return error;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
			if (!strcmp(vfsp->vfc_name, fstypename))
				break;
		}
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			return (ENODEV);
		}
	}
	if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
		cache_drop(&nch);
		vput(vp);
		return (EBUSY);
	}
	vp->v_flag |= VMOUNT;

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	TAILQ_INIT(&mp->mnt_jlist);
	mp->mnt_nvnodelistsize = 0;
	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	mp->mnt_iosize_max = DFLTPHYS;
	vn_unlock(vp);
update:
	/*
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* restore pre-update flags saved above */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		vfs_unbusy(mp);
		vp->v_flag &= ~VMOUNT;
		vrele(vp);
		cache_drop(&nch);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;

		/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
		vp->v_flag &= ~VMOUNT;
		mountlist_insert(mp, MNTINS_LAST);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		vn_unlock(vp);
		/*
		 * NOTE(review): the error from vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below and never reported —
		 * confirm this is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		vp->v_flag &= ~VMOUNT;
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * The passed ncp is ref'd and locked (from the mount code) and
 * must be associated with the vnode representing the root of the
 * mount point.
 */
/*
 * State handed to checkdirs_callback() through allproc_scan().
 * NOTE: old_vp is never assigned by checkdirs() and is not read by the
 * callback (which matches on old_nch instead).
 */
struct checkdirs_info {
	struct nchandle old_nch;
	struct nchandle new_nch;
	struct vnode *old_vp;
	struct vnode *new_vp;
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || olddp->v_sysref.refcnt == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * Per-process callback for checkdirs(): if the process's current or root
 * directory sits on the old mount point, swap it for the new mount's root.
 * Always returns 0 so the scan continues over all processes.
 *
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock_wr(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock_wr(&fdp->fd_spin);
		/* drop the displaced references outside the spinlock */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/*
 * umount_args(char *path, int flags)
 */
/* ARGSUSED */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp = NULL;
	int error;
	struct nlookupdata nd;

	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = suser(td)))
		return (error);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	    (error = suser(td)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	nlookup_done(&nd);
	if (error)
		return (error);
	return (dounmount(mp, uap->flags));
}

/*
 * Do the actual file system unmount.
 *
 * Interlock helper: atomically claim the mount for unmounting by setting
 * MNTK_UNMOUNT, failing with EBUSY if another unmount is already underway.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * Unmount mp.  Returns 0 on success or an errno.  With MNT_FORCE set,
 * lingering namecache or process references produce warnings instead of
 * EBUSY, but the mount structure is then leaked intentionally (freeok = 0)
 * because those references may still be dangling.
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;

	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		return (error);

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* forced unmounts wait for the lock, normal ones fail fast */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 */
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 */
	if (error == 0) {
		if ((vp = mp->mnt_syncer) != NULL) {
			mp->mnt_syncer = NULL;
			vrele(vp);
		}
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}
	if (error) {
		/* unmount failed: recommission the syncer and back out */
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/* detach and drop the namecache handles on both sides of the mount */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	if (freeok)
		kfree(mp, M_MOUNT);
	return (0);
}

/*
 * Print a kprintf warning about mp, prefixed with the mount path when it
 * can be resolved, otherwise with the mount pointer and mount-on name.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.
 * In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 *
 * On success *rb points at the resolved path and the caller must release
 * the backing buffer with kfree(*fb, M_TEMP).
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, rb, fb));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/* ARGSUSED */
int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}

/*
 * Per-mount callback for sys_sync(): flush each writable mount with MNT_ASYNC
 * temporarily cleared so the sync is not deferred.  Always returns 0 so the
 * scan continues.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 */
/* ARGSUSED */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct proc *p;
	struct mount *mp;
	int error;

	td = curthread;
	p = td->td_proc;
	/* jailed processes may only use quotactl if prison_quotas is set */
	if (p->p_ucred->cr_prison && !prison_quotas)
		return (EPERM);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				    uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 */
/* ARGSUSED */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if ((error = suser(td)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	/* +1 and M_ZERO guarantee NUL termination of copied-in data */
	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor.  fd -1 means no descriptor is supplied.
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
		const void *ctl, int ctllen,
		void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	/* capture the mount before nlookup_done() tears down the nch */
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vput(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
			    buf, buflen, res);
	vput(vp);
	return (error);
}

/*
 * Fill *buf with statfs data for the mount containing the path described
 * by nd, rewriting f_mntonname relative to the caller's root.  nd must be
 * initialized by the caller; nlookup() is performed here.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (suser(td))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statfs data for the mount backing descriptor fd.
 * Returns 0 on success or an errno; EBADF if the descriptor's vnode has
 * no mount, EINVAL if the file has no credential.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (suser(td))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statvfs data for the mount containing the path described
 * by nd.  nd must be initialized by the caller; nlookup() is performed here.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	/* translate the relevant mount flags to statvfs f_flag bits */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statvfs data for the mount backing descriptor fd.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;	/* next user buffer slot, NULL = count only */
	long count;		/* number of mounts seen */
	long maxcount;		/* capacity of user buffer in entries */
	int error;
	int flags;
	struct proc *p;
};

static int getfsstat_callback(struct mount *, void *);

/* ARGSUSED */
int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.p = p;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* skip mounts not visible from the process's root */
		if (info->p && !chroot_visible_mnt(mp, info->p))
			return(0);
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is
specified, do not 1291 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1292 * overrides MNT_WAIT. 1293 */ 1294 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1295 (info->flags & MNT_WAIT)) && 1296 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1297 return(0); 1298 } 1299 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1300 1301 error = mount_path(info->p, mp, &fullpath, &freepath); 1302 if (error) { 1303 info->error = error; 1304 return(-1); 1305 } 1306 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1307 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1308 kfree(freepath, M_TEMP); 1309 1310 error = copyout(sp, info->sfsp, sizeof(*sp)); 1311 if (error) { 1312 info->error = error; 1313 return (-1); 1314 } 1315 ++info->sfsp; 1316 } 1317 info->count++; 1318 return(0); 1319 } 1320 1321 /* 1322 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1323 long bufsize, int flags) 1324 * 1325 * Get statistics on all filesystems. 1326 */ 1327 1328 struct getvfsstat_info { 1329 struct statfs *sfsp; 1330 struct statvfs *vsfsp; 1331 long count; 1332 long maxcount; 1333 int error; 1334 int flags; 1335 struct proc *p; 1336 }; 1337 1338 static int getvfsstat_callback(struct mount *, void *); 1339 1340 /* ARGSUSED */ 1341 int 1342 sys_getvfsstat(struct getvfsstat_args *uap) 1343 { 1344 struct thread *td = curthread; 1345 struct proc *p = td->td_proc; 1346 struct getvfsstat_info info; 1347 1348 bzero(&info, sizeof(info)); 1349 1350 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1351 info.sfsp = uap->buf; 1352 info.vsfsp = uap->vbuf; 1353 info.count = 0; 1354 info.flags = uap->flags; 1355 info.p = p; 1356 1357 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1358 if (info.vsfsp && info.count > info.maxcount) 1359 uap->sysmsg_result = info.maxcount; 1360 else 1361 uap->sysmsg_result = info.count; 1362 return (info.error); 1363 } 1364 1365 static int 1366 getvfsstat_callback(struct mount *mp, void *data) 1367 { 1368 struct 
getvfsstat_info *info = data; 1369 struct statfs *sp; 1370 struct statvfs *vsp; 1371 char *freepath; 1372 char *fullpath; 1373 int error; 1374 1375 if (info->vsfsp && info->count < info->maxcount) { 1376 if (info->p && !chroot_visible_mnt(mp, info->p)) 1377 return(0); 1378 sp = &mp->mnt_stat; 1379 vsp = &mp->mnt_vstat; 1380 1381 /* 1382 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1383 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1384 * overrides MNT_WAIT. 1385 */ 1386 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1387 (info->flags & MNT_WAIT)) && 1388 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1389 return(0); 1390 } 1391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1392 1393 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1394 (info->flags & MNT_WAIT)) && 1395 (error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) { 1396 return(0); 1397 } 1398 vsp->f_flag = 0; 1399 if (mp->mnt_flag & MNT_RDONLY) 1400 vsp->f_flag |= ST_RDONLY; 1401 if (mp->mnt_flag & MNT_NOSUID) 1402 vsp->f_flag |= ST_NOSUID; 1403 1404 error = mount_path(info->p, mp, &fullpath, &freepath); 1405 if (error) { 1406 info->error = error; 1407 return(-1); 1408 } 1409 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1410 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1411 kfree(freepath, M_TEMP); 1412 1413 error = copyout(sp, info->sfsp, sizeof(*sp)); 1414 if (error == 0) 1415 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1416 if (error) { 1417 info->error = error; 1418 return (-1); 1419 } 1420 ++info->sfsp; 1421 ++info->vsfsp; 1422 } 1423 info->count++; 1424 return(0); 1425 } 1426 1427 1428 /* 1429 * fchdir_args(int fd) 1430 * 1431 * Change current working directory to a given file descriptor. 
 */
/* ARGSUSED */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	if (error) {
		vput(vp);
		fdrop(fp);
		return (error);
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
	}
	if (error == 0) {
		/* install the new cwd, then release the old one */
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		vn_unlock(vp);		/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
	return (error);
}

/*
 * Kernel half of chdir(2): resolve nd and make it the calling process's
 * current directory.  On success the nchandle reference held by nd is
 * transferred to fd_ncdir (nd->nl_nch is zeroed so nlookup_done() does
 * not drop it) and the vnode reference is inherited by fd_cdir.
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		cache_drop(&onch);
		vrele(ovp);
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		/* unused or non-vnode descriptors are not a problem */
		if ((error = holdvnode(fdp, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.
 * The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only root can chroot
	 */
	if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* first chroot also establishes the jail directory */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
/* ARGSUSED */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error) {
		nlookup_done(&nd);
		return(error);
	}
	error = nlookup(&nd);
	if (error == 0)
		error = kern_chroot(&nd.nl_nch);
	nlookup_done(&nd);
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
	return (error);
}

/*
 * Kernel half of open(2).  Performs the lookup, allocates a struct file
 * and a descriptor slot, honors O_EXLOCK/O_SHLOCK, and returns the new
 * descriptor index in *res.  The descriptor slot is only committed via
 * fsetfd() once the open has fully succeeded.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error;
	struct flock lf;

	/* O_RDONLY|O_WRONLY|O_RDWR all set at once is invalid */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(p, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(p, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(p, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(p, fp, indx);
	fdrop(fp);
	*res = indx;
	return (0);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel half of mknod(2).  Creates a special file (device node,
 * whiteout, bad-sector marker, or HAMMER special directory) at the
 * path resolved by nd.  Device nodes require full superuser; other
 * types only require prison-root.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = suser(td);
		break;
	default:
		error = suser_cred(p->p_ucred, PRISON_ROOT);
		break;
	}
	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;
	whiteout = 0;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		whiteout = 1;
		break;
	case S_IFDIR:
		/* special directories support for HAMMER */
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0) {
		if (whiteout) {
			error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
					      nd->nl_cred, NAMEI_CREATE);
		} else {
			vp = NULL;
			error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
					   &vp, nd->nl_cred, &vattr);
			if (error == 0)
				vput(vp);
		}
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel half of mkfifo(2): create a VFIFO node at the path resolved
 * by nd, applying the process umask to the requested mode.
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Policy check for hard links, controlled by the security.hardlink_check_*
 * sysctls above.  Returns 0 if the caller may link to vp, EPERM otherwise.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * root cred can always hardlink
	 */
	if (suser_cred(cred, PRISON_ROOT) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Kernel half of link(2): nd resolves the existing file, linknd names
 * the link to create.  The source ncp is unlocked before looking up the
 * target to avoid a namecache deadlock.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtained a locked vnode.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vput(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vput(vp);
		return (EEXIST);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_proc->p_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel half of symlink(2): create a symlink at the path resolved by
 * nd whose contents are the (kernel-space) string 'path'.  'mode' is
 * the permission mode for the new link.
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* copy the link target in first; it is passed as a kernel string */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
/* ARGSUSED */
int
sys_undelete(struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	bwillinode(1);
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel half of unlink(2): remove the name resolved by nd.
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel half of lseek(2).  Computes the new file offset from 'whence'
 * and 'offset', validates it, and stores the resulting offset in *res.
 * Non-vnode descriptors (pipes, sockets) return ESPIPE.
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	switch (whence) {
	case L_INCR:
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/*
		 * NOTE(review): if VOP_GETATTR fails, vattr.va_size is
		 * read uninitialized here; harmless because new_offset
		 * is only used when error == 0, but worth tidying.
		 */
		error = VOP_GETATTR(vp, &vattr);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files, block specials, or directories.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR ||
		     vp->v_type == VCHR || vp->v_type == VBLK)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	/* on failure this reports the unchanged current offset */
	*res = fp->f_offset;
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Kernel half of access(2): check the caller's permission to the path
 * resolved by nd.  aflags is a mask of R_OK/W_OK/X_OK; 0 only tests
 * existence.  Retries the lookup once if the file handle is stale.
 */
int
kern_access(struct nlookupdata *nd, int aflags)
{
	struct vnode *vp;
	int error, flags;

	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (aflags) {
		flags = 0;
		if (aflags & R_OK)
			flags |= VREAD;
		if (aflags & W_OK)
			flags |= VWRITE;
		if (aflags & X_OK)
			flags |= VEXEC;
		if ((flags & VWRITE) == 0 ||
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS(vp, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags);
	nlookup_done(&nd);
	return (error);
}

/*
 * Common kernel half of stat(2)/lstat(2): stat the vnode resolved by
 * nd into *st.  Retries through the namecache if the handle is stale.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;
	thread_t td;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	td = curthread;
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	/* same as sys_stat() but without NLC_FOLLOW */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * pathconf_args(char *path, int name)
 *
 * Get configurable pathname variables.
 */
/* ARGSUSED */
int
sys_pathconf(struct pathconf_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
		vput(vp);
	}
	return (error);
}

/*
 * XXX: daver
 * kern_readlink isn't properly split yet.  There is a copyin burried
 * in VOP_READLINK().
2470 */ 2471 int 2472 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2473 { 2474 struct thread *td = curthread; 2475 struct proc *p = td->td_proc; 2476 struct vnode *vp; 2477 struct iovec aiov; 2478 struct uio auio; 2479 int error; 2480 2481 if ((error = nlookup(nd)) != 0) 2482 return (error); 2483 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2484 if (error) 2485 return (error); 2486 if (vp->v_type != VLNK) { 2487 error = EINVAL; 2488 } else { 2489 aiov.iov_base = buf; 2490 aiov.iov_len = count; 2491 auio.uio_iov = &aiov; 2492 auio.uio_iovcnt = 1; 2493 auio.uio_offset = 0; 2494 auio.uio_rw = UIO_READ; 2495 auio.uio_segflg = UIO_USERSPACE; 2496 auio.uio_td = td; 2497 auio.uio_resid = count; 2498 error = VOP_READLINK(vp, &auio, p->p_ucred); 2499 } 2500 vput(vp); 2501 *res = count - auio.uio_resid; 2502 return (error); 2503 } 2504 2505 /* 2506 * readlink_args(char *path, char *buf, int count) 2507 * 2508 * Return target name of a symbolic link. 2509 */ 2510 int 2511 sys_readlink(struct readlink_args *uap) 2512 { 2513 struct nlookupdata nd; 2514 int error; 2515 2516 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2517 if (error == 0) { 2518 error = kern_readlink(&nd, uap->buf, uap->count, 2519 &uap->sysmsg_result); 2520 } 2521 nlookup_done(&nd); 2522 return (error); 2523 } 2524 2525 static int 2526 setfflags(struct vnode *vp, int flags) 2527 { 2528 struct thread *td = curthread; 2529 struct proc *p = td->td_proc; 2530 int error; 2531 struct vattr vattr; 2532 2533 /* 2534 * Prevent non-root users from setting flags on devices. When 2535 * a device is reused, users can retain ownership of the device 2536 * if they are allowed to set flags and programs assume that 2537 * chown can't fail when done as root. 
2538 */ 2539 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2540 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2541 return (error); 2542 2543 /* 2544 * note: vget is required for any operation that might mod the vnode 2545 * so VINACTIVE is properly cleared. 2546 */ 2547 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2548 VATTR_NULL(&vattr); 2549 vattr.va_flags = flags; 2550 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2551 vput(vp); 2552 } 2553 return (error); 2554 } 2555 2556 /* 2557 * chflags(char *path, int flags) 2558 * 2559 * Change flags of a file given a path name. 2560 */ 2561 /* ARGSUSED */ 2562 int 2563 sys_chflags(struct chflags_args *uap) 2564 { 2565 struct nlookupdata nd; 2566 struct vnode *vp; 2567 int error; 2568 2569 vp = NULL; 2570 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2571 /* XXX Add NLC flag indicating modifying operation? */ 2572 if (error == 0) 2573 error = nlookup(&nd); 2574 if (error == 0) 2575 error = ncp_writechk(&nd.nl_nch); 2576 if (error == 0) 2577 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2578 nlookup_done(&nd); 2579 if (error == 0) { 2580 error = setfflags(vp, uap->flags); 2581 vrele(vp); 2582 } 2583 return (error); 2584 } 2585 2586 /* 2587 * lchflags(char *path, int flags) 2588 * 2589 * Change flags of a file given a path name, but don't follow symlinks. 2590 */ 2591 /* ARGSUSED */ 2592 int 2593 sys_lchflags(struct lchflags_args *uap) 2594 { 2595 struct nlookupdata nd; 2596 struct vnode *vp; 2597 int error; 2598 2599 vp = NULL; 2600 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2601 /* XXX Add NLC flag indicating modifying operation? 
*/ 2602 if (error == 0) 2603 error = nlookup(&nd); 2604 if (error == 0) 2605 error = ncp_writechk(&nd.nl_nch); 2606 if (error == 0) 2607 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2608 nlookup_done(&nd); 2609 if (error == 0) { 2610 error = setfflags(vp, uap->flags); 2611 vrele(vp); 2612 } 2613 return (error); 2614 } 2615 2616 /* 2617 * fchflags_args(int fd, int flags) 2618 * 2619 * Change flags of a file given a file descriptor. 2620 */ 2621 /* ARGSUSED */ 2622 int 2623 sys_fchflags(struct fchflags_args *uap) 2624 { 2625 struct thread *td = curthread; 2626 struct proc *p = td->td_proc; 2627 struct file *fp; 2628 int error; 2629 2630 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2631 return (error); 2632 if (fp->f_nchandle.ncp) 2633 error = ncp_writechk(&fp->f_nchandle); 2634 if (error == 0) 2635 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2636 fdrop(fp); 2637 return (error); 2638 } 2639 2640 static int 2641 setfmode(struct vnode *vp, int mode) 2642 { 2643 struct thread *td = curthread; 2644 struct proc *p = td->td_proc; 2645 int error; 2646 struct vattr vattr; 2647 2648 /* 2649 * note: vget is required for any operation that might mod the vnode 2650 * so VINACTIVE is properly cleared. 2651 */ 2652 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2653 VATTR_NULL(&vattr); 2654 vattr.va_mode = mode & ALLPERMS; 2655 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2656 vput(vp); 2657 } 2658 return error; 2659 } 2660 2661 int 2662 kern_chmod(struct nlookupdata *nd, int mode) 2663 { 2664 struct vnode *vp; 2665 int error; 2666 2667 /* XXX Add NLC flag indicating modifying operation? 
*/ 2668 if ((error = nlookup(nd)) != 0) 2669 return (error); 2670 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2671 return (error); 2672 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2673 error = setfmode(vp, mode); 2674 vrele(vp); 2675 return (error); 2676 } 2677 2678 /* 2679 * chmod_args(char *path, int mode) 2680 * 2681 * Change mode of a file given path name. 2682 */ 2683 /* ARGSUSED */ 2684 int 2685 sys_chmod(struct chmod_args *uap) 2686 { 2687 struct nlookupdata nd; 2688 int error; 2689 2690 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2691 if (error == 0) 2692 error = kern_chmod(&nd, uap->mode); 2693 nlookup_done(&nd); 2694 return (error); 2695 } 2696 2697 /* 2698 * lchmod_args(char *path, int mode) 2699 * 2700 * Change mode of a file given path name (don't follow links.) 2701 */ 2702 /* ARGSUSED */ 2703 int 2704 sys_lchmod(struct lchmod_args *uap) 2705 { 2706 struct nlookupdata nd; 2707 int error; 2708 2709 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2710 if (error == 0) 2711 error = kern_chmod(&nd, uap->mode); 2712 nlookup_done(&nd); 2713 return (error); 2714 } 2715 2716 /* 2717 * fchmod_args(int fd, int mode) 2718 * 2719 * Change mode of a file given a file descriptor. 
 */
/* ARGSUSED */
int
sys_fchmod(struct fchmod_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct file *fp;
        int error;

        if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
                return (error);
        /* an fd may have no namecache handle (e.g. fhopen'd files) */
        if (fp->f_nchandle.ncp)
                error = ncp_writechk(&fp->f_nchandle);
        if (error == 0)
                error = setfmode((struct vnode *)fp->f_data, uap->mode);
        fdrop(fp);
        return (error);
}

/*
 * Common helper: set owner and group (va_uid/va_gid) on a vnode.
 */
static int
setfown(struct vnode *vp, uid_t uid, gid_t gid)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        int error;
        struct vattr vattr;

        /*
         * note: vget is required for any operation that might mod the vnode
         * so VINACTIVE is properly cleared.
         */
        if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_uid = uid;
                vattr.va_gid = gid;
                error = VOP_SETATTR(vp, &vattr, p->p_ucred);
                vput(vp);
        }
        return error;
}

/*
 * Path-based chown backend shared by sys_chown() and sys_lchown().
 * The caller owns nd and calls nlookup_done() on it.
 */
int
kern_chown(struct nlookupdata *nd, int uid, int gid)
{
        struct vnode *vp;
        int error;

        /* XXX Add NLC flag indicating modifying operation? */
        if ((error = nlookup(nd)) != 0)
                return (error);
        if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
                return (error);
        if ((error = ncp_writechk(&nd->nl_nch)) == 0)
                error = setfown(vp, uid, gid);
        vrele(vp);
        return (error);
}

/*
 * chown(char *path, int uid, int gid)
 *
 * Set ownership given a path name.
 */
int
sys_chown(struct chown_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = kern_chown(&nd, uap->uid, uap->gid);
        nlookup_done(&nd);
        return (error);
}

/*
 * lchown_args(char *path, int uid, int gid)
 *
 * Set ownership given a path name, do not cross symlinks.
 */
int
sys_lchown(struct lchown_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_chown(&nd, uap->uid, uap->gid);
        nlookup_done(&nd);
        return (error);
}

/*
 * fchown_args(int fd, int uid, int gid)
 *
 * Set ownership given a file descriptor.
 */
/* ARGSUSED */
int
sys_fchown(struct fchown_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct file *fp;
        int error;

        if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
                return (error);
        /* an fd may have no namecache handle (e.g. fhopen'd files) */
        if (fp->f_nchandle.ncp)
                error = ncp_writechk(&fp->f_nchandle);
        if (error == 0)
                error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
        fdrop(fp);
        return (error);
}

/*
 * Convert an optional utimes()-style timeval pair into a timespec pair.
 * A NULL tvp means "now": both access and modification times are set to
 * the current time.  Always returns 0.
 */
static int
getutimes(const struct timeval *tvp, struct timespec *tsp)
{
        struct timeval tv[2];

        if (tvp == NULL) {
                microtime(&tv[0]);
                TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
                tsp[1] = tsp[0];
        } else {
                TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
                TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
        }
        return 0;
}

/*
 * Common helper: set atime/mtime on a vnode.  nullflag indicates the
 * caller passed a NULL timeval (i.e. "set to now"), which relaxes the
 * permission check via VA_UTIMES_NULL.
 */
static int
setutimes(struct vnode *vp, const struct timespec *ts, int nullflag)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        int error;
        struct vattr vattr;

        /*
         * note: vget is required for any operation that might mod the vnode
         * so VINACTIVE is properly cleared.
         */
        if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_atime = ts[0];
                vattr.va_mtime = ts[1];
                if (nullflag)
                        vattr.va_vaflags |= VA_UTIMES_NULL;
                error = VOP_SETATTR(vp, &vattr, p->p_ucred);
                vput(vp);
        }
        return error;
}

/*
 * Path-based utimes backend shared by sys_utimes() and sys_lutimes().
 * tptr may be NULL meaning "set both times to now".
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
        struct timespec ts[2];
        struct vnode *vp;
        int error;

        if ((error = getutimes(tptr, ts)) != 0)
                return (error);
        /* XXX Add NLC flag indicating modifying operation? */
        if ((error = nlookup(nd)) != 0)
                return (error);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
                return (error);
        error = setutimes(vp, ts, tptr == NULL);
        vrele(vp);
        return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct utimes_args *uap)
{
        struct timeval tv[2];
        struct nlookupdata nd;
        int error;

        /* copy in the times up front; NULL tptr means "now" */
        if (uap->tptr) {
                error = copyin(uap->tptr, tv, sizeof(tv));
                if (error)
                        return (error);
        }
        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = kern_utimes(&nd, uap->tptr ? tv : NULL);
        nlookup_done(&nd);
        return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_lutimes(struct lutimes_args *uap)
{
        struct timeval tv[2];
        struct nlookupdata nd;
        int error;

        /* same as sys_utimes() but the lookup does not follow symlinks */
        if (uap->tptr) {
                error = copyin(uap->tptr, tv, sizeof(tv));
                if (error)
                        return (error);
        }
        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_utimes(&nd, uap->tptr ? tv : NULL);
        nlookup_done(&nd);
        return (error);
}

/*
 * Descriptor-based utimes backend.  tptr may be NULL meaning "now".
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct timespec ts[2];
        struct file *fp;
        int error;

        error = getutimes(tptr, ts);
        if (error)
                return (error);
        if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
                return (error);
        /* an fd may have no namecache handle (e.g. fhopen'd files) */
        if (fp->f_nchandle.ncp)
                error = ncp_writechk(&fp->f_nchandle);
        if (error == 0)
                error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
        fdrop(fp);
        return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
        struct timeval tv[2];
        int error;

        if (uap->tptr) {
                error = copyin(uap->tptr, tv, sizeof(tv));
                if (error)
                        return (error);
        }

        error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);

        return (error);
}

/*
 * Path-based truncate backend.  Rejects negative lengths and directories,
 * then sets va_size through VOP_SETATTR with the vnode exclusively locked.
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
        struct vnode *vp;
        struct vattr vattr;
        int error;

        if (length < 0)
                return(EINVAL);
        /* XXX Add NLC flag indicating modifying operation? */
        if ((error = nlookup(nd)) != 0)
                return (error);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
                return (error);
        if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
                vrele(vp);
                return (error);
        }
        if (vp->v_type == VDIR) {
                error = EISDIR;
        } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 &&
            (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_size = length;
                error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
        }
        vput(vp);
        return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 */
int
sys_truncate(struct truncate_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = kern_truncate(&nd, uap->length);
        nlookup_done(&nd);
        return error;
}

/*
 * Descriptor-based truncate backend.  Unlike kern_truncate() the access
 * check is against the descriptor's FWRITE flag rather than VOP_ACCESS.
 */
int
kern_ftruncate(int fd, off_t length)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct vattr vattr;
        struct vnode *vp;
        struct file *fp;
        int error;

        if (length < 0)
                return(EINVAL);
        if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
                return (error);
        /* an fd may have no namecache handle (e.g. fhopen'd files) */
        if (fp->f_nchandle.ncp) {
                error = ncp_writechk(&fp->f_nchandle);
                if (error)
                        goto done;
        }
        if ((fp->f_flag & FWRITE) == 0) {
                error = EINVAL;
                goto done;
        }
        vp = (struct vnode *)fp->f_data;
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        if (vp->v_type == VDIR) {
                error = EISDIR;
        } else if ((error = vn_writechk(vp, NULL)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_size = length;
                error = VOP_SETATTR(vp, &vattr, fp->f_cred);
        }
        vn_unlock(vp);
done:
        fdrop(fp);
        return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
        int error;

        error = kern_ftruncate(uap->fd, uap->length);

        return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
/* ARGSUSED */
int
sys_fsync(struct fsync_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct vnode *vp;
        struct file *fp;
        vm_object_t obj;
        int error;

        if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
                return (error);
        vp = (struct vnode *)fp->f_data;
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        /* flush dirty VM pages to the buffer cache before VOP_FSYNC */
        if ((obj = vp->v_object) != NULL)
                vm_object_page_clean(obj, 0, 0, 0);
        if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount)
                error = buf_fsync(vp);
        vn_unlock(vp);
        fdrop(fp);
        return (error);
}

/*
 * Rename backend.  Both nlookupdata structures are owned by the caller,
 * which is responsible for nlookup_done() on both regardless of the
 * return value.  The heart of this function is the careful unlock /
 * relock dance needed to look up the target without deadlocking against
 * the locked source ncp.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
        struct nchandle fnchd;
        struct nchandle tnchd;
        struct namecache *ncp;
        struct vnode *fdvp;
        struct vnode *tdvp;
        struct mount *mp;
        int error;

        bwillinode(1);
        fromnd->nl_flags |= NLC_REFDVP;
        if ((error = nlookup(fromnd)) != 0)
                return (error);
        if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
                return (ENOENT);
        fnchd.mount = fromnd->nl_nch.mount;
        cache_hold(&fnchd);

        /*
         * unlock the source nch so we can lookup the target nch without
         * deadlocking.  The target may or may not exist so we do not check
         * for a target vp like kern_mkdir() and other creation functions do.
         *
         * The source and target directories are ref'd and rechecked after
         * everything is relocked to determine if the source or target file
         * has been renamed.
         */
        KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
        fromnd->nl_flags &= ~NLC_NCPISLOCKED;
        cache_unlock(&fromnd->nl_nch);

        tond->nl_flags |= NLC_CREATE | NLC_REFDVP;
        if ((error = nlookup(tond)) != 0) {
                cache_drop(&fnchd);
                return (error);
        }
        if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
                cache_drop(&fnchd);
                return (ENOENT);
        }
        tnchd.mount = tond->nl_nch.mount;
        cache_hold(&tnchd);

        /*
         * If the source and target are the same there is nothing to do
         */
        if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
                cache_drop(&fnchd);
                cache_drop(&tnchd);
                return (0);
        }

        /*
         * Mount points cannot be renamed or overwritten
         */
        if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
            NCF_ISMOUNTPT
        ) {
                cache_drop(&fnchd);
                cache_drop(&tnchd);
                return (EINVAL);
        }

        /*
         * relock the source ncp.  NOTE AFTER RELOCKING: the source ncp
         * may have become invalid while it was unlocked, nc_vp and nc_mount
         * could be NULL.
         *
         * (try non-blocking first; on failure take the locks in address
         * order to avoid deadlocking against another rename)
         */
        if (cache_lock_nonblock(&fromnd->nl_nch) == 0) {
                cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
        } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) {
                cache_lock(&fromnd->nl_nch);
                cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
        } else {
                cache_unlock(&tond->nl_nch);
                cache_lock(&fromnd->nl_nch);
                cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
                cache_lock(&tond->nl_nch);
                cache_resolve(&tond->nl_nch, tond->nl_cred);
        }
        fromnd->nl_flags |= NLC_NCPISLOCKED;

        /*
         * make sure the parent directories linkages are the same
         */
        if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
            tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
                cache_drop(&fnchd);
                cache_drop(&tnchd);
                return (ENOENT);
        }

        /*
         * Both the source and target must be within the same filesystem and
         * in the same filesystem as their parent directories within the
         * namecache topology.
         *
         * NOTE: fromnd's nc_mount or nc_vp could be NULL.
         */
        mp = fnchd.mount;
        if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
            mp != tond->nl_nch.mount) {
                cache_drop(&fnchd);
                cache_drop(&tnchd);
                return (EXDEV);
        }

        /*
         * Make sure the mount point is writable
         */
        if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
                cache_drop(&fnchd);
                cache_drop(&tnchd);
                return (error);
        }

        /*
         * If the target exists and either the source or target is a directory,
         * then both must be directories.
         *
         * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
         * have become NULL.
         */
        if (tond->nl_nch.ncp->nc_vp) {
                if (fromnd->nl_nch.ncp->nc_vp == NULL) {
                        error = ENOENT;
                } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
                        if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
                                error = ENOTDIR;
                } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
                        error = EISDIR;
                }
        }

        /*
         * You cannot rename a source into itself or a subdirectory of itself.
         * We check this by traversing the target directory upwards looking
         * for a match against the source.
         */
        if (error == 0) {
                for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
                        if (fromnd->nl_nch.ncp == ncp) {
                                error = EINVAL;
                                break;
                        }
                }
        }

        cache_drop(&fnchd);
        cache_drop(&tnchd);

        /*
         * Even though the namespaces are different, they may still represent
         * hardlinks to the same file.  The filesystem might have a hard time
         * with this so we issue a NREMOVE of the source instead of a NRENAME
         * when we detect the situation.
         */
        if (error == 0) {
                fdvp = fromnd->nl_dvp;
                tdvp = tond->nl_dvp;
                if (fdvp == NULL || tdvp == NULL) {
                        error = EPERM;
                } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
                        error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
                                            fromnd->nl_cred);
                } else {
                        error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
                                            fdvp, tdvp, tond->nl_cred);
                }
        }
        return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
        struct nlookupdata fromnd, tond;
        int error;

        error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
        if (error == 0) {
                error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
                if (error == 0)
                        error = kern_rename(&fromnd, &tond);
                nlookup_done(&tond);
        }
        nlookup_done(&fromnd);
        return (error);
}

/*
 * mkdir backend.  The caller owns nd and calls nlookup_done() on it,
 * which also releases the locked ncp on the early-return paths below.
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct vnode *vp;
        struct vattr vattr;
        int error;

        bwillinode(1);
        nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
        if ((error = nlookup(nd)) != 0)
                return (error);

        if (nd->nl_nch.ncp->nc_vp)
                return (EEXIST);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        VATTR_NULL(&vattr);
        vattr.va_type = VDIR;
        /* apply the process umask to the requested mode */
        vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

        vp = NULL;
        error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr);
        if (error == 0)
                vput(vp);
        return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
/* ARGSUSED */
int
sys_mkdir(struct mkdir_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_mkdir(&nd, uap->mode);
        nlookup_done(&nd);
        return (error);
}

/*
 * rmdir backend.  The caller owns nd and calls nlookup_done() on it.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
        int error;

        bwillinode(1);
        nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
        if ((error = nlookup(nd)) != 0)
                return (error);

        /*
         * Do not allow directories representing mount points to be
         * deleted, even if empty.  Check write perms on mount point
         * in case the vnode is aliased (aka nullfs).
         */
        if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
                return (EINVAL);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
        return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
/* ARGSUSED */
int
sys_rmdir(struct rmdir_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_rmdir(&nd);
        nlookup_done(&nd);
        return (error);
}

/*
 * Read directory entries from fd into buf.  *basep (if non-NULL)
 * receives the seek offset at which the read started and *res receives
 * the number of bytes transferred.  direction selects whether buf is a
 * user or kernel address.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
    enum uio_seg direction)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct vnode *vp;
        struct file *fp;
        struct uio auio;
        struct iovec aiov;
        off_t loff;
        int error, eofflag;

        if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
                return (error);
        if ((fp->f_flag & FREAD) == 0) {
                error = EBADF;
                goto done;
        }
        vp = (struct vnode *)fp->f_data;
unionread:
        if (vp->v_type != VDIR) {
                error = EINVAL;
                goto done;
        }
        aiov.iov_base = buf;
        aiov.iov_len = count;
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_rw = UIO_READ;
        auio.uio_segflg = direction;
        auio.uio_td = td;
        auio.uio_resid = count;
        loff = auio.uio_offset = fp->f_offset;
        error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
        fp->f_offset = auio.uio_offset;
        if (error)
                goto done;
        /*
         * Nothing was transferred: for union mounts, retry against the
         * lower layer (union_dircheckp swaps vp and returns -1 to loop).
         */
        if (count == auio.uio_resid) {
                if (union_dircheckp) {
                        error = union_dircheckp(td, &vp, fp);
                        if (error == -1)
                                goto unionread;
                        if (error)
                                goto done;
                }
#if 0
                if ((vp->v_flag & VROOT) &&
                    (vp->v_mount->mnt_flag & MNT_UNION)) {
                        struct vnode *tvp = vp;
                        vp = vp->v_mount->mnt_vnodecovered;
                        vref(vp);
                        fp->f_data = vp;
                        fp->f_offset = 0;
                        vrele(tvp);
                        goto unionread;
                }
#endif
        }

        /*
         * WARNING!  *basep may not be wide enough to accommodate the
         * seek offset.   XXX should we hack this to return the upper 32 bits
         * for offsets greater then 4G?
         */
        if (basep) {
                *basep = (long)loff;
        }
        *res = count - auio.uio_resid;
done:
        fdrop(fp);
        return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
        long base;
        int error;

        error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
                                   &uap->sysmsg_result, UIO_USERSPACE);

        if (error == 0 && uap->basep)
                error = copyout(&base, uap->basep, sizeof(*uap->basep));
        return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 */
int
sys_getdents(struct getdents_args *uap)
{
        int error;

        /* like getdirentries() but without the basep out-parameter */
        error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
                                   &uap->sysmsg_result, UIO_USERSPACE);

        return (error);
}

/*
 * umask(int newmask)
 *
 * Set the mode mask for creation of filesystem nodes.
 *
 * MP SAFE
 */
int
sys_umask(struct umask_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct filedesc *fdp;

        fdp = p->p_fd;
        /* return the previous mask while installing the new one */
        uap->sysmsg_result = fdp->fd_cmask;
        fdp->fd_cmask = uap->newmask & ALLPERMS;
        return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 */
/* ARGSUSED */
int
sys_revoke(struct revoke_args *uap)
{
        struct nlookupdata nd;
        struct vattr vattr;
        struct vnode *vp;
        struct ucred *cred;
        int error;

        vp = NULL;
        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
        /*
         * NOTE(review): cred is taken unconditionally, before the error
         * check — this appears to assume nlookup_init always leaves a
         * usable nl_cred even on failure; confirm against nlookup_init().
         */
        cred = crhold(nd.nl_cred);
        nlookup_done(&nd);
        if (error == 0) {
                /* only device nodes may be revoked */
                if (vp->v_type != VCHR && vp->v_type != VBLK)
                        error = EINVAL;
                if (error == 0)
                        error = VOP_GETATTR(vp, &vattr);
                /* owner or privileged user only */
                if (error == 0 && cred->cr_uid != vattr.va_uid)
                        error = suser_cred(cred, PRISON_ROOT);
                if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) {
                        error = 0;
                        vx_lock(vp);
                        VOP_REVOKE(vp, REVOKEALL);
                        vx_unlock(vp);
                }
                vrele(vp);
        }
        if (cred)
                crfree(cred);
        return (error);
}

/*
 * getfh_args(char *fname, fhandle_t *fhp)
 *
 * Get (NFS) file handle
 *
 * NOTE: We use the fsid of the covering mount, even if it is a nullfs
 * mount.  This allows nullfs mounts to be explicitly exported.
 *
 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
 *
 *          nullfs mounts of subdirectories are not safe.  That is, it
 *          will work, but you do not really have protection against
 *          access to the related parent directories.
 */
int
sys_getfh(struct getfh_args *uap)
{
        struct thread *td = curthread;
        struct nlookupdata nd;
        fhandle_t fh;
        struct vnode *vp;
        struct mount *mp;
        int error;

        /*
         * Must be super user
         */
        if ((error = suser(td)) != 0)
                return (error);

        vp = NULL;
        error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
        /* capture the covering mount before nlookup_done() releases nd */
        mp = nd.nl_nch.mount;
        nlookup_done(&nd);
        if (error == 0) {
                bzero(&fh, sizeof(fh));
                fh.fh_fsid = mp->mnt_stat.f_fsid;
                error = VFS_VPTOFH(vp, &fh.fh_fid);
                vput(vp);
                if (error == 0)
                        error = copyout(&fh, uap->fhp, sizeof(fh));
        }
        return (error);
}

/*
 * fhopen_args(const struct fhandle *u_fhp, int flags)
 *
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 */
int
sys_fhopen(struct fhopen_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct mount *mp;
        struct vnode *vp;
        struct fhandle fhp;
        struct vattr vat;
        struct vattr *vap = &vat;
        struct flock lf;
        int fmode, mode, error, type;
        struct file *nfp;
        struct file *fp;
        int indx;

        /*
         * Must be super user
         */
        error = suser(td);
        if (error)
                return (error);

        fmode = FFLAGS(uap->flags);
        /* why not allow a non-read/write open for our lockd? */
        if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
                return (EINVAL);
        error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
        if (error)
                return(error);
        /* find the mount point */
        mp = vfs_getvfs(&fhp.fh_fsid);
        if (mp == NULL)
                return (ESTALE);
        /* now give me my vnode, it gets returned to me locked */
        error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
        if (error)
                return (error);
        /*
         * from now on we have to make sure not
         * to forget about the vnode
         * any error that causes an abort must vput(vp)
         * just set error = err and 'goto bad;'.
         */

        /*
         * from vn_open
         */
        if (vp->v_type == VLNK) {
                error = EMLINK;
                goto bad;
        }
        if (vp->v_type == VSOCK) {
                error = EOPNOTSUPP;
                goto bad;
        }
        mode = 0;
        if (fmode & (FWRITE | O_TRUNC)) {
                if (vp->v_type == VDIR) {
                        error = EISDIR;
                        goto bad;
                }
                error = vn_writechk(vp, NULL);
                if (error)
                        goto bad;
                mode |= VWRITE;
        }
        if (fmode & FREAD)
                mode |= VREAD;
        if (mode) {
                error = VOP_ACCESS(vp, mode, p->p_ucred);
                if (error)
                        goto bad;
        }
        if (fmode & O_TRUNC) {
                vn_unlock(vp);                          /* XXX */
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
                VATTR_NULL(vap);
                vap->va_size = 0;
                error = VOP_SETATTR(vp, vap, p->p_ucred);
                if (error)
                        goto bad;
        }

        /*
         * VOP_OPEN needs the file pointer so it can potentially override
         * it.
         *
         * WARNING! no f_nchandle will be associated when fhopen()ing a
         * directory.  XXX
         */
        if ((error = falloc(p, &nfp, &indx)) != 0)
                goto bad;
        fp = nfp;

        error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
        if (error) {
                /*
                 * setting f_ops this way prevents VOP_CLOSE from being
                 * called or fdrop() releasing the vp from v_data.   Since
                 * the VOP_OPEN failed we don't want to VOP_CLOSE.
                 */
                fp->f_ops = &badfileops;
                fp->f_data = NULL;
                goto bad_drop;
        }

        /*
         * The fp is given its own reference, we still have our ref and lock.
         *
         * Assert that all regular files must be created with a VM object.
         */
        if (vp->v_type == VREG && vp->v_object == NULL) {
                kprintf("fhopen: regular file did not have VM object: %p\n", vp);
                goto bad_drop;
        }

        /*
         * The open was successful.  Handle any locking requirements.
         */
        if (fmode & (O_EXLOCK | O_SHLOCK)) {
                lf.l_whence = SEEK_SET;
                lf.l_start = 0;
                lf.l_len = 0;
                if (fmode & O_EXLOCK)
                        lf.l_type = F_WRLCK;
                else
                        lf.l_type = F_RDLCK;
                if (fmode & FNONBLOCK)
                        type = 0;
                else
                        type = F_WAIT;
                vn_unlock(vp);
                if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
                        /*
                         * release our private reference.
                         */
                        fsetfd(p, NULL, indx);
                        fdrop(fp);
                        vrele(vp);
                        return (error);
                }
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                fp->f_flag |= FHASLOCK;
        }

        /*
         * Clean up.  Associate the file pointer with the previously
         * reserved descriptor and return it.
         */
        vput(vp);
        fsetfd(p, fp, indx);
        fdrop(fp);
        uap->sysmsg_result = indx;
        return (0);

bad_drop:
        fsetfd(p, NULL, indx);
        fdrop(fp);
bad:
        vput(vp);
        return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 */
int
sys_fhstat(struct fhstat_args *uap)
{
        struct thread *td = curthread;
        struct stat sb;
        fhandle_t fh;
        struct mount *mp;
        struct vnode *vp;
        int error;

        /*
         * Must be super user
         */
        error = suser(td);
        if (error)
                return (error);

        error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
        if (error)
                return (error);

        if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
                return (ESTALE);
        if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
                return (error);
        error = vn_stat(vp, &sb, td->td_proc->p_ucred);
        vput(vp);
        if (error)
                return (error);
        error = copyout(&sb, uap->sb, sizeof(sb));
        return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct statfs *sp;
        struct mount *mp;
        struct vnode *vp;
        struct statfs sb;
        char *fullpath, *freepath;
        fhandle_t fh;
        int error;

        /*
         * Must be super user
         */
        if ((error = suser(td)))
                return (error);

        if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
                return (error);

        if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
                return (ESTALE);

        if (p != NULL && !chroot_visible_mnt(mp, p))
                return (ESTALE);

        if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
                return (error);
        mp = vp->v_mount;
        sp = &mp->mnt_stat;
        vput(vp);
        if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
                return (error);

3880 error = mount_path(p, mp, &fullpath, &freepath); 3881 if (error) 3882 return(error); 3883 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3884 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3885 kfree(freepath, M_TEMP); 3886 3887 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3888 if (suser(td)) { 3889 bcopy(sp, &sb, sizeof(sb)); 3890 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3891 sp = &sb; 3892 } 3893 return (copyout(sp, uap->buf, sizeof(*sp))); 3894 } 3895 3896 /* 3897 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 3898 */ 3899 int 3900 sys_fhstatvfs(struct fhstatvfs_args *uap) 3901 { 3902 struct thread *td = curthread; 3903 struct proc *p = td->td_proc; 3904 struct statvfs *sp; 3905 struct mount *mp; 3906 struct vnode *vp; 3907 fhandle_t fh; 3908 int error; 3909 3910 /* 3911 * Must be super user 3912 */ 3913 if ((error = suser(td))) 3914 return (error); 3915 3916 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3917 return (error); 3918 3919 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3920 return (ESTALE); 3921 3922 if (p != NULL && !chroot_visible_mnt(mp, p)) 3923 return (ESTALE); 3924 3925 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3926 return (error); 3927 mp = vp->v_mount; 3928 sp = &mp->mnt_vstat; 3929 vput(vp); 3930 if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0) 3931 return (error); 3932 3933 sp->f_flag = 0; 3934 if (mp->mnt_flag & MNT_RDONLY) 3935 sp->f_flag |= ST_RDONLY; 3936 if (mp->mnt_flag & MNT_NOSUID) 3937 sp->f_flag |= ST_NOSUID; 3938 3939 return (copyout(sp, uap->buf, sizeof(*sp))); 3940 } 3941 3942 3943 /* 3944 * Syscall to push extended attribute configuration information into the 3945 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3946 * a command (int cmd), and attribute name and misc data. For now, the 3947 * attribute name is left in userspace for consumption by the VFS_op. 
3948 * It will probably be changed to be copied into sysspace by the 3949 * syscall in the future, once issues with various consumers of the 3950 * attribute code have raised their hands. 3951 * 3952 * Currently this is used only by UFS Extended Attributes. 3953 */ 3954 int 3955 sys_extattrctl(struct extattrctl_args *uap) 3956 { 3957 struct nlookupdata nd; 3958 struct mount *mp; 3959 struct vnode *vp; 3960 int error; 3961 3962 vp = NULL; 3963 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3964 if (error == 0) 3965 error = nlookup(&nd); 3966 if (error == 0) { 3967 mp = nd.nl_nch.mount; 3968 error = VFS_EXTATTRCTL(mp, uap->cmd, 3969 uap->attrname, uap->arg, 3970 nd.nl_cred); 3971 } 3972 nlookup_done(&nd); 3973 return (error); 3974 } 3975 3976 /* 3977 * Syscall to set a named extended attribute on a file or directory. 3978 * Accepts attribute name, and a uio structure pointing to the data to set. 3979 * The uio is consumed in the style of writev(). The real work happens 3980 * in VOP_SETEXTATTR(). 
3981 */ 3982 int 3983 sys_extattr_set_file(struct extattr_set_file_args *uap) 3984 { 3985 char attrname[EXTATTR_MAXNAMELEN]; 3986 struct iovec aiov[UIO_SMALLIOV]; 3987 struct iovec *needfree; 3988 struct nlookupdata nd; 3989 struct iovec *iov; 3990 struct vnode *vp; 3991 struct uio auio; 3992 u_int iovlen; 3993 u_int cnt; 3994 int error; 3995 int i; 3996 3997 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3998 if (error) 3999 return (error); 4000 4001 vp = NULL; 4002 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4003 if (error == 0) 4004 error = nlookup(&nd); 4005 if (error == 0) 4006 error = ncp_writechk(&nd.nl_nch); 4007 if (error == 0) 4008 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4009 if (error) { 4010 nlookup_done(&nd); 4011 return (error); 4012 } 4013 4014 needfree = NULL; 4015 iovlen = uap->iovcnt * sizeof(struct iovec); 4016 if (uap->iovcnt > UIO_SMALLIOV) { 4017 if (uap->iovcnt > UIO_MAXIOV) { 4018 error = EINVAL; 4019 goto done; 4020 } 4021 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4022 needfree = iov; 4023 } else { 4024 iov = aiov; 4025 } 4026 auio.uio_iov = iov; 4027 auio.uio_iovcnt = uap->iovcnt; 4028 auio.uio_rw = UIO_WRITE; 4029 auio.uio_segflg = UIO_USERSPACE; 4030 auio.uio_td = nd.nl_td; 4031 auio.uio_offset = 0; 4032 if ((error = copyin(uap->iovp, iov, iovlen))) 4033 goto done; 4034 auio.uio_resid = 0; 4035 for (i = 0; i < uap->iovcnt; i++) { 4036 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4037 error = EINVAL; 4038 goto done; 4039 } 4040 auio.uio_resid += iov->iov_len; 4041 iov++; 4042 } 4043 cnt = auio.uio_resid; 4044 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4045 cnt -= auio.uio_resid; 4046 uap->sysmsg_result = cnt; 4047 done: 4048 vput(vp); 4049 nlookup_done(&nd); 4050 if (needfree) 4051 FREE(needfree, M_IOV); 4052 return (error); 4053 } 4054 4055 /* 4056 * Syscall to get a named extended attribute on a file or directory. 
4057 * Accepts attribute name, and a uio structure pointing to a buffer for the 4058 * data. The uio is consumed in the style of readv(). The real work 4059 * happens in VOP_GETEXTATTR(); 4060 */ 4061 int 4062 sys_extattr_get_file(struct extattr_get_file_args *uap) 4063 { 4064 char attrname[EXTATTR_MAXNAMELEN]; 4065 struct iovec aiov[UIO_SMALLIOV]; 4066 struct iovec *needfree; 4067 struct nlookupdata nd; 4068 struct iovec *iov; 4069 struct vnode *vp; 4070 struct uio auio; 4071 u_int iovlen; 4072 u_int cnt; 4073 int error; 4074 int i; 4075 4076 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4077 if (error) 4078 return (error); 4079 4080 vp = NULL; 4081 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4082 if (error == 0) 4083 error = nlookup(&nd); 4084 if (error == 0) 4085 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4086 if (error) { 4087 nlookup_done(&nd); 4088 return (error); 4089 } 4090 4091 iovlen = uap->iovcnt * sizeof (struct iovec); 4092 needfree = NULL; 4093 if (uap->iovcnt > UIO_SMALLIOV) { 4094 if (uap->iovcnt > UIO_MAXIOV) { 4095 error = EINVAL; 4096 goto done; 4097 } 4098 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4099 needfree = iov; 4100 } else { 4101 iov = aiov; 4102 } 4103 auio.uio_iov = iov; 4104 auio.uio_iovcnt = uap->iovcnt; 4105 auio.uio_rw = UIO_READ; 4106 auio.uio_segflg = UIO_USERSPACE; 4107 auio.uio_td = nd.nl_td; 4108 auio.uio_offset = 0; 4109 if ((error = copyin(uap->iovp, iov, iovlen))) 4110 goto done; 4111 auio.uio_resid = 0; 4112 for (i = 0; i < uap->iovcnt; i++) { 4113 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4114 error = EINVAL; 4115 goto done; 4116 } 4117 auio.uio_resid += iov->iov_len; 4118 iov++; 4119 } 4120 cnt = auio.uio_resid; 4121 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4122 cnt -= auio.uio_resid; 4123 uap->sysmsg_result = cnt; 4124 done: 4125 vput(vp); 4126 nlookup_done(&nd); 4127 if (needfree) 4128 FREE(needfree, M_IOV); 4129 return(error); 4130 
} 4131 4132 /* 4133 * Syscall to delete a named extended attribute from a file or directory. 4134 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 4135 */ 4136 int 4137 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4138 { 4139 char attrname[EXTATTR_MAXNAMELEN]; 4140 struct nlookupdata nd; 4141 struct vnode *vp; 4142 int error; 4143 4144 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4145 if (error) 4146 return(error); 4147 4148 vp = NULL; 4149 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4150 if (error == 0) 4151 error = nlookup(&nd); 4152 if (error == 0) 4153 error = ncp_writechk(&nd.nl_nch); 4154 if (error == 0) 4155 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4156 if (error) { 4157 nlookup_done(&nd); 4158 return (error); 4159 } 4160 4161 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred); 4162 vput(vp); 4163 nlookup_done(&nd); 4164 return(error); 4165 } 4166 4167 /* 4168 * Determine if the mount is visible to the process. 4169 */ 4170 static int 4171 chroot_visible_mnt(struct mount *mp, struct proc *p) 4172 { 4173 struct nchandle nch; 4174 4175 /* 4176 * Traverse from the mount point upwards. If we hit the process 4177 * root then the mount point is visible to the process. 4178 */ 4179 nch = mp->mnt_ncmountpt; 4180 while (nch.ncp) { 4181 if (nch.mount == p->p_fd->fd_nrdir.mount && 4182 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4183 return(1); 4184 } 4185 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4186 nch = nch.mount->mnt_ncmounton; 4187 } else { 4188 nch.ncp = nch.ncp->nc_parent; 4189 } 4190 } 4191 4192 /* 4193 * If the mount point is not visible to the process, but the 4194 * process root is in a subdirectory of the mount, return 4195 * TRUE anyway. 4196 */ 4197 if (p->p_fd->fd_nrdir.mount == mp) 4198 return(1); 4199 4200 return(0); 4201 } 4202 4203