1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.72 2005/09/17 07:43:00 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/kern_syscall.h> 66 67 #include <machine/limits.h> 68 #include <vfs/union/union.h> 69 #include <sys/sysctl.h> 70 #include <vm/vm.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_zone.h> 73 #include <vm/vm_page.h> 74 75 #include <sys/file2.h> 76 77 static int checkvp_chdir (struct vnode *vn, struct thread *td); 78 static void checkdirs (struct vnode *olddp, struct namecache *ncp); 79 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 80 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 81 static int getutimes (const struct timeval *, struct timespec *); 82 static int setfown (struct vnode *, 
uid_t, gid_t); 83 static int setfmode (struct vnode *, int); 84 static int setfflags (struct vnode *, int); 85 static int setutimes (struct vnode *, const struct timespec *, int); 86 static int usermount = 0; /* if 1, non-root can mount fs. */ 87 88 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 89 90 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 91 92 /* 93 * Virtual File System System Calls 94 */ 95 96 /* 97 * Mount a file system. 98 */ 99 /* 100 * mount_args(char *type, char *path, int flags, caddr_t data) 101 */ 102 /* ARGSUSED */ 103 int 104 mount(struct mount_args *uap) 105 { 106 struct thread *td = curthread; 107 struct proc *p = td->td_proc; 108 struct vnode *vp; 109 struct namecache *ncp; 110 struct mount *mp; 111 struct vfsconf *vfsp; 112 int error, flag = 0, flag2 = 0; 113 struct vattr va; 114 struct nlookupdata nd; 115 char fstypename[MFSNAMELEN]; 116 struct nlcomponent nlc; 117 118 KKASSERT(p); 119 if (p->p_ucred->cr_prison != NULL) 120 return (EPERM); 121 if (usermount == 0 && (error = suser(td))) 122 return (error); 123 /* 124 * Do not allow NFS export by non-root users. 125 */ 126 if (uap->flags & MNT_EXPORTED) { 127 error = suser(td); 128 if (error) 129 return (error); 130 } 131 /* 132 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 133 */ 134 if (suser(td)) 135 uap->flags |= MNT_NOSUID | MNT_NODEV; 136 137 /* 138 * Lookup the requested path and extract the ncp and vnode. 139 */ 140 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 141 if (error == 0) { 142 if ((error = nlookup(&nd)) == 0) { 143 if (nd.nl_ncp->nc_vp == NULL) 144 error = ENOENT; 145 } 146 } 147 if (error) { 148 nlookup_done(&nd); 149 return (error); 150 } 151 152 /* 153 * Extract the locked+refd ncp and cleanup the nd structure 154 */ 155 ncp = nd.nl_ncp; 156 nd.nl_ncp = NULL; 157 nlookup_done(&nd); 158 159 /* 160 * now we have the locked ref'd ncp and unreferenced vnode. 
161 */ 162 vp = ncp->nc_vp; 163 if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) { 164 cache_put(ncp); 165 return (error); 166 } 167 cache_unlock(ncp); 168 169 /* 170 * Now we have an unlocked ref'd ncp and a locked ref'd vp 171 */ 172 if (uap->flags & MNT_UPDATE) { 173 if ((vp->v_flag & VROOT) == 0) { 174 cache_drop(ncp); 175 vput(vp); 176 return (EINVAL); 177 } 178 mp = vp->v_mount; 179 flag = mp->mnt_flag; 180 flag2 = mp->mnt_kern_flag; 181 /* 182 * We only allow the filesystem to be reloaded if it 183 * is currently mounted read-only. 184 */ 185 if ((uap->flags & MNT_RELOAD) && 186 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 187 cache_drop(ncp); 188 vput(vp); 189 return (EOPNOTSUPP); /* Needs translation */ 190 } 191 /* 192 * Only root, or the user that did the original mount is 193 * permitted to update it. 194 */ 195 if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && 196 (error = suser(td))) { 197 cache_drop(ncp); 198 vput(vp); 199 return (error); 200 } 201 if (vfs_busy(mp, LK_NOWAIT, td)) { 202 cache_drop(ncp); 203 vput(vp); 204 return (EBUSY); 205 } 206 if ((vp->v_flag & VMOUNT) != 0 || 207 vp->v_mountedhere != NULL) { 208 cache_drop(ncp); 209 vfs_unbusy(mp, td); 210 vput(vp); 211 return (EBUSY); 212 } 213 vp->v_flag |= VMOUNT; 214 mp->mnt_flag |= 215 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 216 VOP_UNLOCK(vp, 0, td); 217 goto update; 218 } 219 /* 220 * If the user is not root, ensure that they own the directory 221 * onto which we are attempting to mount. 
222 */ 223 if ((error = VOP_GETATTR(vp, &va, td)) || 224 (va.va_uid != p->p_ucred->cr_uid && 225 (error = suser(td)))) { 226 cache_drop(ncp); 227 vput(vp); 228 return (error); 229 } 230 if ((error = vinvalbuf(vp, V_SAVE, td, 0, 0)) != 0) { 231 cache_drop(ncp); 232 vput(vp); 233 return (error); 234 } 235 if (vp->v_type != VDIR) { 236 cache_drop(ncp); 237 vput(vp); 238 return (ENOTDIR); 239 } 240 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 241 cache_drop(ncp); 242 vput(vp); 243 return (error); 244 } 245 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 246 if (!strcmp(vfsp->vfc_name, fstypename)) 247 break; 248 } 249 if (vfsp == NULL) { 250 linker_file_t lf; 251 252 /* Only load modules for root (very important!) */ 253 if ((error = suser(td)) != 0) { 254 cache_drop(ncp); 255 vput(vp); 256 return error; 257 } 258 error = linker_load_file(fstypename, &lf); 259 if (error || lf == NULL) { 260 cache_drop(ncp); 261 vput(vp); 262 if (lf == NULL) 263 error = ENODEV; 264 return error; 265 } 266 lf->userrefs++; 267 /* lookup again, see if the VFS was loaded */ 268 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 269 if (!strcmp(vfsp->vfc_name, fstypename)) 270 break; 271 } 272 if (vfsp == NULL) { 273 lf->userrefs--; 274 linker_file_unload(lf); 275 cache_drop(ncp); 276 vput(vp); 277 return (ENODEV); 278 } 279 } 280 if ((vp->v_flag & VMOUNT) != 0 || 281 vp->v_mountedhere != NULL) { 282 cache_drop(ncp); 283 vput(vp); 284 return (EBUSY); 285 } 286 vp->v_flag |= VMOUNT; 287 288 /* 289 * Allocate and initialize the filesystem. 
290 */ 291 mp = malloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 292 TAILQ_INIT(&mp->mnt_nvnodelist); 293 TAILQ_INIT(&mp->mnt_reservedvnlist); 294 TAILQ_INIT(&mp->mnt_jlist); 295 mp->mnt_nvnodelistsize = 0; 296 lockinit(&mp->mnt_lock, 0, "vfslock", 0, LK_NOPAUSE); 297 vfs_busy(mp, LK_NOWAIT, td); 298 mp->mnt_op = vfsp->vfc_vfsops; 299 mp->mnt_vfc = vfsp; 300 vfsp->vfc_refcount++; 301 mp->mnt_stat.f_type = vfsp->vfc_typenum; 302 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 303 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 304 mp->mnt_vnodecovered = vp; 305 mp->mnt_stat.f_owner = p->p_ucred->cr_uid; 306 mp->mnt_iosize_max = DFLTPHYS; 307 VOP_UNLOCK(vp, 0, td); 308 update: 309 /* 310 * Set the mount level flags. 311 */ 312 if (uap->flags & MNT_RDONLY) 313 mp->mnt_flag |= MNT_RDONLY; 314 else if (mp->mnt_flag & MNT_RDONLY) 315 mp->mnt_kern_flag |= MNTK_WANTRDWR; 316 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 317 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 318 MNT_NOSYMFOLLOW | MNT_IGNORE | 319 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 320 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 321 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 322 MNT_NOSYMFOLLOW | MNT_IGNORE | 323 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 324 /* 325 * Mount the filesystem. 326 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 327 * get. 
328 */ 329 error = VFS_MOUNT(mp, uap->path, uap->data, td); 330 if (mp->mnt_flag & MNT_UPDATE) { 331 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 332 mp->mnt_flag &= ~MNT_RDONLY; 333 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 334 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 335 if (error) { 336 mp->mnt_flag = flag; 337 mp->mnt_kern_flag = flag2; 338 } 339 vfs_unbusy(mp, td); 340 vp->v_flag &= ~VMOUNT; 341 vrele(vp); 342 cache_drop(ncp); 343 return (error); 344 } 345 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 346 /* 347 * Put the new filesystem on the mount list after root. The mount 348 * point gets its own mnt_ncp which is a special ncp linking the 349 * vnode-under to the root of the new mount. The lookup code 350 * detects the mount point going forward and detects the special 351 * mnt_ncp via NCP_MOUNTPT going backwards. 352 * 353 * It is not necessary to invalidate or purge the vnode underneath 354 * because elements under the mount will be given their own glue 355 * namecache record. 356 */ 357 if (!error) { 358 nlc.nlc_nameptr = ""; 359 nlc.nlc_namelen = 0; 360 mp->mnt_ncp = cache_nlookup(ncp, &nlc); 361 cache_setunresolved(mp->mnt_ncp); 362 mp->mnt_ncp->nc_flag |= NCF_MOUNTPT; 363 mp->mnt_ncp->nc_mount = mp; 364 cache_drop(ncp); 365 /* XXX get the root of the fs and cache_setvp(mnt_ncp...) 
*/ 366 vp->v_flag &= ~VMOUNT; 367 vp->v_mountedhere = mp; 368 mountlist_insert(mp, MNTINS_LAST); 369 checkdirs(vp, mp->mnt_ncp); 370 cache_unlock(mp->mnt_ncp); /* leave ref intact */ 371 VOP_UNLOCK(vp, 0, td); 372 error = vfs_allocate_syncvnode(mp); 373 vfs_unbusy(mp, td); 374 if ((error = VFS_START(mp, 0, td)) != 0) 375 vrele(vp); 376 } else { 377 vfs_rm_vnodeops(&mp->mnt_vn_coherency_ops); 378 vfs_rm_vnodeops(&mp->mnt_vn_journal_ops); 379 vfs_rm_vnodeops(&mp->mnt_vn_norm_ops); 380 vfs_rm_vnodeops(&mp->mnt_vn_spec_ops); 381 vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops); 382 vp->v_flag &= ~VMOUNT; 383 mp->mnt_vfc->vfc_refcount--; 384 vfs_unbusy(mp, td); 385 free(mp, M_MOUNT); 386 cache_drop(ncp); 387 vput(vp); 388 } 389 return (error); 390 } 391 392 /* 393 * Scan all active processes to see if any of them have a current 394 * or root directory onto which the new filesystem has just been 395 * mounted. If so, replace them with the new mount point. 396 * 397 * The passed ncp is ref'd and locked (from the mount code) and 398 * must be associated with the vnode representing the root of the 399 * mount point. 
*/
static void
checkdirs(struct vnode *olddp, struct namecache *ncp)
{
	struct filedesc *fdp;
	struct vnode *newdp;
	struct mount *mp;
	struct proc *p;

	/* Nothing to do when the covered vnode has a single use count. */
	if (olddp->v_usecount == 1)
		return;
	mp = olddp->v_mountedhere;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	cache_setvp(ncp, newdp);

	/* The system root itself was covered: switch it to the new root. */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(ncp));
	}

	/*
	 * Replace every process cwd / root directory that points at the
	 * covered vnode, swapping both the vnode and namecache references.
	 */
	FOREACH_PROC_IN_SYSTEM(p) {
		fdp = p->p_fd;
		if (fdp->fd_cdir == olddp) {
			vrele(fdp->fd_cdir);
			vref(newdp);
			fdp->fd_cdir = newdp;
			cache_drop(fdp->fd_ncdir);
			fdp->fd_ncdir = cache_hold(ncp);
		}
		if (fdp->fd_rdir == olddp) {
			vrele(fdp->fd_rdir);
			vref(newdp);
			fdp->fd_rdir = newdp;
			cache_drop(fdp->fd_nrdir);
			fdp->fd_nrdir = cache_hold(ncp);
		}
	}
	/* Release the vnode obtained from VFS_ROOT() above. */
	vput(newdp);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/*
 * umount_args(char *path, int flags)
 *
 * Returns EPERM for callers whose credential has a prison, requires
 * suser() unless vfs.usermount is set, and returns EINVAL for the root
 * filesystem or when the path is not the root vnode of a mount.  The
 * actual work is done by dounmount().
 */
/* ARGSUSED */
int
unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct mount *mp;
	int error;
	struct nlookupdata nd;

	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = suser(td)))
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error)
		return (error);

	mp = vp->v_mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	    (error = suser(td))) {
		vput(vp);
		return (error);
	}

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		vput(vp);
		return (EINVAL);
	}

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & VROOT) == 0) {
		vput(vp);
		return (EINVAL);
	}
	/* The vnode is released before the unmount itself is attempted. */
	vput(vp);
	return (dounmount(mp, uap->flags, td));
}

/*
 * Do the actual file system unmount.
 */
/*
 * Callback handed to mountlist_interlock() by dounmount(): marks the mount
 * with MNTK_UNMOUNT, or returns EBUSY if the flag is already set.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * Unmount mp.  flags carries the MNT_* unmount flags (MNT_FORCE is the one
 * examined here).  Returns 0 on success, in which case mp has been freed,
 * or an errno value, in which case the unmount flags set on mp have been
 * cleared again and any MNTK_MWAIT waiters woken.
 */
int
dounmount(struct mount *mp, int flags, struct thread *td)
{
	struct vnode *coveredvp;
	int error;
	int async_flag;

	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		return (error);

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* Only a forced unmount waits for the drain; otherwise LK_NOWAIT. */
	error = lockmgr(&mp->mnt_lock, LK_DRAIN |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), NULL, td);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* MNT_ASYNC is cleared for the unmount and restored on failure. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/*
	 * VFS_UNMOUNT() is attempted when the mount is read-only, when the
	 * final VFS_SYNC() succeeds, or unconditionally for MNT_FORCE.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
	    (flags & MNT_FORCE))
		error = VFS_UNMOUNT(mp, flags, td);
	if (error) {
		/* Failed: put the mount back into its pre-unmount state. */
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, NULL, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(&mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(&mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(&mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(&mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops);

	/* Detach from the covered vnode and drop the mount point's ncp. */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		coveredvp->v_mountedhere = NULL;
		vrele(coveredvp);
		cache_drop(mp->mnt_ncp);
		mp->mnt_ncp = NULL;
	}
	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	free(mp, M_MOUNT);
	return (0);
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * sync(2): run sync_callback() over the mount list.  Always returns 0.
 */
/* ARGSUSED */
int
sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}

/*
 * Per-mount worker for sync(): for mounts that are not read-only, issue a
 * non-waiting msync and VFS_SYNC with MNT_ASYNC temporarily cleared.  The
 * VFS_SYNC() result is ignored and 0 is always returned.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT, curthread);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
/* When zero, quotactl() returns EPERM to callers with a prison. */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
657 */ 658 /* ARGSUSED */ 659 int 660 quotactl(struct quotactl_args *uap) 661 { 662 struct nlookupdata nd; 663 struct thread *td; 664 struct proc *p; 665 struct mount *mp; 666 int error; 667 668 td = curthread; 669 p = td->td_proc; 670 if (p->p_ucred->cr_prison && !prison_quotas) 671 return (EPERM); 672 673 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 674 if (error == 0) 675 error = nlookup(&nd); 676 if (error == 0) { 677 mp = nd.nl_ncp->nc_mount; 678 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 679 uap->arg, nd.nl_td); 680 } 681 nlookup_done(&nd); 682 return (error); 683 } 684 685 /* 686 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 687 * void *buf, int buflen) 688 * 689 * This function operates on a mount point and executes the specified 690 * operation using the specified control data, and possibly returns data. 691 * 692 * The actual number of bytes stored in the result buffer is returned, 0 693 * if none, otherwise an error is returned. 694 */ 695 /* ARGSUSED */ 696 int 697 mountctl(struct mountctl_args *uap) 698 { 699 struct thread *td = curthread; 700 struct proc *p = td->td_proc; 701 struct filedesc *fdp = p->p_fd; 702 struct file *fp; 703 void *ctl = NULL; 704 void *buf = NULL; 705 char *path = NULL; 706 int error; 707 708 /* 709 * Sanity and permissions checks. We must be root. 
710 */ 711 KKASSERT(p); 712 if (p->p_ucred->cr_prison != NULL) 713 return (EPERM); 714 if ((error = suser(td)) != 0) 715 return (error); 716 717 /* 718 * Argument length checks 719 */ 720 if (uap->ctllen < 0 || uap->ctllen > 1024) 721 return (EINVAL); 722 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 723 return (EINVAL); 724 if (uap->path == NULL) 725 return (EINVAL); 726 727 /* 728 * Allocate the necessary buffers and copyin data 729 */ 730 path = zalloc(namei_zone); 731 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 732 if (error) 733 goto done; 734 735 if (uap->ctllen) { 736 ctl = malloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 737 error = copyin(uap->ctl, ctl, uap->ctllen); 738 if (error) 739 goto done; 740 } 741 if (uap->buflen) 742 buf = malloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 743 744 /* 745 * Validate the descriptor 746 */ 747 if (uap->fd == -1) { 748 fp = NULL; 749 } else if ((u_int)uap->fd >= fdp->fd_nfiles || 750 (fp = fdp->fd_files[uap->fd].fp) == NULL) { 751 error = EBADF; 752 goto done; 753 } 754 if (fp) 755 fhold(fp); 756 757 /* 758 * Execute the internal kernel function and clean up. 759 */ 760 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 761 if (fp) 762 fdrop(fp, td); 763 if (error == 0 && uap->sysmsg_result > 0) 764 error = copyout(buf, uap->buf, uap->sysmsg_result); 765 done: 766 if (path) 767 zfree(namei_zone, path); 768 if (ctl) 769 free(ctl, M_TEMP); 770 if (buf) 771 free(buf, M_TEMP); 772 return (error); 773 } 774 775 /* 776 * Execute a mount control operation by resolving the path to a mount point 777 * and calling vop_mountctl(). 
778 */ 779 int 780 kern_mountctl(const char *path, int op, struct file *fp, 781 const void *ctl, int ctllen, 782 void *buf, int buflen, int *res) 783 { 784 struct vnode *vp; 785 struct mount *mp; 786 struct nlookupdata nd; 787 int error; 788 789 *res = 0; 790 vp = NULL; 791 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 792 if (error == 0) 793 error = nlookup(&nd); 794 if (error == 0) 795 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 796 nlookup_done(&nd); 797 if (error) 798 return (error); 799 800 mp = vp->v_mount; 801 802 /* 803 * Must be the root of the filesystem 804 */ 805 if ((vp->v_flag & VROOT) == 0) { 806 vput(vp); 807 return (EINVAL); 808 } 809 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 810 buf, buflen, res); 811 vput(vp); 812 return (error); 813 } 814 815 int 816 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 817 { 818 struct thread *td = curthread; 819 struct proc *p = td->td_proc; 820 struct mount *mp; 821 struct statfs *sp; 822 char *fullpath, *freepath; 823 int error; 824 825 if ((error = nlookup(nd)) != 0) 826 return (error); 827 mp = nd->nl_ncp->nc_mount; 828 sp = &mp->mnt_stat; 829 if ((error = VFS_STATFS(mp, sp, td)) != 0) 830 return (error); 831 832 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 833 if (error) 834 return(error); 835 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 836 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 837 free(freepath, M_TEMP); 838 839 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 840 bcopy(sp, buf, sizeof(*buf)); 841 /* Only root should have access to the fsid's. */ 842 if (suser(td)) 843 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 844 return (0); 845 } 846 847 /* 848 * statfs_args(char *path, struct statfs *buf) 849 * 850 * Get filesystem statistics. 
851 */ 852 int 853 statfs(struct statfs_args *uap) 854 { 855 struct nlookupdata nd; 856 struct statfs buf; 857 int error; 858 859 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 860 if (error == 0) 861 error = kern_statfs(&nd, &buf); 862 nlookup_done(&nd); 863 if (error == 0) 864 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 865 return (error); 866 } 867 868 int 869 kern_fstatfs(int fd, struct statfs *buf) 870 { 871 struct thread *td = curthread; 872 struct proc *p = td->td_proc; 873 struct file *fp; 874 struct mount *mp; 875 struct statfs *sp; 876 char *fullpath, *freepath; 877 int error; 878 879 KKASSERT(p); 880 error = getvnode(p->p_fd, fd, &fp); 881 if (error) 882 return (error); 883 mp = ((struct vnode *)fp->f_data)->v_mount; 884 if (mp == NULL) 885 return (EBADF); 886 sp = &mp->mnt_stat; 887 error = VFS_STATFS(mp, sp, td); 888 if (error) 889 return (error); 890 891 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 892 if (error) 893 return(error); 894 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 895 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 896 free(freepath, M_TEMP); 897 898 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 899 bcopy(sp, buf, sizeof(*buf)); 900 901 /* Only root should have access to the fsid's. */ 902 if (suser(td)) 903 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 904 return (0); 905 } 906 907 /* 908 * fstatfs_args(int fd, struct statfs *buf) 909 * 910 * Get filesystem statistics. 911 */ 912 int 913 fstatfs(struct fstatfs_args *uap) 914 { 915 struct statfs buf; 916 int error; 917 918 error = kern_fstatfs(uap->fd, &buf); 919 920 if (error == 0) 921 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 922 return (error); 923 } 924 925 /* 926 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 927 * 928 * Get statistics on all filesystems. 
*/

/*
 * State shared between getfsstat() and getfsstat_callback() while the
 * mount list is scanned.
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user buffer slot (uap->buf), may be NULL */
	long count;		/* mounts counted so far */
	long maxcount;		/* uap->bufsize / sizeof(struct statfs) */
	int error;		/* first error recorded by the callback */
	int flags;		/* uap->flags (MNT_WAIT/MNT_NOWAIT/MNT_LAZY) */
	int is_chrooted;	/* caller's fd_nrdir lacks NCF_ROOT */
	struct thread *td;
	struct proc *p;
};

static int getfsstat_callback(struct mount *, void *);

/*
 * getfsstat(2): scan the mount list via getfsstat_callback().  The syscall
 * result is the number of mounts counted, clamped to maxcount when a user
 * buffer was supplied; the return value is the error recorded during the
 * scan (0 if none).
 */
/* ARGSUSED */
int
getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));
	if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0)
		info.is_chrooted = 1;
	else
		info.is_chrooted = 0;

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;
	info.p = p;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount worker for getfsstat().  While there is room in the user
 * buffer it copies out the mount's statfs data; in all non-skipped cases
 * it increments info->count.  Returns 0 normally, or -1 after storing an
 * error in info->error.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/*
		 * Mounts not visible from a chroot are skipped without
		 * being counted.  Note this test is only reached while
		 * the user buffer still has room.
		 */
		if (info->is_chrooted && !chroot_visible_mnt(mp, info->p))
			return(0);
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * NOTE(review): as written, the condition below also
		 * refreshes whenever MNT_WAIT is set, i.e. MNT_WAIT wins
		 * over MNT_NOWAIT/MNT_LAZY -- confirm which is intended.
		 * A mount whose VFS_STATFS() fails is skipped without
		 * being counted and without recording an error.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = cache_fullpath(info->p, mp->mnt_ncp,
					&fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		free(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 *
 * Returns ENOTDIR when the descriptor's vnode is not a directory or the
 * file has no namecache entry (f_ncp), otherwise the VOP_ACCESS() result
 * or an error from resolving a mount stacked on the directory.
 */
/* ARGSUSED */
int
fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct namecache *ncp, *oncp;
	struct namecache *nct;
	int error;

	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if (vp->v_type != VDIR || fp->f_ncp == NULL)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, td);
	if (error) {
		vput(vp);
		return (error);
	}
	ncp = cache_hold(fp->f_ncp);
	/*
	 * If something is mounted on the directory, step onto the mount
	 * (repeatedly, for stacked mounts), replacing vp/ncp each time.
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		error = nlookup_mp(mp, &nct);
		if (error == 0) {
			cache_unlock(nct);	/* leave ref intact */
			vput(vp);
			vp = nct->nc_vp;
			error = vget(vp, LK_SHARED, td);
			KKASSERT(error == 0);
			cache_drop(ncp);
			ncp = nct;
		}
	}
	if (error == 0) {
		/* Install the new cwd, then release the previous one. */
		ovp = fdp->fd_cdir;
		oncp = fdp->fd_ncdir;
		VOP_UNLOCK(vp, 0, td);	/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = ncp;
		cache_drop(oncp);
		vrele(ovp);
	} else {
		cache_drop(ncp);
		vput(vp);
	}
	return (error);
}

/*
 * Change the current directory to the target of an initialized lookup.
 * On success the namecache reference held by nd is transferred to
 * fd_ncdir and nd->nl_ncp is cleared; the caller remains responsible for
 * nlookup_done().  Returns ENOENT when the entry has no vnode.
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct namecache *oncp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED, td)) != 0)
		return (error);

	error = checkvp_chdir(vp, td);
	VOP_UNLOCK(vp, 0, td);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		oncp = fdp->fd_ncdir;
		cache_unlock(nd->nl_ncp);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_ncp;
		fdp->fd_cdir = vp;
		cache_drop(oncp);
		vrele(ovp);
		nd->nl_ncp = NULL;
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
1135 */ 1136 static int 1137 chroot_refuse_vdir_fds(fdp) 1138 struct filedesc *fdp; 1139 { 1140 struct vnode *vp; 1141 struct file *fp; 1142 int error; 1143 int fd; 1144 1145 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1146 error = getvnode(fdp, fd, &fp); 1147 if (error) 1148 continue; 1149 vp = (struct vnode *)fp->f_data; 1150 if (vp->v_type != VDIR) 1151 continue; 1152 return(EPERM); 1153 } 1154 return (0); 1155 } 1156 1157 /* 1158 * This sysctl determines if we will allow a process to chroot(2) if it 1159 * has a directory open: 1160 * 0: disallowed for all processes. 1161 * 1: allowed for processes that were not already chroot(2)'ed. 1162 * 2: allowed for all processes. 1163 */ 1164 1165 static int chroot_allow_open_directories = 1; 1166 1167 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1168 &chroot_allow_open_directories, 0, ""); 1169 1170 /* 1171 * chroot to the specified namecache entry. We obtain the vp from the 1172 * namecache data. The passed ncp must be locked and referenced and will 1173 * remain locked and referenced on return. 1174 */ 1175 int 1176 kern_chroot(struct namecache *ncp) 1177 { 1178 struct thread *td = curthread; 1179 struct proc *p = td->td_proc; 1180 struct filedesc *fdp = p->p_fd; 1181 struct vnode *vp; 1182 int error; 1183 1184 /* 1185 * Only root can chroot 1186 */ 1187 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0) 1188 return (error); 1189 1190 /* 1191 * Disallow open directory descriptors (fchdir() breakouts). 1192 */ 1193 if (chroot_allow_open_directories == 0 || 1194 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1195 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1196 return (error); 1197 } 1198 if ((vp = ncp->nc_vp) == NULL) 1199 return (ENOENT); 1200 1201 if ((error = vget(vp, LK_SHARED, td)) != 0) 1202 return (error); 1203 1204 /* 1205 * Check the validity of vp as a directory to change to and 1206 * associate it with rdir/jdir. 
1207 */ 1208 error = checkvp_chdir(vp, td); 1209 VOP_UNLOCK(vp, 0, td); /* leave reference intact */ 1210 if (error == 0) { 1211 vrele(fdp->fd_rdir); 1212 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1213 cache_drop(fdp->fd_nrdir); 1214 fdp->fd_nrdir = cache_hold(ncp); 1215 if (fdp->fd_jdir == NULL) { 1216 fdp->fd_jdir = vp; 1217 vref(fdp->fd_jdir); 1218 fdp->fd_njdir = cache_hold(ncp); 1219 } 1220 } else { 1221 vrele(vp); 1222 } 1223 return (error); 1224 } 1225 1226 /* 1227 * chroot_args(char *path) 1228 * 1229 * Change notion of root (``/'') directory. 1230 */ 1231 /* ARGSUSED */ 1232 int 1233 chroot(struct chroot_args *uap) 1234 { 1235 struct thread *td = curthread; 1236 struct nlookupdata nd; 1237 int error; 1238 1239 KKASSERT(td->td_proc); 1240 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1241 if (error) { 1242 nlookup_done(&nd); 1243 return(error); 1244 } 1245 error = nlookup(&nd); 1246 if (error == 0) 1247 error = kern_chroot(nd.nl_ncp); 1248 nlookup_done(&nd); 1249 return(error); 1250 } 1251 1252 /* 1253 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1254 * determine whether it is legal to chdir to the vnode. The vnode's state 1255 * is not changed by this call. 
1256 */ 1257 int 1258 checkvp_chdir(struct vnode *vp, struct thread *td) 1259 { 1260 int error; 1261 1262 if (vp->v_type != VDIR) 1263 error = ENOTDIR; 1264 else 1265 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td); 1266 return (error); 1267 } 1268 1269 int 1270 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1271 { 1272 struct thread *td = curthread; 1273 struct proc *p = td->td_proc; 1274 struct filedesc *fdp = p->p_fd; 1275 int cmode, flags; 1276 struct file *nfp; 1277 struct file *fp; 1278 struct vnode *vp; 1279 int type, indx, error; 1280 struct flock lf; 1281 1282 if ((oflags & O_ACCMODE) == O_ACCMODE) 1283 return (EINVAL); 1284 flags = FFLAGS(oflags); 1285 error = falloc(p, &nfp, NULL); 1286 if (error) 1287 return (error); 1288 fp = nfp; 1289 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1290 1291 /* 1292 * XXX p_dupfd is a real mess. It allows a device to return a 1293 * file descriptor to be duplicated rather then doing the open 1294 * itself. 1295 */ 1296 p->p_dupfd = -1; 1297 1298 /* 1299 * Call vn_open() to do the lookup and assign the vnode to the 1300 * file pointer. vn_open() does not change the ref count on fp 1301 * and the vnode, on success, will be inherited by the file pointer 1302 * and unlocked. 1303 */ 1304 nd->nl_flags |= NLC_LOCKVP; 1305 error = vn_open(nd, fp, flags, cmode); 1306 nlookup_done(nd); 1307 if (error) { 1308 /* 1309 * handle special fdopen() case. bleh. dupfdopen() is 1310 * responsible for dropping the old contents of ofiles[indx] 1311 * if it succeeds. 1312 * 1313 * Note that if fsetfd() succeeds it will add a ref to fp 1314 * which represents the fd_files[] assignment. We must still 1315 * drop our reference. 
1316 */ 1317 if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0) { 1318 if (fsetfd(p, fp, &indx) == 0) { 1319 error = dupfdopen(fdp, indx, p->p_dupfd, flags, error); 1320 if (error == 0) { 1321 *res = indx; 1322 fdrop(fp, td); /* our ref */ 1323 return (0); 1324 } 1325 if (fdp->fd_files[indx].fp == fp) { 1326 funsetfd(fdp, indx); 1327 fdrop(fp, td); /* fd_files[] ref */ 1328 } 1329 } 1330 } 1331 fdrop(fp, td); /* our ref */ 1332 if (error == ERESTART) 1333 error = EINTR; 1334 return (error); 1335 } 1336 1337 /* 1338 * ref the vnode for ourselves so it can't be ripped out from under 1339 * is. XXX need an ND flag to request that the vnode be returned 1340 * anyway. 1341 */ 1342 vp = (struct vnode *)fp->f_data; 1343 vref(vp); 1344 if ((error = fsetfd(p, fp, &indx)) != 0) { 1345 fdrop(fp, td); 1346 vrele(vp); 1347 return (error); 1348 } 1349 1350 /* 1351 * If no error occurs the vp will have been assigned to the file 1352 * pointer. 1353 */ 1354 p->p_dupfd = 0; 1355 1356 /* 1357 * There should be 2 references on the file, one from the descriptor 1358 * table, and one for us. 1359 * 1360 * Handle the case where someone closed the file (via its file 1361 * descriptor) while we were blocked. The end result should look 1362 * like opening the file succeeded but it was immediately closed. 1363 */ 1364 if (fp->f_count == 1) { 1365 KASSERT(fdp->fd_files[indx].fp != fp, 1366 ("Open file descriptor lost all refs")); 1367 vrele(vp); 1368 fo_close(fp, td); 1369 fdrop(fp, td); 1370 *res = indx; 1371 return 0; 1372 } 1373 1374 if (flags & (O_EXLOCK | O_SHLOCK)) { 1375 lf.l_whence = SEEK_SET; 1376 lf.l_start = 0; 1377 lf.l_len = 0; 1378 if (flags & O_EXLOCK) 1379 lf.l_type = F_WRLCK; 1380 else 1381 lf.l_type = F_RDLCK; 1382 type = F_FLOCK; 1383 if ((flags & FNONBLOCK) == 0) 1384 type |= F_WAIT; 1385 1386 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1387 /* 1388 * lock request failed. 
Normally close the descriptor 1389 * but handle the case where someone might have dup()d 1390 * it when we weren't looking. One reference is 1391 * owned by the descriptor array, the other by us. 1392 */ 1393 vrele(vp); 1394 if (fdp->fd_files[indx].fp == fp) { 1395 funsetfd(fdp, indx); 1396 fdrop(fp, td); 1397 } 1398 fdrop(fp, td); 1399 return (error); 1400 } 1401 fp->f_flag |= FHASLOCK; 1402 } 1403 /* assert that vn_open created a backing object if one is needed */ 1404 KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, 1405 ("open: vmio vnode has no backing object after vn_open")); 1406 1407 vrele(vp); 1408 1409 /* 1410 * release our private reference, leaving the one associated with the 1411 * descriptor table intact. 1412 */ 1413 fdrop(fp, td); 1414 *res = indx; 1415 return (0); 1416 } 1417 1418 /* 1419 * open_args(char *path, int flags, int mode) 1420 * 1421 * Check permissions, allocate an open file structure, 1422 * and call the device open routine if any. 1423 */ 1424 int 1425 open(struct open_args *uap) 1426 { 1427 struct nlookupdata nd; 1428 int error; 1429 1430 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1431 if (error == 0) { 1432 error = kern_open(&nd, uap->flags, 1433 uap->mode, &uap->sysmsg_result); 1434 } 1435 nlookup_done(&nd); 1436 return (error); 1437 } 1438 1439 int 1440 kern_mknod(struct nlookupdata *nd, int mode, int dev) 1441 { 1442 struct namecache *ncp; 1443 struct thread *td = curthread; 1444 struct proc *p = td->td_proc; 1445 struct vnode *vp; 1446 struct vattr vattr; 1447 int error; 1448 int whiteout = 0; 1449 1450 KKASSERT(p); 1451 1452 switch (mode & S_IFMT) { 1453 case S_IFCHR: 1454 case S_IFBLK: 1455 error = suser(td); 1456 break; 1457 default: 1458 error = suser_cred(p->p_ucred, PRISON_ROOT); 1459 break; 1460 } 1461 if (error) 1462 return (error); 1463 1464 bwillwrite(); 1465 nd->nl_flags |= NLC_CREATE; 1466 if ((error = nlookup(nd)) != 0) 1467 return (error); 1468 ncp = nd->nl_ncp; 1469 if (ncp->nc_vp) 
1470 return (EEXIST); 1471 1472 VATTR_NULL(&vattr); 1473 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1474 vattr.va_rdev = dev; 1475 whiteout = 0; 1476 1477 switch (mode & S_IFMT) { 1478 case S_IFMT: /* used by badsect to flag bad sectors */ 1479 vattr.va_type = VBAD; 1480 break; 1481 case S_IFCHR: 1482 vattr.va_type = VCHR; 1483 break; 1484 case S_IFBLK: 1485 vattr.va_type = VBLK; 1486 break; 1487 case S_IFWHT: 1488 whiteout = 1; 1489 break; 1490 default: 1491 error = EINVAL; 1492 break; 1493 } 1494 if (error == 0) { 1495 if (whiteout) { 1496 error = VOP_NWHITEOUT(ncp, nd->nl_cred, NAMEI_CREATE); 1497 } else { 1498 vp = NULL; 1499 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1500 if (error == 0) 1501 vput(vp); 1502 } 1503 } 1504 return (error); 1505 } 1506 1507 /* 1508 * mknod_args(char *path, int mode, int dev) 1509 * 1510 * Create a special file. 1511 */ 1512 int 1513 mknod(struct mknod_args *uap) 1514 { 1515 struct nlookupdata nd; 1516 int error; 1517 1518 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1519 if (error == 0) 1520 error = kern_mknod(&nd, uap->mode, uap->dev); 1521 nlookup_done(&nd); 1522 return (error); 1523 } 1524 1525 int 1526 kern_mkfifo(struct nlookupdata *nd, int mode) 1527 { 1528 struct namecache *ncp; 1529 struct thread *td = curthread; 1530 struct proc *p = td->td_proc; 1531 struct vattr vattr; 1532 struct vnode *vp; 1533 int error; 1534 1535 bwillwrite(); 1536 1537 nd->nl_flags |= NLC_CREATE; 1538 if ((error = nlookup(nd)) != 0) 1539 return (error); 1540 ncp = nd->nl_ncp; 1541 if (ncp->nc_vp) 1542 return (EEXIST); 1543 1544 VATTR_NULL(&vattr); 1545 vattr.va_type = VFIFO; 1546 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1547 vp = NULL; 1548 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1549 if (error == 0) 1550 vput(vp); 1551 return (error); 1552 } 1553 1554 /* 1555 * mkfifo_args(char *path, int mode) 1556 * 1557 * Create a named pipe. 
1558 */ 1559 int 1560 mkfifo(struct mkfifo_args *uap) 1561 { 1562 struct nlookupdata nd; 1563 int error; 1564 1565 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1566 if (error == 0) 1567 error = kern_mkfifo(&nd, uap->mode); 1568 nlookup_done(&nd); 1569 return (error); 1570 } 1571 1572 int 1573 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 1574 { 1575 struct thread *td = curthread; 1576 struct vnode *vp; 1577 int error; 1578 1579 /* 1580 * Lookup the source and obtained a locked vnode. 1581 * 1582 * XXX relookup on vget failure / race ? 1583 */ 1584 bwillwrite(); 1585 if ((error = nlookup(nd)) != 0) 1586 return (error); 1587 vp = nd->nl_ncp->nc_vp; 1588 KKASSERT(vp != NULL); 1589 if (vp->v_type == VDIR) 1590 return (EPERM); /* POSIX */ 1591 if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) 1592 return (error); 1593 1594 /* 1595 * Unlock the source so we can lookup the target without deadlocking 1596 * (XXX vp is locked already, possible other deadlock?). The target 1597 * must not exist. 1598 */ 1599 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 1600 nd->nl_flags &= ~NLC_NCPISLOCKED; 1601 cache_unlock(nd->nl_ncp); 1602 1603 linknd->nl_flags |= NLC_CREATE; 1604 if ((error = nlookup(linknd)) != 0) { 1605 vput(vp); 1606 return (error); 1607 } 1608 if (linknd->nl_ncp->nc_vp) { 1609 vput(vp); 1610 return (EEXIST); 1611 } 1612 1613 /* 1614 * Finally run the new API VOP. 1615 */ 1616 error = VOP_NLINK(linknd->nl_ncp, vp, linknd->nl_cred); 1617 vput(vp); 1618 return (error); 1619 } 1620 1621 /* 1622 * link_args(char *path, char *link) 1623 * 1624 * Make a hard file link. 
1625 */ 1626 int 1627 link(struct link_args *uap) 1628 { 1629 struct nlookupdata nd, linknd; 1630 int error; 1631 1632 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1633 if (error == 0) { 1634 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 1635 if (error == 0) 1636 error = kern_link(&nd, &linknd); 1637 nlookup_done(&linknd); 1638 } 1639 nlookup_done(&nd); 1640 return (error); 1641 } 1642 1643 int 1644 kern_symlink(struct nlookupdata *nd, char *path, int mode) 1645 { 1646 struct namecache *ncp; 1647 struct vattr vattr; 1648 struct vnode *vp; 1649 int error; 1650 1651 bwillwrite(); 1652 nd->nl_flags |= NLC_CREATE; 1653 if ((error = nlookup(nd)) != 0) 1654 return (error); 1655 ncp = nd->nl_ncp; 1656 if (ncp->nc_vp) 1657 return (EEXIST); 1658 1659 VATTR_NULL(&vattr); 1660 vattr.va_mode = mode; 1661 error = VOP_NSYMLINK(ncp, &vp, nd->nl_cred, &vattr, path); 1662 if (error == 0) 1663 vput(vp); 1664 return (error); 1665 } 1666 1667 /* 1668 * symlink(char *path, char *link) 1669 * 1670 * Make a symbolic link. 1671 */ 1672 int 1673 symlink(struct symlink_args *uap) 1674 { 1675 struct thread *td = curthread; 1676 struct nlookupdata nd; 1677 char *path; 1678 int error; 1679 int mode; 1680 1681 path = zalloc(namei_zone); 1682 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1683 if (error == 0) { 1684 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 1685 if (error == 0) { 1686 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 1687 error = kern_symlink(&nd, path, mode); 1688 } 1689 nlookup_done(&nd); 1690 } 1691 zfree(namei_zone, path); 1692 return (error); 1693 } 1694 1695 /* 1696 * undelete_args(char *path) 1697 * 1698 * Delete a whiteout from the filesystem. 
1699 */ 1700 /* ARGSUSED */ 1701 int 1702 undelete(struct undelete_args *uap) 1703 { 1704 struct nlookupdata nd; 1705 int error; 1706 1707 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1708 bwillwrite(); 1709 nd.nl_flags |= NLC_DELETE; 1710 if (error == 0) 1711 error = nlookup(&nd); 1712 if (error == 0) 1713 error = VOP_NWHITEOUT(nd.nl_ncp, nd.nl_cred, NAMEI_DELETE); 1714 nlookup_done(&nd); 1715 return (error); 1716 } 1717 1718 int 1719 kern_unlink(struct nlookupdata *nd) 1720 { 1721 struct namecache *ncp; 1722 int error; 1723 1724 bwillwrite(); 1725 nd->nl_flags |= NLC_DELETE; 1726 if ((error = nlookup(nd)) != 0) 1727 return (error); 1728 ncp = nd->nl_ncp; 1729 error = VOP_NREMOVE(ncp, nd->nl_cred); 1730 return (error); 1731 } 1732 1733 /* 1734 * unlink_args(char *path) 1735 * 1736 * Delete a name from the filesystem. 1737 */ 1738 int 1739 unlink(struct unlink_args *uap) 1740 { 1741 struct nlookupdata nd; 1742 int error; 1743 1744 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1745 if (error == 0) 1746 error = kern_unlink(&nd); 1747 nlookup_done(&nd); 1748 return (error); 1749 } 1750 1751 int 1752 kern_lseek(int fd, off_t offset, int whence, off_t *res) 1753 { 1754 struct thread *td = curthread; 1755 struct proc *p = td->td_proc; 1756 struct filedesc *fdp = p->p_fd; 1757 struct file *fp; 1758 struct vattr vattr; 1759 int error; 1760 1761 if ((u_int)fd >= fdp->fd_nfiles || 1762 (fp = fdp->fd_files[fd].fp) == NULL) 1763 return (EBADF); 1764 if (fp->f_type != DTYPE_VNODE) 1765 return (ESPIPE); 1766 switch (whence) { 1767 case L_INCR: 1768 fp->f_offset += offset; 1769 break; 1770 case L_XTND: 1771 error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, td); 1772 if (error) 1773 return (error); 1774 fp->f_offset = offset + vattr.va_size; 1775 break; 1776 case L_SET: 1777 fp->f_offset = offset; 1778 break; 1779 default: 1780 return (EINVAL); 1781 } 1782 *res = fp->f_offset; 1783 return (0); 1784 } 1785 1786 /* 1787 * lseek_args(int fd, int pad, off_t 
offset, int whence) 1788 * 1789 * Reposition read/write file offset. 1790 */ 1791 int 1792 lseek(struct lseek_args *uap) 1793 { 1794 int error; 1795 1796 error = kern_lseek(uap->fd, uap->offset, uap->whence, 1797 &uap->sysmsg_offset); 1798 1799 return (error); 1800 } 1801 1802 int 1803 kern_access(struct nlookupdata *nd, int aflags) 1804 { 1805 struct thread *td = curthread; 1806 struct vnode *vp; 1807 int error, flags; 1808 1809 if ((error = nlookup(nd)) != 0) 1810 return (error); 1811 retry: 1812 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 1813 if (error) 1814 return (error); 1815 1816 /* Flags == 0 means only check for existence. */ 1817 if (aflags) { 1818 flags = 0; 1819 if (aflags & R_OK) 1820 flags |= VREAD; 1821 if (aflags & W_OK) 1822 flags |= VWRITE; 1823 if (aflags & X_OK) 1824 flags |= VEXEC; 1825 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1826 error = VOP_ACCESS(vp, flags, nd->nl_cred, td); 1827 1828 /* 1829 * If the file handle is stale we have to re-resolve the 1830 * entry. This is a hack at the moment. 1831 */ 1832 if (error == ESTALE) { 1833 cache_setunresolved(nd->nl_ncp); 1834 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 1835 if (error == 0) { 1836 vput(vp); 1837 vp = NULL; 1838 goto retry; 1839 } 1840 } 1841 } 1842 vput(vp); 1843 return (error); 1844 } 1845 1846 /* 1847 * access_args(char *path, int flags) 1848 * 1849 * Check access permissions. 
1850 */ 1851 int 1852 access(struct access_args *uap) 1853 { 1854 struct nlookupdata nd; 1855 int error; 1856 1857 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1858 if (error == 0) 1859 error = kern_access(&nd, uap->flags); 1860 nlookup_done(&nd); 1861 return (error); 1862 } 1863 1864 int 1865 kern_stat(struct nlookupdata *nd, struct stat *st) 1866 { 1867 int error; 1868 struct vnode *vp; 1869 thread_t td; 1870 1871 if ((error = nlookup(nd)) != 0) 1872 return (error); 1873 again: 1874 if ((vp = nd->nl_ncp->nc_vp) == NULL) 1875 return (ENOENT); 1876 1877 td = curthread; 1878 if ((error = vget(vp, LK_SHARED, td)) != 0) 1879 return (error); 1880 error = vn_stat(vp, st, td); 1881 1882 /* 1883 * If the file handle is stale we have to re-resolve the entry. This 1884 * is a hack at the moment. 1885 */ 1886 if (error == ESTALE) { 1887 cache_setunresolved(nd->nl_ncp); 1888 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 1889 if (error == 0) { 1890 vput(vp); 1891 goto again; 1892 } 1893 } 1894 vput(vp); 1895 return (error); 1896 } 1897 1898 /* 1899 * stat_args(char *path, struct stat *ub) 1900 * 1901 * Get file status; this version follows links. 1902 */ 1903 int 1904 stat(struct stat_args *uap) 1905 { 1906 struct nlookupdata nd; 1907 struct stat st; 1908 int error; 1909 1910 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1911 if (error == 0) { 1912 error = kern_stat(&nd, &st); 1913 if (error == 0) 1914 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1915 } 1916 nlookup_done(&nd); 1917 return (error); 1918 } 1919 1920 /* 1921 * lstat_args(char *path, struct stat *ub) 1922 * 1923 * Get file status; this version does not follow links. 
1924 */ 1925 int 1926 lstat(struct lstat_args *uap) 1927 { 1928 struct nlookupdata nd; 1929 struct stat st; 1930 int error; 1931 1932 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1933 if (error == 0) { 1934 error = kern_stat(&nd, &st); 1935 if (error == 0) 1936 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1937 } 1938 nlookup_done(&nd); 1939 return (error); 1940 } 1941 1942 /* 1943 * pathconf_Args(char *path, int name) 1944 * 1945 * Get configurable pathname variables. 1946 */ 1947 /* ARGSUSED */ 1948 int 1949 pathconf(struct pathconf_args *uap) 1950 { 1951 struct nlookupdata nd; 1952 struct vnode *vp; 1953 int error; 1954 1955 vp = NULL; 1956 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1957 if (error == 0) 1958 error = nlookup(&nd); 1959 if (error == 0) 1960 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 1961 nlookup_done(&nd); 1962 if (error == 0) { 1963 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 1964 vput(vp); 1965 } 1966 return (error); 1967 } 1968 1969 /* 1970 * XXX: daver 1971 * kern_readlink isn't properly split yet. There is a copyin burried 1972 * in VOP_READLINK(). 
1973 */ 1974 int 1975 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 1976 { 1977 struct thread *td = curthread; 1978 struct proc *p = td->td_proc; 1979 struct vnode *vp; 1980 struct iovec aiov; 1981 struct uio auio; 1982 int error; 1983 1984 if ((error = nlookup(nd)) != 0) 1985 return (error); 1986 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 1987 if (error) 1988 return (error); 1989 if (vp->v_type != VLNK) { 1990 error = EINVAL; 1991 } else { 1992 aiov.iov_base = buf; 1993 aiov.iov_len = count; 1994 auio.uio_iov = &aiov; 1995 auio.uio_iovcnt = 1; 1996 auio.uio_offset = 0; 1997 auio.uio_rw = UIO_READ; 1998 auio.uio_segflg = UIO_USERSPACE; 1999 auio.uio_td = td; 2000 auio.uio_resid = count; 2001 error = VOP_READLINK(vp, &auio, p->p_ucred); 2002 } 2003 vput(vp); 2004 *res = count - auio.uio_resid; 2005 return (error); 2006 } 2007 2008 /* 2009 * readlink_args(char *path, char *buf, int count) 2010 * 2011 * Return target name of a symbolic link. 2012 */ 2013 int 2014 readlink(struct readlink_args *uap) 2015 { 2016 struct nlookupdata nd; 2017 int error; 2018 2019 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2020 if (error == 0) { 2021 error = kern_readlink(&nd, uap->buf, uap->count, 2022 &uap->sysmsg_result); 2023 } 2024 nlookup_done(&nd); 2025 return (error); 2026 } 2027 2028 static int 2029 setfflags(struct vnode *vp, int flags) 2030 { 2031 struct thread *td = curthread; 2032 struct proc *p = td->td_proc; 2033 int error; 2034 struct vattr vattr; 2035 2036 /* 2037 * Prevent non-root users from setting flags on devices. When 2038 * a device is reused, users can retain ownership of the device 2039 * if they are allowed to set flags and programs assume that 2040 * chown can't fail when done as root. 
2041 */ 2042 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2043 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2044 return (error); 2045 2046 /* 2047 * note: vget is required for any operation that might mod the vnode 2048 * so VINACTIVE is properly cleared. 2049 */ 2050 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2051 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2052 VATTR_NULL(&vattr); 2053 vattr.va_flags = flags; 2054 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2055 vput(vp); 2056 } 2057 return (error); 2058 } 2059 2060 /* 2061 * chflags(char *path, int flags) 2062 * 2063 * Change flags of a file given a path name. 2064 */ 2065 /* ARGSUSED */ 2066 int 2067 chflags(struct chflags_args *uap) 2068 { 2069 struct nlookupdata nd; 2070 struct vnode *vp; 2071 int error; 2072 2073 vp = NULL; 2074 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2075 /* XXX Add NLC flag indicating modifying operation? */ 2076 if (error == 0) 2077 error = nlookup(&nd); 2078 if (error == 0) 2079 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2080 nlookup_done(&nd); 2081 if (error == 0) { 2082 error = setfflags(vp, uap->flags); 2083 vrele(vp); 2084 } 2085 return (error); 2086 } 2087 2088 /* 2089 * fchflags_args(int fd, int flags) 2090 * 2091 * Change flags of a file given a file descriptor. 2092 */ 2093 /* ARGSUSED */ 2094 int 2095 fchflags(struct fchflags_args *uap) 2096 { 2097 struct thread *td = curthread; 2098 struct proc *p = td->td_proc; 2099 struct file *fp; 2100 int error; 2101 2102 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2103 return (error); 2104 return setfflags((struct vnode *) fp->f_data, uap->flags); 2105 } 2106 2107 static int 2108 setfmode(struct vnode *vp, int mode) 2109 { 2110 struct thread *td = curthread; 2111 struct proc *p = td->td_proc; 2112 int error; 2113 struct vattr vattr; 2114 2115 /* 2116 * note: vget is required for any operation that might mod the vnode 2117 * so VINACTIVE is properly cleared. 
2118 */ 2119 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2120 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2121 VATTR_NULL(&vattr); 2122 vattr.va_mode = mode & ALLPERMS; 2123 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2124 vput(vp); 2125 } 2126 return error; 2127 } 2128 2129 int 2130 kern_chmod(struct nlookupdata *nd, int mode) 2131 { 2132 struct vnode *vp; 2133 int error; 2134 2135 /* XXX Add NLC flag indicating modifying operation? */ 2136 if ((error = nlookup(nd)) != 0) 2137 return (error); 2138 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2139 return (error); 2140 error = setfmode(vp, mode); 2141 vrele(vp); 2142 return (error); 2143 } 2144 2145 /* 2146 * chmod_args(char *path, int mode) 2147 * 2148 * Change mode of a file given path name. 2149 */ 2150 /* ARGSUSED */ 2151 int 2152 chmod(struct chmod_args *uap) 2153 { 2154 struct nlookupdata nd; 2155 int error; 2156 2157 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2158 if (error == 0) 2159 error = kern_chmod(&nd, uap->mode); 2160 nlookup_done(&nd); 2161 return (error); 2162 } 2163 2164 /* 2165 * lchmod_args(char *path, int mode) 2166 * 2167 * Change mode of a file given path name (don't follow links.) 2168 */ 2169 /* ARGSUSED */ 2170 int 2171 lchmod(struct lchmod_args *uap) 2172 { 2173 struct nlookupdata nd; 2174 int error; 2175 2176 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2177 if (error == 0) 2178 error = kern_chmod(&nd, uap->mode); 2179 nlookup_done(&nd); 2180 return (error); 2181 } 2182 2183 /* 2184 * fchmod_args(int fd, int mode) 2185 * 2186 * Change mode of a file given a file descriptor. 
2187 */ 2188 /* ARGSUSED */ 2189 int 2190 fchmod(struct fchmod_args *uap) 2191 { 2192 struct thread *td = curthread; 2193 struct proc *p = td->td_proc; 2194 struct file *fp; 2195 int error; 2196 2197 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2198 return (error); 2199 return setfmode((struct vnode *)fp->f_data, uap->mode); 2200 } 2201 2202 static int 2203 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2204 { 2205 struct thread *td = curthread; 2206 struct proc *p = td->td_proc; 2207 int error; 2208 struct vattr vattr; 2209 2210 /* 2211 * note: vget is required for any operation that might mod the vnode 2212 * so VINACTIVE is properly cleared. 2213 */ 2214 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2215 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2216 VATTR_NULL(&vattr); 2217 vattr.va_uid = uid; 2218 vattr.va_gid = gid; 2219 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2220 vput(vp); 2221 } 2222 return error; 2223 } 2224 2225 int 2226 kern_chown(struct nlookupdata *nd, int uid, int gid) 2227 { 2228 struct vnode *vp; 2229 int error; 2230 2231 /* XXX Add NLC flag indicating modifying operation? */ 2232 if ((error = nlookup(nd)) != 0) 2233 return (error); 2234 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2235 return (error); 2236 error = setfown(vp, uid, gid); 2237 vrele(vp); 2238 return (error); 2239 } 2240 2241 /* 2242 * chown(char *path, int uid, int gid) 2243 * 2244 * Set ownership given a path name. 2245 */ 2246 int 2247 chown(struct chown_args *uap) 2248 { 2249 struct nlookupdata nd; 2250 int error; 2251 2252 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2253 if (error == 0) 2254 error = kern_chown(&nd, uap->uid, uap->gid); 2255 nlookup_done(&nd); 2256 return (error); 2257 } 2258 2259 /* 2260 * lchown_args(char *path, int uid, int gid) 2261 * 2262 * Set ownership given a path name, do not cross symlinks. 
2263 */ 2264 int 2265 lchown(struct lchown_args *uap) 2266 { 2267 struct nlookupdata nd; 2268 int error; 2269 2270 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2271 if (error == 0) 2272 error = kern_chown(&nd, uap->uid, uap->gid); 2273 nlookup_done(&nd); 2274 return (error); 2275 } 2276 2277 /* 2278 * fchown_args(int fd, int uid, int gid) 2279 * 2280 * Set ownership given a file descriptor. 2281 */ 2282 /* ARGSUSED */ 2283 int 2284 fchown(struct fchown_args *uap) 2285 { 2286 struct thread *td = curthread; 2287 struct proc *p = td->td_proc; 2288 struct file *fp; 2289 int error; 2290 2291 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2292 return (error); 2293 return setfown((struct vnode *)fp->f_data, 2294 uap->uid, uap->gid); 2295 } 2296 2297 static int 2298 getutimes(const struct timeval *tvp, struct timespec *tsp) 2299 { 2300 struct timeval tv[2]; 2301 2302 if (tvp == NULL) { 2303 microtime(&tv[0]); 2304 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2305 tsp[1] = tsp[0]; 2306 } else { 2307 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2308 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2309 } 2310 return 0; 2311 } 2312 2313 static int 2314 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2315 { 2316 struct thread *td = curthread; 2317 struct proc *p = td->td_proc; 2318 int error; 2319 struct vattr vattr; 2320 2321 /* 2322 * note: vget is required for any operation that might mod the vnode 2323 * so VINACTIVE is properly cleared. 
2324 */ 2325 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2326 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2327 VATTR_NULL(&vattr); 2328 vattr.va_atime = ts[0]; 2329 vattr.va_mtime = ts[1]; 2330 if (nullflag) 2331 vattr.va_vaflags |= VA_UTIMES_NULL; 2332 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2333 vput(vp); 2334 } 2335 return error; 2336 } 2337 2338 int 2339 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2340 { 2341 struct timespec ts[2]; 2342 struct vnode *vp; 2343 int error; 2344 2345 if ((error = getutimes(tptr, ts)) != 0) 2346 return (error); 2347 /* XXX Add NLC flag indicating modifying operation? */ 2348 if ((error = nlookup(nd)) != 0) 2349 return (error); 2350 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2351 return (error); 2352 error = setutimes(vp, ts, tptr == NULL); 2353 vrele(vp); 2354 return (error); 2355 } 2356 2357 /* 2358 * utimes_args(char *path, struct timeval *tptr) 2359 * 2360 * Set the access and modification times of a file. 2361 */ 2362 int 2363 utimes(struct utimes_args *uap) 2364 { 2365 struct timeval tv[2]; 2366 struct nlookupdata nd; 2367 int error; 2368 2369 if (uap->tptr) { 2370 error = copyin(uap->tptr, tv, sizeof(tv)); 2371 if (error) 2372 return (error); 2373 } 2374 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2375 if (error == 0) 2376 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2377 nlookup_done(&nd); 2378 return (error); 2379 } 2380 2381 /* 2382 * lutimes_args(char *path, struct timeval *tptr) 2383 * 2384 * Set the access and modification times of a file. 2385 */ 2386 int 2387 lutimes(struct lutimes_args *uap) 2388 { 2389 struct timeval tv[2]; 2390 struct nlookupdata nd; 2391 int error; 2392 2393 if (uap->tptr) { 2394 error = copyin(uap->tptr, tv, sizeof(tv)); 2395 if (error) 2396 return (error); 2397 } 2398 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2399 if (error == 0) 2400 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 2401 nlookup_done(&nd); 2402 return (error); 2403 } 2404 2405 int 2406 kern_futimes(int fd, struct timeval *tptr) 2407 { 2408 struct thread *td = curthread; 2409 struct proc *p = td->td_proc; 2410 struct timespec ts[2]; 2411 struct file *fp; 2412 int error; 2413 2414 error = getutimes(tptr, ts); 2415 if (error) 2416 return (error); 2417 error = getvnode(p->p_fd, fd, &fp); 2418 if (error) 2419 return (error); 2420 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2421 return (error); 2422 } 2423 2424 /* 2425 * futimes_args(int fd, struct timeval *tptr) 2426 * 2427 * Set the access and modification times of a file. 2428 */ 2429 int 2430 futimes(struct futimes_args *uap) 2431 { 2432 struct timeval tv[2]; 2433 int error; 2434 2435 if (uap->tptr) { 2436 error = copyin(uap->tptr, tv, sizeof(tv)); 2437 if (error) 2438 return (error); 2439 } 2440 2441 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2442 2443 return (error); 2444 } 2445 2446 int 2447 kern_truncate(struct nlookupdata *nd, off_t length) 2448 { 2449 struct vnode *vp; 2450 struct vattr vattr; 2451 int error; 2452 2453 if (length < 0) 2454 return(EINVAL); 2455 /* XXX Add NLC flag indicating modifying operation? */ 2456 if ((error = nlookup(nd)) != 0) 2457 return (error); 2458 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2459 return (error); 2460 VOP_LEASE(vp, nd->nl_td, nd->nl_cred, LEASE_WRITE); 2461 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, nd->nl_td)) != 0) { 2462 vrele(vp); 2463 return (error); 2464 } 2465 if (vp->v_type == VDIR) { 2466 error = EISDIR; 2467 } else if ((error = vn_writechk(vp)) == 0 && 2468 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred, nd->nl_td)) == 0) { 2469 VATTR_NULL(&vattr); 2470 vattr.va_size = length; 2471 error = VOP_SETATTR(vp, &vattr, nd->nl_cred, nd->nl_td); 2472 } 2473 vput(vp); 2474 return (error); 2475 } 2476 2477 /* 2478 * truncate(char *path, int pad, off_t length) 2479 * 2480 * Truncate a file given its path name. 
2481 */ 2482 int 2483 truncate(struct truncate_args *uap) 2484 { 2485 struct nlookupdata nd; 2486 int error; 2487 2488 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2489 if (error == 0) 2490 error = kern_truncate(&nd, uap->length); 2491 nlookup_done(&nd); 2492 return error; 2493 } 2494 2495 int 2496 kern_ftruncate(int fd, off_t length) 2497 { 2498 struct thread *td = curthread; 2499 struct proc *p = td->td_proc; 2500 struct vattr vattr; 2501 struct vnode *vp; 2502 struct file *fp; 2503 int error; 2504 2505 if (length < 0) 2506 return(EINVAL); 2507 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2508 return (error); 2509 if ((fp->f_flag & FWRITE) == 0) 2510 return (EINVAL); 2511 vp = (struct vnode *)fp->f_data; 2512 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2513 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2514 if (vp->v_type == VDIR) 2515 error = EISDIR; 2516 else if ((error = vn_writechk(vp)) == 0) { 2517 VATTR_NULL(&vattr); 2518 vattr.va_size = length; 2519 error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); 2520 } 2521 VOP_UNLOCK(vp, 0, td); 2522 return (error); 2523 } 2524 2525 /* 2526 * ftruncate_args(int fd, int pad, off_t length) 2527 * 2528 * Truncate a file given a file descriptor. 2529 */ 2530 int 2531 ftruncate(struct ftruncate_args *uap) 2532 { 2533 int error; 2534 2535 error = kern_ftruncate(uap->fd, uap->length); 2536 2537 return (error); 2538 } 2539 2540 /* 2541 * fsync(int fd) 2542 * 2543 * Sync an open file. 
2544 */ 2545 /* ARGSUSED */ 2546 int 2547 fsync(struct fsync_args *uap) 2548 { 2549 struct thread *td = curthread; 2550 struct proc *p = td->td_proc; 2551 struct vnode *vp; 2552 struct file *fp; 2553 vm_object_t obj; 2554 int error; 2555 2556 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2557 return (error); 2558 vp = (struct vnode *)fp->f_data; 2559 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2560 if (VOP_GETVOBJECT(vp, &obj) == 0) 2561 vm_object_page_clean(obj, 0, 0, 0); 2562 if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) == 0 && 2563 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && 2564 bioops.io_fsync) 2565 error = (*bioops.io_fsync)(vp); 2566 VOP_UNLOCK(vp, 0, td); 2567 return (error); 2568 } 2569 2570 int 2571 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 2572 { 2573 struct namecache *fncpd; 2574 struct namecache *tncpd; 2575 struct namecache *ncp; 2576 struct mount *mp; 2577 int error; 2578 2579 bwillwrite(); 2580 if ((error = nlookup(fromnd)) != 0) 2581 return (error); 2582 if ((fncpd = fromnd->nl_ncp->nc_parent) == NULL) 2583 return (ENOENT); 2584 cache_hold(fncpd); 2585 2586 /* 2587 * unlock the source ncp so we can lookup the target ncp without 2588 * deadlocking. The target may or may not exist so we do not check 2589 * for a target vp like kern_mkdir() and other creation functions do. 2590 * 2591 * The source and target directories are ref'd and rechecked after 2592 * everything is relocked to determine if the source or target file 2593 * has been renamed. 
2594 */ 2595 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 2596 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 2597 cache_unlock(fromnd->nl_ncp); 2598 2599 tond->nl_flags |= NLC_CREATE; 2600 if ((error = nlookup(tond)) != 0) { 2601 cache_drop(fncpd); 2602 return (error); 2603 } 2604 if ((tncpd = tond->nl_ncp->nc_parent) == NULL) { 2605 cache_drop(fncpd); 2606 return (ENOENT); 2607 } 2608 cache_hold(tncpd); 2609 2610 /* 2611 * If the source and target are the same there is nothing to do 2612 */ 2613 if (fromnd->nl_ncp == tond->nl_ncp) { 2614 cache_drop(fncpd); 2615 cache_drop(tncpd); 2616 return (0); 2617 } 2618 2619 /* 2620 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 2621 * may have become invalid while it was unlocked, nc_vp and nc_mount 2622 * could be NULL. 2623 */ 2624 if (cache_lock_nonblock(fromnd->nl_ncp) == 0) { 2625 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2626 } else if (fromnd->nl_ncp > tond->nl_ncp) { 2627 cache_lock(fromnd->nl_ncp); 2628 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2629 } else { 2630 cache_unlock(tond->nl_ncp); 2631 cache_lock(fromnd->nl_ncp); 2632 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2633 cache_lock(tond->nl_ncp); 2634 cache_resolve(tond->nl_ncp, tond->nl_cred); 2635 } 2636 fromnd->nl_flags |= NLC_NCPISLOCKED; 2637 2638 /* 2639 * make sure the parent directories linkages are the same 2640 */ 2641 if (fncpd != fromnd->nl_ncp->nc_parent || 2642 tncpd != tond->nl_ncp->nc_parent) { 2643 cache_drop(fncpd); 2644 cache_drop(tncpd); 2645 return (ENOENT); 2646 } 2647 2648 /* 2649 * Both the source and target must be within the same filesystem and 2650 * in the same filesystem as their parent directories within the 2651 * namecache topology. 2652 * 2653 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 
2654 */ 2655 mp = fncpd->nc_mount; 2656 if (mp != tncpd->nc_mount || mp != fromnd->nl_ncp->nc_mount || 2657 mp != tond->nl_ncp->nc_mount) { 2658 cache_drop(fncpd); 2659 cache_drop(tncpd); 2660 return (EXDEV); 2661 } 2662 2663 /* 2664 * If the target exists and either the source or target is a directory, 2665 * then both must be directories. 2666 * 2667 * Due to relocking of the source, fromnd->nl_ncp->nc_vp might have 2668 * become NULL. 2669 */ 2670 if (tond->nl_ncp->nc_vp) { 2671 if (fromnd->nl_ncp->nc_vp == NULL) { 2672 error = ENOENT; 2673 } else if (fromnd->nl_ncp->nc_vp->v_type == VDIR) { 2674 if (tond->nl_ncp->nc_vp->v_type != VDIR) 2675 error = ENOTDIR; 2676 } else if (tond->nl_ncp->nc_vp->v_type == VDIR) { 2677 error = EISDIR; 2678 } 2679 } 2680 2681 /* 2682 * You cannot rename a source into itself or a subdirectory of itself. 2683 * We check this by travsersing the target directory upwards looking 2684 * for a match against the source. 2685 */ 2686 if (error == 0) { 2687 for (ncp = tncpd; ncp; ncp = ncp->nc_parent) { 2688 if (fromnd->nl_ncp == ncp) { 2689 error = EINVAL; 2690 break; 2691 } 2692 } 2693 } 2694 2695 cache_drop(fncpd); 2696 cache_drop(tncpd); 2697 2698 /* 2699 * Even though the namespaces are different, they may still represent 2700 * hardlinks to the same file. The filesystem might have a hard time 2701 * with this so we issue a NREMOVE of the source instead of a NRENAME 2702 * when we detect the situation. 2703 */ 2704 if (error == 0) { 2705 if (fromnd->nl_ncp->nc_vp == tond->nl_ncp->nc_vp) { 2706 error = VOP_NREMOVE(fromnd->nl_ncp, fromnd->nl_cred); 2707 } else { 2708 error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp, 2709 tond->nl_cred); 2710 } 2711 } 2712 return (error); 2713 } 2714 2715 /* 2716 * rename_args(char *from, char *to) 2717 * 2718 * Rename files. Source and destination must either both be directories, 2719 * or both not be directories. If target is a directory, it must be empty. 
2720 */ 2721 int 2722 rename(struct rename_args *uap) 2723 { 2724 struct nlookupdata fromnd, tond; 2725 int error; 2726 2727 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 2728 if (error == 0) { 2729 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 2730 if (error == 0) 2731 error = kern_rename(&fromnd, &tond); 2732 nlookup_done(&tond); 2733 } 2734 nlookup_done(&fromnd); 2735 return (error); 2736 } 2737 2738 int 2739 kern_mkdir(struct nlookupdata *nd, int mode) 2740 { 2741 struct thread *td = curthread; 2742 struct proc *p = td->td_proc; 2743 struct namecache *ncp; 2744 struct vnode *vp; 2745 struct vattr vattr; 2746 int error; 2747 2748 bwillwrite(); 2749 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE; 2750 if ((error = nlookup(nd)) != 0) 2751 return (error); 2752 2753 ncp = nd->nl_ncp; 2754 if (ncp->nc_vp) 2755 return (EEXIST); 2756 2757 VATTR_NULL(&vattr); 2758 vattr.va_type = VDIR; 2759 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 2760 2761 vp = NULL; 2762 error = VOP_NMKDIR(ncp, &vp, p->p_ucred, &vattr); 2763 if (error == 0) 2764 vput(vp); 2765 return (error); 2766 } 2767 2768 /* 2769 * mkdir_args(char *path, int mode) 2770 * 2771 * Make a directory file. 2772 */ 2773 /* ARGSUSED */ 2774 int 2775 mkdir(struct mkdir_args *uap) 2776 { 2777 struct nlookupdata nd; 2778 int error; 2779 2780 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2781 if (error == 0) 2782 error = kern_mkdir(&nd, uap->mode); 2783 nlookup_done(&nd); 2784 return (error); 2785 } 2786 2787 int 2788 kern_rmdir(struct nlookupdata *nd) 2789 { 2790 struct namecache *ncp; 2791 int error; 2792 2793 bwillwrite(); 2794 nd->nl_flags |= NLC_DELETE; 2795 if ((error = nlookup(nd)) != 0) 2796 return (error); 2797 2798 ncp = nd->nl_ncp; 2799 error = VOP_NRMDIR(ncp, nd->nl_cred); 2800 return (error); 2801 } 2802 2803 /* 2804 * rmdir_args(char *path) 2805 * 2806 * Remove a directory file. 
2807 */ 2808 /* ARGSUSED */ 2809 int 2810 rmdir(struct rmdir_args *uap) 2811 { 2812 struct nlookupdata nd; 2813 int error; 2814 2815 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2816 if (error == 0) 2817 error = kern_rmdir(&nd); 2818 nlookup_done(&nd); 2819 return (error); 2820 } 2821 2822 int 2823 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 2824 enum uio_seg direction) 2825 { 2826 struct thread *td = curthread; 2827 struct proc *p = td->td_proc; 2828 struct vnode *vp; 2829 struct file *fp; 2830 struct uio auio; 2831 struct iovec aiov; 2832 long loff; 2833 int error, eofflag; 2834 2835 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2836 return (error); 2837 if ((fp->f_flag & FREAD) == 0) 2838 return (EBADF); 2839 vp = (struct vnode *)fp->f_data; 2840 unionread: 2841 if (vp->v_type != VDIR) 2842 return (EINVAL); 2843 aiov.iov_base = buf; 2844 aiov.iov_len = count; 2845 auio.uio_iov = &aiov; 2846 auio.uio_iovcnt = 1; 2847 auio.uio_rw = UIO_READ; 2848 auio.uio_segflg = direction; 2849 auio.uio_td = td; 2850 auio.uio_resid = count; 2851 /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ 2852 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2853 loff = auio.uio_offset = fp->f_offset; 2854 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 2855 fp->f_offset = auio.uio_offset; 2856 VOP_UNLOCK(vp, 0, td); 2857 if (error) 2858 return (error); 2859 if (count == auio.uio_resid) { 2860 if (union_dircheckp) { 2861 error = union_dircheckp(td, &vp, fp); 2862 if (error == -1) 2863 goto unionread; 2864 if (error) 2865 return (error); 2866 } 2867 if ((vp->v_flag & VROOT) && 2868 (vp->v_mount->mnt_flag & MNT_UNION)) { 2869 struct vnode *tvp = vp; 2870 vp = vp->v_mount->mnt_vnodecovered; 2871 vref(vp); 2872 fp->f_data = vp; 2873 fp->f_offset = 0; 2874 vrele(tvp); 2875 goto unionread; 2876 } 2877 } 2878 if (basep) { 2879 *basep = loff; 2880 } 2881 *res = count - auio.uio_resid; 2882 return (error); 2883 } 2884 2885 /* 2886 * 
getdirentries_args(int fd, char *buf, u_int conut, long *basep) 2887 * 2888 * Read a block of directory entries in a file system independent format. 2889 */ 2890 int 2891 getdirentries(struct getdirentries_args *uap) 2892 { 2893 long base; 2894 int error; 2895 2896 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 2897 &uap->sysmsg_result, UIO_USERSPACE); 2898 2899 if (error == 0) 2900 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 2901 return (error); 2902 } 2903 2904 /* 2905 * getdents_args(int fd, char *buf, size_t count) 2906 */ 2907 int 2908 getdents(struct getdents_args *uap) 2909 { 2910 int error; 2911 2912 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 2913 &uap->sysmsg_result, UIO_USERSPACE); 2914 2915 return (error); 2916 } 2917 2918 /* 2919 * umask(int newmask) 2920 * 2921 * Set the mode mask for creation of filesystem nodes. 2922 * 2923 * MP SAFE 2924 */ 2925 int 2926 umask(struct umask_args *uap) 2927 { 2928 struct thread *td = curthread; 2929 struct proc *p = td->td_proc; 2930 struct filedesc *fdp; 2931 2932 fdp = p->p_fd; 2933 uap->sysmsg_result = fdp->fd_cmask; 2934 fdp->fd_cmask = uap->newmask & ALLPERMS; 2935 return (0); 2936 } 2937 2938 /* 2939 * revoke(char *path) 2940 * 2941 * Void all references to file by ripping underlying filesystem 2942 * away from vnode. 
2943 */ 2944 /* ARGSUSED */ 2945 int 2946 revoke(struct revoke_args *uap) 2947 { 2948 struct thread *td = curthread; 2949 struct nlookupdata nd; 2950 struct vattr vattr; 2951 struct vnode *vp; 2952 struct ucred *cred; 2953 int error; 2954 2955 vp = NULL; 2956 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2957 if (error == 0) 2958 error = nlookup(&nd); 2959 if (error == 0) 2960 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2961 cred = crhold(nd.nl_cred); 2962 nlookup_done(&nd); 2963 if (error == 0) { 2964 if (vp->v_type != VCHR && vp->v_type != VBLK) 2965 error = EINVAL; 2966 if (error == 0) 2967 error = VOP_GETATTR(vp, &vattr, td); 2968 if (error == 0 && cred->cr_uid != vattr.va_uid) 2969 error = suser_cred(cred, PRISON_ROOT); 2970 if (error == 0 && count_udev(vp->v_udev) > 0) { 2971 if ((error = vx_lock(vp)) == 0) { 2972 VOP_REVOKE(vp, REVOKEALL); 2973 vx_unlock(vp); 2974 } 2975 } 2976 vrele(vp); 2977 } 2978 crfree(cred); 2979 return (error); 2980 } 2981 2982 /* 2983 * Convert a user file descriptor to a kernel file entry. 
2984 */ 2985 int 2986 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 2987 { 2988 struct file *fp; 2989 2990 if ((u_int)fd >= fdp->fd_nfiles || 2991 (fp = fdp->fd_files[fd].fp) == NULL) 2992 return (EBADF); 2993 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) 2994 return (EINVAL); 2995 *fpp = fp; 2996 return (0); 2997 } 2998 /* 2999 * getfh_args(char *fname, fhandle_t *fhp) 3000 * 3001 * Get (NFS) file handle 3002 */ 3003 int 3004 getfh(struct getfh_args *uap) 3005 { 3006 struct thread *td = curthread; 3007 struct nlookupdata nd; 3008 fhandle_t fh; 3009 struct vnode *vp; 3010 int error; 3011 3012 /* 3013 * Must be super user 3014 */ 3015 if ((error = suser(td)) != 0) 3016 return (error); 3017 3018 vp = NULL; 3019 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3020 if (error == 0) 3021 error = nlookup(&nd); 3022 if (error == 0) 3023 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3024 nlookup_done(&nd); 3025 if (error == 0) { 3026 bzero(&fh, sizeof(fh)); 3027 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3028 error = VFS_VPTOFH(vp, &fh.fh_fid); 3029 vput(vp); 3030 if (error == 0) 3031 error = copyout(&fh, uap->fhp, sizeof(fh)); 3032 } 3033 return (error); 3034 } 3035 3036 /* 3037 * fhopen_args(const struct fhandle *u_fhp, int flags) 3038 * 3039 * syscall for the rpc.lockd to use to translate a NFS file handle into 3040 * an open descriptor. 3041 * 3042 * warning: do not remove the suser() call or this becomes one giant 3043 * security hole. 
3044 */ 3045 int 3046 fhopen(struct fhopen_args *uap) 3047 { 3048 struct thread *td = curthread; 3049 struct proc *p = td->td_proc; 3050 struct mount *mp; 3051 struct vnode *vp; 3052 struct fhandle fhp; 3053 struct vattr vat; 3054 struct vattr *vap = &vat; 3055 struct flock lf; 3056 struct filedesc *fdp = p->p_fd; 3057 int fmode, mode, error, type; 3058 struct file *nfp; 3059 struct file *fp; 3060 int indx; 3061 3062 /* 3063 * Must be super user 3064 */ 3065 error = suser(td); 3066 if (error) 3067 return (error); 3068 3069 fmode = FFLAGS(uap->flags); 3070 /* why not allow a non-read/write open for our lockd? */ 3071 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3072 return (EINVAL); 3073 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3074 if (error) 3075 return(error); 3076 /* find the mount point */ 3077 mp = vfs_getvfs(&fhp.fh_fsid); 3078 if (mp == NULL) 3079 return (ESTALE); 3080 /* now give me my vnode, it gets returned to me locked */ 3081 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); 3082 if (error) 3083 return (error); 3084 /* 3085 * from now on we have to make sure not 3086 * to forget about the vnode 3087 * any error that causes an abort must vput(vp) 3088 * just set error = err and 'goto bad;'. 
3089 */ 3090 3091 /* 3092 * from vn_open 3093 */ 3094 if (vp->v_type == VLNK) { 3095 error = EMLINK; 3096 goto bad; 3097 } 3098 if (vp->v_type == VSOCK) { 3099 error = EOPNOTSUPP; 3100 goto bad; 3101 } 3102 mode = 0; 3103 if (fmode & (FWRITE | O_TRUNC)) { 3104 if (vp->v_type == VDIR) { 3105 error = EISDIR; 3106 goto bad; 3107 } 3108 error = vn_writechk(vp); 3109 if (error) 3110 goto bad; 3111 mode |= VWRITE; 3112 } 3113 if (fmode & FREAD) 3114 mode |= VREAD; 3115 if (mode) { 3116 error = VOP_ACCESS(vp, mode, p->p_ucred, td); 3117 if (error) 3118 goto bad; 3119 } 3120 if (fmode & O_TRUNC) { 3121 VOP_UNLOCK(vp, 0, td); /* XXX */ 3122 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 3123 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ 3124 VATTR_NULL(vap); 3125 vap->va_size = 0; 3126 error = VOP_SETATTR(vp, vap, p->p_ucred, td); 3127 if (error) 3128 goto bad; 3129 } 3130 3131 /* 3132 * VOP_OPEN needs the file pointer so it can potentially override 3133 * it. 3134 * 3135 * WARNING! no f_ncp will be associated when fhopen()ing a directory. 3136 * XXX 3137 */ 3138 if ((error = falloc(p, &nfp, NULL)) != 0) 3139 goto bad; 3140 fp = nfp; 3141 3142 fp->f_type = DTYPE_VNODE; 3143 fp->f_flag = fmode & FMASK; 3144 fp->f_ops = &vnode_fileops; 3145 fp->f_data = vp; 3146 3147 error = VOP_OPEN(vp, fmode, p->p_ucred, fp, td); 3148 if (error) { 3149 /* 3150 * setting f_ops this way prevents VOP_CLOSE from being 3151 * called or fdrop() releasing the vp from v_data. Since 3152 * the VOP_OPEN failed we don't want to VOP_CLOSE. 3153 */ 3154 fp->f_ops = &badfileops; 3155 fp->f_data = NULL; 3156 fdrop(fp, td); 3157 goto bad; 3158 } 3159 if (fmode & FWRITE) 3160 vp->v_writecount++; 3161 3162 /* 3163 * The fp now owns a reference on the vnode. We still have our own 3164 * ref+lock. 3165 */ 3166 vref(vp); 3167 3168 /* 3169 * Make sure that a VM object is created for VMIO support. If this 3170 * fails just fdrop() normally to clean up. 
3171 */ 3172 if (vn_canvmio(vp) == TRUE) { 3173 if ((error = vfs_object_create(vp, td)) != 0) { 3174 fdrop(fp, td); 3175 goto bad; 3176 } 3177 } 3178 3179 /* 3180 * The open was successful, associate it with a file descriptor. 3181 */ 3182 if ((error = fsetfd(p, fp, &indx)) != 0) { 3183 if (fmode & FWRITE) 3184 vp->v_writecount--; 3185 fdrop(fp, td); 3186 goto bad; 3187 } 3188 3189 if (fmode & (O_EXLOCK | O_SHLOCK)) { 3190 lf.l_whence = SEEK_SET; 3191 lf.l_start = 0; 3192 lf.l_len = 0; 3193 if (fmode & O_EXLOCK) 3194 lf.l_type = F_WRLCK; 3195 else 3196 lf.l_type = F_RDLCK; 3197 type = F_FLOCK; 3198 if ((fmode & FNONBLOCK) == 0) 3199 type |= F_WAIT; 3200 VOP_UNLOCK(vp, 0, td); 3201 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 3202 /* 3203 * lock request failed. Normally close the descriptor 3204 * but handle the case where someone might have dup()d 3205 * or close()d it when we weren't looking. 3206 */ 3207 if (fdp->fd_files[indx].fp == fp) { 3208 funsetfd(fdp, indx); 3209 fdrop(fp, td); 3210 } 3211 3212 /* 3213 * release our private reference. 
3214 */ 3215 fdrop(fp, td); 3216 vrele(vp); 3217 return (error); 3218 } 3219 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 3220 fp->f_flag |= FHASLOCK; 3221 } 3222 if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) 3223 vfs_object_create(vp, td); 3224 3225 vput(vp); 3226 fdrop(fp, td); 3227 uap->sysmsg_result = indx; 3228 return (0); 3229 3230 bad: 3231 vput(vp); 3232 return (error); 3233 } 3234 3235 /* 3236 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 3237 */ 3238 int 3239 fhstat(struct fhstat_args *uap) 3240 { 3241 struct thread *td = curthread; 3242 struct stat sb; 3243 fhandle_t fh; 3244 struct mount *mp; 3245 struct vnode *vp; 3246 int error; 3247 3248 /* 3249 * Must be super user 3250 */ 3251 error = suser(td); 3252 if (error) 3253 return (error); 3254 3255 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 3256 if (error) 3257 return (error); 3258 3259 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3260 return (ESTALE); 3261 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3262 return (error); 3263 error = vn_stat(vp, &sb, td); 3264 vput(vp); 3265 if (error) 3266 return (error); 3267 error = copyout(&sb, uap->sb, sizeof(sb)); 3268 return (error); 3269 } 3270 3271 /* 3272 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 3273 */ 3274 int 3275 fhstatfs(struct fhstatfs_args *uap) 3276 { 3277 struct thread *td = curthread; 3278 struct proc *p = td->td_proc; 3279 struct statfs *sp; 3280 struct mount *mp; 3281 struct vnode *vp; 3282 struct statfs sb; 3283 char *fullpath, *freepath; 3284 fhandle_t fh; 3285 int error; 3286 3287 /* 3288 * Must be super user 3289 */ 3290 if ((error = suser(td))) 3291 return (error); 3292 3293 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3294 return (error); 3295 3296 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3297 return (ESTALE); 3298 3299 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0 && 3300 !chroot_visible_mnt(mp, p)) 3301 return (ESTALE); 3302 3303 if ((error = VFS_FHTOVP(mp, 
&fh.fh_fid, &vp))) 3304 return (error); 3305 mp = vp->v_mount; 3306 sp = &mp->mnt_stat; 3307 vput(vp); 3308 if ((error = VFS_STATFS(mp, sp, td)) != 0) 3309 return (error); 3310 3311 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 3312 if (error) 3313 return(error); 3314 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3315 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3316 free(freepath, M_TEMP); 3317 3318 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3319 if (suser(td)) { 3320 bcopy(sp, &sb, sizeof(sb)); 3321 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3322 sp = &sb; 3323 } 3324 return (copyout(sp, uap->buf, sizeof(*sp))); 3325 } 3326 3327 /* 3328 * Syscall to push extended attribute configuration information into the 3329 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3330 * a command (int cmd), and attribute name and misc data. For now, the 3331 * attribute name is left in userspace for consumption by the VFS_op. 3332 * It will probably be changed to be copied into sysspace by the 3333 * syscall in the future, once issues with various consumers of the 3334 * attribute code have raised their hands. 3335 * 3336 * Currently this is used only by UFS Extended Attributes. 3337 */ 3338 int 3339 extattrctl(struct extattrctl_args *uap) 3340 { 3341 struct nlookupdata nd; 3342 struct mount *mp; 3343 struct vnode *vp; 3344 int error; 3345 3346 vp = NULL; 3347 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3348 if (error == 0) 3349 error = nlookup(&nd); 3350 if (error == 0) { 3351 mp = nd.nl_ncp->nc_mount; 3352 error = VFS_EXTATTRCTL(mp, uap->cmd, 3353 uap->attrname, uap->arg, 3354 nd.nl_td); 3355 } 3356 nlookup_done(&nd); 3357 return (error); 3358 } 3359 3360 /* 3361 * Syscall to set a named extended attribute on a file or directory. 3362 * Accepts attribute name, and a uio structure pointing to the data to set. 3363 * The uio is consumed in the style of writev(). 
The real work happens 3364 * in VOP_SETEXTATTR(). 3365 */ 3366 int 3367 extattr_set_file(struct extattr_set_file_args *uap) 3368 { 3369 char attrname[EXTATTR_MAXNAMELEN]; 3370 struct iovec aiov[UIO_SMALLIOV]; 3371 struct iovec *needfree; 3372 struct nlookupdata nd; 3373 struct iovec *iov; 3374 struct vnode *vp; 3375 struct uio auio; 3376 u_int iovlen; 3377 u_int cnt; 3378 int error; 3379 int i; 3380 3381 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3382 if (error) 3383 return (error); 3384 3385 vp = NULL; 3386 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3387 if (error == 0) 3388 error = nlookup(&nd); 3389 if (error == 0) 3390 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3391 if (error) { 3392 nlookup_done(&nd); 3393 return (error); 3394 } 3395 3396 needfree = NULL; 3397 iovlen = uap->iovcnt * sizeof(struct iovec); 3398 if (uap->iovcnt > UIO_SMALLIOV) { 3399 if (uap->iovcnt > UIO_MAXIOV) { 3400 error = EINVAL; 3401 goto done; 3402 } 3403 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3404 needfree = iov; 3405 } else { 3406 iov = aiov; 3407 } 3408 auio.uio_iov = iov; 3409 auio.uio_iovcnt = uap->iovcnt; 3410 auio.uio_rw = UIO_WRITE; 3411 auio.uio_segflg = UIO_USERSPACE; 3412 auio.uio_td = nd.nl_td; 3413 auio.uio_offset = 0; 3414 if ((error = copyin(uap->iovp, iov, iovlen))) 3415 goto done; 3416 auio.uio_resid = 0; 3417 for (i = 0; i < uap->iovcnt; i++) { 3418 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3419 error = EINVAL; 3420 goto done; 3421 } 3422 auio.uio_resid += iov->iov_len; 3423 iov++; 3424 } 3425 cnt = auio.uio_resid; 3426 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred, nd.nl_td); 3427 cnt -= auio.uio_resid; 3428 uap->sysmsg_result = cnt; 3429 done: 3430 vput(vp); 3431 nlookup_done(&nd); 3432 if (needfree) 3433 FREE(needfree, M_IOV); 3434 return (error); 3435 } 3436 3437 /* 3438 * Syscall to get a named extended attribute on a file or directory. 
3439 * Accepts attribute name, and a uio structure pointing to a buffer for the 3440 * data. The uio is consumed in the style of readv(). The real work 3441 * happens in VOP_GETEXTATTR(); 3442 */ 3443 int 3444 extattr_get_file(struct extattr_get_file_args *uap) 3445 { 3446 char attrname[EXTATTR_MAXNAMELEN]; 3447 struct iovec aiov[UIO_SMALLIOV]; 3448 struct iovec *needfree; 3449 struct nlookupdata nd; 3450 struct iovec *iov; 3451 struct vnode *vp; 3452 struct uio auio; 3453 u_int iovlen; 3454 u_int cnt; 3455 int error; 3456 int i; 3457 3458 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3459 if (error) 3460 return (error); 3461 3462 vp = NULL; 3463 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3464 if (error == 0) 3465 error = nlookup(&nd); 3466 if (error == 0) 3467 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3468 if (error) { 3469 nlookup_done(&nd); 3470 return (error); 3471 } 3472 3473 iovlen = uap->iovcnt * sizeof (struct iovec); 3474 needfree = NULL; 3475 if (uap->iovcnt > UIO_SMALLIOV) { 3476 if (uap->iovcnt > UIO_MAXIOV) { 3477 error = EINVAL; 3478 goto done; 3479 } 3480 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3481 needfree = iov; 3482 } else { 3483 iov = aiov; 3484 } 3485 auio.uio_iov = iov; 3486 auio.uio_iovcnt = uap->iovcnt; 3487 auio.uio_rw = UIO_READ; 3488 auio.uio_segflg = UIO_USERSPACE; 3489 auio.uio_td = nd.nl_td; 3490 auio.uio_offset = 0; 3491 if ((error = copyin(uap->iovp, iov, iovlen))) 3492 goto done; 3493 auio.uio_resid = 0; 3494 for (i = 0; i < uap->iovcnt; i++) { 3495 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3496 error = EINVAL; 3497 goto done; 3498 } 3499 auio.uio_resid += iov->iov_len; 3500 iov++; 3501 } 3502 cnt = auio.uio_resid; 3503 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred, nd.nl_td); 3504 cnt -= auio.uio_resid; 3505 uap->sysmsg_result = cnt; 3506 done: 3507 vput(vp); 3508 nlookup_done(&nd); 3509 if (needfree) 3510 FREE(needfree, M_IOV); 3511 return(error); 
3512 } 3513 3514 /* 3515 * Syscall to delete a named extended attribute from a file or directory. 3516 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 3517 */ 3518 int 3519 extattr_delete_file(struct extattr_delete_file_args *uap) 3520 { 3521 char attrname[EXTATTR_MAXNAMELEN]; 3522 struct nlookupdata nd; 3523 struct vnode *vp; 3524 int error; 3525 3526 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3527 if (error) 3528 return(error); 3529 3530 vp = NULL; 3531 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3532 if (error == 0) 3533 error = nlookup(&nd); 3534 if (error == 0) 3535 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3536 if (error) { 3537 nlookup_done(&nd); 3538 return (error); 3539 } 3540 3541 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred, nd.nl_td); 3542 vput(vp); 3543 nlookup_done(&nd); 3544 return(error); 3545 } 3546 3547 static int 3548 chroot_visible_mnt(struct mount *mp, struct proc *p) 3549 { 3550 struct namecache *ncp; 3551 /* 3552 * First check if this file system is below 3553 * the chroot path. 3554 */ 3555 ncp = mp->mnt_ncp; 3556 while (ncp != NULL && ncp != p->p_fd->fd_nrdir) 3557 ncp = ncp->nc_parent; 3558 if (ncp == NULL) { 3559 /* 3560 * This is not below the chroot path. 3561 * 3562 * Check if the chroot path is on the same filesystem, 3563 * by determing if we have to cross a mount point 3564 * before reaching mp->mnt_ncp. 3565 */ 3566 ncp = p->p_fd->fd_nrdir; 3567 while (ncp != NULL && ncp != mp->mnt_ncp) { 3568 if (ncp->nc_flag & NCF_MOUNTPT) { 3569 ncp = NULL; 3570 break; 3571 } 3572 ncp = ncp->nc_parent; 3573 } 3574 } 3575 return(ncp != NULL); 3576 } 3577