/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.106 2006/09/19 18:17:46 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>

#include <machine/limits.h>
#include <vfs/union/union.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <sys/file2.h>
#include <sys/spinlock2.h>

/*
 * Forward declarations for the static helpers defined later in this file.
 */
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct vnode *olddp, struct namecache *ncp);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

/* Hook installed by the union filesystem, if loaded (see union VFS code). */
int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 */
/*
 * mount_args(char *type, char *path, int flags, caddr_t data)
 */
/* ARGSUSED */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct namecache *ncp;
	struct mount *mp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct nlcomponent nlc;
	struct ucred *cred = p->p_ucred;

	KKASSERT(p);
	/* Mounting from within a jail is never allowed. */
	if (cred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = suser(td)))
		return (error);
	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = suser(td);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (suser(td))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the ncp and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	ncp = nd.nl_ncp;
	nd.nl_ncp = NULL;
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this ncp so we
	 * can reject a second mount (EBUSY) further down.
	 */
	if ((ncp->nc_flag & NCF_MOUNTEDHERE) && cache_findmount(ncp))
		hasmount = 1;
	else
		hasmount = 0;


	/*
	 * now we have the locked ref'd ncp and unreferenced vnode.
	 */
	vp = ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(ncp);
		return (error);
	}
	cache_unlock(ncp);

	/*
	 * Now we have an unlocked ref'd ncp and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* Updates may only be applied to the root of a mount. */
		if ((vp->v_flag & VROOT) == 0) {
			cache_drop(ncp);
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		/* Save the flags so they can be restored if VFS_MOUNT fails. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(ncp);
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = suser(td))) {
			cache_drop(ncp);
			vput(vp);
			return (error);
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(ncp);
			vput(vp);
			return (EBUSY);
		}
		if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
			cache_drop(ncp);
			vfs_unbusy(mp);
			vput(vp);
			return (EBUSY);
		}
		/* VMOUNT marks a mount-in-progress on this vnode. */
		vp->v_flag |= VMOUNT;
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		vn_unlock(vp);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid && (error = suser(td)))) {
		cache_drop(ncp);
		vput(vp);
		return (error);
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(ncp);
		vput(vp);
		return (error);
	}
	if (vp->v_type != VDIR) {
		cache_drop(ncp);
		vput(vp);
		return (ENOTDIR);
	}
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(ncp);
		vput(vp);
		return (error);
	}
	/* Look up the requested filesystem type in the registered VFS list. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	}
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = suser(td)) != 0) {
			cache_drop(ncp);
			vput(vp);
			return error;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(ncp);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			return error;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
			if (!strcmp(vfsp->vfc_name, fstypename))
				break;
		}
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(ncp);
			vput(vp);
			return (ENODEV);
		}
	}
	if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
		cache_drop(ncp);
		vput(vp);
		return (EBUSY);
	}
	/* Mark the mount-in-progress; cleared on success or failure below. */
	vp->v_flag |= VMOUNT;

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	TAILQ_INIT(&mp->mnt_jlist);
	mp->mnt_nvnodelistsize = 0;
	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	mp->mnt_iosize_max = DFLTPHYS;
	vn_unlock(vp);
update:
	/*
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* Restore the flags saved before the update attempt. */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		vfs_unbusy(mp);
		vp->v_flag &= ~VMOUNT;
		vrele(vp);
		cache_drop(ncp);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncp which is a special ncp linking the
	 * vnode-under to the root of the new mount.  The lookup code
	 * detects the mount point going forward and detects the special
	 * mnt_ncp via NCP_MOUNTPT going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		nlc.nlc_nameptr = "";
		nlc.nlc_namelen = 0;
		mp->mnt_ncp = cache_nlookup(ncp, &nlc);
		cache_setunresolved(mp->mnt_ncp);
		cache_setmountpt(mp->mnt_ncp, mp);
		cache_drop(ncp);
		/* XXX get the root of the fs and cache_setvp(mnt_ncp...) */
		vp->v_flag &= ~VMOUNT;
		mountlist_insert(mp, MNTINS_LAST);
		checkdirs(vp, mp->mnt_ncp);
		cache_unlock(mp->mnt_ncp);	/* leave ref intact */
		vn_unlock(vp);
		/*
		 * NOTE(review): the error from vfs_allocate_syncvnode() is
		 * immediately overwritten by the VFS_START() assignment below,
		 * so a syncer allocation failure is silently ignored — confirm
		 * whether that is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		vp->v_flag &= ~VMOUNT;
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(ncp);
		vput(vp);
	}
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * The passed ncp is ref'd and locked (from the mount code) and
 * must be associated with the vnode representing the root of the
 * mount point.
 */
/*
 * Context passed from checkdirs() to checkdirs_callback() via
 * allproc_scan().
 */
struct checkdirs_info {
	struct vnode *olddp;		/* old covered directory vnode */
	struct vnode *newdp;		/* root vnode of the new mount */
	struct namecache *ncp;		/* ncp of the new mount root */
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct vnode *olddp, struct namecache *ncp)
{
	struct checkdirs_info info;
	struct vnode *newdp;
	struct mount *mp;

	/* Usecount of 1 means only our caller references olddp. */
	if (olddp->v_usecount == 1)
		return;
	mp = ncp->nc_mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	cache_setvp(ncp, newdp);

	/* If the system root itself was mounted over, repoint it. */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(ncp));
	}

	info.olddp = olddp;
	info.newdp = newdp;
	info.ncp = ncp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, amoung other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct namecache *ncdrop1;
	struct namecache *ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		ncdrop1 = NULL;
		ncdrop2 = NULL;
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock_wr(&fdp->fd_spin);
		if (fdp->fd_cdir == info->olddp) {
			vprele1 = fdp->fd_cdir;
			vref(info->newdp);
			fdp->fd_cdir = info->newdp;
			ncdrop1 = fdp->fd_ncdir;
			fdp->fd_ncdir = cache_hold(info->ncp);
		}
		if (fdp->fd_rdir == info->olddp) {
			vprele2 = fdp->fd_rdir;
			vref(info->newdp);
			fdp->fd_rdir = info->newdp;
			ncdrop2 = fdp->fd_nrdir;
			fdp->fd_nrdir = cache_hold(info->ncp);
		}
		spin_unlock_wr(&fdp->fd_spin);
		/*
		 * Drop the old references only after releasing the spinlock;
		 * cache_drop()/vrele() may block.
		 */
		if (ncdrop1)
			cache_drop(ncdrop1);
		if (ncdrop2)
			cache_drop(ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/*
 * umount_args(char *path, int flags)
 */
/* ARGSUSED */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp = NULL;
	int error;
	struct nlookupdata nd;

	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = suser(td)))
		return (error);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_ncp->nc_mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	    (error = suser(td)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (!(nd.nl_ncp->nc_flag & NCF_MOUNTPT)) {
		error = EINVAL;
		goto out;
	}

out:
	/* nlookup state is released before dounmount() takes over. */
	nlookup_done(&nd);
	if (error)
		return (error);
	return (dounmount(mp, uap->flags));
}

/*
 * Do the actual file system unmount.
 */
/*
 * Interlock run under the mountlist lock: atomically sets MNTK_UNMOUNT,
 * failing with EBUSY if another unmount is already in progress.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	int error;
	int async_flag;
	int lflags;

	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		return (error);

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* A forced unmount waits for the lock; otherwise fail immediately. */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * remove cache entries for this file sys and determine if anyone
	 * other then us is still holding onto any namecache references.
	 *
	 * XXX need separate ref counter on mount structure to delay
	 * kfree()ing it.
	 */
	cache_purgevfs(mp);
	if ((ncp = mp->mnt_ncp) != NULL) {
		if (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list)) {
			char *ptr;
			char *buf;

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
			} else if (cache_fullpath(NULL, ncp, &ptr, &buf)) {
				printf("Warning: forced unmount - "
				    "namecache references still present\n");
			} else {
				printf("Warning: forced unmount of %s - "
				    "namecache references still present\n",
				    ptr
				);
				kfree(buf, M_TEMP);
			}
		}
	}

	if (error == 0) {
		if (mp->mnt_syncer != NULL)
			vrele(mp->mnt_syncer);
		/* Sync first unless read-only; MNT_FORCE unmounts anyway. */
		if (((mp->mnt_flag & MNT_RDONLY) ||
		    (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}
	if (error) {
		/* Unmount failed or was refused: restore the mount state. */
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	if ((ncp = mp->mnt_ncp) != NULL) {
		cache_clrmountpt(ncp);
		cache_drop(ncp);
		mp->mnt_ncp = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	kfree(mp, M_MOUNT);
	return (0);
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/* ARGSUSED */
int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}

/*
 * Per-mount worker for sys_sync(): flush each writable mount, temporarily
 * clearing MNT_ASYNC so the sync is not deferred.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 */
/* ARGSUSED */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct proc *p;
	struct mount *mp;
	int error;

	td = curthread;
	p = td->td_proc;
	/* Jailed processes may only use quotas if prison_quotas is set. */
	if (p->p_ucred->cr_prison && !prison_quotas)
		return (EPERM);

	/* Resolve the path and hand the request to the mount's VFS op. */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_ncp->nc_mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
		    uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 */
/* ARGSUSED */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if ((error = suser(td)) != 0)
		return (error);

	/*
	 * Argument length checks (caps: 1KB control data, 16KB result buffer)
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	fp = holdfp(p->p_fd, uap->fd, -1);
	if (fp == NULL) {
		error = EBADF;
		goto done;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	/* NOTE(review): fp cannot be NULL here (checked above), guard is redundant. */
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
		const void *ctl, int ctllen,
		void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	/* path is a kernel string here (UIO_SYSSPACE, copied in by caller). */
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error)
		return (error);

	mp = vp->v_mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & VROOT) == 0) {
		vput(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
	    buf, buflen, res);
	vput(vp);
	return (error);
}

/*
 * Fill in *buf with statistics for the mount containing the path resolved
 * by nd.  The mount-relative f_mntonname is rewritten from the namecache
 * full path, and the fsid is zeroed out for non-root callers.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_ncp->nc_mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (suser(td))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill in *buf with statistics for the mount backing file descriptor fd.
 * Mirrors kern_statfs() but resolves the mount via the descriptor's vnode
 * and uses the descriptor's credentials for VFS_STATFS.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (suser(td))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/*
 * Context passed from sys_getfsstat() to getfsstat_callback() via
 * mountlist_scan().
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user-space slot to fill, or NULL */
	long count;		/* total mounts seen (copied or not) */
	long maxcount;		/* capacity of the user buffer */
	int error;		/* first error encountered, if any */
	int flags;		/* caller's MNT_WAIT/MNT_NOWAIT/MNT_LAZY */
	int is_chrooted;	/* restrict output to visible mounts */
	struct proc *p;
};

static int getfsstat_callback(struct mount *, void *);

/* ARGSUSED */
int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));
	/* A non-root fd_nrdir means the process is chrooted. */
	if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0)
		info.is_chrooted = 1;
	else
		info.is_chrooted = 0;

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.p = p;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* Return the number of entries copied, capped at the buffer size. */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount worker for sys_getfsstat().  Copies one statfs record out to
 * the user buffer.  Note that info->count is still bumped when the buffer
 * is full, so the caller can report the total number of mounts.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* Chrooted processes only see mounts under their root. */
		if (info->is_chrooted && !chroot_visible_mnt(mp, info->p))
			return(0);
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
			/* Skip mounts whose statfs fails; not counted. */
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = cache_fullpath(info->p, mp->mnt_ncp,
		    &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
/* ARGSUSED */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct namecache *ncp, *oncp;
	struct namecache *nct;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type != VDIR || fp->f_ncp == NULL)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	if (error) {
		vput(vp);
		fdrop(fp);
		return (error);
	}
	ncp = cache_hold(fp->f_ncp);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (ncp->nc_flag & NCF_MOUNTEDHERE) &&
	    (mp = cache_findmount(ncp)) != NULL
	) {
		error = nlookup_mp(mp, &nct);
		if (error == 0) {
			/* Swap vp/ncp for the root of the covering mount. */
			cache_unlock(nct);	/* leave ref intact */
			vput(vp);
			vp = nct->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(ncp);
			ncp = nct;
		}
	}
	if (error == 0) {
		/* Install the new cwd, then release the old references. */
		ovp = fdp->fd_cdir;
		oncp = fdp->fd_ncdir;
		vn_unlock(vp);		/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = ncp;
		cache_drop(oncp);
		vrele(ovp);
	} else {
		cache_drop(ncp);
		vput(vp);
	}
	fdrop(fp);
	return (error);
}

/*
 * Resolve nd and make the result the calling process's current directory.
 * On success the nlookup's ncp reference is transferred to fd_ncdir.
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct namecache *oncp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		oncp = fdp->fd_ncdir;
		cache_unlock(nd->nl_ncp);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_ncp;
		fdp->fd_cdir = vp;
		cache_drop(oncp);
		vrele(ovp);
		/* Ownership of the ncp ref moved to fd_ncdir. */
		nd->nl_ncp = NULL;
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
1209 */ 1210 int 1211 sys_chdir(struct chdir_args *uap) 1212 { 1213 struct nlookupdata nd; 1214 int error; 1215 1216 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1217 if (error == 0) 1218 error = kern_chdir(&nd); 1219 nlookup_done(&nd); 1220 return (error); 1221 } 1222 1223 /* 1224 * Helper function for raised chroot(2) security function: Refuse if 1225 * any filedescriptors are open directories. 1226 */ 1227 static int 1228 chroot_refuse_vdir_fds(fdp) 1229 struct filedesc *fdp; 1230 { 1231 struct vnode *vp; 1232 struct file *fp; 1233 int error; 1234 int fd; 1235 1236 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1237 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1238 continue; 1239 vp = (struct vnode *)fp->f_data; 1240 if (vp->v_type != VDIR) { 1241 fdrop(fp); 1242 continue; 1243 } 1244 fdrop(fp); 1245 return(EPERM); 1246 } 1247 return (0); 1248 } 1249 1250 /* 1251 * This sysctl determines if we will allow a process to chroot(2) if it 1252 * has a directory open: 1253 * 0: disallowed for all processes. 1254 * 1: allowed for processes that were not already chroot(2)'ed. 1255 * 2: allowed for all processes. 1256 */ 1257 1258 static int chroot_allow_open_directories = 1; 1259 1260 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1261 &chroot_allow_open_directories, 0, ""); 1262 1263 /* 1264 * chroot to the specified namecache entry. We obtain the vp from the 1265 * namecache data. The passed ncp must be locked and referenced and will 1266 * remain locked and referenced on return. 1267 */ 1268 int 1269 kern_chroot(struct namecache *ncp) 1270 { 1271 struct thread *td = curthread; 1272 struct proc *p = td->td_proc; 1273 struct filedesc *fdp = p->p_fd; 1274 struct vnode *vp; 1275 int error; 1276 1277 /* 1278 * Only root can chroot 1279 */ 1280 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0) 1281 return (error); 1282 1283 /* 1284 * Disallow open directory descriptors (fchdir() breakouts). 
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);		/* leave reference intact */
	if (error == 0) {
		/* our vget ref is inherited by fd_rdir */
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(fdp->fd_nrdir);
		fdp->fd_nrdir = cache_hold(ncp);
		/*
		 * First chroot in this process also establishes the jail
		 * directory (fd_jdir/fd_njdir), which takes its own refs.
		 */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			fdp->fd_njdir = cache_hold(ncp);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
/* ARGSUSED */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error) {
		nlookup_done(&nd);
		return(error);
	}
	error = nlookup(&nd);
	if (error == 0)
		error = kern_chroot(nd.nl_ncp);
	nlookup_done(&nd);
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
1349 */ 1350 int 1351 checkvp_chdir(struct vnode *vp, struct thread *td) 1352 { 1353 int error; 1354 1355 if (vp->v_type != VDIR) 1356 error = ENOTDIR; 1357 else 1358 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred); 1359 return (error); 1360 } 1361 1362 int 1363 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1364 { 1365 struct thread *td = curthread; 1366 struct proc *p = td->td_proc; 1367 struct lwp *lp = td->td_lwp; 1368 struct filedesc *fdp = p->p_fd; 1369 int cmode, flags; 1370 struct file *nfp; 1371 struct file *fp; 1372 struct vnode *vp; 1373 int type, indx, error; 1374 struct flock lf; 1375 1376 if ((oflags & O_ACCMODE) == O_ACCMODE) 1377 return (EINVAL); 1378 flags = FFLAGS(oflags); 1379 error = falloc(p, &nfp, NULL); 1380 if (error) 1381 return (error); 1382 fp = nfp; 1383 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1384 1385 /* 1386 * XXX p_dupfd is a real mess. It allows a device to return a 1387 * file descriptor to be duplicated rather then doing the open 1388 * itself. 1389 */ 1390 lp->lwp_dupfd = -1; 1391 1392 /* 1393 * Call vn_open() to do the lookup and assign the vnode to the 1394 * file pointer. vn_open() does not change the ref count on fp 1395 * and the vnode, on success, will be inherited by the file pointer 1396 * and unlocked. 1397 */ 1398 nd->nl_flags |= NLC_LOCKVP; 1399 error = vn_open(nd, fp, flags, cmode); 1400 nlookup_done(nd); 1401 if (error) { 1402 /* 1403 * handle special fdopen() case. bleh. dupfdopen() is 1404 * responsible for dropping the old contents of ofiles[indx] 1405 * if it succeeds. 1406 * 1407 * Note that fsetfd() will add a ref to fp which represents 1408 * the fd_files[] assignment. We must still drop our 1409 * reference. 
1410 */ 1411 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1412 if (fdalloc(p, 0, &indx) == 0) { 1413 error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error); 1414 if (error == 0) { 1415 *res = indx; 1416 fdrop(fp); /* our ref */ 1417 return (0); 1418 } 1419 fsetfd(p, NULL, indx); 1420 } 1421 } 1422 fdrop(fp); /* our ref */ 1423 if (error == ERESTART) 1424 error = EINTR; 1425 return (error); 1426 } 1427 1428 /* 1429 * ref the vnode for ourselves so it can't be ripped out from under 1430 * is. XXX need an ND flag to request that the vnode be returned 1431 * anyway. 1432 * 1433 * Reserve a file descriptor but do not assign it until the open 1434 * succeeds. 1435 */ 1436 vp = (struct vnode *)fp->f_data; 1437 vref(vp); 1438 if ((error = fdalloc(p, 0, &indx)) != 0) { 1439 fdrop(fp); 1440 vrele(vp); 1441 return (error); 1442 } 1443 1444 /* 1445 * If no error occurs the vp will have been assigned to the file 1446 * pointer. 1447 */ 1448 lp->lwp_dupfd = 0; 1449 1450 if (flags & (O_EXLOCK | O_SHLOCK)) { 1451 lf.l_whence = SEEK_SET; 1452 lf.l_start = 0; 1453 lf.l_len = 0; 1454 if (flags & O_EXLOCK) 1455 lf.l_type = F_WRLCK; 1456 else 1457 lf.l_type = F_RDLCK; 1458 if (flags & FNONBLOCK) 1459 type = 0; 1460 else 1461 type = F_WAIT; 1462 1463 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1464 /* 1465 * lock request failed. Clean up the reserved 1466 * descriptor. 1467 */ 1468 vrele(vp); 1469 fsetfd(p, NULL, indx); 1470 fdrop(fp); 1471 return (error); 1472 } 1473 fp->f_flag |= FHASLOCK; 1474 } 1475 #if 0 1476 /* 1477 * Assert that all regular file vnodes were created with a object. 1478 */ 1479 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1480 ("open: regular file has no backing object after vn_open")); 1481 #endif 1482 1483 vrele(vp); 1484 1485 /* 1486 * release our private reference, leaving the one associated with the 1487 * descriptor table intact. 
1488 */ 1489 fsetfd(p, fp, indx); 1490 fdrop(fp); 1491 *res = indx; 1492 return (0); 1493 } 1494 1495 /* 1496 * open_args(char *path, int flags, int mode) 1497 * 1498 * Check permissions, allocate an open file structure, 1499 * and call the device open routine if any. 1500 */ 1501 int 1502 sys_open(struct open_args *uap) 1503 { 1504 struct nlookupdata nd; 1505 int error; 1506 1507 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1508 if (error == 0) { 1509 error = kern_open(&nd, uap->flags, 1510 uap->mode, &uap->sysmsg_result); 1511 } 1512 nlookup_done(&nd); 1513 return (error); 1514 } 1515 1516 int 1517 kern_mknod(struct nlookupdata *nd, int mode, int dev) 1518 { 1519 struct namecache *ncp; 1520 struct thread *td = curthread; 1521 struct proc *p = td->td_proc; 1522 struct vnode *vp; 1523 struct vattr vattr; 1524 int error; 1525 int whiteout = 0; 1526 1527 KKASSERT(p); 1528 1529 switch (mode & S_IFMT) { 1530 case S_IFCHR: 1531 case S_IFBLK: 1532 error = suser(td); 1533 break; 1534 default: 1535 error = suser_cred(p->p_ucred, PRISON_ROOT); 1536 break; 1537 } 1538 if (error) 1539 return (error); 1540 1541 bwillwrite(); 1542 nd->nl_flags |= NLC_CREATE; 1543 if ((error = nlookup(nd)) != 0) 1544 return (error); 1545 ncp = nd->nl_ncp; 1546 if (ncp->nc_vp) 1547 return (EEXIST); 1548 if ((error = ncp_writechk(ncp)) != 0) 1549 return (error); 1550 1551 VATTR_NULL(&vattr); 1552 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1553 vattr.va_rdev = dev; 1554 whiteout = 0; 1555 1556 switch (mode & S_IFMT) { 1557 case S_IFMT: /* used by badsect to flag bad sectors */ 1558 vattr.va_type = VBAD; 1559 break; 1560 case S_IFCHR: 1561 vattr.va_type = VCHR; 1562 break; 1563 case S_IFBLK: 1564 vattr.va_type = VBLK; 1565 break; 1566 case S_IFWHT: 1567 whiteout = 1; 1568 break; 1569 default: 1570 error = EINVAL; 1571 break; 1572 } 1573 if (error == 0) { 1574 if (whiteout) { 1575 error = VOP_NWHITEOUT(ncp, nd->nl_cred, NAMEI_CREATE); 1576 } else { 1577 vp = NULL; 
1578 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1579 if (error == 0) 1580 vput(vp); 1581 } 1582 } 1583 return (error); 1584 } 1585 1586 /* 1587 * mknod_args(char *path, int mode, int dev) 1588 * 1589 * Create a special file. 1590 */ 1591 int 1592 sys_mknod(struct mknod_args *uap) 1593 { 1594 struct nlookupdata nd; 1595 int error; 1596 1597 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1598 if (error == 0) 1599 error = kern_mknod(&nd, uap->mode, uap->dev); 1600 nlookup_done(&nd); 1601 return (error); 1602 } 1603 1604 int 1605 kern_mkfifo(struct nlookupdata *nd, int mode) 1606 { 1607 struct namecache *ncp; 1608 struct thread *td = curthread; 1609 struct proc *p = td->td_proc; 1610 struct vattr vattr; 1611 struct vnode *vp; 1612 int error; 1613 1614 bwillwrite(); 1615 1616 nd->nl_flags |= NLC_CREATE; 1617 if ((error = nlookup(nd)) != 0) 1618 return (error); 1619 ncp = nd->nl_ncp; 1620 if (ncp->nc_vp) 1621 return (EEXIST); 1622 if ((error = ncp_writechk(ncp)) != 0) 1623 return (error); 1624 1625 VATTR_NULL(&vattr); 1626 vattr.va_type = VFIFO; 1627 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1628 vp = NULL; 1629 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1630 if (error == 0) 1631 vput(vp); 1632 return (error); 1633 } 1634 1635 /* 1636 * mkfifo_args(char *path, int mode) 1637 * 1638 * Create a named pipe. 
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/* Sysctl tunables restricting hard link creation by unprivileged users */
static int hardlink_check_uid = 0;
SYSCTL_INT(_kern, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_kern, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Determine whether cred may create a hard link to the file backing vp,
 * honoring the hardlink_check_* sysctls above.  Returns 0 if permitted,
 * EPERM if denied, or a VOP_GETATTR error.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * root cred can always hardlink
	 */
	if (suser_cred(cred, PRISON_ROOT) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * XXX relookup on vget failure / race ?
1715 */ 1716 bwillwrite(); 1717 if ((error = nlookup(nd)) != 0) 1718 return (error); 1719 vp = nd->nl_ncp->nc_vp; 1720 KKASSERT(vp != NULL); 1721 if (vp->v_type == VDIR) 1722 return (EPERM); /* POSIX */ 1723 if ((error = ncp_writechk(nd->nl_ncp)) != 0) 1724 return (error); 1725 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 1726 return (error); 1727 1728 /* 1729 * Unlock the source so we can lookup the target without deadlocking 1730 * (XXX vp is locked already, possible other deadlock?). The target 1731 * must not exist. 1732 */ 1733 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 1734 nd->nl_flags &= ~NLC_NCPISLOCKED; 1735 cache_unlock(nd->nl_ncp); 1736 1737 linknd->nl_flags |= NLC_CREATE; 1738 if ((error = nlookup(linknd)) != 0) { 1739 vput(vp); 1740 return (error); 1741 } 1742 if (linknd->nl_ncp->nc_vp) { 1743 vput(vp); 1744 return (EEXIST); 1745 } 1746 1747 /* 1748 * Finally run the new API VOP. 1749 */ 1750 error = can_hardlink(vp, td, td->td_proc->p_ucred); 1751 if (error == 0) 1752 error = VOP_NLINK(linknd->nl_ncp, vp, linknd->nl_cred); 1753 vput(vp); 1754 return (error); 1755 } 1756 1757 /* 1758 * link_args(char *path, char *link) 1759 * 1760 * Make a hard file link. 
1761 */ 1762 int 1763 sys_link(struct link_args *uap) 1764 { 1765 struct nlookupdata nd, linknd; 1766 int error; 1767 1768 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1769 if (error == 0) { 1770 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 1771 if (error == 0) 1772 error = kern_link(&nd, &linknd); 1773 nlookup_done(&linknd); 1774 } 1775 nlookup_done(&nd); 1776 return (error); 1777 } 1778 1779 int 1780 kern_symlink(struct nlookupdata *nd, char *path, int mode) 1781 { 1782 struct namecache *ncp; 1783 struct vattr vattr; 1784 struct vnode *vp; 1785 int error; 1786 1787 bwillwrite(); 1788 nd->nl_flags |= NLC_CREATE; 1789 if ((error = nlookup(nd)) != 0) 1790 return (error); 1791 ncp = nd->nl_ncp; 1792 if (ncp->nc_vp) 1793 return (EEXIST); 1794 if ((error = ncp_writechk(ncp)) != 0) 1795 return (error); 1796 VATTR_NULL(&vattr); 1797 vattr.va_mode = mode; 1798 error = VOP_NSYMLINK(ncp, &vp, nd->nl_cred, &vattr, path); 1799 if (error == 0) 1800 vput(vp); 1801 return (error); 1802 } 1803 1804 /* 1805 * symlink(char *path, char *link) 1806 * 1807 * Make a symbolic link. 1808 */ 1809 int 1810 sys_symlink(struct symlink_args *uap) 1811 { 1812 struct thread *td = curthread; 1813 struct nlookupdata nd; 1814 char *path; 1815 int error; 1816 int mode; 1817 1818 path = objcache_get(namei_oc, M_WAITOK); 1819 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1820 if (error == 0) { 1821 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 1822 if (error == 0) { 1823 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 1824 error = kern_symlink(&nd, path, mode); 1825 } 1826 nlookup_done(&nd); 1827 } 1828 objcache_put(namei_oc, path); 1829 return (error); 1830 } 1831 1832 /* 1833 * undelete_args(char *path) 1834 * 1835 * Delete a whiteout from the filesystem. 
1836 */ 1837 /* ARGSUSED */ 1838 int 1839 sys_undelete(struct undelete_args *uap) 1840 { 1841 struct nlookupdata nd; 1842 int error; 1843 1844 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1845 bwillwrite(); 1846 nd.nl_flags |= NLC_DELETE; 1847 if (error == 0) 1848 error = nlookup(&nd); 1849 if (error == 0) 1850 error = ncp_writechk(nd.nl_ncp); 1851 if (error == 0) 1852 error = VOP_NWHITEOUT(nd.nl_ncp, nd.nl_cred, NAMEI_DELETE); 1853 nlookup_done(&nd); 1854 return (error); 1855 } 1856 1857 int 1858 kern_unlink(struct nlookupdata *nd) 1859 { 1860 struct namecache *ncp; 1861 int error; 1862 1863 bwillwrite(); 1864 nd->nl_flags |= NLC_DELETE; 1865 if ((error = nlookup(nd)) != 0) 1866 return (error); 1867 ncp = nd->nl_ncp; 1868 if ((error = ncp_writechk(ncp)) != 0) 1869 return (error); 1870 error = VOP_NREMOVE(ncp, nd->nl_cred); 1871 return (error); 1872 } 1873 1874 /* 1875 * unlink_args(char *path) 1876 * 1877 * Delete a name from the filesystem. 1878 */ 1879 int 1880 sys_unlink(struct unlink_args *uap) 1881 { 1882 struct nlookupdata nd; 1883 int error; 1884 1885 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1886 if (error == 0) 1887 error = kern_unlink(&nd); 1888 nlookup_done(&nd); 1889 return (error); 1890 } 1891 1892 int 1893 kern_lseek(int fd, off_t offset, int whence, off_t *res) 1894 { 1895 struct thread *td = curthread; 1896 struct proc *p = td->td_proc; 1897 struct file *fp; 1898 struct vattr vattr; 1899 int error; 1900 1901 fp = holdfp(p->p_fd, fd, -1); 1902 if (fp == NULL) 1903 return (EBADF); 1904 if (fp->f_type != DTYPE_VNODE) { 1905 error = ESPIPE; 1906 goto done; 1907 } 1908 1909 switch (whence) { 1910 case L_INCR: 1911 fp->f_offset += offset; 1912 error = 0; 1913 break; 1914 case L_XTND: 1915 error = VOP_GETATTR((struct vnode *)fp->f_data, &vattr); 1916 if (error == 0) 1917 fp->f_offset = offset + vattr.va_size; 1918 break; 1919 case L_SET: 1920 fp->f_offset = offset; 1921 error = 0; 1922 break; 1923 default: 1924 error = EINVAL; 
		break;
	}
	/* report the (possibly unchanged) offset back to the caller */
	*res = fp->f_offset;
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	    &uap->sysmsg_offset);

	return (error);
}

/*
 * Check accessibility of the file named by the nlookupdata using the
 * caller's credentials.  aflags is the access(2) style R_OK/W_OK/X_OK
 * mask; 0 checks for existence only.
 */
int
kern_access(struct nlookupdata *nd, int aflags)
{
	struct vnode *vp;
	int error, flags;

	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (aflags) {
		flags = 0;
		if (aflags & R_OK)
			flags |= VREAD;
		if (aflags & W_OK)
			flags |= VWRITE;
		if (aflags & X_OK)
			flags |= VEXEC;
		/* write access additionally requires a writability check */
		if ((flags & VWRITE) == 0 ||
		    (error = vn_writechk(vp, nd->nl_ncp)) == 0)
			error = VOP_ACCESS(vp, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			cache_setunresolved(nd->nl_ncp);
			error = cache_resolve(nd->nl_ncp, nd->nl_cred);
			if (error == 0) {
				vput(vp);
				vp = NULL;
				goto retry;
			}
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
1997 */ 1998 int 1999 sys_access(struct access_args *uap) 2000 { 2001 struct nlookupdata nd; 2002 int error; 2003 2004 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2005 if (error == 0) 2006 error = kern_access(&nd, uap->flags); 2007 nlookup_done(&nd); 2008 return (error); 2009 } 2010 2011 int 2012 kern_stat(struct nlookupdata *nd, struct stat *st) 2013 { 2014 int error; 2015 struct vnode *vp; 2016 thread_t td; 2017 2018 if ((error = nlookup(nd)) != 0) 2019 return (error); 2020 again: 2021 if ((vp = nd->nl_ncp->nc_vp) == NULL) 2022 return (ENOENT); 2023 2024 td = curthread; 2025 if ((error = vget(vp, LK_SHARED)) != 0) 2026 return (error); 2027 error = vn_stat(vp, st, nd->nl_cred); 2028 2029 /* 2030 * If the file handle is stale we have to re-resolve the entry. This 2031 * is a hack at the moment. 2032 */ 2033 if (error == ESTALE) { 2034 cache_setunresolved(nd->nl_ncp); 2035 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 2036 if (error == 0) { 2037 vput(vp); 2038 goto again; 2039 } 2040 } 2041 vput(vp); 2042 return (error); 2043 } 2044 2045 /* 2046 * stat_args(char *path, struct stat *ub) 2047 * 2048 * Get file status; this version follows links. 2049 */ 2050 int 2051 sys_stat(struct stat_args *uap) 2052 { 2053 struct nlookupdata nd; 2054 struct stat st; 2055 int error; 2056 2057 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2058 if (error == 0) { 2059 error = kern_stat(&nd, &st); 2060 if (error == 0) 2061 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2062 } 2063 nlookup_done(&nd); 2064 return (error); 2065 } 2066 2067 /* 2068 * lstat_args(char *path, struct stat *ub) 2069 * 2070 * Get file status; this version does not follow links. 
2071 */ 2072 int 2073 sys_lstat(struct lstat_args *uap) 2074 { 2075 struct nlookupdata nd; 2076 struct stat st; 2077 int error; 2078 2079 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2080 if (error == 0) { 2081 error = kern_stat(&nd, &st); 2082 if (error == 0) 2083 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2084 } 2085 nlookup_done(&nd); 2086 return (error); 2087 } 2088 2089 /* 2090 * pathconf_Args(char *path, int name) 2091 * 2092 * Get configurable pathname variables. 2093 */ 2094 /* ARGSUSED */ 2095 int 2096 sys_pathconf(struct pathconf_args *uap) 2097 { 2098 struct nlookupdata nd; 2099 struct vnode *vp; 2100 int error; 2101 2102 vp = NULL; 2103 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2104 if (error == 0) 2105 error = nlookup(&nd); 2106 if (error == 0) 2107 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 2108 nlookup_done(&nd); 2109 if (error == 0) { 2110 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 2111 vput(vp); 2112 } 2113 return (error); 2114 } 2115 2116 /* 2117 * XXX: daver 2118 * kern_readlink isn't properly split yet. There is a copyin burried 2119 * in VOP_READLINK(). 
2120 */ 2121 int 2122 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2123 { 2124 struct thread *td = curthread; 2125 struct proc *p = td->td_proc; 2126 struct vnode *vp; 2127 struct iovec aiov; 2128 struct uio auio; 2129 int error; 2130 2131 if ((error = nlookup(nd)) != 0) 2132 return (error); 2133 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 2134 if (error) 2135 return (error); 2136 if (vp->v_type != VLNK) { 2137 error = EINVAL; 2138 } else { 2139 aiov.iov_base = buf; 2140 aiov.iov_len = count; 2141 auio.uio_iov = &aiov; 2142 auio.uio_iovcnt = 1; 2143 auio.uio_offset = 0; 2144 auio.uio_rw = UIO_READ; 2145 auio.uio_segflg = UIO_USERSPACE; 2146 auio.uio_td = td; 2147 auio.uio_resid = count; 2148 error = VOP_READLINK(vp, &auio, p->p_ucred); 2149 } 2150 vput(vp); 2151 *res = count - auio.uio_resid; 2152 return (error); 2153 } 2154 2155 /* 2156 * readlink_args(char *path, char *buf, int count) 2157 * 2158 * Return target name of a symbolic link. 2159 */ 2160 int 2161 sys_readlink(struct readlink_args *uap) 2162 { 2163 struct nlookupdata nd; 2164 int error; 2165 2166 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2167 if (error == 0) { 2168 error = kern_readlink(&nd, uap->buf, uap->count, 2169 &uap->sysmsg_result); 2170 } 2171 nlookup_done(&nd); 2172 return (error); 2173 } 2174 2175 static int 2176 setfflags(struct vnode *vp, int flags) 2177 { 2178 struct thread *td = curthread; 2179 struct proc *p = td->td_proc; 2180 int error; 2181 struct vattr vattr; 2182 2183 /* 2184 * Prevent non-root users from setting flags on devices. When 2185 * a device is reused, users can retain ownership of the device 2186 * if they are allowed to set flags and programs assume that 2187 * chown can't fail when done as root. 
2188 */ 2189 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2190 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2191 return (error); 2192 2193 /* 2194 * note: vget is required for any operation that might mod the vnode 2195 * so VINACTIVE is properly cleared. 2196 */ 2197 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2198 VATTR_NULL(&vattr); 2199 vattr.va_flags = flags; 2200 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2201 vput(vp); 2202 } 2203 return (error); 2204 } 2205 2206 /* 2207 * chflags(char *path, int flags) 2208 * 2209 * Change flags of a file given a path name. 2210 */ 2211 /* ARGSUSED */ 2212 int 2213 sys_chflags(struct chflags_args *uap) 2214 { 2215 struct nlookupdata nd; 2216 struct vnode *vp; 2217 int error; 2218 2219 vp = NULL; 2220 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2221 /* XXX Add NLC flag indicating modifying operation? */ 2222 if (error == 0) 2223 error = nlookup(&nd); 2224 if (error == 0) 2225 error = ncp_writechk(nd.nl_ncp); 2226 if (error == 0) 2227 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2228 nlookup_done(&nd); 2229 if (error == 0) { 2230 error = setfflags(vp, uap->flags); 2231 vrele(vp); 2232 } 2233 return (error); 2234 } 2235 2236 /* 2237 * fchflags_args(int fd, int flags) 2238 * 2239 * Change flags of a file given a file descriptor. 
2240 */ 2241 /* ARGSUSED */ 2242 int 2243 sys_fchflags(struct fchflags_args *uap) 2244 { 2245 struct thread *td = curthread; 2246 struct proc *p = td->td_proc; 2247 struct file *fp; 2248 int error; 2249 2250 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2251 return (error); 2252 if (fp->f_ncp) 2253 error = ncp_writechk(fp->f_ncp); 2254 if (error == 0) 2255 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2256 fdrop(fp); 2257 return (error); 2258 } 2259 2260 static int 2261 setfmode(struct vnode *vp, int mode) 2262 { 2263 struct thread *td = curthread; 2264 struct proc *p = td->td_proc; 2265 int error; 2266 struct vattr vattr; 2267 2268 /* 2269 * note: vget is required for any operation that might mod the vnode 2270 * so VINACTIVE is properly cleared. 2271 */ 2272 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2273 VATTR_NULL(&vattr); 2274 vattr.va_mode = mode & ALLPERMS; 2275 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2276 vput(vp); 2277 } 2278 return error; 2279 } 2280 2281 int 2282 kern_chmod(struct nlookupdata *nd, int mode) 2283 { 2284 struct vnode *vp; 2285 int error; 2286 2287 /* XXX Add NLC flag indicating modifying operation? */ 2288 if ((error = nlookup(nd)) != 0) 2289 return (error); 2290 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2291 return (error); 2292 if ((error = ncp_writechk(nd->nl_ncp)) == 0) 2293 error = setfmode(vp, mode); 2294 vrele(vp); 2295 return (error); 2296 } 2297 2298 /* 2299 * chmod_args(char *path, int mode) 2300 * 2301 * Change mode of a file given path name. 2302 */ 2303 /* ARGSUSED */ 2304 int 2305 sys_chmod(struct chmod_args *uap) 2306 { 2307 struct nlookupdata nd; 2308 int error; 2309 2310 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2311 if (error == 0) 2312 error = kern_chmod(&nd, uap->mode); 2313 nlookup_done(&nd); 2314 return (error); 2315 } 2316 2317 /* 2318 * lchmod_args(char *path, int mode) 2319 * 2320 * Change mode of a file given path name (don't follow links.) 
2321 */ 2322 /* ARGSUSED */ 2323 int 2324 sys_lchmod(struct lchmod_args *uap) 2325 { 2326 struct nlookupdata nd; 2327 int error; 2328 2329 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2330 if (error == 0) 2331 error = kern_chmod(&nd, uap->mode); 2332 nlookup_done(&nd); 2333 return (error); 2334 } 2335 2336 /* 2337 * fchmod_args(int fd, int mode) 2338 * 2339 * Change mode of a file given a file descriptor. 2340 */ 2341 /* ARGSUSED */ 2342 int 2343 sys_fchmod(struct fchmod_args *uap) 2344 { 2345 struct thread *td = curthread; 2346 struct proc *p = td->td_proc; 2347 struct file *fp; 2348 int error; 2349 2350 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2351 return (error); 2352 if (fp->f_ncp) 2353 error = ncp_writechk(fp->f_ncp); 2354 if (error == 0) 2355 error = setfmode((struct vnode *)fp->f_data, uap->mode); 2356 fdrop(fp); 2357 return (error); 2358 } 2359 2360 static int 2361 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2362 { 2363 struct thread *td = curthread; 2364 struct proc *p = td->td_proc; 2365 int error; 2366 struct vattr vattr; 2367 2368 /* 2369 * note: vget is required for any operation that might mod the vnode 2370 * so VINACTIVE is properly cleared. 2371 */ 2372 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2373 VATTR_NULL(&vattr); 2374 vattr.va_uid = uid; 2375 vattr.va_gid = gid; 2376 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2377 vput(vp); 2378 } 2379 return error; 2380 } 2381 2382 int 2383 kern_chown(struct nlookupdata *nd, int uid, int gid) 2384 { 2385 struct vnode *vp; 2386 int error; 2387 2388 /* XXX Add NLC flag indicating modifying operation? */ 2389 if ((error = nlookup(nd)) != 0) 2390 return (error); 2391 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2392 return (error); 2393 if ((error = ncp_writechk(nd->nl_ncp)) == 0) 2394 error = setfown(vp, uid, gid); 2395 vrele(vp); 2396 return (error); 2397 } 2398 2399 /* 2400 * chown(char *path, int uid, int gid) 2401 * 2402 * Set ownership given a path name. 
2403 */ 2404 int 2405 sys_chown(struct chown_args *uap) 2406 { 2407 struct nlookupdata nd; 2408 int error; 2409 2410 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2411 if (error == 0) 2412 error = kern_chown(&nd, uap->uid, uap->gid); 2413 nlookup_done(&nd); 2414 return (error); 2415 } 2416 2417 /* 2418 * lchown_args(char *path, int uid, int gid) 2419 * 2420 * Set ownership given a path name, do not cross symlinks. 2421 */ 2422 int 2423 sys_lchown(struct lchown_args *uap) 2424 { 2425 struct nlookupdata nd; 2426 int error; 2427 2428 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2429 if (error == 0) 2430 error = kern_chown(&nd, uap->uid, uap->gid); 2431 nlookup_done(&nd); 2432 return (error); 2433 } 2434 2435 /* 2436 * fchown_args(int fd, int uid, int gid) 2437 * 2438 * Set ownership given a file descriptor. 2439 */ 2440 /* ARGSUSED */ 2441 int 2442 sys_fchown(struct fchown_args *uap) 2443 { 2444 struct thread *td = curthread; 2445 struct proc *p = td->td_proc; 2446 struct file *fp; 2447 int error; 2448 2449 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2450 return (error); 2451 if (fp->f_ncp) 2452 error = ncp_writechk(fp->f_ncp); 2453 if (error == 0) 2454 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 2455 fdrop(fp); 2456 return (error); 2457 } 2458 2459 static int 2460 getutimes(const struct timeval *tvp, struct timespec *tsp) 2461 { 2462 struct timeval tv[2]; 2463 2464 if (tvp == NULL) { 2465 microtime(&tv[0]); 2466 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2467 tsp[1] = tsp[0]; 2468 } else { 2469 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2470 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2471 } 2472 return 0; 2473 } 2474 2475 static int 2476 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2477 { 2478 struct thread *td = curthread; 2479 struct proc *p = td->td_proc; 2480 int error; 2481 struct vattr vattr; 2482 2483 /* 2484 * note: vget is required for any operation that might mod the vnode 2485 * so 
VINACTIVE is properly cleared. 2486 */ 2487 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2488 VATTR_NULL(&vattr); 2489 vattr.va_atime = ts[0]; 2490 vattr.va_mtime = ts[1]; 2491 if (nullflag) 2492 vattr.va_vaflags |= VA_UTIMES_NULL; 2493 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2494 vput(vp); 2495 } 2496 return error; 2497 } 2498 2499 int 2500 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2501 { 2502 struct timespec ts[2]; 2503 struct vnode *vp; 2504 int error; 2505 2506 if ((error = getutimes(tptr, ts)) != 0) 2507 return (error); 2508 /* XXX Add NLC flag indicating modifying operation? */ 2509 if ((error = nlookup(nd)) != 0) 2510 return (error); 2511 if ((error = ncp_writechk(nd->nl_ncp)) != 0) 2512 return (error); 2513 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2514 return (error); 2515 error = setutimes(vp, ts, tptr == NULL); 2516 vrele(vp); 2517 return (error); 2518 } 2519 2520 /* 2521 * utimes_args(char *path, struct timeval *tptr) 2522 * 2523 * Set the access and modification times of a file. 2524 */ 2525 int 2526 sys_utimes(struct utimes_args *uap) 2527 { 2528 struct timeval tv[2]; 2529 struct nlookupdata nd; 2530 int error; 2531 2532 if (uap->tptr) { 2533 error = copyin(uap->tptr, tv, sizeof(tv)); 2534 if (error) 2535 return (error); 2536 } 2537 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2538 if (error == 0) 2539 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2540 nlookup_done(&nd); 2541 return (error); 2542 } 2543 2544 /* 2545 * lutimes_args(char *path, struct timeval *tptr) 2546 * 2547 * Set the access and modification times of a file. 
2548 */ 2549 int 2550 sys_lutimes(struct lutimes_args *uap) 2551 { 2552 struct timeval tv[2]; 2553 struct nlookupdata nd; 2554 int error; 2555 2556 if (uap->tptr) { 2557 error = copyin(uap->tptr, tv, sizeof(tv)); 2558 if (error) 2559 return (error); 2560 } 2561 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2562 if (error == 0) 2563 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2564 nlookup_done(&nd); 2565 return (error); 2566 } 2567 2568 int 2569 kern_futimes(int fd, struct timeval *tptr) 2570 { 2571 struct thread *td = curthread; 2572 struct proc *p = td->td_proc; 2573 struct timespec ts[2]; 2574 struct file *fp; 2575 int error; 2576 2577 error = getutimes(tptr, ts); 2578 if (error) 2579 return (error); 2580 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2581 return (error); 2582 if (fp->f_ncp) 2583 error = ncp_writechk(fp->f_ncp); 2584 if (error == 0) 2585 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2586 fdrop(fp); 2587 return (error); 2588 } 2589 2590 /* 2591 * futimes_args(int fd, struct timeval *tptr) 2592 * 2593 * Set the access and modification times of a file. 2594 */ 2595 int 2596 sys_futimes(struct futimes_args *uap) 2597 { 2598 struct timeval tv[2]; 2599 int error; 2600 2601 if (uap->tptr) { 2602 error = copyin(uap->tptr, tv, sizeof(tv)); 2603 if (error) 2604 return (error); 2605 } 2606 2607 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2608 2609 return (error); 2610 } 2611 2612 int 2613 kern_truncate(struct nlookupdata *nd, off_t length) 2614 { 2615 struct vnode *vp; 2616 struct vattr vattr; 2617 int error; 2618 2619 if (length < 0) 2620 return(EINVAL); 2621 /* XXX Add NLC flag indicating modifying operation? 
*/ 2622 if ((error = nlookup(nd)) != 0) 2623 return (error); 2624 if ((error = ncp_writechk(nd->nl_ncp)) != 0) 2625 return (error); 2626 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2627 return (error); 2628 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 2629 vrele(vp); 2630 return (error); 2631 } 2632 if (vp->v_type == VDIR) { 2633 error = EISDIR; 2634 } else if ((error = vn_writechk(vp, nd->nl_ncp)) == 0 && 2635 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) { 2636 VATTR_NULL(&vattr); 2637 vattr.va_size = length; 2638 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 2639 } 2640 vput(vp); 2641 return (error); 2642 } 2643 2644 /* 2645 * truncate(char *path, int pad, off_t length) 2646 * 2647 * Truncate a file given its path name. 2648 */ 2649 int 2650 sys_truncate(struct truncate_args *uap) 2651 { 2652 struct nlookupdata nd; 2653 int error; 2654 2655 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2656 if (error == 0) 2657 error = kern_truncate(&nd, uap->length); 2658 nlookup_done(&nd); 2659 return error; 2660 } 2661 2662 int 2663 kern_ftruncate(int fd, off_t length) 2664 { 2665 struct thread *td = curthread; 2666 struct proc *p = td->td_proc; 2667 struct vattr vattr; 2668 struct vnode *vp; 2669 struct file *fp; 2670 int error; 2671 2672 if (length < 0) 2673 return(EINVAL); 2674 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2675 return (error); 2676 if (fp->f_ncp) { 2677 error = ncp_writechk(fp->f_ncp); 2678 if (error) 2679 goto done; 2680 } 2681 if ((fp->f_flag & FWRITE) == 0) { 2682 error = EINVAL; 2683 goto done; 2684 } 2685 vp = (struct vnode *)fp->f_data; 2686 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2687 if (vp->v_type == VDIR) { 2688 error = EISDIR; 2689 } else if ((error = vn_writechk(vp, NULL)) == 0) { 2690 VATTR_NULL(&vattr); 2691 vattr.va_size = length; 2692 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 2693 } 2694 vn_unlock(vp); 2695 done: 2696 fdrop(fp); 2697 return (error); 2698 } 2699 2700 /* 2701 * 
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
/* ARGSUSED */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* Flush dirty VM pages to the buffer cache before the fsync. */
	if ((obj = vp->v_object) != NULL)
		vm_object_page_clean(obj, 0, 0, 0);
	/*
	 * Synchronously fsync the vnode.  If softupdates is active on the
	 * mount, give the softupdates layer a chance to flush its
	 * dependencies as well.
	 */
	if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 &&
	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) &&
	    bioops.io_fsync) {
		error = (*bioops.io_fsync)(vp);
	}
	vn_unlock(vp);
	fdrop(fp);
	return (error);
}

/*
 * Rename the file described by fromnd to the name described by tond.
 * Both nlookups are owned by the caller, who is responsible for calling
 * nlookup_done() on them.  The bulk of this function deals with safely
 * juggling the two namecache locks without deadlocking, and with
 * revalidating the topology after relocking.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct namecache *fncpd;	/* source parent directory ncp */
	struct namecache *tncpd;	/* target parent directory ncp */
	struct namecache *ncp;
	struct mount *mp;
	int error;

	bwillwrite();
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fncpd = fromnd->nl_ncp->nc_parent) == NULL)
		return (ENOENT);
	cache_hold(fncpd);

	/*
	 * unlock the source ncp so we can lookup the target ncp without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(fromnd->nl_ncp);

	tond->nl_flags |= NLC_CREATE;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(fncpd);
		return (error);
	}
	if ((tncpd = tond->nl_ncp->nc_parent) == NULL) {
		cache_drop(fncpd);
		return (ENOENT);
	}
	cache_hold(tncpd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_ncp == tond->nl_ncp) {
		cache_drop(fncpd);
		cache_drop(tncpd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_ncp->nc_flag | tond->nl_ncp->nc_flag) &
	    (NCF_MOUNTPT|NCF_MOUNTEDHERE)
	) {
		cache_drop(fncpd);
		cache_drop(tncpd);
		return (EINVAL);
	}

	/*
	 * relock the source ncp.  NOTE AFTER RELOCKING: the source ncp
	 * may have become invalid while it was unlocked, nc_vp and nc_mount
	 * could be NULL.
	 *
	 * Lock ordering: try non-blocking first; otherwise lock in ncp
	 * address order to avoid a lock-order deadlock against another
	 * rename in progress.
	 */
	if (cache_lock_nonblock(fromnd->nl_ncp) == 0) {
		cache_resolve(fromnd->nl_ncp, fromnd->nl_cred);
	} else if (fromnd->nl_ncp > tond->nl_ncp) {
		cache_lock(fromnd->nl_ncp);
		cache_resolve(fromnd->nl_ncp, fromnd->nl_cred);
	} else {
		cache_unlock(tond->nl_ncp);
		cache_lock(fromnd->nl_ncp);
		cache_resolve(fromnd->nl_ncp, fromnd->nl_cred);
		cache_lock(tond->nl_ncp);
		cache_resolve(tond->nl_ncp, tond->nl_cred);
	}
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * make sure the parent directories linkages are the same
	 * (i.e. neither the source nor the target was concurrently
	 * renamed out from under us while unlocked).
	 */
	if (fncpd != fromnd->nl_ncp->nc_parent ||
	    tncpd != tond->nl_ncp->nc_parent) {
		cache_drop(fncpd);
		cache_drop(tncpd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fncpd->nc_mount;
	if (mp != tncpd->nc_mount || mp != fromnd->nl_ncp->nc_mount ||
	    mp != tond->nl_ncp->nc_mount) {
		cache_drop(fncpd);
		cache_drop(tncpd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(tond->nl_ncp)) != 0) {
		cache_drop(fncpd);
		cache_drop(tncpd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a
	 * directory, then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_ncp->nc_vp might have
	 * become NULL.
	 */
	if (tond->nl_ncp->nc_vp) {
		if (fromnd->nl_ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by travsersing the target directory upwards looking
	 * for a match against the source.
	 */
	if (error == 0) {
		for (ncp = tncpd; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(fncpd);
	cache_drop(tncpd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		if (fromnd->nl_ncp->nc_vp == tond->nl_ncp->nc_vp) {
			error = VOP_NREMOVE(fromnd->nl_ncp, fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp,
					    tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	if (error == 0) {
		error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_rename(&fromnd, &tond);
		nlookup_done(&tond);
	}
	nlookup_done(&fromnd);
	return (error);
}

/*
 * Create a directory at the path described by the nlookup.  The caller
 * retains ownership of nd and must call nlookup_done() on it.  The new
 * directory's mode is masked by the process umask (fd_cmask).
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct namecache *ncp;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillwrite();
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE;
	if ((error = nlookup(nd)) != 0)
		return (error);

	ncp = nd->nl_ncp;
	/* An existing vnode at the target name means the path exists. */
	if (ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(ncp)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(ncp, &vp, p->p_ucred, &vattr);
	/* VOP_NMKDIR returns the new directory locked and referenced. */
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
2971 */ 2972 /* ARGSUSED */ 2973 int 2974 sys_mkdir(struct mkdir_args *uap) 2975 { 2976 struct nlookupdata nd; 2977 int error; 2978 2979 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2980 if (error == 0) 2981 error = kern_mkdir(&nd, uap->mode); 2982 nlookup_done(&nd); 2983 return (error); 2984 } 2985 2986 int 2987 kern_rmdir(struct nlookupdata *nd) 2988 { 2989 struct namecache *ncp; 2990 int error; 2991 2992 bwillwrite(); 2993 nd->nl_flags |= NLC_DELETE; 2994 if ((error = nlookup(nd)) != 0) 2995 return (error); 2996 2997 /* 2998 * Do not allow directories representing mount points to be 2999 * deleted, even if empty. Check write perms on mount point 3000 * in case the vnode is aliased (aka nullfs). 3001 */ 3002 if (nd->nl_ncp->nc_flag & (NCF_MOUNTEDHERE|NCF_MOUNTPT)) 3003 return (EINVAL); 3004 if ((error = ncp_writechk(nd->nl_ncp)) != 0) 3005 return (error); 3006 3007 ncp = nd->nl_ncp; 3008 error = VOP_NRMDIR(ncp, nd->nl_cred); 3009 return (error); 3010 } 3011 3012 /* 3013 * rmdir_args(char *path) 3014 * 3015 * Remove a directory file. 
3016 */ 3017 /* ARGSUSED */ 3018 int 3019 sys_rmdir(struct rmdir_args *uap) 3020 { 3021 struct nlookupdata nd; 3022 int error; 3023 3024 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3025 if (error == 0) 3026 error = kern_rmdir(&nd); 3027 nlookup_done(&nd); 3028 return (error); 3029 } 3030 3031 int 3032 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3033 enum uio_seg direction) 3034 { 3035 struct thread *td = curthread; 3036 struct proc *p = td->td_proc; 3037 struct vnode *vp; 3038 struct file *fp; 3039 struct uio auio; 3040 struct iovec aiov; 3041 long loff; 3042 int error, eofflag; 3043 3044 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3045 return (error); 3046 if ((fp->f_flag & FREAD) == 0) { 3047 error = EBADF; 3048 goto done; 3049 } 3050 vp = (struct vnode *)fp->f_data; 3051 unionread: 3052 if (vp->v_type != VDIR) { 3053 error = EINVAL; 3054 goto done; 3055 } 3056 aiov.iov_base = buf; 3057 aiov.iov_len = count; 3058 auio.uio_iov = &aiov; 3059 auio.uio_iovcnt = 1; 3060 auio.uio_rw = UIO_READ; 3061 auio.uio_segflg = direction; 3062 auio.uio_td = td; 3063 auio.uio_resid = count; 3064 loff = auio.uio_offset = fp->f_offset; 3065 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3066 fp->f_offset = auio.uio_offset; 3067 if (error) 3068 goto done; 3069 if (count == auio.uio_resid) { 3070 if (union_dircheckp) { 3071 error = union_dircheckp(td, &vp, fp); 3072 if (error == -1) 3073 goto unionread; 3074 if (error) 3075 goto done; 3076 } 3077 #if 0 3078 if ((vp->v_flag & VROOT) && 3079 (vp->v_mount->mnt_flag & MNT_UNION)) { 3080 struct vnode *tvp = vp; 3081 vp = vp->v_mount->mnt_vnodecovered; 3082 vref(vp); 3083 fp->f_data = vp; 3084 fp->f_offset = 0; 3085 vrele(tvp); 3086 goto unionread; 3087 } 3088 #endif 3089 } 3090 if (basep) { 3091 *basep = loff; 3092 } 3093 *res = count - auio.uio_resid; 3094 done: 3095 fdrop(fp); 3096 return (error); 3097 } 3098 3099 /* 3100 * getdirentries_args(int fd, char *buf, u_int conut, 
long *basep) 3101 * 3102 * Read a block of directory entries in a file system independent format. 3103 */ 3104 int 3105 sys_getdirentries(struct getdirentries_args *uap) 3106 { 3107 long base; 3108 int error; 3109 3110 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3111 &uap->sysmsg_result, UIO_USERSPACE); 3112 3113 if (error == 0) 3114 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3115 return (error); 3116 } 3117 3118 /* 3119 * getdents_args(int fd, char *buf, size_t count) 3120 */ 3121 int 3122 sys_getdents(struct getdents_args *uap) 3123 { 3124 int error; 3125 3126 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3127 &uap->sysmsg_result, UIO_USERSPACE); 3128 3129 return (error); 3130 } 3131 3132 /* 3133 * umask(int newmask) 3134 * 3135 * Set the mode mask for creation of filesystem nodes. 3136 * 3137 * MP SAFE 3138 */ 3139 int 3140 sys_umask(struct umask_args *uap) 3141 { 3142 struct thread *td = curthread; 3143 struct proc *p = td->td_proc; 3144 struct filedesc *fdp; 3145 3146 fdp = p->p_fd; 3147 uap->sysmsg_result = fdp->fd_cmask; 3148 fdp->fd_cmask = uap->newmask & ALLPERMS; 3149 return (0); 3150 } 3151 3152 /* 3153 * revoke(char *path) 3154 * 3155 * Void all references to file by ripping underlying filesystem 3156 * away from vnode. 
3157 */ 3158 /* ARGSUSED */ 3159 int 3160 sys_revoke(struct revoke_args *uap) 3161 { 3162 struct nlookupdata nd; 3163 struct vattr vattr; 3164 struct vnode *vp; 3165 struct ucred *cred; 3166 int error; 3167 3168 vp = NULL; 3169 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3170 if (error == 0) 3171 error = nlookup(&nd); 3172 if (error == 0) 3173 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 3174 cred = crhold(nd.nl_cred); 3175 nlookup_done(&nd); 3176 if (error == 0) { 3177 if (vp->v_type != VCHR && vp->v_type != VBLK) 3178 error = EINVAL; 3179 if (error == 0) 3180 error = VOP_GETATTR(vp, &vattr); 3181 if (error == 0 && cred->cr_uid != vattr.va_uid) 3182 error = suser_cred(cred, PRISON_ROOT); 3183 if (error == 0 && count_udev(vp->v_udev) > 0) { 3184 error = 0; 3185 vx_lock(vp); 3186 VOP_REVOKE(vp, REVOKEALL); 3187 vx_unlock(vp); 3188 } 3189 vrele(vp); 3190 } 3191 if (cred) 3192 crfree(cred); 3193 return (error); 3194 } 3195 3196 /* 3197 * getfh_args(char *fname, fhandle_t *fhp) 3198 * 3199 * Get (NFS) file handle 3200 */ 3201 int 3202 sys_getfh(struct getfh_args *uap) 3203 { 3204 struct thread *td = curthread; 3205 struct nlookupdata nd; 3206 fhandle_t fh; 3207 struct vnode *vp; 3208 int error; 3209 3210 /* 3211 * Must be super user 3212 */ 3213 if ((error = suser(td)) != 0) 3214 return (error); 3215 3216 vp = NULL; 3217 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3218 if (error == 0) 3219 error = nlookup(&nd); 3220 if (error == 0) 3221 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3222 nlookup_done(&nd); 3223 if (error == 0) { 3224 bzero(&fh, sizeof(fh)); 3225 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3226 error = VFS_VPTOFH(vp, &fh.fh_fid); 3227 vput(vp); 3228 if (error == 0) 3229 error = copyout(&fh, uap->fhp, sizeof(fh)); 3230 } 3231 return (error); 3232 } 3233 3234 /* 3235 * fhopen_args(const struct fhandle *u_fhp, int flags) 3236 * 3237 * syscall for the rpc.lockd to use to translate a NFS file handle 
into 3238 * an open descriptor. 3239 * 3240 * warning: do not remove the suser() call or this becomes one giant 3241 * security hole. 3242 */ 3243 int 3244 sys_fhopen(struct fhopen_args *uap) 3245 { 3246 struct thread *td = curthread; 3247 struct proc *p = td->td_proc; 3248 struct mount *mp; 3249 struct vnode *vp; 3250 struct fhandle fhp; 3251 struct vattr vat; 3252 struct vattr *vap = &vat; 3253 struct flock lf; 3254 int fmode, mode, error, type; 3255 struct file *nfp; 3256 struct file *fp; 3257 int indx; 3258 3259 /* 3260 * Must be super user 3261 */ 3262 error = suser(td); 3263 if (error) 3264 return (error); 3265 3266 fmode = FFLAGS(uap->flags); 3267 /* why not allow a non-read/write open for our lockd? */ 3268 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3269 return (EINVAL); 3270 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3271 if (error) 3272 return(error); 3273 /* find the mount point */ 3274 mp = vfs_getvfs(&fhp.fh_fsid); 3275 if (mp == NULL) 3276 return (ESTALE); 3277 /* now give me my vnode, it gets returned to me locked */ 3278 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); 3279 if (error) 3280 return (error); 3281 /* 3282 * from now on we have to make sure not 3283 * to forget about the vnode 3284 * any error that causes an abort must vput(vp) 3285 * just set error = err and 'goto bad;'. 
3286 */ 3287 3288 /* 3289 * from vn_open 3290 */ 3291 if (vp->v_type == VLNK) { 3292 error = EMLINK; 3293 goto bad; 3294 } 3295 if (vp->v_type == VSOCK) { 3296 error = EOPNOTSUPP; 3297 goto bad; 3298 } 3299 mode = 0; 3300 if (fmode & (FWRITE | O_TRUNC)) { 3301 if (vp->v_type == VDIR) { 3302 error = EISDIR; 3303 goto bad; 3304 } 3305 error = vn_writechk(vp, NULL); 3306 if (error) 3307 goto bad; 3308 mode |= VWRITE; 3309 } 3310 if (fmode & FREAD) 3311 mode |= VREAD; 3312 if (mode) { 3313 error = VOP_ACCESS(vp, mode, p->p_ucred); 3314 if (error) 3315 goto bad; 3316 } 3317 if (fmode & O_TRUNC) { 3318 vn_unlock(vp); /* XXX */ 3319 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 3320 VATTR_NULL(vap); 3321 vap->va_size = 0; 3322 error = VOP_SETATTR(vp, vap, p->p_ucred); 3323 if (error) 3324 goto bad; 3325 } 3326 3327 /* 3328 * VOP_OPEN needs the file pointer so it can potentially override 3329 * it. 3330 * 3331 * WARNING! no f_ncp will be associated when fhopen()ing a directory. 3332 * XXX 3333 */ 3334 if ((error = falloc(p, &nfp, &indx)) != 0) 3335 goto bad; 3336 fp = nfp; 3337 3338 error = VOP_OPEN(vp, fmode, p->p_ucred, fp); 3339 if (error) { 3340 /* 3341 * setting f_ops this way prevents VOP_CLOSE from being 3342 * called or fdrop() releasing the vp from v_data. Since 3343 * the VOP_OPEN failed we don't want to VOP_CLOSE. 3344 */ 3345 fp->f_ops = &badfileops; 3346 fp->f_data = NULL; 3347 goto bad_drop; 3348 } 3349 3350 /* 3351 * The fp is given its own reference, we still have our ref and lock. 3352 * 3353 * Assert that all regular files must be created with a VM object. 3354 */ 3355 if (vp->v_type == VREG && vp->v_object == NULL) { 3356 printf("fhopen: regular file did not have VM object: %p\n", vp); 3357 goto bad_drop; 3358 } 3359 3360 /* 3361 * The open was successful. Handle any locking requirements. 
3362 */ 3363 if (fmode & (O_EXLOCK | O_SHLOCK)) { 3364 lf.l_whence = SEEK_SET; 3365 lf.l_start = 0; 3366 lf.l_len = 0; 3367 if (fmode & O_EXLOCK) 3368 lf.l_type = F_WRLCK; 3369 else 3370 lf.l_type = F_RDLCK; 3371 if (fmode & FNONBLOCK) 3372 type = 0; 3373 else 3374 type = F_WAIT; 3375 vn_unlock(vp); 3376 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 3377 /* 3378 * release our private reference. 3379 */ 3380 fsetfd(p, NULL, indx); 3381 fdrop(fp); 3382 vrele(vp); 3383 return (error); 3384 } 3385 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3386 fp->f_flag |= FHASLOCK; 3387 } 3388 3389 /* 3390 * Clean up. Associate the file pointer with the previously 3391 * reserved descriptor and return it. 3392 */ 3393 vput(vp); 3394 fsetfd(p, fp, indx); 3395 fdrop(fp); 3396 uap->sysmsg_result = indx; 3397 return (0); 3398 3399 bad_drop: 3400 fsetfd(p, NULL, indx); 3401 fdrop(fp); 3402 bad: 3403 vput(vp); 3404 return (error); 3405 } 3406 3407 /* 3408 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 3409 */ 3410 int 3411 sys_fhstat(struct fhstat_args *uap) 3412 { 3413 struct thread *td = curthread; 3414 struct stat sb; 3415 fhandle_t fh; 3416 struct mount *mp; 3417 struct vnode *vp; 3418 int error; 3419 3420 /* 3421 * Must be super user 3422 */ 3423 error = suser(td); 3424 if (error) 3425 return (error); 3426 3427 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 3428 if (error) 3429 return (error); 3430 3431 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3432 return (ESTALE); 3433 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3434 return (error); 3435 error = vn_stat(vp, &sb, td->td_proc->p_ucred); 3436 vput(vp); 3437 if (error) 3438 return (error); 3439 error = copyout(&sb, uap->sb, sizeof(sb)); 3440 return (error); 3441 } 3442 3443 /* 3444 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 3445 */ 3446 int 3447 sys_fhstatfs(struct fhstatfs_args *uap) 3448 { 3449 struct thread *td = curthread; 3450 struct proc *p = td->td_proc; 3451 struct statfs 
*sp; 3452 struct mount *mp; 3453 struct vnode *vp; 3454 struct statfs sb; 3455 char *fullpath, *freepath; 3456 fhandle_t fh; 3457 int error; 3458 3459 /* 3460 * Must be super user 3461 */ 3462 if ((error = suser(td))) 3463 return (error); 3464 3465 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3466 return (error); 3467 3468 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3469 return (ESTALE); 3470 3471 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0 && 3472 !chroot_visible_mnt(mp, p)) 3473 return (ESTALE); 3474 3475 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3476 return (error); 3477 mp = vp->v_mount; 3478 sp = &mp->mnt_stat; 3479 vput(vp); 3480 if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0) 3481 return (error); 3482 3483 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 3484 if (error) 3485 return(error); 3486 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3487 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3488 kfree(freepath, M_TEMP); 3489 3490 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3491 if (suser(td)) { 3492 bcopy(sp, &sb, sizeof(sb)); 3493 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3494 sp = &sb; 3495 } 3496 return (copyout(sp, uap->buf, sizeof(*sp))); 3497 } 3498 3499 /* 3500 * Syscall to push extended attribute configuration information into the 3501 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3502 * a command (int cmd), and attribute name and misc data. For now, the 3503 * attribute name is left in userspace for consumption by the VFS_op. 3504 * It will probably be changed to be copied into sysspace by the 3505 * syscall in the future, once issues with various consumers of the 3506 * attribute code have raised their hands. 3507 * 3508 * Currently this is used only by UFS Extended Attributes. 
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct mount *mp;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_ncp->nc_mount;
		/* uap->attrname and uap->arg are passed through in userspace */
		error = VFS_EXTATTRCTL(mp, uap->cmd,
				uap->attrname, uap->arg,
				nd.nl_cred);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 * Accepts attribute name, and a uio structure pointing to the data to set.
 * The uio is consumed in the style of writev().  The real work happens
 * in VOP_SETEXTATTR().
 */
int
sys_extattr_set_file(struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct iovec aiov[UIO_SMALLIOV];
	struct iovec *needfree;
	struct nlookupdata nd;
	struct iovec *iov;
	struct vnode *vp;
	struct uio auio;
	u_int iovlen;
	u_int cnt;
	int error;
	int i;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(nd.nl_ncp);
	if (error == 0)
		error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Copy in the iovec array, using the on-stack array for small
	 * counts and a temporary allocation otherwise (writev() style).
	 */
	needfree = NULL;
	iovlen = uap->iovcnt * sizeof(struct iovec);
	if (uap->iovcnt > UIO_SMALLIOV) {
		if (uap->iovcnt > UIO_MAXIOV) {
			error = EINVAL;
			goto done;
		}
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else {
		iov = aiov;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = nd.nl_td;
	auio.uio_offset = 0;
	if ((error = copyin(uap->iovp, iov, iovlen)))
		goto done;
	/* Total the transfer size, guarding against int overflow. */
	auio.uio_resid = 0;
	for (i = 0; i < uap->iovcnt; i++) {
		if (iov->iov_len > INT_MAX - auio.uio_resid) {
			error = EINVAL;
			goto done;
		}
		auio.uio_resid += iov->iov_len;
		iov++;
	}
	cnt = auio.uio_resid;
	error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	/* Report the number of bytes actually consumed. */
	cnt -= auio.uio_resid;
	uap->sysmsg_result = cnt;
done:
	vput(vp);
	nlookup_done(&nd);
	if (needfree)
		FREE(needfree, M_IOV);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
 * Accepts attribute name, and a uio structure pointing to a buffer for the
 * data.  The uio is consumed in the style of readv().  The real work
 * happens in VOP_GETEXTATTR();
 */
int
sys_extattr_get_file(struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct iovec aiov[UIO_SMALLIOV];
	struct iovec *needfree;
	struct nlookupdata nd;
	struct iovec *iov;
	struct vnode *vp;
	struct uio auio;
	u_int iovlen;
	u_int cnt;
	int error;
	int i;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Copy in the iovec array, using the on-stack array for small
	 * counts and a temporary allocation otherwise (readv() style).
	 */
	iovlen = uap->iovcnt * sizeof (struct iovec);
	needfree = NULL;
	if (uap->iovcnt > UIO_SMALLIOV) {
		if (uap->iovcnt > UIO_MAXIOV) {
			error = EINVAL;
			goto done;
		}
		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else {
		iov = aiov;
	}
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = nd.nl_td;
	auio.uio_offset = 0;
	if ((error = copyin(uap->iovp, iov, iovlen)))
		goto done;
	/* Total the transfer size, guarding against int overflow. */
	auio.uio_resid = 0;
	for (i = 0; i < uap->iovcnt; i++) {
		if (iov->iov_len > INT_MAX - auio.uio_resid) {
			error = EINVAL;
			goto done;
		}
		auio.uio_resid += iov->iov_len;
		iov++;
	}
	cnt = auio.uio_resid;
	error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	/* Report the number of bytes actually transferred. */
	cnt -= auio.uio_resid;
	uap->sysmsg_result = cnt;
done:
	vput(vp);
	nlookup_done(&nd);
	if (needfree)
		FREE(needfree, M_IOV);
	return(error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
 */
int
sys_extattr_delete_file(struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return(error);

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(nd.nl_ncp);
	if (error == 0)
		error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* A NULL uio deletes the attribute. */
	error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred);
	vput(vp);
	nlookup_done(&nd);
	return(error);
}

/*
 * Determine whether the mount point mp is visible to the (possibly
 * chrooted) process p.  Returns non-zero if visible.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct namecache *ncp;
	/*
	 * First check if this file system is below
	 * the chroot path.
	 */
	ncp = mp->mnt_ncp;
	while (ncp != NULL && ncp != p->p_fd->fd_nrdir)
		ncp = ncp->nc_parent;
	if (ncp == NULL) {
		/*
		 * This is not below the chroot path.
		 *
		 * Check if the chroot path is on the same filesystem,
		 * by determing if we have to cross a mount point
		 * before reaching mp->mnt_ncp.
		 */
		ncp = p->p_fd->fd_nrdir;
		while (ncp != NULL && ncp != mp->mnt_ncp) {
			if (ncp->nc_flag & NCF_MOUNTPT) {
				ncp = NULL;
				break;
			}
			ncp = ncp->nc_parent;
		}
	}
	return(ncp != NULL);
}