1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

/* Tunable: if non-zero, non-root users may mount/unmount filesystems. */
static int usermount = 0;	/* if 1, non-root can mount fs. */

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* Mounting from within a jail is never permitted. */
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system.  We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this ncp so
	 * we can refuse to stack a second mount later on (EBUSY).
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* Updates may only target the root of a mounted fs. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a nullfs update use the nullfs mount recorded
		 * above; vp->v_mount would point at the underlying fs.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Save flags so a failed VFS_MOUNT() can restore them. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* Some filesystems refuse to have other mounts stacked on them. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* Restore the pre-update flags saved above. */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		mp->mnt_ncmounton = nch;		/* inherits ref */
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the error from vfs_allocate_syncvnode()
		 * is overwritten by VFS_START() below and never reported
		 * to the caller -- confirm this is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/* Mount failed: tear down the partially built mount. */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* mount point being covered */
	struct nchandle new_nch;	/* root of the new mount */
	struct vnode *old_vp;		/* unused by the callback */
	struct vnode *new_vp;		/* resolved root vnode of new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info, 0);
	vput(newdp);
}

/*
 * Per-process callback for checkdirs(): retarget any cwd/root that
 * references the covered mount point at the new mount's root.
 *
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);

		/* Drop the displaced refs only after the spinlock is gone. */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
583 * 584 * umount_args(char *path, int flags) 585 * 586 * MPALMOSTSAFE 587 */ 588 int 589 sys_unmount(struct unmount_args *uap) 590 { 591 struct thread *td = curthread; 592 struct proc *p __debugvar = td->td_proc; 593 struct mount *mp = NULL; 594 struct nlookupdata nd; 595 int error; 596 597 KKASSERT(p); 598 if (td->td_ucred->cr_prison != NULL) { 599 error = EPERM; 600 goto done; 601 } 602 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 603 goto done; 604 605 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 606 if (error == 0) 607 error = nlookup(&nd); 608 if (error) 609 goto out; 610 611 mp = nd.nl_nch.mount; 612 613 /* 614 * Only root, or the user that did the original mount is 615 * permitted to unmount this filesystem. 616 */ 617 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 618 (error = priv_check(td, PRIV_ROOT))) 619 goto out; 620 621 /* 622 * Don't allow unmounting the root file system. 623 */ 624 if (mp->mnt_flag & MNT_ROOTFS) { 625 error = EINVAL; 626 goto out; 627 } 628 629 /* 630 * Must be the root of the filesystem 631 */ 632 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 633 error = EINVAL; 634 goto out; 635 } 636 637 /* 638 * If no error try to issue the unmount. We lose our cache 639 * ref when we call nlookup_done so we must hold the mount point 640 * to prevent use-after-free races. 641 */ 642 out: 643 if (error == 0) { 644 mount_hold(mp); 645 nlookup_done(&nd); 646 error = dounmount(mp, uap->flags, 0); 647 mount_drop(mp); 648 } else { 649 nlookup_done(&nd); 650 } 651 done: 652 return (error); 653 } 654 655 /* 656 * Do the actual file system unmount (interlocked against the mountlist 657 * token and mp->mnt_token). 
 */
static int
dounmount_interlock(struct mount *mp)
{
	/* Only one unmount may be in progress per mount. */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: release a process's p_textnch if it
 * references the mount being unmounted, so the text reference does
 * not keep the namecache topology alive.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesytems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;		/* cleared when refs remain: mp not freed */
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* Non-forced unmounts use a timed lock so they can fail out. */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* Back out the interlock and wake any waiters. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwords.
	 *
	 * We own the last mnt_refs by owning mnt_ncmountpt.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		cache_clearmntcache();
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			/* Drop lingering p_textnch refs, then recheck. */
			allproc_scan(&unmount_allproc_cb, mp, 0);
		}

		cache_clearmntcache();
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decomission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}

	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	if (mp->mnt_refs != 1)
		cache_clearmntcache();
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		cache_clearmntcache();
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs - 1);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs - 1);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if ((error == 0) ||
			    (error == EOPNOTSUPP) || /* No sync */
			    (flags & MNT_FORCE)) {
				error = VFS_UNMOUNT(mp, flags);
			}
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	/* Disconnect the mount-root and mounted-on namecache handles. */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache();
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache();
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;	/* token already released above */
	} else {
		cache_clearmntcache();
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * kprintf() a warning about mount mp, prefixed with its path when the
 * path can be resolved, otherwise with its address and (if available)
 * the mounted-on component name.  ctl/... are printf-style.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.
In this case if the root mount matches the 992 * process root directory's mount we have to specify the process's root 993 * directory instead of the mount point, because the mount point might 994 * be above the root directory. 995 */ 996 static 997 int 998 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 999 { 1000 struct nchandle *nch; 1001 1002 if (p && p->p_fd->fd_nrdir.mount == mp) 1003 nch = &p->p_fd->fd_nrdir; 1004 else 1005 nch = &mp->mnt_ncmountpt; 1006 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1007 } 1008 1009 /* 1010 * Sync each mounted filesystem. 1011 */ 1012 1013 #ifdef DEBUG 1014 static int syncprt = 0; 1015 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1016 #endif /* DEBUG */ 1017 1018 static int sync_callback(struct mount *mp, void *data); 1019 1020 int 1021 sys_sync(struct sync_args *uap) 1022 { 1023 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1024 return (0); 1025 } 1026 1027 static 1028 int 1029 sync_callback(struct mount *mp, void *data __unused) 1030 { 1031 int asyncflag; 1032 1033 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1034 lwkt_gettoken(&mp->mnt_token); 1035 asyncflag = mp->mnt_flag & MNT_ASYNC; 1036 mp->mnt_flag &= ~MNT_ASYNC; 1037 lwkt_reltoken(&mp->mnt_token); 1038 vfs_msync(mp, MNT_NOWAIT); 1039 VFS_SYNC(mp, MNT_NOWAIT); 1040 lwkt_gettoken(&mp->mnt_token); 1041 mp->mnt_flag |= asyncflag; 1042 lwkt_reltoken(&mp->mnt_token); 1043 } 1044 return(0); 1045 } 1046 1047 /* XXX PRISON: could be per prison flag */ 1048 static int prison_quotas; 1049 #if 0 1050 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1051 #endif 1052 1053 /* 1054 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1055 * 1056 * Change filesystem quotas. 
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;

	/* Jailed processes may only manage quotas when allowed globally. */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *	    void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	/* +1 and M_ZERO guarantee NUL termination of the control data. */
	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &uap->sysmsg_result);
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 *
	 * The nch is stolen out of the nlookupdata (cache_zero prevents
	 * nlookup_done from dropping it) so the ref survives past
	 * nlookup_done().  The vnode is unlocked but a ref is retained.
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

/*
 * Core of statfs().  The caller's nlookupdata must be initialized;
 * this resolves it and fills *buf from the mount's cached mnt_stat
 * after refreshing it via VFS_STATFS().
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	/* Rewrite f_mntonname relative to the caller's root (chroot aware) */
	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Core of fstatfs().  Like kern_statfs() but starts from an open
 * descriptor instead of a path.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	/* Rewrite f_mntonname relative to the caller's root (chroot aware) */
	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Core of statvfs().  Fills *buf from the mount's mnt_vstat after
 * refreshing it via VFS_STATVFS(), and synthesizes f_flag from the
 * mount flags.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Core of fstatvfs().  Like kern_statvfs() but starts from an open
 * descriptor instead of a path.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	/* Prefer overlay mount info (see kern_fstatfs) */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* Context handed to getfsstat_callback() via mountlist_scan() */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user buffer slot, NULL = count only */
	long count;		/* number of mounts visible to the caller */
	long maxcount;		/* capacity of the user buffer in entries */
	int error;
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* Never report more entries than were actually copied out */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback: copy out one statfs entry if there is room.
 * Returns -1 to abort the scan on a hard error, 0 to continue.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* Skip mounts not visible from the caller's chroot */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* Context handed to getvfsstat_callback() via mountlist_scan() */
struct getvfsstat_info {
	struct statfs *sfsp;	/* next statfs slot in user buffer */
	struct statvfs *vsfsp;	/* next statvfs slot in user buffer */
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback: copy out one statfs + statvfs pair if there is
 * room.  Returns -1 to abort the scan on a hard error, 0 to continue.
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		/* Skip mounts not visible from the caller's chroot */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	lwkt_gettoken(&p->p_token);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* The descriptor must have namecache info to become the cwd */
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
		cache_dropmount(mp);
	}
	if (error == 0) {
		/* Swap in the new cwd under the fd spinlock, drop old refs */
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		spin_unlock(&fdp->fd_spin);
		vn_unlock(vp);		/* leave ref intact */
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * Core of chdir().  The caller's nlookupdata must be initialized;
 * on success the resolved directory becomes the process cwd and the
 * nch reference is absorbed (nd->nl_nch is zeroed).
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	lwkt_gettoken(&p->p_token);
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		/* Swap in the new cwd under the fd spinlock, drop old refs */
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		spin_unlock(&fdp->fd_spin);
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		cache_drop(&onch);
		vrele(ovp);
		/* the nch ref now belongs to fd_ncdir, hide it from nlookup_done */
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(fdp, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		lwkt_gettoken(&p->p_token);
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* first chroot also establishes the jail directory */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
		if ((p->p_flags & P_DIDCHROOT) == 0) {
			p->p_flags |= P_DIDCHROOT;
			/* bump p_depth, saturating below 65535 */
			if (p->p_depth <= 65535 - 32)
				p->p_depth += 32;
		}
		lwkt_reltoken(&p->p_token);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Re-root the entire kernel (vfs_cache_setroot) to the given path.
 */
int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Core of open().  The caller's nlookupdata must be initialized; on
 * success *res receives the new file descriptor.  The fd slot is
 * reserved via fdalloc() and only committed with fsetfd() once the
 * open (and any requested O_EXLOCK/O_SHLOCK) succeeds.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	/* O_ACCMODE fully set is an invalid access-mode combination */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 *
	 * Executable binaries can race VTEXT against O_RDWR opens, so
	 * use an exclusive lock for O_RDWR opens as well.
	 *
	 * NOTE: We need a flag to separate terminal vnode locking from
	 *	 parent locking.
O_CREAT needs parent locking, but O_TRUNC
	 *	 and O_RDWR only need to lock the terminal vnode exclusively.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0)
		nd->nl_flags |= NLC_SHAREDLOCK;

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * us.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	/* Apply flock-style lock requested at open time (O_EXLOCK/O_SHLOCK) */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	if (oflags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				  &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Core of mknod().  The caller's nlookupdata must be initialized.
 * The privilege required depends on the node type being created.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* The target must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mknodat(struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Core of mkfifo().  The caller's nlookupdata must be initialized.
 * Creates a VFIFO node via VOP_NMKNOD().
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* The target must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkfifoat_args(int fd, char *path, mode_t mode)
 *
 * Create a named pipe.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Enforce the security.hardlink_check_{uid,gid} policy: may the caller
 * create a hard link to vp?  Returns 0 if allowed, EPERM otherwise.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Core of link().  nd resolves the existing source, linknd the new
 * link name; both nlookupdatas must be initialized by the caller.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	/* Re-lock the source vnode for the VOP call */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 *
 * Make a hard file link.  The path1 argument is relative to the directory
 * associated with fd1, and similarly the path2 argument is relative to
 * the directory associated with fd2.
 */
int
sys_linkat(struct linkat_args *uap)
{
	struct nlookupdata nd, linknd;
	struct file *fp1, *fp2;
	int error;

	error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
	    (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
	if (error == 0) {
		error = nlookup_init_at(&linknd, &fp2, uap->fd2,
		    uap->path2, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done_at(&linknd, fp2);
	}
	nlookup_done_at(&nd, fp1);
	return (error);
}

/*
 * Create a symbolic link with target text 'path' at the location named
 * by nd, with permissions 'mode'.  The new name must not already exist.
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);	/* VOP returned the new vnode locked+referenced */
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic
 * link.
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* Copy the target text in first; it is passed verbatim to the VOP */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			/* apply the process umask to the link's mode */
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
 */
int
sys_symlinkat(struct symlinkat_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct file *fp;
	char *path1;
	int error;
	int mode;

	path1 = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
		    UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path1, mode);
		}
		nlookup_done_at(&nd, fp);
	}
	objcache_put(namei_oc, path1);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
int
sys_undelete(struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	/*
	 * nd is zeroed by nlookup_init() even on failure, so setting the
	 * flags unconditionally here is safe; nlookup() only runs when
	 * the init succeeded.
	 */
	bwillinode(1);
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Remove the file or symlink named by nd.  nd must not yet have been
 * resolved; the caller releases it with nlookup_done*().
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
2659 */ 2660 int 2661 sys_unlinkat(struct unlinkat_args *uap) 2662 { 2663 struct nlookupdata nd; 2664 struct file *fp; 2665 int error; 2666 2667 if (uap->flags & ~AT_REMOVEDIR) 2668 return (EINVAL); 2669 2670 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2671 if (error == 0) { 2672 if (uap->flags & AT_REMOVEDIR) 2673 error = kern_rmdir(&nd); 2674 else 2675 error = kern_unlink(&nd); 2676 } 2677 nlookup_done_at(&nd, fp); 2678 return (error); 2679 } 2680 2681 int 2682 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2683 { 2684 struct thread *td = curthread; 2685 struct proc *p = td->td_proc; 2686 struct file *fp; 2687 struct vnode *vp; 2688 struct vattr vattr; 2689 off_t new_offset; 2690 int error; 2691 2692 fp = holdfp(p->p_fd, fd, -1); 2693 if (fp == NULL) 2694 return (EBADF); 2695 if (fp->f_type != DTYPE_VNODE) { 2696 error = ESPIPE; 2697 goto done; 2698 } 2699 vp = (struct vnode *)fp->f_data; 2700 2701 switch (whence) { 2702 case L_INCR: 2703 spin_lock(&fp->f_spin); 2704 new_offset = fp->f_offset + offset; 2705 error = 0; 2706 break; 2707 case L_XTND: 2708 error = VOP_GETATTR(vp, &vattr); 2709 spin_lock(&fp->f_spin); 2710 new_offset = offset + vattr.va_size; 2711 break; 2712 case L_SET: 2713 new_offset = offset; 2714 error = 0; 2715 spin_lock(&fp->f_spin); 2716 break; 2717 default: 2718 new_offset = 0; 2719 error = EINVAL; 2720 spin_lock(&fp->f_spin); 2721 break; 2722 } 2723 2724 /* 2725 * Validate the seek position. Negative offsets are not allowed 2726 * for regular files or directories. 2727 * 2728 * Normally we would also not want to allow negative offsets for 2729 * character and block-special devices. However kvm addresses 2730 * on 64 bit architectures might appear to be negative and must 2731 * be allowed. 
2732 */ 2733 if (error == 0) { 2734 if (new_offset < 0 && 2735 (vp->v_type == VREG || vp->v_type == VDIR)) { 2736 error = EINVAL; 2737 } else { 2738 fp->f_offset = new_offset; 2739 } 2740 } 2741 *res = fp->f_offset; 2742 spin_unlock(&fp->f_spin); 2743 done: 2744 fdrop(fp); 2745 return (error); 2746 } 2747 2748 /* 2749 * lseek_args(int fd, int pad, off_t offset, int whence) 2750 * 2751 * Reposition read/write file offset. 2752 */ 2753 int 2754 sys_lseek(struct lseek_args *uap) 2755 { 2756 int error; 2757 2758 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2759 &uap->sysmsg_offset); 2760 2761 return (error); 2762 } 2763 2764 /* 2765 * Check if current process can access given file. amode is a bitmask of *_OK 2766 * access bits. flags is a bitmask of AT_* flags. 2767 */ 2768 int 2769 kern_access(struct nlookupdata *nd, int amode, int flags) 2770 { 2771 struct vnode *vp; 2772 int error, mode; 2773 2774 if (flags & ~AT_EACCESS) 2775 return (EINVAL); 2776 nd->nl_flags |= NLC_SHAREDLOCK; 2777 if ((error = nlookup(nd)) != 0) 2778 return (error); 2779 retry: 2780 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2781 if (error) 2782 return (error); 2783 2784 /* Flags == 0 means only check for existence. */ 2785 if (amode) { 2786 mode = 0; 2787 if (amode & R_OK) 2788 mode |= VREAD; 2789 if (amode & W_OK) 2790 mode |= VWRITE; 2791 if (amode & X_OK) 2792 mode |= VEXEC; 2793 if ((mode & VWRITE) == 0 || 2794 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2795 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2796 2797 /* 2798 * If the file handle is stale we have to re-resolve the 2799 * entry with the ncp held exclusively. This is a hack 2800 * at the moment. 
2801 */ 2802 if (error == ESTALE) { 2803 vput(vp); 2804 cache_unlock(&nd->nl_nch); 2805 cache_lock(&nd->nl_nch); 2806 cache_setunresolved(&nd->nl_nch); 2807 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2808 if (error == 0) { 2809 vp = NULL; 2810 goto retry; 2811 } 2812 return(error); 2813 } 2814 } 2815 vput(vp); 2816 return (error); 2817 } 2818 2819 /* 2820 * access_args(char *path, int flags) 2821 * 2822 * Check access permissions. 2823 */ 2824 int 2825 sys_access(struct access_args *uap) 2826 { 2827 struct nlookupdata nd; 2828 int error; 2829 2830 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2831 if (error == 0) 2832 error = kern_access(&nd, uap->flags, 0); 2833 nlookup_done(&nd); 2834 return (error); 2835 } 2836 2837 2838 /* 2839 * eaccess_args(char *path, int flags) 2840 * 2841 * Check access permissions. 2842 */ 2843 int 2844 sys_eaccess(struct eaccess_args *uap) 2845 { 2846 struct nlookupdata nd; 2847 int error; 2848 2849 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2850 if (error == 0) 2851 error = kern_access(&nd, uap->flags, AT_EACCESS); 2852 nlookup_done(&nd); 2853 return (error); 2854 } 2855 2856 2857 /* 2858 * faccessat_args(int fd, char *path, int amode, int flags) 2859 * 2860 * Check access permissions. 
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Stat the file named by nd into *st.  Retries the lookup once with the
 * namecache entry re-resolved if the filesystem reports ESTALE.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the
	 * entry with the ncp held exclusively.  This is a hack
	 * at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_unlock(&nd->nl_nch);
		cache_lock(&nd->nl_nch);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct fstatat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->sb, sizeof(*uap->sb));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common backend for pathconf(2)/lpathconf(2).  'flags' selects whether
 * the terminal symlink is followed (NLC_FOLLOW or 0).  The result is
 * stored via *sysmsg_regp.
 */
static int
kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, name, sysmsg_regp);
		vput(vp);
	}
	return (error);
}

/*
 * pathconf_Args(char *path, int name)
 *
 * Get configurable pathname variables.
3012 */ 3013 int 3014 sys_pathconf(struct pathconf_args *uap) 3015 { 3016 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3017 &uap->sysmsg_reg)); 3018 } 3019 3020 /* 3021 * lpathconf_Args(char *path, int name) 3022 * 3023 * Get configurable pathname variables, but don't follow symlinks. 3024 */ 3025 int 3026 sys_lpathconf(struct lpathconf_args *uap) 3027 { 3028 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3029 } 3030 3031 /* 3032 * XXX: daver 3033 * kern_readlink isn't properly split yet. There is a copyin burried 3034 * in VOP_READLINK(). 3035 */ 3036 int 3037 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3038 { 3039 struct thread *td = curthread; 3040 struct vnode *vp; 3041 struct iovec aiov; 3042 struct uio auio; 3043 int error; 3044 3045 nd->nl_flags |= NLC_SHAREDLOCK; 3046 if ((error = nlookup(nd)) != 0) 3047 return (error); 3048 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3049 if (error) 3050 return (error); 3051 if (vp->v_type != VLNK) { 3052 error = EINVAL; 3053 } else { 3054 aiov.iov_base = buf; 3055 aiov.iov_len = count; 3056 auio.uio_iov = &aiov; 3057 auio.uio_iovcnt = 1; 3058 auio.uio_offset = 0; 3059 auio.uio_rw = UIO_READ; 3060 auio.uio_segflg = UIO_USERSPACE; 3061 auio.uio_td = td; 3062 auio.uio_resid = count; 3063 error = VOP_READLINK(vp, &auio, td->td_ucred); 3064 } 3065 vput(vp); 3066 *res = count - auio.uio_resid; 3067 return (error); 3068 } 3069 3070 /* 3071 * readlink_args(char *path, char *buf, int count) 3072 * 3073 * Return target name of a symbolic link. 
 */
int
sys_readlink(struct readlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->count,
					&uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
 *
 * Return target name of a symbolic link.  The path is relative to the
 * directory associated with fd.
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
					&uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Set the file flags (va_flags) on a vnode.  Shared backend for the
 * chflags family of syscalls.
 */
static int
setfflags(struct vnode *vp, int flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(char *path, int flags)
 *
 * Change flags of a file given a path name.
3146 */ 3147 int 3148 sys_chflags(struct chflags_args *uap) 3149 { 3150 struct nlookupdata nd; 3151 struct vnode *vp; 3152 int error; 3153 3154 vp = NULL; 3155 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3156 if (error == 0) 3157 error = nlookup(&nd); 3158 if (error == 0) 3159 error = ncp_writechk(&nd.nl_nch); 3160 if (error == 0) 3161 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3162 nlookup_done(&nd); 3163 if (error == 0) { 3164 error = setfflags(vp, uap->flags); 3165 vrele(vp); 3166 } 3167 return (error); 3168 } 3169 3170 /* 3171 * lchflags(char *path, int flags) 3172 * 3173 * Change flags of a file given a path name, but don't follow symlinks. 3174 */ 3175 int 3176 sys_lchflags(struct lchflags_args *uap) 3177 { 3178 struct nlookupdata nd; 3179 struct vnode *vp; 3180 int error; 3181 3182 vp = NULL; 3183 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3184 if (error == 0) 3185 error = nlookup(&nd); 3186 if (error == 0) 3187 error = ncp_writechk(&nd.nl_nch); 3188 if (error == 0) 3189 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3190 nlookup_done(&nd); 3191 if (error == 0) { 3192 error = setfflags(vp, uap->flags); 3193 vrele(vp); 3194 } 3195 return (error); 3196 } 3197 3198 /* 3199 * fchflags_args(int fd, int flags) 3200 * 3201 * Change flags of a file given a file descriptor. 
 */
int
sys_fchflags(struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * chflagsat_args(int fd, const char *path, int flags, int atflags)
 * change flags given a pathname relative to a filedescriptor
 */
int
sys_chflagsat(struct chflagsat_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	struct file *fp;
	int error;
	int lookupflags;

	if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	vp = NULL;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done_at(&nd, fp);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}


/*
 * Set the mode (permission bits) on a vnode.  Shared backend for the
 * chmod family of syscalls.
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		/* invalidate cached write/execute-ok state for this vnode */
		cache_inval_wxok(vp);
		vput(vp);
	}
	return error;
}

/*
 * Change the mode of the file named by nd.  nd must not yet have been
 * resolved; the caller releases it.
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 */
int
sys_lchmod(struct lchmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
3332 */ 3333 int 3334 sys_fchmod(struct fchmod_args *uap) 3335 { 3336 struct thread *td = curthread; 3337 struct proc *p = td->td_proc; 3338 struct file *fp; 3339 int error; 3340 3341 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3342 return (error); 3343 if (fp->f_nchandle.ncp) 3344 error = ncp_writechk(&fp->f_nchandle); 3345 if (error == 0) 3346 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3347 fdrop(fp); 3348 return (error); 3349 } 3350 3351 /* 3352 * fchmodat_args(char *path, int mode) 3353 * 3354 * Change mode of a file pointed to by fd/path. 3355 */ 3356 int 3357 sys_fchmodat(struct fchmodat_args *uap) 3358 { 3359 struct nlookupdata nd; 3360 struct file *fp; 3361 int error; 3362 int flags; 3363 3364 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3365 return (EINVAL); 3366 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3367 3368 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3369 UIO_USERSPACE, flags); 3370 if (error == 0) 3371 error = kern_chmod(&nd, uap->mode); 3372 nlookup_done_at(&nd, fp); 3373 return (error); 3374 } 3375 3376 static int 3377 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3378 { 3379 struct thread *td = curthread; 3380 int error; 3381 struct vattr vattr; 3382 uid_t o_uid; 3383 gid_t o_gid; 3384 uint64_t size; 3385 3386 /* 3387 * note: vget is required for any operation that might mod the vnode 3388 * so VINACTIVE is properly cleared. 
3389 */ 3390 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3391 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3392 return error; 3393 o_uid = vattr.va_uid; 3394 o_gid = vattr.va_gid; 3395 size = vattr.va_size; 3396 3397 VATTR_NULL(&vattr); 3398 vattr.va_uid = uid; 3399 vattr.va_gid = gid; 3400 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3401 vput(vp); 3402 } 3403 3404 if (error == 0) { 3405 if (uid == -1) 3406 uid = o_uid; 3407 if (gid == -1) 3408 gid = o_gid; 3409 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3410 VFS_ACCOUNT(mp, uid, gid, size); 3411 } 3412 3413 return error; 3414 } 3415 3416 int 3417 kern_chown(struct nlookupdata *nd, int uid, int gid) 3418 { 3419 struct vnode *vp; 3420 int error; 3421 3422 if ((error = nlookup(nd)) != 0) 3423 return (error); 3424 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3425 return (error); 3426 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3427 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3428 vrele(vp); 3429 return (error); 3430 } 3431 3432 /* 3433 * chown(char *path, int uid, int gid) 3434 * 3435 * Set ownership given a path name. 3436 */ 3437 int 3438 sys_chown(struct chown_args *uap) 3439 { 3440 struct nlookupdata nd; 3441 int error; 3442 3443 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3444 if (error == 0) 3445 error = kern_chown(&nd, uap->uid, uap->gid); 3446 nlookup_done(&nd); 3447 return (error); 3448 } 3449 3450 /* 3451 * lchown_args(char *path, int uid, int gid) 3452 * 3453 * Set ownership given a path name, do not cross symlinks. 3454 */ 3455 int 3456 sys_lchown(struct lchown_args *uap) 3457 { 3458 struct nlookupdata nd; 3459 int error; 3460 3461 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3462 if (error == 0) 3463 error = kern_chown(&nd, uap->uid, uap->gid); 3464 nlookup_done(&nd); 3465 return (error); 3466 } 3467 3468 /* 3469 * fchown_args(int fd, int uid, int gid) 3470 * 3471 * Set ownership given a file descriptor. 
 */
int
sys_fchown(struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfown(p->p_fd->fd_ncdir.mount,
			(struct vnode *)fp->f_data, uap->uid, uap->gid);
	fdrop(fp);
	return (error);
}

/*
 * fchownat(int fd, char *path, int uid, int gid, int flags)
 *
 * Set ownership of file pointed to by fd/path.
 */
int
sys_fchownat(struct fchownat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Convert an optional utimes()-style timeval pair into timespecs.
 * A NULL tvp means "set both times to now".
 *
 * NOTE(review): only tvp[0] is passed through itimerfix(); tvp[1] is
 * converted unchecked — confirm whether that is intentional.
 */
static int
getutimes(struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];
	int error;

	if (tvp == NULL) {
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		if ((error = itimerfix(tvp)) != 0)
			return (error);
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Convert an optional utimensat()-style timespec pair into the values
 * actually set, resolving UTIME_NOW/UTIME_OMIT.  *nullflag is set when
 * both times are "now" (the VA_UTIMES_NULL case, which relaxes the
 * permission check in the filesystem).
 */
static int
getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag)
{
	struct timespec tsnow;
	int error;

	*nullflag = 0;
	nanotime(&tsnow);
	if (ts == NULL) {
		newts[0] = tsnow;
		newts[1] = tsnow;
		*nullflag = 1;
		return (0);
	}

	newts[0] = ts[0];
	newts[1] = ts[1];
	/* Both omitted: nothing will be changed */
	if (newts[0].tv_nsec == UTIME_OMIT &&
	    newts[1].tv_nsec == UTIME_OMIT)
		return (0);
	if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW)
		*nullflag = 1;

	if (newts[0].tv_nsec == UTIME_OMIT)
		newts[0].tv_sec = VNOVAL;
	else if (newts[0].tv_nsec == UTIME_NOW)
		newts[0] = tsnow;
	else if ((error = itimespecfix(&newts[0])) != 0)
		return (error);

	if (newts[1].tv_nsec == UTIME_OMIT)
		newts[1].tv_sec = VNOVAL;
	else if (newts[1].tv_nsec == UTIME_NOW)
		newts[1] = tsnow;
	else if ((error = itimespecfix(&newts[1])) != 0)
		return (error);

	return (0);
}

/*
 * Apply atime/mtime (ts[0]/ts[1]) to an already locked vnode via
 * VOP_SETATTR.  vattr is caller-supplied scratch space.
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * utimes()-style backend: convert the timevals then hand off to
 * kern_utimensat().
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_utimensat(nd, tptr ? ts : NULL, 0);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_lutimes(struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
 */
int
kern_futimens(int fd, struct timespec *ts)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct timespec newts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int nullflag;
	int error;

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = VOP_GETATTR(vp, &vattr);
			if (error == 0) {
				/* owner-or-write check against open creds */
				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
						   fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, newts, nullflag);
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimens_args(int fd, struct timespec *ts)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimens(struct futimens_args *uap)
{
	struct timespec ts[2];
	int error;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}
	error = kern_futimens(uap->fd, uap->ts ? ts : NULL);
	return (error);
}

/*
 * futimes()-style backend: convert the timevals then hand off to
 * kern_futimens().
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_futimens(fd, tptr ? ts : NULL);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	return (error);
}

/*
 * Set access/modification times on the file named by nd.  ts may be
 * NULL ("now"); flags may contain AT_SYMLINK_NOFOLLOW only (the follow
 * behavior itself is selected by the caller's nlookup flags).
 */
int
kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
{
	struct timespec newts[2];
	struct vnode *vp;
	struct vattr vattr;
	int nullflag;
	int error;

	if (flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);

	nd->nl_flags |= NLC_OWN | NLC_WRITE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, newts, nullflag);
			vput(vp);
		}
	}
	vrele(vp);	/* drop the cache_vref reference */
	return (error);
}

/*
 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags);
 *
 * Set file access and modification times of a file.
 */
int
sys_utimensat(struct utimensat_args *uap)
{
	struct timespec ts[2];
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}

	/* uap->flags is validated by kern_utimensat() */
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Truncate (or extend) the file named by nd to 'length' bytes and
 * adjust quota accounting by the size delta.
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}
	if (vfs_quota_enabled) {
		/* remember old owner/size for the accounting delta below */
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
	}
done:
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 */
int
sys_truncate(struct truncate_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_truncate(&nd, uap->length);
	nlookup_done(&nd);
	return error;
}

/*
 * Kernel-level ftruncate(): truncate the file backing an open
 * descriptor.  The descriptor must be open for writing and must not
 * be append-only; directories are rejected.
 */
int
kern_ftruncate(int fd, off_t length)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;
	struct mount *mp;

	if (length < 0)
		return(EINVAL);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp) {
		error = ncp_writechk(&fp->f_nchandle);
		if (error)
			goto done;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EINVAL;
		goto done;
	}
	if (fp->f_flag & FAPPENDONLY) {	/* descriptor is append-only */
		error = EINVAL;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
		vn_unlock(vp);
		goto done;
	}

	/* record the old owner/size so the accounting delta can be charged */
	if (vfs_quota_enabled) {
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp, NULL)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, fp->f_cred);
		mp = vq_vptomp(vp);
		VFS_ACCOUNT(mp, uid, gid, length - old_size);
	}
	vn_unlock(vp);
done:
	fdrop(fp);
	return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* flush dirty VM pages first unless the fs opts out via MNTK_NOMSYNC */
	if ((obj = vp->v_object) != NULL) {
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Kernel-level rename().  May return EAGAIN, in which case the caller
 * is expected to re-initialize the nlookupdata and retry (the namecache
 * generation or linkage changed while the entries were unlocked).
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;

	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.
	 */
	if (fromnd->nl_nch.ncp->nc_generation != fncp_gen ||
	    tond->nl_nch.ncp->nc_generation != tncp_gen) {
		kprintf("kern_rename: retry due to gen on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * If either fromnd or tond are marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * Make sure the parent directories linkages are the same.
	 * XXX shouldn't be needed any more w/ generation check above.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	/* kern_rename() returns EAGAIN when the namecache shifted; retry */
	do {
		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&fromnd, &tond);
			nlookup_done(&tond);
		}
		nlookup_done(&fromnd);
	} while (error == EAGAIN);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_renameat(struct renameat_args *uap)
{
	struct nlookupdata oldnd, newnd;
	struct file *oldfp, *newfp;
	int error;

	/* kern_rename() returns EAGAIN when the namecache shifted; retry */
	do {
		error = nlookup_init_at(&oldnd, &oldfp,
					uap->oldfd, uap->old,
					UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init_at(&newnd, &newfp,
						uap->newfd, uap->new,
						UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&oldnd, &newnd);
			nlookup_done_at(&newnd, newfp);
		}
		nlookup_done_at(&oldnd, oldfp);
	} while (error == EAGAIN);
	return (error);
}

/*
 * Kernel-level mkdir(): create a directory at the location named by
 * the nlookupdata with the given mode (masked by the process cmask).
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
int
sys_mkdir(struct mkdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkdirat(struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Kernel-level rmdir(): remove the directory named by the nlookupdata.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EBUSY);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel-level getdirentries(): read directory entries from an open
 * descriptor into buf (which may be a user or kernel address, chosen
 * by 'direction').  On success *res is set to the number of bytes
 * transferred and, when basep is non-NULL, *basep to the seek offset
 * at which the read started.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;

	/*
	 * WARNING!  *basep may not be wide enough to accommodate the
	 * seek offset.   XXX should we hack this to return the upper 32 bits
	 * for offsets greater than 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
	long base;
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
				   &uap->sysmsg_result, UIO_USERSPACE);

	if (error == 0 && uap->basep)
		error = copyout(&base, uap->basep, sizeof(*uap->basep));
	return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 */
int
sys_getdents(struct getdents_args *uap)
{
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
				   &uap->sysmsg_result, UIO_USERSPACE);

	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 */
int
sys_umask(struct umask_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;

	/* return the previous mask, install the new one */
	fdp = p->p_fd;
	uap->sysmsg_result = fdp->fd_cmask;
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
4471 */ 4472 int 4473 sys_revoke(struct revoke_args *uap) 4474 { 4475 struct nlookupdata nd; 4476 struct vattr vattr; 4477 struct vnode *vp; 4478 struct ucred *cred; 4479 int error; 4480 4481 vp = NULL; 4482 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4483 if (error == 0) 4484 error = nlookup(&nd); 4485 if (error == 0) 4486 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4487 cred = crhold(nd.nl_cred); 4488 nlookup_done(&nd); 4489 if (error == 0) { 4490 if (error == 0) 4491 error = VOP_GETATTR(vp, &vattr); 4492 if (error == 0 && cred->cr_uid != vattr.va_uid) 4493 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4494 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4495 if (vcount(vp) > 0) 4496 error = vrevoke(vp, cred); 4497 } else if (error == 0) { 4498 error = vrevoke(vp, cred); 4499 } 4500 vrele(vp); 4501 } 4502 if (cred) 4503 crfree(cred); 4504 return (error); 4505 } 4506 4507 /* 4508 * getfh_args(char *fname, fhandle_t *fhp) 4509 * 4510 * Get (NFS) file handle 4511 * 4512 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4513 * mount. This allows nullfs mounts to be explicitly exported. 4514 * 4515 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4516 * 4517 * nullfs mounts of subdirectories are not safe. That is, it will 4518 * work, but you do not really have protection against access to 4519 * the related parent directories. 
 */
int
sys_getfh(struct getfh_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	fhandle_t fh;
	struct vnode *vp;
	struct mount *mp;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	/*
	 * NOTE(review): mp is dereferenced below after nlookup_done();
	 * presumably the vnode reference keeps the mount alive -- verify.
	 */
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error == 0) {
		bzero(&fh, sizeof(fh));
		fh.fh_fsid = mp->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &fh.fh_fid);
		vput(vp);
		if (error == 0)
			error = copyout(&fh, uap->fhp, sizeof(fh));
	}
	return (error);
}

/*
 * fhopen_args(const struct fhandle *u_fhp, int flags)
 *
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the priv_check() call or this becomes one giant
 * security hole.
 */
int
sys_fhopen(struct fhopen_args *uap)
{
	struct thread *td = curthread;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	int fmode, mode, error = 0, type;
	struct file *nfp;
	struct file *fp;
	int indx;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);

	/*
	 * Why not allow a non-read/write open for our lockd?
	 */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);

	/*
	 * Find the mount point
	 */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL) {
		error = ESTALE;
		goto done2;
	}
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	if (error)
		goto done;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp, NULL);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		vn_unlock(vp);				/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error)
			goto bad;
	}

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_nchandle will be associated when fhopen()ing a
	 * directory.  XXX
	 */
	if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
		goto bad;
	fp = nfp;

	error = VOP_OPEN(vp, fmode, td->td_ucred, fp);
	if (error) {
		/*
		 * setting f_ops this way prevents VOP_CLOSE from being
		 * called or fdrop() releasing the vp from v_data.
Since 4673 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4674 */ 4675 fp->f_ops = &badfileops; 4676 fp->f_data = NULL; 4677 goto bad_drop; 4678 } 4679 4680 /* 4681 * The fp is given its own reference, we still have our ref and lock. 4682 * 4683 * Assert that all regular files must be created with a VM object. 4684 */ 4685 if (vp->v_type == VREG && vp->v_object == NULL) { 4686 kprintf("fhopen: regular file did not " 4687 "have VM object: %p\n", 4688 vp); 4689 goto bad_drop; 4690 } 4691 4692 /* 4693 * The open was successful. Handle any locking requirements. 4694 */ 4695 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4696 lf.l_whence = SEEK_SET; 4697 lf.l_start = 0; 4698 lf.l_len = 0; 4699 if (fmode & O_EXLOCK) 4700 lf.l_type = F_WRLCK; 4701 else 4702 lf.l_type = F_RDLCK; 4703 if (fmode & FNONBLOCK) 4704 type = 0; 4705 else 4706 type = F_WAIT; 4707 vn_unlock(vp); 4708 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4709 &lf, type)) != 0) { 4710 /* 4711 * release our private reference. 4712 */ 4713 fsetfd(fdp, NULL, indx); 4714 fdrop(fp); 4715 vrele(vp); 4716 goto done; 4717 } 4718 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4719 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4720 } 4721 4722 /* 4723 * Clean up. Associate the file pointer with the previously 4724 * reserved descriptor and return it. 
 */
	vput(vp);
	if (uap->flags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	uap->sysmsg_result = indx;
	mount_drop(mp);

	return (error);

bad_drop:
	fsetfd(fdp, NULL, indx);
	fdrop(fp);
bad:
	vput(vp);
done:
	mount_drop(mp);
done2:
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 */
int
sys_fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		error = ESTALE;
	if (error == 0) {
		if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
			error = vn_stat(vp, &sb, td->td_ucred);
			vput(vp);
		}
	}
	if (error == 0)
		error = copyout(&sb, uap->sb, sizeof(sb));
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* hide mounts outside the caller's chroot */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
		goto done;
	/*
	 * NOTE(review): mp is reassigned from v_mount; the mount_drop at
	 * 'done' then releases the new mp's ref -- presumably v_mount
	 * always matches the vfs_getvfs() result here.  Verify.
	 */
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/* non-root callers get the fsid zeroed out */
	if (priv_check(td, PRIV_ROOT)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 */
int
sys_fhstatvfs(struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* hide mounts outside the caller's chroot */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		goto done;
	/* NOTE(review): same mp-reassignment caveat as sys_fhstatfs() */
	mp = vp->v_mount;
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
if (mp) 4898 mount_drop(mp); 4899 return (error); 4900 } 4901 4902 4903 /* 4904 * Syscall to push extended attribute configuration information into the 4905 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4906 * a command (int cmd), and attribute name and misc data. For now, the 4907 * attribute name is left in userspace for consumption by the VFS_op. 4908 * It will probably be changed to be copied into sysspace by the 4909 * syscall in the future, once issues with various consumers of the 4910 * attribute code have raised their hands. 4911 * 4912 * Currently this is used only by UFS Extended Attributes. 4913 */ 4914 int 4915 sys_extattrctl(struct extattrctl_args *uap) 4916 { 4917 struct nlookupdata nd; 4918 struct vnode *vp; 4919 char attrname[EXTATTR_MAXNAMELEN]; 4920 int error; 4921 size_t size; 4922 4923 attrname[0] = 0; 4924 vp = NULL; 4925 error = 0; 4926 4927 if (error == 0 && uap->filename) { 4928 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4929 NLC_FOLLOW); 4930 if (error == 0) 4931 error = nlookup(&nd); 4932 if (error == 0) 4933 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4934 nlookup_done(&nd); 4935 } 4936 4937 if (error == 0 && uap->attrname) { 4938 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4939 &size); 4940 } 4941 4942 if (error == 0) { 4943 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4944 if (error == 0) 4945 error = nlookup(&nd); 4946 if (error == 0) 4947 error = ncp_writechk(&nd.nl_nch); 4948 if (error == 0) { 4949 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4950 uap->attrnamespace, 4951 uap->attrname, nd.nl_cred); 4952 } 4953 nlookup_done(&nd); 4954 } 4955 4956 return (error); 4957 } 4958 4959 /* 4960 * Syscall to get a named extended attribute on a file or directory. 
 */
int
sys_extattr_set_file(struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* build a UIO_WRITE uio describing the userspace attribute data */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
 */
int
sys_extattr_get_file(struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct uio auio;
	struct iovec aiov;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* build a UIO_READ uio targeting the userspace buffer */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;

	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);
	/* report the number of bytes actually transferred */
	uap->sysmsg_result = uap->nbytes - auio.uio_resid;

	vput(vp);
	nlookup_done(&nd);
	return(error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
 */
int
sys_extattr_delete_file(struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return(error);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
		if (error == 0) {
			/* a NULL uio deletes the attribute */
			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
					       attrname, NULL, nd.nl_cred);
			vput(vp);
		}
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Determine if the mount is visible to the process.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct nchandle nch;

	/*
	 * Traverse from the mount point upwards.  If we hit the process
	 * root then the mount point is visible to the process.
	 */
	nch = mp->mnt_ncmountpt;
	while (nch.ncp) {
		if (nch.mount == p->p_fd->fd_nrdir.mount &&
		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
			return(1);
		}
		/* crossing a mount boundary: hop to the underlying mount */
		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
			nch = nch.mount->mnt_ncmounton;
		} else {
			nch.ncp = nch.ncp->nc_parent;
		}
	}

	/*
	 * If the mount point is not visible to the process, but the
	 * process root is in a subdirectory of the mount, return
	 * TRUE anyway.
	 */
	if (p->p_fd->fd_nrdir.mount == mp)
		return(1);

	return(0);
}