1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysmsg.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/caps.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int get_fscap(const char *); 84 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 85 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 86 static int getutimes (struct timeval *, struct timespec *); 87 static int getutimens (const struct timespec *, struct timespec *, int *); 88 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 89 static int setfmode (struct vnode *, int); 90 static int setfflags (struct vnode *, u_long); 91 static int setutimes (struct vnode *, struct vattr *, 92 const struct timespec *, int); 93 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 96 "Allow non-root users to mount filesystems"); 97 98 static int debug_unmount = 0; /* if 1 loop until unmount success */ 99 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 100 "Stall failed unmounts in loop"); 101 102 static struct krate krate_rename = { 1 }; 103 104 /* 105 * Virtual File System System Calls 106 */ 107 108 /* 109 * Mount a file system. 
110 * 111 * mount_args(char *type, char *path, int flags, caddr_t data) 112 * 113 * MPALMOSTSAFE 114 */ 115 int 116 sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap) 117 { 118 struct thread *td = curthread; 119 struct vnode *vp; 120 struct nchandle nch; 121 struct mount *mp, *nullmp; 122 struct vfsconf *vfsp; 123 int error, flag = 0, flag2 = 0; 124 int hasmount; 125 int priv = 0; 126 int flags = uap->flags; 127 struct vattr va; 128 struct nlookupdata nd; 129 char fstypename[MFSNAMELEN]; 130 struct ucred *cred; 131 132 cred = td->td_ucred; 133 134 /* We do not allow user mounts inside a jail for now */ 135 if (usermount && jailed(cred)) { 136 error = EPERM; 137 goto done; 138 } 139 140 /* 141 * Extract the file system type. We need to know this early, to take 142 * appropriate actions for jails and the filesystems to mount. 143 */ 144 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) 145 goto done; 146 147 /* 148 * Select the correct cap according to the file system type. 149 */ 150 priv = get_fscap(fstypename); 151 152 if (usermount == 0 && (error = caps_priv_check_td(td, priv))) 153 goto done; 154 155 /* 156 * Do not allow NFS export by non-root users. 157 */ 158 if (flags & MNT_EXPORTED) { 159 error = caps_priv_check_td(td, priv); 160 if (error) 161 goto done; 162 } 163 /* 164 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 165 */ 166 if (caps_priv_check_td(td, priv)) 167 flags |= MNT_NOSUID | MNT_NODEV; 168 169 /* 170 * Lookup the requested path and extract the nch and vnode. 
171 */ 172 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 173 if (error == 0) { 174 if ((error = nlookup(&nd)) == 0) { 175 if (nd.nl_nch.ncp->nc_vp == NULL) 176 error = ENOENT; 177 } 178 } 179 if (error) { 180 nlookup_done(&nd); 181 goto done; 182 } 183 184 /* 185 * If the target filesystem is resolved via a nullfs mount, then 186 * nd.nl_nch.mount will be pointing to the nullfs mount structure 187 * instead of the target file system. We need it in case we are 188 * doing an update. 189 */ 190 nullmp = nd.nl_nch.mount; 191 192 /* 193 * Extract the locked+refd ncp and cleanup the nd structure 194 */ 195 nch = nd.nl_nch; 196 cache_zero(&nd.nl_nch); 197 nlookup_done(&nd); 198 199 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 200 (mp = cache_findmount(&nch)) != NULL) { 201 cache_dropmount(mp); 202 hasmount = 1; 203 } else { 204 hasmount = 0; 205 } 206 207 208 /* 209 * now we have the locked ref'd nch and unreferenced vnode. 210 */ 211 vp = nch.ncp->nc_vp; 212 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 213 cache_put(&nch); 214 goto done; 215 } 216 cache_unlock(&nch); 217 218 /* 219 * Now we have an unlocked ref'd nch and a locked ref'd vp 220 */ 221 if (flags & MNT_UPDATE) { 222 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 223 cache_drop(&nch); 224 vput(vp); 225 error = EINVAL; 226 goto done; 227 } 228 229 if (strncmp(fstypename, "null", 5) == 0) { 230 KKASSERT(nullmp); 231 mp = nullmp; 232 } else { 233 mp = vp->v_mount; 234 } 235 236 flag = mp->mnt_flag; 237 flag2 = mp->mnt_kern_flag; 238 /* 239 * We only allow the filesystem to be reloaded if it 240 * is currently mounted read-only. 241 */ 242 if ((flags & MNT_RELOAD) && 243 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 244 cache_drop(&nch); 245 vput(vp); 246 error = EOPNOTSUPP; /* Needs translation */ 247 goto done; 248 } 249 /* 250 * Only root, or the user that did the original mount is 251 * permitted to update it. 
252 */ 253 if (mp->mnt_stat.f_owner != cred->cr_uid && 254 (error = caps_priv_check_td(td, priv))) { 255 cache_drop(&nch); 256 vput(vp); 257 goto done; 258 } 259 if (vfs_busy(mp, LK_NOWAIT)) { 260 cache_drop(&nch); 261 vput(vp); 262 error = EBUSY; 263 goto done; 264 } 265 if (hasmount) { 266 cache_drop(&nch); 267 vfs_unbusy(mp); 268 vput(vp); 269 error = EBUSY; 270 goto done; 271 } 272 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 273 lwkt_gettoken(&mp->mnt_token); 274 vn_unlock(vp); 275 vfsp = mp->mnt_vfc; 276 goto update; 277 } 278 279 /* 280 * If the user is not root, ensure that they own the directory 281 * onto which we are attempting to mount. 282 */ 283 if ((error = VOP_GETATTR(vp, &va)) || 284 (va.va_uid != cred->cr_uid && 285 (error = caps_priv_check_td(td, priv)))) { 286 cache_drop(&nch); 287 vput(vp); 288 goto done; 289 } 290 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 291 cache_drop(&nch); 292 vput(vp); 293 goto done; 294 } 295 if (vp->v_type != VDIR) { 296 cache_drop(&nch); 297 vput(vp); 298 error = ENOTDIR; 299 goto done; 300 } 301 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 302 cache_drop(&nch); 303 vput(vp); 304 error = EPERM; 305 goto done; 306 } 307 vfsp = vfsconf_find_by_name(fstypename); 308 if (vfsp == NULL) { 309 linker_file_t lf; 310 311 /* Only load modules for root (very important!) 
*/ 312 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 313 if (error) { 314 cache_drop(&nch); 315 vput(vp); 316 goto done; 317 } 318 error = linker_load_file(fstypename, &lf); 319 if (error || lf == NULL) { 320 cache_drop(&nch); 321 vput(vp); 322 if (lf == NULL) 323 error = ENODEV; 324 goto done; 325 } 326 lf->userrefs++; 327 /* lookup again, see if the VFS was loaded */ 328 vfsp = vfsconf_find_by_name(fstypename); 329 if (vfsp == NULL) { 330 lf->userrefs--; 331 linker_file_unload(lf); 332 cache_drop(&nch); 333 vput(vp); 334 error = ENODEV; 335 goto done; 336 } 337 } 338 if (hasmount) { 339 cache_drop(&nch); 340 vput(vp); 341 error = EBUSY; 342 goto done; 343 } 344 345 /* 346 * Allocate and initialize the filesystem. 347 */ 348 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 349 mount_init(mp, vfsp->vfc_vfsops); 350 vfs_busy(mp, LK_NOWAIT); 351 mp->mnt_vfc = vfsp; 352 mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT; 353 vfsp->vfc_refcount++; 354 mp->mnt_stat.f_type = vfsp->vfc_typenum; 355 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 356 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 357 mp->mnt_stat.f_owner = cred->cr_uid; 358 lwkt_gettoken(&mp->mnt_token); 359 vn_unlock(vp); 360 update: 361 /* 362 * (per-mount token acquired at this point) 363 * 364 * Set the mount level flags. 
365 */ 366 if (flags & MNT_RDONLY) 367 mp->mnt_flag |= MNT_RDONLY; 368 else if (mp->mnt_flag & MNT_RDONLY) 369 mp->mnt_kern_flag |= MNTK_WANTRDWR; 370 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 371 MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | 372 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 373 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 374 MNT_AUTOMOUNTED); 375 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | 376 MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE | 377 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 378 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 379 MNT_AUTOMOUNTED); 380 381 /* 382 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf. 383 * This way the initial VFS_MOUNT() call will also be MPSAFE. 384 */ 385 if (vfsp->vfc_flags & VFCF_MPSAFE) 386 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 387 388 /* 389 * Mount the filesystem. 390 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 391 * get. 392 */ 393 if (mp->mnt_flag & MNT_UPDATE) { 394 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 395 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 396 mp->mnt_flag &= ~MNT_RDONLY; 397 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 398 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 399 if (error) { 400 mp->mnt_flag = flag; 401 mp->mnt_kern_flag = flag2; 402 } 403 lwkt_reltoken(&mp->mnt_token); 404 vfs_unbusy(mp); 405 vrele(vp); 406 cache_drop(&nch); 407 goto done; 408 } 409 mp->mnt_ncmounton = nch; 410 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 411 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 412 413 /* 414 * Put the new filesystem on the mount list after root. The mount 415 * point gets its own mnt_ncmountpt (unless the VFS already set one 416 * up) which represents the root of the mount. The lookup code 417 * detects the mount point going forward and checks the root of 418 * the mount going backwards. 
419 * 420 * It is not necessary to invalidate or purge the vnode underneath 421 * because elements under the mount will be given their own glue 422 * namecache record. 423 */ 424 if (!error) { 425 if (mp->mnt_ncmountpt.ncp == NULL) { 426 /* 427 * Allocate, then unlock, but leave the ref intact. 428 * This is the mnt_refs (1) that we will retain 429 * through to the unmount. 430 */ 431 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 432 cache_unlock(&mp->mnt_ncmountpt); 433 } 434 vn_unlock(vp); 435 cache_lock(&nch); 436 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 437 cache_unlock(&nch); 438 cache_ismounting(mp); 439 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 440 441 mountlist_insert(mp, MNTINS_LAST); 442 vn_unlock(vp); 443 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 444 error = vfs_allocate_syncvnode(mp); 445 lwkt_reltoken(&mp->mnt_token); 446 vfs_unbusy(mp); 447 error = VFS_START(mp, 0); 448 vrele(vp); 449 KNOTE(&fs_klist, VQ_MOUNT); 450 } else { 451 bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton)); 452 vn_syncer_thr_stop(mp); 453 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 454 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 455 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 456 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 457 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 458 if (mp->mnt_cred) { 459 crfree(mp->mnt_cred); 460 mp->mnt_cred = NULL; 461 } 462 mp->mnt_vfc->vfc_refcount--; 463 lwkt_reltoken(&mp->mnt_token); 464 vfs_unbusy(mp); 465 kfree(mp, M_MOUNT); 466 cache_drop(&nch); 467 vput(vp); 468 } 469 done: 470 return (error); 471 } 472 473 /* 474 * Scan all active processes to see if any of them have a current 475 * or root directory onto which the new filesystem has just been 476 * mounted. If so, replace them with the new mount point. 477 * 478 * Both old_nch and new_nch are ref'd on call but not locked. 479 * new_nch must be temporarily locked so it can be associated with the 480 * vnode representing the root of the mount point. 
481 */ 482 struct checkdirs_info { 483 struct nchandle old_nch; 484 struct nchandle new_nch; 485 struct vnode *old_vp; 486 struct vnode *new_vp; 487 }; 488 489 static int checkdirs_callback(struct proc *p, void *data); 490 491 static void 492 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 493 { 494 struct checkdirs_info info; 495 struct vnode *olddp; 496 struct vnode *newdp; 497 struct mount *mp; 498 499 /* 500 * If the old mount point's vnode has a usecount of 1, it is not 501 * being held as a descriptor anywhere. 502 */ 503 olddp = old_nch->ncp->nc_vp; 504 if (olddp == NULL || VREFCNT(olddp) == 1) 505 return; 506 507 /* 508 * Force the root vnode of the new mount point to be resolved 509 * so we can update any matching processes. 510 */ 511 mp = new_nch->mount; 512 if (VFS_ROOT(mp, &newdp)) 513 panic("mount: lost mount"); 514 vn_unlock(newdp); 515 cache_lock(new_nch); 516 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 517 cache_setunresolved(new_nch); 518 cache_setvp(new_nch, newdp); 519 cache_unlock(new_nch); 520 521 /* 522 * Special handling of the root node 523 */ 524 if (rootvnode == olddp) { 525 vref(newdp); 526 vfs_cache_setroot(newdp, cache_hold(new_nch)); 527 } 528 529 /* 530 * Pass newdp separately so the callback does not have to access 531 * it via new_nch->ncp->nc_vp. 532 */ 533 info.old_nch = *old_nch; 534 info.new_nch = *new_nch; 535 info.new_vp = newdp; 536 allproc_scan(checkdirs_callback, &info, 0); 537 vput(newdp); 538 } 539 540 /* 541 * NOTE: callback is not MP safe because the scanned process's filedesc 542 * structure can be ripped out from under us, amoung other things. 
543 */ 544 static int 545 checkdirs_callback(struct proc *p, void *data) 546 { 547 struct checkdirs_info *info = data; 548 struct filedesc *fdp; 549 struct nchandle ncdrop1; 550 struct nchandle ncdrop2; 551 struct vnode *vprele1; 552 struct vnode *vprele2; 553 554 if ((fdp = p->p_fd) != NULL) { 555 cache_zero(&ncdrop1); 556 cache_zero(&ncdrop2); 557 vprele1 = NULL; 558 vprele2 = NULL; 559 560 /* 561 * MPUNSAFE - XXX fdp can be pulled out from under a 562 * foreign process. 563 * 564 * A shared filedesc is ok, we don't have to copy it 565 * because we are making this change globally. 566 */ 567 spin_lock(&fdp->fd_spin); 568 if (fdp->fd_ncdir.mount == info->old_nch.mount && 569 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 570 vprele1 = fdp->fd_cdir; 571 vref(info->new_vp); 572 fdp->fd_cdir = info->new_vp; 573 ncdrop1 = fdp->fd_ncdir; 574 cache_copy(&info->new_nch, &fdp->fd_ncdir); 575 } 576 if (fdp->fd_nrdir.mount == info->old_nch.mount && 577 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 578 vprele2 = fdp->fd_rdir; 579 vref(info->new_vp); 580 fdp->fd_rdir = info->new_vp; 581 ncdrop2 = fdp->fd_nrdir; 582 cache_copy(&info->new_nch, &fdp->fd_nrdir); 583 } 584 spin_unlock(&fdp->fd_spin); 585 if (ncdrop1.ncp) 586 cache_drop(&ncdrop1); 587 if (ncdrop2.ncp) 588 cache_drop(&ncdrop2); 589 if (vprele1) 590 vrele(vprele1); 591 if (vprele2) 592 vrele(vprele2); 593 } 594 return(0); 595 } 596 597 /* 598 * Unmount a file system. 599 * 600 * Note: unmount takes a path to the vnode mounted on as argument, 601 * not special file (as before). 
602 * 603 * umount_args(char *path, int flags) 604 * 605 * MPALMOSTSAFE 606 */ 607 int 608 sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap) 609 { 610 struct thread *td = curthread; 611 struct proc *p __debugvar = td->td_proc; 612 struct mount *mp = NULL; 613 struct nlookupdata nd; 614 char fstypename[MFSNAMELEN]; 615 int priv = 0; 616 int error; 617 struct ucred *cred; 618 619 cred = td->td_ucred; 620 621 KKASSERT(p); 622 623 /* We do not allow user umounts inside a jail for now */ 624 if (usermount && jailed(cred)) { 625 error = EPERM; 626 goto done; 627 } 628 629 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 630 NLC_FOLLOW | NLC_IGNBADDIR); 631 if (error == 0) 632 error = nlookup(&nd); 633 if (error) 634 goto out; 635 636 mp = nd.nl_nch.mount; 637 638 /* Figure out the fsname in order to select proper privs */ 639 ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name); 640 priv = get_fscap(fstypename); 641 642 if (usermount == 0 && (error = caps_priv_check_td(td, priv))) { 643 nlookup_done(&nd); 644 goto done; 645 } 646 647 /* 648 * Only root, or the user that did the original mount is 649 * permitted to unmount this filesystem. 650 */ 651 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 652 (error = caps_priv_check_td(td, priv))) 653 { 654 goto out; 655 } 656 657 /* 658 * Don't allow unmounting the root file system. 659 */ 660 if (mp->mnt_flag & MNT_ROOTFS) { 661 error = EINVAL; 662 goto out; 663 } 664 665 /* 666 * Must be the root of the filesystem 667 */ 668 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 669 error = EINVAL; 670 goto out; 671 } 672 673 /* Check if this mount belongs to this prison */ 674 if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison || 675 mp->mnt_cred->cr_prison != cred->cr_prison)) { 676 kprintf("mountpoint %s does not belong to this jail\n", 677 uap->path); 678 error = EPERM; 679 goto out; 680 } 681 682 /* 683 * If no error try to issue the unmount. 
We lose our cache 684 * ref when we call nlookup_done so we must hold the mount point 685 * to prevent use-after-free races. 686 */ 687 out: 688 if (error == 0) { 689 mount_hold(mp); 690 nlookup_done(&nd); 691 error = dounmount(mp, uap->flags, 0); 692 mount_drop(mp); 693 } else { 694 nlookup_done(&nd); 695 } 696 done: 697 return (error); 698 } 699 700 /* 701 * Do the actual file system unmount (interlocked against the mountlist 702 * token and mp->mnt_token). 703 */ 704 static int 705 dounmount_interlock(struct mount *mp) 706 { 707 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 708 return (EBUSY); 709 mp->mnt_kern_flag |= MNTK_UNMOUNT; 710 return(0); 711 } 712 713 /* 714 * Returns non-zero if the specified process uses the specified 715 * mount point. 716 */ 717 static int 718 process_uses_mount(struct proc *p, struct mount *mp) 719 { 720 struct filedesc *fdp; 721 struct file *fp; 722 int found; 723 int n; 724 725 fdp = p->p_fd; 726 if (fdp == NULL) 727 return 0; 728 if (fdp->fd_ncdir.mount == mp || 729 fdp->fd_nrdir.mount == mp || 730 fdp->fd_njdir.mount == mp) 731 { 732 return 1; 733 } 734 735 found = 0; 736 spin_lock_shared(&fdp->fd_spin); 737 for (n = 0; n < fdp->fd_nfiles; ++n) { 738 fp = fdp->fd_files[n].fp; 739 if (fp && fp->f_nchandle.mount == mp) { 740 found = 1; 741 break; 742 } 743 } 744 spin_unlock_shared(&fdp->fd_spin); 745 746 return found; 747 } 748 749 /* 750 * Cleanup processes that have references to the mount point 751 * being force-unmounted. 752 */ 753 struct unmount_allproc_info { 754 struct mount *mp; 755 int sig; 756 }; 757 758 static int 759 unmount_allproc_cb(struct proc *p, void *arg) 760 { 761 struct unmount_allproc_info *info; 762 struct mount *mp; 763 764 info = arg; 765 mp = info->mp; 766 767 if (p->p_textnch.mount == mp) 768 cache_drop(&p->p_textnch); 769 if (info->sig && process_uses_mount(p, mp)) 770 ksignal(p, info->sig); 771 772 return 0; 773 } 774 775 /* 776 * The guts of the unmount code. The mount owns one ref and one hold 777 * count. 
If we successfully interlock the unmount, those refs are ours. 778 * (The ref is from mnt_ncmountpt). 779 * 780 * When halting we shortcut certain mount types such as devfs by not actually 781 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 782 * from the mountlist so higher-level filesytems can unmount cleanly. 783 * 784 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 785 */ 786 int 787 dounmount(struct mount *mp, int flags, int halting) 788 { 789 struct namecache *ncp; 790 struct nchandle nch; 791 struct vnode *vp; 792 int error; 793 int async_flag; 794 int lflags; 795 int freeok = 1; 796 int hadsyncer = 0; 797 int retry; 798 int quickhalt; 799 800 lwkt_gettoken(&mp->mnt_token); 801 802 /* 803 * When halting, certain mount points can essentially just 804 * be unhooked and otherwise ignored. 805 */ 806 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 807 quickhalt = 1; 808 freeok = 0; 809 } else { 810 quickhalt = 0; 811 } 812 813 814 /* 815 * Exclusive access for unmounting purposes. 816 */ 817 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 818 goto out; 819 820 /* 821 * We now 'own' the last mp->mnt_refs 822 * 823 * Allow filesystems to detect that a forced unmount is in progress. 824 */ 825 if (flags & MNT_FORCE) 826 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 827 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 828 error = lockmgr(&mp->mnt_lock, lflags); 829 if (error) { 830 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 831 if (mp->mnt_kern_flag & MNTK_MWAIT) { 832 mp->mnt_kern_flag &= ~MNTK_MWAIT; 833 wakeup(mp); 834 } 835 goto out; 836 } 837 838 if (mp->mnt_flag & MNT_EXPUBLIC) 839 vfs_setpublicfs(NULL, NULL, NULL); 840 841 vfs_msync(mp, MNT_WAIT); 842 async_flag = mp->mnt_flag & MNT_ASYNC; 843 mp->mnt_flag &=~ MNT_ASYNC; 844 845 /* 846 * Decomission our special mnt_syncer vnode. This also stops 847 * the vnlru code. If we are unable to unmount we recommission 848 * the vnode. 
849 * 850 * Then sync the filesystem. 851 */ 852 if ((vp = mp->mnt_syncer) != NULL) { 853 mp->mnt_syncer = NULL; 854 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 855 vrele(vp); 856 hadsyncer = 1; 857 } 858 859 /* 860 * Sync normally-mounted filesystem. 861 */ 862 if (quickhalt == 0) { 863 if ((mp->mnt_flag & MNT_RDONLY) == 0) 864 VFS_SYNC(mp, MNT_WAIT); 865 } 866 867 /* 868 * nchandle records ref the mount structure. Expect a count of 1 869 * (our mount->mnt_ncmountpt). 870 * 871 * Scans can get temporary refs on a mountpoint (thought really 872 * heavy duty stuff like cache_findmount() do not). 873 */ 874 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 875 /* 876 * Invalidate the namecache topology under the mount. 877 * nullfs mounts alias a real mount's namecache topology 878 * and it should not be invalidated in that case. 879 */ 880 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 881 cache_lock(&mp->mnt_ncmountpt); 882 cache_inval(&mp->mnt_ncmountpt, 883 CINV_DESTROY|CINV_CHILDREN); 884 cache_unlock(&mp->mnt_ncmountpt); 885 } 886 887 /* 888 * Clear pcpu caches 889 */ 890 cache_unmounting(mp); 891 if (mp->mnt_refs != 1) 892 cache_clearmntcache(mp); 893 894 /* 895 * Break out if we are good. Don't count ncp refs if the 896 * mount is aliased. 897 */ 898 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 899 NULL : mp->mnt_ncmountpt.ncp; 900 if (mp->mnt_refs == 1 && 901 (ncp == NULL || (ncp->nc_refs == 1 && 902 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 903 break; 904 } 905 906 /* 907 * If forcing the unmount, clean out any p->p_textnch 908 * nchandles that match this mount. 909 * 910 * In addition any process which has a current, root, or 911 * jail directory matching the mount, or which has an open 912 * descriptor matching the mount, will be killed. We first 913 * try SIGKILL, and if that doesn't work we issue SIGQUIT. 
914 */ 915 if (flags & MNT_FORCE) { 916 struct unmount_allproc_info info; 917 918 info.mp = mp; 919 switch(retry) { 920 case 3: 921 info.sig = SIGINT; 922 break; 923 case 7: 924 info.sig = SIGKILL; 925 break; 926 default: 927 info.sig = 0; 928 break; 929 } 930 allproc_scan(&unmount_allproc_cb, &info, 0); 931 } 932 933 /* 934 * Sleep and retry. 935 */ 936 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 4 + 1); 937 if (debug_unmount && (retry & 15) == 15) { 938 mount_warning(mp, 939 "(%p) debug - retry %d, " 940 "%d namecache refs, %d mount refs", 941 mp, retry, 942 (ncp ? ncp->nc_refs - 1 : 0), 943 mp->mnt_refs - 1); 944 } 945 } 946 if (retry == 10) { 947 mount_warning(mp, 948 "forced umount of \"%s\" - " 949 "%d namecache refs, %d mount refs", 950 (mp->mnt_ncmountpt.ncp ? 951 mp->mnt_ncmountpt.ncp->nc_name : "?"), 952 (ncp ? ncp->nc_refs - 1 : 0), 953 mp->mnt_refs - 1); 954 } 955 956 error = 0; 957 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 958 NULL : mp->mnt_ncmountpt.ncp; 959 if (mp->mnt_refs != 1 || 960 (ncp != NULL && (ncp->nc_refs != 1 || 961 TAILQ_FIRST(&ncp->nc_list)))) { 962 mount_warning(mp, 963 "(%p): %d namecache refs, %d mount refs " 964 "still present", 965 mp, 966 (ncp ? ncp->nc_refs - 1 : 0), 967 mp->mnt_refs - 1); 968 if (flags & MNT_FORCE) { 969 freeok = 0; 970 mount_warning(mp, "forcing unmount\n"); 971 } else { 972 error = EBUSY; 973 } 974 } 975 976 /* 977 * So far so good, sync the filesystem once more and 978 * call the VFS unmount code if the sync succeeds. 
979 */ 980 if (error == 0 && quickhalt == 0) { 981 if (mp->mnt_flag & MNT_RDONLY) { 982 error = VFS_UNMOUNT(mp, flags); 983 } else { 984 error = VFS_SYNC(mp, MNT_WAIT); 985 if (error == 0 || /* no error */ 986 error == EOPNOTSUPP || /* no sync avail */ 987 (flags & MNT_FORCE)) { /* force anyway */ 988 error = VFS_UNMOUNT(mp, flags); 989 } 990 } 991 if (error) { 992 mount_warning(mp, 993 "(%p) unmount: vfs refused to unmount, " 994 "error %d", 995 mp, error); 996 } 997 } 998 999 /* 1000 * If an error occurred we can still recover, restoring the 1001 * syncer vnode and misc flags. 1002 */ 1003 if (error) { 1004 if (mp->mnt_syncer == NULL && hadsyncer) 1005 vfs_allocate_syncvnode(mp); 1006 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1007 mp->mnt_flag |= async_flag; 1008 lockmgr(&mp->mnt_lock, LK_RELEASE); 1009 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1010 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1011 wakeup(mp); 1012 } 1013 goto out; 1014 } 1015 /* 1016 * Clean up any journals still associated with the mount after 1017 * filesystem activity has ceased. 1018 */ 1019 journal_remove_all_journals(mp, 1020 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 1021 1022 mountlist_remove(mp); 1023 1024 /* 1025 * Remove any installed vnode ops here so the individual VFSs don't 1026 * have to. 1027 * 1028 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 1029 * 1030 * When quickhalting we have to keep these intact because the 1031 * underlying vnodes have not been destroyed, and some might be 1032 * dirty. 
1033 */ 1034 if (quickhalt == 0) { 1035 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 1036 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 1037 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 1038 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 1039 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 1040 } 1041 1042 if (mp->mnt_ncmountpt.ncp != NULL) { 1043 nch = mp->mnt_ncmountpt; 1044 cache_zero(&mp->mnt_ncmountpt); 1045 cache_clrmountpt(&nch); 1046 cache_drop(&nch); 1047 } 1048 if (mp->mnt_ncmounton.ncp != NULL) { 1049 cache_unmounting(mp); 1050 nch = mp->mnt_ncmounton; 1051 cache_zero(&mp->mnt_ncmounton); 1052 cache_clrmountpt(&nch); 1053 cache_drop(&nch); 1054 } 1055 1056 if (mp->mnt_cred) { 1057 crfree(mp->mnt_cred); 1058 mp->mnt_cred = NULL; 1059 } 1060 1061 mp->mnt_vfc->vfc_refcount--; 1062 1063 /* 1064 * If not quickhalting the mount, we expect there to be no 1065 * vnodes left. 1066 */ 1067 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 1068 panic("unmount: dangling vnode"); 1069 1070 /* 1071 * Release the lock 1072 */ 1073 lockmgr(&mp->mnt_lock, LK_RELEASE); 1074 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1075 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1076 wakeup(mp); 1077 } 1078 1079 /* 1080 * If we reach here and freeok != 0 we must free the mount. 1081 * mnt_refs should already have dropped to 0, so if it is not 1082 * zero we must cycle the caches and wait. 1083 * 1084 * When we are satisfied that the mount has disconnected we can 1085 * drop the hold on the mp that represented the mount (though the 1086 * caller might actually have another, so the caller's drop may 1087 * do the actual free). 
1088 */ 1089 if (freeok) { 1090 if (mp->mnt_refs > 0) 1091 cache_clearmntcache(mp); 1092 while (mp->mnt_refs > 0) { 1093 cache_unmounting(mp); 1094 wakeup(mp); 1095 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 1096 cache_clearmntcache(mp); 1097 } 1098 lwkt_reltoken(&mp->mnt_token); 1099 mount_drop(mp); 1100 mp = NULL; 1101 } else { 1102 cache_clearmntcache(mp); 1103 } 1104 error = 0; 1105 KNOTE(&fs_klist, VQ_UNMOUNT); 1106 out: 1107 if (mp) 1108 lwkt_reltoken(&mp->mnt_token); 1109 return (error); 1110 } 1111 1112 static 1113 void 1114 mount_warning(struct mount *mp, const char *ctl, ...) 1115 { 1116 char *ptr; 1117 char *buf; 1118 __va_list va; 1119 1120 __va_start(va, ctl); 1121 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1122 &ptr, &buf, 0) == 0) { 1123 kprintf("unmount(%s): ", ptr); 1124 kvprintf(ctl, va); 1125 kprintf("\n"); 1126 kfree(buf, M_TEMP); 1127 } else { 1128 kprintf("unmount(%p", mp); 1129 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1130 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1131 kprintf("): "); 1132 kvprintf(ctl, va); 1133 kprintf("\n"); 1134 } 1135 __va_end(va); 1136 } 1137 1138 /* 1139 * Shim cache_fullpath() to handle the case where a process is chrooted into 1140 * a subdirectory of a mount. In this case if the root mount matches the 1141 * process root directory's mount we have to specify the process's root 1142 * directory instead of the mount point, because the mount point might 1143 * be above the root directory. 1144 */ 1145 static 1146 int 1147 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1148 { 1149 struct nchandle *nch; 1150 1151 if (p && p->p_fd->fd_nrdir.mount == mp) 1152 nch = &p->p_fd->fd_nrdir; 1153 else 1154 nch = &mp->mnt_ncmountpt; 1155 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1156 } 1157 1158 /* 1159 * Sync each mounted filesystem. 
1160 */ 1161 1162 #ifdef DEBUG 1163 static int syncprt = 0; 1164 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1165 #endif /* DEBUG */ 1166 1167 static int sync_callback(struct mount *mp, void *data); 1168 1169 int 1170 sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap) 1171 { 1172 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1173 return (0); 1174 } 1175 1176 static 1177 int 1178 sync_callback(struct mount *mp, void *data __unused) 1179 { 1180 int asyncflag; 1181 1182 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1183 lwkt_gettoken(&mp->mnt_token); 1184 asyncflag = mp->mnt_flag & MNT_ASYNC; 1185 mp->mnt_flag &= ~MNT_ASYNC; 1186 lwkt_reltoken(&mp->mnt_token); 1187 vfs_msync(mp, MNT_NOWAIT); 1188 VFS_SYNC(mp, MNT_NOWAIT); 1189 lwkt_gettoken(&mp->mnt_token); 1190 mp->mnt_flag |= asyncflag; 1191 lwkt_reltoken(&mp->mnt_token); 1192 } 1193 return(0); 1194 } 1195 1196 /* XXX PRISON: could be per prison flag */ 1197 static int prison_quotas; 1198 #if 0 1199 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1200 #endif 1201 1202 /* 1203 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1204 * 1205 * Change filesystem quotas. 
*
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;
	/* jailed callers are refused unless prison_quotas is non-zero */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	/* resolve the path and forward the request to the owning mount */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				    uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 *
	 * Jailed callers are always refused.  MOUNTCTL_MOUNTFLAGS is the
	 * one operation exempt from the SYSCAP_RESTRICTEDROOT check.
	 */
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) != 0)
	{
		return (error);
	}

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 *
	 * ctl and buf are each allocated one byte larger than requested
	 * and zero-filled (presumably so string payloads stay
	 * NUL-terminated -- confirm against the vop_mountctl consumers).
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 *
	 * A negative fd means "no descriptor" and is passed on as NULL.
	 */
	if (uap->fd >= 0) {
		fp = holdfp(td, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &sysmsg->sysmsg_result);
	if (fp)
		dropfp(td, uap->fd, fp);
	/* sysmsg_result doubles as the number of result bytes to copy out */
	if (error == 0 && sysmsg->sysmsg_result > 0)
		error = copyout(buf, uap->buf, sysmsg->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
*/
int
kern_mountctl(const char *path, int op, struct file *fp,
		const void *ctl, int ctllen,
		void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	/* *res is always initialized, even on the error paths below */
	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 *
	 * The nchandle is moved out of nd into the local nch and
	 * unlocked; it is released with cache_drop() on every exit path.
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	/* refuse to operate on a mount that is absent or being unmounted */
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

/*
 * Resolve nd and fill *buf with the statfs data of the mount the path
 * lands on.
 *
 * The mount's cached mnt_stat is refreshed through VFS_STATFS() (errors
 * ignored), its f_mntonname is rewritten with the path reported by
 * mount_path(), and the result is copied into *buf.  The fsid in *buf is
 * zeroed when the caller fails the SYSCAP_RESTRICTEDROOT check.
 *
 * Returns 0 on success or the nlookup()/mount_path() error.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	error = VFS_STATFS(mp, sp, nd->nl_cred);
	/* ignore error */

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor-based variant of kern_statfs(): fill *buf with the statfs
 * data of the mount behind file descriptor fd.
 *
 * Returns EBADF when no mount can be determined for the descriptor,
 * EINVAL when the file has no credential, the holdvnode()/mount_path()
 * error if either fails, and 0 otherwise.  The file reference obtained by
 * holdvnode() is dropped on every path past that call.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	sp = &mp->mnt_stat;
	error = VFS_STATFS(mp, sp, fp->f_cred);

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
*/
int
sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Resolve nd and fill *buf with the statvfs data of the mount the path
 * lands on.
 *
 * The mount's cached mnt_vstat is refreshed through VFS_STATVFS(); unlike
 * kern_statfs() a refresh failure is returned to the caller.  f_flag is
 * rebuilt from the mount's MNT_RDONLY and MNT_NOSUID flags before the
 * structure is copied into *buf.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor-based variant of kern_statvfs(): fill *buf with the statvfs
 * data of the mount behind file descriptor fd.
 *
 * Returns EBADF when no mount can be determined for the descriptor,
 * EINVAL when the file has no credential, or the holdvnode()/
 * VFS_STATVFS() error.  The file reference obtained by holdvnode() is
 * dropped on every path past that call.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	/* prefer the mount recorded in the file's nchandle, if any */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/*
 * Scan state shared between sys_getfsstat() and getfsstat_callback().
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user-space slot to fill, may be NULL */
	long count;		/* visible mounts seen so far */
	long maxcount;		/* number of slots that fit in the user buffer */
	int error;		/* first error recorded by the callback */
	int flags;		/* caller supplied MNT_* wait flags */
	struct thread *td;	/* calling thread */
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/*
	 * With a buffer the result is capped at the number of slots it
	 * holds; without one the total number of visible mounts is
	 * reported.
	 */
	if (info.sfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount worker for sys_getfsstat().
 *
 * Mounts not visible from the caller's chroot are skipped without being
 * counted.  While buffer slots remain the mount's statfs data is
 * (optionally) refreshed, its f_mntonname rewritten via mount_path(), and
 * the structure copied out to the next user slot.  Every visible mount
 * is counted, whether or not it was copied out.
 *
 * Returns 0 normally, or -1 after recording the failure in info->error
 * (presumably terminating the scan -- confirm against mountlist_scan()).
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->sfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long vbufsize, int flags)
 *
 * Get statistics on all filesystems.
*/

/*
 * Scan state shared between sys_getvfsstat() and getvfsstat_callback().
 */
struct getvfsstat_info {
	struct statfs *sfsp;	/* next user-space statfs slot to fill */
	struct statvfs *vsfsp;	/* next user-space statvfs slot to fill */
	long count;		/* visible mounts seen so far */
	long maxcount;		/* number of statvfs slots in the user buffer */
	int error;		/* first error recorded by the callback */
	int flags;		/* caller supplied MNT_* wait flags */
	struct thread *td;	/* calling thread */
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	/* the slot count is derived from the statvfs buffer size only */
	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	/*
	 * With a statvfs buffer the result is capped at the number of
	 * slots it holds; without one the total number of visible mounts
	 * is reported.
	 */
	if (info.vsfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount worker for sys_getvfsstat().
 *
 * Mounts not visible from the caller's chroot are skipped without being
 * counted.  While statvfs slots remain, both the statfs and the statvfs
 * data of the mount are (optionally) refreshed and copied out to their
 * respective user buffers.  Note that only vsfsp is tested before the
 * copyouts; a NULL sfsp is left to copyout() to reject.
 *
 * Returns 0 normally, or -1 after recording the failure in info->error
 * (presumably terminating the scan -- confirm against mountlist_scan()).
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->vsfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		/* same refresh policy for the statvfs view of the mount */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			/* ignore error */
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
1836 */ 1837 int 1838 sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap) 1839 { 1840 struct thread *td = curthread; 1841 struct proc *p = td->td_proc; 1842 struct filedesc *fdp = p->p_fd; 1843 struct vnode *vp, *ovp; 1844 struct mount *mp; 1845 struct file *fp; 1846 struct nchandle nch, onch, tnch; 1847 int error; 1848 1849 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1850 return (error); 1851 lwkt_gettoken(&p->p_token); 1852 vp = (struct vnode *)fp->f_data; 1853 vref(vp); 1854 vn_lock(vp, LK_SHARED | LK_RETRY); 1855 if (fp->f_nchandle.ncp == NULL) 1856 error = ENOTDIR; 1857 else 1858 error = checkvp_chdir(vp, td); 1859 if (error) { 1860 vput(vp); 1861 goto done; 1862 } 1863 cache_copy(&fp->f_nchandle, &nch); 1864 1865 /* 1866 * If the ncp has become a mount point, traverse through 1867 * the mount point. 1868 */ 1869 1870 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1871 (mp = cache_findmount(&nch)) != NULL 1872 ) { 1873 error = nlookup_mp(mp, &tnch); 1874 if (error == 0) { 1875 cache_unlock(&tnch); /* leave ref intact */ 1876 vput(vp); 1877 vp = tnch.ncp->nc_vp; 1878 error = vget(vp, LK_SHARED); 1879 KKASSERT(error == 0); 1880 cache_drop(&nch); 1881 nch = tnch; 1882 } 1883 cache_dropmount(mp); 1884 } 1885 if (error == 0) { 1886 spin_lock(&fdp->fd_spin); 1887 ovp = fdp->fd_cdir; 1888 onch = fdp->fd_ncdir; 1889 fdp->fd_cdir = vp; 1890 fdp->fd_ncdir = nch; 1891 spin_unlock(&fdp->fd_spin); 1892 vn_unlock(vp); /* leave ref intact */ 1893 cache_drop(&onch); 1894 vrele(ovp); 1895 } else { 1896 cache_drop(&nch); 1897 vput(vp); 1898 } 1899 fdrop(fp); 1900 done: 1901 lwkt_reltoken(&p->p_token); 1902 return (error); 1903 } 1904 1905 int 1906 kern_chdir(struct nlookupdata *nd) 1907 { 1908 struct thread *td = curthread; 1909 struct proc *p = td->td_proc; 1910 struct filedesc *fdp = p->p_fd; 1911 struct vnode *vp, *ovp; 1912 struct nchandle onch; 1913 int error; 1914 1915 nd->nl_flags |= NLC_SHAREDLOCK; 1916 if ((error = nlookup(nd)) != 0) 1917 
return (error); 1918 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1919 return (ENOENT); 1920 if ((error = vget(vp, LK_SHARED)) != 0) 1921 return (error); 1922 1923 lwkt_gettoken(&p->p_token); 1924 error = checkvp_chdir(vp, td); 1925 vn_unlock(vp); 1926 if (error == 0) { 1927 spin_lock(&fdp->fd_spin); 1928 ovp = fdp->fd_cdir; 1929 onch = fdp->fd_ncdir; 1930 fdp->fd_ncdir = nd->nl_nch; 1931 fdp->fd_cdir = vp; 1932 spin_unlock(&fdp->fd_spin); 1933 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1934 cache_drop(&onch); 1935 vrele(ovp); 1936 cache_zero(&nd->nl_nch); 1937 } else { 1938 vrele(vp); 1939 } 1940 lwkt_reltoken(&p->p_token); 1941 return (error); 1942 } 1943 1944 /* 1945 * chdir_args(char *path) 1946 * 1947 * Change current working directory (``.''). 1948 */ 1949 int 1950 sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap) 1951 { 1952 struct nlookupdata nd; 1953 int error; 1954 1955 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1956 if (error == 0) 1957 error = kern_chdir(&nd); 1958 nlookup_done(&nd); 1959 return (error); 1960 } 1961 1962 /* 1963 * Helper function for raised chroot(2) security function: Refuse if 1964 * any filedescriptors are open directories. 1965 */ 1966 static int 1967 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1968 { 1969 struct vnode *vp; 1970 struct file *fp; 1971 int error; 1972 int fd; 1973 1974 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1975 if ((error = holdvnode(td, fd, &fp)) != 0) 1976 continue; 1977 vp = (struct vnode *)fp->f_data; 1978 if (vp->v_type != VDIR) { 1979 fdrop(fp); 1980 continue; 1981 } 1982 fdrop(fp); 1983 return(EPERM); 1984 } 1985 return (0); 1986 } 1987 1988 /* 1989 * This sysctl determines if we will allow a process to chroot(2) if it 1990 * has a directory open: 1991 * 0: disallowed for all processes. 1992 * 1: allowed for processes that were not already chroot(2)'ed. 1993 * 2: allowed for all processes. 
*/

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		lwkt_gettoken(&p->p_token);
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* the first chroot also establishes jdir, with its own refs */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
		/* p_depth is bumped once per process, guarded against overflow */
		if ((p->p_flags & P_DIDCHROOT) == 0) {
			p->p_flags |= P_DIDCHROOT;
			if (p->p_depth <= 65535 - 32)
				p->p_depth += 32;
		}
		lwkt_reltoken(&p->p_token);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Resolve uap->path and hand the resulting vnode and a held namecache
 * handle to vfs_cache_setroot().  Requires the same SYSCAP_NOVFS_CHROOT
 * privilege check as kern_chroot().  Returns ENOENT when the entry
 * resolves to no vnode.
 */
int
sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 *
 * Returns ENOTDIR for non-directories, otherwise the result of the
 * VEXEC access check against the thread's credentials.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open (and possibly create) the file described by nd and install it in
 * a newly reserved descriptor of the calling process.
 *
 * oflags are the user-level open(2) flags, mode the creation mode (masked
 * by the process's fd_cmask).  On success the descriptor index is stored
 * in *res and 0 is returned.  nd is always finished with nlookup_done()
 * once vn_open() has run.  ERESTART from vn_open() is reported as EINTR.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	int type, indx, error = 0;
	struct flock lf;

	/* all access-mode bits set at once is not a valid access mode */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 *
	 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode
	 * lock for O_RDWR opens on executables in order to avoid a VTEXT
	 * detection race.  The NLC_EXCLLOCK_IFEXEC handles this case.
	 *
	 * NOTE: We need a flag to separate terminal vnode locking from
	 *	 parent locking.  O_CREAT needs parent locking, but O_TRUNC
	 *	 and O_RDWR only need to lock the terminal vnode exclusively.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC)) == 0) {
		nd->nl_flags |= NLC_SHAREDLOCK;
		if (flags & O_RDWR)
			nd->nl_flags |= NLC_EXCLLOCK_IFEXEC;
	}

	/*
	 * Issue the vn_open, passing in the referenced fp.  the vn_open()
	 * is allowed to replace fp by fdrop()ing it and returning its own
	 * referenced fp.
	 */
	nfp = fp;
	error = vn_open(nd, &nfp, flags, cmode);
	fp = nfp;
	nlookup_done(nd);

	/*
	 * Deal with any error condition
	 */
	if (error) {
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * Reserve a file descriptor.
	 */
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		return (error);
	}

	/*
	 * Handle advisory lock flags.  This is only supported with vnodes.
	 * For things like /dev/fd/N we might not actually get a vnode.
	 */
	if ((flags & (O_EXLOCK | O_SHLOCK)) && fp->f_type == DTYPE_VNODE) {
		struct vnode *vp;

		vp = (struct vnode *)fp->f_data;
		vref(vp);

		/* whole-file lock: start 0, length 0 */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
		if (error) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
		vrele(vp);
	}

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	if (oflags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;

	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct sysmsg *sysmsg, const struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &sysmsg->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
					&sysmsg->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create the special file described by nd.
 *
 * The file type is taken from (mode & S_IFMT); each supported type runs
 * its own privilege check before the lookup is attempted.  S_IFIFO is
 * delegated to kern_mkfifo(), S_IFWHT creates a whiteout entry instead of
 * a node, and unsupported types yield EINVAL.  The permission bits are
 * masked by the process's fd_cmask.  Returns EEXIST when the name already
 * resolves to a vnode and EINVAL when the lookup produced no parent
 * directory vnode.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_BAD);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_WHT);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_DIR);
		vattr.va_type = VDIR;
		break;
	case S_IFIFO:
		return (kern_mkfifo(nd, mode));
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		/* the new vnode is not needed by the caller, release it */
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
2406 */ 2407 int 2408 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap) 2409 { 2410 struct nlookupdata nd; 2411 struct file *fp; 2412 int error; 2413 2414 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2415 if (error == 0) { 2416 error = kern_mknod(&nd, uap->mode, 2417 umajor(uap->dev), uminor(uap->dev)); 2418 } 2419 nlookup_done_at(&nd, fp); 2420 return (error); 2421 } 2422 2423 int 2424 kern_mkfifo(struct nlookupdata *nd, int mode) 2425 { 2426 struct thread *td = curthread; 2427 struct proc *p = td->td_proc; 2428 struct vattr vattr; 2429 struct vnode *vp; 2430 int error; 2431 2432 bwillinode(1); 2433 2434 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2435 if ((error = nlookup(nd)) != 0) 2436 return (error); 2437 if (nd->nl_nch.ncp->nc_vp) 2438 return (EEXIST); 2439 if (nd->nl_dvp == NULL) 2440 return (EINVAL); 2441 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2442 return (error); 2443 2444 VATTR_NULL(&vattr); 2445 vattr.va_type = VFIFO; 2446 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2447 vp = NULL; 2448 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2449 if (error == 0) 2450 vput(vp); 2451 return (error); 2452 } 2453 2454 /* 2455 * mkfifo_args(char *path, int mode) 2456 * 2457 * Create a named pipe. 2458 */ 2459 int 2460 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap) 2461 { 2462 struct nlookupdata nd; 2463 int error; 2464 2465 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2466 if (error == 0) 2467 error = kern_mkfifo(&nd, uap->mode); 2468 nlookup_done(&nd); 2469 return (error); 2470 } 2471 2472 /* 2473 * mkfifoat_args(int fd, char *path, mode_t mode) 2474 * 2475 * Create a named pipe. The path is relative to the directory associated 2476 * with fd. 
2477 */ 2478 int 2479 sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap) 2480 { 2481 struct nlookupdata nd; 2482 struct file *fp; 2483 int error; 2484 2485 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2486 if (error == 0) 2487 error = kern_mkfifo(&nd, uap->mode); 2488 nlookup_done_at(&nd, fp); 2489 return (error); 2490 } 2491 2492 static int hardlink_check_uid = 0; 2493 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2494 &hardlink_check_uid, 0, 2495 "Unprivileged processes cannot create hard links to files owned by other " 2496 "users"); 2497 static int hardlink_check_gid = 0; 2498 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2499 &hardlink_check_gid, 0, 2500 "Unprivileged processes cannot create hard links to files owned by other " 2501 "groups"); 2502 2503 static int 2504 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2505 { 2506 struct vattr va; 2507 int error; 2508 2509 /* 2510 * Shortcut if disabled 2511 */ 2512 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2513 return (0); 2514 2515 /* 2516 * Privileged user can always hardlink 2517 */ 2518 if (caps_priv_check(cred, SYSCAP_NOVFS_LINK) == 0) 2519 return (0); 2520 2521 /* 2522 * Otherwise only if the originating file is owned by the 2523 * same user or group. Note that any group is allowed if 2524 * the file is owned by the caller. 2525 */ 2526 error = VOP_GETATTR(vp, &va); 2527 if (error != 0) 2528 return (error); 2529 2530 if (hardlink_check_uid) { 2531 if (cred->cr_uid != va.va_uid) 2532 return (EPERM); 2533 } 2534 2535 if (hardlink_check_gid) { 2536 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2537 return (EPERM); 2538 } 2539 2540 return (0); 2541 } 2542 2543 int 2544 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2545 { 2546 struct thread *td = curthread; 2547 struct vnode *vp; 2548 int error; 2549 2550 /* 2551 * Lookup the source and obtained a locked vnode. 
2552 * 2553 * You may only hardlink a file which you have write permission 2554 * on or which you own. 2555 * 2556 * XXX relookup on vget failure / race ? 2557 */ 2558 bwillinode(1); 2559 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2560 if ((error = nlookup(nd)) != 0) 2561 return (error); 2562 vp = nd->nl_nch.ncp->nc_vp; 2563 KKASSERT(vp != NULL); 2564 if (vp->v_type == VDIR) 2565 return (EPERM); /* POSIX */ 2566 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2567 return (error); 2568 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2569 return (error); 2570 2571 /* 2572 * Unlock the source so we can lookup the target without deadlocking 2573 * (XXX vp is locked already, possible other deadlock?). The target 2574 * must not exist. 2575 */ 2576 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2577 nd->nl_flags &= ~NLC_NCPISLOCKED; 2578 cache_unlock(&nd->nl_nch); 2579 vn_unlock(vp); 2580 2581 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2582 if ((error = nlookup(linknd)) != 0) { 2583 vrele(vp); 2584 return (error); 2585 } 2586 if (linknd->nl_nch.ncp->nc_vp) { 2587 vrele(vp); 2588 return (EEXIST); 2589 } 2590 if (linknd->nl_dvp == NULL) { 2591 vrele(vp); 2592 return (EINVAL); 2593 } 2594 VFS_MODIFYING(vp->v_mount); 2595 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2596 if (error) { 2597 vrele(vp); 2598 return (error); 2599 } 2600 2601 /* 2602 * Finally run the new API VOP. 2603 */ 2604 error = can_hardlink(vp, td, td->td_ucred); 2605 if (error == 0) { 2606 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2607 vp, linknd->nl_cred); 2608 } 2609 vput(vp); 2610 return (error); 2611 } 2612 2613 /* 2614 * link_args(char *path, char *link) 2615 * 2616 * Make a hard file link. 
2617 */ 2618 int 2619 sys_link(struct sysmsg *sysmsg, const struct link_args *uap) 2620 { 2621 struct nlookupdata nd, linknd; 2622 int error; 2623 2624 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2625 if (error == 0) { 2626 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2627 if (error == 0) 2628 error = kern_link(&nd, &linknd); 2629 nlookup_done(&linknd); 2630 } 2631 nlookup_done(&nd); 2632 return (error); 2633 } 2634 2635 /* 2636 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2637 * 2638 * Make a hard file link. The path1 argument is relative to the directory 2639 * associated with fd1, and similarly the path2 argument is relative to 2640 * the directory associated with fd2. 2641 */ 2642 int 2643 sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap) 2644 { 2645 struct nlookupdata nd, linknd; 2646 struct file *fp1, *fp2; 2647 int error; 2648 2649 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2650 (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); 2651 if (error == 0) { 2652 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2653 uap->path2, UIO_USERSPACE, 0); 2654 if (error == 0) 2655 error = kern_link(&nd, &linknd); 2656 nlookup_done_at(&linknd, fp2); 2657 } 2658 nlookup_done_at(&nd, fp1); 2659 return (error); 2660 } 2661 2662 int 2663 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2664 { 2665 struct vattr vattr; 2666 struct vnode *vp; 2667 struct vnode *dvp; 2668 int error; 2669 2670 bwillinode(1); 2671 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2672 if ((error = nlookup(nd)) != 0) 2673 return (error); 2674 if (nd->nl_nch.ncp->nc_vp) 2675 return (EEXIST); 2676 if (nd->nl_dvp == NULL) 2677 return (EINVAL); 2678 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2679 return (error); 2680 dvp = nd->nl_dvp; 2681 VATTR_NULL(&vattr); 2682 vattr.va_mode = mode; 2683 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2684 if (error == 0) 2685 vput(vp); 2686 return (error); 2687 } 2688 2689 /* 2690 * symlink(char *path, char *link) 2691 * 2692 * Make a symbolic link. 2693 */ 2694 int 2695 sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap) 2696 { 2697 struct thread *td = curthread; 2698 struct nlookupdata nd; 2699 char *path; 2700 int error; 2701 int mode; 2702 2703 path = objcache_get(namei_oc, M_WAITOK); 2704 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2705 if (error == 0) { 2706 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2707 if (error == 0) { 2708 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2709 error = kern_symlink(&nd, path, mode); 2710 } 2711 nlookup_done(&nd); 2712 } 2713 objcache_put(namei_oc, path); 2714 return (error); 2715 } 2716 2717 /* 2718 * symlinkat_args(char *path1, int fd, char *path2) 2719 * 2720 * Make a symbolic link. The path2 argument is relative to the directory 2721 * associated with fd. 
2722 */ 2723 int 2724 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap) 2725 { 2726 struct thread *td = curthread; 2727 struct nlookupdata nd; 2728 struct file *fp; 2729 char *path1; 2730 int error; 2731 int mode; 2732 2733 path1 = objcache_get(namei_oc, M_WAITOK); 2734 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2735 if (error == 0) { 2736 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2737 UIO_USERSPACE, 0); 2738 if (error == 0) { 2739 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2740 error = kern_symlink(&nd, path1, mode); 2741 } 2742 nlookup_done_at(&nd, fp); 2743 } 2744 objcache_put(namei_oc, path1); 2745 return (error); 2746 } 2747 2748 /* 2749 * undelete_args(char *path) 2750 * 2751 * Delete a whiteout from the filesystem. 2752 */ 2753 int 2754 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap) 2755 { 2756 struct nlookupdata nd; 2757 int error; 2758 2759 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2760 bwillinode(1); 2761 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2762 if (error == 0) 2763 error = nlookup(&nd); 2764 if (error == 0 && nd.nl_dvp == NULL) 2765 error = EINVAL; 2766 if (error == 0) 2767 error = ncp_writechk(&nd.nl_nch); 2768 if (error == 0) { 2769 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2770 NAMEI_DELETE); 2771 } 2772 nlookup_done(&nd); 2773 return (error); 2774 } 2775 2776 int 2777 kern_unlink(struct nlookupdata *nd) 2778 { 2779 int error; 2780 2781 bwillinode(1); 2782 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2783 if ((error = nlookup(nd)) != 0) 2784 return (error); 2785 if (nd->nl_dvp == NULL) 2786 return EINVAL; 2787 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2788 return (error); 2789 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2790 return (error); 2791 } 2792 2793 /* 2794 * unlink_args(char *path) 2795 * 2796 * Delete a name from the filesystem. 
2797 */ 2798 int 2799 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap) 2800 { 2801 struct nlookupdata nd; 2802 int error; 2803 2804 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2805 if (error == 0) 2806 error = kern_unlink(&nd); 2807 nlookup_done(&nd); 2808 return (error); 2809 } 2810 2811 2812 /* 2813 * unlinkat_args(int fd, char *path, int flags) 2814 * 2815 * Delete the file or directory entry pointed to by fd/path. 2816 */ 2817 int 2818 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap) 2819 { 2820 struct nlookupdata nd; 2821 struct file *fp; 2822 int error; 2823 2824 if (uap->flags & ~AT_REMOVEDIR) 2825 return (EINVAL); 2826 2827 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2828 if (error == 0) { 2829 if (uap->flags & AT_REMOVEDIR) 2830 error = kern_rmdir(&nd); 2831 else 2832 error = kern_unlink(&nd); 2833 } 2834 nlookup_done_at(&nd, fp); 2835 return (error); 2836 } 2837 2838 int 2839 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2840 { 2841 struct thread *td = curthread; 2842 struct file *fp; 2843 int error; 2844 2845 fp = holdfp(td, fd, -1); 2846 if (fp == NULL) 2847 return (EBADF); 2848 2849 error = fo_seek(fp, offset, whence, res); 2850 dropfp(td, fd, fp); 2851 2852 return (error); 2853 } 2854 2855 /* 2856 * lseek_args(int fd, int pad, off_t offset, int whence) 2857 * 2858 * Reposition read/write file offset. 2859 */ 2860 int 2861 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap) 2862 { 2863 int error; 2864 2865 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2866 &sysmsg->sysmsg_offset); 2867 2868 return (error); 2869 } 2870 2871 /* 2872 * Check if current process can access given file. amode is a bitmask of *_OK 2873 * access bits. flags is a bitmask of AT_* flags. 
2874 */ 2875 int 2876 kern_access(struct nlookupdata *nd, int amode, int flags) 2877 { 2878 struct vnode *vp; 2879 int error, mode; 2880 2881 if (flags & ~AT_EACCESS) 2882 return (EINVAL); 2883 nd->nl_flags |= NLC_SHAREDLOCK; 2884 if ((error = nlookup(nd)) != 0) 2885 return (error); 2886 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2887 return (error); 2888 retry: 2889 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2890 if (error) 2891 return (error); 2892 2893 /* Flags == 0 means only check for existence. */ 2894 if (amode) { 2895 mode = 0; 2896 if (amode & R_OK) 2897 mode |= VREAD; 2898 if (amode & W_OK) 2899 mode |= VWRITE; 2900 if (amode & X_OK) 2901 mode |= VEXEC; 2902 if ((mode & VWRITE) == 0 || 2903 (error = vn_writechk(vp)) == 0) { 2904 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2905 } 2906 2907 /* 2908 * If the file handle is stale we have to re-resolve the 2909 * entry with the ncp held exclusively. This is a hack 2910 * at the moment. 2911 */ 2912 if (error == ESTALE) { 2913 u_int dummy_gen; 2914 2915 vput(vp); 2916 cache_unlock(&nd->nl_nch); 2917 cache_lock(&nd->nl_nch); 2918 dummy_gen = nd->nl_nch.ncp->nc_generation; 2919 cache_setunresolved(&nd->nl_nch); 2920 error = cache_resolve(&nd->nl_nch, &dummy_gen, 2921 nd->nl_cred); 2922 if (error == 0) { 2923 vp = NULL; 2924 goto retry; 2925 } 2926 return(error); 2927 } 2928 } 2929 vput(vp); 2930 return (error); 2931 } 2932 2933 /* 2934 * access_args(char *path, int flags) 2935 * 2936 * Check access permissions. 2937 */ 2938 int 2939 sys_access(struct sysmsg *sysmsg, const struct access_args *uap) 2940 { 2941 struct nlookupdata nd; 2942 int error; 2943 2944 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2945 if (error == 0) 2946 error = kern_access(&nd, uap->flags, 0); 2947 nlookup_done(&nd); 2948 return (error); 2949 } 2950 2951 2952 /* 2953 * eaccess_args(char *path, int flags) 2954 * 2955 * Check access permissions. 
2956 */ 2957 int 2958 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap) 2959 { 2960 struct nlookupdata nd; 2961 int error; 2962 2963 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2964 if (error == 0) 2965 error = kern_access(&nd, uap->flags, AT_EACCESS); 2966 nlookup_done(&nd); 2967 return (error); 2968 } 2969 2970 2971 /* 2972 * faccessat_args(int fd, char *path, int amode, int flags) 2973 * 2974 * Check access permissions. 2975 */ 2976 int 2977 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap) 2978 { 2979 struct nlookupdata nd; 2980 struct file *fp; 2981 int error; 2982 2983 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2984 NLC_FOLLOW); 2985 if (error == 0) 2986 error = kern_access(&nd, uap->amode, uap->flags); 2987 nlookup_done_at(&nd, fp); 2988 return (error); 2989 } 2990 2991 int 2992 kern_stat(struct nlookupdata *nd, struct stat *st) 2993 { 2994 int error; 2995 struct vnode *vp; 2996 2997 nd->nl_flags |= NLC_SHAREDLOCK; 2998 if ((error = nlookup(nd)) != 0) 2999 return (error); 3000 again: 3001 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 3002 return (ENOENT); 3003 3004 #if 1 3005 error = cache_vref(&nd->nl_nch, NULL, &vp); 3006 #else 3007 error = vget(vp, LK_SHARED); 3008 #endif 3009 if (error) 3010 return (error); 3011 error = vn_stat(vp, st, nd->nl_cred); 3012 3013 /* 3014 * If the file handle is stale we have to re-resolve the 3015 * entry with the ncp held exclusively. This is a hack 3016 * at the moment. 
3017 */ 3018 if (error == ESTALE) { 3019 u_int dummy_gen; 3020 #if 1 3021 vrele(vp); 3022 #else 3023 vput(vp); 3024 #endif 3025 cache_unlock(&nd->nl_nch); 3026 cache_lock(&nd->nl_nch); 3027 dummy_gen = nd->nl_nch.ncp->nc_generation; 3028 cache_setunresolved(&nd->nl_nch); 3029 error = cache_resolve(&nd->nl_nch, &dummy_gen, nd->nl_cred); 3030 if (error == 0) 3031 goto again; 3032 } else { 3033 #if 1 3034 vrele(vp); 3035 #else 3036 vput(vp); 3037 #endif 3038 } 3039 return (error); 3040 } 3041 3042 /* 3043 * stat_args(char *path, struct stat *ub) 3044 * 3045 * Get file status; this version follows links. 3046 */ 3047 int 3048 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap) 3049 { 3050 struct nlookupdata nd; 3051 struct stat st; 3052 int error; 3053 3054 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3055 if (error == 0) { 3056 error = kern_stat(&nd, &st); 3057 if (error == 0) 3058 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3059 } 3060 nlookup_done(&nd); 3061 return (error); 3062 } 3063 3064 /* 3065 * lstat_args(char *path, struct stat *ub) 3066 * 3067 * Get file status; this version does not follow links. 3068 */ 3069 int 3070 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap) 3071 { 3072 struct nlookupdata nd; 3073 struct stat st; 3074 int error; 3075 3076 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3077 if (error == 0) { 3078 error = kern_stat(&nd, &st); 3079 if (error == 0) 3080 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3081 } 3082 nlookup_done(&nd); 3083 return (error); 3084 } 3085 3086 /* 3087 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3088 * 3089 * Get status of file pointed to by fd/path. 
3090 */ 3091 int 3092 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap) 3093 { 3094 struct nlookupdata nd; 3095 struct stat st; 3096 int error; 3097 int flags; 3098 struct file *fp; 3099 3100 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3101 return (EINVAL); 3102 3103 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3104 3105 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3106 UIO_USERSPACE, flags); 3107 if (error == 0) { 3108 error = kern_stat(&nd, &st); 3109 if (error == 0) 3110 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3111 } 3112 nlookup_done_at(&nd, fp); 3113 return (error); 3114 } 3115 3116 static int 3117 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3118 { 3119 struct nlookupdata nd; 3120 struct vnode *vp; 3121 int error; 3122 3123 vp = NULL; 3124 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3125 if (error == 0) 3126 error = nlookup(&nd); 3127 if (error == 0) 3128 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3129 nlookup_done(&nd); 3130 if (error == 0) { 3131 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3132 vput(vp); 3133 } 3134 return (error); 3135 } 3136 3137 /* 3138 * pathconf_Args(char *path, int name) 3139 * 3140 * Get configurable pathname variables. 3141 */ 3142 int 3143 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap) 3144 { 3145 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3146 &sysmsg->sysmsg_reg)); 3147 } 3148 3149 /* 3150 * lpathconf_Args(char *path, int name) 3151 * 3152 * Get configurable pathname variables, but don't follow symlinks. 3153 */ 3154 int 3155 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap) 3156 { 3157 return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg)); 3158 } 3159 3160 /* 3161 * XXX: daver 3162 * kern_readlink isn't properly split yet. There is a copyin burried 3163 * in VOP_READLINK(). 
3164 */ 3165 int 3166 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3167 { 3168 struct thread *td = curthread; 3169 struct vnode *vp; 3170 struct iovec aiov; 3171 struct uio auio; 3172 int error; 3173 3174 nd->nl_flags |= NLC_SHAREDLOCK; 3175 if ((error = nlookup(nd)) != 0) 3176 return (error); 3177 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3178 if (error) 3179 return (error); 3180 if (vp->v_type != VLNK) { 3181 error = EINVAL; 3182 } else { 3183 aiov.iov_base = buf; 3184 aiov.iov_len = count; 3185 auio.uio_iov = &aiov; 3186 auio.uio_iovcnt = 1; 3187 auio.uio_offset = 0; 3188 auio.uio_rw = UIO_READ; 3189 auio.uio_segflg = UIO_USERSPACE; 3190 auio.uio_td = td; 3191 auio.uio_resid = count; 3192 error = VOP_READLINK(vp, &auio, td->td_ucred); 3193 } 3194 vput(vp); 3195 *res = count - auio.uio_resid; 3196 return (error); 3197 } 3198 3199 /* 3200 * readlink_args(char *path, char *buf, int count) 3201 * 3202 * Return target name of a symbolic link. 3203 */ 3204 int 3205 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap) 3206 { 3207 struct nlookupdata nd; 3208 int error; 3209 3210 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3211 if (error == 0) { 3212 error = kern_readlink(&nd, uap->buf, uap->count, 3213 &sysmsg->sysmsg_result); 3214 } 3215 nlookup_done(&nd); 3216 return (error); 3217 } 3218 3219 /* 3220 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3221 * 3222 * Return target name of a symbolic link. The path is relative to the 3223 * directory associated with fd. 
3224 */ 3225 int 3226 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap) 3227 { 3228 struct nlookupdata nd; 3229 struct file *fp; 3230 int error; 3231 3232 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3233 if (error == 0) { 3234 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3235 &sysmsg->sysmsg_result); 3236 } 3237 nlookup_done_at(&nd, fp); 3238 return (error); 3239 } 3240 3241 static int 3242 setfflags(struct vnode *vp, u_long flags) 3243 { 3244 struct thread *td = curthread; 3245 int error; 3246 struct vattr vattr; 3247 3248 /* 3249 * Prevent non-root users from setting flags on devices. When 3250 * a device is reused, users can retain ownership of the device 3251 * if they are allowed to set flags and programs assume that 3252 * chown can't fail when done as root. 3253 */ 3254 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3255 ((error = 3256 caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHFLAGS_DEV)) != 0)) 3257 { 3258 return (error); 3259 } 3260 3261 /* 3262 * note: vget is required for any operation that might mod the vnode 3263 * so VINACTIVE is properly cleared. 3264 */ 3265 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3266 VATTR_NULL(&vattr); 3267 vattr.va_flags = flags; 3268 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3269 vput(vp); 3270 } 3271 return (error); 3272 } 3273 3274 /* 3275 * chflags(const char *path, u_long flags) 3276 * 3277 * Change flags of a file given a path name. 
3278 */ 3279 int 3280 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap) 3281 { 3282 struct nlookupdata nd; 3283 struct vnode *vp; 3284 int error; 3285 3286 vp = NULL; 3287 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3288 if (error == 0) 3289 error = nlookup(&nd); 3290 if (error == 0) 3291 error = ncp_writechk(&nd.nl_nch); 3292 if (error == 0) 3293 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3294 nlookup_done(&nd); 3295 if (error == 0) { 3296 error = setfflags(vp, uap->flags); 3297 vrele(vp); 3298 } 3299 return (error); 3300 } 3301 3302 /* 3303 * lchflags(const char *path, u_long flags) 3304 * 3305 * Change flags of a file given a path name, but don't follow symlinks. 3306 */ 3307 int 3308 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap) 3309 { 3310 struct nlookupdata nd; 3311 struct vnode *vp; 3312 int error; 3313 3314 vp = NULL; 3315 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3316 if (error == 0) 3317 error = nlookup(&nd); 3318 if (error == 0) 3319 error = ncp_writechk(&nd.nl_nch); 3320 if (error == 0) 3321 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3322 nlookup_done(&nd); 3323 if (error == 0) { 3324 error = setfflags(vp, uap->flags); 3325 vrele(vp); 3326 } 3327 return (error); 3328 } 3329 3330 /* 3331 * fchflags_args(int fd, u_flags flags) 3332 * 3333 * Change flags of a file given a file descriptor. 
3334 */ 3335 int 3336 sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap) 3337 { 3338 struct thread *td = curthread; 3339 struct file *fp; 3340 int error; 3341 3342 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3343 return (error); 3344 if (fp->f_nchandle.ncp) 3345 error = ncp_writechk(&fp->f_nchandle); 3346 if (error == 0) 3347 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3348 fdrop(fp); 3349 return (error); 3350 } 3351 3352 /* 3353 * chflagsat_args(int fd, const char *path, u_long flags, int atflags) 3354 * change flags given a pathname relative to a filedescriptor 3355 */ 3356 int 3357 sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap) 3358 { 3359 struct nlookupdata nd; 3360 struct vnode *vp; 3361 struct file *fp; 3362 int error; 3363 int lookupflags; 3364 3365 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3366 return (EINVAL); 3367 3368 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3369 3370 vp = NULL; 3371 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3372 if (error == 0) 3373 error = nlookup(&nd); 3374 if (error == 0) 3375 error = ncp_writechk(&nd.nl_nch); 3376 if (error == 0) 3377 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3378 nlookup_done_at(&nd, fp); 3379 if (error == 0) { 3380 error = setfflags(vp, uap->flags); 3381 vrele(vp); 3382 } 3383 return (error); 3384 } 3385 3386 3387 static int 3388 setfmode(struct vnode *vp, int mode) 3389 { 3390 struct thread *td = curthread; 3391 int error; 3392 struct vattr vattr; 3393 3394 /* 3395 * note: vget is required for any operation that might mod the vnode 3396 * so VINACTIVE is properly cleared. 
3397 */ 3398 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3399 VATTR_NULL(&vattr); 3400 vattr.va_mode = mode & ALLPERMS; 3401 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3402 cache_inval_wxok(vp); 3403 vput(vp); 3404 } 3405 return error; 3406 } 3407 3408 int 3409 kern_chmod(struct nlookupdata *nd, int mode) 3410 { 3411 struct vnode *vp; 3412 int error; 3413 3414 if ((error = nlookup(nd)) != 0) 3415 return (error); 3416 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3417 return (error); 3418 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3419 error = setfmode(vp, mode); 3420 vrele(vp); 3421 return (error); 3422 } 3423 3424 /* 3425 * chmod_args(char *path, int mode) 3426 * 3427 * Change mode of a file given path name. 3428 */ 3429 int 3430 sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap) 3431 { 3432 struct nlookupdata nd; 3433 int error; 3434 3435 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3436 if (error == 0) 3437 error = kern_chmod(&nd, uap->mode); 3438 nlookup_done(&nd); 3439 return (error); 3440 } 3441 3442 /* 3443 * lchmod_args(char *path, int mode) 3444 * 3445 * Change mode of a file given path name (don't follow links.) 3446 */ 3447 int 3448 sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap) 3449 { 3450 struct nlookupdata nd; 3451 int error; 3452 3453 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3454 if (error == 0) 3455 error = kern_chmod(&nd, uap->mode); 3456 nlookup_done(&nd); 3457 return (error); 3458 } 3459 3460 /* 3461 * fchmod_args(int fd, int mode) 3462 * 3463 * Change mode of a file given a file descriptor. 
3464 */ 3465 int 3466 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap) 3467 { 3468 struct thread *td = curthread; 3469 struct file *fp; 3470 int error; 3471 3472 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3473 return (error); 3474 if (fp->f_nchandle.ncp) 3475 error = ncp_writechk(&fp->f_nchandle); 3476 if (error == 0) 3477 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3478 fdrop(fp); 3479 return (error); 3480 } 3481 3482 /* 3483 * fchmodat_args(char *path, int mode) 3484 * 3485 * Change mode of a file pointed to by fd/path. 3486 */ 3487 int 3488 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap) 3489 { 3490 struct nlookupdata nd; 3491 struct file *fp; 3492 int error; 3493 int flags; 3494 3495 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3496 return (EINVAL); 3497 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3498 3499 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3500 UIO_USERSPACE, flags); 3501 if (error == 0) 3502 error = kern_chmod(&nd, uap->mode); 3503 nlookup_done_at(&nd, fp); 3504 return (error); 3505 } 3506 3507 static int 3508 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3509 { 3510 struct thread *td = curthread; 3511 int error; 3512 struct vattr vattr; 3513 uid_t o_uid; 3514 gid_t o_gid; 3515 uint64_t size; 3516 3517 /* 3518 * note: vget is required for any operation that might mod the vnode 3519 * so VINACTIVE is properly cleared. 
3520 */ 3521 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3522 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3523 return error; 3524 o_uid = vattr.va_uid; 3525 o_gid = vattr.va_gid; 3526 size = vattr.va_size; 3527 3528 VATTR_NULL(&vattr); 3529 vattr.va_uid = uid; 3530 vattr.va_gid = gid; 3531 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3532 vput(vp); 3533 } 3534 3535 if (error == 0) { 3536 if (uid == -1) 3537 uid = o_uid; 3538 if (gid == -1) 3539 gid = o_gid; 3540 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3541 VFS_ACCOUNT(mp, uid, gid, size); 3542 } 3543 3544 return error; 3545 } 3546 3547 int 3548 kern_chown(struct nlookupdata *nd, int uid, int gid) 3549 { 3550 struct vnode *vp; 3551 int error; 3552 3553 if ((error = nlookup(nd)) != 0) 3554 return (error); 3555 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3556 return (error); 3557 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3558 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3559 vrele(vp); 3560 return (error); 3561 } 3562 3563 /* 3564 * chown(char *path, int uid, int gid) 3565 * 3566 * Set ownership given a path name. 3567 */ 3568 int 3569 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap) 3570 { 3571 struct nlookupdata nd; 3572 int error; 3573 3574 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3575 if (error == 0) 3576 error = kern_chown(&nd, uap->uid, uap->gid); 3577 nlookup_done(&nd); 3578 return (error); 3579 } 3580 3581 /* 3582 * lchown_args(char *path, int uid, int gid) 3583 * 3584 * Set ownership given a path name, do not cross symlinks. 
3585 */ 3586 int 3587 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap) 3588 { 3589 struct nlookupdata nd; 3590 int error; 3591 3592 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3593 if (error == 0) 3594 error = kern_chown(&nd, uap->uid, uap->gid); 3595 nlookup_done(&nd); 3596 return (error); 3597 } 3598 3599 /* 3600 * fchown_args(int fd, int uid, int gid) 3601 * 3602 * Set ownership given a file descriptor. 3603 */ 3604 int 3605 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap) 3606 { 3607 struct thread *td = curthread; 3608 struct proc *p = td->td_proc; 3609 struct file *fp; 3610 int error; 3611 3612 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3613 return (error); 3614 if (fp->f_nchandle.ncp) 3615 error = ncp_writechk(&fp->f_nchandle); 3616 if (error == 0) 3617 error = setfown(p->p_fd->fd_ncdir.mount, 3618 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3619 fdrop(fp); 3620 return (error); 3621 } 3622 3623 /* 3624 * fchownat(int fd, char *path, int uid, int gid, int flags) 3625 * 3626 * Set ownership of file pointed to by fd/path. 3627 */ 3628 int 3629 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap) 3630 { 3631 struct nlookupdata nd; 3632 struct file *fp; 3633 int error; 3634 int flags; 3635 3636 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3637 return (EINVAL); 3638 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3639 3640 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3641 UIO_USERSPACE, flags); 3642 if (error == 0) 3643 error = kern_chown(&nd, uap->uid, uap->gid); 3644 nlookup_done_at(&nd, fp); 3645 return (error); 3646 } 3647 3648 3649 static int 3650 getutimes(struct timeval *tvp, struct timespec *tsp) 3651 { 3652 struct timeval tv[2]; 3653 int error; 3654 3655 if (tvp == NULL) { 3656 microtime(&tv[0]); 3657 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3658 tsp[1] = tsp[0]; 3659 } else { 3660 if ((error = itimerfix(tvp)) != 0) 3661 return (error); 3662 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3663 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3664 } 3665 return 0; 3666 } 3667 3668 static int 3669 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3670 { 3671 struct timespec tsnow; 3672 int error; 3673 3674 *nullflag = 0; 3675 nanotime(&tsnow); 3676 if (ts == NULL) { 3677 newts[0] = tsnow; 3678 newts[1] = tsnow; 3679 *nullflag = 1; 3680 return (0); 3681 } 3682 3683 newts[0] = ts[0]; 3684 newts[1] = ts[1]; 3685 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) { 3686 newts[0].tv_sec = VNOVAL; 3687 newts[1].tv_sec = VNOVAL; 3688 return (0); 3689 } 3690 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3691 *nullflag = 1; 3692 3693 if (newts[0].tv_nsec == UTIME_OMIT) 3694 newts[0].tv_sec = VNOVAL; 3695 else if (newts[0].tv_nsec == UTIME_NOW) 3696 newts[0] = tsnow; 3697 else if ((error = itimespecfix(&newts[0])) != 0) 3698 return (error); 3699 3700 if (newts[1].tv_nsec == UTIME_OMIT) 3701 newts[1].tv_sec = VNOVAL; 3702 else if (newts[1].tv_nsec == UTIME_NOW) 3703 newts[1] = tsnow; 3704 else if ((error = itimespecfix(&newts[1])) != 0) 3705 return (error); 3706 3707 return (0); 3708 } 3709 3710 static int 3711 setutimes(struct vnode *vp, struct vattr *vattr, 3712 const struct timespec *ts, int nullflag) 3713 { 3714 struct thread *td = curthread; 3715 int error; 3716 3717 VATTR_NULL(vattr); 3718 
vattr->va_atime = ts[0]; 3719 vattr->va_mtime = ts[1]; 3720 if (nullflag) 3721 vattr->va_vaflags |= VA_UTIMES_NULL; 3722 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3723 3724 return error; 3725 } 3726 3727 int 3728 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3729 { 3730 struct timespec ts[2]; 3731 int error; 3732 3733 if (tptr) { 3734 if ((error = getutimes(tptr, ts)) != 0) 3735 return (error); 3736 } 3737 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3738 return (error); 3739 } 3740 3741 /* 3742 * utimes_args(char *path, struct timeval *tptr) 3743 * 3744 * Set the access and modification times of a file. 3745 */ 3746 int 3747 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap) 3748 { 3749 struct timeval tv[2]; 3750 struct nlookupdata nd; 3751 int error; 3752 3753 if (uap->tptr) { 3754 error = copyin(uap->tptr, tv, sizeof(tv)); 3755 if (error) 3756 return (error); 3757 } 3758 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3759 if (error == 0) 3760 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3761 nlookup_done(&nd); 3762 return (error); 3763 } 3764 3765 /* 3766 * lutimes_args(char *path, struct timeval *tptr) 3767 * 3768 * Set the access and modification times of a file. 3769 */ 3770 int 3771 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap) 3772 { 3773 struct timeval tv[2]; 3774 struct nlookupdata nd; 3775 int error; 3776 3777 if (uap->tptr) { 3778 error = copyin(uap->tptr, tv, sizeof(tv)); 3779 if (error) 3780 return (error); 3781 } 3782 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3783 if (error == 0) 3784 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3785 nlookup_done(&nd); 3786 return (error); 3787 } 3788 3789 /* 3790 * Set utimes on a file descriptor. The creds used to open the 3791 * file are used to determine whether the operation is allowed 3792 * or not. 
3793 */ 3794 int 3795 kern_futimens(int fd, struct timespec *ts) 3796 { 3797 struct thread *td = curthread; 3798 struct timespec newts[2]; 3799 struct file *fp; 3800 struct vnode *vp; 3801 struct vattr vattr; 3802 struct vattr_lite lva; 3803 int nullflag; 3804 int error; 3805 3806 error = getutimens(ts, newts, &nullflag); 3807 if (error) 3808 return (error); 3809 if ((error = holdvnode(td, fd, &fp)) != 0) 3810 return (error); 3811 if (fp->f_nchandle.ncp) 3812 error = ncp_writechk(&fp->f_nchandle); 3813 if (error == 0) { 3814 vp = fp->f_data; 3815 error = vget(vp, LK_EXCLUSIVE); 3816 if (error == 0) { 3817 error = VOP_GETATTR_FP(vp, &vattr, fp); 3818 if (error == 0) { 3819 lva.va_type = vattr.va_type; 3820 lva.va_nlink = vattr.va_nlink; 3821 lva.va_mode = vattr.va_mode; 3822 lva.va_uid = vattr.va_uid; 3823 lva.va_gid = vattr.va_gid; 3824 lva.va_size = vattr.va_size; 3825 lva.va_flags = vattr.va_flags; 3826 3827 error = naccess_lva(&lva, NLC_OWN | NLC_WRITE, 3828 fp->f_cred); 3829 } 3830 if (error == 0) { 3831 error = setutimes(vp, &vattr, newts, nullflag); 3832 } 3833 vput(vp); 3834 } 3835 } 3836 fdrop(fp); 3837 return (error); 3838 } 3839 3840 /* 3841 * futimens_args(int fd, struct timespec *ts) 3842 * 3843 * Set the access and modification times of a file. 3844 */ 3845 int 3846 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap) 3847 { 3848 struct timespec ts[2]; 3849 int error; 3850 3851 if (uap->ts) { 3852 error = copyin(uap->ts, ts, sizeof(ts)); 3853 if (error) 3854 return (error); 3855 } 3856 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3857 return (error); 3858 } 3859 3860 int 3861 kern_futimes(int fd, struct timeval *tptr) 3862 { 3863 struct timespec ts[2]; 3864 int error; 3865 3866 if (tptr) { 3867 if ((error = getutimes(tptr, ts)) != 0) 3868 return (error); 3869 } 3870 error = kern_futimens(fd, tptr ? 
ts : NULL); 3871 return (error); 3872 } 3873 3874 /* 3875 * futimes_args(int fd, struct timeval *tptr) 3876 * 3877 * Set the access and modification times of a file. 3878 */ 3879 int 3880 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap) 3881 { 3882 struct timeval tv[2]; 3883 int error; 3884 3885 if (uap->tptr) { 3886 error = copyin(uap->tptr, tv, sizeof(tv)); 3887 if (error) 3888 return (error); 3889 } 3890 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3891 return (error); 3892 } 3893 3894 /* 3895 * futimesat_args(int fd, const char *path, struct timeval *tptr) 3896 * 3897 * Set the access and modification times of a file. 3898 */ 3899 int 3900 sys_futimesat(struct sysmsg *sysmsg, const struct futimesat_args *uap) 3901 { 3902 struct timespec ts[2]; 3903 struct nlookupdata nd; 3904 struct file *fp; 3905 int error; 3906 3907 if (uap->tptr) { 3908 struct timeval tv[2]; 3909 3910 if ((error = copyin(uap->tptr, tv, sizeof(tv))) != 0) 3911 return error; 3912 if ((error = getutimes(tv, ts)) != 0) 3913 return error; 3914 } 3915 3916 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3917 UIO_USERSPACE, 0); 3918 if (error == 0) 3919 error = kern_utimensat(&nd, uap->tptr ? 
ts : NULL, 0); 3920 nlookup_done_at(&nd, fp); 3921 3922 return (error); 3923 } 3924 3925 int 3926 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3927 { 3928 struct timespec newts[2]; 3929 struct vnode *vp; 3930 struct vattr vattr; 3931 int nullflag; 3932 int error; 3933 3934 if (flags & ~AT_SYMLINK_NOFOLLOW) 3935 return (EINVAL); 3936 3937 error = getutimens(ts, newts, &nullflag); 3938 if (error) 3939 return (error); 3940 3941 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3942 if ((error = nlookup(nd)) != 0) 3943 return (error); 3944 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3945 return (error); 3946 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3947 return (error); 3948 if ((error = vn_writechk(vp)) == 0) { 3949 error = vget(vp, LK_EXCLUSIVE); 3950 if (error == 0) { 3951 error = setutimes(vp, &vattr, newts, nullflag); 3952 vput(vp); 3953 } 3954 } 3955 vrele(vp); 3956 return (error); 3957 } 3958 3959 /* 3960 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3961 * 3962 * Set file access and modification times of a file. 3963 */ 3964 int 3965 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap) 3966 { 3967 struct timespec ts[2]; 3968 struct nlookupdata nd; 3969 struct file *fp; 3970 int error; 3971 int flags; 3972 3973 if (uap->ts) { 3974 error = copyin(uap->ts, ts, sizeof(ts)); 3975 if (error) 3976 return (error); 3977 } 3978 3979 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3980 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3981 UIO_USERSPACE, flags); 3982 if (error == 0) 3983 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3984 nlookup_done_at(&nd, fp); 3985 return (error); 3986 } 3987 3988 int 3989 kern_truncate(struct nlookupdata *nd, off_t length) 3990 { 3991 struct vnode *vp; 3992 struct vattr vattr; 3993 int error; 3994 uid_t uid = 0; 3995 gid_t gid = 0; 3996 uint64_t old_size = 0; 3997 3998 if (length < 0) 3999 return(EINVAL); 4000 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 4001 if ((error = nlookup(nd)) != 0) 4002 return (error); 4003 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4004 return (error); 4005 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 4006 return (error); 4007 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 4008 if (error) { 4009 vrele(vp); 4010 return (error); 4011 } 4012 if (vp->v_type == VDIR) { 4013 error = EISDIR; 4014 goto done; 4015 } 4016 if (vfs_quota_enabled) { 4017 error = VOP_GETATTR(vp, &vattr); 4018 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 4019 uid = vattr.va_uid; 4020 gid = vattr.va_gid; 4021 old_size = vattr.va_size; 4022 } 4023 4024 if ((error = vn_writechk(vp)) == 0) { 4025 VATTR_NULL(&vattr); 4026 vattr.va_size = length; 4027 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 4028 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 4029 } 4030 done: 4031 vput(vp); 4032 return (error); 4033 } 4034 4035 /* 4036 * truncate(char *path, int pad, off_t length) 4037 * 4038 * Truncate a file given its path name. 
4039 */ 4040 int 4041 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap) 4042 { 4043 struct nlookupdata nd; 4044 int error; 4045 4046 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4047 if (error == 0) 4048 error = kern_truncate(&nd, uap->length); 4049 nlookup_done(&nd); 4050 return error; 4051 } 4052 4053 int 4054 kern_ftruncate(int fd, off_t length) 4055 { 4056 struct thread *td = curthread; 4057 struct vattr vattr; 4058 struct vnode *vp; 4059 struct file *fp; 4060 int error; 4061 uid_t uid = 0; 4062 gid_t gid = 0; 4063 uint64_t old_size = 0; 4064 struct mount *mp; 4065 4066 if (length < 0) 4067 return(EINVAL); 4068 if ((error = holdvnode(td, fd, &fp)) != 0) 4069 return (error); 4070 if (fp->f_nchandle.ncp) { 4071 error = ncp_writechk(&fp->f_nchandle); 4072 if (error) 4073 goto done; 4074 } 4075 if ((fp->f_flag & FWRITE) == 0) { 4076 error = EINVAL; 4077 goto done; 4078 } 4079 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 4080 error = EINVAL; 4081 goto done; 4082 } 4083 vp = (struct vnode *)fp->f_data; 4084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4085 if (vp->v_type == VDIR) { 4086 error = EISDIR; 4087 vn_unlock(vp); 4088 goto done; 4089 } 4090 4091 if (vfs_quota_enabled) { 4092 error = VOP_GETATTR_FP(vp, &vattr, fp); 4093 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 4094 uid = vattr.va_uid; 4095 gid = vattr.va_gid; 4096 old_size = vattr.va_size; 4097 } 4098 4099 if ((error = vn_writechk(vp)) == 0) { 4100 VATTR_NULL(&vattr); 4101 vattr.va_size = length; 4102 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 4103 mp = vq_vptomp(vp); 4104 VFS_ACCOUNT(mp, uid, gid, length - old_size); 4105 } 4106 vn_unlock(vp); 4107 done: 4108 fdrop(fp); 4109 return (error); 4110 } 4111 4112 /* 4113 * ftruncate_args(int fd, int pad, off_t length) 4114 * 4115 * Truncate a file given a file descriptor. 
4116 */ 4117 int 4118 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap) 4119 { 4120 int error; 4121 4122 error = kern_ftruncate(uap->fd, uap->length); 4123 4124 return (error); 4125 } 4126 4127 int 4128 kern_fsync(int fd, bool fullsync) 4129 { 4130 struct thread *td = curthread; 4131 struct vnode *vp; 4132 struct file *fp; 4133 vm_object_t obj; 4134 int error; 4135 4136 if ((error = holdvnode(td, fd, &fp)) != 0) 4137 return (error); 4138 vp = (struct vnode *)fp->f_data; 4139 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4140 if ((obj = vp->v_object) != NULL) { 4141 if (vp->v_mount == NULL || 4142 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4143 vm_object_page_clean(obj, 0, 0, 0); 4144 } 4145 } 4146 error = fullsync ? 4147 VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp) : 4148 VOP_FDATASYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4149 if (error == 0 && vp->v_mount) 4150 error = buf_fsync(vp); 4151 vn_unlock(vp); 4152 fdrop(fp); 4153 4154 return (error); 4155 } 4156 4157 /* 4158 * fsync(int fd) 4159 * 4160 * Sync an open file. 4161 */ 4162 int 4163 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap) 4164 { 4165 return (kern_fsync(uap->fd, true)); 4166 } 4167 4168 /* 4169 * fdatasync(int fd) 4170 * 4171 * Data-sync an open file. 4172 */ 4173 int 4174 sys_fdatasync(struct sysmsg *sysmsg, const struct fdatasync_args *uap) 4175 { 4176 return (kern_fsync(uap->fd, false)); 4177 } 4178 4179 /* 4180 * rename op. 4181 * 4182 * NOTE: error == 0 and nl_dvp is NULL indicates a mount point, operation 4183 * disallowed. e.g. /var/cache where /var/cache is a null-mount, for 4184 * example. 
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	struct mount *userenlk;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);

	/*
	 * Attempt to rename a mount point (from or to)
	 */
	if (error == 0 && fromnd->nl_dvp == NULL)
		return (EINVAL);

	/*
	 * Build and hold a handle on the source's parent directory.
	 */
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	/*
	 * When the source is a directory the mount's mnt_renlock is taken
	 * exclusively for the rest of the operation; userenlk remembers
	 * the mount so the lock can be released at 'done'.
	 */
	if (fromnd->nl_nch.ncp->nc_vp &&
	    fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
		userenlk = fnchd.mount;
		cache_unlock(&fromnd->nl_nch);
		lockmgr(&userenlk->mnt_renlock, LK_EXCLUSIVE);
	} else {
		userenlk = NULL;
		cache_unlock(&fromnd->nl_nch);
	}

	/*
	 * Lookup target
	 */
	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		goto done;
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	/*
	 * Attempt to rename a mount point (from or to)
	 */
	if (error == 0 && tond->nl_dvp == NULL) {
		cache_drop(&fnchd);
		error = ENOENT;
		goto done;
	}

	/*
	 * Build and hold a handle on the target's parent directory.
	 */
	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		error = ENOENT;
		goto done;
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		error = 0;
		goto done;
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		error = EINVAL;
		goto done;
	}

	/*
	 * Lock all four namecache entries.  tond is already locked.
	 *
	 * From here on the parent handles are released through 'finish'
	 * (cache_put) rather than with cache_drop.
	 */
	cache_lock4_tondlocked(&fnchd, &fromnd->nl_nch,
			       &tnchd, &tond->nl_nch,
			       fromnd->nl_cred, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.  The low bit of the difference is ignored.
	 */
	if (((fromnd->nl_nch.ncp->nc_generation - fncp_gen) & ~1) ||
	    ((tond->nl_nch.ncp->nc_generation - tncp_gen) & ~1))
	{
		krateprintf(&krate_rename,
			"kern_rename: retry due to race on: "
			"\"%s\" -> \"%s\" (%d,%d)\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name,
			fromnd->nl_nch.ncp->nc_generation - fncp_gen,
			tond->nl_nch.ncp->nc_generation - tncp_gen);
		error = EAGAIN;
		goto finish;
	}

	/*
	 * If either fromnd or tond are marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED))) {
		krateprintf(&krate_rename,
			"kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		error = EAGAIN;
		goto finish;
	}

	/*
	 * Make sure the parent directories linkages are the same.  We have
	 * already checked that fromnd and tond are not mount points so this
	 * should not loop forever on a cross-mount.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		error = EAGAIN;
		goto finish;
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		error = EXDEV;
		goto finish;
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		goto finish;
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * Only required when renaming a directory, in which case userenlk is
	 * non-NULL.
	 */
	if (__predict_false(userenlk && error == 0)) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
finish:
	/* parent handles were locked by cache_lock4_tondlocked() */
	cache_put(&tnchd);
	cache_put(&fnchd);
done:
	/* release the directory rename lock if it was taken above */
	if (userenlk)
		lockmgr(&userenlk->mnt_renlock, LK_RELEASE);
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * Both lookups are re-initialized and the rename is retried for as long
 * as kern_rename() reports EAGAIN.
 */
int
sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	do {
		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&fromnd, &tond);
			nlookup_done(&tond);
		}
		nlookup_done(&fromnd);
	} while (error == EAGAIN);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
4456 */ 4457 int 4458 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap) 4459 { 4460 struct nlookupdata oldnd, newnd; 4461 struct file *oldfp, *newfp; 4462 int error; 4463 4464 do { 4465 error = nlookup_init_at(&oldnd, &oldfp, 4466 uap->oldfd, uap->old, 4467 UIO_USERSPACE, 0); 4468 if (error == 0) { 4469 error = nlookup_init_at(&newnd, &newfp, 4470 uap->newfd, uap->new, 4471 UIO_USERSPACE, 0); 4472 if (error == 0) 4473 error = kern_rename(&oldnd, &newnd); 4474 nlookup_done_at(&newnd, newfp); 4475 } 4476 nlookup_done_at(&oldnd, oldfp); 4477 } while (error == EAGAIN); 4478 return (error); 4479 } 4480 4481 int 4482 kern_mkdir(struct nlookupdata *nd, int mode) 4483 { 4484 struct thread *td = curthread; 4485 struct proc *p = td->td_proc; 4486 struct vnode *vp; 4487 struct vattr vattr; 4488 int error; 4489 4490 bwillinode(1); 4491 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4492 if ((error = nlookup(nd)) != 0) 4493 return (error); 4494 4495 if (nd->nl_nch.ncp->nc_vp) 4496 return (EEXIST); 4497 if (nd->nl_dvp == NULL) 4498 return (EINVAL); 4499 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4500 return (error); 4501 VATTR_NULL(&vattr); 4502 vattr.va_type = VDIR; 4503 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4504 4505 vp = NULL; 4506 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4507 if (error == 0) 4508 vput(vp); 4509 return (error); 4510 } 4511 4512 /* 4513 * mkdir_args(char *path, int mode) 4514 * 4515 * Make a directory file. 4516 */ 4517 int 4518 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap) 4519 { 4520 struct nlookupdata nd; 4521 int error; 4522 4523 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4524 if (error == 0) 4525 error = kern_mkdir(&nd, uap->mode); 4526 nlookup_done(&nd); 4527 return (error); 4528 } 4529 4530 /* 4531 * mkdirat_args(int fd, char *path, mode_t mode) 4532 * 4533 * Make a directory file. 
The path is relative to the directory associated 4534 * with fd. 4535 */ 4536 int 4537 sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap) 4538 { 4539 struct nlookupdata nd; 4540 struct file *fp; 4541 int error; 4542 4543 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4544 if (error == 0) 4545 error = kern_mkdir(&nd, uap->mode); 4546 nlookup_done_at(&nd, fp); 4547 return (error); 4548 } 4549 4550 int 4551 kern_rmdir(struct nlookupdata *nd) 4552 { 4553 int error; 4554 4555 bwillinode(1); 4556 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4557 if ((error = nlookup(nd)) != 0) 4558 return (error); 4559 4560 /* 4561 * Do not allow directories representing mount points to be 4562 * deleted, even if empty. Check write perms on mount point 4563 * in case the vnode is aliased (aka nullfs). 4564 */ 4565 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4566 return (EBUSY); 4567 if (nd->nl_dvp == NULL) 4568 return (EINVAL); 4569 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4570 return (error); 4571 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4572 return (error); 4573 } 4574 4575 /* 4576 * rmdir_args(char *path) 4577 * 4578 * Remove a directory file. 
4579 */ 4580 int 4581 sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap) 4582 { 4583 struct nlookupdata nd; 4584 int error; 4585 4586 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4587 if (error == 0) 4588 error = kern_rmdir(&nd); 4589 nlookup_done(&nd); 4590 return (error); 4591 } 4592 4593 int 4594 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4595 enum uio_seg direction) 4596 { 4597 struct thread *td = curthread; 4598 struct vnode *vp; 4599 struct file *fp; 4600 struct uio auio; 4601 struct iovec aiov; 4602 off_t loff; 4603 int error, eofflag; 4604 4605 if ((error = holdvnode(td, fd, &fp)) != 0) 4606 return (error); 4607 if ((fp->f_flag & FREAD) == 0) { 4608 error = EBADF; 4609 goto done; 4610 } 4611 vp = (struct vnode *)fp->f_data; 4612 if (vp->v_type != VDIR) { 4613 error = EINVAL; 4614 goto done; 4615 } 4616 aiov.iov_base = buf; 4617 aiov.iov_len = count; 4618 auio.uio_iov = &aiov; 4619 auio.uio_iovcnt = 1; 4620 auio.uio_rw = UIO_READ; 4621 auio.uio_segflg = direction; 4622 auio.uio_td = td; 4623 auio.uio_resid = count; 4624 loff = auio.uio_offset = fp->f_offset; 4625 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4626 fp->f_offset = auio.uio_offset; 4627 if (error) 4628 goto done; 4629 4630 /* 4631 * WARNING! *basep may not be wide enough to accomodate the 4632 * seek offset. XXX should we hack this to return the upper 32 bits 4633 * for offsets greater then 4G? 4634 */ 4635 if (basep) { 4636 *basep = (long)loff; 4637 } 4638 *res = count - auio.uio_resid; 4639 done: 4640 fdrop(fp); 4641 return (error); 4642 } 4643 4644 /* 4645 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4646 * 4647 * Read a block of directory entries in a file system independent format. 
4648 */ 4649 int 4650 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap) 4651 { 4652 long base; 4653 int error; 4654 4655 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4656 &sysmsg->sysmsg_result, UIO_USERSPACE); 4657 4658 if (error == 0 && uap->basep) 4659 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4660 return (error); 4661 } 4662 4663 /* 4664 * getdents_args(int fd, char *buf, size_t count) 4665 */ 4666 int 4667 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap) 4668 { 4669 int error; 4670 4671 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4672 &sysmsg->sysmsg_result, UIO_USERSPACE); 4673 4674 return (error); 4675 } 4676 4677 /* 4678 * Set the mode mask for creation of filesystem nodes. 4679 * 4680 * umask(int newmask) 4681 */ 4682 int 4683 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap) 4684 { 4685 struct thread *td = curthread; 4686 struct proc *p = td->td_proc; 4687 struct filedesc *fdp; 4688 4689 fdp = p->p_fd; 4690 sysmsg->sysmsg_result = fdp->fd_cmask; 4691 fdp->fd_cmask = uap->newmask & ALLPERMS; 4692 return (0); 4693 } 4694 4695 /* 4696 * revoke(char *path) 4697 * 4698 * Void all references to file by ripping underlying filesystem 4699 * away from vnode. 
4700 */ 4701 int 4702 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap) 4703 { 4704 struct nlookupdata nd; 4705 struct vattr vattr; 4706 struct vnode *vp; 4707 struct ucred *cred; 4708 int error; 4709 4710 vp = NULL; 4711 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4712 if (error == 0) 4713 error = nlookup(&nd); 4714 if (error == 0) 4715 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4716 cred = crhold(nd.nl_cred); 4717 nlookup_done(&nd); 4718 if (error == 0) { 4719 if (error == 0) 4720 error = VOP_GETATTR(vp, &vattr); 4721 if (error == 0 && cred->cr_uid != vattr.va_uid) 4722 error = caps_priv_check(cred, SYSCAP_NOVFS_REVOKE); 4723 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4724 if (vcount(vp) > 0) 4725 error = vrevoke(vp, cred); 4726 } else if (error == 0) { 4727 error = vrevoke(vp, cred); 4728 } 4729 vrele(vp); 4730 } 4731 if (cred) 4732 crfree(cred); 4733 return (error); 4734 } 4735 4736 /* 4737 * getfh_args(char *fname, fhandle_t *fhp) 4738 * 4739 * Get (NFS) file handle 4740 * 4741 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4742 * mount. This allows nullfs mounts to be explicitly exported. 4743 * 4744 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4745 * 4746 * nullfs mounts of subdirectories are not safe. That is, it will 4747 * work, but you do not really have protection against access to 4748 * the related parent directories. 
4749 */ 4750 int 4751 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap) 4752 { 4753 struct nlookupdata nd; 4754 fhandle_t fh; 4755 struct vnode *vp; 4756 struct mount *mp; 4757 int error; 4758 4759 /* 4760 * Must be super user 4761 */ 4762 if ((error = caps_priv_check_self(SYSCAP_RESTRICTEDROOT)) != 0) 4763 return (error); 4764 4765 vp = NULL; 4766 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4767 if (error == 0) 4768 error = nlookup(&nd); 4769 if (error == 0) 4770 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4771 mp = nd.nl_nch.mount; 4772 nlookup_done(&nd); 4773 if (error == 0) { 4774 bzero(&fh, sizeof(fh)); 4775 fh.fh_fsid = mp->mnt_stat.f_fsid; 4776 error = VFS_VPTOFH(vp, &fh.fh_fid); 4777 vput(vp); 4778 if (error == 0) 4779 error = copyout(&fh, uap->fhp, sizeof(fh)); 4780 } 4781 return (error); 4782 } 4783 4784 /* 4785 * fhopen_args(const struct fhandle *u_fhp, int flags) 4786 * 4787 * syscall for the rpc.lockd to use to translate a NFS file handle into 4788 * an open descriptor. 4789 * 4790 * WARNING: Do not remove the caps_priv_check() call or this becomes 4791 * one giant security hole. 4792 */ 4793 int 4794 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap) 4795 { 4796 struct thread *td = curthread; 4797 struct filedesc *fdp = td->td_proc->p_fd; 4798 struct mount *mp; 4799 struct vnode *vp; 4800 struct fhandle fhp; 4801 struct vattr vat; 4802 struct vattr *vap = &vat; 4803 struct flock lf; 4804 int fmode, mode, error = 0, type; 4805 struct file *nfp; 4806 struct file *fp; 4807 int indx; 4808 4809 /* 4810 * Must be super user 4811 */ 4812 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 4813 if (error) 4814 return (error); 4815 4816 fmode = FFLAGS(uap->flags); 4817 4818 /* 4819 * Why not allow a non-read/write open for our lockd? 
4820 */ 4821 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4822 return (EINVAL); 4823 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4824 if (error) 4825 return(error); 4826 4827 /* 4828 * Find the mount point 4829 */ 4830 mp = vfs_getvfs(&fhp.fh_fsid); 4831 if (mp == NULL) { 4832 error = ESTALE; 4833 goto done2; 4834 } 4835 /* now give me my vnode, it gets returned to me locked */ 4836 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4837 if (error) 4838 goto done; 4839 /* 4840 * from now on we have to make sure not 4841 * to forget about the vnode 4842 * any error that causes an abort must vput(vp) 4843 * just set error = err and 'goto bad;'. 4844 */ 4845 4846 /* 4847 * from vn_open 4848 */ 4849 if (vp->v_type == VLNK) { 4850 error = EMLINK; 4851 goto bad; 4852 } 4853 if (vp->v_type == VSOCK) { 4854 error = EOPNOTSUPP; 4855 goto bad; 4856 } 4857 mode = 0; 4858 if (fmode & (FWRITE | O_TRUNC)) { 4859 if (vp->v_type == VDIR) { 4860 error = EISDIR; 4861 goto bad; 4862 } 4863 error = vn_writechk(vp); 4864 if (error) 4865 goto bad; 4866 mode |= VWRITE; 4867 } 4868 if (fmode & FREAD) 4869 mode |= VREAD; 4870 if (mode) { 4871 error = VOP_ACCESS(vp, mode, td->td_ucred); 4872 if (error) 4873 goto bad; 4874 } 4875 if (fmode & O_TRUNC) { 4876 vn_unlock(vp); /* XXX */ 4877 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4878 VATTR_NULL(vap); 4879 vap->va_size = 0; 4880 error = VOP_SETATTR(vp, vap, td->td_ucred); 4881 if (error) 4882 goto bad; 4883 } 4884 4885 /* 4886 * VOP_OPEN needs the file pointer so it can potentially override 4887 * it. 4888 * 4889 * WARNING! no f_nchandle will be associated when fhopen()ing a 4890 * directory. XXX 4891 */ 4892 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4893 goto bad; 4894 error = VOP_OPEN(vp, fmode, td->td_ucred, &nfp); 4895 fp = nfp; 4896 4897 if (error) { 4898 /* 4899 * setting f_ops this way prevents VOP_CLOSE from being 4900 * called or fdrop() releasing the vp from v_data. 
Since 4901 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4902 */ 4903 fp->f_ops = &badfileops; 4904 fp->f_data = NULL; 4905 goto bad_drop; 4906 } 4907 4908 /* 4909 * The fp is given its own reference, we still have our ref and lock. 4910 * 4911 * Assert that all regular files must be created with a VM object. 4912 */ 4913 if (vp->v_type == VREG && vp->v_object == NULL) { 4914 kprintf("fhopen: regular file did not " 4915 "have VM object: %p\n", 4916 vp); 4917 goto bad_drop; 4918 } 4919 4920 /* 4921 * The open was successful. Handle any locking requirements. 4922 */ 4923 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4924 lf.l_whence = SEEK_SET; 4925 lf.l_start = 0; 4926 lf.l_len = 0; 4927 if (fmode & O_EXLOCK) 4928 lf.l_type = F_WRLCK; 4929 else 4930 lf.l_type = F_RDLCK; 4931 if (fmode & FNONBLOCK) 4932 type = 0; 4933 else 4934 type = F_WAIT; 4935 vn_unlock(vp); 4936 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4937 &lf, type)) != 0) { 4938 /* 4939 * release our private reference. 4940 */ 4941 fsetfd(fdp, NULL, indx); 4942 fdrop(fp); 4943 vrele(vp); 4944 goto done; 4945 } 4946 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4947 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4948 } 4949 4950 /* 4951 * Clean up. Associate the file pointer with the previously 4952 * reserved descriptor and return it. 
4953 */ 4954 vput(vp); 4955 if (uap->flags & O_CLOEXEC) 4956 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4957 fsetfd(fdp, fp, indx); 4958 fdrop(fp); 4959 sysmsg->sysmsg_result = indx; 4960 mount_drop(mp); 4961 4962 return (error); 4963 4964 bad_drop: 4965 fsetfd(fdp, NULL, indx); 4966 fdrop(fp); 4967 bad: 4968 vput(vp); 4969 done: 4970 mount_drop(mp); 4971 done2: 4972 return (error); 4973 } 4974 4975 /* 4976 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4977 */ 4978 int 4979 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap) 4980 { 4981 struct thread *td = curthread; 4982 struct stat sb; 4983 fhandle_t fh; 4984 struct mount *mp; 4985 struct vnode *vp; 4986 int error; 4987 4988 /* 4989 * Must be super user 4990 */ 4991 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 4992 if (error) 4993 return (error); 4994 4995 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4996 if (error) 4997 return (error); 4998 4999 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 5000 error = ESTALE; 5001 if (error == 0) { 5002 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 5003 error = vn_stat(vp, &sb, td->td_ucred); 5004 vput(vp); 5005 } 5006 } 5007 if (error == 0) 5008 error = copyout(&sb, uap->sb, sizeof(sb)); 5009 if (mp) 5010 mount_drop(mp); 5011 5012 return (error); 5013 } 5014 5015 /* 5016 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 5017 */ 5018 int 5019 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap) 5020 { 5021 struct thread *td = curthread; 5022 struct proc *p = td->td_proc; 5023 struct statfs *sp; 5024 struct mount *mp; 5025 struct vnode *vp; 5026 struct statfs sb; 5027 char *fullpath, *freepath; 5028 fhandle_t fh; 5029 int error; 5030 5031 /* 5032 * Must be super user 5033 */ 5034 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 5035 if (error) 5036 return (error); 5037 5038 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 5039 return (error); 5040 5041 if ((mp = 
vfs_getvfs(&fh.fh_fsid)) == NULL) { 5042 error = ESTALE; 5043 goto done; 5044 } 5045 if (p != NULL && !chroot_visible_mnt(mp, p)) { 5046 error = ESTALE; 5047 goto done; 5048 } 5049 5050 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 5051 goto done; 5052 mp = vp->v_mount; 5053 sp = &mp->mnt_stat; 5054 vput(vp); 5055 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 5056 goto done; 5057 5058 error = mount_path(p, mp, &fullpath, &freepath); 5059 if (error) 5060 goto done; 5061 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 5062 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 5063 kfree(freepath, M_TEMP); 5064 5065 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 5066 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) { 5067 bcopy(sp, &sb, sizeof(sb)); 5068 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 5069 sp = &sb; 5070 } 5071 error = copyout(sp, uap->buf, sizeof(*sp)); 5072 done: 5073 if (mp) 5074 mount_drop(mp); 5075 5076 return (error); 5077 } 5078 5079 /* 5080 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 5081 */ 5082 int 5083 sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap) 5084 { 5085 struct thread *td = curthread; 5086 struct proc *p = td->td_proc; 5087 struct statvfs *sp; 5088 struct mount *mp; 5089 struct vnode *vp; 5090 fhandle_t fh; 5091 int error; 5092 5093 /* 5094 * Must be super user 5095 */ 5096 if ((error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))) 5097 return (error); 5098 5099 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 5100 return (error); 5101 5102 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 5103 error = ESTALE; 5104 goto done; 5105 } 5106 if (p != NULL && !chroot_visible_mnt(mp, p)) { 5107 error = ESTALE; 5108 goto done; 5109 } 5110 5111 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 5112 goto done; 5113 mp = vp->v_mount; 5114 sp = &mp->mnt_vstat; 5115 vput(vp); 5116 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 5117 goto done; 5118 5119 sp->f_flag 
= 0; 5120 if (mp->mnt_flag & MNT_RDONLY) 5121 sp->f_flag |= ST_RDONLY; 5122 if (mp->mnt_flag & MNT_NOSUID) 5123 sp->f_flag |= ST_NOSUID; 5124 error = copyout(sp, uap->buf, sizeof(*sp)); 5125 done: 5126 if (mp) 5127 mount_drop(mp); 5128 return (error); 5129 } 5130 5131 5132 /* 5133 * Syscall to push extended attribute configuration information into the 5134 * VFS. Accepts a path, which it converts to a mountpoint, as well as 5135 * a command (int cmd), and attribute name and misc data. For now, the 5136 * attribute name is left in userspace for consumption by the VFS_op. 5137 * It will probably be changed to be copied into sysspace by the 5138 * syscall in the future, once issues with various consumers of the 5139 * attribute code have raised their hands. 5140 * 5141 * Currently this is used only by UFS Extended Attributes. 5142 */ 5143 int 5144 sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap) 5145 { 5146 struct nlookupdata nd; 5147 struct vnode *vp; 5148 char attrname[EXTATTR_MAXNAMELEN]; 5149 int error; 5150 size_t size; 5151 5152 attrname[0] = 0; 5153 vp = NULL; 5154 error = 0; 5155 5156 if (error == 0 && uap->filename) { 5157 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 5158 NLC_FOLLOW); 5159 if (error == 0) 5160 error = nlookup(&nd); 5161 if (error == 0) 5162 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 5163 nlookup_done(&nd); 5164 } 5165 5166 if (error == 0 && uap->attrname) { 5167 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 5168 &size); 5169 } 5170 5171 if (error == 0) { 5172 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5173 if (error == 0) 5174 error = nlookup(&nd); 5175 if (error == 0) 5176 error = ncp_writechk(&nd.nl_nch); 5177 if (error == 0) { 5178 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 5179 uap->attrnamespace, 5180 uap->attrname, nd.nl_cred); 5181 } 5182 nlookup_done(&nd); 5183 } 5184 5185 return (error); 5186 } 5187 5188 /* 5189 * Syscall to get a named 
extended attribute on a file or directory.
 *
 * This is the "set" entry point: uap->nbytes bytes of userspace data at
 * uap->data are passed to VOP_SETEXTATTR() as the value of the attribute
 * uap->attrname in namespace uap->attrnamespace on the file uap->path.
 *
 * Returns 0 on success or the error from the name copy-in, the lookup,
 * the write check, or the VOP.
 */
int
sys_extattr_set_file(struct sysmsg *sysmsg,
		     const struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	size_t namelen;
	int error;

	/*
	 * The attribute name is a C string: copy it in as one so that the
	 * copy stops at the terminating NUL and the kernel buffer is
	 * always terminated.  A fixed-length copyin() of
	 * EXTATTR_MAXNAMELEN bytes reads past the end of short names and
	 * leaves over-long names unterminated.
	 */
	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
			  &namelen);
	if (error)
		return (error);

	vp = NULL;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* describe the user buffer holding the new attribute value */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
5240 */ 5241 int 5242 sys_extattr_get_file(struct sysmsg *sysmsg, 5243 const struct extattr_get_file_args *uap) 5244 { 5245 char attrname[EXTATTR_MAXNAMELEN]; 5246 struct nlookupdata nd; 5247 struct uio auio; 5248 struct iovec aiov; 5249 struct vnode *vp; 5250 int error; 5251 5252 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5253 if (error) 5254 return (error); 5255 5256 vp = NULL; 5257 5258 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5259 if (error == 0) 5260 error = nlookup(&nd); 5261 if (error == 0) 5262 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5263 if (error) { 5264 nlookup_done(&nd); 5265 return (error); 5266 } 5267 5268 bzero(&auio, sizeof(auio)); 5269 aiov.iov_base = uap->data; 5270 aiov.iov_len = uap->nbytes; 5271 auio.uio_iov = &aiov; 5272 auio.uio_iovcnt = 1; 5273 auio.uio_offset = 0; 5274 auio.uio_resid = uap->nbytes; 5275 auio.uio_rw = UIO_READ; 5276 auio.uio_td = curthread; 5277 5278 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5279 &auio, nd.nl_cred); 5280 sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid; 5281 5282 vput(vp); 5283 nlookup_done(&nd); 5284 return(error); 5285 } 5286 5287 /* 5288 * Syscall to delete a named extended attribute from a file or directory. 5289 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5290 */ 5291 int 5292 sys_extattr_delete_file(struct sysmsg *sysmsg, 5293 const struct extattr_delete_file_args *uap) 5294 { 5295 char attrname[EXTATTR_MAXNAMELEN]; 5296 struct nlookupdata nd; 5297 struct vnode *vp; 5298 int error; 5299 5300 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5301 if (error) 5302 return(error); 5303 5304 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5305 if (error == 0) 5306 error = nlookup(&nd); 5307 if (error == 0) 5308 error = ncp_writechk(&nd.nl_nch); 5309 if (error == 0) { 5310 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5311 if (error == 0) { 5312 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5313 attrname, NULL, nd.nl_cred); 5314 vput(vp); 5315 } 5316 } 5317 nlookup_done(&nd); 5318 return(error); 5319 } 5320 5321 /* 5322 * Determine if the mount is visible to the process. 5323 */ 5324 static int 5325 chroot_visible_mnt(struct mount *mp, struct proc *p) 5326 { 5327 struct nchandle nch; 5328 5329 /* 5330 * Traverse from the mount point upwards. If we hit the process 5331 * root then the mount point is visible to the process. 5332 */ 5333 nch = mp->mnt_ncmountpt; 5334 while (nch.ncp) { 5335 if (nch.mount == p->p_fd->fd_nrdir.mount && 5336 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5337 return(1); 5338 } 5339 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5340 nch = nch.mount->mnt_ncmounton; 5341 } else { 5342 nch.ncp = nch.ncp->nc_parent; 5343 } 5344 } 5345 5346 /* 5347 * If the mount point is not visible to the process, but the 5348 * process root is in a subdirectory of the mount, return 5349 * TRUE anyway. 5350 */ 5351 if (p->p_fd->fd_nrdir.mount == mp) 5352 return(1); 5353 5354 return(0); 5355 } 5356 5357 /* 5358 * Return the appropriate system capability restriction. 
5359 */ 5360 static int 5361 get_fscap(const char *fsname) 5362 { 5363 5364 if (strncmp("null", fsname, 5) == 0) { 5365 return SYSCAP_NOMOUNT_NULLFS; 5366 } else if (strncmp(fsname, "devfs", 6) == 0) { 5367 return SYSCAP_NOMOUNT_DEVFS; 5368 } else if (strncmp(fsname, "procfs", 7) == 0) { 5369 return SYSCAP_NOMOUNT_PROCFS; 5370 } else if (strncmp(fsname, "tmpfs", 6) == 0) { 5371 return SYSCAP_NOMOUNT_TMPFS; 5372 } else if (strncmp(fsname, "fusefs", 7) == 0) { 5373 return SYSCAP_NOMOUNT_FUSE; 5374 } 5375 return SYSCAP_RESTRICTEDROOT; 5376 } 5377 5378 int 5379 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap) 5380 { 5381 struct nlookupdata nd; 5382 char *rbuf; 5383 char *fbuf; 5384 ssize_t rlen; 5385 int error; 5386 5387 /* 5388 * Invalid length if less than 0. 0 is allowed 5389 */ 5390 if ((ssize_t)uap->len < 0) 5391 return EINVAL; 5392 5393 rbuf = NULL; 5394 fbuf = NULL; 5395 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5396 if (error) 5397 goto done; 5398 5399 nd.nl_flags |= NLC_SHAREDLOCK; 5400 error = nlookup(&nd); 5401 if (error) 5402 goto done; 5403 5404 if (nd.nl_nch.ncp->nc_vp == NULL) { 5405 error = ENOENT; 5406 goto done; 5407 } 5408 5409 /* 5410 * Shortcut test for existence. 5411 */ 5412 if (uap->len == 0) { 5413 error = ENAMETOOLONG; 5414 goto done; 5415 } 5416 5417 /* 5418 * Obtain the path relative to the process root. The nch must not 5419 * be locked for the cache_fullpath() call. 
5420 */ 5421 if (nd.nl_flags & NLC_NCPISLOCKED) { 5422 nd.nl_flags &= ~NLC_NCPISLOCKED; 5423 cache_unlock(&nd.nl_nch); 5424 } 5425 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0); 5426 if (error) 5427 goto done; 5428 5429 rlen = (ssize_t)strlen(rbuf); 5430 if (rlen >= uap->len) { 5431 error = ENAMETOOLONG; 5432 goto done; 5433 } 5434 error = copyout(rbuf, uap->buf, rlen + 1); 5435 if (error == 0) 5436 sysmsg->sysmsg_szresult = rlen; 5437 done: 5438 nlookup_done(&nd); 5439 if (fbuf) 5440 kfree(fbuf, M_TEMP); 5441 5442 return error; 5443 } 5444 5445 int 5446 sys_posix_fallocate(struct sysmsg *sysmsg, const struct posix_fallocate_args *uap) 5447 { 5448 return (kern_posix_fallocate(uap->fd, uap->offset, uap->len)); 5449 } 5450 5451 int 5452 kern_posix_fallocate(int fd, off_t offset, off_t len) 5453 { 5454 struct thread *td = curthread; 5455 struct vnode *vp; 5456 struct file *fp; 5457 int error; 5458 5459 if (offset < 0 || len <= 0) 5460 return (EINVAL); 5461 /* Check for wrap. */ 5462 if (offset > OFF_MAX - len) 5463 return (EFBIG); 5464 5465 fp = holdfp(td, fd, -1); 5466 if (fp == NULL) 5467 return (EBADF); 5468 5469 switch (fp->f_type) { 5470 case DTYPE_VNODE: 5471 break; 5472 case DTYPE_PIPE: 5473 case DTYPE_FIFO: 5474 error = ESPIPE; 5475 goto out; 5476 default: 5477 error = ENODEV; 5478 goto out; 5479 } 5480 5481 if ((fp->f_flag & FWRITE) == 0) { 5482 error = EBADF; 5483 goto out; 5484 } 5485 5486 vp = fp->f_data; 5487 if (vp->v_type != VREG) { 5488 error = ENODEV; 5489 goto out; 5490 } 5491 5492 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 5493 error = VOP_ALLOCATE(vp, offset, len); 5494 vn_unlock(vp); 5495 out: 5496 dropfp(td, fd, fp); 5497 return (error); 5498 } 5499