1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysmsg.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/caps.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int get_fscap(const char *); 84 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 85 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 86 static int getutimes (struct timeval *, struct timespec *); 87 static int getutimens (const struct timespec *, struct timespec *, int *); 88 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 89 static int setfmode (struct vnode *, int); 90 static int setfflags (struct vnode *, u_long); 91 static int setutimes (struct vnode *, struct vattr *, 92 const struct timespec *, int); 93 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 96 "Allow non-root users to mount filesystems"); 97 98 static int debug_unmount = 0; /* if 1 loop until unmount success */ 99 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 100 "Stall failed unmounts in loop"); 101 102 static struct krate krate_rename = { 1 }; 103 104 /* 105 * Virtual File System System Calls 106 */ 107 108 /* 109 * Mount a file system. 
110 * 111 * mount_args(char *type, char *path, int flags, caddr_t data) 112 * 113 * MPALMOSTSAFE 114 */ 115 int 116 sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap) 117 { 118 struct thread *td = curthread; 119 struct vnode *vp; 120 struct nchandle nch; 121 struct mount *mp, *nullmp; 122 struct vfsconf *vfsp; 123 int error, flag = 0, flag2 = 0; 124 int hasmount; 125 int priv = 0; 126 int flags = uap->flags; 127 struct vattr va; 128 struct nlookupdata nd; 129 char fstypename[MFSNAMELEN]; 130 struct ucred *cred; 131 132 cred = td->td_ucred; 133 134 /* We do not allow user mounts inside a jail for now */ 135 if (usermount && jailed(cred)) { 136 error = EPERM; 137 goto done; 138 } 139 140 /* 141 * Extract the file system type. We need to know this early, to take 142 * appropriate actions for jails and the filesystems to mount. 143 */ 144 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) 145 goto done; 146 147 /* 148 * Select the correct cap according to the file system type. 149 */ 150 priv = get_fscap(fstypename); 151 152 if (usermount == 0 && (error = caps_priv_check_td(td, priv))) 153 goto done; 154 155 /* 156 * Do not allow NFS export by non-root users. 157 */ 158 if (flags & MNT_EXPORTED) { 159 error = caps_priv_check_td(td, priv); 160 if (error) 161 goto done; 162 } 163 /* 164 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 165 */ 166 if (caps_priv_check_td(td, priv)) 167 flags |= MNT_NOSUID | MNT_NODEV; 168 169 /* 170 * Lookup the requested path and extract the nch and vnode. 
171 */ 172 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 173 if (error == 0) { 174 if ((error = nlookup(&nd)) == 0) { 175 if (nd.nl_nch.ncp->nc_vp == NULL) 176 error = ENOENT; 177 } 178 } 179 if (error) { 180 nlookup_done(&nd); 181 goto done; 182 } 183 184 /* 185 * If the target filesystem is resolved via a nullfs mount, then 186 * nd.nl_nch.mount will be pointing to the nullfs mount structure 187 * instead of the target file system. We need it in case we are 188 * doing an update. 189 */ 190 nullmp = nd.nl_nch.mount; 191 192 /* 193 * Extract the locked+refd ncp and cleanup the nd structure 194 */ 195 nch = nd.nl_nch; 196 cache_zero(&nd.nl_nch); 197 nlookup_done(&nd); 198 199 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 200 (mp = cache_findmount(&nch)) != NULL) { 201 cache_dropmount(mp); 202 hasmount = 1; 203 } else { 204 hasmount = 0; 205 } 206 207 208 /* 209 * now we have the locked ref'd nch and unreferenced vnode. 210 */ 211 vp = nch.ncp->nc_vp; 212 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 213 cache_put(&nch); 214 goto done; 215 } 216 cache_unlock(&nch); 217 218 /* 219 * Now we have an unlocked ref'd nch and a locked ref'd vp 220 */ 221 if (flags & MNT_UPDATE) { 222 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 223 cache_drop(&nch); 224 vput(vp); 225 error = EINVAL; 226 goto done; 227 } 228 229 if (strncmp(fstypename, "null", 5) == 0) { 230 KKASSERT(nullmp); 231 mp = nullmp; 232 } else { 233 mp = vp->v_mount; 234 } 235 236 flag = mp->mnt_flag; 237 flag2 = mp->mnt_kern_flag; 238 /* 239 * We only allow the filesystem to be reloaded if it 240 * is currently mounted read-only. 241 */ 242 if ((flags & MNT_RELOAD) && 243 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 244 cache_drop(&nch); 245 vput(vp); 246 error = EOPNOTSUPP; /* Needs translation */ 247 goto done; 248 } 249 /* 250 * Only root, or the user that did the original mount is 251 * permitted to update it. 
252 */ 253 if (mp->mnt_stat.f_owner != cred->cr_uid && 254 (error = caps_priv_check_td(td, priv))) { 255 cache_drop(&nch); 256 vput(vp); 257 goto done; 258 } 259 if (vfs_busy(mp, LK_NOWAIT)) { 260 cache_drop(&nch); 261 vput(vp); 262 error = EBUSY; 263 goto done; 264 } 265 if (hasmount) { 266 cache_drop(&nch); 267 vfs_unbusy(mp); 268 vput(vp); 269 error = EBUSY; 270 goto done; 271 } 272 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 273 lwkt_gettoken(&mp->mnt_token); 274 vn_unlock(vp); 275 vfsp = mp->mnt_vfc; 276 goto update; 277 } 278 279 /* 280 * If the user is not root, ensure that they own the directory 281 * onto which we are attempting to mount. 282 */ 283 if ((error = VOP_GETATTR(vp, &va)) || 284 (va.va_uid != cred->cr_uid && 285 (error = caps_priv_check_td(td, priv)))) { 286 cache_drop(&nch); 287 vput(vp); 288 goto done; 289 } 290 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 291 cache_drop(&nch); 292 vput(vp); 293 goto done; 294 } 295 if (vp->v_type != VDIR) { 296 cache_drop(&nch); 297 vput(vp); 298 error = ENOTDIR; 299 goto done; 300 } 301 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 302 cache_drop(&nch); 303 vput(vp); 304 error = EPERM; 305 goto done; 306 } 307 vfsp = vfsconf_find_by_name(fstypename); 308 if (vfsp == NULL) { 309 linker_file_t lf; 310 311 /* Only load modules for root (very important!) 
*/ 312 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 313 if (error) { 314 cache_drop(&nch); 315 vput(vp); 316 goto done; 317 } 318 error = linker_load_file(fstypename, &lf); 319 if (error || lf == NULL) { 320 cache_drop(&nch); 321 vput(vp); 322 if (lf == NULL) 323 error = ENODEV; 324 goto done; 325 } 326 lf->userrefs++; 327 /* lookup again, see if the VFS was loaded */ 328 vfsp = vfsconf_find_by_name(fstypename); 329 if (vfsp == NULL) { 330 lf->userrefs--; 331 linker_file_unload(lf); 332 cache_drop(&nch); 333 vput(vp); 334 error = ENODEV; 335 goto done; 336 } 337 } 338 if (hasmount) { 339 cache_drop(&nch); 340 vput(vp); 341 error = EBUSY; 342 goto done; 343 } 344 345 /* 346 * Allocate and initialize the filesystem. 347 */ 348 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 349 mount_init(mp, vfsp->vfc_vfsops); 350 vfs_busy(mp, LK_NOWAIT); 351 mp->mnt_vfc = vfsp; 352 mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT; 353 vfsp->vfc_refcount++; 354 mp->mnt_stat.f_type = vfsp->vfc_typenum; 355 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 356 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 357 mp->mnt_stat.f_owner = cred->cr_uid; 358 lwkt_gettoken(&mp->mnt_token); 359 vn_unlock(vp); 360 update: 361 /* 362 * (per-mount token acquired at this point) 363 * 364 * Set the mount level flags. 
365 */ 366 if (flags & MNT_RDONLY) 367 mp->mnt_flag |= MNT_RDONLY; 368 else if (mp->mnt_flag & MNT_RDONLY) 369 mp->mnt_kern_flag |= MNTK_WANTRDWR; 370 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 371 MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | 372 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 373 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 374 MNT_AUTOMOUNTED); 375 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | 376 MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE | 377 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 378 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 379 MNT_AUTOMOUNTED); 380 381 /* 382 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf. 383 * This way the initial VFS_MOUNT() call will also be MPSAFE. 384 */ 385 if (vfsp->vfc_flags & VFCF_MPSAFE) 386 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 387 388 /* 389 * Mount the filesystem. 390 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 391 * get. 392 */ 393 if (mp->mnt_flag & MNT_UPDATE) { 394 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 395 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 396 mp->mnt_flag &= ~MNT_RDONLY; 397 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 398 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 399 if (error) { 400 mp->mnt_flag = flag; 401 mp->mnt_kern_flag = flag2; 402 } 403 lwkt_reltoken(&mp->mnt_token); 404 vfs_unbusy(mp); 405 vrele(vp); 406 cache_drop(&nch); 407 goto done; 408 } 409 mp->mnt_ncmounton = nch; 410 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 411 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 412 413 /* 414 * Put the new filesystem on the mount list after root. The mount 415 * point gets its own mnt_ncmountpt (unless the VFS already set one 416 * up) which represents the root of the mount. The lookup code 417 * detects the mount point going forward and checks the root of 418 * the mount going backwards. 
419 * 420 * It is not necessary to invalidate or purge the vnode underneath 421 * because elements under the mount will be given their own glue 422 * namecache record. 423 */ 424 if (!error) { 425 if (mp->mnt_ncmountpt.ncp == NULL) { 426 /* 427 * Allocate, then unlock, but leave the ref intact. 428 * This is the mnt_refs (1) that we will retain 429 * through to the unmount. 430 */ 431 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 432 cache_unlock(&mp->mnt_ncmountpt); 433 } 434 vn_unlock(vp); 435 cache_lock(&nch); 436 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 437 cache_unlock(&nch); 438 cache_ismounting(mp); 439 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 440 441 mountlist_insert(mp, MNTINS_LAST); 442 vn_unlock(vp); 443 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 444 error = vfs_allocate_syncvnode(mp); 445 lwkt_reltoken(&mp->mnt_token); 446 vfs_unbusy(mp); 447 error = VFS_START(mp, 0); 448 vrele(vp); 449 KNOTE(&fs_klist, VQ_MOUNT); 450 } else { 451 bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton)); 452 vn_syncer_thr_stop(mp); 453 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 454 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 455 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 456 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 457 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 458 if (mp->mnt_cred) { 459 crfree(mp->mnt_cred); 460 mp->mnt_cred = NULL; 461 } 462 mp->mnt_vfc->vfc_refcount--; 463 lwkt_reltoken(&mp->mnt_token); 464 vfs_unbusy(mp); 465 kfree(mp, M_MOUNT); 466 cache_drop(&nch); 467 vput(vp); 468 } 469 done: 470 return (error); 471 } 472 473 /* 474 * Scan all active processes to see if any of them have a current 475 * or root directory onto which the new filesystem has just been 476 * mounted. If so, replace them with the new mount point. 477 * 478 * Both old_nch and new_nch are ref'd on call but not locked. 479 * new_nch must be temporarily locked so it can be associated with the 480 * vnode representing the root of the mount point. 
481 */ 482 struct checkdirs_info { 483 struct nchandle old_nch; 484 struct nchandle new_nch; 485 struct vnode *old_vp; 486 struct vnode *new_vp; 487 }; 488 489 static int checkdirs_callback(struct proc *p, void *data); 490 491 static void 492 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 493 { 494 struct checkdirs_info info; 495 struct vnode *olddp; 496 struct vnode *newdp; 497 struct mount *mp; 498 499 /* 500 * If the old mount point's vnode has a usecount of 1, it is not 501 * being held as a descriptor anywhere. 502 */ 503 olddp = old_nch->ncp->nc_vp; 504 if (olddp == NULL || VREFCNT(olddp) == 1) 505 return; 506 507 /* 508 * Force the root vnode of the new mount point to be resolved 509 * so we can update any matching processes. 510 */ 511 mp = new_nch->mount; 512 if (VFS_ROOT(mp, &newdp)) 513 panic("mount: lost mount"); 514 vn_unlock(newdp); 515 cache_lock(new_nch); 516 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 517 cache_setunresolved(new_nch); 518 cache_setvp(new_nch, newdp); 519 cache_unlock(new_nch); 520 521 /* 522 * Special handling of the root node 523 */ 524 if (rootvnode == olddp) { 525 vref(newdp); 526 vfs_cache_setroot(newdp, cache_hold(new_nch)); 527 } 528 529 /* 530 * Pass newdp separately so the callback does not have to access 531 * it via new_nch->ncp->nc_vp. 532 */ 533 info.old_nch = *old_nch; 534 info.new_nch = *new_nch; 535 info.new_vp = newdp; 536 allproc_scan(checkdirs_callback, &info, 0); 537 vput(newdp); 538 } 539 540 /* 541 * NOTE: callback is not MP safe because the scanned process's filedesc 542 * structure can be ripped out from under us, amoung other things. 
543 */ 544 static int 545 checkdirs_callback(struct proc *p, void *data) 546 { 547 struct checkdirs_info *info = data; 548 struct filedesc *fdp; 549 struct nchandle ncdrop1; 550 struct nchandle ncdrop2; 551 struct vnode *vprele1; 552 struct vnode *vprele2; 553 554 if ((fdp = p->p_fd) != NULL) { 555 cache_zero(&ncdrop1); 556 cache_zero(&ncdrop2); 557 vprele1 = NULL; 558 vprele2 = NULL; 559 560 /* 561 * MPUNSAFE - XXX fdp can be pulled out from under a 562 * foreign process. 563 * 564 * A shared filedesc is ok, we don't have to copy it 565 * because we are making this change globally. 566 */ 567 spin_lock(&fdp->fd_spin); 568 if (fdp->fd_ncdir.mount == info->old_nch.mount && 569 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 570 vprele1 = fdp->fd_cdir; 571 vref(info->new_vp); 572 fdp->fd_cdir = info->new_vp; 573 ncdrop1 = fdp->fd_ncdir; 574 cache_copy(&info->new_nch, &fdp->fd_ncdir); 575 } 576 if (fdp->fd_nrdir.mount == info->old_nch.mount && 577 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 578 vprele2 = fdp->fd_rdir; 579 vref(info->new_vp); 580 fdp->fd_rdir = info->new_vp; 581 ncdrop2 = fdp->fd_nrdir; 582 cache_copy(&info->new_nch, &fdp->fd_nrdir); 583 } 584 spin_unlock(&fdp->fd_spin); 585 if (ncdrop1.ncp) 586 cache_drop(&ncdrop1); 587 if (ncdrop2.ncp) 588 cache_drop(&ncdrop2); 589 if (vprele1) 590 vrele(vprele1); 591 if (vprele2) 592 vrele(vprele2); 593 } 594 return(0); 595 } 596 597 /* 598 * Unmount a file system. 599 * 600 * Note: unmount takes a path to the vnode mounted on as argument, 601 * not special file (as before). 
602 * 603 * umount_args(char *path, int flags) 604 * 605 * MPALMOSTSAFE 606 */ 607 int 608 sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap) 609 { 610 struct thread *td = curthread; 611 struct proc *p __debugvar = td->td_proc; 612 struct mount *mp = NULL; 613 struct nlookupdata nd; 614 char fstypename[MFSNAMELEN]; 615 int priv = 0; 616 int error; 617 struct ucred *cred; 618 619 cred = td->td_ucred; 620 621 KKASSERT(p); 622 623 /* We do not allow user umounts inside a jail for now */ 624 if (usermount && jailed(cred)) { 625 error = EPERM; 626 goto done; 627 } 628 629 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 630 NLC_FOLLOW | NLC_IGNBADDIR); 631 if (error == 0) 632 error = nlookup(&nd); 633 if (error) 634 goto out; 635 636 mp = nd.nl_nch.mount; 637 638 /* Figure out the fsname in order to select proper privs */ 639 ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name); 640 priv = get_fscap(fstypename); 641 642 if (usermount == 0 && (error = caps_priv_check_td(td, priv))) { 643 nlookup_done(&nd); 644 goto done; 645 } 646 647 /* 648 * Only root, or the user that did the original mount is 649 * permitted to unmount this filesystem. 650 */ 651 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 652 (error = caps_priv_check_td(td, priv))) 653 { 654 goto out; 655 } 656 657 /* 658 * Don't allow unmounting the root file system. 659 */ 660 if (mp->mnt_flag & MNT_ROOTFS) { 661 error = EINVAL; 662 goto out; 663 } 664 665 /* 666 * Must be the root of the filesystem 667 */ 668 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 669 error = EINVAL; 670 goto out; 671 } 672 673 /* Check if this mount belongs to this prison */ 674 if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison || 675 mp->mnt_cred->cr_prison != cred->cr_prison)) { 676 kprintf("mountpoint %s does not belong to this jail\n", 677 uap->path); 678 error = EPERM; 679 goto out; 680 } 681 682 /* 683 * If no error try to issue the unmount. 
We lose our cache 684 * ref when we call nlookup_done so we must hold the mount point 685 * to prevent use-after-free races. 686 */ 687 out: 688 if (error == 0) { 689 mount_hold(mp); 690 nlookup_done(&nd); 691 error = dounmount(mp, uap->flags, 0); 692 mount_drop(mp); 693 } else { 694 nlookup_done(&nd); 695 } 696 done: 697 return (error); 698 } 699 700 /* 701 * Do the actual file system unmount (interlocked against the mountlist 702 * token and mp->mnt_token). 703 */ 704 static int 705 dounmount_interlock(struct mount *mp) 706 { 707 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 708 return (EBUSY); 709 mp->mnt_kern_flag |= MNTK_UNMOUNT; 710 return(0); 711 } 712 713 /* 714 * Returns non-zero if the specified process uses the specified 715 * mount point. 716 */ 717 static int 718 process_uses_mount(struct proc *p, struct mount *mp) 719 { 720 struct filedesc *fdp; 721 struct file *fp; 722 int found; 723 int n; 724 725 fdp = p->p_fd; 726 if (fdp == NULL) 727 return 0; 728 if (fdp->fd_ncdir.mount == mp || 729 fdp->fd_nrdir.mount == mp || 730 fdp->fd_njdir.mount == mp) 731 { 732 return 1; 733 } 734 735 found = 0; 736 spin_lock_shared(&fdp->fd_spin); 737 for (n = 0; n < fdp->fd_nfiles; ++n) { 738 fp = fdp->fd_files[n].fp; 739 if (fp && fp->f_nchandle.mount == mp) { 740 found = 1; 741 break; 742 } 743 } 744 spin_unlock_shared(&fdp->fd_spin); 745 746 return found; 747 } 748 749 /* 750 * Cleanup processes that have references to the mount point 751 * being force-unmounted. 752 */ 753 struct unmount_allproc_info { 754 struct mount *mp; 755 int sig; 756 }; 757 758 static int 759 unmount_allproc_cb(struct proc *p, void *arg) 760 { 761 struct unmount_allproc_info *info; 762 struct mount *mp; 763 764 info = arg; 765 mp = info->mp; 766 767 if (p->p_textnch.mount == mp) 768 cache_drop(&p->p_textnch); 769 if (info->sig && process_uses_mount(p, mp)) 770 ksignal(p, info->sig); 771 772 return 0; 773 } 774 775 /* 776 * The guts of the unmount code. The mount owns one ref and one hold 777 * count. 
If we successfully interlock the unmount, those refs are ours. 778 * (The ref is from mnt_ncmountpt). 779 * 780 * When halting we shortcut certain mount types such as devfs by not actually 781 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 782 * from the mountlist so higher-level filesytems can unmount cleanly. 783 * 784 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 785 */ 786 int 787 dounmount(struct mount *mp, int flags, int halting) 788 { 789 struct namecache *ncp; 790 struct nchandle nch; 791 struct vnode *vp; 792 int error; 793 int async_flag; 794 int lflags; 795 int freeok = 1; 796 int hadsyncer = 0; 797 int retry; 798 int quickhalt; 799 800 lwkt_gettoken(&mp->mnt_token); 801 802 /* 803 * When halting, certain mount points can essentially just 804 * be unhooked and otherwise ignored. 805 */ 806 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 807 quickhalt = 1; 808 freeok = 0; 809 } else { 810 quickhalt = 0; 811 } 812 813 814 /* 815 * Exclusive access for unmounting purposes. 816 */ 817 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 818 goto out; 819 820 /* 821 * We now 'own' the last mp->mnt_refs 822 * 823 * Allow filesystems to detect that a forced unmount is in progress. 824 */ 825 if (flags & MNT_FORCE) 826 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 827 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 828 error = lockmgr(&mp->mnt_lock, lflags); 829 if (error) { 830 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 831 if (mp->mnt_kern_flag & MNTK_MWAIT) { 832 mp->mnt_kern_flag &= ~MNTK_MWAIT; 833 wakeup(mp); 834 } 835 goto out; 836 } 837 838 if (mp->mnt_flag & MNT_EXPUBLIC) 839 vfs_setpublicfs(NULL, NULL, NULL); 840 841 vfs_msync(mp, MNT_WAIT); 842 async_flag = mp->mnt_flag & MNT_ASYNC; 843 mp->mnt_flag &=~ MNT_ASYNC; 844 845 /* 846 * Decomission our special mnt_syncer vnode. This also stops 847 * the vnlru code. If we are unable to unmount we recommission 848 * the vnode. 
849 * 850 * Then sync the filesystem. 851 */ 852 if ((vp = mp->mnt_syncer) != NULL) { 853 mp->mnt_syncer = NULL; 854 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 855 vrele(vp); 856 hadsyncer = 1; 857 } 858 859 /* 860 * Sync normally-mounted filesystem. 861 */ 862 if (quickhalt == 0) { 863 if ((mp->mnt_flag & MNT_RDONLY) == 0) 864 VFS_SYNC(mp, MNT_WAIT); 865 } 866 867 /* 868 * nchandle records ref the mount structure. Expect a count of 1 869 * (our mount->mnt_ncmountpt). 870 * 871 * Scans can get temporary refs on a mountpoint (thought really 872 * heavy duty stuff like cache_findmount() do not). 873 */ 874 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 875 /* 876 * Invalidate the namecache topology under the mount. 877 * nullfs mounts alias a real mount's namecache topology 878 * and it should not be invalidated in that case. 879 */ 880 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 881 cache_lock(&mp->mnt_ncmountpt); 882 cache_inval(&mp->mnt_ncmountpt, 883 CINV_DESTROY|CINV_CHILDREN); 884 cache_unlock(&mp->mnt_ncmountpt); 885 } 886 887 /* 888 * Clear pcpu caches 889 */ 890 cache_unmounting(mp); 891 if (mp->mnt_refs != 1) 892 cache_clearmntcache(mp); 893 894 /* 895 * Break out if we are good. Don't count ncp refs if the 896 * mount is aliased. 897 */ 898 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 899 NULL : mp->mnt_ncmountpt.ncp; 900 if (mp->mnt_refs == 1 && 901 (ncp == NULL || (ncp->nc_refs == 1 && 902 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 903 break; 904 } 905 906 /* 907 * If forcing the unmount, clean out any p->p_textnch 908 * nchandles that match this mount. 909 * 910 * In addition any process which has a current, root, or 911 * jail directory matching the mount, or which has an open 912 * descriptor matching the mount, will be killed. We first 913 * try SIGKILL, and if that doesn't work we issue SIGQUIT. 
914 */ 915 if (flags & MNT_FORCE) { 916 struct unmount_allproc_info info; 917 918 info.mp = mp; 919 switch(retry) { 920 case 3: 921 info.sig = SIGINT; 922 break; 923 case 7: 924 info.sig = SIGKILL; 925 break; 926 default: 927 info.sig = 0; 928 break; 929 } 930 allproc_scan(&unmount_allproc_cb, &info, 0); 931 } 932 933 /* 934 * Sleep and retry. 935 */ 936 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 4 + 1); 937 if (debug_unmount && (retry & 15) == 15) { 938 mount_warning(mp, 939 "(%p) debug - retry %d, " 940 "%d namecache refs, %d mount refs", 941 mp, retry, 942 (ncp ? ncp->nc_refs - 1 : 0), 943 mp->mnt_refs - 1); 944 } 945 } 946 if (retry == 10) { 947 mount_warning(mp, 948 "forced umount of \"%s\" - " 949 "%d namecache refs, %d mount refs", 950 (mp->mnt_ncmountpt.ncp ? 951 mp->mnt_ncmountpt.ncp->nc_name : "?"), 952 (ncp ? ncp->nc_refs - 1 : 0), 953 mp->mnt_refs - 1); 954 } 955 956 error = 0; 957 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 958 NULL : mp->mnt_ncmountpt.ncp; 959 if (mp->mnt_refs != 1 || 960 (ncp != NULL && (ncp->nc_refs != 1 || 961 TAILQ_FIRST(&ncp->nc_list)))) { 962 mount_warning(mp, 963 "(%p): %d namecache refs, %d mount refs " 964 "still present", 965 mp, 966 (ncp ? ncp->nc_refs - 1 : 0), 967 mp->mnt_refs - 1); 968 if (flags & MNT_FORCE) { 969 freeok = 0; 970 mount_warning(mp, "forcing unmount\n"); 971 } else { 972 error = EBUSY; 973 } 974 } 975 976 /* 977 * So far so good, sync the filesystem once more and 978 * call the VFS unmount code if the sync succeeds. 
979 */ 980 if (error == 0 && quickhalt == 0) { 981 if (mp->mnt_flag & MNT_RDONLY) { 982 error = VFS_UNMOUNT(mp, flags); 983 } else { 984 error = VFS_SYNC(mp, MNT_WAIT); 985 if (error == 0 || /* no error */ 986 error == EOPNOTSUPP || /* no sync avail */ 987 (flags & MNT_FORCE)) { /* force anyway */ 988 error = VFS_UNMOUNT(mp, flags); 989 } 990 } 991 if (error) { 992 mount_warning(mp, 993 "(%p) unmount: vfs refused to unmount, " 994 "error %d", 995 mp, error); 996 } 997 } 998 999 /* 1000 * If an error occurred we can still recover, restoring the 1001 * syncer vnode and misc flags. 1002 */ 1003 if (error) { 1004 if (mp->mnt_syncer == NULL && hadsyncer) 1005 vfs_allocate_syncvnode(mp); 1006 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1007 mp->mnt_flag |= async_flag; 1008 lockmgr(&mp->mnt_lock, LK_RELEASE); 1009 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1010 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1011 wakeup(mp); 1012 } 1013 goto out; 1014 } 1015 /* 1016 * Clean up any journals still associated with the mount after 1017 * filesystem activity has ceased. 1018 */ 1019 journal_remove_all_journals(mp, 1020 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 1021 1022 mountlist_remove(mp); 1023 1024 /* 1025 * Remove any installed vnode ops here so the individual VFSs don't 1026 * have to. 1027 * 1028 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 1029 * 1030 * When quickhalting we have to keep these intact because the 1031 * underlying vnodes have not been destroyed, and some might be 1032 * dirty. 
1033 */ 1034 if (quickhalt == 0) { 1035 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 1036 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 1037 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 1038 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 1039 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 1040 } 1041 1042 if (mp->mnt_ncmountpt.ncp != NULL) { 1043 nch = mp->mnt_ncmountpt; 1044 cache_zero(&mp->mnt_ncmountpt); 1045 cache_clrmountpt(&nch); 1046 cache_drop(&nch); 1047 } 1048 if (mp->mnt_ncmounton.ncp != NULL) { 1049 cache_unmounting(mp); 1050 nch = mp->mnt_ncmounton; 1051 cache_zero(&mp->mnt_ncmounton); 1052 cache_clrmountpt(&nch); 1053 cache_drop(&nch); 1054 } 1055 1056 if (mp->mnt_cred) { 1057 crfree(mp->mnt_cred); 1058 mp->mnt_cred = NULL; 1059 } 1060 1061 mp->mnt_vfc->vfc_refcount--; 1062 1063 /* 1064 * If not quickhalting the mount, we expect there to be no 1065 * vnodes left. 1066 */ 1067 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 1068 panic("unmount: dangling vnode"); 1069 1070 /* 1071 * Release the lock 1072 */ 1073 lockmgr(&mp->mnt_lock, LK_RELEASE); 1074 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1075 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1076 wakeup(mp); 1077 } 1078 1079 /* 1080 * If we reach here and freeok != 0 we must free the mount. 1081 * mnt_refs should already have dropped to 0, so if it is not 1082 * zero we must cycle the caches and wait. 1083 * 1084 * When we are satisfied that the mount has disconnected we can 1085 * drop the hold on the mp that represented the mount (though the 1086 * caller might actually have another, so the caller's drop may 1087 * do the actual free). 
1088 */ 1089 if (freeok) { 1090 if (mp->mnt_refs > 0) 1091 cache_clearmntcache(mp); 1092 while (mp->mnt_refs > 0) { 1093 cache_unmounting(mp); 1094 wakeup(mp); 1095 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 1096 cache_clearmntcache(mp); 1097 } 1098 lwkt_reltoken(&mp->mnt_token); 1099 mount_drop(mp); 1100 mp = NULL; 1101 } else { 1102 cache_clearmntcache(mp); 1103 } 1104 error = 0; 1105 KNOTE(&fs_klist, VQ_UNMOUNT); 1106 out: 1107 if (mp) 1108 lwkt_reltoken(&mp->mnt_token); 1109 return (error); 1110 } 1111 1112 static 1113 void 1114 mount_warning(struct mount *mp, const char *ctl, ...) 1115 { 1116 char *ptr; 1117 char *buf; 1118 __va_list va; 1119 1120 __va_start(va, ctl); 1121 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1122 &ptr, &buf, 0) == 0) { 1123 kprintf("unmount(%s): ", ptr); 1124 kvprintf(ctl, va); 1125 kprintf("\n"); 1126 kfree(buf, M_TEMP); 1127 } else { 1128 kprintf("unmount(%p", mp); 1129 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1130 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1131 kprintf("): "); 1132 kvprintf(ctl, va); 1133 kprintf("\n"); 1134 } 1135 __va_end(va); 1136 } 1137 1138 /* 1139 * Shim cache_fullpath() to handle the case where a process is chrooted into 1140 * a subdirectory of a mount. In this case if the root mount matches the 1141 * process root directory's mount we have to specify the process's root 1142 * directory instead of the mount point, because the mount point might 1143 * be above the root directory. 1144 */ 1145 static 1146 int 1147 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1148 { 1149 struct nchandle *nch; 1150 1151 if (p && p->p_fd->fd_nrdir.mount == mp) 1152 nch = &p->p_fd->fd_nrdir; 1153 else 1154 nch = &mp->mnt_ncmountpt; 1155 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1156 } 1157 1158 /* 1159 * Sync each mounted filesystem. 
1160 */ 1161 1162 #ifdef DEBUG 1163 static int syncprt = 0; 1164 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1165 #endif /* DEBUG */ 1166 1167 static int sync_callback(struct mount *mp, void *data); 1168 1169 int 1170 sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap) 1171 { 1172 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1173 return (0); 1174 } 1175 1176 static 1177 int 1178 sync_callback(struct mount *mp, void *data __unused) 1179 { 1180 int asyncflag; 1181 1182 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1183 lwkt_gettoken(&mp->mnt_token); 1184 asyncflag = mp->mnt_flag & MNT_ASYNC; 1185 mp->mnt_flag &= ~MNT_ASYNC; 1186 lwkt_reltoken(&mp->mnt_token); 1187 vfs_msync(mp, MNT_NOWAIT); 1188 VFS_SYNC(mp, MNT_NOWAIT); 1189 lwkt_gettoken(&mp->mnt_token); 1190 mp->mnt_flag |= asyncflag; 1191 lwkt_reltoken(&mp->mnt_token); 1192 } 1193 return(0); 1194 } 1195 1196 /* XXX PRISON: could be per prison flag */ 1197 static int prison_quotas; 1198 #if 0 1199 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1200 #endif 1201 1202 /* 1203 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1204 * 1205 * Change filesystem quotas. 
1206 * 1207 * MPALMOSTSAFE 1208 */ 1209 int 1210 sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap) 1211 { 1212 struct nlookupdata nd; 1213 struct thread *td; 1214 struct mount *mp; 1215 int error; 1216 1217 td = curthread; 1218 if (td->td_ucred->cr_prison && !prison_quotas) { 1219 error = EPERM; 1220 goto done; 1221 } 1222 1223 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1224 if (error == 0) 1225 error = nlookup(&nd); 1226 if (error == 0) { 1227 mp = nd.nl_nch.mount; 1228 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1229 uap->arg, nd.nl_cred); 1230 } 1231 nlookup_done(&nd); 1232 done: 1233 return (error); 1234 } 1235 1236 /* 1237 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1238 * void *buf, int buflen) 1239 * 1240 * This function operates on a mount point and executes the specified 1241 * operation using the specified control data, and possibly returns data. 1242 * 1243 * The actual number of bytes stored in the result buffer is returned, 0 1244 * if none, otherwise an error is returned. 1245 * 1246 * MPALMOSTSAFE 1247 */ 1248 int 1249 sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap) 1250 { 1251 struct thread *td = curthread; 1252 struct file *fp; 1253 void *ctl = NULL; 1254 void *buf = NULL; 1255 char *path = NULL; 1256 int error; 1257 1258 /* 1259 * Sanity and permissions checks. We must be root. 
1260 */ 1261 if (td->td_ucred->cr_prison != NULL) 1262 return (EPERM); 1263 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1264 (error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) != 0) 1265 { 1266 return (error); 1267 } 1268 1269 /* 1270 * Argument length checks 1271 */ 1272 if (uap->ctllen < 0 || uap->ctllen > 1024) 1273 return (EINVAL); 1274 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1275 return (EINVAL); 1276 if (uap->path == NULL) 1277 return (EINVAL); 1278 1279 /* 1280 * Allocate the necessary buffers and copyin data 1281 */ 1282 path = objcache_get(namei_oc, M_WAITOK); 1283 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1284 if (error) 1285 goto done; 1286 1287 if (uap->ctllen) { 1288 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1289 error = copyin(uap->ctl, ctl, uap->ctllen); 1290 if (error) 1291 goto done; 1292 } 1293 if (uap->buflen) 1294 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1295 1296 /* 1297 * Validate the descriptor 1298 */ 1299 if (uap->fd >= 0) { 1300 fp = holdfp(td, uap->fd, -1); 1301 if (fp == NULL) { 1302 error = EBADF; 1303 goto done; 1304 } 1305 } else { 1306 fp = NULL; 1307 } 1308 1309 /* 1310 * Execute the internal kernel function and clean up. 1311 */ 1312 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1313 buf, uap->buflen, &sysmsg->sysmsg_result); 1314 if (fp) 1315 dropfp(td, uap->fd, fp); 1316 if (error == 0 && sysmsg->sysmsg_result > 0) 1317 error = copyout(buf, uap->buf, sysmsg->sysmsg_result); 1318 done: 1319 if (path) 1320 objcache_put(namei_oc, path); 1321 if (ctl) 1322 kfree(ctl, M_TEMP); 1323 if (buf) 1324 kfree(buf, M_TEMP); 1325 return (error); 1326 } 1327 1328 /* 1329 * Execute a mount control operation by resolving the path to a mount point 1330 * and calling vop_mountctl(). 1331 * 1332 * Use the mount point from the nch instead of the vnode so nullfs mounts 1333 * can properly spike the VOP. 
1334 */ 1335 int 1336 kern_mountctl(const char *path, int op, struct file *fp, 1337 const void *ctl, int ctllen, 1338 void *buf, int buflen, int *res) 1339 { 1340 struct vnode *vp; 1341 struct nlookupdata nd; 1342 struct nchandle nch; 1343 struct mount *mp; 1344 int error; 1345 1346 *res = 0; 1347 vp = NULL; 1348 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1349 if (error) 1350 return (error); 1351 error = nlookup(&nd); 1352 if (error) { 1353 nlookup_done(&nd); 1354 return (error); 1355 } 1356 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1357 if (error) { 1358 nlookup_done(&nd); 1359 return (error); 1360 } 1361 1362 /* 1363 * Yes, all this is needed to use the nch.mount below, because 1364 * we must maintain a ref on the mount to avoid ripouts (e.g. 1365 * due to heavy mount/unmount use by synth or poudriere). 1366 */ 1367 nch = nd.nl_nch; 1368 cache_zero(&nd.nl_nch); 1369 cache_unlock(&nch); 1370 nlookup_done(&nd); 1371 vn_unlock(vp); 1372 1373 mp = nch.mount; 1374 1375 /* 1376 * Must be the root of the filesystem 1377 */ 1378 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1379 cache_drop(&nch); 1380 vrele(vp); 1381 return (EINVAL); 1382 } 1383 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1384 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1385 path); 1386 cache_drop(&nch); 1387 vrele(vp); 1388 return (EINVAL); 1389 } 1390 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1391 buf, buflen, res); 1392 vrele(vp); 1393 cache_drop(&nch); 1394 1395 return (error); 1396 } 1397 1398 int 1399 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1400 { 1401 struct thread *td = curthread; 1402 struct proc *p = td->td_proc; 1403 struct mount *mp; 1404 struct statfs *sp; 1405 char *fullpath, *freepath; 1406 int error; 1407 1408 if ((error = nlookup(nd)) != 0) 1409 return (error); 1410 mp = nd->nl_nch.mount; 1411 sp = &mp->mnt_stat; 1412 1413 /* 1414 * Ignore refresh error, user should have visibility. 
1415 * This can happen if a NFS mount goes bad (e.g. server 1416 * revokes perms or goes down). 1417 */ 1418 error = VFS_STATFS(mp, sp, nd->nl_cred); 1419 /* ignore error */ 1420 1421 error = mount_path(p, mp, &fullpath, &freepath); 1422 if (error) 1423 return(error); 1424 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1425 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1426 kfree(freepath, M_TEMP); 1427 1428 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1429 bcopy(sp, buf, sizeof(*buf)); 1430 /* Only root should have access to the fsid's. */ 1431 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) 1432 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1433 return (0); 1434 } 1435 1436 /* 1437 * statfs_args(char *path, struct statfs *buf) 1438 * 1439 * Get filesystem statistics. 1440 */ 1441 int 1442 sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap) 1443 { 1444 struct nlookupdata nd; 1445 struct statfs buf; 1446 int error; 1447 1448 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1449 if (error == 0) 1450 error = kern_statfs(&nd, &buf); 1451 nlookup_done(&nd); 1452 if (error == 0) 1453 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1454 return (error); 1455 } 1456 1457 int 1458 kern_fstatfs(int fd, struct statfs *buf) 1459 { 1460 struct thread *td = curthread; 1461 struct proc *p = td->td_proc; 1462 struct file *fp; 1463 struct mount *mp; 1464 struct statfs *sp; 1465 char *fullpath, *freepath; 1466 int error; 1467 1468 KKASSERT(p); 1469 if ((error = holdvnode(td, fd, &fp)) != 0) 1470 return (error); 1471 1472 /* 1473 * Try to use mount info from any overlays rather than the 1474 * mount info for the underlying vnode, otherwise we will 1475 * fail when operating on null-mounted paths inside a chroot. 
1476 */ 1477 if ((mp = fp->f_nchandle.mount) == NULL) 1478 mp = ((struct vnode *)fp->f_data)->v_mount; 1479 if (mp == NULL) { 1480 error = EBADF; 1481 goto done; 1482 } 1483 if (fp->f_cred == NULL) { 1484 error = EINVAL; 1485 goto done; 1486 } 1487 1488 /* 1489 * Ignore refresh error, user should have visibility. 1490 * This can happen if a NFS mount goes bad (e.g. server 1491 * revokes perms or goes down). 1492 */ 1493 sp = &mp->mnt_stat; 1494 error = VFS_STATFS(mp, sp, fp->f_cred); 1495 1496 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1497 goto done; 1498 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1499 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1500 kfree(freepath, M_TEMP); 1501 1502 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1503 bcopy(sp, buf, sizeof(*buf)); 1504 1505 /* Only root should have access to the fsid's. */ 1506 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) 1507 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1508 error = 0; 1509 done: 1510 fdrop(fp); 1511 return (error); 1512 } 1513 1514 /* 1515 * fstatfs_args(int fd, struct statfs *buf) 1516 * 1517 * Get filesystem statistics. 
1518 */ 1519 int 1520 sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap) 1521 { 1522 struct statfs buf; 1523 int error; 1524 1525 error = kern_fstatfs(uap->fd, &buf); 1526 1527 if (error == 0) 1528 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1529 return (error); 1530 } 1531 1532 int 1533 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1534 { 1535 struct mount *mp; 1536 struct statvfs *sp; 1537 int error; 1538 1539 if ((error = nlookup(nd)) != 0) 1540 return (error); 1541 mp = nd->nl_nch.mount; 1542 sp = &mp->mnt_vstat; 1543 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1544 return (error); 1545 1546 sp->f_flag = 0; 1547 if (mp->mnt_flag & MNT_RDONLY) 1548 sp->f_flag |= ST_RDONLY; 1549 if (mp->mnt_flag & MNT_NOSUID) 1550 sp->f_flag |= ST_NOSUID; 1551 bcopy(sp, buf, sizeof(*buf)); 1552 return (0); 1553 } 1554 1555 /* 1556 * statfs_args(char *path, struct statfs *buf) 1557 * 1558 * Get filesystem statistics. 1559 */ 1560 int 1561 sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap) 1562 { 1563 struct nlookupdata nd; 1564 struct statvfs buf; 1565 int error; 1566 1567 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1568 if (error == 0) 1569 error = kern_statvfs(&nd, &buf); 1570 nlookup_done(&nd); 1571 if (error == 0) 1572 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1573 return (error); 1574 } 1575 1576 int 1577 kern_fstatvfs(int fd, struct statvfs *buf) 1578 { 1579 struct thread *td = curthread; 1580 struct file *fp; 1581 struct mount *mp; 1582 struct statvfs *sp; 1583 int error; 1584 1585 if ((error = holdvnode(td, fd, &fp)) != 0) 1586 return (error); 1587 if ((mp = fp->f_nchandle.mount) == NULL) 1588 mp = ((struct vnode *)fp->f_data)->v_mount; 1589 if (mp == NULL) { 1590 error = EBADF; 1591 goto done; 1592 } 1593 if (fp->f_cred == NULL) { 1594 error = EINVAL; 1595 goto done; 1596 } 1597 sp = &mp->mnt_vstat; 1598 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1599 goto done; 1600 1601 
sp->f_flag = 0; 1602 if (mp->mnt_flag & MNT_RDONLY) 1603 sp->f_flag |= ST_RDONLY; 1604 if (mp->mnt_flag & MNT_NOSUID) 1605 sp->f_flag |= ST_NOSUID; 1606 1607 bcopy(sp, buf, sizeof(*buf)); 1608 error = 0; 1609 done: 1610 fdrop(fp); 1611 return (error); 1612 } 1613 1614 /* 1615 * fstatfs_args(int fd, struct statfs *buf) 1616 * 1617 * Get filesystem statistics. 1618 */ 1619 int 1620 sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap) 1621 { 1622 struct statvfs buf; 1623 int error; 1624 1625 error = kern_fstatvfs(uap->fd, &buf); 1626 1627 if (error == 0) 1628 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1629 return (error); 1630 } 1631 1632 /* 1633 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1634 * 1635 * Get statistics on all filesystems. 1636 */ 1637 1638 struct getfsstat_info { 1639 struct statfs *sfsp; 1640 long count; 1641 long maxcount; 1642 int error; 1643 int flags; 1644 struct thread *td; 1645 }; 1646 1647 static int getfsstat_callback(struct mount *, void *); 1648 1649 int 1650 sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap) 1651 { 1652 struct thread *td = curthread; 1653 struct getfsstat_info info; 1654 1655 bzero(&info, sizeof(info)); 1656 1657 info.maxcount = uap->bufsize / sizeof(struct statfs); 1658 info.sfsp = uap->buf; 1659 info.count = 0; 1660 info.flags = uap->flags; 1661 info.td = td; 1662 1663 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1664 if (info.sfsp && info.count > info.maxcount) 1665 sysmsg->sysmsg_result = info.maxcount; 1666 else 1667 sysmsg->sysmsg_result = info.count; 1668 return (info.error); 1669 } 1670 1671 static int 1672 getfsstat_callback(struct mount *mp, void *data) 1673 { 1674 struct getfsstat_info *info = data; 1675 struct statfs *sp; 1676 char *freepath; 1677 char *fullpath; 1678 int error; 1679 1680 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1681 return(0); 1682 1683 if (info->sfsp && info->count < info->maxcount) { 1684 
sp = &mp->mnt_stat; 1685 1686 /* 1687 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1688 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1689 * overrides MNT_WAIT. 1690 * 1691 * Ignore refresh error, user should have visibility. 1692 * This can happen if a NFS mount goes bad (e.g. server 1693 * revokes perms or goes down). 1694 */ 1695 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1696 (info->flags & MNT_WAIT)) && 1697 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1698 /* ignore error */ 1699 } 1700 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1701 1702 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1703 if (error) { 1704 info->error = error; 1705 return(-1); 1706 } 1707 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1708 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1709 kfree(freepath, M_TEMP); 1710 1711 error = copyout(sp, info->sfsp, sizeof(*sp)); 1712 if (error) { 1713 info->error = error; 1714 return (-1); 1715 } 1716 ++info->sfsp; 1717 } 1718 info->count++; 1719 return(0); 1720 } 1721 1722 /* 1723 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1724 long bufsize, int flags) 1725 * 1726 * Get statistics on all filesystems. 
1727 */ 1728 1729 struct getvfsstat_info { 1730 struct statfs *sfsp; 1731 struct statvfs *vsfsp; 1732 long count; 1733 long maxcount; 1734 int error; 1735 int flags; 1736 struct thread *td; 1737 }; 1738 1739 static int getvfsstat_callback(struct mount *, void *); 1740 1741 int 1742 sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap) 1743 { 1744 struct thread *td = curthread; 1745 struct getvfsstat_info info; 1746 1747 bzero(&info, sizeof(info)); 1748 1749 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1750 info.sfsp = uap->buf; 1751 info.vsfsp = uap->vbuf; 1752 info.count = 0; 1753 info.flags = uap->flags; 1754 info.td = td; 1755 1756 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1757 if (info.vsfsp && info.count > info.maxcount) 1758 sysmsg->sysmsg_result = info.maxcount; 1759 else 1760 sysmsg->sysmsg_result = info.count; 1761 return (info.error); 1762 } 1763 1764 static int 1765 getvfsstat_callback(struct mount *mp, void *data) 1766 { 1767 struct getvfsstat_info *info = data; 1768 struct statfs *sp; 1769 struct statvfs *vsp; 1770 char *freepath; 1771 char *fullpath; 1772 int error; 1773 1774 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1775 return(0); 1776 1777 if (info->vsfsp && info->count < info->maxcount) { 1778 sp = &mp->mnt_stat; 1779 vsp = &mp->mnt_vstat; 1780 1781 /* 1782 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1783 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1784 * overrides MNT_WAIT. 1785 * 1786 * Ignore refresh error, user should have visibility. 1787 * This can happen if a NFS mount goes bad (e.g. server 1788 * revokes perms or goes down). 
1789 */ 1790 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1791 (info->flags & MNT_WAIT)) && 1792 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1793 /* ignore error */ 1794 } 1795 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1796 1797 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1798 (info->flags & MNT_WAIT)) && 1799 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1800 /* ignore error */ 1801 } 1802 vsp->f_flag = 0; 1803 if (mp->mnt_flag & MNT_RDONLY) 1804 vsp->f_flag |= ST_RDONLY; 1805 if (mp->mnt_flag & MNT_NOSUID) 1806 vsp->f_flag |= ST_NOSUID; 1807 1808 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1809 if (error) { 1810 info->error = error; 1811 return(-1); 1812 } 1813 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1814 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1815 kfree(freepath, M_TEMP); 1816 1817 error = copyout(sp, info->sfsp, sizeof(*sp)); 1818 if (error == 0) 1819 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1820 if (error) { 1821 info->error = error; 1822 return (-1); 1823 } 1824 ++info->sfsp; 1825 ++info->vsfsp; 1826 } 1827 info->count++; 1828 return(0); 1829 } 1830 1831 1832 /* 1833 * fchdir_args(int fd) 1834 * 1835 * Change current working directory to a given file descriptor. 
1836 */ 1837 int 1838 sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap) 1839 { 1840 struct thread *td = curthread; 1841 struct proc *p = td->td_proc; 1842 struct filedesc *fdp = p->p_fd; 1843 struct vnode *vp, *ovp; 1844 struct mount *mp; 1845 struct file *fp; 1846 struct nchandle nch, onch, tnch; 1847 int error; 1848 1849 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1850 return (error); 1851 lwkt_gettoken(&p->p_token); 1852 vp = (struct vnode *)fp->f_data; 1853 vref(vp); 1854 vn_lock(vp, LK_SHARED | LK_RETRY); 1855 if (fp->f_nchandle.ncp == NULL) 1856 error = ENOTDIR; 1857 else 1858 error = checkvp_chdir(vp, td); 1859 if (error) { 1860 vput(vp); 1861 goto done; 1862 } 1863 cache_copy(&fp->f_nchandle, &nch); 1864 1865 /* 1866 * If the ncp has become a mount point, traverse through 1867 * the mount point. 1868 */ 1869 1870 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1871 (mp = cache_findmount(&nch)) != NULL 1872 ) { 1873 error = nlookup_mp(mp, &tnch); 1874 if (error == 0) { 1875 cache_unlock(&tnch); /* leave ref intact */ 1876 vput(vp); 1877 vp = tnch.ncp->nc_vp; 1878 error = vget(vp, LK_SHARED); 1879 KKASSERT(error == 0); 1880 cache_drop(&nch); 1881 nch = tnch; 1882 } 1883 cache_dropmount(mp); 1884 } 1885 if (error == 0) { 1886 spin_lock(&fdp->fd_spin); 1887 ovp = fdp->fd_cdir; 1888 onch = fdp->fd_ncdir; 1889 fdp->fd_cdir = vp; 1890 fdp->fd_ncdir = nch; 1891 spin_unlock(&fdp->fd_spin); 1892 vn_unlock(vp); /* leave ref intact */ 1893 cache_drop(&onch); 1894 vrele(ovp); 1895 } else { 1896 cache_drop(&nch); 1897 vput(vp); 1898 } 1899 fdrop(fp); 1900 done: 1901 lwkt_reltoken(&p->p_token); 1902 return (error); 1903 } 1904 1905 int 1906 kern_chdir(struct nlookupdata *nd) 1907 { 1908 struct thread *td = curthread; 1909 struct proc *p = td->td_proc; 1910 struct filedesc *fdp = p->p_fd; 1911 struct vnode *vp, *ovp; 1912 struct nchandle onch; 1913 int error; 1914 1915 nd->nl_flags |= NLC_SHAREDLOCK; 1916 if ((error = nlookup(nd)) != 0) 1917 
return (error); 1918 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1919 return (ENOENT); 1920 if ((error = vget(vp, LK_SHARED)) != 0) 1921 return (error); 1922 1923 lwkt_gettoken(&p->p_token); 1924 error = checkvp_chdir(vp, td); 1925 vn_unlock(vp); 1926 if (error == 0) { 1927 spin_lock(&fdp->fd_spin); 1928 ovp = fdp->fd_cdir; 1929 onch = fdp->fd_ncdir; 1930 fdp->fd_ncdir = nd->nl_nch; 1931 fdp->fd_cdir = vp; 1932 spin_unlock(&fdp->fd_spin); 1933 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1934 cache_drop(&onch); 1935 vrele(ovp); 1936 cache_zero(&nd->nl_nch); 1937 } else { 1938 vrele(vp); 1939 } 1940 lwkt_reltoken(&p->p_token); 1941 return (error); 1942 } 1943 1944 /* 1945 * chdir_args(char *path) 1946 * 1947 * Change current working directory (``.''). 1948 */ 1949 int 1950 sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap) 1951 { 1952 struct nlookupdata nd; 1953 int error; 1954 1955 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1956 if (error == 0) 1957 error = kern_chdir(&nd); 1958 nlookup_done(&nd); 1959 return (error); 1960 } 1961 1962 /* 1963 * Helper function for raised chroot(2) security function: Refuse if 1964 * any filedescriptors are open directories. 1965 */ 1966 static int 1967 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1968 { 1969 struct vnode *vp; 1970 struct file *fp; 1971 int error; 1972 int fd; 1973 1974 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1975 if ((error = holdvnode(td, fd, &fp)) != 0) 1976 continue; 1977 vp = (struct vnode *)fp->f_data; 1978 if (vp->v_type != VDIR) { 1979 fdrop(fp); 1980 continue; 1981 } 1982 fdrop(fp); 1983 return(EPERM); 1984 } 1985 return (0); 1986 } 1987 1988 /* 1989 * This sysctl determines if we will allow a process to chroot(2) if it 1990 * has a directory open: 1991 * 0: disallowed for all processes. 1992 * 1: allowed for processes that were not already chroot(2)'ed. 1993 * 2: allowed for all processes. 
1994 */ 1995 1996 static int chroot_allow_open_directories = 1; 1997 1998 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1999 &chroot_allow_open_directories, 0, ""); 2000 2001 /* 2002 * chroot to the specified namecache entry. We obtain the vp from the 2003 * namecache data. The passed ncp must be locked and referenced and will 2004 * remain locked and referenced on return. 2005 */ 2006 int 2007 kern_chroot(struct nchandle *nch) 2008 { 2009 struct thread *td = curthread; 2010 struct proc *p = td->td_proc; 2011 struct filedesc *fdp = p->p_fd; 2012 struct vnode *vp; 2013 int error; 2014 2015 /* 2016 * Only privileged user can chroot 2017 */ 2018 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT); 2019 if (error) 2020 return (error); 2021 2022 /* 2023 * Disallow open directory descriptors (fchdir() breakouts). 2024 */ 2025 if (chroot_allow_open_directories == 0 || 2026 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 2027 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 2028 return (error); 2029 } 2030 if ((vp = nch->ncp->nc_vp) == NULL) 2031 return (ENOENT); 2032 2033 if ((error = vget(vp, LK_SHARED)) != 0) 2034 return (error); 2035 2036 /* 2037 * Check the validity of vp as a directory to change to and 2038 * associate it with rdir/jdir. 
2039 */ 2040 error = checkvp_chdir(vp, td); 2041 vn_unlock(vp); /* leave reference intact */ 2042 if (error == 0) { 2043 lwkt_gettoken(&p->p_token); 2044 vrele(fdp->fd_rdir); 2045 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 2046 cache_drop(&fdp->fd_nrdir); 2047 cache_copy(nch, &fdp->fd_nrdir); 2048 if (fdp->fd_jdir == NULL) { 2049 fdp->fd_jdir = vp; 2050 vref(fdp->fd_jdir); 2051 cache_copy(nch, &fdp->fd_njdir); 2052 } 2053 if ((p->p_flags & P_DIDCHROOT) == 0) { 2054 p->p_flags |= P_DIDCHROOT; 2055 if (p->p_depth <= 65535 - 32) 2056 p->p_depth += 32; 2057 } 2058 lwkt_reltoken(&p->p_token); 2059 } else { 2060 vrele(vp); 2061 } 2062 return (error); 2063 } 2064 2065 /* 2066 * chroot_args(char *path) 2067 * 2068 * Change notion of root (``/'') directory. 2069 */ 2070 int 2071 sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap) 2072 { 2073 struct thread *td __debugvar = curthread; 2074 struct nlookupdata nd; 2075 int error; 2076 2077 KKASSERT(td->td_proc); 2078 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2079 if (error == 0) { 2080 nd.nl_flags |= NLC_EXEC; 2081 error = nlookup(&nd); 2082 if (error == 0) 2083 error = kern_chroot(&nd.nl_nch); 2084 } 2085 nlookup_done(&nd); 2086 return(error); 2087 } 2088 2089 int 2090 sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap) 2091 { 2092 struct thread *td = curthread; 2093 struct nlookupdata nd; 2094 struct nchandle *nch; 2095 struct vnode *vp; 2096 int error; 2097 2098 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2099 if (error) 2100 goto error_nond; 2101 2102 error = nlookup(&nd); 2103 if (error) 2104 goto error_out; 2105 2106 nch = &nd.nl_nch; 2107 2108 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT); 2109 if (error) 2110 goto error_out; 2111 2112 if ((vp = nch->ncp->nc_vp) == NULL) { 2113 error = ENOENT; 2114 goto error_out; 2115 } 2116 2117 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 2118 goto error_out; 2119 2120 
vfs_cache_setroot(vp, cache_hold(nch)); 2121 2122 error_out: 2123 nlookup_done(&nd); 2124 error_nond: 2125 return(error); 2126 } 2127 2128 /* 2129 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2130 * determine whether it is legal to chdir to the vnode. The vnode's state 2131 * is not changed by this call. 2132 */ 2133 static int 2134 checkvp_chdir(struct vnode *vp, struct thread *td) 2135 { 2136 int error; 2137 2138 if (vp->v_type != VDIR) 2139 error = ENOTDIR; 2140 else 2141 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2142 return (error); 2143 } 2144 2145 int 2146 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2147 { 2148 struct thread *td = curthread; 2149 struct proc *p = td->td_proc; 2150 struct lwp *lp = td->td_lwp; 2151 struct filedesc *fdp = p->p_fd; 2152 int cmode, flags; 2153 struct file *nfp; 2154 struct file *fp; 2155 int type, indx, error = 0; 2156 struct flock lf; 2157 2158 if ((oflags & O_ACCMODE) == O_ACCMODE) 2159 return (EINVAL); 2160 flags = FFLAGS(oflags); 2161 error = falloc(lp, &nfp, NULL); 2162 if (error) 2163 return (error); 2164 fp = nfp; 2165 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2166 2167 /* 2168 * Call vn_open() to do the lookup and assign the vnode to the 2169 * file pointer. vn_open() does not change the ref count on fp 2170 * and the vnode, on success, will be inherited by the file pointer 2171 * and unlocked. 2172 * 2173 * Request a shared lock on the vnode if possible. 2174 * 2175 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode 2176 * lock for O_RDWR opens on executables in order to avoid a VTEXT 2177 * detection race. The NLC_EXCLLOCK_IFEXEC handles this case. 2178 * 2179 * NOTE: We need a flag to separate terminal vnode locking from 2180 * parent locking. O_CREAT needs parent locking, but O_TRUNC 2181 * and O_RDWR only need to lock the terminal vnode exclusively. 
2182 */ 2183 nd->nl_flags |= NLC_LOCKVP; 2184 if ((flags & (O_CREAT|O_TRUNC)) == 0) { 2185 nd->nl_flags |= NLC_SHAREDLOCK; 2186 if (flags & O_RDWR) 2187 nd->nl_flags |= NLC_EXCLLOCK_IFEXEC; 2188 } 2189 2190 /* 2191 * Issue the vn_open, passing in the referenced fp. the vn_open() 2192 * is allowed to replace fp by fdrop()ing it and returning its own 2193 * referenced fp. 2194 */ 2195 nfp = fp; 2196 error = vn_open(nd, &nfp, flags, cmode); 2197 fp = nfp; 2198 nlookup_done(nd); 2199 2200 /* 2201 * Deal with any error condition 2202 */ 2203 if (error) { 2204 fdrop(fp); /* our ref */ 2205 if (error == ERESTART) 2206 error = EINTR; 2207 return (error); 2208 } 2209 2210 /* 2211 * Reserve a file descriptor. 2212 */ 2213 if ((error = fdalloc(p, 0, &indx)) != 0) { 2214 fdrop(fp); 2215 return (error); 2216 } 2217 2218 /* 2219 * Handle advisory lock flags. This is only supported with vnodes. 2220 * For things like /dev/fd/N we might not actually get a vnode. 2221 */ 2222 if ((flags & (O_EXLOCK | O_SHLOCK)) && fp->f_type == DTYPE_VNODE) { 2223 struct vnode *vp; 2224 2225 vp = (struct vnode *)fp->f_data; 2226 vref(vp); 2227 2228 lf.l_whence = SEEK_SET; 2229 lf.l_start = 0; 2230 lf.l_len = 0; 2231 if (flags & O_EXLOCK) 2232 lf.l_type = F_WRLCK; 2233 else 2234 lf.l_type = F_RDLCK; 2235 if (flags & FNONBLOCK) 2236 type = 0; 2237 else 2238 type = F_WAIT; 2239 2240 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); 2241 if (error) { 2242 /* 2243 * lock request failed. Clean up the reserved 2244 * descriptor. 2245 */ 2246 vrele(vp); 2247 fsetfd(fdp, NULL, indx); 2248 fdrop(fp); 2249 return (error); 2250 } 2251 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2252 vrele(vp); 2253 } 2254 2255 /* 2256 * release our private reference, leaving the one associated with the 2257 * descriptor table intact. 
2258 */ 2259 if (oflags & O_CLOEXEC) 2260 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2261 fsetfd(fdp, fp, indx); 2262 fdrop(fp); 2263 *res = indx; 2264 2265 return (error); 2266 } 2267 2268 /* 2269 * open_args(char *path, int flags, int mode) 2270 * 2271 * Check permissions, allocate an open file structure, 2272 * and call the device open routine if any. 2273 */ 2274 int 2275 sys_open(struct sysmsg *sysmsg, const struct open_args *uap) 2276 { 2277 struct nlookupdata nd; 2278 int error; 2279 2280 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2281 if (error == 0) { 2282 error = kern_open(&nd, uap->flags, 2283 uap->mode, &sysmsg->sysmsg_result); 2284 } 2285 nlookup_done(&nd); 2286 return (error); 2287 } 2288 2289 /* 2290 * openat_args(int fd, char *path, int flags, int mode) 2291 */ 2292 int 2293 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap) 2294 { 2295 struct nlookupdata nd; 2296 int error; 2297 struct file *fp; 2298 2299 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2300 if (error == 0) { 2301 error = kern_open(&nd, uap->flags, uap->mode, 2302 &sysmsg->sysmsg_result); 2303 } 2304 nlookup_done_at(&nd, fp); 2305 return (error); 2306 } 2307 2308 int 2309 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2310 { 2311 struct thread *td = curthread; 2312 struct proc *p = td->td_proc; 2313 struct vnode *vp; 2314 struct vattr vattr; 2315 int error; 2316 int whiteout = 0; 2317 2318 KKASSERT(p); 2319 2320 VATTR_NULL(&vattr); 2321 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2322 vattr.va_rmajor = rmajor; 2323 vattr.va_rminor = rminor; 2324 2325 switch (mode & S_IFMT) { 2326 case S_IFMT: /* used by badsect to flag bad sectors */ 2327 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_BAD); 2328 vattr.va_type = VBAD; 2329 break; 2330 case S_IFCHR: 2331 error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV); 2332 vattr.va_type = VCHR; 2333 break; 2334 case S_IFBLK: 2335 error = 
caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV); 2336 vattr.va_type = VBLK; 2337 break; 2338 case S_IFWHT: 2339 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_WHT); 2340 whiteout = 1; 2341 break; 2342 case S_IFDIR: /* special directories support for HAMMER */ 2343 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_DIR); 2344 vattr.va_type = VDIR; 2345 break; 2346 case S_IFIFO: 2347 return (kern_mkfifo(nd, mode)); 2348 break; 2349 default: 2350 error = EINVAL; 2351 break; 2352 } 2353 2354 if (error) 2355 return (error); 2356 2357 bwillinode(1); 2358 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2359 if ((error = nlookup(nd)) != 0) 2360 return (error); 2361 if (nd->nl_nch.ncp->nc_vp) 2362 return (EEXIST); 2363 if (nd->nl_dvp == NULL) 2364 return (EINVAL); 2365 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2366 return (error); 2367 2368 if (whiteout) { 2369 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2370 nd->nl_cred, NAMEI_CREATE); 2371 } else { 2372 vp = NULL; 2373 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2374 &vp, nd->nl_cred, &vattr); 2375 if (error == 0) 2376 vput(vp); 2377 } 2378 return (error); 2379 } 2380 2381 /* 2382 * mknod_args(char *path, int mode, int dev) 2383 * 2384 * Create a special file. 2385 */ 2386 int 2387 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap) 2388 { 2389 struct nlookupdata nd; 2390 int error; 2391 2392 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2393 if (error == 0) { 2394 error = kern_mknod(&nd, uap->mode, 2395 umajor(uap->dev), uminor(uap->dev)); 2396 } 2397 nlookup_done(&nd); 2398 return (error); 2399 } 2400 2401 /* 2402 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2403 * 2404 * Create a special file. The path is relative to the directory associated 2405 * with fd. 
2406 */ 2407 int 2408 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap) 2409 { 2410 struct nlookupdata nd; 2411 struct file *fp; 2412 int error; 2413 2414 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2415 if (error == 0) { 2416 error = kern_mknod(&nd, uap->mode, 2417 umajor(uap->dev), uminor(uap->dev)); 2418 } 2419 nlookup_done_at(&nd, fp); 2420 return (error); 2421 } 2422 2423 int 2424 kern_mkfifo(struct nlookupdata *nd, int mode) 2425 { 2426 struct thread *td = curthread; 2427 struct proc *p = td->td_proc; 2428 struct vattr vattr; 2429 struct vnode *vp; 2430 int error; 2431 2432 bwillinode(1); 2433 2434 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2435 if ((error = nlookup(nd)) != 0) 2436 return (error); 2437 if (nd->nl_nch.ncp->nc_vp) 2438 return (EEXIST); 2439 if (nd->nl_dvp == NULL) 2440 return (EINVAL); 2441 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2442 return (error); 2443 2444 VATTR_NULL(&vattr); 2445 vattr.va_type = VFIFO; 2446 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2447 vp = NULL; 2448 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2449 if (error == 0) 2450 vput(vp); 2451 return (error); 2452 } 2453 2454 /* 2455 * mkfifo_args(char *path, int mode) 2456 * 2457 * Create a named pipe. 2458 */ 2459 int 2460 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap) 2461 { 2462 struct nlookupdata nd; 2463 int error; 2464 2465 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2466 if (error == 0) 2467 error = kern_mkfifo(&nd, uap->mode); 2468 nlookup_done(&nd); 2469 return (error); 2470 } 2471 2472 /* 2473 * mkfifoat_args(int fd, char *path, mode_t mode) 2474 * 2475 * Create a named pipe. The path is relative to the directory associated 2476 * with fd. 
2477 */ 2478 int 2479 sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap) 2480 { 2481 struct nlookupdata nd; 2482 struct file *fp; 2483 int error; 2484 2485 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2486 if (error == 0) 2487 error = kern_mkfifo(&nd, uap->mode); 2488 nlookup_done_at(&nd, fp); 2489 return (error); 2490 } 2491 2492 static int hardlink_check_uid = 0; 2493 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2494 &hardlink_check_uid, 0, 2495 "Unprivileged processes cannot create hard links to files owned by other " 2496 "users"); 2497 static int hardlink_check_gid = 0; 2498 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2499 &hardlink_check_gid, 0, 2500 "Unprivileged processes cannot create hard links to files owned by other " 2501 "groups"); 2502 2503 static int 2504 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2505 { 2506 struct vattr va; 2507 int error; 2508 2509 /* 2510 * Shortcut if disabled 2511 */ 2512 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2513 return (0); 2514 2515 /* 2516 * Privileged user can always hardlink 2517 */ 2518 if (caps_priv_check(cred, SYSCAP_NOVFS_LINK) == 0) 2519 return (0); 2520 2521 /* 2522 * Otherwise only if the originating file is owned by the 2523 * same user or group. Note that any group is allowed if 2524 * the file is owned by the caller. 2525 */ 2526 error = VOP_GETATTR(vp, &va); 2527 if (error != 0) 2528 return (error); 2529 2530 if (hardlink_check_uid) { 2531 if (cred->cr_uid != va.va_uid) 2532 return (EPERM); 2533 } 2534 2535 if (hardlink_check_gid) { 2536 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2537 return (EPERM); 2538 } 2539 2540 return (0); 2541 } 2542 2543 int 2544 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2545 { 2546 struct thread *td = curthread; 2547 struct vnode *vp; 2548 int error; 2549 2550 /* 2551 * Lookup the source and obtained a locked vnode. 
2552 * 2553 * You may only hardlink a file which you have write permission 2554 * on or which you own. 2555 * 2556 * XXX relookup on vget failure / race ? 2557 */ 2558 bwillinode(1); 2559 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2560 if ((error = nlookup(nd)) != 0) 2561 return (error); 2562 vp = nd->nl_nch.ncp->nc_vp; 2563 KKASSERT(vp != NULL); 2564 if (vp->v_type == VDIR) 2565 return (EPERM); /* POSIX */ 2566 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2567 return (error); 2568 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2569 return (error); 2570 2571 /* 2572 * Unlock the source so we can lookup the target without deadlocking 2573 * (XXX vp is locked already, possible other deadlock?). The target 2574 * must not exist. 2575 */ 2576 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2577 nd->nl_flags &= ~NLC_NCPISLOCKED; 2578 cache_unlock(&nd->nl_nch); 2579 vn_unlock(vp); 2580 2581 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2582 if ((error = nlookup(linknd)) != 0) { 2583 vrele(vp); 2584 return (error); 2585 } 2586 if (linknd->nl_nch.ncp->nc_vp) { 2587 vrele(vp); 2588 return (EEXIST); 2589 } 2590 if (linknd->nl_dvp == NULL) { 2591 vrele(vp); 2592 return (EINVAL); 2593 } 2594 VFS_MODIFYING(vp->v_mount); 2595 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2596 if (error) { 2597 vrele(vp); 2598 return (error); 2599 } 2600 2601 /* 2602 * Finally run the new API VOP. 2603 */ 2604 error = can_hardlink(vp, td, td->td_ucred); 2605 if (error == 0) { 2606 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2607 vp, linknd->nl_cred); 2608 } 2609 vput(vp); 2610 return (error); 2611 } 2612 2613 /* 2614 * link_args(char *path, char *link) 2615 * 2616 * Make a hard file link. 
2617 */ 2618 int 2619 sys_link(struct sysmsg *sysmsg, const struct link_args *uap) 2620 { 2621 struct nlookupdata nd, linknd; 2622 int error; 2623 2624 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2625 if (error == 0) { 2626 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2627 if (error == 0) 2628 error = kern_link(&nd, &linknd); 2629 nlookup_done(&linknd); 2630 } 2631 nlookup_done(&nd); 2632 return (error); 2633 } 2634 2635 /* 2636 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2637 * 2638 * Make a hard file link. The path1 argument is relative to the directory 2639 * associated with fd1, and similarly the path2 argument is relative to 2640 * the directory associated with fd2. 2641 */ 2642 int 2643 sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap) 2644 { 2645 struct nlookupdata nd, linknd; 2646 struct file *fp1, *fp2; 2647 int error; 2648 2649 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2650 (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); 2651 if (error == 0) { 2652 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2653 uap->path2, UIO_USERSPACE, 0); 2654 if (error == 0) 2655 error = kern_link(&nd, &linknd); 2656 nlookup_done_at(&linknd, fp2); 2657 } 2658 nlookup_done_at(&nd, fp1); 2659 return (error); 2660 } 2661 2662 int 2663 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2664 { 2665 struct vattr vattr; 2666 struct vnode *vp; 2667 struct vnode *dvp; 2668 int error; 2669 2670 bwillinode(1); 2671 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2672 if ((error = nlookup(nd)) != 0) 2673 return (error); 2674 if (nd->nl_nch.ncp->nc_vp) 2675 return (EEXIST); 2676 if (nd->nl_dvp == NULL) 2677 return (EINVAL); 2678 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2679 return (error); 2680 dvp = nd->nl_dvp; 2681 VATTR_NULL(&vattr); 2682 vattr.va_mode = mode; 2683 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2684 if (error == 0) 2685 vput(vp); 2686 return (error); 2687 } 2688 2689 /* 2690 * symlink(char *path, char *link) 2691 * 2692 * Make a symbolic link. 2693 */ 2694 int 2695 sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap) 2696 { 2697 struct thread *td = curthread; 2698 struct nlookupdata nd; 2699 char *path; 2700 int error; 2701 int mode; 2702 2703 path = objcache_get(namei_oc, M_WAITOK); 2704 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2705 if (error == 0) { 2706 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2707 if (error == 0) { 2708 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2709 error = kern_symlink(&nd, path, mode); 2710 } 2711 nlookup_done(&nd); 2712 } 2713 objcache_put(namei_oc, path); 2714 return (error); 2715 } 2716 2717 /* 2718 * symlinkat_args(char *path1, int fd, char *path2) 2719 * 2720 * Make a symbolic link. The path2 argument is relative to the directory 2721 * associated with fd. 
2722 */ 2723 int 2724 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap) 2725 { 2726 struct thread *td = curthread; 2727 struct nlookupdata nd; 2728 struct file *fp; 2729 char *path1; 2730 int error; 2731 int mode; 2732 2733 path1 = objcache_get(namei_oc, M_WAITOK); 2734 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2735 if (error == 0) { 2736 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2737 UIO_USERSPACE, 0); 2738 if (error == 0) { 2739 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2740 error = kern_symlink(&nd, path1, mode); 2741 } 2742 nlookup_done_at(&nd, fp); 2743 } 2744 objcache_put(namei_oc, path1); 2745 return (error); 2746 } 2747 2748 /* 2749 * undelete_args(char *path) 2750 * 2751 * Delete a whiteout from the filesystem. 2752 */ 2753 int 2754 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap) 2755 { 2756 struct nlookupdata nd; 2757 int error; 2758 2759 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2760 bwillinode(1); 2761 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2762 if (error == 0) 2763 error = nlookup(&nd); 2764 if (error == 0 && nd.nl_dvp == NULL) 2765 error = EINVAL; 2766 if (error == 0) 2767 error = ncp_writechk(&nd.nl_nch); 2768 if (error == 0) { 2769 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2770 NAMEI_DELETE); 2771 } 2772 nlookup_done(&nd); 2773 return (error); 2774 } 2775 2776 int 2777 kern_unlink(struct nlookupdata *nd) 2778 { 2779 int error; 2780 2781 bwillinode(1); 2782 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2783 if ((error = nlookup(nd)) != 0) 2784 return (error); 2785 if (nd->nl_dvp == NULL) 2786 return EINVAL; 2787 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2788 return (error); 2789 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2790 return (error); 2791 } 2792 2793 /* 2794 * unlink_args(char *path) 2795 * 2796 * Delete a name from the filesystem. 
2797 */ 2798 int 2799 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap) 2800 { 2801 struct nlookupdata nd; 2802 int error; 2803 2804 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2805 if (error == 0) 2806 error = kern_unlink(&nd); 2807 nlookup_done(&nd); 2808 return (error); 2809 } 2810 2811 2812 /* 2813 * unlinkat_args(int fd, char *path, int flags) 2814 * 2815 * Delete the file or directory entry pointed to by fd/path. 2816 */ 2817 int 2818 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap) 2819 { 2820 struct nlookupdata nd; 2821 struct file *fp; 2822 int error; 2823 2824 if (uap->flags & ~AT_REMOVEDIR) 2825 return (EINVAL); 2826 2827 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2828 if (error == 0) { 2829 if (uap->flags & AT_REMOVEDIR) 2830 error = kern_rmdir(&nd); 2831 else 2832 error = kern_unlink(&nd); 2833 } 2834 nlookup_done_at(&nd, fp); 2835 return (error); 2836 } 2837 2838 int 2839 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2840 { 2841 struct thread *td = curthread; 2842 struct file *fp; 2843 struct vnode *vp; 2844 struct vattr_lite lva; 2845 off_t new_offset; 2846 int error; 2847 2848 fp = holdfp(td, fd, -1); 2849 if (fp == NULL) 2850 return (EBADF); 2851 if (fp->f_type != DTYPE_VNODE) { 2852 error = ESPIPE; 2853 goto done; 2854 } 2855 vp = (struct vnode *)fp->f_data; 2856 2857 switch (whence) { 2858 case L_INCR: 2859 spin_lock(&fp->f_spin); 2860 new_offset = fp->f_offset + offset; 2861 error = 0; 2862 break; 2863 case L_XTND: 2864 error = VOP_GETATTR_LITE(vp, &lva); 2865 spin_lock(&fp->f_spin); 2866 new_offset = offset + lva.va_size; 2867 break; 2868 case L_SET: 2869 new_offset = offset; 2870 error = 0; 2871 spin_lock(&fp->f_spin); 2872 break; 2873 default: 2874 new_offset = 0; 2875 error = EINVAL; 2876 spin_lock(&fp->f_spin); 2877 break; 2878 } 2879 2880 /* 2881 * Validate the seek position. Negative offsets are not allowed 2882 * for regular files or directories. 
2883 * 2884 * Normally we would also not want to allow negative offsets for 2885 * character and block-special devices. However kvm addresses 2886 * on 64 bit architectures might appear to be negative and must 2887 * be allowed. 2888 */ 2889 if (error == 0) { 2890 if (new_offset < 0 && 2891 (vp->v_type == VREG || vp->v_type == VDIR)) { 2892 error = EINVAL; 2893 } else { 2894 fp->f_offset = new_offset; 2895 } 2896 } 2897 *res = fp->f_offset; 2898 spin_unlock(&fp->f_spin); 2899 done: 2900 dropfp(td, fd, fp); 2901 2902 return (error); 2903 } 2904 2905 /* 2906 * lseek_args(int fd, int pad, off_t offset, int whence) 2907 * 2908 * Reposition read/write file offset. 2909 */ 2910 int 2911 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap) 2912 { 2913 int error; 2914 2915 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2916 &sysmsg->sysmsg_offset); 2917 2918 return (error); 2919 } 2920 2921 /* 2922 * Check if current process can access given file. amode is a bitmask of *_OK 2923 * access bits. flags is a bitmask of AT_* flags. 2924 */ 2925 int 2926 kern_access(struct nlookupdata *nd, int amode, int flags) 2927 { 2928 struct vnode *vp; 2929 int error, mode; 2930 2931 if (flags & ~AT_EACCESS) 2932 return (EINVAL); 2933 nd->nl_flags |= NLC_SHAREDLOCK; 2934 if ((error = nlookup(nd)) != 0) 2935 return (error); 2936 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2937 return (error); 2938 retry: 2939 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2940 if (error) 2941 return (error); 2942 2943 /* Flags == 0 means only check for existence. 
*/ 2944 if (amode) { 2945 mode = 0; 2946 if (amode & R_OK) 2947 mode |= VREAD; 2948 if (amode & W_OK) 2949 mode |= VWRITE; 2950 if (amode & X_OK) 2951 mode |= VEXEC; 2952 if ((mode & VWRITE) == 0 || 2953 (error = vn_writechk(vp)) == 0) { 2954 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2955 } 2956 2957 /* 2958 * If the file handle is stale we have to re-resolve the 2959 * entry with the ncp held exclusively. This is a hack 2960 * at the moment. 2961 */ 2962 if (error == ESTALE) { 2963 u_int dummy_gen; 2964 2965 vput(vp); 2966 cache_unlock(&nd->nl_nch); 2967 cache_lock(&nd->nl_nch); 2968 dummy_gen = nd->nl_nch.ncp->nc_generation; 2969 cache_setunresolved(&nd->nl_nch); 2970 error = cache_resolve(&nd->nl_nch, &dummy_gen, 2971 nd->nl_cred); 2972 if (error == 0) { 2973 vp = NULL; 2974 goto retry; 2975 } 2976 return(error); 2977 } 2978 } 2979 vput(vp); 2980 return (error); 2981 } 2982 2983 /* 2984 * access_args(char *path, int flags) 2985 * 2986 * Check access permissions. 2987 */ 2988 int 2989 sys_access(struct sysmsg *sysmsg, const struct access_args *uap) 2990 { 2991 struct nlookupdata nd; 2992 int error; 2993 2994 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2995 if (error == 0) 2996 error = kern_access(&nd, uap->flags, 0); 2997 nlookup_done(&nd); 2998 return (error); 2999 } 3000 3001 3002 /* 3003 * eaccess_args(char *path, int flags) 3004 * 3005 * Check access permissions. 3006 */ 3007 int 3008 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap) 3009 { 3010 struct nlookupdata nd; 3011 int error; 3012 3013 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3014 if (error == 0) 3015 error = kern_access(&nd, uap->flags, AT_EACCESS); 3016 nlookup_done(&nd); 3017 return (error); 3018 } 3019 3020 3021 /* 3022 * faccessat_args(int fd, char *path, int amode, int flags) 3023 * 3024 * Check access permissions. 
3025 */ 3026 int 3027 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap) 3028 { 3029 struct nlookupdata nd; 3030 struct file *fp; 3031 int error; 3032 3033 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 3034 NLC_FOLLOW); 3035 if (error == 0) 3036 error = kern_access(&nd, uap->amode, uap->flags); 3037 nlookup_done_at(&nd, fp); 3038 return (error); 3039 } 3040 3041 int 3042 kern_stat(struct nlookupdata *nd, struct stat *st) 3043 { 3044 int error; 3045 struct vnode *vp; 3046 3047 nd->nl_flags |= NLC_SHAREDLOCK; 3048 if ((error = nlookup(nd)) != 0) 3049 return (error); 3050 again: 3051 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 3052 return (ENOENT); 3053 3054 #if 1 3055 error = cache_vref(&nd->nl_nch, NULL, &vp); 3056 #else 3057 error = vget(vp, LK_SHARED); 3058 #endif 3059 if (error) 3060 return (error); 3061 error = vn_stat(vp, st, nd->nl_cred); 3062 3063 /* 3064 * If the file handle is stale we have to re-resolve the 3065 * entry with the ncp held exclusively. This is a hack 3066 * at the moment. 3067 */ 3068 if (error == ESTALE) { 3069 u_int dummy_gen; 3070 #if 1 3071 vrele(vp); 3072 #else 3073 vput(vp); 3074 #endif 3075 cache_unlock(&nd->nl_nch); 3076 cache_lock(&nd->nl_nch); 3077 dummy_gen = nd->nl_nch.ncp->nc_generation; 3078 cache_setunresolved(&nd->nl_nch); 3079 error = cache_resolve(&nd->nl_nch, &dummy_gen, nd->nl_cred); 3080 if (error == 0) 3081 goto again; 3082 } else { 3083 #if 1 3084 vrele(vp); 3085 #else 3086 vput(vp); 3087 #endif 3088 } 3089 return (error); 3090 } 3091 3092 /* 3093 * stat_args(char *path, struct stat *ub) 3094 * 3095 * Get file status; this version follows links. 
3096 */ 3097 int 3098 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap) 3099 { 3100 struct nlookupdata nd; 3101 struct stat st; 3102 int error; 3103 3104 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3105 if (error == 0) { 3106 error = kern_stat(&nd, &st); 3107 if (error == 0) 3108 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3109 } 3110 nlookup_done(&nd); 3111 return (error); 3112 } 3113 3114 /* 3115 * lstat_args(char *path, struct stat *ub) 3116 * 3117 * Get file status; this version does not follow links. 3118 */ 3119 int 3120 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap) 3121 { 3122 struct nlookupdata nd; 3123 struct stat st; 3124 int error; 3125 3126 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3127 if (error == 0) { 3128 error = kern_stat(&nd, &st); 3129 if (error == 0) 3130 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3131 } 3132 nlookup_done(&nd); 3133 return (error); 3134 } 3135 3136 /* 3137 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3138 * 3139 * Get status of file pointed to by fd/path. 3140 */ 3141 int 3142 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap) 3143 { 3144 struct nlookupdata nd; 3145 struct stat st; 3146 int error; 3147 int flags; 3148 struct file *fp; 3149 3150 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3151 return (EINVAL); 3152 3153 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3154 3155 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3156 UIO_USERSPACE, flags); 3157 if (error == 0) { 3158 error = kern_stat(&nd, &st); 3159 if (error == 0) 3160 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3161 } 3162 nlookup_done_at(&nd, fp); 3163 return (error); 3164 } 3165 3166 static int 3167 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3168 { 3169 struct nlookupdata nd; 3170 struct vnode *vp; 3171 int error; 3172 3173 vp = NULL; 3174 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3175 if (error == 0) 3176 error = nlookup(&nd); 3177 if (error == 0) 3178 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3179 nlookup_done(&nd); 3180 if (error == 0) { 3181 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3182 vput(vp); 3183 } 3184 return (error); 3185 } 3186 3187 /* 3188 * pathconf_Args(char *path, int name) 3189 * 3190 * Get configurable pathname variables. 3191 */ 3192 int 3193 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap) 3194 { 3195 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3196 &sysmsg->sysmsg_reg)); 3197 } 3198 3199 /* 3200 * lpathconf_Args(char *path, int name) 3201 * 3202 * Get configurable pathname variables, but don't follow symlinks. 3203 */ 3204 int 3205 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap) 3206 { 3207 return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg)); 3208 } 3209 3210 /* 3211 * XXX: daver 3212 * kern_readlink isn't properly split yet. There is a copyin burried 3213 * in VOP_READLINK(). 
3214 */ 3215 int 3216 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3217 { 3218 struct thread *td = curthread; 3219 struct vnode *vp; 3220 struct iovec aiov; 3221 struct uio auio; 3222 int error; 3223 3224 nd->nl_flags |= NLC_SHAREDLOCK; 3225 if ((error = nlookup(nd)) != 0) 3226 return (error); 3227 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3228 if (error) 3229 return (error); 3230 if (vp->v_type != VLNK) { 3231 error = EINVAL; 3232 } else { 3233 aiov.iov_base = buf; 3234 aiov.iov_len = count; 3235 auio.uio_iov = &aiov; 3236 auio.uio_iovcnt = 1; 3237 auio.uio_offset = 0; 3238 auio.uio_rw = UIO_READ; 3239 auio.uio_segflg = UIO_USERSPACE; 3240 auio.uio_td = td; 3241 auio.uio_resid = count; 3242 error = VOP_READLINK(vp, &auio, td->td_ucred); 3243 } 3244 vput(vp); 3245 *res = count - auio.uio_resid; 3246 return (error); 3247 } 3248 3249 /* 3250 * readlink_args(char *path, char *buf, int count) 3251 * 3252 * Return target name of a symbolic link. 3253 */ 3254 int 3255 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap) 3256 { 3257 struct nlookupdata nd; 3258 int error; 3259 3260 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3261 if (error == 0) { 3262 error = kern_readlink(&nd, uap->buf, uap->count, 3263 &sysmsg->sysmsg_result); 3264 } 3265 nlookup_done(&nd); 3266 return (error); 3267 } 3268 3269 /* 3270 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3271 * 3272 * Return target name of a symbolic link. The path is relative to the 3273 * directory associated with fd. 
3274 */ 3275 int 3276 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap) 3277 { 3278 struct nlookupdata nd; 3279 struct file *fp; 3280 int error; 3281 3282 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3283 if (error == 0) { 3284 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3285 &sysmsg->sysmsg_result); 3286 } 3287 nlookup_done_at(&nd, fp); 3288 return (error); 3289 } 3290 3291 static int 3292 setfflags(struct vnode *vp, u_long flags) 3293 { 3294 struct thread *td = curthread; 3295 int error; 3296 struct vattr vattr; 3297 3298 /* 3299 * Prevent non-root users from setting flags on devices. When 3300 * a device is reused, users can retain ownership of the device 3301 * if they are allowed to set flags and programs assume that 3302 * chown can't fail when done as root. 3303 */ 3304 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3305 ((error = 3306 caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHFLAGS_DEV)) != 0)) 3307 { 3308 return (error); 3309 } 3310 3311 /* 3312 * note: vget is required for any operation that might mod the vnode 3313 * so VINACTIVE is properly cleared. 3314 */ 3315 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3316 VATTR_NULL(&vattr); 3317 vattr.va_flags = flags; 3318 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3319 vput(vp); 3320 } 3321 return (error); 3322 } 3323 3324 /* 3325 * chflags(const char *path, u_long flags) 3326 * 3327 * Change flags of a file given a path name. 
3328 */ 3329 int 3330 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap) 3331 { 3332 struct nlookupdata nd; 3333 struct vnode *vp; 3334 int error; 3335 3336 vp = NULL; 3337 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3338 if (error == 0) 3339 error = nlookup(&nd); 3340 if (error == 0) 3341 error = ncp_writechk(&nd.nl_nch); 3342 if (error == 0) 3343 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3344 nlookup_done(&nd); 3345 if (error == 0) { 3346 error = setfflags(vp, uap->flags); 3347 vrele(vp); 3348 } 3349 return (error); 3350 } 3351 3352 /* 3353 * lchflags(const char *path, u_long flags) 3354 * 3355 * Change flags of a file given a path name, but don't follow symlinks. 3356 */ 3357 int 3358 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap) 3359 { 3360 struct nlookupdata nd; 3361 struct vnode *vp; 3362 int error; 3363 3364 vp = NULL; 3365 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3366 if (error == 0) 3367 error = nlookup(&nd); 3368 if (error == 0) 3369 error = ncp_writechk(&nd.nl_nch); 3370 if (error == 0) 3371 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3372 nlookup_done(&nd); 3373 if (error == 0) { 3374 error = setfflags(vp, uap->flags); 3375 vrele(vp); 3376 } 3377 return (error); 3378 } 3379 3380 /* 3381 * fchflags_args(int fd, u_flags flags) 3382 * 3383 * Change flags of a file given a file descriptor. 
3384 */ 3385 int 3386 sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap) 3387 { 3388 struct thread *td = curthread; 3389 struct file *fp; 3390 int error; 3391 3392 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3393 return (error); 3394 if (fp->f_nchandle.ncp) 3395 error = ncp_writechk(&fp->f_nchandle); 3396 if (error == 0) 3397 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3398 fdrop(fp); 3399 return (error); 3400 } 3401 3402 /* 3403 * chflagsat_args(int fd, const char *path, u_long flags, int atflags) 3404 * change flags given a pathname relative to a filedescriptor 3405 */ 3406 int 3407 sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap) 3408 { 3409 struct nlookupdata nd; 3410 struct vnode *vp; 3411 struct file *fp; 3412 int error; 3413 int lookupflags; 3414 3415 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3416 return (EINVAL); 3417 3418 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3419 3420 vp = NULL; 3421 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3422 if (error == 0) 3423 error = nlookup(&nd); 3424 if (error == 0) 3425 error = ncp_writechk(&nd.nl_nch); 3426 if (error == 0) 3427 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3428 nlookup_done_at(&nd, fp); 3429 if (error == 0) { 3430 error = setfflags(vp, uap->flags); 3431 vrele(vp); 3432 } 3433 return (error); 3434 } 3435 3436 3437 static int 3438 setfmode(struct vnode *vp, int mode) 3439 { 3440 struct thread *td = curthread; 3441 int error; 3442 struct vattr vattr; 3443 3444 /* 3445 * note: vget is required for any operation that might mod the vnode 3446 * so VINACTIVE is properly cleared. 
3447 */ 3448 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3449 VATTR_NULL(&vattr); 3450 vattr.va_mode = mode & ALLPERMS; 3451 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3452 cache_inval_wxok(vp); 3453 vput(vp); 3454 } 3455 return error; 3456 } 3457 3458 int 3459 kern_chmod(struct nlookupdata *nd, int mode) 3460 { 3461 struct vnode *vp; 3462 int error; 3463 3464 if ((error = nlookup(nd)) != 0) 3465 return (error); 3466 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3467 return (error); 3468 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3469 error = setfmode(vp, mode); 3470 vrele(vp); 3471 return (error); 3472 } 3473 3474 /* 3475 * chmod_args(char *path, int mode) 3476 * 3477 * Change mode of a file given path name. 3478 */ 3479 int 3480 sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap) 3481 { 3482 struct nlookupdata nd; 3483 int error; 3484 3485 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3486 if (error == 0) 3487 error = kern_chmod(&nd, uap->mode); 3488 nlookup_done(&nd); 3489 return (error); 3490 } 3491 3492 /* 3493 * lchmod_args(char *path, int mode) 3494 * 3495 * Change mode of a file given path name (don't follow links.) 3496 */ 3497 int 3498 sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap) 3499 { 3500 struct nlookupdata nd; 3501 int error; 3502 3503 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3504 if (error == 0) 3505 error = kern_chmod(&nd, uap->mode); 3506 nlookup_done(&nd); 3507 return (error); 3508 } 3509 3510 /* 3511 * fchmod_args(int fd, int mode) 3512 * 3513 * Change mode of a file given a file descriptor. 
3514 */ 3515 int 3516 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap) 3517 { 3518 struct thread *td = curthread; 3519 struct file *fp; 3520 int error; 3521 3522 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3523 return (error); 3524 if (fp->f_nchandle.ncp) 3525 error = ncp_writechk(&fp->f_nchandle); 3526 if (error == 0) 3527 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3528 fdrop(fp); 3529 return (error); 3530 } 3531 3532 /* 3533 * fchmodat_args(char *path, int mode) 3534 * 3535 * Change mode of a file pointed to by fd/path. 3536 */ 3537 int 3538 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap) 3539 { 3540 struct nlookupdata nd; 3541 struct file *fp; 3542 int error; 3543 int flags; 3544 3545 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3546 return (EINVAL); 3547 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3548 3549 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3550 UIO_USERSPACE, flags); 3551 if (error == 0) 3552 error = kern_chmod(&nd, uap->mode); 3553 nlookup_done_at(&nd, fp); 3554 return (error); 3555 } 3556 3557 static int 3558 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3559 { 3560 struct thread *td = curthread; 3561 int error; 3562 struct vattr vattr; 3563 uid_t o_uid; 3564 gid_t o_gid; 3565 uint64_t size; 3566 3567 /* 3568 * note: vget is required for any operation that might mod the vnode 3569 * so VINACTIVE is properly cleared. 
3570 */ 3571 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3572 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3573 return error; 3574 o_uid = vattr.va_uid; 3575 o_gid = vattr.va_gid; 3576 size = vattr.va_size; 3577 3578 VATTR_NULL(&vattr); 3579 vattr.va_uid = uid; 3580 vattr.va_gid = gid; 3581 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3582 vput(vp); 3583 } 3584 3585 if (error == 0) { 3586 if (uid == -1) 3587 uid = o_uid; 3588 if (gid == -1) 3589 gid = o_gid; 3590 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3591 VFS_ACCOUNT(mp, uid, gid, size); 3592 } 3593 3594 return error; 3595 } 3596 3597 int 3598 kern_chown(struct nlookupdata *nd, int uid, int gid) 3599 { 3600 struct vnode *vp; 3601 int error; 3602 3603 if ((error = nlookup(nd)) != 0) 3604 return (error); 3605 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3606 return (error); 3607 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3608 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3609 vrele(vp); 3610 return (error); 3611 } 3612 3613 /* 3614 * chown(char *path, int uid, int gid) 3615 * 3616 * Set ownership given a path name. 3617 */ 3618 int 3619 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap) 3620 { 3621 struct nlookupdata nd; 3622 int error; 3623 3624 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3625 if (error == 0) 3626 error = kern_chown(&nd, uap->uid, uap->gid); 3627 nlookup_done(&nd); 3628 return (error); 3629 } 3630 3631 /* 3632 * lchown_args(char *path, int uid, int gid) 3633 * 3634 * Set ownership given a path name, do not cross symlinks. 
3635 */ 3636 int 3637 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap) 3638 { 3639 struct nlookupdata nd; 3640 int error; 3641 3642 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3643 if (error == 0) 3644 error = kern_chown(&nd, uap->uid, uap->gid); 3645 nlookup_done(&nd); 3646 return (error); 3647 } 3648 3649 /* 3650 * fchown_args(int fd, int uid, int gid) 3651 * 3652 * Set ownership given a file descriptor. 3653 */ 3654 int 3655 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap) 3656 { 3657 struct thread *td = curthread; 3658 struct proc *p = td->td_proc; 3659 struct file *fp; 3660 int error; 3661 3662 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3663 return (error); 3664 if (fp->f_nchandle.ncp) 3665 error = ncp_writechk(&fp->f_nchandle); 3666 if (error == 0) 3667 error = setfown(p->p_fd->fd_ncdir.mount, 3668 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3669 fdrop(fp); 3670 return (error); 3671 } 3672 3673 /* 3674 * fchownat(int fd, char *path, int uid, int gid, int flags) 3675 * 3676 * Set ownership of file pointed to by fd/path. 3677 */ 3678 int 3679 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap) 3680 { 3681 struct nlookupdata nd; 3682 struct file *fp; 3683 int error; 3684 int flags; 3685 3686 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3687 return (EINVAL); 3688 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3689 3690 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3691 UIO_USERSPACE, flags); 3692 if (error == 0) 3693 error = kern_chown(&nd, uap->uid, uap->gid); 3694 nlookup_done_at(&nd, fp); 3695 return (error); 3696 } 3697 3698 3699 static int 3700 getutimes(struct timeval *tvp, struct timespec *tsp) 3701 { 3702 struct timeval tv[2]; 3703 int error; 3704 3705 if (tvp == NULL) { 3706 microtime(&tv[0]); 3707 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3708 tsp[1] = tsp[0]; 3709 } else { 3710 if ((error = itimerfix(tvp)) != 0) 3711 return (error); 3712 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3713 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3714 } 3715 return 0; 3716 } 3717 3718 static int 3719 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3720 { 3721 struct timespec tsnow; 3722 int error; 3723 3724 *nullflag = 0; 3725 nanotime(&tsnow); 3726 if (ts == NULL) { 3727 newts[0] = tsnow; 3728 newts[1] = tsnow; 3729 *nullflag = 1; 3730 return (0); 3731 } 3732 3733 newts[0] = ts[0]; 3734 newts[1] = ts[1]; 3735 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) { 3736 newts[0].tv_sec = VNOVAL; 3737 newts[1].tv_sec = VNOVAL; 3738 return (0); 3739 } 3740 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3741 *nullflag = 1; 3742 3743 if (newts[0].tv_nsec == UTIME_OMIT) 3744 newts[0].tv_sec = VNOVAL; 3745 else if (newts[0].tv_nsec == UTIME_NOW) 3746 newts[0] = tsnow; 3747 else if ((error = itimespecfix(&newts[0])) != 0) 3748 return (error); 3749 3750 if (newts[1].tv_nsec == UTIME_OMIT) 3751 newts[1].tv_sec = VNOVAL; 3752 else if (newts[1].tv_nsec == UTIME_NOW) 3753 newts[1] = tsnow; 3754 else if ((error = itimespecfix(&newts[1])) != 0) 3755 return (error); 3756 3757 return (0); 3758 } 3759 3760 static int 3761 setutimes(struct vnode *vp, struct vattr *vattr, 3762 const struct timespec *ts, int nullflag) 3763 { 3764 struct thread *td = curthread; 3765 int error; 3766 3767 VATTR_NULL(vattr); 3768 
vattr->va_atime = ts[0]; 3769 vattr->va_mtime = ts[1]; 3770 if (nullflag) 3771 vattr->va_vaflags |= VA_UTIMES_NULL; 3772 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3773 3774 return error; 3775 } 3776 3777 int 3778 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3779 { 3780 struct timespec ts[2]; 3781 int error; 3782 3783 if (tptr) { 3784 if ((error = getutimes(tptr, ts)) != 0) 3785 return (error); 3786 } 3787 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3788 return (error); 3789 } 3790 3791 /* 3792 * utimes_args(char *path, struct timeval *tptr) 3793 * 3794 * Set the access and modification times of a file. 3795 */ 3796 int 3797 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap) 3798 { 3799 struct timeval tv[2]; 3800 struct nlookupdata nd; 3801 int error; 3802 3803 if (uap->tptr) { 3804 error = copyin(uap->tptr, tv, sizeof(tv)); 3805 if (error) 3806 return (error); 3807 } 3808 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3809 if (error == 0) 3810 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3811 nlookup_done(&nd); 3812 return (error); 3813 } 3814 3815 /* 3816 * lutimes_args(char *path, struct timeval *tptr) 3817 * 3818 * Set the access and modification times of a file. 3819 */ 3820 int 3821 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap) 3822 { 3823 struct timeval tv[2]; 3824 struct nlookupdata nd; 3825 int error; 3826 3827 if (uap->tptr) { 3828 error = copyin(uap->tptr, tv, sizeof(tv)); 3829 if (error) 3830 return (error); 3831 } 3832 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3833 if (error == 0) 3834 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3835 nlookup_done(&nd); 3836 return (error); 3837 } 3838 3839 /* 3840 * Set utimes on a file descriptor. The creds used to open the 3841 * file are used to determine whether the operation is allowed 3842 * or not. 
3843 */ 3844 int 3845 kern_futimens(int fd, struct timespec *ts) 3846 { 3847 struct thread *td = curthread; 3848 struct timespec newts[2]; 3849 struct file *fp; 3850 struct vnode *vp; 3851 struct vattr vattr; 3852 struct vattr_lite lva; 3853 int nullflag; 3854 int error; 3855 3856 error = getutimens(ts, newts, &nullflag); 3857 if (error) 3858 return (error); 3859 if ((error = holdvnode(td, fd, &fp)) != 0) 3860 return (error); 3861 if (fp->f_nchandle.ncp) 3862 error = ncp_writechk(&fp->f_nchandle); 3863 if (error == 0) { 3864 vp = fp->f_data; 3865 error = vget(vp, LK_EXCLUSIVE); 3866 if (error == 0) { 3867 error = VOP_GETATTR_FP(vp, &vattr, fp); 3868 if (error == 0) { 3869 lva.va_type = vattr.va_type; 3870 lva.va_nlink = vattr.va_nlink; 3871 lva.va_mode = vattr.va_mode; 3872 lva.va_uid = vattr.va_uid; 3873 lva.va_gid = vattr.va_gid; 3874 lva.va_size = vattr.va_size; 3875 lva.va_flags = vattr.va_flags; 3876 3877 error = naccess_lva(&lva, NLC_OWN | NLC_WRITE, 3878 fp->f_cred); 3879 } 3880 if (error == 0) { 3881 error = setutimes(vp, &vattr, newts, nullflag); 3882 } 3883 vput(vp); 3884 } 3885 } 3886 fdrop(fp); 3887 return (error); 3888 } 3889 3890 /* 3891 * futimens_args(int fd, struct timespec *ts) 3892 * 3893 * Set the access and modification times of a file. 3894 */ 3895 int 3896 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap) 3897 { 3898 struct timespec ts[2]; 3899 int error; 3900 3901 if (uap->ts) { 3902 error = copyin(uap->ts, ts, sizeof(ts)); 3903 if (error) 3904 return (error); 3905 } 3906 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3907 return (error); 3908 } 3909 3910 int 3911 kern_futimes(int fd, struct timeval *tptr) 3912 { 3913 struct timespec ts[2]; 3914 int error; 3915 3916 if (tptr) { 3917 if ((error = getutimes(tptr, ts)) != 0) 3918 return (error); 3919 } 3920 error = kern_futimens(fd, tptr ? 
ts : NULL); 3921 return (error); 3922 } 3923 3924 /* 3925 * futimes_args(int fd, struct timeval *tptr) 3926 * 3927 * Set the access and modification times of a file. 3928 */ 3929 int 3930 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap) 3931 { 3932 struct timeval tv[2]; 3933 int error; 3934 3935 if (uap->tptr) { 3936 error = copyin(uap->tptr, tv, sizeof(tv)); 3937 if (error) 3938 return (error); 3939 } 3940 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3941 return (error); 3942 } 3943 3944 /* 3945 * futimesat_args(int fd, const char *path, struct timeval *tptr) 3946 * 3947 * Set the access and modification times of a file. 3948 */ 3949 int 3950 sys_futimesat(struct sysmsg *sysmsg, const struct futimesat_args *uap) 3951 { 3952 struct timespec ts[2]; 3953 struct nlookupdata nd; 3954 struct file *fp; 3955 int error; 3956 3957 if (uap->tptr) { 3958 struct timeval tv[2]; 3959 3960 if ((error = copyin(uap->tptr, tv, sizeof(tv))) != 0) 3961 return error; 3962 if ((error = getutimes(tv, ts)) != 0) 3963 return error; 3964 } 3965 3966 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3967 UIO_USERSPACE, 0); 3968 if (error == 0) 3969 error = kern_utimensat(&nd, uap->tptr ? 
ts : NULL, 0); 3970 nlookup_done_at(&nd, fp); 3971 3972 return (error); 3973 } 3974 3975 int 3976 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3977 { 3978 struct timespec newts[2]; 3979 struct vnode *vp; 3980 struct vattr vattr; 3981 int nullflag; 3982 int error; 3983 3984 if (flags & ~AT_SYMLINK_NOFOLLOW) 3985 return (EINVAL); 3986 3987 error = getutimens(ts, newts, &nullflag); 3988 if (error) 3989 return (error); 3990 3991 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3992 if ((error = nlookup(nd)) != 0) 3993 return (error); 3994 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3995 return (error); 3996 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3997 return (error); 3998 if ((error = vn_writechk(vp)) == 0) { 3999 error = vget(vp, LK_EXCLUSIVE); 4000 if (error == 0) { 4001 error = setutimes(vp, &vattr, newts, nullflag); 4002 vput(vp); 4003 } 4004 } 4005 vrele(vp); 4006 return (error); 4007 } 4008 4009 /* 4010 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 4011 * 4012 * Set file access and modification times of a file. 4013 */ 4014 int 4015 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap) 4016 { 4017 struct timespec ts[2]; 4018 struct nlookupdata nd; 4019 struct file *fp; 4020 int error; 4021 int flags; 4022 4023 if (uap->ts) { 4024 error = copyin(uap->ts, ts, sizeof(ts)); 4025 if (error) 4026 return (error); 4027 } 4028 4029 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 4030 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 4031 UIO_USERSPACE, flags); 4032 if (error == 0) 4033 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 4034 nlookup_done_at(&nd, fp); 4035 return (error); 4036 } 4037 4038 int 4039 kern_truncate(struct nlookupdata *nd, off_t length) 4040 { 4041 struct vnode *vp; 4042 struct vattr vattr; 4043 int error; 4044 uid_t uid = 0; 4045 gid_t gid = 0; 4046 uint64_t old_size = 0; 4047 4048 if (length < 0) 4049 return(EINVAL); 4050 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 4051 if ((error = nlookup(nd)) != 0) 4052 return (error); 4053 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4054 return (error); 4055 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 4056 return (error); 4057 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 4058 if (error) { 4059 vrele(vp); 4060 return (error); 4061 } 4062 if (vp->v_type == VDIR) { 4063 error = EISDIR; 4064 goto done; 4065 } 4066 if (vfs_quota_enabled) { 4067 error = VOP_GETATTR(vp, &vattr); 4068 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 4069 uid = vattr.va_uid; 4070 gid = vattr.va_gid; 4071 old_size = vattr.va_size; 4072 } 4073 4074 if ((error = vn_writechk(vp)) == 0) { 4075 VATTR_NULL(&vattr); 4076 vattr.va_size = length; 4077 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 4078 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 4079 } 4080 done: 4081 vput(vp); 4082 return (error); 4083 } 4084 4085 /* 4086 * truncate(char *path, int pad, off_t length) 4087 * 4088 * Truncate a file given its path name. 
4089 */ 4090 int 4091 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap) 4092 { 4093 struct nlookupdata nd; 4094 int error; 4095 4096 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4097 if (error == 0) 4098 error = kern_truncate(&nd, uap->length); 4099 nlookup_done(&nd); 4100 return error; 4101 } 4102 4103 int 4104 kern_ftruncate(int fd, off_t length) 4105 { 4106 struct thread *td = curthread; 4107 struct vattr vattr; 4108 struct vnode *vp; 4109 struct file *fp; 4110 int error; 4111 uid_t uid = 0; 4112 gid_t gid = 0; 4113 uint64_t old_size = 0; 4114 struct mount *mp; 4115 4116 if (length < 0) 4117 return(EINVAL); 4118 if ((error = holdvnode(td, fd, &fp)) != 0) 4119 return (error); 4120 if (fp->f_nchandle.ncp) { 4121 error = ncp_writechk(&fp->f_nchandle); 4122 if (error) 4123 goto done; 4124 } 4125 if ((fp->f_flag & FWRITE) == 0) { 4126 error = EINVAL; 4127 goto done; 4128 } 4129 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 4130 error = EINVAL; 4131 goto done; 4132 } 4133 vp = (struct vnode *)fp->f_data; 4134 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4135 if (vp->v_type == VDIR) { 4136 error = EISDIR; 4137 vn_unlock(vp); 4138 goto done; 4139 } 4140 4141 if (vfs_quota_enabled) { 4142 error = VOP_GETATTR_FP(vp, &vattr, fp); 4143 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 4144 uid = vattr.va_uid; 4145 gid = vattr.va_gid; 4146 old_size = vattr.va_size; 4147 } 4148 4149 if ((error = vn_writechk(vp)) == 0) { 4150 VATTR_NULL(&vattr); 4151 vattr.va_size = length; 4152 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 4153 mp = vq_vptomp(vp); 4154 VFS_ACCOUNT(mp, uid, gid, length - old_size); 4155 } 4156 vn_unlock(vp); 4157 done: 4158 fdrop(fp); 4159 return (error); 4160 } 4161 4162 /* 4163 * ftruncate_args(int fd, int pad, off_t length) 4164 * 4165 * Truncate a file given a file descriptor. 
4166 */ 4167 int 4168 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap) 4169 { 4170 int error; 4171 4172 error = kern_ftruncate(uap->fd, uap->length); 4173 4174 return (error); 4175 } 4176 4177 int 4178 kern_fsync(int fd, bool fullsync) 4179 { 4180 struct thread *td = curthread; 4181 struct vnode *vp; 4182 struct file *fp; 4183 vm_object_t obj; 4184 int error; 4185 4186 if ((error = holdvnode(td, fd, &fp)) != 0) 4187 return (error); 4188 vp = (struct vnode *)fp->f_data; 4189 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4190 if ((obj = vp->v_object) != NULL) { 4191 if (vp->v_mount == NULL || 4192 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4193 vm_object_page_clean(obj, 0, 0, 0); 4194 } 4195 } 4196 error = fullsync ? 4197 VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp) : 4198 VOP_FDATASYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4199 if (error == 0 && vp->v_mount) 4200 error = buf_fsync(vp); 4201 vn_unlock(vp); 4202 fdrop(fp); 4203 4204 return (error); 4205 } 4206 4207 /* 4208 * fsync(int fd) 4209 * 4210 * Sync an open file. 4211 */ 4212 int 4213 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap) 4214 { 4215 return (kern_fsync(uap->fd, true)); 4216 } 4217 4218 /* 4219 * fdatasync(int fd) 4220 * 4221 * Data-sync an open file. 4222 */ 4223 int 4224 sys_fdatasync(struct sysmsg *sysmsg, const struct fdatasync_args *uap) 4225 { 4226 return (kern_fsync(uap->fd, false)); 4227 } 4228 4229 /* 4230 * rename op. 4231 * 4232 * NOTE: error == 0 and nl_dvp is NULL indicates a mount point, operation 4233 * disallowed. e.g. /var/cache where /var/cache is a null-mount, for 4234 * example. 
4235 */ 4236 int 4237 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4238 { 4239 struct nchandle fnchd; 4240 struct nchandle tnchd; 4241 struct namecache *ncp; 4242 struct vnode *fdvp; 4243 struct vnode *tdvp; 4244 struct mount *mp; 4245 struct mount *userenlk; 4246 int error; 4247 u_int fncp_gen; 4248 u_int tncp_gen; 4249 4250 bwillinode(1); 4251 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4252 if ((error = nlookup(fromnd)) != 0) 4253 return (error); 4254 4255 /* 4256 * Attempt to rename a mount point (from or to) 4257 */ 4258 if (error == 0 && fromnd->nl_dvp == NULL) 4259 return (EINVAL); 4260 4261 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4262 return (ENOENT); 4263 fnchd.mount = fromnd->nl_nch.mount; 4264 cache_hold(&fnchd); 4265 4266 /* 4267 * unlock the source nch so we can lookup the target nch without 4268 * deadlocking. The target may or may not exist so we do not check 4269 * for a target vp like kern_mkdir() and other creation functions do. 4270 * 4271 * The source and target directories are ref'd and rechecked after 4272 * everything is relocked to determine if the source or target file 4273 * has been renamed. 
4274 */ 4275 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4276 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4277 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4278 4279 if (fromnd->nl_nch.ncp->nc_vp && 4280 fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4281 userenlk = fnchd.mount; 4282 cache_unlock(&fromnd->nl_nch); 4283 lockmgr(&userenlk->mnt_renlock, LK_EXCLUSIVE); 4284 } else { 4285 userenlk = NULL; 4286 cache_unlock(&fromnd->nl_nch); 4287 } 4288 4289 /* 4290 * Lookup target 4291 */ 4292 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4293 if ((error = nlookup(tond)) != 0) { 4294 cache_drop(&fnchd); 4295 goto done; 4296 } 4297 tncp_gen = tond->nl_nch.ncp->nc_generation; 4298 4299 /* 4300 * Attempt to rename a mount point (from or to) 4301 */ 4302 if (error == 0 && tond->nl_dvp == NULL) { 4303 cache_drop(&fnchd); 4304 error = ENOENT; 4305 goto done; 4306 } 4307 4308 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4309 cache_drop(&fnchd); 4310 error = ENOENT; 4311 goto done; 4312 } 4313 tnchd.mount = tond->nl_nch.mount; 4314 cache_hold(&tnchd); 4315 4316 /* 4317 * If the source and target are the same there is nothing to do 4318 */ 4319 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4320 cache_drop(&fnchd); 4321 cache_drop(&tnchd); 4322 error = 0; 4323 goto done; 4324 } 4325 4326 /* 4327 * Mount points cannot be renamed or overwritten 4328 */ 4329 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4330 NCF_ISMOUNTPT 4331 ) { 4332 cache_drop(&fnchd); 4333 cache_drop(&tnchd); 4334 error = EINVAL; 4335 goto done; 4336 } 4337 4338 /* 4339 * Lock all four namecache entries. tond is already locked. 4340 */ 4341 cache_lock4_tondlocked(&fnchd, &fromnd->nl_nch, 4342 &tnchd, &tond->nl_nch, 4343 fromnd->nl_cred, tond->nl_cred); 4344 fromnd->nl_flags |= NLC_NCPISLOCKED; 4345 4346 /* 4347 * If the namecache generation changed for either fromnd or tond, 4348 * we must retry. 
4349 */ 4350 if (((fromnd->nl_nch.ncp->nc_generation - fncp_gen) & ~1) || 4351 ((tond->nl_nch.ncp->nc_generation - tncp_gen) & ~1)) 4352 { 4353 krateprintf(&krate_rename, 4354 "kern_rename: retry due to race on: " 4355 "\"%s\" -> \"%s\" (%d,%d)\n", 4356 fromnd->nl_nch.ncp->nc_name, 4357 tond->nl_nch.ncp->nc_name, 4358 fromnd->nl_nch.ncp->nc_generation - fncp_gen, 4359 tond->nl_nch.ncp->nc_generation - tncp_gen); 4360 error = EAGAIN; 4361 goto finish; 4362 } 4363 4364 /* 4365 * If either fromnd or tond are marked destroyed a ripout occured 4366 * out from under us and we must retry. 4367 */ 4368 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4369 fromnd->nl_nch.ncp->nc_vp == NULL || 4370 (tond->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED))) { 4371 krateprintf(&krate_rename, 4372 "kern_rename: retry due to ripout on: " 4373 "\"%s\" -> \"%s\"\n", 4374 fromnd->nl_nch.ncp->nc_name, 4375 tond->nl_nch.ncp->nc_name); 4376 error = EAGAIN; 4377 goto finish; 4378 } 4379 4380 /* 4381 * Make sure the parent directories linkages are the same. We have 4382 * already checked that fromnd and tond are not mount points so this 4383 * should not loop forever on a cross-mount. 4384 */ 4385 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4386 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4387 error = EAGAIN; 4388 goto finish; 4389 } 4390 4391 /* 4392 * Both the source and target must be within the same filesystem and 4393 * in the same filesystem as their parent directories within the 4394 * namecache topology. 4395 * 4396 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 
4397 */ 4398 mp = fnchd.mount; 4399 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4400 mp != tond->nl_nch.mount) { 4401 error = EXDEV; 4402 goto finish; 4403 } 4404 4405 /* 4406 * Make sure the mount point is writable 4407 */ 4408 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4409 goto finish; 4410 } 4411 4412 /* 4413 * If the target exists and either the source or target is a directory, 4414 * then both must be directories. 4415 * 4416 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4417 * have become NULL. 4418 */ 4419 if (tond->nl_nch.ncp->nc_vp) { 4420 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4421 error = ENOENT; 4422 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4423 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4424 error = ENOTDIR; 4425 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4426 error = EISDIR; 4427 } 4428 } 4429 4430 /* 4431 * You cannot rename a source into itself or a subdirectory of itself. 4432 * We check this by travsersing the target directory upwards looking 4433 * for a match against the source. 4434 * 4435 * Only required when renaming a directory, in which case userenlk is 4436 * non-NULL. 4437 */ 4438 if (__predict_false(userenlk && error == 0)) { 4439 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4440 if (fromnd->nl_nch.ncp == ncp) { 4441 error = EINVAL; 4442 break; 4443 } 4444 } 4445 } 4446 4447 /* 4448 * Even though the namespaces are different, they may still represent 4449 * hardlinks to the same file. The filesystem might have a hard time 4450 * with this so we issue a NREMOVE of the source instead of a NRENAME 4451 * when we detect the situation. 
4452 */ 4453 if (error == 0) { 4454 fdvp = fromnd->nl_dvp; 4455 tdvp = tond->nl_dvp; 4456 if (fdvp == NULL || tdvp == NULL) { 4457 error = EPERM; 4458 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4459 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4460 fromnd->nl_cred); 4461 } else { 4462 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4463 fdvp, tdvp, tond->nl_cred); 4464 } 4465 } 4466 finish: 4467 cache_put(&tnchd); 4468 cache_put(&fnchd); 4469 done: 4470 if (userenlk) 4471 lockmgr(&userenlk->mnt_renlock, LK_RELEASE); 4472 return (error); 4473 } 4474 4475 /* 4476 * rename_args(char *from, char *to) 4477 * 4478 * Rename files. Source and destination must either both be directories, 4479 * or both not be directories. If target is a directory, it must be empty. 4480 */ 4481 int 4482 sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap) 4483 { 4484 struct nlookupdata fromnd, tond; 4485 int error; 4486 4487 do { 4488 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4489 if (error == 0) { 4490 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4491 if (error == 0) 4492 error = kern_rename(&fromnd, &tond); 4493 nlookup_done(&tond); 4494 } 4495 nlookup_done(&fromnd); 4496 } while (error == EAGAIN); 4497 return (error); 4498 } 4499 4500 /* 4501 * renameat_args(int oldfd, char *old, int newfd, char *new) 4502 * 4503 * Rename files using paths relative to the directories associated with 4504 * oldfd and newfd. Source and destination must either both be directories, 4505 * or both not be directories. If target is a directory, it must be empty. 
4506 */ 4507 int 4508 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap) 4509 { 4510 struct nlookupdata oldnd, newnd; 4511 struct file *oldfp, *newfp; 4512 int error; 4513 4514 do { 4515 error = nlookup_init_at(&oldnd, &oldfp, 4516 uap->oldfd, uap->old, 4517 UIO_USERSPACE, 0); 4518 if (error == 0) { 4519 error = nlookup_init_at(&newnd, &newfp, 4520 uap->newfd, uap->new, 4521 UIO_USERSPACE, 0); 4522 if (error == 0) 4523 error = kern_rename(&oldnd, &newnd); 4524 nlookup_done_at(&newnd, newfp); 4525 } 4526 nlookup_done_at(&oldnd, oldfp); 4527 } while (error == EAGAIN); 4528 return (error); 4529 } 4530 4531 int 4532 kern_mkdir(struct nlookupdata *nd, int mode) 4533 { 4534 struct thread *td = curthread; 4535 struct proc *p = td->td_proc; 4536 struct vnode *vp; 4537 struct vattr vattr; 4538 int error; 4539 4540 bwillinode(1); 4541 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4542 if ((error = nlookup(nd)) != 0) 4543 return (error); 4544 4545 if (nd->nl_nch.ncp->nc_vp) 4546 return (EEXIST); 4547 if (nd->nl_dvp == NULL) 4548 return (EINVAL); 4549 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4550 return (error); 4551 VATTR_NULL(&vattr); 4552 vattr.va_type = VDIR; 4553 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4554 4555 vp = NULL; 4556 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4557 if (error == 0) 4558 vput(vp); 4559 return (error); 4560 } 4561 4562 /* 4563 * mkdir_args(char *path, int mode) 4564 * 4565 * Make a directory file. 4566 */ 4567 int 4568 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap) 4569 { 4570 struct nlookupdata nd; 4571 int error; 4572 4573 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4574 if (error == 0) 4575 error = kern_mkdir(&nd, uap->mode); 4576 nlookup_done(&nd); 4577 return (error); 4578 } 4579 4580 /* 4581 * mkdirat_args(int fd, char *path, mode_t mode) 4582 * 4583 * Make a directory file. 
The path is relative to the directory associated 4584 * with fd. 4585 */ 4586 int 4587 sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap) 4588 { 4589 struct nlookupdata nd; 4590 struct file *fp; 4591 int error; 4592 4593 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4594 if (error == 0) 4595 error = kern_mkdir(&nd, uap->mode); 4596 nlookup_done_at(&nd, fp); 4597 return (error); 4598 } 4599 4600 int 4601 kern_rmdir(struct nlookupdata *nd) 4602 { 4603 int error; 4604 4605 bwillinode(1); 4606 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4607 if ((error = nlookup(nd)) != 0) 4608 return (error); 4609 4610 /* 4611 * Do not allow directories representing mount points to be 4612 * deleted, even if empty. Check write perms on mount point 4613 * in case the vnode is aliased (aka nullfs). 4614 */ 4615 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4616 return (EBUSY); 4617 if (nd->nl_dvp == NULL) 4618 return (EINVAL); 4619 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4620 return (error); 4621 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4622 return (error); 4623 } 4624 4625 /* 4626 * rmdir_args(char *path) 4627 * 4628 * Remove a directory file. 
4629 */ 4630 int 4631 sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap) 4632 { 4633 struct nlookupdata nd; 4634 int error; 4635 4636 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4637 if (error == 0) 4638 error = kern_rmdir(&nd); 4639 nlookup_done(&nd); 4640 return (error); 4641 } 4642 4643 int 4644 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4645 enum uio_seg direction) 4646 { 4647 struct thread *td = curthread; 4648 struct vnode *vp; 4649 struct file *fp; 4650 struct uio auio; 4651 struct iovec aiov; 4652 off_t loff; 4653 int error, eofflag; 4654 4655 if ((error = holdvnode(td, fd, &fp)) != 0) 4656 return (error); 4657 if ((fp->f_flag & FREAD) == 0) { 4658 error = EBADF; 4659 goto done; 4660 } 4661 vp = (struct vnode *)fp->f_data; 4662 if (vp->v_type != VDIR) { 4663 error = EINVAL; 4664 goto done; 4665 } 4666 aiov.iov_base = buf; 4667 aiov.iov_len = count; 4668 auio.uio_iov = &aiov; 4669 auio.uio_iovcnt = 1; 4670 auio.uio_rw = UIO_READ; 4671 auio.uio_segflg = direction; 4672 auio.uio_td = td; 4673 auio.uio_resid = count; 4674 loff = auio.uio_offset = fp->f_offset; 4675 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4676 fp->f_offset = auio.uio_offset; 4677 if (error) 4678 goto done; 4679 4680 /* 4681 * WARNING! *basep may not be wide enough to accomodate the 4682 * seek offset. XXX should we hack this to return the upper 32 bits 4683 * for offsets greater then 4G? 4684 */ 4685 if (basep) { 4686 *basep = (long)loff; 4687 } 4688 *res = count - auio.uio_resid; 4689 done: 4690 fdrop(fp); 4691 return (error); 4692 } 4693 4694 /* 4695 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4696 * 4697 * Read a block of directory entries in a file system independent format. 
4698 */ 4699 int 4700 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap) 4701 { 4702 long base; 4703 int error; 4704 4705 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4706 &sysmsg->sysmsg_result, UIO_USERSPACE); 4707 4708 if (error == 0 && uap->basep) 4709 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4710 return (error); 4711 } 4712 4713 /* 4714 * getdents_args(int fd, char *buf, size_t count) 4715 */ 4716 int 4717 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap) 4718 { 4719 int error; 4720 4721 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4722 &sysmsg->sysmsg_result, UIO_USERSPACE); 4723 4724 return (error); 4725 } 4726 4727 /* 4728 * Set the mode mask for creation of filesystem nodes. 4729 * 4730 * umask(int newmask) 4731 */ 4732 int 4733 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap) 4734 { 4735 struct thread *td = curthread; 4736 struct proc *p = td->td_proc; 4737 struct filedesc *fdp; 4738 4739 fdp = p->p_fd; 4740 sysmsg->sysmsg_result = fdp->fd_cmask; 4741 fdp->fd_cmask = uap->newmask & ALLPERMS; 4742 return (0); 4743 } 4744 4745 /* 4746 * revoke(char *path) 4747 * 4748 * Void all references to file by ripping underlying filesystem 4749 * away from vnode. 
4750 */ 4751 int 4752 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap) 4753 { 4754 struct nlookupdata nd; 4755 struct vattr vattr; 4756 struct vnode *vp; 4757 struct ucred *cred; 4758 int error; 4759 4760 vp = NULL; 4761 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4762 if (error == 0) 4763 error = nlookup(&nd); 4764 if (error == 0) 4765 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4766 cred = crhold(nd.nl_cred); 4767 nlookup_done(&nd); 4768 if (error == 0) { 4769 if (error == 0) 4770 error = VOP_GETATTR(vp, &vattr); 4771 if (error == 0 && cred->cr_uid != vattr.va_uid) 4772 error = caps_priv_check(cred, SYSCAP_NOVFS_REVOKE); 4773 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4774 if (vcount(vp) > 0) 4775 error = vrevoke(vp, cred); 4776 } else if (error == 0) { 4777 error = vrevoke(vp, cred); 4778 } 4779 vrele(vp); 4780 } 4781 if (cred) 4782 crfree(cred); 4783 return (error); 4784 } 4785 4786 /* 4787 * getfh_args(char *fname, fhandle_t *fhp) 4788 * 4789 * Get (NFS) file handle 4790 * 4791 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4792 * mount. This allows nullfs mounts to be explicitly exported. 4793 * 4794 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4795 * 4796 * nullfs mounts of subdirectories are not safe. That is, it will 4797 * work, but you do not really have protection against access to 4798 * the related parent directories. 
4799 */ 4800 int 4801 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap) 4802 { 4803 struct nlookupdata nd; 4804 fhandle_t fh; 4805 struct vnode *vp; 4806 struct mount *mp; 4807 int error; 4808 4809 /* 4810 * Must be super user 4811 */ 4812 if ((error = caps_priv_check_self(SYSCAP_RESTRICTEDROOT)) != 0) 4813 return (error); 4814 4815 vp = NULL; 4816 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4817 if (error == 0) 4818 error = nlookup(&nd); 4819 if (error == 0) 4820 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4821 mp = nd.nl_nch.mount; 4822 nlookup_done(&nd); 4823 if (error == 0) { 4824 bzero(&fh, sizeof(fh)); 4825 fh.fh_fsid = mp->mnt_stat.f_fsid; 4826 error = VFS_VPTOFH(vp, &fh.fh_fid); 4827 vput(vp); 4828 if (error == 0) 4829 error = copyout(&fh, uap->fhp, sizeof(fh)); 4830 } 4831 return (error); 4832 } 4833 4834 /* 4835 * fhopen_args(const struct fhandle *u_fhp, int flags) 4836 * 4837 * syscall for the rpc.lockd to use to translate a NFS file handle into 4838 * an open descriptor. 4839 * 4840 * WARNING: Do not remove the caps_priv_check() call or this becomes 4841 * one giant security hole. 4842 */ 4843 int 4844 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap) 4845 { 4846 struct thread *td = curthread; 4847 struct filedesc *fdp = td->td_proc->p_fd; 4848 struct mount *mp; 4849 struct vnode *vp; 4850 struct fhandle fhp; 4851 struct vattr vat; 4852 struct vattr *vap = &vat; 4853 struct flock lf; 4854 int fmode, mode, error = 0, type; 4855 struct file *nfp; 4856 struct file *fp; 4857 int indx; 4858 4859 /* 4860 * Must be super user 4861 */ 4862 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 4863 if (error) 4864 return (error); 4865 4866 fmode = FFLAGS(uap->flags); 4867 4868 /* 4869 * Why not allow a non-read/write open for our lockd? 
4870 */ 4871 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4872 return (EINVAL); 4873 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4874 if (error) 4875 return(error); 4876 4877 /* 4878 * Find the mount point 4879 */ 4880 mp = vfs_getvfs(&fhp.fh_fsid); 4881 if (mp == NULL) { 4882 error = ESTALE; 4883 goto done2; 4884 } 4885 /* now give me my vnode, it gets returned to me locked */ 4886 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4887 if (error) 4888 goto done; 4889 /* 4890 * from now on we have to make sure not 4891 * to forget about the vnode 4892 * any error that causes an abort must vput(vp) 4893 * just set error = err and 'goto bad;'. 4894 */ 4895 4896 /* 4897 * from vn_open 4898 */ 4899 if (vp->v_type == VLNK) { 4900 error = EMLINK; 4901 goto bad; 4902 } 4903 if (vp->v_type == VSOCK) { 4904 error = EOPNOTSUPP; 4905 goto bad; 4906 } 4907 mode = 0; 4908 if (fmode & (FWRITE | O_TRUNC)) { 4909 if (vp->v_type == VDIR) { 4910 error = EISDIR; 4911 goto bad; 4912 } 4913 error = vn_writechk(vp); 4914 if (error) 4915 goto bad; 4916 mode |= VWRITE; 4917 } 4918 if (fmode & FREAD) 4919 mode |= VREAD; 4920 if (mode) { 4921 error = VOP_ACCESS(vp, mode, td->td_ucred); 4922 if (error) 4923 goto bad; 4924 } 4925 if (fmode & O_TRUNC) { 4926 vn_unlock(vp); /* XXX */ 4927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4928 VATTR_NULL(vap); 4929 vap->va_size = 0; 4930 error = VOP_SETATTR(vp, vap, td->td_ucred); 4931 if (error) 4932 goto bad; 4933 } 4934 4935 /* 4936 * VOP_OPEN needs the file pointer so it can potentially override 4937 * it. 4938 * 4939 * WARNING! no f_nchandle will be associated when fhopen()ing a 4940 * directory. XXX 4941 */ 4942 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4943 goto bad; 4944 error = VOP_OPEN(vp, fmode, td->td_ucred, &nfp); 4945 fp = nfp; 4946 4947 if (error) { 4948 /* 4949 * setting f_ops this way prevents VOP_CLOSE from being 4950 * called or fdrop() releasing the vp from v_data. 
Since 4951 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4952 */ 4953 fp->f_ops = &badfileops; 4954 fp->f_data = NULL; 4955 goto bad_drop; 4956 } 4957 4958 /* 4959 * The fp is given its own reference, we still have our ref and lock. 4960 * 4961 * Assert that all regular files must be created with a VM object. 4962 */ 4963 if (vp->v_type == VREG && vp->v_object == NULL) { 4964 kprintf("fhopen: regular file did not " 4965 "have VM object: %p\n", 4966 vp); 4967 goto bad_drop; 4968 } 4969 4970 /* 4971 * The open was successful. Handle any locking requirements. 4972 */ 4973 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4974 lf.l_whence = SEEK_SET; 4975 lf.l_start = 0; 4976 lf.l_len = 0; 4977 if (fmode & O_EXLOCK) 4978 lf.l_type = F_WRLCK; 4979 else 4980 lf.l_type = F_RDLCK; 4981 if (fmode & FNONBLOCK) 4982 type = 0; 4983 else 4984 type = F_WAIT; 4985 vn_unlock(vp); 4986 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4987 &lf, type)) != 0) { 4988 /* 4989 * release our private reference. 4990 */ 4991 fsetfd(fdp, NULL, indx); 4992 fdrop(fp); 4993 vrele(vp); 4994 goto done; 4995 } 4996 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4997 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4998 } 4999 5000 /* 5001 * Clean up. Associate the file pointer with the previously 5002 * reserved descriptor and return it. 
5003 */ 5004 vput(vp); 5005 if (uap->flags & O_CLOEXEC) 5006 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 5007 fsetfd(fdp, fp, indx); 5008 fdrop(fp); 5009 sysmsg->sysmsg_result = indx; 5010 mount_drop(mp); 5011 5012 return (error); 5013 5014 bad_drop: 5015 fsetfd(fdp, NULL, indx); 5016 fdrop(fp); 5017 bad: 5018 vput(vp); 5019 done: 5020 mount_drop(mp); 5021 done2: 5022 return (error); 5023 } 5024 5025 /* 5026 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 5027 */ 5028 int 5029 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap) 5030 { 5031 struct thread *td = curthread; 5032 struct stat sb; 5033 fhandle_t fh; 5034 struct mount *mp; 5035 struct vnode *vp; 5036 int error; 5037 5038 /* 5039 * Must be super user 5040 */ 5041 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 5042 if (error) 5043 return (error); 5044 5045 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 5046 if (error) 5047 return (error); 5048 5049 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 5050 error = ESTALE; 5051 if (error == 0) { 5052 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 5053 error = vn_stat(vp, &sb, td->td_ucred); 5054 vput(vp); 5055 } 5056 } 5057 if (error == 0) 5058 error = copyout(&sb, uap->sb, sizeof(sb)); 5059 if (mp) 5060 mount_drop(mp); 5061 5062 return (error); 5063 } 5064 5065 /* 5066 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 5067 */ 5068 int 5069 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap) 5070 { 5071 struct thread *td = curthread; 5072 struct proc *p = td->td_proc; 5073 struct statfs *sp; 5074 struct mount *mp; 5075 struct vnode *vp; 5076 struct statfs sb; 5077 char *fullpath, *freepath; 5078 fhandle_t fh; 5079 int error; 5080 5081 /* 5082 * Must be super user 5083 */ 5084 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 5085 if (error) 5086 return (error); 5087 5088 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 5089 return (error); 5090 5091 if ((mp = 
vfs_getvfs(&fh.fh_fsid)) == NULL) { 5092 error = ESTALE; 5093 goto done; 5094 } 5095 if (p != NULL && !chroot_visible_mnt(mp, p)) { 5096 error = ESTALE; 5097 goto done; 5098 } 5099 5100 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 5101 goto done; 5102 mp = vp->v_mount; 5103 sp = &mp->mnt_stat; 5104 vput(vp); 5105 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 5106 goto done; 5107 5108 error = mount_path(p, mp, &fullpath, &freepath); 5109 if (error) 5110 goto done; 5111 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 5112 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 5113 kfree(freepath, M_TEMP); 5114 5115 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 5116 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) { 5117 bcopy(sp, &sb, sizeof(sb)); 5118 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 5119 sp = &sb; 5120 } 5121 error = copyout(sp, uap->buf, sizeof(*sp)); 5122 done: 5123 if (mp) 5124 mount_drop(mp); 5125 5126 return (error); 5127 } 5128 5129 /* 5130 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 5131 */ 5132 int 5133 sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap) 5134 { 5135 struct thread *td = curthread; 5136 struct proc *p = td->td_proc; 5137 struct statvfs *sp; 5138 struct mount *mp; 5139 struct vnode *vp; 5140 fhandle_t fh; 5141 int error; 5142 5143 /* 5144 * Must be super user 5145 */ 5146 if ((error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT))) 5147 return (error); 5148 5149 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 5150 return (error); 5151 5152 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 5153 error = ESTALE; 5154 goto done; 5155 } 5156 if (p != NULL && !chroot_visible_mnt(mp, p)) { 5157 error = ESTALE; 5158 goto done; 5159 } 5160 5161 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 5162 goto done; 5163 mp = vp->v_mount; 5164 sp = &mp->mnt_vstat; 5165 vput(vp); 5166 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 5167 goto done; 5168 5169 sp->f_flag 
= 0; 5170 if (mp->mnt_flag & MNT_RDONLY) 5171 sp->f_flag |= ST_RDONLY; 5172 if (mp->mnt_flag & MNT_NOSUID) 5173 sp->f_flag |= ST_NOSUID; 5174 error = copyout(sp, uap->buf, sizeof(*sp)); 5175 done: 5176 if (mp) 5177 mount_drop(mp); 5178 return (error); 5179 } 5180 5181 5182 /* 5183 * Syscall to push extended attribute configuration information into the 5184 * VFS. Accepts a path, which it converts to a mountpoint, as well as 5185 * a command (int cmd), and attribute name and misc data. For now, the 5186 * attribute name is left in userspace for consumption by the VFS_op. 5187 * It will probably be changed to be copied into sysspace by the 5188 * syscall in the future, once issues with various consumers of the 5189 * attribute code have raised their hands. 5190 * 5191 * Currently this is used only by UFS Extended Attributes. 5192 */ 5193 int 5194 sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap) 5195 { 5196 struct nlookupdata nd; 5197 struct vnode *vp; 5198 char attrname[EXTATTR_MAXNAMELEN]; 5199 int error; 5200 size_t size; 5201 5202 attrname[0] = 0; 5203 vp = NULL; 5204 error = 0; 5205 5206 if (error == 0 && uap->filename) { 5207 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 5208 NLC_FOLLOW); 5209 if (error == 0) 5210 error = nlookup(&nd); 5211 if (error == 0) 5212 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 5213 nlookup_done(&nd); 5214 } 5215 5216 if (error == 0 && uap->attrname) { 5217 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 5218 &size); 5219 } 5220 5221 if (error == 0) { 5222 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5223 if (error == 0) 5224 error = nlookup(&nd); 5225 if (error == 0) 5226 error = ncp_writechk(&nd.nl_nch); 5227 if (error == 0) { 5228 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 5229 uap->attrnamespace, 5230 uap->attrname, nd.nl_cred); 5231 } 5232 nlookup_done(&nd); 5233 } 5234 5235 return (error); 5236 } 5237 5238 /* 5239 * Syscall to get a named 
extended attribute on a file or directory.
 *
 * This entry point stores an attribute: the uap->nbytes bytes at uap->data
 * are handed to VOP_SETEXTATTR() under the name copied in from
 * uap->attrname, in namespace uap->attrnamespace, on the file named by
 * uap->path.  Returns 0 or an errno value.
 */
int
sys_extattr_set_file(struct sysmsg *sysmsg,
		     const struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	size_t namelen;
	int error;

	/*
	 * Copy the name in as a string.  A fixed-length copyin() would
	 * neither guarantee NUL termination of attrname[] nor tolerate a
	 * user string shorter than EXTATTR_MAXNAMELEN that ends right
	 * before an unmapped page.
	 */
	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
			  &namelen);
	if (error)
		return (error);

	vp = NULL;

	/*
	 * Look the path up, make sure it may be written to, and get the
	 * vnode exclusively locked.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Describe the caller's buffer with a single-segment write uio.
	 * Fields not set explicitly stay zero from the bzero().
	 */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	/* nd is torn down last because nd.nl_cred is used by the VOP. */
	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
5290 */ 5291 int 5292 sys_extattr_get_file(struct sysmsg *sysmsg, 5293 const struct extattr_get_file_args *uap) 5294 { 5295 char attrname[EXTATTR_MAXNAMELEN]; 5296 struct nlookupdata nd; 5297 struct uio auio; 5298 struct iovec aiov; 5299 struct vnode *vp; 5300 int error; 5301 5302 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5303 if (error) 5304 return (error); 5305 5306 vp = NULL; 5307 5308 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5309 if (error == 0) 5310 error = nlookup(&nd); 5311 if (error == 0) 5312 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5313 if (error) { 5314 nlookup_done(&nd); 5315 return (error); 5316 } 5317 5318 bzero(&auio, sizeof(auio)); 5319 aiov.iov_base = uap->data; 5320 aiov.iov_len = uap->nbytes; 5321 auio.uio_iov = &aiov; 5322 auio.uio_iovcnt = 1; 5323 auio.uio_offset = 0; 5324 auio.uio_resid = uap->nbytes; 5325 auio.uio_rw = UIO_READ; 5326 auio.uio_td = curthread; 5327 5328 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5329 &auio, nd.nl_cred); 5330 sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid; 5331 5332 vput(vp); 5333 nlookup_done(&nd); 5334 return(error); 5335 } 5336 5337 /* 5338 * Syscall to delete a named extended attribute from a file or directory. 5339 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5340 */ 5341 int 5342 sys_extattr_delete_file(struct sysmsg *sysmsg, 5343 const struct extattr_delete_file_args *uap) 5344 { 5345 char attrname[EXTATTR_MAXNAMELEN]; 5346 struct nlookupdata nd; 5347 struct vnode *vp; 5348 int error; 5349 5350 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5351 if (error) 5352 return(error); 5353 5354 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5355 if (error == 0) 5356 error = nlookup(&nd); 5357 if (error == 0) 5358 error = ncp_writechk(&nd.nl_nch); 5359 if (error == 0) { 5360 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5361 if (error == 0) { 5362 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5363 attrname, NULL, nd.nl_cred); 5364 vput(vp); 5365 } 5366 } 5367 nlookup_done(&nd); 5368 return(error); 5369 } 5370 5371 /* 5372 * Determine if the mount is visible to the process. 5373 */ 5374 static int 5375 chroot_visible_mnt(struct mount *mp, struct proc *p) 5376 { 5377 struct nchandle nch; 5378 5379 /* 5380 * Traverse from the mount point upwards. If we hit the process 5381 * root then the mount point is visible to the process. 5382 */ 5383 nch = mp->mnt_ncmountpt; 5384 while (nch.ncp) { 5385 if (nch.mount == p->p_fd->fd_nrdir.mount && 5386 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5387 return(1); 5388 } 5389 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5390 nch = nch.mount->mnt_ncmounton; 5391 } else { 5392 nch.ncp = nch.ncp->nc_parent; 5393 } 5394 } 5395 5396 /* 5397 * If the mount point is not visible to the process, but the 5398 * process root is in a subdirectory of the mount, return 5399 * TRUE anyway. 5400 */ 5401 if (p->p_fd->fd_nrdir.mount == mp) 5402 return(1); 5403 5404 return(0); 5405 } 5406 5407 /* 5408 * Return the appropriate system capability restriction. 
5409 */ 5410 static int 5411 get_fscap(const char *fsname) 5412 { 5413 5414 if (strncmp("null", fsname, 5) == 0) { 5415 return SYSCAP_NOMOUNT_NULLFS; 5416 } else if (strncmp(fsname, "devfs", 6) == 0) { 5417 return SYSCAP_NOMOUNT_DEVFS; 5418 } else if (strncmp(fsname, "procfs", 7) == 0) { 5419 return SYSCAP_NOMOUNT_PROCFS; 5420 } else if (strncmp(fsname, "tmpfs", 6) == 0) { 5421 return SYSCAP_NOMOUNT_TMPFS; 5422 } else if (strncmp(fsname, "fusefs", 7) == 0) { 5423 return SYSCAP_NOMOUNT_FUSE; 5424 } 5425 return SYSCAP_RESTRICTEDROOT; 5426 } 5427 5428 int 5429 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap) 5430 { 5431 struct nlookupdata nd; 5432 char *rbuf; 5433 char *fbuf; 5434 ssize_t rlen; 5435 int error; 5436 5437 /* 5438 * Invalid length if less than 0. 0 is allowed 5439 */ 5440 if ((ssize_t)uap->len < 0) 5441 return EINVAL; 5442 5443 rbuf = NULL; 5444 fbuf = NULL; 5445 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5446 if (error) 5447 goto done; 5448 5449 nd.nl_flags |= NLC_SHAREDLOCK; 5450 error = nlookup(&nd); 5451 if (error) 5452 goto done; 5453 5454 if (nd.nl_nch.ncp->nc_vp == NULL) { 5455 error = ENOENT; 5456 goto done; 5457 } 5458 5459 /* 5460 * Shortcut test for existence. 5461 */ 5462 if (uap->len == 0) { 5463 error = ENAMETOOLONG; 5464 goto done; 5465 } 5466 5467 /* 5468 * Obtain the path relative to the process root. The nch must not 5469 * be locked for the cache_fullpath() call. 
5470 */ 5471 if (nd.nl_flags & NLC_NCPISLOCKED) { 5472 nd.nl_flags &= ~NLC_NCPISLOCKED; 5473 cache_unlock(&nd.nl_nch); 5474 } 5475 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0); 5476 if (error) 5477 goto done; 5478 5479 rlen = (ssize_t)strlen(rbuf); 5480 if (rlen >= uap->len) { 5481 error = ENAMETOOLONG; 5482 goto done; 5483 } 5484 error = copyout(rbuf, uap->buf, rlen + 1); 5485 if (error == 0) 5486 sysmsg->sysmsg_szresult = rlen; 5487 done: 5488 nlookup_done(&nd); 5489 if (fbuf) 5490 kfree(fbuf, M_TEMP); 5491 5492 return error; 5493 } 5494 5495 int 5496 sys_posix_fallocate(struct sysmsg *sysmsg, const struct posix_fallocate_args *uap) 5497 { 5498 return (kern_posix_fallocate(uap->fd, uap->offset, uap->len)); 5499 } 5500 5501 int 5502 kern_posix_fallocate(int fd, off_t offset, off_t len) 5503 { 5504 struct thread *td = curthread; 5505 struct vnode *vp; 5506 struct file *fp; 5507 int error; 5508 5509 if (offset < 0 || len <= 0) 5510 return (EINVAL); 5511 /* Check for wrap. */ 5512 if (offset > OFF_MAX - len) 5513 return (EFBIG); 5514 5515 fp = holdfp(td, fd, -1); 5516 if (fp == NULL) 5517 return (EBADF); 5518 5519 switch (fp->f_type) { 5520 case DTYPE_VNODE: 5521 break; 5522 case DTYPE_PIPE: 5523 case DTYPE_FIFO: 5524 error = ESPIPE; 5525 goto out; 5526 default: 5527 error = ENODEV; 5528 goto out; 5529 } 5530 5531 if ((fp->f_flag & FWRITE) == 0) { 5532 error = EBADF; 5533 goto out; 5534 } 5535 5536 vp = fp->f_data; 5537 if (vp->v_type != VREG) { 5538 error = ENODEV; 5539 goto out; 5540 } 5541 5542 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 5543 error = VOP_ALLOCATE(vp, offset, len); 5544 vn_unlock(vp); 5545 out: 5546 dropfp(td, fd, fp); 5547 return (error); 5548 } 5549