1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysmsg.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/caps.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int get_fspriv(const char *); 84 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 85 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 86 static int getutimes (struct timeval *, struct timespec *); 87 static int getutimens (const struct timespec *, struct timespec *, int *); 88 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 89 static int setfmode (struct vnode *, int); 90 static int setfflags (struct vnode *, u_long); 91 static int setutimes (struct vnode *, struct vattr *, 92 const struct timespec *, int); 93 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 96 "Allow non-root users to mount filesystems"); 97 98 static int debug_unmount = 0; /* if 1 loop until unmount success */ 99 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 100 "Stall failed unmounts in loop"); 101 102 static struct krate krate_rename = { 1 }; 103 104 /* 105 * Virtual File System System Calls 106 */ 107 108 /* 109 * Mount a file system. 
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Returns 0 on success or an errno.  On success the filesystem is mounted
 * on top of the directory named by path (or updated in place when
 * MNT_UPDATE is set in flags).
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* saved mnt_flag/mnt_kern_flag for MNT_UPDATE rollback */
	int hasmount;
	int priv = 0;
	int flags = uap->flags;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type.  We need to know this early, to take
	 * appropriate actions for jails and nullfs mounts.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct priv according to the file system type.
	 */
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = caps_priv_check_td(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (flags & MNT_EXPORTED) {
		error = caps_priv_check_td(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (caps_priv_check_td(td, priv))
		flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system.  We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * hasmount is set when something is already mounted on this ncp;
	 * used below to reject stacking a second mount on the same point.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}

	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * strncmp with length 5 includes the terminating NUL,
		 * i.e. this matches exactly "null" (nullfs).
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = caps_priv_check_td(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = caps_priv_check_td(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
		if (error) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp, vfsp->vfc_vfsops);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
			  MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
			  MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
			  MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
			  MNT_AUTOMOUNTED);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
			  MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
			  MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
			  MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW |
			  MNT_SUIDDIR | MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* restore the flags saved before the update attempt */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): error from vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below — confirm intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/* VFS_MOUNT failed: tear down the partially built mount */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* mount-on point being replaced */
	struct nchandle new_nch;	/* root of the new mount */
	struct vnode *old_vp;		/* NOTE(review): never assigned in this file — confirm unused */
	struct vnode *new_vp;		/* root vnode of the new mount (held by caller) */
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info, 0);
	vput(newdp);
}

/*
 * Per-process callback for checkdirs(): swap any cwd/root references that
 * point at the old mount-on directory over to the new mount's root.
 *
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 *
		 * Old refs are stashed and released only after the
		 * spinlock is dropped (vrele/cache_drop may block).
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	int priv = 0;
	int error;
	struct ucred *cred;

	cred = td->td_ucred;

	KKASSERT(p);

	/* We do not allow user umounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
			     NLC_FOLLOW | NLC_IGNBADDIR);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/* Figure out the fsname in order to select proper privs */
	ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = caps_priv_check_td(td, priv))) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = caps_priv_check_td(td, priv)))
	{
		goto out;
	}

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

	/* Check if this mount belongs to this prison */
	if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
	    mp->mnt_cred->cr_prison != cred->cr_prison)) {
		kprintf("mountpoint %s does not belong to this jail\n",
			uap->path);
		error = EPERM;
		goto out;
	}

	/*
	 * If no error try to issue the unmount.  We lose our cache
	 * ref when we call nlookup_done so we must hold the mount point
	 * to prevent use-after-free races.
	 *
	 * NOTE: the success path falls through to this label with
	 * error == 0, which is what actually triggers the unmount.
	 */
out:
	if (error == 0) {
		mount_hold(mp);
		nlookup_done(&nd);
		error = dounmount(mp, uap->flags, 0);
		mount_drop(mp);
	} else {
		nlookup_done(&nd);
	}
done:
	return (error);
}

/*
 * Do the actual file system unmount (interlocked against the mountlist
 * token and mp->mnt_token).
 *
 * Returns EBUSY if an unmount is already in progress on this mount,
 * otherwise marks the mount as unmounting and returns 0.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: drop a process's p_textnch if it references
 * the mount being (forcibly) unmounted, releasing that namecache ref.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;		/* cleared when mp must survive (force/quickhalt) */
	int hadsyncer = 0;	/* had a syncer vnode; recommission on failure */
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}

	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* lock failed: undo the interlock and wake any waiters */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
		hadsyncer = 1;
	}

	/*
	 * Sync normally-mounted filesystem.
	 */
	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; (retry < 10 || debug_unmount); ++retry) {
		/*
		 * Invalidate the namecache topology under the mount.
		 * nullfs mounts alias a real mount's namecache topology
		 * and it should not be invalidated in that case.
		 */
		if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
			cache_lock(&mp->mnt_ncmountpt);
			cache_inval(&mp->mnt_ncmountpt,
				    CINV_DESTROY|CINV_CHILDREN);
			cache_unlock(&mp->mnt_ncmountpt);
		}

		/*
		 * Clear pcpu caches
		 */
		cache_unmounting(mp);
		if (mp->mnt_refs != 1)
			cache_clearmntcache(mp);

		/*
		 * Break out if we are good.  Don't count ncp refs if the
		 * mount is aliased.
		 */
		ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
		      NULL : mp->mnt_ncmountpt.ncp;
		if (mp->mnt_refs == 1 &&
		    (ncp == NULL || (ncp->nc_refs == 1 &&
				     TAILQ_FIRST(&ncp->nc_list) == NULL))) {
			break;
		}

		/*
		 * If forcing the unmount, clean out any p->p_textnch
		 * nchandles that match this mount.
		 */
		if (flags & MNT_FORCE)
			allproc_scan(&unmount_allproc_cb, mp, 0);

		/*
		 * Sleep and retry.
		 */
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		if ((retry & 15) == 15) {
			mount_warning(mp,
				      "(%p) debug - retry %d, "
				      "%d namecache refs, %d mount refs",
				      mp, retry,
				      (ncp ? ncp->nc_refs - 1 : 0),
				      mp->mnt_refs - 1);
		}
	}

	error = 0;
	ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
	      NULL : mp->mnt_ncmountpt.ncp;
	if (mp->mnt_refs != 1 ||
	    (ncp != NULL && (ncp->nc_refs != 1 ||
			     TAILQ_FIRST(&ncp->nc_list)))) {
		mount_warning(mp,
			      "(%p): %d namecache refs, %d mount refs "
			      "still present",
			      mp,
			      (ncp ? ncp->nc_refs - 1 : 0),
			      mp->mnt_refs - 1);
		if (flags & MNT_FORCE) {
			freeok = 0;	/* refs remain, cannot safely free */
			mount_warning(mp, "forcing unmount\n");
		} else {
			error = EBUSY;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error == 0 ||		/* no error */
			    error == EOPNOTSUPP ||	/* no sync avail */
			    (flags & MNT_FORCE)) {	/* force anyway */
				error = VFS_UNMOUNT(mp, flags);
			}
		}
		if (error) {
			mount_warning(mp,
				      "(%p) unmount: vfs refused to unmount, "
				      "error %d",
				      mp, error);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL && hadsyncer)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	if (mp->mnt_cred) {
		crfree(mp->mnt_cred);
		mp->mnt_cred = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache(mp);
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache(mp);
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;	/* token already released; skip at out: */
	} else {
		cache_clearmntcache(mp);
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Print a rate-limited-style warning about a mount, prefixed with the
 * mount's path when it can be resolved, otherwise with the mount pointer
 * (and mount-on name if available).
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * sync(2) system call: kick off a MNT_NOWAIT sync on every mounted
 * filesystem.  Always returns 0.
 */
int
sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	return (0);
}

/*
 * Per-mount callback for sys_sync().  Read-only mounts are skipped.
 * MNT_ASYNC is temporarily cleared so the sync is not deferred, then
 * restored afterwards (under the per-mount token).
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		lwkt_gettoken(&mp->mnt_token);
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		lwkt_reltoken(&mp->mnt_token);
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		lwkt_gettoken(&mp->mnt_token);
		mp->mnt_flag |= asyncflag;
		lwkt_reltoken(&mp->mnt_token);
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;
	/* jailed processes may not manipulate quotas unless enabled */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *	    void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root
	 * (except for MOUNTCTL_MOUNTFLAGS, which any non-jailed
	 * caller may issue).
	 */
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) != 0)
	{
		return (error);
	}

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor.  fd < 0 means no descriptor is
	 * associated with the operation.
	 */
	if (uap->fd >= 0) {
		fp = holdfp(td, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &sysmsg->sysmsg_result);
	if (fp)
		dropfp(td, uap->fd, fp);
	if (error == 0 && sysmsg->sysmsg_result > 0)
		error = copyout(buf, uap->buf, sysmsg->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	/* *res (bytes returned) is always initialized for the caller */
	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	/* Obtain a referenced, exclusively-locked vnode for the path */
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);		/* keep the vnode ref, drop its lock */

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	/* Refuse if the mount is gone or is in the middle of an unmount */
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	/* Dispatch through the mount's vop vector so overlays can spike it */
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

/*
 * Resolve *nd and fill *buf with the statfs information for the
 * resulting mount point.  The fsid fields are zeroed for unprivileged
 * callers.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;

	/*
	 * Ignore refresh error, user should have visibility.
1339 * This can happen if a NFS mount goes bad (e.g. server 1340 * revokes perms or goes down). 1341 */ 1342 error = VFS_STATFS(mp, sp, nd->nl_cred); 1343 /* ignore error */ 1344 1345 error = mount_path(p, mp, &fullpath, &freepath); 1346 if (error) 1347 return(error); 1348 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1349 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1350 kfree(freepath, M_TEMP); 1351 1352 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1353 bcopy(sp, buf, sizeof(*buf)); 1354 /* Only root should have access to the fsid's. */ 1355 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) 1356 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1357 return (0); 1358 } 1359 1360 /* 1361 * statfs_args(char *path, struct statfs *buf) 1362 * 1363 * Get filesystem statistics. 1364 */ 1365 int 1366 sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap) 1367 { 1368 struct nlookupdata nd; 1369 struct statfs buf; 1370 int error; 1371 1372 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1373 if (error == 0) 1374 error = kern_statfs(&nd, &buf); 1375 nlookup_done(&nd); 1376 if (error == 0) 1377 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1378 return (error); 1379 } 1380 1381 int 1382 kern_fstatfs(int fd, struct statfs *buf) 1383 { 1384 struct thread *td = curthread; 1385 struct proc *p = td->td_proc; 1386 struct file *fp; 1387 struct mount *mp; 1388 struct statfs *sp; 1389 char *fullpath, *freepath; 1390 int error; 1391 1392 KKASSERT(p); 1393 if ((error = holdvnode(td, fd, &fp)) != 0) 1394 return (error); 1395 1396 /* 1397 * Try to use mount info from any overlays rather than the 1398 * mount info for the underlying vnode, otherwise we will 1399 * fail when operating on null-mounted paths inside a chroot. 
1400 */ 1401 if ((mp = fp->f_nchandle.mount) == NULL) 1402 mp = ((struct vnode *)fp->f_data)->v_mount; 1403 if (mp == NULL) { 1404 error = EBADF; 1405 goto done; 1406 } 1407 if (fp->f_cred == NULL) { 1408 error = EINVAL; 1409 goto done; 1410 } 1411 1412 /* 1413 * Ignore refresh error, user should have visibility. 1414 * This can happen if a NFS mount goes bad (e.g. server 1415 * revokes perms or goes down). 1416 */ 1417 sp = &mp->mnt_stat; 1418 error = VFS_STATFS(mp, sp, fp->f_cred); 1419 1420 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1421 goto done; 1422 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1423 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1424 kfree(freepath, M_TEMP); 1425 1426 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1427 bcopy(sp, buf, sizeof(*buf)); 1428 1429 /* Only root should have access to the fsid's. */ 1430 if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) 1431 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1432 error = 0; 1433 done: 1434 fdrop(fp); 1435 return (error); 1436 } 1437 1438 /* 1439 * fstatfs_args(int fd, struct statfs *buf) 1440 * 1441 * Get filesystem statistics. 
 */
int
sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Resolve *nd and fill *buf with the statvfs information for the
 * resulting mount point.  f_flag is recomputed from the current
 * mount flags (ST_RDONLY / ST_NOSUID).
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with the statvfs information for the mount backing the
 * given file descriptor.  Prefers the nchandle's mount (overlays such
 * as nullfs) over the underlying vnode's mount.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/*
 * State shared between sys_getfsstat() and its mountlist_scan callback.
 */
struct getfsstat_info {
	struct statfs *sfsp;	/* user buffer cursor, NULL for count-only */
	long count;		/* mounts visited (visible to caller) */
	long maxcount;		/* capacity of the user buffer */
	int error;		/* first error encountered by the callback */
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);

	/*
	 * When a buffer was supplied and overflowed, report only the
	 * number of entries actually stored; otherwise report the
	 * total number of (visible) mounts.
	 */
	if (info.sfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getfsstat().  Mounts not visible from the
 * process's chroot are skipped entirely.  Returns -1 (with info->error
 * set) to abort the scan on a hard error, 0 to continue.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->sfsp && info->count < info->maxcount) {

		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		/*
		 * Rewrite f_mntonname relative to the process's root
		 * before copying the entry out.
		 */
		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	/* Count every visible mount, even when the buffer is full */
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
		long vbufsize, int flags)
 *
 * Get statistics on all filesystems.
1651 */ 1652 1653 struct getvfsstat_info { 1654 struct statfs *sfsp; 1655 struct statvfs *vsfsp; 1656 long count; 1657 long maxcount; 1658 int error; 1659 int flags; 1660 struct thread *td; 1661 }; 1662 1663 static int getvfsstat_callback(struct mount *, void *); 1664 1665 int 1666 sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap) 1667 { 1668 struct thread *td = curthread; 1669 struct getvfsstat_info info; 1670 1671 bzero(&info, sizeof(info)); 1672 1673 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1674 info.sfsp = uap->buf; 1675 info.vsfsp = uap->vbuf; 1676 info.count = 0; 1677 info.flags = uap->flags; 1678 info.td = td; 1679 1680 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1681 if (info.vsfsp && info.count > info.maxcount) 1682 sysmsg->sysmsg_result = info.maxcount; 1683 else 1684 sysmsg->sysmsg_result = info.count; 1685 return (info.error); 1686 } 1687 1688 static int 1689 getvfsstat_callback(struct mount *mp, void *data) 1690 { 1691 struct getvfsstat_info *info = data; 1692 struct statfs *sp; 1693 struct statvfs *vsp; 1694 char *freepath; 1695 char *fullpath; 1696 int error; 1697 1698 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1699 return(0); 1700 1701 if (info->vsfsp && info->count < info->maxcount) { 1702 sp = &mp->mnt_stat; 1703 vsp = &mp->mnt_vstat; 1704 1705 /* 1706 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1707 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1708 * overrides MNT_WAIT. 1709 * 1710 * Ignore refresh error, user should have visibility. 1711 * This can happen if a NFS mount goes bad (e.g. server 1712 * revokes perms or goes down). 
1713 */ 1714 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1715 (info->flags & MNT_WAIT)) && 1716 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1717 /* ignore error */ 1718 } 1719 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1720 1721 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1722 (info->flags & MNT_WAIT)) && 1723 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1724 /* ignore error */ 1725 } 1726 vsp->f_flag = 0; 1727 if (mp->mnt_flag & MNT_RDONLY) 1728 vsp->f_flag |= ST_RDONLY; 1729 if (mp->mnt_flag & MNT_NOSUID) 1730 vsp->f_flag |= ST_NOSUID; 1731 1732 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1733 if (error) { 1734 info->error = error; 1735 return(-1); 1736 } 1737 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1738 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1739 kfree(freepath, M_TEMP); 1740 1741 error = copyout(sp, info->sfsp, sizeof(*sp)); 1742 if (error == 0) 1743 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1744 if (error) { 1745 info->error = error; 1746 return (-1); 1747 } 1748 ++info->sfsp; 1749 ++info->vsfsp; 1750 } 1751 info->count++; 1752 return(0); 1753 } 1754 1755 1756 /* 1757 * fchdir_args(int fd) 1758 * 1759 * Change current working directory to a given file descriptor. 
1760 */ 1761 int 1762 sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap) 1763 { 1764 struct thread *td = curthread; 1765 struct proc *p = td->td_proc; 1766 struct filedesc *fdp = p->p_fd; 1767 struct vnode *vp, *ovp; 1768 struct mount *mp; 1769 struct file *fp; 1770 struct nchandle nch, onch, tnch; 1771 int error; 1772 1773 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1774 return (error); 1775 lwkt_gettoken(&p->p_token); 1776 vp = (struct vnode *)fp->f_data; 1777 vref(vp); 1778 vn_lock(vp, LK_SHARED | LK_RETRY); 1779 if (fp->f_nchandle.ncp == NULL) 1780 error = ENOTDIR; 1781 else 1782 error = checkvp_chdir(vp, td); 1783 if (error) { 1784 vput(vp); 1785 goto done; 1786 } 1787 cache_copy(&fp->f_nchandle, &nch); 1788 1789 /* 1790 * If the ncp has become a mount point, traverse through 1791 * the mount point. 1792 */ 1793 1794 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1795 (mp = cache_findmount(&nch)) != NULL 1796 ) { 1797 error = nlookup_mp(mp, &tnch); 1798 if (error == 0) { 1799 cache_unlock(&tnch); /* leave ref intact */ 1800 vput(vp); 1801 vp = tnch.ncp->nc_vp; 1802 error = vget(vp, LK_SHARED); 1803 KKASSERT(error == 0); 1804 cache_drop(&nch); 1805 nch = tnch; 1806 } 1807 cache_dropmount(mp); 1808 } 1809 if (error == 0) { 1810 spin_lock(&fdp->fd_spin); 1811 ovp = fdp->fd_cdir; 1812 onch = fdp->fd_ncdir; 1813 fdp->fd_cdir = vp; 1814 fdp->fd_ncdir = nch; 1815 spin_unlock(&fdp->fd_spin); 1816 vn_unlock(vp); /* leave ref intact */ 1817 cache_drop(&onch); 1818 vrele(ovp); 1819 } else { 1820 cache_drop(&nch); 1821 vput(vp); 1822 } 1823 fdrop(fp); 1824 done: 1825 lwkt_reltoken(&p->p_token); 1826 return (error); 1827 } 1828 1829 int 1830 kern_chdir(struct nlookupdata *nd) 1831 { 1832 struct thread *td = curthread; 1833 struct proc *p = td->td_proc; 1834 struct filedesc *fdp = p->p_fd; 1835 struct vnode *vp, *ovp; 1836 struct nchandle onch; 1837 int error; 1838 1839 nd->nl_flags |= NLC_SHAREDLOCK; 1840 if ((error = nlookup(nd)) != 0) 1841 
return (error); 1842 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1843 return (ENOENT); 1844 if ((error = vget(vp, LK_SHARED)) != 0) 1845 return (error); 1846 1847 lwkt_gettoken(&p->p_token); 1848 error = checkvp_chdir(vp, td); 1849 vn_unlock(vp); 1850 if (error == 0) { 1851 spin_lock(&fdp->fd_spin); 1852 ovp = fdp->fd_cdir; 1853 onch = fdp->fd_ncdir; 1854 fdp->fd_ncdir = nd->nl_nch; 1855 fdp->fd_cdir = vp; 1856 spin_unlock(&fdp->fd_spin); 1857 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1858 cache_drop(&onch); 1859 vrele(ovp); 1860 cache_zero(&nd->nl_nch); 1861 } else { 1862 vrele(vp); 1863 } 1864 lwkt_reltoken(&p->p_token); 1865 return (error); 1866 } 1867 1868 /* 1869 * chdir_args(char *path) 1870 * 1871 * Change current working directory (``.''). 1872 */ 1873 int 1874 sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap) 1875 { 1876 struct nlookupdata nd; 1877 int error; 1878 1879 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1880 if (error == 0) 1881 error = kern_chdir(&nd); 1882 nlookup_done(&nd); 1883 return (error); 1884 } 1885 1886 /* 1887 * Helper function for raised chroot(2) security function: Refuse if 1888 * any filedescriptors are open directories. 1889 */ 1890 static int 1891 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1892 { 1893 struct vnode *vp; 1894 struct file *fp; 1895 int error; 1896 int fd; 1897 1898 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1899 if ((error = holdvnode(td, fd, &fp)) != 0) 1900 continue; 1901 vp = (struct vnode *)fp->f_data; 1902 if (vp->v_type != VDIR) { 1903 fdrop(fp); 1904 continue; 1905 } 1906 fdrop(fp); 1907 return(EPERM); 1908 } 1909 return (0); 1910 } 1911 1912 /* 1913 * This sysctl determines if we will allow a process to chroot(2) if it 1914 * has a directory open: 1915 * 0: disallowed for all processes. 1916 * 1: allowed for processes that were not already chroot(2)'ed. 1917 * 2: allowed for all processes. 
1918 */ 1919 1920 static int chroot_allow_open_directories = 1; 1921 1922 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1923 &chroot_allow_open_directories, 0, ""); 1924 1925 /* 1926 * chroot to the specified namecache entry. We obtain the vp from the 1927 * namecache data. The passed ncp must be locked and referenced and will 1928 * remain locked and referenced on return. 1929 */ 1930 int 1931 kern_chroot(struct nchandle *nch) 1932 { 1933 struct thread *td = curthread; 1934 struct proc *p = td->td_proc; 1935 struct filedesc *fdp = p->p_fd; 1936 struct vnode *vp; 1937 int error; 1938 1939 /* 1940 * Only privileged user can chroot 1941 */ 1942 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT); 1943 if (error) 1944 return (error); 1945 1946 /* 1947 * Disallow open directory descriptors (fchdir() breakouts). 1948 */ 1949 if (chroot_allow_open_directories == 0 || 1950 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1951 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1952 return (error); 1953 } 1954 if ((vp = nch->ncp->nc_vp) == NULL) 1955 return (ENOENT); 1956 1957 if ((error = vget(vp, LK_SHARED)) != 0) 1958 return (error); 1959 1960 /* 1961 * Check the validity of vp as a directory to change to and 1962 * associate it with rdir/jdir. 
1963 */ 1964 error = checkvp_chdir(vp, td); 1965 vn_unlock(vp); /* leave reference intact */ 1966 if (error == 0) { 1967 lwkt_gettoken(&p->p_token); 1968 vrele(fdp->fd_rdir); 1969 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1970 cache_drop(&fdp->fd_nrdir); 1971 cache_copy(nch, &fdp->fd_nrdir); 1972 if (fdp->fd_jdir == NULL) { 1973 fdp->fd_jdir = vp; 1974 vref(fdp->fd_jdir); 1975 cache_copy(nch, &fdp->fd_njdir); 1976 } 1977 if ((p->p_flags & P_DIDCHROOT) == 0) { 1978 p->p_flags |= P_DIDCHROOT; 1979 if (p->p_depth <= 65535 - 32) 1980 p->p_depth += 32; 1981 } 1982 lwkt_reltoken(&p->p_token); 1983 } else { 1984 vrele(vp); 1985 } 1986 return (error); 1987 } 1988 1989 /* 1990 * chroot_args(char *path) 1991 * 1992 * Change notion of root (``/'') directory. 1993 */ 1994 int 1995 sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap) 1996 { 1997 struct thread *td __debugvar = curthread; 1998 struct nlookupdata nd; 1999 int error; 2000 2001 KKASSERT(td->td_proc); 2002 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2003 if (error == 0) { 2004 nd.nl_flags |= NLC_EXEC; 2005 error = nlookup(&nd); 2006 if (error == 0) 2007 error = kern_chroot(&nd.nl_nch); 2008 } 2009 nlookup_done(&nd); 2010 return(error); 2011 } 2012 2013 int 2014 sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap) 2015 { 2016 struct thread *td = curthread; 2017 struct nlookupdata nd; 2018 struct nchandle *nch; 2019 struct vnode *vp; 2020 int error; 2021 2022 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2023 if (error) 2024 goto error_nond; 2025 2026 error = nlookup(&nd); 2027 if (error) 2028 goto error_out; 2029 2030 nch = &nd.nl_nch; 2031 2032 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHROOT); 2033 if (error) 2034 goto error_out; 2035 2036 if ((vp = nch->ncp->nc_vp) == NULL) { 2037 error = ENOENT; 2038 goto error_out; 2039 } 2040 2041 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 2042 goto error_out; 2043 2044 
vfs_cache_setroot(vp, cache_hold(nch)); 2045 2046 error_out: 2047 nlookup_done(&nd); 2048 error_nond: 2049 return(error); 2050 } 2051 2052 /* 2053 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2054 * determine whether it is legal to chdir to the vnode. The vnode's state 2055 * is not changed by this call. 2056 */ 2057 static int 2058 checkvp_chdir(struct vnode *vp, struct thread *td) 2059 { 2060 int error; 2061 2062 if (vp->v_type != VDIR) 2063 error = ENOTDIR; 2064 else 2065 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2066 return (error); 2067 } 2068 2069 int 2070 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2071 { 2072 struct thread *td = curthread; 2073 struct proc *p = td->td_proc; 2074 struct lwp *lp = td->td_lwp; 2075 struct filedesc *fdp = p->p_fd; 2076 int cmode, flags; 2077 struct file *nfp; 2078 struct file *fp; 2079 int type, indx, error = 0; 2080 struct flock lf; 2081 2082 if ((oflags & O_ACCMODE) == O_ACCMODE) 2083 return (EINVAL); 2084 flags = FFLAGS(oflags); 2085 error = falloc(lp, &nfp, NULL); 2086 if (error) 2087 return (error); 2088 fp = nfp; 2089 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2090 2091 /* 2092 * Call vn_open() to do the lookup and assign the vnode to the 2093 * file pointer. vn_open() does not change the ref count on fp 2094 * and the vnode, on success, will be inherited by the file pointer 2095 * and unlocked. 2096 * 2097 * Request a shared lock on the vnode if possible. 2098 * 2099 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode 2100 * lock for O_RDWR opens on executables in order to avoid a VTEXT 2101 * detection race. The NLC_EXCLLOCK_IFEXEC handles this case. 2102 * 2103 * NOTE: We need a flag to separate terminal vnode locking from 2104 * parent locking. O_CREAT needs parent locking, but O_TRUNC 2105 * and O_RDWR only need to lock the terminal vnode exclusively. 
2106 */ 2107 nd->nl_flags |= NLC_LOCKVP; 2108 if ((flags & (O_CREAT|O_TRUNC)) == 0) { 2109 nd->nl_flags |= NLC_SHAREDLOCK; 2110 if (flags & O_RDWR) 2111 nd->nl_flags |= NLC_EXCLLOCK_IFEXEC; 2112 } 2113 2114 /* 2115 * Issue the vn_open, passing in the referenced fp. the vn_open() 2116 * is allowed to replace fp by fdrop()ing it and returning its own 2117 * referenced fp. 2118 */ 2119 nfp = fp; 2120 error = vn_open(nd, &nfp, flags, cmode); 2121 fp = nfp; 2122 nlookup_done(nd); 2123 2124 /* 2125 * Deal with any error condition 2126 */ 2127 if (error) { 2128 fdrop(fp); /* our ref */ 2129 if (error == ERESTART) 2130 error = EINTR; 2131 return (error); 2132 } 2133 2134 /* 2135 * Reserve a file descriptor. 2136 */ 2137 if ((error = fdalloc(p, 0, &indx)) != 0) { 2138 fdrop(fp); 2139 return (error); 2140 } 2141 2142 /* 2143 * Handle advisory lock flags. This is only supported with vnodes. 2144 * For things like /dev/fd/N we might not actually get a vnode. 2145 */ 2146 if ((flags & (O_EXLOCK | O_SHLOCK)) && fp->f_type == DTYPE_VNODE) { 2147 struct vnode *vp; 2148 2149 vp = (struct vnode *)fp->f_data; 2150 vref(vp); 2151 2152 lf.l_whence = SEEK_SET; 2153 lf.l_start = 0; 2154 lf.l_len = 0; 2155 if (flags & O_EXLOCK) 2156 lf.l_type = F_WRLCK; 2157 else 2158 lf.l_type = F_RDLCK; 2159 if (flags & FNONBLOCK) 2160 type = 0; 2161 else 2162 type = F_WAIT; 2163 2164 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); 2165 if (error) { 2166 /* 2167 * lock request failed. Clean up the reserved 2168 * descriptor. 2169 */ 2170 vrele(vp); 2171 fsetfd(fdp, NULL, indx); 2172 fdrop(fp); 2173 return (error); 2174 } 2175 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2176 vrele(vp); 2177 } 2178 2179 /* 2180 * release our private reference, leaving the one associated with the 2181 * descriptor table intact. 
2182 */ 2183 if (oflags & O_CLOEXEC) 2184 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2185 fsetfd(fdp, fp, indx); 2186 fdrop(fp); 2187 *res = indx; 2188 2189 return (error); 2190 } 2191 2192 /* 2193 * open_args(char *path, int flags, int mode) 2194 * 2195 * Check permissions, allocate an open file structure, 2196 * and call the device open routine if any. 2197 */ 2198 int 2199 sys_open(struct sysmsg *sysmsg, const struct open_args *uap) 2200 { 2201 struct nlookupdata nd; 2202 int error; 2203 2204 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2205 if (error == 0) { 2206 error = kern_open(&nd, uap->flags, 2207 uap->mode, &sysmsg->sysmsg_result); 2208 } 2209 nlookup_done(&nd); 2210 return (error); 2211 } 2212 2213 /* 2214 * openat_args(int fd, char *path, int flags, int mode) 2215 */ 2216 int 2217 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap) 2218 { 2219 struct nlookupdata nd; 2220 int error; 2221 struct file *fp; 2222 2223 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2224 if (error == 0) { 2225 error = kern_open(&nd, uap->flags, uap->mode, 2226 &sysmsg->sysmsg_result); 2227 } 2228 nlookup_done_at(&nd, fp); 2229 return (error); 2230 } 2231 2232 int 2233 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2234 { 2235 struct thread *td = curthread; 2236 struct proc *p = td->td_proc; 2237 struct vnode *vp; 2238 struct vattr vattr; 2239 int error; 2240 int whiteout = 0; 2241 2242 KKASSERT(p); 2243 2244 VATTR_NULL(&vattr); 2245 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2246 vattr.va_rmajor = rmajor; 2247 vattr.va_rminor = rminor; 2248 2249 switch (mode & S_IFMT) { 2250 case S_IFMT: /* used by badsect to flag bad sectors */ 2251 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_BAD); 2252 vattr.va_type = VBAD; 2253 break; 2254 case S_IFCHR: 2255 error = caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV); 2256 vattr.va_type = VCHR; 2257 break; 2258 case S_IFBLK: 2259 error = 
caps_priv_check_td(td, SYSCAP_NOVFS_MKNOD_DEV); 2260 vattr.va_type = VBLK; 2261 break; 2262 case S_IFWHT: 2263 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_WHT); 2264 whiteout = 1; 2265 break; 2266 case S_IFDIR: /* special directories support for HAMMER */ 2267 error = caps_priv_check(td->td_ucred, SYSCAP_NOVFS_MKNOD_DIR); 2268 vattr.va_type = VDIR; 2269 break; 2270 case S_IFIFO: 2271 return (kern_mkfifo(nd, mode)); 2272 break; 2273 default: 2274 error = EINVAL; 2275 break; 2276 } 2277 2278 if (error) 2279 return (error); 2280 2281 bwillinode(1); 2282 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2283 if ((error = nlookup(nd)) != 0) 2284 return (error); 2285 if (nd->nl_nch.ncp->nc_vp) 2286 return (EEXIST); 2287 if (nd->nl_dvp == NULL) 2288 return (EINVAL); 2289 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2290 return (error); 2291 2292 if (whiteout) { 2293 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2294 nd->nl_cred, NAMEI_CREATE); 2295 } else { 2296 vp = NULL; 2297 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2298 &vp, nd->nl_cred, &vattr); 2299 if (error == 0) 2300 vput(vp); 2301 } 2302 return (error); 2303 } 2304 2305 /* 2306 * mknod_args(char *path, int mode, int dev) 2307 * 2308 * Create a special file. 2309 */ 2310 int 2311 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap) 2312 { 2313 struct nlookupdata nd; 2314 int error; 2315 2316 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2317 if (error == 0) { 2318 error = kern_mknod(&nd, uap->mode, 2319 umajor(uap->dev), uminor(uap->dev)); 2320 } 2321 nlookup_done(&nd); 2322 return (error); 2323 } 2324 2325 /* 2326 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2327 * 2328 * Create a special file. The path is relative to the directory associated 2329 * with fd. 
 */
int
sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a FIFO at the path described by *nd with the given mode
 * (masked by the process umask).  Fails with EEXIST if the target
 * already exists, and checks the target mount for writability.
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	/* The lookup must resolve the parent and leave the target vacant */
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);	/* VOP_NMKNOD returned a locked, ref'd vnode */
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkfifoat_args(int fd, char *path, mode_t mode)
 *
 * Create a named pipe.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Determine whether cred may create a hard link to the file represented
 * by vp, honoring the security.hardlink_check_{uid,gid} sysctls.
 *
 * Returns 0 if the hard link is permitted, EPERM if denied by policy,
 * or a VOP_GETATTR() error.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (caps_priv_check(cred, SYSCAP_NOVFS_LINK) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Common code for link(2) and linkat(2).  nd resolves the existing file,
 * linknd resolves the new link to be created.  Both nlookupdata are
 * initialized (not yet resolved) by the caller, which also performs the
 * corresponding nlookup_done*() calls.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtained a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	if (linknd->nl_dvp == NULL) {
		vrele(vp);
		return (EINVAL);
	}
	VFS_MODIFYING(vp->v_mount);
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct sysmsg *sysmsg, const struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 *
 * Make a hard file link.  The path1 argument is relative to the directory
 * associated with fd1, and similarly the path2 argument is relative to
 * the directory associated with fd2.
 */
int
sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap)
{
	struct nlookupdata nd, linknd;
	struct file *fp1, *fp2;
	int error;

	/* The source is only dereferenced when AT_SYMLINK_FOLLOW is given */
	error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
				(uap->flags & AT_SYMLINK_FOLLOW) ?
				    NLC_FOLLOW : 0);
	if (error == 0) {
		error = nlookup_init_at(&linknd, &fp2, uap->fd2,
					uap->path2, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done_at(&linknd, fp2);
	}
	nlookup_done_at(&nd, fp1);
	return (error);
}

/*
 * Create a symbolic link at the location resolved by nd whose target
 * text is `path'.  mode supplies the permission bits for the new link
 * (the caller has already applied its cmask).
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* Copy the target text in first; it is passed through to the VOP */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
2646 */ 2647 int 2648 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap) 2649 { 2650 struct thread *td = curthread; 2651 struct nlookupdata nd; 2652 struct file *fp; 2653 char *path1; 2654 int error; 2655 int mode; 2656 2657 path1 = objcache_get(namei_oc, M_WAITOK); 2658 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2659 if (error == 0) { 2660 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2661 UIO_USERSPACE, 0); 2662 if (error == 0) { 2663 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2664 error = kern_symlink(&nd, path1, mode); 2665 } 2666 nlookup_done_at(&nd, fp); 2667 } 2668 objcache_put(namei_oc, path1); 2669 return (error); 2670 } 2671 2672 /* 2673 * undelete_args(char *path) 2674 * 2675 * Delete a whiteout from the filesystem. 2676 */ 2677 int 2678 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap) 2679 { 2680 struct nlookupdata nd; 2681 int error; 2682 2683 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2684 bwillinode(1); 2685 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2686 if (error == 0) 2687 error = nlookup(&nd); 2688 if (error == 0 && nd.nl_dvp == NULL) 2689 error = EINVAL; 2690 if (error == 0) 2691 error = ncp_writechk(&nd.nl_nch); 2692 if (error == 0) { 2693 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2694 NAMEI_DELETE); 2695 } 2696 nlookup_done(&nd); 2697 return (error); 2698 } 2699 2700 int 2701 kern_unlink(struct nlookupdata *nd) 2702 { 2703 int error; 2704 2705 bwillinode(1); 2706 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2707 if ((error = nlookup(nd)) != 0) 2708 return (error); 2709 if (nd->nl_dvp == NULL) 2710 return EINVAL; 2711 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2712 return (error); 2713 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2714 return (error); 2715 } 2716 2717 /* 2718 * unlink_args(char *path) 2719 * 2720 * Delete a name from the filesystem. 
2721 */ 2722 int 2723 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap) 2724 { 2725 struct nlookupdata nd; 2726 int error; 2727 2728 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2729 if (error == 0) 2730 error = kern_unlink(&nd); 2731 nlookup_done(&nd); 2732 return (error); 2733 } 2734 2735 2736 /* 2737 * unlinkat_args(int fd, char *path, int flags) 2738 * 2739 * Delete the file or directory entry pointed to by fd/path. 2740 */ 2741 int 2742 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap) 2743 { 2744 struct nlookupdata nd; 2745 struct file *fp; 2746 int error; 2747 2748 if (uap->flags & ~AT_REMOVEDIR) 2749 return (EINVAL); 2750 2751 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2752 if (error == 0) { 2753 if (uap->flags & AT_REMOVEDIR) 2754 error = kern_rmdir(&nd); 2755 else 2756 error = kern_unlink(&nd); 2757 } 2758 nlookup_done_at(&nd, fp); 2759 return (error); 2760 } 2761 2762 int 2763 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2764 { 2765 struct thread *td = curthread; 2766 struct file *fp; 2767 struct vnode *vp; 2768 struct vattr_lite lva; 2769 off_t new_offset; 2770 int error; 2771 2772 fp = holdfp(td, fd, -1); 2773 if (fp == NULL) 2774 return (EBADF); 2775 if (fp->f_type != DTYPE_VNODE) { 2776 error = ESPIPE; 2777 goto done; 2778 } 2779 vp = (struct vnode *)fp->f_data; 2780 2781 switch (whence) { 2782 case L_INCR: 2783 spin_lock(&fp->f_spin); 2784 new_offset = fp->f_offset + offset; 2785 error = 0; 2786 break; 2787 case L_XTND: 2788 error = VOP_GETATTR_LITE(vp, &lva); 2789 spin_lock(&fp->f_spin); 2790 new_offset = offset + lva.va_size; 2791 break; 2792 case L_SET: 2793 new_offset = offset; 2794 error = 0; 2795 spin_lock(&fp->f_spin); 2796 break; 2797 default: 2798 new_offset = 0; 2799 error = EINVAL; 2800 spin_lock(&fp->f_spin); 2801 break; 2802 } 2803 2804 /* 2805 * Validate the seek position. Negative offsets are not allowed 2806 * for regular files or directories. 
2807 * 2808 * Normally we would also not want to allow negative offsets for 2809 * character and block-special devices. However kvm addresses 2810 * on 64 bit architectures might appear to be negative and must 2811 * be allowed. 2812 */ 2813 if (error == 0) { 2814 if (new_offset < 0 && 2815 (vp->v_type == VREG || vp->v_type == VDIR)) { 2816 error = EINVAL; 2817 } else { 2818 fp->f_offset = new_offset; 2819 } 2820 } 2821 *res = fp->f_offset; 2822 spin_unlock(&fp->f_spin); 2823 done: 2824 dropfp(td, fd, fp); 2825 2826 return (error); 2827 } 2828 2829 /* 2830 * lseek_args(int fd, int pad, off_t offset, int whence) 2831 * 2832 * Reposition read/write file offset. 2833 */ 2834 int 2835 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap) 2836 { 2837 int error; 2838 2839 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2840 &sysmsg->sysmsg_offset); 2841 2842 return (error); 2843 } 2844 2845 /* 2846 * Check if current process can access given file. amode is a bitmask of *_OK 2847 * access bits. flags is a bitmask of AT_* flags. 2848 */ 2849 int 2850 kern_access(struct nlookupdata *nd, int amode, int flags) 2851 { 2852 struct vnode *vp; 2853 int error, mode; 2854 2855 if (flags & ~AT_EACCESS) 2856 return (EINVAL); 2857 nd->nl_flags |= NLC_SHAREDLOCK; 2858 if ((error = nlookup(nd)) != 0) 2859 return (error); 2860 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2861 return (error); 2862 retry: 2863 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2864 if (error) 2865 return (error); 2866 2867 /* Flags == 0 means only check for existence. 
*/ 2868 if (amode) { 2869 mode = 0; 2870 if (amode & R_OK) 2871 mode |= VREAD; 2872 if (amode & W_OK) 2873 mode |= VWRITE; 2874 if (amode & X_OK) 2875 mode |= VEXEC; 2876 if ((mode & VWRITE) == 0 || 2877 (error = vn_writechk(vp)) == 0) { 2878 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2879 } 2880 2881 /* 2882 * If the file handle is stale we have to re-resolve the 2883 * entry with the ncp held exclusively. This is a hack 2884 * at the moment. 2885 */ 2886 if (error == ESTALE) { 2887 u_int dummy_gen; 2888 2889 vput(vp); 2890 cache_unlock(&nd->nl_nch); 2891 cache_lock(&nd->nl_nch); 2892 dummy_gen = nd->nl_nch.ncp->nc_generation; 2893 cache_setunresolved(&nd->nl_nch); 2894 error = cache_resolve(&nd->nl_nch, &dummy_gen, 2895 nd->nl_cred); 2896 if (error == 0) { 2897 vp = NULL; 2898 goto retry; 2899 } 2900 return(error); 2901 } 2902 } 2903 vput(vp); 2904 return (error); 2905 } 2906 2907 /* 2908 * access_args(char *path, int flags) 2909 * 2910 * Check access permissions. 2911 */ 2912 int 2913 sys_access(struct sysmsg *sysmsg, const struct access_args *uap) 2914 { 2915 struct nlookupdata nd; 2916 int error; 2917 2918 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2919 if (error == 0) 2920 error = kern_access(&nd, uap->flags, 0); 2921 nlookup_done(&nd); 2922 return (error); 2923 } 2924 2925 2926 /* 2927 * eaccess_args(char *path, int flags) 2928 * 2929 * Check access permissions. 2930 */ 2931 int 2932 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap) 2933 { 2934 struct nlookupdata nd; 2935 int error; 2936 2937 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2938 if (error == 0) 2939 error = kern_access(&nd, uap->flags, AT_EACCESS); 2940 nlookup_done(&nd); 2941 return (error); 2942 } 2943 2944 2945 /* 2946 * faccessat_args(int fd, char *path, int amode, int flags) 2947 * 2948 * Check access permissions. 
2949 */ 2950 int 2951 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap) 2952 { 2953 struct nlookupdata nd; 2954 struct file *fp; 2955 int error; 2956 2957 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2958 NLC_FOLLOW); 2959 if (error == 0) 2960 error = kern_access(&nd, uap->amode, uap->flags); 2961 nlookup_done_at(&nd, fp); 2962 return (error); 2963 } 2964 2965 int 2966 kern_stat(struct nlookupdata *nd, struct stat *st) 2967 { 2968 int error; 2969 struct vnode *vp; 2970 2971 nd->nl_flags |= NLC_SHAREDLOCK; 2972 if ((error = nlookup(nd)) != 0) 2973 return (error); 2974 again: 2975 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2976 return (ENOENT); 2977 2978 #if 1 2979 error = cache_vref(&nd->nl_nch, NULL, &vp); 2980 #else 2981 error = vget(vp, LK_SHARED); 2982 #endif 2983 if (error) 2984 return (error); 2985 error = vn_stat(vp, st, nd->nl_cred); 2986 2987 /* 2988 * If the file handle is stale we have to re-resolve the 2989 * entry with the ncp held exclusively. This is a hack 2990 * at the moment. 2991 */ 2992 if (error == ESTALE) { 2993 u_int dummy_gen; 2994 #if 1 2995 vrele(vp); 2996 #else 2997 vput(vp); 2998 #endif 2999 cache_unlock(&nd->nl_nch); 3000 cache_lock(&nd->nl_nch); 3001 dummy_gen = nd->nl_nch.ncp->nc_generation; 3002 cache_setunresolved(&nd->nl_nch); 3003 error = cache_resolve(&nd->nl_nch, &dummy_gen, nd->nl_cred); 3004 if (error == 0) 3005 goto again; 3006 } else { 3007 #if 1 3008 vrele(vp); 3009 #else 3010 vput(vp); 3011 #endif 3012 } 3013 return (error); 3014 } 3015 3016 /* 3017 * stat_args(char *path, struct stat *ub) 3018 * 3019 * Get file status; this version follows links. 
 */
int
sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	/* Follow symlinks unless AT_SYMLINK_NOFOLLOW was given */
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->sb, sizeof(*uap->sb));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common code for pathconf(2)/lpathconf(2).  flags selects whether
 * the terminal symlink is followed (NLC_FOLLOW or 0).  The result is
 * returned through *sysmsg_regp.
 */
static int
kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, name, sysmsg_regp);
		vput(vp);
	}
	return (error);
}

/*
 * pathconf_Args(char *path, int name)
 *
 * Get configurable pathname variables.
 */
int
sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap)
{
	return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW,
			      &sysmsg->sysmsg_reg));
}

/*
 * lpathconf_Args(char *path, int name)
 *
 * Get configurable pathname variables, but don't follow symlinks.
 */
int
sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap)
{
	return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg));
}

/*
 * XXX: daver
 * kern_readlink isn't properly split yet.  There is a copyin burried
 * in VOP_READLINK().
3138 */ 3139 int 3140 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3141 { 3142 struct thread *td = curthread; 3143 struct vnode *vp; 3144 struct iovec aiov; 3145 struct uio auio; 3146 int error; 3147 3148 nd->nl_flags |= NLC_SHAREDLOCK; 3149 if ((error = nlookup(nd)) != 0) 3150 return (error); 3151 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3152 if (error) 3153 return (error); 3154 if (vp->v_type != VLNK) { 3155 error = EINVAL; 3156 } else { 3157 aiov.iov_base = buf; 3158 aiov.iov_len = count; 3159 auio.uio_iov = &aiov; 3160 auio.uio_iovcnt = 1; 3161 auio.uio_offset = 0; 3162 auio.uio_rw = UIO_READ; 3163 auio.uio_segflg = UIO_USERSPACE; 3164 auio.uio_td = td; 3165 auio.uio_resid = count; 3166 error = VOP_READLINK(vp, &auio, td->td_ucred); 3167 } 3168 vput(vp); 3169 *res = count - auio.uio_resid; 3170 return (error); 3171 } 3172 3173 /* 3174 * readlink_args(char *path, char *buf, int count) 3175 * 3176 * Return target name of a symbolic link. 3177 */ 3178 int 3179 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap) 3180 { 3181 struct nlookupdata nd; 3182 int error; 3183 3184 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3185 if (error == 0) { 3186 error = kern_readlink(&nd, uap->buf, uap->count, 3187 &sysmsg->sysmsg_result); 3188 } 3189 nlookup_done(&nd); 3190 return (error); 3191 } 3192 3193 /* 3194 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3195 * 3196 * Return target name of a symbolic link. The path is relative to the 3197 * directory associated with fd. 
3198 */ 3199 int 3200 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap) 3201 { 3202 struct nlookupdata nd; 3203 struct file *fp; 3204 int error; 3205 3206 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3207 if (error == 0) { 3208 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3209 &sysmsg->sysmsg_result); 3210 } 3211 nlookup_done_at(&nd, fp); 3212 return (error); 3213 } 3214 3215 static int 3216 setfflags(struct vnode *vp, u_long flags) 3217 { 3218 struct thread *td = curthread; 3219 int error; 3220 struct vattr vattr; 3221 3222 /* 3223 * Prevent non-root users from setting flags on devices. When 3224 * a device is reused, users can retain ownership of the device 3225 * if they are allowed to set flags and programs assume that 3226 * chown can't fail when done as root. 3227 */ 3228 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3229 ((error = 3230 caps_priv_check(td->td_ucred, SYSCAP_NOVFS_CHFLAGS_DEV)) != 0)) 3231 { 3232 return (error); 3233 } 3234 3235 /* 3236 * note: vget is required for any operation that might mod the vnode 3237 * so VINACTIVE is properly cleared. 3238 */ 3239 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3240 VATTR_NULL(&vattr); 3241 vattr.va_flags = flags; 3242 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3243 vput(vp); 3244 } 3245 return (error); 3246 } 3247 3248 /* 3249 * chflags(const char *path, u_long flags) 3250 * 3251 * Change flags of a file given a path name. 
3252 */ 3253 int 3254 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap) 3255 { 3256 struct nlookupdata nd; 3257 struct vnode *vp; 3258 int error; 3259 3260 vp = NULL; 3261 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3262 if (error == 0) 3263 error = nlookup(&nd); 3264 if (error == 0) 3265 error = ncp_writechk(&nd.nl_nch); 3266 if (error == 0) 3267 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3268 nlookup_done(&nd); 3269 if (error == 0) { 3270 error = setfflags(vp, uap->flags); 3271 vrele(vp); 3272 } 3273 return (error); 3274 } 3275 3276 /* 3277 * lchflags(const char *path, u_long flags) 3278 * 3279 * Change flags of a file given a path name, but don't follow symlinks. 3280 */ 3281 int 3282 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap) 3283 { 3284 struct nlookupdata nd; 3285 struct vnode *vp; 3286 int error; 3287 3288 vp = NULL; 3289 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3290 if (error == 0) 3291 error = nlookup(&nd); 3292 if (error == 0) 3293 error = ncp_writechk(&nd.nl_nch); 3294 if (error == 0) 3295 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3296 nlookup_done(&nd); 3297 if (error == 0) { 3298 error = setfflags(vp, uap->flags); 3299 vrele(vp); 3300 } 3301 return (error); 3302 } 3303 3304 /* 3305 * fchflags_args(int fd, u_flags flags) 3306 * 3307 * Change flags of a file given a file descriptor. 
 */
int
sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * chflagsat_args(int fd, const char *path, u_long flags, int atflags)
 * change flags given a pathname relative to a filedescriptor
 */
int
sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	struct file *fp;
	int error;
	int lookupflags;

	if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	/* Follow symlinks unless AT_SYMLINK_NOFOLLOW was given */
	lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	vp = NULL;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				lookupflags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done_at(&nd, fp);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}


/*
 * Set the file mode on an unlocked, referenced vnode.  Common backend
 * for chmod(2)/lchmod(2)/fchmod(2)/fchmodat(2).
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		/* mode change may affect cached execute-ok state */
		cache_inval_wxok(vp);
		vput(vp);
	}
	return error;
}

/*
 * Resolve nd and change the mode of the resulting file.  The caller
 * performs nlookup_done().
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 */
int
sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
3438 */ 3439 int 3440 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap) 3441 { 3442 struct thread *td = curthread; 3443 struct file *fp; 3444 int error; 3445 3446 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3447 return (error); 3448 if (fp->f_nchandle.ncp) 3449 error = ncp_writechk(&fp->f_nchandle); 3450 if (error == 0) 3451 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3452 fdrop(fp); 3453 return (error); 3454 } 3455 3456 /* 3457 * fchmodat_args(char *path, int mode) 3458 * 3459 * Change mode of a file pointed to by fd/path. 3460 */ 3461 int 3462 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap) 3463 { 3464 struct nlookupdata nd; 3465 struct file *fp; 3466 int error; 3467 int flags; 3468 3469 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3470 return (EINVAL); 3471 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3472 3473 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3474 UIO_USERSPACE, flags); 3475 if (error == 0) 3476 error = kern_chmod(&nd, uap->mode); 3477 nlookup_done_at(&nd, fp); 3478 return (error); 3479 } 3480 3481 static int 3482 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3483 { 3484 struct thread *td = curthread; 3485 int error; 3486 struct vattr vattr; 3487 uid_t o_uid; 3488 gid_t o_gid; 3489 uint64_t size; 3490 3491 /* 3492 * note: vget is required for any operation that might mod the vnode 3493 * so VINACTIVE is properly cleared. 
3494 */ 3495 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3496 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3497 return error; 3498 o_uid = vattr.va_uid; 3499 o_gid = vattr.va_gid; 3500 size = vattr.va_size; 3501 3502 VATTR_NULL(&vattr); 3503 vattr.va_uid = uid; 3504 vattr.va_gid = gid; 3505 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3506 vput(vp); 3507 } 3508 3509 if (error == 0) { 3510 if (uid == -1) 3511 uid = o_uid; 3512 if (gid == -1) 3513 gid = o_gid; 3514 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3515 VFS_ACCOUNT(mp, uid, gid, size); 3516 } 3517 3518 return error; 3519 } 3520 3521 int 3522 kern_chown(struct nlookupdata *nd, int uid, int gid) 3523 { 3524 struct vnode *vp; 3525 int error; 3526 3527 if ((error = nlookup(nd)) != 0) 3528 return (error); 3529 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3530 return (error); 3531 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3532 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3533 vrele(vp); 3534 return (error); 3535 } 3536 3537 /* 3538 * chown(char *path, int uid, int gid) 3539 * 3540 * Set ownership given a path name. 3541 */ 3542 int 3543 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap) 3544 { 3545 struct nlookupdata nd; 3546 int error; 3547 3548 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3549 if (error == 0) 3550 error = kern_chown(&nd, uap->uid, uap->gid); 3551 nlookup_done(&nd); 3552 return (error); 3553 } 3554 3555 /* 3556 * lchown_args(char *path, int uid, int gid) 3557 * 3558 * Set ownership given a path name, do not cross symlinks. 
3559 */ 3560 int 3561 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap) 3562 { 3563 struct nlookupdata nd; 3564 int error; 3565 3566 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3567 if (error == 0) 3568 error = kern_chown(&nd, uap->uid, uap->gid); 3569 nlookup_done(&nd); 3570 return (error); 3571 } 3572 3573 /* 3574 * fchown_args(int fd, int uid, int gid) 3575 * 3576 * Set ownership given a file descriptor. 3577 */ 3578 int 3579 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap) 3580 { 3581 struct thread *td = curthread; 3582 struct proc *p = td->td_proc; 3583 struct file *fp; 3584 int error; 3585 3586 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3587 return (error); 3588 if (fp->f_nchandle.ncp) 3589 error = ncp_writechk(&fp->f_nchandle); 3590 if (error == 0) 3591 error = setfown(p->p_fd->fd_ncdir.mount, 3592 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3593 fdrop(fp); 3594 return (error); 3595 } 3596 3597 /* 3598 * fchownat(int fd, char *path, int uid, int gid, int flags) 3599 * 3600 * Set ownership of file pointed to by fd/path. 3601 */ 3602 int 3603 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap) 3604 { 3605 struct nlookupdata nd; 3606 struct file *fp; 3607 int error; 3608 int flags; 3609 3610 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3611 return (EINVAL); 3612 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3613 3614 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3615 UIO_USERSPACE, flags); 3616 if (error == 0) 3617 error = kern_chown(&nd, uap->uid, uap->gid); 3618 nlookup_done_at(&nd, fp); 3619 return (error); 3620 } 3621 3622 3623 static int 3624 getutimes(struct timeval *tvp, struct timespec *tsp) 3625 { 3626 struct timeval tv[2]; 3627 int error; 3628 3629 if (tvp == NULL) { 3630 microtime(&tv[0]); 3631 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3632 tsp[1] = tsp[0]; 3633 } else { 3634 if ((error = itimerfix(tvp)) != 0) 3635 return (error); 3636 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3637 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3638 } 3639 return 0; 3640 } 3641 3642 static int 3643 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3644 { 3645 struct timespec tsnow; 3646 int error; 3647 3648 *nullflag = 0; 3649 nanotime(&tsnow); 3650 if (ts == NULL) { 3651 newts[0] = tsnow; 3652 newts[1] = tsnow; 3653 *nullflag = 1; 3654 return (0); 3655 } 3656 3657 newts[0] = ts[0]; 3658 newts[1] = ts[1]; 3659 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) 3660 return (0); 3661 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3662 *nullflag = 1; 3663 3664 if (newts[0].tv_nsec == UTIME_OMIT) 3665 newts[0].tv_sec = VNOVAL; 3666 else if (newts[0].tv_nsec == UTIME_NOW) 3667 newts[0] = tsnow; 3668 else if ((error = itimespecfix(&newts[0])) != 0) 3669 return (error); 3670 3671 if (newts[1].tv_nsec == UTIME_OMIT) 3672 newts[1].tv_sec = VNOVAL; 3673 else if (newts[1].tv_nsec == UTIME_NOW) 3674 newts[1] = tsnow; 3675 else if ((error = itimespecfix(&newts[1])) != 0) 3676 return (error); 3677 3678 return (0); 3679 } 3680 3681 static int 3682 setutimes(struct vnode *vp, struct vattr *vattr, 3683 const struct timespec *ts, int nullflag) 3684 { 3685 struct thread *td = curthread; 3686 int error; 3687 3688 VATTR_NULL(vattr); 3689 vattr->va_atime = ts[0]; 3690 vattr->va_mtime = ts[1]; 3691 if (nullflag) 3692 
vattr->va_vaflags |= VA_UTIMES_NULL; 3693 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3694 3695 return error; 3696 } 3697 3698 int 3699 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3700 { 3701 struct timespec ts[2]; 3702 int error; 3703 3704 if (tptr) { 3705 if ((error = getutimes(tptr, ts)) != 0) 3706 return (error); 3707 } 3708 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3709 return (error); 3710 } 3711 3712 /* 3713 * utimes_args(char *path, struct timeval *tptr) 3714 * 3715 * Set the access and modification times of a file. 3716 */ 3717 int 3718 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap) 3719 { 3720 struct timeval tv[2]; 3721 struct nlookupdata nd; 3722 int error; 3723 3724 if (uap->tptr) { 3725 error = copyin(uap->tptr, tv, sizeof(tv)); 3726 if (error) 3727 return (error); 3728 } 3729 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3730 if (error == 0) 3731 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3732 nlookup_done(&nd); 3733 return (error); 3734 } 3735 3736 /* 3737 * lutimes_args(char *path, struct timeval *tptr) 3738 * 3739 * Set the access and modification times of a file. 3740 */ 3741 int 3742 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap) 3743 { 3744 struct timeval tv[2]; 3745 struct nlookupdata nd; 3746 int error; 3747 3748 if (uap->tptr) { 3749 error = copyin(uap->tptr, tv, sizeof(tv)); 3750 if (error) 3751 return (error); 3752 } 3753 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3754 if (error == 0) 3755 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3756 nlookup_done(&nd); 3757 return (error); 3758 } 3759 3760 /* 3761 * Set utimes on a file descriptor. The creds used to open the 3762 * file are used to determine whether the operation is allowed 3763 * or not. 
 */
int
kern_futimens(int fd, struct timespec *ts)
{
	struct thread *td = curthread;
	struct timespec newts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	struct vattr_lite lva;
	int nullflag;
	int error;

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	/* a descriptor may have no namecache handle (e.g. fhopen'd) */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = VOP_GETATTR_FP(vp, &vattr, fp);
			if (error == 0) {
				/*
				 * Access check against the creds the file
				 * was opened with, using a lite copy of
				 * the attributes.
				 */
				lva.va_type = vattr.va_type;
				lva.va_nlink = vattr.va_nlink;
				lva.va_mode = vattr.va_mode;
				lva.va_uid = vattr.va_uid;
				lva.va_gid = vattr.va_gid;
				lva.va_size = vattr.va_size;
				lva.va_flags = vattr.va_flags;

				error = naccess_lva(&lva, NLC_OWN | NLC_WRITE,
						    fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, newts, nullflag);
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimens_args(int fd, struct timespec *ts)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap)
{
	struct timespec ts[2];
	int error;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}
	error = kern_futimens(uap->fd, uap->ts ? ts : NULL);
	return (error);
}

/*
 * timeval flavor of kern_futimens(); converts and defers.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_futimens(fd, tptr ? ts : NULL);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	return (error);
}

/*
 * Set access/modification times on a path resolved by nd.
 * ts may be NULL ("now") or contain UTIME_NOW/UTIME_OMIT tokens.
 * Only AT_SYMLINK_NOFOLLOW is accepted in flags (symlink handling
 * itself was already decided by the caller's nlookup flags).
 */
int
kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
{
	struct timespec newts[2];
	struct vnode *vp;
	struct vattr vattr;
	int nullflag;
	int error;

	if (flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);

	nd->nl_flags |= NLC_OWN | NLC_WRITE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_writechk(vp)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, newts, nullflag);
			vput(vp);	/* drops vget's ref+lock */
		}
	}
	vrele(vp);	/* drops cache_vref's ref */
	return (error);
}

/*
 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags);
 *
 * Set file access and modification times of a file.
 */
int
sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap)
{
	struct timespec ts[2];
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Truncate the file named by nd to the given length.  When quotas are
 * enabled the owner/size are sampled first so the space delta can be
 * charged via VFS_ACCOUNT().
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}
	if (vfs_quota_enabled) {
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
		/* charge (or credit) the size delta to the owner */
		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
	}
done:
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 */
int
sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_truncate(&nd, uap->length);
	nlookup_done(&nd);
	return error;
}

/*
 * Truncate an open file descriptor to the given length.  The fd must
 * have been opened for writing and must not be append-only.  Quota
 * accounting mirrors kern_truncate().
 */
int
kern_ftruncate(int fd, off_t length)
{
	struct thread *td = curthread;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;
	struct mount *mp;

	if (length < 0)
		return(EINVAL);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp) {
		error = ncp_writechk(&fp->f_nchandle);
		if (error)
			goto done;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EINVAL;
		goto done;
	}
	if (fp->f_flag & FAPPENDONLY) {	/* refuse on append-only opens */
		error = EINVAL;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
		vn_unlock(vp);
		goto done;
	}

	if (vfs_quota_enabled) {
		error = VOP_GETATTR_FP(vp, &vattr, fp);
		KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp);
		mp = vq_vptomp(vp);
		VFS_ACCOUNT(mp, uid, gid, length - old_size);
	}
	vn_unlock(vp);
done:
	fdrop(fp);
	return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * Sync an open file descriptor to disk.  fullsync selects a full
 * fsync() versus fdatasync() semantics.  Dirty VM pages are cleaned
 * first unless the mount opts out via MNTK_NOMSYNC.
 */
int
kern_fsync(int fd, bool fullsync)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((obj = vp->v_object) != NULL) {
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = fullsync ?
		VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp) :
		VOP_FDATASYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
int
sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap)
{
	return (kern_fsync(uap->fd, true));
}

/*
 * fdatasync(int fd)
 *
 * Data-sync an open file.
 */
int
sys_fdatasync(struct sysmsg *sysmsg, const struct fdatasync_args *uap)
{
	return (kern_fsync(uap->fd, false));
}

/*
 * rename op.
 *
 * NOTE: error == 0 and nl_dvp is NULL indicates a mount point, operation
 *	 disallowed.  e.g. /var/cache where /var/cache is a null-mount, for
 *	 example.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	struct mount *userenlk;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);

	/*
	 * Attempt to rename a mount point (from or to)
	 */
	if (error == 0 && fromnd->nl_dvp == NULL)
		return (EINVAL);

	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	/*
	 * Renaming a directory: take the per-mount rename lock to
	 * serialize with other directory renames (required for the
	 * subdirectory-loop check further down to be stable).
	 */
	if (fromnd->nl_nch.ncp->nc_vp &&
	    fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
		userenlk = fnchd.mount;
		cache_unlock(&fromnd->nl_nch);
		lockmgr(&userenlk->mnt_renlock, LK_EXCLUSIVE);
	} else {
		userenlk = NULL;
		cache_unlock(&fromnd->nl_nch);
	}

	/*
	 * Lookup target
	 */
	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		goto done;
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	/*
	 * Attempt to rename a mount point (from or to)
	 */
	if (error == 0 && tond->nl_dvp == NULL) {
		cache_drop(&fnchd);
		error = ENOENT;
		goto done;
	}

	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		error = ENOENT;
		goto done;
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		error = 0;
		goto done;
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		error = EINVAL;
		goto done;
	}

	/*
	 * Lock all four namecache entries.  tond is already locked.
	 */
	cache_lock4_tondlocked(&fnchd, &fromnd->nl_nch,
			       &tnchd, &tond->nl_nch,
			       fromnd->nl_cred, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.
	 */
	if (((fromnd->nl_nch.ncp->nc_generation - fncp_gen) & ~1) ||
	    ((tond->nl_nch.ncp->nc_generation - tncp_gen) & ~1))
	{
		krateprintf(&krate_rename,
			"kern_rename: retry due to race on: "
			"\"%s\" -> \"%s\" (%d,%d)\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name,
			fromnd->nl_nch.ncp->nc_generation - fncp_gen,
			tond->nl_nch.ncp->nc_generation - tncp_gen);
		error = EAGAIN;
		goto finish;
	}

	/*
	 * If either fromnd or tond are marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED))) {
		krateprintf(&krate_rename,
			"kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		error = EAGAIN;
		goto finish;
	}

	/*
	 * Make sure the parent directories linkages are the same.  We have
	 * already checked that fromnd and tond are not mount points so this
	 * should not loop forever on a cross-mount.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		error = EAGAIN;
		goto finish;
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		error = EXDEV;
		goto finish;
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		goto finish;
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * Only required when renaming a directory, in which case userenlk is
	 * non-NULL.
	 */
	if (__predict_false(userenlk && error == 0)) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
finish:
	cache_put(&tnchd);
	cache_put(&fnchd);
done:
	if (userenlk)
		lockmgr(&userenlk->mnt_renlock, LK_RELEASE);
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	/* kern_rename() returns EAGAIN on namecache races; just retry */
	do {
		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&fromnd, &tond);
			nlookup_done(&tond);
		}
		nlookup_done(&fromnd);
	} while (error == EAGAIN);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap)
{
	struct nlookupdata oldnd, newnd;
	struct file *oldfp, *newfp;
	int error;

	/* kern_rename() returns EAGAIN on namecache races; just retry */
	do {
		error = nlookup_init_at(&oldnd, &oldfp,
					uap->oldfd, uap->old,
					UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init_at(&newnd, &newfp,
						uap->newfd, uap->new,
						UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&oldnd, &newnd);
			nlookup_done_at(&newnd, newfp);
		}
		nlookup_done_at(&oldnd, oldfp);
	} while (error == EAGAIN);
	return (error);
}

/*
 * Create a directory at the path resolved by nd with the given mode
 * (masked by the process umask).
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	/* NULL nl_dvp indicates a mount point; creation disallowed */
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
int
sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Remove the directory resolved by nd.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EBUSY);
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Read directory entries from fd into buf (at most count bytes).
 * On success *res is the number of bytes produced and, if basep is
 * non-NULL, *basep receives the seek offset the read started at.
 * direction selects user vs system space for buf.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;

	/*
	 * WARNING!  *basep may not be wide enough to accommodate the
	 * seek offset.   XXX should we hack this to return the upper 32 bits
	 * for offsets greater then 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap)
{
	long base;
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
				   &sysmsg->sysmsg_result, UIO_USERSPACE);

	if (error == 0 && uap->basep)
		error = copyout(&base, uap->basep, sizeof(*uap->basep));
	return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 *
 * Same as getdirentries() but without the basep cookie.
 */
int
sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap)
{
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
				   &sysmsg->sysmsg_result, UIO_USERSPACE);

	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 *
 * Returns the previous mask in sysmsg_result.
 */
int
sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;

	fdp = p->p_fd;
	sysmsg->sysmsg_result = fdp->fd_cmask;
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 */
int
sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap)
{
	struct nlookupdata nd;
	struct vattr vattr;
	struct vnode *vp;
	struct ucred *cred;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	/*
	 * NOTE(review): nl_cred is grabbed unconditionally here even on
	 * lookup failure -- presumably nl_cred is always valid after
	 * nlookup_init(); confirm crhold() tolerates this path.
	 */
	cred = crhold(nd.nl_cred);
	nlookup_done(&nd);
	if (error == 0) {
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr);
		/* non-owners need the revoke capability */
		if (error == 0 && cred->cr_uid != vattr.va_uid)
			error = caps_priv_check(cred, SYSCAP_NOVFS_REVOKE);
		if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) {
			/* devices: only revoke if still referenced */
			if (vcount(vp) > 0)
				error = vrevoke(vp, cred);
		} else if (error == 0) {
			error = vrevoke(vp, cred);
		}
		vrele(vp);
	}
	if (cred)
		crfree(cred);
	return (error);
}

/*
 * getfh_args(char *fname, fhandle_t *fhp)
 *
 * Get (NFS) file handle
 *
 * NOTE: We use the fsid of the covering mount, even if it is a nullfs
 * mount.  This allows nullfs mounts to be explicitly exported.
 *
 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
 *
 *	    nullfs mounts of subdirectories are not safe.  That is, it will
 *	    work, but you do not really have protection against access to
 *	    the related parent directories.
4689 */ 4690 int 4691 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap) 4692 { 4693 struct nlookupdata nd; 4694 fhandle_t fh; 4695 struct vnode *vp; 4696 struct mount *mp; 4697 int error; 4698 4699 /* 4700 * Must be super user 4701 */ 4702 if ((error = caps_priv_check_self(SYSCAP_RESTRICTEDROOT)) != 0) 4703 return (error); 4704 4705 vp = NULL; 4706 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4707 if (error == 0) 4708 error = nlookup(&nd); 4709 if (error == 0) 4710 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4711 mp = nd.nl_nch.mount; 4712 nlookup_done(&nd); 4713 if (error == 0) { 4714 bzero(&fh, sizeof(fh)); 4715 fh.fh_fsid = mp->mnt_stat.f_fsid; 4716 error = VFS_VPTOFH(vp, &fh.fh_fid); 4717 vput(vp); 4718 if (error == 0) 4719 error = copyout(&fh, uap->fhp, sizeof(fh)); 4720 } 4721 return (error); 4722 } 4723 4724 /* 4725 * fhopen_args(const struct fhandle *u_fhp, int flags) 4726 * 4727 * syscall for the rpc.lockd to use to translate a NFS file handle into 4728 * an open descriptor. 4729 * 4730 * WARNING: Do not remove the caps_priv_check() call or this becomes 4731 * one giant security hole. 4732 */ 4733 int 4734 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap) 4735 { 4736 struct thread *td = curthread; 4737 struct filedesc *fdp = td->td_proc->p_fd; 4738 struct mount *mp; 4739 struct vnode *vp; 4740 struct fhandle fhp; 4741 struct vattr vat; 4742 struct vattr *vap = &vat; 4743 struct flock lf; 4744 int fmode, mode, error = 0, type; 4745 struct file *nfp; 4746 struct file *fp; 4747 int indx; 4748 4749 /* 4750 * Must be super user 4751 */ 4752 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 4753 if (error) 4754 return (error); 4755 4756 fmode = FFLAGS(uap->flags); 4757 4758 /* 4759 * Why not allow a non-read/write open for our lockd? 
4760 */ 4761 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4762 return (EINVAL); 4763 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4764 if (error) 4765 return(error); 4766 4767 /* 4768 * Find the mount point 4769 */ 4770 mp = vfs_getvfs(&fhp.fh_fsid); 4771 if (mp == NULL) { 4772 error = ESTALE; 4773 goto done2; 4774 } 4775 /* now give me my vnode, it gets returned to me locked */ 4776 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4777 if (error) 4778 goto done; 4779 /* 4780 * from now on we have to make sure not 4781 * to forget about the vnode 4782 * any error that causes an abort must vput(vp) 4783 * just set error = err and 'goto bad;'. 4784 */ 4785 4786 /* 4787 * from vn_open 4788 */ 4789 if (vp->v_type == VLNK) { 4790 error = EMLINK; 4791 goto bad; 4792 } 4793 if (vp->v_type == VSOCK) { 4794 error = EOPNOTSUPP; 4795 goto bad; 4796 } 4797 mode = 0; 4798 if (fmode & (FWRITE | O_TRUNC)) { 4799 if (vp->v_type == VDIR) { 4800 error = EISDIR; 4801 goto bad; 4802 } 4803 error = vn_writechk(vp); 4804 if (error) 4805 goto bad; 4806 mode |= VWRITE; 4807 } 4808 if (fmode & FREAD) 4809 mode |= VREAD; 4810 if (mode) { 4811 error = VOP_ACCESS(vp, mode, td->td_ucred); 4812 if (error) 4813 goto bad; 4814 } 4815 if (fmode & O_TRUNC) { 4816 vn_unlock(vp); /* XXX */ 4817 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4818 VATTR_NULL(vap); 4819 vap->va_size = 0; 4820 error = VOP_SETATTR(vp, vap, td->td_ucred); 4821 if (error) 4822 goto bad; 4823 } 4824 4825 /* 4826 * VOP_OPEN needs the file pointer so it can potentially override 4827 * it. 4828 * 4829 * WARNING! no f_nchandle will be associated when fhopen()ing a 4830 * directory. XXX 4831 */ 4832 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4833 goto bad; 4834 error = VOP_OPEN(vp, fmode, td->td_ucred, &nfp); 4835 fp = nfp; 4836 4837 if (error) { 4838 /* 4839 * setting f_ops this way prevents VOP_CLOSE from being 4840 * called or fdrop() releasing the vp from v_data. 
Since 4841 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4842 */ 4843 fp->f_ops = &badfileops; 4844 fp->f_data = NULL; 4845 goto bad_drop; 4846 } 4847 4848 /* 4849 * The fp is given its own reference, we still have our ref and lock. 4850 * 4851 * Assert that all regular files must be created with a VM object. 4852 */ 4853 if (vp->v_type == VREG && vp->v_object == NULL) { 4854 kprintf("fhopen: regular file did not " 4855 "have VM object: %p\n", 4856 vp); 4857 goto bad_drop; 4858 } 4859 4860 /* 4861 * The open was successful. Handle any locking requirements. 4862 */ 4863 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4864 lf.l_whence = SEEK_SET; 4865 lf.l_start = 0; 4866 lf.l_len = 0; 4867 if (fmode & O_EXLOCK) 4868 lf.l_type = F_WRLCK; 4869 else 4870 lf.l_type = F_RDLCK; 4871 if (fmode & FNONBLOCK) 4872 type = 0; 4873 else 4874 type = F_WAIT; 4875 vn_unlock(vp); 4876 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4877 &lf, type)) != 0) { 4878 /* 4879 * release our private reference. 4880 */ 4881 fsetfd(fdp, NULL, indx); 4882 fdrop(fp); 4883 vrele(vp); 4884 goto done; 4885 } 4886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4887 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4888 } 4889 4890 /* 4891 * Clean up. Associate the file pointer with the previously 4892 * reserved descriptor and return it. 
4893 */ 4894 vput(vp); 4895 if (uap->flags & O_CLOEXEC) 4896 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4897 fsetfd(fdp, fp, indx); 4898 fdrop(fp); 4899 sysmsg->sysmsg_result = indx; 4900 mount_drop(mp); 4901 4902 return (error); 4903 4904 bad_drop: 4905 fsetfd(fdp, NULL, indx); 4906 fdrop(fp); 4907 bad: 4908 vput(vp); 4909 done: 4910 mount_drop(mp); 4911 done2: 4912 return (error); 4913 } 4914 4915 /* 4916 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4917 */ 4918 int 4919 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap) 4920 { 4921 struct thread *td = curthread; 4922 struct stat sb; 4923 fhandle_t fh; 4924 struct mount *mp; 4925 struct vnode *vp; 4926 int error; 4927 4928 /* 4929 * Must be super user 4930 */ 4931 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 4932 if (error) 4933 return (error); 4934 4935 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4936 if (error) 4937 return (error); 4938 4939 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4940 error = ESTALE; 4941 if (error == 0) { 4942 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4943 error = vn_stat(vp, &sb, td->td_ucred); 4944 vput(vp); 4945 } 4946 } 4947 if (error == 0) 4948 error = copyout(&sb, uap->sb, sizeof(sb)); 4949 if (mp) 4950 mount_drop(mp); 4951 4952 return (error); 4953 } 4954 4955 /* 4956 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4957 */ 4958 int 4959 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap) 4960 { 4961 struct thread *td = curthread; 4962 struct proc *p = td->td_proc; 4963 struct statfs *sp; 4964 struct mount *mp; 4965 struct vnode *vp; 4966 struct statfs sb; 4967 char *fullpath, *freepath; 4968 fhandle_t fh; 4969 int error; 4970 4971 /* 4972 * Must be super user 4973 */ 4974 error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT); 4975 if (error) 4976 return (error); 4977 4978 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4979 return (error); 4980 4981 if ((mp = 
vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* The mount must be reachable from the caller's chroot. */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	/* Convert the file handle to a vnode, then stat its filesystem. */
	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
		goto done;
	/*
	 * NOTE(review): mp is re-read from vp->v_mount here but the
	 * reference being dropped at 'done' was obtained on the mount
	 * returned by vfs_getvfs().  This assumes VFS_FHTOVP() returns
	 * a vnode on that same mount — TODO confirm.
	 */
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	/* Rewrite f_mntonname so it is relative to the caller's root. */
	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/* Restricted callers do not get to see the real fsid. */
	if (caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 *
 * statvfs() a filesystem identified by a file handle rather than by
 * path.  Requires that the caller not be capability-restricted, that
 * the handle resolve to a mount visible from the caller's chroot, and
 * copies the resulting statvfs structure out to userspace.
 */
int
sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT)))
		return (error);

	/* Copy the file handle in from userspace. */
	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* The mount must be reachable from the caller's chroot. */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		goto done;
	/*
	 * NOTE(review): same hold/drop pairing assumption as in
	 * sys_fhstatfs above — assumes vp->v_mount is the mount
	 * returned by vfs_getvfs(); TODO confirm.
	 */
	mp = vp->v_mount;
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	/* Synthesize the f_flag bits from the mount flags. */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);
	return (error);
}


/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;

	/*
	 * Optional filename argument: resolve it to a referenced vnode
	 * which is handed to VFS_EXTATTRCTL() below.
	 *
	 * NOTE(review): vp obtained via cache_vref() is never vrele()'d
	 * in this function — either VFS_EXTATTRCTL() consumes the
	 * reference or this leaks one; TODO confirm against the VFS op
	 * contract.
	 */
	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	/*
	 * The copyin only validates the name; the userspace pointer
	 * itself (uap->attrname) is what is passed to the VFS op, per
	 * the comment above the function.
	 */
	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	/* Resolve the path to a mount point and dispatch the command. */
	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	return (error);
}

/*
 * Syscall to set a named
 * extended attribute on a file or directory.
 */
int
sys_extattr_set_file(struct sysmsg *sysmsg,
		     const struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	/* Copy the attribute name in from userspace. */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/*
	 * Resolve the path, verify it is writable, and obtain an
	 * exclusively-locked vnode for the VOP call.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Describe the userspace data buffer for the write. */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
5180 */ 5181 int 5182 sys_extattr_get_file(struct sysmsg *sysmsg, 5183 const struct extattr_get_file_args *uap) 5184 { 5185 char attrname[EXTATTR_MAXNAMELEN]; 5186 struct nlookupdata nd; 5187 struct uio auio; 5188 struct iovec aiov; 5189 struct vnode *vp; 5190 int error; 5191 5192 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5193 if (error) 5194 return (error); 5195 5196 vp = NULL; 5197 5198 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5199 if (error == 0) 5200 error = nlookup(&nd); 5201 if (error == 0) 5202 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5203 if (error) { 5204 nlookup_done(&nd); 5205 return (error); 5206 } 5207 5208 bzero(&auio, sizeof(auio)); 5209 aiov.iov_base = uap->data; 5210 aiov.iov_len = uap->nbytes; 5211 auio.uio_iov = &aiov; 5212 auio.uio_iovcnt = 1; 5213 auio.uio_offset = 0; 5214 auio.uio_resid = uap->nbytes; 5215 auio.uio_rw = UIO_READ; 5216 auio.uio_td = curthread; 5217 5218 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5219 &auio, nd.nl_cred); 5220 sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid; 5221 5222 vput(vp); 5223 nlookup_done(&nd); 5224 return(error); 5225 } 5226 5227 /* 5228 * Syscall to delete a named extended attribute from a file or directory. 5229 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
 */
int
sys_extattr_delete_file(struct sysmsg *sysmsg,
			const struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	/* Copy the attribute name in from userspace. */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return(error);

	/*
	 * Resolve the path, verify it is writable, then issue a
	 * VOP_SETEXTATTR() with a NULL uio, which requests deletion.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
		if (error == 0) {
			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
					       attrname, NULL, nd.nl_cred);
			vput(vp);
		}
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Determine if the mount is visible to the process.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct nchandle nch;

	/*
	 * Traverse from the mount point upwards.  If we hit the process
	 * root then the mount point is visible to the process.
	 */
	nch = mp->mnt_ncmountpt;
	while (nch.ncp) {
		if (nch.mount == p->p_fd->fd_nrdir.mount &&
		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
			return(1);
		}
		/*
		 * At the root ncp of a mount, hop to the ncp the mount
		 * is mounted on; otherwise step to the parent ncp.
		 */
		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
			nch = nch.mount->mnt_ncmounton;
		} else {
			nch.ncp = nch.ncp->nc_parent;
		}
	}

	/*
	 * If the mount point is not visible to the process, but the
	 * process root is in a subdirectory of the mount, return
	 * TRUE anyway.
	 */
	if (p->p_fd->fd_nrdir.mount == mp)
		return(1);

	return(0);
}

/*
 * Return the appropriate system capability restriction.
 */
static int
get_fspriv(const char *fsname)
{

	/*
	 * The compare lengths (5 and 6) include the terminating NUL,
	 * so these are exact-match comparisons, not prefix matches.
	 */
	if (strncmp("null", fsname, 5) == 0) {
		return SYSCAP_NOMOUNT_NULLFS;
	} else if (strncmp(fsname, "tmpfs", 6) == 0) {
		return SYSCAP_NOMOUNT_TMPFS;
	}
	return SYSCAP_RESTRICTEDROOT;
}

/*
 * __realpath(const char *path, char *buf, size_t len)
 *
 * Resolve path (following symlinks) and copy out the full path of the
 * result, as seen from the process root, into buf.  Returns the string
 * length (excluding the NUL) via the sysmsg, EINVAL for a negative
 * len, ENOENT if the name resolves to nothing, and ENAMETOOLONG if
 * the result (plus NUL) does not fit in len bytes.
 */
int
sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap)
{
	struct nlookupdata nd;
	char *rbuf;
	char *fbuf;
	ssize_t rlen;
	int error;

	/*
	 * Invalid length if less than 0.  0 is allowed
	 */
	if ((ssize_t)uap->len < 0)
		return EINVAL;

	rbuf = NULL;
	fbuf = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto done;

	/* A shared lock is sufficient; we only read the namecache. */
	nd.nl_flags |= NLC_SHAREDLOCK;
	error = nlookup(&nd);
	if (error)
		goto done;

	/* The name must resolve to an existing vnode. */
	if (nd.nl_nch.ncp->nc_vp == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * Shortcut test for existence.
	 */
	if (uap->len == 0) {
		error = ENAMETOOLONG;
		goto done;
	}

	/*
	 * Obtain the path relative to the process root.  The nch must not
	 * be locked for the cache_fullpath() call.
	 */
	if (nd.nl_flags & NLC_NCPISLOCKED) {
		nd.nl_flags &= ~NLC_NCPISLOCKED;
		cache_unlock(&nd.nl_nch);
	}
	error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0);
	if (error)
		goto done;

	/* Copy out the result including the terminating NUL. */
	rlen = (ssize_t)strlen(rbuf);
	if (rlen >= uap->len) {
		error = ENAMETOOLONG;
		goto done;
	}
	error = copyout(rbuf, uap->buf, rlen + 1);
	if (error == 0)
		sysmsg->sysmsg_szresult = rlen;
done:
	nlookup_done(&nd);
	if (fbuf)
		kfree(fbuf, M_TEMP);

	return error;
}

/*
 * posix_fallocate(int fd, off_t offset, off_t len)
 *
 * Thin syscall wrapper; all the work is in kern_posix_fallocate().
 */
int
sys_posix_fallocate(struct sysmsg *sysmsg, const struct posix_fallocate_args *uap)
{
	return (kern_posix_fallocate(uap->fd, uap->offset, uap->len));
}

/*
 * Ensure backing store is allocated for the byte range
 * [offset, offset + len) of the regular file referenced by fd.
 *
 * Returns EINVAL for a negative offset or non-positive len, EFBIG if
 * offset + len would overflow, EBADF for a bad or non-writable
 * descriptor, ESPIPE for pipes/FIFOs, and ENODEV for any descriptor
 * that is not a regular-file vnode.  The actual allocation is done by
 * VOP_ALLOCATE() with the vnode exclusively locked.
 */
int
kern_posix_fallocate(int fd, off_t offset, off_t len)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	int error;

	if (offset < 0 || len <= 0)
		return (EINVAL);
	/* Check for wrap. */
	if (offset > OFF_MAX - len)
		return (EFBIG);

	fp = holdfp(td, fd, -1);
	if (fp == NULL)
		return (EBADF);

	/* Only vnode-backed descriptors are eligible. */
	switch (fp->f_type) {
	case DTYPE_VNODE:
		break;
	case DTYPE_PIPE:
	case DTYPE_FIFO:
		error = ESPIPE;
		goto out;
	default:
		error = ENODEV;
		goto out;
	}

	/* The descriptor must have been opened for writing. */
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}

	vp = fp->f_data;
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto out;
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_ALLOCATE(vp, offset, len);
	vn_unlock(vp);
out:
	/* Always release the hold taken by holdfp(). */
	dropfp(td, fd, fp);
	return (error);
}