/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysmsg.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/jail.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>
#include <sys/sysctl.h>

#include <sys/buf2.h>
#include <sys/file2.h>
#include <sys/spinlock2.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <machine/limits.h>
#include <machine/stdarg.h>

static void mount_warning(struct mount *mp, const char *ctl, ...)
		__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int get_fspriv(const char *);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, u_long);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

static int	usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
	   "Allow non-root users to mount filesystems");

static int	debug_unmount = 0; /* if 1 loop until unmount success */
SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
	   "Stall failed unmounts in loop");
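
/*
 * Illustrative note (not part of the original source): the two knobs above
 * are normally tuned from userland with sysctl(8), e.g.
 *
 *	sysctl vfs.usermount=1		# allow non-root users to mount
 *	sysctl vfs.debug_unmount=1	# stall failed unmounts in a loop
 */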

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	int priv = 0;
	int flags = uap->flags;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions for jails and nullfs mounts.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct priv according to the file system type.
	 */
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (flags & MNT_EXPORTED) {
		error = priv_check(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, priv))
		flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}

	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp, vfsp->vfc_vfsops);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
			  MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
			  MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
			  MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
			  MNT_AUTOMOUNTED);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
			  MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
			  MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
			  MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW |
			  MNT_SUIDDIR | MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}
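
#if 0
/*
 * Illustrative userland sketch (not part of this file): how the mount(2)
 * entry point above is normally reached.  The filesystem type, target
 * directory and flags are arbitrary examples; most filesystems also expect
 * an fs-specific data argument (a struct *_args), NULL is used here only
 * to keep the sketch short.
 */
#include <sys/param.h>
#include <sys/mount.h>

int
example_mount(void)
{
	/* corresponds to mount_args(type, path, flags, data) */
	return (mount("tmpfs", "/mnt", MNT_NOSUID | MNT_NODEV, NULL));
}
#endif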

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;
	struct nchandle new_nch;
	struct vnode *old_vp;
	struct vnode *new_vp;
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info, 0);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 *	 structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not the special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	int priv = 0;
	int error;
	struct ucred *cred;

	cred = td->td_ucred;

	KKASSERT(p);

	/* We do not allow user umounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
			     NLC_FOLLOW | NLC_IGNBADDIR);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/* Figure out the fsname in order to select proper privs */
	ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv))) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, priv)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

	/* Check if this mount belongs to this prison */
	if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
	    mp->mnt_cred->cr_prison != cred->cr_prison)) {
		kprintf("mountpoint %s does not belong to this jail\n",
			uap->path);
		error = EPERM;
		goto out;
	}

	/*
	 * If no error try to issue the unmount.  We lose our cache
	 * ref when we call nlookup_done so we must hold the mount point
	 * to prevent use-after-free races.
	 */
out:
	if (error == 0) {
		mount_hold(mp);
		nlookup_done(&nd);
		error = dounmount(mp, uap->flags, 0);
		mount_drop(mp);
	} else {
		nlookup_done(&nd);
	}
done:
	return (error);
}
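
#if 0
/*
 * Illustrative userland sketch (not part of this file): unmount(2) as
 * serviced by sys_unmount() above and dounmount() below.  "/mnt" and
 * MNT_FORCE are arbitrary examples.
 */
#include <sys/param.h>
#include <sys/mount.h>

int
example_unmount(void)
{
	/* corresponds to umount_args(path, flags) */
	return (unmount("/mnt", MNT_FORCE));
}
#endif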

/*
 * Do the actual file system unmount (interlocked against the mountlist
 * token and mp->mnt_token).
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int hadsyncer = 0;
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}

	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
		hadsyncer = 1;
	}

	/*
	 * Sync normally-mounted filesystem.
	 */
	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() does not).
	 */
	for (retry = 0; (retry < 10 || debug_unmount); ++retry) {
		/*
		 * Invalidate the namecache topology under the mount.
		 * nullfs mounts alias a real mount's namecache topology
		 * and it should not be invalidated in that case.
		 */
		if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
			cache_lock(&mp->mnt_ncmountpt);
			cache_inval(&mp->mnt_ncmountpt,
				    CINV_DESTROY|CINV_CHILDREN);
			cache_unlock(&mp->mnt_ncmountpt);
		}

		/*
		 * Clear pcpu caches
		 */
		cache_unmounting(mp);
		if (mp->mnt_refs != 1)
			cache_clearmntcache(mp);

		/*
		 * Break out if we are good.  Don't count ncp refs if the
		 * mount is aliased.
		 */
		ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
		      NULL : mp->mnt_ncmountpt.ncp;
		if (mp->mnt_refs == 1 &&
		    (ncp == NULL || (ncp->nc_refs == 1 &&
				     TAILQ_FIRST(&ncp->nc_list) == NULL))) {
			break;
		}

		/*
		 * If forcing the unmount, clean out any p->p_textnch
		 * nchandles that match this mount.
		 */
		if (flags & MNT_FORCE)
			allproc_scan(&unmount_allproc_cb, mp, 0);

		/*
		 * Sleep and retry.
		 */
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		if ((retry & 15) == 15) {
			mount_warning(mp,
				      "(%p) debug - retry %d, "
				      "%d namecache refs, %d mount refs",
				      mp, retry,
				      (ncp ? ncp->nc_refs - 1 : 0),
				      mp->mnt_refs - 1);
		}
	}

	error = 0;
	ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
	      NULL : mp->mnt_ncmountpt.ncp;
	if (mp->mnt_refs != 1 ||
	    (ncp != NULL && (ncp->nc_refs != 1 ||
			     TAILQ_FIRST(&ncp->nc_list)))) {
		mount_warning(mp,
			      "(%p): %d namecache refs, %d mount refs "
			      "still present",
			      mp,
			      (ncp ? ncp->nc_refs - 1 : 0),
			      mp->mnt_refs - 1);
		if (flags & MNT_FORCE) {
			freeok = 0;
			mount_warning(mp, "forcing unmount\n");
		} else {
			error = EBUSY;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error == 0 ||		/* no error */
			    error == EOPNOTSUPP ||	/* no sync avail */
			    (flags & MNT_FORCE)) {	/* force anyway */
				error = VFS_UNMOUNT(mp, flags);
			}
		}
		if (error) {
			mount_warning(mp,
				      "(%p) unmount: vfs refused to unmount, "
				      "error %d",
				      mp, error);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL && hadsyncer)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	if (mp->mnt_cred) {
		crfree(mp->mnt_cred);
		mp->mnt_cred = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache(mp);
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache(mp);
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;
	} else {
		cache_clearmntcache(mp);
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

int
sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	return (0);
}

static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		lwkt_gettoken(&mp->mnt_token);
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		lwkt_reltoken(&mp->mnt_token);
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		lwkt_gettoken(&mp->mnt_token);
		mp->mnt_flag |= asyncflag;
		lwkt_reltoken(&mp->mnt_token);
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *	    void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(td, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &sysmsg->sysmsg_result);
	if (fp)
		dropfp(td, uap->fd, fp);
	if (error == 0 && sysmsg->sysmsg_result > 0)
		error = copyout(buf, uap->buf, sysmsg->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	error = VFS_STATFS(mp, sp, nd->nl_cred);
	/* ignore error */

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}
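
#if 0
/*
 * Illustrative userland sketch (not part of this file): statfs(2) as
 * serviced by kern_statfs() above.  "/usr" is an arbitrary example path.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

void
example_statfs(void)
{
	struct statfs sf;

	if (statfs("/usr", &sf) == 0) {
		printf("%s on %s (%s)\n", sf.f_mntfromname,
		       sf.f_mntonname, sf.f_fstypename);
	}
}
#endif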

int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	sp = &mp->mnt_stat;
	error = VFS_STATFS(mp, sp, fp->f_cred);

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.sfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->sfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}
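
#if 0
/*
 * Illustrative userland sketch (not part of this file): getfsstat(2) as
 * serviced by sys_getfsstat() above.  MNT_NOWAIT skips refreshing the
 * per-mount statfs cache; the array size is an arbitrary example.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

void
example_getfsstat(void)
{
	struct statfs fs[32];
	int i, n;

	n = getfsstat(fs, sizeof(fs), MNT_NOWAIT);
	for (i = 0; i < n; i++)
		printf("%s on %s\n", fs[i].f_mntfromname, fs[i].f_mntonname);
}
#endif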

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getvfsstat_info {
	struct statfs *sfsp;
	struct statvfs *vsfsp;
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->vsfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			/* ignore error */
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}

/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
int
sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	lwkt_gettoken(&p->p_token);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
		cache_dropmount(mp);
	}
	if (error == 0) {
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		spin_unlock(&fdp->fd_spin);
		vn_unlock(vp);		/* leave ref intact */
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	lwkt_reltoken(&p->p_token);
	return (error);
}

int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	lwkt_gettoken(&p->p_token);
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		spin_unlock(&fdp->fd_spin);
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		cache_drop(&onch);
		vrele(ovp);
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for the raised chroot(2) security function: refuse if
 * any file descriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(td, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
	   &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		lwkt_gettoken(&p->p_token);
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
		if ((p->p_flags & P_DIDCHROOT) == 0) {
			p->p_flags |= P_DIDCHROOT;
			if (p->p_depth <= 65535 - 32)
				p->p_depth += 32;
		}
		lwkt_reltoken(&p->p_token);
	} else {
		vrele(vp);
	}
	return (error);
}
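
#if 0
/*
 * Illustrative userland sketch (not part of this file): chroot(2) followed
 * by chdir(2), as serviced by kern_chroot() above.  "/var/empty" is an
 * arbitrary example path; the caller needs the PRIV_VFS_CHROOT privilege,
 * and kern.chroot_allow_open_directories governs open directory fds.
 */
#include <unistd.h>

int
example_chroot(void)
{
	if (chroot("/var/empty") != 0)
		return (-1);
	return (chdir("/"));
}
#endif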

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

int
sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather than doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 *
	 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode
	 * lock for O_RDWR opens on executables in order to avoid a VTEXT
	 * detection race.  The NLC_EXCLLOCK_IFEXEC handles this case.
	 *
	 * NOTE: We need a flag to separate terminal vnode locking from
	 *	 parent locking.  O_CREAT needs parent locking, but O_TRUNC
	 *	 and O_RDWR only need to lock the terminal vnode exclusively.
2106 */ 2107 nd->nl_flags |= NLC_LOCKVP; 2108 if ((flags & (O_CREAT|O_TRUNC)) == 0) { 2109 nd->nl_flags |= NLC_SHAREDLOCK; 2110 if (flags & O_RDWR) 2111 nd->nl_flags |= NLC_EXCLLOCK_IFEXEC; 2112 } 2113 2114 error = vn_open(nd, fp, flags, cmode); 2115 nlookup_done(nd); 2116 2117 if (error) { 2118 /* 2119 * handle special fdopen() case. bleh. dupfdopen() is 2120 * responsible for dropping the old contents of ofiles[indx] 2121 * if it succeeds. 2122 * 2123 * Note that fsetfd() will add a ref to fp which represents 2124 * the fd_files[] assignment. We must still drop our 2125 * reference. 2126 */ 2127 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2128 if (fdalloc(p, 0, &indx) == 0) { 2129 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2130 if (error == 0) { 2131 *res = indx; 2132 fdrop(fp); /* our ref */ 2133 return (0); 2134 } 2135 fsetfd(fdp, NULL, indx); 2136 } 2137 } 2138 fdrop(fp); /* our ref */ 2139 if (error == ERESTART) 2140 error = EINTR; 2141 return (error); 2142 } 2143 2144 /* 2145 * ref the vnode for ourselves so it can't be ripped out from under 2146 * is. XXX need an ND flag to request that the vnode be returned 2147 * anyway. 2148 * 2149 * Reserve a file descriptor but do not assign it until the open 2150 * succeeds. 2151 */ 2152 vp = (struct vnode *)fp->f_data; 2153 vref(vp); 2154 if ((error = fdalloc(p, 0, &indx)) != 0) { 2155 fdrop(fp); 2156 vrele(vp); 2157 return (error); 2158 } 2159 2160 /* 2161 * If no error occurs the vp will have been assigned to the file 2162 * pointer. 2163 */ 2164 lp->lwp_dupfd = 0; 2165 2166 if (flags & (O_EXLOCK | O_SHLOCK)) { 2167 lf.l_whence = SEEK_SET; 2168 lf.l_start = 0; 2169 lf.l_len = 0; 2170 if (flags & O_EXLOCK) 2171 lf.l_type = F_WRLCK; 2172 else 2173 lf.l_type = F_RDLCK; 2174 if (flags & FNONBLOCK) 2175 type = 0; 2176 else 2177 type = F_WAIT; 2178 2179 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2180 /* 2181 * lock request failed. Clean up the reserved 2182 * descriptor. 2183 */ 2184 vrele(vp); 2185 fsetfd(fdp, NULL, indx); 2186 fdrop(fp); 2187 return (error); 2188 } 2189 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2190 } 2191 #if 0 2192 /* 2193 * Assert that all regular file vnodes were created with a object. 2194 */ 2195 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2196 ("open: regular file has no backing object after vn_open")); 2197 #endif 2198 2199 vrele(vp); 2200 2201 /* 2202 * release our private reference, leaving the one associated with the 2203 * descriptor table intact. 2204 */ 2205 if (oflags & O_CLOEXEC) 2206 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2207 fsetfd(fdp, fp, indx); 2208 fdrop(fp); 2209 *res = indx; 2210 2211 return (error); 2212 } 2213 2214 /* 2215 * open_args(char *path, int flags, int mode) 2216 * 2217 * Check permissions, allocate an open file structure, 2218 * and call the device open routine if any. 
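 *
 * Illustrative userland usage (a hedged sketch; the path and mode are
 * arbitrary examples):
 *
 *	#include <fcntl.h>
 *
 *	int fd = open("/tmp/example", O_RDWR | O_CREAT | O_CLOEXEC, 0644);
 *
 * The O_CLOEXEC case corresponds to the UF_EXCLOSE handling in
 * kern_open() above.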
2219 */ 2220 int 2221 sys_open(struct sysmsg *sysmsg, const struct open_args *uap) 2222 { 2223 struct nlookupdata nd; 2224 int error; 2225 2226 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2227 if (error == 0) { 2228 error = kern_open(&nd, uap->flags, 2229 uap->mode, &sysmsg->sysmsg_result); 2230 } 2231 nlookup_done(&nd); 2232 return (error); 2233 } 2234 2235 /* 2236 * openat_args(int fd, char *path, int flags, int mode) 2237 */ 2238 int 2239 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap) 2240 { 2241 struct nlookupdata nd; 2242 int error; 2243 struct file *fp; 2244 2245 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2246 if (error == 0) { 2247 error = kern_open(&nd, uap->flags, uap->mode, 2248 &sysmsg->sysmsg_result); 2249 } 2250 nlookup_done_at(&nd, fp); 2251 return (error); 2252 } 2253 2254 int 2255 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2256 { 2257 struct thread *td = curthread; 2258 struct proc *p = td->td_proc; 2259 struct vnode *vp; 2260 struct vattr vattr; 2261 int error; 2262 int whiteout = 0; 2263 2264 KKASSERT(p); 2265 2266 VATTR_NULL(&vattr); 2267 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2268 vattr.va_rmajor = rmajor; 2269 vattr.va_rminor = rminor; 2270 2271 switch (mode & S_IFMT) { 2272 case S_IFMT: /* used by badsect to flag bad sectors */ 2273 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2274 vattr.va_type = VBAD; 2275 break; 2276 case S_IFCHR: 2277 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2278 vattr.va_type = VCHR; 2279 break; 2280 case S_IFBLK: 2281 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2282 vattr.va_type = VBLK; 2283 break; 2284 case S_IFWHT: 2285 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2286 whiteout = 1; 2287 break; 2288 case S_IFDIR: /* special directories support for HAMMER */ 2289 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2290 vattr.va_type = VDIR; 2291 break; 2292 default: 2293 error = EINVAL; 2294 break; 2295 } 2296 2297 if (error) 2298 return (error); 2299 2300 bwillinode(1); 2301 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2302 if ((error = nlookup(nd)) != 0) 2303 return (error); 2304 if (nd->nl_nch.ncp->nc_vp) 2305 return (EEXIST); 2306 if (nd->nl_dvp == NULL) 2307 return (EINVAL); 2308 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2309 return (error); 2310 2311 if (whiteout) { 2312 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2313 nd->nl_cred, NAMEI_CREATE); 2314 } else { 2315 vp = NULL; 2316 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2317 &vp, nd->nl_cred, &vattr); 2318 if (error == 0) 2319 vput(vp); 2320 } 2321 return (error); 2322 } 2323 2324 /* 2325 * mknod_args(char *path, int mode, int dev) 2326 * 2327 * Create a special file. 2328 */ 2329 int 2330 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap) 2331 { 2332 struct nlookupdata nd; 2333 int error; 2334 2335 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2336 if (error == 0) { 2337 error = kern_mknod(&nd, uap->mode, 2338 umajor(uap->dev), uminor(uap->dev)); 2339 } 2340 nlookup_done(&nd); 2341 return (error); 2342 } 2343 2344 /* 2345 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2346 * 2347 * Create a special file. The path is relative to the directory associated 2348 * with fd. 
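 *
 * Illustrative userland usage (a hedged sketch; the name and device
 * numbers are arbitrary examples, dirfd is assumed to be an open
 * directory descriptor, and the appropriate PRIV_VFS_MKNOD_* privilege
 * checked in kern_mknod() is required):
 *
 *	#include <sys/stat.h>
 *
 *	mknodat(dirfd, "mydev", S_IFCHR | 0600, makedev(4, 0));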
2349 */ 2350 int 2351 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap) 2352 { 2353 struct nlookupdata nd; 2354 struct file *fp; 2355 int error; 2356 2357 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2358 if (error == 0) { 2359 error = kern_mknod(&nd, uap->mode, 2360 umajor(uap->dev), uminor(uap->dev)); 2361 } 2362 nlookup_done_at(&nd, fp); 2363 return (error); 2364 } 2365 2366 int 2367 kern_mkfifo(struct nlookupdata *nd, int mode) 2368 { 2369 struct thread *td = curthread; 2370 struct proc *p = td->td_proc; 2371 struct vattr vattr; 2372 struct vnode *vp; 2373 int error; 2374 2375 bwillinode(1); 2376 2377 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2378 if ((error = nlookup(nd)) != 0) 2379 return (error); 2380 if (nd->nl_nch.ncp->nc_vp) 2381 return (EEXIST); 2382 if (nd->nl_dvp == NULL) 2383 return (EINVAL); 2384 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2385 return (error); 2386 2387 VATTR_NULL(&vattr); 2388 vattr.va_type = VFIFO; 2389 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2390 vp = NULL; 2391 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2392 if (error == 0) 2393 vput(vp); 2394 return (error); 2395 } 2396 2397 /* 2398 * mkfifo_args(char *path, int mode) 2399 * 2400 * Create a named pipe. 2401 */ 2402 int 2403 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap) 2404 { 2405 struct nlookupdata nd; 2406 int error; 2407 2408 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2409 if (error == 0) 2410 error = kern_mkfifo(&nd, uap->mode); 2411 nlookup_done(&nd); 2412 return (error); 2413 } 2414 2415 /* 2416 * mkfifoat_args(int fd, char *path, mode_t mode) 2417 * 2418 * Create a named pipe. The path is relative to the directory associated 2419 * with fd. 2420 */ 2421 int 2422 sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap) 2423 { 2424 struct nlookupdata nd; 2425 struct file *fp; 2426 int error; 2427 2428 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2429 if (error == 0) 2430 error = kern_mkfifo(&nd, uap->mode); 2431 nlookup_done_at(&nd, fp); 2432 return (error); 2433 } 2434 2435 static int hardlink_check_uid = 0; 2436 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2437 &hardlink_check_uid, 0, 2438 "Unprivileged processes cannot create hard links to files owned by other " 2439 "users"); 2440 static int hardlink_check_gid = 0; 2441 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2442 &hardlink_check_gid, 0, 2443 "Unprivileged processes cannot create hard links to files owned by other " 2444 "groups"); 2445 2446 static int 2447 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2448 { 2449 struct vattr va; 2450 int error; 2451 2452 /* 2453 * Shortcut if disabled 2454 */ 2455 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2456 return (0); 2457 2458 /* 2459 * Privileged user can always hardlink 2460 */ 2461 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2462 return (0); 2463 2464 /* 2465 * Otherwise only if the originating file is owned by the 2466 * same user or group. Note that any group is allowed if 2467 * the file is owned by the caller. 
2468 */ 2469 error = VOP_GETATTR(vp, &va); 2470 if (error != 0) 2471 return (error); 2472 2473 if (hardlink_check_uid) { 2474 if (cred->cr_uid != va.va_uid) 2475 return (EPERM); 2476 } 2477 2478 if (hardlink_check_gid) { 2479 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2480 return (EPERM); 2481 } 2482 2483 return (0); 2484 } 2485 2486 int 2487 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2488 { 2489 struct thread *td = curthread; 2490 struct vnode *vp; 2491 int error; 2492 2493 /* 2494 * Lookup the source and obtained a locked vnode. 2495 * 2496 * You may only hardlink a file which you have write permission 2497 * on or which you own. 2498 * 2499 * XXX relookup on vget failure / race ? 2500 */ 2501 bwillinode(1); 2502 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2503 if ((error = nlookup(nd)) != 0) 2504 return (error); 2505 vp = nd->nl_nch.ncp->nc_vp; 2506 KKASSERT(vp != NULL); 2507 if (vp->v_type == VDIR) 2508 return (EPERM); /* POSIX */ 2509 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2510 return (error); 2511 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2512 return (error); 2513 2514 /* 2515 * Unlock the source so we can lookup the target without deadlocking 2516 * (XXX vp is locked already, possible other deadlock?). The target 2517 * must not exist. 2518 */ 2519 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2520 nd->nl_flags &= ~NLC_NCPISLOCKED; 2521 cache_unlock(&nd->nl_nch); 2522 vn_unlock(vp); 2523 2524 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2525 if ((error = nlookup(linknd)) != 0) { 2526 vrele(vp); 2527 return (error); 2528 } 2529 if (linknd->nl_nch.ncp->nc_vp) { 2530 vrele(vp); 2531 return (EEXIST); 2532 } 2533 if (linknd->nl_dvp == NULL) { 2534 vrele(vp); 2535 return (EINVAL); 2536 } 2537 VFS_MODIFYING(vp->v_mount); 2538 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2539 if (error) { 2540 vrele(vp); 2541 return (error); 2542 } 2543 2544 /* 2545 * Finally run the new API VOP. 2546 */ 2547 error = can_hardlink(vp, td, td->td_ucred); 2548 if (error == 0) { 2549 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2550 vp, linknd->nl_cred); 2551 } 2552 vput(vp); 2553 return (error); 2554 } 2555 2556 /* 2557 * link_args(char *path, char *link) 2558 * 2559 * Make a hard file link. 2560 */ 2561 int 2562 sys_link(struct sysmsg *sysmsg, const struct link_args *uap) 2563 { 2564 struct nlookupdata nd, linknd; 2565 int error; 2566 2567 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2568 if (error == 0) { 2569 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2570 if (error == 0) 2571 error = kern_link(&nd, &linknd); 2572 nlookup_done(&linknd); 2573 } 2574 nlookup_done(&nd); 2575 return (error); 2576 } 2577 2578 /* 2579 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2580 * 2581 * Make a hard file link. The path1 argument is relative to the directory 2582 * associated with fd1, and similarly the path2 argument is relative to 2583 * the directory associated with fd2. 2584 */ 2585 int 2586 sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap) 2587 { 2588 struct nlookupdata nd, linknd; 2589 struct file *fp1, *fp2; 2590 int error; 2591 2592 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2593 (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); 2594 if (error == 0) { 2595 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2596 uap->path2, UIO_USERSPACE, 0); 2597 if (error == 0) 2598 error = kern_link(&nd, &linknd); 2599 nlookup_done_at(&linknd, fp2); 2600 } 2601 nlookup_done_at(&nd, fp1); 2602 return (error); 2603 } 2604 2605 int 2606 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2607 { 2608 struct vattr vattr; 2609 struct vnode *vp; 2610 struct vnode *dvp; 2611 int error; 2612 2613 bwillinode(1); 2614 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2615 if ((error = nlookup(nd)) != 0) 2616 return (error); 2617 if (nd->nl_nch.ncp->nc_vp) 2618 return (EEXIST); 2619 if (nd->nl_dvp == NULL) 2620 return (EINVAL); 2621 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2622 return (error); 2623 dvp = nd->nl_dvp; 2624 VATTR_NULL(&vattr); 2625 vattr.va_mode = mode; 2626 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2627 if (error == 0) 2628 vput(vp); 2629 return (error); 2630 } 2631 2632 /* 2633 * symlink(char *path, char *link) 2634 * 2635 * Make a symbolic link. 2636 */ 2637 int 2638 sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap) 2639 { 2640 struct thread *td = curthread; 2641 struct nlookupdata nd; 2642 char *path; 2643 int error; 2644 int mode; 2645 2646 path = objcache_get(namei_oc, M_WAITOK); 2647 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2648 if (error == 0) { 2649 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2650 if (error == 0) { 2651 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2652 error = kern_symlink(&nd, path, mode); 2653 } 2654 nlookup_done(&nd); 2655 } 2656 objcache_put(namei_oc, path); 2657 return (error); 2658 } 2659 2660 /* 2661 * symlinkat_args(char *path1, int fd, char *path2) 2662 * 2663 * Make a symbolic link. The path2 argument is relative to the directory 2664 * associated with fd. 2665 */ 2666 int 2667 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap) 2668 { 2669 struct thread *td = curthread; 2670 struct nlookupdata nd; 2671 struct file *fp; 2672 char *path1; 2673 int error; 2674 int mode; 2675 2676 path1 = objcache_get(namei_oc, M_WAITOK); 2677 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2678 if (error == 0) { 2679 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2680 UIO_USERSPACE, 0); 2681 if (error == 0) { 2682 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2683 error = kern_symlink(&nd, path1, mode); 2684 } 2685 nlookup_done_at(&nd, fp); 2686 } 2687 objcache_put(namei_oc, path1); 2688 return (error); 2689 } 2690 2691 /* 2692 * undelete_args(char *path) 2693 * 2694 * Delete a whiteout from the filesystem. 
2695 */ 2696 int 2697 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap) 2698 { 2699 struct nlookupdata nd; 2700 int error; 2701 2702 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2703 bwillinode(1); 2704 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2705 if (error == 0) 2706 error = nlookup(&nd); 2707 if (error == 0 && nd.nl_dvp == NULL) 2708 error = EINVAL; 2709 if (error == 0) 2710 error = ncp_writechk(&nd.nl_nch); 2711 if (error == 0) { 2712 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2713 NAMEI_DELETE); 2714 } 2715 nlookup_done(&nd); 2716 return (error); 2717 } 2718 2719 int 2720 kern_unlink(struct nlookupdata *nd) 2721 { 2722 int error; 2723 2724 bwillinode(1); 2725 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2726 if ((error = nlookup(nd)) != 0) 2727 return (error); 2728 if (nd->nl_dvp == NULL) 2729 return EINVAL; 2730 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2731 return (error); 2732 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2733 return (error); 2734 } 2735 2736 /* 2737 * unlink_args(char *path) 2738 * 2739 * Delete a name from the filesystem. 2740 */ 2741 int 2742 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap) 2743 { 2744 struct nlookupdata nd; 2745 int error; 2746 2747 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2748 if (error == 0) 2749 error = kern_unlink(&nd); 2750 nlookup_done(&nd); 2751 return (error); 2752 } 2753 2754 2755 /* 2756 * unlinkat_args(int fd, char *path, int flags) 2757 * 2758 * Delete the file or directory entry pointed to by fd/path. 2759 */ 2760 int 2761 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap) 2762 { 2763 struct nlookupdata nd; 2764 struct file *fp; 2765 int error; 2766 2767 if (uap->flags & ~AT_REMOVEDIR) 2768 return (EINVAL); 2769 2770 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2771 if (error == 0) { 2772 if (uap->flags & AT_REMOVEDIR) 2773 error = kern_rmdir(&nd); 2774 else 2775 error = kern_unlink(&nd); 2776 } 2777 nlookup_done_at(&nd, fp); 2778 return (error); 2779 } 2780 2781 int 2782 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2783 { 2784 struct thread *td = curthread; 2785 struct file *fp; 2786 struct vnode *vp; 2787 struct vattr_lite lva; 2788 off_t new_offset; 2789 int error; 2790 2791 fp = holdfp(td, fd, -1); 2792 if (fp == NULL) 2793 return (EBADF); 2794 if (fp->f_type != DTYPE_VNODE) { 2795 error = ESPIPE; 2796 goto done; 2797 } 2798 vp = (struct vnode *)fp->f_data; 2799 2800 switch (whence) { 2801 case L_INCR: 2802 spin_lock(&fp->f_spin); 2803 new_offset = fp->f_offset + offset; 2804 error = 0; 2805 break; 2806 case L_XTND: 2807 error = VOP_GETATTR_LITE(vp, &lva); 2808 spin_lock(&fp->f_spin); 2809 new_offset = offset + lva.va_size; 2810 break; 2811 case L_SET: 2812 new_offset = offset; 2813 error = 0; 2814 spin_lock(&fp->f_spin); 2815 break; 2816 default: 2817 new_offset = 0; 2818 error = EINVAL; 2819 spin_lock(&fp->f_spin); 2820 break; 2821 } 2822 2823 /* 2824 * Validate the seek position. Negative offsets are not allowed 2825 * for regular files or directories. 2826 * 2827 * Normally we would also not want to allow negative offsets for 2828 * character and block-special devices. However kvm addresses 2829 * on 64 bit architectures might appear to be negative and must 2830 * be allowed. 
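 *
 * For example (illustrative only):
 *
 *	lseek(fd, -1, SEEK_SET);
 *
 * fails with EINVAL when fd refers to a regular file or directory,
 * but is permitted on a character device such as /dev/kmem so that
 * high (apparently negative) kernel addresses stay reachable.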
2831 */ 2832 if (error == 0) { 2833 if (new_offset < 0 && 2834 (vp->v_type == VREG || vp->v_type == VDIR)) { 2835 error = EINVAL; 2836 } else { 2837 fp->f_offset = new_offset; 2838 } 2839 } 2840 *res = fp->f_offset; 2841 spin_unlock(&fp->f_spin); 2842 done: 2843 dropfp(td, fd, fp); 2844 2845 return (error); 2846 } 2847 2848 /* 2849 * lseek_args(int fd, int pad, off_t offset, int whence) 2850 * 2851 * Reposition read/write file offset. 2852 */ 2853 int 2854 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap) 2855 { 2856 int error; 2857 2858 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2859 &sysmsg->sysmsg_offset); 2860 2861 return (error); 2862 } 2863 2864 /* 2865 * Check if current process can access given file. amode is a bitmask of *_OK 2866 * access bits. flags is a bitmask of AT_* flags. 2867 */ 2868 int 2869 kern_access(struct nlookupdata *nd, int amode, int flags) 2870 { 2871 struct vnode *vp; 2872 int error, mode; 2873 2874 if (flags & ~AT_EACCESS) 2875 return (EINVAL); 2876 nd->nl_flags |= NLC_SHAREDLOCK; 2877 if ((error = nlookup(nd)) != 0) 2878 return (error); 2879 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2880 return (error); 2881 retry: 2882 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2883 if (error) 2884 return (error); 2885 2886 /* Flags == 0 means only check for existence. */ 2887 if (amode) { 2888 mode = 0; 2889 if (amode & R_OK) 2890 mode |= VREAD; 2891 if (amode & W_OK) 2892 mode |= VWRITE; 2893 if (amode & X_OK) 2894 mode |= VEXEC; 2895 if ((mode & VWRITE) == 0 || 2896 (error = vn_writechk(vp)) == 0) { 2897 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2898 } 2899 2900 /* 2901 * If the file handle is stale we have to re-resolve the 2902 * entry with the ncp held exclusively. This is a hack 2903 * at the moment. 2904 */ 2905 if (error == ESTALE) { 2906 vput(vp); 2907 cache_unlock(&nd->nl_nch); 2908 cache_lock(&nd->nl_nch); 2909 cache_setunresolved(&nd->nl_nch); 2910 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2911 if (error == 0) { 2912 vp = NULL; 2913 goto retry; 2914 } 2915 return(error); 2916 } 2917 } 2918 vput(vp); 2919 return (error); 2920 } 2921 2922 /* 2923 * access_args(char *path, int flags) 2924 * 2925 * Check access permissions. 2926 */ 2927 int 2928 sys_access(struct sysmsg *sysmsg, const struct access_args *uap) 2929 { 2930 struct nlookupdata nd; 2931 int error; 2932 2933 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2934 if (error == 0) 2935 error = kern_access(&nd, uap->flags, 0); 2936 nlookup_done(&nd); 2937 return (error); 2938 } 2939 2940 2941 /* 2942 * eaccess_args(char *path, int flags) 2943 * 2944 * Check access permissions. 2945 */ 2946 int 2947 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap) 2948 { 2949 struct nlookupdata nd; 2950 int error; 2951 2952 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2953 if (error == 0) 2954 error = kern_access(&nd, uap->flags, AT_EACCESS); 2955 nlookup_done(&nd); 2956 return (error); 2957 } 2958 2959 2960 /* 2961 * faccessat_args(int fd, char *path, int amode, int flags) 2962 * 2963 * Check access permissions. 
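 *
 * Illustrative userland usage (a hedged sketch; the path is an arbitrary
 * example and dirfd names an open directory):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	faccessat(dirfd, "data.db", R_OK | W_OK, AT_EACCESS);
 *
 * AT_EACCESS checks against the effective rather than the real
 * credentials; kern_access() rejects any other flag bits with EINVAL.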
2964 */ 2965 int 2966 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap) 2967 { 2968 struct nlookupdata nd; 2969 struct file *fp; 2970 int error; 2971 2972 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2973 NLC_FOLLOW); 2974 if (error == 0) 2975 error = kern_access(&nd, uap->amode, uap->flags); 2976 nlookup_done_at(&nd, fp); 2977 return (error); 2978 } 2979 2980 int 2981 kern_stat(struct nlookupdata *nd, struct stat *st) 2982 { 2983 int error; 2984 struct vnode *vp; 2985 2986 nd->nl_flags |= NLC_SHAREDLOCK; 2987 if ((error = nlookup(nd)) != 0) 2988 return (error); 2989 again: 2990 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2991 return (ENOENT); 2992 2993 #if 1 2994 error = cache_vref(&nd->nl_nch, NULL, &vp); 2995 #else 2996 error = vget(vp, LK_SHARED); 2997 #endif 2998 if (error) 2999 return (error); 3000 error = vn_stat(vp, st, nd->nl_cred); 3001 3002 /* 3003 * If the file handle is stale we have to re-resolve the 3004 * entry with the ncp held exclusively. This is a hack 3005 * at the moment. 3006 */ 3007 if (error == ESTALE) { 3008 #if 1 3009 vrele(vp); 3010 #else 3011 vput(vp); 3012 #endif 3013 cache_unlock(&nd->nl_nch); 3014 cache_lock(&nd->nl_nch); 3015 cache_setunresolved(&nd->nl_nch); 3016 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 3017 if (error == 0) 3018 goto again; 3019 } else { 3020 #if 1 3021 vrele(vp); 3022 #else 3023 vput(vp); 3024 #endif 3025 } 3026 return (error); 3027 } 3028 3029 /* 3030 * stat_args(char *path, struct stat *ub) 3031 * 3032 * Get file status; this version follows links. 3033 */ 3034 int 3035 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap) 3036 { 3037 struct nlookupdata nd; 3038 struct stat st; 3039 int error; 3040 3041 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3042 if (error == 0) { 3043 error = kern_stat(&nd, &st); 3044 if (error == 0) 3045 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3046 } 3047 nlookup_done(&nd); 3048 return (error); 3049 } 3050 3051 /* 3052 * lstat_args(char *path, struct stat *ub) 3053 * 3054 * Get file status; this version does not follow links. 3055 */ 3056 int 3057 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap) 3058 { 3059 struct nlookupdata nd; 3060 struct stat st; 3061 int error; 3062 3063 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3064 if (error == 0) { 3065 error = kern_stat(&nd, &st); 3066 if (error == 0) 3067 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3068 } 3069 nlookup_done(&nd); 3070 return (error); 3071 } 3072 3073 /* 3074 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3075 * 3076 * Get status of file pointed to by fd/path. 3077 */ 3078 int 3079 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap) 3080 { 3081 struct nlookupdata nd; 3082 struct stat st; 3083 int error; 3084 int flags; 3085 struct file *fp; 3086 3087 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3088 return (EINVAL); 3089 3090 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3091 3092 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3093 UIO_USERSPACE, flags); 3094 if (error == 0) { 3095 error = kern_stat(&nd, &st); 3096 if (error == 0) 3097 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3098 } 3099 nlookup_done_at(&nd, fp); 3100 return (error); 3101 } 3102 3103 static int 3104 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3105 { 3106 struct nlookupdata nd; 3107 struct vnode *vp; 3108 int error; 3109 3110 vp = NULL; 3111 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3112 if (error == 0) 3113 error = nlookup(&nd); 3114 if (error == 0) 3115 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3116 nlookup_done(&nd); 3117 if (error == 0) { 3118 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3119 vput(vp); 3120 } 3121 return (error); 3122 } 3123 3124 /* 3125 * pathconf_Args(char *path, int name) 3126 * 3127 * Get configurable pathname variables. 3128 */ 3129 int 3130 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap) 3131 { 3132 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3133 &sysmsg->sysmsg_reg)); 3134 } 3135 3136 /* 3137 * lpathconf_Args(char *path, int name) 3138 * 3139 * Get configurable pathname variables, but don't follow symlinks. 3140 */ 3141 int 3142 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap) 3143 { 3144 return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg)); 3145 } 3146 3147 /* 3148 * XXX: daver 3149 * kern_readlink isn't properly split yet. There is a copyin burried 3150 * in VOP_READLINK(). 3151 */ 3152 int 3153 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3154 { 3155 struct thread *td = curthread; 3156 struct vnode *vp; 3157 struct iovec aiov; 3158 struct uio auio; 3159 int error; 3160 3161 nd->nl_flags |= NLC_SHAREDLOCK; 3162 if ((error = nlookup(nd)) != 0) 3163 return (error); 3164 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3165 if (error) 3166 return (error); 3167 if (vp->v_type != VLNK) { 3168 error = EINVAL; 3169 } else { 3170 aiov.iov_base = buf; 3171 aiov.iov_len = count; 3172 auio.uio_iov = &aiov; 3173 auio.uio_iovcnt = 1; 3174 auio.uio_offset = 0; 3175 auio.uio_rw = UIO_READ; 3176 auio.uio_segflg = UIO_USERSPACE; 3177 auio.uio_td = td; 3178 auio.uio_resid = count; 3179 error = VOP_READLINK(vp, &auio, td->td_ucred); 3180 } 3181 vput(vp); 3182 *res = count - auio.uio_resid; 3183 return (error); 3184 } 3185 3186 /* 3187 * readlink_args(char *path, char *buf, int count) 3188 * 3189 * Return target name of a symbolic link. 3190 */ 3191 int 3192 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap) 3193 { 3194 struct nlookupdata nd; 3195 int error; 3196 3197 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3198 if (error == 0) { 3199 error = kern_readlink(&nd, uap->buf, uap->count, 3200 &sysmsg->sysmsg_result); 3201 } 3202 nlookup_done(&nd); 3203 return (error); 3204 } 3205 3206 /* 3207 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3208 * 3209 * Return target name of a symbolic link. The path is relative to the 3210 * directory associated with fd. 
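 *
 * Illustrative userland usage (a hedged sketch; "mylink" is an arbitrary
 * example).  As in kern_readlink() above, the result length is
 * count - uio_resid and the buffer is not NUL terminated:
 *
 *	#include <unistd.h>
 *
 *	char buf[1024];
 *	ssize_t n = readlinkat(dirfd, "mylink", buf, sizeof(buf) - 1);
 *	if (n >= 0)
 *		buf[n] = '\0';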
3211 */ 3212 int 3213 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap) 3214 { 3215 struct nlookupdata nd; 3216 struct file *fp; 3217 int error; 3218 3219 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3220 if (error == 0) { 3221 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3222 &sysmsg->sysmsg_result); 3223 } 3224 nlookup_done_at(&nd, fp); 3225 return (error); 3226 } 3227 3228 static int 3229 setfflags(struct vnode *vp, u_long flags) 3230 { 3231 struct thread *td = curthread; 3232 int error; 3233 struct vattr vattr; 3234 3235 /* 3236 * Prevent non-root users from setting flags on devices. When 3237 * a device is reused, users can retain ownership of the device 3238 * if they are allowed to set flags and programs assume that 3239 * chown can't fail when done as root. 3240 */ 3241 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3242 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3243 return (error); 3244 3245 /* 3246 * note: vget is required for any operation that might mod the vnode 3247 * so VINACTIVE is properly cleared. 3248 */ 3249 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3250 VATTR_NULL(&vattr); 3251 vattr.va_flags = flags; 3252 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3253 vput(vp); 3254 } 3255 return (error); 3256 } 3257 3258 /* 3259 * chflags(const char *path, u_long flags) 3260 * 3261 * Change flags of a file given a path name. 3262 */ 3263 int 3264 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap) 3265 { 3266 struct nlookupdata nd; 3267 struct vnode *vp; 3268 int error; 3269 3270 vp = NULL; 3271 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3272 if (error == 0) 3273 error = nlookup(&nd); 3274 if (error == 0) 3275 error = ncp_writechk(&nd.nl_nch); 3276 if (error == 0) 3277 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3278 nlookup_done(&nd); 3279 if (error == 0) { 3280 error = setfflags(vp, uap->flags); 3281 vrele(vp); 3282 } 3283 return (error); 3284 } 3285 3286 /* 3287 * lchflags(const char *path, u_long flags) 3288 * 3289 * Change flags of a file given a path name, but don't follow symlinks. 3290 */ 3291 int 3292 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap) 3293 { 3294 struct nlookupdata nd; 3295 struct vnode *vp; 3296 int error; 3297 3298 vp = NULL; 3299 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3300 if (error == 0) 3301 error = nlookup(&nd); 3302 if (error == 0) 3303 error = ncp_writechk(&nd.nl_nch); 3304 if (error == 0) 3305 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3306 nlookup_done(&nd); 3307 if (error == 0) { 3308 error = setfflags(vp, uap->flags); 3309 vrele(vp); 3310 } 3311 return (error); 3312 } 3313 3314 /* 3315 * fchflags_args(int fd, u_flags flags) 3316 * 3317 * Change flags of a file given a file descriptor. 
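 *
 * Illustrative userland usage (a hedged sketch): set the user "nodump"
 * flag on an open file, then clear all flags again.
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	fchflags(fd, UF_NODUMP);
 *	fchflags(fd, 0);
 *
 * As with chflags(2), setting flags on a device vnode is limited by the
 * PRIV_VFS_CHFLAGS_DEV check in setfflags() above.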
3318 */ 3319 int 3320 sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap) 3321 { 3322 struct thread *td = curthread; 3323 struct file *fp; 3324 int error; 3325 3326 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3327 return (error); 3328 if (fp->f_nchandle.ncp) 3329 error = ncp_writechk(&fp->f_nchandle); 3330 if (error == 0) 3331 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3332 fdrop(fp); 3333 return (error); 3334 } 3335 3336 /* 3337 * chflagsat_args(int fd, const char *path, u_long flags, int atflags) 3338 * change flags given a pathname relative to a filedescriptor 3339 */ 3340 int 3341 sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap) 3342 { 3343 struct nlookupdata nd; 3344 struct vnode *vp; 3345 struct file *fp; 3346 int error; 3347 int lookupflags; 3348 3349 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3350 return (EINVAL); 3351 3352 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3353 3354 vp = NULL; 3355 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3356 if (error == 0) 3357 error = nlookup(&nd); 3358 if (error == 0) 3359 error = ncp_writechk(&nd.nl_nch); 3360 if (error == 0) 3361 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3362 nlookup_done_at(&nd, fp); 3363 if (error == 0) { 3364 error = setfflags(vp, uap->flags); 3365 vrele(vp); 3366 } 3367 return (error); 3368 } 3369 3370 3371 static int 3372 setfmode(struct vnode *vp, int mode) 3373 { 3374 struct thread *td = curthread; 3375 int error; 3376 struct vattr vattr; 3377 3378 /* 3379 * note: vget is required for any operation that might mod the vnode 3380 * so VINACTIVE is properly cleared. 3381 */ 3382 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3383 VATTR_NULL(&vattr); 3384 vattr.va_mode = mode & ALLPERMS; 3385 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3386 cache_inval_wxok(vp); 3387 vput(vp); 3388 } 3389 return error; 3390 } 3391 3392 int 3393 kern_chmod(struct nlookupdata *nd, int mode) 3394 { 3395 struct vnode *vp; 3396 int error; 3397 3398 if ((error = nlookup(nd)) != 0) 3399 return (error); 3400 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3401 return (error); 3402 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3403 error = setfmode(vp, mode); 3404 vrele(vp); 3405 return (error); 3406 } 3407 3408 /* 3409 * chmod_args(char *path, int mode) 3410 * 3411 * Change mode of a file given path name. 3412 */ 3413 int 3414 sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap) 3415 { 3416 struct nlookupdata nd; 3417 int error; 3418 3419 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3420 if (error == 0) 3421 error = kern_chmod(&nd, uap->mode); 3422 nlookup_done(&nd); 3423 return (error); 3424 } 3425 3426 /* 3427 * lchmod_args(char *path, int mode) 3428 * 3429 * Change mode of a file given path name (don't follow links.) 3430 */ 3431 int 3432 sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap) 3433 { 3434 struct nlookupdata nd; 3435 int error; 3436 3437 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3438 if (error == 0) 3439 error = kern_chmod(&nd, uap->mode); 3440 nlookup_done(&nd); 3441 return (error); 3442 } 3443 3444 /* 3445 * fchmod_args(int fd, int mode) 3446 * 3447 * Change mode of a file given a file descriptor. 
3448 */ 3449 int 3450 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap) 3451 { 3452 struct thread *td = curthread; 3453 struct file *fp; 3454 int error; 3455 3456 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3457 return (error); 3458 if (fp->f_nchandle.ncp) 3459 error = ncp_writechk(&fp->f_nchandle); 3460 if (error == 0) 3461 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3462 fdrop(fp); 3463 return (error); 3464 } 3465 3466 /* 3467 * fchmodat_args(char *path, int mode) 3468 * 3469 * Change mode of a file pointed to by fd/path. 3470 */ 3471 int 3472 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap) 3473 { 3474 struct nlookupdata nd; 3475 struct file *fp; 3476 int error; 3477 int flags; 3478 3479 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3480 return (EINVAL); 3481 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3482 3483 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3484 UIO_USERSPACE, flags); 3485 if (error == 0) 3486 error = kern_chmod(&nd, uap->mode); 3487 nlookup_done_at(&nd, fp); 3488 return (error); 3489 } 3490 3491 static int 3492 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3493 { 3494 struct thread *td = curthread; 3495 int error; 3496 struct vattr vattr; 3497 uid_t o_uid; 3498 gid_t o_gid; 3499 uint64_t size; 3500 3501 /* 3502 * note: vget is required for any operation that might mod the vnode 3503 * so VINACTIVE is properly cleared. 3504 */ 3505 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3506 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3507 return error; 3508 o_uid = vattr.va_uid; 3509 o_gid = vattr.va_gid; 3510 size = vattr.va_size; 3511 3512 VATTR_NULL(&vattr); 3513 vattr.va_uid = uid; 3514 vattr.va_gid = gid; 3515 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3516 vput(vp); 3517 } 3518 3519 if (error == 0) { 3520 if (uid == -1) 3521 uid = o_uid; 3522 if (gid == -1) 3523 gid = o_gid; 3524 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3525 VFS_ACCOUNT(mp, uid, gid, size); 3526 } 3527 3528 return error; 3529 } 3530 3531 int 3532 kern_chown(struct nlookupdata *nd, int uid, int gid) 3533 { 3534 struct vnode *vp; 3535 int error; 3536 3537 if ((error = nlookup(nd)) != 0) 3538 return (error); 3539 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3540 return (error); 3541 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3542 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3543 vrele(vp); 3544 return (error); 3545 } 3546 3547 /* 3548 * chown(char *path, int uid, int gid) 3549 * 3550 * Set ownership given a path name. 3551 */ 3552 int 3553 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap) 3554 { 3555 struct nlookupdata nd; 3556 int error; 3557 3558 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3559 if (error == 0) 3560 error = kern_chown(&nd, uap->uid, uap->gid); 3561 nlookup_done(&nd); 3562 return (error); 3563 } 3564 3565 /* 3566 * lchown_args(char *path, int uid, int gid) 3567 * 3568 * Set ownership given a path name, do not cross symlinks. 3569 */ 3570 int 3571 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap) 3572 { 3573 struct nlookupdata nd; 3574 int error; 3575 3576 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3577 if (error == 0) 3578 error = kern_chown(&nd, uap->uid, uap->gid); 3579 nlookup_done(&nd); 3580 return (error); 3581 } 3582 3583 /* 3584 * fchown_args(int fd, int uid, int gid) 3585 * 3586 * Set ownership given a file descriptor. 
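 *
 * Illustrative userland usage (a hedged sketch; new_gid is a placeholder
 * variable).  A uid or gid of -1 leaves that id unchanged; setfown()
 * above substitutes the original values when doing the quota accounting.
 *
 *	#include <unistd.h>
 *
 *	fchown(fd, -1, new_gid);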
3587 */ 3588 int 3589 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap) 3590 { 3591 struct thread *td = curthread; 3592 struct proc *p = td->td_proc; 3593 struct file *fp; 3594 int error; 3595 3596 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3597 return (error); 3598 if (fp->f_nchandle.ncp) 3599 error = ncp_writechk(&fp->f_nchandle); 3600 if (error == 0) 3601 error = setfown(p->p_fd->fd_ncdir.mount, 3602 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3603 fdrop(fp); 3604 return (error); 3605 } 3606 3607 /* 3608 * fchownat(int fd, char *path, int uid, int gid, int flags) 3609 * 3610 * Set ownership of file pointed to by fd/path. 3611 */ 3612 int 3613 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap) 3614 { 3615 struct nlookupdata nd; 3616 struct file *fp; 3617 int error; 3618 int flags; 3619 3620 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3621 return (EINVAL); 3622 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3623 3624 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3625 UIO_USERSPACE, flags); 3626 if (error == 0) 3627 error = kern_chown(&nd, uap->uid, uap->gid); 3628 nlookup_done_at(&nd, fp); 3629 return (error); 3630 } 3631 3632 3633 static int 3634 getutimes(struct timeval *tvp, struct timespec *tsp) 3635 { 3636 struct timeval tv[2]; 3637 int error; 3638 3639 if (tvp == NULL) { 3640 microtime(&tv[0]); 3641 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3642 tsp[1] = tsp[0]; 3643 } else { 3644 if ((error = itimerfix(tvp)) != 0) 3645 return (error); 3646 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3647 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3648 } 3649 return 0; 3650 } 3651 3652 static int 3653 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3654 { 3655 struct timespec tsnow; 3656 int error; 3657 3658 *nullflag = 0; 3659 nanotime(&tsnow); 3660 if (ts == NULL) { 3661 newts[0] = tsnow; 3662 newts[1] = tsnow; 3663 *nullflag = 1; 3664 return (0); 3665 } 3666 3667 newts[0] = ts[0]; 3668 newts[1] = ts[1]; 3669 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) 3670 return (0); 3671 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3672 *nullflag = 1; 3673 3674 if (newts[0].tv_nsec == UTIME_OMIT) 3675 newts[0].tv_sec = VNOVAL; 3676 else if (newts[0].tv_nsec == UTIME_NOW) 3677 newts[0] = tsnow; 3678 else if ((error = itimespecfix(&newts[0])) != 0) 3679 return (error); 3680 3681 if (newts[1].tv_nsec == UTIME_OMIT) 3682 newts[1].tv_sec = VNOVAL; 3683 else if (newts[1].tv_nsec == UTIME_NOW) 3684 newts[1] = tsnow; 3685 else if ((error = itimespecfix(&newts[1])) != 0) 3686 return (error); 3687 3688 return (0); 3689 } 3690 3691 static int 3692 setutimes(struct vnode *vp, struct vattr *vattr, 3693 const struct timespec *ts, int nullflag) 3694 { 3695 struct thread *td = curthread; 3696 int error; 3697 3698 VATTR_NULL(vattr); 3699 vattr->va_atime = ts[0]; 3700 vattr->va_mtime = ts[1]; 3701 if (nullflag) 3702 vattr->va_vaflags |= VA_UTIMES_NULL; 3703 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3704 3705 return error; 3706 } 3707 3708 int 3709 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3710 { 3711 struct timespec ts[2]; 3712 int error; 3713 3714 if (tptr) { 3715 if ((error = getutimes(tptr, ts)) != 0) 3716 return (error); 3717 } 3718 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3719 return (error); 3720 } 3721 3722 /* 3723 * utimes_args(char *path, struct timeval *tptr) 3724 * 3725 * Set the access and modification times of a file. 
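 *
 * Illustrative userland usage (a hedged sketch; the path is an arbitrary
 * example).  A NULL timeval pointer means "now", which getutimens()
 * converts to the current time and setutimes() marks with VA_UTIMES_NULL:
 *
 *	#include <sys/time.h>
 *
 *	utimes("/tmp/example", NULL);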
3726 */ 3727 int 3728 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap) 3729 { 3730 struct timeval tv[2]; 3731 struct nlookupdata nd; 3732 int error; 3733 3734 if (uap->tptr) { 3735 error = copyin(uap->tptr, tv, sizeof(tv)); 3736 if (error) 3737 return (error); 3738 } 3739 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3740 if (error == 0) 3741 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3742 nlookup_done(&nd); 3743 return (error); 3744 } 3745 3746 /* 3747 * lutimes_args(char *path, struct timeval *tptr) 3748 * 3749 * Set the access and modification times of a file. 3750 */ 3751 int 3752 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap) 3753 { 3754 struct timeval tv[2]; 3755 struct nlookupdata nd; 3756 int error; 3757 3758 if (uap->tptr) { 3759 error = copyin(uap->tptr, tv, sizeof(tv)); 3760 if (error) 3761 return (error); 3762 } 3763 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3764 if (error == 0) 3765 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3766 nlookup_done(&nd); 3767 return (error); 3768 } 3769 3770 /* 3771 * Set utimes on a file descriptor. The creds used to open the 3772 * file are used to determine whether the operation is allowed 3773 * or not. 3774 */ 3775 int 3776 kern_futimens(int fd, struct timespec *ts) 3777 { 3778 struct thread *td = curthread; 3779 struct timespec newts[2]; 3780 struct file *fp; 3781 struct vnode *vp; 3782 struct vattr vattr; 3783 struct vattr_lite lva; 3784 int nullflag; 3785 int error; 3786 3787 error = getutimens(ts, newts, &nullflag); 3788 if (error) 3789 return (error); 3790 if ((error = holdvnode(td, fd, &fp)) != 0) 3791 return (error); 3792 if (fp->f_nchandle.ncp) 3793 error = ncp_writechk(&fp->f_nchandle); 3794 if (error == 0) { 3795 vp = fp->f_data; 3796 error = vget(vp, LK_EXCLUSIVE); 3797 if (error == 0) { 3798 error = VOP_GETATTR_FP(vp, &vattr, fp); 3799 if (error == 0) { 3800 lva.va_type = vattr.va_type; 3801 lva.va_nlink = vattr.va_nlink; 3802 lva.va_mode = vattr.va_mode; 3803 lva.va_uid = vattr.va_uid; 3804 lva.va_gid = vattr.va_gid; 3805 lva.va_size = vattr.va_size; 3806 lva.va_flags = vattr.va_flags; 3807 3808 error = naccess_lva(&lva, NLC_OWN | NLC_WRITE, 3809 fp->f_cred); 3810 } 3811 if (error == 0) { 3812 error = setutimes(vp, &vattr, newts, nullflag); 3813 } 3814 vput(vp); 3815 } 3816 } 3817 fdrop(fp); 3818 return (error); 3819 } 3820 3821 /* 3822 * futimens_args(int fd, struct timespec *ts) 3823 * 3824 * Set the access and modification times of a file. 3825 */ 3826 int 3827 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap) 3828 { 3829 struct timespec ts[2]; 3830 int error; 3831 3832 if (uap->ts) { 3833 error = copyin(uap->ts, ts, sizeof(ts)); 3834 if (error) 3835 return (error); 3836 } 3837 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3838 return (error); 3839 } 3840 3841 int 3842 kern_futimes(int fd, struct timeval *tptr) 3843 { 3844 struct timespec ts[2]; 3845 int error; 3846 3847 if (tptr) { 3848 if ((error = getutimes(tptr, ts)) != 0) 3849 return (error); 3850 } 3851 error = kern_futimens(fd, tptr ? ts : NULL); 3852 return (error); 3853 } 3854 3855 /* 3856 * futimes_args(int fd, struct timeval *tptr) 3857 * 3858 * Set the access and modification times of a file. 
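 *
 * Illustrative userland usage (a hedged sketch) of the futimens(2) form
 * above: leave the access time alone and set the modification time to
 * "now".  The UTIME_OMIT/UTIME_NOW markers are interpreted by
 * getutimens().
 *
 *	#include <sys/stat.h>
 *
 *	struct timespec ts[2];
 *
 *	ts[0].tv_sec = ts[1].tv_sec = 0;
 *	ts[0].tv_nsec = UTIME_OMIT;
 *	ts[1].tv_nsec = UTIME_NOW;
 *	futimens(fd, ts);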
3859 */ 3860 int 3861 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap) 3862 { 3863 struct timeval tv[2]; 3864 int error; 3865 3866 if (uap->tptr) { 3867 error = copyin(uap->tptr, tv, sizeof(tv)); 3868 if (error) 3869 return (error); 3870 } 3871 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3872 return (error); 3873 } 3874 3875 int 3876 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3877 { 3878 struct timespec newts[2]; 3879 struct vnode *vp; 3880 struct vattr vattr; 3881 int nullflag; 3882 int error; 3883 3884 if (flags & ~AT_SYMLINK_NOFOLLOW) 3885 return (EINVAL); 3886 3887 error = getutimens(ts, newts, &nullflag); 3888 if (error) 3889 return (error); 3890 3891 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3892 if ((error = nlookup(nd)) != 0) 3893 return (error); 3894 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3895 return (error); 3896 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3897 return (error); 3898 if ((error = vn_writechk(vp)) == 0) { 3899 error = vget(vp, LK_EXCLUSIVE); 3900 if (error == 0) { 3901 error = setutimes(vp, &vattr, newts, nullflag); 3902 vput(vp); 3903 } 3904 } 3905 vrele(vp); 3906 return (error); 3907 } 3908 3909 /* 3910 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3911 * 3912 * Set file access and modification times of a file. 3913 */ 3914 int 3915 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap) 3916 { 3917 struct timespec ts[2]; 3918 struct nlookupdata nd; 3919 struct file *fp; 3920 int error; 3921 int flags; 3922 3923 if (uap->ts) { 3924 error = copyin(uap->ts, ts, sizeof(ts)); 3925 if (error) 3926 return (error); 3927 } 3928 3929 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3930 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3931 UIO_USERSPACE, flags); 3932 if (error == 0) 3933 error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags); 3934 nlookup_done_at(&nd, fp); 3935 return (error); 3936 } 3937 3938 int 3939 kern_truncate(struct nlookupdata *nd, off_t length) 3940 { 3941 struct vnode *vp; 3942 struct vattr vattr; 3943 int error; 3944 uid_t uid = 0; 3945 gid_t gid = 0; 3946 uint64_t old_size = 0; 3947 3948 if (length < 0) 3949 return(EINVAL); 3950 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3951 if ((error = nlookup(nd)) != 0) 3952 return (error); 3953 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3954 return (error); 3955 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3956 return (error); 3957 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3958 if (error) { 3959 vrele(vp); 3960 return (error); 3961 } 3962 if (vp->v_type == VDIR) { 3963 error = EISDIR; 3964 goto done; 3965 } 3966 if (vfs_quota_enabled) { 3967 error = VOP_GETATTR(vp, &vattr); 3968 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3969 uid = vattr.va_uid; 3970 gid = vattr.va_gid; 3971 old_size = vattr.va_size; 3972 } 3973 3974 if ((error = vn_writechk(vp)) == 0) { 3975 VATTR_NULL(&vattr); 3976 vattr.va_size = length; 3977 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3978 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3979 } 3980 done: 3981 vput(vp); 3982 return (error); 3983 } 3984 3985 /* 3986 * truncate(char *path, int pad, off_t length) 3987 * 3988 * Truncate a file given its path name. 
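 *
 * Illustrative userland usage (a hedged sketch; the path and length are
 * arbitrary examples):
 *
 *	#include <unistd.h>
 *
 *	truncate("/var/log/example.log", 0);
 *
 * Directories are rejected with EISDIR and negative lengths with EINVAL,
 * as checked in kern_truncate() above.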
3989 */ 3990 int 3991 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap) 3992 { 3993 struct nlookupdata nd; 3994 int error; 3995 3996 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3997 if (error == 0) 3998 error = kern_truncate(&nd, uap->length); 3999 nlookup_done(&nd); 4000 return error; 4001 } 4002 4003 int 4004 kern_ftruncate(int fd, off_t length) 4005 { 4006 struct thread *td = curthread; 4007 struct vattr vattr; 4008 struct vnode *vp; 4009 struct file *fp; 4010 int error; 4011 uid_t uid = 0; 4012 gid_t gid = 0; 4013 uint64_t old_size = 0; 4014 struct mount *mp; 4015 4016 if (length < 0) 4017 return(EINVAL); 4018 if ((error = holdvnode(td, fd, &fp)) != 0) 4019 return (error); 4020 if (fp->f_nchandle.ncp) { 4021 error = ncp_writechk(&fp->f_nchandle); 4022 if (error) 4023 goto done; 4024 } 4025 if ((fp->f_flag & FWRITE) == 0) { 4026 error = EINVAL; 4027 goto done; 4028 } 4029 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 4030 error = EINVAL; 4031 goto done; 4032 } 4033 vp = (struct vnode *)fp->f_data; 4034 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4035 if (vp->v_type == VDIR) { 4036 error = EISDIR; 4037 vn_unlock(vp); 4038 goto done; 4039 } 4040 4041 if (vfs_quota_enabled) { 4042 error = VOP_GETATTR_FP(vp, &vattr, fp); 4043 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 4044 uid = vattr.va_uid; 4045 gid = vattr.va_gid; 4046 old_size = vattr.va_size; 4047 } 4048 4049 if ((error = vn_writechk(vp)) == 0) { 4050 VATTR_NULL(&vattr); 4051 vattr.va_size = length; 4052 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 4053 mp = vq_vptomp(vp); 4054 VFS_ACCOUNT(mp, uid, gid, length - old_size); 4055 } 4056 vn_unlock(vp); 4057 done: 4058 fdrop(fp); 4059 return (error); 4060 } 4061 4062 /* 4063 * ftruncate_args(int fd, int pad, off_t length) 4064 * 4065 * Truncate a file given a file descriptor. 4066 */ 4067 int 4068 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap) 4069 { 4070 int error; 4071 4072 error = kern_ftruncate(uap->fd, uap->length); 4073 4074 return (error); 4075 } 4076 4077 /* 4078 * fsync(int fd) 4079 * 4080 * Sync an open file. 
4081 */ 4082 int 4083 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap) 4084 { 4085 struct thread *td = curthread; 4086 struct vnode *vp; 4087 struct file *fp; 4088 vm_object_t obj; 4089 int error; 4090 4091 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 4092 return (error); 4093 vp = (struct vnode *)fp->f_data; 4094 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4095 if ((obj = vp->v_object) != NULL) { 4096 if (vp->v_mount == NULL || 4097 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4098 vm_object_page_clean(obj, 0, 0, 0); 4099 } 4100 } 4101 error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4102 if (error == 0 && vp->v_mount) 4103 error = buf_fsync(vp); 4104 vn_unlock(vp); 4105 fdrop(fp); 4106 4107 return (error); 4108 } 4109 4110 int 4111 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4112 { 4113 struct nchandle fnchd; 4114 struct nchandle tnchd; 4115 struct namecache *ncp; 4116 struct vnode *fdvp; 4117 struct vnode *tdvp; 4118 struct mount *mp; 4119 int error; 4120 u_int fncp_gen; 4121 u_int tncp_gen; 4122 4123 bwillinode(1); 4124 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4125 if ((error = nlookup(fromnd)) != 0) 4126 return (error); 4127 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4128 return (ENOENT); 4129 fnchd.mount = fromnd->nl_nch.mount; 4130 cache_hold(&fnchd); 4131 4132 /* 4133 * unlock the source nch so we can lookup the target nch without 4134 * deadlocking. The target may or may not exist so we do not check 4135 * for a target vp like kern_mkdir() and other creation functions do. 4136 * 4137 * The source and target directories are ref'd and rechecked after 4138 * everything is relocked to determine if the source or target file 4139 * has been renamed. 4140 */ 4141 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4142 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4143 4144 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4145 4146 cache_unlock(&fromnd->nl_nch); 4147 4148 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4149 if ((error = nlookup(tond)) != 0) { 4150 cache_drop(&fnchd); 4151 return (error); 4152 } 4153 tncp_gen = tond->nl_nch.ncp->nc_generation; 4154 4155 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4156 cache_drop(&fnchd); 4157 return (ENOENT); 4158 } 4159 tnchd.mount = tond->nl_nch.mount; 4160 cache_hold(&tnchd); 4161 4162 /* 4163 * If the source and target are the same there is nothing to do 4164 */ 4165 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4166 cache_drop(&fnchd); 4167 cache_drop(&tnchd); 4168 return (0); 4169 } 4170 4171 /* 4172 * Mount points cannot be renamed or overwritten 4173 */ 4174 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4175 NCF_ISMOUNTPT 4176 ) { 4177 cache_drop(&fnchd); 4178 cache_drop(&tnchd); 4179 return (EINVAL); 4180 } 4181 4182 /* 4183 * Relock the source ncp. cache_relock() will deal with any 4184 * deadlocks against the already-locked tond and will also 4185 * make sure both are resolved. 4186 * 4187 * NOTE AFTER RELOCKING: The source or target ncp may have become 4188 * invalid while they were unlocked, nc_vp and nc_mount could 4189 * be NULL. 4190 */ 4191 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4192 &tond->nl_nch, tond->nl_cred); 4193 fromnd->nl_flags |= NLC_NCPISLOCKED; 4194 4195 /* 4196 * If the namecache generation changed for either fromnd or tond, 4197 * we must retry. 
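 *
 * The EAGAIN returned here never reaches userland; sys_rename() and
 * sys_renameat() below simply redo the whole lookup and call us again.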
	 */
	if (fromnd->nl_nch.ncp->nc_generation != fncp_gen ||
	    tond->nl_nch.ncp->nc_generation != tncp_gen) {
		kprintf("kern_rename: retry due to gen on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * If either fromnd or tond is marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * Make sure the parent directories' linkages are the same.
	 * XXX shouldn't be needed any more w/ generation check above.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
4303 */ 4304 if (error == 0) { 4305 fdvp = fromnd->nl_dvp; 4306 tdvp = tond->nl_dvp; 4307 if (fdvp == NULL || tdvp == NULL) { 4308 error = EPERM; 4309 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4310 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4311 fromnd->nl_cred); 4312 } else { 4313 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4314 fdvp, tdvp, tond->nl_cred); 4315 } 4316 } 4317 return (error); 4318 } 4319 4320 /* 4321 * rename_args(char *from, char *to) 4322 * 4323 * Rename files. Source and destination must either both be directories, 4324 * or both not be directories. If target is a directory, it must be empty. 4325 */ 4326 int 4327 sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap) 4328 { 4329 struct nlookupdata fromnd, tond; 4330 int error; 4331 4332 do { 4333 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4334 if (error == 0) { 4335 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4336 if (error == 0) 4337 error = kern_rename(&fromnd, &tond); 4338 nlookup_done(&tond); 4339 } 4340 nlookup_done(&fromnd); 4341 } while (error == EAGAIN); 4342 return (error); 4343 } 4344 4345 /* 4346 * renameat_args(int oldfd, char *old, int newfd, char *new) 4347 * 4348 * Rename files using paths relative to the directories associated with 4349 * oldfd and newfd. Source and destination must either both be directories, 4350 * or both not be directories. If target is a directory, it must be empty. 4351 */ 4352 int 4353 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap) 4354 { 4355 struct nlookupdata oldnd, newnd; 4356 struct file *oldfp, *newfp; 4357 int error; 4358 4359 do { 4360 error = nlookup_init_at(&oldnd, &oldfp, 4361 uap->oldfd, uap->old, 4362 UIO_USERSPACE, 0); 4363 if (error == 0) { 4364 error = nlookup_init_at(&newnd, &newfp, 4365 uap->newfd, uap->new, 4366 UIO_USERSPACE, 0); 4367 if (error == 0) 4368 error = kern_rename(&oldnd, &newnd); 4369 nlookup_done_at(&newnd, newfp); 4370 } 4371 nlookup_done_at(&oldnd, oldfp); 4372 } while (error == EAGAIN); 4373 return (error); 4374 } 4375 4376 int 4377 kern_mkdir(struct nlookupdata *nd, int mode) 4378 { 4379 struct thread *td = curthread; 4380 struct proc *p = td->td_proc; 4381 struct vnode *vp; 4382 struct vattr vattr; 4383 int error; 4384 4385 bwillinode(1); 4386 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4387 if ((error = nlookup(nd)) != 0) 4388 return (error); 4389 4390 if (nd->nl_nch.ncp->nc_vp) 4391 return (EEXIST); 4392 if (nd->nl_dvp == NULL) 4393 return (EINVAL); 4394 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4395 return (error); 4396 VATTR_NULL(&vattr); 4397 vattr.va_type = VDIR; 4398 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4399 4400 vp = NULL; 4401 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4402 if (error == 0) 4403 vput(vp); 4404 return (error); 4405 } 4406 4407 /* 4408 * mkdir_args(char *path, int mode) 4409 * 4410 * Make a directory file. 4411 */ 4412 int 4413 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap) 4414 { 4415 struct nlookupdata nd; 4416 int error; 4417 4418 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4419 if (error == 0) 4420 error = kern_mkdir(&nd, uap->mode); 4421 nlookup_done(&nd); 4422 return (error); 4423 } 4424 4425 /* 4426 * mkdirat_args(int fd, char *path, mode_t mode) 4427 * 4428 * Make a directory file. The path is relative to the directory associated 4429 * with fd. 
 */
int
sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EBUSY);
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	return (error);
}

int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;

	/*
	 * WARNING! *basep may not be wide enough to accommodate the
	 * seek offset.  XXX should we hack this to return the upper 32 bits
	 * for offsets greater than 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
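 *
 * Illustrative userspace read loop (a sketch; error handling is omitted,
 * the buffer size is arbitrary, and _DIRENT_DIRSIZ() is the record-size
 * macro from <sys/dirent.h>):
 *
 *	char buf[4096];
 *	long base;
 *	int n, off;
 *
 *	while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
 *		for (off = 0; off < n; off += _DIRENT_DIRSIZ(dp)) {
 *			struct dirent *dp = (struct dirent *)(buf + off);
 *			... process dp->d_name ...
 *		}
 *	}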
4543 */ 4544 int 4545 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap) 4546 { 4547 long base; 4548 int error; 4549 4550 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4551 &sysmsg->sysmsg_result, UIO_USERSPACE); 4552 4553 if (error == 0 && uap->basep) 4554 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4555 return (error); 4556 } 4557 4558 /* 4559 * getdents_args(int fd, char *buf, size_t count) 4560 */ 4561 int 4562 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap) 4563 { 4564 int error; 4565 4566 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4567 &sysmsg->sysmsg_result, UIO_USERSPACE); 4568 4569 return (error); 4570 } 4571 4572 /* 4573 * Set the mode mask for creation of filesystem nodes. 4574 * 4575 * umask(int newmask) 4576 */ 4577 int 4578 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap) 4579 { 4580 struct thread *td = curthread; 4581 struct proc *p = td->td_proc; 4582 struct filedesc *fdp; 4583 4584 fdp = p->p_fd; 4585 sysmsg->sysmsg_result = fdp->fd_cmask; 4586 fdp->fd_cmask = uap->newmask & ALLPERMS; 4587 return (0); 4588 } 4589 4590 /* 4591 * revoke(char *path) 4592 * 4593 * Void all references to file by ripping underlying filesystem 4594 * away from vnode. 4595 */ 4596 int 4597 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap) 4598 { 4599 struct nlookupdata nd; 4600 struct vattr vattr; 4601 struct vnode *vp; 4602 struct ucred *cred; 4603 int error; 4604 4605 vp = NULL; 4606 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4607 if (error == 0) 4608 error = nlookup(&nd); 4609 if (error == 0) 4610 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4611 cred = crhold(nd.nl_cred); 4612 nlookup_done(&nd); 4613 if (error == 0) { 4614 if (error == 0) 4615 error = VOP_GETATTR(vp, &vattr); 4616 if (error == 0 && cred->cr_uid != vattr.va_uid) 4617 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4618 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4619 if (vcount(vp) > 0) 4620 error = vrevoke(vp, cred); 4621 } else if (error == 0) { 4622 error = vrevoke(vp, cred); 4623 } 4624 vrele(vp); 4625 } 4626 if (cred) 4627 crfree(cred); 4628 return (error); 4629 } 4630 4631 /* 4632 * getfh_args(char *fname, fhandle_t *fhp) 4633 * 4634 * Get (NFS) file handle 4635 * 4636 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4637 * mount. This allows nullfs mounts to be explicitly exported. 4638 * 4639 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4640 * 4641 * nullfs mounts of subdirectories are not safe. That is, it will 4642 * work, but you do not really have protection against access to 4643 * the related parent directories. 
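 *
 * Illustrative userspace usage (a sketch; the exported path is hypothetical
 * and the caller must be root):
 *
 *	fhandle_t fh;
 *
 *	if (getfh("/export/data/file", &fh) == 0)
 *		;	// fh.fh_fsid identifies the (possibly nullfs) mount,
 *			// fh.fh_fid identifies the file within it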
 */
int
sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	fhandle_t fh;
	struct vnode *vp;
	struct mount *mp;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error == 0) {
		bzero(&fh, sizeof(fh));
		fh.fh_fsid = mp->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &fh.fh_fid);
		vput(vp);
		if (error == 0)
			error = copyout(&fh, uap->fhp, sizeof(fh));
	}
	return (error);
}

/*
 * fhopen_args(const struct fhandle *u_fhp, int flags)
 *
 * Syscall for rpc.lockd to translate an NFS file handle into an open
 * descriptor.
 *
 * WARNING: do not remove the priv_check() call or this becomes one giant
 * security hole.
 */
int
sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap)
{
	struct thread *td = curthread;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	int fmode, mode, error = 0, type;
	struct file *nfp;
	struct file *fp;
	int indx;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);

	/*
	 * Why not allow a non-read/write open for our lockd?
	 */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return (error);

	/*
	 * Find the mount point
	 */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL) {
		error = ESTALE;
		goto done2;
	}
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	if (error)
		goto done;
	/*
	 * From now on we have to make sure not to forget about the vnode.
	 * Any error that causes an abort must vput(vp); just set
	 * error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		vn_unlock(vp);				/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error)
			goto bad;
	}

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_nchandle will be associated when fhopen()ing a
	 * directory.
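	 *
	 * Illustrative userspace use (a sketch; assumes "fh" was previously
	 * filled in by getfh(2) and that the caller is root):
	 *
	 *	int fd = fhopen(&fh, O_RDWR);	// O_CREAT is rejected
	 *					// with EINVAL above
	 *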
XXX 4787 */ 4788 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4789 goto bad; 4790 fp = nfp; 4791 4792 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4793 if (error) { 4794 /* 4795 * setting f_ops this way prevents VOP_CLOSE from being 4796 * called or fdrop() releasing the vp from v_data. Since 4797 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4798 */ 4799 fp->f_ops = &badfileops; 4800 fp->f_data = NULL; 4801 goto bad_drop; 4802 } 4803 4804 /* 4805 * The fp is given its own reference, we still have our ref and lock. 4806 * 4807 * Assert that all regular files must be created with a VM object. 4808 */ 4809 if (vp->v_type == VREG && vp->v_object == NULL) { 4810 kprintf("fhopen: regular file did not " 4811 "have VM object: %p\n", 4812 vp); 4813 goto bad_drop; 4814 } 4815 4816 /* 4817 * The open was successful. Handle any locking requirements. 4818 */ 4819 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4820 lf.l_whence = SEEK_SET; 4821 lf.l_start = 0; 4822 lf.l_len = 0; 4823 if (fmode & O_EXLOCK) 4824 lf.l_type = F_WRLCK; 4825 else 4826 lf.l_type = F_RDLCK; 4827 if (fmode & FNONBLOCK) 4828 type = 0; 4829 else 4830 type = F_WAIT; 4831 vn_unlock(vp); 4832 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4833 &lf, type)) != 0) { 4834 /* 4835 * release our private reference. 4836 */ 4837 fsetfd(fdp, NULL, indx); 4838 fdrop(fp); 4839 vrele(vp); 4840 goto done; 4841 } 4842 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4843 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4844 } 4845 4846 /* 4847 * Clean up. Associate the file pointer with the previously 4848 * reserved descriptor and return it. 4849 */ 4850 vput(vp); 4851 if (uap->flags & O_CLOEXEC) 4852 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4853 fsetfd(fdp, fp, indx); 4854 fdrop(fp); 4855 sysmsg->sysmsg_result = indx; 4856 mount_drop(mp); 4857 4858 return (error); 4859 4860 bad_drop: 4861 fsetfd(fdp, NULL, indx); 4862 fdrop(fp); 4863 bad: 4864 vput(vp); 4865 done: 4866 mount_drop(mp); 4867 done2: 4868 return (error); 4869 } 4870 4871 /* 4872 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4873 */ 4874 int 4875 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap) 4876 { 4877 struct thread *td = curthread; 4878 struct stat sb; 4879 fhandle_t fh; 4880 struct mount *mp; 4881 struct vnode *vp; 4882 int error; 4883 4884 /* 4885 * Must be super user 4886 */ 4887 error = priv_check(td, PRIV_ROOT); 4888 if (error) 4889 return (error); 4890 4891 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4892 if (error) 4893 return (error); 4894 4895 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4896 error = ESTALE; 4897 if (error == 0) { 4898 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4899 error = vn_stat(vp, &sb, td->td_ucred); 4900 vput(vp); 4901 } 4902 } 4903 if (error == 0) 4904 error = copyout(&sb, uap->sb, sizeof(sb)); 4905 if (mp) 4906 mount_drop(mp); 4907 4908 return (error); 4909 } 4910 4911 /* 4912 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4913 */ 4914 int 4915 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap) 4916 { 4917 struct thread *td = curthread; 4918 struct proc *p = td->td_proc; 4919 struct statfs *sp; 4920 struct mount *mp; 4921 struct vnode *vp; 4922 struct statfs sb; 4923 char *fullpath, *freepath; 4924 fhandle_t fh; 4925 int error; 4926 4927 /* 4928 * Must be super user 4929 */ 4930 if ((error = priv_check(td, PRIV_ROOT))) 4931 return (error); 4932 4933 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4934 return (error); 4935 4936 if ((mp = 
vfs_getvfs(&fh.fh_fsid)) == NULL) { 4937 error = ESTALE; 4938 goto done; 4939 } 4940 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4941 error = ESTALE; 4942 goto done; 4943 } 4944 4945 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4946 goto done; 4947 mp = vp->v_mount; 4948 sp = &mp->mnt_stat; 4949 vput(vp); 4950 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4951 goto done; 4952 4953 error = mount_path(p, mp, &fullpath, &freepath); 4954 if (error) 4955 goto done; 4956 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4957 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4958 kfree(freepath, M_TEMP); 4959 4960 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4961 if (priv_check(td, PRIV_ROOT)) { 4962 bcopy(sp, &sb, sizeof(sb)); 4963 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4964 sp = &sb; 4965 } 4966 error = copyout(sp, uap->buf, sizeof(*sp)); 4967 done: 4968 if (mp) 4969 mount_drop(mp); 4970 4971 return (error); 4972 } 4973 4974 /* 4975 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4976 */ 4977 int 4978 sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap) 4979 { 4980 struct thread *td = curthread; 4981 struct proc *p = td->td_proc; 4982 struct statvfs *sp; 4983 struct mount *mp; 4984 struct vnode *vp; 4985 fhandle_t fh; 4986 int error; 4987 4988 /* 4989 * Must be super user 4990 */ 4991 if ((error = priv_check(td, PRIV_ROOT))) 4992 return (error); 4993 4994 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4995 return (error); 4996 4997 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4998 error = ESTALE; 4999 goto done; 5000 } 5001 if (p != NULL && !chroot_visible_mnt(mp, p)) { 5002 error = ESTALE; 5003 goto done; 5004 } 5005 5006 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 5007 goto done; 5008 mp = vp->v_mount; 5009 sp = &mp->mnt_vstat; 5010 vput(vp); 5011 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 5012 goto done; 5013 5014 sp->f_flag = 0; 5015 if (mp->mnt_flag & MNT_RDONLY) 5016 sp->f_flag |= ST_RDONLY; 5017 if (mp->mnt_flag & MNT_NOSUID) 5018 sp->f_flag |= ST_NOSUID; 5019 error = copyout(sp, uap->buf, sizeof(*sp)); 5020 done: 5021 if (mp) 5022 mount_drop(mp); 5023 return (error); 5024 } 5025 5026 5027 /* 5028 * Syscall to push extended attribute configuration information into the 5029 * VFS. Accepts a path, which it converts to a mountpoint, as well as 5030 * a command (int cmd), and attribute name and misc data. For now, the 5031 * attribute name is left in userspace for consumption by the VFS_op. 5032 * It will probably be changed to be copied into sysspace by the 5033 * syscall in the future, once issues with various consumers of the 5034 * attribute code have raised their hands. 5035 * 5036 * Currently this is used only by UFS Extended Attributes. 
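 *
 * Illustrative userspace call (a sketch; the backing-file path and the
 * UFS_EXTATTR_CMD_ENABLE command are taken from the UFS1 extended
 * attribute scheme and may not apply to other filesystems):
 *
 *	extattrctl("/var", UFS_EXTATTR_CMD_ENABLE,
 *	    "/var/.attribute/system/posix1e.acl_access",
 *	    EXTATTR_NAMESPACE_SYSTEM, "posix1e.acl_access");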
 */
int
sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;

	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 */
int
sys_extattr_set_file(struct sysmsg *sysmsg,
		     const struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
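 *
 * Illustrative userspace call (a sketch; the attribute name and buffer
 * size are hypothetical):
 *
 *	char value[256];
 *	ssize_t n;
 *
 *	n = extattr_get_file("/tmp/f", EXTATTR_NAMESPACE_USER,
 *	    "backup.tag", value, sizeof(value));
 *	// on success n is the number of attribute bytes copied into value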
5135 */ 5136 int 5137 sys_extattr_get_file(struct sysmsg *sysmsg, 5138 const struct extattr_get_file_args *uap) 5139 { 5140 char attrname[EXTATTR_MAXNAMELEN]; 5141 struct nlookupdata nd; 5142 struct uio auio; 5143 struct iovec aiov; 5144 struct vnode *vp; 5145 int error; 5146 5147 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5148 if (error) 5149 return (error); 5150 5151 vp = NULL; 5152 5153 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5154 if (error == 0) 5155 error = nlookup(&nd); 5156 if (error == 0) 5157 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5158 if (error) { 5159 nlookup_done(&nd); 5160 return (error); 5161 } 5162 5163 bzero(&auio, sizeof(auio)); 5164 aiov.iov_base = uap->data; 5165 aiov.iov_len = uap->nbytes; 5166 auio.uio_iov = &aiov; 5167 auio.uio_iovcnt = 1; 5168 auio.uio_offset = 0; 5169 auio.uio_resid = uap->nbytes; 5170 auio.uio_rw = UIO_READ; 5171 auio.uio_td = curthread; 5172 5173 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5174 &auio, nd.nl_cred); 5175 sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid; 5176 5177 vput(vp); 5178 nlookup_done(&nd); 5179 return(error); 5180 } 5181 5182 /* 5183 * Syscall to delete a named extended attribute from a file or directory. 5184 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 5185 */ 5186 int 5187 sys_extattr_delete_file(struct sysmsg *sysmsg, 5188 const struct extattr_delete_file_args *uap) 5189 { 5190 char attrname[EXTATTR_MAXNAMELEN]; 5191 struct nlookupdata nd; 5192 struct vnode *vp; 5193 int error; 5194 5195 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5196 if (error) 5197 return(error); 5198 5199 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5200 if (error == 0) 5201 error = nlookup(&nd); 5202 if (error == 0) 5203 error = ncp_writechk(&nd.nl_nch); 5204 if (error == 0) { 5205 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5206 if (error == 0) { 5207 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5208 attrname, NULL, nd.nl_cred); 5209 vput(vp); 5210 } 5211 } 5212 nlookup_done(&nd); 5213 return(error); 5214 } 5215 5216 /* 5217 * Determine if the mount is visible to the process. 5218 */ 5219 static int 5220 chroot_visible_mnt(struct mount *mp, struct proc *p) 5221 { 5222 struct nchandle nch; 5223 5224 /* 5225 * Traverse from the mount point upwards. If we hit the process 5226 * root then the mount point is visible to the process. 5227 */ 5228 nch = mp->mnt_ncmountpt; 5229 while (nch.ncp) { 5230 if (nch.mount == p->p_fd->fd_nrdir.mount && 5231 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5232 return(1); 5233 } 5234 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5235 nch = nch.mount->mnt_ncmounton; 5236 } else { 5237 nch.ncp = nch.ncp->nc_parent; 5238 } 5239 } 5240 5241 /* 5242 * If the mount point is not visible to the process, but the 5243 * process root is in a subdirectory of the mount, return 5244 * TRUE anyway. 
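	 *
	 * Userspace illustration (a sketch; the chroot path is hypothetical,
	 * and fh/sf are a previously obtained handle and a struct statfs):
	 * a process that has called chroot("/jail") sees the handle-based
	 * syscalls above fail for mounts not reachable from /jail:
	 *
	 *	chroot("/jail");
	 *	if (fhstatfs(&fh, &sf) < 0 && errno == ESTALE)
	 *		;	// the handle's mount is not visible here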
5245 */ 5246 if (p->p_fd->fd_nrdir.mount == mp) 5247 return(1); 5248 5249 return(0); 5250 } 5251 5252 /* Sets priv to PRIV_ROOT in case no matching fs */ 5253 static int 5254 get_fspriv(const char *fsname) 5255 { 5256 5257 if (strncmp("null", fsname, 5) == 0) { 5258 return PRIV_VFS_MOUNT_NULLFS; 5259 } else if (strncmp(fsname, "tmpfs", 6) == 0) { 5260 return PRIV_VFS_MOUNT_TMPFS; 5261 } 5262 5263 return PRIV_ROOT; 5264 } 5265 5266 int 5267 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap) 5268 { 5269 struct nlookupdata nd; 5270 char *rbuf; 5271 char *fbuf; 5272 ssize_t rlen; 5273 int error; 5274 5275 /* 5276 * Invalid length if less than 0. 0 is allowed 5277 */ 5278 if ((ssize_t)uap->len < 0) 5279 return EINVAL; 5280 5281 rbuf = NULL; 5282 fbuf = NULL; 5283 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5284 if (error) 5285 goto done; 5286 5287 nd.nl_flags |= NLC_SHAREDLOCK; 5288 error = nlookup(&nd); 5289 if (error) 5290 goto done; 5291 5292 if (nd.nl_nch.ncp->nc_vp == NULL) { 5293 error = ENOENT; 5294 goto done; 5295 } 5296 5297 /* 5298 * Shortcut test for existence. 5299 */ 5300 if (uap->len == 0) { 5301 error = ENAMETOOLONG; 5302 goto done; 5303 } 5304 5305 /* 5306 * Obtain the path relative to the process root. The nch must not 5307 * be locked for the cache_fullpath() call. 5308 */ 5309 if (nd.nl_flags & NLC_NCPISLOCKED) { 5310 nd.nl_flags &= ~NLC_NCPISLOCKED; 5311 cache_unlock(&nd.nl_nch); 5312 } 5313 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0); 5314 if (error) 5315 goto done; 5316 5317 rlen = (ssize_t)strlen(rbuf); 5318 if (rlen >= uap->len) { 5319 error = ENAMETOOLONG; 5320 goto done; 5321 } 5322 error = copyout(rbuf, uap->buf, rlen + 1); 5323 if (error == 0) 5324 sysmsg->sysmsg_szresult = rlen; 5325 done: 5326 nlookup_done(&nd); 5327 if (fbuf) 5328 kfree(fbuf, M_TEMP); 5329 5330 return error; 5331 } 5332
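
/*
 * Illustrative userspace use of __realpath() (a sketch; libc's realpath(3)
 * is the usual interface and may be built on top of this syscall):
 *
 *	char buf[PATH_MAX];
 *	ssize_t len;
 *
 *	len = __realpath("subdir/../file", buf, sizeof(buf));
 *	if (len < 0)
 *		err(1, "__realpath");	// e.g. ENAMETOOLONG if buf is too small
 */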