1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
		__printflike(2, 3);
static int	mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int	checkvp_chdir (struct vnode *vn, struct thread *td);
static void	checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int	get_fspriv(const char *);
static int	chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int	chroot_visible_mnt(struct mount *mp, struct proc *p);
static int	getutimes (struct timeval *, struct timespec *);
static int	getutimens (const struct timespec *, struct timespec *, int *);
static int	setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int	setfmode (struct vnode *, int);
static int	setfflags (struct vnode *, u_long);
static int	setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

static int	usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

static int	debug_unmount = 0; /* if 1 loop until unmount success */
SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
    "Stall failed unmounts in loop");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	int priv = 0;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type.  We need to know this early, to take
	 * appropriate actions for jails and nullfs mounts.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct priv according to the file system type.
	 */
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, priv))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system.  We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted directly on this
	 * namecache entry; a second mount on the same point is refused
	 * (EBUSY) further below.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * NOTE: comparing 5 bytes of "null" includes the NUL
		 * terminator, so this matches "null" exactly (nullfs).
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* saved so a failed update can restore the flags below */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp, vfsp->vfc_vfsops);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* restore the flag state saved before the update */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): any error from vfs_allocate_syncvnode() is
		 * overwritten by the VFS_START() result below — presumably
		 * intentional best-effort behavior; confirm upstream.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed: tear down the partially constructed
		 * mount structure and release all references.
		 */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* mount point being covered */
	struct nchandle new_nch;	/* root of the new mount */
	struct vnode *old_vp;		/* (unused by the callback) */
	struct vnode *new_vp;		/* root vnode of the new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info, 0);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 *
		 * The old refs are collected under fd_spin and released
		 * after the spinlock is dropped (cache_drop/vrele may
		 * block).
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	int priv = 0;
	int error;
	struct ucred *cred;

	cred = td->td_ucred;

	KKASSERT(p);

	/* We do not allow user umounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
			     NLC_FOLLOW | NLC_IGNBADDIR);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/* Figure out the fsname in order to select proper privs */
	ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv))) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, priv)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

	/* Check if this mount belongs to this prison */
	if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
	    mp->mnt_cred->cr_prison != cred->cr_prison)) {
		kprintf("mountpoint %s does not belong to this jail\n",
		    uap->path);
		error = EPERM;
		goto out;
	}

	/*
	 * If no error try to issue the unmount.  We lose our cache
	 * ref when we call nlookup_done so we must hold the mount point
	 * to prevent use-after-free races.
	 */
out:
	if (error == 0) {
		mount_hold(mp);
		nlookup_done(&nd);
		error = dounmount(mp, uap->flags, 0);
		mount_drop(mp);
	} else {
		nlookup_done(&nd);
	}
done:
	return (error);
}

/*
 * Do the actual file system unmount (interlocked against the mountlist
 * token and mp->mnt_token).
 *
 * Returns EBUSY if an unmount is already in progress on this mount,
 * otherwise marks it MNTK_UNMOUNT and returns 0.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: drop a process's p_textnch if it references
 * the mount being (force-)unmounted, releasing that namecache ref.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int hadsyncer = 0;
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* forced unmounts wait indefinitely; normal ones may time out */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
		hadsyncer = 1;
	}

	/*
	 * Sync normally-mounted filesystem.
	 */
	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; (retry < 10 || debug_unmount); ++retry) {
		/*
		 * Invalidate the namecache topology under the mount.
		 * nullfs mounts alias a real mount's namecache topology
		 * and it should not be invalidated in that case.
		 */
		if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
			cache_lock(&mp->mnt_ncmountpt);
			cache_inval(&mp->mnt_ncmountpt,
				    CINV_DESTROY|CINV_CHILDREN);
			cache_unlock(&mp->mnt_ncmountpt);
		}

		/*
		 * Clear pcpu caches
		 */
		cache_unmounting(mp);
		if (mp->mnt_refs != 1)
			cache_clearmntcache();

		/*
		 * Break out if we are good.  Don't count ncp refs if the
		 * mount is aliased.
		 */
		ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
		      NULL : mp->mnt_ncmountpt.ncp;
		if (mp->mnt_refs == 1 &&
		    (ncp == NULL || (ncp->nc_refs == 1 &&
				     TAILQ_FIRST(&ncp->nc_list) == NULL))) {
			break;
		}

		/*
		 * If forcing the unmount, clean out any p->p_textnch
		 * nchandles that match this mount.
		 */
		if (flags & MNT_FORCE)
			allproc_scan(&unmount_allproc_cb, mp, 0);

		/*
		 * Sleep and retry.
		 */
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		if ((retry & 15) == 15) {
			mount_warning(mp,
				      "(%p) debug - retry %d, "
				      "%d namecache refs, %d mount refs",
				      mp, retry,
				      (ncp ? ncp->nc_refs - 1 : 0),
				      mp->mnt_refs - 1);
		}
	}

	error = 0;
	ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
	      NULL : mp->mnt_ncmountpt.ncp;
	if (mp->mnt_refs != 1 ||
	    (ncp != NULL && (ncp->nc_refs != 1 ||
			     TAILQ_FIRST(&ncp->nc_list)))) {
		mount_warning(mp,
			      "(%p): %d namecache refs, %d mount refs "
			      "still present",
			      mp,
			      (ncp ? ncp->nc_refs - 1 : 0),
			      mp->mnt_refs - 1);
		if (flags & MNT_FORCE) {
			freeok = 0;
			mount_warning(mp, "forcing unmount\n");
		} else {
			error = EBUSY;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error == 0 ||		/* no error */
			    error == EOPNOTSUPP ||	/* no sync avail */
			    (flags & MNT_FORCE)) {	/* force anyway */
				error = VFS_UNMOUNT(mp, flags);
			}
		}
		if (error) {
			mount_warning(mp,
				      "(%p) unmount: vfs refused to unmount, "
				      "error %d",
				      mp, error);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL && hadsyncer)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	if (mp->mnt_cred) {
		crfree(mp->mnt_cred);
		mp->mnt_cred = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache();
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache();
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;
	} else {
		cache_clearmntcache();
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Emit a warning about mount point mp, prefixed with its resolved path
 * when cache_fullpath() can produce one.  The ctl/args pair is a kprintf
 * format (checked via __printflike on the prototype).
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		/* fall back to the mount pointer / namecache name */
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.  In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * sync system call: walk the mountlist and push dirty data on every
 * read-write mount.  Always succeeds.
 */
int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	return (0);
}

/*
 * Per-mount callback for sys_sync().  Temporarily clears MNT_ASYNC so
 * the sync is not deferred, msyncs and syncs the mount (non-blocking),
 * then restores the flag.  The mnt_token protects the flag updates.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		lwkt_gettoken(&mp->mnt_token);
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		lwkt_reltoken(&mp->mnt_token);
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		lwkt_gettoken(&mp->mnt_token);
		mp->mnt_flag |= asyncflag;
		lwkt_reltoken(&mp->mnt_token);
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;
	/* jailed processes may not manipulate quotas unless enabled */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root
	 * (except for MOUNTCTL_MOUNTFLAGS), and never jailed.
	 */
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor.  fd < 0 means "no descriptor".
	 */
	if (uap->fd >= 0) {
		fp = holdfp(td, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &uap->sysmsg_result);
	if (fp)
		dropfp(td, uap->fd, fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 * The nchandle is copied out of nd before nlookup_done() so the
	 * reference survives; it is released via cache_drop() below.
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

/*
 * Fill *buf with statfs information for the path resolved by *nd.
 * The fsid fields are zeroed for non-root callers.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	error = VFS_STATFS(mp, sp, nd->nl_cred);
	/* ignore error */

	/* report the mount point relative to the caller's root/chroot */
	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statfs information for the filesystem backing
 * descriptor fd.  The fsid fields are zeroed for non-root callers.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);	/* must be called from a process context */
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statvfs information for the path resolved by *nd,
 * deriving f_flag (ST_RDONLY/ST_NOSUID) from the mount flags.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Fill *buf with statvfs information for the filesystem backing
 * descriptor fd.  As in kern_fstatfs(), any overlay mount from the
 * file's nchandle is preferred over the underlying vnode's mount.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* Shared state passed to getfsstat_callback() via mountlist_scan() */
struct getfsstat_info {
	struct statfs *sfsp;	/* user buffer cursor (NULL = count only) */
	long count;		/* mounts visible to the caller */
	long maxcount;		/* capacity of the user buffer */
	int error;
	int flags;
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* with a buffer, report at most what fit; without, the full count */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	/* skip mounts outside the caller's chroot */
	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->sfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);	/* abort the mountlist scan */
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);	/* abort the mountlist scan */
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* Shared state passed to getvfsstat_callback() via mountlist_scan() */
struct getvfsstat_info {
	struct statfs *sfsp;	/* user statfs buffer cursor */
	struct statvfs *vsfsp;	/* user statvfs buffer cursor (NULL = count only) */
	long count;		/* mounts visible to the caller */
	long maxcount;		/* capacity of the user buffers */
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	/* with a buffer, report at most what fit; without, the full count */
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	/* skip mounts outside the caller's chroot */
	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->vsfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
1752 */ 1753 int 1754 sys_fchdir(struct fchdir_args *uap) 1755 { 1756 struct thread *td = curthread; 1757 struct proc *p = td->td_proc; 1758 struct filedesc *fdp = p->p_fd; 1759 struct vnode *vp, *ovp; 1760 struct mount *mp; 1761 struct file *fp; 1762 struct nchandle nch, onch, tnch; 1763 int error; 1764 1765 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1766 return (error); 1767 lwkt_gettoken(&p->p_token); 1768 vp = (struct vnode *)fp->f_data; 1769 vref(vp); 1770 vn_lock(vp, LK_SHARED | LK_RETRY); 1771 if (fp->f_nchandle.ncp == NULL) 1772 error = ENOTDIR; 1773 else 1774 error = checkvp_chdir(vp, td); 1775 if (error) { 1776 vput(vp); 1777 goto done; 1778 } 1779 cache_copy(&fp->f_nchandle, &nch); 1780 1781 /* 1782 * If the ncp has become a mount point, traverse through 1783 * the mount point. 1784 */ 1785 1786 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1787 (mp = cache_findmount(&nch)) != NULL 1788 ) { 1789 error = nlookup_mp(mp, &tnch); 1790 if (error == 0) { 1791 cache_unlock(&tnch); /* leave ref intact */ 1792 vput(vp); 1793 vp = tnch.ncp->nc_vp; 1794 error = vget(vp, LK_SHARED); 1795 KKASSERT(error == 0); 1796 cache_drop(&nch); 1797 nch = tnch; 1798 } 1799 cache_dropmount(mp); 1800 } 1801 if (error == 0) { 1802 spin_lock(&fdp->fd_spin); 1803 ovp = fdp->fd_cdir; 1804 onch = fdp->fd_ncdir; 1805 fdp->fd_cdir = vp; 1806 fdp->fd_ncdir = nch; 1807 spin_unlock(&fdp->fd_spin); 1808 vn_unlock(vp); /* leave ref intact */ 1809 cache_drop(&onch); 1810 vrele(ovp); 1811 } else { 1812 cache_drop(&nch); 1813 vput(vp); 1814 } 1815 fdrop(fp); 1816 done: 1817 lwkt_reltoken(&p->p_token); 1818 return (error); 1819 } 1820 1821 int 1822 kern_chdir(struct nlookupdata *nd) 1823 { 1824 struct thread *td = curthread; 1825 struct proc *p = td->td_proc; 1826 struct filedesc *fdp = p->p_fd; 1827 struct vnode *vp, *ovp; 1828 struct nchandle onch; 1829 int error; 1830 1831 nd->nl_flags |= NLC_SHAREDLOCK; 1832 if ((error = nlookup(nd)) != 0) 1833 return (error); 1834 if ((vp = 
nd->nl_nch.ncp->nc_vp) == NULL) 1835 return (ENOENT); 1836 if ((error = vget(vp, LK_SHARED)) != 0) 1837 return (error); 1838 1839 lwkt_gettoken(&p->p_token); 1840 error = checkvp_chdir(vp, td); 1841 vn_unlock(vp); 1842 if (error == 0) { 1843 spin_lock(&fdp->fd_spin); 1844 ovp = fdp->fd_cdir; 1845 onch = fdp->fd_ncdir; 1846 fdp->fd_ncdir = nd->nl_nch; 1847 fdp->fd_cdir = vp; 1848 spin_unlock(&fdp->fd_spin); 1849 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1850 cache_drop(&onch); 1851 vrele(ovp); 1852 cache_zero(&nd->nl_nch); 1853 } else { 1854 vrele(vp); 1855 } 1856 lwkt_reltoken(&p->p_token); 1857 return (error); 1858 } 1859 1860 /* 1861 * chdir_args(char *path) 1862 * 1863 * Change current working directory (``.''). 1864 */ 1865 int 1866 sys_chdir(struct chdir_args *uap) 1867 { 1868 struct nlookupdata nd; 1869 int error; 1870 1871 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1872 if (error == 0) 1873 error = kern_chdir(&nd); 1874 nlookup_done(&nd); 1875 return (error); 1876 } 1877 1878 /* 1879 * Helper function for raised chroot(2) security function: Refuse if 1880 * any filedescriptors are open directories. 1881 */ 1882 static int 1883 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1884 { 1885 struct vnode *vp; 1886 struct file *fp; 1887 int error; 1888 int fd; 1889 1890 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1891 if ((error = holdvnode(td, fd, &fp)) != 0) 1892 continue; 1893 vp = (struct vnode *)fp->f_data; 1894 if (vp->v_type != VDIR) { 1895 fdrop(fp); 1896 continue; 1897 } 1898 fdrop(fp); 1899 return(EPERM); 1900 } 1901 return (0); 1902 } 1903 1904 /* 1905 * This sysctl determines if we will allow a process to chroot(2) if it 1906 * has a directory open: 1907 * 0: disallowed for all processes. 1908 * 1: allowed for processes that were not already chroot(2)'ed. 1909 * 2: allowed for all processes. 
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts),
	 * subject to the chroot_allow_open_directories sysctl above.
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);		/* leave reference intact */
	if (error == 0) {
		lwkt_gettoken(&p->p_token);
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* first chroot also establishes the jail directory */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
		if ((p->p_flags & P_DIDCHROOT) == 0) {
			p->p_flags |= P_DIDCHROOT;
			if (p->p_depth <= 65535 - 32)
				p->p_depth += 32;
		}
		lwkt_reltoken(&p->p_token);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * chroot_kernel_args(char *path)
 *
 * Set the kernel-wide root directory/namecache entry via
 * vfs_cache_setroot().  Requires PRIV_VFS_CHROOT.
 */
int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	/* vp ref and nch hold are consumed by vfs_cache_setroot() */
	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open the file described by *nd with the given open flags and creation
 * mode.  On success the new file descriptor index is returned in *res.
 * Handles the p_dupfd/fdopen() back-door and O_EXLOCK/O_SHLOCK advisory
 * locking.  Always consumes the nlookupdata (nlookup_done).
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	/* creation mode: apply umask, never allow sticky bit */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 *
	 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode
	 * lock for O_RDWR opens on executables in order to avoid a VTEXT
	 * detection race.  The NLC_EXCLLOCK_IFEXEC handles this case.
	 *
	 * NOTE: We need a flag to separate terminal vnode locking from
	 *	 parent locking.  O_CREAT needs parent locking, but O_TRUNC
	 *	 and O_RDWR only need to lock the terminal vnode exclusively.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC)) == 0) {
		nd->nl_flags |= NLC_SHAREDLOCK;
		if (flags & O_RDWR)
			nd->nl_flags |= NLC_EXCLLOCK_IFEXEC;
	}

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		atomic_set_int(&fp->f_flag, FHASLOCK);	/* race ok */
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	if (oflags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;

	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				  uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				  &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a special file (device node, whiteout, or HAMMER special
 * directory) at the path resolved by *nd.  The file type is taken from
 * the S_IFMT bits of mode; each type has its own privilege check.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);	/* must be called from a process context */

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mknodat(struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a FIFO (VFIFO node) at the path resolved by *nd with the given
 * creation mode (masked by the process umask).
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkfifoat_args(int fd, char *path, mode_t mode)
 *
 * Create a named pipe.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Enforce the hardlink_check_uid/gid policies above for a link to vp.
 * Returns 0 if the link is permitted, EPERM otherwise.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Hardlink the file resolved by *nd to the new path resolved by *linknd.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
2491 * 2492 * You may only hardlink a file which you have write permission 2493 * on or which you own. 2494 * 2495 * XXX relookup on vget failure / race ? 2496 */ 2497 bwillinode(1); 2498 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2499 if ((error = nlookup(nd)) != 0) 2500 return (error); 2501 vp = nd->nl_nch.ncp->nc_vp; 2502 KKASSERT(vp != NULL); 2503 if (vp->v_type == VDIR) 2504 return (EPERM); /* POSIX */ 2505 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2506 return (error); 2507 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2508 return (error); 2509 2510 /* 2511 * Unlock the source so we can lookup the target without deadlocking 2512 * (XXX vp is locked already, possible other deadlock?). The target 2513 * must not exist. 2514 */ 2515 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2516 nd->nl_flags &= ~NLC_NCPISLOCKED; 2517 cache_unlock(&nd->nl_nch); 2518 vn_unlock(vp); 2519 2520 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2521 if ((error = nlookup(linknd)) != 0) { 2522 vrele(vp); 2523 return (error); 2524 } 2525 if (linknd->nl_nch.ncp->nc_vp) { 2526 vrele(vp); 2527 return (EEXIST); 2528 } 2529 VFS_MODIFYING(vp->v_mount); 2530 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2531 if (error) { 2532 vrele(vp); 2533 return (error); 2534 } 2535 2536 /* 2537 * Finally run the new API VOP. 2538 */ 2539 error = can_hardlink(vp, td, td->td_ucred); 2540 if (error == 0) { 2541 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2542 vp, linknd->nl_cred); 2543 } 2544 vput(vp); 2545 return (error); 2546 } 2547 2548 /* 2549 * link_args(char *path, char *link) 2550 * 2551 * Make a hard file link. 
 */
int
sys_link(struct link_args *uap)
{
        struct nlookupdata nd, linknd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0) {
                error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
                if (error == 0)
                        error = kern_link(&nd, &linknd);
                nlookup_done(&linknd);
        }
        nlookup_done(&nd);
        return (error);
}

/*
 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 *
 * Make a hard file link.  The path1 argument is relative to the directory
 * associated with fd1, and similarly the path2 argument is relative to
 * the directory associated with fd2.
 */
int
sys_linkat(struct linkat_args *uap)
{
        struct nlookupdata nd, linknd;
        struct file *fp1, *fp2;
        int error;

        error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
            (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
        if (error == 0) {
                error = nlookup_init_at(&linknd, &fp2, uap->fd2,
                    uap->path2, UIO_USERSPACE, 0);
                if (error == 0)
                        error = kern_link(&nd, &linknd);
                nlookup_done_at(&linknd, fp2);
        }
        nlookup_done_at(&nd, fp1);
        return (error);
}

/*
 * Create a symbolic link at the location described by nd whose contents
 * are 'path'.  The caller is responsible for nlookup_done(nd).
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
        struct vattr vattr;
        struct vnode *vp;
        struct vnode *dvp;
        int error;

        bwillinode(1);
        nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
        if ((error = nlookup(nd)) != 0)
                return (error);
        /* the target name must not already exist */
        if (nd->nl_nch.ncp->nc_vp)
                return (EEXIST);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        dvp = nd->nl_dvp;
        VATTR_NULL(&vattr);
        vattr.va_mode = mode;
        error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
        if (error == 0)
                vput(vp);
        return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct symlink_args *uap)
{
        struct thread *td = curthread;
        struct nlookupdata nd;
        char *path;
        int error;
        int mode;

        /* stage the link target through an objcache pathname buffer */
        path = objcache_get(namei_oc, M_WAITOK);
        error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
        if (error == 0) {
                error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
                if (error == 0) {
                        mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
                        error = kern_symlink(&nd, path, mode);
                }
                nlookup_done(&nd);
        }
        objcache_put(namei_oc, path);
        return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
 */
int
sys_symlinkat(struct symlinkat_args *uap)
{
        struct thread *td = curthread;
        struct nlookupdata nd;
        struct file *fp;
        char *path1;
        int error;
        int mode;

        path1 = objcache_get(namei_oc, M_WAITOK);
        error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
        if (error == 0) {
                error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
                    UIO_USERSPACE, 0);
                if (error == 0) {
                        mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
                        error = kern_symlink(&nd, path1, mode);
                }
                nlookup_done_at(&nd, fp);
        }
        objcache_put(namei_oc, path1);
        return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
int
sys_undelete(struct undelete_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        bwillinode(1);
        /*
         * NOTE(review): nd.nl_flags is touched even when nlookup_init()
         * failed; this relies on nlookup_init() always initializing nd --
         * confirm against nlookup_init()'s contract.
         */
        nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = ncp_writechk(&nd.nl_nch);
        if (error == 0) {
                error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
                    NAMEI_DELETE);
        }
        nlookup_done(&nd);
        return (error);
}

/*
 * Remove the file or entry described by nd.  The caller is responsible
 * for calling nlookup_done(nd).
 */
int
kern_unlink(struct nlookupdata *nd)
{
        int error;

        bwillinode(1);
        nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
        if ((error = nlookup(nd)) != 0)
                return (error);
        if ((error = ncp_writechk(&nd->nl_nch)) != 0)
                return (error);
        error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
        return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct unlink_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_unlink(&nd);
        nlookup_done(&nd);
        return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
2745 */ 2746 int 2747 sys_unlinkat(struct unlinkat_args *uap) 2748 { 2749 struct nlookupdata nd; 2750 struct file *fp; 2751 int error; 2752 2753 if (uap->flags & ~AT_REMOVEDIR) 2754 return (EINVAL); 2755 2756 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2757 if (error == 0) { 2758 if (uap->flags & AT_REMOVEDIR) 2759 error = kern_rmdir(&nd); 2760 else 2761 error = kern_unlink(&nd); 2762 } 2763 nlookup_done_at(&nd, fp); 2764 return (error); 2765 } 2766 2767 int 2768 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2769 { 2770 struct thread *td = curthread; 2771 struct file *fp; 2772 struct vnode *vp; 2773 struct vattr vattr; 2774 off_t new_offset; 2775 int error; 2776 2777 fp = holdfp(td, fd, -1); 2778 if (fp == NULL) 2779 return (EBADF); 2780 if (fp->f_type != DTYPE_VNODE) { 2781 error = ESPIPE; 2782 goto done; 2783 } 2784 vp = (struct vnode *)fp->f_data; 2785 2786 switch (whence) { 2787 case L_INCR: 2788 spin_lock(&fp->f_spin); 2789 new_offset = fp->f_offset + offset; 2790 error = 0; 2791 break; 2792 case L_XTND: 2793 error = VOP_GETATTR_FP(vp, &vattr, fp); 2794 spin_lock(&fp->f_spin); 2795 new_offset = offset + vattr.va_size; 2796 break; 2797 case L_SET: 2798 new_offset = offset; 2799 error = 0; 2800 spin_lock(&fp->f_spin); 2801 break; 2802 default: 2803 new_offset = 0; 2804 error = EINVAL; 2805 spin_lock(&fp->f_spin); 2806 break; 2807 } 2808 2809 /* 2810 * Validate the seek position. Negative offsets are not allowed 2811 * for regular files or directories. 2812 * 2813 * Normally we would also not want to allow negative offsets for 2814 * character and block-special devices. However kvm addresses 2815 * on 64 bit architectures might appear to be negative and must 2816 * be allowed. 
2817 */ 2818 if (error == 0) { 2819 if (new_offset < 0 && 2820 (vp->v_type == VREG || vp->v_type == VDIR)) { 2821 error = EINVAL; 2822 } else { 2823 fp->f_offset = new_offset; 2824 } 2825 } 2826 *res = fp->f_offset; 2827 spin_unlock(&fp->f_spin); 2828 done: 2829 dropfp(td, fd, fp); 2830 2831 return (error); 2832 } 2833 2834 /* 2835 * lseek_args(int fd, int pad, off_t offset, int whence) 2836 * 2837 * Reposition read/write file offset. 2838 */ 2839 int 2840 sys_lseek(struct lseek_args *uap) 2841 { 2842 int error; 2843 2844 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2845 &uap->sysmsg_offset); 2846 2847 return (error); 2848 } 2849 2850 /* 2851 * Check if current process can access given file. amode is a bitmask of *_OK 2852 * access bits. flags is a bitmask of AT_* flags. 2853 */ 2854 int 2855 kern_access(struct nlookupdata *nd, int amode, int flags) 2856 { 2857 struct vnode *vp; 2858 int error, mode; 2859 2860 if (flags & ~AT_EACCESS) 2861 return (EINVAL); 2862 nd->nl_flags |= NLC_SHAREDLOCK; 2863 if ((error = nlookup(nd)) != 0) 2864 return (error); 2865 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2866 return (error); 2867 retry: 2868 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2869 if (error) 2870 return (error); 2871 2872 /* Flags == 0 means only check for existence. */ 2873 if (amode) { 2874 mode = 0; 2875 if (amode & R_OK) 2876 mode |= VREAD; 2877 if (amode & W_OK) 2878 mode |= VWRITE; 2879 if (amode & X_OK) 2880 mode |= VEXEC; 2881 if ((mode & VWRITE) == 0 || 2882 (error = vn_writechk(vp)) == 0) { 2883 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2884 } 2885 2886 /* 2887 * If the file handle is stale we have to re-resolve the 2888 * entry with the ncp held exclusively. This is a hack 2889 * at the moment. 
2890 */ 2891 if (error == ESTALE) { 2892 vput(vp); 2893 cache_unlock(&nd->nl_nch); 2894 cache_lock(&nd->nl_nch); 2895 cache_setunresolved(&nd->nl_nch); 2896 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2897 if (error == 0) { 2898 vp = NULL; 2899 goto retry; 2900 } 2901 return(error); 2902 } 2903 } 2904 vput(vp); 2905 return (error); 2906 } 2907 2908 /* 2909 * access_args(char *path, int flags) 2910 * 2911 * Check access permissions. 2912 */ 2913 int 2914 sys_access(struct access_args *uap) 2915 { 2916 struct nlookupdata nd; 2917 int error; 2918 2919 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2920 if (error == 0) 2921 error = kern_access(&nd, uap->flags, 0); 2922 nlookup_done(&nd); 2923 return (error); 2924 } 2925 2926 2927 /* 2928 * eaccess_args(char *path, int flags) 2929 * 2930 * Check access permissions. 2931 */ 2932 int 2933 sys_eaccess(struct eaccess_args *uap) 2934 { 2935 struct nlookupdata nd; 2936 int error; 2937 2938 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2939 if (error == 0) 2940 error = kern_access(&nd, uap->flags, AT_EACCESS); 2941 nlookup_done(&nd); 2942 return (error); 2943 } 2944 2945 2946 /* 2947 * faccessat_args(int fd, char *path, int amode, int flags) 2948 * 2949 * Check access permissions. 
 */
int
sys_faccessat(struct faccessat_args *uap)
{
        struct nlookupdata nd;
        struct file *fp;
        int error;

        error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
            NLC_FOLLOW);
        if (error == 0)
                error = kern_access(&nd, uap->amode, uap->flags);
        nlookup_done_at(&nd, fp);
        return (error);
}

/*
 * Resolve nd and fill in *st with the attributes of the resulting
 * vnode.  The caller is responsible for calling nlookup_done(nd).
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
        int error;
        struct vnode *vp;

        nd->nl_flags |= NLC_SHAREDLOCK;
        if ((error = nlookup(nd)) != 0)
                return (error);
again:
        if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
                return (ENOENT);

        if ((error = vget(vp, LK_SHARED)) != 0)
                return (error);
        error = vn_stat(vp, st, nd->nl_cred);

        /*
         * If the file handle is stale we have to re-resolve the
         * entry with the ncp held exclusively.  This is a hack
         * at the moment.
         */
        if (error == ESTALE) {
                vput(vp);
                cache_unlock(&nd->nl_nch);
                cache_lock(&nd->nl_nch);
                cache_setunresolved(&nd->nl_nch);
                error = cache_resolve(&nd->nl_nch, nd->nl_cred);
                if (error == 0)
                        goto again;
        } else {
                vput(vp);
        }
        return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
        struct nlookupdata nd;
        struct stat st;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0) {
                error = kern_stat(&nd, &st);
                if (error == 0)
                        error = copyout(&st, uap->ub, sizeof(*uap->ub));
        }
        nlookup_done(&nd);
        return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
        struct nlookupdata nd;
        struct stat st;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0) {
                error = kern_stat(&nd, &st);
                if (error == 0)
                        error = copyout(&st, uap->ub, sizeof(*uap->ub));
        }
        nlookup_done(&nd);
        return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct fstatat_args *uap)
{
        struct nlookupdata nd;
        struct stat st;
        int error;
        int flags;
        struct file *fp;

        if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
                return (EINVAL);

        flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

        error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
            UIO_USERSPACE, flags);
        if (error == 0) {
                error = kern_stat(&nd, &st);
                if (error == 0)
                        error = copyout(&st, uap->sb, sizeof(*uap->sb));
        }
        nlookup_done_at(&nd, fp);
        return (error);
}

/*
 * Common backend for pathconf/lpathconf: resolve the path with the
 * given nlookup flags and run VOP_PATHCONF() on the resulting vnode,
 * returning the value via *sysmsg_regp.
 */
static int
kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
{
        struct nlookupdata nd;
        struct vnode *vp;
        int error;

        vp = NULL;
        error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
        nlookup_done(&nd);
        if (error == 0) {
                error = VOP_PATHCONF(vp, name, sysmsg_regp);
                vput(vp);
        }
        return (error);
}

/*
 * pathconf_args(char *path, int name)
 *
 * Get configurable pathname variables.
3101 */ 3102 int 3103 sys_pathconf(struct pathconf_args *uap) 3104 { 3105 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3106 &uap->sysmsg_reg)); 3107 } 3108 3109 /* 3110 * lpathconf_Args(char *path, int name) 3111 * 3112 * Get configurable pathname variables, but don't follow symlinks. 3113 */ 3114 int 3115 sys_lpathconf(struct lpathconf_args *uap) 3116 { 3117 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3118 } 3119 3120 /* 3121 * XXX: daver 3122 * kern_readlink isn't properly split yet. There is a copyin burried 3123 * in VOP_READLINK(). 3124 */ 3125 int 3126 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3127 { 3128 struct thread *td = curthread; 3129 struct vnode *vp; 3130 struct iovec aiov; 3131 struct uio auio; 3132 int error; 3133 3134 nd->nl_flags |= NLC_SHAREDLOCK; 3135 if ((error = nlookup(nd)) != 0) 3136 return (error); 3137 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3138 if (error) 3139 return (error); 3140 if (vp->v_type != VLNK) { 3141 error = EINVAL; 3142 } else { 3143 aiov.iov_base = buf; 3144 aiov.iov_len = count; 3145 auio.uio_iov = &aiov; 3146 auio.uio_iovcnt = 1; 3147 auio.uio_offset = 0; 3148 auio.uio_rw = UIO_READ; 3149 auio.uio_segflg = UIO_USERSPACE; 3150 auio.uio_td = td; 3151 auio.uio_resid = count; 3152 error = VOP_READLINK(vp, &auio, td->td_ucred); 3153 } 3154 vput(vp); 3155 *res = count - auio.uio_resid; 3156 return (error); 3157 } 3158 3159 /* 3160 * readlink_args(char *path, char *buf, int count) 3161 * 3162 * Return target name of a symbolic link. 
 */
int
sys_readlink(struct readlink_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0) {
                error = kern_readlink(&nd, uap->buf, uap->count,
                    &uap->sysmsg_result);
        }
        nlookup_done(&nd);
        return (error);
}

/*
 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
 *
 * Return target name of a symbolic link.  The path is relative to the
 * directory associated with fd.
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
        struct nlookupdata nd;
        struct file *fp;
        int error;

        error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
        if (error == 0) {
                error = kern_readlink(&nd, uap->buf, uap->bufsize,
                    &uap->sysmsg_result);
        }
        nlookup_done_at(&nd, fp);
        return (error);
}

/*
 * Set the file flags on the vnode underlying vp via VOP_SETATTR().
 * Shared backend for the chflags family of syscalls.
 */
static int
setfflags(struct vnode *vp, u_long flags)
{
        struct thread *td = curthread;
        int error;
        struct vattr vattr;

        /*
         * Prevent non-root users from setting flags on devices.  When
         * a device is reused, users can retain ownership of the device
         * if they are allowed to set flags and programs assume that
         * chown can't fail when done as root.
         */
        if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
            ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
                return (error);

        /*
         * note: vget is required for any operation that might mod the vnode
         * so VINACTIVE is properly cleared.
         */
        if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_flags = flags;
                error = VOP_SETATTR(vp, &vattr, td->td_ucred);
                vput(vp);
        }
        return (error);
}

/*
 * chflags(const char *path, u_long flags)
 *
 * Change flags of a file given a path name.
 */
int
sys_chflags(struct chflags_args *uap)
{
        struct nlookupdata nd;
        struct vnode *vp;
        int error;

        vp = NULL;
        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = ncp_writechk(&nd.nl_nch);
        if (error == 0)
                error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
        nlookup_done(&nd);
        if (error == 0) {
                error = setfflags(vp, uap->flags);
                vrele(vp);
        }
        return (error);
}

/*
 * lchflags(const char *path, u_long flags)
 *
 * Change flags of a file given a path name, but don't follow symlinks.
 */
int
sys_lchflags(struct lchflags_args *uap)
{
        struct nlookupdata nd;
        struct vnode *vp;
        int error;

        vp = NULL;
        /* identical to sys_chflags() except NLC_FOLLOW is omitted */
        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = ncp_writechk(&nd.nl_nch);
        if (error == 0)
                error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
        nlookup_done(&nd);
        if (error == 0) {
                error = setfflags(vp, uap->flags);
                vrele(vp);
        }
        return (error);
}

/*
 * fchflags_args(int fd, u_long flags)
 *
 * Change flags of a file given a file descriptor.
 */
int
sys_fchflags(struct fchflags_args *uap)
{
        struct thread *td = curthread;
        struct file *fp;
        int error;

        if ((error = holdvnode(td, uap->fd, &fp)) != 0)
                return (error);
        if (fp->f_nchandle.ncp)
                error = ncp_writechk(&fp->f_nchandle);
        if (error == 0)
                error = setfflags((struct vnode *) fp->f_data, uap->flags);
        fdrop(fp);
        return (error);
}

/*
 * chflagsat_args(int fd, const char *path, u_long flags, int atflags)
 * change flags given a pathname relative to a filedescriptor
 */
int sys_chflagsat(struct chflagsat_args *uap)
{
        struct nlookupdata nd;
        struct vnode *vp;
        struct file *fp;
        int error;
        int lookupflags;

        if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
                return (EINVAL);

        lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

        vp = NULL;
        error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0)
                error = ncp_writechk(&nd.nl_nch);
        if (error == 0)
                error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
        nlookup_done_at(&nd, fp);
        if (error == 0) {
                error = setfflags(vp, uap->flags);
                vrele(vp);
        }
        return (error);
}


/*
 * Set the file mode on the vnode vp.  Shared backend for the chmod
 * family of syscalls.
 *
 * note: vget is required for any operation that might mod the vnode
 * so VINACTIVE is properly cleared.
 */
static int
setfmode(struct vnode *vp, int mode)
{
        struct thread *td = curthread;
        int error;
        struct vattr vattr;

        if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
                VATTR_NULL(&vattr);
                vattr.va_mode = mode & ALLPERMS;
                error = VOP_SETATTR(vp, &vattr, td->td_ucred);
                /* invalidate cached write/execute-ok bits for this vnode */
                cache_inval_wxok(vp);
                vput(vp);
        }
        return error;
}

/*
 * Resolve nd and change the mode of the resulting file.  The caller is
 * responsible for calling nlookup_done(nd).
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
        struct vnode *vp;
        int error;

        if ((error = nlookup(nd)) != 0)
                return (error);
        if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
                return (error);
        if ((error = ncp_writechk(&nd->nl_nch)) == 0)
                error = setfmode(vp, mode);
        vrele(vp);
        return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct chmod_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = kern_chmod(&nd, uap->mode);
        nlookup_done(&nd);
        return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 */
int
sys_lchmod(struct lchmod_args *uap)
{
        struct nlookupdata nd;
        int error;

        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
        if (error == 0)
                error = kern_chmod(&nd, uap->mode);
        nlookup_done(&nd);
        return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
3420 */ 3421 int 3422 sys_fchmod(struct fchmod_args *uap) 3423 { 3424 struct thread *td = curthread; 3425 struct file *fp; 3426 int error; 3427 3428 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3429 return (error); 3430 if (fp->f_nchandle.ncp) 3431 error = ncp_writechk(&fp->f_nchandle); 3432 if (error == 0) 3433 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3434 fdrop(fp); 3435 return (error); 3436 } 3437 3438 /* 3439 * fchmodat_args(char *path, int mode) 3440 * 3441 * Change mode of a file pointed to by fd/path. 3442 */ 3443 int 3444 sys_fchmodat(struct fchmodat_args *uap) 3445 { 3446 struct nlookupdata nd; 3447 struct file *fp; 3448 int error; 3449 int flags; 3450 3451 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3452 return (EINVAL); 3453 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3454 3455 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3456 UIO_USERSPACE, flags); 3457 if (error == 0) 3458 error = kern_chmod(&nd, uap->mode); 3459 nlookup_done_at(&nd, fp); 3460 return (error); 3461 } 3462 3463 static int 3464 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3465 { 3466 struct thread *td = curthread; 3467 int error; 3468 struct vattr vattr; 3469 uid_t o_uid; 3470 gid_t o_gid; 3471 uint64_t size; 3472 3473 /* 3474 * note: vget is required for any operation that might mod the vnode 3475 * so VINACTIVE is properly cleared. 
3476 */ 3477 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3478 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3479 return error; 3480 o_uid = vattr.va_uid; 3481 o_gid = vattr.va_gid; 3482 size = vattr.va_size; 3483 3484 VATTR_NULL(&vattr); 3485 vattr.va_uid = uid; 3486 vattr.va_gid = gid; 3487 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3488 vput(vp); 3489 } 3490 3491 if (error == 0) { 3492 if (uid == -1) 3493 uid = o_uid; 3494 if (gid == -1) 3495 gid = o_gid; 3496 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3497 VFS_ACCOUNT(mp, uid, gid, size); 3498 } 3499 3500 return error; 3501 } 3502 3503 int 3504 kern_chown(struct nlookupdata *nd, int uid, int gid) 3505 { 3506 struct vnode *vp; 3507 int error; 3508 3509 if ((error = nlookup(nd)) != 0) 3510 return (error); 3511 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3512 return (error); 3513 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3514 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3515 vrele(vp); 3516 return (error); 3517 } 3518 3519 /* 3520 * chown(char *path, int uid, int gid) 3521 * 3522 * Set ownership given a path name. 3523 */ 3524 int 3525 sys_chown(struct chown_args *uap) 3526 { 3527 struct nlookupdata nd; 3528 int error; 3529 3530 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3531 if (error == 0) 3532 error = kern_chown(&nd, uap->uid, uap->gid); 3533 nlookup_done(&nd); 3534 return (error); 3535 } 3536 3537 /* 3538 * lchown_args(char *path, int uid, int gid) 3539 * 3540 * Set ownership given a path name, do not cross symlinks. 3541 */ 3542 int 3543 sys_lchown(struct lchown_args *uap) 3544 { 3545 struct nlookupdata nd; 3546 int error; 3547 3548 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3549 if (error == 0) 3550 error = kern_chown(&nd, uap->uid, uap->gid); 3551 nlookup_done(&nd); 3552 return (error); 3553 } 3554 3555 /* 3556 * fchown_args(int fd, int uid, int gid) 3557 * 3558 * Set ownership given a file descriptor. 
3559 */ 3560 int 3561 sys_fchown(struct fchown_args *uap) 3562 { 3563 struct thread *td = curthread; 3564 struct proc *p = td->td_proc; 3565 struct file *fp; 3566 int error; 3567 3568 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3569 return (error); 3570 if (fp->f_nchandle.ncp) 3571 error = ncp_writechk(&fp->f_nchandle); 3572 if (error == 0) 3573 error = setfown(p->p_fd->fd_ncdir.mount, 3574 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3575 fdrop(fp); 3576 return (error); 3577 } 3578 3579 /* 3580 * fchownat(int fd, char *path, int uid, int gid, int flags) 3581 * 3582 * Set ownership of file pointed to by fd/path. 3583 */ 3584 int 3585 sys_fchownat(struct fchownat_args *uap) 3586 { 3587 struct nlookupdata nd; 3588 struct file *fp; 3589 int error; 3590 int flags; 3591 3592 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3593 return (EINVAL); 3594 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3595 3596 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3597 UIO_USERSPACE, flags); 3598 if (error == 0) 3599 error = kern_chown(&nd, uap->uid, uap->gid); 3600 nlookup_done_at(&nd, fp); 3601 return (error); 3602 } 3603 3604 3605 static int 3606 getutimes(struct timeval *tvp, struct timespec *tsp) 3607 { 3608 struct timeval tv[2]; 3609 int error; 3610 3611 if (tvp == NULL) { 3612 microtime(&tv[0]); 3613 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3614 tsp[1] = tsp[0]; 3615 } else { 3616 if ((error = itimerfix(tvp)) != 0) 3617 return (error); 3618 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3619 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3620 } 3621 return 0; 3622 } 3623 3624 static int 3625 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3626 { 3627 struct timespec tsnow; 3628 int error; 3629 3630 *nullflag = 0; 3631 nanotime(&tsnow); 3632 if (ts == NULL) { 3633 newts[0] = tsnow; 3634 newts[1] = tsnow; 3635 *nullflag = 1; 3636 return (0); 3637 } 3638 3639 newts[0] = ts[0]; 3640 newts[1] = ts[1]; 3641 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3642 return (0); 3643 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3644 *nullflag = 1; 3645 3646 if (newts[0].tv_nsec == UTIME_OMIT) 3647 newts[0].tv_sec = VNOVAL; 3648 else if (newts[0].tv_nsec == UTIME_NOW) 3649 newts[0] = tsnow; 3650 else if ((error = itimespecfix(&newts[0])) != 0) 3651 return (error); 3652 3653 if (newts[1].tv_nsec == UTIME_OMIT) 3654 newts[1].tv_sec = VNOVAL; 3655 else if (newts[1].tv_nsec == UTIME_NOW) 3656 newts[1] = tsnow; 3657 else if ((error = itimespecfix(&newts[1])) != 0) 3658 return (error); 3659 3660 return (0); 3661 } 3662 3663 static int 3664 setutimes(struct vnode *vp, struct vattr *vattr, 3665 const struct timespec *ts, int nullflag) 3666 { 3667 struct thread *td = curthread; 3668 int error; 3669 3670 VATTR_NULL(vattr); 3671 vattr->va_atime = ts[0]; 3672 vattr->va_mtime = ts[1]; 3673 if (nullflag) 3674 vattr->va_vaflags |= VA_UTIMES_NULL; 3675 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3676 3677 return error; 3678 } 3679 3680 int 3681 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3682 { 3683 struct timespec ts[2]; 3684 int error; 3685 3686 if (tptr) { 3687 if ((error = getutimes(tptr, ts)) != 0) 3688 return (error); 3689 } 3690 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3691 return (error); 3692 } 3693 3694 /* 3695 * utimes_args(char *path, struct timeval *tptr) 3696 * 3697 * Set the access and modification times of a file. 3698 */ 3699 int 3700 sys_utimes(struct utimes_args *uap) 3701 { 3702 struct timeval tv[2]; 3703 struct nlookupdata nd; 3704 int error; 3705 3706 if (uap->tptr) { 3707 error = copyin(uap->tptr, tv, sizeof(tv)); 3708 if (error) 3709 return (error); 3710 } 3711 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3712 if (error == 0) 3713 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3714 nlookup_done(&nd); 3715 return (error); 3716 } 3717 3718 /* 3719 * lutimes_args(char *path, struct timeval *tptr) 3720 * 3721 * Set the access and modification times of a file. 3722 */ 3723 int 3724 sys_lutimes(struct lutimes_args *uap) 3725 { 3726 struct timeval tv[2]; 3727 struct nlookupdata nd; 3728 int error; 3729 3730 if (uap->tptr) { 3731 error = copyin(uap->tptr, tv, sizeof(tv)); 3732 if (error) 3733 return (error); 3734 } 3735 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3736 if (error == 0) 3737 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3738 nlookup_done(&nd); 3739 return (error); 3740 } 3741 3742 /* 3743 * Set utimes on a file descriptor. The creds used to open the 3744 * file are used to determine whether the operation is allowed 3745 * or not. 3746 */ 3747 int 3748 kern_futimens(int fd, struct timespec *ts) 3749 { 3750 struct thread *td = curthread; 3751 struct timespec newts[2]; 3752 struct file *fp; 3753 struct vnode *vp; 3754 struct vattr vattr; 3755 int nullflag; 3756 int error; 3757 3758 error = getutimens(ts, newts, &nullflag); 3759 if (error) 3760 return (error); 3761 if ((error = holdvnode(td, fd, &fp)) != 0) 3762 return (error); 3763 if (fp->f_nchandle.ncp) 3764 error = ncp_writechk(&fp->f_nchandle); 3765 if (error == 0) { 3766 vp = fp->f_data; 3767 error = vget(vp, LK_EXCLUSIVE); 3768 if (error == 0) { 3769 error = VOP_GETATTR_FP(vp, &vattr, fp); 3770 if (error == 0) { 3771 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3772 fp->f_cred); 3773 } 3774 if (error == 0) { 3775 error = setutimes(vp, &vattr, newts, nullflag); 3776 } 3777 vput(vp); 3778 } 3779 } 3780 fdrop(fp); 3781 return (error); 3782 } 3783 3784 /* 3785 * futimens_args(int fd, struct timespec *ts) 3786 * 3787 * Set the access and modification times of a file. 
 */
int
sys_futimens(struct futimens_args *uap)
{
	struct timespec ts[2];
	int error;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}
	error = kern_futimens(uap->fd, uap->ts ? ts : NULL);
	return (error);
}

/*
 * Backend for futimes(): convert the optional timeval pair to
 * timespecs and hand off to kern_futimens().
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_futimens(fd, tptr ? ts : NULL);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	return (error);
}

/*
 * Path-based backend for the *utimes*() family.  ts may be NULL
 * ("set both times to now").  Only AT_SYMLINK_NOFOLLOW is accepted in
 * flags; the caller is responsible for having set up nd's symlink
 * follow policy to match.
 */
int
kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
{
	struct timespec newts[2];
	struct vnode *vp;
	struct vattr vattr;
	int nullflag;
	int error;

	if (flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);

	nd->nl_flags |= NLC_OWN | NLC_WRITE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_writechk(vp)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, newts, nullflag);
			vput(vp);
		}
	}
	/* drop the reference acquired by cache_vref() */
	vrele(vp);
	return (error);
}

/*
 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags);
 *
 * Set file access and modification times of a file.
 */
int
sys_utimensat(struct utimensat_args *uap)
{
	struct timespec ts[2];
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}

	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Path-based truncate backend.  Resolves the path with write+truncate
 * intent, locks the vnode exclusively and sets va_size.  When quotas
 * are enabled the size delta is accounted against the file's owner.
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}
	if (vfs_quota_enabled) {
		/* record owner and prior size for the accounting delta below */
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
	}
done:
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 */
int
sys_truncate(struct truncate_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_truncate(&nd, uap->length);
	nlookup_done(&nd);
	return error;
}

/*
 * Descriptor-based truncate backend.  The descriptor must have been
 * opened for writing and must not be append-only.  When quotas are
 * enabled the size delta is accounted against the file's owner.
 */
int
kern_ftruncate(int fd, off_t length)
{
	struct thread *td = curthread;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;
	struct mount *mp;

	if (length < 0)
		return(EINVAL);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	/* refuse writes through a read-only mount if we have an nch handle */
	if (fp->f_nchandle.ncp) {
		error = ncp_writechk(&fp->f_nchandle);
		if (error)
			goto done;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EINVAL;
		goto done;
	}
	if (fp->f_flag & FAPPENDONLY) {	/* inode was set append-only */
		error = EINVAL;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
		vn_unlock(vp);
		goto done;
	}

	if (vfs_quota_enabled) {
		/* record owner and prior size for the accounting delta below */
		error = VOP_GETATTR_FP(vp, &vattr, fp);
		KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp);
		mp = vq_vptomp(vp);
		VFS_ACCOUNT(mp, uid, gid, length - old_size);
	}
	vn_unlock(vp);
done:
	fdrop(fp);
	return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((obj = vp->v_object) != NULL) {
		/* flush dirty VM pages unless the fs opted out (MNTK_NOMSYNC) */
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Rename backend.  Resolves both paths, revalidates them after the
 * necessarily-unlocked window in the middle, enforces the usual
 * rename restrictions, and issues VOP_NRENAME (or VOP_NREMOVE for
 * the hardlink-to-itself case).  Returns EAGAIN when a concurrent
 * namespace change forces the caller to retry the whole lookup.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;

	/* sample the generation so the unlock window can be detected below */
	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.
	 */
	if (fromnd->nl_nch.ncp->nc_generation != fncp_gen ||
	    tond->nl_nch.ncp->nc_generation != tncp_gen) {
		kprintf("kern_rename: retry due to gen on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * If either fromnd or tond are marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * Make sure the parent directories linkages are the same.
	 * XXX shouldn't be needed any more w/ generation check above.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	/* kern_rename() returns EAGAIN on concurrent namespace changes */
	do {
		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&fromnd, &tond);
			nlookup_done(&tond);
		}
		nlookup_done(&fromnd);
	} while (error == EAGAIN);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_renameat(struct renameat_args *uap)
{
	struct nlookupdata oldnd, newnd;
	struct file *oldfp, *newfp;
	int error;

	/* kern_rename() returns EAGAIN on concurrent namespace changes */
	do {
		error = nlookup_init_at(&oldnd, &oldfp,
					uap->oldfd, uap->old,
					UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init_at(&newnd, &newfp,
						uap->newfd, uap->new,
						UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&oldnd, &newnd);
			nlookup_done_at(&newnd, newfp);
		}
		nlookup_done_at(&oldnd, oldfp);
	} while (error == EAGAIN);
	return (error);
}

/*
 * Mkdir backend.  The new directory's mode is masked by the process
 * umask; the created vnode returned by VOP_NMKDIR is immediately
 * released since the caller only needs the side effect.
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
int
sys_mkdir(struct mkdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkdirat(struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Rmdir backend.  Resolves the path with delete intent and issues
 * VOP_NRMDIR.  Mount points are refused even if empty.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EBUSY);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Shared backend for getdirentries()/getdents().  Reads directory
 * entries into buf via VOP_READDIR, advances the descriptor's seek
 * offset, optionally returns the starting offset in *basep, and
 * returns the number of bytes produced in *res.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;

	/*
	 * WARNING!  *basep may not be wide enough to accomodate the
	 * seek offset.   XXX should we hack this to return the upper 32 bits
	 * for offsets greater then 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
	long base;
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
				   &uap->sysmsg_result, UIO_USERSPACE);

	if (error == 0 && uap->basep)
		error = copyout(&base, uap->basep, sizeof(*uap->basep));
	return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 */
int
sys_getdents(struct getdents_args *uap)
{
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
				   &uap->sysmsg_result, UIO_USERSPACE);

	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 */
int
sys_umask(struct umask_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;

	fdp = p->p_fd;
	/* return the previous mask, install the new one */
	uap->sysmsg_result = fdp->fd_cmask;
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 */
int
sys_revoke(struct revoke_args *uap)
{
	struct nlookupdata nd;
	struct vattr vattr;
	struct vnode *vp;
	struct ucred *cred;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	/* keep the lookup creds alive past nlookup_done() for checks below */
	cred = crhold(nd.nl_cred);
	nlookup_done(&nd);
	if (error == 0) {
		/* NOTE(review): inner "if (error == 0)" is redundant here */
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr);
		/* non-owners need PRIV_VFS_REVOKE */
		if (error == 0 && cred->cr_uid != vattr.va_uid)
			error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0);
		if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) {
			/* devices: only revoke if someone actually has it open */
			if (vcount(vp) > 0)
				error = vrevoke(vp, cred);
		} else if (error == 0) {
			error = vrevoke(vp, cred);
		}
		vrele(vp);
	}
	if (cred)
		crfree(cred);
	return (error);
}

/*
 * getfh_args(char *fname, fhandle_t *fhp)
 *
 * Get (NFS) file handle
 *
 * NOTE: We use the fsid of the covering mount, even if it is a nullfs
 * mount.  This allows nullfs mounts to be explicitly exported.
 *
 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
 *
 *	    nullfs mounts of subdirectories are not safe.  That is, it will
 *	    work, but you do not really have protection against access to
 *	    the related parent directories.
 */
int
sys_getfh(struct getfh_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	fhandle_t fh;
	struct vnode *vp;
	struct mount *mp;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error == 0) {
		bzero(&fh, sizeof(fh));
		fh.fh_fsid = mp->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &fh.fh_fid);
		vput(vp);
		if (error == 0)
			error = copyout(&fh, uap->fhp, sizeof(fh));
	}
	return (error);
}

/*
 * fhopen_args(const struct fhandle *u_fhp, int flags)
 *
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the priv_check() call or this becomes one giant
 * security hole.
 */
int
sys_fhopen(struct fhopen_args *uap)
{
	struct thread *td = curthread;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	int fmode, mode, error = 0, type;
	struct file *nfp;
	struct file *fp;
	int indx;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);

	/*
	 * Why not allow a non-read/write open for our lockd?
	 */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);

	/*
	 * Find the mount point
	 */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL) {
		error = ESTALE;
		goto done2;
	}
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	if (error)
		goto done;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		vn_unlock(vp);				/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error)
			goto bad;
	}

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_nchandle will be associated when fhopen()ing a
	 * directory.  XXX
	 */
	if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
		goto bad;
	fp = nfp;

	error = VOP_OPEN(vp, fmode, td->td_ucred, fp);
	if (error) {
		/*
		 * setting f_ops this way prevents VOP_CLOSE from being
		 * called or fdrop() releasing the vp from v_data.   Since
		 * the VOP_OPEN failed we don't want to VOP_CLOSE.
		 */
		fp->f_ops = &badfileops;
		fp->f_data = NULL;
		goto bad_drop;
	}

	/*
	 * The fp is given its own reference, we still have our ref and lock.
	 *
	 * Assert that all regular files must be created with a VM object.
	 */
	if (vp->v_type == VREG && vp->v_object == NULL) {
		kprintf("fhopen: regular file did not "
			"have VM object: %p\n",
			vp);
		goto bad_drop;
	}

	/*
	 * The open was successful.  Handle any locking requirements.
	 */
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (fmode & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;
		vn_unlock(vp);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK,
					 &lf, type)) != 0) {
			/*
			 * release our private reference.
			 */
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			vrele(vp);
			goto done;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_set_int(&fp->f_flag, FHASLOCK);	/* race ok */
	}

	/*
	 * Clean up.  Associate the file pointer with the previously
	 * reserved descriptor and return it.
	 */
	vput(vp);
	if (uap->flags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	uap->sysmsg_result = indx;
	mount_drop(mp);

	return (error);

bad_drop:
	fsetfd(fdp, NULL, indx);
	fdrop(fp);
bad:
	vput(vp);
done:
	mount_drop(mp);
done2:
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 */
int
sys_fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		error = ESTALE;
	if (error == 0) {
		if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
			error = vn_stat(vp, &sb, td->td_ucred);
			vput(vp);
		}
	}
	if (error == 0)
		error = copyout(&sb, uap->sb, sizeof(sb));
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* hide mounts outside a chroot jail */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
		goto done;
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/*
	 * NOTE(review): this branch hides the fsid from non-root callers,
	 * but PRIV_ROOT was already required above so it appears to be
	 * effectively dead here -- confirm before relying on it.
	 */
	if (priv_check(td, PRIV_ROOT)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 */
int
sys_fhstatvfs(struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* hide mounts outside a chroot jail */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		goto done;
	mp = vp->v_mount;
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);
	return (error);
}


/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;

	/* optional backing file for the attribute store */
	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 */
int
sys_extattr_set_file(struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/*
	 * Resolve the path.  ncp_writechk() enforces read-only mount
	 * restrictions, and the vnode is locked exclusively since the
	 * attribute is being modified.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Build a single-segment write uio over the user's data buffer. */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
 */
int
sys_extattr_get_file(struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct uio auio;
	struct iovec aiov;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/*
	 * Resolve the path.  A shared vnode lock suffices since the
	 * attribute is only being read.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Build a single-segment read uio over the user's data buffer. */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;

	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);
	/* Return the number of bytes actually transferred. */
	uap->sysmsg_result = uap->nbytes - auio.uio_resid;

	vput(vp);
	nlookup_done(&nd);
	return(error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name. The real work happens in VOP_SETEXTATTR().
 */
int
sys_extattr_delete_file(struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return(error);

	/*
	 * Resolve the path with write checks; take the vnode exclusively
	 * since the attribute is being removed.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
		if (error == 0) {
			/* A NULL uio tells VOP_SETEXTATTR() to delete. */
			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
					       attrname, NULL, nd.nl_cred);
			vput(vp);
		}
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Determine if the mount is visible to the process.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct nchandle nch;

	/*
	 * Traverse from the mount point upwards.  If we hit the process
	 * root then the mount point is visible to the process.
	 */
	nch = mp->mnt_ncmountpt;
	while (nch.ncp) {
		if (nch.mount == p->p_fd->fd_nrdir.mount &&
		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
			return(1);
		}
		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
			/*
			 * At the root of this mount; hop across to the
			 * namecache entry the mount is mounted on.
			 */
			nch = nch.mount->mnt_ncmounton;
		} else {
			nch.ncp = nch.ncp->nc_parent;
		}
	}

	/*
	 * If the mount point is not visible to the process, but the
	 * process root is in a subdirectory of the mount, return
	 * TRUE anyway.
	 */
	if (p->p_fd->fd_nrdir.mount == mp)
		return(1);

	return(0);
}

/* Sets priv to PRIV_ROOT in case no matching fs */
static int
get_fspriv(const char *fsname)
{

	/*
	 * The compare lengths include the terminating NUL ("null" is 4
	 * chars + NUL = 5, "tmpfs" is 5 + NUL = 6), so these behave as
	 * exact string matches.
	 */
	if (strncmp("null", fsname, 5) == 0) {
		return PRIV_VFS_MOUNT_NULLFS;
	} else if (strncmp(fsname, "tmpfs", 6) == 0) {
		return PRIV_VFS_MOUNT_TMPFS;
	}

	return PRIV_ROOT;
}

/*
 * __realpath(const char *path, char *buf, size_t len)
 *
 * Resolve path, following symlinks, and copy out the canonical path
 * (relative to the process root) to buf.  The resolved length is
 * returned via sysmsg_szresult.
 */
int
sys___realpath(struct __realpath_args *uap)
{
	struct nlookupdata nd;
	char *rbuf;
	char *fbuf;
	ssize_t rlen;
	int error;

	/*
	 * Invalid length if less than 0. 0 is allowed
	 */
	if ((ssize_t)uap->len < 0)
		return EINVAL;

	rbuf = NULL;
	fbuf = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto done;

	nd.nl_flags |= NLC_SHAREDLOCK;
	error = nlookup(&nd);
	if (error)
		goto done;

	/* A namecache hit without a vnode is a negative entry. */
	if (nd.nl_nch.ncp->nc_vp == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * Shortcut test for existence.
	 */
	if (uap->len == 0) {
		error = ENAMETOOLONG;
		goto done;
	}

	/*
	 * Obtain the path relative to the process root.  The nch must not
	 * be locked for the cache_fullpath() call.
	 */
	if (nd.nl_flags & NLC_NCPISLOCKED) {
		nd.nl_flags &= ~NLC_NCPISLOCKED;
		cache_unlock(&nd.nl_nch);
	}
	error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0);
	if (error)
		goto done;

	/* Copy out the string including its NUL; fail if it won't fit. */
	rlen = (ssize_t)strlen(rbuf);
	if (rlen >= uap->len) {
		error = ENAMETOOLONG;
		goto done;
	}
	error = copyout(rbuf, uap->buf, rlen + 1);
	if (error == 0)
		uap->sysmsg_szresult = rlen;
done:
	nlookup_done(&nd);
	if (fbuf)
		kfree(fbuf, M_TEMP);

	return error;
}