1 /* $NetBSD: vfs_syscalls.c,v 1.396 2009/07/02 12:53:47 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.396 2009/07/02 12:53:47 pooka Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_dir(struct nameidata *, struct lwp *); 114 static int change_flags(struct vnode *, u_long, struct lwp *); 115 static int change_mode(struct vnode *, int, struct lwp *l); 116 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 117 118 void checkdirs(struct vnode *); 119 120 /* 121 * Virtual File System System Calls 122 */ 123 124 /* 125 * Mount a file system. 126 */ 127 128 /* 129 * This table is used to maintain compatibility with 4.3BSD 130 * and NetBSD 0.9 mount syscalls - and possibly other systems. 131 * Note, the order is important! 132 * 133 * Do not modify this table. It should only contain filesystems 134 * supported by NetBSD 0.9 and 4.3BSD. 135 */ 136 const char * const mountcompatnames[] = { 137 NULL, /* 0 = MOUNT_NONE */ 138 MOUNT_FFS, /* 1 = MOUNT_UFS */ 139 MOUNT_NFS, /* 2 */ 140 MOUNT_MFS, /* 3 */ 141 MOUNT_MSDOS, /* 4 */ 142 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 143 MOUNT_FDESC, /* 6 */ 144 MOUNT_KERNFS, /* 7 */ 145 NULL, /* 8 = MOUNT_DEVFS */ 146 MOUNT_AFS, /* 9 */ 147 }; 148 const int nmountcompatnames = sizeof(mountcompatnames) / 149 sizeof(mountcompatnames[0]); 150 151 static int 152 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 153 void *data, size_t *data_len) 154 { 155 struct mount *mp; 156 int error = 0, saved_flags; 157 158 mp = vp->v_mount; 159 saved_flags = mp->mnt_flag; 160 161 /* We can operate only on VV_ROOT nodes. */ 162 if ((vp->v_vflag & VV_ROOT) == 0) { 163 error = EINVAL; 164 goto out; 165 } 166 167 /* 168 * We only allow the filesystem to be reloaded if it 169 * is currently mounted read-only. Additionally, we 170 * prevent read-write to read-only downgrades. 171 */ 172 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 173 (mp->mnt_flag & MNT_RDONLY) == 0) { 174 error = EOPNOTSUPP; /* Needs translation */ 175 goto out; 176 } 177 178 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 179 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 180 if (error) 181 goto out; 182 183 if (vfs_busy(mp, NULL)) { 184 error = EPERM; 185 goto out; 186 } 187 188 mutex_enter(&mp->mnt_updating); 189 190 mp->mnt_flag &= ~MNT_OP_FLAGS; 191 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 192 193 /* 194 * Set the mount level flags. 195 */ 196 if (flags & MNT_RDONLY) 197 mp->mnt_flag |= MNT_RDONLY; 198 else if (mp->mnt_flag & MNT_RDONLY) 199 mp->mnt_iflag |= IMNT_WANTRDWR; 200 mp->mnt_flag &= 201 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 202 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 203 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 204 MNT_LOG); 205 mp->mnt_flag |= flags & 206 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 209 MNT_LOG | MNT_IGNORE); 210 211 error = VFS_MOUNT(mp, path, data, data_len); 212 213 if (error && data != NULL) { 214 int error2; 215 216 /* 217 * Update failed; let's try and see if it was an 218 * export request. For compat with 3.0 and earlier. 219 */ 220 error2 = vfs_hooks_reexport(mp, path, data); 221 222 /* 223 * Only update error code if the export request was 224 * understood but some problem occurred while 225 * processing it. 226 */ 227 if (error2 != EJUSTRETURN) 228 error = error2; 229 } 230 231 if (mp->mnt_iflag & IMNT_WANTRDWR) 232 mp->mnt_flag &= ~MNT_RDONLY; 233 if (error) 234 mp->mnt_flag = saved_flags; 235 mp->mnt_flag &= ~MNT_OP_FLAGS; 236 mp->mnt_iflag &= ~IMNT_WANTRDWR; 237 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 238 if (mp->mnt_syncer == NULL) 239 error = vfs_allocate_syncvnode(mp); 240 } else { 241 if (mp->mnt_syncer != NULL) 242 vfs_deallocate_syncvnode(mp); 243 } 244 mutex_exit(&mp->mnt_updating); 245 vfs_unbusy(mp, false, NULL); 246 247 out: 248 return (error); 249 } 250 251 static int 252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 253 { 254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 255 int error; 256 257 /* Copy file-system type from userspace. */ 258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 259 if (error) { 260 /* 261 * Historically, filesystem types were identified by numbers. 262 * If we get an integer for the filesystem type instead of a 263 * string, we check to see if it matches one of the historic 264 * filesystem types. 265 */ 266 u_long fsindex = (u_long)fstype; 267 if (fsindex >= nmountcompatnames || 268 mountcompatnames[fsindex] == NULL) 269 return ENODEV; 270 strlcpy(fstypename, mountcompatnames[fsindex], 271 sizeof(fstypename)); 272 } 273 274 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 275 if (strcmp(fstypename, "ufs") == 0) 276 fstypename[0] = 'f'; 277 278 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 279 return 0; 280 281 /* If we can autoload a vfs module, try again */ 282 mutex_enter(&module_lock); 283 (void)module_autoload(fstype, MODULE_CLASS_VFS); 284 mutex_exit(&module_lock); 285 286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 287 return 0; 288 289 return ENODEV; 290 } 291 292 static int 293 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 294 const char *path, int flags, void *data, size_t *data_len, u_int recurse) 295 { 296 struct mount *mp; 297 struct vnode *vp = *vpp; 298 struct vattr va; 299 int error; 300 301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 303 if (error) 304 return error; 305 306 /* Can't make a non-dir a mount-point (from here anyway). */ 307 if (vp->v_type != VDIR) 308 return ENOTDIR; 309 310 /* 311 * If the user is not root, ensure that they own the directory 312 * onto which we are attempting to mount. 313 */ 314 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 315 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 316 (error = kauth_authorize_generic(l->l_cred, 317 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 318 return error; 319 } 320 321 if (flags & MNT_EXPORTED) 322 return EINVAL; 323 324 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) 325 return error; 326 327 /* 328 * Check if a file-system is not already mounted on this vnode. 329 */ 330 if (vp->v_mountedhere != NULL) 331 return EBUSY; 332 333 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) 334 return ENOMEM; 335 336 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 337 338 /* 339 * The underlying file system may refuse the mount for 340 * various reasons. Allow the user to force it to happen. 341 * 342 * Set the mount level flags. 343 */ 344 mp->mnt_flag = flags & 345 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 346 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 347 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 348 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 349 350 mutex_enter(&mp->mnt_updating); 351 error = VFS_MOUNT(mp, path, data, data_len); 352 mp->mnt_flag &= ~MNT_OP_FLAGS; 353 354 /* 355 * Put the new filesystem on the mount list after root. 356 */ 357 cache_purge(vp); 358 if (error != 0) { 359 vp->v_mountedhere = NULL; 360 mutex_exit(&mp->mnt_updating); 361 vfs_unbusy(mp, false, NULL); 362 vfs_destroy(mp); 363 return error; 364 } 365 366 mp->mnt_iflag &= ~IMNT_WANTRDWR; 367 mutex_enter(&mountlist_lock); 368 vp->v_mountedhere = mp; 369 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 370 mutex_exit(&mountlist_lock); 371 vn_restorerecurse(vp, recurse); 372 VOP_UNLOCK(vp, 0); 373 checkdirs(vp); 374 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 375 error = vfs_allocate_syncvnode(mp); 376 /* Hold an additional reference to the mount across VFS_START(). */ 377 mutex_exit(&mp->mnt_updating); 378 vfs_unbusy(mp, true, NULL); 379 (void) VFS_STATVFS(mp, &mp->mnt_stat); 380 error = VFS_START(mp, 0); 381 if (error) 382 vrele(vp); 383 /* Drop reference held for VFS_START(). */ 384 vfs_destroy(mp); 385 *vpp = NULL; 386 return error; 387 } 388 389 static int 390 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 391 void *data, size_t *data_len) 392 { 393 struct mount *mp; 394 int error; 395 396 /* If MNT_GETARGS is specified, it should be the only flag. */ 397 if (flags & ~MNT_GETARGS) 398 return EINVAL; 399 400 mp = vp->v_mount; 401 402 /* XXX: probably some notion of "can see" here if we want isolation. */ 403 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 404 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 405 if (error) 406 return error; 407 408 if ((vp->v_vflag & VV_ROOT) == 0) 409 return EINVAL; 410 411 if (vfs_busy(mp, NULL)) 412 return EPERM; 413 414 mutex_enter(&mp->mnt_updating); 415 mp->mnt_flag &= ~MNT_OP_FLAGS; 416 mp->mnt_flag |= MNT_GETARGS; 417 error = VFS_MOUNT(mp, path, data, data_len); 418 mp->mnt_flag &= ~MNT_OP_FLAGS; 419 mutex_exit(&mp->mnt_updating); 420 421 vfs_unbusy(mp, false, NULL); 422 return (error); 423 } 424 425 int 426 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 427 { 428 /* { 429 syscallarg(const char *) type; 430 syscallarg(const char *) path; 431 syscallarg(int) flags; 432 syscallarg(void *) data; 433 syscallarg(size_t) data_len; 434 } */ 435 436 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 437 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 438 SCARG(uap, data_len), retval); 439 } 440 441 int 442 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 443 const char *path, int flags, void *data, enum uio_seg data_seg, 444 size_t data_len, register_t *retval) 445 { 446 struct vnode *vp; 447 void *data_buf = data; 448 u_int recurse; 449 int error; 450 451 /* 452 * Get vnode to be covered 453 */ 454 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 455 if (error != 0) 456 return (error); 457 458 /* 459 * A lookup in VFS_MOUNT might result in an attempt to 460 * lock this vnode again, so make the lock recursive. 461 */ 462 if (vfsops == NULL) { 463 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 464 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 465 recurse = vn_setrecurse(vp); 466 vfsops = vp->v_mount->mnt_op; 467 } else { 468 /* 'type' is userspace */ 469 error = mount_get_vfsops(type, &vfsops); 470 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 471 recurse = vn_setrecurse(vp); 472 if (error != 0) 473 goto done; 474 } 475 } else { 476 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 477 recurse = vn_setrecurse(vp); 478 } 479 480 if (data != NULL && data_seg == UIO_USERSPACE) { 481 if (data_len == 0) { 482 /* No length supplied, use default for filesystem */ 483 data_len = vfsops->vfs_min_mount_data; 484 if (data_len > VFS_MAX_MOUNT_DATA) { 485 error = EINVAL; 486 goto done; 487 } 488 /* 489 * Hopefully a longer buffer won't make copyin() fail. 490 * For compatibility with 3.0 and earlier. 491 */ 492 if (flags & MNT_UPDATE 493 && data_len < sizeof (struct mnt_export_args30)) 494 data_len = sizeof (struct mnt_export_args30); 495 } 496 data_buf = kmem_alloc(data_len, KM_SLEEP); 497 498 /* NFS needs the buffer even for mnt_getargs .... */ 499 error = copyin(data, data_buf, data_len); 500 if (error != 0) 501 goto done; 502 } 503 504 if (flags & MNT_GETARGS) { 505 if (data_len == 0) { 506 error = EINVAL; 507 goto done; 508 } 509 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 510 if (error != 0) 511 goto done; 512 if (data_seg == UIO_USERSPACE) 513 error = copyout(data_buf, data, data_len); 514 *retval = data_len; 515 } else if (flags & MNT_UPDATE) { 516 error = mount_update(l, vp, path, flags, data_buf, &data_len); 517 } else { 518 /* Locking is handled internally in mount_domount(). */ 519 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 520 &data_len, recurse); 521 } 522 523 done: 524 if (vp != NULL) { 525 vn_restorerecurse(vp, recurse); 526 vput(vp); 527 } 528 if (data_buf != data) 529 kmem_free(data_buf, data_len); 530 return (error); 531 } 532 533 /* 534 * Scan all active processes to see if any of them have a current 535 * or root directory onto which the new filesystem has just been 536 * mounted. If so, replace them with the new mount point. 537 */ 538 void 539 checkdirs(struct vnode *olddp) 540 { 541 struct cwdinfo *cwdi; 542 struct vnode *newdp, *rele1, *rele2; 543 struct proc *p; 544 bool retry; 545 546 if (olddp->v_usecount == 1) 547 return; 548 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 549 panic("mount: lost mount"); 550 551 do { 552 retry = false; 553 mutex_enter(proc_lock); 554 PROCLIST_FOREACH(p, &allproc) { 555 if ((p->p_flag & PK_MARKER) != 0) 556 continue; 557 if ((cwdi = p->p_cwdi) == NULL) 558 continue; 559 /* 560 * Can't change to the old directory any more, 561 * so even if we see a stale value it's not a 562 * problem. 563 */ 564 if (cwdi->cwdi_cdir != olddp && 565 cwdi->cwdi_rdir != olddp) 566 continue; 567 retry = true; 568 rele1 = NULL; 569 rele2 = NULL; 570 atomic_inc_uint(&cwdi->cwdi_refcnt); 571 mutex_exit(proc_lock); 572 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 573 if (cwdi->cwdi_cdir == olddp) { 574 rele1 = cwdi->cwdi_cdir; 575 VREF(newdp); 576 cwdi->cwdi_cdir = newdp; 577 } 578 if (cwdi->cwdi_rdir == olddp) { 579 rele2 = cwdi->cwdi_rdir; 580 VREF(newdp); 581 cwdi->cwdi_rdir = newdp; 582 } 583 rw_exit(&cwdi->cwdi_lock); 584 cwdfree(cwdi); 585 if (rele1 != NULL) 586 vrele(rele1); 587 if (rele2 != NULL) 588 vrele(rele2); 589 mutex_enter(proc_lock); 590 break; 591 } 592 mutex_exit(proc_lock); 593 } while (retry); 594 595 if (rootvnode == olddp) { 596 vrele(rootvnode); 597 VREF(newdp); 598 rootvnode = newdp; 599 } 600 vput(newdp); 601 } 602 603 /* 604 * Unmount a file system. 605 * 606 * Note: unmount takes a path to the vnode mounted on as argument, 607 * not special file (as before). 608 */ 609 /* ARGSUSED */ 610 int 611 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 612 { 613 /* { 614 syscallarg(const char *) path; 615 syscallarg(int) flags; 616 } */ 617 struct vnode *vp; 618 struct mount *mp; 619 int error; 620 struct nameidata nd; 621 622 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 623 SCARG(uap, path)); 624 if ((error = namei(&nd)) != 0) 625 return (error); 626 vp = nd.ni_vp; 627 mp = vp->v_mount; 628 atomic_inc_uint(&mp->mnt_refcnt); 629 VOP_UNLOCK(vp, 0); 630 631 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 632 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 633 if (error) { 634 vrele(vp); 635 vfs_destroy(mp); 636 return (error); 637 } 638 639 /* 640 * Don't allow unmounting the root file system. 641 */ 642 if (mp->mnt_flag & MNT_ROOTFS) { 643 vrele(vp); 644 vfs_destroy(mp); 645 return (EINVAL); 646 } 647 648 /* 649 * Must be the root of the filesystem 650 */ 651 if ((vp->v_vflag & VV_ROOT) == 0) { 652 vrele(vp); 653 vfs_destroy(mp); 654 return (EINVAL); 655 } 656 657 vrele(vp); 658 error = dounmount(mp, SCARG(uap, flags), l); 659 vfs_destroy(mp); 660 return error; 661 } 662 663 /* 664 * Do the actual file system unmount. File system is assumed to have 665 * been locked by the caller. 666 * 667 * => Caller hold reference to the mount, explicitly for dounmount(). 668 */ 669 int 670 dounmount(struct mount *mp, int flags, struct lwp *l) 671 { 672 struct vnode *coveredvp; 673 int error; 674 int async; 675 int used_syncer; 676 677 #if NVERIEXEC > 0 678 error = veriexec_unmountchk(mp); 679 if (error) 680 return (error); 681 #endif /* NVERIEXEC > 0 */ 682 683 /* 684 * XXX Freeze syncer. Must do this before locking the 685 * mount point. See dounmount() for details. 686 */ 687 mutex_enter(&syncer_mutex); 688 rw_enter(&mp->mnt_unmounting, RW_WRITER); 689 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 690 rw_exit(&mp->mnt_unmounting); 691 mutex_exit(&syncer_mutex); 692 return ENOENT; 693 } 694 695 used_syncer = (mp->mnt_syncer != NULL); 696 697 /* 698 * XXX Syncer must be frozen when we get here. This should really 699 * be done on a per-mountpoint basis, but the syncer doesn't work 700 * like that. 701 * 702 * The caller of dounmount() must acquire syncer_mutex because 703 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 704 * order, and we must preserve that order to avoid deadlock. 705 * 706 * So, if the file system did not use the syncer, now is 707 * the time to release the syncer_mutex. 708 */ 709 if (used_syncer == 0) 710 mutex_exit(&syncer_mutex); 711 712 mp->mnt_iflag |= IMNT_UNMOUNT; 713 async = mp->mnt_flag & MNT_ASYNC; 714 mp->mnt_flag &= ~MNT_ASYNC; 715 cache_purgevfs(mp); /* remove cache entries for this file sys */ 716 if (mp->mnt_syncer != NULL) 717 vfs_deallocate_syncvnode(mp); 718 error = 0; 719 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 720 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 721 } 722 vfs_scrubvnlist(mp); 723 if (error == 0 || (flags & MNT_FORCE)) 724 error = VFS_UNMOUNT(mp, flags); 725 if (error) { 726 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 727 (void) vfs_allocate_syncvnode(mp); 728 mp->mnt_iflag &= ~IMNT_UNMOUNT; 729 mp->mnt_flag |= async; 730 rw_exit(&mp->mnt_unmounting); 731 if (used_syncer) 732 mutex_exit(&syncer_mutex); 733 return (error); 734 } 735 vfs_scrubvnlist(mp); 736 mutex_enter(&mountlist_lock); 737 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 738 coveredvp->v_mountedhere = NULL; 739 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 740 mp->mnt_iflag |= IMNT_GONE; 741 mutex_exit(&mountlist_lock); 742 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 743 panic("unmount: dangling vnode"); 744 if (used_syncer) 745 mutex_exit(&syncer_mutex); 746 vfs_hooks_unmount(mp); 747 rw_exit(&mp->mnt_unmounting); 748 vfs_destroy(mp); /* reference from mount() */ 749 if (coveredvp != NULLVP) 750 vrele(coveredvp); 751 return (0); 752 } 753 754 /* 755 * Sync each mounted filesystem. 756 */ 757 #ifdef DEBUG 758 int syncprt = 0; 759 struct ctldebug debug0 = { "syncprt", &syncprt }; 760 #endif 761 762 /* ARGSUSED */ 763 int 764 sys_sync(struct lwp *l, const void *v, register_t *retval) 765 { 766 struct mount *mp, *nmp; 767 int asyncflag; 768 769 if (l == NULL) 770 l = &lwp0; 771 772 mutex_enter(&mountlist_lock); 773 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 774 mp = nmp) { 775 if (vfs_busy(mp, &nmp)) { 776 continue; 777 } 778 mutex_enter(&mp->mnt_updating); 779 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 780 asyncflag = mp->mnt_flag & MNT_ASYNC; 781 mp->mnt_flag &= ~MNT_ASYNC; 782 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 783 if (asyncflag) 784 mp->mnt_flag |= MNT_ASYNC; 785 } 786 mutex_exit(&mp->mnt_updating); 787 vfs_unbusy(mp, false, &nmp); 788 } 789 mutex_exit(&mountlist_lock); 790 #ifdef DEBUG 791 if (syncprt) 792 vfs_bufstats(); 793 #endif /* DEBUG */ 794 return (0); 795 } 796 797 /* 798 * Change filesystem quotas. 799 */ 800 /* ARGSUSED */ 801 int 802 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 803 { 804 /* { 805 syscallarg(const char *) path; 806 syscallarg(int) cmd; 807 syscallarg(int) uid; 808 syscallarg(void *) arg; 809 } */ 810 struct mount *mp; 811 int error; 812 struct vnode *vp; 813 814 error = namei_simple_user(SCARG(uap, path), 815 NSM_FOLLOW_TRYEMULROOT, &vp); 816 if (error != 0) 817 return (error); 818 mp = vp->v_mount; 819 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 820 SCARG(uap, arg)); 821 vrele(vp); 822 return (error); 823 } 824 825 int 826 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 827 int root) 828 { 829 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 830 int error = 0; 831 832 /* 833 * If MNT_NOWAIT or MNT_LAZY is specified, do not 834 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 835 * overrides MNT_NOWAIT. 836 */ 837 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 838 (flags != MNT_WAIT && flags != 0)) { 839 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 840 goto done; 841 } 842 843 /* Get the filesystem stats now */ 844 memset(sp, 0, sizeof(*sp)); 845 if ((error = VFS_STATVFS(mp, sp)) != 0) { 846 return error; 847 } 848 849 if (cwdi->cwdi_rdir == NULL) 850 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 851 done: 852 if (cwdi->cwdi_rdir != NULL) { 853 size_t len; 854 char *bp; 855 char c; 856 char *path = PNBUF_GET(); 857 858 bp = path + MAXPATHLEN; 859 *--bp = '\0'; 860 rw_enter(&cwdi->cwdi_lock, RW_READER); 861 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 862 MAXPATHLEN / 2, 0, l); 863 rw_exit(&cwdi->cwdi_lock); 864 if (error) { 865 PNBUF_PUT(path); 866 return error; 867 } 868 len = strlen(bp); 869 if (len != 1) { 870 /* 871 * for mount points that are below our root, we can see 872 * them, so we fix up the pathname and return them. The 873 * rest we cannot see, so we don't allow viewing the 874 * data. 875 */ 876 if (strncmp(bp, sp->f_mntonname, len) == 0 && 877 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 878 (void)strlcpy(sp->f_mntonname, 879 c == '\0' ? "/" : &sp->f_mntonname[len], 880 sizeof(sp->f_mntonname)); 881 } else { 882 if (root) 883 (void)strlcpy(sp->f_mntonname, "/", 884 sizeof(sp->f_mntonname)); 885 else 886 error = EPERM; 887 } 888 } 889 PNBUF_PUT(path); 890 } 891 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 892 return error; 893 } 894 895 /* 896 * Get filesystem statistics by path. 897 */ 898 int 899 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 900 { 901 struct mount *mp; 902 int error; 903 struct vnode *vp; 904 905 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 906 if (error != 0) 907 return error; 908 mp = vp->v_mount; 909 error = dostatvfs(mp, sb, l, flags, 1); 910 vrele(vp); 911 return error; 912 } 913 914 /* ARGSUSED */ 915 int 916 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 917 { 918 /* { 919 syscallarg(const char *) path; 920 syscallarg(struct statvfs *) buf; 921 syscallarg(int) flags; 922 } */ 923 struct statvfs *sb; 924 int error; 925 926 sb = STATVFSBUF_GET(); 927 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 928 if (error == 0) 929 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 930 STATVFSBUF_PUT(sb); 931 return error; 932 } 933 934 /* 935 * Get filesystem statistics by fd. 936 */ 937 int 938 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 939 { 940 file_t *fp; 941 struct mount *mp; 942 int error; 943 944 /* fd_getvnode() will use the descriptor for us */ 945 if ((error = fd_getvnode(fd, &fp)) != 0) 946 return (error); 947 mp = ((struct vnode *)fp->f_data)->v_mount; 948 error = dostatvfs(mp, sb, curlwp, flags, 1); 949 fd_putfile(fd); 950 return error; 951 } 952 953 /* ARGSUSED */ 954 int 955 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 956 { 957 /* { 958 syscallarg(int) fd; 959 syscallarg(struct statvfs *) buf; 960 syscallarg(int) flags; 961 } */ 962 struct statvfs *sb; 963 int error; 964 965 sb = STATVFSBUF_GET(); 966 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 967 if (error == 0) 968 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 969 STATVFSBUF_PUT(sb); 970 return error; 971 } 972 973 974 /* 975 * Get statistics on all filesystems. 976 */ 977 int 978 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 979 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 980 register_t *retval) 981 { 982 int root = 0; 983 struct proc *p = l->l_proc; 984 struct mount *mp, *nmp; 985 struct statvfs *sb; 986 size_t count, maxcount; 987 int error = 0; 988 989 sb = STATVFSBUF_GET(); 990 maxcount = bufsize / entry_sz; 991 mutex_enter(&mountlist_lock); 992 count = 0; 993 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 994 mp = nmp) { 995 if (vfs_busy(mp, &nmp)) { 996 continue; 997 } 998 if (sfsp && count < maxcount) { 999 error = dostatvfs(mp, sb, l, flags, 0); 1000 if (error) { 1001 vfs_unbusy(mp, false, &nmp); 1002 error = 0; 1003 continue; 1004 } 1005 error = copyfn(sb, sfsp, entry_sz); 1006 if (error) { 1007 vfs_unbusy(mp, false, NULL); 1008 goto out; 1009 } 1010 sfsp = (char *)sfsp + entry_sz; 1011 root |= strcmp(sb->f_mntonname, "/") == 0; 1012 } 1013 count++; 1014 vfs_unbusy(mp, false, &nmp); 1015 } 1016 mutex_exit(&mountlist_lock); 1017 1018 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1019 /* 1020 * fake a root entry 1021 */ 1022 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1023 sb, l, flags, 1); 1024 if (error != 0) 1025 goto out; 1026 if (sfsp) { 1027 error = copyfn(sb, sfsp, entry_sz); 1028 if (error != 0) 1029 goto out; 1030 } 1031 count++; 1032 } 1033 if (sfsp && count > maxcount) 1034 *retval = maxcount; 1035 else 1036 *retval = count; 1037 out: 1038 STATVFSBUF_PUT(sb); 1039 return error; 1040 } 1041 1042 int 1043 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1044 { 1045 /* { 1046 syscallarg(struct statvfs *) buf; 1047 syscallarg(size_t) bufsize; 1048 syscallarg(int) flags; 1049 } */ 1050 1051 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1052 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1053 } 1054 1055 /* 1056 * Change current working directory to a given file descriptor. 1057 */ 1058 /* ARGSUSED */ 1059 int 1060 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1061 { 1062 /* { 1063 syscallarg(int) fd; 1064 } */ 1065 struct proc *p = l->l_proc; 1066 struct cwdinfo *cwdi; 1067 struct vnode *vp, *tdp; 1068 struct mount *mp; 1069 file_t *fp; 1070 int error, fd; 1071 1072 /* fd_getvnode() will use the descriptor for us */ 1073 fd = SCARG(uap, fd); 1074 if ((error = fd_getvnode(fd, &fp)) != 0) 1075 return (error); 1076 vp = fp->f_data; 1077 1078 VREF(vp); 1079 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1080 if (vp->v_type != VDIR) 1081 error = ENOTDIR; 1082 else 1083 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1084 if (error) { 1085 vput(vp); 1086 goto out; 1087 } 1088 while ((mp = vp->v_mountedhere) != NULL) { 1089 error = vfs_busy(mp, NULL); 1090 vput(vp); 1091 if (error != 0) 1092 goto out; 1093 error = VFS_ROOT(mp, &tdp); 1094 vfs_unbusy(mp, false, NULL); 1095 if (error) 1096 goto out; 1097 vp = tdp; 1098 } 1099 VOP_UNLOCK(vp, 0); 1100 1101 /* 1102 * Disallow changing to a directory not under the process's 1103 * current root directory (if there is one). 1104 */ 1105 cwdi = p->p_cwdi; 1106 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1107 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1108 vrele(vp); 1109 error = EPERM; /* operation not permitted */ 1110 } else { 1111 vrele(cwdi->cwdi_cdir); 1112 cwdi->cwdi_cdir = vp; 1113 } 1114 rw_exit(&cwdi->cwdi_lock); 1115 1116 out: 1117 fd_putfile(fd); 1118 return (error); 1119 } 1120 1121 /* 1122 * Change this process's notion of the root directory to a given file 1123 * descriptor. 1124 */ 1125 int 1126 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1127 { 1128 struct proc *p = l->l_proc; 1129 struct cwdinfo *cwdi; 1130 struct vnode *vp; 1131 file_t *fp; 1132 int error, fd = SCARG(uap, fd); 1133 1134 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1135 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1136 return error; 1137 /* fd_getvnode() will use the descriptor for us */ 1138 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 1139 return error; 1140 vp = fp->f_data; 1141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1142 if (vp->v_type != VDIR) 1143 error = ENOTDIR; 1144 else 1145 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1146 VOP_UNLOCK(vp, 0); 1147 if (error) 1148 goto out; 1149 VREF(vp); 1150 1151 /* 1152 * Prevent escaping from chroot by putting the root under 1153 * the working directory. Silently chdir to / if we aren't 1154 * already there. 1155 */ 1156 cwdi = p->p_cwdi; 1157 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1158 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1159 /* 1160 * XXX would be more failsafe to change directory to a 1161 * deadfs node here instead 1162 */ 1163 vrele(cwdi->cwdi_cdir); 1164 VREF(vp); 1165 cwdi->cwdi_cdir = vp; 1166 } 1167 1168 if (cwdi->cwdi_rdir != NULL) 1169 vrele(cwdi->cwdi_rdir); 1170 cwdi->cwdi_rdir = vp; 1171 rw_exit(&cwdi->cwdi_lock); 1172 1173 out: 1174 fd_putfile(fd); 1175 return (error); 1176 } 1177 1178 /* 1179 * Change current working directory (``.''). 1180 */ 1181 /* ARGSUSED */ 1182 int 1183 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1184 { 1185 /* { 1186 syscallarg(const char *) path; 1187 } */ 1188 struct proc *p = l->l_proc; 1189 struct cwdinfo *cwdi; 1190 int error; 1191 struct nameidata nd; 1192 1193 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1194 SCARG(uap, path)); 1195 if ((error = change_dir(&nd, l)) != 0) 1196 return (error); 1197 cwdi = p->p_cwdi; 1198 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1199 vrele(cwdi->cwdi_cdir); 1200 cwdi->cwdi_cdir = nd.ni_vp; 1201 rw_exit(&cwdi->cwdi_lock); 1202 return (0); 1203 } 1204 1205 /* 1206 * Change notion of root (``/'') directory. 1207 */ 1208 /* ARGSUSED */ 1209 int 1210 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1211 { 1212 /* { 1213 syscallarg(const char *) path; 1214 } */ 1215 struct proc *p = l->l_proc; 1216 struct cwdinfo *cwdi; 1217 struct vnode *vp; 1218 int error; 1219 struct nameidata nd; 1220 1221 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1222 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1223 return (error); 1224 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1225 SCARG(uap, path)); 1226 if ((error = change_dir(&nd, l)) != 0) 1227 return (error); 1228 1229 cwdi = p->p_cwdi; 1230 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1231 if (cwdi->cwdi_rdir != NULL) 1232 vrele(cwdi->cwdi_rdir); 1233 vp = nd.ni_vp; 1234 cwdi->cwdi_rdir = vp; 1235 1236 /* 1237 * Prevent escaping from chroot by putting the root under 1238 * the working directory. Silently chdir to / if we aren't 1239 * already there. 1240 */ 1241 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1242 /* 1243 * XXX would be more failsafe to change directory to a 1244 * deadfs node here instead 1245 */ 1246 vrele(cwdi->cwdi_cdir); 1247 VREF(vp); 1248 cwdi->cwdi_cdir = vp; 1249 } 1250 rw_exit(&cwdi->cwdi_lock); 1251 1252 return (0); 1253 } 1254 1255 /* 1256 * Common routine for chroot and chdir. 1257 */ 1258 static int 1259 change_dir(struct nameidata *ndp, struct lwp *l) 1260 { 1261 struct vnode *vp; 1262 int error; 1263 1264 if ((error = namei(ndp)) != 0) 1265 return (error); 1266 vp = ndp->ni_vp; 1267 if (vp->v_type != VDIR) 1268 error = ENOTDIR; 1269 else 1270 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1271 1272 if (error) 1273 vput(vp); 1274 else 1275 VOP_UNLOCK(vp, 0); 1276 return (error); 1277 } 1278 1279 /* 1280 * Check permissions, allocate an open file structure, 1281 * and call the device open routine if any. 1282 */ 1283 int 1284 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1285 { 1286 /* { 1287 syscallarg(const char *) path; 1288 syscallarg(int) flags; 1289 syscallarg(int) mode; 1290 } */ 1291 struct proc *p = l->l_proc; 1292 struct cwdinfo *cwdi = p->p_cwdi; 1293 file_t *fp; 1294 struct vnode *vp; 1295 int flags, cmode; 1296 int type, indx, error; 1297 struct flock lf; 1298 struct nameidata nd; 1299 1300 flags = FFLAGS(SCARG(uap, flags)); 1301 if ((flags & (FREAD | FWRITE)) == 0) 1302 return (EINVAL); 1303 if ((error = fd_allocfile(&fp, &indx)) != 0) 1304 return (error); 1305 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1306 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1307 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1308 SCARG(uap, path)); 1309 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1310 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1311 fd_abort(p, fp, indx); 1312 if ((error == EDUPFD || error == EMOVEFD) && 1313 l->l_dupfd >= 0 && /* XXX from fdopen */ 1314 (error = 1315 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1316 *retval = indx; 1317 return (0); 1318 } 1319 if (error == ERESTART) 1320 error = EINTR; 1321 return (error); 1322 } 1323 1324 l->l_dupfd = 0; 1325 vp = nd.ni_vp; 1326 fp->f_flag = flags & FMASK; 1327 fp->f_type = DTYPE_VNODE; 1328 fp->f_ops = &vnops; 1329 fp->f_data = vp; 1330 if (flags & (O_EXLOCK | O_SHLOCK)) { 1331 lf.l_whence = SEEK_SET; 1332 lf.l_start = 0; 1333 lf.l_len = 0; 1334 if (flags & O_EXLOCK) 1335 lf.l_type = F_WRLCK; 1336 else 1337 lf.l_type = F_RDLCK; 1338 type = F_FLOCK; 1339 if ((flags & FNONBLOCK) == 0) 1340 type |= F_WAIT; 1341 VOP_UNLOCK(vp, 0); 1342 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1343 if (error) { 1344 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1345 fd_abort(p, fp, indx); 1346 return (error); 1347 } 1348 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1349 atomic_or_uint(&fp->f_flag, FHASLOCK); 1350 } 1351 VOP_UNLOCK(vp, 0); 1352 *retval = indx; 1353 fd_affix(p, fp, indx); 1354 return (0); 1355 } 1356 1357 static void 1358 vfs__fhfree(fhandle_t *fhp) 1359 { 1360 size_t fhsize; 1361 1362 if (fhp == NULL) { 1363 return; 1364 } 1365 fhsize = FHANDLE_SIZE(fhp); 1366 kmem_free(fhp, fhsize); 1367 } 1368 1369 /* 1370 * vfs_composefh: compose a filehandle. 1371 */ 1372 1373 int 1374 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1375 { 1376 struct mount *mp; 1377 struct fid *fidp; 1378 int error; 1379 size_t needfhsize; 1380 size_t fidsize; 1381 1382 mp = vp->v_mount; 1383 fidp = NULL; 1384 if (*fh_size < FHANDLE_SIZE_MIN) { 1385 fidsize = 0; 1386 } else { 1387 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1388 if (fhp != NULL) { 1389 memset(fhp, 0, *fh_size); 1390 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1391 fidp = &fhp->fh_fid; 1392 } 1393 } 1394 error = VFS_VPTOFH(vp, fidp, &fidsize); 1395 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1396 if (error == 0 && *fh_size < needfhsize) { 1397 error = E2BIG; 1398 } 1399 *fh_size = needfhsize; 1400 return error; 1401 } 1402 1403 int 1404 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1405 { 1406 struct mount *mp; 1407 fhandle_t *fhp; 1408 size_t fhsize; 1409 size_t fidsize; 1410 int error; 1411 1412 *fhpp = NULL; 1413 mp = vp->v_mount; 1414 fidsize = 0; 1415 error = VFS_VPTOFH(vp, NULL, &fidsize); 1416 KASSERT(error != 0); 1417 if (error != E2BIG) { 1418 goto out; 1419 } 1420 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1421 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1422 if (fhp == NULL) { 1423 error = ENOMEM; 1424 goto out; 1425 } 1426 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1427 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1428 if (error == 0) { 1429 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1430 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1431 *fhpp = fhp; 1432 } else { 1433 kmem_free(fhp, fhsize); 1434 } 1435 out: 1436 return error; 1437 } 1438 1439 void 1440 vfs_composefh_free(fhandle_t *fhp) 1441 { 1442 1443 vfs__fhfree(fhp); 1444 } 1445 1446 /* 1447 * vfs_fhtovp: lookup a vnode by a filehandle. 1448 */ 1449 1450 int 1451 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1452 { 1453 struct mount *mp; 1454 int error; 1455 1456 *vpp = NULL; 1457 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1458 if (mp == NULL) { 1459 error = ESTALE; 1460 goto out; 1461 } 1462 if (mp->mnt_op->vfs_fhtovp == NULL) { 1463 error = EOPNOTSUPP; 1464 goto out; 1465 } 1466 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1467 out: 1468 return error; 1469 } 1470 1471 /* 1472 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1473 * the needed size. 1474 */ 1475 1476 int 1477 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1478 { 1479 fhandle_t *fhp; 1480 int error; 1481 1482 *fhpp = NULL; 1483 if (fhsize > FHANDLE_SIZE_MAX) { 1484 return EINVAL; 1485 } 1486 if (fhsize < FHANDLE_SIZE_MIN) { 1487 return EINVAL; 1488 } 1489 again: 1490 fhp = kmem_alloc(fhsize, KM_SLEEP); 1491 if (fhp == NULL) { 1492 return ENOMEM; 1493 } 1494 error = copyin(ufhp, fhp, fhsize); 1495 if (error == 0) { 1496 /* XXX this check shouldn't be here */ 1497 if (FHANDLE_SIZE(fhp) == fhsize) { 1498 *fhpp = fhp; 1499 return 0; 1500 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1501 /* 1502 * a kludge for nfsv2 padded handles. 1503 */ 1504 size_t sz; 1505 1506 sz = FHANDLE_SIZE(fhp); 1507 kmem_free(fhp, fhsize); 1508 fhsize = sz; 1509 goto again; 1510 } else { 1511 /* 1512 * userland told us wrong size. 1513 */ 1514 error = EINVAL; 1515 } 1516 } 1517 kmem_free(fhp, fhsize); 1518 return error; 1519 } 1520 1521 void 1522 vfs_copyinfh_free(fhandle_t *fhp) 1523 { 1524 1525 vfs__fhfree(fhp); 1526 } 1527 1528 /* 1529 * Get file handle system call 1530 */ 1531 int 1532 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1533 { 1534 /* { 1535 syscallarg(char *) fname; 1536 syscallarg(fhandle_t *) fhp; 1537 syscallarg(size_t *) fh_size; 1538 } */ 1539 struct vnode *vp; 1540 fhandle_t *fh; 1541 int error; 1542 struct nameidata nd; 1543 size_t sz; 1544 size_t usz; 1545 1546 /* 1547 * Must be super user 1548 */ 1549 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1550 0, NULL, NULL, NULL); 1551 if (error) 1552 return (error); 1553 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1554 SCARG(uap, fname)); 1555 error = namei(&nd); 1556 if (error) 1557 return (error); 1558 vp = nd.ni_vp; 1559 error = vfs_composefh_alloc(vp, &fh); 1560 vput(vp); 1561 if (error != 0) { 1562 goto out; 1563 } 1564 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1565 if (error != 0) { 1566 goto out; 1567 } 1568 sz = FHANDLE_SIZE(fh); 1569 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1570 if (error != 0) { 1571 goto out; 1572 } 1573 if (usz >= sz) { 1574 error = copyout(fh, SCARG(uap, fhp), sz); 1575 } else { 1576 error = E2BIG; 1577 } 1578 out: 1579 vfs_composefh_free(fh); 1580 return (error); 1581 } 1582 1583 /* 1584 * Open a file given a file handle. 1585 * 1586 * Check permissions, allocate an open file structure, 1587 * and call the device open routine if any. 1588 */ 1589 1590 int 1591 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1592 register_t *retval) 1593 { 1594 file_t *fp; 1595 struct vnode *vp = NULL; 1596 kauth_cred_t cred = l->l_cred; 1597 file_t *nfp; 1598 int type, indx, error=0; 1599 struct flock lf; 1600 struct vattr va; 1601 fhandle_t *fh; 1602 int flags; 1603 proc_t *p; 1604 1605 p = curproc; 1606 1607 /* 1608 * Must be super user 1609 */ 1610 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1611 0, NULL, NULL, NULL))) 1612 return (error); 1613 1614 flags = FFLAGS(oflags); 1615 if ((flags & (FREAD | FWRITE)) == 0) 1616 return (EINVAL); 1617 if ((flags & O_CREAT)) 1618 return (EINVAL); 1619 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1620 return (error); 1621 fp = nfp; 1622 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1623 if (error != 0) { 1624 goto bad; 1625 } 1626 error = vfs_fhtovp(fh, &vp); 1627 if (error != 0) { 1628 goto bad; 1629 } 1630 1631 /* Now do an effective vn_open */ 1632 1633 if (vp->v_type == VSOCK) { 1634 error = EOPNOTSUPP; 1635 goto bad; 1636 } 1637 error = vn_openchk(vp, cred, flags); 1638 if (error != 0) 1639 goto bad; 1640 if (flags & O_TRUNC) { 1641 VOP_UNLOCK(vp, 0); /* XXX */ 1642 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1643 VATTR_NULL(&va); 1644 va.va_size = 0; 1645 error = VOP_SETATTR(vp, &va, cred); 1646 if (error) 1647 goto bad; 1648 } 1649 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1650 goto bad; 1651 if (flags & FWRITE) { 1652 mutex_enter(&vp->v_interlock); 1653 vp->v_writecount++; 1654 mutex_exit(&vp->v_interlock); 1655 } 1656 1657 /* done with modified vn_open, now finish what sys_open does. */ 1658 1659 fp->f_flag = flags & FMASK; 1660 fp->f_type = DTYPE_VNODE; 1661 fp->f_ops = &vnops; 1662 fp->f_data = vp; 1663 if (flags & (O_EXLOCK | O_SHLOCK)) { 1664 lf.l_whence = SEEK_SET; 1665 lf.l_start = 0; 1666 lf.l_len = 0; 1667 if (flags & O_EXLOCK) 1668 lf.l_type = F_WRLCK; 1669 else 1670 lf.l_type = F_RDLCK; 1671 type = F_FLOCK; 1672 if ((flags & FNONBLOCK) == 0) 1673 type |= F_WAIT; 1674 VOP_UNLOCK(vp, 0); 1675 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1676 if (error) { 1677 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1678 fd_abort(p, fp, indx); 1679 return (error); 1680 } 1681 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1682 atomic_or_uint(&fp->f_flag, FHASLOCK); 1683 } 1684 VOP_UNLOCK(vp, 0); 1685 *retval = indx; 1686 fd_affix(p, fp, indx); 1687 vfs_copyinfh_free(fh); 1688 return (0); 1689 1690 bad: 1691 fd_abort(p, fp, indx); 1692 if (vp != NULL) 1693 vput(vp); 1694 vfs_copyinfh_free(fh); 1695 return (error); 1696 } 1697 1698 int 1699 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1700 { 1701 /* { 1702 syscallarg(const void *) fhp; 1703 syscallarg(size_t) fh_size; 1704 syscallarg(int) flags; 1705 } */ 1706 1707 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1708 SCARG(uap, flags), retval); 1709 } 1710 1711 int 1712 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1713 { 1714 int error; 1715 fhandle_t *fh; 1716 struct vnode *vp; 1717 1718 /* 1719 * Must be super user 1720 */ 1721 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1722 0, NULL, NULL, NULL))) 1723 return (error); 1724 1725 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1726 if (error != 0) 1727 return error; 1728 1729 error = vfs_fhtovp(fh, &vp); 1730 vfs_copyinfh_free(fh); 1731 if (error != 0) 1732 return error; 1733 1734 error = vn_stat(vp, sb); 1735 vput(vp); 1736 return error; 1737 } 1738 1739 1740 /* ARGSUSED */ 1741 int 1742 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1743 { 1744 /* { 1745 syscallarg(const void *) fhp; 1746 syscallarg(size_t) fh_size; 1747 syscallarg(struct stat *) sb; 1748 } */ 1749 struct stat sb; 1750 int error; 1751 1752 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1753 if (error) 1754 return error; 1755 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1756 } 1757 1758 int 1759 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1760 int flags) 1761 { 1762 fhandle_t *fh; 1763 struct mount *mp; 1764 struct vnode *vp; 1765 int error; 1766 1767 /* 1768 * Must be super user 1769 */ 1770 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1771 0, NULL, NULL, NULL))) 1772 return error; 1773 1774 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1775 if (error != 0) 1776 return error; 1777 1778 error = vfs_fhtovp(fh, &vp); 1779 vfs_copyinfh_free(fh); 1780 if (error != 0) 1781 return error; 1782 1783 mp = vp->v_mount; 1784 error = dostatvfs(mp, sb, l, flags, 1); 1785 vput(vp); 1786 return error; 1787 } 1788 1789 /* ARGSUSED */ 1790 int 1791 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1792 { 1793 /* { 1794 syscallarg(const void *) fhp; 1795 syscallarg(size_t) fh_size; 1796 syscallarg(struct statvfs *) buf; 1797 syscallarg(int) flags; 1798 } */ 1799 struct statvfs *sb = STATVFSBUF_GET(); 1800 int error; 1801 1802 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1803 SCARG(uap, flags)); 1804 if (error == 0) 1805 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1806 STATVFSBUF_PUT(sb); 1807 return error; 1808 } 1809 1810 /* 1811 * Create a special file. 1812 */ 1813 /* ARGSUSED */ 1814 int 1815 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1816 register_t *retval) 1817 { 1818 /* { 1819 syscallarg(const char *) path; 1820 syscallarg(mode_t) mode; 1821 syscallarg(dev_t) dev; 1822 } */ 1823 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1824 SCARG(uap, dev), retval); 1825 } 1826 1827 int 1828 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1829 register_t *retval) 1830 { 1831 struct proc *p = l->l_proc; 1832 struct vnode *vp; 1833 struct vattr vattr; 1834 int error, optype; 1835 struct nameidata nd; 1836 char *path; 1837 const char *cpath; 1838 enum uio_seg seg = UIO_USERSPACE; 1839 1840 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1841 0, NULL, NULL, NULL)) != 0) 1842 return (error); 1843 1844 optype = VOP_MKNOD_DESCOFFSET; 1845 1846 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1847 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1848 1849 if ((error = namei(&nd)) != 0) 1850 goto out; 1851 vp = nd.ni_vp; 1852 if (vp != NULL) 1853 error = EEXIST; 1854 else { 1855 VATTR_NULL(&vattr); 1856 /* We will read cwdi->cwdi_cmask unlocked. */ 1857 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1858 vattr.va_rdev = dev; 1859 1860 switch (mode & S_IFMT) { 1861 case S_IFMT: /* used by badsect to flag bad sectors */ 1862 vattr.va_type = VBAD; 1863 break; 1864 case S_IFCHR: 1865 vattr.va_type = VCHR; 1866 break; 1867 case S_IFBLK: 1868 vattr.va_type = VBLK; 1869 break; 1870 case S_IFWHT: 1871 optype = VOP_WHITEOUT_DESCOFFSET; 1872 break; 1873 case S_IFREG: 1874 #if NVERIEXEC > 0 1875 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1876 O_CREAT); 1877 #endif /* NVERIEXEC > 0 */ 1878 vattr.va_type = VREG; 1879 vattr.va_rdev = VNOVAL; 1880 optype = VOP_CREATE_DESCOFFSET; 1881 break; 1882 default: 1883 error = EINVAL; 1884 break; 1885 } 1886 } 1887 if (!error) { 1888 switch (optype) { 1889 case VOP_WHITEOUT_DESCOFFSET: 1890 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1891 if (error) 1892 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1893 vput(nd.ni_dvp); 1894 break; 1895 1896 case VOP_MKNOD_DESCOFFSET: 1897 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1898 &nd.ni_cnd, &vattr); 1899 if (error == 0) 1900 vput(nd.ni_vp); 1901 break; 1902 1903 case VOP_CREATE_DESCOFFSET: 1904 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1905 &nd.ni_cnd, &vattr); 1906 if (error == 0) 1907 vput(nd.ni_vp); 1908 break; 1909 } 1910 } else { 1911 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1912 if (nd.ni_dvp == vp) 1913 vrele(nd.ni_dvp); 1914 else 1915 vput(nd.ni_dvp); 1916 if (vp) 1917 vrele(vp); 1918 } 1919 out: 1920 VERIEXEC_PATH_PUT(path); 1921 return (error); 1922 } 1923 1924 /* 1925 * Create a named pipe. 1926 */ 1927 /* ARGSUSED */ 1928 int 1929 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1930 { 1931 /* { 1932 syscallarg(const char *) path; 1933 syscallarg(int) mode; 1934 } */ 1935 struct proc *p = l->l_proc; 1936 struct vattr vattr; 1937 int error; 1938 struct nameidata nd; 1939 1940 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1941 SCARG(uap, path)); 1942 if ((error = namei(&nd)) != 0) 1943 return (error); 1944 if (nd.ni_vp != NULL) { 1945 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1946 if (nd.ni_dvp == nd.ni_vp) 1947 vrele(nd.ni_dvp); 1948 else 1949 vput(nd.ni_dvp); 1950 vrele(nd.ni_vp); 1951 return (EEXIST); 1952 } 1953 VATTR_NULL(&vattr); 1954 vattr.va_type = VFIFO; 1955 /* We will read cwdi->cwdi_cmask unlocked. */ 1956 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1957 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1958 if (error == 0) 1959 vput(nd.ni_vp); 1960 return (error); 1961 } 1962 1963 /* 1964 * Make a hard file link. 1965 */ 1966 /* ARGSUSED */ 1967 int 1968 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1969 { 1970 /* { 1971 syscallarg(const char *) path; 1972 syscallarg(const char *) link; 1973 } */ 1974 struct vnode *vp; 1975 struct nameidata nd; 1976 int error; 1977 1978 error = namei_simple_user(SCARG(uap, path), 1979 NSM_FOLLOW_TRYEMULROOT, &vp); 1980 if (error != 0) 1981 return (error); 1982 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1983 SCARG(uap, link)); 1984 if ((error = namei(&nd)) != 0) 1985 goto out; 1986 if (nd.ni_vp) { 1987 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1988 if (nd.ni_dvp == nd.ni_vp) 1989 vrele(nd.ni_dvp); 1990 else 1991 vput(nd.ni_dvp); 1992 vrele(nd.ni_vp); 1993 error = EEXIST; 1994 goto out; 1995 } 1996 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1997 out: 1998 vrele(vp); 1999 return (error); 2000 } 2001 2002 /* 2003 * Make a symbolic link. 2004 */ 2005 /* ARGSUSED */ 2006 int 2007 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2008 { 2009 /* { 2010 syscallarg(const char *) path; 2011 syscallarg(const char *) link; 2012 } */ 2013 struct proc *p = l->l_proc; 2014 struct vattr vattr; 2015 char *path; 2016 int error; 2017 struct nameidata nd; 2018 2019 path = PNBUF_GET(); 2020 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL); 2021 if (error) 2022 goto out; 2023 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2024 SCARG(uap, link)); 2025 if ((error = namei(&nd)) != 0) 2026 goto out; 2027 if (nd.ni_vp) { 2028 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2029 if (nd.ni_dvp == nd.ni_vp) 2030 vrele(nd.ni_dvp); 2031 else 2032 vput(nd.ni_dvp); 2033 vrele(nd.ni_vp); 2034 error = EEXIST; 2035 goto out; 2036 } 2037 VATTR_NULL(&vattr); 2038 vattr.va_type = VLNK; 2039 /* We will read cwdi->cwdi_cmask unlocked. */ 2040 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2041 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2042 if (error == 0) 2043 vput(nd.ni_vp); 2044 out: 2045 PNBUF_PUT(path); 2046 return (error); 2047 } 2048 2049 /* 2050 * Delete a whiteout from the filesystem. 2051 */ 2052 /* ARGSUSED */ 2053 int 2054 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2055 { 2056 /* { 2057 syscallarg(const char *) path; 2058 } */ 2059 int error; 2060 struct nameidata nd; 2061 2062 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2063 UIO_USERSPACE, SCARG(uap, path)); 2064 error = namei(&nd); 2065 if (error) 2066 return (error); 2067 2068 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2069 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2070 if (nd.ni_dvp == nd.ni_vp) 2071 vrele(nd.ni_dvp); 2072 else 2073 vput(nd.ni_dvp); 2074 if (nd.ni_vp) 2075 vrele(nd.ni_vp); 2076 return (EEXIST); 2077 } 2078 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2079 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2080 vput(nd.ni_dvp); 2081 return (error); 2082 } 2083 2084 /* 2085 * Delete a name from the filesystem. 2086 */ 2087 /* ARGSUSED */ 2088 int 2089 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2090 { 2091 /* { 2092 syscallarg(const char *) path; 2093 } */ 2094 2095 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2096 } 2097 2098 int 2099 do_sys_unlink(const char *arg, enum uio_seg seg) 2100 { 2101 struct vnode *vp; 2102 int error; 2103 struct nameidata nd; 2104 char *path; 2105 const char *cpath; 2106 2107 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2108 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2109 2110 if ((error = namei(&nd)) != 0) 2111 goto out; 2112 vp = nd.ni_vp; 2113 2114 /* 2115 * The root of a mounted filesystem cannot be deleted. 2116 */ 2117 if (vp->v_vflag & VV_ROOT) { 2118 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2119 if (nd.ni_dvp == vp) 2120 vrele(nd.ni_dvp); 2121 else 2122 vput(nd.ni_dvp); 2123 vput(vp); 2124 error = EBUSY; 2125 goto out; 2126 } 2127 2128 #if NVERIEXEC > 0 2129 /* Handle remove requests for veriexec entries. */ 2130 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2131 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2132 if (nd.ni_dvp == vp) 2133 vrele(nd.ni_dvp); 2134 else 2135 vput(nd.ni_dvp); 2136 vput(vp); 2137 goto out; 2138 } 2139 #endif /* NVERIEXEC > 0 */ 2140 2141 #ifdef FILEASSOC 2142 (void)fileassoc_file_delete(vp); 2143 #endif /* FILEASSOC */ 2144 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2145 out: 2146 VERIEXEC_PATH_PUT(path); 2147 return (error); 2148 } 2149 2150 /* 2151 * Reposition read/write file offset. 2152 */ 2153 int 2154 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2155 { 2156 /* { 2157 syscallarg(int) fd; 2158 syscallarg(int) pad; 2159 syscallarg(off_t) offset; 2160 syscallarg(int) whence; 2161 } */ 2162 kauth_cred_t cred = l->l_cred; 2163 file_t *fp; 2164 struct vnode *vp; 2165 struct vattr vattr; 2166 off_t newoff; 2167 int error, fd; 2168 2169 fd = SCARG(uap, fd); 2170 2171 if ((fp = fd_getfile(fd)) == NULL) 2172 return (EBADF); 2173 2174 vp = fp->f_data; 2175 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2176 error = ESPIPE; 2177 goto out; 2178 } 2179 2180 switch (SCARG(uap, whence)) { 2181 case SEEK_CUR: 2182 newoff = fp->f_offset + SCARG(uap, offset); 2183 break; 2184 case SEEK_END: 2185 error = VOP_GETATTR(vp, &vattr, cred); 2186 if (error) { 2187 goto out; 2188 } 2189 newoff = SCARG(uap, offset) + vattr.va_size; 2190 break; 2191 case SEEK_SET: 2192 newoff = SCARG(uap, offset); 2193 break; 2194 default: 2195 error = EINVAL; 2196 goto out; 2197 } 2198 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2199 *(off_t *)retval = fp->f_offset = newoff; 2200 } 2201 out: 2202 fd_putfile(fd); 2203 return (error); 2204 } 2205 2206 /* 2207 * Positional read system call. 2208 */ 2209 int 2210 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2211 { 2212 /* { 2213 syscallarg(int) fd; 2214 syscallarg(void *) buf; 2215 syscallarg(size_t) nbyte; 2216 syscallarg(off_t) offset; 2217 } */ 2218 file_t *fp; 2219 struct vnode *vp; 2220 off_t offset; 2221 int error, fd = SCARG(uap, fd); 2222 2223 if ((fp = fd_getfile(fd)) == NULL) 2224 return (EBADF); 2225 2226 if ((fp->f_flag & FREAD) == 0) { 2227 fd_putfile(fd); 2228 return (EBADF); 2229 } 2230 2231 vp = fp->f_data; 2232 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2233 error = ESPIPE; 2234 goto out; 2235 } 2236 2237 offset = SCARG(uap, offset); 2238 2239 /* 2240 * XXX This works because no file systems actually 2241 * XXX take any action on the seek operation. 2242 */ 2243 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2244 goto out; 2245 2246 /* dofileread() will unuse the descriptor for us */ 2247 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2248 &offset, 0, retval)); 2249 2250 out: 2251 fd_putfile(fd); 2252 return (error); 2253 } 2254 2255 /* 2256 * Positional scatter read system call. 2257 */ 2258 int 2259 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2260 { 2261 /* { 2262 syscallarg(int) fd; 2263 syscallarg(const struct iovec *) iovp; 2264 syscallarg(int) iovcnt; 2265 syscallarg(off_t) offset; 2266 } */ 2267 off_t offset = SCARG(uap, offset); 2268 2269 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2270 SCARG(uap, iovcnt), &offset, 0, retval); 2271 } 2272 2273 /* 2274 * Positional write system call. 2275 */ 2276 int 2277 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2278 { 2279 /* { 2280 syscallarg(int) fd; 2281 syscallarg(const void *) buf; 2282 syscallarg(size_t) nbyte; 2283 syscallarg(off_t) offset; 2284 } */ 2285 file_t *fp; 2286 struct vnode *vp; 2287 off_t offset; 2288 int error, fd = SCARG(uap, fd); 2289 2290 if ((fp = fd_getfile(fd)) == NULL) 2291 return (EBADF); 2292 2293 if ((fp->f_flag & FWRITE) == 0) { 2294 fd_putfile(fd); 2295 return (EBADF); 2296 } 2297 2298 vp = fp->f_data; 2299 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2300 error = ESPIPE; 2301 goto out; 2302 } 2303 2304 offset = SCARG(uap, offset); 2305 2306 /* 2307 * XXX This works because no file systems actually 2308 * XXX take any action on the seek operation. 2309 */ 2310 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2311 goto out; 2312 2313 /* dofilewrite() will unuse the descriptor for us */ 2314 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2315 &offset, 0, retval)); 2316 2317 out: 2318 fd_putfile(fd); 2319 return (error); 2320 } 2321 2322 /* 2323 * Positional gather write system call. 2324 */ 2325 int 2326 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2327 { 2328 /* { 2329 syscallarg(int) fd; 2330 syscallarg(const struct iovec *) iovp; 2331 syscallarg(int) iovcnt; 2332 syscallarg(off_t) offset; 2333 } */ 2334 off_t offset = SCARG(uap, offset); 2335 2336 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2337 SCARG(uap, iovcnt), &offset, 0, retval); 2338 } 2339 2340 /* 2341 * Check access permissions. 2342 */ 2343 int 2344 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2345 { 2346 /* { 2347 syscallarg(const char *) path; 2348 syscallarg(int) flags; 2349 } */ 2350 kauth_cred_t cred; 2351 struct vnode *vp; 2352 int error, flags; 2353 struct nameidata nd; 2354 2355 cred = kauth_cred_dup(l->l_cred); 2356 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2357 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2358 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2359 SCARG(uap, path)); 2360 /* Override default credentials */ 2361 nd.ni_cnd.cn_cred = cred; 2362 if ((error = namei(&nd)) != 0) 2363 goto out; 2364 vp = nd.ni_vp; 2365 2366 /* Flags == 0 means only check for existence. */ 2367 if (SCARG(uap, flags)) { 2368 flags = 0; 2369 if (SCARG(uap, flags) & R_OK) 2370 flags |= VREAD; 2371 if (SCARG(uap, flags) & W_OK) 2372 flags |= VWRITE; 2373 if (SCARG(uap, flags) & X_OK) 2374 flags |= VEXEC; 2375 2376 error = VOP_ACCESS(vp, flags, cred); 2377 if (!error && (flags & VWRITE)) 2378 error = vn_writechk(vp); 2379 } 2380 vput(vp); 2381 out: 2382 kauth_cred_free(cred); 2383 return (error); 2384 } 2385 2386 /* 2387 * Common code for all sys_stat functions, including compat versions. 2388 */ 2389 int 2390 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2391 { 2392 int error; 2393 struct nameidata nd; 2394 2395 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2396 UIO_USERSPACE, path); 2397 error = namei(&nd); 2398 if (error != 0) 2399 return error; 2400 error = vn_stat(nd.ni_vp, sb); 2401 vput(nd.ni_vp); 2402 return error; 2403 } 2404 2405 /* 2406 * Get file status; this version follows links. 2407 */ 2408 /* ARGSUSED */ 2409 int 2410 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2411 { 2412 /* { 2413 syscallarg(const char *) path; 2414 syscallarg(struct stat *) ub; 2415 } */ 2416 struct stat sb; 2417 int error; 2418 2419 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2420 if (error) 2421 return error; 2422 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2423 } 2424 2425 /* 2426 * Get file status; this version does not follow links. 2427 */ 2428 /* ARGSUSED */ 2429 int 2430 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2431 { 2432 /* { 2433 syscallarg(const char *) path; 2434 syscallarg(struct stat *) ub; 2435 } */ 2436 struct stat sb; 2437 int error; 2438 2439 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2440 if (error) 2441 return error; 2442 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2443 } 2444 2445 /* 2446 * Get configurable pathname variables. 2447 */ 2448 /* ARGSUSED */ 2449 int 2450 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2451 { 2452 /* { 2453 syscallarg(const char *) path; 2454 syscallarg(int) name; 2455 } */ 2456 int error; 2457 struct nameidata nd; 2458 2459 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2460 SCARG(uap, path)); 2461 if ((error = namei(&nd)) != 0) 2462 return (error); 2463 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2464 vput(nd.ni_vp); 2465 return (error); 2466 } 2467 2468 /* 2469 * Return target name of a symbolic link. 2470 */ 2471 /* ARGSUSED */ 2472 int 2473 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2474 { 2475 /* { 2476 syscallarg(const char *) path; 2477 syscallarg(char *) buf; 2478 syscallarg(size_t) count; 2479 } */ 2480 struct vnode *vp; 2481 struct iovec aiov; 2482 struct uio auio; 2483 int error; 2484 struct nameidata nd; 2485 2486 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2487 SCARG(uap, path)); 2488 if ((error = namei(&nd)) != 0) 2489 return (error); 2490 vp = nd.ni_vp; 2491 if (vp->v_type != VLNK) 2492 error = EINVAL; 2493 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2494 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2495 aiov.iov_base = SCARG(uap, buf); 2496 aiov.iov_len = SCARG(uap, count); 2497 auio.uio_iov = &aiov; 2498 auio.uio_iovcnt = 1; 2499 auio.uio_offset = 0; 2500 auio.uio_rw = UIO_READ; 2501 KASSERT(l == curlwp); 2502 auio.uio_vmspace = l->l_proc->p_vmspace; 2503 auio.uio_resid = SCARG(uap, count); 2504 error = VOP_READLINK(vp, &auio, l->l_cred); 2505 } 2506 vput(vp); 2507 *retval = SCARG(uap, count) - auio.uio_resid; 2508 return (error); 2509 } 2510 2511 /* 2512 * Change flags of a file given a path name. 2513 */ 2514 /* ARGSUSED */ 2515 int 2516 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2517 { 2518 /* { 2519 syscallarg(const char *) path; 2520 syscallarg(u_long) flags; 2521 } */ 2522 struct vnode *vp; 2523 int error; 2524 2525 error = namei_simple_user(SCARG(uap, path), 2526 NSM_FOLLOW_TRYEMULROOT, &vp); 2527 if (error != 0) 2528 return (error); 2529 error = change_flags(vp, SCARG(uap, flags), l); 2530 vput(vp); 2531 return (error); 2532 } 2533 2534 /* 2535 * Change flags of a file given a file descriptor. 2536 */ 2537 /* ARGSUSED */ 2538 int 2539 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2540 { 2541 /* { 2542 syscallarg(int) fd; 2543 syscallarg(u_long) flags; 2544 } */ 2545 struct vnode *vp; 2546 file_t *fp; 2547 int error; 2548 2549 /* fd_getvnode() will use the descriptor for us */ 2550 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2551 return (error); 2552 vp = fp->f_data; 2553 error = change_flags(vp, SCARG(uap, flags), l); 2554 VOP_UNLOCK(vp, 0); 2555 fd_putfile(SCARG(uap, fd)); 2556 return (error); 2557 } 2558 2559 /* 2560 * Change flags of a file given a path name; this version does 2561 * not follow links. 2562 */ 2563 int 2564 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2565 { 2566 /* { 2567 syscallarg(const char *) path; 2568 syscallarg(u_long) flags; 2569 } */ 2570 struct vnode *vp; 2571 int error; 2572 2573 error = namei_simple_user(SCARG(uap, path), 2574 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2575 if (error != 0) 2576 return (error); 2577 error = change_flags(vp, SCARG(uap, flags), l); 2578 vput(vp); 2579 return (error); 2580 } 2581 2582 /* 2583 * Common routine to change flags of a file. 2584 */ 2585 int 2586 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2587 { 2588 struct vattr vattr; 2589 int error; 2590 2591 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2592 /* 2593 * Non-superusers cannot change the flags on devices, even if they 2594 * own them. 2595 */ 2596 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2597 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2598 goto out; 2599 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2600 error = EINVAL; 2601 goto out; 2602 } 2603 } 2604 VATTR_NULL(&vattr); 2605 vattr.va_flags = flags; 2606 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2607 out: 2608 return (error); 2609 } 2610 2611 /* 2612 * Change mode of a file given path name; this version follows links. 2613 */ 2614 /* ARGSUSED */ 2615 int 2616 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2617 { 2618 /* { 2619 syscallarg(const char *) path; 2620 syscallarg(int) mode; 2621 } */ 2622 int error; 2623 struct vnode *vp; 2624 2625 error = namei_simple_user(SCARG(uap, path), 2626 NSM_FOLLOW_TRYEMULROOT, &vp); 2627 if (error != 0) 2628 return (error); 2629 2630 error = change_mode(vp, SCARG(uap, mode), l); 2631 2632 vrele(vp); 2633 return (error); 2634 } 2635 2636 /* 2637 * Change mode of a file given a file descriptor. 2638 */ 2639 /* ARGSUSED */ 2640 int 2641 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2642 { 2643 /* { 2644 syscallarg(int) fd; 2645 syscallarg(int) mode; 2646 } */ 2647 file_t *fp; 2648 int error; 2649 2650 /* fd_getvnode() will use the descriptor for us */ 2651 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2652 return (error); 2653 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2654 fd_putfile(SCARG(uap, fd)); 2655 return (error); 2656 } 2657 2658 /* 2659 * Change mode of a file given path name; this version does not follow links. 2660 */ 2661 /* ARGSUSED */ 2662 int 2663 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2664 { 2665 /* { 2666 syscallarg(const char *) path; 2667 syscallarg(int) mode; 2668 } */ 2669 int error; 2670 struct vnode *vp; 2671 2672 error = namei_simple_user(SCARG(uap, path), 2673 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2674 if (error != 0) 2675 return (error); 2676 2677 error = change_mode(vp, SCARG(uap, mode), l); 2678 2679 vrele(vp); 2680 return (error); 2681 } 2682 2683 /* 2684 * Common routine to set mode given a vnode. 2685 */ 2686 static int 2687 change_mode(struct vnode *vp, int mode, struct lwp *l) 2688 { 2689 struct vattr vattr; 2690 int error; 2691 2692 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2693 VATTR_NULL(&vattr); 2694 vattr.va_mode = mode & ALLPERMS; 2695 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2696 VOP_UNLOCK(vp, 0); 2697 return (error); 2698 } 2699 2700 /* 2701 * Set ownership given a path name; this version follows links. 2702 */ 2703 /* ARGSUSED */ 2704 int 2705 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2706 { 2707 /* { 2708 syscallarg(const char *) path; 2709 syscallarg(uid_t) uid; 2710 syscallarg(gid_t) gid; 2711 } */ 2712 int error; 2713 struct vnode *vp; 2714 2715 error = namei_simple_user(SCARG(uap, path), 2716 NSM_FOLLOW_TRYEMULROOT, &vp); 2717 if (error != 0) 2718 return (error); 2719 2720 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2721 2722 vrele(vp); 2723 return (error); 2724 } 2725 2726 /* 2727 * Set ownership given a path name; this version follows links. 2728 * Provides POSIX semantics. 2729 */ 2730 /* ARGSUSED */ 2731 int 2732 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2733 { 2734 /* { 2735 syscallarg(const char *) path; 2736 syscallarg(uid_t) uid; 2737 syscallarg(gid_t) gid; 2738 } */ 2739 int error; 2740 struct vnode *vp; 2741 2742 error = namei_simple_user(SCARG(uap, path), 2743 NSM_FOLLOW_TRYEMULROOT, &vp); 2744 if (error != 0) 2745 return (error); 2746 2747 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2748 2749 vrele(vp); 2750 return (error); 2751 } 2752 2753 /* 2754 * Set ownership given a file descriptor. 2755 */ 2756 /* ARGSUSED */ 2757 int 2758 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2759 { 2760 /* { 2761 syscallarg(int) fd; 2762 syscallarg(uid_t) uid; 2763 syscallarg(gid_t) gid; 2764 } */ 2765 int error; 2766 file_t *fp; 2767 2768 /* fd_getvnode() will use the descriptor for us */ 2769 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2770 return (error); 2771 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2772 l, 0); 2773 fd_putfile(SCARG(uap, fd)); 2774 return (error); 2775 } 2776 2777 /* 2778 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2779 */ 2780 /* ARGSUSED */ 2781 int 2782 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2783 { 2784 /* { 2785 syscallarg(int) fd; 2786 syscallarg(uid_t) uid; 2787 syscallarg(gid_t) gid; 2788 } */ 2789 int error; 2790 file_t *fp; 2791 2792 /* fd_getvnode() will use the descriptor for us */ 2793 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2794 return (error); 2795 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2796 l, 1); 2797 fd_putfile(SCARG(uap, fd)); 2798 return (error); 2799 } 2800 2801 /* 2802 * Set ownership given a path name; this version does not follow links. 2803 */ 2804 /* ARGSUSED */ 2805 int 2806 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2807 { 2808 /* { 2809 syscallarg(const char *) path; 2810 syscallarg(uid_t) uid; 2811 syscallarg(gid_t) gid; 2812 } */ 2813 int error; 2814 struct vnode *vp; 2815 2816 error = namei_simple_user(SCARG(uap, path), 2817 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2818 if (error != 0) 2819 return (error); 2820 2821 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2822 2823 vrele(vp); 2824 return (error); 2825 } 2826 2827 /* 2828 * Set ownership given a path name; this version does not follow links. 2829 * Provides POSIX/XPG semantics. 2830 */ 2831 /* ARGSUSED */ 2832 int 2833 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2834 { 2835 /* { 2836 syscallarg(const char *) path; 2837 syscallarg(uid_t) uid; 2838 syscallarg(gid_t) gid; 2839 } */ 2840 int error; 2841 struct vnode *vp; 2842 2843 error = namei_simple_user(SCARG(uap, path), 2844 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2845 if (error != 0) 2846 return (error); 2847 2848 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2849 2850 vrele(vp); 2851 return (error); 2852 } 2853 2854 /* 2855 * Common routine to set ownership given a vnode. 2856 */ 2857 static int 2858 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2859 int posix_semantics) 2860 { 2861 struct vattr vattr; 2862 mode_t newmode; 2863 int error; 2864 2865 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2866 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2867 goto out; 2868 2869 #define CHANGED(x) ((int)(x) != -1) 2870 newmode = vattr.va_mode; 2871 if (posix_semantics) { 2872 /* 2873 * POSIX/XPG semantics: if the caller is not the super-user, 2874 * clear set-user-id and set-group-id bits. Both POSIX and 2875 * the XPG consider the behaviour for calls by the super-user 2876 * implementation-defined; we leave the set-user-id and set- 2877 * group-id settings intact in that case. 2878 */ 2879 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2880 NULL) != 0) 2881 newmode &= ~(S_ISUID | S_ISGID); 2882 } else { 2883 /* 2884 * NetBSD semantics: when changing owner and/or group, 2885 * clear the respective bit(s). 2886 */ 2887 if (CHANGED(uid)) 2888 newmode &= ~S_ISUID; 2889 if (CHANGED(gid)) 2890 newmode &= ~S_ISGID; 2891 } 2892 /* Update va_mode iff altered. */ 2893 if (vattr.va_mode == newmode) 2894 newmode = VNOVAL; 2895 2896 VATTR_NULL(&vattr); 2897 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2898 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2899 vattr.va_mode = newmode; 2900 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2901 #undef CHANGED 2902 2903 out: 2904 VOP_UNLOCK(vp, 0); 2905 return (error); 2906 } 2907 2908 /* 2909 * Set the access and modification times given a path name; this 2910 * version follows links. 2911 */ 2912 /* ARGSUSED */ 2913 int 2914 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2915 register_t *retval) 2916 { 2917 /* { 2918 syscallarg(const char *) path; 2919 syscallarg(const struct timeval *) tptr; 2920 } */ 2921 2922 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2923 SCARG(uap, tptr), UIO_USERSPACE); 2924 } 2925 2926 /* 2927 * Set the access and modification times given a file descriptor. 2928 */ 2929 /* ARGSUSED */ 2930 int 2931 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2932 register_t *retval) 2933 { 2934 /* { 2935 syscallarg(int) fd; 2936 syscallarg(const struct timeval *) tptr; 2937 } */ 2938 int error; 2939 file_t *fp; 2940 2941 /* fd_getvnode() will use the descriptor for us */ 2942 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2943 return (error); 2944 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2945 UIO_USERSPACE); 2946 fd_putfile(SCARG(uap, fd)); 2947 return (error); 2948 } 2949 2950 /* 2951 * Set the access and modification times given a path name; this 2952 * version does not follow links. 2953 */ 2954 int 2955 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2956 register_t *retval) 2957 { 2958 /* { 2959 syscallarg(const char *) path; 2960 syscallarg(const struct timeval *) tptr; 2961 } */ 2962 2963 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2964 SCARG(uap, tptr), UIO_USERSPACE); 2965 } 2966 2967 /* 2968 * Common routine to set access and modification times given a vnode. 2969 */ 2970 int 2971 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 2972 const struct timeval *tptr, enum uio_seg seg) 2973 { 2974 struct vattr vattr; 2975 int error, dorele = 0; 2976 namei_simple_flags_t sflags; 2977 2978 bool vanull, setbirthtime; 2979 struct timespec ts[2]; 2980 2981 /* 2982 * I have checked all callers and they pass either FOLLOW, 2983 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 2984 * is 0. More to the point, they don't pass anything else. 2985 * Let's keep it that way at least until the namei interfaces 2986 * are fully sanitized. 2987 */ 2988 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 2989 sflags = (flag == FOLLOW) ? 2990 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 2991 2992 if (tptr == NULL) { 2993 vanull = true; 2994 nanotime(&ts[0]); 2995 ts[1] = ts[0]; 2996 } else { 2997 struct timeval tv[2]; 2998 2999 vanull = false; 3000 if (seg != UIO_SYSSPACE) { 3001 error = copyin(tptr, tv, sizeof (tv)); 3002 if (error != 0) 3003 return error; 3004 tptr = tv; 3005 } 3006 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3007 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3008 } 3009 3010 if (vp == NULL) { 3011 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3012 error = namei_simple_user(path, sflags, &vp); 3013 if (error != 0) 3014 return error; 3015 dorele = 1; 3016 } 3017 3018 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3019 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3020 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3021 VATTR_NULL(&vattr); 3022 vattr.va_atime = ts[0]; 3023 vattr.va_mtime = ts[1]; 3024 if (setbirthtime) 3025 vattr.va_birthtime = ts[1]; 3026 if (vanull) 3027 vattr.va_vaflags |= VA_UTIMES_NULL; 3028 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3029 VOP_UNLOCK(vp, 0); 3030 3031 if (dorele != 0) 3032 vrele(vp); 3033 3034 return error; 3035 } 3036 3037 /* 3038 * Truncate a file given its path name. 3039 */ 3040 /* ARGSUSED */ 3041 int 3042 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3043 { 3044 /* { 3045 syscallarg(const char *) path; 3046 syscallarg(int) pad; 3047 syscallarg(off_t) length; 3048 } */ 3049 struct vnode *vp; 3050 struct vattr vattr; 3051 int error; 3052 3053 error = namei_simple_user(SCARG(uap, path), 3054 NSM_FOLLOW_TRYEMULROOT, &vp); 3055 if (error != 0) 3056 return (error); 3057 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3058 if (vp->v_type == VDIR) 3059 error = EISDIR; 3060 else if ((error = vn_writechk(vp)) == 0 && 3061 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3062 VATTR_NULL(&vattr); 3063 vattr.va_size = SCARG(uap, length); 3064 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3065 } 3066 vput(vp); 3067 return (error); 3068 } 3069 3070 /* 3071 * Truncate a file given a file descriptor. 3072 */ 3073 /* ARGSUSED */ 3074 int 3075 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3076 { 3077 /* { 3078 syscallarg(int) fd; 3079 syscallarg(int) pad; 3080 syscallarg(off_t) length; 3081 } */ 3082 struct vattr vattr; 3083 struct vnode *vp; 3084 file_t *fp; 3085 int error; 3086 3087 /* fd_getvnode() will use the descriptor for us */ 3088 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3089 return (error); 3090 if ((fp->f_flag & FWRITE) == 0) { 3091 error = EINVAL; 3092 goto out; 3093 } 3094 vp = fp->f_data; 3095 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3096 if (vp->v_type == VDIR) 3097 error = EISDIR; 3098 else if ((error = vn_writechk(vp)) == 0) { 3099 VATTR_NULL(&vattr); 3100 vattr.va_size = SCARG(uap, length); 3101 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3102 } 3103 VOP_UNLOCK(vp, 0); 3104 out: 3105 fd_putfile(SCARG(uap, fd)); 3106 return (error); 3107 } 3108 3109 /* 3110 * Sync an open file. 3111 */ 3112 /* ARGSUSED */ 3113 int 3114 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3115 { 3116 /* { 3117 syscallarg(int) fd; 3118 } */ 3119 struct vnode *vp; 3120 file_t *fp; 3121 int error; 3122 3123 /* fd_getvnode() will use the descriptor for us */ 3124 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3125 return (error); 3126 vp = fp->f_data; 3127 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3128 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3129 VOP_UNLOCK(vp, 0); 3130 fd_putfile(SCARG(uap, fd)); 3131 return (error); 3132 } 3133 3134 /* 3135 * Sync a range of file data. API modeled after that found in AIX. 3136 * 3137 * FDATASYNC indicates that we need only save enough metadata to be able 3138 * to re-read the written data. Note we duplicate AIX's requirement that 3139 * the file be open for writing. 3140 */ 3141 /* ARGSUSED */ 3142 int 3143 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3144 { 3145 /* { 3146 syscallarg(int) fd; 3147 syscallarg(int) flags; 3148 syscallarg(off_t) start; 3149 syscallarg(off_t) length; 3150 } */ 3151 struct vnode *vp; 3152 file_t *fp; 3153 int flags, nflags; 3154 off_t s, e, len; 3155 int error; 3156 3157 /* fd_getvnode() will use the descriptor for us */ 3158 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3159 return (error); 3160 3161 if ((fp->f_flag & FWRITE) == 0) { 3162 error = EBADF; 3163 goto out; 3164 } 3165 3166 flags = SCARG(uap, flags); 3167 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3168 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3169 error = EINVAL; 3170 goto out; 3171 } 3172 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3173 if (flags & FDATASYNC) 3174 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3175 else 3176 nflags = FSYNC_WAIT; 3177 if (flags & FDISKSYNC) 3178 nflags |= FSYNC_CACHE; 3179 3180 len = SCARG(uap, length); 3181 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3182 if (len) { 3183 s = SCARG(uap, start); 3184 e = s + len; 3185 if (e < s) { 3186 error = EINVAL; 3187 goto out; 3188 } 3189 } else { 3190 e = 0; 3191 s = 0; 3192 } 3193 3194 vp = fp->f_data; 3195 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3196 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3197 VOP_UNLOCK(vp, 0); 3198 out: 3199 fd_putfile(SCARG(uap, fd)); 3200 return (error); 3201 } 3202 3203 /* 3204 * Sync the data of an open file. 3205 */ 3206 /* ARGSUSED */ 3207 int 3208 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3209 { 3210 /* { 3211 syscallarg(int) fd; 3212 } */ 3213 struct vnode *vp; 3214 file_t *fp; 3215 int error; 3216 3217 /* fd_getvnode() will use the descriptor for us */ 3218 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3219 return (error); 3220 if ((fp->f_flag & FWRITE) == 0) { 3221 fd_putfile(SCARG(uap, fd)); 3222 return (EBADF); 3223 } 3224 vp = fp->f_data; 3225 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3226 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3227 VOP_UNLOCK(vp, 0); 3228 fd_putfile(SCARG(uap, fd)); 3229 return (error); 3230 } 3231 3232 /* 3233 * Rename files, (standard) BSD semantics frontend. 3234 */ 3235 /* ARGSUSED */ 3236 int 3237 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3238 { 3239 /* { 3240 syscallarg(const char *) from; 3241 syscallarg(const char *) to; 3242 } */ 3243 3244 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3245 } 3246 3247 /* 3248 * Rename files, POSIX semantics frontend. 3249 */ 3250 /* ARGSUSED */ 3251 int 3252 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3253 { 3254 /* { 3255 syscallarg(const char *) from; 3256 syscallarg(const char *) to; 3257 } */ 3258 3259 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3260 } 3261 3262 /* 3263 * Rename files. Source and destination must either both be directories, 3264 * or both not be directories. If target is a directory, it must be empty. 3265 * If `from' and `to' refer to the same object, the value of the `retain' 3266 * argument is used to determine whether `from' will be 3267 * 3268 * (retain == 0) deleted unless `from' and `to' refer to the same 3269 * object in the file system's name space (BSD). 3270 * (retain == 1) always retained (POSIX). 3271 */ 3272 int 3273 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3274 { 3275 struct vnode *tvp, *fvp, *tdvp; 3276 struct nameidata fromnd, tond; 3277 struct mount *fs; 3278 struct lwp *l = curlwp; 3279 struct proc *p; 3280 uint32_t saveflag; 3281 int error; 3282 3283 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, 3284 seg, from); 3285 if ((error = namei(&fromnd)) != 0) 3286 return (error); 3287 if (fromnd.ni_dvp != fromnd.ni_vp) 3288 VOP_UNLOCK(fromnd.ni_dvp, 0); 3289 fvp = fromnd.ni_vp; 3290 3291 fs = fvp->v_mount; 3292 error = VFS_RENAMELOCK_ENTER(fs); 3293 if (error) { 3294 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3295 vrele(fromnd.ni_dvp); 3296 vrele(fvp); 3297 goto out1; 3298 } 3299 3300 /* 3301 * close, partially, yet another race - ideally we should only 3302 * go as far as getting fromnd.ni_dvp before getting the per-fs 3303 * lock, and then continue to get fromnd.ni_vp, but we can't do 3304 * that with namei as it stands. 3305 * 3306 * This still won't prevent rmdir from nuking fromnd.ni_vp 3307 * under us. The real fix is to get the locks in the right 3308 * order and do the lookups in the right places, but that's a 3309 * major rototill. 3310 * 3311 * Preserve the SAVESTART in cn_flags, because who knows what 3312 * might happen if we don't. 3313 * 3314 * Note: this logic (as well as this whole function) is cloned 3315 * in nfs_serv.c. Proceed accordingly. 3316 */ 3317 vrele(fvp); 3318 if ((fromnd.ni_cnd.cn_namelen == 1 && 3319 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3320 (fromnd.ni_cnd.cn_namelen == 2 && 3321 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3322 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3323 error = EINVAL; 3324 VFS_RENAMELOCK_EXIT(fs); 3325 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3326 vrele(fromnd.ni_dvp); 3327 goto out1; 3328 } 3329 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3330 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3331 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3332 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3333 fromnd.ni_cnd.cn_flags |= saveflag; 3334 if (error) { 3335 VOP_UNLOCK(fromnd.ni_dvp, 0); 3336 VFS_RENAMELOCK_EXIT(fs); 3337 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3338 vrele(fromnd.ni_dvp); 3339 goto out1; 3340 } 3341 VOP_UNLOCK(fromnd.ni_vp, 0); 3342 if (fromnd.ni_dvp != fromnd.ni_vp) 3343 VOP_UNLOCK(fromnd.ni_dvp, 0); 3344 fvp = fromnd.ni_vp; 3345 3346 NDINIT(&tond, RENAME, 3347 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3348 | (fvp->v_type == VDIR ? CREATEDIR : 0), 3349 seg, to); 3350 if ((error = namei(&tond)) != 0) { 3351 VFS_RENAMELOCK_EXIT(fs); 3352 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3353 vrele(fromnd.ni_dvp); 3354 vrele(fvp); 3355 goto out1; 3356 } 3357 tdvp = tond.ni_dvp; 3358 tvp = tond.ni_vp; 3359 3360 if (tvp != NULL) { 3361 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3362 error = ENOTDIR; 3363 goto out; 3364 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3365 error = EISDIR; 3366 goto out; 3367 } 3368 } 3369 3370 if (fvp == tdvp) 3371 error = EINVAL; 3372 3373 /* 3374 * Source and destination refer to the same object. 3375 */ 3376 if (fvp == tvp) { 3377 if (retain) 3378 error = -1; 3379 else if (fromnd.ni_dvp == tdvp && 3380 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3381 !memcmp(fromnd.ni_cnd.cn_nameptr, 3382 tond.ni_cnd.cn_nameptr, 3383 fromnd.ni_cnd.cn_namelen)) 3384 error = -1; 3385 } 3386 3387 #if NVERIEXEC > 0 3388 if (!error) { 3389 char *f1, *f2; 3390 size_t f1_len; 3391 size_t f2_len; 3392 3393 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3394 f1 = kmem_alloc(f1_len, KM_SLEEP); 3395 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3396 3397 f2_len = tond.ni_cnd.cn_namelen + 1; 3398 f2 = kmem_alloc(f2_len, KM_SLEEP); 3399 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3400 3401 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3402 3403 kmem_free(f1, f1_len); 3404 kmem_free(f2, f2_len); 3405 } 3406 #endif /* NVERIEXEC > 0 */ 3407 3408 out: 3409 p = l->l_proc; 3410 if (!error) { 3411 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3412 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3413 VFS_RENAMELOCK_EXIT(fs); 3414 } else { 3415 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3416 if (tdvp == tvp) 3417 vrele(tdvp); 3418 else 3419 vput(tdvp); 3420 if (tvp) 3421 vput(tvp); 3422 VFS_RENAMELOCK_EXIT(fs); 3423 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3424 vrele(fromnd.ni_dvp); 3425 vrele(fvp); 3426 } 3427 vrele(tond.ni_startdir); 3428 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3429 out1: 3430 if (fromnd.ni_startdir) 3431 vrele(fromnd.ni_startdir); 3432 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3433 return (error == -1 ? 0 : error); 3434 } 3435 3436 /* 3437 * Make a directory file. 3438 */ 3439 /* ARGSUSED */ 3440 int 3441 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3442 { 3443 /* { 3444 syscallarg(const char *) path; 3445 syscallarg(int) mode; 3446 } */ 3447 3448 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode)); 3449 } 3450 3451 int 3452 do_sys_mkdir(const char *path, mode_t mode) 3453 { 3454 struct proc *p = curlwp->l_proc; 3455 struct vnode *vp; 3456 struct vattr vattr; 3457 int error; 3458 struct nameidata nd; 3459 3460 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, 3461 UIO_USERSPACE, path); 3462 if ((error = namei(&nd)) != 0) 3463 return (error); 3464 vp = nd.ni_vp; 3465 if (vp != NULL) { 3466 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3467 if (nd.ni_dvp == vp) 3468 vrele(nd.ni_dvp); 3469 else 3470 vput(nd.ni_dvp); 3471 vrele(vp); 3472 return (EEXIST); 3473 } 3474 VATTR_NULL(&vattr); 3475 vattr.va_type = VDIR; 3476 /* We will read cwdi->cwdi_cmask unlocked. */ 3477 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3478 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3479 if (!error) 3480 vput(nd.ni_vp); 3481 return (error); 3482 } 3483 3484 /* 3485 * Remove a directory file. 3486 */ 3487 /* ARGSUSED */ 3488 int 3489 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3490 { 3491 /* { 3492 syscallarg(const char *) path; 3493 } */ 3494 struct vnode *vp; 3495 int error; 3496 struct nameidata nd; 3497 3498 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3499 SCARG(uap, path)); 3500 if ((error = namei(&nd)) != 0) 3501 return (error); 3502 vp = nd.ni_vp; 3503 if (vp->v_type != VDIR) { 3504 error = ENOTDIR; 3505 goto out; 3506 } 3507 /* 3508 * No rmdir "." please. 3509 */ 3510 if (nd.ni_dvp == vp) { 3511 error = EINVAL; 3512 goto out; 3513 } 3514 /* 3515 * The root of a mounted filesystem cannot be deleted. 3516 */ 3517 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3518 error = EBUSY; 3519 goto out; 3520 } 3521 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3522 return (error); 3523 3524 out: 3525 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3526 if (nd.ni_dvp == vp) 3527 vrele(nd.ni_dvp); 3528 else 3529 vput(nd.ni_dvp); 3530 vput(vp); 3531 return (error); 3532 } 3533 3534 /* 3535 * Read a block of directory entries in a file system independent format. 3536 */ 3537 int 3538 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3539 { 3540 /* { 3541 syscallarg(int) fd; 3542 syscallarg(char *) buf; 3543 syscallarg(size_t) count; 3544 } */ 3545 file_t *fp; 3546 int error, done; 3547 3548 /* fd_getvnode() will use the descriptor for us */ 3549 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3550 return (error); 3551 if ((fp->f_flag & FREAD) == 0) { 3552 error = EBADF; 3553 goto out; 3554 } 3555 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3556 SCARG(uap, count), &done, l, 0, 0); 3557 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3558 *retval = done; 3559 out: 3560 fd_putfile(SCARG(uap, fd)); 3561 return (error); 3562 } 3563 3564 /* 3565 * Set the mode mask for creation of filesystem nodes. 3566 */ 3567 int 3568 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3569 { 3570 /* { 3571 syscallarg(mode_t) newmask; 3572 } */ 3573 struct proc *p = l->l_proc; 3574 struct cwdinfo *cwdi; 3575 3576 /* 3577 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3578 * important is that we serialize changes to the mask. The 3579 * rw_exit() will issue a write memory barrier on our behalf, 3580 * and force the changes out to other CPUs (as it must use an 3581 * atomic operation, draining the local CPU's store buffers). 3582 */ 3583 cwdi = p->p_cwdi; 3584 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3585 *retval = cwdi->cwdi_cmask; 3586 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3587 rw_exit(&cwdi->cwdi_lock); 3588 3589 return (0); 3590 } 3591 3592 int 3593 dorevoke(struct vnode *vp, kauth_cred_t cred) 3594 { 3595 struct vattr vattr; 3596 int error; 3597 3598 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3599 return error; 3600 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3601 (error = kauth_authorize_generic(cred, 3602 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3603 VOP_REVOKE(vp, REVOKEALL); 3604 return (error); 3605 } 3606 3607 /* 3608 * Void all references to file by ripping underlying filesystem 3609 * away from vnode. 3610 */ 3611 /* ARGSUSED */ 3612 int 3613 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3614 { 3615 /* { 3616 syscallarg(const char *) path; 3617 } */ 3618 struct vnode *vp; 3619 int error; 3620 3621 error = namei_simple_user(SCARG(uap, path), 3622 NSM_FOLLOW_TRYEMULROOT, &vp); 3623 if (error != 0) 3624 return (error); 3625 error = dorevoke(vp, l->l_cred); 3626 vrele(vp); 3627 return (error); 3628 } 3629