1 /* $NetBSD: vfs_syscalls.c,v 1.407 2010/06/30 15:44:54 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.407 2010/06/30 15:44:54 pooka Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_flags(struct vnode *, u_long, struct lwp *); 114 static int change_mode(struct vnode *, int, struct lwp *l); 115 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 116 117 void checkdirs(struct vnode *); 118 119 /* 120 * Virtual File System System Calls 121 */ 122 123 /* 124 * Mount a file system. 125 */ 126 127 /* 128 * This table is used to maintain compatibility with 4.3BSD 129 * and NetBSD 0.9 mount syscalls - and possibly other systems. 130 * Note, the order is important! 131 * 132 * Do not modify this table. It should only contain filesystems 133 * supported by NetBSD 0.9 and 4.3BSD. 134 */ 135 const char * const mountcompatnames[] = { 136 NULL, /* 0 = MOUNT_NONE */ 137 MOUNT_FFS, /* 1 = MOUNT_UFS */ 138 MOUNT_NFS, /* 2 */ 139 MOUNT_MFS, /* 3 */ 140 MOUNT_MSDOS, /* 4 */ 141 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 142 MOUNT_FDESC, /* 6 */ 143 MOUNT_KERNFS, /* 7 */ 144 NULL, /* 8 = MOUNT_DEVFS */ 145 MOUNT_AFS, /* 9 */ 146 }; 147 const int nmountcompatnames = sizeof(mountcompatnames) / 148 sizeof(mountcompatnames[0]); 149 150 static int 151 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 152 void *data, size_t *data_len) 153 { 154 struct mount *mp; 155 int error = 0, saved_flags; 156 157 mp = vp->v_mount; 158 saved_flags = mp->mnt_flag; 159 160 /* We can operate only on VV_ROOT nodes. */ 161 if ((vp->v_vflag & VV_ROOT) == 0) { 162 error = EINVAL; 163 goto out; 164 } 165 166 /* 167 * We only allow the filesystem to be reloaded if it 168 * is currently mounted read-only. Additionally, we 169 * prevent read-write to read-only downgrades. 170 */ 171 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 172 (mp->mnt_flag & MNT_RDONLY) == 0) { 173 error = EOPNOTSUPP; /* Needs translation */ 174 goto out; 175 } 176 177 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 178 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 179 if (error) 180 goto out; 181 182 if (vfs_busy(mp, NULL)) { 183 error = EPERM; 184 goto out; 185 } 186 187 mutex_enter(&mp->mnt_updating); 188 189 mp->mnt_flag &= ~MNT_OP_FLAGS; 190 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 191 192 /* 193 * Set the mount level flags. 194 */ 195 if (flags & MNT_RDONLY) 196 mp->mnt_flag |= MNT_RDONLY; 197 else if (mp->mnt_flag & MNT_RDONLY) 198 mp->mnt_iflag |= IMNT_WANTRDWR; 199 mp->mnt_flag &= 200 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 201 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 202 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 203 MNT_LOG); 204 mp->mnt_flag |= flags & 205 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 206 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 207 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 208 MNT_LOG | MNT_IGNORE); 209 210 error = VFS_MOUNT(mp, path, data, data_len); 211 212 if (error && data != NULL) { 213 int error2; 214 215 /* 216 * Update failed; let's try and see if it was an 217 * export request. For compat with 3.0 and earlier. 218 */ 219 error2 = vfs_hooks_reexport(mp, path, data); 220 221 /* 222 * Only update error code if the export request was 223 * understood but some problem occurred while 224 * processing it. 225 */ 226 if (error2 != EJUSTRETURN) 227 error = error2; 228 } 229 230 if (mp->mnt_iflag & IMNT_WANTRDWR) 231 mp->mnt_flag &= ~MNT_RDONLY; 232 if (error) 233 mp->mnt_flag = saved_flags; 234 mp->mnt_flag &= ~MNT_OP_FLAGS; 235 mp->mnt_iflag &= ~IMNT_WANTRDWR; 236 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 237 if (mp->mnt_syncer == NULL) 238 error = vfs_allocate_syncvnode(mp); 239 } else { 240 if (mp->mnt_syncer != NULL) 241 vfs_deallocate_syncvnode(mp); 242 } 243 mutex_exit(&mp->mnt_updating); 244 vfs_unbusy(mp, false, NULL); 245 246 out: 247 return (error); 248 } 249 250 static int 251 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 252 { 253 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 254 int error; 255 256 /* Copy file-system type from userspace. */ 257 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 258 if (error) { 259 /* 260 * Historically, filesystem types were identified by numbers. 261 * If we get an integer for the filesystem type instead of a 262 * string, we check to see if it matches one of the historic 263 * filesystem types. 264 */ 265 u_long fsindex = (u_long)fstype; 266 if (fsindex >= nmountcompatnames || 267 mountcompatnames[fsindex] == NULL) 268 return ENODEV; 269 strlcpy(fstypename, mountcompatnames[fsindex], 270 sizeof(fstypename)); 271 } 272 273 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 274 if (strcmp(fstypename, "ufs") == 0) 275 fstypename[0] = 'f'; 276 277 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 278 return 0; 279 280 /* If we can autoload a vfs module, try again */ 281 mutex_enter(&module_lock); 282 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 283 mutex_exit(&module_lock); 284 285 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 286 return 0; 287 288 return ENODEV; 289 } 290 291 static int 292 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 293 const char *path, int flags, void *data, size_t *data_len) 294 { 295 struct mount *mp; 296 struct vnode *vp = *vpp; 297 struct vattr va; 298 struct nameidata nd; 299 int error; 300 301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 303 if (error) { 304 vfs_delref(vfsops); 305 return error; 306 } 307 308 /* Can't make a non-dir a mount-point (from here anyway). */ 309 if (vp->v_type != VDIR) { 310 vfs_delref(vfsops); 311 return ENOTDIR; 312 } 313 314 /* 315 * If the user is not root, ensure that they own the directory 316 * onto which we are attempting to mount. 317 */ 318 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 319 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 320 (error = kauth_authorize_generic(l->l_cred, 321 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 322 vfs_delref(vfsops); 323 return error; 324 } 325 326 if (flags & MNT_EXPORTED) { 327 vfs_delref(vfsops); 328 return EINVAL; 329 } 330 331 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 332 vfs_delref(vfsops); 333 return ENOMEM; 334 } 335 336 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 337 338 /* 339 * The underlying file system may refuse the mount for 340 * various reasons. Allow the user to force it to happen. 341 * 342 * Set the mount level flags. 343 */ 344 mp->mnt_flag = flags & 345 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 346 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 347 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 348 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 349 350 mutex_enter(&mp->mnt_updating); 351 error = VFS_MOUNT(mp, path, data, data_len); 352 mp->mnt_flag &= ~MNT_OP_FLAGS; 353 354 if (error != 0) 355 goto err_unmounted; 356 357 /* 358 * Validate and prepare the mount point. 359 */ 360 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, 361 UIO_USERSPACE, path); 362 error = namei(&nd); 363 if (error != 0) { 364 goto err_mounted; 365 } 366 if (nd.ni_vp != vp) { 367 vput(nd.ni_vp); 368 error = EINVAL; 369 goto err_mounted; 370 } 371 if (vp->v_mountedhere != NULL) { 372 vput(nd.ni_vp); 373 error = EBUSY; 374 goto err_mounted; 375 } 376 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 377 if (error != 0) { 378 vput(nd.ni_vp); 379 goto err_mounted; 380 } 381 382 /* 383 * Put the new filesystem on the mount list after root. 384 */ 385 cache_purge(vp); 386 mp->mnt_iflag &= ~IMNT_WANTRDWR; 387 388 mutex_enter(&mountlist_lock); 389 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 390 mutex_exit(&mountlist_lock); 391 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 392 error = vfs_allocate_syncvnode(mp); 393 if (error == 0) 394 vp->v_mountedhere = mp; 395 vput(nd.ni_vp); 396 if (error != 0) 397 goto err_onmountlist; 398 399 checkdirs(vp); 400 mutex_exit(&mp->mnt_updating); 401 402 /* Hold an additional reference to the mount across VFS_START(). */ 403 vfs_unbusy(mp, true, NULL); 404 (void) VFS_STATVFS(mp, &mp->mnt_stat); 405 error = VFS_START(mp, 0); 406 if (error) 407 vrele(vp); 408 /* Drop reference held for VFS_START(). */ 409 vfs_destroy(mp); 410 *vpp = NULL; 411 return error; 412 413 err_onmountlist: 414 mutex_enter(&mountlist_lock); 415 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 416 mp->mnt_iflag |= IMNT_GONE; 417 mutex_exit(&mountlist_lock); 418 419 err_mounted: 420 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 421 panic("Unmounting fresh file system failed"); 422 423 err_unmounted: 424 vp->v_mountedhere = NULL; 425 mutex_exit(&mp->mnt_updating); 426 vfs_unbusy(mp, false, NULL); 427 vfs_destroy(mp); 428 429 return error; 430 } 431 432 static int 433 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 434 void *data, size_t *data_len) 435 { 436 struct mount *mp; 437 int error; 438 439 /* If MNT_GETARGS is specified, it should be the only flag. */ 440 if (flags & ~MNT_GETARGS) 441 return EINVAL; 442 443 mp = vp->v_mount; 444 445 /* XXX: probably some notion of "can see" here if we want isolation. */ 446 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 447 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 448 if (error) 449 return error; 450 451 if ((vp->v_vflag & VV_ROOT) == 0) 452 return EINVAL; 453 454 if (vfs_busy(mp, NULL)) 455 return EPERM; 456 457 mutex_enter(&mp->mnt_updating); 458 mp->mnt_flag &= ~MNT_OP_FLAGS; 459 mp->mnt_flag |= MNT_GETARGS; 460 error = VFS_MOUNT(mp, path, data, data_len); 461 mp->mnt_flag &= ~MNT_OP_FLAGS; 462 mutex_exit(&mp->mnt_updating); 463 464 vfs_unbusy(mp, false, NULL); 465 return (error); 466 } 467 468 int 469 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 470 { 471 /* { 472 syscallarg(const char *) type; 473 syscallarg(const char *) path; 474 syscallarg(int) flags; 475 syscallarg(void *) data; 476 syscallarg(size_t) data_len; 477 } */ 478 479 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 480 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 481 SCARG(uap, data_len), retval); 482 } 483 484 int 485 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 486 const char *path, int flags, void *data, enum uio_seg data_seg, 487 size_t data_len, register_t *retval) 488 { 489 struct vnode *vp; 490 void *data_buf = data; 491 bool vfsopsrele = false; 492 int error; 493 494 /* XXX: The calling convention of this routine is totally bizarre */ 495 if (vfsops) 496 vfsopsrele = true; 497 498 /* 499 * Get vnode to be covered 500 */ 501 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 502 if (error != 0) { 503 vp = NULL; 504 goto done; 505 } 506 507 if (vfsops == NULL) { 508 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 509 vfsops = vp->v_mount->mnt_op; 510 } else { 511 /* 'type' is userspace */ 512 error = mount_get_vfsops(type, &vfsops); 513 if (error != 0) 514 goto done; 515 vfsopsrele = true; 516 } 517 } 518 519 if (data != NULL && data_seg == UIO_USERSPACE) { 520 if (data_len == 0) { 521 /* No length supplied, use default for filesystem */ 522 data_len = vfsops->vfs_min_mount_data; 523 if (data_len > VFS_MAX_MOUNT_DATA) { 524 error = EINVAL; 525 goto done; 526 } 527 /* 528 * Hopefully a longer buffer won't make copyin() fail. 529 * For compatibility with 3.0 and earlier. 530 */ 531 if (flags & MNT_UPDATE 532 && data_len < sizeof (struct mnt_export_args30)) 533 data_len = sizeof (struct mnt_export_args30); 534 } 535 data_buf = kmem_alloc(data_len, KM_SLEEP); 536 537 /* NFS needs the buffer even for mnt_getargs .... */ 538 error = copyin(data, data_buf, data_len); 539 if (error != 0) 540 goto done; 541 } 542 543 if (flags & MNT_GETARGS) { 544 if (data_len == 0) { 545 error = EINVAL; 546 goto done; 547 } 548 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 549 if (error != 0) 550 goto done; 551 if (data_seg == UIO_USERSPACE) 552 error = copyout(data_buf, data, data_len); 553 *retval = data_len; 554 } else if (flags & MNT_UPDATE) { 555 error = mount_update(l, vp, path, flags, data_buf, &data_len); 556 } else { 557 /* Locking is handled internally in mount_domount(). */ 558 KASSERT(vfsopsrele == true); 559 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 560 &data_len); 561 vfsopsrele = false; 562 } 563 564 done: 565 if (vfsopsrele) 566 vfs_delref(vfsops); 567 if (vp != NULL) { 568 vrele(vp); 569 } 570 if (data_buf != data) 571 kmem_free(data_buf, data_len); 572 return (error); 573 } 574 575 /* 576 * Scan all active processes to see if any of them have a current 577 * or root directory onto which the new filesystem has just been 578 * mounted. If so, replace them with the new mount point. 579 */ 580 void 581 checkdirs(struct vnode *olddp) 582 { 583 struct cwdinfo *cwdi; 584 struct vnode *newdp, *rele1, *rele2; 585 struct proc *p; 586 bool retry; 587 588 if (olddp->v_usecount == 1) 589 return; 590 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 591 panic("mount: lost mount"); 592 593 do { 594 retry = false; 595 mutex_enter(proc_lock); 596 PROCLIST_FOREACH(p, &allproc) { 597 if ((cwdi = p->p_cwdi) == NULL) 598 continue; 599 /* 600 * Can't change to the old directory any more, 601 * so even if we see a stale value it's not a 602 * problem. 603 */ 604 if (cwdi->cwdi_cdir != olddp && 605 cwdi->cwdi_rdir != olddp) 606 continue; 607 retry = true; 608 rele1 = NULL; 609 rele2 = NULL; 610 atomic_inc_uint(&cwdi->cwdi_refcnt); 611 mutex_exit(proc_lock); 612 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 613 if (cwdi->cwdi_cdir == olddp) { 614 rele1 = cwdi->cwdi_cdir; 615 vref(newdp); 616 cwdi->cwdi_cdir = newdp; 617 } 618 if (cwdi->cwdi_rdir == olddp) { 619 rele2 = cwdi->cwdi_rdir; 620 vref(newdp); 621 cwdi->cwdi_rdir = newdp; 622 } 623 rw_exit(&cwdi->cwdi_lock); 624 cwdfree(cwdi); 625 if (rele1 != NULL) 626 vrele(rele1); 627 if (rele2 != NULL) 628 vrele(rele2); 629 mutex_enter(proc_lock); 630 break; 631 } 632 mutex_exit(proc_lock); 633 } while (retry); 634 635 if (rootvnode == olddp) { 636 vrele(rootvnode); 637 vref(newdp); 638 rootvnode = newdp; 639 } 640 vput(newdp); 641 } 642 643 /* 644 * Unmount a file system. 645 * 646 * Note: unmount takes a path to the vnode mounted on as argument, 647 * not special file (as before). 648 */ 649 /* ARGSUSED */ 650 int 651 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 652 { 653 /* { 654 syscallarg(const char *) path; 655 syscallarg(int) flags; 656 } */ 657 struct vnode *vp; 658 struct mount *mp; 659 int error; 660 struct nameidata nd; 661 662 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 663 SCARG(uap, path)); 664 if ((error = namei(&nd)) != 0) 665 return (error); 666 vp = nd.ni_vp; 667 mp = vp->v_mount; 668 atomic_inc_uint(&mp->mnt_refcnt); 669 VOP_UNLOCK(vp); 670 671 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 672 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 673 if (error) { 674 vrele(vp); 675 vfs_destroy(mp); 676 return (error); 677 } 678 679 /* 680 * Don't allow unmounting the root file system. 681 */ 682 if (mp->mnt_flag & MNT_ROOTFS) { 683 vrele(vp); 684 vfs_destroy(mp); 685 return (EINVAL); 686 } 687 688 /* 689 * Must be the root of the filesystem 690 */ 691 if ((vp->v_vflag & VV_ROOT) == 0) { 692 vrele(vp); 693 vfs_destroy(mp); 694 return (EINVAL); 695 } 696 697 vrele(vp); 698 error = dounmount(mp, SCARG(uap, flags), l); 699 vfs_destroy(mp); 700 return error; 701 } 702 703 /* 704 * Do the actual file system unmount. File system is assumed to have 705 * been locked by the caller. 706 * 707 * => Caller hold reference to the mount, explicitly for dounmount(). 708 */ 709 int 710 dounmount(struct mount *mp, int flags, struct lwp *l) 711 { 712 struct vnode *coveredvp; 713 int error; 714 int async; 715 int used_syncer; 716 717 #if NVERIEXEC > 0 718 error = veriexec_unmountchk(mp); 719 if (error) 720 return (error); 721 #endif /* NVERIEXEC > 0 */ 722 723 /* 724 * XXX Freeze syncer. Must do this before locking the 725 * mount point. See dounmount() for details. 726 */ 727 mutex_enter(&syncer_mutex); 728 rw_enter(&mp->mnt_unmounting, RW_WRITER); 729 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 730 rw_exit(&mp->mnt_unmounting); 731 mutex_exit(&syncer_mutex); 732 return ENOENT; 733 } 734 735 used_syncer = (mp->mnt_syncer != NULL); 736 737 /* 738 * XXX Syncer must be frozen when we get here. This should really 739 * be done on a per-mountpoint basis, but the syncer doesn't work 740 * like that. 741 * 742 * The caller of dounmount() must acquire syncer_mutex because 743 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 744 * order, and we must preserve that order to avoid deadlock. 745 * 746 * So, if the file system did not use the syncer, now is 747 * the time to release the syncer_mutex. 748 */ 749 if (used_syncer == 0) 750 mutex_exit(&syncer_mutex); 751 752 mp->mnt_iflag |= IMNT_UNMOUNT; 753 async = mp->mnt_flag & MNT_ASYNC; 754 mp->mnt_flag &= ~MNT_ASYNC; 755 cache_purgevfs(mp); /* remove cache entries for this file sys */ 756 if (mp->mnt_syncer != NULL) 757 vfs_deallocate_syncvnode(mp); 758 error = 0; 759 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 760 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 761 } 762 vfs_scrubvnlist(mp); 763 if (error == 0 || (flags & MNT_FORCE)) 764 error = VFS_UNMOUNT(mp, flags); 765 if (error) { 766 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 767 (void) vfs_allocate_syncvnode(mp); 768 mp->mnt_iflag &= ~IMNT_UNMOUNT; 769 mp->mnt_flag |= async; 770 rw_exit(&mp->mnt_unmounting); 771 if (used_syncer) 772 mutex_exit(&syncer_mutex); 773 return (error); 774 } 775 vfs_scrubvnlist(mp); 776 mutex_enter(&mountlist_lock); 777 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 778 coveredvp->v_mountedhere = NULL; 779 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 780 mp->mnt_iflag |= IMNT_GONE; 781 mutex_exit(&mountlist_lock); 782 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 783 panic("unmount: dangling vnode"); 784 if (used_syncer) 785 mutex_exit(&syncer_mutex); 786 vfs_hooks_unmount(mp); 787 rw_exit(&mp->mnt_unmounting); 788 vfs_destroy(mp); /* reference from mount() */ 789 if (coveredvp != NULLVP) 790 vrele(coveredvp); 791 return (0); 792 } 793 794 /* 795 * Sync each mounted filesystem. 796 */ 797 #ifdef DEBUG 798 int syncprt = 0; 799 struct ctldebug debug0 = { "syncprt", &syncprt }; 800 #endif 801 802 /* ARGSUSED */ 803 int 804 sys_sync(struct lwp *l, const void *v, register_t *retval) 805 { 806 struct mount *mp, *nmp; 807 int asyncflag; 808 809 if (l == NULL) 810 l = &lwp0; 811 812 mutex_enter(&mountlist_lock); 813 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 814 mp = nmp) { 815 if (vfs_busy(mp, &nmp)) { 816 continue; 817 } 818 mutex_enter(&mp->mnt_updating); 819 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 820 asyncflag = mp->mnt_flag & MNT_ASYNC; 821 mp->mnt_flag &= ~MNT_ASYNC; 822 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 823 if (asyncflag) 824 mp->mnt_flag |= MNT_ASYNC; 825 } 826 mutex_exit(&mp->mnt_updating); 827 vfs_unbusy(mp, false, &nmp); 828 } 829 mutex_exit(&mountlist_lock); 830 #ifdef DEBUG 831 if (syncprt) 832 vfs_bufstats(); 833 #endif /* DEBUG */ 834 return (0); 835 } 836 837 /* 838 * Change filesystem quotas. 839 */ 840 /* ARGSUSED */ 841 int 842 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 843 { 844 /* { 845 syscallarg(const char *) path; 846 syscallarg(int) cmd; 847 syscallarg(int) uid; 848 syscallarg(void *) arg; 849 } */ 850 struct mount *mp; 851 int error; 852 struct vnode *vp; 853 854 error = namei_simple_user(SCARG(uap, path), 855 NSM_FOLLOW_TRYEMULROOT, &vp); 856 if (error != 0) 857 return (error); 858 mp = vp->v_mount; 859 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 860 SCARG(uap, arg)); 861 vrele(vp); 862 return (error); 863 } 864 865 int 866 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 867 int root) 868 { 869 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 870 int error = 0; 871 872 /* 873 * If MNT_NOWAIT or MNT_LAZY is specified, do not 874 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 875 * overrides MNT_NOWAIT. 876 */ 877 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 878 (flags != MNT_WAIT && flags != 0)) { 879 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 880 goto done; 881 } 882 883 /* Get the filesystem stats now */ 884 memset(sp, 0, sizeof(*sp)); 885 if ((error = VFS_STATVFS(mp, sp)) != 0) { 886 return error; 887 } 888 889 if (cwdi->cwdi_rdir == NULL) 890 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 891 done: 892 if (cwdi->cwdi_rdir != NULL) { 893 size_t len; 894 char *bp; 895 char c; 896 char *path = PNBUF_GET(); 897 898 bp = path + MAXPATHLEN; 899 *--bp = '\0'; 900 rw_enter(&cwdi->cwdi_lock, RW_READER); 901 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 902 MAXPATHLEN / 2, 0, l); 903 rw_exit(&cwdi->cwdi_lock); 904 if (error) { 905 PNBUF_PUT(path); 906 return error; 907 } 908 len = strlen(bp); 909 if (len != 1) { 910 /* 911 * for mount points that are below our root, we can see 912 * them, so we fix up the pathname and return them. The 913 * rest we cannot see, so we don't allow viewing the 914 * data. 915 */ 916 if (strncmp(bp, sp->f_mntonname, len) == 0 && 917 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 918 (void)strlcpy(sp->f_mntonname, 919 c == '\0' ? "/" : &sp->f_mntonname[len], 920 sizeof(sp->f_mntonname)); 921 } else { 922 if (root) 923 (void)strlcpy(sp->f_mntonname, "/", 924 sizeof(sp->f_mntonname)); 925 else 926 error = EPERM; 927 } 928 } 929 PNBUF_PUT(path); 930 } 931 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 932 return error; 933 } 934 935 /* 936 * Get filesystem statistics by path. 937 */ 938 int 939 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 940 { 941 struct mount *mp; 942 int error; 943 struct vnode *vp; 944 945 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 946 if (error != 0) 947 return error; 948 mp = vp->v_mount; 949 error = dostatvfs(mp, sb, l, flags, 1); 950 vrele(vp); 951 return error; 952 } 953 954 /* ARGSUSED */ 955 int 956 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 957 { 958 /* { 959 syscallarg(const char *) path; 960 syscallarg(struct statvfs *) buf; 961 syscallarg(int) flags; 962 } */ 963 struct statvfs *sb; 964 int error; 965 966 sb = STATVFSBUF_GET(); 967 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 968 if (error == 0) 969 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 970 STATVFSBUF_PUT(sb); 971 return error; 972 } 973 974 /* 975 * Get filesystem statistics by fd. 976 */ 977 int 978 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 979 { 980 file_t *fp; 981 struct mount *mp; 982 int error; 983 984 /* fd_getvnode() will use the descriptor for us */ 985 if ((error = fd_getvnode(fd, &fp)) != 0) 986 return (error); 987 mp = ((struct vnode *)fp->f_data)->v_mount; 988 error = dostatvfs(mp, sb, curlwp, flags, 1); 989 fd_putfile(fd); 990 return error; 991 } 992 993 /* ARGSUSED */ 994 int 995 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 996 { 997 /* { 998 syscallarg(int) fd; 999 syscallarg(struct statvfs *) buf; 1000 syscallarg(int) flags; 1001 } */ 1002 struct statvfs *sb; 1003 int error; 1004 1005 sb = STATVFSBUF_GET(); 1006 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1007 if (error == 0) 1008 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1009 STATVFSBUF_PUT(sb); 1010 return error; 1011 } 1012 1013 1014 /* 1015 * Get statistics on all filesystems. 1016 */ 1017 int 1018 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1019 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1020 register_t *retval) 1021 { 1022 int root = 0; 1023 struct proc *p = l->l_proc; 1024 struct mount *mp, *nmp; 1025 struct statvfs *sb; 1026 size_t count, maxcount; 1027 int error = 0; 1028 1029 sb = STATVFSBUF_GET(); 1030 maxcount = bufsize / entry_sz; 1031 mutex_enter(&mountlist_lock); 1032 count = 0; 1033 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1034 mp = nmp) { 1035 if (vfs_busy(mp, &nmp)) { 1036 continue; 1037 } 1038 if (sfsp && count < maxcount) { 1039 error = dostatvfs(mp, sb, l, flags, 0); 1040 if (error) { 1041 vfs_unbusy(mp, false, &nmp); 1042 error = 0; 1043 continue; 1044 } 1045 error = copyfn(sb, sfsp, entry_sz); 1046 if (error) { 1047 vfs_unbusy(mp, false, NULL); 1048 goto out; 1049 } 1050 sfsp = (char *)sfsp + entry_sz; 1051 root |= strcmp(sb->f_mntonname, "/") == 0; 1052 } 1053 count++; 1054 vfs_unbusy(mp, false, &nmp); 1055 } 1056 mutex_exit(&mountlist_lock); 1057 1058 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1059 /* 1060 * fake a root entry 1061 */ 1062 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1063 sb, l, flags, 1); 1064 if (error != 0) 1065 goto out; 1066 if (sfsp) { 1067 error = copyfn(sb, sfsp, entry_sz); 1068 if (error != 0) 1069 goto out; 1070 } 1071 count++; 1072 } 1073 if (sfsp && count > maxcount) 1074 *retval = maxcount; 1075 else 1076 *retval = count; 1077 out: 1078 STATVFSBUF_PUT(sb); 1079 return error; 1080 } 1081 1082 int 1083 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1084 { 1085 /* { 1086 syscallarg(struct statvfs *) buf; 1087 syscallarg(size_t) bufsize; 1088 syscallarg(int) flags; 1089 } */ 1090 1091 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1092 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1093 } 1094 1095 /* 1096 * Change current working directory to a given file descriptor. 1097 */ 1098 /* ARGSUSED */ 1099 int 1100 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1101 { 1102 /* { 1103 syscallarg(int) fd; 1104 } */ 1105 struct proc *p = l->l_proc; 1106 struct cwdinfo *cwdi; 1107 struct vnode *vp, *tdp; 1108 struct mount *mp; 1109 file_t *fp; 1110 int error, fd; 1111 1112 /* fd_getvnode() will use the descriptor for us */ 1113 fd = SCARG(uap, fd); 1114 if ((error = fd_getvnode(fd, &fp)) != 0) 1115 return (error); 1116 vp = fp->f_data; 1117 1118 vref(vp); 1119 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1120 if (vp->v_type != VDIR) 1121 error = ENOTDIR; 1122 else 1123 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1124 if (error) { 1125 vput(vp); 1126 goto out; 1127 } 1128 while ((mp = vp->v_mountedhere) != NULL) { 1129 error = vfs_busy(mp, NULL); 1130 vput(vp); 1131 if (error != 0) 1132 goto out; 1133 error = VFS_ROOT(mp, &tdp); 1134 vfs_unbusy(mp, false, NULL); 1135 if (error) 1136 goto out; 1137 vp = tdp; 1138 } 1139 VOP_UNLOCK(vp); 1140 1141 /* 1142 * Disallow changing to a directory not under the process's 1143 * current root directory (if there is one). 1144 */ 1145 cwdi = p->p_cwdi; 1146 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1147 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1148 vrele(vp); 1149 error = EPERM; /* operation not permitted */ 1150 } else { 1151 vrele(cwdi->cwdi_cdir); 1152 cwdi->cwdi_cdir = vp; 1153 } 1154 rw_exit(&cwdi->cwdi_lock); 1155 1156 out: 1157 fd_putfile(fd); 1158 return (error); 1159 } 1160 1161 /* 1162 * Change this process's notion of the root directory to a given file 1163 * descriptor. 1164 */ 1165 int 1166 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1167 { 1168 struct proc *p = l->l_proc; 1169 struct vnode *vp; 1170 file_t *fp; 1171 int error, fd = SCARG(uap, fd); 1172 1173 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1174 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1175 return error; 1176 /* fd_getvnode() will use the descriptor for us */ 1177 if ((error = fd_getvnode(fd, &fp)) != 0) 1178 return error; 1179 vp = fp->f_data; 1180 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1181 if (vp->v_type != VDIR) 1182 error = ENOTDIR; 1183 else 1184 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1185 VOP_UNLOCK(vp); 1186 if (error) 1187 goto out; 1188 vref(vp); 1189 1190 change_root(p->p_cwdi, vp, l); 1191 1192 out: 1193 fd_putfile(fd); 1194 return (error); 1195 } 1196 1197 /* 1198 * Change current working directory (``.''). 1199 */ 1200 /* ARGSUSED */ 1201 int 1202 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1203 { 1204 /* { 1205 syscallarg(const char *) path; 1206 } */ 1207 struct proc *p = l->l_proc; 1208 struct cwdinfo *cwdi; 1209 int error; 1210 struct vnode *vp; 1211 1212 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1213 &vp, l)) != 0) 1214 return (error); 1215 cwdi = p->p_cwdi; 1216 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1217 vrele(cwdi->cwdi_cdir); 1218 cwdi->cwdi_cdir = vp; 1219 rw_exit(&cwdi->cwdi_lock); 1220 return (0); 1221 } 1222 1223 /* 1224 * Change notion of root (``/'') directory. 1225 */ 1226 /* ARGSUSED */ 1227 int 1228 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1229 { 1230 /* { 1231 syscallarg(const char *) path; 1232 } */ 1233 struct proc *p = l->l_proc; 1234 int error; 1235 struct vnode *vp; 1236 1237 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1238 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1239 return (error); 1240 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1241 &vp, l)) != 0) 1242 return (error); 1243 1244 change_root(p->p_cwdi, vp, l); 1245 1246 return (0); 1247 } 1248 1249 /* 1250 * Common routine for chroot and fchroot. 1251 * NB: callers need to properly authorize the change root operation. 1252 */ 1253 void 1254 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1255 { 1256 1257 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1258 if (cwdi->cwdi_rdir != NULL) 1259 vrele(cwdi->cwdi_rdir); 1260 cwdi->cwdi_rdir = vp; 1261 1262 /* 1263 * Prevent escaping from chroot by putting the root under 1264 * the working directory. Silently chdir to / if we aren't 1265 * already there. 1266 */ 1267 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1268 /* 1269 * XXX would be more failsafe to change directory to a 1270 * deadfs node here instead 1271 */ 1272 vrele(cwdi->cwdi_cdir); 1273 vref(vp); 1274 cwdi->cwdi_cdir = vp; 1275 } 1276 rw_exit(&cwdi->cwdi_lock); 1277 } 1278 1279 /* 1280 * Common routine for chroot and chdir. 1281 */ 1282 int 1283 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1284 { 1285 struct nameidata nd; 1286 int error; 1287 1288 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, where, 1289 path); 1290 if ((error = namei(&nd)) != 0) 1291 return (error); 1292 *vpp = nd.ni_vp; 1293 if ((*vpp)->v_type != VDIR) 1294 error = ENOTDIR; 1295 else 1296 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1297 1298 if (error) 1299 vput(*vpp); 1300 else 1301 VOP_UNLOCK(*vpp); 1302 return (error); 1303 } 1304 1305 /* 1306 * Check permissions, allocate an open file structure, 1307 * and call the device open routine if any. 1308 */ 1309 int 1310 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1311 { 1312 /* { 1313 syscallarg(const char *) path; 1314 syscallarg(int) flags; 1315 syscallarg(int) mode; 1316 } */ 1317 struct proc *p = l->l_proc; 1318 struct cwdinfo *cwdi = p->p_cwdi; 1319 file_t *fp; 1320 struct vnode *vp; 1321 int flags, cmode; 1322 int type, indx, error; 1323 struct flock lf; 1324 struct nameidata nd; 1325 1326 flags = FFLAGS(SCARG(uap, flags)); 1327 if ((flags & (FREAD | FWRITE)) == 0) 1328 return (EINVAL); 1329 if ((error = fd_allocfile(&fp, &indx)) != 0) 1330 return (error); 1331 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1332 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1333 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1334 SCARG(uap, path)); 1335 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1336 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1337 fd_abort(p, fp, indx); 1338 if ((error == EDUPFD || error == EMOVEFD) && 1339 l->l_dupfd >= 0 && /* XXX from fdopen */ 1340 (error = 1341 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1342 *retval = indx; 1343 return (0); 1344 } 1345 if (error == ERESTART) 1346 error = EINTR; 1347 return (error); 1348 } 1349 1350 l->l_dupfd = 0; 1351 vp = nd.ni_vp; 1352 fp->f_flag = flags & FMASK; 1353 fp->f_type = DTYPE_VNODE; 1354 fp->f_ops = &vnops; 1355 fp->f_data = vp; 1356 if (flags & (O_EXLOCK | O_SHLOCK)) { 1357 lf.l_whence = SEEK_SET; 1358 lf.l_start = 0; 1359 lf.l_len = 0; 1360 if (flags & O_EXLOCK) 1361 lf.l_type = F_WRLCK; 1362 else 1363 lf.l_type = F_RDLCK; 1364 type = F_FLOCK; 1365 if ((flags & FNONBLOCK) == 0) 1366 type |= F_WAIT; 1367 VOP_UNLOCK(vp); 1368 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1369 if (error) { 1370 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1371 fd_abort(p, fp, indx); 1372 return (error); 1373 } 1374 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1375 atomic_or_uint(&fp->f_flag, FHASLOCK); 1376 } 1377 VOP_UNLOCK(vp); 1378 *retval = indx; 1379 fd_affix(p, fp, indx); 1380 return (0); 1381 } 1382 1383 static void 1384 vfs__fhfree(fhandle_t *fhp) 1385 { 1386 size_t fhsize; 1387 1388 if (fhp == NULL) { 1389 return; 1390 } 1391 fhsize = FHANDLE_SIZE(fhp); 1392 kmem_free(fhp, fhsize); 1393 } 1394 1395 /* 1396 * vfs_composefh: compose a filehandle. 1397 */ 1398 1399 int 1400 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1401 { 1402 struct mount *mp; 1403 struct fid *fidp; 1404 int error; 1405 size_t needfhsize; 1406 size_t fidsize; 1407 1408 mp = vp->v_mount; 1409 fidp = NULL; 1410 if (*fh_size < FHANDLE_SIZE_MIN) { 1411 fidsize = 0; 1412 } else { 1413 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1414 if (fhp != NULL) { 1415 memset(fhp, 0, *fh_size); 1416 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1417 fidp = &fhp->fh_fid; 1418 } 1419 } 1420 error = VFS_VPTOFH(vp, fidp, &fidsize); 1421 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1422 if (error == 0 && *fh_size < needfhsize) { 1423 error = E2BIG; 1424 } 1425 *fh_size = needfhsize; 1426 return error; 1427 } 1428 1429 int 1430 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1431 { 1432 struct mount *mp; 1433 fhandle_t *fhp; 1434 size_t fhsize; 1435 size_t fidsize; 1436 int error; 1437 1438 *fhpp = NULL; 1439 mp = vp->v_mount; 1440 fidsize = 0; 1441 error = VFS_VPTOFH(vp, NULL, &fidsize); 1442 KASSERT(error != 0); 1443 if (error != E2BIG) { 1444 goto out; 1445 } 1446 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1447 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1448 if (fhp == NULL) { 1449 error = ENOMEM; 1450 goto out; 1451 } 1452 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1453 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1454 if (error == 0) { 1455 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1456 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1457 *fhpp = fhp; 1458 } else { 1459 kmem_free(fhp, fhsize); 1460 } 1461 out: 1462 return error; 1463 } 1464 1465 void 1466 vfs_composefh_free(fhandle_t *fhp) 1467 { 1468 1469 vfs__fhfree(fhp); 1470 } 1471 1472 /* 1473 * vfs_fhtovp: lookup a vnode by a filehandle. 1474 */ 1475 1476 int 1477 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1478 { 1479 struct mount *mp; 1480 int error; 1481 1482 *vpp = NULL; 1483 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1484 if (mp == NULL) { 1485 error = ESTALE; 1486 goto out; 1487 } 1488 if (mp->mnt_op->vfs_fhtovp == NULL) { 1489 error = EOPNOTSUPP; 1490 goto out; 1491 } 1492 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1493 out: 1494 return error; 1495 } 1496 1497 /* 1498 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1499 * the needed size. 1500 */ 1501 1502 int 1503 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1504 { 1505 fhandle_t *fhp; 1506 int error; 1507 1508 *fhpp = NULL; 1509 if (fhsize > FHANDLE_SIZE_MAX) { 1510 return EINVAL; 1511 } 1512 if (fhsize < FHANDLE_SIZE_MIN) { 1513 return EINVAL; 1514 } 1515 again: 1516 fhp = kmem_alloc(fhsize, KM_SLEEP); 1517 if (fhp == NULL) { 1518 return ENOMEM; 1519 } 1520 error = copyin(ufhp, fhp, fhsize); 1521 if (error == 0) { 1522 /* XXX this check shouldn't be here */ 1523 if (FHANDLE_SIZE(fhp) == fhsize) { 1524 *fhpp = fhp; 1525 return 0; 1526 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1527 /* 1528 * a kludge for nfsv2 padded handles. 1529 */ 1530 size_t sz; 1531 1532 sz = FHANDLE_SIZE(fhp); 1533 kmem_free(fhp, fhsize); 1534 fhsize = sz; 1535 goto again; 1536 } else { 1537 /* 1538 * userland told us wrong size. 1539 */ 1540 error = EINVAL; 1541 } 1542 } 1543 kmem_free(fhp, fhsize); 1544 return error; 1545 } 1546 1547 void 1548 vfs_copyinfh_free(fhandle_t *fhp) 1549 { 1550 1551 vfs__fhfree(fhp); 1552 } 1553 1554 /* 1555 * Get file handle system call 1556 */ 1557 int 1558 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1559 { 1560 /* { 1561 syscallarg(char *) fname; 1562 syscallarg(fhandle_t *) fhp; 1563 syscallarg(size_t *) fh_size; 1564 } */ 1565 struct vnode *vp; 1566 fhandle_t *fh; 1567 int error; 1568 struct nameidata nd; 1569 size_t sz; 1570 size_t usz; 1571 1572 /* 1573 * Must be super user 1574 */ 1575 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1576 0, NULL, NULL, NULL); 1577 if (error) 1578 return (error); 1579 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1580 SCARG(uap, fname)); 1581 error = namei(&nd); 1582 if (error) 1583 return (error); 1584 vp = nd.ni_vp; 1585 error = vfs_composefh_alloc(vp, &fh); 1586 vput(vp); 1587 if (error != 0) { 1588 goto out; 1589 } 1590 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1591 if (error != 0) { 1592 goto out; 1593 } 1594 sz = FHANDLE_SIZE(fh); 1595 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1596 if (error != 0) { 1597 goto out; 1598 } 1599 if (usz >= sz) { 1600 error = copyout(fh, SCARG(uap, fhp), sz); 1601 } else { 1602 error = E2BIG; 1603 } 1604 out: 1605 vfs_composefh_free(fh); 1606 return (error); 1607 } 1608 1609 /* 1610 * Open a file given a file handle. 1611 * 1612 * Check permissions, allocate an open file structure, 1613 * and call the device open routine if any. 1614 */ 1615 1616 int 1617 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1618 register_t *retval) 1619 { 1620 file_t *fp; 1621 struct vnode *vp = NULL; 1622 kauth_cred_t cred = l->l_cred; 1623 file_t *nfp; 1624 int type, indx, error=0; 1625 struct flock lf; 1626 struct vattr va; 1627 fhandle_t *fh; 1628 int flags; 1629 proc_t *p; 1630 1631 p = curproc; 1632 1633 /* 1634 * Must be super user 1635 */ 1636 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1637 0, NULL, NULL, NULL))) 1638 return (error); 1639 1640 flags = FFLAGS(oflags); 1641 if ((flags & (FREAD | FWRITE)) == 0) 1642 return (EINVAL); 1643 if ((flags & O_CREAT)) 1644 return (EINVAL); 1645 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1646 return (error); 1647 fp = nfp; 1648 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1649 if (error != 0) { 1650 goto bad; 1651 } 1652 error = vfs_fhtovp(fh, &vp); 1653 if (error != 0) { 1654 goto bad; 1655 } 1656 1657 /* Now do an effective vn_open */ 1658 1659 if (vp->v_type == VSOCK) { 1660 error = EOPNOTSUPP; 1661 goto bad; 1662 } 1663 error = vn_openchk(vp, cred, flags); 1664 if (error != 0) 1665 goto bad; 1666 if (flags & O_TRUNC) { 1667 VOP_UNLOCK(vp); /* XXX */ 1668 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1669 vattr_null(&va); 1670 va.va_size = 0; 1671 error = VOP_SETATTR(vp, &va, cred); 1672 if (error) 1673 goto bad; 1674 } 1675 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1676 goto bad; 1677 if (flags & FWRITE) { 1678 mutex_enter(&vp->v_interlock); 1679 vp->v_writecount++; 1680 mutex_exit(&vp->v_interlock); 1681 } 1682 1683 /* done with modified vn_open, now finish what sys_open does. */ 1684 1685 fp->f_flag = flags & FMASK; 1686 fp->f_type = DTYPE_VNODE; 1687 fp->f_ops = &vnops; 1688 fp->f_data = vp; 1689 if (flags & (O_EXLOCK | O_SHLOCK)) { 1690 lf.l_whence = SEEK_SET; 1691 lf.l_start = 0; 1692 lf.l_len = 0; 1693 if (flags & O_EXLOCK) 1694 lf.l_type = F_WRLCK; 1695 else 1696 lf.l_type = F_RDLCK; 1697 type = F_FLOCK; 1698 if ((flags & FNONBLOCK) == 0) 1699 type |= F_WAIT; 1700 VOP_UNLOCK(vp); 1701 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1702 if (error) { 1703 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1704 fd_abort(p, fp, indx); 1705 return (error); 1706 } 1707 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1708 atomic_or_uint(&fp->f_flag, FHASLOCK); 1709 } 1710 VOP_UNLOCK(vp); 1711 *retval = indx; 1712 fd_affix(p, fp, indx); 1713 vfs_copyinfh_free(fh); 1714 return (0); 1715 1716 bad: 1717 fd_abort(p, fp, indx); 1718 if (vp != NULL) 1719 vput(vp); 1720 vfs_copyinfh_free(fh); 1721 return (error); 1722 } 1723 1724 int 1725 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1726 { 1727 /* { 1728 syscallarg(const void *) fhp; 1729 syscallarg(size_t) fh_size; 1730 syscallarg(int) flags; 1731 } */ 1732 1733 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1734 SCARG(uap, flags), retval); 1735 } 1736 1737 int 1738 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1739 { 1740 int error; 1741 fhandle_t *fh; 1742 struct vnode *vp; 1743 1744 /* 1745 * Must be super user 1746 */ 1747 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1748 0, NULL, NULL, NULL))) 1749 return (error); 1750 1751 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1752 if (error != 0) 1753 return error; 1754 1755 error = vfs_fhtovp(fh, &vp); 1756 vfs_copyinfh_free(fh); 1757 if (error != 0) 1758 return error; 1759 1760 error = vn_stat(vp, sb); 1761 vput(vp); 1762 return error; 1763 } 1764 1765 1766 /* ARGSUSED */ 1767 int 1768 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1769 { 1770 /* { 1771 syscallarg(const void *) fhp; 1772 syscallarg(size_t) fh_size; 1773 syscallarg(struct stat *) sb; 1774 } */ 1775 struct stat sb; 1776 int error; 1777 1778 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1779 if (error) 1780 return error; 1781 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1782 } 1783 1784 int 1785 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1786 int flags) 1787 { 1788 fhandle_t *fh; 1789 struct mount *mp; 1790 struct vnode *vp; 1791 int error; 1792 1793 /* 1794 * Must be super user 1795 */ 1796 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1797 0, NULL, NULL, NULL))) 1798 return error; 1799 1800 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1801 if (error != 0) 1802 return error; 1803 1804 error = vfs_fhtovp(fh, &vp); 1805 vfs_copyinfh_free(fh); 1806 if (error != 0) 1807 return error; 1808 1809 mp = vp->v_mount; 1810 error = dostatvfs(mp, sb, l, flags, 1); 1811 vput(vp); 1812 return error; 1813 } 1814 1815 /* ARGSUSED */ 1816 int 1817 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1818 { 1819 /* { 1820 syscallarg(const void *) fhp; 1821 syscallarg(size_t) fh_size; 1822 syscallarg(struct statvfs *) buf; 1823 syscallarg(int) flags; 1824 } */ 1825 struct statvfs *sb = STATVFSBUF_GET(); 1826 int error; 1827 1828 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1829 SCARG(uap, flags)); 1830 if (error == 0) 1831 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1832 STATVFSBUF_PUT(sb); 1833 return error; 1834 } 1835 1836 /* 1837 * Create a special file. 1838 */ 1839 /* ARGSUSED */ 1840 int 1841 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1842 register_t *retval) 1843 { 1844 /* { 1845 syscallarg(const char *) path; 1846 syscallarg(mode_t) mode; 1847 syscallarg(dev_t) dev; 1848 } */ 1849 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1850 SCARG(uap, dev), retval, UIO_USERSPACE); 1851 } 1852 1853 int 1854 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1855 register_t *retval, enum uio_seg seg) 1856 { 1857 struct proc *p = l->l_proc; 1858 struct vnode *vp; 1859 struct vattr vattr; 1860 int error, optype; 1861 struct nameidata nd; 1862 char *path; 1863 const char *cpath; 1864 1865 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1866 0, NULL, NULL, NULL)) != 0) 1867 return (error); 1868 1869 optype = VOP_MKNOD_DESCOFFSET; 1870 1871 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1872 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1873 1874 if ((error = namei(&nd)) != 0) 1875 goto out; 1876 vp = nd.ni_vp; 1877 if (vp != NULL) 1878 error = EEXIST; 1879 else { 1880 vattr_null(&vattr); 1881 /* We will read cwdi->cwdi_cmask unlocked. */ 1882 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1883 vattr.va_rdev = dev; 1884 1885 switch (mode & S_IFMT) { 1886 case S_IFMT: /* used by badsect to flag bad sectors */ 1887 vattr.va_type = VBAD; 1888 break; 1889 case S_IFCHR: 1890 vattr.va_type = VCHR; 1891 break; 1892 case S_IFBLK: 1893 vattr.va_type = VBLK; 1894 break; 1895 case S_IFWHT: 1896 optype = VOP_WHITEOUT_DESCOFFSET; 1897 break; 1898 case S_IFREG: 1899 #if NVERIEXEC > 0 1900 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1901 O_CREAT); 1902 #endif /* NVERIEXEC > 0 */ 1903 vattr.va_type = VREG; 1904 vattr.va_rdev = VNOVAL; 1905 optype = VOP_CREATE_DESCOFFSET; 1906 break; 1907 default: 1908 error = EINVAL; 1909 break; 1910 } 1911 } 1912 if (!error) { 1913 switch (optype) { 1914 case VOP_WHITEOUT_DESCOFFSET: 1915 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1916 if (error) 1917 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1918 vput(nd.ni_dvp); 1919 break; 1920 1921 case VOP_MKNOD_DESCOFFSET: 1922 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1923 &nd.ni_cnd, &vattr); 1924 if (error == 0) 1925 vput(nd.ni_vp); 1926 break; 1927 1928 case VOP_CREATE_DESCOFFSET: 1929 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1930 &nd.ni_cnd, &vattr); 1931 if (error == 0) 1932 vput(nd.ni_vp); 1933 break; 1934 } 1935 } else { 1936 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1937 if (nd.ni_dvp == vp) 1938 vrele(nd.ni_dvp); 1939 else 1940 vput(nd.ni_dvp); 1941 if (vp) 1942 vrele(vp); 1943 } 1944 out: 1945 VERIEXEC_PATH_PUT(path); 1946 return (error); 1947 } 1948 1949 /* 1950 * Create a named pipe. 1951 */ 1952 /* ARGSUSED */ 1953 int 1954 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1955 { 1956 /* { 1957 syscallarg(const char *) path; 1958 syscallarg(int) mode; 1959 } */ 1960 struct proc *p = l->l_proc; 1961 struct vattr vattr; 1962 int error; 1963 struct nameidata nd; 1964 1965 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1966 SCARG(uap, path)); 1967 if ((error = namei(&nd)) != 0) 1968 return (error); 1969 if (nd.ni_vp != NULL) { 1970 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1971 if (nd.ni_dvp == nd.ni_vp) 1972 vrele(nd.ni_dvp); 1973 else 1974 vput(nd.ni_dvp); 1975 vrele(nd.ni_vp); 1976 return (EEXIST); 1977 } 1978 vattr_null(&vattr); 1979 vattr.va_type = VFIFO; 1980 /* We will read cwdi->cwdi_cmask unlocked. */ 1981 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1982 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1983 if (error == 0) 1984 vput(nd.ni_vp); 1985 return (error); 1986 } 1987 1988 /* 1989 * Make a hard file link. 1990 */ 1991 /* ARGSUSED */ 1992 int 1993 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1994 { 1995 /* { 1996 syscallarg(const char *) path; 1997 syscallarg(const char *) link; 1998 } */ 1999 struct vnode *vp; 2000 struct nameidata nd; 2001 int error; 2002 2003 error = namei_simple_user(SCARG(uap, path), 2004 NSM_FOLLOW_TRYEMULROOT, &vp); 2005 if (error != 0) 2006 return (error); 2007 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2008 SCARG(uap, link)); 2009 if ((error = namei(&nd)) != 0) 2010 goto out; 2011 if (nd.ni_vp) { 2012 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2013 if (nd.ni_dvp == nd.ni_vp) 2014 vrele(nd.ni_dvp); 2015 else 2016 vput(nd.ni_dvp); 2017 vrele(nd.ni_vp); 2018 error = EEXIST; 2019 goto out; 2020 } 2021 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2022 out: 2023 vrele(vp); 2024 return (error); 2025 } 2026 2027 int 2028 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2029 { 2030 struct proc *p = curproc; 2031 struct vattr vattr; 2032 char *path; 2033 int error; 2034 struct nameidata nd; 2035 2036 path = PNBUF_GET(); 2037 if (seg == UIO_USERSPACE) { 2038 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2039 goto out; 2040 } else { 2041 KASSERT(strlen(patharg) < MAXPATHLEN); 2042 strcpy(path, patharg); 2043 } 2044 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, link); 2045 if ((error = namei(&nd)) != 0) 2046 goto out; 2047 if (nd.ni_vp) { 2048 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2049 if (nd.ni_dvp == nd.ni_vp) 2050 vrele(nd.ni_dvp); 2051 else 2052 vput(nd.ni_dvp); 2053 vrele(nd.ni_vp); 2054 error = EEXIST; 2055 goto out; 2056 } 2057 vattr_null(&vattr); 2058 vattr.va_type = VLNK; 2059 /* We will read cwdi->cwdi_cmask unlocked. */ 2060 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2061 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2062 if (error == 0) 2063 vput(nd.ni_vp); 2064 out: 2065 PNBUF_PUT(path); 2066 return (error); 2067 } 2068 2069 /* 2070 * Make a symbolic link. 2071 */ 2072 /* ARGSUSED */ 2073 int 2074 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2075 { 2076 /* { 2077 syscallarg(const char *) path; 2078 syscallarg(const char *) link; 2079 } */ 2080 2081 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link), 2082 UIO_USERSPACE); 2083 } 2084 2085 /* 2086 * Delete a whiteout from the filesystem. 2087 */ 2088 /* ARGSUSED */ 2089 int 2090 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2091 { 2092 /* { 2093 syscallarg(const char *) path; 2094 } */ 2095 int error; 2096 struct nameidata nd; 2097 2098 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2099 UIO_USERSPACE, SCARG(uap, path)); 2100 error = namei(&nd); 2101 if (error) 2102 return (error); 2103 2104 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2105 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2106 if (nd.ni_dvp == nd.ni_vp) 2107 vrele(nd.ni_dvp); 2108 else 2109 vput(nd.ni_dvp); 2110 if (nd.ni_vp) 2111 vrele(nd.ni_vp); 2112 return (EEXIST); 2113 } 2114 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2115 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2116 vput(nd.ni_dvp); 2117 return (error); 2118 } 2119 2120 /* 2121 * Delete a name from the filesystem. 2122 */ 2123 /* ARGSUSED */ 2124 int 2125 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2126 { 2127 /* { 2128 syscallarg(const char *) path; 2129 } */ 2130 2131 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2132 } 2133 2134 int 2135 do_sys_unlink(const char *arg, enum uio_seg seg) 2136 { 2137 struct vnode *vp; 2138 int error; 2139 struct nameidata nd; 2140 char *path; 2141 const char *cpath; 2142 2143 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2144 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2145 2146 if ((error = namei(&nd)) != 0) 2147 goto out; 2148 vp = nd.ni_vp; 2149 2150 /* 2151 * The root of a mounted filesystem cannot be deleted. 2152 */ 2153 if (vp->v_vflag & VV_ROOT) { 2154 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2155 if (nd.ni_dvp == vp) 2156 vrele(nd.ni_dvp); 2157 else 2158 vput(nd.ni_dvp); 2159 vput(vp); 2160 error = EBUSY; 2161 goto out; 2162 } 2163 2164 #if NVERIEXEC > 0 2165 /* Handle remove requests for veriexec entries. */ 2166 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2167 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2168 if (nd.ni_dvp == vp) 2169 vrele(nd.ni_dvp); 2170 else 2171 vput(nd.ni_dvp); 2172 vput(vp); 2173 goto out; 2174 } 2175 #endif /* NVERIEXEC > 0 */ 2176 2177 #ifdef FILEASSOC 2178 (void)fileassoc_file_delete(vp); 2179 #endif /* FILEASSOC */ 2180 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2181 out: 2182 VERIEXEC_PATH_PUT(path); 2183 return (error); 2184 } 2185 2186 /* 2187 * Reposition read/write file offset. 2188 */ 2189 int 2190 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2191 { 2192 /* { 2193 syscallarg(int) fd; 2194 syscallarg(int) pad; 2195 syscallarg(off_t) offset; 2196 syscallarg(int) whence; 2197 } */ 2198 kauth_cred_t cred = l->l_cred; 2199 file_t *fp; 2200 struct vnode *vp; 2201 struct vattr vattr; 2202 off_t newoff; 2203 int error, fd; 2204 2205 fd = SCARG(uap, fd); 2206 2207 if ((fp = fd_getfile(fd)) == NULL) 2208 return (EBADF); 2209 2210 vp = fp->f_data; 2211 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2212 error = ESPIPE; 2213 goto out; 2214 } 2215 2216 switch (SCARG(uap, whence)) { 2217 case SEEK_CUR: 2218 newoff = fp->f_offset + SCARG(uap, offset); 2219 break; 2220 case SEEK_END: 2221 error = VOP_GETATTR(vp, &vattr, cred); 2222 if (error) { 2223 goto out; 2224 } 2225 newoff = SCARG(uap, offset) + vattr.va_size; 2226 break; 2227 case SEEK_SET: 2228 newoff = SCARG(uap, offset); 2229 break; 2230 default: 2231 error = EINVAL; 2232 goto out; 2233 } 2234 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2235 *(off_t *)retval = fp->f_offset = newoff; 2236 } 2237 out: 2238 fd_putfile(fd); 2239 return (error); 2240 } 2241 2242 /* 2243 * Positional read system call. 2244 */ 2245 int 2246 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2247 { 2248 /* { 2249 syscallarg(int) fd; 2250 syscallarg(void *) buf; 2251 syscallarg(size_t) nbyte; 2252 syscallarg(off_t) offset; 2253 } */ 2254 file_t *fp; 2255 struct vnode *vp; 2256 off_t offset; 2257 int error, fd = SCARG(uap, fd); 2258 2259 if ((fp = fd_getfile(fd)) == NULL) 2260 return (EBADF); 2261 2262 if ((fp->f_flag & FREAD) == 0) { 2263 fd_putfile(fd); 2264 return (EBADF); 2265 } 2266 2267 vp = fp->f_data; 2268 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2269 error = ESPIPE; 2270 goto out; 2271 } 2272 2273 offset = SCARG(uap, offset); 2274 2275 /* 2276 * XXX This works because no file systems actually 2277 * XXX take any action on the seek operation. 2278 */ 2279 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2280 goto out; 2281 2282 /* dofileread() will unuse the descriptor for us */ 2283 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2284 &offset, 0, retval)); 2285 2286 out: 2287 fd_putfile(fd); 2288 return (error); 2289 } 2290 2291 /* 2292 * Positional scatter read system call. 2293 */ 2294 int 2295 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2296 { 2297 /* { 2298 syscallarg(int) fd; 2299 syscallarg(const struct iovec *) iovp; 2300 syscallarg(int) iovcnt; 2301 syscallarg(off_t) offset; 2302 } */ 2303 off_t offset = SCARG(uap, offset); 2304 2305 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2306 SCARG(uap, iovcnt), &offset, 0, retval); 2307 } 2308 2309 /* 2310 * Positional write system call. 2311 */ 2312 int 2313 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2314 { 2315 /* { 2316 syscallarg(int) fd; 2317 syscallarg(const void *) buf; 2318 syscallarg(size_t) nbyte; 2319 syscallarg(off_t) offset; 2320 } */ 2321 file_t *fp; 2322 struct vnode *vp; 2323 off_t offset; 2324 int error, fd = SCARG(uap, fd); 2325 2326 if ((fp = fd_getfile(fd)) == NULL) 2327 return (EBADF); 2328 2329 if ((fp->f_flag & FWRITE) == 0) { 2330 fd_putfile(fd); 2331 return (EBADF); 2332 } 2333 2334 vp = fp->f_data; 2335 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2336 error = ESPIPE; 2337 goto out; 2338 } 2339 2340 offset = SCARG(uap, offset); 2341 2342 /* 2343 * XXX This works because no file systems actually 2344 * XXX take any action on the seek operation. 2345 */ 2346 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2347 goto out; 2348 2349 /* dofilewrite() will unuse the descriptor for us */ 2350 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2351 &offset, 0, retval)); 2352 2353 out: 2354 fd_putfile(fd); 2355 return (error); 2356 } 2357 2358 /* 2359 * Positional gather write system call. 2360 */ 2361 int 2362 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2363 { 2364 /* { 2365 syscallarg(int) fd; 2366 syscallarg(const struct iovec *) iovp; 2367 syscallarg(int) iovcnt; 2368 syscallarg(off_t) offset; 2369 } */ 2370 off_t offset = SCARG(uap, offset); 2371 2372 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2373 SCARG(uap, iovcnt), &offset, 0, retval); 2374 } 2375 2376 /* 2377 * Check access permissions. 2378 */ 2379 int 2380 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2381 { 2382 /* { 2383 syscallarg(const char *) path; 2384 syscallarg(int) flags; 2385 } */ 2386 kauth_cred_t cred; 2387 struct vnode *vp; 2388 int error, flags; 2389 struct nameidata nd; 2390 2391 cred = kauth_cred_dup(l->l_cred); 2392 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2393 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2394 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2395 SCARG(uap, path)); 2396 /* Override default credentials */ 2397 nd.ni_cnd.cn_cred = cred; 2398 if ((error = namei(&nd)) != 0) 2399 goto out; 2400 vp = nd.ni_vp; 2401 2402 /* Flags == 0 means only check for existence. */ 2403 if (SCARG(uap, flags)) { 2404 flags = 0; 2405 if (SCARG(uap, flags) & R_OK) 2406 flags |= VREAD; 2407 if (SCARG(uap, flags) & W_OK) 2408 flags |= VWRITE; 2409 if (SCARG(uap, flags) & X_OK) 2410 flags |= VEXEC; 2411 2412 error = VOP_ACCESS(vp, flags, cred); 2413 if (!error && (flags & VWRITE)) 2414 error = vn_writechk(vp); 2415 } 2416 vput(vp); 2417 out: 2418 kauth_cred_free(cred); 2419 return (error); 2420 } 2421 2422 /* 2423 * Common code for all sys_stat functions, including compat versions. 2424 */ 2425 int 2426 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2427 { 2428 int error; 2429 struct nameidata nd; 2430 2431 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2432 UIO_USERSPACE, path); 2433 error = namei(&nd); 2434 if (error != 0) 2435 return error; 2436 error = vn_stat(nd.ni_vp, sb); 2437 vput(nd.ni_vp); 2438 return error; 2439 } 2440 2441 /* 2442 * Get file status; this version follows links. 2443 */ 2444 /* ARGSUSED */ 2445 int 2446 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2447 { 2448 /* { 2449 syscallarg(const char *) path; 2450 syscallarg(struct stat *) ub; 2451 } */ 2452 struct stat sb; 2453 int error; 2454 2455 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2456 if (error) 2457 return error; 2458 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2459 } 2460 2461 /* 2462 * Get file status; this version does not follow links. 2463 */ 2464 /* ARGSUSED */ 2465 int 2466 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2467 { 2468 /* { 2469 syscallarg(const char *) path; 2470 syscallarg(struct stat *) ub; 2471 } */ 2472 struct stat sb; 2473 int error; 2474 2475 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2476 if (error) 2477 return error; 2478 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2479 } 2480 2481 /* 2482 * Get configurable pathname variables. 2483 */ 2484 /* ARGSUSED */ 2485 int 2486 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2487 { 2488 /* { 2489 syscallarg(const char *) path; 2490 syscallarg(int) name; 2491 } */ 2492 int error; 2493 struct nameidata nd; 2494 2495 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2496 SCARG(uap, path)); 2497 if ((error = namei(&nd)) != 0) 2498 return (error); 2499 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2500 vput(nd.ni_vp); 2501 return (error); 2502 } 2503 2504 /* 2505 * Return target name of a symbolic link. 2506 */ 2507 /* ARGSUSED */ 2508 int 2509 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2510 { 2511 /* { 2512 syscallarg(const char *) path; 2513 syscallarg(char *) buf; 2514 syscallarg(size_t) count; 2515 } */ 2516 struct vnode *vp; 2517 struct iovec aiov; 2518 struct uio auio; 2519 int error; 2520 struct nameidata nd; 2521 2522 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2523 SCARG(uap, path)); 2524 if ((error = namei(&nd)) != 0) 2525 return (error); 2526 vp = nd.ni_vp; 2527 if (vp->v_type != VLNK) 2528 error = EINVAL; 2529 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2530 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2531 aiov.iov_base = SCARG(uap, buf); 2532 aiov.iov_len = SCARG(uap, count); 2533 auio.uio_iov = &aiov; 2534 auio.uio_iovcnt = 1; 2535 auio.uio_offset = 0; 2536 auio.uio_rw = UIO_READ; 2537 KASSERT(l == curlwp); 2538 auio.uio_vmspace = l->l_proc->p_vmspace; 2539 auio.uio_resid = SCARG(uap, count); 2540 error = VOP_READLINK(vp, &auio, l->l_cred); 2541 } 2542 vput(vp); 2543 *retval = SCARG(uap, count) - auio.uio_resid; 2544 return (error); 2545 } 2546 2547 /* 2548 * Change flags of a file given a path name. 2549 */ 2550 /* ARGSUSED */ 2551 int 2552 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2553 { 2554 /* { 2555 syscallarg(const char *) path; 2556 syscallarg(u_long) flags; 2557 } */ 2558 struct vnode *vp; 2559 int error; 2560 2561 error = namei_simple_user(SCARG(uap, path), 2562 NSM_FOLLOW_TRYEMULROOT, &vp); 2563 if (error != 0) 2564 return (error); 2565 error = change_flags(vp, SCARG(uap, flags), l); 2566 vput(vp); 2567 return (error); 2568 } 2569 2570 /* 2571 * Change flags of a file given a file descriptor. 2572 */ 2573 /* ARGSUSED */ 2574 int 2575 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2576 { 2577 /* { 2578 syscallarg(int) fd; 2579 syscallarg(u_long) flags; 2580 } */ 2581 struct vnode *vp; 2582 file_t *fp; 2583 int error; 2584 2585 /* fd_getvnode() will use the descriptor for us */ 2586 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2587 return (error); 2588 vp = fp->f_data; 2589 error = change_flags(vp, SCARG(uap, flags), l); 2590 VOP_UNLOCK(vp); 2591 fd_putfile(SCARG(uap, fd)); 2592 return (error); 2593 } 2594 2595 /* 2596 * Change flags of a file given a path name; this version does 2597 * not follow links. 2598 */ 2599 int 2600 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2601 { 2602 /* { 2603 syscallarg(const char *) path; 2604 syscallarg(u_long) flags; 2605 } */ 2606 struct vnode *vp; 2607 int error; 2608 2609 error = namei_simple_user(SCARG(uap, path), 2610 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2611 if (error != 0) 2612 return (error); 2613 error = change_flags(vp, SCARG(uap, flags), l); 2614 vput(vp); 2615 return (error); 2616 } 2617 2618 /* 2619 * Common routine to change flags of a file. 2620 */ 2621 int 2622 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2623 { 2624 struct vattr vattr; 2625 int error; 2626 2627 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2628 /* 2629 * Non-superusers cannot change the flags on devices, even if they 2630 * own them. 2631 */ 2632 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2633 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2634 goto out; 2635 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2636 error = EINVAL; 2637 goto out; 2638 } 2639 } 2640 vattr_null(&vattr); 2641 vattr.va_flags = flags; 2642 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2643 out: 2644 return (error); 2645 } 2646 2647 /* 2648 * Change mode of a file given path name; this version follows links. 2649 */ 2650 /* ARGSUSED */ 2651 int 2652 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2653 { 2654 /* { 2655 syscallarg(const char *) path; 2656 syscallarg(int) mode; 2657 } */ 2658 int error; 2659 struct vnode *vp; 2660 2661 error = namei_simple_user(SCARG(uap, path), 2662 NSM_FOLLOW_TRYEMULROOT, &vp); 2663 if (error != 0) 2664 return (error); 2665 2666 error = change_mode(vp, SCARG(uap, mode), l); 2667 2668 vrele(vp); 2669 return (error); 2670 } 2671 2672 /* 2673 * Change mode of a file given a file descriptor. 2674 */ 2675 /* ARGSUSED */ 2676 int 2677 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2678 { 2679 /* { 2680 syscallarg(int) fd; 2681 syscallarg(int) mode; 2682 } */ 2683 file_t *fp; 2684 int error; 2685 2686 /* fd_getvnode() will use the descriptor for us */ 2687 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2688 return (error); 2689 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2690 fd_putfile(SCARG(uap, fd)); 2691 return (error); 2692 } 2693 2694 /* 2695 * Change mode of a file given path name; this version does not follow links. 2696 */ 2697 /* ARGSUSED */ 2698 int 2699 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2700 { 2701 /* { 2702 syscallarg(const char *) path; 2703 syscallarg(int) mode; 2704 } */ 2705 int error; 2706 struct vnode *vp; 2707 2708 error = namei_simple_user(SCARG(uap, path), 2709 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2710 if (error != 0) 2711 return (error); 2712 2713 error = change_mode(vp, SCARG(uap, mode), l); 2714 2715 vrele(vp); 2716 return (error); 2717 } 2718 2719 /* 2720 * Common routine to set mode given a vnode. 2721 */ 2722 static int 2723 change_mode(struct vnode *vp, int mode, struct lwp *l) 2724 { 2725 struct vattr vattr; 2726 int error; 2727 2728 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2729 vattr_null(&vattr); 2730 vattr.va_mode = mode & ALLPERMS; 2731 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2732 VOP_UNLOCK(vp); 2733 return (error); 2734 } 2735 2736 /* 2737 * Set ownership given a path name; this version follows links. 2738 */ 2739 /* ARGSUSED */ 2740 int 2741 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2742 { 2743 /* { 2744 syscallarg(const char *) path; 2745 syscallarg(uid_t) uid; 2746 syscallarg(gid_t) gid; 2747 } */ 2748 int error; 2749 struct vnode *vp; 2750 2751 error = namei_simple_user(SCARG(uap, path), 2752 NSM_FOLLOW_TRYEMULROOT, &vp); 2753 if (error != 0) 2754 return (error); 2755 2756 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2757 2758 vrele(vp); 2759 return (error); 2760 } 2761 2762 /* 2763 * Set ownership given a path name; this version follows links. 2764 * Provides POSIX semantics. 2765 */ 2766 /* ARGSUSED */ 2767 int 2768 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2769 { 2770 /* { 2771 syscallarg(const char *) path; 2772 syscallarg(uid_t) uid; 2773 syscallarg(gid_t) gid; 2774 } */ 2775 int error; 2776 struct vnode *vp; 2777 2778 error = namei_simple_user(SCARG(uap, path), 2779 NSM_FOLLOW_TRYEMULROOT, &vp); 2780 if (error != 0) 2781 return (error); 2782 2783 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2784 2785 vrele(vp); 2786 return (error); 2787 } 2788 2789 /* 2790 * Set ownership given a file descriptor. 2791 */ 2792 /* ARGSUSED */ 2793 int 2794 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2795 { 2796 /* { 2797 syscallarg(int) fd; 2798 syscallarg(uid_t) uid; 2799 syscallarg(gid_t) gid; 2800 } */ 2801 int error; 2802 file_t *fp; 2803 2804 /* fd_getvnode() will use the descriptor for us */ 2805 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2806 return (error); 2807 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2808 l, 0); 2809 fd_putfile(SCARG(uap, fd)); 2810 return (error); 2811 } 2812 2813 /* 2814 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2815 */ 2816 /* ARGSUSED */ 2817 int 2818 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2819 { 2820 /* { 2821 syscallarg(int) fd; 2822 syscallarg(uid_t) uid; 2823 syscallarg(gid_t) gid; 2824 } */ 2825 int error; 2826 file_t *fp; 2827 2828 /* fd_getvnode() will use the descriptor for us */ 2829 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2830 return (error); 2831 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2832 l, 1); 2833 fd_putfile(SCARG(uap, fd)); 2834 return (error); 2835 } 2836 2837 /* 2838 * Set ownership given a path name; this version does not follow links. 2839 */ 2840 /* ARGSUSED */ 2841 int 2842 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2843 { 2844 /* { 2845 syscallarg(const char *) path; 2846 syscallarg(uid_t) uid; 2847 syscallarg(gid_t) gid; 2848 } */ 2849 int error; 2850 struct vnode *vp; 2851 2852 error = namei_simple_user(SCARG(uap, path), 2853 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2854 if (error != 0) 2855 return (error); 2856 2857 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2858 2859 vrele(vp); 2860 return (error); 2861 } 2862 2863 /* 2864 * Set ownership given a path name; this version does not follow links. 2865 * Provides POSIX/XPG semantics. 2866 */ 2867 /* ARGSUSED */ 2868 int 2869 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2870 { 2871 /* { 2872 syscallarg(const char *) path; 2873 syscallarg(uid_t) uid; 2874 syscallarg(gid_t) gid; 2875 } */ 2876 int error; 2877 struct vnode *vp; 2878 2879 error = namei_simple_user(SCARG(uap, path), 2880 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2881 if (error != 0) 2882 return (error); 2883 2884 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2885 2886 vrele(vp); 2887 return (error); 2888 } 2889 2890 /* 2891 * Common routine to set ownership given a vnode. 2892 */ 2893 static int 2894 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2895 int posix_semantics) 2896 { 2897 struct vattr vattr; 2898 mode_t newmode; 2899 int error; 2900 2901 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2902 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2903 goto out; 2904 2905 #define CHANGED(x) ((int)(x) != -1) 2906 newmode = vattr.va_mode; 2907 if (posix_semantics) { 2908 /* 2909 * POSIX/XPG semantics: if the caller is not the super-user, 2910 * clear set-user-id and set-group-id bits. Both POSIX and 2911 * the XPG consider the behaviour for calls by the super-user 2912 * implementation-defined; we leave the set-user-id and set- 2913 * group-id settings intact in that case. 2914 */ 2915 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2916 NULL) != 0) 2917 newmode &= ~(S_ISUID | S_ISGID); 2918 } else { 2919 /* 2920 * NetBSD semantics: when changing owner and/or group, 2921 * clear the respective bit(s). 2922 */ 2923 if (CHANGED(uid)) 2924 newmode &= ~S_ISUID; 2925 if (CHANGED(gid)) 2926 newmode &= ~S_ISGID; 2927 } 2928 /* Update va_mode iff altered. */ 2929 if (vattr.va_mode == newmode) 2930 newmode = VNOVAL; 2931 2932 vattr_null(&vattr); 2933 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2934 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2935 vattr.va_mode = newmode; 2936 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2937 #undef CHANGED 2938 2939 out: 2940 VOP_UNLOCK(vp); 2941 return (error); 2942 } 2943 2944 /* 2945 * Set the access and modification times given a path name; this 2946 * version follows links. 2947 */ 2948 /* ARGSUSED */ 2949 int 2950 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2951 register_t *retval) 2952 { 2953 /* { 2954 syscallarg(const char *) path; 2955 syscallarg(const struct timeval *) tptr; 2956 } */ 2957 2958 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2959 SCARG(uap, tptr), UIO_USERSPACE); 2960 } 2961 2962 /* 2963 * Set the access and modification times given a file descriptor. 2964 */ 2965 /* ARGSUSED */ 2966 int 2967 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2968 register_t *retval) 2969 { 2970 /* { 2971 syscallarg(int) fd; 2972 syscallarg(const struct timeval *) tptr; 2973 } */ 2974 int error; 2975 file_t *fp; 2976 2977 /* fd_getvnode() will use the descriptor for us */ 2978 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2979 return (error); 2980 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2981 UIO_USERSPACE); 2982 fd_putfile(SCARG(uap, fd)); 2983 return (error); 2984 } 2985 2986 /* 2987 * Set the access and modification times given a path name; this 2988 * version does not follow links. 2989 */ 2990 int 2991 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2992 register_t *retval) 2993 { 2994 /* { 2995 syscallarg(const char *) path; 2996 syscallarg(const struct timeval *) tptr; 2997 } */ 2998 2999 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3000 SCARG(uap, tptr), UIO_USERSPACE); 3001 } 3002 3003 /* 3004 * Common routine to set access and modification times given a vnode. 3005 */ 3006 int 3007 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3008 const struct timeval *tptr, enum uio_seg seg) 3009 { 3010 struct vattr vattr; 3011 int error, dorele = 0; 3012 namei_simple_flags_t sflags; 3013 3014 bool vanull, setbirthtime; 3015 struct timespec ts[2]; 3016 3017 /* 3018 * I have checked all callers and they pass either FOLLOW, 3019 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3020 * is 0. More to the point, they don't pass anything else. 3021 * Let's keep it that way at least until the namei interfaces 3022 * are fully sanitized. 3023 */ 3024 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3025 sflags = (flag == FOLLOW) ? 3026 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3027 3028 if (tptr == NULL) { 3029 vanull = true; 3030 nanotime(&ts[0]); 3031 ts[1] = ts[0]; 3032 } else { 3033 struct timeval tv[2]; 3034 3035 vanull = false; 3036 if (seg != UIO_SYSSPACE) { 3037 error = copyin(tptr, tv, sizeof (tv)); 3038 if (error != 0) 3039 return error; 3040 tptr = tv; 3041 } 3042 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3043 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3044 } 3045 3046 if (vp == NULL) { 3047 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3048 error = namei_simple_user(path, sflags, &vp); 3049 if (error != 0) 3050 return error; 3051 dorele = 1; 3052 } 3053 3054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3055 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3056 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3057 vattr_null(&vattr); 3058 vattr.va_atime = ts[0]; 3059 vattr.va_mtime = ts[1]; 3060 if (setbirthtime) 3061 vattr.va_birthtime = ts[1]; 3062 if (vanull) 3063 vattr.va_vaflags |= VA_UTIMES_NULL; 3064 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3065 VOP_UNLOCK(vp); 3066 3067 if (dorele != 0) 3068 vrele(vp); 3069 3070 return error; 3071 } 3072 3073 /* 3074 * Truncate a file given its path name. 3075 */ 3076 /* ARGSUSED */ 3077 int 3078 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3079 { 3080 /* { 3081 syscallarg(const char *) path; 3082 syscallarg(int) pad; 3083 syscallarg(off_t) length; 3084 } */ 3085 struct vnode *vp; 3086 struct vattr vattr; 3087 int error; 3088 3089 error = namei_simple_user(SCARG(uap, path), 3090 NSM_FOLLOW_TRYEMULROOT, &vp); 3091 if (error != 0) 3092 return (error); 3093 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3094 if (vp->v_type == VDIR) 3095 error = EISDIR; 3096 else if ((error = vn_writechk(vp)) == 0 && 3097 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3098 vattr_null(&vattr); 3099 vattr.va_size = SCARG(uap, length); 3100 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3101 } 3102 vput(vp); 3103 return (error); 3104 } 3105 3106 /* 3107 * Truncate a file given a file descriptor. 3108 */ 3109 /* ARGSUSED */ 3110 int 3111 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3112 { 3113 /* { 3114 syscallarg(int) fd; 3115 syscallarg(int) pad; 3116 syscallarg(off_t) length; 3117 } */ 3118 struct vattr vattr; 3119 struct vnode *vp; 3120 file_t *fp; 3121 int error; 3122 3123 /* fd_getvnode() will use the descriptor for us */ 3124 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3125 return (error); 3126 if ((fp->f_flag & FWRITE) == 0) { 3127 error = EINVAL; 3128 goto out; 3129 } 3130 vp = fp->f_data; 3131 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3132 if (vp->v_type == VDIR) 3133 error = EISDIR; 3134 else if ((error = vn_writechk(vp)) == 0) { 3135 vattr_null(&vattr); 3136 vattr.va_size = SCARG(uap, length); 3137 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3138 } 3139 VOP_UNLOCK(vp); 3140 out: 3141 fd_putfile(SCARG(uap, fd)); 3142 return (error); 3143 } 3144 3145 /* 3146 * Sync an open file. 3147 */ 3148 /* ARGSUSED */ 3149 int 3150 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3151 { 3152 /* { 3153 syscallarg(int) fd; 3154 } */ 3155 struct vnode *vp; 3156 file_t *fp; 3157 int error; 3158 3159 /* fd_getvnode() will use the descriptor for us */ 3160 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3161 return (error); 3162 vp = fp->f_data; 3163 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3164 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3165 VOP_UNLOCK(vp); 3166 fd_putfile(SCARG(uap, fd)); 3167 return (error); 3168 } 3169 3170 /* 3171 * Sync a range of file data. API modeled after that found in AIX. 3172 * 3173 * FDATASYNC indicates that we need only save enough metadata to be able 3174 * to re-read the written data. Note we duplicate AIX's requirement that 3175 * the file be open for writing. 3176 */ 3177 /* ARGSUSED */ 3178 int 3179 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3180 { 3181 /* { 3182 syscallarg(int) fd; 3183 syscallarg(int) flags; 3184 syscallarg(off_t) start; 3185 syscallarg(off_t) length; 3186 } */ 3187 struct vnode *vp; 3188 file_t *fp; 3189 int flags, nflags; 3190 off_t s, e, len; 3191 int error; 3192 3193 /* fd_getvnode() will use the descriptor for us */ 3194 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3195 return (error); 3196 3197 if ((fp->f_flag & FWRITE) == 0) { 3198 error = EBADF; 3199 goto out; 3200 } 3201 3202 flags = SCARG(uap, flags); 3203 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3204 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3205 error = EINVAL; 3206 goto out; 3207 } 3208 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3209 if (flags & FDATASYNC) 3210 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3211 else 3212 nflags = FSYNC_WAIT; 3213 if (flags & FDISKSYNC) 3214 nflags |= FSYNC_CACHE; 3215 3216 len = SCARG(uap, length); 3217 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3218 if (len) { 3219 s = SCARG(uap, start); 3220 e = s + len; 3221 if (e < s) { 3222 error = EINVAL; 3223 goto out; 3224 } 3225 } else { 3226 e = 0; 3227 s = 0; 3228 } 3229 3230 vp = fp->f_data; 3231 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3232 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3233 VOP_UNLOCK(vp); 3234 out: 3235 fd_putfile(SCARG(uap, fd)); 3236 return (error); 3237 } 3238 3239 /* 3240 * Sync the data of an open file. 3241 */ 3242 /* ARGSUSED */ 3243 int 3244 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3245 { 3246 /* { 3247 syscallarg(int) fd; 3248 } */ 3249 struct vnode *vp; 3250 file_t *fp; 3251 int error; 3252 3253 /* fd_getvnode() will use the descriptor for us */ 3254 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3255 return (error); 3256 if ((fp->f_flag & FWRITE) == 0) { 3257 fd_putfile(SCARG(uap, fd)); 3258 return (EBADF); 3259 } 3260 vp = fp->f_data; 3261 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3262 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3263 VOP_UNLOCK(vp); 3264 fd_putfile(SCARG(uap, fd)); 3265 return (error); 3266 } 3267 3268 /* 3269 * Rename files, (standard) BSD semantics frontend. 3270 */ 3271 /* ARGSUSED */ 3272 int 3273 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3274 { 3275 /* { 3276 syscallarg(const char *) from; 3277 syscallarg(const char *) to; 3278 } */ 3279 3280 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3281 } 3282 3283 /* 3284 * Rename files, POSIX semantics frontend. 3285 */ 3286 /* ARGSUSED */ 3287 int 3288 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3289 { 3290 /* { 3291 syscallarg(const char *) from; 3292 syscallarg(const char *) to; 3293 } */ 3294 3295 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3296 } 3297 3298 /* 3299 * Rename files. Source and destination must either both be directories, 3300 * or both not be directories. If target is a directory, it must be empty. 3301 * If `from' and `to' refer to the same object, the value of the `retain' 3302 * argument is used to determine whether `from' will be 3303 * 3304 * (retain == 0) deleted unless `from' and `to' refer to the same 3305 * object in the file system's name space (BSD). 3306 * (retain == 1) always retained (POSIX). 3307 */ 3308 int 3309 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3310 { 3311 struct vnode *tvp, *fvp, *tdvp; 3312 struct nameidata fromnd, tond; 3313 struct mount *fs; 3314 struct lwp *l = curlwp; 3315 struct proc *p; 3316 uint32_t saveflag; 3317 int error; 3318 3319 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT | INRENAME, 3320 seg, from); 3321 if ((error = namei(&fromnd)) != 0) 3322 return (error); 3323 if (fromnd.ni_dvp != fromnd.ni_vp) 3324 VOP_UNLOCK(fromnd.ni_dvp); 3325 fvp = fromnd.ni_vp; 3326 3327 fs = fvp->v_mount; 3328 error = VFS_RENAMELOCK_ENTER(fs); 3329 if (error) { 3330 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3331 vrele(fromnd.ni_dvp); 3332 vrele(fvp); 3333 goto out1; 3334 } 3335 3336 /* 3337 * close, partially, yet another race - ideally we should only 3338 * go as far as getting fromnd.ni_dvp before getting the per-fs 3339 * lock, and then continue to get fromnd.ni_vp, but we can't do 3340 * that with namei as it stands. 3341 * 3342 * This still won't prevent rmdir from nuking fromnd.ni_vp 3343 * under us. The real fix is to get the locks in the right 3344 * order and do the lookups in the right places, but that's a 3345 * major rototill. 3346 * 3347 * Preserve the SAVESTART in cn_flags, because who knows what 3348 * might happen if we don't. 3349 * 3350 * Note: this logic (as well as this whole function) is cloned 3351 * in nfs_serv.c. Proceed accordingly. 3352 */ 3353 vrele(fvp); 3354 if ((fromnd.ni_cnd.cn_namelen == 1 && 3355 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3356 (fromnd.ni_cnd.cn_namelen == 2 && 3357 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3358 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3359 error = EINVAL; 3360 VFS_RENAMELOCK_EXIT(fs); 3361 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3362 vrele(fromnd.ni_dvp); 3363 goto out1; 3364 } 3365 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3366 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3367 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3368 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3369 fromnd.ni_cnd.cn_flags |= saveflag; 3370 if (error) { 3371 VOP_UNLOCK(fromnd.ni_dvp); 3372 VFS_RENAMELOCK_EXIT(fs); 3373 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3374 vrele(fromnd.ni_dvp); 3375 goto out1; 3376 } 3377 VOP_UNLOCK(fromnd.ni_vp); 3378 if (fromnd.ni_dvp != fromnd.ni_vp) 3379 VOP_UNLOCK(fromnd.ni_dvp); 3380 fvp = fromnd.ni_vp; 3381 3382 NDINIT(&tond, RENAME, 3383 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3384 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0), 3385 seg, to); 3386 if ((error = namei(&tond)) != 0) { 3387 VFS_RENAMELOCK_EXIT(fs); 3388 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3389 vrele(fromnd.ni_dvp); 3390 vrele(fvp); 3391 goto out1; 3392 } 3393 tdvp = tond.ni_dvp; 3394 tvp = tond.ni_vp; 3395 3396 if (tvp != NULL) { 3397 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3398 error = ENOTDIR; 3399 goto out; 3400 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3401 error = EISDIR; 3402 goto out; 3403 } 3404 } 3405 3406 if (fvp == tdvp) 3407 error = EINVAL; 3408 3409 /* 3410 * Source and destination refer to the same object. 3411 */ 3412 if (fvp == tvp) { 3413 if (retain) 3414 error = -1; 3415 else if (fromnd.ni_dvp == tdvp && 3416 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3417 !memcmp(fromnd.ni_cnd.cn_nameptr, 3418 tond.ni_cnd.cn_nameptr, 3419 fromnd.ni_cnd.cn_namelen)) 3420 error = -1; 3421 } 3422 3423 #if NVERIEXEC > 0 3424 if (!error) { 3425 char *f1, *f2; 3426 size_t f1_len; 3427 size_t f2_len; 3428 3429 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3430 f1 = kmem_alloc(f1_len, KM_SLEEP); 3431 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3432 3433 f2_len = tond.ni_cnd.cn_namelen + 1; 3434 f2 = kmem_alloc(f2_len, KM_SLEEP); 3435 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3436 3437 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3438 3439 kmem_free(f1, f1_len); 3440 kmem_free(f2, f2_len); 3441 } 3442 #endif /* NVERIEXEC > 0 */ 3443 3444 out: 3445 p = l->l_proc; 3446 if (!error) { 3447 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3448 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3449 VFS_RENAMELOCK_EXIT(fs); 3450 } else { 3451 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3452 if (tdvp == tvp) 3453 vrele(tdvp); 3454 else 3455 vput(tdvp); 3456 if (tvp) 3457 vput(tvp); 3458 VFS_RENAMELOCK_EXIT(fs); 3459 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3460 vrele(fromnd.ni_dvp); 3461 vrele(fvp); 3462 } 3463 vrele(tond.ni_startdir); 3464 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3465 out1: 3466 if (fromnd.ni_startdir) 3467 vrele(fromnd.ni_startdir); 3468 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3469 return (error == -1 ? 0 : error); 3470 } 3471 3472 /* 3473 * Make a directory file. 3474 */ 3475 /* ARGSUSED */ 3476 int 3477 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3478 { 3479 /* { 3480 syscallarg(const char *) path; 3481 syscallarg(int) mode; 3482 } */ 3483 3484 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3485 } 3486 3487 int 3488 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3489 { 3490 struct proc *p = curlwp->l_proc; 3491 struct vnode *vp; 3492 struct vattr vattr; 3493 int error; 3494 struct nameidata nd; 3495 3496 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, 3497 seg, path); 3498 if ((error = namei(&nd)) != 0) 3499 return (error); 3500 vp = nd.ni_vp; 3501 if (vp != NULL) { 3502 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3503 if (nd.ni_dvp == vp) 3504 vrele(nd.ni_dvp); 3505 else 3506 vput(nd.ni_dvp); 3507 vrele(vp); 3508 return (EEXIST); 3509 } 3510 vattr_null(&vattr); 3511 vattr.va_type = VDIR; 3512 /* We will read cwdi->cwdi_cmask unlocked. */ 3513 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3514 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3515 if (!error) 3516 vput(nd.ni_vp); 3517 return (error); 3518 } 3519 3520 /* 3521 * Remove a directory file. 3522 */ 3523 /* ARGSUSED */ 3524 int 3525 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3526 { 3527 /* { 3528 syscallarg(const char *) path; 3529 } */ 3530 struct vnode *vp; 3531 int error; 3532 struct nameidata nd; 3533 3534 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3535 SCARG(uap, path)); 3536 if ((error = namei(&nd)) != 0) 3537 return (error); 3538 vp = nd.ni_vp; 3539 if (vp->v_type != VDIR) { 3540 error = ENOTDIR; 3541 goto out; 3542 } 3543 /* 3544 * No rmdir "." please. 3545 */ 3546 if (nd.ni_dvp == vp) { 3547 error = EINVAL; 3548 goto out; 3549 } 3550 /* 3551 * The root of a mounted filesystem cannot be deleted. 3552 */ 3553 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3554 error = EBUSY; 3555 goto out; 3556 } 3557 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3558 return (error); 3559 3560 out: 3561 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3562 if (nd.ni_dvp == vp) 3563 vrele(nd.ni_dvp); 3564 else 3565 vput(nd.ni_dvp); 3566 vput(vp); 3567 return (error); 3568 } 3569 3570 /* 3571 * Read a block of directory entries in a file system independent format. 3572 */ 3573 int 3574 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3575 { 3576 /* { 3577 syscallarg(int) fd; 3578 syscallarg(char *) buf; 3579 syscallarg(size_t) count; 3580 } */ 3581 file_t *fp; 3582 int error, done; 3583 3584 /* fd_getvnode() will use the descriptor for us */ 3585 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3586 return (error); 3587 if ((fp->f_flag & FREAD) == 0) { 3588 error = EBADF; 3589 goto out; 3590 } 3591 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3592 SCARG(uap, count), &done, l, 0, 0); 3593 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3594 *retval = done; 3595 out: 3596 fd_putfile(SCARG(uap, fd)); 3597 return (error); 3598 } 3599 3600 /* 3601 * Set the mode mask for creation of filesystem nodes. 3602 */ 3603 int 3604 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3605 { 3606 /* { 3607 syscallarg(mode_t) newmask; 3608 } */ 3609 struct proc *p = l->l_proc; 3610 struct cwdinfo *cwdi; 3611 3612 /* 3613 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3614 * important is that we serialize changes to the mask. The 3615 * rw_exit() will issue a write memory barrier on our behalf, 3616 * and force the changes out to other CPUs (as it must use an 3617 * atomic operation, draining the local CPU's store buffers). 3618 */ 3619 cwdi = p->p_cwdi; 3620 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3621 *retval = cwdi->cwdi_cmask; 3622 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3623 rw_exit(&cwdi->cwdi_lock); 3624 3625 return (0); 3626 } 3627 3628 int 3629 dorevoke(struct vnode *vp, kauth_cred_t cred) 3630 { 3631 struct vattr vattr; 3632 int error; 3633 3634 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3635 return error; 3636 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3637 (error = kauth_authorize_generic(cred, 3638 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3639 VOP_REVOKE(vp, REVOKEALL); 3640 return (error); 3641 } 3642 3643 /* 3644 * Void all references to file by ripping underlying filesystem 3645 * away from vnode. 3646 */ 3647 /* ARGSUSED */ 3648 int 3649 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3650 { 3651 /* { 3652 syscallarg(const char *) path; 3653 } */ 3654 struct vnode *vp; 3655 int error; 3656 3657 error = namei_simple_user(SCARG(uap, path), 3658 NSM_FOLLOW_TRYEMULROOT, &vp); 3659 if (error != 0) 3660 return (error); 3661 error = dorevoke(vp, l->l_cred); 3662 vrele(vp); 3663 return (error); 3664 } 3665