1 /* $NetBSD: vfs_syscalls.c,v 1.408 2010/08/21 13:19:39 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.408 2010/08/21 13:19:39 pgoyette Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_flags(struct vnode *, u_long, struct lwp *); 114 static int change_mode(struct vnode *, int, struct lwp *l); 115 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 116 117 void checkdirs(struct vnode *); 118 119 /* 120 * Virtual File System System Calls 121 */ 122 123 /* 124 * Mount a file system. 125 */ 126 127 /* 128 * This table is used to maintain compatibility with 4.3BSD 129 * and NetBSD 0.9 mount syscalls - and possibly other systems. 130 * Note, the order is important! 131 * 132 * Do not modify this table. It should only contain filesystems 133 * supported by NetBSD 0.9 and 4.3BSD. 134 */ 135 const char * const mountcompatnames[] = { 136 NULL, /* 0 = MOUNT_NONE */ 137 MOUNT_FFS, /* 1 = MOUNT_UFS */ 138 MOUNT_NFS, /* 2 */ 139 MOUNT_MFS, /* 3 */ 140 MOUNT_MSDOS, /* 4 */ 141 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 142 MOUNT_FDESC, /* 6 */ 143 MOUNT_KERNFS, /* 7 */ 144 NULL, /* 8 = MOUNT_DEVFS */ 145 MOUNT_AFS, /* 9 */ 146 }; 147 const int nmountcompatnames = sizeof(mountcompatnames) / 148 sizeof(mountcompatnames[0]); 149 150 static int 151 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 152 void *data, size_t *data_len) 153 { 154 struct mount *mp; 155 int error = 0, saved_flags; 156 157 mp = vp->v_mount; 158 saved_flags = mp->mnt_flag; 159 160 /* We can operate only on VV_ROOT nodes. */ 161 if ((vp->v_vflag & VV_ROOT) == 0) { 162 error = EINVAL; 163 goto out; 164 } 165 166 /* 167 * We only allow the filesystem to be reloaded if it 168 * is currently mounted read-only. Additionally, we 169 * prevent read-write to read-only downgrades. 170 */ 171 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 172 (mp->mnt_flag & MNT_RDONLY) == 0) { 173 error = EOPNOTSUPP; /* Needs translation */ 174 goto out; 175 } 176 177 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 178 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 179 if (error) 180 goto out; 181 182 if (vfs_busy(mp, NULL)) { 183 error = EPERM; 184 goto out; 185 } 186 187 mutex_enter(&mp->mnt_updating); 188 189 mp->mnt_flag &= ~MNT_OP_FLAGS; 190 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 191 192 /* 193 * Set the mount level flags. 194 */ 195 if (flags & MNT_RDONLY) 196 mp->mnt_flag |= MNT_RDONLY; 197 else if (mp->mnt_flag & MNT_RDONLY) 198 mp->mnt_iflag |= IMNT_WANTRDWR; 199 mp->mnt_flag &= 200 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 201 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 202 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 203 MNT_LOG); 204 mp->mnt_flag |= flags & 205 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 206 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 207 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 208 MNT_LOG | MNT_IGNORE); 209 210 error = VFS_MOUNT(mp, path, data, data_len); 211 212 if (error && data != NULL) { 213 int error2; 214 215 /* 216 * Update failed; let's try and see if it was an 217 * export request. For compat with 3.0 and earlier. 218 */ 219 error2 = vfs_hooks_reexport(mp, path, data); 220 221 /* 222 * Only update error code if the export request was 223 * understood but some problem occurred while 224 * processing it. 225 */ 226 if (error2 != EJUSTRETURN) 227 error = error2; 228 } 229 230 if (mp->mnt_iflag & IMNT_WANTRDWR) 231 mp->mnt_flag &= ~MNT_RDONLY; 232 if (error) 233 mp->mnt_flag = saved_flags; 234 mp->mnt_flag &= ~MNT_OP_FLAGS; 235 mp->mnt_iflag &= ~IMNT_WANTRDWR; 236 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 237 if (mp->mnt_syncer == NULL) 238 error = vfs_allocate_syncvnode(mp); 239 } else { 240 if (mp->mnt_syncer != NULL) 241 vfs_deallocate_syncvnode(mp); 242 } 243 mutex_exit(&mp->mnt_updating); 244 vfs_unbusy(mp, false, NULL); 245 246 out: 247 return (error); 248 } 249 250 static int 251 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 252 { 253 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 254 int error; 255 256 /* Copy file-system type from userspace. */ 257 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 258 if (error) { 259 /* 260 * Historically, filesystem types were identified by numbers. 261 * If we get an integer for the filesystem type instead of a 262 * string, we check to see if it matches one of the historic 263 * filesystem types. 264 */ 265 u_long fsindex = (u_long)fstype; 266 if (fsindex >= nmountcompatnames || 267 mountcompatnames[fsindex] == NULL) 268 return ENODEV; 269 strlcpy(fstypename, mountcompatnames[fsindex], 270 sizeof(fstypename)); 271 } 272 273 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 274 if (strcmp(fstypename, "ufs") == 0) 275 fstypename[0] = 'f'; 276 277 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 278 return 0; 279 280 /* If we can autoload a vfs module, try again */ 281 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 282 283 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 284 return 0; 285 286 return ENODEV; 287 } 288 289 static int 290 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 291 const char *path, int flags, void *data, size_t *data_len) 292 { 293 struct mount *mp; 294 struct vnode *vp = *vpp; 295 struct vattr va; 296 struct nameidata nd; 297 int error; 298 299 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 300 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 301 if (error) { 302 vfs_delref(vfsops); 303 return error; 304 } 305 306 /* Can't make a non-dir a mount-point (from here anyway). */ 307 if (vp->v_type != VDIR) { 308 vfs_delref(vfsops); 309 return ENOTDIR; 310 } 311 312 /* 313 * If the user is not root, ensure that they own the directory 314 * onto which we are attempting to mount. 315 */ 316 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 317 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 318 (error = kauth_authorize_generic(l->l_cred, 319 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 320 vfs_delref(vfsops); 321 return error; 322 } 323 324 if (flags & MNT_EXPORTED) { 325 vfs_delref(vfsops); 326 return EINVAL; 327 } 328 329 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 330 vfs_delref(vfsops); 331 return ENOMEM; 332 } 333 334 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 335 336 /* 337 * The underlying file system may refuse the mount for 338 * various reasons. Allow the user to force it to happen. 339 * 340 * Set the mount level flags. 341 */ 342 mp->mnt_flag = flags & 343 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 344 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 345 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 346 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 347 348 mutex_enter(&mp->mnt_updating); 349 error = VFS_MOUNT(mp, path, data, data_len); 350 mp->mnt_flag &= ~MNT_OP_FLAGS; 351 352 if (error != 0) 353 goto err_unmounted; 354 355 /* 356 * Validate and prepare the mount point. 357 */ 358 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, 359 UIO_USERSPACE, path); 360 error = namei(&nd); 361 if (error != 0) { 362 goto err_mounted; 363 } 364 if (nd.ni_vp != vp) { 365 vput(nd.ni_vp); 366 error = EINVAL; 367 goto err_mounted; 368 } 369 if (vp->v_mountedhere != NULL) { 370 vput(nd.ni_vp); 371 error = EBUSY; 372 goto err_mounted; 373 } 374 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 375 if (error != 0) { 376 vput(nd.ni_vp); 377 goto err_mounted; 378 } 379 380 /* 381 * Put the new filesystem on the mount list after root. 382 */ 383 cache_purge(vp); 384 mp->mnt_iflag &= ~IMNT_WANTRDWR; 385 386 mutex_enter(&mountlist_lock); 387 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 388 mutex_exit(&mountlist_lock); 389 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 390 error = vfs_allocate_syncvnode(mp); 391 if (error == 0) 392 vp->v_mountedhere = mp; 393 vput(nd.ni_vp); 394 if (error != 0) 395 goto err_onmountlist; 396 397 checkdirs(vp); 398 mutex_exit(&mp->mnt_updating); 399 400 /* Hold an additional reference to the mount across VFS_START(). */ 401 vfs_unbusy(mp, true, NULL); 402 (void) VFS_STATVFS(mp, &mp->mnt_stat); 403 error = VFS_START(mp, 0); 404 if (error) 405 vrele(vp); 406 /* Drop reference held for VFS_START(). */ 407 vfs_destroy(mp); 408 *vpp = NULL; 409 return error; 410 411 err_onmountlist: 412 mutex_enter(&mountlist_lock); 413 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 414 mp->mnt_iflag |= IMNT_GONE; 415 mutex_exit(&mountlist_lock); 416 417 err_mounted: 418 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 419 panic("Unmounting fresh file system failed"); 420 421 err_unmounted: 422 vp->v_mountedhere = NULL; 423 mutex_exit(&mp->mnt_updating); 424 vfs_unbusy(mp, false, NULL); 425 vfs_destroy(mp); 426 427 return error; 428 } 429 430 static int 431 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 432 void *data, size_t *data_len) 433 { 434 struct mount *mp; 435 int error; 436 437 /* If MNT_GETARGS is specified, it should be the only flag. */ 438 if (flags & ~MNT_GETARGS) 439 return EINVAL; 440 441 mp = vp->v_mount; 442 443 /* XXX: probably some notion of "can see" here if we want isolation. */ 444 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 445 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 446 if (error) 447 return error; 448 449 if ((vp->v_vflag & VV_ROOT) == 0) 450 return EINVAL; 451 452 if (vfs_busy(mp, NULL)) 453 return EPERM; 454 455 mutex_enter(&mp->mnt_updating); 456 mp->mnt_flag &= ~MNT_OP_FLAGS; 457 mp->mnt_flag |= MNT_GETARGS; 458 error = VFS_MOUNT(mp, path, data, data_len); 459 mp->mnt_flag &= ~MNT_OP_FLAGS; 460 mutex_exit(&mp->mnt_updating); 461 462 vfs_unbusy(mp, false, NULL); 463 return (error); 464 } 465 466 int 467 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 468 { 469 /* { 470 syscallarg(const char *) type; 471 syscallarg(const char *) path; 472 syscallarg(int) flags; 473 syscallarg(void *) data; 474 syscallarg(size_t) data_len; 475 } */ 476 477 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 478 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 479 SCARG(uap, data_len), retval); 480 } 481 482 int 483 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 484 const char *path, int flags, void *data, enum uio_seg data_seg, 485 size_t data_len, register_t *retval) 486 { 487 struct vnode *vp; 488 void *data_buf = data; 489 bool vfsopsrele = false; 490 int error; 491 492 /* XXX: The calling convention of this routine is totally bizarre */ 493 if (vfsops) 494 vfsopsrele = true; 495 496 /* 497 * Get vnode to be covered 498 */ 499 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 500 if (error != 0) { 501 vp = NULL; 502 goto done; 503 } 504 505 if (vfsops == NULL) { 506 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 507 vfsops = vp->v_mount->mnt_op; 508 } else { 509 /* 'type' is userspace */ 510 error = mount_get_vfsops(type, &vfsops); 511 if (error != 0) 512 goto done; 513 vfsopsrele = true; 514 } 515 } 516 517 if (data != NULL && data_seg == UIO_USERSPACE) { 518 if (data_len == 0) { 519 /* No length supplied, use default for filesystem */ 520 data_len = vfsops->vfs_min_mount_data; 521 if (data_len > VFS_MAX_MOUNT_DATA) { 522 error = EINVAL; 523 goto done; 524 } 525 /* 526 * Hopefully a longer buffer won't make copyin() fail. 527 * For compatibility with 3.0 and earlier. 528 */ 529 if (flags & MNT_UPDATE 530 && data_len < sizeof (struct mnt_export_args30)) 531 data_len = sizeof (struct mnt_export_args30); 532 } 533 data_buf = kmem_alloc(data_len, KM_SLEEP); 534 535 /* NFS needs the buffer even for mnt_getargs .... */ 536 error = copyin(data, data_buf, data_len); 537 if (error != 0) 538 goto done; 539 } 540 541 if (flags & MNT_GETARGS) { 542 if (data_len == 0) { 543 error = EINVAL; 544 goto done; 545 } 546 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 547 if (error != 0) 548 goto done; 549 if (data_seg == UIO_USERSPACE) 550 error = copyout(data_buf, data, data_len); 551 *retval = data_len; 552 } else if (flags & MNT_UPDATE) { 553 error = mount_update(l, vp, path, flags, data_buf, &data_len); 554 } else { 555 /* Locking is handled internally in mount_domount(). */ 556 KASSERT(vfsopsrele == true); 557 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 558 &data_len); 559 vfsopsrele = false; 560 } 561 562 done: 563 if (vfsopsrele) 564 vfs_delref(vfsops); 565 if (vp != NULL) { 566 vrele(vp); 567 } 568 if (data_buf != data) 569 kmem_free(data_buf, data_len); 570 return (error); 571 } 572 573 /* 574 * Scan all active processes to see if any of them have a current 575 * or root directory onto which the new filesystem has just been 576 * mounted. If so, replace them with the new mount point. 577 */ 578 void 579 checkdirs(struct vnode *olddp) 580 { 581 struct cwdinfo *cwdi; 582 struct vnode *newdp, *rele1, *rele2; 583 struct proc *p; 584 bool retry; 585 586 if (olddp->v_usecount == 1) 587 return; 588 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 589 panic("mount: lost mount"); 590 591 do { 592 retry = false; 593 mutex_enter(proc_lock); 594 PROCLIST_FOREACH(p, &allproc) { 595 if ((cwdi = p->p_cwdi) == NULL) 596 continue; 597 /* 598 * Can't change to the old directory any more, 599 * so even if we see a stale value it's not a 600 * problem. 601 */ 602 if (cwdi->cwdi_cdir != olddp && 603 cwdi->cwdi_rdir != olddp) 604 continue; 605 retry = true; 606 rele1 = NULL; 607 rele2 = NULL; 608 atomic_inc_uint(&cwdi->cwdi_refcnt); 609 mutex_exit(proc_lock); 610 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 611 if (cwdi->cwdi_cdir == olddp) { 612 rele1 = cwdi->cwdi_cdir; 613 vref(newdp); 614 cwdi->cwdi_cdir = newdp; 615 } 616 if (cwdi->cwdi_rdir == olddp) { 617 rele2 = cwdi->cwdi_rdir; 618 vref(newdp); 619 cwdi->cwdi_rdir = newdp; 620 } 621 rw_exit(&cwdi->cwdi_lock); 622 cwdfree(cwdi); 623 if (rele1 != NULL) 624 vrele(rele1); 625 if (rele2 != NULL) 626 vrele(rele2); 627 mutex_enter(proc_lock); 628 break; 629 } 630 mutex_exit(proc_lock); 631 } while (retry); 632 633 if (rootvnode == olddp) { 634 vrele(rootvnode); 635 vref(newdp); 636 rootvnode = newdp; 637 } 638 vput(newdp); 639 } 640 641 /* 642 * Unmount a file system. 643 * 644 * Note: unmount takes a path to the vnode mounted on as argument, 645 * not special file (as before). 646 */ 647 /* ARGSUSED */ 648 int 649 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 650 { 651 /* { 652 syscallarg(const char *) path; 653 syscallarg(int) flags; 654 } */ 655 struct vnode *vp; 656 struct mount *mp; 657 int error; 658 struct nameidata nd; 659 660 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 661 SCARG(uap, path)); 662 if ((error = namei(&nd)) != 0) 663 return (error); 664 vp = nd.ni_vp; 665 mp = vp->v_mount; 666 atomic_inc_uint(&mp->mnt_refcnt); 667 VOP_UNLOCK(vp); 668 669 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 670 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 671 if (error) { 672 vrele(vp); 673 vfs_destroy(mp); 674 return (error); 675 } 676 677 /* 678 * Don't allow unmounting the root file system. 679 */ 680 if (mp->mnt_flag & MNT_ROOTFS) { 681 vrele(vp); 682 vfs_destroy(mp); 683 return (EINVAL); 684 } 685 686 /* 687 * Must be the root of the filesystem 688 */ 689 if ((vp->v_vflag & VV_ROOT) == 0) { 690 vrele(vp); 691 vfs_destroy(mp); 692 return (EINVAL); 693 } 694 695 vrele(vp); 696 error = dounmount(mp, SCARG(uap, flags), l); 697 vfs_destroy(mp); 698 return error; 699 } 700 701 /* 702 * Do the actual file system unmount. File system is assumed to have 703 * been locked by the caller. 704 * 705 * => Caller hold reference to the mount, explicitly for dounmount(). 706 */ 707 int 708 dounmount(struct mount *mp, int flags, struct lwp *l) 709 { 710 struct vnode *coveredvp; 711 int error; 712 int async; 713 int used_syncer; 714 715 #if NVERIEXEC > 0 716 error = veriexec_unmountchk(mp); 717 if (error) 718 return (error); 719 #endif /* NVERIEXEC > 0 */ 720 721 /* 722 * XXX Freeze syncer. Must do this before locking the 723 * mount point. See dounmount() for details. 724 */ 725 mutex_enter(&syncer_mutex); 726 rw_enter(&mp->mnt_unmounting, RW_WRITER); 727 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 728 rw_exit(&mp->mnt_unmounting); 729 mutex_exit(&syncer_mutex); 730 return ENOENT; 731 } 732 733 used_syncer = (mp->mnt_syncer != NULL); 734 735 /* 736 * XXX Syncer must be frozen when we get here. This should really 737 * be done on a per-mountpoint basis, but the syncer doesn't work 738 * like that. 739 * 740 * The caller of dounmount() must acquire syncer_mutex because 741 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 742 * order, and we must preserve that order to avoid deadlock. 743 * 744 * So, if the file system did not use the syncer, now is 745 * the time to release the syncer_mutex. 746 */ 747 if (used_syncer == 0) 748 mutex_exit(&syncer_mutex); 749 750 mp->mnt_iflag |= IMNT_UNMOUNT; 751 async = mp->mnt_flag & MNT_ASYNC; 752 mp->mnt_flag &= ~MNT_ASYNC; 753 cache_purgevfs(mp); /* remove cache entries for this file sys */ 754 if (mp->mnt_syncer != NULL) 755 vfs_deallocate_syncvnode(mp); 756 error = 0; 757 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 758 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 759 } 760 vfs_scrubvnlist(mp); 761 if (error == 0 || (flags & MNT_FORCE)) 762 error = VFS_UNMOUNT(mp, flags); 763 if (error) { 764 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 765 (void) vfs_allocate_syncvnode(mp); 766 mp->mnt_iflag &= ~IMNT_UNMOUNT; 767 mp->mnt_flag |= async; 768 rw_exit(&mp->mnt_unmounting); 769 if (used_syncer) 770 mutex_exit(&syncer_mutex); 771 return (error); 772 } 773 vfs_scrubvnlist(mp); 774 mutex_enter(&mountlist_lock); 775 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 776 coveredvp->v_mountedhere = NULL; 777 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 778 mp->mnt_iflag |= IMNT_GONE; 779 mutex_exit(&mountlist_lock); 780 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 781 panic("unmount: dangling vnode"); 782 if (used_syncer) 783 mutex_exit(&syncer_mutex); 784 vfs_hooks_unmount(mp); 785 rw_exit(&mp->mnt_unmounting); 786 vfs_destroy(mp); /* reference from mount() */ 787 if (coveredvp != NULLVP) 788 vrele(coveredvp); 789 return (0); 790 } 791 792 /* 793 * Sync each mounted filesystem. 794 */ 795 #ifdef DEBUG 796 int syncprt = 0; 797 struct ctldebug debug0 = { "syncprt", &syncprt }; 798 #endif 799 800 /* ARGSUSED */ 801 int 802 sys_sync(struct lwp *l, const void *v, register_t *retval) 803 { 804 struct mount *mp, *nmp; 805 int asyncflag; 806 807 if (l == NULL) 808 l = &lwp0; 809 810 mutex_enter(&mountlist_lock); 811 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 812 mp = nmp) { 813 if (vfs_busy(mp, &nmp)) { 814 continue; 815 } 816 mutex_enter(&mp->mnt_updating); 817 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 818 asyncflag = mp->mnt_flag & MNT_ASYNC; 819 mp->mnt_flag &= ~MNT_ASYNC; 820 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 821 if (asyncflag) 822 mp->mnt_flag |= MNT_ASYNC; 823 } 824 mutex_exit(&mp->mnt_updating); 825 vfs_unbusy(mp, false, &nmp); 826 } 827 mutex_exit(&mountlist_lock); 828 #ifdef DEBUG 829 if (syncprt) 830 vfs_bufstats(); 831 #endif /* DEBUG */ 832 return (0); 833 } 834 835 /* 836 * Change filesystem quotas. 837 */ 838 /* ARGSUSED */ 839 int 840 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 841 { 842 /* { 843 syscallarg(const char *) path; 844 syscallarg(int) cmd; 845 syscallarg(int) uid; 846 syscallarg(void *) arg; 847 } */ 848 struct mount *mp; 849 int error; 850 struct vnode *vp; 851 852 error = namei_simple_user(SCARG(uap, path), 853 NSM_FOLLOW_TRYEMULROOT, &vp); 854 if (error != 0) 855 return (error); 856 mp = vp->v_mount; 857 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 858 SCARG(uap, arg)); 859 vrele(vp); 860 return (error); 861 } 862 863 int 864 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 865 int root) 866 { 867 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 868 int error = 0; 869 870 /* 871 * If MNT_NOWAIT or MNT_LAZY is specified, do not 872 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 873 * overrides MNT_NOWAIT. 874 */ 875 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 876 (flags != MNT_WAIT && flags != 0)) { 877 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 878 goto done; 879 } 880 881 /* Get the filesystem stats now */ 882 memset(sp, 0, sizeof(*sp)); 883 if ((error = VFS_STATVFS(mp, sp)) != 0) { 884 return error; 885 } 886 887 if (cwdi->cwdi_rdir == NULL) 888 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 889 done: 890 if (cwdi->cwdi_rdir != NULL) { 891 size_t len; 892 char *bp; 893 char c; 894 char *path = PNBUF_GET(); 895 896 bp = path + MAXPATHLEN; 897 *--bp = '\0'; 898 rw_enter(&cwdi->cwdi_lock, RW_READER); 899 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 900 MAXPATHLEN / 2, 0, l); 901 rw_exit(&cwdi->cwdi_lock); 902 if (error) { 903 PNBUF_PUT(path); 904 return error; 905 } 906 len = strlen(bp); 907 if (len != 1) { 908 /* 909 * for mount points that are below our root, we can see 910 * them, so we fix up the pathname and return them. The 911 * rest we cannot see, so we don't allow viewing the 912 * data. 913 */ 914 if (strncmp(bp, sp->f_mntonname, len) == 0 && 915 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 916 (void)strlcpy(sp->f_mntonname, 917 c == '\0' ? "/" : &sp->f_mntonname[len], 918 sizeof(sp->f_mntonname)); 919 } else { 920 if (root) 921 (void)strlcpy(sp->f_mntonname, "/", 922 sizeof(sp->f_mntonname)); 923 else 924 error = EPERM; 925 } 926 } 927 PNBUF_PUT(path); 928 } 929 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 930 return error; 931 } 932 933 /* 934 * Get filesystem statistics by path. 935 */ 936 int 937 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 938 { 939 struct mount *mp; 940 int error; 941 struct vnode *vp; 942 943 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 944 if (error != 0) 945 return error; 946 mp = vp->v_mount; 947 error = dostatvfs(mp, sb, l, flags, 1); 948 vrele(vp); 949 return error; 950 } 951 952 /* ARGSUSED */ 953 int 954 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 955 { 956 /* { 957 syscallarg(const char *) path; 958 syscallarg(struct statvfs *) buf; 959 syscallarg(int) flags; 960 } */ 961 struct statvfs *sb; 962 int error; 963 964 sb = STATVFSBUF_GET(); 965 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 966 if (error == 0) 967 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 968 STATVFSBUF_PUT(sb); 969 return error; 970 } 971 972 /* 973 * Get filesystem statistics by fd. 974 */ 975 int 976 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 977 { 978 file_t *fp; 979 struct mount *mp; 980 int error; 981 982 /* fd_getvnode() will use the descriptor for us */ 983 if ((error = fd_getvnode(fd, &fp)) != 0) 984 return (error); 985 mp = ((struct vnode *)fp->f_data)->v_mount; 986 error = dostatvfs(mp, sb, curlwp, flags, 1); 987 fd_putfile(fd); 988 return error; 989 } 990 991 /* ARGSUSED */ 992 int 993 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 994 { 995 /* { 996 syscallarg(int) fd; 997 syscallarg(struct statvfs *) buf; 998 syscallarg(int) flags; 999 } */ 1000 struct statvfs *sb; 1001 int error; 1002 1003 sb = STATVFSBUF_GET(); 1004 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1005 if (error == 0) 1006 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1007 STATVFSBUF_PUT(sb); 1008 return error; 1009 } 1010 1011 1012 /* 1013 * Get statistics on all filesystems. 1014 */ 1015 int 1016 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1017 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1018 register_t *retval) 1019 { 1020 int root = 0; 1021 struct proc *p = l->l_proc; 1022 struct mount *mp, *nmp; 1023 struct statvfs *sb; 1024 size_t count, maxcount; 1025 int error = 0; 1026 1027 sb = STATVFSBUF_GET(); 1028 maxcount = bufsize / entry_sz; 1029 mutex_enter(&mountlist_lock); 1030 count = 0; 1031 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1032 mp = nmp) { 1033 if (vfs_busy(mp, &nmp)) { 1034 continue; 1035 } 1036 if (sfsp && count < maxcount) { 1037 error = dostatvfs(mp, sb, l, flags, 0); 1038 if (error) { 1039 vfs_unbusy(mp, false, &nmp); 1040 error = 0; 1041 continue; 1042 } 1043 error = copyfn(sb, sfsp, entry_sz); 1044 if (error) { 1045 vfs_unbusy(mp, false, NULL); 1046 goto out; 1047 } 1048 sfsp = (char *)sfsp + entry_sz; 1049 root |= strcmp(sb->f_mntonname, "/") == 0; 1050 } 1051 count++; 1052 vfs_unbusy(mp, false, &nmp); 1053 } 1054 mutex_exit(&mountlist_lock); 1055 1056 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1057 /* 1058 * fake a root entry 1059 */ 1060 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1061 sb, l, flags, 1); 1062 if (error != 0) 1063 goto out; 1064 if (sfsp) { 1065 error = copyfn(sb, sfsp, entry_sz); 1066 if (error != 0) 1067 goto out; 1068 } 1069 count++; 1070 } 1071 if (sfsp && count > maxcount) 1072 *retval = maxcount; 1073 else 1074 *retval = count; 1075 out: 1076 STATVFSBUF_PUT(sb); 1077 return error; 1078 } 1079 1080 int 1081 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1082 { 1083 /* { 1084 syscallarg(struct statvfs *) buf; 1085 syscallarg(size_t) bufsize; 1086 syscallarg(int) flags; 1087 } */ 1088 1089 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1090 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1091 } 1092 1093 /* 1094 * Change current working directory to a given file descriptor. 1095 */ 1096 /* ARGSUSED */ 1097 int 1098 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1099 { 1100 /* { 1101 syscallarg(int) fd; 1102 } */ 1103 struct proc *p = l->l_proc; 1104 struct cwdinfo *cwdi; 1105 struct vnode *vp, *tdp; 1106 struct mount *mp; 1107 file_t *fp; 1108 int error, fd; 1109 1110 /* fd_getvnode() will use the descriptor for us */ 1111 fd = SCARG(uap, fd); 1112 if ((error = fd_getvnode(fd, &fp)) != 0) 1113 return (error); 1114 vp = fp->f_data; 1115 1116 vref(vp); 1117 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1118 if (vp->v_type != VDIR) 1119 error = ENOTDIR; 1120 else 1121 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1122 if (error) { 1123 vput(vp); 1124 goto out; 1125 } 1126 while ((mp = vp->v_mountedhere) != NULL) { 1127 error = vfs_busy(mp, NULL); 1128 vput(vp); 1129 if (error != 0) 1130 goto out; 1131 error = VFS_ROOT(mp, &tdp); 1132 vfs_unbusy(mp, false, NULL); 1133 if (error) 1134 goto out; 1135 vp = tdp; 1136 } 1137 VOP_UNLOCK(vp); 1138 1139 /* 1140 * Disallow changing to a directory not under the process's 1141 * current root directory (if there is one). 1142 */ 1143 cwdi = p->p_cwdi; 1144 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1145 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1146 vrele(vp); 1147 error = EPERM; /* operation not permitted */ 1148 } else { 1149 vrele(cwdi->cwdi_cdir); 1150 cwdi->cwdi_cdir = vp; 1151 } 1152 rw_exit(&cwdi->cwdi_lock); 1153 1154 out: 1155 fd_putfile(fd); 1156 return (error); 1157 } 1158 1159 /* 1160 * Change this process's notion of the root directory to a given file 1161 * descriptor. 1162 */ 1163 int 1164 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1165 { 1166 struct proc *p = l->l_proc; 1167 struct vnode *vp; 1168 file_t *fp; 1169 int error, fd = SCARG(uap, fd); 1170 1171 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1172 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1173 return error; 1174 /* fd_getvnode() will use the descriptor for us */ 1175 if ((error = fd_getvnode(fd, &fp)) != 0) 1176 return error; 1177 vp = fp->f_data; 1178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1179 if (vp->v_type != VDIR) 1180 error = ENOTDIR; 1181 else 1182 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1183 VOP_UNLOCK(vp); 1184 if (error) 1185 goto out; 1186 vref(vp); 1187 1188 change_root(p->p_cwdi, vp, l); 1189 1190 out: 1191 fd_putfile(fd); 1192 return (error); 1193 } 1194 1195 /* 1196 * Change current working directory (``.''). 1197 */ 1198 /* ARGSUSED */ 1199 int 1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1201 { 1202 /* { 1203 syscallarg(const char *) path; 1204 } */ 1205 struct proc *p = l->l_proc; 1206 struct cwdinfo *cwdi; 1207 int error; 1208 struct vnode *vp; 1209 1210 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1211 &vp, l)) != 0) 1212 return (error); 1213 cwdi = p->p_cwdi; 1214 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1215 vrele(cwdi->cwdi_cdir); 1216 cwdi->cwdi_cdir = vp; 1217 rw_exit(&cwdi->cwdi_lock); 1218 return (0); 1219 } 1220 1221 /* 1222 * Change notion of root (``/'') directory. 1223 */ 1224 /* ARGSUSED */ 1225 int 1226 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1227 { 1228 /* { 1229 syscallarg(const char *) path; 1230 } */ 1231 struct proc *p = l->l_proc; 1232 int error; 1233 struct vnode *vp; 1234 1235 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1236 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1237 return (error); 1238 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1239 &vp, l)) != 0) 1240 return (error); 1241 1242 change_root(p->p_cwdi, vp, l); 1243 1244 return (0); 1245 } 1246 1247 /* 1248 * Common routine for chroot and fchroot. 1249 * NB: callers need to properly authorize the change root operation. 1250 */ 1251 void 1252 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1253 { 1254 1255 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1256 if (cwdi->cwdi_rdir != NULL) 1257 vrele(cwdi->cwdi_rdir); 1258 cwdi->cwdi_rdir = vp; 1259 1260 /* 1261 * Prevent escaping from chroot by putting the root under 1262 * the working directory. Silently chdir to / if we aren't 1263 * already there. 1264 */ 1265 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1266 /* 1267 * XXX would be more failsafe to change directory to a 1268 * deadfs node here instead 1269 */ 1270 vrele(cwdi->cwdi_cdir); 1271 vref(vp); 1272 cwdi->cwdi_cdir = vp; 1273 } 1274 rw_exit(&cwdi->cwdi_lock); 1275 } 1276 1277 /* 1278 * Common routine for chroot and chdir. 1279 */ 1280 int 1281 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1282 { 1283 struct nameidata nd; 1284 int error; 1285 1286 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, where, 1287 path); 1288 if ((error = namei(&nd)) != 0) 1289 return (error); 1290 *vpp = nd.ni_vp; 1291 if ((*vpp)->v_type != VDIR) 1292 error = ENOTDIR; 1293 else 1294 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1295 1296 if (error) 1297 vput(*vpp); 1298 else 1299 VOP_UNLOCK(*vpp); 1300 return (error); 1301 } 1302 1303 /* 1304 * Check permissions, allocate an open file structure, 1305 * and call the device open routine if any. 1306 */ 1307 int 1308 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1309 { 1310 /* { 1311 syscallarg(const char *) path; 1312 syscallarg(int) flags; 1313 syscallarg(int) mode; 1314 } */ 1315 struct proc *p = l->l_proc; 1316 struct cwdinfo *cwdi = p->p_cwdi; 1317 file_t *fp; 1318 struct vnode *vp; 1319 int flags, cmode; 1320 int type, indx, error; 1321 struct flock lf; 1322 struct nameidata nd; 1323 1324 flags = FFLAGS(SCARG(uap, flags)); 1325 if ((flags & (FREAD | FWRITE)) == 0) 1326 return (EINVAL); 1327 if ((error = fd_allocfile(&fp, &indx)) != 0) 1328 return (error); 1329 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1330 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1331 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, 1332 SCARG(uap, path)); 1333 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1334 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1335 fd_abort(p, fp, indx); 1336 if ((error == EDUPFD || error == EMOVEFD) && 1337 l->l_dupfd >= 0 && /* XXX from fdopen */ 1338 (error = 1339 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1340 *retval = indx; 1341 return (0); 1342 } 1343 if (error == ERESTART) 1344 error = EINTR; 1345 return (error); 1346 } 1347 1348 l->l_dupfd = 0; 1349 vp = nd.ni_vp; 1350 fp->f_flag = flags & FMASK; 1351 fp->f_type = DTYPE_VNODE; 1352 fp->f_ops = &vnops; 1353 fp->f_data = vp; 1354 if (flags & (O_EXLOCK | O_SHLOCK)) { 1355 lf.l_whence = SEEK_SET; 1356 lf.l_start = 0; 1357 lf.l_len = 0; 1358 if (flags & O_EXLOCK) 1359 lf.l_type = F_WRLCK; 1360 else 1361 lf.l_type = F_RDLCK; 1362 type = F_FLOCK; 1363 if ((flags & FNONBLOCK) == 0) 1364 type |= F_WAIT; 1365 VOP_UNLOCK(vp); 1366 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1367 if (error) { 1368 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1369 fd_abort(p, fp, indx); 1370 return (error); 1371 } 1372 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1373 atomic_or_uint(&fp->f_flag, FHASLOCK); 1374 } 1375 VOP_UNLOCK(vp); 1376 *retval = indx; 1377 fd_affix(p, fp, indx); 1378 return (0); 1379 } 1380 1381 static void 1382 vfs__fhfree(fhandle_t *fhp) 1383 { 1384 size_t fhsize; 1385 1386 if (fhp == NULL) { 1387 return; 1388 } 1389 fhsize = FHANDLE_SIZE(fhp); 1390 kmem_free(fhp, fhsize); 1391 } 1392 1393 /* 1394 * vfs_composefh: compose a filehandle. 1395 */ 1396 1397 int 1398 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1399 { 1400 struct mount *mp; 1401 struct fid *fidp; 1402 int error; 1403 size_t needfhsize; 1404 size_t fidsize; 1405 1406 mp = vp->v_mount; 1407 fidp = NULL; 1408 if (*fh_size < FHANDLE_SIZE_MIN) { 1409 fidsize = 0; 1410 } else { 1411 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1412 if (fhp != NULL) { 1413 memset(fhp, 0, *fh_size); 1414 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1415 fidp = &fhp->fh_fid; 1416 } 1417 } 1418 error = VFS_VPTOFH(vp, fidp, &fidsize); 1419 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1420 if (error == 0 && *fh_size < needfhsize) { 1421 error = E2BIG; 1422 } 1423 *fh_size = needfhsize; 1424 return error; 1425 } 1426 1427 int 1428 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1429 { 1430 struct mount *mp; 1431 fhandle_t *fhp; 1432 size_t fhsize; 1433 size_t fidsize; 1434 int error; 1435 1436 *fhpp = NULL; 1437 mp = vp->v_mount; 1438 fidsize = 0; 1439 error = VFS_VPTOFH(vp, NULL, &fidsize); 1440 KASSERT(error != 0); 1441 if (error != E2BIG) { 1442 goto out; 1443 } 1444 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1445 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1446 if (fhp == NULL) { 1447 error = ENOMEM; 1448 goto out; 1449 } 1450 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1451 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1452 if (error == 0) { 1453 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1454 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1455 *fhpp = fhp; 1456 } else { 1457 kmem_free(fhp, fhsize); 1458 } 1459 out: 1460 return error; 1461 } 1462 1463 void 1464 vfs_composefh_free(fhandle_t *fhp) 1465 { 1466 1467 vfs__fhfree(fhp); 1468 } 1469 1470 /* 1471 * vfs_fhtovp: lookup a vnode by a filehandle. 1472 */ 1473 1474 int 1475 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1476 { 1477 struct mount *mp; 1478 int error; 1479 1480 *vpp = NULL; 1481 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1482 if (mp == NULL) { 1483 error = ESTALE; 1484 goto out; 1485 } 1486 if (mp->mnt_op->vfs_fhtovp == NULL) { 1487 error = EOPNOTSUPP; 1488 goto out; 1489 } 1490 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1491 out: 1492 return error; 1493 } 1494 1495 /* 1496 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1497 * the needed size. 1498 */ 1499 1500 int 1501 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1502 { 1503 fhandle_t *fhp; 1504 int error; 1505 1506 *fhpp = NULL; 1507 if (fhsize > FHANDLE_SIZE_MAX) { 1508 return EINVAL; 1509 } 1510 if (fhsize < FHANDLE_SIZE_MIN) { 1511 return EINVAL; 1512 } 1513 again: 1514 fhp = kmem_alloc(fhsize, KM_SLEEP); 1515 if (fhp == NULL) { 1516 return ENOMEM; 1517 } 1518 error = copyin(ufhp, fhp, fhsize); 1519 if (error == 0) { 1520 /* XXX this check shouldn't be here */ 1521 if (FHANDLE_SIZE(fhp) == fhsize) { 1522 *fhpp = fhp; 1523 return 0; 1524 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1525 /* 1526 * a kludge for nfsv2 padded handles. 1527 */ 1528 size_t sz; 1529 1530 sz = FHANDLE_SIZE(fhp); 1531 kmem_free(fhp, fhsize); 1532 fhsize = sz; 1533 goto again; 1534 } else { 1535 /* 1536 * userland told us wrong size. 1537 */ 1538 error = EINVAL; 1539 } 1540 } 1541 kmem_free(fhp, fhsize); 1542 return error; 1543 } 1544 1545 void 1546 vfs_copyinfh_free(fhandle_t *fhp) 1547 { 1548 1549 vfs__fhfree(fhp); 1550 } 1551 1552 /* 1553 * Get file handle system call 1554 */ 1555 int 1556 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1557 { 1558 /* { 1559 syscallarg(char *) fname; 1560 syscallarg(fhandle_t *) fhp; 1561 syscallarg(size_t *) fh_size; 1562 } */ 1563 struct vnode *vp; 1564 fhandle_t *fh; 1565 int error; 1566 struct nameidata nd; 1567 size_t sz; 1568 size_t usz; 1569 1570 /* 1571 * Must be super user 1572 */ 1573 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1574 0, NULL, NULL, NULL); 1575 if (error) 1576 return (error); 1577 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 1578 SCARG(uap, fname)); 1579 error = namei(&nd); 1580 if (error) 1581 return (error); 1582 vp = nd.ni_vp; 1583 error = vfs_composefh_alloc(vp, &fh); 1584 vput(vp); 1585 if (error != 0) { 1586 goto out; 1587 } 1588 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1589 if (error != 0) { 1590 goto out; 1591 } 1592 sz = FHANDLE_SIZE(fh); 1593 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1594 if (error != 0) { 1595 goto out; 1596 } 1597 if (usz >= sz) { 1598 error = copyout(fh, SCARG(uap, fhp), sz); 1599 } else { 1600 error = E2BIG; 1601 } 1602 out: 1603 vfs_composefh_free(fh); 1604 return (error); 1605 } 1606 1607 /* 1608 * Open a file given a file handle. 1609 * 1610 * Check permissions, allocate an open file structure, 1611 * and call the device open routine if any. 1612 */ 1613 1614 int 1615 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1616 register_t *retval) 1617 { 1618 file_t *fp; 1619 struct vnode *vp = NULL; 1620 kauth_cred_t cred = l->l_cred; 1621 file_t *nfp; 1622 int type, indx, error=0; 1623 struct flock lf; 1624 struct vattr va; 1625 fhandle_t *fh; 1626 int flags; 1627 proc_t *p; 1628 1629 p = curproc; 1630 1631 /* 1632 * Must be super user 1633 */ 1634 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1635 0, NULL, NULL, NULL))) 1636 return (error); 1637 1638 flags = FFLAGS(oflags); 1639 if ((flags & (FREAD | FWRITE)) == 0) 1640 return (EINVAL); 1641 if ((flags & O_CREAT)) 1642 return (EINVAL); 1643 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1644 return (error); 1645 fp = nfp; 1646 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1647 if (error != 0) { 1648 goto bad; 1649 } 1650 error = vfs_fhtovp(fh, &vp); 1651 if (error != 0) { 1652 goto bad; 1653 } 1654 1655 /* Now do an effective vn_open */ 1656 1657 if (vp->v_type == VSOCK) { 1658 error = EOPNOTSUPP; 1659 goto bad; 1660 } 1661 error = vn_openchk(vp, cred, flags); 1662 if (error != 0) 1663 goto bad; 1664 if (flags & O_TRUNC) { 1665 VOP_UNLOCK(vp); /* XXX */ 1666 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1667 vattr_null(&va); 1668 va.va_size = 0; 1669 error = VOP_SETATTR(vp, &va, cred); 1670 if (error) 1671 goto bad; 1672 } 1673 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1674 goto bad; 1675 if (flags & FWRITE) { 1676 mutex_enter(&vp->v_interlock); 1677 vp->v_writecount++; 1678 mutex_exit(&vp->v_interlock); 1679 } 1680 1681 /* done with modified vn_open, now finish what sys_open does. */ 1682 1683 fp->f_flag = flags & FMASK; 1684 fp->f_type = DTYPE_VNODE; 1685 fp->f_ops = &vnops; 1686 fp->f_data = vp; 1687 if (flags & (O_EXLOCK | O_SHLOCK)) { 1688 lf.l_whence = SEEK_SET; 1689 lf.l_start = 0; 1690 lf.l_len = 0; 1691 if (flags & O_EXLOCK) 1692 lf.l_type = F_WRLCK; 1693 else 1694 lf.l_type = F_RDLCK; 1695 type = F_FLOCK; 1696 if ((flags & FNONBLOCK) == 0) 1697 type |= F_WAIT; 1698 VOP_UNLOCK(vp); 1699 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1700 if (error) { 1701 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1702 fd_abort(p, fp, indx); 1703 return (error); 1704 } 1705 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1706 atomic_or_uint(&fp->f_flag, FHASLOCK); 1707 } 1708 VOP_UNLOCK(vp); 1709 *retval = indx; 1710 fd_affix(p, fp, indx); 1711 vfs_copyinfh_free(fh); 1712 return (0); 1713 1714 bad: 1715 fd_abort(p, fp, indx); 1716 if (vp != NULL) 1717 vput(vp); 1718 vfs_copyinfh_free(fh); 1719 return (error); 1720 } 1721 1722 int 1723 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1724 { 1725 /* { 1726 syscallarg(const void *) fhp; 1727 syscallarg(size_t) fh_size; 1728 syscallarg(int) flags; 1729 } */ 1730 1731 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1732 SCARG(uap, flags), retval); 1733 } 1734 1735 int 1736 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1737 { 1738 int error; 1739 fhandle_t *fh; 1740 struct vnode *vp; 1741 1742 /* 1743 * Must be super user 1744 */ 1745 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1746 0, NULL, NULL, NULL))) 1747 return (error); 1748 1749 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1750 if (error != 0) 1751 return error; 1752 1753 error = vfs_fhtovp(fh, &vp); 1754 vfs_copyinfh_free(fh); 1755 if (error != 0) 1756 return error; 1757 1758 error = vn_stat(vp, sb); 1759 vput(vp); 1760 return error; 1761 } 1762 1763 1764 /* ARGSUSED */ 1765 int 1766 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1767 { 1768 /* { 1769 syscallarg(const void *) fhp; 1770 syscallarg(size_t) fh_size; 1771 syscallarg(struct stat *) sb; 1772 } */ 1773 struct stat sb; 1774 int error; 1775 1776 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1777 if (error) 1778 return error; 1779 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1780 } 1781 1782 int 1783 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1784 int flags) 1785 { 1786 fhandle_t *fh; 1787 struct mount *mp; 1788 struct vnode *vp; 1789 int error; 1790 1791 /* 1792 * Must be super user 1793 */ 1794 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1795 0, NULL, NULL, NULL))) 1796 return error; 1797 1798 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1799 if (error != 0) 1800 return error; 1801 1802 error = vfs_fhtovp(fh, &vp); 1803 vfs_copyinfh_free(fh); 1804 if (error != 0) 1805 return error; 1806 1807 mp = vp->v_mount; 1808 error = dostatvfs(mp, sb, l, flags, 1); 1809 vput(vp); 1810 return error; 1811 } 1812 1813 /* ARGSUSED */ 1814 int 1815 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1816 { 1817 /* { 1818 syscallarg(const void *) fhp; 1819 syscallarg(size_t) fh_size; 1820 syscallarg(struct statvfs *) buf; 1821 syscallarg(int) flags; 1822 } */ 1823 struct statvfs *sb = STATVFSBUF_GET(); 1824 int error; 1825 1826 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1827 SCARG(uap, flags)); 1828 if (error == 0) 1829 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1830 STATVFSBUF_PUT(sb); 1831 return error; 1832 } 1833 1834 /* 1835 * Create a special file. 1836 */ 1837 /* ARGSUSED */ 1838 int 1839 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1840 register_t *retval) 1841 { 1842 /* { 1843 syscallarg(const char *) path; 1844 syscallarg(mode_t) mode; 1845 syscallarg(dev_t) dev; 1846 } */ 1847 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1848 SCARG(uap, dev), retval, UIO_USERSPACE); 1849 } 1850 1851 int 1852 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1853 register_t *retval, enum uio_seg seg) 1854 { 1855 struct proc *p = l->l_proc; 1856 struct vnode *vp; 1857 struct vattr vattr; 1858 int error, optype; 1859 struct nameidata nd; 1860 char *path; 1861 const char *cpath; 1862 1863 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1864 0, NULL, NULL, NULL)) != 0) 1865 return (error); 1866 1867 optype = VOP_MKNOD_DESCOFFSET; 1868 1869 VERIEXEC_PATH_GET(pathname, seg, cpath, path); 1870 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath); 1871 1872 if ((error = namei(&nd)) != 0) 1873 goto out; 1874 vp = nd.ni_vp; 1875 if (vp != NULL) 1876 error = EEXIST; 1877 else { 1878 vattr_null(&vattr); 1879 /* We will read cwdi->cwdi_cmask unlocked. */ 1880 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1881 vattr.va_rdev = dev; 1882 1883 switch (mode & S_IFMT) { 1884 case S_IFMT: /* used by badsect to flag bad sectors */ 1885 vattr.va_type = VBAD; 1886 break; 1887 case S_IFCHR: 1888 vattr.va_type = VCHR; 1889 break; 1890 case S_IFBLK: 1891 vattr.va_type = VBLK; 1892 break; 1893 case S_IFWHT: 1894 optype = VOP_WHITEOUT_DESCOFFSET; 1895 break; 1896 case S_IFREG: 1897 #if NVERIEXEC > 0 1898 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp, 1899 O_CREAT); 1900 #endif /* NVERIEXEC > 0 */ 1901 vattr.va_type = VREG; 1902 vattr.va_rdev = VNOVAL; 1903 optype = VOP_CREATE_DESCOFFSET; 1904 break; 1905 default: 1906 error = EINVAL; 1907 break; 1908 } 1909 } 1910 if (!error) { 1911 switch (optype) { 1912 case VOP_WHITEOUT_DESCOFFSET: 1913 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1914 if (error) 1915 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1916 vput(nd.ni_dvp); 1917 break; 1918 1919 case VOP_MKNOD_DESCOFFSET: 1920 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1921 &nd.ni_cnd, &vattr); 1922 if (error == 0) 1923 vput(nd.ni_vp); 1924 break; 1925 1926 case VOP_CREATE_DESCOFFSET: 1927 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1928 &nd.ni_cnd, &vattr); 1929 if (error == 0) 1930 vput(nd.ni_vp); 1931 break; 1932 } 1933 } else { 1934 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1935 if (nd.ni_dvp == vp) 1936 vrele(nd.ni_dvp); 1937 else 1938 vput(nd.ni_dvp); 1939 if (vp) 1940 vrele(vp); 1941 } 1942 out: 1943 VERIEXEC_PATH_PUT(path); 1944 return (error); 1945 } 1946 1947 /* 1948 * Create a named pipe. 1949 */ 1950 /* ARGSUSED */ 1951 int 1952 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1953 { 1954 /* { 1955 syscallarg(const char *) path; 1956 syscallarg(int) mode; 1957 } */ 1958 struct proc *p = l->l_proc; 1959 struct vattr vattr; 1960 int error; 1961 struct nameidata nd; 1962 1963 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 1964 SCARG(uap, path)); 1965 if ((error = namei(&nd)) != 0) 1966 return (error); 1967 if (nd.ni_vp != NULL) { 1968 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1969 if (nd.ni_dvp == nd.ni_vp) 1970 vrele(nd.ni_dvp); 1971 else 1972 vput(nd.ni_dvp); 1973 vrele(nd.ni_vp); 1974 return (EEXIST); 1975 } 1976 vattr_null(&vattr); 1977 vattr.va_type = VFIFO; 1978 /* We will read cwdi->cwdi_cmask unlocked. */ 1979 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1980 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1981 if (error == 0) 1982 vput(nd.ni_vp); 1983 return (error); 1984 } 1985 1986 /* 1987 * Make a hard file link. 1988 */ 1989 /* ARGSUSED */ 1990 int 1991 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1992 { 1993 /* { 1994 syscallarg(const char *) path; 1995 syscallarg(const char *) link; 1996 } */ 1997 struct vnode *vp; 1998 struct nameidata nd; 1999 int error; 2000 2001 error = namei_simple_user(SCARG(uap, path), 2002 NSM_FOLLOW_TRYEMULROOT, &vp); 2003 if (error != 0) 2004 return (error); 2005 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, 2006 SCARG(uap, link)); 2007 if ((error = namei(&nd)) != 0) 2008 goto out; 2009 if (nd.ni_vp) { 2010 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2011 if (nd.ni_dvp == nd.ni_vp) 2012 vrele(nd.ni_dvp); 2013 else 2014 vput(nd.ni_dvp); 2015 vrele(nd.ni_vp); 2016 error = EEXIST; 2017 goto out; 2018 } 2019 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2020 out: 2021 vrele(vp); 2022 return (error); 2023 } 2024 2025 int 2026 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2027 { 2028 struct proc *p = curproc; 2029 struct vattr vattr; 2030 char *path; 2031 int error; 2032 struct nameidata nd; 2033 2034 path = PNBUF_GET(); 2035 if (seg == UIO_USERSPACE) { 2036 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2037 goto out; 2038 } else { 2039 KASSERT(strlen(patharg) < MAXPATHLEN); 2040 strcpy(path, patharg); 2041 } 2042 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, link); 2043 if ((error = namei(&nd)) != 0) 2044 goto out; 2045 if (nd.ni_vp) { 2046 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2047 if (nd.ni_dvp == nd.ni_vp) 2048 vrele(nd.ni_dvp); 2049 else 2050 vput(nd.ni_dvp); 2051 vrele(nd.ni_vp); 2052 error = EEXIST; 2053 goto out; 2054 } 2055 vattr_null(&vattr); 2056 vattr.va_type = VLNK; 2057 /* We will read cwdi->cwdi_cmask unlocked. */ 2058 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2059 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2060 if (error == 0) 2061 vput(nd.ni_vp); 2062 out: 2063 PNBUF_PUT(path); 2064 return (error); 2065 } 2066 2067 /* 2068 * Make a symbolic link. 2069 */ 2070 /* ARGSUSED */ 2071 int 2072 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2073 { 2074 /* { 2075 syscallarg(const char *) path; 2076 syscallarg(const char *) link; 2077 } */ 2078 2079 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link), 2080 UIO_USERSPACE); 2081 } 2082 2083 /* 2084 * Delete a whiteout from the filesystem. 2085 */ 2086 /* ARGSUSED */ 2087 int 2088 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2089 { 2090 /* { 2091 syscallarg(const char *) path; 2092 } */ 2093 int error; 2094 struct nameidata nd; 2095 2096 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, 2097 UIO_USERSPACE, SCARG(uap, path)); 2098 error = namei(&nd); 2099 if (error) 2100 return (error); 2101 2102 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2103 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2104 if (nd.ni_dvp == nd.ni_vp) 2105 vrele(nd.ni_dvp); 2106 else 2107 vput(nd.ni_dvp); 2108 if (nd.ni_vp) 2109 vrele(nd.ni_vp); 2110 return (EEXIST); 2111 } 2112 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2114 vput(nd.ni_dvp); 2115 return (error); 2116 } 2117 2118 /* 2119 * Delete a name from the filesystem. 2120 */ 2121 /* ARGSUSED */ 2122 int 2123 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2124 { 2125 /* { 2126 syscallarg(const char *) path; 2127 } */ 2128 2129 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2130 } 2131 2132 int 2133 do_sys_unlink(const char *arg, enum uio_seg seg) 2134 { 2135 struct vnode *vp; 2136 int error; 2137 struct nameidata nd; 2138 char *path; 2139 const char *cpath; 2140 2141 VERIEXEC_PATH_GET(arg, seg, cpath, path); 2142 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath); 2143 2144 if ((error = namei(&nd)) != 0) 2145 goto out; 2146 vp = nd.ni_vp; 2147 2148 /* 2149 * The root of a mounted filesystem cannot be deleted. 2150 */ 2151 if (vp->v_vflag & VV_ROOT) { 2152 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2153 if (nd.ni_dvp == vp) 2154 vrele(nd.ni_dvp); 2155 else 2156 vput(nd.ni_dvp); 2157 vput(vp); 2158 error = EBUSY; 2159 goto out; 2160 } 2161 2162 #if NVERIEXEC > 0 2163 /* Handle remove requests for veriexec entries. */ 2164 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) { 2165 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2166 if (nd.ni_dvp == vp) 2167 vrele(nd.ni_dvp); 2168 else 2169 vput(nd.ni_dvp); 2170 vput(vp); 2171 goto out; 2172 } 2173 #endif /* NVERIEXEC > 0 */ 2174 2175 #ifdef FILEASSOC 2176 (void)fileassoc_file_delete(vp); 2177 #endif /* FILEASSOC */ 2178 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2179 out: 2180 VERIEXEC_PATH_PUT(path); 2181 return (error); 2182 } 2183 2184 /* 2185 * Reposition read/write file offset. 2186 */ 2187 int 2188 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2189 { 2190 /* { 2191 syscallarg(int) fd; 2192 syscallarg(int) pad; 2193 syscallarg(off_t) offset; 2194 syscallarg(int) whence; 2195 } */ 2196 kauth_cred_t cred = l->l_cred; 2197 file_t *fp; 2198 struct vnode *vp; 2199 struct vattr vattr; 2200 off_t newoff; 2201 int error, fd; 2202 2203 fd = SCARG(uap, fd); 2204 2205 if ((fp = fd_getfile(fd)) == NULL) 2206 return (EBADF); 2207 2208 vp = fp->f_data; 2209 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2210 error = ESPIPE; 2211 goto out; 2212 } 2213 2214 switch (SCARG(uap, whence)) { 2215 case SEEK_CUR: 2216 newoff = fp->f_offset + SCARG(uap, offset); 2217 break; 2218 case SEEK_END: 2219 error = VOP_GETATTR(vp, &vattr, cred); 2220 if (error) { 2221 goto out; 2222 } 2223 newoff = SCARG(uap, offset) + vattr.va_size; 2224 break; 2225 case SEEK_SET: 2226 newoff = SCARG(uap, offset); 2227 break; 2228 default: 2229 error = EINVAL; 2230 goto out; 2231 } 2232 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2233 *(off_t *)retval = fp->f_offset = newoff; 2234 } 2235 out: 2236 fd_putfile(fd); 2237 return (error); 2238 } 2239 2240 /* 2241 * Positional read system call. 2242 */ 2243 int 2244 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2245 { 2246 /* { 2247 syscallarg(int) fd; 2248 syscallarg(void *) buf; 2249 syscallarg(size_t) nbyte; 2250 syscallarg(off_t) offset; 2251 } */ 2252 file_t *fp; 2253 struct vnode *vp; 2254 off_t offset; 2255 int error, fd = SCARG(uap, fd); 2256 2257 if ((fp = fd_getfile(fd)) == NULL) 2258 return (EBADF); 2259 2260 if ((fp->f_flag & FREAD) == 0) { 2261 fd_putfile(fd); 2262 return (EBADF); 2263 } 2264 2265 vp = fp->f_data; 2266 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2267 error = ESPIPE; 2268 goto out; 2269 } 2270 2271 offset = SCARG(uap, offset); 2272 2273 /* 2274 * XXX This works because no file systems actually 2275 * XXX take any action on the seek operation. 2276 */ 2277 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2278 goto out; 2279 2280 /* dofileread() will unuse the descriptor for us */ 2281 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2282 &offset, 0, retval)); 2283 2284 out: 2285 fd_putfile(fd); 2286 return (error); 2287 } 2288 2289 /* 2290 * Positional scatter read system call. 2291 */ 2292 int 2293 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2294 { 2295 /* { 2296 syscallarg(int) fd; 2297 syscallarg(const struct iovec *) iovp; 2298 syscallarg(int) iovcnt; 2299 syscallarg(off_t) offset; 2300 } */ 2301 off_t offset = SCARG(uap, offset); 2302 2303 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2304 SCARG(uap, iovcnt), &offset, 0, retval); 2305 } 2306 2307 /* 2308 * Positional write system call. 2309 */ 2310 int 2311 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2312 { 2313 /* { 2314 syscallarg(int) fd; 2315 syscallarg(const void *) buf; 2316 syscallarg(size_t) nbyte; 2317 syscallarg(off_t) offset; 2318 } */ 2319 file_t *fp; 2320 struct vnode *vp; 2321 off_t offset; 2322 int error, fd = SCARG(uap, fd); 2323 2324 if ((fp = fd_getfile(fd)) == NULL) 2325 return (EBADF); 2326 2327 if ((fp->f_flag & FWRITE) == 0) { 2328 fd_putfile(fd); 2329 return (EBADF); 2330 } 2331 2332 vp = fp->f_data; 2333 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2334 error = ESPIPE; 2335 goto out; 2336 } 2337 2338 offset = SCARG(uap, offset); 2339 2340 /* 2341 * XXX This works because no file systems actually 2342 * XXX take any action on the seek operation. 2343 */ 2344 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2345 goto out; 2346 2347 /* dofilewrite() will unuse the descriptor for us */ 2348 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2349 &offset, 0, retval)); 2350 2351 out: 2352 fd_putfile(fd); 2353 return (error); 2354 } 2355 2356 /* 2357 * Positional gather write system call. 2358 */ 2359 int 2360 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2361 { 2362 /* { 2363 syscallarg(int) fd; 2364 syscallarg(const struct iovec *) iovp; 2365 syscallarg(int) iovcnt; 2366 syscallarg(off_t) offset; 2367 } */ 2368 off_t offset = SCARG(uap, offset); 2369 2370 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2371 SCARG(uap, iovcnt), &offset, 0, retval); 2372 } 2373 2374 /* 2375 * Check access permissions. 2376 */ 2377 int 2378 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2379 { 2380 /* { 2381 syscallarg(const char *) path; 2382 syscallarg(int) flags; 2383 } */ 2384 kauth_cred_t cred; 2385 struct vnode *vp; 2386 int error, flags; 2387 struct nameidata nd; 2388 2389 cred = kauth_cred_dup(l->l_cred); 2390 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2391 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2392 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2393 SCARG(uap, path)); 2394 /* Override default credentials */ 2395 nd.ni_cnd.cn_cred = cred; 2396 if ((error = namei(&nd)) != 0) 2397 goto out; 2398 vp = nd.ni_vp; 2399 2400 /* Flags == 0 means only check for existence. */ 2401 if (SCARG(uap, flags)) { 2402 flags = 0; 2403 if (SCARG(uap, flags) & R_OK) 2404 flags |= VREAD; 2405 if (SCARG(uap, flags) & W_OK) 2406 flags |= VWRITE; 2407 if (SCARG(uap, flags) & X_OK) 2408 flags |= VEXEC; 2409 2410 error = VOP_ACCESS(vp, flags, cred); 2411 if (!error && (flags & VWRITE)) 2412 error = vn_writechk(vp); 2413 } 2414 vput(vp); 2415 out: 2416 kauth_cred_free(cred); 2417 return (error); 2418 } 2419 2420 /* 2421 * Common code for all sys_stat functions, including compat versions. 2422 */ 2423 int 2424 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb) 2425 { 2426 int error; 2427 struct nameidata nd; 2428 2429 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, 2430 UIO_USERSPACE, path); 2431 error = namei(&nd); 2432 if (error != 0) 2433 return error; 2434 error = vn_stat(nd.ni_vp, sb); 2435 vput(nd.ni_vp); 2436 return error; 2437 } 2438 2439 /* 2440 * Get file status; this version follows links. 2441 */ 2442 /* ARGSUSED */ 2443 int 2444 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2445 { 2446 /* { 2447 syscallarg(const char *) path; 2448 syscallarg(struct stat *) ub; 2449 } */ 2450 struct stat sb; 2451 int error; 2452 2453 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2454 if (error) 2455 return error; 2456 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2457 } 2458 2459 /* 2460 * Get file status; this version does not follow links. 2461 */ 2462 /* ARGSUSED */ 2463 int 2464 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2465 { 2466 /* { 2467 syscallarg(const char *) path; 2468 syscallarg(struct stat *) ub; 2469 } */ 2470 struct stat sb; 2471 int error; 2472 2473 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2474 if (error) 2475 return error; 2476 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2477 } 2478 2479 /* 2480 * Get configurable pathname variables. 2481 */ 2482 /* ARGSUSED */ 2483 int 2484 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2485 { 2486 /* { 2487 syscallarg(const char *) path; 2488 syscallarg(int) name; 2489 } */ 2490 int error; 2491 struct nameidata nd; 2492 2493 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2494 SCARG(uap, path)); 2495 if ((error = namei(&nd)) != 0) 2496 return (error); 2497 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2498 vput(nd.ni_vp); 2499 return (error); 2500 } 2501 2502 /* 2503 * Return target name of a symbolic link. 2504 */ 2505 /* ARGSUSED */ 2506 int 2507 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2508 { 2509 /* { 2510 syscallarg(const char *) path; 2511 syscallarg(char *) buf; 2512 syscallarg(size_t) count; 2513 } */ 2514 struct vnode *vp; 2515 struct iovec aiov; 2516 struct uio auio; 2517 int error; 2518 struct nameidata nd; 2519 2520 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 2521 SCARG(uap, path)); 2522 if ((error = namei(&nd)) != 0) 2523 return (error); 2524 vp = nd.ni_vp; 2525 if (vp->v_type != VLNK) 2526 error = EINVAL; 2527 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2528 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2529 aiov.iov_base = SCARG(uap, buf); 2530 aiov.iov_len = SCARG(uap, count); 2531 auio.uio_iov = &aiov; 2532 auio.uio_iovcnt = 1; 2533 auio.uio_offset = 0; 2534 auio.uio_rw = UIO_READ; 2535 KASSERT(l == curlwp); 2536 auio.uio_vmspace = l->l_proc->p_vmspace; 2537 auio.uio_resid = SCARG(uap, count); 2538 error = VOP_READLINK(vp, &auio, l->l_cred); 2539 } 2540 vput(vp); 2541 *retval = SCARG(uap, count) - auio.uio_resid; 2542 return (error); 2543 } 2544 2545 /* 2546 * Change flags of a file given a path name. 2547 */ 2548 /* ARGSUSED */ 2549 int 2550 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2551 { 2552 /* { 2553 syscallarg(const char *) path; 2554 syscallarg(u_long) flags; 2555 } */ 2556 struct vnode *vp; 2557 int error; 2558 2559 error = namei_simple_user(SCARG(uap, path), 2560 NSM_FOLLOW_TRYEMULROOT, &vp); 2561 if (error != 0) 2562 return (error); 2563 error = change_flags(vp, SCARG(uap, flags), l); 2564 vput(vp); 2565 return (error); 2566 } 2567 2568 /* 2569 * Change flags of a file given a file descriptor. 2570 */ 2571 /* ARGSUSED */ 2572 int 2573 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2574 { 2575 /* { 2576 syscallarg(int) fd; 2577 syscallarg(u_long) flags; 2578 } */ 2579 struct vnode *vp; 2580 file_t *fp; 2581 int error; 2582 2583 /* fd_getvnode() will use the descriptor for us */ 2584 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2585 return (error); 2586 vp = fp->f_data; 2587 error = change_flags(vp, SCARG(uap, flags), l); 2588 VOP_UNLOCK(vp); 2589 fd_putfile(SCARG(uap, fd)); 2590 return (error); 2591 } 2592 2593 /* 2594 * Change flags of a file given a path name; this version does 2595 * not follow links. 2596 */ 2597 int 2598 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2599 { 2600 /* { 2601 syscallarg(const char *) path; 2602 syscallarg(u_long) flags; 2603 } */ 2604 struct vnode *vp; 2605 int error; 2606 2607 error = namei_simple_user(SCARG(uap, path), 2608 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2609 if (error != 0) 2610 return (error); 2611 error = change_flags(vp, SCARG(uap, flags), l); 2612 vput(vp); 2613 return (error); 2614 } 2615 2616 /* 2617 * Common routine to change flags of a file. 2618 */ 2619 int 2620 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2621 { 2622 struct vattr vattr; 2623 int error; 2624 2625 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2626 /* 2627 * Non-superusers cannot change the flags on devices, even if they 2628 * own them. 2629 */ 2630 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2631 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2632 goto out; 2633 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2634 error = EINVAL; 2635 goto out; 2636 } 2637 } 2638 vattr_null(&vattr); 2639 vattr.va_flags = flags; 2640 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2641 out: 2642 return (error); 2643 } 2644 2645 /* 2646 * Change mode of a file given path name; this version follows links. 2647 */ 2648 /* ARGSUSED */ 2649 int 2650 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2651 { 2652 /* { 2653 syscallarg(const char *) path; 2654 syscallarg(int) mode; 2655 } */ 2656 int error; 2657 struct vnode *vp; 2658 2659 error = namei_simple_user(SCARG(uap, path), 2660 NSM_FOLLOW_TRYEMULROOT, &vp); 2661 if (error != 0) 2662 return (error); 2663 2664 error = change_mode(vp, SCARG(uap, mode), l); 2665 2666 vrele(vp); 2667 return (error); 2668 } 2669 2670 /* 2671 * Change mode of a file given a file descriptor. 2672 */ 2673 /* ARGSUSED */ 2674 int 2675 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2676 { 2677 /* { 2678 syscallarg(int) fd; 2679 syscallarg(int) mode; 2680 } */ 2681 file_t *fp; 2682 int error; 2683 2684 /* fd_getvnode() will use the descriptor for us */ 2685 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2686 return (error); 2687 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2688 fd_putfile(SCARG(uap, fd)); 2689 return (error); 2690 } 2691 2692 /* 2693 * Change mode of a file given path name; this version does not follow links. 2694 */ 2695 /* ARGSUSED */ 2696 int 2697 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2698 { 2699 /* { 2700 syscallarg(const char *) path; 2701 syscallarg(int) mode; 2702 } */ 2703 int error; 2704 struct vnode *vp; 2705 2706 error = namei_simple_user(SCARG(uap, path), 2707 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2708 if (error != 0) 2709 return (error); 2710 2711 error = change_mode(vp, SCARG(uap, mode), l); 2712 2713 vrele(vp); 2714 return (error); 2715 } 2716 2717 /* 2718 * Common routine to set mode given a vnode. 2719 */ 2720 static int 2721 change_mode(struct vnode *vp, int mode, struct lwp *l) 2722 { 2723 struct vattr vattr; 2724 int error; 2725 2726 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2727 vattr_null(&vattr); 2728 vattr.va_mode = mode & ALLPERMS; 2729 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2730 VOP_UNLOCK(vp); 2731 return (error); 2732 } 2733 2734 /* 2735 * Set ownership given a path name; this version follows links. 2736 */ 2737 /* ARGSUSED */ 2738 int 2739 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2740 { 2741 /* { 2742 syscallarg(const char *) path; 2743 syscallarg(uid_t) uid; 2744 syscallarg(gid_t) gid; 2745 } */ 2746 int error; 2747 struct vnode *vp; 2748 2749 error = namei_simple_user(SCARG(uap, path), 2750 NSM_FOLLOW_TRYEMULROOT, &vp); 2751 if (error != 0) 2752 return (error); 2753 2754 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2755 2756 vrele(vp); 2757 return (error); 2758 } 2759 2760 /* 2761 * Set ownership given a path name; this version follows links. 2762 * Provides POSIX semantics. 2763 */ 2764 /* ARGSUSED */ 2765 int 2766 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2767 { 2768 /* { 2769 syscallarg(const char *) path; 2770 syscallarg(uid_t) uid; 2771 syscallarg(gid_t) gid; 2772 } */ 2773 int error; 2774 struct vnode *vp; 2775 2776 error = namei_simple_user(SCARG(uap, path), 2777 NSM_FOLLOW_TRYEMULROOT, &vp); 2778 if (error != 0) 2779 return (error); 2780 2781 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2782 2783 vrele(vp); 2784 return (error); 2785 } 2786 2787 /* 2788 * Set ownership given a file descriptor. 2789 */ 2790 /* ARGSUSED */ 2791 int 2792 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2793 { 2794 /* { 2795 syscallarg(int) fd; 2796 syscallarg(uid_t) uid; 2797 syscallarg(gid_t) gid; 2798 } */ 2799 int error; 2800 file_t *fp; 2801 2802 /* fd_getvnode() will use the descriptor for us */ 2803 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2804 return (error); 2805 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2806 l, 0); 2807 fd_putfile(SCARG(uap, fd)); 2808 return (error); 2809 } 2810 2811 /* 2812 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2813 */ 2814 /* ARGSUSED */ 2815 int 2816 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2817 { 2818 /* { 2819 syscallarg(int) fd; 2820 syscallarg(uid_t) uid; 2821 syscallarg(gid_t) gid; 2822 } */ 2823 int error; 2824 file_t *fp; 2825 2826 /* fd_getvnode() will use the descriptor for us */ 2827 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2828 return (error); 2829 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2830 l, 1); 2831 fd_putfile(SCARG(uap, fd)); 2832 return (error); 2833 } 2834 2835 /* 2836 * Set ownership given a path name; this version does not follow links. 2837 */ 2838 /* ARGSUSED */ 2839 int 2840 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2841 { 2842 /* { 2843 syscallarg(const char *) path; 2844 syscallarg(uid_t) uid; 2845 syscallarg(gid_t) gid; 2846 } */ 2847 int error; 2848 struct vnode *vp; 2849 2850 error = namei_simple_user(SCARG(uap, path), 2851 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2852 if (error != 0) 2853 return (error); 2854 2855 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2856 2857 vrele(vp); 2858 return (error); 2859 } 2860 2861 /* 2862 * Set ownership given a path name; this version does not follow links. 2863 * Provides POSIX/XPG semantics. 2864 */ 2865 /* ARGSUSED */ 2866 int 2867 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2868 { 2869 /* { 2870 syscallarg(const char *) path; 2871 syscallarg(uid_t) uid; 2872 syscallarg(gid_t) gid; 2873 } */ 2874 int error; 2875 struct vnode *vp; 2876 2877 error = namei_simple_user(SCARG(uap, path), 2878 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2879 if (error != 0) 2880 return (error); 2881 2882 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2883 2884 vrele(vp); 2885 return (error); 2886 } 2887 2888 /* 2889 * Common routine to set ownership given a vnode. 2890 */ 2891 static int 2892 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2893 int posix_semantics) 2894 { 2895 struct vattr vattr; 2896 mode_t newmode; 2897 int error; 2898 2899 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2900 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2901 goto out; 2902 2903 #define CHANGED(x) ((int)(x) != -1) 2904 newmode = vattr.va_mode; 2905 if (posix_semantics) { 2906 /* 2907 * POSIX/XPG semantics: if the caller is not the super-user, 2908 * clear set-user-id and set-group-id bits. Both POSIX and 2909 * the XPG consider the behaviour for calls by the super-user 2910 * implementation-defined; we leave the set-user-id and set- 2911 * group-id settings intact in that case. 2912 */ 2913 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2914 NULL) != 0) 2915 newmode &= ~(S_ISUID | S_ISGID); 2916 } else { 2917 /* 2918 * NetBSD semantics: when changing owner and/or group, 2919 * clear the respective bit(s). 2920 */ 2921 if (CHANGED(uid)) 2922 newmode &= ~S_ISUID; 2923 if (CHANGED(gid)) 2924 newmode &= ~S_ISGID; 2925 } 2926 /* Update va_mode iff altered. */ 2927 if (vattr.va_mode == newmode) 2928 newmode = VNOVAL; 2929 2930 vattr_null(&vattr); 2931 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2932 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2933 vattr.va_mode = newmode; 2934 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2935 #undef CHANGED 2936 2937 out: 2938 VOP_UNLOCK(vp); 2939 return (error); 2940 } 2941 2942 /* 2943 * Set the access and modification times given a path name; this 2944 * version follows links. 2945 */ 2946 /* ARGSUSED */ 2947 int 2948 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2949 register_t *retval) 2950 { 2951 /* { 2952 syscallarg(const char *) path; 2953 syscallarg(const struct timeval *) tptr; 2954 } */ 2955 2956 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 2957 SCARG(uap, tptr), UIO_USERSPACE); 2958 } 2959 2960 /* 2961 * Set the access and modification times given a file descriptor. 2962 */ 2963 /* ARGSUSED */ 2964 int 2965 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 2966 register_t *retval) 2967 { 2968 /* { 2969 syscallarg(int) fd; 2970 syscallarg(const struct timeval *) tptr; 2971 } */ 2972 int error; 2973 file_t *fp; 2974 2975 /* fd_getvnode() will use the descriptor for us */ 2976 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2977 return (error); 2978 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 2979 UIO_USERSPACE); 2980 fd_putfile(SCARG(uap, fd)); 2981 return (error); 2982 } 2983 2984 /* 2985 * Set the access and modification times given a path name; this 2986 * version does not follow links. 2987 */ 2988 int 2989 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 2990 register_t *retval) 2991 { 2992 /* { 2993 syscallarg(const char *) path; 2994 syscallarg(const struct timeval *) tptr; 2995 } */ 2996 2997 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 2998 SCARG(uap, tptr), UIO_USERSPACE); 2999 } 3000 3001 /* 3002 * Common routine to set access and modification times given a vnode. 3003 */ 3004 int 3005 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3006 const struct timeval *tptr, enum uio_seg seg) 3007 { 3008 struct vattr vattr; 3009 int error, dorele = 0; 3010 namei_simple_flags_t sflags; 3011 3012 bool vanull, setbirthtime; 3013 struct timespec ts[2]; 3014 3015 /* 3016 * I have checked all callers and they pass either FOLLOW, 3017 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3018 * is 0. More to the point, they don't pass anything else. 3019 * Let's keep it that way at least until the namei interfaces 3020 * are fully sanitized. 3021 */ 3022 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3023 sflags = (flag == FOLLOW) ? 3024 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3025 3026 if (tptr == NULL) { 3027 vanull = true; 3028 nanotime(&ts[0]); 3029 ts[1] = ts[0]; 3030 } else { 3031 struct timeval tv[2]; 3032 3033 vanull = false; 3034 if (seg != UIO_SYSSPACE) { 3035 error = copyin(tptr, tv, sizeof (tv)); 3036 if (error != 0) 3037 return error; 3038 tptr = tv; 3039 } 3040 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3041 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3042 } 3043 3044 if (vp == NULL) { 3045 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3046 error = namei_simple_user(path, sflags, &vp); 3047 if (error != 0) 3048 return error; 3049 dorele = 1; 3050 } 3051 3052 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3053 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3054 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3055 vattr_null(&vattr); 3056 vattr.va_atime = ts[0]; 3057 vattr.va_mtime = ts[1]; 3058 if (setbirthtime) 3059 vattr.va_birthtime = ts[1]; 3060 if (vanull) 3061 vattr.va_vaflags |= VA_UTIMES_NULL; 3062 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3063 VOP_UNLOCK(vp); 3064 3065 if (dorele != 0) 3066 vrele(vp); 3067 3068 return error; 3069 } 3070 3071 /* 3072 * Truncate a file given its path name. 3073 */ 3074 /* ARGSUSED */ 3075 int 3076 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3077 { 3078 /* { 3079 syscallarg(const char *) path; 3080 syscallarg(int) pad; 3081 syscallarg(off_t) length; 3082 } */ 3083 struct vnode *vp; 3084 struct vattr vattr; 3085 int error; 3086 3087 error = namei_simple_user(SCARG(uap, path), 3088 NSM_FOLLOW_TRYEMULROOT, &vp); 3089 if (error != 0) 3090 return (error); 3091 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3092 if (vp->v_type == VDIR) 3093 error = EISDIR; 3094 else if ((error = vn_writechk(vp)) == 0 && 3095 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3096 vattr_null(&vattr); 3097 vattr.va_size = SCARG(uap, length); 3098 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3099 } 3100 vput(vp); 3101 return (error); 3102 } 3103 3104 /* 3105 * Truncate a file given a file descriptor. 3106 */ 3107 /* ARGSUSED */ 3108 int 3109 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3110 { 3111 /* { 3112 syscallarg(int) fd; 3113 syscallarg(int) pad; 3114 syscallarg(off_t) length; 3115 } */ 3116 struct vattr vattr; 3117 struct vnode *vp; 3118 file_t *fp; 3119 int error; 3120 3121 /* fd_getvnode() will use the descriptor for us */ 3122 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3123 return (error); 3124 if ((fp->f_flag & FWRITE) == 0) { 3125 error = EINVAL; 3126 goto out; 3127 } 3128 vp = fp->f_data; 3129 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3130 if (vp->v_type == VDIR) 3131 error = EISDIR; 3132 else if ((error = vn_writechk(vp)) == 0) { 3133 vattr_null(&vattr); 3134 vattr.va_size = SCARG(uap, length); 3135 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3136 } 3137 VOP_UNLOCK(vp); 3138 out: 3139 fd_putfile(SCARG(uap, fd)); 3140 return (error); 3141 } 3142 3143 /* 3144 * Sync an open file. 3145 */ 3146 /* ARGSUSED */ 3147 int 3148 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3149 { 3150 /* { 3151 syscallarg(int) fd; 3152 } */ 3153 struct vnode *vp; 3154 file_t *fp; 3155 int error; 3156 3157 /* fd_getvnode() will use the descriptor for us */ 3158 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3159 return (error); 3160 vp = fp->f_data; 3161 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3162 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3163 VOP_UNLOCK(vp); 3164 fd_putfile(SCARG(uap, fd)); 3165 return (error); 3166 } 3167 3168 /* 3169 * Sync a range of file data. API modeled after that found in AIX. 3170 * 3171 * FDATASYNC indicates that we need only save enough metadata to be able 3172 * to re-read the written data. Note we duplicate AIX's requirement that 3173 * the file be open for writing. 3174 */ 3175 /* ARGSUSED */ 3176 int 3177 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3178 { 3179 /* { 3180 syscallarg(int) fd; 3181 syscallarg(int) flags; 3182 syscallarg(off_t) start; 3183 syscallarg(off_t) length; 3184 } */ 3185 struct vnode *vp; 3186 file_t *fp; 3187 int flags, nflags; 3188 off_t s, e, len; 3189 int error; 3190 3191 /* fd_getvnode() will use the descriptor for us */ 3192 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3193 return (error); 3194 3195 if ((fp->f_flag & FWRITE) == 0) { 3196 error = EBADF; 3197 goto out; 3198 } 3199 3200 flags = SCARG(uap, flags); 3201 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3202 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3203 error = EINVAL; 3204 goto out; 3205 } 3206 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3207 if (flags & FDATASYNC) 3208 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3209 else 3210 nflags = FSYNC_WAIT; 3211 if (flags & FDISKSYNC) 3212 nflags |= FSYNC_CACHE; 3213 3214 len = SCARG(uap, length); 3215 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3216 if (len) { 3217 s = SCARG(uap, start); 3218 e = s + len; 3219 if (e < s) { 3220 error = EINVAL; 3221 goto out; 3222 } 3223 } else { 3224 e = 0; 3225 s = 0; 3226 } 3227 3228 vp = fp->f_data; 3229 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3230 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3231 VOP_UNLOCK(vp); 3232 out: 3233 fd_putfile(SCARG(uap, fd)); 3234 return (error); 3235 } 3236 3237 /* 3238 * Sync the data of an open file. 3239 */ 3240 /* ARGSUSED */ 3241 int 3242 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3243 { 3244 /* { 3245 syscallarg(int) fd; 3246 } */ 3247 struct vnode *vp; 3248 file_t *fp; 3249 int error; 3250 3251 /* fd_getvnode() will use the descriptor for us */ 3252 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3253 return (error); 3254 if ((fp->f_flag & FWRITE) == 0) { 3255 fd_putfile(SCARG(uap, fd)); 3256 return (EBADF); 3257 } 3258 vp = fp->f_data; 3259 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3260 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3261 VOP_UNLOCK(vp); 3262 fd_putfile(SCARG(uap, fd)); 3263 return (error); 3264 } 3265 3266 /* 3267 * Rename files, (standard) BSD semantics frontend. 3268 */ 3269 /* ARGSUSED */ 3270 int 3271 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3272 { 3273 /* { 3274 syscallarg(const char *) from; 3275 syscallarg(const char *) to; 3276 } */ 3277 3278 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3279 } 3280 3281 /* 3282 * Rename files, POSIX semantics frontend. 3283 */ 3284 /* ARGSUSED */ 3285 int 3286 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3287 { 3288 /* { 3289 syscallarg(const char *) from; 3290 syscallarg(const char *) to; 3291 } */ 3292 3293 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3294 } 3295 3296 /* 3297 * Rename files. Source and destination must either both be directories, 3298 * or both not be directories. If target is a directory, it must be empty. 3299 * If `from' and `to' refer to the same object, the value of the `retain' 3300 * argument is used to determine whether `from' will be 3301 * 3302 * (retain == 0) deleted unless `from' and `to' refer to the same 3303 * object in the file system's name space (BSD). 3304 * (retain == 1) always retained (POSIX). 3305 */ 3306 int 3307 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3308 { 3309 struct vnode *tvp, *fvp, *tdvp; 3310 struct nameidata fromnd, tond; 3311 struct mount *fs; 3312 struct lwp *l = curlwp; 3313 struct proc *p; 3314 uint32_t saveflag; 3315 int error; 3316 3317 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT | INRENAME, 3318 seg, from); 3319 if ((error = namei(&fromnd)) != 0) 3320 return (error); 3321 if (fromnd.ni_dvp != fromnd.ni_vp) 3322 VOP_UNLOCK(fromnd.ni_dvp); 3323 fvp = fromnd.ni_vp; 3324 3325 fs = fvp->v_mount; 3326 error = VFS_RENAMELOCK_ENTER(fs); 3327 if (error) { 3328 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3329 vrele(fromnd.ni_dvp); 3330 vrele(fvp); 3331 goto out1; 3332 } 3333 3334 /* 3335 * close, partially, yet another race - ideally we should only 3336 * go as far as getting fromnd.ni_dvp before getting the per-fs 3337 * lock, and then continue to get fromnd.ni_vp, but we can't do 3338 * that with namei as it stands. 3339 * 3340 * This still won't prevent rmdir from nuking fromnd.ni_vp 3341 * under us. The real fix is to get the locks in the right 3342 * order and do the lookups in the right places, but that's a 3343 * major rototill. 3344 * 3345 * Preserve the SAVESTART in cn_flags, because who knows what 3346 * might happen if we don't. 3347 * 3348 * Note: this logic (as well as this whole function) is cloned 3349 * in nfs_serv.c. Proceed accordingly. 3350 */ 3351 vrele(fvp); 3352 if ((fromnd.ni_cnd.cn_namelen == 1 && 3353 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3354 (fromnd.ni_cnd.cn_namelen == 2 && 3355 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3356 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3357 error = EINVAL; 3358 VFS_RENAMELOCK_EXIT(fs); 3359 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3360 vrele(fromnd.ni_dvp); 3361 goto out1; 3362 } 3363 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3364 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3365 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3366 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3367 fromnd.ni_cnd.cn_flags |= saveflag; 3368 if (error) { 3369 VOP_UNLOCK(fromnd.ni_dvp); 3370 VFS_RENAMELOCK_EXIT(fs); 3371 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3372 vrele(fromnd.ni_dvp); 3373 goto out1; 3374 } 3375 VOP_UNLOCK(fromnd.ni_vp); 3376 if (fromnd.ni_dvp != fromnd.ni_vp) 3377 VOP_UNLOCK(fromnd.ni_dvp); 3378 fvp = fromnd.ni_vp; 3379 3380 NDINIT(&tond, RENAME, 3381 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3382 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0), 3383 seg, to); 3384 if ((error = namei(&tond)) != 0) { 3385 VFS_RENAMELOCK_EXIT(fs); 3386 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3387 vrele(fromnd.ni_dvp); 3388 vrele(fvp); 3389 goto out1; 3390 } 3391 tdvp = tond.ni_dvp; 3392 tvp = tond.ni_vp; 3393 3394 if (tvp != NULL) { 3395 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3396 error = ENOTDIR; 3397 goto out; 3398 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3399 error = EISDIR; 3400 goto out; 3401 } 3402 } 3403 3404 if (fvp == tdvp) 3405 error = EINVAL; 3406 3407 /* 3408 * Source and destination refer to the same object. 3409 */ 3410 if (fvp == tvp) { 3411 if (retain) 3412 error = -1; 3413 else if (fromnd.ni_dvp == tdvp && 3414 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3415 !memcmp(fromnd.ni_cnd.cn_nameptr, 3416 tond.ni_cnd.cn_nameptr, 3417 fromnd.ni_cnd.cn_namelen)) 3418 error = -1; 3419 } 3420 3421 #if NVERIEXEC > 0 3422 if (!error) { 3423 char *f1, *f2; 3424 size_t f1_len; 3425 size_t f2_len; 3426 3427 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3428 f1 = kmem_alloc(f1_len, KM_SLEEP); 3429 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3430 3431 f2_len = tond.ni_cnd.cn_namelen + 1; 3432 f2 = kmem_alloc(f2_len, KM_SLEEP); 3433 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3434 3435 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3436 3437 kmem_free(f1, f1_len); 3438 kmem_free(f2, f2_len); 3439 } 3440 #endif /* NVERIEXEC > 0 */ 3441 3442 out: 3443 p = l->l_proc; 3444 if (!error) { 3445 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3446 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3447 VFS_RENAMELOCK_EXIT(fs); 3448 } else { 3449 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3450 if (tdvp == tvp) 3451 vrele(tdvp); 3452 else 3453 vput(tdvp); 3454 if (tvp) 3455 vput(tvp); 3456 VFS_RENAMELOCK_EXIT(fs); 3457 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3458 vrele(fromnd.ni_dvp); 3459 vrele(fvp); 3460 } 3461 vrele(tond.ni_startdir); 3462 PNBUF_PUT(tond.ni_cnd.cn_pnbuf); 3463 out1: 3464 if (fromnd.ni_startdir) 3465 vrele(fromnd.ni_startdir); 3466 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf); 3467 return (error == -1 ? 0 : error); 3468 } 3469 3470 /* 3471 * Make a directory file. 3472 */ 3473 /* ARGSUSED */ 3474 int 3475 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3476 { 3477 /* { 3478 syscallarg(const char *) path; 3479 syscallarg(int) mode; 3480 } */ 3481 3482 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3483 } 3484 3485 int 3486 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3487 { 3488 struct proc *p = curlwp->l_proc; 3489 struct vnode *vp; 3490 struct vattr vattr; 3491 int error; 3492 struct nameidata nd; 3493 3494 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, 3495 seg, path); 3496 if ((error = namei(&nd)) != 0) 3497 return (error); 3498 vp = nd.ni_vp; 3499 if (vp != NULL) { 3500 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3501 if (nd.ni_dvp == vp) 3502 vrele(nd.ni_dvp); 3503 else 3504 vput(nd.ni_dvp); 3505 vrele(vp); 3506 return (EEXIST); 3507 } 3508 vattr_null(&vattr); 3509 vattr.va_type = VDIR; 3510 /* We will read cwdi->cwdi_cmask unlocked. */ 3511 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3512 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3513 if (!error) 3514 vput(nd.ni_vp); 3515 return (error); 3516 } 3517 3518 /* 3519 * Remove a directory file. 3520 */ 3521 /* ARGSUSED */ 3522 int 3523 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3524 { 3525 /* { 3526 syscallarg(const char *) path; 3527 } */ 3528 struct vnode *vp; 3529 int error; 3530 struct nameidata nd; 3531 3532 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 3533 SCARG(uap, path)); 3534 if ((error = namei(&nd)) != 0) 3535 return (error); 3536 vp = nd.ni_vp; 3537 if (vp->v_type != VDIR) { 3538 error = ENOTDIR; 3539 goto out; 3540 } 3541 /* 3542 * No rmdir "." please. 3543 */ 3544 if (nd.ni_dvp == vp) { 3545 error = EINVAL; 3546 goto out; 3547 } 3548 /* 3549 * The root of a mounted filesystem cannot be deleted. 3550 */ 3551 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3552 error = EBUSY; 3553 goto out; 3554 } 3555 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3556 return (error); 3557 3558 out: 3559 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3560 if (nd.ni_dvp == vp) 3561 vrele(nd.ni_dvp); 3562 else 3563 vput(nd.ni_dvp); 3564 vput(vp); 3565 return (error); 3566 } 3567 3568 /* 3569 * Read a block of directory entries in a file system independent format. 3570 */ 3571 int 3572 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3573 { 3574 /* { 3575 syscallarg(int) fd; 3576 syscallarg(char *) buf; 3577 syscallarg(size_t) count; 3578 } */ 3579 file_t *fp; 3580 int error, done; 3581 3582 /* fd_getvnode() will use the descriptor for us */ 3583 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3584 return (error); 3585 if ((fp->f_flag & FREAD) == 0) { 3586 error = EBADF; 3587 goto out; 3588 } 3589 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3590 SCARG(uap, count), &done, l, 0, 0); 3591 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3592 *retval = done; 3593 out: 3594 fd_putfile(SCARG(uap, fd)); 3595 return (error); 3596 } 3597 3598 /* 3599 * Set the mode mask for creation of filesystem nodes. 3600 */ 3601 int 3602 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3603 { 3604 /* { 3605 syscallarg(mode_t) newmask; 3606 } */ 3607 struct proc *p = l->l_proc; 3608 struct cwdinfo *cwdi; 3609 3610 /* 3611 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3612 * important is that we serialize changes to the mask. The 3613 * rw_exit() will issue a write memory barrier on our behalf, 3614 * and force the changes out to other CPUs (as it must use an 3615 * atomic operation, draining the local CPU's store buffers). 3616 */ 3617 cwdi = p->p_cwdi; 3618 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3619 *retval = cwdi->cwdi_cmask; 3620 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3621 rw_exit(&cwdi->cwdi_lock); 3622 3623 return (0); 3624 } 3625 3626 int 3627 dorevoke(struct vnode *vp, kauth_cred_t cred) 3628 { 3629 struct vattr vattr; 3630 int error; 3631 3632 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3633 return error; 3634 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3635 (error = kauth_authorize_generic(cred, 3636 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3637 VOP_REVOKE(vp, REVOKEALL); 3638 return (error); 3639 } 3640 3641 /* 3642 * Void all references to file by ripping underlying filesystem 3643 * away from vnode. 3644 */ 3645 /* ARGSUSED */ 3646 int 3647 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3648 { 3649 /* { 3650 syscallarg(const char *) path; 3651 } */ 3652 struct vnode *vp; 3653 int error; 3654 3655 error = namei_simple_user(SCARG(uap, path), 3656 NSM_FOLLOW_TRYEMULROOT, &vp); 3657 if (error != 0) 3658 return (error); 3659 error = dorevoke(vp, l->l_cred); 3660 vrele(vp); 3661 return (error); 3662 } 3663