1 /* $NetBSD: vfs_syscalls.c,v 1.410 2010/11/30 10:30:02 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.410 2010/11/30 10:30:02 dholland Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_flags(struct vnode *, u_long, struct lwp *); 114 static int change_mode(struct vnode *, int, struct lwp *l); 115 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 116 117 void checkdirs(struct vnode *); 118 119 /* 120 * Virtual File System System Calls 121 */ 122 123 /* 124 * Mount a file system. 125 */ 126 127 /* 128 * This table is used to maintain compatibility with 4.3BSD 129 * and NetBSD 0.9 mount syscalls - and possibly other systems. 130 * Note, the order is important! 131 * 132 * Do not modify this table. It should only contain filesystems 133 * supported by NetBSD 0.9 and 4.3BSD. 134 */ 135 const char * const mountcompatnames[] = { 136 NULL, /* 0 = MOUNT_NONE */ 137 MOUNT_FFS, /* 1 = MOUNT_UFS */ 138 MOUNT_NFS, /* 2 */ 139 MOUNT_MFS, /* 3 */ 140 MOUNT_MSDOS, /* 4 */ 141 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 142 MOUNT_FDESC, /* 6 */ 143 MOUNT_KERNFS, /* 7 */ 144 NULL, /* 8 = MOUNT_DEVFS */ 145 MOUNT_AFS, /* 9 */ 146 }; 147 const int nmountcompatnames = sizeof(mountcompatnames) / 148 sizeof(mountcompatnames[0]); 149 150 static int 151 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 152 void *data, size_t *data_len) 153 { 154 struct mount *mp; 155 int error = 0, saved_flags; 156 157 mp = vp->v_mount; 158 saved_flags = mp->mnt_flag; 159 160 /* We can operate only on VV_ROOT nodes. */ 161 if ((vp->v_vflag & VV_ROOT) == 0) { 162 error = EINVAL; 163 goto out; 164 } 165 166 /* 167 * We only allow the filesystem to be reloaded if it 168 * is currently mounted read-only. Additionally, we 169 * prevent read-write to read-only downgrades. 170 */ 171 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 172 (mp->mnt_flag & MNT_RDONLY) == 0) { 173 error = EOPNOTSUPP; /* Needs translation */ 174 goto out; 175 } 176 177 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 178 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 179 if (error) 180 goto out; 181 182 if (vfs_busy(mp, NULL)) { 183 error = EPERM; 184 goto out; 185 } 186 187 mutex_enter(&mp->mnt_updating); 188 189 mp->mnt_flag &= ~MNT_OP_FLAGS; 190 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 191 192 /* 193 * Set the mount level flags. 194 */ 195 if (flags & MNT_RDONLY) 196 mp->mnt_flag |= MNT_RDONLY; 197 else if (mp->mnt_flag & MNT_RDONLY) 198 mp->mnt_iflag |= IMNT_WANTRDWR; 199 mp->mnt_flag &= 200 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 201 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 202 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 203 MNT_LOG); 204 mp->mnt_flag |= flags & 205 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 206 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 207 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 208 MNT_LOG | MNT_IGNORE); 209 210 error = VFS_MOUNT(mp, path, data, data_len); 211 212 if (error && data != NULL) { 213 int error2; 214 215 /* 216 * Update failed; let's try and see if it was an 217 * export request. For compat with 3.0 and earlier. 218 */ 219 error2 = vfs_hooks_reexport(mp, path, data); 220 221 /* 222 * Only update error code if the export request was 223 * understood but some problem occurred while 224 * processing it. 225 */ 226 if (error2 != EJUSTRETURN) 227 error = error2; 228 } 229 230 if (mp->mnt_iflag & IMNT_WANTRDWR) 231 mp->mnt_flag &= ~MNT_RDONLY; 232 if (error) 233 mp->mnt_flag = saved_flags; 234 mp->mnt_flag &= ~MNT_OP_FLAGS; 235 mp->mnt_iflag &= ~IMNT_WANTRDWR; 236 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 237 if (mp->mnt_syncer == NULL) 238 error = vfs_allocate_syncvnode(mp); 239 } else { 240 if (mp->mnt_syncer != NULL) 241 vfs_deallocate_syncvnode(mp); 242 } 243 mutex_exit(&mp->mnt_updating); 244 vfs_unbusy(mp, false, NULL); 245 246 out: 247 return (error); 248 } 249 250 static int 251 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 252 { 253 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 254 int error; 255 256 /* Copy file-system type from userspace. */ 257 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 258 if (error) { 259 /* 260 * Historically, filesystem types were identified by numbers. 261 * If we get an integer for the filesystem type instead of a 262 * string, we check to see if it matches one of the historic 263 * filesystem types. 264 */ 265 u_long fsindex = (u_long)fstype; 266 if (fsindex >= nmountcompatnames || 267 mountcompatnames[fsindex] == NULL) 268 return ENODEV; 269 strlcpy(fstypename, mountcompatnames[fsindex], 270 sizeof(fstypename)); 271 } 272 273 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 274 if (strcmp(fstypename, "ufs") == 0) 275 fstypename[0] = 'f'; 276 277 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 278 return 0; 279 280 /* If we can autoload a vfs module, try again */ 281 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 282 283 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 284 return 0; 285 286 return ENODEV; 287 } 288 289 static int 290 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 291 const char *path, int flags, void *data, size_t *data_len) 292 { 293 struct mount *mp; 294 struct vnode *vp = *vpp; 295 struct vattr va; 296 struct pathbuf *pb; 297 struct nameidata nd; 298 int error; 299 300 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 301 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 302 if (error) { 303 vfs_delref(vfsops); 304 return error; 305 } 306 307 /* Can't make a non-dir a mount-point (from here anyway). */ 308 if (vp->v_type != VDIR) { 309 vfs_delref(vfsops); 310 return ENOTDIR; 311 } 312 313 /* 314 * If the user is not root, ensure that they own the directory 315 * onto which we are attempting to mount. 316 */ 317 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 318 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 319 (error = kauth_authorize_generic(l->l_cred, 320 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 321 vfs_delref(vfsops); 322 return error; 323 } 324 325 if (flags & MNT_EXPORTED) { 326 vfs_delref(vfsops); 327 return EINVAL; 328 } 329 330 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 331 vfs_delref(vfsops); 332 return ENOMEM; 333 } 334 335 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 336 337 /* 338 * The underlying file system may refuse the mount for 339 * various reasons. Allow the user to force it to happen. 340 * 341 * Set the mount level flags. 342 */ 343 mp->mnt_flag = flags & 344 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 345 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 346 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 347 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 348 349 mutex_enter(&mp->mnt_updating); 350 error = VFS_MOUNT(mp, path, data, data_len); 351 mp->mnt_flag &= ~MNT_OP_FLAGS; 352 353 if (error != 0) 354 goto err_unmounted; 355 356 /* 357 * Validate and prepare the mount point. 358 */ 359 error = pathbuf_copyin(path, &pb); 360 if (error != 0) { 361 goto err_mounted; 362 } 363 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 364 error = namei(&nd); 365 pathbuf_destroy(pb); 366 if (error != 0) { 367 goto err_mounted; 368 } 369 if (nd.ni_vp != vp) { 370 vput(nd.ni_vp); 371 error = EINVAL; 372 goto err_mounted; 373 } 374 if (vp->v_mountedhere != NULL) { 375 vput(nd.ni_vp); 376 error = EBUSY; 377 goto err_mounted; 378 } 379 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 380 if (error != 0) { 381 vput(nd.ni_vp); 382 goto err_mounted; 383 } 384 385 /* 386 * Put the new filesystem on the mount list after root. 387 */ 388 cache_purge(vp); 389 mp->mnt_iflag &= ~IMNT_WANTRDWR; 390 391 mutex_enter(&mountlist_lock); 392 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 393 mutex_exit(&mountlist_lock); 394 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 395 error = vfs_allocate_syncvnode(mp); 396 if (error == 0) 397 vp->v_mountedhere = mp; 398 vput(nd.ni_vp); 399 if (error != 0) 400 goto err_onmountlist; 401 402 checkdirs(vp); 403 mutex_exit(&mp->mnt_updating); 404 405 /* Hold an additional reference to the mount across VFS_START(). */ 406 vfs_unbusy(mp, true, NULL); 407 (void) VFS_STATVFS(mp, &mp->mnt_stat); 408 error = VFS_START(mp, 0); 409 if (error) 410 vrele(vp); 411 /* Drop reference held for VFS_START(). */ 412 vfs_destroy(mp); 413 *vpp = NULL; 414 return error; 415 416 err_onmountlist: 417 mutex_enter(&mountlist_lock); 418 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 419 mp->mnt_iflag |= IMNT_GONE; 420 mutex_exit(&mountlist_lock); 421 422 err_mounted: 423 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 424 panic("Unmounting fresh file system failed"); 425 426 err_unmounted: 427 vp->v_mountedhere = NULL; 428 mutex_exit(&mp->mnt_updating); 429 vfs_unbusy(mp, false, NULL); 430 vfs_destroy(mp); 431 432 return error; 433 } 434 435 static int 436 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 437 void *data, size_t *data_len) 438 { 439 struct mount *mp; 440 int error; 441 442 /* If MNT_GETARGS is specified, it should be the only flag. */ 443 if (flags & ~MNT_GETARGS) 444 return EINVAL; 445 446 mp = vp->v_mount; 447 448 /* XXX: probably some notion of "can see" here if we want isolation. */ 449 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 450 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 451 if (error) 452 return error; 453 454 if ((vp->v_vflag & VV_ROOT) == 0) 455 return EINVAL; 456 457 if (vfs_busy(mp, NULL)) 458 return EPERM; 459 460 mutex_enter(&mp->mnt_updating); 461 mp->mnt_flag &= ~MNT_OP_FLAGS; 462 mp->mnt_flag |= MNT_GETARGS; 463 error = VFS_MOUNT(mp, path, data, data_len); 464 mp->mnt_flag &= ~MNT_OP_FLAGS; 465 mutex_exit(&mp->mnt_updating); 466 467 vfs_unbusy(mp, false, NULL); 468 return (error); 469 } 470 471 int 472 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 473 { 474 /* { 475 syscallarg(const char *) type; 476 syscallarg(const char *) path; 477 syscallarg(int) flags; 478 syscallarg(void *) data; 479 syscallarg(size_t) data_len; 480 } */ 481 482 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 483 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 484 SCARG(uap, data_len), retval); 485 } 486 487 int 488 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 489 const char *path, int flags, void *data, enum uio_seg data_seg, 490 size_t data_len, register_t *retval) 491 { 492 struct vnode *vp; 493 void *data_buf = data; 494 bool vfsopsrele = false; 495 int error; 496 497 /* XXX: The calling convention of this routine is totally bizarre */ 498 if (vfsops) 499 vfsopsrele = true; 500 501 /* 502 * Get vnode to be covered 503 */ 504 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 505 if (error != 0) { 506 vp = NULL; 507 goto done; 508 } 509 510 if (vfsops == NULL) { 511 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 512 vfsops = vp->v_mount->mnt_op; 513 } else { 514 /* 'type' is userspace */ 515 error = mount_get_vfsops(type, &vfsops); 516 if (error != 0) 517 goto done; 518 vfsopsrele = true; 519 } 520 } 521 522 if (data != NULL && data_seg == UIO_USERSPACE) { 523 if (data_len == 0) { 524 /* No length supplied, use default for filesystem */ 525 data_len = vfsops->vfs_min_mount_data; 526 if (data_len > VFS_MAX_MOUNT_DATA) { 527 error = EINVAL; 528 goto done; 529 } 530 /* 531 * Hopefully a longer buffer won't make copyin() fail. 532 * For compatibility with 3.0 and earlier. 533 */ 534 if (flags & MNT_UPDATE 535 && data_len < sizeof (struct mnt_export_args30)) 536 data_len = sizeof (struct mnt_export_args30); 537 } 538 data_buf = kmem_alloc(data_len, KM_SLEEP); 539 540 /* NFS needs the buffer even for mnt_getargs .... */ 541 error = copyin(data, data_buf, data_len); 542 if (error != 0) 543 goto done; 544 } 545 546 if (flags & MNT_GETARGS) { 547 if (data_len == 0) { 548 error = EINVAL; 549 goto done; 550 } 551 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 552 if (error != 0) 553 goto done; 554 if (data_seg == UIO_USERSPACE) 555 error = copyout(data_buf, data, data_len); 556 *retval = data_len; 557 } else if (flags & MNT_UPDATE) { 558 error = mount_update(l, vp, path, flags, data_buf, &data_len); 559 } else { 560 /* Locking is handled internally in mount_domount(). */ 561 KASSERT(vfsopsrele == true); 562 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 563 &data_len); 564 vfsopsrele = false; 565 } 566 567 done: 568 if (vfsopsrele) 569 vfs_delref(vfsops); 570 if (vp != NULL) { 571 vrele(vp); 572 } 573 if (data_buf != data) 574 kmem_free(data_buf, data_len); 575 return (error); 576 } 577 578 /* 579 * Scan all active processes to see if any of them have a current 580 * or root directory onto which the new filesystem has just been 581 * mounted. If so, replace them with the new mount point. 582 */ 583 void 584 checkdirs(struct vnode *olddp) 585 { 586 struct cwdinfo *cwdi; 587 struct vnode *newdp, *rele1, *rele2; 588 struct proc *p; 589 bool retry; 590 591 if (olddp->v_usecount == 1) 592 return; 593 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 594 panic("mount: lost mount"); 595 596 do { 597 retry = false; 598 mutex_enter(proc_lock); 599 PROCLIST_FOREACH(p, &allproc) { 600 if ((cwdi = p->p_cwdi) == NULL) 601 continue; 602 /* 603 * Can't change to the old directory any more, 604 * so even if we see a stale value it's not a 605 * problem. 606 */ 607 if (cwdi->cwdi_cdir != olddp && 608 cwdi->cwdi_rdir != olddp) 609 continue; 610 retry = true; 611 rele1 = NULL; 612 rele2 = NULL; 613 atomic_inc_uint(&cwdi->cwdi_refcnt); 614 mutex_exit(proc_lock); 615 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 616 if (cwdi->cwdi_cdir == olddp) { 617 rele1 = cwdi->cwdi_cdir; 618 vref(newdp); 619 cwdi->cwdi_cdir = newdp; 620 } 621 if (cwdi->cwdi_rdir == olddp) { 622 rele2 = cwdi->cwdi_rdir; 623 vref(newdp); 624 cwdi->cwdi_rdir = newdp; 625 } 626 rw_exit(&cwdi->cwdi_lock); 627 cwdfree(cwdi); 628 if (rele1 != NULL) 629 vrele(rele1); 630 if (rele2 != NULL) 631 vrele(rele2); 632 mutex_enter(proc_lock); 633 break; 634 } 635 mutex_exit(proc_lock); 636 } while (retry); 637 638 if (rootvnode == olddp) { 639 vrele(rootvnode); 640 vref(newdp); 641 rootvnode = newdp; 642 } 643 vput(newdp); 644 } 645 646 /* 647 * Unmount a file system. 648 * 649 * Note: unmount takes a path to the vnode mounted on as argument, 650 * not special file (as before). 651 */ 652 /* ARGSUSED */ 653 int 654 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 655 { 656 /* { 657 syscallarg(const char *) path; 658 syscallarg(int) flags; 659 } */ 660 struct vnode *vp; 661 struct mount *mp; 662 int error; 663 struct pathbuf *pb; 664 struct nameidata nd; 665 666 error = pathbuf_copyin(SCARG(uap, path), &pb); 667 if (error) { 668 return error; 669 } 670 671 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 672 if ((error = namei(&nd)) != 0) { 673 pathbuf_destroy(pb); 674 return error; 675 } 676 vp = nd.ni_vp; 677 pathbuf_destroy(pb); 678 679 mp = vp->v_mount; 680 atomic_inc_uint(&mp->mnt_refcnt); 681 VOP_UNLOCK(vp); 682 683 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 684 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 685 if (error) { 686 vrele(vp); 687 vfs_destroy(mp); 688 return (error); 689 } 690 691 /* 692 * Don't allow unmounting the root file system. 693 */ 694 if (mp->mnt_flag & MNT_ROOTFS) { 695 vrele(vp); 696 vfs_destroy(mp); 697 return (EINVAL); 698 } 699 700 /* 701 * Must be the root of the filesystem 702 */ 703 if ((vp->v_vflag & VV_ROOT) == 0) { 704 vrele(vp); 705 vfs_destroy(mp); 706 return (EINVAL); 707 } 708 709 vrele(vp); 710 error = dounmount(mp, SCARG(uap, flags), l); 711 vfs_destroy(mp); 712 return error; 713 } 714 715 /* 716 * Do the actual file system unmount. File system is assumed to have 717 * been locked by the caller. 718 * 719 * => Caller hold reference to the mount, explicitly for dounmount(). 720 */ 721 int 722 dounmount(struct mount *mp, int flags, struct lwp *l) 723 { 724 struct vnode *coveredvp; 725 int error; 726 int async; 727 int used_syncer; 728 729 #if NVERIEXEC > 0 730 error = veriexec_unmountchk(mp); 731 if (error) 732 return (error); 733 #endif /* NVERIEXEC > 0 */ 734 735 /* 736 * XXX Freeze syncer. Must do this before locking the 737 * mount point. See dounmount() for details. 738 */ 739 mutex_enter(&syncer_mutex); 740 rw_enter(&mp->mnt_unmounting, RW_WRITER); 741 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 742 rw_exit(&mp->mnt_unmounting); 743 mutex_exit(&syncer_mutex); 744 return ENOENT; 745 } 746 747 used_syncer = (mp->mnt_syncer != NULL); 748 749 /* 750 * XXX Syncer must be frozen when we get here. This should really 751 * be done on a per-mountpoint basis, but the syncer doesn't work 752 * like that. 753 * 754 * The caller of dounmount() must acquire syncer_mutex because 755 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 756 * order, and we must preserve that order to avoid deadlock. 757 * 758 * So, if the file system did not use the syncer, now is 759 * the time to release the syncer_mutex. 760 */ 761 if (used_syncer == 0) 762 mutex_exit(&syncer_mutex); 763 764 mp->mnt_iflag |= IMNT_UNMOUNT; 765 async = mp->mnt_flag & MNT_ASYNC; 766 mp->mnt_flag &= ~MNT_ASYNC; 767 cache_purgevfs(mp); /* remove cache entries for this file sys */ 768 if (mp->mnt_syncer != NULL) 769 vfs_deallocate_syncvnode(mp); 770 error = 0; 771 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 772 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 773 } 774 vfs_scrubvnlist(mp); 775 if (error == 0 || (flags & MNT_FORCE)) 776 error = VFS_UNMOUNT(mp, flags); 777 if (error) { 778 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 779 (void) vfs_allocate_syncvnode(mp); 780 mp->mnt_iflag &= ~IMNT_UNMOUNT; 781 mp->mnt_flag |= async; 782 rw_exit(&mp->mnt_unmounting); 783 if (used_syncer) 784 mutex_exit(&syncer_mutex); 785 return (error); 786 } 787 vfs_scrubvnlist(mp); 788 mutex_enter(&mountlist_lock); 789 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 790 coveredvp->v_mountedhere = NULL; 791 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 792 mp->mnt_iflag |= IMNT_GONE; 793 mutex_exit(&mountlist_lock); 794 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 795 panic("unmount: dangling vnode"); 796 if (used_syncer) 797 mutex_exit(&syncer_mutex); 798 vfs_hooks_unmount(mp); 799 rw_exit(&mp->mnt_unmounting); 800 vfs_destroy(mp); /* reference from mount() */ 801 if (coveredvp != NULLVP) 802 vrele(coveredvp); 803 return (0); 804 } 805 806 /* 807 * Sync each mounted filesystem. 808 */ 809 #ifdef DEBUG 810 int syncprt = 0; 811 struct ctldebug debug0 = { "syncprt", &syncprt }; 812 #endif 813 814 /* ARGSUSED */ 815 int 816 sys_sync(struct lwp *l, const void *v, register_t *retval) 817 { 818 struct mount *mp, *nmp; 819 int asyncflag; 820 821 if (l == NULL) 822 l = &lwp0; 823 824 mutex_enter(&mountlist_lock); 825 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 826 mp = nmp) { 827 if (vfs_busy(mp, &nmp)) { 828 continue; 829 } 830 mutex_enter(&mp->mnt_updating); 831 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 832 asyncflag = mp->mnt_flag & MNT_ASYNC; 833 mp->mnt_flag &= ~MNT_ASYNC; 834 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 835 if (asyncflag) 836 mp->mnt_flag |= MNT_ASYNC; 837 } 838 mutex_exit(&mp->mnt_updating); 839 vfs_unbusy(mp, false, &nmp); 840 } 841 mutex_exit(&mountlist_lock); 842 #ifdef DEBUG 843 if (syncprt) 844 vfs_bufstats(); 845 #endif /* DEBUG */ 846 return (0); 847 } 848 849 /* 850 * Change filesystem quotas. 851 */ 852 /* ARGSUSED */ 853 int 854 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 855 { 856 /* { 857 syscallarg(const char *) path; 858 syscallarg(int) cmd; 859 syscallarg(int) uid; 860 syscallarg(void *) arg; 861 } */ 862 struct mount *mp; 863 int error; 864 struct vnode *vp; 865 866 error = namei_simple_user(SCARG(uap, path), 867 NSM_FOLLOW_TRYEMULROOT, &vp); 868 if (error != 0) 869 return (error); 870 mp = vp->v_mount; 871 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 872 SCARG(uap, arg)); 873 vrele(vp); 874 return (error); 875 } 876 877 int 878 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 879 int root) 880 { 881 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 882 int error = 0; 883 884 /* 885 * If MNT_NOWAIT or MNT_LAZY is specified, do not 886 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 887 * overrides MNT_NOWAIT. 888 */ 889 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 890 (flags != MNT_WAIT && flags != 0)) { 891 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 892 goto done; 893 } 894 895 /* Get the filesystem stats now */ 896 memset(sp, 0, sizeof(*sp)); 897 if ((error = VFS_STATVFS(mp, sp)) != 0) { 898 return error; 899 } 900 901 if (cwdi->cwdi_rdir == NULL) 902 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 903 done: 904 if (cwdi->cwdi_rdir != NULL) { 905 size_t len; 906 char *bp; 907 char c; 908 char *path = PNBUF_GET(); 909 910 bp = path + MAXPATHLEN; 911 *--bp = '\0'; 912 rw_enter(&cwdi->cwdi_lock, RW_READER); 913 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 914 MAXPATHLEN / 2, 0, l); 915 rw_exit(&cwdi->cwdi_lock); 916 if (error) { 917 PNBUF_PUT(path); 918 return error; 919 } 920 len = strlen(bp); 921 if (len != 1) { 922 /* 923 * for mount points that are below our root, we can see 924 * them, so we fix up the pathname and return them. The 925 * rest we cannot see, so we don't allow viewing the 926 * data. 927 */ 928 if (strncmp(bp, sp->f_mntonname, len) == 0 && 929 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 930 (void)strlcpy(sp->f_mntonname, 931 c == '\0' ? "/" : &sp->f_mntonname[len], 932 sizeof(sp->f_mntonname)); 933 } else { 934 if (root) 935 (void)strlcpy(sp->f_mntonname, "/", 936 sizeof(sp->f_mntonname)); 937 else 938 error = EPERM; 939 } 940 } 941 PNBUF_PUT(path); 942 } 943 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 944 return error; 945 } 946 947 /* 948 * Get filesystem statistics by path. 949 */ 950 int 951 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 952 { 953 struct mount *mp; 954 int error; 955 struct vnode *vp; 956 957 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 958 if (error != 0) 959 return error; 960 mp = vp->v_mount; 961 error = dostatvfs(mp, sb, l, flags, 1); 962 vrele(vp); 963 return error; 964 } 965 966 /* ARGSUSED */ 967 int 968 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 969 { 970 /* { 971 syscallarg(const char *) path; 972 syscallarg(struct statvfs *) buf; 973 syscallarg(int) flags; 974 } */ 975 struct statvfs *sb; 976 int error; 977 978 sb = STATVFSBUF_GET(); 979 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 980 if (error == 0) 981 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 982 STATVFSBUF_PUT(sb); 983 return error; 984 } 985 986 /* 987 * Get filesystem statistics by fd. 988 */ 989 int 990 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 991 { 992 file_t *fp; 993 struct mount *mp; 994 int error; 995 996 /* fd_getvnode() will use the descriptor for us */ 997 if ((error = fd_getvnode(fd, &fp)) != 0) 998 return (error); 999 mp = ((struct vnode *)fp->f_data)->v_mount; 1000 error = dostatvfs(mp, sb, curlwp, flags, 1); 1001 fd_putfile(fd); 1002 return error; 1003 } 1004 1005 /* ARGSUSED */ 1006 int 1007 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1008 { 1009 /* { 1010 syscallarg(int) fd; 1011 syscallarg(struct statvfs *) buf; 1012 syscallarg(int) flags; 1013 } */ 1014 struct statvfs *sb; 1015 int error; 1016 1017 sb = STATVFSBUF_GET(); 1018 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1019 if (error == 0) 1020 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1021 STATVFSBUF_PUT(sb); 1022 return error; 1023 } 1024 1025 1026 /* 1027 * Get statistics on all filesystems. 1028 */ 1029 int 1030 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1031 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1032 register_t *retval) 1033 { 1034 int root = 0; 1035 struct proc *p = l->l_proc; 1036 struct mount *mp, *nmp; 1037 struct statvfs *sb; 1038 size_t count, maxcount; 1039 int error = 0; 1040 1041 sb = STATVFSBUF_GET(); 1042 maxcount = bufsize / entry_sz; 1043 mutex_enter(&mountlist_lock); 1044 count = 0; 1045 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1046 mp = nmp) { 1047 if (vfs_busy(mp, &nmp)) { 1048 continue; 1049 } 1050 if (sfsp && count < maxcount) { 1051 error = dostatvfs(mp, sb, l, flags, 0); 1052 if (error) { 1053 vfs_unbusy(mp, false, &nmp); 1054 error = 0; 1055 continue; 1056 } 1057 error = copyfn(sb, sfsp, entry_sz); 1058 if (error) { 1059 vfs_unbusy(mp, false, NULL); 1060 goto out; 1061 } 1062 sfsp = (char *)sfsp + entry_sz; 1063 root |= strcmp(sb->f_mntonname, "/") == 0; 1064 } 1065 count++; 1066 vfs_unbusy(mp, false, &nmp); 1067 } 1068 mutex_exit(&mountlist_lock); 1069 1070 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1071 /* 1072 * fake a root entry 1073 */ 1074 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1075 sb, l, flags, 1); 1076 if (error != 0) 1077 goto out; 1078 if (sfsp) { 1079 error = copyfn(sb, sfsp, entry_sz); 1080 if (error != 0) 1081 goto out; 1082 } 1083 count++; 1084 } 1085 if (sfsp && count > maxcount) 1086 *retval = maxcount; 1087 else 1088 *retval = count; 1089 out: 1090 STATVFSBUF_PUT(sb); 1091 return error; 1092 } 1093 1094 int 1095 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1096 { 1097 /* { 1098 syscallarg(struct statvfs *) buf; 1099 syscallarg(size_t) bufsize; 1100 syscallarg(int) flags; 1101 } */ 1102 1103 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1104 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1105 } 1106 1107 /* 1108 * Change current working directory to a given file descriptor. 1109 */ 1110 /* ARGSUSED */ 1111 int 1112 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1113 { 1114 /* { 1115 syscallarg(int) fd; 1116 } */ 1117 struct proc *p = l->l_proc; 1118 struct cwdinfo *cwdi; 1119 struct vnode *vp, *tdp; 1120 struct mount *mp; 1121 file_t *fp; 1122 int error, fd; 1123 1124 /* fd_getvnode() will use the descriptor for us */ 1125 fd = SCARG(uap, fd); 1126 if ((error = fd_getvnode(fd, &fp)) != 0) 1127 return (error); 1128 vp = fp->f_data; 1129 1130 vref(vp); 1131 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1132 if (vp->v_type != VDIR) 1133 error = ENOTDIR; 1134 else 1135 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1136 if (error) { 1137 vput(vp); 1138 goto out; 1139 } 1140 while ((mp = vp->v_mountedhere) != NULL) { 1141 error = vfs_busy(mp, NULL); 1142 vput(vp); 1143 if (error != 0) 1144 goto out; 1145 error = VFS_ROOT(mp, &tdp); 1146 vfs_unbusy(mp, false, NULL); 1147 if (error) 1148 goto out; 1149 vp = tdp; 1150 } 1151 VOP_UNLOCK(vp); 1152 1153 /* 1154 * Disallow changing to a directory not under the process's 1155 * current root directory (if there is one). 1156 */ 1157 cwdi = p->p_cwdi; 1158 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1159 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1160 vrele(vp); 1161 error = EPERM; /* operation not permitted */ 1162 } else { 1163 vrele(cwdi->cwdi_cdir); 1164 cwdi->cwdi_cdir = vp; 1165 } 1166 rw_exit(&cwdi->cwdi_lock); 1167 1168 out: 1169 fd_putfile(fd); 1170 return (error); 1171 } 1172 1173 /* 1174 * Change this process's notion of the root directory to a given file 1175 * descriptor. 1176 */ 1177 int 1178 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1179 { 1180 struct proc *p = l->l_proc; 1181 struct vnode *vp; 1182 file_t *fp; 1183 int error, fd = SCARG(uap, fd); 1184 1185 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1186 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1187 return error; 1188 /* fd_getvnode() will use the descriptor for us */ 1189 if ((error = fd_getvnode(fd, &fp)) != 0) 1190 return error; 1191 vp = fp->f_data; 1192 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1193 if (vp->v_type != VDIR) 1194 error = ENOTDIR; 1195 else 1196 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1197 VOP_UNLOCK(vp); 1198 if (error) 1199 goto out; 1200 vref(vp); 1201 1202 change_root(p->p_cwdi, vp, l); 1203 1204 out: 1205 fd_putfile(fd); 1206 return (error); 1207 } 1208 1209 /* 1210 * Change current working directory (``.''). 1211 */ 1212 /* ARGSUSED */ 1213 int 1214 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1215 { 1216 /* { 1217 syscallarg(const char *) path; 1218 } */ 1219 struct proc *p = l->l_proc; 1220 struct cwdinfo *cwdi; 1221 int error; 1222 struct vnode *vp; 1223 1224 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1225 &vp, l)) != 0) 1226 return (error); 1227 cwdi = p->p_cwdi; 1228 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1229 vrele(cwdi->cwdi_cdir); 1230 cwdi->cwdi_cdir = vp; 1231 rw_exit(&cwdi->cwdi_lock); 1232 return (0); 1233 } 1234 1235 /* 1236 * Change notion of root (``/'') directory. 1237 */ 1238 /* ARGSUSED */ 1239 int 1240 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1241 { 1242 /* { 1243 syscallarg(const char *) path; 1244 } */ 1245 struct proc *p = l->l_proc; 1246 int error; 1247 struct vnode *vp; 1248 1249 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1250 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1251 return (error); 1252 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1253 &vp, l)) != 0) 1254 return (error); 1255 1256 change_root(p->p_cwdi, vp, l); 1257 1258 return (0); 1259 } 1260 1261 /* 1262 * Common routine for chroot and fchroot. 1263 * NB: callers need to properly authorize the change root operation. 1264 */ 1265 void 1266 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1267 { 1268 1269 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1270 if (cwdi->cwdi_rdir != NULL) 1271 vrele(cwdi->cwdi_rdir); 1272 cwdi->cwdi_rdir = vp; 1273 1274 /* 1275 * Prevent escaping from chroot by putting the root under 1276 * the working directory. Silently chdir to / if we aren't 1277 * already there. 1278 */ 1279 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1280 /* 1281 * XXX would be more failsafe to change directory to a 1282 * deadfs node here instead 1283 */ 1284 vrele(cwdi->cwdi_cdir); 1285 vref(vp); 1286 cwdi->cwdi_cdir = vp; 1287 } 1288 rw_exit(&cwdi->cwdi_lock); 1289 } 1290 1291 /* 1292 * Common routine for chroot and chdir. 1293 * XXX "where" should be enum uio_seg 1294 */ 1295 int 1296 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1297 { 1298 struct pathbuf *pb; 1299 struct nameidata nd; 1300 int error; 1301 1302 error = pathbuf_maybe_copyin(path, where, &pb); 1303 if (error) { 1304 return error; 1305 } 1306 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1307 if ((error = namei(&nd)) != 0) { 1308 pathbuf_destroy(pb); 1309 return error; 1310 } 1311 *vpp = nd.ni_vp; 1312 pathbuf_destroy(pb); 1313 1314 if ((*vpp)->v_type != VDIR) 1315 error = ENOTDIR; 1316 else 1317 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1318 1319 if (error) 1320 vput(*vpp); 1321 else 1322 VOP_UNLOCK(*vpp); 1323 return (error); 1324 } 1325 1326 /* 1327 * Check permissions, allocate an open file structure, 1328 * and call the device open routine if any. 1329 */ 1330 int 1331 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1332 { 1333 /* { 1334 syscallarg(const char *) path; 1335 syscallarg(int) flags; 1336 syscallarg(int) mode; 1337 } */ 1338 struct proc *p = l->l_proc; 1339 struct cwdinfo *cwdi = p->p_cwdi; 1340 file_t *fp; 1341 struct vnode *vp; 1342 int flags, cmode; 1343 int type, indx, error; 1344 struct flock lf; 1345 struct pathbuf *pb; 1346 struct nameidata nd; 1347 1348 flags = FFLAGS(SCARG(uap, flags)); 1349 if ((flags & (FREAD | FWRITE)) == 0) 1350 return (EINVAL); 1351 1352 error = pathbuf_copyin(SCARG(uap, path), &pb); 1353 if (error) { 1354 return error; 1355 } 1356 1357 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1358 pathbuf_destroy(pb); 1359 return error; 1360 } 1361 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1362 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1363 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1364 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1365 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1366 fd_abort(p, fp, indx); 1367 if ((error == EDUPFD || error == EMOVEFD) && 1368 l->l_dupfd >= 0 && /* XXX from fdopen */ 1369 (error = 1370 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1371 *retval = indx; 1372 pathbuf_destroy(pb); 1373 return (0); 1374 } 1375 if (error == ERESTART) 1376 error = EINTR; 1377 pathbuf_destroy(pb); 1378 return (error); 1379 } 1380 1381 l->l_dupfd = 0; 1382 vp = nd.ni_vp; 1383 pathbuf_destroy(pb); 1384 1385 fp->f_flag = flags & FMASK; 1386 fp->f_type = DTYPE_VNODE; 1387 fp->f_ops = &vnops; 1388 fp->f_data = vp; 1389 if (flags & (O_EXLOCK | O_SHLOCK)) { 1390 lf.l_whence = SEEK_SET; 1391 lf.l_start = 0; 1392 lf.l_len = 0; 1393 if (flags & O_EXLOCK) 1394 lf.l_type = F_WRLCK; 1395 else 1396 lf.l_type = F_RDLCK; 1397 type = F_FLOCK; 1398 if ((flags & FNONBLOCK) == 0) 1399 type |= F_WAIT; 1400 VOP_UNLOCK(vp); 1401 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1402 if (error) { 1403 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1404 fd_abort(p, fp, indx); 1405 return (error); 1406 } 1407 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1408 atomic_or_uint(&fp->f_flag, FHASLOCK); 1409 } 1410 VOP_UNLOCK(vp); 1411 *retval = indx; 1412 fd_affix(p, fp, indx); 1413 return (0); 1414 } 1415 1416 static void 1417 vfs__fhfree(fhandle_t *fhp) 1418 { 1419 size_t fhsize; 1420 1421 if (fhp == NULL) { 1422 return; 1423 } 1424 fhsize = FHANDLE_SIZE(fhp); 1425 kmem_free(fhp, fhsize); 1426 } 1427 1428 /* 1429 * vfs_composefh: compose a filehandle. 1430 */ 1431 1432 int 1433 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1434 { 1435 struct mount *mp; 1436 struct fid *fidp; 1437 int error; 1438 size_t needfhsize; 1439 size_t fidsize; 1440 1441 mp = vp->v_mount; 1442 fidp = NULL; 1443 if (*fh_size < FHANDLE_SIZE_MIN) { 1444 fidsize = 0; 1445 } else { 1446 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1447 if (fhp != NULL) { 1448 memset(fhp, 0, *fh_size); 1449 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1450 fidp = &fhp->fh_fid; 1451 } 1452 } 1453 error = VFS_VPTOFH(vp, fidp, &fidsize); 1454 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1455 if (error == 0 && *fh_size < needfhsize) { 1456 error = E2BIG; 1457 } 1458 *fh_size = needfhsize; 1459 return error; 1460 } 1461 1462 int 1463 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1464 { 1465 struct mount *mp; 1466 fhandle_t *fhp; 1467 size_t fhsize; 1468 size_t fidsize; 1469 int error; 1470 1471 *fhpp = NULL; 1472 mp = vp->v_mount; 1473 fidsize = 0; 1474 error = VFS_VPTOFH(vp, NULL, &fidsize); 1475 KASSERT(error != 0); 1476 if (error != E2BIG) { 1477 goto out; 1478 } 1479 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1480 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1481 if (fhp == NULL) { 1482 error = ENOMEM; 1483 goto out; 1484 } 1485 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1486 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1487 if (error == 0) { 1488 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1489 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1490 *fhpp = fhp; 1491 } else { 1492 kmem_free(fhp, fhsize); 1493 } 1494 out: 1495 return error; 1496 } 1497 1498 void 1499 vfs_composefh_free(fhandle_t *fhp) 1500 { 1501 1502 vfs__fhfree(fhp); 1503 } 1504 1505 /* 1506 * vfs_fhtovp: lookup a vnode by a filehandle. 1507 */ 1508 1509 int 1510 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1511 { 1512 struct mount *mp; 1513 int error; 1514 1515 *vpp = NULL; 1516 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1517 if (mp == NULL) { 1518 error = ESTALE; 1519 goto out; 1520 } 1521 if (mp->mnt_op->vfs_fhtovp == NULL) { 1522 error = EOPNOTSUPP; 1523 goto out; 1524 } 1525 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1526 out: 1527 return error; 1528 } 1529 1530 /* 1531 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1532 * the needed size. 1533 */ 1534 1535 int 1536 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1537 { 1538 fhandle_t *fhp; 1539 int error; 1540 1541 *fhpp = NULL; 1542 if (fhsize > FHANDLE_SIZE_MAX) { 1543 return EINVAL; 1544 } 1545 if (fhsize < FHANDLE_SIZE_MIN) { 1546 return EINVAL; 1547 } 1548 again: 1549 fhp = kmem_alloc(fhsize, KM_SLEEP); 1550 if (fhp == NULL) { 1551 return ENOMEM; 1552 } 1553 error = copyin(ufhp, fhp, fhsize); 1554 if (error == 0) { 1555 /* XXX this check shouldn't be here */ 1556 if (FHANDLE_SIZE(fhp) == fhsize) { 1557 *fhpp = fhp; 1558 return 0; 1559 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1560 /* 1561 * a kludge for nfsv2 padded handles. 1562 */ 1563 size_t sz; 1564 1565 sz = FHANDLE_SIZE(fhp); 1566 kmem_free(fhp, fhsize); 1567 fhsize = sz; 1568 goto again; 1569 } else { 1570 /* 1571 * userland told us wrong size. 1572 */ 1573 error = EINVAL; 1574 } 1575 } 1576 kmem_free(fhp, fhsize); 1577 return error; 1578 } 1579 1580 void 1581 vfs_copyinfh_free(fhandle_t *fhp) 1582 { 1583 1584 vfs__fhfree(fhp); 1585 } 1586 1587 /* 1588 * Get file handle system call 1589 */ 1590 int 1591 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1592 { 1593 /* { 1594 syscallarg(char *) fname; 1595 syscallarg(fhandle_t *) fhp; 1596 syscallarg(size_t *) fh_size; 1597 } */ 1598 struct vnode *vp; 1599 fhandle_t *fh; 1600 int error; 1601 struct pathbuf *pb; 1602 struct nameidata nd; 1603 size_t sz; 1604 size_t usz; 1605 1606 /* 1607 * Must be super user 1608 */ 1609 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1610 0, NULL, NULL, NULL); 1611 if (error) 1612 return (error); 1613 1614 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1615 if (error) { 1616 return error; 1617 } 1618 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1619 error = namei(&nd); 1620 if (error) { 1621 pathbuf_destroy(pb); 1622 return error; 1623 } 1624 vp = nd.ni_vp; 1625 pathbuf_destroy(pb); 1626 1627 error = vfs_composefh_alloc(vp, &fh); 1628 vput(vp); 1629 if (error != 0) { 1630 goto out; 1631 } 1632 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1633 if (error != 0) { 1634 goto out; 1635 } 1636 sz = FHANDLE_SIZE(fh); 1637 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1638 if (error != 0) { 1639 goto out; 1640 } 1641 if (usz >= sz) { 1642 error = copyout(fh, SCARG(uap, fhp), sz); 1643 } else { 1644 error = E2BIG; 1645 } 1646 out: 1647 vfs_composefh_free(fh); 1648 return (error); 1649 } 1650 1651 /* 1652 * Open a file given a file handle. 1653 * 1654 * Check permissions, allocate an open file structure, 1655 * and call the device open routine if any. 1656 */ 1657 1658 int 1659 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1660 register_t *retval) 1661 { 1662 file_t *fp; 1663 struct vnode *vp = NULL; 1664 kauth_cred_t cred = l->l_cred; 1665 file_t *nfp; 1666 int type, indx, error=0; 1667 struct flock lf; 1668 struct vattr va; 1669 fhandle_t *fh; 1670 int flags; 1671 proc_t *p; 1672 1673 p = curproc; 1674 1675 /* 1676 * Must be super user 1677 */ 1678 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1679 0, NULL, NULL, NULL))) 1680 return (error); 1681 1682 flags = FFLAGS(oflags); 1683 if ((flags & (FREAD | FWRITE)) == 0) 1684 return (EINVAL); 1685 if ((flags & O_CREAT)) 1686 return (EINVAL); 1687 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1688 return (error); 1689 fp = nfp; 1690 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1691 if (error != 0) { 1692 goto bad; 1693 } 1694 error = vfs_fhtovp(fh, &vp); 1695 if (error != 0) { 1696 goto bad; 1697 } 1698 1699 /* Now do an effective vn_open */ 1700 1701 if (vp->v_type == VSOCK) { 1702 error = EOPNOTSUPP; 1703 goto bad; 1704 } 1705 error = vn_openchk(vp, cred, flags); 1706 if (error != 0) 1707 goto bad; 1708 if (flags & O_TRUNC) { 1709 VOP_UNLOCK(vp); /* XXX */ 1710 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1711 vattr_null(&va); 1712 va.va_size = 0; 1713 error = VOP_SETATTR(vp, &va, cred); 1714 if (error) 1715 goto bad; 1716 } 1717 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1718 goto bad; 1719 if (flags & FWRITE) { 1720 mutex_enter(&vp->v_interlock); 1721 vp->v_writecount++; 1722 mutex_exit(&vp->v_interlock); 1723 } 1724 1725 /* done with modified vn_open, now finish what sys_open does. */ 1726 1727 fp->f_flag = flags & FMASK; 1728 fp->f_type = DTYPE_VNODE; 1729 fp->f_ops = &vnops; 1730 fp->f_data = vp; 1731 if (flags & (O_EXLOCK | O_SHLOCK)) { 1732 lf.l_whence = SEEK_SET; 1733 lf.l_start = 0; 1734 lf.l_len = 0; 1735 if (flags & O_EXLOCK) 1736 lf.l_type = F_WRLCK; 1737 else 1738 lf.l_type = F_RDLCK; 1739 type = F_FLOCK; 1740 if ((flags & FNONBLOCK) == 0) 1741 type |= F_WAIT; 1742 VOP_UNLOCK(vp); 1743 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1744 if (error) { 1745 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1746 fd_abort(p, fp, indx); 1747 return (error); 1748 } 1749 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1750 atomic_or_uint(&fp->f_flag, FHASLOCK); 1751 } 1752 VOP_UNLOCK(vp); 1753 *retval = indx; 1754 fd_affix(p, fp, indx); 1755 vfs_copyinfh_free(fh); 1756 return (0); 1757 1758 bad: 1759 fd_abort(p, fp, indx); 1760 if (vp != NULL) 1761 vput(vp); 1762 vfs_copyinfh_free(fh); 1763 return (error); 1764 } 1765 1766 int 1767 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1768 { 1769 /* { 1770 syscallarg(const void *) fhp; 1771 syscallarg(size_t) fh_size; 1772 syscallarg(int) flags; 1773 } */ 1774 1775 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1776 SCARG(uap, flags), retval); 1777 } 1778 1779 int 1780 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1781 { 1782 int error; 1783 fhandle_t *fh; 1784 struct vnode *vp; 1785 1786 /* 1787 * Must be super user 1788 */ 1789 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1790 0, NULL, NULL, NULL))) 1791 return (error); 1792 1793 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1794 if (error != 0) 1795 return error; 1796 1797 error = vfs_fhtovp(fh, &vp); 1798 vfs_copyinfh_free(fh); 1799 if (error != 0) 1800 return error; 1801 1802 error = vn_stat(vp, sb); 1803 vput(vp); 1804 return error; 1805 } 1806 1807 1808 /* ARGSUSED */ 1809 int 1810 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1811 { 1812 /* { 1813 syscallarg(const void *) fhp; 1814 syscallarg(size_t) fh_size; 1815 syscallarg(struct stat *) sb; 1816 } */ 1817 struct stat sb; 1818 int error; 1819 1820 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1821 if (error) 1822 return error; 1823 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1824 } 1825 1826 int 1827 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1828 int flags) 1829 { 1830 fhandle_t *fh; 1831 struct mount *mp; 1832 struct vnode *vp; 1833 int error; 1834 1835 /* 1836 * Must be super user 1837 */ 1838 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1839 0, NULL, NULL, NULL))) 1840 return error; 1841 1842 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1843 if (error != 0) 1844 return error; 1845 1846 error = vfs_fhtovp(fh, &vp); 1847 vfs_copyinfh_free(fh); 1848 if (error != 0) 1849 return error; 1850 1851 mp = vp->v_mount; 1852 error = dostatvfs(mp, sb, l, flags, 1); 1853 vput(vp); 1854 return error; 1855 } 1856 1857 /* ARGSUSED */ 1858 int 1859 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1860 { 1861 /* { 1862 syscallarg(const void *) fhp; 1863 syscallarg(size_t) fh_size; 1864 syscallarg(struct statvfs *) buf; 1865 syscallarg(int) flags; 1866 } */ 1867 struct statvfs *sb = STATVFSBUF_GET(); 1868 int error; 1869 1870 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1871 SCARG(uap, flags)); 1872 if (error == 0) 1873 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1874 STATVFSBUF_PUT(sb); 1875 return error; 1876 } 1877 1878 /* 1879 * Create a special file. 1880 */ 1881 /* ARGSUSED */ 1882 int 1883 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1884 register_t *retval) 1885 { 1886 /* { 1887 syscallarg(const char *) path; 1888 syscallarg(mode_t) mode; 1889 syscallarg(dev_t) dev; 1890 } */ 1891 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1892 SCARG(uap, dev), retval, UIO_USERSPACE); 1893 } 1894 1895 int 1896 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1897 register_t *retval, enum uio_seg seg) 1898 { 1899 struct proc *p = l->l_proc; 1900 struct vnode *vp; 1901 struct vattr vattr; 1902 int error, optype; 1903 struct pathbuf *pb; 1904 struct nameidata nd; 1905 const char *pathstring; 1906 1907 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1908 0, NULL, NULL, NULL)) != 0) 1909 return (error); 1910 1911 optype = VOP_MKNOD_DESCOFFSET; 1912 1913 error = pathbuf_maybe_copyin(pathname, seg, &pb); 1914 if (error) { 1915 return error; 1916 } 1917 pathstring = pathbuf_stringcopy_get(pb); 1918 if (pathstring == NULL) { 1919 pathbuf_destroy(pb); 1920 return ENOMEM; 1921 } 1922 1923 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 1924 if ((error = namei(&nd)) != 0) 1925 goto out; 1926 vp = nd.ni_vp; 1927 1928 if (vp != NULL) 1929 error = EEXIST; 1930 else { 1931 vattr_null(&vattr); 1932 /* We will read cwdi->cwdi_cmask unlocked. */ 1933 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1934 vattr.va_rdev = dev; 1935 1936 switch (mode & S_IFMT) { 1937 case S_IFMT: /* used by badsect to flag bad sectors */ 1938 vattr.va_type = VBAD; 1939 break; 1940 case S_IFCHR: 1941 vattr.va_type = VCHR; 1942 break; 1943 case S_IFBLK: 1944 vattr.va_type = VBLK; 1945 break; 1946 case S_IFWHT: 1947 optype = VOP_WHITEOUT_DESCOFFSET; 1948 break; 1949 case S_IFREG: 1950 #if NVERIEXEC > 0 1951 error = veriexec_openchk(l, nd.ni_vp, pathstring, 1952 O_CREAT); 1953 #endif /* NVERIEXEC > 0 */ 1954 vattr.va_type = VREG; 1955 vattr.va_rdev = VNOVAL; 1956 optype = VOP_CREATE_DESCOFFSET; 1957 break; 1958 default: 1959 error = EINVAL; 1960 break; 1961 } 1962 } 1963 if (!error) { 1964 switch (optype) { 1965 case VOP_WHITEOUT_DESCOFFSET: 1966 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1967 if (error) 1968 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1969 vput(nd.ni_dvp); 1970 break; 1971 1972 case VOP_MKNOD_DESCOFFSET: 1973 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1974 &nd.ni_cnd, &vattr); 1975 if (error == 0) 1976 vput(nd.ni_vp); 1977 break; 1978 1979 case VOP_CREATE_DESCOFFSET: 1980 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1981 &nd.ni_cnd, &vattr); 1982 if (error == 0) 1983 vput(nd.ni_vp); 1984 break; 1985 } 1986 } else { 1987 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1988 if (nd.ni_dvp == vp) 1989 vrele(nd.ni_dvp); 1990 else 1991 vput(nd.ni_dvp); 1992 if (vp) 1993 vrele(vp); 1994 } 1995 out: 1996 pathbuf_stringcopy_put(pb, pathstring); 1997 pathbuf_destroy(pb); 1998 return (error); 1999 } 2000 2001 /* 2002 * Create a named pipe. 2003 */ 2004 /* ARGSUSED */ 2005 int 2006 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2007 { 2008 /* { 2009 syscallarg(const char *) path; 2010 syscallarg(int) mode; 2011 } */ 2012 struct proc *p = l->l_proc; 2013 struct vattr vattr; 2014 int error; 2015 struct pathbuf *pb; 2016 struct nameidata nd; 2017 2018 error = pathbuf_copyin(SCARG(uap, path), &pb); 2019 if (error) { 2020 return error; 2021 } 2022 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2023 if ((error = namei(&nd)) != 0) { 2024 pathbuf_destroy(pb); 2025 return error; 2026 } 2027 if (nd.ni_vp != NULL) { 2028 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2029 if (nd.ni_dvp == nd.ni_vp) 2030 vrele(nd.ni_dvp); 2031 else 2032 vput(nd.ni_dvp); 2033 vrele(nd.ni_vp); 2034 pathbuf_destroy(pb); 2035 return (EEXIST); 2036 } 2037 vattr_null(&vattr); 2038 vattr.va_type = VFIFO; 2039 /* We will read cwdi->cwdi_cmask unlocked. */ 2040 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2041 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2042 if (error == 0) 2043 vput(nd.ni_vp); 2044 pathbuf_destroy(pb); 2045 return (error); 2046 } 2047 2048 /* 2049 * Make a hard file link. 2050 */ 2051 /* ARGSUSED */ 2052 int 2053 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2054 { 2055 /* { 2056 syscallarg(const char *) path; 2057 syscallarg(const char *) link; 2058 } */ 2059 struct vnode *vp; 2060 struct pathbuf *linkpb; 2061 struct nameidata nd; 2062 int error; 2063 2064 error = namei_simple_user(SCARG(uap, path), 2065 NSM_FOLLOW_TRYEMULROOT, &vp); 2066 if (error != 0) 2067 return (error); 2068 error = pathbuf_copyin(SCARG(uap, link), &linkpb); 2069 if (error) { 2070 goto out1; 2071 } 2072 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2073 if ((error = namei(&nd)) != 0) 2074 goto out2; 2075 if (nd.ni_vp) { 2076 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2077 if (nd.ni_dvp == nd.ni_vp) 2078 vrele(nd.ni_dvp); 2079 else 2080 vput(nd.ni_dvp); 2081 vrele(nd.ni_vp); 2082 error = EEXIST; 2083 goto out2; 2084 } 2085 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2086 out2: 2087 pathbuf_destroy(linkpb); 2088 out1: 2089 vrele(vp); 2090 return (error); 2091 } 2092 2093 int 2094 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2095 { 2096 struct proc *p = curproc; 2097 struct vattr vattr; 2098 char *path; 2099 int error; 2100 struct pathbuf *linkpb; 2101 struct nameidata nd; 2102 2103 path = PNBUF_GET(); 2104 if (seg == UIO_USERSPACE) { 2105 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2106 goto out1; 2107 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2108 goto out1; 2109 } else { 2110 KASSERT(strlen(patharg) < MAXPATHLEN); 2111 strcpy(path, patharg); 2112 linkpb = pathbuf_create(link); 2113 if (linkpb == NULL) { 2114 error = ENOMEM; 2115 goto out1; 2116 } 2117 } 2118 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2119 if ((error = namei(&nd)) != 0) 2120 goto out2; 2121 if (nd.ni_vp) { 2122 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2123 if (nd.ni_dvp == nd.ni_vp) 2124 vrele(nd.ni_dvp); 2125 else 2126 vput(nd.ni_dvp); 2127 vrele(nd.ni_vp); 2128 error = EEXIST; 2129 goto out2; 2130 } 2131 vattr_null(&vattr); 2132 vattr.va_type = VLNK; 2133 /* We will read cwdi->cwdi_cmask unlocked. */ 2134 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2135 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2136 if (error == 0) 2137 vput(nd.ni_vp); 2138 out2: 2139 pathbuf_destroy(linkpb); 2140 out1: 2141 PNBUF_PUT(path); 2142 return (error); 2143 } 2144 2145 /* 2146 * Make a symbolic link. 2147 */ 2148 /* ARGSUSED */ 2149 int 2150 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2151 { 2152 /* { 2153 syscallarg(const char *) path; 2154 syscallarg(const char *) link; 2155 } */ 2156 2157 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link), 2158 UIO_USERSPACE); 2159 } 2160 2161 /* 2162 * Delete a whiteout from the filesystem. 2163 */ 2164 /* ARGSUSED */ 2165 int 2166 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2167 { 2168 /* { 2169 syscallarg(const char *) path; 2170 } */ 2171 int error; 2172 struct pathbuf *pb; 2173 struct nameidata nd; 2174 2175 error = pathbuf_copyin(SCARG(uap, path), &pb); 2176 if (error) { 2177 return error; 2178 } 2179 2180 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2181 error = namei(&nd); 2182 if (error) { 2183 pathbuf_destroy(pb); 2184 return (error); 2185 } 2186 2187 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2188 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2189 if (nd.ni_dvp == nd.ni_vp) 2190 vrele(nd.ni_dvp); 2191 else 2192 vput(nd.ni_dvp); 2193 if (nd.ni_vp) 2194 vrele(nd.ni_vp); 2195 pathbuf_destroy(pb); 2196 return (EEXIST); 2197 } 2198 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2199 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2200 vput(nd.ni_dvp); 2201 pathbuf_destroy(pb); 2202 return (error); 2203 } 2204 2205 /* 2206 * Delete a name from the filesystem. 2207 */ 2208 /* ARGSUSED */ 2209 int 2210 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2211 { 2212 /* { 2213 syscallarg(const char *) path; 2214 } */ 2215 2216 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2217 } 2218 2219 int 2220 do_sys_unlink(const char *arg, enum uio_seg seg) 2221 { 2222 struct vnode *vp; 2223 int error; 2224 struct pathbuf *pb; 2225 struct nameidata nd; 2226 const char *pathstring; 2227 2228 error = pathbuf_maybe_copyin(arg, seg, &pb); 2229 if (error) { 2230 return error; 2231 } 2232 pathstring = pathbuf_stringcopy_get(pb); 2233 if (pathstring == NULL) { 2234 pathbuf_destroy(pb); 2235 return ENOMEM; 2236 } 2237 2238 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2239 if ((error = namei(&nd)) != 0) 2240 goto out; 2241 vp = nd.ni_vp; 2242 2243 /* 2244 * The root of a mounted filesystem cannot be deleted. 2245 */ 2246 if (vp->v_vflag & VV_ROOT) { 2247 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2248 if (nd.ni_dvp == vp) 2249 vrele(nd.ni_dvp); 2250 else 2251 vput(nd.ni_dvp); 2252 vput(vp); 2253 error = EBUSY; 2254 goto out; 2255 } 2256 2257 #if NVERIEXEC > 0 2258 /* Handle remove requests for veriexec entries. */ 2259 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2260 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2261 if (nd.ni_dvp == vp) 2262 vrele(nd.ni_dvp); 2263 else 2264 vput(nd.ni_dvp); 2265 vput(vp); 2266 goto out; 2267 } 2268 #endif /* NVERIEXEC > 0 */ 2269 2270 #ifdef FILEASSOC 2271 (void)fileassoc_file_delete(vp); 2272 #endif /* FILEASSOC */ 2273 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2274 out: 2275 pathbuf_stringcopy_put(pb, pathstring); 2276 pathbuf_destroy(pb); 2277 return (error); 2278 } 2279 2280 /* 2281 * Reposition read/write file offset. 2282 */ 2283 int 2284 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2285 { 2286 /* { 2287 syscallarg(int) fd; 2288 syscallarg(int) pad; 2289 syscallarg(off_t) offset; 2290 syscallarg(int) whence; 2291 } */ 2292 kauth_cred_t cred = l->l_cred; 2293 file_t *fp; 2294 struct vnode *vp; 2295 struct vattr vattr; 2296 off_t newoff; 2297 int error, fd; 2298 2299 fd = SCARG(uap, fd); 2300 2301 if ((fp = fd_getfile(fd)) == NULL) 2302 return (EBADF); 2303 2304 vp = fp->f_data; 2305 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2306 error = ESPIPE; 2307 goto out; 2308 } 2309 2310 switch (SCARG(uap, whence)) { 2311 case SEEK_CUR: 2312 newoff = fp->f_offset + SCARG(uap, offset); 2313 break; 2314 case SEEK_END: 2315 error = VOP_GETATTR(vp, &vattr, cred); 2316 if (error) { 2317 goto out; 2318 } 2319 newoff = SCARG(uap, offset) + vattr.va_size; 2320 break; 2321 case SEEK_SET: 2322 newoff = SCARG(uap, offset); 2323 break; 2324 default: 2325 error = EINVAL; 2326 goto out; 2327 } 2328 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2329 *(off_t *)retval = fp->f_offset = newoff; 2330 } 2331 out: 2332 fd_putfile(fd); 2333 return (error); 2334 } 2335 2336 /* 2337 * Positional read system call. 2338 */ 2339 int 2340 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2341 { 2342 /* { 2343 syscallarg(int) fd; 2344 syscallarg(void *) buf; 2345 syscallarg(size_t) nbyte; 2346 syscallarg(off_t) offset; 2347 } */ 2348 file_t *fp; 2349 struct vnode *vp; 2350 off_t offset; 2351 int error, fd = SCARG(uap, fd); 2352 2353 if ((fp = fd_getfile(fd)) == NULL) 2354 return (EBADF); 2355 2356 if ((fp->f_flag & FREAD) == 0) { 2357 fd_putfile(fd); 2358 return (EBADF); 2359 } 2360 2361 vp = fp->f_data; 2362 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2363 error = ESPIPE; 2364 goto out; 2365 } 2366 2367 offset = SCARG(uap, offset); 2368 2369 /* 2370 * XXX This works because no file systems actually 2371 * XXX take any action on the seek operation. 2372 */ 2373 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2374 goto out; 2375 2376 /* dofileread() will unuse the descriptor for us */ 2377 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2378 &offset, 0, retval)); 2379 2380 out: 2381 fd_putfile(fd); 2382 return (error); 2383 } 2384 2385 /* 2386 * Positional scatter read system call. 2387 */ 2388 int 2389 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2390 { 2391 /* { 2392 syscallarg(int) fd; 2393 syscallarg(const struct iovec *) iovp; 2394 syscallarg(int) iovcnt; 2395 syscallarg(off_t) offset; 2396 } */ 2397 off_t offset = SCARG(uap, offset); 2398 2399 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2400 SCARG(uap, iovcnt), &offset, 0, retval); 2401 } 2402 2403 /* 2404 * Positional write system call. 2405 */ 2406 int 2407 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2408 { 2409 /* { 2410 syscallarg(int) fd; 2411 syscallarg(const void *) buf; 2412 syscallarg(size_t) nbyte; 2413 syscallarg(off_t) offset; 2414 } */ 2415 file_t *fp; 2416 struct vnode *vp; 2417 off_t offset; 2418 int error, fd = SCARG(uap, fd); 2419 2420 if ((fp = fd_getfile(fd)) == NULL) 2421 return (EBADF); 2422 2423 if ((fp->f_flag & FWRITE) == 0) { 2424 fd_putfile(fd); 2425 return (EBADF); 2426 } 2427 2428 vp = fp->f_data; 2429 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2430 error = ESPIPE; 2431 goto out; 2432 } 2433 2434 offset = SCARG(uap, offset); 2435 2436 /* 2437 * XXX This works because no file systems actually 2438 * XXX take any action on the seek operation. 2439 */ 2440 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2441 goto out; 2442 2443 /* dofilewrite() will unuse the descriptor for us */ 2444 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2445 &offset, 0, retval)); 2446 2447 out: 2448 fd_putfile(fd); 2449 return (error); 2450 } 2451 2452 /* 2453 * Positional gather write system call. 2454 */ 2455 int 2456 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2457 { 2458 /* { 2459 syscallarg(int) fd; 2460 syscallarg(const struct iovec *) iovp; 2461 syscallarg(int) iovcnt; 2462 syscallarg(off_t) offset; 2463 } */ 2464 off_t offset = SCARG(uap, offset); 2465 2466 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2467 SCARG(uap, iovcnt), &offset, 0, retval); 2468 } 2469 2470 /* 2471 * Check access permissions. 2472 */ 2473 int 2474 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2475 { 2476 /* { 2477 syscallarg(const char *) path; 2478 syscallarg(int) flags; 2479 } */ 2480 kauth_cred_t cred; 2481 struct vnode *vp; 2482 int error, flags; 2483 struct pathbuf *pb; 2484 struct nameidata nd; 2485 2486 error = pathbuf_copyin(SCARG(uap, path), &pb); 2487 if (error) { 2488 return error; 2489 } 2490 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2491 2492 /* Override default credentials */ 2493 cred = kauth_cred_dup(l->l_cred); 2494 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2495 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2496 nd.ni_cnd.cn_cred = cred; 2497 2498 if ((error = namei(&nd)) != 0) { 2499 pathbuf_destroy(pb); 2500 goto out; 2501 } 2502 vp = nd.ni_vp; 2503 pathbuf_destroy(pb); 2504 2505 /* Flags == 0 means only check for existence. */ 2506 if (SCARG(uap, flags)) { 2507 flags = 0; 2508 if (SCARG(uap, flags) & R_OK) 2509 flags |= VREAD; 2510 if (SCARG(uap, flags) & W_OK) 2511 flags |= VWRITE; 2512 if (SCARG(uap, flags) & X_OK) 2513 flags |= VEXEC; 2514 2515 error = VOP_ACCESS(vp, flags, cred); 2516 if (!error && (flags & VWRITE)) 2517 error = vn_writechk(vp); 2518 } 2519 vput(vp); 2520 out: 2521 kauth_cred_free(cred); 2522 return (error); 2523 } 2524 2525 /* 2526 * Common code for all sys_stat functions, including compat versions. 2527 */ 2528 int 2529 do_sys_stat(const char *userpath, unsigned int nd_flags, struct stat *sb) 2530 { 2531 int error; 2532 struct pathbuf *pb; 2533 struct nameidata nd; 2534 2535 error = pathbuf_copyin(userpath, &pb); 2536 if (error) { 2537 return error; 2538 } 2539 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, pb); 2540 error = namei(&nd); 2541 if (error != 0) { 2542 pathbuf_destroy(pb); 2543 return error; 2544 } 2545 error = vn_stat(nd.ni_vp, sb); 2546 vput(nd.ni_vp); 2547 pathbuf_destroy(pb); 2548 return error; 2549 } 2550 2551 /* 2552 * Get file status; this version follows links. 2553 */ 2554 /* ARGSUSED */ 2555 int 2556 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2557 { 2558 /* { 2559 syscallarg(const char *) path; 2560 syscallarg(struct stat *) ub; 2561 } */ 2562 struct stat sb; 2563 int error; 2564 2565 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2566 if (error) 2567 return error; 2568 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2569 } 2570 2571 /* 2572 * Get file status; this version does not follow links. 2573 */ 2574 /* ARGSUSED */ 2575 int 2576 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2577 { 2578 /* { 2579 syscallarg(const char *) path; 2580 syscallarg(struct stat *) ub; 2581 } */ 2582 struct stat sb; 2583 int error; 2584 2585 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2586 if (error) 2587 return error; 2588 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2589 } 2590 2591 /* 2592 * Get configurable pathname variables. 2593 */ 2594 /* ARGSUSED */ 2595 int 2596 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2597 { 2598 /* { 2599 syscallarg(const char *) path; 2600 syscallarg(int) name; 2601 } */ 2602 int error; 2603 struct pathbuf *pb; 2604 struct nameidata nd; 2605 2606 error = pathbuf_copyin(SCARG(uap, path), &pb); 2607 if (error) { 2608 return error; 2609 } 2610 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2611 if ((error = namei(&nd)) != 0) { 2612 pathbuf_destroy(pb); 2613 return (error); 2614 } 2615 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2616 vput(nd.ni_vp); 2617 pathbuf_destroy(pb); 2618 return (error); 2619 } 2620 2621 /* 2622 * Return target name of a symbolic link. 2623 */ 2624 /* ARGSUSED */ 2625 int 2626 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2627 { 2628 /* { 2629 syscallarg(const char *) path; 2630 syscallarg(char *) buf; 2631 syscallarg(size_t) count; 2632 } */ 2633 struct vnode *vp; 2634 struct iovec aiov; 2635 struct uio auio; 2636 int error; 2637 struct pathbuf *pb; 2638 struct nameidata nd; 2639 2640 error = pathbuf_copyin(SCARG(uap, path), &pb); 2641 if (error) { 2642 return error; 2643 } 2644 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2645 if ((error = namei(&nd)) != 0) { 2646 pathbuf_destroy(pb); 2647 return error; 2648 } 2649 vp = nd.ni_vp; 2650 pathbuf_destroy(pb); 2651 if (vp->v_type != VLNK) 2652 error = EINVAL; 2653 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2654 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2655 aiov.iov_base = SCARG(uap, buf); 2656 aiov.iov_len = SCARG(uap, count); 2657 auio.uio_iov = &aiov; 2658 auio.uio_iovcnt = 1; 2659 auio.uio_offset = 0; 2660 auio.uio_rw = UIO_READ; 2661 KASSERT(l == curlwp); 2662 auio.uio_vmspace = l->l_proc->p_vmspace; 2663 auio.uio_resid = SCARG(uap, count); 2664 error = VOP_READLINK(vp, &auio, l->l_cred); 2665 } 2666 vput(vp); 2667 *retval = SCARG(uap, count) - auio.uio_resid; 2668 return (error); 2669 } 2670 2671 /* 2672 * Change flags of a file given a path name. 2673 */ 2674 /* ARGSUSED */ 2675 int 2676 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2677 { 2678 /* { 2679 syscallarg(const char *) path; 2680 syscallarg(u_long) flags; 2681 } */ 2682 struct vnode *vp; 2683 int error; 2684 2685 error = namei_simple_user(SCARG(uap, path), 2686 NSM_FOLLOW_TRYEMULROOT, &vp); 2687 if (error != 0) 2688 return (error); 2689 error = change_flags(vp, SCARG(uap, flags), l); 2690 vput(vp); 2691 return (error); 2692 } 2693 2694 /* 2695 * Change flags of a file given a file descriptor. 2696 */ 2697 /* ARGSUSED */ 2698 int 2699 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2700 { 2701 /* { 2702 syscallarg(int) fd; 2703 syscallarg(u_long) flags; 2704 } */ 2705 struct vnode *vp; 2706 file_t *fp; 2707 int error; 2708 2709 /* fd_getvnode() will use the descriptor for us */ 2710 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2711 return (error); 2712 vp = fp->f_data; 2713 error = change_flags(vp, SCARG(uap, flags), l); 2714 VOP_UNLOCK(vp); 2715 fd_putfile(SCARG(uap, fd)); 2716 return (error); 2717 } 2718 2719 /* 2720 * Change flags of a file given a path name; this version does 2721 * not follow links. 2722 */ 2723 int 2724 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2725 { 2726 /* { 2727 syscallarg(const char *) path; 2728 syscallarg(u_long) flags; 2729 } */ 2730 struct vnode *vp; 2731 int error; 2732 2733 error = namei_simple_user(SCARG(uap, path), 2734 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2735 if (error != 0) 2736 return (error); 2737 error = change_flags(vp, SCARG(uap, flags), l); 2738 vput(vp); 2739 return (error); 2740 } 2741 2742 /* 2743 * Common routine to change flags of a file. 2744 */ 2745 int 2746 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2747 { 2748 struct vattr vattr; 2749 int error; 2750 2751 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2752 /* 2753 * Non-superusers cannot change the flags on devices, even if they 2754 * own them. 2755 */ 2756 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2757 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2758 goto out; 2759 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2760 error = EINVAL; 2761 goto out; 2762 } 2763 } 2764 vattr_null(&vattr); 2765 vattr.va_flags = flags; 2766 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2767 out: 2768 return (error); 2769 } 2770 2771 /* 2772 * Change mode of a file given path name; this version follows links. 2773 */ 2774 /* ARGSUSED */ 2775 int 2776 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2777 { 2778 /* { 2779 syscallarg(const char *) path; 2780 syscallarg(int) mode; 2781 } */ 2782 int error; 2783 struct vnode *vp; 2784 2785 error = namei_simple_user(SCARG(uap, path), 2786 NSM_FOLLOW_TRYEMULROOT, &vp); 2787 if (error != 0) 2788 return (error); 2789 2790 error = change_mode(vp, SCARG(uap, mode), l); 2791 2792 vrele(vp); 2793 return (error); 2794 } 2795 2796 /* 2797 * Change mode of a file given a file descriptor. 2798 */ 2799 /* ARGSUSED */ 2800 int 2801 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2802 { 2803 /* { 2804 syscallarg(int) fd; 2805 syscallarg(int) mode; 2806 } */ 2807 file_t *fp; 2808 int error; 2809 2810 /* fd_getvnode() will use the descriptor for us */ 2811 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2812 return (error); 2813 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2814 fd_putfile(SCARG(uap, fd)); 2815 return (error); 2816 } 2817 2818 /* 2819 * Change mode of a file given path name; this version does not follow links. 2820 */ 2821 /* ARGSUSED */ 2822 int 2823 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2824 { 2825 /* { 2826 syscallarg(const char *) path; 2827 syscallarg(int) mode; 2828 } */ 2829 int error; 2830 struct vnode *vp; 2831 2832 error = namei_simple_user(SCARG(uap, path), 2833 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2834 if (error != 0) 2835 return (error); 2836 2837 error = change_mode(vp, SCARG(uap, mode), l); 2838 2839 vrele(vp); 2840 return (error); 2841 } 2842 2843 /* 2844 * Common routine to set mode given a vnode. 2845 */ 2846 static int 2847 change_mode(struct vnode *vp, int mode, struct lwp *l) 2848 { 2849 struct vattr vattr; 2850 int error; 2851 2852 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2853 vattr_null(&vattr); 2854 vattr.va_mode = mode & ALLPERMS; 2855 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2856 VOP_UNLOCK(vp); 2857 return (error); 2858 } 2859 2860 /* 2861 * Set ownership given a path name; this version follows links. 2862 */ 2863 /* ARGSUSED */ 2864 int 2865 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2866 { 2867 /* { 2868 syscallarg(const char *) path; 2869 syscallarg(uid_t) uid; 2870 syscallarg(gid_t) gid; 2871 } */ 2872 int error; 2873 struct vnode *vp; 2874 2875 error = namei_simple_user(SCARG(uap, path), 2876 NSM_FOLLOW_TRYEMULROOT, &vp); 2877 if (error != 0) 2878 return (error); 2879 2880 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2881 2882 vrele(vp); 2883 return (error); 2884 } 2885 2886 /* 2887 * Set ownership given a path name; this version follows links. 2888 * Provides POSIX semantics. 2889 */ 2890 /* ARGSUSED */ 2891 int 2892 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2893 { 2894 /* { 2895 syscallarg(const char *) path; 2896 syscallarg(uid_t) uid; 2897 syscallarg(gid_t) gid; 2898 } */ 2899 int error; 2900 struct vnode *vp; 2901 2902 error = namei_simple_user(SCARG(uap, path), 2903 NSM_FOLLOW_TRYEMULROOT, &vp); 2904 if (error != 0) 2905 return (error); 2906 2907 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2908 2909 vrele(vp); 2910 return (error); 2911 } 2912 2913 /* 2914 * Set ownership given a file descriptor. 2915 */ 2916 /* ARGSUSED */ 2917 int 2918 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2919 { 2920 /* { 2921 syscallarg(int) fd; 2922 syscallarg(uid_t) uid; 2923 syscallarg(gid_t) gid; 2924 } */ 2925 int error; 2926 file_t *fp; 2927 2928 /* fd_getvnode() will use the descriptor for us */ 2929 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2930 return (error); 2931 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2932 l, 0); 2933 fd_putfile(SCARG(uap, fd)); 2934 return (error); 2935 } 2936 2937 /* 2938 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2939 */ 2940 /* ARGSUSED */ 2941 int 2942 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2943 { 2944 /* { 2945 syscallarg(int) fd; 2946 syscallarg(uid_t) uid; 2947 syscallarg(gid_t) gid; 2948 } */ 2949 int error; 2950 file_t *fp; 2951 2952 /* fd_getvnode() will use the descriptor for us */ 2953 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2954 return (error); 2955 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2956 l, 1); 2957 fd_putfile(SCARG(uap, fd)); 2958 return (error); 2959 } 2960 2961 /* 2962 * Set ownership given a path name; this version does not follow links. 2963 */ 2964 /* ARGSUSED */ 2965 int 2966 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2967 { 2968 /* { 2969 syscallarg(const char *) path; 2970 syscallarg(uid_t) uid; 2971 syscallarg(gid_t) gid; 2972 } */ 2973 int error; 2974 struct vnode *vp; 2975 2976 error = namei_simple_user(SCARG(uap, path), 2977 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2978 if (error != 0) 2979 return (error); 2980 2981 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2982 2983 vrele(vp); 2984 return (error); 2985 } 2986 2987 /* 2988 * Set ownership given a path name; this version does not follow links. 2989 * Provides POSIX/XPG semantics. 2990 */ 2991 /* ARGSUSED */ 2992 int 2993 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2994 { 2995 /* { 2996 syscallarg(const char *) path; 2997 syscallarg(uid_t) uid; 2998 syscallarg(gid_t) gid; 2999 } */ 3000 int error; 3001 struct vnode *vp; 3002 3003 error = namei_simple_user(SCARG(uap, path), 3004 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3005 if (error != 0) 3006 return (error); 3007 3008 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3009 3010 vrele(vp); 3011 return (error); 3012 } 3013 3014 /* 3015 * Common routine to set ownership given a vnode. 3016 */ 3017 static int 3018 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3019 int posix_semantics) 3020 { 3021 struct vattr vattr; 3022 mode_t newmode; 3023 int error; 3024 3025 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3026 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3027 goto out; 3028 3029 #define CHANGED(x) ((int)(x) != -1) 3030 newmode = vattr.va_mode; 3031 if (posix_semantics) { 3032 /* 3033 * POSIX/XPG semantics: if the caller is not the super-user, 3034 * clear set-user-id and set-group-id bits. Both POSIX and 3035 * the XPG consider the behaviour for calls by the super-user 3036 * implementation-defined; we leave the set-user-id and set- 3037 * group-id settings intact in that case. 3038 */ 3039 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 3040 NULL) != 0) 3041 newmode &= ~(S_ISUID | S_ISGID); 3042 } else { 3043 /* 3044 * NetBSD semantics: when changing owner and/or group, 3045 * clear the respective bit(s). 3046 */ 3047 if (CHANGED(uid)) 3048 newmode &= ~S_ISUID; 3049 if (CHANGED(gid)) 3050 newmode &= ~S_ISGID; 3051 } 3052 /* Update va_mode iff altered. */ 3053 if (vattr.va_mode == newmode) 3054 newmode = VNOVAL; 3055 3056 vattr_null(&vattr); 3057 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3058 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3059 vattr.va_mode = newmode; 3060 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3061 #undef CHANGED 3062 3063 out: 3064 VOP_UNLOCK(vp); 3065 return (error); 3066 } 3067 3068 /* 3069 * Set the access and modification times given a path name; this 3070 * version follows links. 3071 */ 3072 /* ARGSUSED */ 3073 int 3074 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3075 register_t *retval) 3076 { 3077 /* { 3078 syscallarg(const char *) path; 3079 syscallarg(const struct timeval *) tptr; 3080 } */ 3081 3082 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3083 SCARG(uap, tptr), UIO_USERSPACE); 3084 } 3085 3086 /* 3087 * Set the access and modification times given a file descriptor. 3088 */ 3089 /* ARGSUSED */ 3090 int 3091 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3092 register_t *retval) 3093 { 3094 /* { 3095 syscallarg(int) fd; 3096 syscallarg(const struct timeval *) tptr; 3097 } */ 3098 int error; 3099 file_t *fp; 3100 3101 /* fd_getvnode() will use the descriptor for us */ 3102 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3103 return (error); 3104 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3105 UIO_USERSPACE); 3106 fd_putfile(SCARG(uap, fd)); 3107 return (error); 3108 } 3109 3110 /* 3111 * Set the access and modification times given a path name; this 3112 * version does not follow links. 3113 */ 3114 int 3115 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3116 register_t *retval) 3117 { 3118 /* { 3119 syscallarg(const char *) path; 3120 syscallarg(const struct timeval *) tptr; 3121 } */ 3122 3123 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3124 SCARG(uap, tptr), UIO_USERSPACE); 3125 } 3126 3127 /* 3128 * Common routine to set access and modification times given a vnode. 3129 */ 3130 int 3131 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3132 const struct timeval *tptr, enum uio_seg seg) 3133 { 3134 struct vattr vattr; 3135 int error, dorele = 0; 3136 namei_simple_flags_t sflags; 3137 3138 bool vanull, setbirthtime; 3139 struct timespec ts[2]; 3140 3141 /* 3142 * I have checked all callers and they pass either FOLLOW, 3143 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3144 * is 0. More to the point, they don't pass anything else. 3145 * Let's keep it that way at least until the namei interfaces 3146 * are fully sanitized. 3147 */ 3148 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3149 sflags = (flag == FOLLOW) ? 3150 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3151 3152 if (tptr == NULL) { 3153 vanull = true; 3154 nanotime(&ts[0]); 3155 ts[1] = ts[0]; 3156 } else { 3157 struct timeval tv[2]; 3158 3159 vanull = false; 3160 if (seg != UIO_SYSSPACE) { 3161 error = copyin(tptr, tv, sizeof (tv)); 3162 if (error != 0) 3163 return error; 3164 tptr = tv; 3165 } 3166 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3167 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3168 } 3169 3170 if (vp == NULL) { 3171 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3172 error = namei_simple_user(path, sflags, &vp); 3173 if (error != 0) 3174 return error; 3175 dorele = 1; 3176 } 3177 3178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3179 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3180 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3181 vattr_null(&vattr); 3182 vattr.va_atime = ts[0]; 3183 vattr.va_mtime = ts[1]; 3184 if (setbirthtime) 3185 vattr.va_birthtime = ts[1]; 3186 if (vanull) 3187 vattr.va_vaflags |= VA_UTIMES_NULL; 3188 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3189 VOP_UNLOCK(vp); 3190 3191 if (dorele != 0) 3192 vrele(vp); 3193 3194 return error; 3195 } 3196 3197 /* 3198 * Truncate a file given its path name. 3199 */ 3200 /* ARGSUSED */ 3201 int 3202 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3203 { 3204 /* { 3205 syscallarg(const char *) path; 3206 syscallarg(int) pad; 3207 syscallarg(off_t) length; 3208 } */ 3209 struct vnode *vp; 3210 struct vattr vattr; 3211 int error; 3212 3213 error = namei_simple_user(SCARG(uap, path), 3214 NSM_FOLLOW_TRYEMULROOT, &vp); 3215 if (error != 0) 3216 return (error); 3217 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3218 if (vp->v_type == VDIR) 3219 error = EISDIR; 3220 else if ((error = vn_writechk(vp)) == 0 && 3221 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3222 vattr_null(&vattr); 3223 vattr.va_size = SCARG(uap, length); 3224 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3225 } 3226 vput(vp); 3227 return (error); 3228 } 3229 3230 /* 3231 * Truncate a file given a file descriptor. 3232 */ 3233 /* ARGSUSED */ 3234 int 3235 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3236 { 3237 /* { 3238 syscallarg(int) fd; 3239 syscallarg(int) pad; 3240 syscallarg(off_t) length; 3241 } */ 3242 struct vattr vattr; 3243 struct vnode *vp; 3244 file_t *fp; 3245 int error; 3246 3247 /* fd_getvnode() will use the descriptor for us */ 3248 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3249 return (error); 3250 if ((fp->f_flag & FWRITE) == 0) { 3251 error = EINVAL; 3252 goto out; 3253 } 3254 vp = fp->f_data; 3255 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3256 if (vp->v_type == VDIR) 3257 error = EISDIR; 3258 else if ((error = vn_writechk(vp)) == 0) { 3259 vattr_null(&vattr); 3260 vattr.va_size = SCARG(uap, length); 3261 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3262 } 3263 VOP_UNLOCK(vp); 3264 out: 3265 fd_putfile(SCARG(uap, fd)); 3266 return (error); 3267 } 3268 3269 /* 3270 * Sync an open file. 3271 */ 3272 /* ARGSUSED */ 3273 int 3274 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3275 { 3276 /* { 3277 syscallarg(int) fd; 3278 } */ 3279 struct vnode *vp; 3280 file_t *fp; 3281 int error; 3282 3283 /* fd_getvnode() will use the descriptor for us */ 3284 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3285 return (error); 3286 vp = fp->f_data; 3287 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3288 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3289 VOP_UNLOCK(vp); 3290 fd_putfile(SCARG(uap, fd)); 3291 return (error); 3292 } 3293 3294 /* 3295 * Sync a range of file data. API modeled after that found in AIX. 3296 * 3297 * FDATASYNC indicates that we need only save enough metadata to be able 3298 * to re-read the written data. Note we duplicate AIX's requirement that 3299 * the file be open for writing. 3300 */ 3301 /* ARGSUSED */ 3302 int 3303 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3304 { 3305 /* { 3306 syscallarg(int) fd; 3307 syscallarg(int) flags; 3308 syscallarg(off_t) start; 3309 syscallarg(off_t) length; 3310 } */ 3311 struct vnode *vp; 3312 file_t *fp; 3313 int flags, nflags; 3314 off_t s, e, len; 3315 int error; 3316 3317 /* fd_getvnode() will use the descriptor for us */ 3318 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3319 return (error); 3320 3321 if ((fp->f_flag & FWRITE) == 0) { 3322 error = EBADF; 3323 goto out; 3324 } 3325 3326 flags = SCARG(uap, flags); 3327 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3328 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3329 error = EINVAL; 3330 goto out; 3331 } 3332 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3333 if (flags & FDATASYNC) 3334 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3335 else 3336 nflags = FSYNC_WAIT; 3337 if (flags & FDISKSYNC) 3338 nflags |= FSYNC_CACHE; 3339 3340 len = SCARG(uap, length); 3341 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3342 if (len) { 3343 s = SCARG(uap, start); 3344 e = s + len; 3345 if (e < s) { 3346 error = EINVAL; 3347 goto out; 3348 } 3349 } else { 3350 e = 0; 3351 s = 0; 3352 } 3353 3354 vp = fp->f_data; 3355 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3356 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3357 VOP_UNLOCK(vp); 3358 out: 3359 fd_putfile(SCARG(uap, fd)); 3360 return (error); 3361 } 3362 3363 /* 3364 * Sync the data of an open file. 3365 */ 3366 /* ARGSUSED */ 3367 int 3368 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3369 { 3370 /* { 3371 syscallarg(int) fd; 3372 } */ 3373 struct vnode *vp; 3374 file_t *fp; 3375 int error; 3376 3377 /* fd_getvnode() will use the descriptor for us */ 3378 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3379 return (error); 3380 if ((fp->f_flag & FWRITE) == 0) { 3381 fd_putfile(SCARG(uap, fd)); 3382 return (EBADF); 3383 } 3384 vp = fp->f_data; 3385 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3386 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3387 VOP_UNLOCK(vp); 3388 fd_putfile(SCARG(uap, fd)); 3389 return (error); 3390 } 3391 3392 /* 3393 * Rename files, (standard) BSD semantics frontend. 3394 */ 3395 /* ARGSUSED */ 3396 int 3397 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3398 { 3399 /* { 3400 syscallarg(const char *) from; 3401 syscallarg(const char *) to; 3402 } */ 3403 3404 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3405 } 3406 3407 /* 3408 * Rename files, POSIX semantics frontend. 3409 */ 3410 /* ARGSUSED */ 3411 int 3412 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3413 { 3414 /* { 3415 syscallarg(const char *) from; 3416 syscallarg(const char *) to; 3417 } */ 3418 3419 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3420 } 3421 3422 /* 3423 * Rename files. Source and destination must either both be directories, 3424 * or both not be directories. If target is a directory, it must be empty. 3425 * If `from' and `to' refer to the same object, the value of the `retain' 3426 * argument is used to determine whether `from' will be 3427 * 3428 * (retain == 0) deleted unless `from' and `to' refer to the same 3429 * object in the file system's name space (BSD). 3430 * (retain == 1) always retained (POSIX). 3431 */ 3432 int 3433 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3434 { 3435 struct vnode *tvp, *fvp, *tdvp; 3436 struct pathbuf *frompb, *topb; 3437 struct nameidata fromnd, tond; 3438 struct mount *fs; 3439 struct lwp *l = curlwp; 3440 struct proc *p; 3441 uint32_t saveflag; 3442 int error; 3443 3444 error = pathbuf_maybe_copyin(from, seg, &frompb); 3445 if (error) { 3446 return error; 3447 } 3448 error = pathbuf_maybe_copyin(to, seg, &topb); 3449 if (error) { 3450 pathbuf_destroy(frompb); 3451 return error; 3452 } 3453 3454 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT | INRENAME, 3455 frompb); 3456 if ((error = namei(&fromnd)) != 0) { 3457 pathbuf_destroy(frompb); 3458 pathbuf_destroy(topb); 3459 return (error); 3460 } 3461 if (fromnd.ni_dvp != fromnd.ni_vp) 3462 VOP_UNLOCK(fromnd.ni_dvp); 3463 fvp = fromnd.ni_vp; 3464 3465 fs = fvp->v_mount; 3466 error = VFS_RENAMELOCK_ENTER(fs); 3467 if (error) { 3468 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3469 vrele(fromnd.ni_dvp); 3470 vrele(fvp); 3471 goto out1; 3472 } 3473 3474 /* 3475 * close, partially, yet another race - ideally we should only 3476 * go as far as getting fromnd.ni_dvp before getting the per-fs 3477 * lock, and then continue to get fromnd.ni_vp, but we can't do 3478 * that with namei as it stands. 3479 * 3480 * This still won't prevent rmdir from nuking fromnd.ni_vp 3481 * under us. The real fix is to get the locks in the right 3482 * order and do the lookups in the right places, but that's a 3483 * major rototill. 3484 * 3485 * Preserve the SAVESTART in cn_flags, because who knows what 3486 * might happen if we don't. 3487 * 3488 * Note: this logic (as well as this whole function) is cloned 3489 * in nfs_serv.c. Proceed accordingly. 3490 */ 3491 vrele(fvp); 3492 if ((fromnd.ni_cnd.cn_namelen == 1 && 3493 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3494 (fromnd.ni_cnd.cn_namelen == 2 && 3495 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3496 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3497 error = EINVAL; 3498 VFS_RENAMELOCK_EXIT(fs); 3499 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3500 vrele(fromnd.ni_dvp); 3501 goto out1; 3502 } 3503 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART; 3504 fromnd.ni_cnd.cn_flags &= ~SAVESTART; 3505 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3506 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd); 3507 fromnd.ni_cnd.cn_flags |= saveflag; 3508 if (error) { 3509 VOP_UNLOCK(fromnd.ni_dvp); 3510 VFS_RENAMELOCK_EXIT(fs); 3511 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3512 vrele(fromnd.ni_dvp); 3513 goto out1; 3514 } 3515 VOP_UNLOCK(fromnd.ni_vp); 3516 if (fromnd.ni_dvp != fromnd.ni_vp) 3517 VOP_UNLOCK(fromnd.ni_dvp); 3518 fvp = fromnd.ni_vp; 3519 3520 NDINIT(&tond, RENAME, 3521 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT 3522 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0), 3523 topb); 3524 if ((error = namei(&tond)) != 0) { 3525 VFS_RENAMELOCK_EXIT(fs); 3526 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3527 vrele(fromnd.ni_dvp); 3528 vrele(fvp); 3529 goto out1; 3530 } 3531 tdvp = tond.ni_dvp; 3532 tvp = tond.ni_vp; 3533 3534 if (tvp != NULL) { 3535 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3536 error = ENOTDIR; 3537 goto out; 3538 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3539 error = EISDIR; 3540 goto out; 3541 } 3542 } 3543 3544 if (fvp == tdvp) 3545 error = EINVAL; 3546 3547 /* 3548 * Source and destination refer to the same object. 3549 */ 3550 if (fvp == tvp) { 3551 if (retain) 3552 error = -1; 3553 else if (fromnd.ni_dvp == tdvp && 3554 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3555 !memcmp(fromnd.ni_cnd.cn_nameptr, 3556 tond.ni_cnd.cn_nameptr, 3557 fromnd.ni_cnd.cn_namelen)) 3558 error = -1; 3559 } 3560 3561 #if NVERIEXEC > 0 3562 if (!error) { 3563 char *f1, *f2; 3564 size_t f1_len; 3565 size_t f2_len; 3566 3567 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3568 f1 = kmem_alloc(f1_len, KM_SLEEP); 3569 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3570 3571 f2_len = tond.ni_cnd.cn_namelen + 1; 3572 f2 = kmem_alloc(f2_len, KM_SLEEP); 3573 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3574 3575 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3576 3577 kmem_free(f1, f1_len); 3578 kmem_free(f2, f2_len); 3579 } 3580 #endif /* NVERIEXEC > 0 */ 3581 3582 out: 3583 p = l->l_proc; 3584 if (!error) { 3585 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3586 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3587 VFS_RENAMELOCK_EXIT(fs); 3588 } else { 3589 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3590 if (tdvp == tvp) 3591 vrele(tdvp); 3592 else 3593 vput(tdvp); 3594 if (tvp) 3595 vput(tvp); 3596 VFS_RENAMELOCK_EXIT(fs); 3597 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3598 vrele(fromnd.ni_dvp); 3599 vrele(fvp); 3600 } 3601 vrele(tond.ni_startdir); 3602 out1: 3603 if (fromnd.ni_startdir) 3604 vrele(fromnd.ni_startdir); 3605 pathbuf_destroy(frompb); 3606 pathbuf_destroy(topb); 3607 return (error == -1 ? 0 : error); 3608 } 3609 3610 /* 3611 * Make a directory file. 3612 */ 3613 /* ARGSUSED */ 3614 int 3615 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3616 { 3617 /* { 3618 syscallarg(const char *) path; 3619 syscallarg(int) mode; 3620 } */ 3621 3622 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3623 } 3624 3625 int 3626 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3627 { 3628 struct proc *p = curlwp->l_proc; 3629 struct vnode *vp; 3630 struct vattr vattr; 3631 int error; 3632 struct pathbuf *pb; 3633 struct nameidata nd; 3634 3635 /* XXX bollocks, should pass in a pathbuf */ 3636 error = pathbuf_maybe_copyin(path, seg, &pb); 3637 if (error) { 3638 return error; 3639 } 3640 3641 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 3642 if ((error = namei(&nd)) != 0) { 3643 pathbuf_destroy(pb); 3644 return (error); 3645 } 3646 vp = nd.ni_vp; 3647 if (vp != NULL) { 3648 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3649 if (nd.ni_dvp == vp) 3650 vrele(nd.ni_dvp); 3651 else 3652 vput(nd.ni_dvp); 3653 vrele(vp); 3654 pathbuf_destroy(pb); 3655 return (EEXIST); 3656 } 3657 vattr_null(&vattr); 3658 vattr.va_type = VDIR; 3659 /* We will read cwdi->cwdi_cmask unlocked. */ 3660 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3661 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3662 if (!error) 3663 vput(nd.ni_vp); 3664 pathbuf_destroy(pb); 3665 return (error); 3666 } 3667 3668 /* 3669 * Remove a directory file. 3670 */ 3671 /* ARGSUSED */ 3672 int 3673 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3674 { 3675 /* { 3676 syscallarg(const char *) path; 3677 } */ 3678 struct vnode *vp; 3679 int error; 3680 struct pathbuf *pb; 3681 struct nameidata nd; 3682 3683 error = pathbuf_copyin(SCARG(uap, path), &pb); 3684 if (error) { 3685 return error; 3686 } 3687 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 3688 if ((error = namei(&nd)) != 0) { 3689 pathbuf_destroy(pb); 3690 return error; 3691 } 3692 vp = nd.ni_vp; 3693 if (vp->v_type != VDIR) { 3694 error = ENOTDIR; 3695 goto out; 3696 } 3697 /* 3698 * No rmdir "." please. 3699 */ 3700 if (nd.ni_dvp == vp) { 3701 error = EINVAL; 3702 goto out; 3703 } 3704 /* 3705 * The root of a mounted filesystem cannot be deleted. 3706 */ 3707 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3708 error = EBUSY; 3709 goto out; 3710 } 3711 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3712 pathbuf_destroy(pb); 3713 return (error); 3714 3715 out: 3716 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3717 if (nd.ni_dvp == vp) 3718 vrele(nd.ni_dvp); 3719 else 3720 vput(nd.ni_dvp); 3721 vput(vp); 3722 pathbuf_destroy(pb); 3723 return (error); 3724 } 3725 3726 /* 3727 * Read a block of directory entries in a file system independent format. 3728 */ 3729 int 3730 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3731 { 3732 /* { 3733 syscallarg(int) fd; 3734 syscallarg(char *) buf; 3735 syscallarg(size_t) count; 3736 } */ 3737 file_t *fp; 3738 int error, done; 3739 3740 /* fd_getvnode() will use the descriptor for us */ 3741 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3742 return (error); 3743 if ((fp->f_flag & FREAD) == 0) { 3744 error = EBADF; 3745 goto out; 3746 } 3747 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3748 SCARG(uap, count), &done, l, 0, 0); 3749 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3750 *retval = done; 3751 out: 3752 fd_putfile(SCARG(uap, fd)); 3753 return (error); 3754 } 3755 3756 /* 3757 * Set the mode mask for creation of filesystem nodes. 3758 */ 3759 int 3760 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3761 { 3762 /* { 3763 syscallarg(mode_t) newmask; 3764 } */ 3765 struct proc *p = l->l_proc; 3766 struct cwdinfo *cwdi; 3767 3768 /* 3769 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3770 * important is that we serialize changes to the mask. The 3771 * rw_exit() will issue a write memory barrier on our behalf, 3772 * and force the changes out to other CPUs (as it must use an 3773 * atomic operation, draining the local CPU's store buffers). 3774 */ 3775 cwdi = p->p_cwdi; 3776 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3777 *retval = cwdi->cwdi_cmask; 3778 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3779 rw_exit(&cwdi->cwdi_lock); 3780 3781 return (0); 3782 } 3783 3784 int 3785 dorevoke(struct vnode *vp, kauth_cred_t cred) 3786 { 3787 struct vattr vattr; 3788 int error; 3789 3790 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3791 return error; 3792 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3793 (error = kauth_authorize_generic(cred, 3794 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3795 VOP_REVOKE(vp, REVOKEALL); 3796 return (error); 3797 } 3798 3799 /* 3800 * Void all references to file by ripping underlying filesystem 3801 * away from vnode. 3802 */ 3803 /* ARGSUSED */ 3804 int 3805 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3806 { 3807 /* { 3808 syscallarg(const char *) path; 3809 } */ 3810 struct vnode *vp; 3811 int error; 3812 3813 error = namei_simple_user(SCARG(uap, path), 3814 NSM_FOLLOW_TRYEMULROOT, &vp); 3815 if (error != 0) 3816 return (error); 3817 error = dorevoke(vp, l->l_cred); 3818 vrele(vp); 3819 return (error); 3820 } 3821