1 /* $NetBSD: vfs_syscalls.c,v 1.414 2011/01/13 07:25:50 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.414 2011/01/13 07:25:50 pooka Exp $"); 70 71 #ifdef _KERNEL_OPT 72 #include "opt_fileassoc.h" 73 #include "veriexec.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/filedesc.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/vnode.h> 84 #include <sys/mount.h> 85 #include <sys/proc.h> 86 #include <sys/uio.h> 87 #include <sys/kmem.h> 88 #include <sys/dirent.h> 89 #include <sys/sysctl.h> 90 #include <sys/syscallargs.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/ktrace.h> 93 #ifdef FILEASSOC 94 #include <sys/fileassoc.h> 95 #endif /* FILEASSOC */ 96 #include <sys/verified_exec.h> 97 #include <sys/kauth.h> 98 #include <sys/atomic.h> 99 #include <sys/module.h> 100 #include <sys/buf.h> 101 102 #include <miscfs/genfs/genfs.h> 103 #include <miscfs/syncfs/syncfs.h> 104 #include <miscfs/specfs/specdev.h> 105 106 #include <nfs/rpcv2.h> 107 #include <nfs/nfsproto.h> 108 #include <nfs/nfs.h> 109 #include <nfs/nfs_var.h> 110 111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct"); 112 113 static int change_flags(struct vnode *, u_long, struct lwp *); 114 static int change_mode(struct vnode *, int, struct lwp *l); 115 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 116 117 void checkdirs(struct vnode *); 118 119 /* 120 * Virtual File System System Calls 121 */ 122 123 /* 124 * Mount a file system. 125 */ 126 127 /* 128 * This table is used to maintain compatibility with 4.3BSD 129 * and NetBSD 0.9 mount syscalls - and possibly other systems. 130 * Note, the order is important! 131 * 132 * Do not modify this table. It should only contain filesystems 133 * supported by NetBSD 0.9 and 4.3BSD. 134 */ 135 const char * const mountcompatnames[] = { 136 NULL, /* 0 = MOUNT_NONE */ 137 MOUNT_FFS, /* 1 = MOUNT_UFS */ 138 MOUNT_NFS, /* 2 */ 139 MOUNT_MFS, /* 3 */ 140 MOUNT_MSDOS, /* 4 */ 141 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 142 MOUNT_FDESC, /* 6 */ 143 MOUNT_KERNFS, /* 7 */ 144 NULL, /* 8 = MOUNT_DEVFS */ 145 MOUNT_AFS, /* 9 */ 146 }; 147 const int nmountcompatnames = sizeof(mountcompatnames) / 148 sizeof(mountcompatnames[0]); 149 150 static int 151 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 152 void *data, size_t *data_len) 153 { 154 struct mount *mp; 155 int error = 0, saved_flags; 156 157 mp = vp->v_mount; 158 saved_flags = mp->mnt_flag; 159 160 /* We can operate only on VV_ROOT nodes. */ 161 if ((vp->v_vflag & VV_ROOT) == 0) { 162 error = EINVAL; 163 goto out; 164 } 165 166 /* 167 * We only allow the filesystem to be reloaded if it 168 * is currently mounted read-only. Additionally, we 169 * prevent read-write to read-only downgrades. 170 */ 171 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 172 (mp->mnt_flag & MNT_RDONLY) == 0 && 173 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 174 error = EOPNOTSUPP; /* Needs translation */ 175 goto out; 176 } 177 178 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 179 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 180 if (error) 181 goto out; 182 183 if (vfs_busy(mp, NULL)) { 184 error = EPERM; 185 goto out; 186 } 187 188 mutex_enter(&mp->mnt_updating); 189 190 mp->mnt_flag &= ~MNT_OP_FLAGS; 191 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 192 193 /* 194 * Set the mount level flags. 195 */ 196 if (flags & MNT_RDONLY) 197 mp->mnt_flag |= MNT_RDONLY; 198 else if (mp->mnt_flag & MNT_RDONLY) 199 mp->mnt_iflag |= IMNT_WANTRDWR; 200 mp->mnt_flag &= 201 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 202 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 203 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 204 MNT_LOG); 205 mp->mnt_flag |= flags & 206 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 209 MNT_LOG | MNT_IGNORE); 210 211 error = VFS_MOUNT(mp, path, data, data_len); 212 213 if (error && data != NULL) { 214 int error2; 215 216 /* 217 * Update failed; let's try and see if it was an 218 * export request. For compat with 3.0 and earlier. 219 */ 220 error2 = vfs_hooks_reexport(mp, path, data); 221 222 /* 223 * Only update error code if the export request was 224 * understood but some problem occurred while 225 * processing it. 226 */ 227 if (error2 != EJUSTRETURN) 228 error = error2; 229 } 230 231 if (mp->mnt_iflag & IMNT_WANTRDWR) 232 mp->mnt_flag &= ~MNT_RDONLY; 233 if (error) 234 mp->mnt_flag = saved_flags; 235 mp->mnt_flag &= ~MNT_OP_FLAGS; 236 mp->mnt_iflag &= ~IMNT_WANTRDWR; 237 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 238 if (mp->mnt_syncer == NULL) 239 error = vfs_allocate_syncvnode(mp); 240 } else { 241 if (mp->mnt_syncer != NULL) 242 vfs_deallocate_syncvnode(mp); 243 } 244 mutex_exit(&mp->mnt_updating); 245 vfs_unbusy(mp, false, NULL); 246 247 out: 248 return (error); 249 } 250 251 static int 252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 253 { 254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 255 int error; 256 257 /* Copy file-system type from userspace. */ 258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 259 if (error) { 260 /* 261 * Historically, filesystem types were identified by numbers. 262 * If we get an integer for the filesystem type instead of a 263 * string, we check to see if it matches one of the historic 264 * filesystem types. 265 */ 266 u_long fsindex = (u_long)fstype; 267 if (fsindex >= nmountcompatnames || 268 mountcompatnames[fsindex] == NULL) 269 return ENODEV; 270 strlcpy(fstypename, mountcompatnames[fsindex], 271 sizeof(fstypename)); 272 } 273 274 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 275 if (strcmp(fstypename, "ufs") == 0) 276 fstypename[0] = 'f'; 277 278 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 279 return 0; 280 281 /* If we can autoload a vfs module, try again */ 282 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 283 284 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 285 return 0; 286 287 return ENODEV; 288 } 289 290 static int 291 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, 292 const char *path, int flags, void *data, size_t *data_len) 293 { 294 struct mount *mp; 295 struct vnode *vp = *vpp; 296 struct vattr va; 297 struct pathbuf *pb; 298 struct nameidata nd; 299 int error; 300 301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 303 if (error) { 304 vfs_delref(vfsops); 305 return error; 306 } 307 308 /* Can't make a non-dir a mount-point (from here anyway). */ 309 if (vp->v_type != VDIR) { 310 vfs_delref(vfsops); 311 return ENOTDIR; 312 } 313 314 /* 315 * If the user is not root, ensure that they own the directory 316 * onto which we are attempting to mount. 317 */ 318 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 || 319 (va.va_uid != kauth_cred_geteuid(l->l_cred) && 320 (error = kauth_authorize_generic(l->l_cred, 321 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) { 322 vfs_delref(vfsops); 323 return error; 324 } 325 326 if (flags & MNT_EXPORTED) { 327 vfs_delref(vfsops); 328 return EINVAL; 329 } 330 331 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 332 vfs_delref(vfsops); 333 return ENOMEM; 334 } 335 336 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 337 338 /* 339 * The underlying file system may refuse the mount for 340 * various reasons. Allow the user to force it to happen. 341 * 342 * Set the mount level flags. 343 */ 344 mp->mnt_flag = flags & 345 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 346 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP | 347 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP | 348 MNT_LOG | MNT_IGNORE | MNT_RDONLY); 349 350 mutex_enter(&mp->mnt_updating); 351 error = VFS_MOUNT(mp, path, data, data_len); 352 mp->mnt_flag &= ~MNT_OP_FLAGS; 353 354 if (error != 0) 355 goto err_unmounted; 356 357 /* 358 * Validate and prepare the mount point. 359 */ 360 error = pathbuf_copyin(path, &pb); 361 if (error != 0) { 362 goto err_mounted; 363 } 364 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 365 error = namei(&nd); 366 pathbuf_destroy(pb); 367 if (error != 0) { 368 goto err_mounted; 369 } 370 if (nd.ni_vp != vp) { 371 vput(nd.ni_vp); 372 error = EINVAL; 373 goto err_mounted; 374 } 375 if (vp->v_mountedhere != NULL) { 376 vput(nd.ni_vp); 377 error = EBUSY; 378 goto err_mounted; 379 } 380 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 381 if (error != 0) { 382 vput(nd.ni_vp); 383 goto err_mounted; 384 } 385 386 /* 387 * Put the new filesystem on the mount list after root. 388 */ 389 cache_purge(vp); 390 mp->mnt_iflag &= ~IMNT_WANTRDWR; 391 392 mutex_enter(&mountlist_lock); 393 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 394 mutex_exit(&mountlist_lock); 395 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 396 error = vfs_allocate_syncvnode(mp); 397 if (error == 0) 398 vp->v_mountedhere = mp; 399 vput(nd.ni_vp); 400 if (error != 0) 401 goto err_onmountlist; 402 403 checkdirs(vp); 404 mutex_exit(&mp->mnt_updating); 405 406 /* Hold an additional reference to the mount across VFS_START(). */ 407 vfs_unbusy(mp, true, NULL); 408 (void) VFS_STATVFS(mp, &mp->mnt_stat); 409 error = VFS_START(mp, 0); 410 if (error) 411 vrele(vp); 412 /* Drop reference held for VFS_START(). */ 413 vfs_destroy(mp); 414 *vpp = NULL; 415 return error; 416 417 err_onmountlist: 418 mutex_enter(&mountlist_lock); 419 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 420 mp->mnt_iflag |= IMNT_GONE; 421 mutex_exit(&mountlist_lock); 422 423 err_mounted: 424 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 425 panic("Unmounting fresh file system failed"); 426 427 err_unmounted: 428 vp->v_mountedhere = NULL; 429 mutex_exit(&mp->mnt_updating); 430 vfs_unbusy(mp, false, NULL); 431 vfs_destroy(mp); 432 433 return error; 434 } 435 436 static int 437 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 438 void *data, size_t *data_len) 439 { 440 struct mount *mp; 441 int error; 442 443 /* If MNT_GETARGS is specified, it should be the only flag. */ 444 if (flags & ~MNT_GETARGS) 445 return EINVAL; 446 447 mp = vp->v_mount; 448 449 /* XXX: probably some notion of "can see" here if we want isolation. */ 450 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 451 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 452 if (error) 453 return error; 454 455 if ((vp->v_vflag & VV_ROOT) == 0) 456 return EINVAL; 457 458 if (vfs_busy(mp, NULL)) 459 return EPERM; 460 461 mutex_enter(&mp->mnt_updating); 462 mp->mnt_flag &= ~MNT_OP_FLAGS; 463 mp->mnt_flag |= MNT_GETARGS; 464 error = VFS_MOUNT(mp, path, data, data_len); 465 mp->mnt_flag &= ~MNT_OP_FLAGS; 466 mutex_exit(&mp->mnt_updating); 467 468 vfs_unbusy(mp, false, NULL); 469 return (error); 470 } 471 472 int 473 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 474 { 475 /* { 476 syscallarg(const char *) type; 477 syscallarg(const char *) path; 478 syscallarg(int) flags; 479 syscallarg(void *) data; 480 syscallarg(size_t) data_len; 481 } */ 482 483 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 484 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 485 SCARG(uap, data_len), retval); 486 } 487 488 int 489 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 490 const char *path, int flags, void *data, enum uio_seg data_seg, 491 size_t data_len, register_t *retval) 492 { 493 struct vnode *vp; 494 void *data_buf = data; 495 bool vfsopsrele = false; 496 int error; 497 498 /* XXX: The calling convention of this routine is totally bizarre */ 499 if (vfsops) 500 vfsopsrele = true; 501 502 /* 503 * Get vnode to be covered 504 */ 505 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 506 if (error != 0) { 507 vp = NULL; 508 goto done; 509 } 510 511 if (vfsops == NULL) { 512 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 513 vfsops = vp->v_mount->mnt_op; 514 } else { 515 /* 'type' is userspace */ 516 error = mount_get_vfsops(type, &vfsops); 517 if (error != 0) 518 goto done; 519 vfsopsrele = true; 520 } 521 } 522 523 if (data != NULL && data_seg == UIO_USERSPACE) { 524 if (data_len == 0) { 525 /* No length supplied, use default for filesystem */ 526 data_len = vfsops->vfs_min_mount_data; 527 if (data_len > VFS_MAX_MOUNT_DATA) { 528 error = EINVAL; 529 goto done; 530 } 531 /* 532 * Hopefully a longer buffer won't make copyin() fail. 533 * For compatibility with 3.0 and earlier. 534 */ 535 if (flags & MNT_UPDATE 536 && data_len < sizeof (struct mnt_export_args30)) 537 data_len = sizeof (struct mnt_export_args30); 538 } 539 data_buf = kmem_alloc(data_len, KM_SLEEP); 540 541 /* NFS needs the buffer even for mnt_getargs .... */ 542 error = copyin(data, data_buf, data_len); 543 if (error != 0) 544 goto done; 545 } 546 547 if (flags & MNT_GETARGS) { 548 if (data_len == 0) { 549 error = EINVAL; 550 goto done; 551 } 552 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 553 if (error != 0) 554 goto done; 555 if (data_seg == UIO_USERSPACE) 556 error = copyout(data_buf, data, data_len); 557 *retval = data_len; 558 } else if (flags & MNT_UPDATE) { 559 error = mount_update(l, vp, path, flags, data_buf, &data_len); 560 } else { 561 /* Locking is handled internally in mount_domount(). */ 562 KASSERT(vfsopsrele == true); 563 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 564 &data_len); 565 vfsopsrele = false; 566 } 567 568 done: 569 if (vfsopsrele) 570 vfs_delref(vfsops); 571 if (vp != NULL) { 572 vrele(vp); 573 } 574 if (data_buf != data) 575 kmem_free(data_buf, data_len); 576 return (error); 577 } 578 579 /* 580 * Scan all active processes to see if any of them have a current 581 * or root directory onto which the new filesystem has just been 582 * mounted. If so, replace them with the new mount point. 583 */ 584 void 585 checkdirs(struct vnode *olddp) 586 { 587 struct cwdinfo *cwdi; 588 struct vnode *newdp, *rele1, *rele2; 589 struct proc *p; 590 bool retry; 591 592 if (olddp->v_usecount == 1) 593 return; 594 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 595 panic("mount: lost mount"); 596 597 do { 598 retry = false; 599 mutex_enter(proc_lock); 600 PROCLIST_FOREACH(p, &allproc) { 601 if ((cwdi = p->p_cwdi) == NULL) 602 continue; 603 /* 604 * Can't change to the old directory any more, 605 * so even if we see a stale value it's not a 606 * problem. 607 */ 608 if (cwdi->cwdi_cdir != olddp && 609 cwdi->cwdi_rdir != olddp) 610 continue; 611 retry = true; 612 rele1 = NULL; 613 rele2 = NULL; 614 atomic_inc_uint(&cwdi->cwdi_refcnt); 615 mutex_exit(proc_lock); 616 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 617 if (cwdi->cwdi_cdir == olddp) { 618 rele1 = cwdi->cwdi_cdir; 619 vref(newdp); 620 cwdi->cwdi_cdir = newdp; 621 } 622 if (cwdi->cwdi_rdir == olddp) { 623 rele2 = cwdi->cwdi_rdir; 624 vref(newdp); 625 cwdi->cwdi_rdir = newdp; 626 } 627 rw_exit(&cwdi->cwdi_lock); 628 cwdfree(cwdi); 629 if (rele1 != NULL) 630 vrele(rele1); 631 if (rele2 != NULL) 632 vrele(rele2); 633 mutex_enter(proc_lock); 634 break; 635 } 636 mutex_exit(proc_lock); 637 } while (retry); 638 639 if (rootvnode == olddp) { 640 vrele(rootvnode); 641 vref(newdp); 642 rootvnode = newdp; 643 } 644 vput(newdp); 645 } 646 647 /* 648 * Unmount a file system. 649 * 650 * Note: unmount takes a path to the vnode mounted on as argument, 651 * not special file (as before). 652 */ 653 /* ARGSUSED */ 654 int 655 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 656 { 657 /* { 658 syscallarg(const char *) path; 659 syscallarg(int) flags; 660 } */ 661 struct vnode *vp; 662 struct mount *mp; 663 int error; 664 struct pathbuf *pb; 665 struct nameidata nd; 666 667 error = pathbuf_copyin(SCARG(uap, path), &pb); 668 if (error) { 669 return error; 670 } 671 672 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 673 if ((error = namei(&nd)) != 0) { 674 pathbuf_destroy(pb); 675 return error; 676 } 677 vp = nd.ni_vp; 678 pathbuf_destroy(pb); 679 680 mp = vp->v_mount; 681 atomic_inc_uint(&mp->mnt_refcnt); 682 VOP_UNLOCK(vp); 683 684 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 685 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 686 if (error) { 687 vrele(vp); 688 vfs_destroy(mp); 689 return (error); 690 } 691 692 /* 693 * Don't allow unmounting the root file system. 694 */ 695 if (mp->mnt_flag & MNT_ROOTFS) { 696 vrele(vp); 697 vfs_destroy(mp); 698 return (EINVAL); 699 } 700 701 /* 702 * Must be the root of the filesystem 703 */ 704 if ((vp->v_vflag & VV_ROOT) == 0) { 705 vrele(vp); 706 vfs_destroy(mp); 707 return (EINVAL); 708 } 709 710 vrele(vp); 711 error = dounmount(mp, SCARG(uap, flags), l); 712 vfs_destroy(mp); 713 return error; 714 } 715 716 /* 717 * Do the actual file system unmount. File system is assumed to have 718 * been locked by the caller. 719 * 720 * => Caller hold reference to the mount, explicitly for dounmount(). 721 */ 722 int 723 dounmount(struct mount *mp, int flags, struct lwp *l) 724 { 725 struct vnode *coveredvp; 726 int error; 727 int async; 728 int used_syncer; 729 730 #if NVERIEXEC > 0 731 error = veriexec_unmountchk(mp); 732 if (error) 733 return (error); 734 #endif /* NVERIEXEC > 0 */ 735 736 /* 737 * XXX Freeze syncer. Must do this before locking the 738 * mount point. See dounmount() for details. 739 */ 740 mutex_enter(&syncer_mutex); 741 rw_enter(&mp->mnt_unmounting, RW_WRITER); 742 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 743 rw_exit(&mp->mnt_unmounting); 744 mutex_exit(&syncer_mutex); 745 return ENOENT; 746 } 747 748 used_syncer = (mp->mnt_syncer != NULL); 749 750 /* 751 * XXX Syncer must be frozen when we get here. This should really 752 * be done on a per-mountpoint basis, but the syncer doesn't work 753 * like that. 754 * 755 * The caller of dounmount() must acquire syncer_mutex because 756 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 757 * order, and we must preserve that order to avoid deadlock. 758 * 759 * So, if the file system did not use the syncer, now is 760 * the time to release the syncer_mutex. 761 */ 762 if (used_syncer == 0) 763 mutex_exit(&syncer_mutex); 764 765 mp->mnt_iflag |= IMNT_UNMOUNT; 766 async = mp->mnt_flag & MNT_ASYNC; 767 mp->mnt_flag &= ~MNT_ASYNC; 768 cache_purgevfs(mp); /* remove cache entries for this file sys */ 769 if (mp->mnt_syncer != NULL) 770 vfs_deallocate_syncvnode(mp); 771 error = 0; 772 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 773 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 774 } 775 vfs_scrubvnlist(mp); 776 if (error == 0 || (flags & MNT_FORCE)) 777 error = VFS_UNMOUNT(mp, flags); 778 if (error) { 779 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 780 (void) vfs_allocate_syncvnode(mp); 781 mp->mnt_iflag &= ~IMNT_UNMOUNT; 782 mp->mnt_flag |= async; 783 rw_exit(&mp->mnt_unmounting); 784 if (used_syncer) 785 mutex_exit(&syncer_mutex); 786 return (error); 787 } 788 vfs_scrubvnlist(mp); 789 mutex_enter(&mountlist_lock); 790 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) 791 coveredvp->v_mountedhere = NULL; 792 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); 793 mp->mnt_iflag |= IMNT_GONE; 794 mutex_exit(&mountlist_lock); 795 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 796 panic("unmount: dangling vnode"); 797 if (used_syncer) 798 mutex_exit(&syncer_mutex); 799 vfs_hooks_unmount(mp); 800 rw_exit(&mp->mnt_unmounting); 801 vfs_destroy(mp); /* reference from mount() */ 802 if (coveredvp != NULLVP) 803 vrele(coveredvp); 804 return (0); 805 } 806 807 /* 808 * Sync each mounted filesystem. 809 */ 810 #ifdef DEBUG 811 int syncprt = 0; 812 struct ctldebug debug0 = { "syncprt", &syncprt }; 813 #endif 814 815 /* ARGSUSED */ 816 int 817 sys_sync(struct lwp *l, const void *v, register_t *retval) 818 { 819 struct mount *mp, *nmp; 820 int asyncflag; 821 822 if (l == NULL) 823 l = &lwp0; 824 825 mutex_enter(&mountlist_lock); 826 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 827 mp = nmp) { 828 if (vfs_busy(mp, &nmp)) { 829 continue; 830 } 831 mutex_enter(&mp->mnt_updating); 832 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 833 asyncflag = mp->mnt_flag & MNT_ASYNC; 834 mp->mnt_flag &= ~MNT_ASYNC; 835 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 836 if (asyncflag) 837 mp->mnt_flag |= MNT_ASYNC; 838 } 839 mutex_exit(&mp->mnt_updating); 840 vfs_unbusy(mp, false, &nmp); 841 } 842 mutex_exit(&mountlist_lock); 843 #ifdef DEBUG 844 if (syncprt) 845 vfs_bufstats(); 846 #endif /* DEBUG */ 847 return (0); 848 } 849 850 /* 851 * Change filesystem quotas. 852 */ 853 /* ARGSUSED */ 854 int 855 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval) 856 { 857 /* { 858 syscallarg(const char *) path; 859 syscallarg(int) cmd; 860 syscallarg(int) uid; 861 syscallarg(void *) arg; 862 } */ 863 struct mount *mp; 864 int error; 865 struct vnode *vp; 866 867 error = namei_simple_user(SCARG(uap, path), 868 NSM_FOLLOW_TRYEMULROOT, &vp); 869 if (error != 0) 870 return (error); 871 mp = vp->v_mount; 872 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 873 SCARG(uap, arg)); 874 vrele(vp); 875 return (error); 876 } 877 878 int 879 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 880 int root) 881 { 882 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 883 int error = 0; 884 885 /* 886 * If MNT_NOWAIT or MNT_LAZY is specified, do not 887 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 888 * overrides MNT_NOWAIT. 889 */ 890 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 891 (flags != MNT_WAIT && flags != 0)) { 892 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 893 goto done; 894 } 895 896 /* Get the filesystem stats now */ 897 memset(sp, 0, sizeof(*sp)); 898 if ((error = VFS_STATVFS(mp, sp)) != 0) { 899 return error; 900 } 901 902 if (cwdi->cwdi_rdir == NULL) 903 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 904 done: 905 if (cwdi->cwdi_rdir != NULL) { 906 size_t len; 907 char *bp; 908 char c; 909 char *path = PNBUF_GET(); 910 911 bp = path + MAXPATHLEN; 912 *--bp = '\0'; 913 rw_enter(&cwdi->cwdi_lock, RW_READER); 914 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 915 MAXPATHLEN / 2, 0, l); 916 rw_exit(&cwdi->cwdi_lock); 917 if (error) { 918 PNBUF_PUT(path); 919 return error; 920 } 921 len = strlen(bp); 922 if (len != 1) { 923 /* 924 * for mount points that are below our root, we can see 925 * them, so we fix up the pathname and return them. The 926 * rest we cannot see, so we don't allow viewing the 927 * data. 928 */ 929 if (strncmp(bp, sp->f_mntonname, len) == 0 && 930 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 931 (void)strlcpy(sp->f_mntonname, 932 c == '\0' ? "/" : &sp->f_mntonname[len], 933 sizeof(sp->f_mntonname)); 934 } else { 935 if (root) 936 (void)strlcpy(sp->f_mntonname, "/", 937 sizeof(sp->f_mntonname)); 938 else 939 error = EPERM; 940 } 941 } 942 PNBUF_PUT(path); 943 } 944 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 945 return error; 946 } 947 948 /* 949 * Get filesystem statistics by path. 950 */ 951 int 952 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 953 { 954 struct mount *mp; 955 int error; 956 struct vnode *vp; 957 958 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 959 if (error != 0) 960 return error; 961 mp = vp->v_mount; 962 error = dostatvfs(mp, sb, l, flags, 1); 963 vrele(vp); 964 return error; 965 } 966 967 /* ARGSUSED */ 968 int 969 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 970 { 971 /* { 972 syscallarg(const char *) path; 973 syscallarg(struct statvfs *) buf; 974 syscallarg(int) flags; 975 } */ 976 struct statvfs *sb; 977 int error; 978 979 sb = STATVFSBUF_GET(); 980 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 981 if (error == 0) 982 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 983 STATVFSBUF_PUT(sb); 984 return error; 985 } 986 987 /* 988 * Get filesystem statistics by fd. 989 */ 990 int 991 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 992 { 993 file_t *fp; 994 struct mount *mp; 995 int error; 996 997 /* fd_getvnode() will use the descriptor for us */ 998 if ((error = fd_getvnode(fd, &fp)) != 0) 999 return (error); 1000 mp = ((struct vnode *)fp->f_data)->v_mount; 1001 error = dostatvfs(mp, sb, curlwp, flags, 1); 1002 fd_putfile(fd); 1003 return error; 1004 } 1005 1006 /* ARGSUSED */ 1007 int 1008 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1009 { 1010 /* { 1011 syscallarg(int) fd; 1012 syscallarg(struct statvfs *) buf; 1013 syscallarg(int) flags; 1014 } */ 1015 struct statvfs *sb; 1016 int error; 1017 1018 sb = STATVFSBUF_GET(); 1019 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1020 if (error == 0) 1021 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1022 STATVFSBUF_PUT(sb); 1023 return error; 1024 } 1025 1026 1027 /* 1028 * Get statistics on all filesystems. 1029 */ 1030 int 1031 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1032 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1033 register_t *retval) 1034 { 1035 int root = 0; 1036 struct proc *p = l->l_proc; 1037 struct mount *mp, *nmp; 1038 struct statvfs *sb; 1039 size_t count, maxcount; 1040 int error = 0; 1041 1042 sb = STATVFSBUF_GET(); 1043 maxcount = bufsize / entry_sz; 1044 mutex_enter(&mountlist_lock); 1045 count = 0; 1046 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1047 mp = nmp) { 1048 if (vfs_busy(mp, &nmp)) { 1049 continue; 1050 } 1051 if (sfsp && count < maxcount) { 1052 error = dostatvfs(mp, sb, l, flags, 0); 1053 if (error) { 1054 vfs_unbusy(mp, false, &nmp); 1055 error = 0; 1056 continue; 1057 } 1058 error = copyfn(sb, sfsp, entry_sz); 1059 if (error) { 1060 vfs_unbusy(mp, false, NULL); 1061 goto out; 1062 } 1063 sfsp = (char *)sfsp + entry_sz; 1064 root |= strcmp(sb->f_mntonname, "/") == 0; 1065 } 1066 count++; 1067 vfs_unbusy(mp, false, &nmp); 1068 } 1069 mutex_exit(&mountlist_lock); 1070 1071 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1072 /* 1073 * fake a root entry 1074 */ 1075 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1076 sb, l, flags, 1); 1077 if (error != 0) 1078 goto out; 1079 if (sfsp) { 1080 error = copyfn(sb, sfsp, entry_sz); 1081 if (error != 0) 1082 goto out; 1083 } 1084 count++; 1085 } 1086 if (sfsp && count > maxcount) 1087 *retval = maxcount; 1088 else 1089 *retval = count; 1090 out: 1091 STATVFSBUF_PUT(sb); 1092 return error; 1093 } 1094 1095 int 1096 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1097 { 1098 /* { 1099 syscallarg(struct statvfs *) buf; 1100 syscallarg(size_t) bufsize; 1101 syscallarg(int) flags; 1102 } */ 1103 1104 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1105 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1106 } 1107 1108 /* 1109 * Change current working directory to a given file descriptor. 1110 */ 1111 /* ARGSUSED */ 1112 int 1113 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1114 { 1115 /* { 1116 syscallarg(int) fd; 1117 } */ 1118 struct proc *p = l->l_proc; 1119 struct cwdinfo *cwdi; 1120 struct vnode *vp, *tdp; 1121 struct mount *mp; 1122 file_t *fp; 1123 int error, fd; 1124 1125 /* fd_getvnode() will use the descriptor for us */ 1126 fd = SCARG(uap, fd); 1127 if ((error = fd_getvnode(fd, &fp)) != 0) 1128 return (error); 1129 vp = fp->f_data; 1130 1131 vref(vp); 1132 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1133 if (vp->v_type != VDIR) 1134 error = ENOTDIR; 1135 else 1136 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1137 if (error) { 1138 vput(vp); 1139 goto out; 1140 } 1141 while ((mp = vp->v_mountedhere) != NULL) { 1142 error = vfs_busy(mp, NULL); 1143 vput(vp); 1144 if (error != 0) 1145 goto out; 1146 error = VFS_ROOT(mp, &tdp); 1147 vfs_unbusy(mp, false, NULL); 1148 if (error) 1149 goto out; 1150 vp = tdp; 1151 } 1152 VOP_UNLOCK(vp); 1153 1154 /* 1155 * Disallow changing to a directory not under the process's 1156 * current root directory (if there is one). 1157 */ 1158 cwdi = p->p_cwdi; 1159 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1160 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1161 vrele(vp); 1162 error = EPERM; /* operation not permitted */ 1163 } else { 1164 vrele(cwdi->cwdi_cdir); 1165 cwdi->cwdi_cdir = vp; 1166 } 1167 rw_exit(&cwdi->cwdi_lock); 1168 1169 out: 1170 fd_putfile(fd); 1171 return (error); 1172 } 1173 1174 /* 1175 * Change this process's notion of the root directory to a given file 1176 * descriptor. 1177 */ 1178 int 1179 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1180 { 1181 struct proc *p = l->l_proc; 1182 struct vnode *vp; 1183 file_t *fp; 1184 int error, fd = SCARG(uap, fd); 1185 1186 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1187 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1188 return error; 1189 /* fd_getvnode() will use the descriptor for us */ 1190 if ((error = fd_getvnode(fd, &fp)) != 0) 1191 return error; 1192 vp = fp->f_data; 1193 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1194 if (vp->v_type != VDIR) 1195 error = ENOTDIR; 1196 else 1197 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1198 VOP_UNLOCK(vp); 1199 if (error) 1200 goto out; 1201 vref(vp); 1202 1203 change_root(p->p_cwdi, vp, l); 1204 1205 out: 1206 fd_putfile(fd); 1207 return (error); 1208 } 1209 1210 /* 1211 * Change current working directory (``.''). 1212 */ 1213 /* ARGSUSED */ 1214 int 1215 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1216 { 1217 /* { 1218 syscallarg(const char *) path; 1219 } */ 1220 struct proc *p = l->l_proc; 1221 struct cwdinfo *cwdi; 1222 int error; 1223 struct vnode *vp; 1224 1225 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1226 &vp, l)) != 0) 1227 return (error); 1228 cwdi = p->p_cwdi; 1229 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1230 vrele(cwdi->cwdi_cdir); 1231 cwdi->cwdi_cdir = vp; 1232 rw_exit(&cwdi->cwdi_lock); 1233 return (0); 1234 } 1235 1236 /* 1237 * Change notion of root (``/'') directory. 1238 */ 1239 /* ARGSUSED */ 1240 int 1241 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1242 { 1243 /* { 1244 syscallarg(const char *) path; 1245 } */ 1246 struct proc *p = l->l_proc; 1247 int error; 1248 struct vnode *vp; 1249 1250 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1251 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1252 return (error); 1253 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1254 &vp, l)) != 0) 1255 return (error); 1256 1257 change_root(p->p_cwdi, vp, l); 1258 1259 return (0); 1260 } 1261 1262 /* 1263 * Common routine for chroot and fchroot. 1264 * NB: callers need to properly authorize the change root operation. 1265 */ 1266 void 1267 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1268 { 1269 1270 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1271 if (cwdi->cwdi_rdir != NULL) 1272 vrele(cwdi->cwdi_rdir); 1273 cwdi->cwdi_rdir = vp; 1274 1275 /* 1276 * Prevent escaping from chroot by putting the root under 1277 * the working directory. Silently chdir to / if we aren't 1278 * already there. 1279 */ 1280 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1281 /* 1282 * XXX would be more failsafe to change directory to a 1283 * deadfs node here instead 1284 */ 1285 vrele(cwdi->cwdi_cdir); 1286 vref(vp); 1287 cwdi->cwdi_cdir = vp; 1288 } 1289 rw_exit(&cwdi->cwdi_lock); 1290 } 1291 1292 /* 1293 * Common routine for chroot and chdir. 1294 * XXX "where" should be enum uio_seg 1295 */ 1296 int 1297 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1298 { 1299 struct pathbuf *pb; 1300 struct nameidata nd; 1301 int error; 1302 1303 error = pathbuf_maybe_copyin(path, where, &pb); 1304 if (error) { 1305 return error; 1306 } 1307 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1308 if ((error = namei(&nd)) != 0) { 1309 pathbuf_destroy(pb); 1310 return error; 1311 } 1312 *vpp = nd.ni_vp; 1313 pathbuf_destroy(pb); 1314 1315 if ((*vpp)->v_type != VDIR) 1316 error = ENOTDIR; 1317 else 1318 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1319 1320 if (error) 1321 vput(*vpp); 1322 else 1323 VOP_UNLOCK(*vpp); 1324 return (error); 1325 } 1326 1327 /* 1328 * Check permissions, allocate an open file structure, 1329 * and call the device open routine if any. 1330 */ 1331 int 1332 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1333 { 1334 /* { 1335 syscallarg(const char *) path; 1336 syscallarg(int) flags; 1337 syscallarg(int) mode; 1338 } */ 1339 struct proc *p = l->l_proc; 1340 struct cwdinfo *cwdi = p->p_cwdi; 1341 file_t *fp; 1342 struct vnode *vp; 1343 int flags, cmode; 1344 int type, indx, error; 1345 struct flock lf; 1346 struct pathbuf *pb; 1347 struct nameidata nd; 1348 1349 flags = FFLAGS(SCARG(uap, flags)); 1350 if ((flags & (FREAD | FWRITE)) == 0) 1351 return (EINVAL); 1352 1353 error = pathbuf_copyin(SCARG(uap, path), &pb); 1354 if (error) { 1355 return error; 1356 } 1357 1358 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1359 pathbuf_destroy(pb); 1360 return error; 1361 } 1362 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1363 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1364 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1365 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1366 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1367 fd_abort(p, fp, indx); 1368 if ((error == EDUPFD || error == EMOVEFD) && 1369 l->l_dupfd >= 0 && /* XXX from fdopen */ 1370 (error = 1371 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1372 *retval = indx; 1373 pathbuf_destroy(pb); 1374 return (0); 1375 } 1376 if (error == ERESTART) 1377 error = EINTR; 1378 pathbuf_destroy(pb); 1379 return (error); 1380 } 1381 1382 l->l_dupfd = 0; 1383 vp = nd.ni_vp; 1384 pathbuf_destroy(pb); 1385 1386 fp->f_flag = flags & FMASK; 1387 fp->f_type = DTYPE_VNODE; 1388 fp->f_ops = &vnops; 1389 fp->f_data = vp; 1390 if (flags & (O_EXLOCK | O_SHLOCK)) { 1391 lf.l_whence = SEEK_SET; 1392 lf.l_start = 0; 1393 lf.l_len = 0; 1394 if (flags & O_EXLOCK) 1395 lf.l_type = F_WRLCK; 1396 else 1397 lf.l_type = F_RDLCK; 1398 type = F_FLOCK; 1399 if ((flags & FNONBLOCK) == 0) 1400 type |= F_WAIT; 1401 VOP_UNLOCK(vp); 1402 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1403 if (error) { 1404 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1405 fd_abort(p, fp, indx); 1406 return (error); 1407 } 1408 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1409 atomic_or_uint(&fp->f_flag, FHASLOCK); 1410 } 1411 VOP_UNLOCK(vp); 1412 *retval = indx; 1413 fd_affix(p, fp, indx); 1414 return (0); 1415 } 1416 1417 static void 1418 vfs__fhfree(fhandle_t *fhp) 1419 { 1420 size_t fhsize; 1421 1422 if (fhp == NULL) { 1423 return; 1424 } 1425 fhsize = FHANDLE_SIZE(fhp); 1426 kmem_free(fhp, fhsize); 1427 } 1428 1429 /* 1430 * vfs_composefh: compose a filehandle. 1431 */ 1432 1433 int 1434 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1435 { 1436 struct mount *mp; 1437 struct fid *fidp; 1438 int error; 1439 size_t needfhsize; 1440 size_t fidsize; 1441 1442 mp = vp->v_mount; 1443 fidp = NULL; 1444 if (*fh_size < FHANDLE_SIZE_MIN) { 1445 fidsize = 0; 1446 } else { 1447 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1448 if (fhp != NULL) { 1449 memset(fhp, 0, *fh_size); 1450 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1451 fidp = &fhp->fh_fid; 1452 } 1453 } 1454 error = VFS_VPTOFH(vp, fidp, &fidsize); 1455 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1456 if (error == 0 && *fh_size < needfhsize) { 1457 error = E2BIG; 1458 } 1459 *fh_size = needfhsize; 1460 return error; 1461 } 1462 1463 int 1464 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1465 { 1466 struct mount *mp; 1467 fhandle_t *fhp; 1468 size_t fhsize; 1469 size_t fidsize; 1470 int error; 1471 1472 *fhpp = NULL; 1473 mp = vp->v_mount; 1474 fidsize = 0; 1475 error = VFS_VPTOFH(vp, NULL, &fidsize); 1476 KASSERT(error != 0); 1477 if (error != E2BIG) { 1478 goto out; 1479 } 1480 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1481 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1482 if (fhp == NULL) { 1483 error = ENOMEM; 1484 goto out; 1485 } 1486 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1487 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1488 if (error == 0) { 1489 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1490 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1491 *fhpp = fhp; 1492 } else { 1493 kmem_free(fhp, fhsize); 1494 } 1495 out: 1496 return error; 1497 } 1498 1499 void 1500 vfs_composefh_free(fhandle_t *fhp) 1501 { 1502 1503 vfs__fhfree(fhp); 1504 } 1505 1506 /* 1507 * vfs_fhtovp: lookup a vnode by a filehandle. 1508 */ 1509 1510 int 1511 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1512 { 1513 struct mount *mp; 1514 int error; 1515 1516 *vpp = NULL; 1517 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1518 if (mp == NULL) { 1519 error = ESTALE; 1520 goto out; 1521 } 1522 if (mp->mnt_op->vfs_fhtovp == NULL) { 1523 error = EOPNOTSUPP; 1524 goto out; 1525 } 1526 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1527 out: 1528 return error; 1529 } 1530 1531 /* 1532 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1533 * the needed size. 1534 */ 1535 1536 int 1537 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1538 { 1539 fhandle_t *fhp; 1540 int error; 1541 1542 *fhpp = NULL; 1543 if (fhsize > FHANDLE_SIZE_MAX) { 1544 return EINVAL; 1545 } 1546 if (fhsize < FHANDLE_SIZE_MIN) { 1547 return EINVAL; 1548 } 1549 again: 1550 fhp = kmem_alloc(fhsize, KM_SLEEP); 1551 if (fhp == NULL) { 1552 return ENOMEM; 1553 } 1554 error = copyin(ufhp, fhp, fhsize); 1555 if (error == 0) { 1556 /* XXX this check shouldn't be here */ 1557 if (FHANDLE_SIZE(fhp) == fhsize) { 1558 *fhpp = fhp; 1559 return 0; 1560 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1561 /* 1562 * a kludge for nfsv2 padded handles. 1563 */ 1564 size_t sz; 1565 1566 sz = FHANDLE_SIZE(fhp); 1567 kmem_free(fhp, fhsize); 1568 fhsize = sz; 1569 goto again; 1570 } else { 1571 /* 1572 * userland told us wrong size. 1573 */ 1574 error = EINVAL; 1575 } 1576 } 1577 kmem_free(fhp, fhsize); 1578 return error; 1579 } 1580 1581 void 1582 vfs_copyinfh_free(fhandle_t *fhp) 1583 { 1584 1585 vfs__fhfree(fhp); 1586 } 1587 1588 /* 1589 * Get file handle system call 1590 */ 1591 int 1592 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1593 { 1594 /* { 1595 syscallarg(char *) fname; 1596 syscallarg(fhandle_t *) fhp; 1597 syscallarg(size_t *) fh_size; 1598 } */ 1599 struct vnode *vp; 1600 fhandle_t *fh; 1601 int error; 1602 struct pathbuf *pb; 1603 struct nameidata nd; 1604 size_t sz; 1605 size_t usz; 1606 1607 /* 1608 * Must be super user 1609 */ 1610 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1611 0, NULL, NULL, NULL); 1612 if (error) 1613 return (error); 1614 1615 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1616 if (error) { 1617 return error; 1618 } 1619 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1620 error = namei(&nd); 1621 if (error) { 1622 pathbuf_destroy(pb); 1623 return error; 1624 } 1625 vp = nd.ni_vp; 1626 pathbuf_destroy(pb); 1627 1628 error = vfs_composefh_alloc(vp, &fh); 1629 vput(vp); 1630 if (error != 0) { 1631 goto out; 1632 } 1633 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1634 if (error != 0) { 1635 goto out; 1636 } 1637 sz = FHANDLE_SIZE(fh); 1638 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1639 if (error != 0) { 1640 goto out; 1641 } 1642 if (usz >= sz) { 1643 error = copyout(fh, SCARG(uap, fhp), sz); 1644 } else { 1645 error = E2BIG; 1646 } 1647 out: 1648 vfs_composefh_free(fh); 1649 return (error); 1650 } 1651 1652 /* 1653 * Open a file given a file handle. 1654 * 1655 * Check permissions, allocate an open file structure, 1656 * and call the device open routine if any. 1657 */ 1658 1659 int 1660 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1661 register_t *retval) 1662 { 1663 file_t *fp; 1664 struct vnode *vp = NULL; 1665 kauth_cred_t cred = l->l_cred; 1666 file_t *nfp; 1667 int type, indx, error=0; 1668 struct flock lf; 1669 struct vattr va; 1670 fhandle_t *fh; 1671 int flags; 1672 proc_t *p; 1673 1674 p = curproc; 1675 1676 /* 1677 * Must be super user 1678 */ 1679 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1680 0, NULL, NULL, NULL))) 1681 return (error); 1682 1683 flags = FFLAGS(oflags); 1684 if ((flags & (FREAD | FWRITE)) == 0) 1685 return (EINVAL); 1686 if ((flags & O_CREAT)) 1687 return (EINVAL); 1688 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1689 return (error); 1690 fp = nfp; 1691 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1692 if (error != 0) { 1693 goto bad; 1694 } 1695 error = vfs_fhtovp(fh, &vp); 1696 if (error != 0) { 1697 goto bad; 1698 } 1699 1700 /* Now do an effective vn_open */ 1701 1702 if (vp->v_type == VSOCK) { 1703 error = EOPNOTSUPP; 1704 goto bad; 1705 } 1706 error = vn_openchk(vp, cred, flags); 1707 if (error != 0) 1708 goto bad; 1709 if (flags & O_TRUNC) { 1710 VOP_UNLOCK(vp); /* XXX */ 1711 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1712 vattr_null(&va); 1713 va.va_size = 0; 1714 error = VOP_SETATTR(vp, &va, cred); 1715 if (error) 1716 goto bad; 1717 } 1718 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1719 goto bad; 1720 if (flags & FWRITE) { 1721 mutex_enter(&vp->v_interlock); 1722 vp->v_writecount++; 1723 mutex_exit(&vp->v_interlock); 1724 } 1725 1726 /* done with modified vn_open, now finish what sys_open does. */ 1727 1728 fp->f_flag = flags & FMASK; 1729 fp->f_type = DTYPE_VNODE; 1730 fp->f_ops = &vnops; 1731 fp->f_data = vp; 1732 if (flags & (O_EXLOCK | O_SHLOCK)) { 1733 lf.l_whence = SEEK_SET; 1734 lf.l_start = 0; 1735 lf.l_len = 0; 1736 if (flags & O_EXLOCK) 1737 lf.l_type = F_WRLCK; 1738 else 1739 lf.l_type = F_RDLCK; 1740 type = F_FLOCK; 1741 if ((flags & FNONBLOCK) == 0) 1742 type |= F_WAIT; 1743 VOP_UNLOCK(vp); 1744 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 1745 if (error) { 1746 (void) vn_close(vp, fp->f_flag, fp->f_cred); 1747 fd_abort(p, fp, indx); 1748 return (error); 1749 } 1750 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1751 atomic_or_uint(&fp->f_flag, FHASLOCK); 1752 } 1753 VOP_UNLOCK(vp); 1754 *retval = indx; 1755 fd_affix(p, fp, indx); 1756 vfs_copyinfh_free(fh); 1757 return (0); 1758 1759 bad: 1760 fd_abort(p, fp, indx); 1761 if (vp != NULL) 1762 vput(vp); 1763 vfs_copyinfh_free(fh); 1764 return (error); 1765 } 1766 1767 int 1768 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1769 { 1770 /* { 1771 syscallarg(const void *) fhp; 1772 syscallarg(size_t) fh_size; 1773 syscallarg(int) flags; 1774 } */ 1775 1776 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1777 SCARG(uap, flags), retval); 1778 } 1779 1780 int 1781 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1782 { 1783 int error; 1784 fhandle_t *fh; 1785 struct vnode *vp; 1786 1787 /* 1788 * Must be super user 1789 */ 1790 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1791 0, NULL, NULL, NULL))) 1792 return (error); 1793 1794 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1795 if (error != 0) 1796 return error; 1797 1798 error = vfs_fhtovp(fh, &vp); 1799 vfs_copyinfh_free(fh); 1800 if (error != 0) 1801 return error; 1802 1803 error = vn_stat(vp, sb); 1804 vput(vp); 1805 return error; 1806 } 1807 1808 1809 /* ARGSUSED */ 1810 int 1811 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1812 { 1813 /* { 1814 syscallarg(const void *) fhp; 1815 syscallarg(size_t) fh_size; 1816 syscallarg(struct stat *) sb; 1817 } */ 1818 struct stat sb; 1819 int error; 1820 1821 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1822 if (error) 1823 return error; 1824 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1825 } 1826 1827 int 1828 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1829 int flags) 1830 { 1831 fhandle_t *fh; 1832 struct mount *mp; 1833 struct vnode *vp; 1834 int error; 1835 1836 /* 1837 * Must be super user 1838 */ 1839 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1840 0, NULL, NULL, NULL))) 1841 return error; 1842 1843 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1844 if (error != 0) 1845 return error; 1846 1847 error = vfs_fhtovp(fh, &vp); 1848 vfs_copyinfh_free(fh); 1849 if (error != 0) 1850 return error; 1851 1852 mp = vp->v_mount; 1853 error = dostatvfs(mp, sb, l, flags, 1); 1854 vput(vp); 1855 return error; 1856 } 1857 1858 /* ARGSUSED */ 1859 int 1860 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1861 { 1862 /* { 1863 syscallarg(const void *) fhp; 1864 syscallarg(size_t) fh_size; 1865 syscallarg(struct statvfs *) buf; 1866 syscallarg(int) flags; 1867 } */ 1868 struct statvfs *sb = STATVFSBUF_GET(); 1869 int error; 1870 1871 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1872 SCARG(uap, flags)); 1873 if (error == 0) 1874 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1875 STATVFSBUF_PUT(sb); 1876 return error; 1877 } 1878 1879 /* 1880 * Create a special file. 1881 */ 1882 /* ARGSUSED */ 1883 int 1884 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1885 register_t *retval) 1886 { 1887 /* { 1888 syscallarg(const char *) path; 1889 syscallarg(mode_t) mode; 1890 syscallarg(dev_t) dev; 1891 } */ 1892 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1893 SCARG(uap, dev), retval, UIO_USERSPACE); 1894 } 1895 1896 int 1897 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1898 register_t *retval, enum uio_seg seg) 1899 { 1900 struct proc *p = l->l_proc; 1901 struct vnode *vp; 1902 struct vattr vattr; 1903 int error, optype; 1904 struct pathbuf *pb; 1905 struct nameidata nd; 1906 const char *pathstring; 1907 1908 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1909 0, NULL, NULL, NULL)) != 0) 1910 return (error); 1911 1912 optype = VOP_MKNOD_DESCOFFSET; 1913 1914 error = pathbuf_maybe_copyin(pathname, seg, &pb); 1915 if (error) { 1916 return error; 1917 } 1918 pathstring = pathbuf_stringcopy_get(pb); 1919 if (pathstring == NULL) { 1920 pathbuf_destroy(pb); 1921 return ENOMEM; 1922 } 1923 1924 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 1925 if ((error = namei(&nd)) != 0) 1926 goto out; 1927 vp = nd.ni_vp; 1928 1929 if (vp != NULL) 1930 error = EEXIST; 1931 else { 1932 vattr_null(&vattr); 1933 /* We will read cwdi->cwdi_cmask unlocked. */ 1934 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1935 vattr.va_rdev = dev; 1936 1937 switch (mode & S_IFMT) { 1938 case S_IFMT: /* used by badsect to flag bad sectors */ 1939 vattr.va_type = VBAD; 1940 break; 1941 case S_IFCHR: 1942 vattr.va_type = VCHR; 1943 break; 1944 case S_IFBLK: 1945 vattr.va_type = VBLK; 1946 break; 1947 case S_IFWHT: 1948 optype = VOP_WHITEOUT_DESCOFFSET; 1949 break; 1950 case S_IFREG: 1951 #if NVERIEXEC > 0 1952 error = veriexec_openchk(l, nd.ni_vp, pathstring, 1953 O_CREAT); 1954 #endif /* NVERIEXEC > 0 */ 1955 vattr.va_type = VREG; 1956 vattr.va_rdev = VNOVAL; 1957 optype = VOP_CREATE_DESCOFFSET; 1958 break; 1959 default: 1960 error = EINVAL; 1961 break; 1962 } 1963 } 1964 if (!error) { 1965 switch (optype) { 1966 case VOP_WHITEOUT_DESCOFFSET: 1967 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1968 if (error) 1969 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1970 vput(nd.ni_dvp); 1971 break; 1972 1973 case VOP_MKNOD_DESCOFFSET: 1974 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1975 &nd.ni_cnd, &vattr); 1976 if (error == 0) 1977 vput(nd.ni_vp); 1978 break; 1979 1980 case VOP_CREATE_DESCOFFSET: 1981 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1982 &nd.ni_cnd, &vattr); 1983 if (error == 0) 1984 vput(nd.ni_vp); 1985 break; 1986 } 1987 } else { 1988 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1989 if (nd.ni_dvp == vp) 1990 vrele(nd.ni_dvp); 1991 else 1992 vput(nd.ni_dvp); 1993 if (vp) 1994 vrele(vp); 1995 } 1996 out: 1997 pathbuf_stringcopy_put(pb, pathstring); 1998 pathbuf_destroy(pb); 1999 return (error); 2000 } 2001 2002 /* 2003 * Create a named pipe. 2004 */ 2005 /* ARGSUSED */ 2006 int 2007 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2008 { 2009 /* { 2010 syscallarg(const char *) path; 2011 syscallarg(int) mode; 2012 } */ 2013 struct proc *p = l->l_proc; 2014 struct vattr vattr; 2015 int error; 2016 struct pathbuf *pb; 2017 struct nameidata nd; 2018 2019 error = pathbuf_copyin(SCARG(uap, path), &pb); 2020 if (error) { 2021 return error; 2022 } 2023 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2024 if ((error = namei(&nd)) != 0) { 2025 pathbuf_destroy(pb); 2026 return error; 2027 } 2028 if (nd.ni_vp != NULL) { 2029 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2030 if (nd.ni_dvp == nd.ni_vp) 2031 vrele(nd.ni_dvp); 2032 else 2033 vput(nd.ni_dvp); 2034 vrele(nd.ni_vp); 2035 pathbuf_destroy(pb); 2036 return (EEXIST); 2037 } 2038 vattr_null(&vattr); 2039 vattr.va_type = VFIFO; 2040 /* We will read cwdi->cwdi_cmask unlocked. */ 2041 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2042 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2043 if (error == 0) 2044 vput(nd.ni_vp); 2045 pathbuf_destroy(pb); 2046 return (error); 2047 } 2048 2049 /* 2050 * Make a hard file link. 2051 */ 2052 /* ARGSUSED */ 2053 int 2054 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2055 { 2056 /* { 2057 syscallarg(const char *) path; 2058 syscallarg(const char *) link; 2059 } */ 2060 struct vnode *vp; 2061 struct pathbuf *linkpb; 2062 struct nameidata nd; 2063 int error; 2064 2065 error = namei_simple_user(SCARG(uap, path), 2066 NSM_FOLLOW_TRYEMULROOT, &vp); 2067 if (error != 0) 2068 return (error); 2069 error = pathbuf_copyin(SCARG(uap, link), &linkpb); 2070 if (error) { 2071 goto out1; 2072 } 2073 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2074 if ((error = namei(&nd)) != 0) 2075 goto out2; 2076 if (nd.ni_vp) { 2077 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2078 if (nd.ni_dvp == nd.ni_vp) 2079 vrele(nd.ni_dvp); 2080 else 2081 vput(nd.ni_dvp); 2082 vrele(nd.ni_vp); 2083 error = EEXIST; 2084 goto out2; 2085 } 2086 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2087 out2: 2088 pathbuf_destroy(linkpb); 2089 out1: 2090 vrele(vp); 2091 return (error); 2092 } 2093 2094 int 2095 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2096 { 2097 struct proc *p = curproc; 2098 struct vattr vattr; 2099 char *path; 2100 int error; 2101 struct pathbuf *linkpb; 2102 struct nameidata nd; 2103 2104 path = PNBUF_GET(); 2105 if (seg == UIO_USERSPACE) { 2106 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2107 goto out1; 2108 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2109 goto out1; 2110 } else { 2111 KASSERT(strlen(patharg) < MAXPATHLEN); 2112 strcpy(path, patharg); 2113 linkpb = pathbuf_create(link); 2114 if (linkpb == NULL) { 2115 error = ENOMEM; 2116 goto out1; 2117 } 2118 } 2119 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2120 if ((error = namei(&nd)) != 0) 2121 goto out2; 2122 if (nd.ni_vp) { 2123 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2124 if (nd.ni_dvp == nd.ni_vp) 2125 vrele(nd.ni_dvp); 2126 else 2127 vput(nd.ni_dvp); 2128 vrele(nd.ni_vp); 2129 error = EEXIST; 2130 goto out2; 2131 } 2132 vattr_null(&vattr); 2133 vattr.va_type = VLNK; 2134 /* We will read cwdi->cwdi_cmask unlocked. */ 2135 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2136 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2137 if (error == 0) 2138 vput(nd.ni_vp); 2139 out2: 2140 pathbuf_destroy(linkpb); 2141 out1: 2142 PNBUF_PUT(path); 2143 return (error); 2144 } 2145 2146 /* 2147 * Make a symbolic link. 2148 */ 2149 /* ARGSUSED */ 2150 int 2151 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2152 { 2153 /* { 2154 syscallarg(const char *) path; 2155 syscallarg(const char *) link; 2156 } */ 2157 2158 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link), 2159 UIO_USERSPACE); 2160 } 2161 2162 /* 2163 * Delete a whiteout from the filesystem. 2164 */ 2165 /* ARGSUSED */ 2166 int 2167 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2168 { 2169 /* { 2170 syscallarg(const char *) path; 2171 } */ 2172 int error; 2173 struct pathbuf *pb; 2174 struct nameidata nd; 2175 2176 error = pathbuf_copyin(SCARG(uap, path), &pb); 2177 if (error) { 2178 return error; 2179 } 2180 2181 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2182 error = namei(&nd); 2183 if (error) { 2184 pathbuf_destroy(pb); 2185 return (error); 2186 } 2187 2188 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2189 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2190 if (nd.ni_dvp == nd.ni_vp) 2191 vrele(nd.ni_dvp); 2192 else 2193 vput(nd.ni_dvp); 2194 if (nd.ni_vp) 2195 vrele(nd.ni_vp); 2196 pathbuf_destroy(pb); 2197 return (EEXIST); 2198 } 2199 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2200 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2201 vput(nd.ni_dvp); 2202 pathbuf_destroy(pb); 2203 return (error); 2204 } 2205 2206 /* 2207 * Delete a name from the filesystem. 2208 */ 2209 /* ARGSUSED */ 2210 int 2211 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2212 { 2213 /* { 2214 syscallarg(const char *) path; 2215 } */ 2216 2217 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2218 } 2219 2220 int 2221 do_sys_unlink(const char *arg, enum uio_seg seg) 2222 { 2223 struct vnode *vp; 2224 int error; 2225 struct pathbuf *pb; 2226 struct nameidata nd; 2227 const char *pathstring; 2228 2229 error = pathbuf_maybe_copyin(arg, seg, &pb); 2230 if (error) { 2231 return error; 2232 } 2233 pathstring = pathbuf_stringcopy_get(pb); 2234 if (pathstring == NULL) { 2235 pathbuf_destroy(pb); 2236 return ENOMEM; 2237 } 2238 2239 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2240 if ((error = namei(&nd)) != 0) 2241 goto out; 2242 vp = nd.ni_vp; 2243 2244 /* 2245 * The root of a mounted filesystem cannot be deleted. 2246 */ 2247 if (vp->v_vflag & VV_ROOT) { 2248 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2249 if (nd.ni_dvp == vp) 2250 vrele(nd.ni_dvp); 2251 else 2252 vput(nd.ni_dvp); 2253 vput(vp); 2254 error = EBUSY; 2255 goto out; 2256 } 2257 2258 #if NVERIEXEC > 0 2259 /* Handle remove requests for veriexec entries. */ 2260 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2261 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2262 if (nd.ni_dvp == vp) 2263 vrele(nd.ni_dvp); 2264 else 2265 vput(nd.ni_dvp); 2266 vput(vp); 2267 goto out; 2268 } 2269 #endif /* NVERIEXEC > 0 */ 2270 2271 #ifdef FILEASSOC 2272 (void)fileassoc_file_delete(vp); 2273 #endif /* FILEASSOC */ 2274 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2275 out: 2276 pathbuf_stringcopy_put(pb, pathstring); 2277 pathbuf_destroy(pb); 2278 return (error); 2279 } 2280 2281 /* 2282 * Reposition read/write file offset. 2283 */ 2284 int 2285 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2286 { 2287 /* { 2288 syscallarg(int) fd; 2289 syscallarg(int) pad; 2290 syscallarg(off_t) offset; 2291 syscallarg(int) whence; 2292 } */ 2293 kauth_cred_t cred = l->l_cred; 2294 file_t *fp; 2295 struct vnode *vp; 2296 struct vattr vattr; 2297 off_t newoff; 2298 int error, fd; 2299 2300 fd = SCARG(uap, fd); 2301 2302 if ((fp = fd_getfile(fd)) == NULL) 2303 return (EBADF); 2304 2305 vp = fp->f_data; 2306 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2307 error = ESPIPE; 2308 goto out; 2309 } 2310 2311 switch (SCARG(uap, whence)) { 2312 case SEEK_CUR: 2313 newoff = fp->f_offset + SCARG(uap, offset); 2314 break; 2315 case SEEK_END: 2316 error = VOP_GETATTR(vp, &vattr, cred); 2317 if (error) { 2318 goto out; 2319 } 2320 newoff = SCARG(uap, offset) + vattr.va_size; 2321 break; 2322 case SEEK_SET: 2323 newoff = SCARG(uap, offset); 2324 break; 2325 default: 2326 error = EINVAL; 2327 goto out; 2328 } 2329 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2330 *(off_t *)retval = fp->f_offset = newoff; 2331 } 2332 out: 2333 fd_putfile(fd); 2334 return (error); 2335 } 2336 2337 /* 2338 * Positional read system call. 2339 */ 2340 int 2341 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2342 { 2343 /* { 2344 syscallarg(int) fd; 2345 syscallarg(void *) buf; 2346 syscallarg(size_t) nbyte; 2347 syscallarg(off_t) offset; 2348 } */ 2349 file_t *fp; 2350 struct vnode *vp; 2351 off_t offset; 2352 int error, fd = SCARG(uap, fd); 2353 2354 if ((fp = fd_getfile(fd)) == NULL) 2355 return (EBADF); 2356 2357 if ((fp->f_flag & FREAD) == 0) { 2358 fd_putfile(fd); 2359 return (EBADF); 2360 } 2361 2362 vp = fp->f_data; 2363 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2364 error = ESPIPE; 2365 goto out; 2366 } 2367 2368 offset = SCARG(uap, offset); 2369 2370 /* 2371 * XXX This works because no file systems actually 2372 * XXX take any action on the seek operation. 2373 */ 2374 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2375 goto out; 2376 2377 /* dofileread() will unuse the descriptor for us */ 2378 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2379 &offset, 0, retval)); 2380 2381 out: 2382 fd_putfile(fd); 2383 return (error); 2384 } 2385 2386 /* 2387 * Positional scatter read system call. 2388 */ 2389 int 2390 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2391 { 2392 /* { 2393 syscallarg(int) fd; 2394 syscallarg(const struct iovec *) iovp; 2395 syscallarg(int) iovcnt; 2396 syscallarg(off_t) offset; 2397 } */ 2398 off_t offset = SCARG(uap, offset); 2399 2400 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2401 SCARG(uap, iovcnt), &offset, 0, retval); 2402 } 2403 2404 /* 2405 * Positional write system call. 2406 */ 2407 int 2408 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2409 { 2410 /* { 2411 syscallarg(int) fd; 2412 syscallarg(const void *) buf; 2413 syscallarg(size_t) nbyte; 2414 syscallarg(off_t) offset; 2415 } */ 2416 file_t *fp; 2417 struct vnode *vp; 2418 off_t offset; 2419 int error, fd = SCARG(uap, fd); 2420 2421 if ((fp = fd_getfile(fd)) == NULL) 2422 return (EBADF); 2423 2424 if ((fp->f_flag & FWRITE) == 0) { 2425 fd_putfile(fd); 2426 return (EBADF); 2427 } 2428 2429 vp = fp->f_data; 2430 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2431 error = ESPIPE; 2432 goto out; 2433 } 2434 2435 offset = SCARG(uap, offset); 2436 2437 /* 2438 * XXX This works because no file systems actually 2439 * XXX take any action on the seek operation. 2440 */ 2441 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2442 goto out; 2443 2444 /* dofilewrite() will unuse the descriptor for us */ 2445 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2446 &offset, 0, retval)); 2447 2448 out: 2449 fd_putfile(fd); 2450 return (error); 2451 } 2452 2453 /* 2454 * Positional gather write system call. 2455 */ 2456 int 2457 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2458 { 2459 /* { 2460 syscallarg(int) fd; 2461 syscallarg(const struct iovec *) iovp; 2462 syscallarg(int) iovcnt; 2463 syscallarg(off_t) offset; 2464 } */ 2465 off_t offset = SCARG(uap, offset); 2466 2467 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2468 SCARG(uap, iovcnt), &offset, 0, retval); 2469 } 2470 2471 /* 2472 * Check access permissions. 2473 */ 2474 int 2475 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2476 { 2477 /* { 2478 syscallarg(const char *) path; 2479 syscallarg(int) flags; 2480 } */ 2481 kauth_cred_t cred; 2482 struct vnode *vp; 2483 int error, flags; 2484 struct pathbuf *pb; 2485 struct nameidata nd; 2486 2487 error = pathbuf_copyin(SCARG(uap, path), &pb); 2488 if (error) { 2489 return error; 2490 } 2491 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2492 2493 /* Override default credentials */ 2494 cred = kauth_cred_dup(l->l_cred); 2495 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2496 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2497 nd.ni_cnd.cn_cred = cred; 2498 2499 if ((error = namei(&nd)) != 0) { 2500 pathbuf_destroy(pb); 2501 goto out; 2502 } 2503 vp = nd.ni_vp; 2504 pathbuf_destroy(pb); 2505 2506 /* Flags == 0 means only check for existence. */ 2507 if (SCARG(uap, flags)) { 2508 flags = 0; 2509 if (SCARG(uap, flags) & R_OK) 2510 flags |= VREAD; 2511 if (SCARG(uap, flags) & W_OK) 2512 flags |= VWRITE; 2513 if (SCARG(uap, flags) & X_OK) 2514 flags |= VEXEC; 2515 2516 error = VOP_ACCESS(vp, flags, cred); 2517 if (!error && (flags & VWRITE)) 2518 error = vn_writechk(vp); 2519 } 2520 vput(vp); 2521 out: 2522 kauth_cred_free(cred); 2523 return (error); 2524 } 2525 2526 /* 2527 * Common code for all sys_stat functions, including compat versions. 2528 */ 2529 int 2530 do_sys_stat(const char *userpath, unsigned int nd_flags, struct stat *sb) 2531 { 2532 int error; 2533 struct pathbuf *pb; 2534 struct nameidata nd; 2535 2536 error = pathbuf_copyin(userpath, &pb); 2537 if (error) { 2538 return error; 2539 } 2540 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, pb); 2541 error = namei(&nd); 2542 if (error != 0) { 2543 pathbuf_destroy(pb); 2544 return error; 2545 } 2546 error = vn_stat(nd.ni_vp, sb); 2547 vput(nd.ni_vp); 2548 pathbuf_destroy(pb); 2549 return error; 2550 } 2551 2552 /* 2553 * Get file status; this version follows links. 2554 */ 2555 /* ARGSUSED */ 2556 int 2557 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2558 { 2559 /* { 2560 syscallarg(const char *) path; 2561 syscallarg(struct stat *) ub; 2562 } */ 2563 struct stat sb; 2564 int error; 2565 2566 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2567 if (error) 2568 return error; 2569 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2570 } 2571 2572 /* 2573 * Get file status; this version does not follow links. 2574 */ 2575 /* ARGSUSED */ 2576 int 2577 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2578 { 2579 /* { 2580 syscallarg(const char *) path; 2581 syscallarg(struct stat *) ub; 2582 } */ 2583 struct stat sb; 2584 int error; 2585 2586 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2587 if (error) 2588 return error; 2589 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2590 } 2591 2592 /* 2593 * Get configurable pathname variables. 2594 */ 2595 /* ARGSUSED */ 2596 int 2597 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2598 { 2599 /* { 2600 syscallarg(const char *) path; 2601 syscallarg(int) name; 2602 } */ 2603 int error; 2604 struct pathbuf *pb; 2605 struct nameidata nd; 2606 2607 error = pathbuf_copyin(SCARG(uap, path), &pb); 2608 if (error) { 2609 return error; 2610 } 2611 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2612 if ((error = namei(&nd)) != 0) { 2613 pathbuf_destroy(pb); 2614 return (error); 2615 } 2616 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2617 vput(nd.ni_vp); 2618 pathbuf_destroy(pb); 2619 return (error); 2620 } 2621 2622 /* 2623 * Return target name of a symbolic link. 2624 */ 2625 /* ARGSUSED */ 2626 int 2627 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2628 { 2629 /* { 2630 syscallarg(const char *) path; 2631 syscallarg(char *) buf; 2632 syscallarg(size_t) count; 2633 } */ 2634 struct vnode *vp; 2635 struct iovec aiov; 2636 struct uio auio; 2637 int error; 2638 struct pathbuf *pb; 2639 struct nameidata nd; 2640 2641 error = pathbuf_copyin(SCARG(uap, path), &pb); 2642 if (error) { 2643 return error; 2644 } 2645 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2646 if ((error = namei(&nd)) != 0) { 2647 pathbuf_destroy(pb); 2648 return error; 2649 } 2650 vp = nd.ni_vp; 2651 pathbuf_destroy(pb); 2652 if (vp->v_type != VLNK) 2653 error = EINVAL; 2654 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2655 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2656 aiov.iov_base = SCARG(uap, buf); 2657 aiov.iov_len = SCARG(uap, count); 2658 auio.uio_iov = &aiov; 2659 auio.uio_iovcnt = 1; 2660 auio.uio_offset = 0; 2661 auio.uio_rw = UIO_READ; 2662 KASSERT(l == curlwp); 2663 auio.uio_vmspace = l->l_proc->p_vmspace; 2664 auio.uio_resid = SCARG(uap, count); 2665 error = VOP_READLINK(vp, &auio, l->l_cred); 2666 } 2667 vput(vp); 2668 *retval = SCARG(uap, count) - auio.uio_resid; 2669 return (error); 2670 } 2671 2672 /* 2673 * Change flags of a file given a path name. 2674 */ 2675 /* ARGSUSED */ 2676 int 2677 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2678 { 2679 /* { 2680 syscallarg(const char *) path; 2681 syscallarg(u_long) flags; 2682 } */ 2683 struct vnode *vp; 2684 int error; 2685 2686 error = namei_simple_user(SCARG(uap, path), 2687 NSM_FOLLOW_TRYEMULROOT, &vp); 2688 if (error != 0) 2689 return (error); 2690 error = change_flags(vp, SCARG(uap, flags), l); 2691 vput(vp); 2692 return (error); 2693 } 2694 2695 /* 2696 * Change flags of a file given a file descriptor. 2697 */ 2698 /* ARGSUSED */ 2699 int 2700 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2701 { 2702 /* { 2703 syscallarg(int) fd; 2704 syscallarg(u_long) flags; 2705 } */ 2706 struct vnode *vp; 2707 file_t *fp; 2708 int error; 2709 2710 /* fd_getvnode() will use the descriptor for us */ 2711 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2712 return (error); 2713 vp = fp->f_data; 2714 error = change_flags(vp, SCARG(uap, flags), l); 2715 VOP_UNLOCK(vp); 2716 fd_putfile(SCARG(uap, fd)); 2717 return (error); 2718 } 2719 2720 /* 2721 * Change flags of a file given a path name; this version does 2722 * not follow links. 2723 */ 2724 int 2725 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2726 { 2727 /* { 2728 syscallarg(const char *) path; 2729 syscallarg(u_long) flags; 2730 } */ 2731 struct vnode *vp; 2732 int error; 2733 2734 error = namei_simple_user(SCARG(uap, path), 2735 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2736 if (error != 0) 2737 return (error); 2738 error = change_flags(vp, SCARG(uap, flags), l); 2739 vput(vp); 2740 return (error); 2741 } 2742 2743 /* 2744 * Common routine to change flags of a file. 2745 */ 2746 int 2747 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2748 { 2749 struct vattr vattr; 2750 int error; 2751 2752 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2753 /* 2754 * Non-superusers cannot change the flags on devices, even if they 2755 * own them. 2756 */ 2757 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2758 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2759 goto out; 2760 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2761 error = EINVAL; 2762 goto out; 2763 } 2764 } 2765 vattr_null(&vattr); 2766 vattr.va_flags = flags; 2767 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2768 out: 2769 return (error); 2770 } 2771 2772 /* 2773 * Change mode of a file given path name; this version follows links. 2774 */ 2775 /* ARGSUSED */ 2776 int 2777 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2778 { 2779 /* { 2780 syscallarg(const char *) path; 2781 syscallarg(int) mode; 2782 } */ 2783 int error; 2784 struct vnode *vp; 2785 2786 error = namei_simple_user(SCARG(uap, path), 2787 NSM_FOLLOW_TRYEMULROOT, &vp); 2788 if (error != 0) 2789 return (error); 2790 2791 error = change_mode(vp, SCARG(uap, mode), l); 2792 2793 vrele(vp); 2794 return (error); 2795 } 2796 2797 /* 2798 * Change mode of a file given a file descriptor. 2799 */ 2800 /* ARGSUSED */ 2801 int 2802 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2803 { 2804 /* { 2805 syscallarg(int) fd; 2806 syscallarg(int) mode; 2807 } */ 2808 file_t *fp; 2809 int error; 2810 2811 /* fd_getvnode() will use the descriptor for us */ 2812 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2813 return (error); 2814 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2815 fd_putfile(SCARG(uap, fd)); 2816 return (error); 2817 } 2818 2819 /* 2820 * Change mode of a file given path name; this version does not follow links. 2821 */ 2822 /* ARGSUSED */ 2823 int 2824 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2825 { 2826 /* { 2827 syscallarg(const char *) path; 2828 syscallarg(int) mode; 2829 } */ 2830 int error; 2831 struct vnode *vp; 2832 2833 error = namei_simple_user(SCARG(uap, path), 2834 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2835 if (error != 0) 2836 return (error); 2837 2838 error = change_mode(vp, SCARG(uap, mode), l); 2839 2840 vrele(vp); 2841 return (error); 2842 } 2843 2844 /* 2845 * Common routine to set mode given a vnode. 2846 */ 2847 static int 2848 change_mode(struct vnode *vp, int mode, struct lwp *l) 2849 { 2850 struct vattr vattr; 2851 int error; 2852 2853 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2854 vattr_null(&vattr); 2855 vattr.va_mode = mode & ALLPERMS; 2856 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2857 VOP_UNLOCK(vp); 2858 return (error); 2859 } 2860 2861 /* 2862 * Set ownership given a path name; this version follows links. 2863 */ 2864 /* ARGSUSED */ 2865 int 2866 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2867 { 2868 /* { 2869 syscallarg(const char *) path; 2870 syscallarg(uid_t) uid; 2871 syscallarg(gid_t) gid; 2872 } */ 2873 int error; 2874 struct vnode *vp; 2875 2876 error = namei_simple_user(SCARG(uap, path), 2877 NSM_FOLLOW_TRYEMULROOT, &vp); 2878 if (error != 0) 2879 return (error); 2880 2881 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2882 2883 vrele(vp); 2884 return (error); 2885 } 2886 2887 /* 2888 * Set ownership given a path name; this version follows links. 2889 * Provides POSIX semantics. 2890 */ 2891 /* ARGSUSED */ 2892 int 2893 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2894 { 2895 /* { 2896 syscallarg(const char *) path; 2897 syscallarg(uid_t) uid; 2898 syscallarg(gid_t) gid; 2899 } */ 2900 int error; 2901 struct vnode *vp; 2902 2903 error = namei_simple_user(SCARG(uap, path), 2904 NSM_FOLLOW_TRYEMULROOT, &vp); 2905 if (error != 0) 2906 return (error); 2907 2908 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2909 2910 vrele(vp); 2911 return (error); 2912 } 2913 2914 /* 2915 * Set ownership given a file descriptor. 2916 */ 2917 /* ARGSUSED */ 2918 int 2919 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2920 { 2921 /* { 2922 syscallarg(int) fd; 2923 syscallarg(uid_t) uid; 2924 syscallarg(gid_t) gid; 2925 } */ 2926 int error; 2927 file_t *fp; 2928 2929 /* fd_getvnode() will use the descriptor for us */ 2930 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2931 return (error); 2932 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2933 l, 0); 2934 fd_putfile(SCARG(uap, fd)); 2935 return (error); 2936 } 2937 2938 /* 2939 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2940 */ 2941 /* ARGSUSED */ 2942 int 2943 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2944 { 2945 /* { 2946 syscallarg(int) fd; 2947 syscallarg(uid_t) uid; 2948 syscallarg(gid_t) gid; 2949 } */ 2950 int error; 2951 file_t *fp; 2952 2953 /* fd_getvnode() will use the descriptor for us */ 2954 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2955 return (error); 2956 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2957 l, 1); 2958 fd_putfile(SCARG(uap, fd)); 2959 return (error); 2960 } 2961 2962 /* 2963 * Set ownership given a path name; this version does not follow links. 2964 */ 2965 /* ARGSUSED */ 2966 int 2967 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2968 { 2969 /* { 2970 syscallarg(const char *) path; 2971 syscallarg(uid_t) uid; 2972 syscallarg(gid_t) gid; 2973 } */ 2974 int error; 2975 struct vnode *vp; 2976 2977 error = namei_simple_user(SCARG(uap, path), 2978 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2979 if (error != 0) 2980 return (error); 2981 2982 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2983 2984 vrele(vp); 2985 return (error); 2986 } 2987 2988 /* 2989 * Set ownership given a path name; this version does not follow links. 2990 * Provides POSIX/XPG semantics. 2991 */ 2992 /* ARGSUSED */ 2993 int 2994 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2995 { 2996 /* { 2997 syscallarg(const char *) path; 2998 syscallarg(uid_t) uid; 2999 syscallarg(gid_t) gid; 3000 } */ 3001 int error; 3002 struct vnode *vp; 3003 3004 error = namei_simple_user(SCARG(uap, path), 3005 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3006 if (error != 0) 3007 return (error); 3008 3009 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3010 3011 vrele(vp); 3012 return (error); 3013 } 3014 3015 /* 3016 * Common routine to set ownership given a vnode. 3017 */ 3018 static int 3019 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3020 int posix_semantics) 3021 { 3022 struct vattr vattr; 3023 mode_t newmode; 3024 int error; 3025 3026 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3027 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3028 goto out; 3029 3030 #define CHANGED(x) ((int)(x) != -1) 3031 newmode = vattr.va_mode; 3032 if (posix_semantics) { 3033 /* 3034 * POSIX/XPG semantics: if the caller is not the super-user, 3035 * clear set-user-id and set-group-id bits. Both POSIX and 3036 * the XPG consider the behaviour for calls by the super-user 3037 * implementation-defined; we leave the set-user-id and set- 3038 * group-id settings intact in that case. 3039 */ 3040 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 3041 NULL) != 0) 3042 newmode &= ~(S_ISUID | S_ISGID); 3043 } else { 3044 /* 3045 * NetBSD semantics: when changing owner and/or group, 3046 * clear the respective bit(s). 3047 */ 3048 if (CHANGED(uid)) 3049 newmode &= ~S_ISUID; 3050 if (CHANGED(gid)) 3051 newmode &= ~S_ISGID; 3052 } 3053 /* Update va_mode iff altered. */ 3054 if (vattr.va_mode == newmode) 3055 newmode = VNOVAL; 3056 3057 vattr_null(&vattr); 3058 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3059 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3060 vattr.va_mode = newmode; 3061 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3062 #undef CHANGED 3063 3064 out: 3065 VOP_UNLOCK(vp); 3066 return (error); 3067 } 3068 3069 /* 3070 * Set the access and modification times given a path name; this 3071 * version follows links. 3072 */ 3073 /* ARGSUSED */ 3074 int 3075 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3076 register_t *retval) 3077 { 3078 /* { 3079 syscallarg(const char *) path; 3080 syscallarg(const struct timeval *) tptr; 3081 } */ 3082 3083 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3084 SCARG(uap, tptr), UIO_USERSPACE); 3085 } 3086 3087 /* 3088 * Set the access and modification times given a file descriptor. 3089 */ 3090 /* ARGSUSED */ 3091 int 3092 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3093 register_t *retval) 3094 { 3095 /* { 3096 syscallarg(int) fd; 3097 syscallarg(const struct timeval *) tptr; 3098 } */ 3099 int error; 3100 file_t *fp; 3101 3102 /* fd_getvnode() will use the descriptor for us */ 3103 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3104 return (error); 3105 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3106 UIO_USERSPACE); 3107 fd_putfile(SCARG(uap, fd)); 3108 return (error); 3109 } 3110 3111 /* 3112 * Set the access and modification times given a path name; this 3113 * version does not follow links. 3114 */ 3115 int 3116 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3117 register_t *retval) 3118 { 3119 /* { 3120 syscallarg(const char *) path; 3121 syscallarg(const struct timeval *) tptr; 3122 } */ 3123 3124 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3125 SCARG(uap, tptr), UIO_USERSPACE); 3126 } 3127 3128 /* 3129 * Common routine to set access and modification times given a vnode. 3130 */ 3131 int 3132 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3133 const struct timeval *tptr, enum uio_seg seg) 3134 { 3135 struct vattr vattr; 3136 int error, dorele = 0; 3137 namei_simple_flags_t sflags; 3138 3139 bool vanull, setbirthtime; 3140 struct timespec ts[2]; 3141 3142 /* 3143 * I have checked all callers and they pass either FOLLOW, 3144 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3145 * is 0. More to the point, they don't pass anything else. 3146 * Let's keep it that way at least until the namei interfaces 3147 * are fully sanitized. 3148 */ 3149 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3150 sflags = (flag == FOLLOW) ? 3151 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3152 3153 if (tptr == NULL) { 3154 vanull = true; 3155 nanotime(&ts[0]); 3156 ts[1] = ts[0]; 3157 } else { 3158 struct timeval tv[2]; 3159 3160 vanull = false; 3161 if (seg != UIO_SYSSPACE) { 3162 error = copyin(tptr, tv, sizeof (tv)); 3163 if (error != 0) 3164 return error; 3165 tptr = tv; 3166 } 3167 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3168 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3169 } 3170 3171 if (vp == NULL) { 3172 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3173 error = namei_simple_user(path, sflags, &vp); 3174 if (error != 0) 3175 return error; 3176 dorele = 1; 3177 } 3178 3179 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3180 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3181 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3182 vattr_null(&vattr); 3183 vattr.va_atime = ts[0]; 3184 vattr.va_mtime = ts[1]; 3185 if (setbirthtime) 3186 vattr.va_birthtime = ts[1]; 3187 if (vanull) 3188 vattr.va_vaflags |= VA_UTIMES_NULL; 3189 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3190 VOP_UNLOCK(vp); 3191 3192 if (dorele != 0) 3193 vrele(vp); 3194 3195 return error; 3196 } 3197 3198 /* 3199 * Truncate a file given its path name. 3200 */ 3201 /* ARGSUSED */ 3202 int 3203 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3204 { 3205 /* { 3206 syscallarg(const char *) path; 3207 syscallarg(int) pad; 3208 syscallarg(off_t) length; 3209 } */ 3210 struct vnode *vp; 3211 struct vattr vattr; 3212 int error; 3213 3214 error = namei_simple_user(SCARG(uap, path), 3215 NSM_FOLLOW_TRYEMULROOT, &vp); 3216 if (error != 0) 3217 return (error); 3218 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3219 if (vp->v_type == VDIR) 3220 error = EISDIR; 3221 else if ((error = vn_writechk(vp)) == 0 && 3222 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3223 vattr_null(&vattr); 3224 vattr.va_size = SCARG(uap, length); 3225 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3226 } 3227 vput(vp); 3228 return (error); 3229 } 3230 3231 /* 3232 * Truncate a file given a file descriptor. 3233 */ 3234 /* ARGSUSED */ 3235 int 3236 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3237 { 3238 /* { 3239 syscallarg(int) fd; 3240 syscallarg(int) pad; 3241 syscallarg(off_t) length; 3242 } */ 3243 struct vattr vattr; 3244 struct vnode *vp; 3245 file_t *fp; 3246 int error; 3247 3248 /* fd_getvnode() will use the descriptor for us */ 3249 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3250 return (error); 3251 if ((fp->f_flag & FWRITE) == 0) { 3252 error = EINVAL; 3253 goto out; 3254 } 3255 vp = fp->f_data; 3256 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3257 if (vp->v_type == VDIR) 3258 error = EISDIR; 3259 else if ((error = vn_writechk(vp)) == 0) { 3260 vattr_null(&vattr); 3261 vattr.va_size = SCARG(uap, length); 3262 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3263 } 3264 VOP_UNLOCK(vp); 3265 out: 3266 fd_putfile(SCARG(uap, fd)); 3267 return (error); 3268 } 3269 3270 /* 3271 * Sync an open file. 3272 */ 3273 /* ARGSUSED */ 3274 int 3275 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3276 { 3277 /* { 3278 syscallarg(int) fd; 3279 } */ 3280 struct vnode *vp; 3281 file_t *fp; 3282 int error; 3283 3284 /* fd_getvnode() will use the descriptor for us */ 3285 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3286 return (error); 3287 vp = fp->f_data; 3288 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3289 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3290 VOP_UNLOCK(vp); 3291 fd_putfile(SCARG(uap, fd)); 3292 return (error); 3293 } 3294 3295 /* 3296 * Sync a range of file data. API modeled after that found in AIX. 3297 * 3298 * FDATASYNC indicates that we need only save enough metadata to be able 3299 * to re-read the written data. Note we duplicate AIX's requirement that 3300 * the file be open for writing. 3301 */ 3302 /* ARGSUSED */ 3303 int 3304 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3305 { 3306 /* { 3307 syscallarg(int) fd; 3308 syscallarg(int) flags; 3309 syscallarg(off_t) start; 3310 syscallarg(off_t) length; 3311 } */ 3312 struct vnode *vp; 3313 file_t *fp; 3314 int flags, nflags; 3315 off_t s, e, len; 3316 int error; 3317 3318 /* fd_getvnode() will use the descriptor for us */ 3319 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3320 return (error); 3321 3322 if ((fp->f_flag & FWRITE) == 0) { 3323 error = EBADF; 3324 goto out; 3325 } 3326 3327 flags = SCARG(uap, flags); 3328 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3329 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3330 error = EINVAL; 3331 goto out; 3332 } 3333 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3334 if (flags & FDATASYNC) 3335 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3336 else 3337 nflags = FSYNC_WAIT; 3338 if (flags & FDISKSYNC) 3339 nflags |= FSYNC_CACHE; 3340 3341 len = SCARG(uap, length); 3342 /* If length == 0, we do the whole file, and s = l = 0 will do that */ 3343 if (len) { 3344 s = SCARG(uap, start); 3345 e = s + len; 3346 if (e < s) { 3347 error = EINVAL; 3348 goto out; 3349 } 3350 } else { 3351 e = 0; 3352 s = 0; 3353 } 3354 3355 vp = fp->f_data; 3356 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3357 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3358 VOP_UNLOCK(vp); 3359 out: 3360 fd_putfile(SCARG(uap, fd)); 3361 return (error); 3362 } 3363 3364 /* 3365 * Sync the data of an open file. 3366 */ 3367 /* ARGSUSED */ 3368 int 3369 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3370 { 3371 /* { 3372 syscallarg(int) fd; 3373 } */ 3374 struct vnode *vp; 3375 file_t *fp; 3376 int error; 3377 3378 /* fd_getvnode() will use the descriptor for us */ 3379 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3380 return (error); 3381 if ((fp->f_flag & FWRITE) == 0) { 3382 fd_putfile(SCARG(uap, fd)); 3383 return (EBADF); 3384 } 3385 vp = fp->f_data; 3386 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3387 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3388 VOP_UNLOCK(vp); 3389 fd_putfile(SCARG(uap, fd)); 3390 return (error); 3391 } 3392 3393 /* 3394 * Rename files, (standard) BSD semantics frontend. 3395 */ 3396 /* ARGSUSED */ 3397 int 3398 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3399 { 3400 /* { 3401 syscallarg(const char *) from; 3402 syscallarg(const char *) to; 3403 } */ 3404 3405 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3406 } 3407 3408 /* 3409 * Rename files, POSIX semantics frontend. 3410 */ 3411 /* ARGSUSED */ 3412 int 3413 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3414 { 3415 /* { 3416 syscallarg(const char *) from; 3417 syscallarg(const char *) to; 3418 } */ 3419 3420 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3421 } 3422 3423 /* 3424 * Rename files. Source and destination must either both be directories, 3425 * or both not be directories. If target is a directory, it must be empty. 3426 * If `from' and `to' refer to the same object, the value of the `retain' 3427 * argument is used to determine whether `from' will be 3428 * 3429 * (retain == 0) deleted unless `from' and `to' refer to the same 3430 * object in the file system's name space (BSD). 3431 * (retain == 1) always retained (POSIX). 3432 */ 3433 int 3434 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3435 { 3436 struct vnode *tvp, *fvp, *tdvp; 3437 struct pathbuf *frompb, *topb; 3438 struct nameidata fromnd, tond; 3439 struct mount *fs; 3440 struct lwp *l = curlwp; 3441 struct proc *p; 3442 int error; 3443 3444 error = pathbuf_maybe_copyin(from, seg, &frompb); 3445 if (error) { 3446 return error; 3447 } 3448 error = pathbuf_maybe_copyin(to, seg, &topb); 3449 if (error) { 3450 pathbuf_destroy(frompb); 3451 return error; 3452 } 3453 3454 NDINIT(&fromnd, DELETE, LOCKPARENT | TRYEMULROOT | INRENAME, 3455 frompb); 3456 if ((error = namei(&fromnd)) != 0) { 3457 pathbuf_destroy(frompb); 3458 pathbuf_destroy(topb); 3459 return (error); 3460 } 3461 if (fromnd.ni_dvp != fromnd.ni_vp) 3462 VOP_UNLOCK(fromnd.ni_dvp); 3463 fvp = fromnd.ni_vp; 3464 3465 fs = fvp->v_mount; 3466 error = VFS_RENAMELOCK_ENTER(fs); 3467 if (error) { 3468 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3469 vrele(fromnd.ni_dvp); 3470 vrele(fvp); 3471 goto out1; 3472 } 3473 3474 /* 3475 * close, partially, yet another race - ideally we should only 3476 * go as far as getting fromnd.ni_dvp before getting the per-fs 3477 * lock, and then continue to get fromnd.ni_vp, but we can't do 3478 * that with namei as it stands. 3479 * 3480 * This still won't prevent rmdir from nuking fromnd.ni_vp 3481 * under us. The real fix is to get the locks in the right 3482 * order and do the lookups in the right places, but that's a 3483 * major rototill. 3484 * 3485 * Note: this logic (as well as this whole function) is cloned 3486 * in nfs_serv.c. Proceed accordingly. 3487 */ 3488 vrele(fvp); 3489 if ((fromnd.ni_cnd.cn_namelen == 1 && 3490 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3491 (fromnd.ni_cnd.cn_namelen == 2 && 3492 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3493 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3494 error = EINVAL; 3495 VFS_RENAMELOCK_EXIT(fs); 3496 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3497 vrele(fromnd.ni_dvp); 3498 goto out1; 3499 } 3500 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3501 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd, 0); 3502 if (error) { 3503 VOP_UNLOCK(fromnd.ni_dvp); 3504 VFS_RENAMELOCK_EXIT(fs); 3505 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3506 vrele(fromnd.ni_dvp); 3507 goto out1; 3508 } 3509 VOP_UNLOCK(fromnd.ni_vp); 3510 if (fromnd.ni_dvp != fromnd.ni_vp) 3511 VOP_UNLOCK(fromnd.ni_dvp); 3512 fvp = fromnd.ni_vp; 3513 3514 NDINIT(&tond, RENAME, 3515 LOCKPARENT | LOCKLEAF | NOCACHE | TRYEMULROOT 3516 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0), 3517 topb); 3518 if ((error = namei(&tond)) != 0) { 3519 VFS_RENAMELOCK_EXIT(fs); 3520 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3521 vrele(fromnd.ni_dvp); 3522 vrele(fvp); 3523 goto out1; 3524 } 3525 tdvp = tond.ni_dvp; 3526 tvp = tond.ni_vp; 3527 3528 if (tvp != NULL) { 3529 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3530 error = ENOTDIR; 3531 goto out; 3532 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3533 error = EISDIR; 3534 goto out; 3535 } 3536 } 3537 3538 if (fvp == tdvp) 3539 error = EINVAL; 3540 3541 /* 3542 * Source and destination refer to the same object. 3543 */ 3544 if (fvp == tvp) { 3545 if (retain) 3546 error = -1; 3547 else if (fromnd.ni_dvp == tdvp && 3548 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3549 !memcmp(fromnd.ni_cnd.cn_nameptr, 3550 tond.ni_cnd.cn_nameptr, 3551 fromnd.ni_cnd.cn_namelen)) 3552 error = -1; 3553 } 3554 3555 #if NVERIEXEC > 0 3556 if (!error) { 3557 char *f1, *f2; 3558 size_t f1_len; 3559 size_t f2_len; 3560 3561 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3562 f1 = kmem_alloc(f1_len, KM_SLEEP); 3563 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3564 3565 f2_len = tond.ni_cnd.cn_namelen + 1; 3566 f2 = kmem_alloc(f2_len, KM_SLEEP); 3567 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3568 3569 error = veriexec_renamechk(l, fvp, f1, tvp, f2); 3570 3571 kmem_free(f1, f1_len); 3572 kmem_free(f2, f2_len); 3573 } 3574 #endif /* NVERIEXEC > 0 */ 3575 3576 out: 3577 p = l->l_proc; 3578 if (!error) { 3579 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3580 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3581 VFS_RENAMELOCK_EXIT(fs); 3582 } else { 3583 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3584 if (tdvp == tvp) 3585 vrele(tdvp); 3586 else 3587 vput(tdvp); 3588 if (tvp) 3589 vput(tvp); 3590 VFS_RENAMELOCK_EXIT(fs); 3591 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3592 vrele(fromnd.ni_dvp); 3593 vrele(fvp); 3594 } 3595 out1: 3596 pathbuf_destroy(frompb); 3597 pathbuf_destroy(topb); 3598 return (error == -1 ? 0 : error); 3599 } 3600 3601 /* 3602 * Make a directory file. 3603 */ 3604 /* ARGSUSED */ 3605 int 3606 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3607 { 3608 /* { 3609 syscallarg(const char *) path; 3610 syscallarg(int) mode; 3611 } */ 3612 3613 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3614 } 3615 3616 int 3617 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3618 { 3619 struct proc *p = curlwp->l_proc; 3620 struct vnode *vp; 3621 struct vattr vattr; 3622 int error; 3623 struct pathbuf *pb; 3624 struct nameidata nd; 3625 3626 /* XXX bollocks, should pass in a pathbuf */ 3627 error = pathbuf_maybe_copyin(path, seg, &pb); 3628 if (error) { 3629 return error; 3630 } 3631 3632 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 3633 if ((error = namei(&nd)) != 0) { 3634 pathbuf_destroy(pb); 3635 return (error); 3636 } 3637 vp = nd.ni_vp; 3638 if (vp != NULL) { 3639 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3640 if (nd.ni_dvp == vp) 3641 vrele(nd.ni_dvp); 3642 else 3643 vput(nd.ni_dvp); 3644 vrele(vp); 3645 pathbuf_destroy(pb); 3646 return (EEXIST); 3647 } 3648 vattr_null(&vattr); 3649 vattr.va_type = VDIR; 3650 /* We will read cwdi->cwdi_cmask unlocked. */ 3651 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3652 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3653 if (!error) 3654 vput(nd.ni_vp); 3655 pathbuf_destroy(pb); 3656 return (error); 3657 } 3658 3659 /* 3660 * Remove a directory file. 3661 */ 3662 /* ARGSUSED */ 3663 int 3664 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3665 { 3666 /* { 3667 syscallarg(const char *) path; 3668 } */ 3669 struct vnode *vp; 3670 int error; 3671 struct pathbuf *pb; 3672 struct nameidata nd; 3673 3674 error = pathbuf_copyin(SCARG(uap, path), &pb); 3675 if (error) { 3676 return error; 3677 } 3678 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 3679 if ((error = namei(&nd)) != 0) { 3680 pathbuf_destroy(pb); 3681 return error; 3682 } 3683 vp = nd.ni_vp; 3684 if (vp->v_type != VDIR) { 3685 error = ENOTDIR; 3686 goto out; 3687 } 3688 /* 3689 * No rmdir "." please. 3690 */ 3691 if (nd.ni_dvp == vp) { 3692 error = EINVAL; 3693 goto out; 3694 } 3695 /* 3696 * The root of a mounted filesystem cannot be deleted. 3697 */ 3698 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3699 error = EBUSY; 3700 goto out; 3701 } 3702 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3703 pathbuf_destroy(pb); 3704 return (error); 3705 3706 out: 3707 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3708 if (nd.ni_dvp == vp) 3709 vrele(nd.ni_dvp); 3710 else 3711 vput(nd.ni_dvp); 3712 vput(vp); 3713 pathbuf_destroy(pb); 3714 return (error); 3715 } 3716 3717 /* 3718 * Read a block of directory entries in a file system independent format. 3719 */ 3720 int 3721 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3722 { 3723 /* { 3724 syscallarg(int) fd; 3725 syscallarg(char *) buf; 3726 syscallarg(size_t) count; 3727 } */ 3728 file_t *fp; 3729 int error, done; 3730 3731 /* fd_getvnode() will use the descriptor for us */ 3732 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3733 return (error); 3734 if ((fp->f_flag & FREAD) == 0) { 3735 error = EBADF; 3736 goto out; 3737 } 3738 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3739 SCARG(uap, count), &done, l, 0, 0); 3740 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3741 *retval = done; 3742 out: 3743 fd_putfile(SCARG(uap, fd)); 3744 return (error); 3745 } 3746 3747 /* 3748 * Set the mode mask for creation of filesystem nodes. 3749 */ 3750 int 3751 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3752 { 3753 /* { 3754 syscallarg(mode_t) newmask; 3755 } */ 3756 struct proc *p = l->l_proc; 3757 struct cwdinfo *cwdi; 3758 3759 /* 3760 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3761 * important is that we serialize changes to the mask. The 3762 * rw_exit() will issue a write memory barrier on our behalf, 3763 * and force the changes out to other CPUs (as it must use an 3764 * atomic operation, draining the local CPU's store buffers). 3765 */ 3766 cwdi = p->p_cwdi; 3767 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3768 *retval = cwdi->cwdi_cmask; 3769 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3770 rw_exit(&cwdi->cwdi_lock); 3771 3772 return (0); 3773 } 3774 3775 int 3776 dorevoke(struct vnode *vp, kauth_cred_t cred) 3777 { 3778 struct vattr vattr; 3779 int error; 3780 3781 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0) 3782 return error; 3783 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3784 (error = kauth_authorize_generic(cred, 3785 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3786 VOP_REVOKE(vp, REVOKEALL); 3787 return (error); 3788 } 3789 3790 /* 3791 * Void all references to file by ripping underlying filesystem 3792 * away from vnode. 3793 */ 3794 /* ARGSUSED */ 3795 int 3796 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3797 { 3798 /* { 3799 syscallarg(const char *) path; 3800 } */ 3801 struct vnode *vp; 3802 int error; 3803 3804 error = namei_simple_user(SCARG(uap, path), 3805 NSM_FOLLOW_TRYEMULROOT, &vp); 3806 if (error != 0) 3807 return (error); 3808 error = dorevoke(vp, l->l_cred); 3809 vrele(vp); 3810 return (error); 3811 } 3812