1 /* $NetBSD: vfs_syscalls.c,v 1.487 2014/06/30 17:51:31 maxv Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.487 2014/06/30 17:51:31 maxv Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 123 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 124 enum uio_seg); 125 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 126 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 127 enum uio_seg); 128 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 129 enum uio_seg, int); 130 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 131 size_t, register_t *); 132 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 133 134 static int fd_nameiat(struct lwp *, int, struct nameidata *); 135 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 136 namei_simple_flags_t, struct vnode **); 137 138 139 /* 140 * This table is used to maintain compatibility with 4.3BSD 141 * and NetBSD 0.9 mount syscalls - and possibly other systems. 142 * Note, the order is important! 143 * 144 * Do not modify this table. It should only contain filesystems 145 * supported by NetBSD 0.9 and 4.3BSD. 146 */ 147 const char * const mountcompatnames[] = { 148 NULL, /* 0 = MOUNT_NONE */ 149 MOUNT_FFS, /* 1 = MOUNT_UFS */ 150 MOUNT_NFS, /* 2 */ 151 MOUNT_MFS, /* 3 */ 152 MOUNT_MSDOS, /* 4 */ 153 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 154 MOUNT_FDESC, /* 6 */ 155 MOUNT_KERNFS, /* 7 */ 156 NULL, /* 8 = MOUNT_DEVFS */ 157 MOUNT_AFS, /* 9 */ 158 }; 159 160 const int nmountcompatnames = __arraycount(mountcompatnames); 161 162 static int 163 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 164 { 165 file_t *dfp; 166 int error; 167 168 if (fdat != AT_FDCWD) { 169 if ((error = fd_getvnode(fdat, &dfp)) != 0) 170 goto out; 171 172 NDAT(ndp, dfp->f_data); 173 } 174 175 error = namei(ndp); 176 177 if (fdat != AT_FDCWD) 178 fd_putfile(fdat); 179 out: 180 return error; 181 } 182 183 static int 184 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 185 namei_simple_flags_t sflags, struct vnode **vp_ret) 186 { 187 file_t *dfp; 188 struct vnode *dvp; 189 int error; 190 191 if (fdat != AT_FDCWD) { 192 if ((error = fd_getvnode(fdat, &dfp)) != 0) 193 goto out; 194 195 dvp = dfp->f_data; 196 } else { 197 dvp = NULL; 198 } 199 200 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 201 202 if (fdat != AT_FDCWD) 203 fd_putfile(fdat); 204 out: 205 return error; 206 } 207 208 static int 209 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 210 { 211 int error; 212 213 fp->f_flag = flags & FMASK; 214 fp->f_type = DTYPE_VNODE; 215 fp->f_ops = &vnops; 216 fp->f_data = vp; 217 218 if (flags & (O_EXLOCK | O_SHLOCK)) { 219 struct flock lf; 220 int type; 221 222 lf.l_whence = SEEK_SET; 223 lf.l_start = 0; 224 lf.l_len = 0; 225 if (flags & O_EXLOCK) 226 lf.l_type = F_WRLCK; 227 else 228 lf.l_type = F_RDLCK; 229 type = F_FLOCK; 230 if ((flags & FNONBLOCK) == 0) 231 type |= F_WAIT; 232 VOP_UNLOCK(vp); 233 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 234 if (error) { 235 (void) vn_close(vp, fp->f_flag, fp->f_cred); 236 fd_abort(l->l_proc, fp, indx); 237 return error; 238 } 239 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 240 atomic_or_uint(&fp->f_flag, FHASLOCK); 241 } 242 if (flags & O_CLOEXEC) 243 fd_set_exclose(l, indx, true); 244 return 0; 245 } 246 247 static int 248 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 249 void *data, size_t *data_len) 250 { 251 struct mount *mp; 252 int error = 0, saved_flags; 253 254 mp = vp->v_mount; 255 saved_flags = mp->mnt_flag; 256 257 /* We can operate only on VV_ROOT nodes. */ 258 if ((vp->v_vflag & VV_ROOT) == 0) { 259 error = EINVAL; 260 goto out; 261 } 262 263 /* 264 * We only allow the filesystem to be reloaded if it 265 * is currently mounted read-only. Additionally, we 266 * prevent read-write to read-only downgrades. 267 */ 268 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 269 (mp->mnt_flag & MNT_RDONLY) == 0 && 270 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 271 error = EOPNOTSUPP; /* Needs translation */ 272 goto out; 273 } 274 275 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 276 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 277 if (error) 278 goto out; 279 280 if (vfs_busy(mp, NULL)) { 281 error = EPERM; 282 goto out; 283 } 284 285 mutex_enter(&mp->mnt_updating); 286 287 mp->mnt_flag &= ~MNT_OP_FLAGS; 288 mp->mnt_flag |= flags & MNT_OP_FLAGS; 289 290 /* 291 * Set the mount level flags. 292 */ 293 if (flags & MNT_RDONLY) 294 mp->mnt_flag |= MNT_RDONLY; 295 else if (mp->mnt_flag & MNT_RDONLY) 296 mp->mnt_iflag |= IMNT_WANTRDWR; 297 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 298 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 299 error = VFS_MOUNT(mp, path, data, data_len); 300 301 if (error && data != NULL) { 302 int error2; 303 304 /* 305 * Update failed; let's try and see if it was an 306 * export request. For compat with 3.0 and earlier. 307 */ 308 error2 = vfs_hooks_reexport(mp, path, data); 309 310 /* 311 * Only update error code if the export request was 312 * understood but some problem occurred while 313 * processing it. 314 */ 315 if (error2 != EJUSTRETURN) 316 error = error2; 317 } 318 319 if (mp->mnt_iflag & IMNT_WANTRDWR) 320 mp->mnt_flag &= ~MNT_RDONLY; 321 if (error) 322 mp->mnt_flag = saved_flags; 323 mp->mnt_flag &= ~MNT_OP_FLAGS; 324 mp->mnt_iflag &= ~IMNT_WANTRDWR; 325 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 326 if (mp->mnt_syncer == NULL) 327 error = vfs_allocate_syncvnode(mp); 328 } else { 329 if (mp->mnt_syncer != NULL) 330 vfs_deallocate_syncvnode(mp); 331 } 332 mutex_exit(&mp->mnt_updating); 333 vfs_unbusy(mp, false, NULL); 334 335 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 336 (flags & MNT_EXTATTR)) { 337 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 338 NULL, 0, NULL) != 0) { 339 printf("%s: failed to start extattr, error = %d", 340 mp->mnt_stat.f_mntonname, error); 341 mp->mnt_flag &= ~MNT_EXTATTR; 342 } 343 } 344 345 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 346 !(flags & MNT_EXTATTR)) { 347 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 348 NULL, 0, NULL) != 0) { 349 printf("%s: failed to stop extattr, error = %d", 350 mp->mnt_stat.f_mntonname, error); 351 mp->mnt_flag |= MNT_RDONLY; 352 } 353 } 354 out: 355 return (error); 356 } 357 358 static int 359 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 360 { 361 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 362 int error; 363 364 /* Copy file-system type from userspace. */ 365 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 366 if (error) { 367 /* 368 * Historically, filesystem types were identified by numbers. 369 * If we get an integer for the filesystem type instead of a 370 * string, we check to see if it matches one of the historic 371 * filesystem types. 372 */ 373 u_long fsindex = (u_long)fstype; 374 if (fsindex >= nmountcompatnames || 375 mountcompatnames[fsindex] == NULL) 376 return ENODEV; 377 strlcpy(fstypename, mountcompatnames[fsindex], 378 sizeof(fstypename)); 379 } 380 381 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 382 if (strcmp(fstypename, "ufs") == 0) 383 fstypename[0] = 'f'; 384 385 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 386 return 0; 387 388 /* If we can autoload a vfs module, try again */ 389 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 390 391 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 392 return 0; 393 394 return ENODEV; 395 } 396 397 static int 398 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 399 void *data, size_t *data_len) 400 { 401 struct mount *mp; 402 int error; 403 404 /* If MNT_GETARGS is specified, it should be the only flag. */ 405 if (flags & ~MNT_GETARGS) 406 return EINVAL; 407 408 mp = vp->v_mount; 409 410 /* XXX: probably some notion of "can see" here if we want isolation. */ 411 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 412 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 413 if (error) 414 return error; 415 416 if ((vp->v_vflag & VV_ROOT) == 0) 417 return EINVAL; 418 419 if (vfs_busy(mp, NULL)) 420 return EPERM; 421 422 mutex_enter(&mp->mnt_updating); 423 mp->mnt_flag &= ~MNT_OP_FLAGS; 424 mp->mnt_flag |= MNT_GETARGS; 425 error = VFS_MOUNT(mp, path, data, data_len); 426 mp->mnt_flag &= ~MNT_OP_FLAGS; 427 mutex_exit(&mp->mnt_updating); 428 429 vfs_unbusy(mp, false, NULL); 430 return (error); 431 } 432 433 int 434 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 435 { 436 /* { 437 syscallarg(const char *) type; 438 syscallarg(const char *) path; 439 syscallarg(int) flags; 440 syscallarg(void *) data; 441 syscallarg(size_t) data_len; 442 } */ 443 444 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 445 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 446 SCARG(uap, data_len), retval); 447 } 448 449 int 450 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 451 const char *path, int flags, void *data, enum uio_seg data_seg, 452 size_t data_len, register_t *retval) 453 { 454 struct vnode *vp; 455 void *data_buf = data; 456 bool vfsopsrele = false; 457 size_t alloc_sz = 0; 458 int error; 459 460 /* XXX: The calling convention of this routine is totally bizarre */ 461 if (vfsops) 462 vfsopsrele = true; 463 464 /* 465 * Get vnode to be covered 466 */ 467 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 468 if (error != 0) { 469 vp = NULL; 470 goto done; 471 } 472 473 if (vfsops == NULL) { 474 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 475 vfsops = vp->v_mount->mnt_op; 476 } else { 477 /* 'type' is userspace */ 478 error = mount_get_vfsops(type, &vfsops); 479 if (error != 0) 480 goto done; 481 vfsopsrele = true; 482 } 483 } 484 485 /* 486 * We allow data to be NULL, even for userspace. Some fs's don't need 487 * it. The others will handle NULL. 488 */ 489 if (data != NULL && data_seg == UIO_USERSPACE) { 490 if (data_len == 0) { 491 /* No length supplied, use default for filesystem */ 492 data_len = vfsops->vfs_min_mount_data; 493 494 /* 495 * Hopefully a longer buffer won't make copyin() fail. 496 * For compatibility with 3.0 and earlier. 497 */ 498 if (flags & MNT_UPDATE 499 && data_len < sizeof (struct mnt_export_args30)) 500 data_len = sizeof (struct mnt_export_args30); 501 } 502 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 503 error = EINVAL; 504 goto done; 505 } 506 alloc_sz = data_len; 507 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 508 509 /* NFS needs the buffer even for mnt_getargs .... */ 510 error = copyin(data, data_buf, data_len); 511 if (error != 0) 512 goto done; 513 } 514 515 if (flags & MNT_GETARGS) { 516 if (data_len == 0) { 517 error = EINVAL; 518 goto done; 519 } 520 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 521 if (error != 0) 522 goto done; 523 if (data_seg == UIO_USERSPACE) 524 error = copyout(data_buf, data, data_len); 525 *retval = data_len; 526 } else if (flags & MNT_UPDATE) { 527 error = mount_update(l, vp, path, flags, data_buf, &data_len); 528 } else { 529 /* Locking is handled internally in mount_domount(). */ 530 KASSERT(vfsopsrele == true); 531 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 532 &data_len); 533 vfsopsrele = false; 534 } 535 536 done: 537 if (vfsopsrele) 538 vfs_delref(vfsops); 539 if (vp != NULL) { 540 vrele(vp); 541 } 542 if (data_buf != data) 543 kmem_free(data_buf, alloc_sz); 544 return (error); 545 } 546 547 /* 548 * Unmount a file system. 549 * 550 * Note: unmount takes a path to the vnode mounted on as argument, 551 * not special file (as before). 552 */ 553 /* ARGSUSED */ 554 int 555 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 556 { 557 /* { 558 syscallarg(const char *) path; 559 syscallarg(int) flags; 560 } */ 561 struct vnode *vp; 562 struct mount *mp; 563 int error; 564 struct pathbuf *pb; 565 struct nameidata nd; 566 567 error = pathbuf_copyin(SCARG(uap, path), &pb); 568 if (error) { 569 return error; 570 } 571 572 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 573 if ((error = namei(&nd)) != 0) { 574 pathbuf_destroy(pb); 575 return error; 576 } 577 vp = nd.ni_vp; 578 pathbuf_destroy(pb); 579 580 mp = vp->v_mount; 581 atomic_inc_uint(&mp->mnt_refcnt); 582 VOP_UNLOCK(vp); 583 584 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 585 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 586 if (error) { 587 vrele(vp); 588 vfs_destroy(mp); 589 return (error); 590 } 591 592 /* 593 * Don't allow unmounting the root file system. 594 */ 595 if (mp->mnt_flag & MNT_ROOTFS) { 596 vrele(vp); 597 vfs_destroy(mp); 598 return (EINVAL); 599 } 600 601 /* 602 * Must be the root of the filesystem 603 */ 604 if ((vp->v_vflag & VV_ROOT) == 0) { 605 vrele(vp); 606 vfs_destroy(mp); 607 return (EINVAL); 608 } 609 610 vrele(vp); 611 error = dounmount(mp, SCARG(uap, flags), l); 612 vfs_destroy(mp); 613 return error; 614 } 615 616 /* 617 * Sync each mounted filesystem. 618 */ 619 #ifdef DEBUG 620 int syncprt = 0; 621 struct ctldebug debug0 = { "syncprt", &syncprt }; 622 #endif 623 624 void 625 do_sys_sync(struct lwp *l) 626 { 627 struct mount *mp, *nmp; 628 int asyncflag; 629 630 mutex_enter(&mountlist_lock); 631 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 632 if (vfs_busy(mp, &nmp)) { 633 continue; 634 } 635 mutex_enter(&mp->mnt_updating); 636 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 637 asyncflag = mp->mnt_flag & MNT_ASYNC; 638 mp->mnt_flag &= ~MNT_ASYNC; 639 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 640 if (asyncflag) 641 mp->mnt_flag |= MNT_ASYNC; 642 } 643 mutex_exit(&mp->mnt_updating); 644 vfs_unbusy(mp, false, &nmp); 645 } 646 mutex_exit(&mountlist_lock); 647 #ifdef DEBUG 648 if (syncprt) 649 vfs_bufstats(); 650 #endif /* DEBUG */ 651 } 652 653 /* ARGSUSED */ 654 int 655 sys_sync(struct lwp *l, const void *v, register_t *retval) 656 { 657 do_sys_sync(l); 658 return (0); 659 } 660 661 662 /* 663 * Access or change filesystem quotas. 664 * 665 * (this is really 14 different calls bundled into one) 666 */ 667 668 static int 669 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 670 { 671 struct quotastat info_k; 672 int error; 673 674 /* ensure any padding bytes are cleared */ 675 memset(&info_k, 0, sizeof(info_k)); 676 677 error = vfs_quotactl_stat(mp, &info_k); 678 if (error) { 679 return error; 680 } 681 682 return copyout(&info_k, info_u, sizeof(info_k)); 683 } 684 685 static int 686 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 687 struct quotaidtypestat *info_u) 688 { 689 struct quotaidtypestat info_k; 690 int error; 691 692 /* ensure any padding bytes are cleared */ 693 memset(&info_k, 0, sizeof(info_k)); 694 695 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 696 if (error) { 697 return error; 698 } 699 700 return copyout(&info_k, info_u, sizeof(info_k)); 701 } 702 703 static int 704 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 705 struct quotaobjtypestat *info_u) 706 { 707 struct quotaobjtypestat info_k; 708 int error; 709 710 /* ensure any padding bytes are cleared */ 711 memset(&info_k, 0, sizeof(info_k)); 712 713 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 714 if (error) { 715 return error; 716 } 717 718 return copyout(&info_k, info_u, sizeof(info_k)); 719 } 720 721 static int 722 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 723 struct quotaval *val_u) 724 { 725 struct quotakey key_k; 726 struct quotaval val_k; 727 int error; 728 729 /* ensure any padding bytes are cleared */ 730 memset(&val_k, 0, sizeof(val_k)); 731 732 error = copyin(key_u, &key_k, sizeof(key_k)); 733 if (error) { 734 return error; 735 } 736 737 error = vfs_quotactl_get(mp, &key_k, &val_k); 738 if (error) { 739 return error; 740 } 741 742 return copyout(&val_k, val_u, sizeof(val_k)); 743 } 744 745 static int 746 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 747 const struct quotaval *val_u) 748 { 749 struct quotakey key_k; 750 struct quotaval val_k; 751 int error; 752 753 error = copyin(key_u, &key_k, sizeof(key_k)); 754 if (error) { 755 return error; 756 } 757 758 error = copyin(val_u, &val_k, sizeof(val_k)); 759 if (error) { 760 return error; 761 } 762 763 return vfs_quotactl_put(mp, &key_k, &val_k); 764 } 765 766 static int 767 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 768 { 769 struct quotakey key_k; 770 int error; 771 772 error = copyin(key_u, &key_k, sizeof(key_k)); 773 if (error) { 774 return error; 775 } 776 777 return vfs_quotactl_del(mp, &key_k); 778 } 779 780 static int 781 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 782 { 783 struct quotakcursor cursor_k; 784 int error; 785 786 /* ensure any padding bytes are cleared */ 787 memset(&cursor_k, 0, sizeof(cursor_k)); 788 789 error = vfs_quotactl_cursoropen(mp, &cursor_k); 790 if (error) { 791 return error; 792 } 793 794 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 795 } 796 797 static int 798 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 799 { 800 struct quotakcursor cursor_k; 801 int error; 802 803 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 804 if (error) { 805 return error; 806 } 807 808 return vfs_quotactl_cursorclose(mp, &cursor_k); 809 } 810 811 static int 812 do_sys_quotactl_cursorskipidtype(struct mount *mp, 813 struct quotakcursor *cursor_u, int idtype) 814 { 815 struct quotakcursor cursor_k; 816 int error; 817 818 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 819 if (error) { 820 return error; 821 } 822 823 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 824 if (error) { 825 return error; 826 } 827 828 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 829 } 830 831 static int 832 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 833 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 834 unsigned *ret_u) 835 { 836 #define CGET_STACK_MAX 8 837 struct quotakcursor cursor_k; 838 struct quotakey stackkeys[CGET_STACK_MAX]; 839 struct quotaval stackvals[CGET_STACK_MAX]; 840 struct quotakey *keys_k; 841 struct quotaval *vals_k; 842 unsigned ret_k; 843 int error; 844 845 if (maxnum > 128) { 846 maxnum = 128; 847 } 848 849 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 850 if (error) { 851 return error; 852 } 853 854 if (maxnum <= CGET_STACK_MAX) { 855 keys_k = stackkeys; 856 vals_k = stackvals; 857 /* ensure any padding bytes are cleared */ 858 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 859 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 860 } else { 861 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 862 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 863 } 864 865 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 866 &ret_k); 867 if (error) { 868 goto fail; 869 } 870 871 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 872 if (error) { 873 goto fail; 874 } 875 876 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 877 if (error) { 878 goto fail; 879 } 880 881 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 882 if (error) { 883 goto fail; 884 } 885 886 /* do last to maximize the chance of being able to recover a failure */ 887 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 888 889 fail: 890 if (keys_k != stackkeys) { 891 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 892 } 893 if (vals_k != stackvals) { 894 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 895 } 896 return error; 897 } 898 899 static int 900 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 901 int *ret_u) 902 { 903 struct quotakcursor cursor_k; 904 int ret_k; 905 int error; 906 907 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 908 if (error) { 909 return error; 910 } 911 912 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 913 if (error) { 914 return error; 915 } 916 917 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 918 if (error) { 919 return error; 920 } 921 922 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 923 } 924 925 static int 926 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 927 { 928 struct quotakcursor cursor_k; 929 int error; 930 931 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 932 if (error) { 933 return error; 934 } 935 936 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 937 if (error) { 938 return error; 939 } 940 941 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 942 } 943 944 static int 945 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 946 { 947 char *path_k; 948 int error; 949 950 /* XXX this should probably be a struct pathbuf */ 951 path_k = PNBUF_GET(); 952 error = copyin(path_u, path_k, PATH_MAX); 953 if (error) { 954 PNBUF_PUT(path_k); 955 return error; 956 } 957 958 error = vfs_quotactl_quotaon(mp, idtype, path_k); 959 960 PNBUF_PUT(path_k); 961 return error; 962 } 963 964 static int 965 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 966 { 967 return vfs_quotactl_quotaoff(mp, idtype); 968 } 969 970 int 971 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 972 { 973 struct mount *mp; 974 struct vnode *vp; 975 int error; 976 977 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 978 if (error != 0) 979 return (error); 980 mp = vp->v_mount; 981 982 switch (args->qc_op) { 983 case QUOTACTL_STAT: 984 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 985 break; 986 case QUOTACTL_IDTYPESTAT: 987 error = do_sys_quotactl_idtypestat(mp, 988 args->u.idtypestat.qc_idtype, 989 args->u.idtypestat.qc_info); 990 break; 991 case QUOTACTL_OBJTYPESTAT: 992 error = do_sys_quotactl_objtypestat(mp, 993 args->u.objtypestat.qc_objtype, 994 args->u.objtypestat.qc_info); 995 break; 996 case QUOTACTL_GET: 997 error = do_sys_quotactl_get(mp, 998 args->u.get.qc_key, 999 args->u.get.qc_val); 1000 break; 1001 case QUOTACTL_PUT: 1002 error = do_sys_quotactl_put(mp, 1003 args->u.put.qc_key, 1004 args->u.put.qc_val); 1005 break; 1006 case QUOTACTL_DEL: 1007 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1008 break; 1009 case QUOTACTL_CURSOROPEN: 1010 error = do_sys_quotactl_cursoropen(mp, 1011 args->u.cursoropen.qc_cursor); 1012 break; 1013 case QUOTACTL_CURSORCLOSE: 1014 error = do_sys_quotactl_cursorclose(mp, 1015 args->u.cursorclose.qc_cursor); 1016 break; 1017 case QUOTACTL_CURSORSKIPIDTYPE: 1018 error = do_sys_quotactl_cursorskipidtype(mp, 1019 args->u.cursorskipidtype.qc_cursor, 1020 args->u.cursorskipidtype.qc_idtype); 1021 break; 1022 case QUOTACTL_CURSORGET: 1023 error = do_sys_quotactl_cursorget(mp, 1024 args->u.cursorget.qc_cursor, 1025 args->u.cursorget.qc_keys, 1026 args->u.cursorget.qc_vals, 1027 args->u.cursorget.qc_maxnum, 1028 args->u.cursorget.qc_ret); 1029 break; 1030 case QUOTACTL_CURSORATEND: 1031 error = do_sys_quotactl_cursoratend(mp, 1032 args->u.cursoratend.qc_cursor, 1033 args->u.cursoratend.qc_ret); 1034 break; 1035 case QUOTACTL_CURSORREWIND: 1036 error = do_sys_quotactl_cursorrewind(mp, 1037 args->u.cursorrewind.qc_cursor); 1038 break; 1039 case QUOTACTL_QUOTAON: 1040 error = do_sys_quotactl_quotaon(mp, 1041 args->u.quotaon.qc_idtype, 1042 args->u.quotaon.qc_quotafile); 1043 break; 1044 case QUOTACTL_QUOTAOFF: 1045 error = do_sys_quotactl_quotaoff(mp, 1046 args->u.quotaoff.qc_idtype); 1047 break; 1048 default: 1049 error = EINVAL; 1050 break; 1051 } 1052 1053 vrele(vp); 1054 return error; 1055 } 1056 1057 /* ARGSUSED */ 1058 int 1059 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1060 register_t *retval) 1061 { 1062 /* { 1063 syscallarg(const char *) path; 1064 syscallarg(struct quotactl_args *) args; 1065 } */ 1066 struct quotactl_args args; 1067 int error; 1068 1069 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1070 if (error) { 1071 return error; 1072 } 1073 1074 return do_sys_quotactl(SCARG(uap, path), &args); 1075 } 1076 1077 int 1078 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1079 int root) 1080 { 1081 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1082 int error = 0; 1083 1084 /* 1085 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1086 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1087 * overrides MNT_NOWAIT. 1088 */ 1089 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1090 (flags != MNT_WAIT && flags != 0)) { 1091 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1092 goto done; 1093 } 1094 1095 /* Get the filesystem stats now */ 1096 memset(sp, 0, sizeof(*sp)); 1097 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1098 return error; 1099 } 1100 1101 if (cwdi->cwdi_rdir == NULL) 1102 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1103 done: 1104 if (cwdi->cwdi_rdir != NULL) { 1105 size_t len; 1106 char *bp; 1107 char c; 1108 char *path = PNBUF_GET(); 1109 1110 bp = path + MAXPATHLEN; 1111 *--bp = '\0'; 1112 rw_enter(&cwdi->cwdi_lock, RW_READER); 1113 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1114 MAXPATHLEN / 2, 0, l); 1115 rw_exit(&cwdi->cwdi_lock); 1116 if (error) { 1117 PNBUF_PUT(path); 1118 return error; 1119 } 1120 len = strlen(bp); 1121 if (len != 1) { 1122 /* 1123 * for mount points that are below our root, we can see 1124 * them, so we fix up the pathname and return them. The 1125 * rest we cannot see, so we don't allow viewing the 1126 * data. 1127 */ 1128 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1129 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1130 (void)strlcpy(sp->f_mntonname, 1131 c == '\0' ? "/" : &sp->f_mntonname[len], 1132 sizeof(sp->f_mntonname)); 1133 } else { 1134 if (root) 1135 (void)strlcpy(sp->f_mntonname, "/", 1136 sizeof(sp->f_mntonname)); 1137 else 1138 error = EPERM; 1139 } 1140 } 1141 PNBUF_PUT(path); 1142 } 1143 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1144 return error; 1145 } 1146 1147 /* 1148 * Get filesystem statistics by path. 1149 */ 1150 int 1151 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1152 { 1153 struct mount *mp; 1154 int error; 1155 struct vnode *vp; 1156 1157 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1158 if (error != 0) 1159 return error; 1160 mp = vp->v_mount; 1161 error = dostatvfs(mp, sb, l, flags, 1); 1162 vrele(vp); 1163 return error; 1164 } 1165 1166 /* ARGSUSED */ 1167 int 1168 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1169 { 1170 /* { 1171 syscallarg(const char *) path; 1172 syscallarg(struct statvfs *) buf; 1173 syscallarg(int) flags; 1174 } */ 1175 struct statvfs *sb; 1176 int error; 1177 1178 sb = STATVFSBUF_GET(); 1179 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1180 if (error == 0) 1181 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1182 STATVFSBUF_PUT(sb); 1183 return error; 1184 } 1185 1186 /* 1187 * Get filesystem statistics by fd. 1188 */ 1189 int 1190 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1191 { 1192 file_t *fp; 1193 struct mount *mp; 1194 int error; 1195 1196 /* fd_getvnode() will use the descriptor for us */ 1197 if ((error = fd_getvnode(fd, &fp)) != 0) 1198 return (error); 1199 mp = ((struct vnode *)fp->f_data)->v_mount; 1200 error = dostatvfs(mp, sb, curlwp, flags, 1); 1201 fd_putfile(fd); 1202 return error; 1203 } 1204 1205 /* ARGSUSED */ 1206 int 1207 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1208 { 1209 /* { 1210 syscallarg(int) fd; 1211 syscallarg(struct statvfs *) buf; 1212 syscallarg(int) flags; 1213 } */ 1214 struct statvfs *sb; 1215 int error; 1216 1217 sb = STATVFSBUF_GET(); 1218 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1219 if (error == 0) 1220 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1221 STATVFSBUF_PUT(sb); 1222 return error; 1223 } 1224 1225 1226 /* 1227 * Get statistics on all filesystems. 1228 */ 1229 int 1230 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1231 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1232 register_t *retval) 1233 { 1234 int root = 0; 1235 struct proc *p = l->l_proc; 1236 struct mount *mp, *nmp; 1237 struct statvfs *sb; 1238 size_t count, maxcount; 1239 int error = 0; 1240 1241 sb = STATVFSBUF_GET(); 1242 maxcount = bufsize / entry_sz; 1243 mutex_enter(&mountlist_lock); 1244 count = 0; 1245 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1246 if (vfs_busy(mp, &nmp)) { 1247 continue; 1248 } 1249 if (sfsp && count < maxcount) { 1250 error = dostatvfs(mp, sb, l, flags, 0); 1251 if (error) { 1252 vfs_unbusy(mp, false, &nmp); 1253 error = 0; 1254 continue; 1255 } 1256 error = copyfn(sb, sfsp, entry_sz); 1257 if (error) { 1258 vfs_unbusy(mp, false, NULL); 1259 goto out; 1260 } 1261 sfsp = (char *)sfsp + entry_sz; 1262 root |= strcmp(sb->f_mntonname, "/") == 0; 1263 } 1264 count++; 1265 vfs_unbusy(mp, false, &nmp); 1266 } 1267 mutex_exit(&mountlist_lock); 1268 1269 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1270 /* 1271 * fake a root entry 1272 */ 1273 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1274 sb, l, flags, 1); 1275 if (error != 0) 1276 goto out; 1277 if (sfsp) { 1278 error = copyfn(sb, sfsp, entry_sz); 1279 if (error != 0) 1280 goto out; 1281 } 1282 count++; 1283 } 1284 if (sfsp && count > maxcount) 1285 *retval = maxcount; 1286 else 1287 *retval = count; 1288 out: 1289 STATVFSBUF_PUT(sb); 1290 return error; 1291 } 1292 1293 int 1294 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1295 { 1296 /* { 1297 syscallarg(struct statvfs *) buf; 1298 syscallarg(size_t) bufsize; 1299 syscallarg(int) flags; 1300 } */ 1301 1302 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1303 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1304 } 1305 1306 /* 1307 * Change current working directory to a given file descriptor. 1308 */ 1309 /* ARGSUSED */ 1310 int 1311 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1312 { 1313 /* { 1314 syscallarg(int) fd; 1315 } */ 1316 struct proc *p = l->l_proc; 1317 struct cwdinfo *cwdi; 1318 struct vnode *vp, *tdp; 1319 struct mount *mp; 1320 file_t *fp; 1321 int error, fd; 1322 1323 /* fd_getvnode() will use the descriptor for us */ 1324 fd = SCARG(uap, fd); 1325 if ((error = fd_getvnode(fd, &fp)) != 0) 1326 return (error); 1327 vp = fp->f_data; 1328 1329 vref(vp); 1330 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1331 if (vp->v_type != VDIR) 1332 error = ENOTDIR; 1333 else 1334 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1335 if (error) { 1336 vput(vp); 1337 goto out; 1338 } 1339 while ((mp = vp->v_mountedhere) != NULL) { 1340 error = vfs_busy(mp, NULL); 1341 vput(vp); 1342 if (error != 0) 1343 goto out; 1344 error = VFS_ROOT(mp, &tdp); 1345 vfs_unbusy(mp, false, NULL); 1346 if (error) 1347 goto out; 1348 vp = tdp; 1349 } 1350 VOP_UNLOCK(vp); 1351 1352 /* 1353 * Disallow changing to a directory not under the process's 1354 * current root directory (if there is one). 1355 */ 1356 cwdi = p->p_cwdi; 1357 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1358 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1359 vrele(vp); 1360 error = EPERM; /* operation not permitted */ 1361 } else { 1362 vrele(cwdi->cwdi_cdir); 1363 cwdi->cwdi_cdir = vp; 1364 } 1365 rw_exit(&cwdi->cwdi_lock); 1366 1367 out: 1368 fd_putfile(fd); 1369 return (error); 1370 } 1371 1372 /* 1373 * Change this process's notion of the root directory to a given file 1374 * descriptor. 1375 */ 1376 int 1377 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1378 { 1379 struct proc *p = l->l_proc; 1380 struct vnode *vp; 1381 file_t *fp; 1382 int error, fd = SCARG(uap, fd); 1383 1384 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1385 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1386 return error; 1387 /* fd_getvnode() will use the descriptor for us */ 1388 if ((error = fd_getvnode(fd, &fp)) != 0) 1389 return error; 1390 vp = fp->f_data; 1391 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1392 if (vp->v_type != VDIR) 1393 error = ENOTDIR; 1394 else 1395 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1396 VOP_UNLOCK(vp); 1397 if (error) 1398 goto out; 1399 vref(vp); 1400 1401 change_root(p->p_cwdi, vp, l); 1402 1403 out: 1404 fd_putfile(fd); 1405 return (error); 1406 } 1407 1408 /* 1409 * Change current working directory (``.''). 1410 */ 1411 /* ARGSUSED */ 1412 int 1413 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1414 { 1415 /* { 1416 syscallarg(const char *) path; 1417 } */ 1418 struct proc *p = l->l_proc; 1419 struct cwdinfo *cwdi; 1420 int error; 1421 struct vnode *vp; 1422 1423 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1424 &vp, l)) != 0) 1425 return (error); 1426 cwdi = p->p_cwdi; 1427 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1428 vrele(cwdi->cwdi_cdir); 1429 cwdi->cwdi_cdir = vp; 1430 rw_exit(&cwdi->cwdi_lock); 1431 return (0); 1432 } 1433 1434 /* 1435 * Change notion of root (``/'') directory. 1436 */ 1437 /* ARGSUSED */ 1438 int 1439 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1440 { 1441 /* { 1442 syscallarg(const char *) path; 1443 } */ 1444 struct proc *p = l->l_proc; 1445 int error; 1446 struct vnode *vp; 1447 1448 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1449 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1450 return (error); 1451 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1452 &vp, l)) != 0) 1453 return (error); 1454 1455 change_root(p->p_cwdi, vp, l); 1456 1457 return (0); 1458 } 1459 1460 /* 1461 * Common routine for chroot and fchroot. 1462 * NB: callers need to properly authorize the change root operation. 1463 */ 1464 void 1465 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1466 { 1467 struct proc *p = l->l_proc; 1468 kauth_cred_t ncred; 1469 1470 ncred = kauth_cred_alloc(); 1471 1472 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1473 if (cwdi->cwdi_rdir != NULL) 1474 vrele(cwdi->cwdi_rdir); 1475 cwdi->cwdi_rdir = vp; 1476 1477 /* 1478 * Prevent escaping from chroot by putting the root under 1479 * the working directory. Silently chdir to / if we aren't 1480 * already there. 1481 */ 1482 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1483 /* 1484 * XXX would be more failsafe to change directory to a 1485 * deadfs node here instead 1486 */ 1487 vrele(cwdi->cwdi_cdir); 1488 vref(vp); 1489 cwdi->cwdi_cdir = vp; 1490 } 1491 rw_exit(&cwdi->cwdi_lock); 1492 1493 /* Get a write lock on the process credential. */ 1494 proc_crmod_enter(); 1495 1496 kauth_cred_clone(p->p_cred, ncred); 1497 kauth_proc_chroot(ncred, p->p_cwdi); 1498 1499 /* Broadcast our credentials to the process and other LWPs. */ 1500 proc_crmod_leave(ncred, p->p_cred, true); 1501 } 1502 1503 /* 1504 * Common routine for chroot and chdir. 1505 * XXX "where" should be enum uio_seg 1506 */ 1507 int 1508 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1509 { 1510 struct pathbuf *pb; 1511 struct nameidata nd; 1512 int error; 1513 1514 error = pathbuf_maybe_copyin(path, where, &pb); 1515 if (error) { 1516 return error; 1517 } 1518 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1519 if ((error = namei(&nd)) != 0) { 1520 pathbuf_destroy(pb); 1521 return error; 1522 } 1523 *vpp = nd.ni_vp; 1524 pathbuf_destroy(pb); 1525 1526 if ((*vpp)->v_type != VDIR) 1527 error = ENOTDIR; 1528 else 1529 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1530 1531 if (error) 1532 vput(*vpp); 1533 else 1534 VOP_UNLOCK(*vpp); 1535 return (error); 1536 } 1537 1538 /* 1539 * Internals of sys_open - path has already been converted into a pathbuf 1540 * (so we can easily reuse this function from other parts of the kernel, 1541 * like posix_spawn post-processing). 1542 */ 1543 int 1544 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1545 int open_mode, int *fd) 1546 { 1547 struct proc *p = l->l_proc; 1548 struct cwdinfo *cwdi = p->p_cwdi; 1549 file_t *fp; 1550 struct vnode *vp; 1551 int flags, cmode; 1552 int indx, error; 1553 struct nameidata nd; 1554 1555 if (open_flags & O_SEARCH) { 1556 open_flags &= ~(int)O_SEARCH; 1557 } 1558 1559 flags = FFLAGS(open_flags); 1560 if ((flags & (FREAD | FWRITE)) == 0) 1561 return EINVAL; 1562 1563 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1564 return error; 1565 } 1566 1567 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1568 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1569 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1570 if (dvp != NULL) 1571 NDAT(&nd, dvp); 1572 1573 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1574 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1575 fd_abort(p, fp, indx); 1576 if ((error == EDUPFD || error == EMOVEFD) && 1577 l->l_dupfd >= 0 && /* XXX from fdopen */ 1578 (error = 1579 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1580 *fd = indx; 1581 return 0; 1582 } 1583 if (error == ERESTART) 1584 error = EINTR; 1585 return error; 1586 } 1587 1588 l->l_dupfd = 0; 1589 vp = nd.ni_vp; 1590 1591 if ((error = open_setfp(l, fp, vp, indx, flags))) 1592 return error; 1593 1594 VOP_UNLOCK(vp); 1595 *fd = indx; 1596 fd_affix(p, fp, indx); 1597 return 0; 1598 } 1599 1600 int 1601 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1602 { 1603 struct pathbuf *pb; 1604 int error, oflags; 1605 1606 oflags = FFLAGS(open_flags); 1607 if ((oflags & (FREAD | FWRITE)) == 0) 1608 return EINVAL; 1609 1610 pb = pathbuf_create(path); 1611 if (pb == NULL) 1612 return ENOMEM; 1613 1614 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1615 pathbuf_destroy(pb); 1616 1617 return error; 1618 } 1619 1620 /* 1621 * Check permissions, allocate an open file structure, 1622 * and call the device open routine if any. 1623 */ 1624 static int 1625 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1626 int mode, int *fd) 1627 { 1628 file_t *dfp = NULL; 1629 struct vnode *dvp = NULL; 1630 struct pathbuf *pb; 1631 int error; 1632 1633 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1634 if (path == NULL) { 1635 pb = pathbuf_create("."); 1636 if (pb == NULL) 1637 return ENOMEM; 1638 } else 1639 #endif 1640 { 1641 error = pathbuf_copyin(path, &pb); 1642 if (error) 1643 return error; 1644 } 1645 1646 if (fdat != AT_FDCWD) { 1647 /* fd_getvnode() will use the descriptor for us */ 1648 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1649 goto out; 1650 1651 dvp = dfp->f_data; 1652 } 1653 1654 error = do_open(l, dvp, pb, flags, mode, fd); 1655 1656 if (dfp != NULL) 1657 fd_putfile(fdat); 1658 out: 1659 pathbuf_destroy(pb); 1660 return error; 1661 } 1662 1663 int 1664 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1665 { 1666 /* { 1667 syscallarg(const char *) path; 1668 syscallarg(int) flags; 1669 syscallarg(int) mode; 1670 } */ 1671 int error; 1672 int fd; 1673 1674 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1675 SCARG(uap, flags), SCARG(uap, mode), &fd); 1676 1677 if (error == 0) 1678 *retval = fd; 1679 1680 return error; 1681 } 1682 1683 int 1684 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1685 { 1686 /* { 1687 syscallarg(int) fd; 1688 syscallarg(const char *) path; 1689 syscallarg(int) oflags; 1690 syscallarg(int) mode; 1691 } */ 1692 int error; 1693 int fd; 1694 1695 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1696 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1697 1698 if (error == 0) 1699 *retval = fd; 1700 1701 return error; 1702 } 1703 1704 static void 1705 vfs__fhfree(fhandle_t *fhp) 1706 { 1707 size_t fhsize; 1708 1709 fhsize = FHANDLE_SIZE(fhp); 1710 kmem_free(fhp, fhsize); 1711 } 1712 1713 /* 1714 * vfs_composefh: compose a filehandle. 1715 */ 1716 1717 int 1718 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1719 { 1720 struct mount *mp; 1721 struct fid *fidp; 1722 int error; 1723 size_t needfhsize; 1724 size_t fidsize; 1725 1726 mp = vp->v_mount; 1727 fidp = NULL; 1728 if (*fh_size < FHANDLE_SIZE_MIN) { 1729 fidsize = 0; 1730 } else { 1731 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1732 if (fhp != NULL) { 1733 memset(fhp, 0, *fh_size); 1734 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1735 fidp = &fhp->fh_fid; 1736 } 1737 } 1738 error = VFS_VPTOFH(vp, fidp, &fidsize); 1739 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1740 if (error == 0 && *fh_size < needfhsize) { 1741 error = E2BIG; 1742 } 1743 *fh_size = needfhsize; 1744 return error; 1745 } 1746 1747 int 1748 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1749 { 1750 struct mount *mp; 1751 fhandle_t *fhp; 1752 size_t fhsize; 1753 size_t fidsize; 1754 int error; 1755 1756 mp = vp->v_mount; 1757 fidsize = 0; 1758 error = VFS_VPTOFH(vp, NULL, &fidsize); 1759 KASSERT(error != 0); 1760 if (error != E2BIG) { 1761 goto out; 1762 } 1763 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1764 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1765 if (fhp == NULL) { 1766 error = ENOMEM; 1767 goto out; 1768 } 1769 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1770 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1771 if (error == 0) { 1772 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1773 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1774 *fhpp = fhp; 1775 } else { 1776 kmem_free(fhp, fhsize); 1777 } 1778 out: 1779 return error; 1780 } 1781 1782 void 1783 vfs_composefh_free(fhandle_t *fhp) 1784 { 1785 1786 vfs__fhfree(fhp); 1787 } 1788 1789 /* 1790 * vfs_fhtovp: lookup a vnode by a filehandle. 1791 */ 1792 1793 int 1794 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1795 { 1796 struct mount *mp; 1797 int error; 1798 1799 *vpp = NULL; 1800 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1801 if (mp == NULL) { 1802 error = ESTALE; 1803 goto out; 1804 } 1805 if (mp->mnt_op->vfs_fhtovp == NULL) { 1806 error = EOPNOTSUPP; 1807 goto out; 1808 } 1809 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1810 out: 1811 return error; 1812 } 1813 1814 /* 1815 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1816 * the needed size. 1817 */ 1818 1819 int 1820 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1821 { 1822 fhandle_t *fhp; 1823 int error; 1824 1825 if (fhsize > FHANDLE_SIZE_MAX) { 1826 return EINVAL; 1827 } 1828 if (fhsize < FHANDLE_SIZE_MIN) { 1829 return EINVAL; 1830 } 1831 again: 1832 fhp = kmem_alloc(fhsize, KM_SLEEP); 1833 if (fhp == NULL) { 1834 return ENOMEM; 1835 } 1836 error = copyin(ufhp, fhp, fhsize); 1837 if (error == 0) { 1838 /* XXX this check shouldn't be here */ 1839 if (FHANDLE_SIZE(fhp) == fhsize) { 1840 *fhpp = fhp; 1841 return 0; 1842 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1843 /* 1844 * a kludge for nfsv2 padded handles. 1845 */ 1846 size_t sz; 1847 1848 sz = FHANDLE_SIZE(fhp); 1849 kmem_free(fhp, fhsize); 1850 fhsize = sz; 1851 goto again; 1852 } else { 1853 /* 1854 * userland told us wrong size. 1855 */ 1856 error = EINVAL; 1857 } 1858 } 1859 kmem_free(fhp, fhsize); 1860 return error; 1861 } 1862 1863 void 1864 vfs_copyinfh_free(fhandle_t *fhp) 1865 { 1866 1867 vfs__fhfree(fhp); 1868 } 1869 1870 /* 1871 * Get file handle system call 1872 */ 1873 int 1874 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1875 { 1876 /* { 1877 syscallarg(char *) fname; 1878 syscallarg(fhandle_t *) fhp; 1879 syscallarg(size_t *) fh_size; 1880 } */ 1881 struct vnode *vp; 1882 fhandle_t *fh; 1883 int error; 1884 struct pathbuf *pb; 1885 struct nameidata nd; 1886 size_t sz; 1887 size_t usz; 1888 1889 /* 1890 * Must be super user 1891 */ 1892 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1893 0, NULL, NULL, NULL); 1894 if (error) 1895 return (error); 1896 1897 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1898 if (error) { 1899 return error; 1900 } 1901 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1902 error = namei(&nd); 1903 if (error) { 1904 pathbuf_destroy(pb); 1905 return error; 1906 } 1907 vp = nd.ni_vp; 1908 pathbuf_destroy(pb); 1909 1910 error = vfs_composefh_alloc(vp, &fh); 1911 vput(vp); 1912 if (error != 0) { 1913 return error; 1914 } 1915 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1916 if (error != 0) { 1917 goto out; 1918 } 1919 sz = FHANDLE_SIZE(fh); 1920 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1921 if (error != 0) { 1922 goto out; 1923 } 1924 if (usz >= sz) { 1925 error = copyout(fh, SCARG(uap, fhp), sz); 1926 } else { 1927 error = E2BIG; 1928 } 1929 out: 1930 vfs_composefh_free(fh); 1931 return (error); 1932 } 1933 1934 /* 1935 * Open a file given a file handle. 1936 * 1937 * Check permissions, allocate an open file structure, 1938 * and call the device open routine if any. 1939 */ 1940 1941 int 1942 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1943 register_t *retval) 1944 { 1945 file_t *fp; 1946 struct vnode *vp = NULL; 1947 kauth_cred_t cred = l->l_cred; 1948 file_t *nfp; 1949 int indx, error = 0; 1950 struct vattr va; 1951 fhandle_t *fh; 1952 int flags; 1953 proc_t *p; 1954 1955 p = curproc; 1956 1957 /* 1958 * Must be super user 1959 */ 1960 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1961 0, NULL, NULL, NULL))) 1962 return (error); 1963 1964 if (oflags & O_SEARCH) { 1965 oflags &= ~(int)O_SEARCH; 1966 } 1967 1968 flags = FFLAGS(oflags); 1969 if ((flags & (FREAD | FWRITE)) == 0) 1970 return (EINVAL); 1971 if ((flags & O_CREAT)) 1972 return (EINVAL); 1973 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1974 return (error); 1975 fp = nfp; 1976 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1977 if (error != 0) { 1978 goto bad; 1979 } 1980 error = vfs_fhtovp(fh, &vp); 1981 vfs_copyinfh_free(fh); 1982 if (error != 0) { 1983 goto bad; 1984 } 1985 1986 /* Now do an effective vn_open */ 1987 1988 if (vp->v_type == VSOCK) { 1989 error = EOPNOTSUPP; 1990 goto bad; 1991 } 1992 error = vn_openchk(vp, cred, flags); 1993 if (error != 0) 1994 goto bad; 1995 if (flags & O_TRUNC) { 1996 VOP_UNLOCK(vp); /* XXX */ 1997 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1998 vattr_null(&va); 1999 va.va_size = 0; 2000 error = VOP_SETATTR(vp, &va, cred); 2001 if (error) 2002 goto bad; 2003 } 2004 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2005 goto bad; 2006 if (flags & FWRITE) { 2007 mutex_enter(vp->v_interlock); 2008 vp->v_writecount++; 2009 mutex_exit(vp->v_interlock); 2010 } 2011 2012 /* done with modified vn_open, now finish what sys_open does. */ 2013 if ((error = open_setfp(l, fp, vp, indx, flags))) 2014 return error; 2015 2016 VOP_UNLOCK(vp); 2017 *retval = indx; 2018 fd_affix(p, fp, indx); 2019 return (0); 2020 2021 bad: 2022 fd_abort(p, fp, indx); 2023 if (vp != NULL) 2024 vput(vp); 2025 return (error); 2026 } 2027 2028 int 2029 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2030 { 2031 /* { 2032 syscallarg(const void *) fhp; 2033 syscallarg(size_t) fh_size; 2034 syscallarg(int) flags; 2035 } */ 2036 2037 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2038 SCARG(uap, flags), retval); 2039 } 2040 2041 int 2042 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2043 { 2044 int error; 2045 fhandle_t *fh; 2046 struct vnode *vp; 2047 2048 /* 2049 * Must be super user 2050 */ 2051 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2052 0, NULL, NULL, NULL))) 2053 return (error); 2054 2055 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2056 if (error != 0) 2057 return error; 2058 2059 error = vfs_fhtovp(fh, &vp); 2060 vfs_copyinfh_free(fh); 2061 if (error != 0) 2062 return error; 2063 2064 error = vn_stat(vp, sb); 2065 vput(vp); 2066 return error; 2067 } 2068 2069 2070 /* ARGSUSED */ 2071 int 2072 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2073 { 2074 /* { 2075 syscallarg(const void *) fhp; 2076 syscallarg(size_t) fh_size; 2077 syscallarg(struct stat *) sb; 2078 } */ 2079 struct stat sb; 2080 int error; 2081 2082 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2083 if (error) 2084 return error; 2085 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2086 } 2087 2088 int 2089 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2090 int flags) 2091 { 2092 fhandle_t *fh; 2093 struct mount *mp; 2094 struct vnode *vp; 2095 int error; 2096 2097 /* 2098 * Must be super user 2099 */ 2100 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2101 0, NULL, NULL, NULL))) 2102 return error; 2103 2104 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2105 if (error != 0) 2106 return error; 2107 2108 error = vfs_fhtovp(fh, &vp); 2109 vfs_copyinfh_free(fh); 2110 if (error != 0) 2111 return error; 2112 2113 mp = vp->v_mount; 2114 error = dostatvfs(mp, sb, l, flags, 1); 2115 vput(vp); 2116 return error; 2117 } 2118 2119 /* ARGSUSED */ 2120 int 2121 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2122 { 2123 /* { 2124 syscallarg(const void *) fhp; 2125 syscallarg(size_t) fh_size; 2126 syscallarg(struct statvfs *) buf; 2127 syscallarg(int) flags; 2128 } */ 2129 struct statvfs *sb = STATVFSBUF_GET(); 2130 int error; 2131 2132 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2133 SCARG(uap, flags)); 2134 if (error == 0) 2135 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2136 STATVFSBUF_PUT(sb); 2137 return error; 2138 } 2139 2140 /* 2141 * Create a special file. 2142 */ 2143 /* ARGSUSED */ 2144 int 2145 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2146 register_t *retval) 2147 { 2148 /* { 2149 syscallarg(const char *) path; 2150 syscallarg(mode_t) mode; 2151 syscallarg(dev_t) dev; 2152 } */ 2153 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2154 SCARG(uap, dev), retval, UIO_USERSPACE); 2155 } 2156 2157 int 2158 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2159 register_t *retval) 2160 { 2161 /* { 2162 syscallarg(int) fd; 2163 syscallarg(const char *) path; 2164 syscallarg(mode_t) mode; 2165 syscallarg(int) pad; 2166 syscallarg(dev_t) dev; 2167 } */ 2168 2169 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2170 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2171 } 2172 2173 int 2174 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2175 register_t *retval, enum uio_seg seg) 2176 { 2177 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2178 } 2179 2180 int 2181 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2182 dev_t dev, register_t *retval, enum uio_seg seg) 2183 { 2184 struct proc *p = l->l_proc; 2185 struct vnode *vp; 2186 struct vattr vattr; 2187 int error, optype; 2188 struct pathbuf *pb; 2189 struct nameidata nd; 2190 const char *pathstring; 2191 2192 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2193 0, NULL, NULL, NULL)) != 0) 2194 return (error); 2195 2196 optype = VOP_MKNOD_DESCOFFSET; 2197 2198 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2199 if (error) { 2200 return error; 2201 } 2202 pathstring = pathbuf_stringcopy_get(pb); 2203 if (pathstring == NULL) { 2204 pathbuf_destroy(pb); 2205 return ENOMEM; 2206 } 2207 2208 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2209 2210 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2211 goto out; 2212 vp = nd.ni_vp; 2213 2214 if (vp != NULL) 2215 error = EEXIST; 2216 else { 2217 vattr_null(&vattr); 2218 /* We will read cwdi->cwdi_cmask unlocked. */ 2219 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2220 vattr.va_rdev = dev; 2221 2222 switch (mode & S_IFMT) { 2223 case S_IFMT: /* used by badsect to flag bad sectors */ 2224 vattr.va_type = VBAD; 2225 break; 2226 case S_IFCHR: 2227 vattr.va_type = VCHR; 2228 break; 2229 case S_IFBLK: 2230 vattr.va_type = VBLK; 2231 break; 2232 case S_IFWHT: 2233 optype = VOP_WHITEOUT_DESCOFFSET; 2234 break; 2235 case S_IFREG: 2236 #if NVERIEXEC > 0 2237 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2238 O_CREAT); 2239 #endif /* NVERIEXEC > 0 */ 2240 vattr.va_type = VREG; 2241 vattr.va_rdev = VNOVAL; 2242 optype = VOP_CREATE_DESCOFFSET; 2243 break; 2244 default: 2245 error = EINVAL; 2246 break; 2247 } 2248 } 2249 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2250 && vattr.va_rdev == VNOVAL) 2251 error = EINVAL; 2252 if (!error) { 2253 switch (optype) { 2254 case VOP_WHITEOUT_DESCOFFSET: 2255 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2256 if (error) 2257 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2258 vput(nd.ni_dvp); 2259 break; 2260 2261 case VOP_MKNOD_DESCOFFSET: 2262 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2263 &nd.ni_cnd, &vattr); 2264 if (error == 0) 2265 vrele(nd.ni_vp); 2266 vput(nd.ni_dvp); 2267 break; 2268 2269 case VOP_CREATE_DESCOFFSET: 2270 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2271 &nd.ni_cnd, &vattr); 2272 if (error == 0) 2273 vrele(nd.ni_vp); 2274 vput(nd.ni_dvp); 2275 break; 2276 } 2277 } else { 2278 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2279 if (nd.ni_dvp == vp) 2280 vrele(nd.ni_dvp); 2281 else 2282 vput(nd.ni_dvp); 2283 if (vp) 2284 vrele(vp); 2285 } 2286 out: 2287 pathbuf_stringcopy_put(pb, pathstring); 2288 pathbuf_destroy(pb); 2289 return (error); 2290 } 2291 2292 /* 2293 * Create a named pipe. 2294 */ 2295 /* ARGSUSED */ 2296 int 2297 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2298 { 2299 /* { 2300 syscallarg(const char *) path; 2301 syscallarg(int) mode; 2302 } */ 2303 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2304 } 2305 2306 int 2307 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2308 register_t *retval) 2309 { 2310 /* { 2311 syscallarg(int) fd; 2312 syscallarg(const char *) path; 2313 syscallarg(int) mode; 2314 } */ 2315 2316 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2317 SCARG(uap, mode)); 2318 } 2319 2320 static int 2321 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2322 { 2323 struct proc *p = l->l_proc; 2324 struct vattr vattr; 2325 int error; 2326 struct pathbuf *pb; 2327 struct nameidata nd; 2328 2329 error = pathbuf_copyin(path, &pb); 2330 if (error) { 2331 return error; 2332 } 2333 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2334 2335 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2336 pathbuf_destroy(pb); 2337 return error; 2338 } 2339 if (nd.ni_vp != NULL) { 2340 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2341 if (nd.ni_dvp == nd.ni_vp) 2342 vrele(nd.ni_dvp); 2343 else 2344 vput(nd.ni_dvp); 2345 vrele(nd.ni_vp); 2346 pathbuf_destroy(pb); 2347 return (EEXIST); 2348 } 2349 vattr_null(&vattr); 2350 vattr.va_type = VFIFO; 2351 /* We will read cwdi->cwdi_cmask unlocked. */ 2352 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2353 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2354 if (error == 0) 2355 vrele(nd.ni_vp); 2356 vput(nd.ni_dvp); 2357 pathbuf_destroy(pb); 2358 return (error); 2359 } 2360 2361 /* 2362 * Make a hard file link. 2363 */ 2364 /* ARGSUSED */ 2365 int 2366 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2367 const char *link, int follow, register_t *retval) 2368 { 2369 struct vnode *vp; 2370 struct pathbuf *linkpb; 2371 struct nameidata nd; 2372 namei_simple_flags_t ns_flags; 2373 int error; 2374 2375 if (follow & AT_SYMLINK_FOLLOW) 2376 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2377 else 2378 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2379 2380 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2381 if (error != 0) 2382 return (error); 2383 error = pathbuf_copyin(link, &linkpb); 2384 if (error) { 2385 goto out1; 2386 } 2387 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2388 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2389 goto out2; 2390 if (nd.ni_vp) { 2391 error = EEXIST; 2392 goto abortop; 2393 } 2394 /* Prevent hard links on directories. */ 2395 if (vp->v_type == VDIR) { 2396 error = EPERM; 2397 goto abortop; 2398 } 2399 /* Prevent cross-mount operation. */ 2400 if (nd.ni_dvp->v_mount != vp->v_mount) { 2401 error = EXDEV; 2402 goto abortop; 2403 } 2404 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2405 out2: 2406 pathbuf_destroy(linkpb); 2407 out1: 2408 vrele(vp); 2409 return (error); 2410 abortop: 2411 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2412 if (nd.ni_dvp == nd.ni_vp) 2413 vrele(nd.ni_dvp); 2414 else 2415 vput(nd.ni_dvp); 2416 if (nd.ni_vp != NULL) 2417 vrele(nd.ni_vp); 2418 goto out2; 2419 } 2420 2421 int 2422 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2423 { 2424 /* { 2425 syscallarg(const char *) path; 2426 syscallarg(const char *) link; 2427 } */ 2428 const char *path = SCARG(uap, path); 2429 const char *link = SCARG(uap, link); 2430 2431 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2432 AT_SYMLINK_FOLLOW, retval); 2433 } 2434 2435 int 2436 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2437 register_t *retval) 2438 { 2439 /* { 2440 syscallarg(int) fd1; 2441 syscallarg(const char *) name1; 2442 syscallarg(int) fd2; 2443 syscallarg(const char *) name2; 2444 syscallarg(int) flags; 2445 } */ 2446 int fd1 = SCARG(uap, fd1); 2447 const char *name1 = SCARG(uap, name1); 2448 int fd2 = SCARG(uap, fd2); 2449 const char *name2 = SCARG(uap, name2); 2450 int follow; 2451 2452 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2453 2454 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2455 } 2456 2457 2458 int 2459 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2460 { 2461 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2462 } 2463 2464 static int 2465 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2466 const char *link, enum uio_seg seg) 2467 { 2468 struct proc *p = curproc; 2469 struct vattr vattr; 2470 char *path; 2471 int error; 2472 struct pathbuf *linkpb; 2473 struct nameidata nd; 2474 2475 KASSERT(l != NULL || fdat == AT_FDCWD); 2476 2477 path = PNBUF_GET(); 2478 if (seg == UIO_USERSPACE) { 2479 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2480 goto out1; 2481 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2482 goto out1; 2483 } else { 2484 KASSERT(strlen(patharg) < MAXPATHLEN); 2485 strcpy(path, patharg); 2486 linkpb = pathbuf_create(link); 2487 if (linkpb == NULL) { 2488 error = ENOMEM; 2489 goto out1; 2490 } 2491 } 2492 ktrkuser("symlink-target", path, strlen(path)); 2493 2494 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2495 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2496 goto out2; 2497 if (nd.ni_vp) { 2498 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2499 if (nd.ni_dvp == nd.ni_vp) 2500 vrele(nd.ni_dvp); 2501 else 2502 vput(nd.ni_dvp); 2503 vrele(nd.ni_vp); 2504 error = EEXIST; 2505 goto out2; 2506 } 2507 vattr_null(&vattr); 2508 vattr.va_type = VLNK; 2509 /* We will read cwdi->cwdi_cmask unlocked. */ 2510 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2511 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2512 if (error == 0) 2513 vrele(nd.ni_vp); 2514 vput(nd.ni_dvp); 2515 out2: 2516 pathbuf_destroy(linkpb); 2517 out1: 2518 PNBUF_PUT(path); 2519 return (error); 2520 } 2521 2522 /* 2523 * Make a symbolic link. 2524 */ 2525 /* ARGSUSED */ 2526 int 2527 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2528 { 2529 /* { 2530 syscallarg(const char *) path; 2531 syscallarg(const char *) link; 2532 } */ 2533 2534 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2535 UIO_USERSPACE); 2536 } 2537 2538 int 2539 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2540 register_t *retval) 2541 { 2542 /* { 2543 syscallarg(const char *) path1; 2544 syscallarg(int) fd; 2545 syscallarg(const char *) path2; 2546 } */ 2547 2548 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2549 SCARG(uap, path2), UIO_USERSPACE); 2550 } 2551 2552 /* 2553 * Delete a whiteout from the filesystem. 2554 */ 2555 /* ARGSUSED */ 2556 int 2557 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2558 { 2559 /* { 2560 syscallarg(const char *) path; 2561 } */ 2562 int error; 2563 struct pathbuf *pb; 2564 struct nameidata nd; 2565 2566 error = pathbuf_copyin(SCARG(uap, path), &pb); 2567 if (error) { 2568 return error; 2569 } 2570 2571 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2572 error = namei(&nd); 2573 if (error) { 2574 pathbuf_destroy(pb); 2575 return (error); 2576 } 2577 2578 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2579 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2580 if (nd.ni_dvp == nd.ni_vp) 2581 vrele(nd.ni_dvp); 2582 else 2583 vput(nd.ni_dvp); 2584 if (nd.ni_vp) 2585 vrele(nd.ni_vp); 2586 pathbuf_destroy(pb); 2587 return (EEXIST); 2588 } 2589 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2590 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2591 vput(nd.ni_dvp); 2592 pathbuf_destroy(pb); 2593 return (error); 2594 } 2595 2596 /* 2597 * Delete a name from the filesystem. 2598 */ 2599 /* ARGSUSED */ 2600 int 2601 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2602 { 2603 /* { 2604 syscallarg(const char *) path; 2605 } */ 2606 2607 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2608 } 2609 2610 int 2611 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2612 register_t *retval) 2613 { 2614 /* { 2615 syscallarg(int) fd; 2616 syscallarg(const char *) path; 2617 syscallarg(int) flag; 2618 } */ 2619 2620 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2621 SCARG(uap, flag), UIO_USERSPACE); 2622 } 2623 2624 int 2625 do_sys_unlink(const char *arg, enum uio_seg seg) 2626 { 2627 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2628 } 2629 2630 static int 2631 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2632 enum uio_seg seg) 2633 { 2634 struct vnode *vp; 2635 int error; 2636 struct pathbuf *pb; 2637 struct nameidata nd; 2638 const char *pathstring; 2639 2640 KASSERT(l != NULL || fdat == AT_FDCWD); 2641 2642 error = pathbuf_maybe_copyin(arg, seg, &pb); 2643 if (error) { 2644 return error; 2645 } 2646 pathstring = pathbuf_stringcopy_get(pb); 2647 if (pathstring == NULL) { 2648 pathbuf_destroy(pb); 2649 return ENOMEM; 2650 } 2651 2652 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2653 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2654 goto out; 2655 vp = nd.ni_vp; 2656 2657 /* 2658 * The root of a mounted filesystem cannot be deleted. 2659 */ 2660 if ((vp->v_vflag & VV_ROOT) != 0) { 2661 error = EBUSY; 2662 goto abort; 2663 } 2664 2665 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2666 error = EBUSY; 2667 goto abort; 2668 } 2669 2670 /* 2671 * No rmdir "." please. 2672 */ 2673 if (nd.ni_dvp == vp) { 2674 error = EINVAL; 2675 goto abort; 2676 } 2677 2678 /* 2679 * AT_REMOVEDIR is required to remove a directory 2680 */ 2681 if (vp->v_type == VDIR) { 2682 if (!(flags & AT_REMOVEDIR)) { 2683 error = EPERM; 2684 goto abort; 2685 } else { 2686 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2687 goto out; 2688 } 2689 } 2690 2691 /* 2692 * Starting here we only deal with non directories. 2693 */ 2694 if (flags & AT_REMOVEDIR) { 2695 error = ENOTDIR; 2696 goto abort; 2697 } 2698 2699 #if NVERIEXEC > 0 2700 /* Handle remove requests for veriexec entries. */ 2701 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2702 goto abort; 2703 } 2704 #endif /* NVERIEXEC > 0 */ 2705 2706 #ifdef FILEASSOC 2707 (void)fileassoc_file_delete(vp); 2708 #endif /* FILEASSOC */ 2709 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2710 goto out; 2711 2712 abort: 2713 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2714 if (nd.ni_dvp == vp) 2715 vrele(nd.ni_dvp); 2716 else 2717 vput(nd.ni_dvp); 2718 vput(vp); 2719 2720 out: 2721 pathbuf_stringcopy_put(pb, pathstring); 2722 pathbuf_destroy(pb); 2723 return (error); 2724 } 2725 2726 /* 2727 * Reposition read/write file offset. 2728 */ 2729 int 2730 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2731 { 2732 /* { 2733 syscallarg(int) fd; 2734 syscallarg(int) pad; 2735 syscallarg(off_t) offset; 2736 syscallarg(int) whence; 2737 } */ 2738 kauth_cred_t cred = l->l_cred; 2739 file_t *fp; 2740 struct vnode *vp; 2741 struct vattr vattr; 2742 off_t newoff; 2743 int error, fd; 2744 2745 fd = SCARG(uap, fd); 2746 2747 if ((fp = fd_getfile(fd)) == NULL) 2748 return (EBADF); 2749 2750 vp = fp->f_data; 2751 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2752 error = ESPIPE; 2753 goto out; 2754 } 2755 2756 switch (SCARG(uap, whence)) { 2757 case SEEK_CUR: 2758 newoff = fp->f_offset + SCARG(uap, offset); 2759 break; 2760 case SEEK_END: 2761 vn_lock(vp, LK_SHARED | LK_RETRY); 2762 error = VOP_GETATTR(vp, &vattr, cred); 2763 VOP_UNLOCK(vp); 2764 if (error) { 2765 goto out; 2766 } 2767 newoff = SCARG(uap, offset) + vattr.va_size; 2768 break; 2769 case SEEK_SET: 2770 newoff = SCARG(uap, offset); 2771 break; 2772 default: 2773 error = EINVAL; 2774 goto out; 2775 } 2776 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2777 *(off_t *)retval = fp->f_offset = newoff; 2778 } 2779 out: 2780 fd_putfile(fd); 2781 return (error); 2782 } 2783 2784 /* 2785 * Positional read system call. 2786 */ 2787 int 2788 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2789 { 2790 /* { 2791 syscallarg(int) fd; 2792 syscallarg(void *) buf; 2793 syscallarg(size_t) nbyte; 2794 syscallarg(off_t) offset; 2795 } */ 2796 file_t *fp; 2797 struct vnode *vp; 2798 off_t offset; 2799 int error, fd = SCARG(uap, fd); 2800 2801 if ((fp = fd_getfile(fd)) == NULL) 2802 return (EBADF); 2803 2804 if ((fp->f_flag & FREAD) == 0) { 2805 fd_putfile(fd); 2806 return (EBADF); 2807 } 2808 2809 vp = fp->f_data; 2810 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2811 error = ESPIPE; 2812 goto out; 2813 } 2814 2815 offset = SCARG(uap, offset); 2816 2817 /* 2818 * XXX This works because no file systems actually 2819 * XXX take any action on the seek operation. 2820 */ 2821 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2822 goto out; 2823 2824 /* dofileread() will unuse the descriptor for us */ 2825 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2826 &offset, 0, retval)); 2827 2828 out: 2829 fd_putfile(fd); 2830 return (error); 2831 } 2832 2833 /* 2834 * Positional scatter read system call. 2835 */ 2836 int 2837 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2838 { 2839 /* { 2840 syscallarg(int) fd; 2841 syscallarg(const struct iovec *) iovp; 2842 syscallarg(int) iovcnt; 2843 syscallarg(off_t) offset; 2844 } */ 2845 off_t offset = SCARG(uap, offset); 2846 2847 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2848 SCARG(uap, iovcnt), &offset, 0, retval); 2849 } 2850 2851 /* 2852 * Positional write system call. 2853 */ 2854 int 2855 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2856 { 2857 /* { 2858 syscallarg(int) fd; 2859 syscallarg(const void *) buf; 2860 syscallarg(size_t) nbyte; 2861 syscallarg(off_t) offset; 2862 } */ 2863 file_t *fp; 2864 struct vnode *vp; 2865 off_t offset; 2866 int error, fd = SCARG(uap, fd); 2867 2868 if ((fp = fd_getfile(fd)) == NULL) 2869 return (EBADF); 2870 2871 if ((fp->f_flag & FWRITE) == 0) { 2872 fd_putfile(fd); 2873 return (EBADF); 2874 } 2875 2876 vp = fp->f_data; 2877 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2878 error = ESPIPE; 2879 goto out; 2880 } 2881 2882 offset = SCARG(uap, offset); 2883 2884 /* 2885 * XXX This works because no file systems actually 2886 * XXX take any action on the seek operation. 2887 */ 2888 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2889 goto out; 2890 2891 /* dofilewrite() will unuse the descriptor for us */ 2892 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2893 &offset, 0, retval)); 2894 2895 out: 2896 fd_putfile(fd); 2897 return (error); 2898 } 2899 2900 /* 2901 * Positional gather write system call. 2902 */ 2903 int 2904 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2905 { 2906 /* { 2907 syscallarg(int) fd; 2908 syscallarg(const struct iovec *) iovp; 2909 syscallarg(int) iovcnt; 2910 syscallarg(off_t) offset; 2911 } */ 2912 off_t offset = SCARG(uap, offset); 2913 2914 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2915 SCARG(uap, iovcnt), &offset, 0, retval); 2916 } 2917 2918 /* 2919 * Check access permissions. 2920 */ 2921 int 2922 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2923 { 2924 /* { 2925 syscallarg(const char *) path; 2926 syscallarg(int) flags; 2927 } */ 2928 2929 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2930 SCARG(uap, flags), 0); 2931 } 2932 2933 int 2934 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2935 int mode, int flags) 2936 { 2937 kauth_cred_t cred; 2938 struct vnode *vp; 2939 int error, nd_flag, vmode; 2940 struct pathbuf *pb; 2941 struct nameidata nd; 2942 2943 CTASSERT(F_OK == 0); 2944 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2945 /* nonsense mode */ 2946 return EINVAL; 2947 } 2948 2949 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2950 if (flags & AT_SYMLINK_NOFOLLOW) 2951 nd_flag &= ~FOLLOW; 2952 2953 error = pathbuf_copyin(path, &pb); 2954 if (error) 2955 return error; 2956 2957 NDINIT(&nd, LOOKUP, nd_flag, pb); 2958 2959 /* Override default credentials */ 2960 cred = kauth_cred_dup(l->l_cred); 2961 if (!(flags & AT_EACCESS)) { 2962 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2963 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2964 } 2965 nd.ni_cnd.cn_cred = cred; 2966 2967 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2968 pathbuf_destroy(pb); 2969 goto out; 2970 } 2971 vp = nd.ni_vp; 2972 pathbuf_destroy(pb); 2973 2974 /* Flags == 0 means only check for existence. */ 2975 if (mode) { 2976 vmode = 0; 2977 if (mode & R_OK) 2978 vmode |= VREAD; 2979 if (mode & W_OK) 2980 vmode |= VWRITE; 2981 if (mode & X_OK) 2982 vmode |= VEXEC; 2983 2984 error = VOP_ACCESS(vp, vmode, cred); 2985 if (!error && (vmode & VWRITE)) 2986 error = vn_writechk(vp); 2987 } 2988 vput(vp); 2989 out: 2990 kauth_cred_free(cred); 2991 return (error); 2992 } 2993 2994 int 2995 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 2996 register_t *retval) 2997 { 2998 /* { 2999 syscallarg(int) fd; 3000 syscallarg(const char *) path; 3001 syscallarg(int) amode; 3002 syscallarg(int) flag; 3003 } */ 3004 3005 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3006 SCARG(uap, amode), SCARG(uap, flag)); 3007 } 3008 3009 /* 3010 * Common code for all sys_stat functions, including compat versions. 3011 */ 3012 int 3013 do_sys_stat(const char *userpath, unsigned int nd_flag, 3014 struct stat *sb) 3015 { 3016 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3017 } 3018 3019 int 3020 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3021 unsigned int nd_flag, struct stat *sb) 3022 { 3023 int error; 3024 struct pathbuf *pb; 3025 struct nameidata nd; 3026 3027 KASSERT(l != NULL || fdat == AT_FDCWD); 3028 3029 error = pathbuf_copyin(userpath, &pb); 3030 if (error) { 3031 return error; 3032 } 3033 3034 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3035 3036 error = fd_nameiat(l, fdat, &nd); 3037 if (error != 0) { 3038 pathbuf_destroy(pb); 3039 return error; 3040 } 3041 error = vn_stat(nd.ni_vp, sb); 3042 vput(nd.ni_vp); 3043 pathbuf_destroy(pb); 3044 return error; 3045 } 3046 3047 /* 3048 * Get file status; this version follows links. 3049 */ 3050 /* ARGSUSED */ 3051 int 3052 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3053 { 3054 /* { 3055 syscallarg(const char *) path; 3056 syscallarg(struct stat *) ub; 3057 } */ 3058 struct stat sb; 3059 int error; 3060 3061 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3062 if (error) 3063 return error; 3064 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3065 } 3066 3067 /* 3068 * Get file status; this version does not follow links. 3069 */ 3070 /* ARGSUSED */ 3071 int 3072 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3073 { 3074 /* { 3075 syscallarg(const char *) path; 3076 syscallarg(struct stat *) ub; 3077 } */ 3078 struct stat sb; 3079 int error; 3080 3081 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3082 if (error) 3083 return error; 3084 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3085 } 3086 3087 int 3088 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3089 register_t *retval) 3090 { 3091 /* { 3092 syscallarg(int) fd; 3093 syscallarg(const char *) path; 3094 syscallarg(struct stat *) buf; 3095 syscallarg(int) flag; 3096 } */ 3097 unsigned int nd_flag; 3098 struct stat sb; 3099 int error; 3100 3101 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3102 nd_flag = NOFOLLOW; 3103 else 3104 nd_flag = FOLLOW; 3105 3106 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3107 &sb); 3108 if (error) 3109 return error; 3110 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3111 } 3112 3113 /* 3114 * Get configurable pathname variables. 3115 */ 3116 /* ARGSUSED */ 3117 int 3118 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3119 { 3120 /* { 3121 syscallarg(const char *) path; 3122 syscallarg(int) name; 3123 } */ 3124 int error; 3125 struct pathbuf *pb; 3126 struct nameidata nd; 3127 3128 error = pathbuf_copyin(SCARG(uap, path), &pb); 3129 if (error) { 3130 return error; 3131 } 3132 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3133 if ((error = namei(&nd)) != 0) { 3134 pathbuf_destroy(pb); 3135 return (error); 3136 } 3137 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3138 vput(nd.ni_vp); 3139 pathbuf_destroy(pb); 3140 return (error); 3141 } 3142 3143 /* 3144 * Return target name of a symbolic link. 3145 */ 3146 /* ARGSUSED */ 3147 int 3148 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3149 register_t *retval) 3150 { 3151 /* { 3152 syscallarg(const char *) path; 3153 syscallarg(char *) buf; 3154 syscallarg(size_t) count; 3155 } */ 3156 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3157 SCARG(uap, buf), SCARG(uap, count), retval); 3158 } 3159 3160 static int 3161 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3162 size_t count, register_t *retval) 3163 { 3164 struct vnode *vp; 3165 struct iovec aiov; 3166 struct uio auio; 3167 int error; 3168 struct pathbuf *pb; 3169 struct nameidata nd; 3170 3171 error = pathbuf_copyin(path, &pb); 3172 if (error) { 3173 return error; 3174 } 3175 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3176 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3177 pathbuf_destroy(pb); 3178 return error; 3179 } 3180 vp = nd.ni_vp; 3181 pathbuf_destroy(pb); 3182 if (vp->v_type != VLNK) 3183 error = EINVAL; 3184 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3185 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3186 aiov.iov_base = buf; 3187 aiov.iov_len = count; 3188 auio.uio_iov = &aiov; 3189 auio.uio_iovcnt = 1; 3190 auio.uio_offset = 0; 3191 auio.uio_rw = UIO_READ; 3192 KASSERT(l == curlwp); 3193 auio.uio_vmspace = l->l_proc->p_vmspace; 3194 auio.uio_resid = count; 3195 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3196 *retval = count - auio.uio_resid; 3197 } 3198 vput(vp); 3199 return (error); 3200 } 3201 3202 int 3203 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3204 register_t *retval) 3205 { 3206 /* { 3207 syscallarg(int) fd; 3208 syscallarg(const char *) path; 3209 syscallarg(char *) buf; 3210 syscallarg(size_t) bufsize; 3211 } */ 3212 3213 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3214 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3215 } 3216 3217 /* 3218 * Change flags of a file given a path name. 3219 */ 3220 /* ARGSUSED */ 3221 int 3222 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3223 { 3224 /* { 3225 syscallarg(const char *) path; 3226 syscallarg(u_long) flags; 3227 } */ 3228 struct vnode *vp; 3229 int error; 3230 3231 error = namei_simple_user(SCARG(uap, path), 3232 NSM_FOLLOW_TRYEMULROOT, &vp); 3233 if (error != 0) 3234 return (error); 3235 error = change_flags(vp, SCARG(uap, flags), l); 3236 vput(vp); 3237 return (error); 3238 } 3239 3240 /* 3241 * Change flags of a file given a file descriptor. 3242 */ 3243 /* ARGSUSED */ 3244 int 3245 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3246 { 3247 /* { 3248 syscallarg(int) fd; 3249 syscallarg(u_long) flags; 3250 } */ 3251 struct vnode *vp; 3252 file_t *fp; 3253 int error; 3254 3255 /* fd_getvnode() will use the descriptor for us */ 3256 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3257 return (error); 3258 vp = fp->f_data; 3259 error = change_flags(vp, SCARG(uap, flags), l); 3260 VOP_UNLOCK(vp); 3261 fd_putfile(SCARG(uap, fd)); 3262 return (error); 3263 } 3264 3265 /* 3266 * Change flags of a file given a path name; this version does 3267 * not follow links. 3268 */ 3269 int 3270 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3271 { 3272 /* { 3273 syscallarg(const char *) path; 3274 syscallarg(u_long) flags; 3275 } */ 3276 struct vnode *vp; 3277 int error; 3278 3279 error = namei_simple_user(SCARG(uap, path), 3280 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3281 if (error != 0) 3282 return (error); 3283 error = change_flags(vp, SCARG(uap, flags), l); 3284 vput(vp); 3285 return (error); 3286 } 3287 3288 /* 3289 * Common routine to change flags of a file. 3290 */ 3291 int 3292 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3293 { 3294 struct vattr vattr; 3295 int error; 3296 3297 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3298 3299 vattr_null(&vattr); 3300 vattr.va_flags = flags; 3301 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3302 3303 return (error); 3304 } 3305 3306 /* 3307 * Change mode of a file given path name; this version follows links. 3308 */ 3309 /* ARGSUSED */ 3310 int 3311 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3312 { 3313 /* { 3314 syscallarg(const char *) path; 3315 syscallarg(int) mode; 3316 } */ 3317 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3318 SCARG(uap, mode), 0); 3319 } 3320 3321 int 3322 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3323 { 3324 int error; 3325 struct vnode *vp; 3326 namei_simple_flags_t ns_flag; 3327 3328 if (flags & AT_SYMLINK_NOFOLLOW) 3329 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3330 else 3331 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3332 3333 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3334 if (error != 0) 3335 return error; 3336 3337 error = change_mode(vp, mode, l); 3338 3339 vrele(vp); 3340 3341 return (error); 3342 } 3343 3344 /* 3345 * Change mode of a file given a file descriptor. 3346 */ 3347 /* ARGSUSED */ 3348 int 3349 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3350 { 3351 /* { 3352 syscallarg(int) fd; 3353 syscallarg(int) mode; 3354 } */ 3355 file_t *fp; 3356 int error; 3357 3358 /* fd_getvnode() will use the descriptor for us */ 3359 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3360 return (error); 3361 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3362 fd_putfile(SCARG(uap, fd)); 3363 return (error); 3364 } 3365 3366 int 3367 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3368 register_t *retval) 3369 { 3370 /* { 3371 syscallarg(int) fd; 3372 syscallarg(const char *) path; 3373 syscallarg(int) mode; 3374 syscallarg(int) flag; 3375 } */ 3376 3377 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3378 SCARG(uap, mode), SCARG(uap, flag)); 3379 } 3380 3381 /* 3382 * Change mode of a file given path name; this version does not follow links. 3383 */ 3384 /* ARGSUSED */ 3385 int 3386 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3387 { 3388 /* { 3389 syscallarg(const char *) path; 3390 syscallarg(int) mode; 3391 } */ 3392 int error; 3393 struct vnode *vp; 3394 3395 error = namei_simple_user(SCARG(uap, path), 3396 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3397 if (error != 0) 3398 return (error); 3399 3400 error = change_mode(vp, SCARG(uap, mode), l); 3401 3402 vrele(vp); 3403 return (error); 3404 } 3405 3406 /* 3407 * Common routine to set mode given a vnode. 3408 */ 3409 static int 3410 change_mode(struct vnode *vp, int mode, struct lwp *l) 3411 { 3412 struct vattr vattr; 3413 int error; 3414 3415 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3416 vattr_null(&vattr); 3417 vattr.va_mode = mode & ALLPERMS; 3418 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3419 VOP_UNLOCK(vp); 3420 return (error); 3421 } 3422 3423 /* 3424 * Set ownership given a path name; this version follows links. 3425 */ 3426 /* ARGSUSED */ 3427 int 3428 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3429 { 3430 /* { 3431 syscallarg(const char *) path; 3432 syscallarg(uid_t) uid; 3433 syscallarg(gid_t) gid; 3434 } */ 3435 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3436 SCARG(uap, gid), 0); 3437 } 3438 3439 int 3440 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3441 gid_t gid, int flags) 3442 { 3443 int error; 3444 struct vnode *vp; 3445 namei_simple_flags_t ns_flag; 3446 3447 if (flags & AT_SYMLINK_NOFOLLOW) 3448 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3449 else 3450 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3451 3452 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3453 if (error != 0) 3454 return error; 3455 3456 error = change_owner(vp, uid, gid, l, 0); 3457 3458 vrele(vp); 3459 3460 return (error); 3461 } 3462 3463 /* 3464 * Set ownership given a path name; this version follows links. 3465 * Provides POSIX semantics. 3466 */ 3467 /* ARGSUSED */ 3468 int 3469 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3470 { 3471 /* { 3472 syscallarg(const char *) path; 3473 syscallarg(uid_t) uid; 3474 syscallarg(gid_t) gid; 3475 } */ 3476 int error; 3477 struct vnode *vp; 3478 3479 error = namei_simple_user(SCARG(uap, path), 3480 NSM_FOLLOW_TRYEMULROOT, &vp); 3481 if (error != 0) 3482 return (error); 3483 3484 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3485 3486 vrele(vp); 3487 return (error); 3488 } 3489 3490 /* 3491 * Set ownership given a file descriptor. 3492 */ 3493 /* ARGSUSED */ 3494 int 3495 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3496 { 3497 /* { 3498 syscallarg(int) fd; 3499 syscallarg(uid_t) uid; 3500 syscallarg(gid_t) gid; 3501 } */ 3502 int error; 3503 file_t *fp; 3504 3505 /* fd_getvnode() will use the descriptor for us */ 3506 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3507 return (error); 3508 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3509 l, 0); 3510 fd_putfile(SCARG(uap, fd)); 3511 return (error); 3512 } 3513 3514 int 3515 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3516 register_t *retval) 3517 { 3518 /* { 3519 syscallarg(int) fd; 3520 syscallarg(const char *) path; 3521 syscallarg(uid_t) owner; 3522 syscallarg(gid_t) group; 3523 syscallarg(int) flag; 3524 } */ 3525 3526 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3527 SCARG(uap, owner), SCARG(uap, group), 3528 SCARG(uap, flag)); 3529 } 3530 3531 /* 3532 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3533 */ 3534 /* ARGSUSED */ 3535 int 3536 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3537 { 3538 /* { 3539 syscallarg(int) fd; 3540 syscallarg(uid_t) uid; 3541 syscallarg(gid_t) gid; 3542 } */ 3543 int error; 3544 file_t *fp; 3545 3546 /* fd_getvnode() will use the descriptor for us */ 3547 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3548 return (error); 3549 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3550 l, 1); 3551 fd_putfile(SCARG(uap, fd)); 3552 return (error); 3553 } 3554 3555 /* 3556 * Set ownership given a path name; this version does not follow links. 3557 */ 3558 /* ARGSUSED */ 3559 int 3560 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3561 { 3562 /* { 3563 syscallarg(const char *) path; 3564 syscallarg(uid_t) uid; 3565 syscallarg(gid_t) gid; 3566 } */ 3567 int error; 3568 struct vnode *vp; 3569 3570 error = namei_simple_user(SCARG(uap, path), 3571 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3572 if (error != 0) 3573 return (error); 3574 3575 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3576 3577 vrele(vp); 3578 return (error); 3579 } 3580 3581 /* 3582 * Set ownership given a path name; this version does not follow links. 3583 * Provides POSIX/XPG semantics. 3584 */ 3585 /* ARGSUSED */ 3586 int 3587 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3588 { 3589 /* { 3590 syscallarg(const char *) path; 3591 syscallarg(uid_t) uid; 3592 syscallarg(gid_t) gid; 3593 } */ 3594 int error; 3595 struct vnode *vp; 3596 3597 error = namei_simple_user(SCARG(uap, path), 3598 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3599 if (error != 0) 3600 return (error); 3601 3602 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3603 3604 vrele(vp); 3605 return (error); 3606 } 3607 3608 /* 3609 * Common routine to set ownership given a vnode. 3610 */ 3611 static int 3612 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3613 int posix_semantics) 3614 { 3615 struct vattr vattr; 3616 mode_t newmode; 3617 int error; 3618 3619 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3620 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3621 goto out; 3622 3623 #define CHANGED(x) ((int)(x) != -1) 3624 newmode = vattr.va_mode; 3625 if (posix_semantics) { 3626 /* 3627 * POSIX/XPG semantics: if the caller is not the super-user, 3628 * clear set-user-id and set-group-id bits. Both POSIX and 3629 * the XPG consider the behaviour for calls by the super-user 3630 * implementation-defined; we leave the set-user-id and set- 3631 * group-id settings intact in that case. 3632 */ 3633 if (vattr.va_mode & S_ISUID) { 3634 if (kauth_authorize_vnode(l->l_cred, 3635 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3636 newmode &= ~S_ISUID; 3637 } 3638 if (vattr.va_mode & S_ISGID) { 3639 if (kauth_authorize_vnode(l->l_cred, 3640 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3641 newmode &= ~S_ISGID; 3642 } 3643 } else { 3644 /* 3645 * NetBSD semantics: when changing owner and/or group, 3646 * clear the respective bit(s). 3647 */ 3648 if (CHANGED(uid)) 3649 newmode &= ~S_ISUID; 3650 if (CHANGED(gid)) 3651 newmode &= ~S_ISGID; 3652 } 3653 /* Update va_mode iff altered. */ 3654 if (vattr.va_mode == newmode) 3655 newmode = VNOVAL; 3656 3657 vattr_null(&vattr); 3658 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3659 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3660 vattr.va_mode = newmode; 3661 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3662 #undef CHANGED 3663 3664 out: 3665 VOP_UNLOCK(vp); 3666 return (error); 3667 } 3668 3669 /* 3670 * Set the access and modification times given a path name; this 3671 * version follows links. 3672 */ 3673 /* ARGSUSED */ 3674 int 3675 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3676 register_t *retval) 3677 { 3678 /* { 3679 syscallarg(const char *) path; 3680 syscallarg(const struct timeval *) tptr; 3681 } */ 3682 3683 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3684 SCARG(uap, tptr), UIO_USERSPACE); 3685 } 3686 3687 /* 3688 * Set the access and modification times given a file descriptor. 3689 */ 3690 /* ARGSUSED */ 3691 int 3692 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3693 register_t *retval) 3694 { 3695 /* { 3696 syscallarg(int) fd; 3697 syscallarg(const struct timeval *) tptr; 3698 } */ 3699 int error; 3700 file_t *fp; 3701 3702 /* fd_getvnode() will use the descriptor for us */ 3703 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3704 return (error); 3705 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3706 UIO_USERSPACE); 3707 fd_putfile(SCARG(uap, fd)); 3708 return (error); 3709 } 3710 3711 int 3712 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3713 register_t *retval) 3714 { 3715 /* { 3716 syscallarg(int) fd; 3717 syscallarg(const struct timespec *) tptr; 3718 } */ 3719 int error; 3720 file_t *fp; 3721 3722 /* fd_getvnode() will use the descriptor for us */ 3723 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3724 return (error); 3725 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3726 SCARG(uap, tptr), UIO_USERSPACE); 3727 fd_putfile(SCARG(uap, fd)); 3728 return (error); 3729 } 3730 3731 /* 3732 * Set the access and modification times given a path name; this 3733 * version does not follow links. 3734 */ 3735 int 3736 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3737 register_t *retval) 3738 { 3739 /* { 3740 syscallarg(const char *) path; 3741 syscallarg(const struct timeval *) tptr; 3742 } */ 3743 3744 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3745 SCARG(uap, tptr), UIO_USERSPACE); 3746 } 3747 3748 int 3749 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3750 register_t *retval) 3751 { 3752 /* { 3753 syscallarg(int) fd; 3754 syscallarg(const char *) path; 3755 syscallarg(const struct timespec *) tptr; 3756 syscallarg(int) flag; 3757 } */ 3758 int follow; 3759 const struct timespec *tptr; 3760 int error; 3761 3762 tptr = SCARG(uap, tptr); 3763 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3764 3765 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3766 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3767 3768 return error; 3769 } 3770 3771 /* 3772 * Common routine to set access and modification times given a vnode. 3773 */ 3774 int 3775 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3776 const struct timespec *tptr, enum uio_seg seg) 3777 { 3778 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3779 } 3780 3781 int 3782 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3783 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3784 { 3785 struct vattr vattr; 3786 int error, dorele = 0; 3787 namei_simple_flags_t sflags; 3788 bool vanull, setbirthtime; 3789 struct timespec ts[2]; 3790 3791 KASSERT(l != NULL || fdat == AT_FDCWD); 3792 3793 /* 3794 * I have checked all callers and they pass either FOLLOW, 3795 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3796 * is 0. More to the point, they don't pass anything else. 3797 * Let's keep it that way at least until the namei interfaces 3798 * are fully sanitized. 3799 */ 3800 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3801 sflags = (flag == FOLLOW) ? 3802 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3803 3804 if (tptr == NULL) { 3805 vanull = true; 3806 nanotime(&ts[0]); 3807 ts[1] = ts[0]; 3808 } else { 3809 vanull = false; 3810 if (seg != UIO_SYSSPACE) { 3811 error = copyin(tptr, ts, sizeof (ts)); 3812 if (error != 0) 3813 return error; 3814 } else { 3815 ts[0] = tptr[0]; 3816 ts[1] = tptr[1]; 3817 } 3818 } 3819 3820 if (ts[0].tv_nsec == UTIME_NOW) { 3821 nanotime(&ts[0]); 3822 if (ts[1].tv_nsec == UTIME_NOW) { 3823 vanull = true; 3824 ts[1] = ts[0]; 3825 } 3826 } else if (ts[1].tv_nsec == UTIME_NOW) 3827 nanotime(&ts[1]); 3828 3829 if (vp == NULL) { 3830 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3831 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3832 if (error != 0) 3833 return error; 3834 dorele = 1; 3835 } 3836 3837 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3838 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3839 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3840 vattr_null(&vattr); 3841 3842 if (ts[0].tv_nsec != UTIME_OMIT) 3843 vattr.va_atime = ts[0]; 3844 3845 if (ts[1].tv_nsec != UTIME_OMIT) { 3846 vattr.va_mtime = ts[1]; 3847 if (setbirthtime) 3848 vattr.va_birthtime = ts[1]; 3849 } 3850 3851 if (vanull) 3852 vattr.va_vaflags |= VA_UTIMES_NULL; 3853 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3854 VOP_UNLOCK(vp); 3855 3856 if (dorele != 0) 3857 vrele(vp); 3858 3859 return error; 3860 } 3861 3862 int 3863 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3864 const struct timeval *tptr, enum uio_seg seg) 3865 { 3866 struct timespec ts[2]; 3867 struct timespec *tsptr = NULL; 3868 int error; 3869 3870 if (tptr != NULL) { 3871 struct timeval tv[2]; 3872 3873 if (seg != UIO_SYSSPACE) { 3874 error = copyin(tptr, tv, sizeof (tv)); 3875 if (error != 0) 3876 return error; 3877 tptr = tv; 3878 } 3879 3880 if ((tv[0].tv_usec == UTIME_NOW) || 3881 (tv[0].tv_usec == UTIME_OMIT)) 3882 ts[0].tv_nsec = tv[0].tv_usec; 3883 else 3884 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3885 3886 if ((tv[1].tv_usec == UTIME_NOW) || 3887 (tv[1].tv_usec == UTIME_OMIT)) 3888 ts[1].tv_nsec = tv[1].tv_usec; 3889 else 3890 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3891 3892 tsptr = &ts[0]; 3893 } 3894 3895 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3896 } 3897 3898 /* 3899 * Truncate a file given its path name. 3900 */ 3901 /* ARGSUSED */ 3902 int 3903 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3904 { 3905 /* { 3906 syscallarg(const char *) path; 3907 syscallarg(int) pad; 3908 syscallarg(off_t) length; 3909 } */ 3910 struct vnode *vp; 3911 struct vattr vattr; 3912 int error; 3913 3914 if (SCARG(uap, length) < 0) 3915 return EINVAL; 3916 3917 error = namei_simple_user(SCARG(uap, path), 3918 NSM_FOLLOW_TRYEMULROOT, &vp); 3919 if (error != 0) 3920 return (error); 3921 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3922 if (vp->v_type == VDIR) 3923 error = EISDIR; 3924 else if ((error = vn_writechk(vp)) == 0 && 3925 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3926 vattr_null(&vattr); 3927 vattr.va_size = SCARG(uap, length); 3928 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3929 } 3930 vput(vp); 3931 return (error); 3932 } 3933 3934 /* 3935 * Truncate a file given a file descriptor. 3936 */ 3937 /* ARGSUSED */ 3938 int 3939 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3940 { 3941 /* { 3942 syscallarg(int) fd; 3943 syscallarg(int) pad; 3944 syscallarg(off_t) length; 3945 } */ 3946 struct vattr vattr; 3947 struct vnode *vp; 3948 file_t *fp; 3949 int error; 3950 3951 if (SCARG(uap, length) < 0) 3952 return EINVAL; 3953 3954 /* fd_getvnode() will use the descriptor for us */ 3955 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3956 return (error); 3957 if ((fp->f_flag & FWRITE) == 0) { 3958 error = EINVAL; 3959 goto out; 3960 } 3961 vp = fp->f_data; 3962 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3963 if (vp->v_type == VDIR) 3964 error = EISDIR; 3965 else if ((error = vn_writechk(vp)) == 0) { 3966 vattr_null(&vattr); 3967 vattr.va_size = SCARG(uap, length); 3968 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3969 } 3970 VOP_UNLOCK(vp); 3971 out: 3972 fd_putfile(SCARG(uap, fd)); 3973 return (error); 3974 } 3975 3976 /* 3977 * Sync an open file. 3978 */ 3979 /* ARGSUSED */ 3980 int 3981 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3982 { 3983 /* { 3984 syscallarg(int) fd; 3985 } */ 3986 struct vnode *vp; 3987 file_t *fp; 3988 int error; 3989 3990 /* fd_getvnode() will use the descriptor for us */ 3991 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3992 return (error); 3993 vp = fp->f_data; 3994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3995 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3996 VOP_UNLOCK(vp); 3997 fd_putfile(SCARG(uap, fd)); 3998 return (error); 3999 } 4000 4001 /* 4002 * Sync a range of file data. API modeled after that found in AIX. 4003 * 4004 * FDATASYNC indicates that we need only save enough metadata to be able 4005 * to re-read the written data. Note we duplicate AIX's requirement that 4006 * the file be open for writing. 4007 */ 4008 /* ARGSUSED */ 4009 int 4010 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4011 { 4012 /* { 4013 syscallarg(int) fd; 4014 syscallarg(int) flags; 4015 syscallarg(off_t) start; 4016 syscallarg(off_t) length; 4017 } */ 4018 struct vnode *vp; 4019 file_t *fp; 4020 int flags, nflags; 4021 off_t s, e, len; 4022 int error; 4023 4024 /* fd_getvnode() will use the descriptor for us */ 4025 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4026 return (error); 4027 4028 if ((fp->f_flag & FWRITE) == 0) { 4029 error = EBADF; 4030 goto out; 4031 } 4032 4033 flags = SCARG(uap, flags); 4034 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4035 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4036 error = EINVAL; 4037 goto out; 4038 } 4039 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4040 if (flags & FDATASYNC) 4041 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4042 else 4043 nflags = FSYNC_WAIT; 4044 if (flags & FDISKSYNC) 4045 nflags |= FSYNC_CACHE; 4046 4047 len = SCARG(uap, length); 4048 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4049 if (len) { 4050 s = SCARG(uap, start); 4051 e = s + len; 4052 if (e < s) { 4053 error = EINVAL; 4054 goto out; 4055 } 4056 } else { 4057 e = 0; 4058 s = 0; 4059 } 4060 4061 vp = fp->f_data; 4062 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4063 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4064 VOP_UNLOCK(vp); 4065 out: 4066 fd_putfile(SCARG(uap, fd)); 4067 return (error); 4068 } 4069 4070 /* 4071 * Sync the data of an open file. 4072 */ 4073 /* ARGSUSED */ 4074 int 4075 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4076 { 4077 /* { 4078 syscallarg(int) fd; 4079 } */ 4080 struct vnode *vp; 4081 file_t *fp; 4082 int error; 4083 4084 /* fd_getvnode() will use the descriptor for us */ 4085 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4086 return (error); 4087 if ((fp->f_flag & FWRITE) == 0) { 4088 fd_putfile(SCARG(uap, fd)); 4089 return (EBADF); 4090 } 4091 vp = fp->f_data; 4092 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4093 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4094 VOP_UNLOCK(vp); 4095 fd_putfile(SCARG(uap, fd)); 4096 return (error); 4097 } 4098 4099 /* 4100 * Rename files, (standard) BSD semantics frontend. 4101 */ 4102 /* ARGSUSED */ 4103 int 4104 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4105 { 4106 /* { 4107 syscallarg(const char *) from; 4108 syscallarg(const char *) to; 4109 } */ 4110 4111 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4112 SCARG(uap, to), UIO_USERSPACE, 0)); 4113 } 4114 4115 int 4116 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4117 register_t *retval) 4118 { 4119 /* { 4120 syscallarg(int) fromfd; 4121 syscallarg(const char *) from; 4122 syscallarg(int) tofd; 4123 syscallarg(const char *) to; 4124 } */ 4125 4126 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4127 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4128 } 4129 4130 /* 4131 * Rename files, POSIX semantics frontend. 4132 */ 4133 /* ARGSUSED */ 4134 int 4135 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4136 { 4137 /* { 4138 syscallarg(const char *) from; 4139 syscallarg(const char *) to; 4140 } */ 4141 4142 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4143 SCARG(uap, to), UIO_USERSPACE, 1)); 4144 } 4145 4146 /* 4147 * Rename files. Source and destination must either both be directories, 4148 * or both not be directories. If target is a directory, it must be empty. 4149 * If `from' and `to' refer to the same object, the value of the `retain' 4150 * argument is used to determine whether `from' will be 4151 * 4152 * (retain == 0) deleted unless `from' and `to' refer to the same 4153 * object in the file system's name space (BSD). 4154 * (retain == 1) always retained (POSIX). 4155 * 4156 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4157 */ 4158 int 4159 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4160 { 4161 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4162 } 4163 4164 static int 4165 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4166 const char *to, enum uio_seg seg, int retain) 4167 { 4168 struct pathbuf *fpb, *tpb; 4169 struct nameidata fnd, tnd; 4170 struct vnode *fdvp, *fvp; 4171 struct vnode *tdvp, *tvp; 4172 struct mount *mp, *tmp; 4173 int error; 4174 4175 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4176 4177 error = pathbuf_maybe_copyin(from, seg, &fpb); 4178 if (error) 4179 goto out0; 4180 KASSERT(fpb != NULL); 4181 4182 error = pathbuf_maybe_copyin(to, seg, &tpb); 4183 if (error) 4184 goto out1; 4185 KASSERT(tpb != NULL); 4186 4187 /* 4188 * Lookup from. 4189 * 4190 * XXX LOCKPARENT is wrong because we don't actually want it 4191 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4192 * insane, so for the time being we need to leave it like this. 4193 */ 4194 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4195 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4196 goto out2; 4197 4198 /* 4199 * Pull out the important results of the lookup, fdvp and fvp. 4200 * Of course, fvp is bogus because we're about to unlock fdvp. 4201 */ 4202 fdvp = fnd.ni_dvp; 4203 fvp = fnd.ni_vp; 4204 KASSERT(fdvp != NULL); 4205 KASSERT(fvp != NULL); 4206 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4207 4208 /* 4209 * Make sure neither fdvp nor fvp is locked. 4210 */ 4211 if (fdvp != fvp) 4212 VOP_UNLOCK(fdvp); 4213 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4214 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4215 4216 /* 4217 * Reject renaming `.' and `..'. Can't do this until after 4218 * namei because we need namei's parsing to find the final 4219 * component name. (namei should just leave us with the final 4220 * component name and not look it up itself, but anyway...) 4221 * 4222 * This was here before because we used to relookup from 4223 * instead of to and relookup requires the caller to check 4224 * this, but now file systems may depend on this check, so we 4225 * must retain it until the file systems are all rototilled. 4226 */ 4227 if (((fnd.ni_cnd.cn_namelen == 1) && 4228 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4229 ((fnd.ni_cnd.cn_namelen == 2) && 4230 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4231 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4232 error = EINVAL; /* XXX EISDIR? */ 4233 goto abort0; 4234 } 4235 4236 /* 4237 * Lookup to. 4238 * 4239 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4240 * fvp here to decide whether to add CREATEDIR is a load of 4241 * bollocks because fvp might be the wrong node by now, since 4242 * fdvp is unlocked. 4243 * 4244 * XXX Why not pass CREATEDIR always? 4245 */ 4246 NDINIT(&tnd, RENAME, 4247 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4248 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4249 tpb); 4250 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4251 goto abort0; 4252 4253 /* 4254 * Pull out the important results of the lookup, tdvp and tvp. 4255 * Of course, tvp is bogus because we're about to unlock tdvp. 4256 */ 4257 tdvp = tnd.ni_dvp; 4258 tvp = tnd.ni_vp; 4259 KASSERT(tdvp != NULL); 4260 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4261 4262 /* 4263 * Make sure neither tdvp nor tvp is locked. 4264 */ 4265 if (tdvp != tvp) 4266 VOP_UNLOCK(tdvp); 4267 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4268 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4269 4270 /* 4271 * Reject renaming onto `.' or `..'. relookup is unhappy with 4272 * these, which is why we must do this here. Once upon a time 4273 * we relooked up from instead of to, and consequently didn't 4274 * need this check, but now that we relookup to instead of 4275 * from, we need this; and we shall need it forever forward 4276 * until the VOP_RENAME protocol changes, because file systems 4277 * will no doubt begin to depend on this check. 4278 */ 4279 if (((tnd.ni_cnd.cn_namelen == 1) && 4280 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4281 ((tnd.ni_cnd.cn_namelen == 2) && 4282 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4283 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4284 error = EINVAL; /* XXX EISDIR? */ 4285 goto abort1; 4286 } 4287 4288 /* 4289 * Get the mount point. If the file system has been unmounted, 4290 * which it may be because we're not holding any vnode locks, 4291 * then v_mount will be NULL. We're not really supposed to 4292 * read v_mount without holding the vnode lock, but since we 4293 * have fdvp referenced, if fdvp->v_mount changes then at worst 4294 * it will be set to NULL, not changed to another mount point. 4295 * And, of course, since it is up to the file system to 4296 * determine the real lock order, we can't lock both fdvp and 4297 * tdvp at the same time. 4298 */ 4299 mp = fdvp->v_mount; 4300 if (mp == NULL) { 4301 error = ENOENT; 4302 goto abort1; 4303 } 4304 4305 /* 4306 * Make sure the mount points match. Again, although we don't 4307 * hold any vnode locks, the v_mount fields may change -- but 4308 * at worst they will change to NULL, so this will never become 4309 * a cross-device rename, because we hold vnode references. 4310 * 4311 * XXX Because nothing is locked and the compiler may reorder 4312 * things here, unmounting the file system at an inopportune 4313 * moment may cause rename to fail with ENXDEV when it really 4314 * should fail with ENOENT. 4315 */ 4316 tmp = tdvp->v_mount; 4317 if (tmp == NULL) { 4318 error = ENOENT; 4319 goto abort1; 4320 } 4321 4322 if (mp != tmp) { 4323 error = EXDEV; 4324 goto abort1; 4325 } 4326 4327 /* 4328 * Take the vfs rename lock to avoid cross-directory screw cases. 4329 * Nothing is locked currently, so taking this lock is safe. 4330 */ 4331 error = VFS_RENAMELOCK_ENTER(mp); 4332 if (error) 4333 goto abort1; 4334 4335 /* 4336 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4337 * and nothing is locked except for the vfs rename lock. 4338 * 4339 * The next step is a little rain dance to conform to the 4340 * insane lock protocol, even though it does nothing to ward 4341 * off race conditions. 4342 * 4343 * We need tdvp and tvp to be locked. However, because we have 4344 * unlocked tdvp in order to hold no locks while we take the 4345 * vfs rename lock, tvp may be wrong here, and we can't safely 4346 * lock it even if the sensible file systems will just unlock 4347 * it straight away. Consequently, we must lock tdvp and then 4348 * relookup tvp to get it locked. 4349 * 4350 * Finally, because the VOP_RENAME protocol is brain-damaged 4351 * and various file systems insanely depend on the semantics of 4352 * this brain damage, the lookup of to must be the last lookup 4353 * before VOP_RENAME. 4354 */ 4355 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4356 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4357 if (error) 4358 goto abort2; 4359 4360 /* 4361 * Drop the old tvp and pick up the new one -- which might be 4362 * the same, but that doesn't matter to us. After this, tdvp 4363 * and tvp should both be locked. 4364 */ 4365 if (tvp != NULL) 4366 vrele(tvp); 4367 tvp = tnd.ni_vp; 4368 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4369 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4370 4371 /* 4372 * The old do_sys_rename had various consistency checks here 4373 * involving fvp and tvp. fvp is bogus already here, and tvp 4374 * will become bogus soon in any sensible file system, so the 4375 * only purpose in putting these checks here is to give lip 4376 * service to these screw cases and to acknowledge that they 4377 * exist, not actually to handle them, but here you go 4378 * anyway... 4379 */ 4380 4381 /* 4382 * Acknowledge that directories and non-directories aren't 4383 * suposed to mix. 4384 */ 4385 if (tvp != NULL) { 4386 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4387 error = ENOTDIR; 4388 goto abort3; 4389 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4390 error = EISDIR; 4391 goto abort3; 4392 } 4393 } 4394 4395 /* 4396 * Acknowledge some random screw case, among the dozens that 4397 * might arise. 4398 */ 4399 if (fvp == tdvp) { 4400 error = EINVAL; 4401 goto abort3; 4402 } 4403 4404 /* 4405 * Acknowledge that POSIX has a wacky screw case. 4406 * 4407 * XXX Eventually the retain flag needs to be passed on to 4408 * VOP_RENAME. 4409 */ 4410 if (fvp == tvp) { 4411 if (retain) { 4412 error = 0; 4413 goto abort3; 4414 } else if ((fdvp == tdvp) && 4415 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4416 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4417 fnd.ni_cnd.cn_namelen))) { 4418 error = 0; 4419 goto abort3; 4420 } 4421 } 4422 4423 /* 4424 * Make sure veriexec can screw us up. (But a race can screw 4425 * up veriexec, of course -- remember, fvp and (soon) tvp are 4426 * bogus.) 4427 */ 4428 #if NVERIEXEC > 0 4429 { 4430 char *f1, *f2; 4431 size_t f1_len; 4432 size_t f2_len; 4433 4434 f1_len = fnd.ni_cnd.cn_namelen + 1; 4435 f1 = kmem_alloc(f1_len, KM_SLEEP); 4436 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4437 4438 f2_len = tnd.ni_cnd.cn_namelen + 1; 4439 f2 = kmem_alloc(f2_len, KM_SLEEP); 4440 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4441 4442 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4443 4444 kmem_free(f1, f1_len); 4445 kmem_free(f2, f2_len); 4446 4447 if (error) 4448 goto abort3; 4449 } 4450 #endif /* NVERIEXEC > 0 */ 4451 4452 /* 4453 * All ready. Incant the rename vop. 4454 */ 4455 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4456 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4457 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4458 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4459 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4460 4461 /* 4462 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4463 * tdvp and tvp. But we can't assert any of that. 4464 */ 4465 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4466 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4467 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4468 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4469 4470 /* 4471 * So all we have left to do is to drop the rename lock and 4472 * destroy the pathbufs. 4473 */ 4474 VFS_RENAMELOCK_EXIT(mp); 4475 goto out2; 4476 4477 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4478 VOP_UNLOCK(tvp); 4479 abort2: VOP_UNLOCK(tdvp); 4480 VFS_RENAMELOCK_EXIT(mp); 4481 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4482 vrele(tdvp); 4483 if (tvp != NULL) 4484 vrele(tvp); 4485 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4486 vrele(fdvp); 4487 vrele(fvp); 4488 out2: pathbuf_destroy(tpb); 4489 out1: pathbuf_destroy(fpb); 4490 out0: return error; 4491 } 4492 4493 /* 4494 * Make a directory file. 4495 */ 4496 /* ARGSUSED */ 4497 int 4498 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4499 { 4500 /* { 4501 syscallarg(const char *) path; 4502 syscallarg(int) mode; 4503 } */ 4504 4505 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4506 SCARG(uap, mode), UIO_USERSPACE); 4507 } 4508 4509 int 4510 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4511 register_t *retval) 4512 { 4513 /* { 4514 syscallarg(int) fd; 4515 syscallarg(const char *) path; 4516 syscallarg(int) mode; 4517 } */ 4518 4519 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4520 SCARG(uap, mode), UIO_USERSPACE); 4521 } 4522 4523 4524 int 4525 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4526 { 4527 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4528 } 4529 4530 static int 4531 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4532 enum uio_seg seg) 4533 { 4534 struct proc *p = curlwp->l_proc; 4535 struct vnode *vp; 4536 struct vattr vattr; 4537 int error; 4538 struct pathbuf *pb; 4539 struct nameidata nd; 4540 4541 KASSERT(l != NULL || fdat == AT_FDCWD); 4542 4543 /* XXX bollocks, should pass in a pathbuf */ 4544 error = pathbuf_maybe_copyin(path, seg, &pb); 4545 if (error) { 4546 return error; 4547 } 4548 4549 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4550 4551 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4552 pathbuf_destroy(pb); 4553 return (error); 4554 } 4555 vp = nd.ni_vp; 4556 if (vp != NULL) { 4557 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4558 if (nd.ni_dvp == vp) 4559 vrele(nd.ni_dvp); 4560 else 4561 vput(nd.ni_dvp); 4562 vrele(vp); 4563 pathbuf_destroy(pb); 4564 return (EEXIST); 4565 } 4566 vattr_null(&vattr); 4567 vattr.va_type = VDIR; 4568 /* We will read cwdi->cwdi_cmask unlocked. */ 4569 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4570 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4571 if (!error) 4572 vrele(nd.ni_vp); 4573 vput(nd.ni_dvp); 4574 pathbuf_destroy(pb); 4575 return (error); 4576 } 4577 4578 /* 4579 * Remove a directory file. 4580 */ 4581 /* ARGSUSED */ 4582 int 4583 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4584 { 4585 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4586 AT_REMOVEDIR, UIO_USERSPACE); 4587 } 4588 4589 /* 4590 * Read a block of directory entries in a file system independent format. 4591 */ 4592 int 4593 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4594 { 4595 /* { 4596 syscallarg(int) fd; 4597 syscallarg(char *) buf; 4598 syscallarg(size_t) count; 4599 } */ 4600 file_t *fp; 4601 int error, done; 4602 4603 /* fd_getvnode() will use the descriptor for us */ 4604 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4605 return (error); 4606 if ((fp->f_flag & FREAD) == 0) { 4607 error = EBADF; 4608 goto out; 4609 } 4610 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4611 SCARG(uap, count), &done, l, 0, 0); 4612 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4613 *retval = done; 4614 out: 4615 fd_putfile(SCARG(uap, fd)); 4616 return (error); 4617 } 4618 4619 /* 4620 * Set the mode mask for creation of filesystem nodes. 4621 */ 4622 int 4623 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4624 { 4625 /* { 4626 syscallarg(mode_t) newmask; 4627 } */ 4628 struct proc *p = l->l_proc; 4629 struct cwdinfo *cwdi; 4630 4631 /* 4632 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4633 * important is that we serialize changes to the mask. The 4634 * rw_exit() will issue a write memory barrier on our behalf, 4635 * and force the changes out to other CPUs (as it must use an 4636 * atomic operation, draining the local CPU's store buffers). 4637 */ 4638 cwdi = p->p_cwdi; 4639 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4640 *retval = cwdi->cwdi_cmask; 4641 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4642 rw_exit(&cwdi->cwdi_lock); 4643 4644 return (0); 4645 } 4646 4647 int 4648 dorevoke(struct vnode *vp, kauth_cred_t cred) 4649 { 4650 struct vattr vattr; 4651 int error, fs_decision; 4652 4653 vn_lock(vp, LK_SHARED | LK_RETRY); 4654 error = VOP_GETATTR(vp, &vattr, cred); 4655 VOP_UNLOCK(vp); 4656 if (error != 0) 4657 return error; 4658 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4659 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4660 fs_decision); 4661 if (!error) 4662 VOP_REVOKE(vp, REVOKEALL); 4663 return (error); 4664 } 4665 4666 /* 4667 * Void all references to file by ripping underlying filesystem 4668 * away from vnode. 4669 */ 4670 /* ARGSUSED */ 4671 int 4672 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4673 { 4674 /* { 4675 syscallarg(const char *) path; 4676 } */ 4677 struct vnode *vp; 4678 int error; 4679 4680 error = namei_simple_user(SCARG(uap, path), 4681 NSM_FOLLOW_TRYEMULROOT, &vp); 4682 if (error != 0) 4683 return (error); 4684 error = dorevoke(vp, l->l_cred); 4685 vrele(vp); 4686 return (error); 4687 } 4688