1 /* $NetBSD: vfs_syscalls.c,v 1.491 2014/09/05 09:20:59 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.491 2014/09/05 09:20:59 matt Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 /* XXX this shouldn't be here */ 120 #ifndef OFF_T_MAX 121 #define OFF_T_MAX __type_max(off_t) 122 #endif 123 124 static int change_flags(struct vnode *, u_long, struct lwp *); 125 static int change_mode(struct vnode *, int, struct lwp *); 126 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 127 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 128 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 129 enum uio_seg); 130 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 131 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 132 enum uio_seg); 133 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 134 enum uio_seg, int); 135 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 136 size_t, register_t *); 137 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 138 139 static int fd_nameiat(struct lwp *, int, struct nameidata *); 140 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 141 namei_simple_flags_t, struct vnode **); 142 143 144 /* 145 * This table is used to maintain compatibility with 4.3BSD 146 * and NetBSD 0.9 mount syscalls - and possibly other systems. 147 * Note, the order is important! 148 * 149 * Do not modify this table. It should only contain filesystems 150 * supported by NetBSD 0.9 and 4.3BSD. 151 */ 152 const char * const mountcompatnames[] = { 153 NULL, /* 0 = MOUNT_NONE */ 154 MOUNT_FFS, /* 1 = MOUNT_UFS */ 155 MOUNT_NFS, /* 2 */ 156 MOUNT_MFS, /* 3 */ 157 MOUNT_MSDOS, /* 4 */ 158 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 159 MOUNT_FDESC, /* 6 */ 160 MOUNT_KERNFS, /* 7 */ 161 NULL, /* 8 = MOUNT_DEVFS */ 162 MOUNT_AFS, /* 9 */ 163 }; 164 165 const int nmountcompatnames = __arraycount(mountcompatnames); 166 167 static int 168 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 169 { 170 file_t *dfp; 171 int error; 172 173 if (fdat != AT_FDCWD) { 174 if ((error = fd_getvnode(fdat, &dfp)) != 0) 175 goto out; 176 177 NDAT(ndp, dfp->f_vnode); 178 } 179 180 error = namei(ndp); 181 182 if (fdat != AT_FDCWD) 183 fd_putfile(fdat); 184 out: 185 return error; 186 } 187 188 static int 189 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 190 namei_simple_flags_t sflags, struct vnode **vp_ret) 191 { 192 file_t *dfp; 193 struct vnode *dvp; 194 int error; 195 196 if (fdat != AT_FDCWD) { 197 if ((error = fd_getvnode(fdat, &dfp)) != 0) 198 goto out; 199 200 dvp = dfp->f_vnode; 201 } else { 202 dvp = NULL; 203 } 204 205 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 206 207 if (fdat != AT_FDCWD) 208 fd_putfile(fdat); 209 out: 210 return error; 211 } 212 213 static int 214 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 215 { 216 int error; 217 218 fp->f_flag = flags & FMASK; 219 fp->f_type = DTYPE_VNODE; 220 fp->f_ops = &vnops; 221 fp->f_vnode = vp; 222 223 if (flags & (O_EXLOCK | O_SHLOCK)) { 224 struct flock lf; 225 int type; 226 227 lf.l_whence = SEEK_SET; 228 lf.l_start = 0; 229 lf.l_len = 0; 230 if (flags & O_EXLOCK) 231 lf.l_type = F_WRLCK; 232 else 233 lf.l_type = F_RDLCK; 234 type = F_FLOCK; 235 if ((flags & FNONBLOCK) == 0) 236 type |= F_WAIT; 237 VOP_UNLOCK(vp); 238 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 239 if (error) { 240 (void) vn_close(vp, fp->f_flag, fp->f_cred); 241 fd_abort(l->l_proc, fp, indx); 242 return error; 243 } 244 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 245 atomic_or_uint(&fp->f_flag, FHASLOCK); 246 } 247 if (flags & O_CLOEXEC) 248 fd_set_exclose(l, indx, true); 249 return 0; 250 } 251 252 static int 253 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 254 void *data, size_t *data_len) 255 { 256 struct mount *mp; 257 int error = 0, saved_flags; 258 259 mp = vp->v_mount; 260 saved_flags = mp->mnt_flag; 261 262 /* We can operate only on VV_ROOT nodes. */ 263 if ((vp->v_vflag & VV_ROOT) == 0) { 264 error = EINVAL; 265 goto out; 266 } 267 268 /* 269 * We only allow the filesystem to be reloaded if it 270 * is currently mounted read-only. Additionally, we 271 * prevent read-write to read-only downgrades. 272 */ 273 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 274 (mp->mnt_flag & MNT_RDONLY) == 0 && 275 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 276 error = EOPNOTSUPP; /* Needs translation */ 277 goto out; 278 } 279 280 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 281 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 282 if (error) 283 goto out; 284 285 if (vfs_busy(mp, NULL)) { 286 error = EPERM; 287 goto out; 288 } 289 290 mutex_enter(&mp->mnt_updating); 291 292 mp->mnt_flag &= ~MNT_OP_FLAGS; 293 mp->mnt_flag |= flags & MNT_OP_FLAGS; 294 295 /* 296 * Set the mount level flags. 297 */ 298 if (flags & MNT_RDONLY) 299 mp->mnt_flag |= MNT_RDONLY; 300 else if (mp->mnt_flag & MNT_RDONLY) 301 mp->mnt_iflag |= IMNT_WANTRDWR; 302 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 303 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 304 error = VFS_MOUNT(mp, path, data, data_len); 305 306 if (error && data != NULL) { 307 int error2; 308 309 /* 310 * Update failed; let's try and see if it was an 311 * export request. For compat with 3.0 and earlier. 312 */ 313 error2 = vfs_hooks_reexport(mp, path, data); 314 315 /* 316 * Only update error code if the export request was 317 * understood but some problem occurred while 318 * processing it. 319 */ 320 if (error2 != EJUSTRETURN) 321 error = error2; 322 } 323 324 if (mp->mnt_iflag & IMNT_WANTRDWR) 325 mp->mnt_flag &= ~MNT_RDONLY; 326 if (error) 327 mp->mnt_flag = saved_flags; 328 mp->mnt_flag &= ~MNT_OP_FLAGS; 329 mp->mnt_iflag &= ~IMNT_WANTRDWR; 330 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 331 if (mp->mnt_syncer == NULL) 332 error = vfs_allocate_syncvnode(mp); 333 } else { 334 if (mp->mnt_syncer != NULL) 335 vfs_deallocate_syncvnode(mp); 336 } 337 mutex_exit(&mp->mnt_updating); 338 vfs_unbusy(mp, false, NULL); 339 340 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 341 (flags & MNT_EXTATTR)) { 342 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 343 NULL, 0, NULL) != 0) { 344 printf("%s: failed to start extattr, error = %d", 345 mp->mnt_stat.f_mntonname, error); 346 mp->mnt_flag &= ~MNT_EXTATTR; 347 } 348 } 349 350 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 351 !(flags & MNT_EXTATTR)) { 352 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 353 NULL, 0, NULL) != 0) { 354 printf("%s: failed to stop extattr, error = %d", 355 mp->mnt_stat.f_mntonname, error); 356 mp->mnt_flag |= MNT_RDONLY; 357 } 358 } 359 out: 360 return (error); 361 } 362 363 static int 364 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 365 { 366 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 367 int error; 368 369 /* Copy file-system type from userspace. */ 370 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 371 if (error) { 372 /* 373 * Historically, filesystem types were identified by numbers. 374 * If we get an integer for the filesystem type instead of a 375 * string, we check to see if it matches one of the historic 376 * filesystem types. 377 */ 378 u_long fsindex = (u_long)fstype; 379 if (fsindex >= nmountcompatnames || 380 mountcompatnames[fsindex] == NULL) 381 return ENODEV; 382 strlcpy(fstypename, mountcompatnames[fsindex], 383 sizeof(fstypename)); 384 } 385 386 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 387 if (strcmp(fstypename, "ufs") == 0) 388 fstypename[0] = 'f'; 389 390 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 391 return 0; 392 393 /* If we can autoload a vfs module, try again */ 394 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 395 396 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 397 return 0; 398 399 return ENODEV; 400 } 401 402 static int 403 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 404 void *data, size_t *data_len) 405 { 406 struct mount *mp; 407 int error; 408 409 /* If MNT_GETARGS is specified, it should be the only flag. */ 410 if (flags & ~MNT_GETARGS) 411 return EINVAL; 412 413 mp = vp->v_mount; 414 415 /* XXX: probably some notion of "can see" here if we want isolation. */ 416 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 417 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 418 if (error) 419 return error; 420 421 if ((vp->v_vflag & VV_ROOT) == 0) 422 return EINVAL; 423 424 if (vfs_busy(mp, NULL)) 425 return EPERM; 426 427 mutex_enter(&mp->mnt_updating); 428 mp->mnt_flag &= ~MNT_OP_FLAGS; 429 mp->mnt_flag |= MNT_GETARGS; 430 error = VFS_MOUNT(mp, path, data, data_len); 431 mp->mnt_flag &= ~MNT_OP_FLAGS; 432 mutex_exit(&mp->mnt_updating); 433 434 vfs_unbusy(mp, false, NULL); 435 return (error); 436 } 437 438 int 439 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 440 { 441 /* { 442 syscallarg(const char *) type; 443 syscallarg(const char *) path; 444 syscallarg(int) flags; 445 syscallarg(void *) data; 446 syscallarg(size_t) data_len; 447 } */ 448 449 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 450 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 451 SCARG(uap, data_len), retval); 452 } 453 454 int 455 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 456 const char *path, int flags, void *data, enum uio_seg data_seg, 457 size_t data_len, register_t *retval) 458 { 459 struct vnode *vp; 460 void *data_buf = data; 461 bool vfsopsrele = false; 462 size_t alloc_sz = 0; 463 int error; 464 465 /* XXX: The calling convention of this routine is totally bizarre */ 466 if (vfsops) 467 vfsopsrele = true; 468 469 /* 470 * Get vnode to be covered 471 */ 472 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 473 if (error != 0) { 474 vp = NULL; 475 goto done; 476 } 477 478 if (vfsops == NULL) { 479 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 480 vfsops = vp->v_mount->mnt_op; 481 } else { 482 /* 'type' is userspace */ 483 error = mount_get_vfsops(type, &vfsops); 484 if (error != 0) 485 goto done; 486 vfsopsrele = true; 487 } 488 } 489 490 /* 491 * We allow data to be NULL, even for userspace. Some fs's don't need 492 * it. The others will handle NULL. 493 */ 494 if (data != NULL && data_seg == UIO_USERSPACE) { 495 if (data_len == 0) { 496 /* No length supplied, use default for filesystem */ 497 data_len = vfsops->vfs_min_mount_data; 498 499 /* 500 * Hopefully a longer buffer won't make copyin() fail. 501 * For compatibility with 3.0 and earlier. 502 */ 503 if (flags & MNT_UPDATE 504 && data_len < sizeof (struct mnt_export_args30)) 505 data_len = sizeof (struct mnt_export_args30); 506 } 507 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 508 error = EINVAL; 509 goto done; 510 } 511 alloc_sz = data_len; 512 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 513 514 /* NFS needs the buffer even for mnt_getargs .... */ 515 error = copyin(data, data_buf, data_len); 516 if (error != 0) 517 goto done; 518 } 519 520 if (flags & MNT_GETARGS) { 521 if (data_len == 0) { 522 error = EINVAL; 523 goto done; 524 } 525 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 526 if (error != 0) 527 goto done; 528 if (data_seg == UIO_USERSPACE) 529 error = copyout(data_buf, data, data_len); 530 *retval = data_len; 531 } else if (flags & MNT_UPDATE) { 532 error = mount_update(l, vp, path, flags, data_buf, &data_len); 533 } else { 534 /* Locking is handled internally in mount_domount(). */ 535 KASSERT(vfsopsrele == true); 536 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 537 &data_len); 538 vfsopsrele = false; 539 } 540 541 done: 542 if (vfsopsrele) 543 vfs_delref(vfsops); 544 if (vp != NULL) { 545 vrele(vp); 546 } 547 if (data_buf != data) 548 kmem_free(data_buf, alloc_sz); 549 return (error); 550 } 551 552 /* 553 * Unmount a file system. 554 * 555 * Note: unmount takes a path to the vnode mounted on as argument, 556 * not special file (as before). 557 */ 558 /* ARGSUSED */ 559 int 560 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 561 { 562 /* { 563 syscallarg(const char *) path; 564 syscallarg(int) flags; 565 } */ 566 struct vnode *vp; 567 struct mount *mp; 568 int error; 569 struct pathbuf *pb; 570 struct nameidata nd; 571 572 error = pathbuf_copyin(SCARG(uap, path), &pb); 573 if (error) { 574 return error; 575 } 576 577 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 578 if ((error = namei(&nd)) != 0) { 579 pathbuf_destroy(pb); 580 return error; 581 } 582 vp = nd.ni_vp; 583 pathbuf_destroy(pb); 584 585 mp = vp->v_mount; 586 atomic_inc_uint(&mp->mnt_refcnt); 587 VOP_UNLOCK(vp); 588 589 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 590 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 591 if (error) { 592 vrele(vp); 593 vfs_destroy(mp); 594 return (error); 595 } 596 597 /* 598 * Don't allow unmounting the root file system. 599 */ 600 if (mp->mnt_flag & MNT_ROOTFS) { 601 vrele(vp); 602 vfs_destroy(mp); 603 return (EINVAL); 604 } 605 606 /* 607 * Must be the root of the filesystem 608 */ 609 if ((vp->v_vflag & VV_ROOT) == 0) { 610 vrele(vp); 611 vfs_destroy(mp); 612 return (EINVAL); 613 } 614 615 vrele(vp); 616 error = dounmount(mp, SCARG(uap, flags), l); 617 vfs_destroy(mp); 618 return error; 619 } 620 621 /* 622 * Sync each mounted filesystem. 623 */ 624 #ifdef DEBUG 625 int syncprt = 0; 626 struct ctldebug debug0 = { "syncprt", &syncprt }; 627 #endif 628 629 void 630 do_sys_sync(struct lwp *l) 631 { 632 struct mount *mp, *nmp; 633 int asyncflag; 634 635 mutex_enter(&mountlist_lock); 636 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 637 if (vfs_busy(mp, &nmp)) { 638 continue; 639 } 640 mutex_enter(&mp->mnt_updating); 641 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 642 asyncflag = mp->mnt_flag & MNT_ASYNC; 643 mp->mnt_flag &= ~MNT_ASYNC; 644 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 645 if (asyncflag) 646 mp->mnt_flag |= MNT_ASYNC; 647 } 648 mutex_exit(&mp->mnt_updating); 649 vfs_unbusy(mp, false, &nmp); 650 } 651 mutex_exit(&mountlist_lock); 652 #ifdef DEBUG 653 if (syncprt) 654 vfs_bufstats(); 655 #endif /* DEBUG */ 656 } 657 658 /* ARGSUSED */ 659 int 660 sys_sync(struct lwp *l, const void *v, register_t *retval) 661 { 662 do_sys_sync(l); 663 return (0); 664 } 665 666 667 /* 668 * Access or change filesystem quotas. 669 * 670 * (this is really 14 different calls bundled into one) 671 */ 672 673 static int 674 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 675 { 676 struct quotastat info_k; 677 int error; 678 679 /* ensure any padding bytes are cleared */ 680 memset(&info_k, 0, sizeof(info_k)); 681 682 error = vfs_quotactl_stat(mp, &info_k); 683 if (error) { 684 return error; 685 } 686 687 return copyout(&info_k, info_u, sizeof(info_k)); 688 } 689 690 static int 691 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 692 struct quotaidtypestat *info_u) 693 { 694 struct quotaidtypestat info_k; 695 int error; 696 697 /* ensure any padding bytes are cleared */ 698 memset(&info_k, 0, sizeof(info_k)); 699 700 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 701 if (error) { 702 return error; 703 } 704 705 return copyout(&info_k, info_u, sizeof(info_k)); 706 } 707 708 static int 709 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 710 struct quotaobjtypestat *info_u) 711 { 712 struct quotaobjtypestat info_k; 713 int error; 714 715 /* ensure any padding bytes are cleared */ 716 memset(&info_k, 0, sizeof(info_k)); 717 718 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 719 if (error) { 720 return error; 721 } 722 723 return copyout(&info_k, info_u, sizeof(info_k)); 724 } 725 726 static int 727 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 728 struct quotaval *val_u) 729 { 730 struct quotakey key_k; 731 struct quotaval val_k; 732 int error; 733 734 /* ensure any padding bytes are cleared */ 735 memset(&val_k, 0, sizeof(val_k)); 736 737 error = copyin(key_u, &key_k, sizeof(key_k)); 738 if (error) { 739 return error; 740 } 741 742 error = vfs_quotactl_get(mp, &key_k, &val_k); 743 if (error) { 744 return error; 745 } 746 747 return copyout(&val_k, val_u, sizeof(val_k)); 748 } 749 750 static int 751 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 752 const struct quotaval *val_u) 753 { 754 struct quotakey key_k; 755 struct quotaval val_k; 756 int error; 757 758 error = copyin(key_u, &key_k, sizeof(key_k)); 759 if (error) { 760 return error; 761 } 762 763 error = copyin(val_u, &val_k, sizeof(val_k)); 764 if (error) { 765 return error; 766 } 767 768 return vfs_quotactl_put(mp, &key_k, &val_k); 769 } 770 771 static int 772 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 773 { 774 struct quotakey key_k; 775 int error; 776 777 error = copyin(key_u, &key_k, sizeof(key_k)); 778 if (error) { 779 return error; 780 } 781 782 return vfs_quotactl_del(mp, &key_k); 783 } 784 785 static int 786 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 787 { 788 struct quotakcursor cursor_k; 789 int error; 790 791 /* ensure any padding bytes are cleared */ 792 memset(&cursor_k, 0, sizeof(cursor_k)); 793 794 error = vfs_quotactl_cursoropen(mp, &cursor_k); 795 if (error) { 796 return error; 797 } 798 799 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 800 } 801 802 static int 803 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 804 { 805 struct quotakcursor cursor_k; 806 int error; 807 808 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 809 if (error) { 810 return error; 811 } 812 813 return vfs_quotactl_cursorclose(mp, &cursor_k); 814 } 815 816 static int 817 do_sys_quotactl_cursorskipidtype(struct mount *mp, 818 struct quotakcursor *cursor_u, int idtype) 819 { 820 struct quotakcursor cursor_k; 821 int error; 822 823 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 824 if (error) { 825 return error; 826 } 827 828 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 829 if (error) { 830 return error; 831 } 832 833 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 834 } 835 836 static int 837 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 838 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 839 unsigned *ret_u) 840 { 841 #define CGET_STACK_MAX 8 842 struct quotakcursor cursor_k; 843 struct quotakey stackkeys[CGET_STACK_MAX]; 844 struct quotaval stackvals[CGET_STACK_MAX]; 845 struct quotakey *keys_k; 846 struct quotaval *vals_k; 847 unsigned ret_k; 848 int error; 849 850 if (maxnum > 128) { 851 maxnum = 128; 852 } 853 854 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 855 if (error) { 856 return error; 857 } 858 859 if (maxnum <= CGET_STACK_MAX) { 860 keys_k = stackkeys; 861 vals_k = stackvals; 862 /* ensure any padding bytes are cleared */ 863 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 864 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 865 } else { 866 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 867 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 868 } 869 870 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 871 &ret_k); 872 if (error) { 873 goto fail; 874 } 875 876 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 877 if (error) { 878 goto fail; 879 } 880 881 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 882 if (error) { 883 goto fail; 884 } 885 886 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 887 if (error) { 888 goto fail; 889 } 890 891 /* do last to maximize the chance of being able to recover a failure */ 892 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 893 894 fail: 895 if (keys_k != stackkeys) { 896 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 897 } 898 if (vals_k != stackvals) { 899 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 900 } 901 return error; 902 } 903 904 static int 905 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 906 int *ret_u) 907 { 908 struct quotakcursor cursor_k; 909 int ret_k; 910 int error; 911 912 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 913 if (error) { 914 return error; 915 } 916 917 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 918 if (error) { 919 return error; 920 } 921 922 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 923 if (error) { 924 return error; 925 } 926 927 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 928 } 929 930 static int 931 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 932 { 933 struct quotakcursor cursor_k; 934 int error; 935 936 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 937 if (error) { 938 return error; 939 } 940 941 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 942 if (error) { 943 return error; 944 } 945 946 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 947 } 948 949 static int 950 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 951 { 952 char *path_k; 953 int error; 954 955 /* XXX this should probably be a struct pathbuf */ 956 path_k = PNBUF_GET(); 957 error = copyin(path_u, path_k, PATH_MAX); 958 if (error) { 959 PNBUF_PUT(path_k); 960 return error; 961 } 962 963 error = vfs_quotactl_quotaon(mp, idtype, path_k); 964 965 PNBUF_PUT(path_k); 966 return error; 967 } 968 969 static int 970 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 971 { 972 return vfs_quotactl_quotaoff(mp, idtype); 973 } 974 975 int 976 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 977 { 978 struct mount *mp; 979 struct vnode *vp; 980 int error; 981 982 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 983 if (error != 0) 984 return (error); 985 mp = vp->v_mount; 986 987 switch (args->qc_op) { 988 case QUOTACTL_STAT: 989 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 990 break; 991 case QUOTACTL_IDTYPESTAT: 992 error = do_sys_quotactl_idtypestat(mp, 993 args->u.idtypestat.qc_idtype, 994 args->u.idtypestat.qc_info); 995 break; 996 case QUOTACTL_OBJTYPESTAT: 997 error = do_sys_quotactl_objtypestat(mp, 998 args->u.objtypestat.qc_objtype, 999 args->u.objtypestat.qc_info); 1000 break; 1001 case QUOTACTL_GET: 1002 error = do_sys_quotactl_get(mp, 1003 args->u.get.qc_key, 1004 args->u.get.qc_val); 1005 break; 1006 case QUOTACTL_PUT: 1007 error = do_sys_quotactl_put(mp, 1008 args->u.put.qc_key, 1009 args->u.put.qc_val); 1010 break; 1011 case QUOTACTL_DEL: 1012 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1013 break; 1014 case QUOTACTL_CURSOROPEN: 1015 error = do_sys_quotactl_cursoropen(mp, 1016 args->u.cursoropen.qc_cursor); 1017 break; 1018 case QUOTACTL_CURSORCLOSE: 1019 error = do_sys_quotactl_cursorclose(mp, 1020 args->u.cursorclose.qc_cursor); 1021 break; 1022 case QUOTACTL_CURSORSKIPIDTYPE: 1023 error = do_sys_quotactl_cursorskipidtype(mp, 1024 args->u.cursorskipidtype.qc_cursor, 1025 args->u.cursorskipidtype.qc_idtype); 1026 break; 1027 case QUOTACTL_CURSORGET: 1028 error = do_sys_quotactl_cursorget(mp, 1029 args->u.cursorget.qc_cursor, 1030 args->u.cursorget.qc_keys, 1031 args->u.cursorget.qc_vals, 1032 args->u.cursorget.qc_maxnum, 1033 args->u.cursorget.qc_ret); 1034 break; 1035 case QUOTACTL_CURSORATEND: 1036 error = do_sys_quotactl_cursoratend(mp, 1037 args->u.cursoratend.qc_cursor, 1038 args->u.cursoratend.qc_ret); 1039 break; 1040 case QUOTACTL_CURSORREWIND: 1041 error = do_sys_quotactl_cursorrewind(mp, 1042 args->u.cursorrewind.qc_cursor); 1043 break; 1044 case QUOTACTL_QUOTAON: 1045 error = do_sys_quotactl_quotaon(mp, 1046 args->u.quotaon.qc_idtype, 1047 args->u.quotaon.qc_quotafile); 1048 break; 1049 case QUOTACTL_QUOTAOFF: 1050 error = do_sys_quotactl_quotaoff(mp, 1051 args->u.quotaoff.qc_idtype); 1052 break; 1053 default: 1054 error = EINVAL; 1055 break; 1056 } 1057 1058 vrele(vp); 1059 return error; 1060 } 1061 1062 /* ARGSUSED */ 1063 int 1064 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1065 register_t *retval) 1066 { 1067 /* { 1068 syscallarg(const char *) path; 1069 syscallarg(struct quotactl_args *) args; 1070 } */ 1071 struct quotactl_args args; 1072 int error; 1073 1074 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1075 if (error) { 1076 return error; 1077 } 1078 1079 return do_sys_quotactl(SCARG(uap, path), &args); 1080 } 1081 1082 int 1083 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1084 int root) 1085 { 1086 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1087 int error = 0; 1088 1089 /* 1090 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1091 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1092 * overrides MNT_NOWAIT. 1093 */ 1094 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1095 (flags != MNT_WAIT && flags != 0)) { 1096 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1097 goto done; 1098 } 1099 1100 /* Get the filesystem stats now */ 1101 memset(sp, 0, sizeof(*sp)); 1102 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1103 return error; 1104 } 1105 1106 if (cwdi->cwdi_rdir == NULL) 1107 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1108 done: 1109 if (cwdi->cwdi_rdir != NULL) { 1110 size_t len; 1111 char *bp; 1112 char c; 1113 char *path = PNBUF_GET(); 1114 1115 bp = path + MAXPATHLEN; 1116 *--bp = '\0'; 1117 rw_enter(&cwdi->cwdi_lock, RW_READER); 1118 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1119 MAXPATHLEN / 2, 0, l); 1120 rw_exit(&cwdi->cwdi_lock); 1121 if (error) { 1122 PNBUF_PUT(path); 1123 return error; 1124 } 1125 len = strlen(bp); 1126 if (len != 1) { 1127 /* 1128 * for mount points that are below our root, we can see 1129 * them, so we fix up the pathname and return them. The 1130 * rest we cannot see, so we don't allow viewing the 1131 * data. 1132 */ 1133 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1134 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1135 (void)strlcpy(sp->f_mntonname, 1136 c == '\0' ? "/" : &sp->f_mntonname[len], 1137 sizeof(sp->f_mntonname)); 1138 } else { 1139 if (root) 1140 (void)strlcpy(sp->f_mntonname, "/", 1141 sizeof(sp->f_mntonname)); 1142 else 1143 error = EPERM; 1144 } 1145 } 1146 PNBUF_PUT(path); 1147 } 1148 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1149 return error; 1150 } 1151 1152 /* 1153 * Get filesystem statistics by path. 1154 */ 1155 int 1156 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1157 { 1158 struct mount *mp; 1159 int error; 1160 struct vnode *vp; 1161 1162 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1163 if (error != 0) 1164 return error; 1165 mp = vp->v_mount; 1166 error = dostatvfs(mp, sb, l, flags, 1); 1167 vrele(vp); 1168 return error; 1169 } 1170 1171 /* ARGSUSED */ 1172 int 1173 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1174 { 1175 /* { 1176 syscallarg(const char *) path; 1177 syscallarg(struct statvfs *) buf; 1178 syscallarg(int) flags; 1179 } */ 1180 struct statvfs *sb; 1181 int error; 1182 1183 sb = STATVFSBUF_GET(); 1184 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1185 if (error == 0) 1186 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1187 STATVFSBUF_PUT(sb); 1188 return error; 1189 } 1190 1191 /* 1192 * Get filesystem statistics by fd. 1193 */ 1194 int 1195 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1196 { 1197 file_t *fp; 1198 struct mount *mp; 1199 int error; 1200 1201 /* fd_getvnode() will use the descriptor for us */ 1202 if ((error = fd_getvnode(fd, &fp)) != 0) 1203 return (error); 1204 mp = fp->f_vnode->v_mount; 1205 error = dostatvfs(mp, sb, curlwp, flags, 1); 1206 fd_putfile(fd); 1207 return error; 1208 } 1209 1210 /* ARGSUSED */ 1211 int 1212 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1213 { 1214 /* { 1215 syscallarg(int) fd; 1216 syscallarg(struct statvfs *) buf; 1217 syscallarg(int) flags; 1218 } */ 1219 struct statvfs *sb; 1220 int error; 1221 1222 sb = STATVFSBUF_GET(); 1223 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1224 if (error == 0) 1225 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1226 STATVFSBUF_PUT(sb); 1227 return error; 1228 } 1229 1230 1231 /* 1232 * Get statistics on all filesystems. 1233 */ 1234 int 1235 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1236 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1237 register_t *retval) 1238 { 1239 int root = 0; 1240 struct proc *p = l->l_proc; 1241 struct mount *mp, *nmp; 1242 struct statvfs *sb; 1243 size_t count, maxcount; 1244 int error = 0; 1245 1246 sb = STATVFSBUF_GET(); 1247 maxcount = bufsize / entry_sz; 1248 mutex_enter(&mountlist_lock); 1249 count = 0; 1250 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1251 if (vfs_busy(mp, &nmp)) { 1252 continue; 1253 } 1254 if (sfsp && count < maxcount) { 1255 error = dostatvfs(mp, sb, l, flags, 0); 1256 if (error) { 1257 vfs_unbusy(mp, false, &nmp); 1258 error = 0; 1259 continue; 1260 } 1261 error = copyfn(sb, sfsp, entry_sz); 1262 if (error) { 1263 vfs_unbusy(mp, false, NULL); 1264 goto out; 1265 } 1266 sfsp = (char *)sfsp + entry_sz; 1267 root |= strcmp(sb->f_mntonname, "/") == 0; 1268 } 1269 count++; 1270 vfs_unbusy(mp, false, &nmp); 1271 } 1272 mutex_exit(&mountlist_lock); 1273 1274 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1275 /* 1276 * fake a root entry 1277 */ 1278 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1279 sb, l, flags, 1); 1280 if (error != 0) 1281 goto out; 1282 if (sfsp) { 1283 error = copyfn(sb, sfsp, entry_sz); 1284 if (error != 0) 1285 goto out; 1286 } 1287 count++; 1288 } 1289 if (sfsp && count > maxcount) 1290 *retval = maxcount; 1291 else 1292 *retval = count; 1293 out: 1294 STATVFSBUF_PUT(sb); 1295 return error; 1296 } 1297 1298 int 1299 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1300 { 1301 /* { 1302 syscallarg(struct statvfs *) buf; 1303 syscallarg(size_t) bufsize; 1304 syscallarg(int) flags; 1305 } */ 1306 1307 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1308 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1309 } 1310 1311 /* 1312 * Change current working directory to a given file descriptor. 1313 */ 1314 /* ARGSUSED */ 1315 int 1316 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1317 { 1318 /* { 1319 syscallarg(int) fd; 1320 } */ 1321 struct proc *p = l->l_proc; 1322 struct cwdinfo *cwdi; 1323 struct vnode *vp, *tdp; 1324 struct mount *mp; 1325 file_t *fp; 1326 int error, fd; 1327 1328 /* fd_getvnode() will use the descriptor for us */ 1329 fd = SCARG(uap, fd); 1330 if ((error = fd_getvnode(fd, &fp)) != 0) 1331 return (error); 1332 vp = fp->f_vnode; 1333 1334 vref(vp); 1335 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1336 if (vp->v_type != VDIR) 1337 error = ENOTDIR; 1338 else 1339 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1340 if (error) { 1341 vput(vp); 1342 goto out; 1343 } 1344 while ((mp = vp->v_mountedhere) != NULL) { 1345 error = vfs_busy(mp, NULL); 1346 vput(vp); 1347 if (error != 0) 1348 goto out; 1349 error = VFS_ROOT(mp, &tdp); 1350 vfs_unbusy(mp, false, NULL); 1351 if (error) 1352 goto out; 1353 vp = tdp; 1354 } 1355 VOP_UNLOCK(vp); 1356 1357 /* 1358 * Disallow changing to a directory not under the process's 1359 * current root directory (if there is one). 1360 */ 1361 cwdi = p->p_cwdi; 1362 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1363 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1364 vrele(vp); 1365 error = EPERM; /* operation not permitted */ 1366 } else { 1367 vrele(cwdi->cwdi_cdir); 1368 cwdi->cwdi_cdir = vp; 1369 } 1370 rw_exit(&cwdi->cwdi_lock); 1371 1372 out: 1373 fd_putfile(fd); 1374 return (error); 1375 } 1376 1377 /* 1378 * Change this process's notion of the root directory to a given file 1379 * descriptor. 1380 */ 1381 int 1382 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1383 { 1384 struct proc *p = l->l_proc; 1385 struct vnode *vp; 1386 file_t *fp; 1387 int error, fd = SCARG(uap, fd); 1388 1389 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1390 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1391 return error; 1392 /* fd_getvnode() will use the descriptor for us */ 1393 if ((error = fd_getvnode(fd, &fp)) != 0) 1394 return error; 1395 vp = fp->f_vnode; 1396 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1397 if (vp->v_type != VDIR) 1398 error = ENOTDIR; 1399 else 1400 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1401 VOP_UNLOCK(vp); 1402 if (error) 1403 goto out; 1404 vref(vp); 1405 1406 change_root(p->p_cwdi, vp, l); 1407 1408 out: 1409 fd_putfile(fd); 1410 return (error); 1411 } 1412 1413 /* 1414 * Change current working directory (``.''). 1415 */ 1416 /* ARGSUSED */ 1417 int 1418 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1419 { 1420 /* { 1421 syscallarg(const char *) path; 1422 } */ 1423 struct proc *p = l->l_proc; 1424 struct cwdinfo *cwdi; 1425 int error; 1426 struct vnode *vp; 1427 1428 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1429 &vp, l)) != 0) 1430 return (error); 1431 cwdi = p->p_cwdi; 1432 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1433 vrele(cwdi->cwdi_cdir); 1434 cwdi->cwdi_cdir = vp; 1435 rw_exit(&cwdi->cwdi_lock); 1436 return (0); 1437 } 1438 1439 /* 1440 * Change notion of root (``/'') directory. 1441 */ 1442 /* ARGSUSED */ 1443 int 1444 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1445 { 1446 /* { 1447 syscallarg(const char *) path; 1448 } */ 1449 struct proc *p = l->l_proc; 1450 int error; 1451 struct vnode *vp; 1452 1453 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1454 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1455 return (error); 1456 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1457 &vp, l)) != 0) 1458 return (error); 1459 1460 change_root(p->p_cwdi, vp, l); 1461 1462 return (0); 1463 } 1464 1465 /* 1466 * Common routine for chroot and fchroot. 1467 * NB: callers need to properly authorize the change root operation. 1468 */ 1469 void 1470 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1471 { 1472 struct proc *p = l->l_proc; 1473 kauth_cred_t ncred; 1474 1475 ncred = kauth_cred_alloc(); 1476 1477 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1478 if (cwdi->cwdi_rdir != NULL) 1479 vrele(cwdi->cwdi_rdir); 1480 cwdi->cwdi_rdir = vp; 1481 1482 /* 1483 * Prevent escaping from chroot by putting the root under 1484 * the working directory. Silently chdir to / if we aren't 1485 * already there. 1486 */ 1487 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1488 /* 1489 * XXX would be more failsafe to change directory to a 1490 * deadfs node here instead 1491 */ 1492 vrele(cwdi->cwdi_cdir); 1493 vref(vp); 1494 cwdi->cwdi_cdir = vp; 1495 } 1496 rw_exit(&cwdi->cwdi_lock); 1497 1498 /* Get a write lock on the process credential. */ 1499 proc_crmod_enter(); 1500 1501 kauth_cred_clone(p->p_cred, ncred); 1502 kauth_proc_chroot(ncred, p->p_cwdi); 1503 1504 /* Broadcast our credentials to the process and other LWPs. */ 1505 proc_crmod_leave(ncred, p->p_cred, true); 1506 } 1507 1508 /* 1509 * Common routine for chroot and chdir. 1510 * XXX "where" should be enum uio_seg 1511 */ 1512 int 1513 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1514 { 1515 struct pathbuf *pb; 1516 struct nameidata nd; 1517 int error; 1518 1519 error = pathbuf_maybe_copyin(path, where, &pb); 1520 if (error) { 1521 return error; 1522 } 1523 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1524 if ((error = namei(&nd)) != 0) { 1525 pathbuf_destroy(pb); 1526 return error; 1527 } 1528 *vpp = nd.ni_vp; 1529 pathbuf_destroy(pb); 1530 1531 if ((*vpp)->v_type != VDIR) 1532 error = ENOTDIR; 1533 else 1534 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1535 1536 if (error) 1537 vput(*vpp); 1538 else 1539 VOP_UNLOCK(*vpp); 1540 return (error); 1541 } 1542 1543 /* 1544 * Internals of sys_open - path has already been converted into a pathbuf 1545 * (so we can easily reuse this function from other parts of the kernel, 1546 * like posix_spawn post-processing). 1547 */ 1548 int 1549 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1550 int open_mode, int *fd) 1551 { 1552 struct proc *p = l->l_proc; 1553 struct cwdinfo *cwdi = p->p_cwdi; 1554 file_t *fp; 1555 struct vnode *vp; 1556 int flags, cmode; 1557 int indx, error; 1558 struct nameidata nd; 1559 1560 if (open_flags & O_SEARCH) { 1561 open_flags &= ~(int)O_SEARCH; 1562 } 1563 1564 flags = FFLAGS(open_flags); 1565 if ((flags & (FREAD | FWRITE)) == 0) 1566 return EINVAL; 1567 1568 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1569 return error; 1570 } 1571 1572 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1573 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1574 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1575 if (dvp != NULL) 1576 NDAT(&nd, dvp); 1577 1578 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1579 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1580 fd_abort(p, fp, indx); 1581 if ((error == EDUPFD || error == EMOVEFD) && 1582 l->l_dupfd >= 0 && /* XXX from fdopen */ 1583 (error = 1584 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1585 *fd = indx; 1586 return 0; 1587 } 1588 if (error == ERESTART) 1589 error = EINTR; 1590 return error; 1591 } 1592 1593 l->l_dupfd = 0; 1594 vp = nd.ni_vp; 1595 1596 if ((error = open_setfp(l, fp, vp, indx, flags))) 1597 return error; 1598 1599 VOP_UNLOCK(vp); 1600 *fd = indx; 1601 fd_affix(p, fp, indx); 1602 return 0; 1603 } 1604 1605 int 1606 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1607 { 1608 struct pathbuf *pb; 1609 int error, oflags; 1610 1611 oflags = FFLAGS(open_flags); 1612 if ((oflags & (FREAD | FWRITE)) == 0) 1613 return EINVAL; 1614 1615 pb = pathbuf_create(path); 1616 if (pb == NULL) 1617 return ENOMEM; 1618 1619 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1620 pathbuf_destroy(pb); 1621 1622 return error; 1623 } 1624 1625 /* 1626 * Check permissions, allocate an open file structure, 1627 * and call the device open routine if any. 1628 */ 1629 static int 1630 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1631 int mode, int *fd) 1632 { 1633 file_t *dfp = NULL; 1634 struct vnode *dvp = NULL; 1635 struct pathbuf *pb; 1636 int error; 1637 1638 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1639 if (path == NULL) { 1640 pb = pathbuf_create("."); 1641 if (pb == NULL) 1642 return ENOMEM; 1643 } else 1644 #endif 1645 { 1646 error = pathbuf_copyin(path, &pb); 1647 if (error) 1648 return error; 1649 } 1650 1651 if (fdat != AT_FDCWD) { 1652 /* fd_getvnode() will use the descriptor for us */ 1653 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1654 goto out; 1655 1656 dvp = dfp->f_vnode; 1657 } 1658 1659 error = do_open(l, dvp, pb, flags, mode, fd); 1660 1661 if (dfp != NULL) 1662 fd_putfile(fdat); 1663 out: 1664 pathbuf_destroy(pb); 1665 return error; 1666 } 1667 1668 int 1669 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1670 { 1671 /* { 1672 syscallarg(const char *) path; 1673 syscallarg(int) flags; 1674 syscallarg(int) mode; 1675 } */ 1676 int error; 1677 int fd; 1678 1679 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1680 SCARG(uap, flags), SCARG(uap, mode), &fd); 1681 1682 if (error == 0) 1683 *retval = fd; 1684 1685 return error; 1686 } 1687 1688 int 1689 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1690 { 1691 /* { 1692 syscallarg(int) fd; 1693 syscallarg(const char *) path; 1694 syscallarg(int) oflags; 1695 syscallarg(int) mode; 1696 } */ 1697 int error; 1698 int fd; 1699 1700 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1701 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1702 1703 if (error == 0) 1704 *retval = fd; 1705 1706 return error; 1707 } 1708 1709 static void 1710 vfs__fhfree(fhandle_t *fhp) 1711 { 1712 size_t fhsize; 1713 1714 fhsize = FHANDLE_SIZE(fhp); 1715 kmem_free(fhp, fhsize); 1716 } 1717 1718 /* 1719 * vfs_composefh: compose a filehandle. 1720 */ 1721 1722 int 1723 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1724 { 1725 struct mount *mp; 1726 struct fid *fidp; 1727 int error; 1728 size_t needfhsize; 1729 size_t fidsize; 1730 1731 mp = vp->v_mount; 1732 fidp = NULL; 1733 if (*fh_size < FHANDLE_SIZE_MIN) { 1734 fidsize = 0; 1735 } else { 1736 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1737 if (fhp != NULL) { 1738 memset(fhp, 0, *fh_size); 1739 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1740 fidp = &fhp->fh_fid; 1741 } 1742 } 1743 error = VFS_VPTOFH(vp, fidp, &fidsize); 1744 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1745 if (error == 0 && *fh_size < needfhsize) { 1746 error = E2BIG; 1747 } 1748 *fh_size = needfhsize; 1749 return error; 1750 } 1751 1752 int 1753 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1754 { 1755 struct mount *mp; 1756 fhandle_t *fhp; 1757 size_t fhsize; 1758 size_t fidsize; 1759 int error; 1760 1761 mp = vp->v_mount; 1762 fidsize = 0; 1763 error = VFS_VPTOFH(vp, NULL, &fidsize); 1764 KASSERT(error != 0); 1765 if (error != E2BIG) { 1766 goto out; 1767 } 1768 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1769 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1770 if (fhp == NULL) { 1771 error = ENOMEM; 1772 goto out; 1773 } 1774 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1775 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1776 if (error == 0) { 1777 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1778 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1779 *fhpp = fhp; 1780 } else { 1781 kmem_free(fhp, fhsize); 1782 } 1783 out: 1784 return error; 1785 } 1786 1787 void 1788 vfs_composefh_free(fhandle_t *fhp) 1789 { 1790 1791 vfs__fhfree(fhp); 1792 } 1793 1794 /* 1795 * vfs_fhtovp: lookup a vnode by a filehandle. 1796 */ 1797 1798 int 1799 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1800 { 1801 struct mount *mp; 1802 int error; 1803 1804 *vpp = NULL; 1805 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1806 if (mp == NULL) { 1807 error = ESTALE; 1808 goto out; 1809 } 1810 if (mp->mnt_op->vfs_fhtovp == NULL) { 1811 error = EOPNOTSUPP; 1812 goto out; 1813 } 1814 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1815 out: 1816 return error; 1817 } 1818 1819 /* 1820 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1821 * the needed size. 1822 */ 1823 1824 int 1825 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1826 { 1827 fhandle_t *fhp; 1828 int error; 1829 1830 if (fhsize > FHANDLE_SIZE_MAX) { 1831 return EINVAL; 1832 } 1833 if (fhsize < FHANDLE_SIZE_MIN) { 1834 return EINVAL; 1835 } 1836 again: 1837 fhp = kmem_alloc(fhsize, KM_SLEEP); 1838 if (fhp == NULL) { 1839 return ENOMEM; 1840 } 1841 error = copyin(ufhp, fhp, fhsize); 1842 if (error == 0) { 1843 /* XXX this check shouldn't be here */ 1844 if (FHANDLE_SIZE(fhp) == fhsize) { 1845 *fhpp = fhp; 1846 return 0; 1847 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1848 /* 1849 * a kludge for nfsv2 padded handles. 1850 */ 1851 size_t sz; 1852 1853 sz = FHANDLE_SIZE(fhp); 1854 kmem_free(fhp, fhsize); 1855 fhsize = sz; 1856 goto again; 1857 } else { 1858 /* 1859 * userland told us wrong size. 1860 */ 1861 error = EINVAL; 1862 } 1863 } 1864 kmem_free(fhp, fhsize); 1865 return error; 1866 } 1867 1868 void 1869 vfs_copyinfh_free(fhandle_t *fhp) 1870 { 1871 1872 vfs__fhfree(fhp); 1873 } 1874 1875 /* 1876 * Get file handle system call 1877 */ 1878 int 1879 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1880 { 1881 /* { 1882 syscallarg(char *) fname; 1883 syscallarg(fhandle_t *) fhp; 1884 syscallarg(size_t *) fh_size; 1885 } */ 1886 struct vnode *vp; 1887 fhandle_t *fh; 1888 int error; 1889 struct pathbuf *pb; 1890 struct nameidata nd; 1891 size_t sz; 1892 size_t usz; 1893 1894 /* 1895 * Must be super user 1896 */ 1897 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1898 0, NULL, NULL, NULL); 1899 if (error) 1900 return (error); 1901 1902 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1903 if (error) { 1904 return error; 1905 } 1906 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1907 error = namei(&nd); 1908 if (error) { 1909 pathbuf_destroy(pb); 1910 return error; 1911 } 1912 vp = nd.ni_vp; 1913 pathbuf_destroy(pb); 1914 1915 error = vfs_composefh_alloc(vp, &fh); 1916 vput(vp); 1917 if (error != 0) { 1918 return error; 1919 } 1920 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1921 if (error != 0) { 1922 goto out; 1923 } 1924 sz = FHANDLE_SIZE(fh); 1925 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1926 if (error != 0) { 1927 goto out; 1928 } 1929 if (usz >= sz) { 1930 error = copyout(fh, SCARG(uap, fhp), sz); 1931 } else { 1932 error = E2BIG; 1933 } 1934 out: 1935 vfs_composefh_free(fh); 1936 return (error); 1937 } 1938 1939 /* 1940 * Open a file given a file handle. 1941 * 1942 * Check permissions, allocate an open file structure, 1943 * and call the device open routine if any. 1944 */ 1945 1946 int 1947 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1948 register_t *retval) 1949 { 1950 file_t *fp; 1951 struct vnode *vp = NULL; 1952 kauth_cred_t cred = l->l_cred; 1953 file_t *nfp; 1954 int indx, error = 0; 1955 struct vattr va; 1956 fhandle_t *fh; 1957 int flags; 1958 proc_t *p; 1959 1960 p = curproc; 1961 1962 /* 1963 * Must be super user 1964 */ 1965 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1966 0, NULL, NULL, NULL))) 1967 return (error); 1968 1969 if (oflags & O_SEARCH) { 1970 oflags &= ~(int)O_SEARCH; 1971 } 1972 1973 flags = FFLAGS(oflags); 1974 if ((flags & (FREAD | FWRITE)) == 0) 1975 return (EINVAL); 1976 if ((flags & O_CREAT)) 1977 return (EINVAL); 1978 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1979 return (error); 1980 fp = nfp; 1981 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1982 if (error != 0) { 1983 goto bad; 1984 } 1985 error = vfs_fhtovp(fh, &vp); 1986 vfs_copyinfh_free(fh); 1987 if (error != 0) { 1988 goto bad; 1989 } 1990 1991 /* Now do an effective vn_open */ 1992 1993 if (vp->v_type == VSOCK) { 1994 error = EOPNOTSUPP; 1995 goto bad; 1996 } 1997 error = vn_openchk(vp, cred, flags); 1998 if (error != 0) 1999 goto bad; 2000 if (flags & O_TRUNC) { 2001 VOP_UNLOCK(vp); /* XXX */ 2002 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2003 vattr_null(&va); 2004 va.va_size = 0; 2005 error = VOP_SETATTR(vp, &va, cred); 2006 if (error) 2007 goto bad; 2008 } 2009 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2010 goto bad; 2011 if (flags & FWRITE) { 2012 mutex_enter(vp->v_interlock); 2013 vp->v_writecount++; 2014 mutex_exit(vp->v_interlock); 2015 } 2016 2017 /* done with modified vn_open, now finish what sys_open does. */ 2018 if ((error = open_setfp(l, fp, vp, indx, flags))) 2019 return error; 2020 2021 VOP_UNLOCK(vp); 2022 *retval = indx; 2023 fd_affix(p, fp, indx); 2024 return (0); 2025 2026 bad: 2027 fd_abort(p, fp, indx); 2028 if (vp != NULL) 2029 vput(vp); 2030 return (error); 2031 } 2032 2033 int 2034 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2035 { 2036 /* { 2037 syscallarg(const void *) fhp; 2038 syscallarg(size_t) fh_size; 2039 syscallarg(int) flags; 2040 } */ 2041 2042 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2043 SCARG(uap, flags), retval); 2044 } 2045 2046 int 2047 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2048 { 2049 int error; 2050 fhandle_t *fh; 2051 struct vnode *vp; 2052 2053 /* 2054 * Must be super user 2055 */ 2056 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2057 0, NULL, NULL, NULL))) 2058 return (error); 2059 2060 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2061 if (error != 0) 2062 return error; 2063 2064 error = vfs_fhtovp(fh, &vp); 2065 vfs_copyinfh_free(fh); 2066 if (error != 0) 2067 return error; 2068 2069 error = vn_stat(vp, sb); 2070 vput(vp); 2071 return error; 2072 } 2073 2074 2075 /* ARGSUSED */ 2076 int 2077 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2078 { 2079 /* { 2080 syscallarg(const void *) fhp; 2081 syscallarg(size_t) fh_size; 2082 syscallarg(struct stat *) sb; 2083 } */ 2084 struct stat sb; 2085 int error; 2086 2087 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2088 if (error) 2089 return error; 2090 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2091 } 2092 2093 int 2094 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2095 int flags) 2096 { 2097 fhandle_t *fh; 2098 struct mount *mp; 2099 struct vnode *vp; 2100 int error; 2101 2102 /* 2103 * Must be super user 2104 */ 2105 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2106 0, NULL, NULL, NULL))) 2107 return error; 2108 2109 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2110 if (error != 0) 2111 return error; 2112 2113 error = vfs_fhtovp(fh, &vp); 2114 vfs_copyinfh_free(fh); 2115 if (error != 0) 2116 return error; 2117 2118 mp = vp->v_mount; 2119 error = dostatvfs(mp, sb, l, flags, 1); 2120 vput(vp); 2121 return error; 2122 } 2123 2124 /* ARGSUSED */ 2125 int 2126 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2127 { 2128 /* { 2129 syscallarg(const void *) fhp; 2130 syscallarg(size_t) fh_size; 2131 syscallarg(struct statvfs *) buf; 2132 syscallarg(int) flags; 2133 } */ 2134 struct statvfs *sb = STATVFSBUF_GET(); 2135 int error; 2136 2137 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2138 SCARG(uap, flags)); 2139 if (error == 0) 2140 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2141 STATVFSBUF_PUT(sb); 2142 return error; 2143 } 2144 2145 /* 2146 * Create a special file. 2147 */ 2148 /* ARGSUSED */ 2149 int 2150 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2151 register_t *retval) 2152 { 2153 /* { 2154 syscallarg(const char *) path; 2155 syscallarg(mode_t) mode; 2156 syscallarg(dev_t) dev; 2157 } */ 2158 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2159 SCARG(uap, dev), retval, UIO_USERSPACE); 2160 } 2161 2162 int 2163 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2164 register_t *retval) 2165 { 2166 /* { 2167 syscallarg(int) fd; 2168 syscallarg(const char *) path; 2169 syscallarg(mode_t) mode; 2170 syscallarg(int) pad; 2171 syscallarg(dev_t) dev; 2172 } */ 2173 2174 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2175 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2176 } 2177 2178 int 2179 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2180 register_t *retval, enum uio_seg seg) 2181 { 2182 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2183 } 2184 2185 int 2186 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2187 dev_t dev, register_t *retval, enum uio_seg seg) 2188 { 2189 struct proc *p = l->l_proc; 2190 struct vnode *vp; 2191 struct vattr vattr; 2192 int error, optype; 2193 struct pathbuf *pb; 2194 struct nameidata nd; 2195 const char *pathstring; 2196 2197 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2198 0, NULL, NULL, NULL)) != 0) 2199 return (error); 2200 2201 optype = VOP_MKNOD_DESCOFFSET; 2202 2203 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2204 if (error) { 2205 return error; 2206 } 2207 pathstring = pathbuf_stringcopy_get(pb); 2208 if (pathstring == NULL) { 2209 pathbuf_destroy(pb); 2210 return ENOMEM; 2211 } 2212 2213 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2214 2215 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2216 goto out; 2217 vp = nd.ni_vp; 2218 2219 if (vp != NULL) 2220 error = EEXIST; 2221 else { 2222 vattr_null(&vattr); 2223 /* We will read cwdi->cwdi_cmask unlocked. */ 2224 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2225 vattr.va_rdev = dev; 2226 2227 switch (mode & S_IFMT) { 2228 case S_IFMT: /* used by badsect to flag bad sectors */ 2229 vattr.va_type = VBAD; 2230 break; 2231 case S_IFCHR: 2232 vattr.va_type = VCHR; 2233 break; 2234 case S_IFBLK: 2235 vattr.va_type = VBLK; 2236 break; 2237 case S_IFWHT: 2238 optype = VOP_WHITEOUT_DESCOFFSET; 2239 break; 2240 case S_IFREG: 2241 #if NVERIEXEC > 0 2242 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2243 O_CREAT); 2244 #endif /* NVERIEXEC > 0 */ 2245 vattr.va_type = VREG; 2246 vattr.va_rdev = VNOVAL; 2247 optype = VOP_CREATE_DESCOFFSET; 2248 break; 2249 default: 2250 error = EINVAL; 2251 break; 2252 } 2253 } 2254 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2255 && vattr.va_rdev == VNOVAL) 2256 error = EINVAL; 2257 if (!error) { 2258 switch (optype) { 2259 case VOP_WHITEOUT_DESCOFFSET: 2260 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2261 if (error) 2262 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2263 vput(nd.ni_dvp); 2264 break; 2265 2266 case VOP_MKNOD_DESCOFFSET: 2267 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2268 &nd.ni_cnd, &vattr); 2269 if (error == 0) 2270 vrele(nd.ni_vp); 2271 vput(nd.ni_dvp); 2272 break; 2273 2274 case VOP_CREATE_DESCOFFSET: 2275 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2276 &nd.ni_cnd, &vattr); 2277 if (error == 0) 2278 vrele(nd.ni_vp); 2279 vput(nd.ni_dvp); 2280 break; 2281 } 2282 } else { 2283 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2284 if (nd.ni_dvp == vp) 2285 vrele(nd.ni_dvp); 2286 else 2287 vput(nd.ni_dvp); 2288 if (vp) 2289 vrele(vp); 2290 } 2291 out: 2292 pathbuf_stringcopy_put(pb, pathstring); 2293 pathbuf_destroy(pb); 2294 return (error); 2295 } 2296 2297 /* 2298 * Create a named pipe. 2299 */ 2300 /* ARGSUSED */ 2301 int 2302 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2303 { 2304 /* { 2305 syscallarg(const char *) path; 2306 syscallarg(int) mode; 2307 } */ 2308 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2309 } 2310 2311 int 2312 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2313 register_t *retval) 2314 { 2315 /* { 2316 syscallarg(int) fd; 2317 syscallarg(const char *) path; 2318 syscallarg(int) mode; 2319 } */ 2320 2321 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2322 SCARG(uap, mode)); 2323 } 2324 2325 static int 2326 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2327 { 2328 struct proc *p = l->l_proc; 2329 struct vattr vattr; 2330 int error; 2331 struct pathbuf *pb; 2332 struct nameidata nd; 2333 2334 error = pathbuf_copyin(path, &pb); 2335 if (error) { 2336 return error; 2337 } 2338 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2339 2340 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2341 pathbuf_destroy(pb); 2342 return error; 2343 } 2344 if (nd.ni_vp != NULL) { 2345 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2346 if (nd.ni_dvp == nd.ni_vp) 2347 vrele(nd.ni_dvp); 2348 else 2349 vput(nd.ni_dvp); 2350 vrele(nd.ni_vp); 2351 pathbuf_destroy(pb); 2352 return (EEXIST); 2353 } 2354 vattr_null(&vattr); 2355 vattr.va_type = VFIFO; 2356 /* We will read cwdi->cwdi_cmask unlocked. */ 2357 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2358 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2359 if (error == 0) 2360 vrele(nd.ni_vp); 2361 vput(nd.ni_dvp); 2362 pathbuf_destroy(pb); 2363 return (error); 2364 } 2365 2366 /* 2367 * Make a hard file link. 2368 */ 2369 /* ARGSUSED */ 2370 int 2371 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2372 const char *link, int follow, register_t *retval) 2373 { 2374 struct vnode *vp; 2375 struct pathbuf *linkpb; 2376 struct nameidata nd; 2377 namei_simple_flags_t ns_flags; 2378 int error; 2379 2380 if (follow & AT_SYMLINK_FOLLOW) 2381 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2382 else 2383 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2384 2385 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2386 if (error != 0) 2387 return (error); 2388 error = pathbuf_copyin(link, &linkpb); 2389 if (error) { 2390 goto out1; 2391 } 2392 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2393 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2394 goto out2; 2395 if (nd.ni_vp) { 2396 error = EEXIST; 2397 goto abortop; 2398 } 2399 /* Prevent hard links on directories. */ 2400 if (vp->v_type == VDIR) { 2401 error = EPERM; 2402 goto abortop; 2403 } 2404 /* Prevent cross-mount operation. */ 2405 if (nd.ni_dvp->v_mount != vp->v_mount) { 2406 error = EXDEV; 2407 goto abortop; 2408 } 2409 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2410 out2: 2411 pathbuf_destroy(linkpb); 2412 out1: 2413 vrele(vp); 2414 return (error); 2415 abortop: 2416 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2417 if (nd.ni_dvp == nd.ni_vp) 2418 vrele(nd.ni_dvp); 2419 else 2420 vput(nd.ni_dvp); 2421 if (nd.ni_vp != NULL) 2422 vrele(nd.ni_vp); 2423 goto out2; 2424 } 2425 2426 int 2427 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2428 { 2429 /* { 2430 syscallarg(const char *) path; 2431 syscallarg(const char *) link; 2432 } */ 2433 const char *path = SCARG(uap, path); 2434 const char *link = SCARG(uap, link); 2435 2436 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2437 AT_SYMLINK_FOLLOW, retval); 2438 } 2439 2440 int 2441 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2442 register_t *retval) 2443 { 2444 /* { 2445 syscallarg(int) fd1; 2446 syscallarg(const char *) name1; 2447 syscallarg(int) fd2; 2448 syscallarg(const char *) name2; 2449 syscallarg(int) flags; 2450 } */ 2451 int fd1 = SCARG(uap, fd1); 2452 const char *name1 = SCARG(uap, name1); 2453 int fd2 = SCARG(uap, fd2); 2454 const char *name2 = SCARG(uap, name2); 2455 int follow; 2456 2457 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2458 2459 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2460 } 2461 2462 2463 int 2464 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2465 { 2466 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2467 } 2468 2469 static int 2470 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2471 const char *link, enum uio_seg seg) 2472 { 2473 struct proc *p = curproc; 2474 struct vattr vattr; 2475 char *path; 2476 int error; 2477 struct pathbuf *linkpb; 2478 struct nameidata nd; 2479 2480 KASSERT(l != NULL || fdat == AT_FDCWD); 2481 2482 path = PNBUF_GET(); 2483 if (seg == UIO_USERSPACE) { 2484 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2485 goto out1; 2486 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2487 goto out1; 2488 } else { 2489 KASSERT(strlen(patharg) < MAXPATHLEN); 2490 strcpy(path, patharg); 2491 linkpb = pathbuf_create(link); 2492 if (linkpb == NULL) { 2493 error = ENOMEM; 2494 goto out1; 2495 } 2496 } 2497 ktrkuser("symlink-target", path, strlen(path)); 2498 2499 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2500 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2501 goto out2; 2502 if (nd.ni_vp) { 2503 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2504 if (nd.ni_dvp == nd.ni_vp) 2505 vrele(nd.ni_dvp); 2506 else 2507 vput(nd.ni_dvp); 2508 vrele(nd.ni_vp); 2509 error = EEXIST; 2510 goto out2; 2511 } 2512 vattr_null(&vattr); 2513 vattr.va_type = VLNK; 2514 /* We will read cwdi->cwdi_cmask unlocked. */ 2515 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2516 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2517 if (error == 0) 2518 vrele(nd.ni_vp); 2519 vput(nd.ni_dvp); 2520 out2: 2521 pathbuf_destroy(linkpb); 2522 out1: 2523 PNBUF_PUT(path); 2524 return (error); 2525 } 2526 2527 /* 2528 * Make a symbolic link. 2529 */ 2530 /* ARGSUSED */ 2531 int 2532 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2533 { 2534 /* { 2535 syscallarg(const char *) path; 2536 syscallarg(const char *) link; 2537 } */ 2538 2539 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2540 UIO_USERSPACE); 2541 } 2542 2543 int 2544 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2545 register_t *retval) 2546 { 2547 /* { 2548 syscallarg(const char *) path1; 2549 syscallarg(int) fd; 2550 syscallarg(const char *) path2; 2551 } */ 2552 2553 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2554 SCARG(uap, path2), UIO_USERSPACE); 2555 } 2556 2557 /* 2558 * Delete a whiteout from the filesystem. 2559 */ 2560 /* ARGSUSED */ 2561 int 2562 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2563 { 2564 /* { 2565 syscallarg(const char *) path; 2566 } */ 2567 int error; 2568 struct pathbuf *pb; 2569 struct nameidata nd; 2570 2571 error = pathbuf_copyin(SCARG(uap, path), &pb); 2572 if (error) { 2573 return error; 2574 } 2575 2576 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2577 error = namei(&nd); 2578 if (error) { 2579 pathbuf_destroy(pb); 2580 return (error); 2581 } 2582 2583 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2584 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2585 if (nd.ni_dvp == nd.ni_vp) 2586 vrele(nd.ni_dvp); 2587 else 2588 vput(nd.ni_dvp); 2589 if (nd.ni_vp) 2590 vrele(nd.ni_vp); 2591 pathbuf_destroy(pb); 2592 return (EEXIST); 2593 } 2594 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2595 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2596 vput(nd.ni_dvp); 2597 pathbuf_destroy(pb); 2598 return (error); 2599 } 2600 2601 /* 2602 * Delete a name from the filesystem. 2603 */ 2604 /* ARGSUSED */ 2605 int 2606 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2607 { 2608 /* { 2609 syscallarg(const char *) path; 2610 } */ 2611 2612 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2613 } 2614 2615 int 2616 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2617 register_t *retval) 2618 { 2619 /* { 2620 syscallarg(int) fd; 2621 syscallarg(const char *) path; 2622 syscallarg(int) flag; 2623 } */ 2624 2625 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2626 SCARG(uap, flag), UIO_USERSPACE); 2627 } 2628 2629 int 2630 do_sys_unlink(const char *arg, enum uio_seg seg) 2631 { 2632 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2633 } 2634 2635 static int 2636 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2637 enum uio_seg seg) 2638 { 2639 struct vnode *vp; 2640 int error; 2641 struct pathbuf *pb; 2642 struct nameidata nd; 2643 const char *pathstring; 2644 2645 KASSERT(l != NULL || fdat == AT_FDCWD); 2646 2647 error = pathbuf_maybe_copyin(arg, seg, &pb); 2648 if (error) { 2649 return error; 2650 } 2651 pathstring = pathbuf_stringcopy_get(pb); 2652 if (pathstring == NULL) { 2653 pathbuf_destroy(pb); 2654 return ENOMEM; 2655 } 2656 2657 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2658 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2659 goto out; 2660 vp = nd.ni_vp; 2661 2662 /* 2663 * The root of a mounted filesystem cannot be deleted. 2664 */ 2665 if ((vp->v_vflag & VV_ROOT) != 0) { 2666 error = EBUSY; 2667 goto abort; 2668 } 2669 2670 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2671 error = EBUSY; 2672 goto abort; 2673 } 2674 2675 /* 2676 * No rmdir "." please. 2677 */ 2678 if (nd.ni_dvp == vp) { 2679 error = EINVAL; 2680 goto abort; 2681 } 2682 2683 /* 2684 * AT_REMOVEDIR is required to remove a directory 2685 */ 2686 if (vp->v_type == VDIR) { 2687 if (!(flags & AT_REMOVEDIR)) { 2688 error = EPERM; 2689 goto abort; 2690 } else { 2691 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2692 goto out; 2693 } 2694 } 2695 2696 /* 2697 * Starting here we only deal with non directories. 2698 */ 2699 if (flags & AT_REMOVEDIR) { 2700 error = ENOTDIR; 2701 goto abort; 2702 } 2703 2704 #if NVERIEXEC > 0 2705 /* Handle remove requests for veriexec entries. */ 2706 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2707 goto abort; 2708 } 2709 #endif /* NVERIEXEC > 0 */ 2710 2711 #ifdef FILEASSOC 2712 (void)fileassoc_file_delete(vp); 2713 #endif /* FILEASSOC */ 2714 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2715 goto out; 2716 2717 abort: 2718 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2719 if (nd.ni_dvp == vp) 2720 vrele(nd.ni_dvp); 2721 else 2722 vput(nd.ni_dvp); 2723 vput(vp); 2724 2725 out: 2726 pathbuf_stringcopy_put(pb, pathstring); 2727 pathbuf_destroy(pb); 2728 return (error); 2729 } 2730 2731 /* 2732 * Reposition read/write file offset. 2733 */ 2734 int 2735 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2736 { 2737 /* { 2738 syscallarg(int) fd; 2739 syscallarg(int) pad; 2740 syscallarg(off_t) offset; 2741 syscallarg(int) whence; 2742 } */ 2743 kauth_cred_t cred = l->l_cred; 2744 file_t *fp; 2745 struct vnode *vp; 2746 struct vattr vattr; 2747 off_t newoff; 2748 int error, fd; 2749 2750 fd = SCARG(uap, fd); 2751 2752 if ((fp = fd_getfile(fd)) == NULL) 2753 return (EBADF); 2754 2755 vp = fp->f_vnode; 2756 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2757 error = ESPIPE; 2758 goto out; 2759 } 2760 2761 switch (SCARG(uap, whence)) { 2762 case SEEK_CUR: 2763 newoff = fp->f_offset + SCARG(uap, offset); 2764 break; 2765 case SEEK_END: 2766 vn_lock(vp, LK_SHARED | LK_RETRY); 2767 error = VOP_GETATTR(vp, &vattr, cred); 2768 VOP_UNLOCK(vp); 2769 if (error) { 2770 goto out; 2771 } 2772 newoff = SCARG(uap, offset) + vattr.va_size; 2773 break; 2774 case SEEK_SET: 2775 newoff = SCARG(uap, offset); 2776 break; 2777 default: 2778 error = EINVAL; 2779 goto out; 2780 } 2781 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2782 *(off_t *)retval = fp->f_offset = newoff; 2783 } 2784 out: 2785 fd_putfile(fd); 2786 return (error); 2787 } 2788 2789 /* 2790 * Positional read system call. 2791 */ 2792 int 2793 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2794 { 2795 /* { 2796 syscallarg(int) fd; 2797 syscallarg(void *) buf; 2798 syscallarg(size_t) nbyte; 2799 syscallarg(off_t) offset; 2800 } */ 2801 file_t *fp; 2802 struct vnode *vp; 2803 off_t offset; 2804 int error, fd = SCARG(uap, fd); 2805 2806 if ((fp = fd_getfile(fd)) == NULL) 2807 return (EBADF); 2808 2809 if ((fp->f_flag & FREAD) == 0) { 2810 fd_putfile(fd); 2811 return (EBADF); 2812 } 2813 2814 vp = fp->f_vnode; 2815 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2816 error = ESPIPE; 2817 goto out; 2818 } 2819 2820 offset = SCARG(uap, offset); 2821 2822 /* 2823 * XXX This works because no file systems actually 2824 * XXX take any action on the seek operation. 2825 */ 2826 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2827 goto out; 2828 2829 /* dofileread() will unuse the descriptor for us */ 2830 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2831 &offset, 0, retval)); 2832 2833 out: 2834 fd_putfile(fd); 2835 return (error); 2836 } 2837 2838 /* 2839 * Positional scatter read system call. 2840 */ 2841 int 2842 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2843 { 2844 /* { 2845 syscallarg(int) fd; 2846 syscallarg(const struct iovec *) iovp; 2847 syscallarg(int) iovcnt; 2848 syscallarg(off_t) offset; 2849 } */ 2850 off_t offset = SCARG(uap, offset); 2851 2852 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2853 SCARG(uap, iovcnt), &offset, 0, retval); 2854 } 2855 2856 /* 2857 * Positional write system call. 2858 */ 2859 int 2860 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2861 { 2862 /* { 2863 syscallarg(int) fd; 2864 syscallarg(const void *) buf; 2865 syscallarg(size_t) nbyte; 2866 syscallarg(off_t) offset; 2867 } */ 2868 file_t *fp; 2869 struct vnode *vp; 2870 off_t offset; 2871 int error, fd = SCARG(uap, fd); 2872 2873 if ((fp = fd_getfile(fd)) == NULL) 2874 return (EBADF); 2875 2876 if ((fp->f_flag & FWRITE) == 0) { 2877 fd_putfile(fd); 2878 return (EBADF); 2879 } 2880 2881 vp = fp->f_vnode; 2882 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2883 error = ESPIPE; 2884 goto out; 2885 } 2886 2887 offset = SCARG(uap, offset); 2888 2889 /* 2890 * XXX This works because no file systems actually 2891 * XXX take any action on the seek operation. 2892 */ 2893 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2894 goto out; 2895 2896 /* dofilewrite() will unuse the descriptor for us */ 2897 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2898 &offset, 0, retval)); 2899 2900 out: 2901 fd_putfile(fd); 2902 return (error); 2903 } 2904 2905 /* 2906 * Positional gather write system call. 2907 */ 2908 int 2909 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2910 { 2911 /* { 2912 syscallarg(int) fd; 2913 syscallarg(const struct iovec *) iovp; 2914 syscallarg(int) iovcnt; 2915 syscallarg(off_t) offset; 2916 } */ 2917 off_t offset = SCARG(uap, offset); 2918 2919 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2920 SCARG(uap, iovcnt), &offset, 0, retval); 2921 } 2922 2923 /* 2924 * Check access permissions. 2925 */ 2926 int 2927 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2928 { 2929 /* { 2930 syscallarg(const char *) path; 2931 syscallarg(int) flags; 2932 } */ 2933 2934 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2935 SCARG(uap, flags), 0); 2936 } 2937 2938 int 2939 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2940 int mode, int flags) 2941 { 2942 kauth_cred_t cred; 2943 struct vnode *vp; 2944 int error, nd_flag, vmode; 2945 struct pathbuf *pb; 2946 struct nameidata nd; 2947 2948 CTASSERT(F_OK == 0); 2949 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2950 /* nonsense mode */ 2951 return EINVAL; 2952 } 2953 2954 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2955 if (flags & AT_SYMLINK_NOFOLLOW) 2956 nd_flag &= ~FOLLOW; 2957 2958 error = pathbuf_copyin(path, &pb); 2959 if (error) 2960 return error; 2961 2962 NDINIT(&nd, LOOKUP, nd_flag, pb); 2963 2964 /* Override default credentials */ 2965 cred = kauth_cred_dup(l->l_cred); 2966 if (!(flags & AT_EACCESS)) { 2967 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2968 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2969 } 2970 nd.ni_cnd.cn_cred = cred; 2971 2972 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2973 pathbuf_destroy(pb); 2974 goto out; 2975 } 2976 vp = nd.ni_vp; 2977 pathbuf_destroy(pb); 2978 2979 /* Flags == 0 means only check for existence. */ 2980 if (mode) { 2981 vmode = 0; 2982 if (mode & R_OK) 2983 vmode |= VREAD; 2984 if (mode & W_OK) 2985 vmode |= VWRITE; 2986 if (mode & X_OK) 2987 vmode |= VEXEC; 2988 2989 error = VOP_ACCESS(vp, vmode, cred); 2990 if (!error && (vmode & VWRITE)) 2991 error = vn_writechk(vp); 2992 } 2993 vput(vp); 2994 out: 2995 kauth_cred_free(cred); 2996 return (error); 2997 } 2998 2999 int 3000 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3001 register_t *retval) 3002 { 3003 /* { 3004 syscallarg(int) fd; 3005 syscallarg(const char *) path; 3006 syscallarg(int) amode; 3007 syscallarg(int) flag; 3008 } */ 3009 3010 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3011 SCARG(uap, amode), SCARG(uap, flag)); 3012 } 3013 3014 /* 3015 * Common code for all sys_stat functions, including compat versions. 3016 */ 3017 int 3018 do_sys_stat(const char *userpath, unsigned int nd_flag, 3019 struct stat *sb) 3020 { 3021 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3022 } 3023 3024 int 3025 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3026 unsigned int nd_flag, struct stat *sb) 3027 { 3028 int error; 3029 struct pathbuf *pb; 3030 struct nameidata nd; 3031 3032 KASSERT(l != NULL || fdat == AT_FDCWD); 3033 3034 error = pathbuf_copyin(userpath, &pb); 3035 if (error) { 3036 return error; 3037 } 3038 3039 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3040 3041 error = fd_nameiat(l, fdat, &nd); 3042 if (error != 0) { 3043 pathbuf_destroy(pb); 3044 return error; 3045 } 3046 error = vn_stat(nd.ni_vp, sb); 3047 vput(nd.ni_vp); 3048 pathbuf_destroy(pb); 3049 return error; 3050 } 3051 3052 /* 3053 * Get file status; this version follows links. 3054 */ 3055 /* ARGSUSED */ 3056 int 3057 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3058 { 3059 /* { 3060 syscallarg(const char *) path; 3061 syscallarg(struct stat *) ub; 3062 } */ 3063 struct stat sb; 3064 int error; 3065 3066 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3067 if (error) 3068 return error; 3069 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3070 } 3071 3072 /* 3073 * Get file status; this version does not follow links. 3074 */ 3075 /* ARGSUSED */ 3076 int 3077 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3078 { 3079 /* { 3080 syscallarg(const char *) path; 3081 syscallarg(struct stat *) ub; 3082 } */ 3083 struct stat sb; 3084 int error; 3085 3086 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3087 if (error) 3088 return error; 3089 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3090 } 3091 3092 int 3093 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3094 register_t *retval) 3095 { 3096 /* { 3097 syscallarg(int) fd; 3098 syscallarg(const char *) path; 3099 syscallarg(struct stat *) buf; 3100 syscallarg(int) flag; 3101 } */ 3102 unsigned int nd_flag; 3103 struct stat sb; 3104 int error; 3105 3106 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3107 nd_flag = NOFOLLOW; 3108 else 3109 nd_flag = FOLLOW; 3110 3111 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3112 &sb); 3113 if (error) 3114 return error; 3115 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3116 } 3117 3118 /* 3119 * Get configurable pathname variables. 3120 */ 3121 /* ARGSUSED */ 3122 int 3123 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3124 { 3125 /* { 3126 syscallarg(const char *) path; 3127 syscallarg(int) name; 3128 } */ 3129 int error; 3130 struct pathbuf *pb; 3131 struct nameidata nd; 3132 3133 error = pathbuf_copyin(SCARG(uap, path), &pb); 3134 if (error) { 3135 return error; 3136 } 3137 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3138 if ((error = namei(&nd)) != 0) { 3139 pathbuf_destroy(pb); 3140 return (error); 3141 } 3142 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3143 vput(nd.ni_vp); 3144 pathbuf_destroy(pb); 3145 return (error); 3146 } 3147 3148 /* 3149 * Return target name of a symbolic link. 3150 */ 3151 /* ARGSUSED */ 3152 int 3153 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3154 register_t *retval) 3155 { 3156 /* { 3157 syscallarg(const char *) path; 3158 syscallarg(char *) buf; 3159 syscallarg(size_t) count; 3160 } */ 3161 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3162 SCARG(uap, buf), SCARG(uap, count), retval); 3163 } 3164 3165 static int 3166 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3167 size_t count, register_t *retval) 3168 { 3169 struct vnode *vp; 3170 struct iovec aiov; 3171 struct uio auio; 3172 int error; 3173 struct pathbuf *pb; 3174 struct nameidata nd; 3175 3176 error = pathbuf_copyin(path, &pb); 3177 if (error) { 3178 return error; 3179 } 3180 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3181 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3182 pathbuf_destroy(pb); 3183 return error; 3184 } 3185 vp = nd.ni_vp; 3186 pathbuf_destroy(pb); 3187 if (vp->v_type != VLNK) 3188 error = EINVAL; 3189 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3190 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3191 aiov.iov_base = buf; 3192 aiov.iov_len = count; 3193 auio.uio_iov = &aiov; 3194 auio.uio_iovcnt = 1; 3195 auio.uio_offset = 0; 3196 auio.uio_rw = UIO_READ; 3197 KASSERT(l == curlwp); 3198 auio.uio_vmspace = l->l_proc->p_vmspace; 3199 auio.uio_resid = count; 3200 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3201 *retval = count - auio.uio_resid; 3202 } 3203 vput(vp); 3204 return (error); 3205 } 3206 3207 int 3208 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3209 register_t *retval) 3210 { 3211 /* { 3212 syscallarg(int) fd; 3213 syscallarg(const char *) path; 3214 syscallarg(char *) buf; 3215 syscallarg(size_t) bufsize; 3216 } */ 3217 3218 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3219 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3220 } 3221 3222 /* 3223 * Change flags of a file given a path name. 3224 */ 3225 /* ARGSUSED */ 3226 int 3227 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3228 { 3229 /* { 3230 syscallarg(const char *) path; 3231 syscallarg(u_long) flags; 3232 } */ 3233 struct vnode *vp; 3234 int error; 3235 3236 error = namei_simple_user(SCARG(uap, path), 3237 NSM_FOLLOW_TRYEMULROOT, &vp); 3238 if (error != 0) 3239 return (error); 3240 error = change_flags(vp, SCARG(uap, flags), l); 3241 vput(vp); 3242 return (error); 3243 } 3244 3245 /* 3246 * Change flags of a file given a file descriptor. 3247 */ 3248 /* ARGSUSED */ 3249 int 3250 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3251 { 3252 /* { 3253 syscallarg(int) fd; 3254 syscallarg(u_long) flags; 3255 } */ 3256 struct vnode *vp; 3257 file_t *fp; 3258 int error; 3259 3260 /* fd_getvnode() will use the descriptor for us */ 3261 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3262 return (error); 3263 vp = fp->f_vnode; 3264 error = change_flags(vp, SCARG(uap, flags), l); 3265 VOP_UNLOCK(vp); 3266 fd_putfile(SCARG(uap, fd)); 3267 return (error); 3268 } 3269 3270 /* 3271 * Change flags of a file given a path name; this version does 3272 * not follow links. 3273 */ 3274 int 3275 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3276 { 3277 /* { 3278 syscallarg(const char *) path; 3279 syscallarg(u_long) flags; 3280 } */ 3281 struct vnode *vp; 3282 int error; 3283 3284 error = namei_simple_user(SCARG(uap, path), 3285 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3286 if (error != 0) 3287 return (error); 3288 error = change_flags(vp, SCARG(uap, flags), l); 3289 vput(vp); 3290 return (error); 3291 } 3292 3293 /* 3294 * Common routine to change flags of a file. 3295 */ 3296 int 3297 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3298 { 3299 struct vattr vattr; 3300 int error; 3301 3302 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3303 3304 vattr_null(&vattr); 3305 vattr.va_flags = flags; 3306 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3307 3308 return (error); 3309 } 3310 3311 /* 3312 * Change mode of a file given path name; this version follows links. 3313 */ 3314 /* ARGSUSED */ 3315 int 3316 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3317 { 3318 /* { 3319 syscallarg(const char *) path; 3320 syscallarg(int) mode; 3321 } */ 3322 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3323 SCARG(uap, mode), 0); 3324 } 3325 3326 int 3327 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3328 { 3329 int error; 3330 struct vnode *vp; 3331 namei_simple_flags_t ns_flag; 3332 3333 if (flags & AT_SYMLINK_NOFOLLOW) 3334 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3335 else 3336 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3337 3338 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3339 if (error != 0) 3340 return error; 3341 3342 error = change_mode(vp, mode, l); 3343 3344 vrele(vp); 3345 3346 return (error); 3347 } 3348 3349 /* 3350 * Change mode of a file given a file descriptor. 3351 */ 3352 /* ARGSUSED */ 3353 int 3354 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3355 { 3356 /* { 3357 syscallarg(int) fd; 3358 syscallarg(int) mode; 3359 } */ 3360 file_t *fp; 3361 int error; 3362 3363 /* fd_getvnode() will use the descriptor for us */ 3364 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3365 return (error); 3366 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3367 fd_putfile(SCARG(uap, fd)); 3368 return (error); 3369 } 3370 3371 int 3372 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3373 register_t *retval) 3374 { 3375 /* { 3376 syscallarg(int) fd; 3377 syscallarg(const char *) path; 3378 syscallarg(int) mode; 3379 syscallarg(int) flag; 3380 } */ 3381 3382 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3383 SCARG(uap, mode), SCARG(uap, flag)); 3384 } 3385 3386 /* 3387 * Change mode of a file given path name; this version does not follow links. 3388 */ 3389 /* ARGSUSED */ 3390 int 3391 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3392 { 3393 /* { 3394 syscallarg(const char *) path; 3395 syscallarg(int) mode; 3396 } */ 3397 int error; 3398 struct vnode *vp; 3399 3400 error = namei_simple_user(SCARG(uap, path), 3401 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3402 if (error != 0) 3403 return (error); 3404 3405 error = change_mode(vp, SCARG(uap, mode), l); 3406 3407 vrele(vp); 3408 return (error); 3409 } 3410 3411 /* 3412 * Common routine to set mode given a vnode. 3413 */ 3414 static int 3415 change_mode(struct vnode *vp, int mode, struct lwp *l) 3416 { 3417 struct vattr vattr; 3418 int error; 3419 3420 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3421 vattr_null(&vattr); 3422 vattr.va_mode = mode & ALLPERMS; 3423 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3424 VOP_UNLOCK(vp); 3425 return (error); 3426 } 3427 3428 /* 3429 * Set ownership given a path name; this version follows links. 3430 */ 3431 /* ARGSUSED */ 3432 int 3433 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3434 { 3435 /* { 3436 syscallarg(const char *) path; 3437 syscallarg(uid_t) uid; 3438 syscallarg(gid_t) gid; 3439 } */ 3440 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3441 SCARG(uap, gid), 0); 3442 } 3443 3444 int 3445 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3446 gid_t gid, int flags) 3447 { 3448 int error; 3449 struct vnode *vp; 3450 namei_simple_flags_t ns_flag; 3451 3452 if (flags & AT_SYMLINK_NOFOLLOW) 3453 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3454 else 3455 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3456 3457 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3458 if (error != 0) 3459 return error; 3460 3461 error = change_owner(vp, uid, gid, l, 0); 3462 3463 vrele(vp); 3464 3465 return (error); 3466 } 3467 3468 /* 3469 * Set ownership given a path name; this version follows links. 3470 * Provides POSIX semantics. 3471 */ 3472 /* ARGSUSED */ 3473 int 3474 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3475 { 3476 /* { 3477 syscallarg(const char *) path; 3478 syscallarg(uid_t) uid; 3479 syscallarg(gid_t) gid; 3480 } */ 3481 int error; 3482 struct vnode *vp; 3483 3484 error = namei_simple_user(SCARG(uap, path), 3485 NSM_FOLLOW_TRYEMULROOT, &vp); 3486 if (error != 0) 3487 return (error); 3488 3489 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3490 3491 vrele(vp); 3492 return (error); 3493 } 3494 3495 /* 3496 * Set ownership given a file descriptor. 3497 */ 3498 /* ARGSUSED */ 3499 int 3500 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3501 { 3502 /* { 3503 syscallarg(int) fd; 3504 syscallarg(uid_t) uid; 3505 syscallarg(gid_t) gid; 3506 } */ 3507 int error; 3508 file_t *fp; 3509 3510 /* fd_getvnode() will use the descriptor for us */ 3511 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3512 return (error); 3513 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3514 l, 0); 3515 fd_putfile(SCARG(uap, fd)); 3516 return (error); 3517 } 3518 3519 int 3520 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3521 register_t *retval) 3522 { 3523 /* { 3524 syscallarg(int) fd; 3525 syscallarg(const char *) path; 3526 syscallarg(uid_t) owner; 3527 syscallarg(gid_t) group; 3528 syscallarg(int) flag; 3529 } */ 3530 3531 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3532 SCARG(uap, owner), SCARG(uap, group), 3533 SCARG(uap, flag)); 3534 } 3535 3536 /* 3537 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3538 */ 3539 /* ARGSUSED */ 3540 int 3541 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3542 { 3543 /* { 3544 syscallarg(int) fd; 3545 syscallarg(uid_t) uid; 3546 syscallarg(gid_t) gid; 3547 } */ 3548 int error; 3549 file_t *fp; 3550 3551 /* fd_getvnode() will use the descriptor for us */ 3552 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3553 return (error); 3554 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3555 l, 1); 3556 fd_putfile(SCARG(uap, fd)); 3557 return (error); 3558 } 3559 3560 /* 3561 * Set ownership given a path name; this version does not follow links. 3562 */ 3563 /* ARGSUSED */ 3564 int 3565 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3566 { 3567 /* { 3568 syscallarg(const char *) path; 3569 syscallarg(uid_t) uid; 3570 syscallarg(gid_t) gid; 3571 } */ 3572 int error; 3573 struct vnode *vp; 3574 3575 error = namei_simple_user(SCARG(uap, path), 3576 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3577 if (error != 0) 3578 return (error); 3579 3580 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3581 3582 vrele(vp); 3583 return (error); 3584 } 3585 3586 /* 3587 * Set ownership given a path name; this version does not follow links. 3588 * Provides POSIX/XPG semantics. 3589 */ 3590 /* ARGSUSED */ 3591 int 3592 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3593 { 3594 /* { 3595 syscallarg(const char *) path; 3596 syscallarg(uid_t) uid; 3597 syscallarg(gid_t) gid; 3598 } */ 3599 int error; 3600 struct vnode *vp; 3601 3602 error = namei_simple_user(SCARG(uap, path), 3603 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3604 if (error != 0) 3605 return (error); 3606 3607 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3608 3609 vrele(vp); 3610 return (error); 3611 } 3612 3613 /* 3614 * Common routine to set ownership given a vnode. 3615 */ 3616 static int 3617 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3618 int posix_semantics) 3619 { 3620 struct vattr vattr; 3621 mode_t newmode; 3622 int error; 3623 3624 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3625 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3626 goto out; 3627 3628 #define CHANGED(x) ((int)(x) != -1) 3629 newmode = vattr.va_mode; 3630 if (posix_semantics) { 3631 /* 3632 * POSIX/XPG semantics: if the caller is not the super-user, 3633 * clear set-user-id and set-group-id bits. Both POSIX and 3634 * the XPG consider the behaviour for calls by the super-user 3635 * implementation-defined; we leave the set-user-id and set- 3636 * group-id settings intact in that case. 3637 */ 3638 if (vattr.va_mode & S_ISUID) { 3639 if (kauth_authorize_vnode(l->l_cred, 3640 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3641 newmode &= ~S_ISUID; 3642 } 3643 if (vattr.va_mode & S_ISGID) { 3644 if (kauth_authorize_vnode(l->l_cred, 3645 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3646 newmode &= ~S_ISGID; 3647 } 3648 } else { 3649 /* 3650 * NetBSD semantics: when changing owner and/or group, 3651 * clear the respective bit(s). 3652 */ 3653 if (CHANGED(uid)) 3654 newmode &= ~S_ISUID; 3655 if (CHANGED(gid)) 3656 newmode &= ~S_ISGID; 3657 } 3658 /* Update va_mode iff altered. */ 3659 if (vattr.va_mode == newmode) 3660 newmode = VNOVAL; 3661 3662 vattr_null(&vattr); 3663 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3664 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3665 vattr.va_mode = newmode; 3666 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3667 #undef CHANGED 3668 3669 out: 3670 VOP_UNLOCK(vp); 3671 return (error); 3672 } 3673 3674 /* 3675 * Set the access and modification times given a path name; this 3676 * version follows links. 3677 */ 3678 /* ARGSUSED */ 3679 int 3680 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3681 register_t *retval) 3682 { 3683 /* { 3684 syscallarg(const char *) path; 3685 syscallarg(const struct timeval *) tptr; 3686 } */ 3687 3688 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3689 SCARG(uap, tptr), UIO_USERSPACE); 3690 } 3691 3692 /* 3693 * Set the access and modification times given a file descriptor. 3694 */ 3695 /* ARGSUSED */ 3696 int 3697 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3698 register_t *retval) 3699 { 3700 /* { 3701 syscallarg(int) fd; 3702 syscallarg(const struct timeval *) tptr; 3703 } */ 3704 int error; 3705 file_t *fp; 3706 3707 /* fd_getvnode() will use the descriptor for us */ 3708 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3709 return (error); 3710 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3711 UIO_USERSPACE); 3712 fd_putfile(SCARG(uap, fd)); 3713 return (error); 3714 } 3715 3716 int 3717 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3718 register_t *retval) 3719 { 3720 /* { 3721 syscallarg(int) fd; 3722 syscallarg(const struct timespec *) tptr; 3723 } */ 3724 int error; 3725 file_t *fp; 3726 3727 /* fd_getvnode() will use the descriptor for us */ 3728 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3729 return (error); 3730 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3731 SCARG(uap, tptr), UIO_USERSPACE); 3732 fd_putfile(SCARG(uap, fd)); 3733 return (error); 3734 } 3735 3736 /* 3737 * Set the access and modification times given a path name; this 3738 * version does not follow links. 3739 */ 3740 int 3741 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3742 register_t *retval) 3743 { 3744 /* { 3745 syscallarg(const char *) path; 3746 syscallarg(const struct timeval *) tptr; 3747 } */ 3748 3749 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3750 SCARG(uap, tptr), UIO_USERSPACE); 3751 } 3752 3753 int 3754 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3755 register_t *retval) 3756 { 3757 /* { 3758 syscallarg(int) fd; 3759 syscallarg(const char *) path; 3760 syscallarg(const struct timespec *) tptr; 3761 syscallarg(int) flag; 3762 } */ 3763 int follow; 3764 const struct timespec *tptr; 3765 int error; 3766 3767 tptr = SCARG(uap, tptr); 3768 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3769 3770 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3771 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3772 3773 return error; 3774 } 3775 3776 /* 3777 * Common routine to set access and modification times given a vnode. 3778 */ 3779 int 3780 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3781 const struct timespec *tptr, enum uio_seg seg) 3782 { 3783 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3784 } 3785 3786 int 3787 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3788 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3789 { 3790 struct vattr vattr; 3791 int error, dorele = 0; 3792 namei_simple_flags_t sflags; 3793 bool vanull, setbirthtime; 3794 struct timespec ts[2]; 3795 3796 KASSERT(l != NULL || fdat == AT_FDCWD); 3797 3798 /* 3799 * I have checked all callers and they pass either FOLLOW, 3800 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3801 * is 0. More to the point, they don't pass anything else. 3802 * Let's keep it that way at least until the namei interfaces 3803 * are fully sanitized. 3804 */ 3805 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3806 sflags = (flag == FOLLOW) ? 3807 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3808 3809 if (tptr == NULL) { 3810 vanull = true; 3811 nanotime(&ts[0]); 3812 ts[1] = ts[0]; 3813 } else { 3814 vanull = false; 3815 if (seg != UIO_SYSSPACE) { 3816 error = copyin(tptr, ts, sizeof (ts)); 3817 if (error != 0) 3818 return error; 3819 } else { 3820 ts[0] = tptr[0]; 3821 ts[1] = tptr[1]; 3822 } 3823 } 3824 3825 if (ts[0].tv_nsec == UTIME_NOW) { 3826 nanotime(&ts[0]); 3827 if (ts[1].tv_nsec == UTIME_NOW) { 3828 vanull = true; 3829 ts[1] = ts[0]; 3830 } 3831 } else if (ts[1].tv_nsec == UTIME_NOW) 3832 nanotime(&ts[1]); 3833 3834 if (vp == NULL) { 3835 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3836 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3837 if (error != 0) 3838 return error; 3839 dorele = 1; 3840 } 3841 3842 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3843 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3844 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3845 vattr_null(&vattr); 3846 3847 if (ts[0].tv_nsec != UTIME_OMIT) 3848 vattr.va_atime = ts[0]; 3849 3850 if (ts[1].tv_nsec != UTIME_OMIT) { 3851 vattr.va_mtime = ts[1]; 3852 if (setbirthtime) 3853 vattr.va_birthtime = ts[1]; 3854 } 3855 3856 if (vanull) 3857 vattr.va_vaflags |= VA_UTIMES_NULL; 3858 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3859 VOP_UNLOCK(vp); 3860 3861 if (dorele != 0) 3862 vrele(vp); 3863 3864 return error; 3865 } 3866 3867 int 3868 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3869 const struct timeval *tptr, enum uio_seg seg) 3870 { 3871 struct timespec ts[2]; 3872 struct timespec *tsptr = NULL; 3873 int error; 3874 3875 if (tptr != NULL) { 3876 struct timeval tv[2]; 3877 3878 if (seg != UIO_SYSSPACE) { 3879 error = copyin(tptr, tv, sizeof (tv)); 3880 if (error != 0) 3881 return error; 3882 tptr = tv; 3883 } 3884 3885 if ((tv[0].tv_usec == UTIME_NOW) || 3886 (tv[0].tv_usec == UTIME_OMIT)) 3887 ts[0].tv_nsec = tv[0].tv_usec; 3888 else 3889 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3890 3891 if ((tv[1].tv_usec == UTIME_NOW) || 3892 (tv[1].tv_usec == UTIME_OMIT)) 3893 ts[1].tv_nsec = tv[1].tv_usec; 3894 else 3895 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3896 3897 tsptr = &ts[0]; 3898 } 3899 3900 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3901 } 3902 3903 /* 3904 * Truncate a file given its path name. 3905 */ 3906 /* ARGSUSED */ 3907 int 3908 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3909 { 3910 /* { 3911 syscallarg(const char *) path; 3912 syscallarg(int) pad; 3913 syscallarg(off_t) length; 3914 } */ 3915 struct vnode *vp; 3916 struct vattr vattr; 3917 int error; 3918 3919 if (SCARG(uap, length) < 0) 3920 return EINVAL; 3921 3922 error = namei_simple_user(SCARG(uap, path), 3923 NSM_FOLLOW_TRYEMULROOT, &vp); 3924 if (error != 0) 3925 return (error); 3926 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3927 if (vp->v_type == VDIR) 3928 error = EISDIR; 3929 else if ((error = vn_writechk(vp)) == 0 && 3930 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3931 vattr_null(&vattr); 3932 vattr.va_size = SCARG(uap, length); 3933 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3934 } 3935 vput(vp); 3936 return (error); 3937 } 3938 3939 /* 3940 * Truncate a file given a file descriptor. 3941 */ 3942 /* ARGSUSED */ 3943 int 3944 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3945 { 3946 /* { 3947 syscallarg(int) fd; 3948 syscallarg(int) pad; 3949 syscallarg(off_t) length; 3950 } */ 3951 struct vattr vattr; 3952 struct vnode *vp; 3953 file_t *fp; 3954 int error; 3955 3956 if (SCARG(uap, length) < 0) 3957 return EINVAL; 3958 3959 /* fd_getvnode() will use the descriptor for us */ 3960 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3961 return (error); 3962 if ((fp->f_flag & FWRITE) == 0) { 3963 error = EINVAL; 3964 goto out; 3965 } 3966 vp = fp->f_vnode; 3967 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3968 if (vp->v_type == VDIR) 3969 error = EISDIR; 3970 else if ((error = vn_writechk(vp)) == 0) { 3971 vattr_null(&vattr); 3972 vattr.va_size = SCARG(uap, length); 3973 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3974 } 3975 VOP_UNLOCK(vp); 3976 out: 3977 fd_putfile(SCARG(uap, fd)); 3978 return (error); 3979 } 3980 3981 /* 3982 * Sync an open file. 3983 */ 3984 /* ARGSUSED */ 3985 int 3986 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3987 { 3988 /* { 3989 syscallarg(int) fd; 3990 } */ 3991 struct vnode *vp; 3992 file_t *fp; 3993 int error; 3994 3995 /* fd_getvnode() will use the descriptor for us */ 3996 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3997 return (error); 3998 vp = fp->f_vnode; 3999 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4000 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4001 VOP_UNLOCK(vp); 4002 fd_putfile(SCARG(uap, fd)); 4003 return (error); 4004 } 4005 4006 /* 4007 * Sync a range of file data. API modeled after that found in AIX. 4008 * 4009 * FDATASYNC indicates that we need only save enough metadata to be able 4010 * to re-read the written data. Note we duplicate AIX's requirement that 4011 * the file be open for writing. 4012 */ 4013 /* ARGSUSED */ 4014 int 4015 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4016 { 4017 /* { 4018 syscallarg(int) fd; 4019 syscallarg(int) flags; 4020 syscallarg(off_t) start; 4021 syscallarg(off_t) length; 4022 } */ 4023 struct vnode *vp; 4024 file_t *fp; 4025 int flags, nflags; 4026 off_t s, e, len; 4027 int error; 4028 4029 /* fd_getvnode() will use the descriptor for us */ 4030 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4031 return (error); 4032 4033 if ((fp->f_flag & FWRITE) == 0) { 4034 error = EBADF; 4035 goto out; 4036 } 4037 4038 flags = SCARG(uap, flags); 4039 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4040 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4041 error = EINVAL; 4042 goto out; 4043 } 4044 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4045 if (flags & FDATASYNC) 4046 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4047 else 4048 nflags = FSYNC_WAIT; 4049 if (flags & FDISKSYNC) 4050 nflags |= FSYNC_CACHE; 4051 4052 len = SCARG(uap, length); 4053 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4054 if (len) { 4055 s = SCARG(uap, start); 4056 e = s + len; 4057 if (e < s) { 4058 error = EINVAL; 4059 goto out; 4060 } 4061 } else { 4062 e = 0; 4063 s = 0; 4064 } 4065 4066 vp = fp->f_vnode; 4067 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4068 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4069 VOP_UNLOCK(vp); 4070 out: 4071 fd_putfile(SCARG(uap, fd)); 4072 return (error); 4073 } 4074 4075 /* 4076 * Sync the data of an open file. 4077 */ 4078 /* ARGSUSED */ 4079 int 4080 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4081 { 4082 /* { 4083 syscallarg(int) fd; 4084 } */ 4085 struct vnode *vp; 4086 file_t *fp; 4087 int error; 4088 4089 /* fd_getvnode() will use the descriptor for us */ 4090 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4091 return (error); 4092 if ((fp->f_flag & FWRITE) == 0) { 4093 fd_putfile(SCARG(uap, fd)); 4094 return (EBADF); 4095 } 4096 vp = fp->f_vnode; 4097 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4098 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4099 VOP_UNLOCK(vp); 4100 fd_putfile(SCARG(uap, fd)); 4101 return (error); 4102 } 4103 4104 /* 4105 * Rename files, (standard) BSD semantics frontend. 4106 */ 4107 /* ARGSUSED */ 4108 int 4109 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4110 { 4111 /* { 4112 syscallarg(const char *) from; 4113 syscallarg(const char *) to; 4114 } */ 4115 4116 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4117 SCARG(uap, to), UIO_USERSPACE, 0)); 4118 } 4119 4120 int 4121 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4122 register_t *retval) 4123 { 4124 /* { 4125 syscallarg(int) fromfd; 4126 syscallarg(const char *) from; 4127 syscallarg(int) tofd; 4128 syscallarg(const char *) to; 4129 } */ 4130 4131 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4132 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4133 } 4134 4135 /* 4136 * Rename files, POSIX semantics frontend. 4137 */ 4138 /* ARGSUSED */ 4139 int 4140 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4141 { 4142 /* { 4143 syscallarg(const char *) from; 4144 syscallarg(const char *) to; 4145 } */ 4146 4147 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4148 SCARG(uap, to), UIO_USERSPACE, 1)); 4149 } 4150 4151 /* 4152 * Rename files. Source and destination must either both be directories, 4153 * or both not be directories. If target is a directory, it must be empty. 4154 * If `from' and `to' refer to the same object, the value of the `retain' 4155 * argument is used to determine whether `from' will be 4156 * 4157 * (retain == 0) deleted unless `from' and `to' refer to the same 4158 * object in the file system's name space (BSD). 4159 * (retain == 1) always retained (POSIX). 4160 * 4161 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4162 */ 4163 int 4164 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4165 { 4166 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4167 } 4168 4169 static int 4170 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4171 const char *to, enum uio_seg seg, int retain) 4172 { 4173 struct pathbuf *fpb, *tpb; 4174 struct nameidata fnd, tnd; 4175 struct vnode *fdvp, *fvp; 4176 struct vnode *tdvp, *tvp; 4177 struct mount *mp, *tmp; 4178 int error; 4179 4180 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4181 4182 error = pathbuf_maybe_copyin(from, seg, &fpb); 4183 if (error) 4184 goto out0; 4185 KASSERT(fpb != NULL); 4186 4187 error = pathbuf_maybe_copyin(to, seg, &tpb); 4188 if (error) 4189 goto out1; 4190 KASSERT(tpb != NULL); 4191 4192 /* 4193 * Lookup from. 4194 * 4195 * XXX LOCKPARENT is wrong because we don't actually want it 4196 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4197 * insane, so for the time being we need to leave it like this. 4198 */ 4199 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4200 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4201 goto out2; 4202 4203 /* 4204 * Pull out the important results of the lookup, fdvp and fvp. 4205 * Of course, fvp is bogus because we're about to unlock fdvp. 4206 */ 4207 fdvp = fnd.ni_dvp; 4208 fvp = fnd.ni_vp; 4209 KASSERT(fdvp != NULL); 4210 KASSERT(fvp != NULL); 4211 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4212 4213 /* 4214 * Make sure neither fdvp nor fvp is locked. 4215 */ 4216 if (fdvp != fvp) 4217 VOP_UNLOCK(fdvp); 4218 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4219 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4220 4221 /* 4222 * Reject renaming `.' and `..'. Can't do this until after 4223 * namei because we need namei's parsing to find the final 4224 * component name. (namei should just leave us with the final 4225 * component name and not look it up itself, but anyway...) 4226 * 4227 * This was here before because we used to relookup from 4228 * instead of to and relookup requires the caller to check 4229 * this, but now file systems may depend on this check, so we 4230 * must retain it until the file systems are all rototilled. 4231 */ 4232 if (((fnd.ni_cnd.cn_namelen == 1) && 4233 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4234 ((fnd.ni_cnd.cn_namelen == 2) && 4235 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4236 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4237 error = EINVAL; /* XXX EISDIR? */ 4238 goto abort0; 4239 } 4240 4241 /* 4242 * Lookup to. 4243 * 4244 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4245 * fvp here to decide whether to add CREATEDIR is a load of 4246 * bollocks because fvp might be the wrong node by now, since 4247 * fdvp is unlocked. 4248 * 4249 * XXX Why not pass CREATEDIR always? 4250 */ 4251 NDINIT(&tnd, RENAME, 4252 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4253 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4254 tpb); 4255 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4256 goto abort0; 4257 4258 /* 4259 * Pull out the important results of the lookup, tdvp and tvp. 4260 * Of course, tvp is bogus because we're about to unlock tdvp. 4261 */ 4262 tdvp = tnd.ni_dvp; 4263 tvp = tnd.ni_vp; 4264 KASSERT(tdvp != NULL); 4265 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4266 4267 /* 4268 * Make sure neither tdvp nor tvp is locked. 4269 */ 4270 if (tdvp != tvp) 4271 VOP_UNLOCK(tdvp); 4272 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4273 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4274 4275 /* 4276 * Reject renaming onto `.' or `..'. relookup is unhappy with 4277 * these, which is why we must do this here. Once upon a time 4278 * we relooked up from instead of to, and consequently didn't 4279 * need this check, but now that we relookup to instead of 4280 * from, we need this; and we shall need it forever forward 4281 * until the VOP_RENAME protocol changes, because file systems 4282 * will no doubt begin to depend on this check. 4283 */ 4284 if (((tnd.ni_cnd.cn_namelen == 1) && 4285 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4286 ((tnd.ni_cnd.cn_namelen == 2) && 4287 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4288 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4289 error = EINVAL; /* XXX EISDIR? */ 4290 goto abort1; 4291 } 4292 4293 /* 4294 * Get the mount point. If the file system has been unmounted, 4295 * which it may be because we're not holding any vnode locks, 4296 * then v_mount will be NULL. We're not really supposed to 4297 * read v_mount without holding the vnode lock, but since we 4298 * have fdvp referenced, if fdvp->v_mount changes then at worst 4299 * it will be set to NULL, not changed to another mount point. 4300 * And, of course, since it is up to the file system to 4301 * determine the real lock order, we can't lock both fdvp and 4302 * tdvp at the same time. 4303 */ 4304 mp = fdvp->v_mount; 4305 if (mp == NULL) { 4306 error = ENOENT; 4307 goto abort1; 4308 } 4309 4310 /* 4311 * Make sure the mount points match. Again, although we don't 4312 * hold any vnode locks, the v_mount fields may change -- but 4313 * at worst they will change to NULL, so this will never become 4314 * a cross-device rename, because we hold vnode references. 4315 * 4316 * XXX Because nothing is locked and the compiler may reorder 4317 * things here, unmounting the file system at an inopportune 4318 * moment may cause rename to fail with ENXDEV when it really 4319 * should fail with ENOENT. 4320 */ 4321 tmp = tdvp->v_mount; 4322 if (tmp == NULL) { 4323 error = ENOENT; 4324 goto abort1; 4325 } 4326 4327 if (mp != tmp) { 4328 error = EXDEV; 4329 goto abort1; 4330 } 4331 4332 /* 4333 * Take the vfs rename lock to avoid cross-directory screw cases. 4334 * Nothing is locked currently, so taking this lock is safe. 4335 */ 4336 error = VFS_RENAMELOCK_ENTER(mp); 4337 if (error) 4338 goto abort1; 4339 4340 /* 4341 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4342 * and nothing is locked except for the vfs rename lock. 4343 * 4344 * The next step is a little rain dance to conform to the 4345 * insane lock protocol, even though it does nothing to ward 4346 * off race conditions. 4347 * 4348 * We need tdvp and tvp to be locked. However, because we have 4349 * unlocked tdvp in order to hold no locks while we take the 4350 * vfs rename lock, tvp may be wrong here, and we can't safely 4351 * lock it even if the sensible file systems will just unlock 4352 * it straight away. Consequently, we must lock tdvp and then 4353 * relookup tvp to get it locked. 4354 * 4355 * Finally, because the VOP_RENAME protocol is brain-damaged 4356 * and various file systems insanely depend on the semantics of 4357 * this brain damage, the lookup of to must be the last lookup 4358 * before VOP_RENAME. 4359 */ 4360 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4361 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4362 if (error) 4363 goto abort2; 4364 4365 /* 4366 * Drop the old tvp and pick up the new one -- which might be 4367 * the same, but that doesn't matter to us. After this, tdvp 4368 * and tvp should both be locked. 4369 */ 4370 if (tvp != NULL) 4371 vrele(tvp); 4372 tvp = tnd.ni_vp; 4373 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4374 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4375 4376 /* 4377 * The old do_sys_rename had various consistency checks here 4378 * involving fvp and tvp. fvp is bogus already here, and tvp 4379 * will become bogus soon in any sensible file system, so the 4380 * only purpose in putting these checks here is to give lip 4381 * service to these screw cases and to acknowledge that they 4382 * exist, not actually to handle them, but here you go 4383 * anyway... 4384 */ 4385 4386 /* 4387 * Acknowledge that directories and non-directories aren't 4388 * suposed to mix. 4389 */ 4390 if (tvp != NULL) { 4391 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4392 error = ENOTDIR; 4393 goto abort3; 4394 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4395 error = EISDIR; 4396 goto abort3; 4397 } 4398 } 4399 4400 /* 4401 * Acknowledge some random screw case, among the dozens that 4402 * might arise. 4403 */ 4404 if (fvp == tdvp) { 4405 error = EINVAL; 4406 goto abort3; 4407 } 4408 4409 /* 4410 * Acknowledge that POSIX has a wacky screw case. 4411 * 4412 * XXX Eventually the retain flag needs to be passed on to 4413 * VOP_RENAME. 4414 */ 4415 if (fvp == tvp) { 4416 if (retain) { 4417 error = 0; 4418 goto abort3; 4419 } else if ((fdvp == tdvp) && 4420 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4421 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4422 fnd.ni_cnd.cn_namelen))) { 4423 error = 0; 4424 goto abort3; 4425 } 4426 } 4427 4428 /* 4429 * Make sure veriexec can screw us up. (But a race can screw 4430 * up veriexec, of course -- remember, fvp and (soon) tvp are 4431 * bogus.) 4432 */ 4433 #if NVERIEXEC > 0 4434 { 4435 char *f1, *f2; 4436 size_t f1_len; 4437 size_t f2_len; 4438 4439 f1_len = fnd.ni_cnd.cn_namelen + 1; 4440 f1 = kmem_alloc(f1_len, KM_SLEEP); 4441 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4442 4443 f2_len = tnd.ni_cnd.cn_namelen + 1; 4444 f2 = kmem_alloc(f2_len, KM_SLEEP); 4445 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4446 4447 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4448 4449 kmem_free(f1, f1_len); 4450 kmem_free(f2, f2_len); 4451 4452 if (error) 4453 goto abort3; 4454 } 4455 #endif /* NVERIEXEC > 0 */ 4456 4457 /* 4458 * All ready. Incant the rename vop. 4459 */ 4460 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4461 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4462 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4463 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4464 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4465 4466 /* 4467 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4468 * tdvp and tvp. But we can't assert any of that. 4469 */ 4470 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4471 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4472 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4473 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4474 4475 /* 4476 * So all we have left to do is to drop the rename lock and 4477 * destroy the pathbufs. 4478 */ 4479 VFS_RENAMELOCK_EXIT(mp); 4480 goto out2; 4481 4482 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4483 VOP_UNLOCK(tvp); 4484 abort2: VOP_UNLOCK(tdvp); 4485 VFS_RENAMELOCK_EXIT(mp); 4486 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4487 vrele(tdvp); 4488 if (tvp != NULL) 4489 vrele(tvp); 4490 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4491 vrele(fdvp); 4492 vrele(fvp); 4493 out2: pathbuf_destroy(tpb); 4494 out1: pathbuf_destroy(fpb); 4495 out0: return error; 4496 } 4497 4498 /* 4499 * Make a directory file. 4500 */ 4501 /* ARGSUSED */ 4502 int 4503 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4504 { 4505 /* { 4506 syscallarg(const char *) path; 4507 syscallarg(int) mode; 4508 } */ 4509 4510 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4511 SCARG(uap, mode), UIO_USERSPACE); 4512 } 4513 4514 int 4515 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4516 register_t *retval) 4517 { 4518 /* { 4519 syscallarg(int) fd; 4520 syscallarg(const char *) path; 4521 syscallarg(int) mode; 4522 } */ 4523 4524 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4525 SCARG(uap, mode), UIO_USERSPACE); 4526 } 4527 4528 4529 int 4530 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4531 { 4532 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4533 } 4534 4535 static int 4536 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4537 enum uio_seg seg) 4538 { 4539 struct proc *p = curlwp->l_proc; 4540 struct vnode *vp; 4541 struct vattr vattr; 4542 int error; 4543 struct pathbuf *pb; 4544 struct nameidata nd; 4545 4546 KASSERT(l != NULL || fdat == AT_FDCWD); 4547 4548 /* XXX bollocks, should pass in a pathbuf */ 4549 error = pathbuf_maybe_copyin(path, seg, &pb); 4550 if (error) { 4551 return error; 4552 } 4553 4554 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4555 4556 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4557 pathbuf_destroy(pb); 4558 return (error); 4559 } 4560 vp = nd.ni_vp; 4561 if (vp != NULL) { 4562 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4563 if (nd.ni_dvp == vp) 4564 vrele(nd.ni_dvp); 4565 else 4566 vput(nd.ni_dvp); 4567 vrele(vp); 4568 pathbuf_destroy(pb); 4569 return (EEXIST); 4570 } 4571 vattr_null(&vattr); 4572 vattr.va_type = VDIR; 4573 /* We will read cwdi->cwdi_cmask unlocked. */ 4574 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4575 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4576 if (!error) 4577 vrele(nd.ni_vp); 4578 vput(nd.ni_dvp); 4579 pathbuf_destroy(pb); 4580 return (error); 4581 } 4582 4583 /* 4584 * Remove a directory file. 4585 */ 4586 /* ARGSUSED */ 4587 int 4588 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4589 { 4590 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4591 AT_REMOVEDIR, UIO_USERSPACE); 4592 } 4593 4594 /* 4595 * Read a block of directory entries in a file system independent format. 4596 */ 4597 int 4598 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4599 { 4600 /* { 4601 syscallarg(int) fd; 4602 syscallarg(char *) buf; 4603 syscallarg(size_t) count; 4604 } */ 4605 file_t *fp; 4606 int error, done; 4607 4608 /* fd_getvnode() will use the descriptor for us */ 4609 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4610 return (error); 4611 if ((fp->f_flag & FREAD) == 0) { 4612 error = EBADF; 4613 goto out; 4614 } 4615 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4616 SCARG(uap, count), &done, l, 0, 0); 4617 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4618 *retval = done; 4619 out: 4620 fd_putfile(SCARG(uap, fd)); 4621 return (error); 4622 } 4623 4624 /* 4625 * Set the mode mask for creation of filesystem nodes. 4626 */ 4627 int 4628 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4629 { 4630 /* { 4631 syscallarg(mode_t) newmask; 4632 } */ 4633 struct proc *p = l->l_proc; 4634 struct cwdinfo *cwdi; 4635 4636 /* 4637 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4638 * important is that we serialize changes to the mask. The 4639 * rw_exit() will issue a write memory barrier on our behalf, 4640 * and force the changes out to other CPUs (as it must use an 4641 * atomic operation, draining the local CPU's store buffers). 4642 */ 4643 cwdi = p->p_cwdi; 4644 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4645 *retval = cwdi->cwdi_cmask; 4646 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4647 rw_exit(&cwdi->cwdi_lock); 4648 4649 return (0); 4650 } 4651 4652 int 4653 dorevoke(struct vnode *vp, kauth_cred_t cred) 4654 { 4655 struct vattr vattr; 4656 int error, fs_decision; 4657 4658 vn_lock(vp, LK_SHARED | LK_RETRY); 4659 error = VOP_GETATTR(vp, &vattr, cred); 4660 VOP_UNLOCK(vp); 4661 if (error != 0) 4662 return error; 4663 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4664 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4665 fs_decision); 4666 if (!error) 4667 VOP_REVOKE(vp, REVOKEALL); 4668 return (error); 4669 } 4670 4671 /* 4672 * Void all references to file by ripping underlying filesystem 4673 * away from vnode. 4674 */ 4675 /* ARGSUSED */ 4676 int 4677 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4678 { 4679 /* { 4680 syscallarg(const char *) path; 4681 } */ 4682 struct vnode *vp; 4683 int error; 4684 4685 error = namei_simple_user(SCARG(uap, path), 4686 NSM_FOLLOW_TRYEMULROOT, &vp); 4687 if (error != 0) 4688 return (error); 4689 error = dorevoke(vp, l->l_cred); 4690 vrele(vp); 4691 return (error); 4692 } 4693 4694 /* 4695 * Allocate backing store for a file, filling a hole without having to 4696 * explicitly write anything out. 4697 */ 4698 /* ARGSUSED */ 4699 int 4700 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4701 register_t *retval) 4702 { 4703 /* { 4704 syscallarg(int) fd; 4705 syscallarg(off_t) pos; 4706 syscallarg(off_t) len; 4707 } */ 4708 int fd; 4709 off_t pos, len; 4710 struct file *fp; 4711 struct vnode *vp; 4712 int error; 4713 4714 fd = SCARG(uap, fd); 4715 pos = SCARG(uap, pos); 4716 len = SCARG(uap, len); 4717 4718 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4719 return EINVAL; 4720 } 4721 4722 error = fd_getvnode(fd, &fp); 4723 if (error) { 4724 return error; 4725 } 4726 if ((fp->f_flag & FWRITE) == 0) { 4727 error = EBADF; 4728 goto fail; 4729 } 4730 vp = fp->f_vnode; 4731 4732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4733 if (vp->v_type == VDIR) { 4734 error = EISDIR; 4735 } else { 4736 error = VOP_FALLOCATE(vp, pos, len); 4737 } 4738 VOP_UNLOCK(vp); 4739 4740 fail: 4741 fd_putfile(fd); 4742 return error; 4743 } 4744 4745 /* 4746 * Deallocate backing store for a file, creating a hole. Also used for 4747 * invoking TRIM on disks. 4748 */ 4749 /* ARGSUSED */ 4750 int 4751 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4752 register_t *retval) 4753 { 4754 /* { 4755 syscallarg(int) fd; 4756 syscallarg(off_t) pos; 4757 syscallarg(off_t) len; 4758 } */ 4759 int fd; 4760 off_t pos, len; 4761 struct file *fp; 4762 struct vnode *vp; 4763 int error; 4764 4765 fd = SCARG(uap, fd); 4766 pos = SCARG(uap, pos); 4767 len = SCARG(uap, len); 4768 4769 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4770 return EINVAL; 4771 } 4772 4773 error = fd_getvnode(fd, &fp); 4774 if (error) { 4775 return error; 4776 } 4777 if ((fp->f_flag & FWRITE) == 0) { 4778 error = EBADF; 4779 goto fail; 4780 } 4781 vp = fp->f_vnode; 4782 4783 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4784 if (vp->v_type == VDIR) { 4785 error = EISDIR; 4786 } else { 4787 error = VOP_FDISCARD(vp, pos, len); 4788 } 4789 VOP_UNLOCK(vp); 4790 4791 fail: 4792 fd_putfile(fd); 4793 return error; 4794 } 4795