1 /* $NetBSD: vfs_syscalls.c,v 1.505 2016/07/31 20:34:04 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.505 2016/07/31 20:34:04 dholland Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/specfs/specdev.h> 112 113 #include <nfs/rpcv2.h> 114 #include <nfs/nfsproto.h> 115 #include <nfs/nfs.h> 116 #include <nfs/nfs_var.h> 117 118 /* XXX this shouldn't be here */ 119 #ifndef OFF_T_MAX 120 #define OFF_T_MAX __type_max(off_t) 121 #endif 122 123 static int change_flags(struct vnode *, u_long, struct lwp *); 124 static int change_mode(struct vnode *, int, struct lwp *); 125 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 126 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 127 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 128 enum uio_seg); 129 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 130 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 131 enum uio_seg); 132 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 133 enum uio_seg, int); 134 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 135 size_t, register_t *); 136 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 137 138 static int fd_nameiat(struct lwp *, int, struct nameidata *); 139 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 140 namei_simple_flags_t, struct vnode **); 141 142 143 /* 144 * This table is used to maintain compatibility with 4.3BSD 145 * and NetBSD 0.9 mount syscalls - and possibly other systems. 146 * Note, the order is important! 147 * 148 * Do not modify this table. It should only contain filesystems 149 * supported by NetBSD 0.9 and 4.3BSD. 150 */ 151 const char * const mountcompatnames[] = { 152 NULL, /* 0 = MOUNT_NONE */ 153 MOUNT_FFS, /* 1 = MOUNT_UFS */ 154 MOUNT_NFS, /* 2 */ 155 MOUNT_MFS, /* 3 */ 156 MOUNT_MSDOS, /* 4 */ 157 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 158 MOUNT_FDESC, /* 6 */ 159 MOUNT_KERNFS, /* 7 */ 160 NULL, /* 8 = MOUNT_DEVFS */ 161 MOUNT_AFS, /* 9 */ 162 }; 163 164 const int nmountcompatnames = __arraycount(mountcompatnames); 165 166 static int 167 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 168 { 169 file_t *dfp; 170 int error; 171 172 if (fdat != AT_FDCWD) { 173 if ((error = fd_getvnode(fdat, &dfp)) != 0) 174 goto out; 175 176 NDAT(ndp, dfp->f_vnode); 177 } 178 179 error = namei(ndp); 180 181 if (fdat != AT_FDCWD) 182 fd_putfile(fdat); 183 out: 184 return error; 185 } 186 187 static int 188 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 189 namei_simple_flags_t sflags, struct vnode **vp_ret) 190 { 191 file_t *dfp; 192 struct vnode *dvp; 193 int error; 194 195 if (fdat != AT_FDCWD) { 196 if ((error = fd_getvnode(fdat, &dfp)) != 0) 197 goto out; 198 199 dvp = dfp->f_vnode; 200 } else { 201 dvp = NULL; 202 } 203 204 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 205 206 if (fdat != AT_FDCWD) 207 fd_putfile(fdat); 208 out: 209 return error; 210 } 211 212 static int 213 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 214 { 215 int error; 216 217 fp->f_flag = flags & FMASK; 218 fp->f_type = DTYPE_VNODE; 219 fp->f_ops = &vnops; 220 fp->f_vnode = vp; 221 222 if (flags & (O_EXLOCK | O_SHLOCK)) { 223 struct flock lf; 224 int type; 225 226 lf.l_whence = SEEK_SET; 227 lf.l_start = 0; 228 lf.l_len = 0; 229 if (flags & O_EXLOCK) 230 lf.l_type = F_WRLCK; 231 else 232 lf.l_type = F_RDLCK; 233 type = F_FLOCK; 234 if ((flags & FNONBLOCK) == 0) 235 type |= F_WAIT; 236 VOP_UNLOCK(vp); 237 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 238 if (error) { 239 (void) vn_close(vp, fp->f_flag, fp->f_cred); 240 fd_abort(l->l_proc, fp, indx); 241 return error; 242 } 243 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 244 atomic_or_uint(&fp->f_flag, FHASLOCK); 245 } 246 if (flags & O_CLOEXEC) 247 fd_set_exclose(l, indx, true); 248 return 0; 249 } 250 251 static int 252 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 253 void *data, size_t *data_len) 254 { 255 struct mount *mp; 256 int error = 0, saved_flags; 257 258 mp = vp->v_mount; 259 saved_flags = mp->mnt_flag; 260 261 /* We can operate only on VV_ROOT nodes. */ 262 if ((vp->v_vflag & VV_ROOT) == 0) { 263 error = EINVAL; 264 goto out; 265 } 266 267 /* 268 * We only allow the filesystem to be reloaded if it 269 * is currently mounted read-only. Additionally, we 270 * prevent read-write to read-only downgrades. 271 */ 272 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 273 (mp->mnt_flag & MNT_RDONLY) == 0 && 274 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 275 error = EOPNOTSUPP; /* Needs translation */ 276 goto out; 277 } 278 279 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 280 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 281 if (error) 282 goto out; 283 284 if (vfs_busy(mp, NULL)) { 285 error = EPERM; 286 goto out; 287 } 288 289 mutex_enter(&mp->mnt_updating); 290 291 mp->mnt_flag &= ~MNT_OP_FLAGS; 292 mp->mnt_flag |= flags & MNT_OP_FLAGS; 293 294 /* 295 * Set the mount level flags. 296 */ 297 if (flags & MNT_RDONLY) 298 mp->mnt_flag |= MNT_RDONLY; 299 else if (mp->mnt_flag & MNT_RDONLY) 300 mp->mnt_iflag |= IMNT_WANTRDWR; 301 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 302 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 303 error = VFS_MOUNT(mp, path, data, data_len); 304 305 if (error && data != NULL) { 306 int error2; 307 308 /* 309 * Update failed; let's try and see if it was an 310 * export request. For compat with 3.0 and earlier. 311 */ 312 error2 = vfs_hooks_reexport(mp, path, data); 313 314 /* 315 * Only update error code if the export request was 316 * understood but some problem occurred while 317 * processing it. 318 */ 319 if (error2 != EJUSTRETURN) 320 error = error2; 321 } 322 323 if (mp->mnt_iflag & IMNT_WANTRDWR) 324 mp->mnt_flag &= ~MNT_RDONLY; 325 if (error) 326 mp->mnt_flag = saved_flags; 327 mp->mnt_flag &= ~MNT_OP_FLAGS; 328 mp->mnt_iflag &= ~IMNT_WANTRDWR; 329 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 330 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 331 vfs_syncer_add_to_worklist(mp); 332 } else { 333 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 334 vfs_syncer_remove_from_worklist(mp); 335 } 336 mutex_exit(&mp->mnt_updating); 337 vfs_unbusy(mp, false, NULL); 338 339 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 340 (flags & MNT_EXTATTR)) { 341 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 342 NULL, 0, NULL) != 0) { 343 printf("%s: failed to start extattr, error = %d", 344 mp->mnt_stat.f_mntonname, error); 345 mp->mnt_flag &= ~MNT_EXTATTR; 346 } 347 } 348 349 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 350 !(flags & MNT_EXTATTR)) { 351 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 352 NULL, 0, NULL) != 0) { 353 printf("%s: failed to stop extattr, error = %d", 354 mp->mnt_stat.f_mntonname, error); 355 mp->mnt_flag |= MNT_RDONLY; 356 } 357 } 358 out: 359 return (error); 360 } 361 362 static int 363 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 364 struct vfsops **vfsops) 365 { 366 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 367 int error; 368 369 if (type_seg == UIO_USERSPACE) { 370 /* Copy file-system type from userspace. */ 371 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 372 } else { 373 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 374 KASSERT(error == 0); 375 } 376 377 if (error) { 378 /* 379 * Historically, filesystem types were identified by numbers. 380 * If we get an integer for the filesystem type instead of a 381 * string, we check to see if it matches one of the historic 382 * filesystem types. 383 */ 384 u_long fsindex = (u_long)fstype; 385 if (fsindex >= nmountcompatnames || 386 mountcompatnames[fsindex] == NULL) 387 return ENODEV; 388 strlcpy(fstypename, mountcompatnames[fsindex], 389 sizeof(fstypename)); 390 } 391 392 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 393 if (strcmp(fstypename, "ufs") == 0) 394 fstypename[0] = 'f'; 395 396 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 397 return 0; 398 399 /* If we can autoload a vfs module, try again */ 400 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 401 402 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 403 return 0; 404 405 return ENODEV; 406 } 407 408 static int 409 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 410 void *data, size_t *data_len) 411 { 412 struct mount *mp; 413 int error; 414 415 /* If MNT_GETARGS is specified, it should be the only flag. */ 416 if (flags & ~MNT_GETARGS) 417 return EINVAL; 418 419 mp = vp->v_mount; 420 421 /* XXX: probably some notion of "can see" here if we want isolation. */ 422 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 423 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 424 if (error) 425 return error; 426 427 if ((vp->v_vflag & VV_ROOT) == 0) 428 return EINVAL; 429 430 if (vfs_busy(mp, NULL)) 431 return EPERM; 432 433 mutex_enter(&mp->mnt_updating); 434 mp->mnt_flag &= ~MNT_OP_FLAGS; 435 mp->mnt_flag |= MNT_GETARGS; 436 error = VFS_MOUNT(mp, path, data, data_len); 437 mp->mnt_flag &= ~MNT_OP_FLAGS; 438 mutex_exit(&mp->mnt_updating); 439 440 vfs_unbusy(mp, false, NULL); 441 return (error); 442 } 443 444 int 445 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 446 { 447 /* { 448 syscallarg(const char *) type; 449 syscallarg(const char *) path; 450 syscallarg(int) flags; 451 syscallarg(void *) data; 452 syscallarg(size_t) data_len; 453 } */ 454 455 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 456 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 457 SCARG(uap, data_len), retval); 458 } 459 460 int 461 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 462 const char *path, int flags, void *data, enum uio_seg data_seg, 463 size_t data_len, register_t *retval) 464 { 465 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 466 struct vnode *vp; 467 void *data_buf = data; 468 bool vfsopsrele = false; 469 size_t alloc_sz = 0; 470 int error; 471 472 /* 473 * Get vnode to be covered 474 */ 475 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 476 if (error != 0) { 477 vp = NULL; 478 goto done; 479 } 480 481 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 482 vfsops = vp->v_mount->mnt_op; 483 } else { 484 /* 'type' is userspace */ 485 error = mount_get_vfsops(type, type_seg, &vfsops); 486 if (error != 0) 487 goto done; 488 vfsopsrele = true; 489 } 490 491 /* 492 * We allow data to be NULL, even for userspace. Some fs's don't need 493 * it. The others will handle NULL. 494 */ 495 if (data != NULL && data_seg == UIO_USERSPACE) { 496 if (data_len == 0) { 497 /* No length supplied, use default for filesystem */ 498 data_len = vfsops->vfs_min_mount_data; 499 500 /* 501 * Hopefully a longer buffer won't make copyin() fail. 502 * For compatibility with 3.0 and earlier. 503 */ 504 if (flags & MNT_UPDATE 505 && data_len < sizeof (struct mnt_export_args30)) 506 data_len = sizeof (struct mnt_export_args30); 507 } 508 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 509 error = EINVAL; 510 goto done; 511 } 512 alloc_sz = data_len; 513 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 514 515 /* NFS needs the buffer even for mnt_getargs .... */ 516 error = copyin(data, data_buf, data_len); 517 if (error != 0) 518 goto done; 519 } 520 521 if (flags & MNT_GETARGS) { 522 if (data_len == 0) { 523 error = EINVAL; 524 goto done; 525 } 526 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 527 if (error != 0) 528 goto done; 529 if (data_seg == UIO_USERSPACE) 530 error = copyout(data_buf, data, data_len); 531 *retval = data_len; 532 } else if (flags & MNT_UPDATE) { 533 error = mount_update(l, vp, path, flags, data_buf, &data_len); 534 } else { 535 /* Locking is handled internally in mount_domount(). */ 536 KASSERT(vfsopsrele == true); 537 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 538 &data_len); 539 vfsopsrele = false; 540 } 541 542 done: 543 if (vfsopsrele) 544 vfs_delref(vfsops); 545 if (vp != NULL) { 546 vrele(vp); 547 } 548 if (data_buf != data) 549 kmem_free(data_buf, alloc_sz); 550 return (error); 551 } 552 553 /* 554 * Unmount a file system. 555 * 556 * Note: unmount takes a path to the vnode mounted on as argument, 557 * not special file (as before). 558 */ 559 /* ARGSUSED */ 560 int 561 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 562 { 563 /* { 564 syscallarg(const char *) path; 565 syscallarg(int) flags; 566 } */ 567 struct vnode *vp; 568 struct mount *mp; 569 int error; 570 struct pathbuf *pb; 571 struct nameidata nd; 572 573 error = pathbuf_copyin(SCARG(uap, path), &pb); 574 if (error) { 575 return error; 576 } 577 578 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 579 if ((error = namei(&nd)) != 0) { 580 pathbuf_destroy(pb); 581 return error; 582 } 583 vp = nd.ni_vp; 584 pathbuf_destroy(pb); 585 586 mp = vp->v_mount; 587 atomic_inc_uint(&mp->mnt_refcnt); 588 VOP_UNLOCK(vp); 589 590 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 591 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 592 if (error) { 593 vrele(vp); 594 vfs_destroy(mp); 595 return (error); 596 } 597 598 /* 599 * Don't allow unmounting the root file system. 600 */ 601 if (mp->mnt_flag & MNT_ROOTFS) { 602 vrele(vp); 603 vfs_destroy(mp); 604 return (EINVAL); 605 } 606 607 /* 608 * Must be the root of the filesystem 609 */ 610 if ((vp->v_vflag & VV_ROOT) == 0) { 611 vrele(vp); 612 vfs_destroy(mp); 613 return (EINVAL); 614 } 615 616 vrele(vp); 617 error = dounmount(mp, SCARG(uap, flags), l); 618 vfs_destroy(mp); 619 return error; 620 } 621 622 /* 623 * Sync each mounted filesystem. 624 */ 625 #ifdef DEBUG 626 int syncprt = 0; 627 struct ctldebug debug0 = { "syncprt", &syncprt }; 628 #endif 629 630 void 631 do_sys_sync(struct lwp *l) 632 { 633 struct mount *mp, *nmp; 634 int asyncflag; 635 636 mutex_enter(&mountlist_lock); 637 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 638 if (vfs_busy(mp, &nmp)) { 639 continue; 640 } 641 mutex_enter(&mp->mnt_updating); 642 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 643 asyncflag = mp->mnt_flag & MNT_ASYNC; 644 mp->mnt_flag &= ~MNT_ASYNC; 645 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 646 if (asyncflag) 647 mp->mnt_flag |= MNT_ASYNC; 648 } 649 mutex_exit(&mp->mnt_updating); 650 vfs_unbusy(mp, false, &nmp); 651 } 652 mutex_exit(&mountlist_lock); 653 #ifdef DEBUG 654 if (syncprt) 655 vfs_bufstats(); 656 #endif /* DEBUG */ 657 } 658 659 /* ARGSUSED */ 660 int 661 sys_sync(struct lwp *l, const void *v, register_t *retval) 662 { 663 do_sys_sync(l); 664 return (0); 665 } 666 667 668 /* 669 * Access or change filesystem quotas. 670 * 671 * (this is really 14 different calls bundled into one) 672 */ 673 674 static int 675 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 676 { 677 struct quotastat info_k; 678 int error; 679 680 /* ensure any padding bytes are cleared */ 681 memset(&info_k, 0, sizeof(info_k)); 682 683 error = vfs_quotactl_stat(mp, &info_k); 684 if (error) { 685 return error; 686 } 687 688 return copyout(&info_k, info_u, sizeof(info_k)); 689 } 690 691 static int 692 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 693 struct quotaidtypestat *info_u) 694 { 695 struct quotaidtypestat info_k; 696 int error; 697 698 /* ensure any padding bytes are cleared */ 699 memset(&info_k, 0, sizeof(info_k)); 700 701 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 702 if (error) { 703 return error; 704 } 705 706 return copyout(&info_k, info_u, sizeof(info_k)); 707 } 708 709 static int 710 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 711 struct quotaobjtypestat *info_u) 712 { 713 struct quotaobjtypestat info_k; 714 int error; 715 716 /* ensure any padding bytes are cleared */ 717 memset(&info_k, 0, sizeof(info_k)); 718 719 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 720 if (error) { 721 return error; 722 } 723 724 return copyout(&info_k, info_u, sizeof(info_k)); 725 } 726 727 static int 728 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 729 struct quotaval *val_u) 730 { 731 struct quotakey key_k; 732 struct quotaval val_k; 733 int error; 734 735 /* ensure any padding bytes are cleared */ 736 memset(&val_k, 0, sizeof(val_k)); 737 738 error = copyin(key_u, &key_k, sizeof(key_k)); 739 if (error) { 740 return error; 741 } 742 743 error = vfs_quotactl_get(mp, &key_k, &val_k); 744 if (error) { 745 return error; 746 } 747 748 return copyout(&val_k, val_u, sizeof(val_k)); 749 } 750 751 static int 752 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 753 const struct quotaval *val_u) 754 { 755 struct quotakey key_k; 756 struct quotaval val_k; 757 int error; 758 759 error = copyin(key_u, &key_k, sizeof(key_k)); 760 if (error) { 761 return error; 762 } 763 764 error = copyin(val_u, &val_k, sizeof(val_k)); 765 if (error) { 766 return error; 767 } 768 769 return vfs_quotactl_put(mp, &key_k, &val_k); 770 } 771 772 static int 773 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 774 { 775 struct quotakey key_k; 776 int error; 777 778 error = copyin(key_u, &key_k, sizeof(key_k)); 779 if (error) { 780 return error; 781 } 782 783 return vfs_quotactl_del(mp, &key_k); 784 } 785 786 static int 787 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 788 { 789 struct quotakcursor cursor_k; 790 int error; 791 792 /* ensure any padding bytes are cleared */ 793 memset(&cursor_k, 0, sizeof(cursor_k)); 794 795 error = vfs_quotactl_cursoropen(mp, &cursor_k); 796 if (error) { 797 return error; 798 } 799 800 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 801 } 802 803 static int 804 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 805 { 806 struct quotakcursor cursor_k; 807 int error; 808 809 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 810 if (error) { 811 return error; 812 } 813 814 return vfs_quotactl_cursorclose(mp, &cursor_k); 815 } 816 817 static int 818 do_sys_quotactl_cursorskipidtype(struct mount *mp, 819 struct quotakcursor *cursor_u, int idtype) 820 { 821 struct quotakcursor cursor_k; 822 int error; 823 824 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 825 if (error) { 826 return error; 827 } 828 829 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 830 if (error) { 831 return error; 832 } 833 834 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 835 } 836 837 static int 838 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 839 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 840 unsigned *ret_u) 841 { 842 #define CGET_STACK_MAX 8 843 struct quotakcursor cursor_k; 844 struct quotakey stackkeys[CGET_STACK_MAX]; 845 struct quotaval stackvals[CGET_STACK_MAX]; 846 struct quotakey *keys_k; 847 struct quotaval *vals_k; 848 unsigned ret_k; 849 int error; 850 851 if (maxnum > 128) { 852 maxnum = 128; 853 } 854 855 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 856 if (error) { 857 return error; 858 } 859 860 if (maxnum <= CGET_STACK_MAX) { 861 keys_k = stackkeys; 862 vals_k = stackvals; 863 /* ensure any padding bytes are cleared */ 864 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 865 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 866 } else { 867 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 868 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 869 } 870 871 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 872 &ret_k); 873 if (error) { 874 goto fail; 875 } 876 877 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 878 if (error) { 879 goto fail; 880 } 881 882 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 883 if (error) { 884 goto fail; 885 } 886 887 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 888 if (error) { 889 goto fail; 890 } 891 892 /* do last to maximize the chance of being able to recover a failure */ 893 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 894 895 fail: 896 if (keys_k != stackkeys) { 897 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 898 } 899 if (vals_k != stackvals) { 900 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 901 } 902 return error; 903 } 904 905 static int 906 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 907 int *ret_u) 908 { 909 struct quotakcursor cursor_k; 910 int ret_k; 911 int error; 912 913 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 914 if (error) { 915 return error; 916 } 917 918 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 919 if (error) { 920 return error; 921 } 922 923 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 924 if (error) { 925 return error; 926 } 927 928 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 929 } 930 931 static int 932 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 933 { 934 struct quotakcursor cursor_k; 935 int error; 936 937 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 938 if (error) { 939 return error; 940 } 941 942 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 943 if (error) { 944 return error; 945 } 946 947 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 948 } 949 950 static int 951 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 952 { 953 char *path_k; 954 int error; 955 956 /* XXX this should probably be a struct pathbuf */ 957 path_k = PNBUF_GET(); 958 error = copyin(path_u, path_k, PATH_MAX); 959 if (error) { 960 PNBUF_PUT(path_k); 961 return error; 962 } 963 964 error = vfs_quotactl_quotaon(mp, idtype, path_k); 965 966 PNBUF_PUT(path_k); 967 return error; 968 } 969 970 static int 971 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 972 { 973 return vfs_quotactl_quotaoff(mp, idtype); 974 } 975 976 int 977 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 978 { 979 struct mount *mp; 980 struct vnode *vp; 981 int error; 982 983 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 984 if (error != 0) 985 return (error); 986 mp = vp->v_mount; 987 988 switch (args->qc_op) { 989 case QUOTACTL_STAT: 990 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 991 break; 992 case QUOTACTL_IDTYPESTAT: 993 error = do_sys_quotactl_idtypestat(mp, 994 args->u.idtypestat.qc_idtype, 995 args->u.idtypestat.qc_info); 996 break; 997 case QUOTACTL_OBJTYPESTAT: 998 error = do_sys_quotactl_objtypestat(mp, 999 args->u.objtypestat.qc_objtype, 1000 args->u.objtypestat.qc_info); 1001 break; 1002 case QUOTACTL_GET: 1003 error = do_sys_quotactl_get(mp, 1004 args->u.get.qc_key, 1005 args->u.get.qc_val); 1006 break; 1007 case QUOTACTL_PUT: 1008 error = do_sys_quotactl_put(mp, 1009 args->u.put.qc_key, 1010 args->u.put.qc_val); 1011 break; 1012 case QUOTACTL_DEL: 1013 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1014 break; 1015 case QUOTACTL_CURSOROPEN: 1016 error = do_sys_quotactl_cursoropen(mp, 1017 args->u.cursoropen.qc_cursor); 1018 break; 1019 case QUOTACTL_CURSORCLOSE: 1020 error = do_sys_quotactl_cursorclose(mp, 1021 args->u.cursorclose.qc_cursor); 1022 break; 1023 case QUOTACTL_CURSORSKIPIDTYPE: 1024 error = do_sys_quotactl_cursorskipidtype(mp, 1025 args->u.cursorskipidtype.qc_cursor, 1026 args->u.cursorskipidtype.qc_idtype); 1027 break; 1028 case QUOTACTL_CURSORGET: 1029 error = do_sys_quotactl_cursorget(mp, 1030 args->u.cursorget.qc_cursor, 1031 args->u.cursorget.qc_keys, 1032 args->u.cursorget.qc_vals, 1033 args->u.cursorget.qc_maxnum, 1034 args->u.cursorget.qc_ret); 1035 break; 1036 case QUOTACTL_CURSORATEND: 1037 error = do_sys_quotactl_cursoratend(mp, 1038 args->u.cursoratend.qc_cursor, 1039 args->u.cursoratend.qc_ret); 1040 break; 1041 case QUOTACTL_CURSORREWIND: 1042 error = do_sys_quotactl_cursorrewind(mp, 1043 args->u.cursorrewind.qc_cursor); 1044 break; 1045 case QUOTACTL_QUOTAON: 1046 error = do_sys_quotactl_quotaon(mp, 1047 args->u.quotaon.qc_idtype, 1048 args->u.quotaon.qc_quotafile); 1049 break; 1050 case QUOTACTL_QUOTAOFF: 1051 error = do_sys_quotactl_quotaoff(mp, 1052 args->u.quotaoff.qc_idtype); 1053 break; 1054 default: 1055 error = EINVAL; 1056 break; 1057 } 1058 1059 vrele(vp); 1060 return error; 1061 } 1062 1063 /* ARGSUSED */ 1064 int 1065 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1066 register_t *retval) 1067 { 1068 /* { 1069 syscallarg(const char *) path; 1070 syscallarg(struct quotactl_args *) args; 1071 } */ 1072 struct quotactl_args args; 1073 int error; 1074 1075 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1076 if (error) { 1077 return error; 1078 } 1079 1080 return do_sys_quotactl(SCARG(uap, path), &args); 1081 } 1082 1083 int 1084 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1085 int root) 1086 { 1087 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1088 int error = 0; 1089 1090 /* 1091 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1092 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1093 * overrides MNT_NOWAIT. 1094 */ 1095 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1096 (flags != MNT_WAIT && flags != 0)) { 1097 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1098 goto done; 1099 } 1100 1101 /* Get the filesystem stats now */ 1102 memset(sp, 0, sizeof(*sp)); 1103 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1104 return error; 1105 } 1106 1107 if (cwdi->cwdi_rdir == NULL) 1108 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1109 done: 1110 if (cwdi->cwdi_rdir != NULL) { 1111 size_t len; 1112 char *bp; 1113 char c; 1114 char *path = PNBUF_GET(); 1115 1116 bp = path + MAXPATHLEN; 1117 *--bp = '\0'; 1118 rw_enter(&cwdi->cwdi_lock, RW_READER); 1119 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1120 MAXPATHLEN / 2, 0, l); 1121 rw_exit(&cwdi->cwdi_lock); 1122 if (error) { 1123 PNBUF_PUT(path); 1124 return error; 1125 } 1126 len = strlen(bp); 1127 if (len != 1) { 1128 /* 1129 * for mount points that are below our root, we can see 1130 * them, so we fix up the pathname and return them. The 1131 * rest we cannot see, so we don't allow viewing the 1132 * data. 1133 */ 1134 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1135 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1136 (void)strlcpy(sp->f_mntonname, 1137 c == '\0' ? "/" : &sp->f_mntonname[len], 1138 sizeof(sp->f_mntonname)); 1139 } else { 1140 if (root) 1141 (void)strlcpy(sp->f_mntonname, "/", 1142 sizeof(sp->f_mntonname)); 1143 else 1144 error = EPERM; 1145 } 1146 } 1147 PNBUF_PUT(path); 1148 } 1149 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1150 return error; 1151 } 1152 1153 /* 1154 * Get filesystem statistics by path. 1155 */ 1156 int 1157 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1158 { 1159 struct mount *mp; 1160 int error; 1161 struct vnode *vp; 1162 1163 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1164 if (error != 0) 1165 return error; 1166 mp = vp->v_mount; 1167 error = dostatvfs(mp, sb, l, flags, 1); 1168 vrele(vp); 1169 return error; 1170 } 1171 1172 /* ARGSUSED */ 1173 int 1174 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1175 { 1176 /* { 1177 syscallarg(const char *) path; 1178 syscallarg(struct statvfs *) buf; 1179 syscallarg(int) flags; 1180 } */ 1181 struct statvfs *sb; 1182 int error; 1183 1184 sb = STATVFSBUF_GET(); 1185 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1186 if (error == 0) 1187 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1188 STATVFSBUF_PUT(sb); 1189 return error; 1190 } 1191 1192 /* 1193 * Get filesystem statistics by fd. 1194 */ 1195 int 1196 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1197 { 1198 file_t *fp; 1199 struct mount *mp; 1200 int error; 1201 1202 /* fd_getvnode() will use the descriptor for us */ 1203 if ((error = fd_getvnode(fd, &fp)) != 0) 1204 return (error); 1205 mp = fp->f_vnode->v_mount; 1206 error = dostatvfs(mp, sb, curlwp, flags, 1); 1207 fd_putfile(fd); 1208 return error; 1209 } 1210 1211 /* ARGSUSED */ 1212 int 1213 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1214 { 1215 /* { 1216 syscallarg(int) fd; 1217 syscallarg(struct statvfs *) buf; 1218 syscallarg(int) flags; 1219 } */ 1220 struct statvfs *sb; 1221 int error; 1222 1223 sb = STATVFSBUF_GET(); 1224 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1225 if (error == 0) 1226 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1227 STATVFSBUF_PUT(sb); 1228 return error; 1229 } 1230 1231 1232 /* 1233 * Get statistics on all filesystems. 1234 */ 1235 int 1236 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1237 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1238 register_t *retval) 1239 { 1240 int root = 0; 1241 struct proc *p = l->l_proc; 1242 struct mount *mp, *nmp; 1243 struct statvfs *sb; 1244 size_t count, maxcount; 1245 int error = 0; 1246 1247 sb = STATVFSBUF_GET(); 1248 maxcount = bufsize / entry_sz; 1249 mutex_enter(&mountlist_lock); 1250 count = 0; 1251 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1252 if (vfs_busy(mp, &nmp)) { 1253 continue; 1254 } 1255 if (sfsp && count < maxcount) { 1256 error = dostatvfs(mp, sb, l, flags, 0); 1257 if (error) { 1258 vfs_unbusy(mp, false, &nmp); 1259 error = 0; 1260 continue; 1261 } 1262 error = copyfn(sb, sfsp, entry_sz); 1263 if (error) { 1264 vfs_unbusy(mp, false, NULL); 1265 goto out; 1266 } 1267 sfsp = (char *)sfsp + entry_sz; 1268 root |= strcmp(sb->f_mntonname, "/") == 0; 1269 } 1270 count++; 1271 vfs_unbusy(mp, false, &nmp); 1272 } 1273 mutex_exit(&mountlist_lock); 1274 1275 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1276 /* 1277 * fake a root entry 1278 */ 1279 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1280 sb, l, flags, 1); 1281 if (error != 0) 1282 goto out; 1283 if (sfsp) { 1284 error = copyfn(sb, sfsp, entry_sz); 1285 if (error != 0) 1286 goto out; 1287 } 1288 count++; 1289 } 1290 if (sfsp && count > maxcount) 1291 *retval = maxcount; 1292 else 1293 *retval = count; 1294 out: 1295 STATVFSBUF_PUT(sb); 1296 return error; 1297 } 1298 1299 int 1300 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1301 { 1302 /* { 1303 syscallarg(struct statvfs *) buf; 1304 syscallarg(size_t) bufsize; 1305 syscallarg(int) flags; 1306 } */ 1307 1308 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1309 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1310 } 1311 1312 /* 1313 * Change current working directory to a given file descriptor. 1314 */ 1315 /* ARGSUSED */ 1316 int 1317 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1318 { 1319 /* { 1320 syscallarg(int) fd; 1321 } */ 1322 struct proc *p = l->l_proc; 1323 struct cwdinfo *cwdi; 1324 struct vnode *vp, *tdp; 1325 struct mount *mp; 1326 file_t *fp; 1327 int error, fd; 1328 1329 /* fd_getvnode() will use the descriptor for us */ 1330 fd = SCARG(uap, fd); 1331 if ((error = fd_getvnode(fd, &fp)) != 0) 1332 return (error); 1333 vp = fp->f_vnode; 1334 1335 vref(vp); 1336 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1337 if (vp->v_type != VDIR) 1338 error = ENOTDIR; 1339 else 1340 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1341 if (error) { 1342 vput(vp); 1343 goto out; 1344 } 1345 while ((mp = vp->v_mountedhere) != NULL) { 1346 error = vfs_busy(mp, NULL); 1347 vput(vp); 1348 if (error != 0) 1349 goto out; 1350 error = VFS_ROOT(mp, &tdp); 1351 vfs_unbusy(mp, false, NULL); 1352 if (error) 1353 goto out; 1354 vp = tdp; 1355 } 1356 VOP_UNLOCK(vp); 1357 1358 /* 1359 * Disallow changing to a directory not under the process's 1360 * current root directory (if there is one). 1361 */ 1362 cwdi = p->p_cwdi; 1363 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1364 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1365 vrele(vp); 1366 error = EPERM; /* operation not permitted */ 1367 } else { 1368 vrele(cwdi->cwdi_cdir); 1369 cwdi->cwdi_cdir = vp; 1370 } 1371 rw_exit(&cwdi->cwdi_lock); 1372 1373 out: 1374 fd_putfile(fd); 1375 return (error); 1376 } 1377 1378 /* 1379 * Change this process's notion of the root directory to a given file 1380 * descriptor. 1381 */ 1382 int 1383 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1384 { 1385 struct proc *p = l->l_proc; 1386 struct vnode *vp; 1387 file_t *fp; 1388 int error, fd = SCARG(uap, fd); 1389 1390 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1391 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1392 return error; 1393 /* fd_getvnode() will use the descriptor for us */ 1394 if ((error = fd_getvnode(fd, &fp)) != 0) 1395 return error; 1396 vp = fp->f_vnode; 1397 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1398 if (vp->v_type != VDIR) 1399 error = ENOTDIR; 1400 else 1401 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1402 VOP_UNLOCK(vp); 1403 if (error) 1404 goto out; 1405 vref(vp); 1406 1407 change_root(p->p_cwdi, vp, l); 1408 1409 out: 1410 fd_putfile(fd); 1411 return (error); 1412 } 1413 1414 /* 1415 * Change current working directory (``.''). 1416 */ 1417 /* ARGSUSED */ 1418 int 1419 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1420 { 1421 /* { 1422 syscallarg(const char *) path; 1423 } */ 1424 struct proc *p = l->l_proc; 1425 struct cwdinfo *cwdi; 1426 int error; 1427 struct vnode *vp; 1428 1429 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1430 &vp, l)) != 0) 1431 return (error); 1432 cwdi = p->p_cwdi; 1433 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1434 vrele(cwdi->cwdi_cdir); 1435 cwdi->cwdi_cdir = vp; 1436 rw_exit(&cwdi->cwdi_lock); 1437 return (0); 1438 } 1439 1440 /* 1441 * Change notion of root (``/'') directory. 1442 */ 1443 /* ARGSUSED */ 1444 int 1445 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1446 { 1447 /* { 1448 syscallarg(const char *) path; 1449 } */ 1450 struct proc *p = l->l_proc; 1451 int error; 1452 struct vnode *vp; 1453 1454 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1455 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1456 return (error); 1457 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1458 &vp, l)) != 0) 1459 return (error); 1460 1461 change_root(p->p_cwdi, vp, l); 1462 1463 return (0); 1464 } 1465 1466 /* 1467 * Common routine for chroot and fchroot. 1468 * NB: callers need to properly authorize the change root operation. 1469 */ 1470 void 1471 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1472 { 1473 struct proc *p = l->l_proc; 1474 kauth_cred_t ncred; 1475 1476 ncred = kauth_cred_alloc(); 1477 1478 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1479 if (cwdi->cwdi_rdir != NULL) 1480 vrele(cwdi->cwdi_rdir); 1481 cwdi->cwdi_rdir = vp; 1482 1483 /* 1484 * Prevent escaping from chroot by putting the root under 1485 * the working directory. Silently chdir to / if we aren't 1486 * already there. 1487 */ 1488 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1489 /* 1490 * XXX would be more failsafe to change directory to a 1491 * deadfs node here instead 1492 */ 1493 vrele(cwdi->cwdi_cdir); 1494 vref(vp); 1495 cwdi->cwdi_cdir = vp; 1496 } 1497 rw_exit(&cwdi->cwdi_lock); 1498 1499 /* Get a write lock on the process credential. */ 1500 proc_crmod_enter(); 1501 1502 kauth_cred_clone(p->p_cred, ncred); 1503 kauth_proc_chroot(ncred, p->p_cwdi); 1504 1505 /* Broadcast our credentials to the process and other LWPs. */ 1506 proc_crmod_leave(ncred, p->p_cred, true); 1507 } 1508 1509 /* 1510 * Common routine for chroot and chdir. 1511 * XXX "where" should be enum uio_seg 1512 */ 1513 int 1514 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1515 { 1516 struct pathbuf *pb; 1517 struct nameidata nd; 1518 int error; 1519 1520 error = pathbuf_maybe_copyin(path, where, &pb); 1521 if (error) { 1522 return error; 1523 } 1524 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1525 if ((error = namei(&nd)) != 0) { 1526 pathbuf_destroy(pb); 1527 return error; 1528 } 1529 *vpp = nd.ni_vp; 1530 pathbuf_destroy(pb); 1531 1532 if ((*vpp)->v_type != VDIR) 1533 error = ENOTDIR; 1534 else 1535 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1536 1537 if (error) 1538 vput(*vpp); 1539 else 1540 VOP_UNLOCK(*vpp); 1541 return (error); 1542 } 1543 1544 /* 1545 * Internals of sys_open - path has already been converted into a pathbuf 1546 * (so we can easily reuse this function from other parts of the kernel, 1547 * like posix_spawn post-processing). 1548 */ 1549 int 1550 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1551 int open_mode, int *fd) 1552 { 1553 struct proc *p = l->l_proc; 1554 struct cwdinfo *cwdi = p->p_cwdi; 1555 file_t *fp; 1556 struct vnode *vp; 1557 int flags, cmode; 1558 int indx, error; 1559 struct nameidata nd; 1560 1561 if (open_flags & O_SEARCH) { 1562 open_flags &= ~(int)O_SEARCH; 1563 } 1564 1565 flags = FFLAGS(open_flags); 1566 if ((flags & (FREAD | FWRITE)) == 0) 1567 return EINVAL; 1568 1569 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1570 return error; 1571 } 1572 1573 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1574 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1575 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1576 if (dvp != NULL) 1577 NDAT(&nd, dvp); 1578 1579 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1580 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1581 fd_abort(p, fp, indx); 1582 if ((error == EDUPFD || error == EMOVEFD) && 1583 l->l_dupfd >= 0 && /* XXX from fdopen */ 1584 (error = 1585 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1586 *fd = indx; 1587 return 0; 1588 } 1589 if (error == ERESTART) 1590 error = EINTR; 1591 return error; 1592 } 1593 1594 l->l_dupfd = 0; 1595 vp = nd.ni_vp; 1596 1597 if ((error = open_setfp(l, fp, vp, indx, flags))) 1598 return error; 1599 1600 VOP_UNLOCK(vp); 1601 *fd = indx; 1602 fd_affix(p, fp, indx); 1603 return 0; 1604 } 1605 1606 int 1607 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1608 { 1609 struct pathbuf *pb; 1610 int error, oflags; 1611 1612 oflags = FFLAGS(open_flags); 1613 if ((oflags & (FREAD | FWRITE)) == 0) 1614 return EINVAL; 1615 1616 pb = pathbuf_create(path); 1617 if (pb == NULL) 1618 return ENOMEM; 1619 1620 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1621 pathbuf_destroy(pb); 1622 1623 return error; 1624 } 1625 1626 /* 1627 * Check permissions, allocate an open file structure, 1628 * and call the device open routine if any. 1629 */ 1630 static int 1631 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1632 int mode, int *fd) 1633 { 1634 file_t *dfp = NULL; 1635 struct vnode *dvp = NULL; 1636 struct pathbuf *pb; 1637 int error; 1638 1639 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1640 if (path == NULL) { 1641 pb = pathbuf_create("."); 1642 if (pb == NULL) 1643 return ENOMEM; 1644 } else 1645 #endif 1646 { 1647 error = pathbuf_copyin(path, &pb); 1648 if (error) 1649 return error; 1650 } 1651 1652 if (fdat != AT_FDCWD) { 1653 /* fd_getvnode() will use the descriptor for us */ 1654 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1655 goto out; 1656 1657 dvp = dfp->f_vnode; 1658 } 1659 1660 error = do_open(l, dvp, pb, flags, mode, fd); 1661 1662 if (dfp != NULL) 1663 fd_putfile(fdat); 1664 out: 1665 pathbuf_destroy(pb); 1666 return error; 1667 } 1668 1669 int 1670 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1671 { 1672 /* { 1673 syscallarg(const char *) path; 1674 syscallarg(int) flags; 1675 syscallarg(int) mode; 1676 } */ 1677 int error; 1678 int fd; 1679 1680 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1681 SCARG(uap, flags), SCARG(uap, mode), &fd); 1682 1683 if (error == 0) 1684 *retval = fd; 1685 1686 return error; 1687 } 1688 1689 int 1690 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1691 { 1692 /* { 1693 syscallarg(int) fd; 1694 syscallarg(const char *) path; 1695 syscallarg(int) oflags; 1696 syscallarg(int) mode; 1697 } */ 1698 int error; 1699 int fd; 1700 1701 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1702 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1703 1704 if (error == 0) 1705 *retval = fd; 1706 1707 return error; 1708 } 1709 1710 static void 1711 vfs__fhfree(fhandle_t *fhp) 1712 { 1713 size_t fhsize; 1714 1715 fhsize = FHANDLE_SIZE(fhp); 1716 kmem_free(fhp, fhsize); 1717 } 1718 1719 /* 1720 * vfs_composefh: compose a filehandle. 1721 */ 1722 1723 int 1724 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1725 { 1726 struct mount *mp; 1727 struct fid *fidp; 1728 int error; 1729 size_t needfhsize; 1730 size_t fidsize; 1731 1732 mp = vp->v_mount; 1733 fidp = NULL; 1734 if (*fh_size < FHANDLE_SIZE_MIN) { 1735 fidsize = 0; 1736 } else { 1737 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1738 if (fhp != NULL) { 1739 memset(fhp, 0, *fh_size); 1740 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1741 fidp = &fhp->fh_fid; 1742 } 1743 } 1744 error = VFS_VPTOFH(vp, fidp, &fidsize); 1745 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1746 if (error == 0 && *fh_size < needfhsize) { 1747 error = E2BIG; 1748 } 1749 *fh_size = needfhsize; 1750 return error; 1751 } 1752 1753 int 1754 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1755 { 1756 struct mount *mp; 1757 fhandle_t *fhp; 1758 size_t fhsize; 1759 size_t fidsize; 1760 int error; 1761 1762 mp = vp->v_mount; 1763 fidsize = 0; 1764 error = VFS_VPTOFH(vp, NULL, &fidsize); 1765 KASSERT(error != 0); 1766 if (error != E2BIG) { 1767 goto out; 1768 } 1769 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1770 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1771 if (fhp == NULL) { 1772 error = ENOMEM; 1773 goto out; 1774 } 1775 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1776 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1777 if (error == 0) { 1778 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1779 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1780 *fhpp = fhp; 1781 } else { 1782 kmem_free(fhp, fhsize); 1783 } 1784 out: 1785 return error; 1786 } 1787 1788 void 1789 vfs_composefh_free(fhandle_t *fhp) 1790 { 1791 1792 vfs__fhfree(fhp); 1793 } 1794 1795 /* 1796 * vfs_fhtovp: lookup a vnode by a filehandle. 1797 */ 1798 1799 int 1800 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1801 { 1802 struct mount *mp; 1803 int error; 1804 1805 *vpp = NULL; 1806 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1807 if (mp == NULL) { 1808 error = ESTALE; 1809 goto out; 1810 } 1811 if (mp->mnt_op->vfs_fhtovp == NULL) { 1812 error = EOPNOTSUPP; 1813 goto out; 1814 } 1815 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1816 out: 1817 return error; 1818 } 1819 1820 /* 1821 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1822 * the needed size. 1823 */ 1824 1825 int 1826 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1827 { 1828 fhandle_t *fhp; 1829 int error; 1830 1831 if (fhsize > FHANDLE_SIZE_MAX) { 1832 return EINVAL; 1833 } 1834 if (fhsize < FHANDLE_SIZE_MIN) { 1835 return EINVAL; 1836 } 1837 again: 1838 fhp = kmem_alloc(fhsize, KM_SLEEP); 1839 if (fhp == NULL) { 1840 return ENOMEM; 1841 } 1842 error = copyin(ufhp, fhp, fhsize); 1843 if (error == 0) { 1844 /* XXX this check shouldn't be here */ 1845 if (FHANDLE_SIZE(fhp) == fhsize) { 1846 *fhpp = fhp; 1847 return 0; 1848 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1849 /* 1850 * a kludge for nfsv2 padded handles. 1851 */ 1852 size_t sz; 1853 1854 sz = FHANDLE_SIZE(fhp); 1855 kmem_free(fhp, fhsize); 1856 fhsize = sz; 1857 goto again; 1858 } else { 1859 /* 1860 * userland told us wrong size. 1861 */ 1862 error = EINVAL; 1863 } 1864 } 1865 kmem_free(fhp, fhsize); 1866 return error; 1867 } 1868 1869 void 1870 vfs_copyinfh_free(fhandle_t *fhp) 1871 { 1872 1873 vfs__fhfree(fhp); 1874 } 1875 1876 /* 1877 * Get file handle system call 1878 */ 1879 int 1880 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1881 { 1882 /* { 1883 syscallarg(char *) fname; 1884 syscallarg(fhandle_t *) fhp; 1885 syscallarg(size_t *) fh_size; 1886 } */ 1887 struct vnode *vp; 1888 fhandle_t *fh; 1889 int error; 1890 struct pathbuf *pb; 1891 struct nameidata nd; 1892 size_t sz; 1893 size_t usz; 1894 1895 /* 1896 * Must be super user 1897 */ 1898 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1899 0, NULL, NULL, NULL); 1900 if (error) 1901 return (error); 1902 1903 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1904 if (error) { 1905 return error; 1906 } 1907 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1908 error = namei(&nd); 1909 if (error) { 1910 pathbuf_destroy(pb); 1911 return error; 1912 } 1913 vp = nd.ni_vp; 1914 pathbuf_destroy(pb); 1915 1916 error = vfs_composefh_alloc(vp, &fh); 1917 vput(vp); 1918 if (error != 0) { 1919 return error; 1920 } 1921 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1922 if (error != 0) { 1923 goto out; 1924 } 1925 sz = FHANDLE_SIZE(fh); 1926 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1927 if (error != 0) { 1928 goto out; 1929 } 1930 if (usz >= sz) { 1931 error = copyout(fh, SCARG(uap, fhp), sz); 1932 } else { 1933 error = E2BIG; 1934 } 1935 out: 1936 vfs_composefh_free(fh); 1937 return (error); 1938 } 1939 1940 /* 1941 * Open a file given a file handle. 1942 * 1943 * Check permissions, allocate an open file structure, 1944 * and call the device open routine if any. 1945 */ 1946 1947 int 1948 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1949 register_t *retval) 1950 { 1951 file_t *fp; 1952 struct vnode *vp = NULL; 1953 kauth_cred_t cred = l->l_cred; 1954 file_t *nfp; 1955 int indx, error; 1956 struct vattr va; 1957 fhandle_t *fh; 1958 int flags; 1959 proc_t *p; 1960 1961 p = curproc; 1962 1963 /* 1964 * Must be super user 1965 */ 1966 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1967 0, NULL, NULL, NULL))) 1968 return (error); 1969 1970 if (oflags & O_SEARCH) { 1971 oflags &= ~(int)O_SEARCH; 1972 } 1973 1974 flags = FFLAGS(oflags); 1975 if ((flags & (FREAD | FWRITE)) == 0) 1976 return (EINVAL); 1977 if ((flags & O_CREAT)) 1978 return (EINVAL); 1979 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1980 return (error); 1981 fp = nfp; 1982 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1983 if (error != 0) { 1984 goto bad; 1985 } 1986 error = vfs_fhtovp(fh, &vp); 1987 vfs_copyinfh_free(fh); 1988 if (error != 0) { 1989 goto bad; 1990 } 1991 1992 /* Now do an effective vn_open */ 1993 1994 if (vp->v_type == VSOCK) { 1995 error = EOPNOTSUPP; 1996 goto bad; 1997 } 1998 error = vn_openchk(vp, cred, flags); 1999 if (error != 0) 2000 goto bad; 2001 if (flags & O_TRUNC) { 2002 VOP_UNLOCK(vp); /* XXX */ 2003 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2004 vattr_null(&va); 2005 va.va_size = 0; 2006 error = VOP_SETATTR(vp, &va, cred); 2007 if (error) 2008 goto bad; 2009 } 2010 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2011 goto bad; 2012 if (flags & FWRITE) { 2013 mutex_enter(vp->v_interlock); 2014 vp->v_writecount++; 2015 mutex_exit(vp->v_interlock); 2016 } 2017 2018 /* done with modified vn_open, now finish what sys_open does. */ 2019 if ((error = open_setfp(l, fp, vp, indx, flags))) 2020 return error; 2021 2022 VOP_UNLOCK(vp); 2023 *retval = indx; 2024 fd_affix(p, fp, indx); 2025 return (0); 2026 2027 bad: 2028 fd_abort(p, fp, indx); 2029 if (vp != NULL) 2030 vput(vp); 2031 return (error); 2032 } 2033 2034 int 2035 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2036 { 2037 /* { 2038 syscallarg(const void *) fhp; 2039 syscallarg(size_t) fh_size; 2040 syscallarg(int) flags; 2041 } */ 2042 2043 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2044 SCARG(uap, flags), retval); 2045 } 2046 2047 int 2048 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2049 { 2050 int error; 2051 fhandle_t *fh; 2052 struct vnode *vp; 2053 2054 /* 2055 * Must be super user 2056 */ 2057 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2058 0, NULL, NULL, NULL))) 2059 return (error); 2060 2061 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2062 if (error != 0) 2063 return error; 2064 2065 error = vfs_fhtovp(fh, &vp); 2066 vfs_copyinfh_free(fh); 2067 if (error != 0) 2068 return error; 2069 2070 error = vn_stat(vp, sb); 2071 vput(vp); 2072 return error; 2073 } 2074 2075 2076 /* ARGSUSED */ 2077 int 2078 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2079 { 2080 /* { 2081 syscallarg(const void *) fhp; 2082 syscallarg(size_t) fh_size; 2083 syscallarg(struct stat *) sb; 2084 } */ 2085 struct stat sb; 2086 int error; 2087 2088 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2089 if (error) 2090 return error; 2091 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2092 } 2093 2094 int 2095 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2096 int flags) 2097 { 2098 fhandle_t *fh; 2099 struct mount *mp; 2100 struct vnode *vp; 2101 int error; 2102 2103 /* 2104 * Must be super user 2105 */ 2106 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2107 0, NULL, NULL, NULL))) 2108 return error; 2109 2110 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2111 if (error != 0) 2112 return error; 2113 2114 error = vfs_fhtovp(fh, &vp); 2115 vfs_copyinfh_free(fh); 2116 if (error != 0) 2117 return error; 2118 2119 mp = vp->v_mount; 2120 error = dostatvfs(mp, sb, l, flags, 1); 2121 vput(vp); 2122 return error; 2123 } 2124 2125 /* ARGSUSED */ 2126 int 2127 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2128 { 2129 /* { 2130 syscallarg(const void *) fhp; 2131 syscallarg(size_t) fh_size; 2132 syscallarg(struct statvfs *) buf; 2133 syscallarg(int) flags; 2134 } */ 2135 struct statvfs *sb = STATVFSBUF_GET(); 2136 int error; 2137 2138 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2139 SCARG(uap, flags)); 2140 if (error == 0) 2141 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2142 STATVFSBUF_PUT(sb); 2143 return error; 2144 } 2145 2146 /* 2147 * Create a special file. 2148 */ 2149 /* ARGSUSED */ 2150 int 2151 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2152 register_t *retval) 2153 { 2154 /* { 2155 syscallarg(const char *) path; 2156 syscallarg(mode_t) mode; 2157 syscallarg(dev_t) dev; 2158 } */ 2159 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2160 SCARG(uap, dev), retval, UIO_USERSPACE); 2161 } 2162 2163 int 2164 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2165 register_t *retval) 2166 { 2167 /* { 2168 syscallarg(int) fd; 2169 syscallarg(const char *) path; 2170 syscallarg(mode_t) mode; 2171 syscallarg(int) pad; 2172 syscallarg(dev_t) dev; 2173 } */ 2174 2175 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2176 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2177 } 2178 2179 int 2180 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2181 register_t *retval, enum uio_seg seg) 2182 { 2183 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2184 } 2185 2186 int 2187 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2188 dev_t dev, register_t *retval, enum uio_seg seg) 2189 { 2190 struct proc *p = l->l_proc; 2191 struct vnode *vp; 2192 struct vattr vattr; 2193 int error, optype; 2194 struct pathbuf *pb; 2195 struct nameidata nd; 2196 const char *pathstring; 2197 2198 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2199 0, NULL, NULL, NULL)) != 0) 2200 return (error); 2201 2202 optype = VOP_MKNOD_DESCOFFSET; 2203 2204 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2205 if (error) { 2206 return error; 2207 } 2208 pathstring = pathbuf_stringcopy_get(pb); 2209 if (pathstring == NULL) { 2210 pathbuf_destroy(pb); 2211 return ENOMEM; 2212 } 2213 2214 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2215 2216 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2217 goto out; 2218 vp = nd.ni_vp; 2219 2220 if (vp != NULL) 2221 error = EEXIST; 2222 else { 2223 vattr_null(&vattr); 2224 /* We will read cwdi->cwdi_cmask unlocked. */ 2225 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2226 vattr.va_rdev = dev; 2227 2228 switch (mode & S_IFMT) { 2229 case S_IFMT: /* used by badsect to flag bad sectors */ 2230 vattr.va_type = VBAD; 2231 break; 2232 case S_IFCHR: 2233 vattr.va_type = VCHR; 2234 break; 2235 case S_IFBLK: 2236 vattr.va_type = VBLK; 2237 break; 2238 case S_IFWHT: 2239 optype = VOP_WHITEOUT_DESCOFFSET; 2240 break; 2241 case S_IFREG: 2242 #if NVERIEXEC > 0 2243 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2244 O_CREAT); 2245 #endif /* NVERIEXEC > 0 */ 2246 vattr.va_type = VREG; 2247 vattr.va_rdev = VNOVAL; 2248 optype = VOP_CREATE_DESCOFFSET; 2249 break; 2250 default: 2251 error = EINVAL; 2252 break; 2253 } 2254 } 2255 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2256 && vattr.va_rdev == VNOVAL) 2257 error = EINVAL; 2258 if (!error) { 2259 switch (optype) { 2260 case VOP_WHITEOUT_DESCOFFSET: 2261 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2262 if (error) 2263 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2264 vput(nd.ni_dvp); 2265 break; 2266 2267 case VOP_MKNOD_DESCOFFSET: 2268 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2269 &nd.ni_cnd, &vattr); 2270 if (error == 0) 2271 vrele(nd.ni_vp); 2272 vput(nd.ni_dvp); 2273 break; 2274 2275 case VOP_CREATE_DESCOFFSET: 2276 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2277 &nd.ni_cnd, &vattr); 2278 if (error == 0) 2279 vrele(nd.ni_vp); 2280 vput(nd.ni_dvp); 2281 break; 2282 } 2283 } else { 2284 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2285 if (nd.ni_dvp == vp) 2286 vrele(nd.ni_dvp); 2287 else 2288 vput(nd.ni_dvp); 2289 if (vp) 2290 vrele(vp); 2291 } 2292 out: 2293 pathbuf_stringcopy_put(pb, pathstring); 2294 pathbuf_destroy(pb); 2295 return (error); 2296 } 2297 2298 /* 2299 * Create a named pipe. 2300 */ 2301 /* ARGSUSED */ 2302 int 2303 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2304 { 2305 /* { 2306 syscallarg(const char *) path; 2307 syscallarg(int) mode; 2308 } */ 2309 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2310 } 2311 2312 int 2313 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2314 register_t *retval) 2315 { 2316 /* { 2317 syscallarg(int) fd; 2318 syscallarg(const char *) path; 2319 syscallarg(int) mode; 2320 } */ 2321 2322 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2323 SCARG(uap, mode)); 2324 } 2325 2326 static int 2327 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2328 { 2329 struct proc *p = l->l_proc; 2330 struct vattr vattr; 2331 int error; 2332 struct pathbuf *pb; 2333 struct nameidata nd; 2334 2335 error = pathbuf_copyin(path, &pb); 2336 if (error) { 2337 return error; 2338 } 2339 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2340 2341 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2342 pathbuf_destroy(pb); 2343 return error; 2344 } 2345 if (nd.ni_vp != NULL) { 2346 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2347 if (nd.ni_dvp == nd.ni_vp) 2348 vrele(nd.ni_dvp); 2349 else 2350 vput(nd.ni_dvp); 2351 vrele(nd.ni_vp); 2352 pathbuf_destroy(pb); 2353 return (EEXIST); 2354 } 2355 vattr_null(&vattr); 2356 vattr.va_type = VFIFO; 2357 /* We will read cwdi->cwdi_cmask unlocked. */ 2358 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2359 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2360 if (error == 0) 2361 vrele(nd.ni_vp); 2362 vput(nd.ni_dvp); 2363 pathbuf_destroy(pb); 2364 return (error); 2365 } 2366 2367 /* 2368 * Make a hard file link. 2369 */ 2370 /* ARGSUSED */ 2371 int 2372 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2373 const char *link, int follow, register_t *retval) 2374 { 2375 struct vnode *vp; 2376 struct pathbuf *linkpb; 2377 struct nameidata nd; 2378 namei_simple_flags_t ns_flags; 2379 int error; 2380 2381 if (follow & AT_SYMLINK_FOLLOW) 2382 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2383 else 2384 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2385 2386 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2387 if (error != 0) 2388 return (error); 2389 error = pathbuf_copyin(link, &linkpb); 2390 if (error) { 2391 goto out1; 2392 } 2393 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2394 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2395 goto out2; 2396 if (nd.ni_vp) { 2397 error = EEXIST; 2398 goto abortop; 2399 } 2400 /* Prevent hard links on directories. */ 2401 if (vp->v_type == VDIR) { 2402 error = EPERM; 2403 goto abortop; 2404 } 2405 /* Prevent cross-mount operation. */ 2406 if (nd.ni_dvp->v_mount != vp->v_mount) { 2407 error = EXDEV; 2408 goto abortop; 2409 } 2410 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2411 VOP_UNLOCK(nd.ni_dvp); 2412 vrele(nd.ni_dvp); 2413 out2: 2414 pathbuf_destroy(linkpb); 2415 out1: 2416 vrele(vp); 2417 return (error); 2418 abortop: 2419 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2420 if (nd.ni_dvp == nd.ni_vp) 2421 vrele(nd.ni_dvp); 2422 else 2423 vput(nd.ni_dvp); 2424 if (nd.ni_vp != NULL) 2425 vrele(nd.ni_vp); 2426 goto out2; 2427 } 2428 2429 int 2430 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2431 { 2432 /* { 2433 syscallarg(const char *) path; 2434 syscallarg(const char *) link; 2435 } */ 2436 const char *path = SCARG(uap, path); 2437 const char *link = SCARG(uap, link); 2438 2439 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2440 AT_SYMLINK_FOLLOW, retval); 2441 } 2442 2443 int 2444 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2445 register_t *retval) 2446 { 2447 /* { 2448 syscallarg(int) fd1; 2449 syscallarg(const char *) name1; 2450 syscallarg(int) fd2; 2451 syscallarg(const char *) name2; 2452 syscallarg(int) flags; 2453 } */ 2454 int fd1 = SCARG(uap, fd1); 2455 const char *name1 = SCARG(uap, name1); 2456 int fd2 = SCARG(uap, fd2); 2457 const char *name2 = SCARG(uap, name2); 2458 int follow; 2459 2460 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2461 2462 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2463 } 2464 2465 2466 int 2467 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2468 { 2469 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2470 } 2471 2472 static int 2473 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2474 const char *link, enum uio_seg seg) 2475 { 2476 struct proc *p = curproc; 2477 struct vattr vattr; 2478 char *path; 2479 int error; 2480 struct pathbuf *linkpb; 2481 struct nameidata nd; 2482 2483 KASSERT(l != NULL || fdat == AT_FDCWD); 2484 2485 path = PNBUF_GET(); 2486 if (seg == UIO_USERSPACE) { 2487 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2488 goto out1; 2489 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2490 goto out1; 2491 } else { 2492 KASSERT(strlen(patharg) < MAXPATHLEN); 2493 strcpy(path, patharg); 2494 linkpb = pathbuf_create(link); 2495 if (linkpb == NULL) { 2496 error = ENOMEM; 2497 goto out1; 2498 } 2499 } 2500 ktrkuser("symlink-target", path, strlen(path)); 2501 2502 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2503 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2504 goto out2; 2505 if (nd.ni_vp) { 2506 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2507 if (nd.ni_dvp == nd.ni_vp) 2508 vrele(nd.ni_dvp); 2509 else 2510 vput(nd.ni_dvp); 2511 vrele(nd.ni_vp); 2512 error = EEXIST; 2513 goto out2; 2514 } 2515 vattr_null(&vattr); 2516 vattr.va_type = VLNK; 2517 /* We will read cwdi->cwdi_cmask unlocked. */ 2518 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2519 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2520 if (error == 0) 2521 vrele(nd.ni_vp); 2522 vput(nd.ni_dvp); 2523 out2: 2524 pathbuf_destroy(linkpb); 2525 out1: 2526 PNBUF_PUT(path); 2527 return (error); 2528 } 2529 2530 /* 2531 * Make a symbolic link. 2532 */ 2533 /* ARGSUSED */ 2534 int 2535 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2536 { 2537 /* { 2538 syscallarg(const char *) path; 2539 syscallarg(const char *) link; 2540 } */ 2541 2542 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2543 UIO_USERSPACE); 2544 } 2545 2546 int 2547 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2548 register_t *retval) 2549 { 2550 /* { 2551 syscallarg(const char *) path1; 2552 syscallarg(int) fd; 2553 syscallarg(const char *) path2; 2554 } */ 2555 2556 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2557 SCARG(uap, path2), UIO_USERSPACE); 2558 } 2559 2560 /* 2561 * Delete a whiteout from the filesystem. 2562 */ 2563 /* ARGSUSED */ 2564 int 2565 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2566 { 2567 /* { 2568 syscallarg(const char *) path; 2569 } */ 2570 int error; 2571 struct pathbuf *pb; 2572 struct nameidata nd; 2573 2574 error = pathbuf_copyin(SCARG(uap, path), &pb); 2575 if (error) { 2576 return error; 2577 } 2578 2579 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2580 error = namei(&nd); 2581 if (error) { 2582 pathbuf_destroy(pb); 2583 return (error); 2584 } 2585 2586 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2587 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2588 if (nd.ni_dvp == nd.ni_vp) 2589 vrele(nd.ni_dvp); 2590 else 2591 vput(nd.ni_dvp); 2592 if (nd.ni_vp) 2593 vrele(nd.ni_vp); 2594 pathbuf_destroy(pb); 2595 return (EEXIST); 2596 } 2597 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2598 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2599 vput(nd.ni_dvp); 2600 pathbuf_destroy(pb); 2601 return (error); 2602 } 2603 2604 /* 2605 * Delete a name from the filesystem. 2606 */ 2607 /* ARGSUSED */ 2608 int 2609 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2610 { 2611 /* { 2612 syscallarg(const char *) path; 2613 } */ 2614 2615 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2616 } 2617 2618 int 2619 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2620 register_t *retval) 2621 { 2622 /* { 2623 syscallarg(int) fd; 2624 syscallarg(const char *) path; 2625 syscallarg(int) flag; 2626 } */ 2627 2628 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2629 SCARG(uap, flag), UIO_USERSPACE); 2630 } 2631 2632 int 2633 do_sys_unlink(const char *arg, enum uio_seg seg) 2634 { 2635 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2636 } 2637 2638 static int 2639 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2640 enum uio_seg seg) 2641 { 2642 struct vnode *vp; 2643 int error; 2644 struct pathbuf *pb; 2645 struct nameidata nd; 2646 const char *pathstring; 2647 2648 KASSERT(l != NULL || fdat == AT_FDCWD); 2649 2650 error = pathbuf_maybe_copyin(arg, seg, &pb); 2651 if (error) { 2652 return error; 2653 } 2654 pathstring = pathbuf_stringcopy_get(pb); 2655 if (pathstring == NULL) { 2656 pathbuf_destroy(pb); 2657 return ENOMEM; 2658 } 2659 2660 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2661 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2662 goto out; 2663 vp = nd.ni_vp; 2664 2665 /* 2666 * The root of a mounted filesystem cannot be deleted. 2667 */ 2668 if ((vp->v_vflag & VV_ROOT) != 0) { 2669 error = EBUSY; 2670 goto abort; 2671 } 2672 2673 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2674 error = EBUSY; 2675 goto abort; 2676 } 2677 2678 /* 2679 * No rmdir "." please. 2680 */ 2681 if (nd.ni_dvp == vp) { 2682 error = EINVAL; 2683 goto abort; 2684 } 2685 2686 /* 2687 * AT_REMOVEDIR is required to remove a directory 2688 */ 2689 if (vp->v_type == VDIR) { 2690 if (!(flags & AT_REMOVEDIR)) { 2691 error = EPERM; 2692 goto abort; 2693 } else { 2694 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2695 goto out; 2696 } 2697 } 2698 2699 /* 2700 * Starting here we only deal with non directories. 2701 */ 2702 if (flags & AT_REMOVEDIR) { 2703 error = ENOTDIR; 2704 goto abort; 2705 } 2706 2707 #if NVERIEXEC > 0 2708 /* Handle remove requests for veriexec entries. */ 2709 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2710 goto abort; 2711 } 2712 #endif /* NVERIEXEC > 0 */ 2713 2714 #ifdef FILEASSOC 2715 (void)fileassoc_file_delete(vp); 2716 #endif /* FILEASSOC */ 2717 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2718 goto out; 2719 2720 abort: 2721 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2722 if (nd.ni_dvp == vp) 2723 vrele(nd.ni_dvp); 2724 else 2725 vput(nd.ni_dvp); 2726 vput(vp); 2727 2728 out: 2729 pathbuf_stringcopy_put(pb, pathstring); 2730 pathbuf_destroy(pb); 2731 return (error); 2732 } 2733 2734 /* 2735 * Reposition read/write file offset. 2736 */ 2737 int 2738 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2739 { 2740 /* { 2741 syscallarg(int) fd; 2742 syscallarg(int) pad; 2743 syscallarg(off_t) offset; 2744 syscallarg(int) whence; 2745 } */ 2746 kauth_cred_t cred = l->l_cred; 2747 file_t *fp; 2748 struct vnode *vp; 2749 struct vattr vattr; 2750 off_t newoff; 2751 int error, fd; 2752 2753 fd = SCARG(uap, fd); 2754 2755 if ((fp = fd_getfile(fd)) == NULL) 2756 return (EBADF); 2757 2758 vp = fp->f_vnode; 2759 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2760 error = ESPIPE; 2761 goto out; 2762 } 2763 2764 vn_lock(vp, LK_SHARED | LK_RETRY); 2765 2766 switch (SCARG(uap, whence)) { 2767 case SEEK_CUR: 2768 newoff = fp->f_offset + SCARG(uap, offset); 2769 break; 2770 case SEEK_END: 2771 error = VOP_GETATTR(vp, &vattr, cred); 2772 if (error) { 2773 VOP_UNLOCK(vp); 2774 goto out; 2775 } 2776 newoff = SCARG(uap, offset) + vattr.va_size; 2777 break; 2778 case SEEK_SET: 2779 newoff = SCARG(uap, offset); 2780 break; 2781 default: 2782 error = EINVAL; 2783 VOP_UNLOCK(vp); 2784 goto out; 2785 } 2786 VOP_UNLOCK(vp); 2787 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2788 *(off_t *)retval = fp->f_offset = newoff; 2789 } 2790 out: 2791 fd_putfile(fd); 2792 return (error); 2793 } 2794 2795 /* 2796 * Positional read system call. 2797 */ 2798 int 2799 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2800 { 2801 /* { 2802 syscallarg(int) fd; 2803 syscallarg(void *) buf; 2804 syscallarg(size_t) nbyte; 2805 syscallarg(off_t) offset; 2806 } */ 2807 file_t *fp; 2808 struct vnode *vp; 2809 off_t offset; 2810 int error, fd = SCARG(uap, fd); 2811 2812 if ((fp = fd_getfile(fd)) == NULL) 2813 return (EBADF); 2814 2815 if ((fp->f_flag & FREAD) == 0) { 2816 fd_putfile(fd); 2817 return (EBADF); 2818 } 2819 2820 vp = fp->f_vnode; 2821 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2822 error = ESPIPE; 2823 goto out; 2824 } 2825 2826 offset = SCARG(uap, offset); 2827 2828 /* 2829 * XXX This works because no file systems actually 2830 * XXX take any action on the seek operation. 2831 */ 2832 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2833 goto out; 2834 2835 /* dofileread() will unuse the descriptor for us */ 2836 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2837 &offset, 0, retval)); 2838 2839 out: 2840 fd_putfile(fd); 2841 return (error); 2842 } 2843 2844 /* 2845 * Positional scatter read system call. 2846 */ 2847 int 2848 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2849 { 2850 /* { 2851 syscallarg(int) fd; 2852 syscallarg(const struct iovec *) iovp; 2853 syscallarg(int) iovcnt; 2854 syscallarg(off_t) offset; 2855 } */ 2856 off_t offset = SCARG(uap, offset); 2857 2858 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2859 SCARG(uap, iovcnt), &offset, 0, retval); 2860 } 2861 2862 /* 2863 * Positional write system call. 2864 */ 2865 int 2866 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2867 { 2868 /* { 2869 syscallarg(int) fd; 2870 syscallarg(const void *) buf; 2871 syscallarg(size_t) nbyte; 2872 syscallarg(off_t) offset; 2873 } */ 2874 file_t *fp; 2875 struct vnode *vp; 2876 off_t offset; 2877 int error, fd = SCARG(uap, fd); 2878 2879 if ((fp = fd_getfile(fd)) == NULL) 2880 return (EBADF); 2881 2882 if ((fp->f_flag & FWRITE) == 0) { 2883 fd_putfile(fd); 2884 return (EBADF); 2885 } 2886 2887 vp = fp->f_vnode; 2888 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2889 error = ESPIPE; 2890 goto out; 2891 } 2892 2893 offset = SCARG(uap, offset); 2894 2895 /* 2896 * XXX This works because no file systems actually 2897 * XXX take any action on the seek operation. 2898 */ 2899 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2900 goto out; 2901 2902 /* dofilewrite() will unuse the descriptor for us */ 2903 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2904 &offset, 0, retval)); 2905 2906 out: 2907 fd_putfile(fd); 2908 return (error); 2909 } 2910 2911 /* 2912 * Positional gather write system call. 2913 */ 2914 int 2915 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2916 { 2917 /* { 2918 syscallarg(int) fd; 2919 syscallarg(const struct iovec *) iovp; 2920 syscallarg(int) iovcnt; 2921 syscallarg(off_t) offset; 2922 } */ 2923 off_t offset = SCARG(uap, offset); 2924 2925 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2926 SCARG(uap, iovcnt), &offset, 0, retval); 2927 } 2928 2929 /* 2930 * Check access permissions. 2931 */ 2932 int 2933 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2934 { 2935 /* { 2936 syscallarg(const char *) path; 2937 syscallarg(int) flags; 2938 } */ 2939 2940 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2941 SCARG(uap, flags), 0); 2942 } 2943 2944 int 2945 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2946 int mode, int flags) 2947 { 2948 kauth_cred_t cred; 2949 struct vnode *vp; 2950 int error, nd_flag, vmode; 2951 struct pathbuf *pb; 2952 struct nameidata nd; 2953 2954 CTASSERT(F_OK == 0); 2955 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2956 /* nonsense mode */ 2957 return EINVAL; 2958 } 2959 2960 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2961 if (flags & AT_SYMLINK_NOFOLLOW) 2962 nd_flag &= ~FOLLOW; 2963 2964 error = pathbuf_copyin(path, &pb); 2965 if (error) 2966 return error; 2967 2968 NDINIT(&nd, LOOKUP, nd_flag, pb); 2969 2970 /* Override default credentials */ 2971 cred = kauth_cred_dup(l->l_cred); 2972 if (!(flags & AT_EACCESS)) { 2973 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2974 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2975 } 2976 nd.ni_cnd.cn_cred = cred; 2977 2978 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2979 pathbuf_destroy(pb); 2980 goto out; 2981 } 2982 vp = nd.ni_vp; 2983 pathbuf_destroy(pb); 2984 2985 /* Flags == 0 means only check for existence. */ 2986 if (mode) { 2987 vmode = 0; 2988 if (mode & R_OK) 2989 vmode |= VREAD; 2990 if (mode & W_OK) 2991 vmode |= VWRITE; 2992 if (mode & X_OK) 2993 vmode |= VEXEC; 2994 2995 error = VOP_ACCESS(vp, vmode, cred); 2996 if (!error && (vmode & VWRITE)) 2997 error = vn_writechk(vp); 2998 } 2999 vput(vp); 3000 out: 3001 kauth_cred_free(cred); 3002 return (error); 3003 } 3004 3005 int 3006 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3007 register_t *retval) 3008 { 3009 /* { 3010 syscallarg(int) fd; 3011 syscallarg(const char *) path; 3012 syscallarg(int) amode; 3013 syscallarg(int) flag; 3014 } */ 3015 3016 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3017 SCARG(uap, amode), SCARG(uap, flag)); 3018 } 3019 3020 /* 3021 * Common code for all sys_stat functions, including compat versions. 3022 */ 3023 int 3024 do_sys_stat(const char *userpath, unsigned int nd_flag, 3025 struct stat *sb) 3026 { 3027 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3028 } 3029 3030 int 3031 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3032 unsigned int nd_flag, struct stat *sb) 3033 { 3034 int error; 3035 struct pathbuf *pb; 3036 struct nameidata nd; 3037 3038 KASSERT(l != NULL || fdat == AT_FDCWD); 3039 3040 error = pathbuf_copyin(userpath, &pb); 3041 if (error) { 3042 return error; 3043 } 3044 3045 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3046 3047 error = fd_nameiat(l, fdat, &nd); 3048 if (error != 0) { 3049 pathbuf_destroy(pb); 3050 return error; 3051 } 3052 error = vn_stat(nd.ni_vp, sb); 3053 vput(nd.ni_vp); 3054 pathbuf_destroy(pb); 3055 return error; 3056 } 3057 3058 /* 3059 * Get file status; this version follows links. 3060 */ 3061 /* ARGSUSED */ 3062 int 3063 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3064 { 3065 /* { 3066 syscallarg(const char *) path; 3067 syscallarg(struct stat *) ub; 3068 } */ 3069 struct stat sb; 3070 int error; 3071 3072 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3073 if (error) 3074 return error; 3075 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3076 } 3077 3078 /* 3079 * Get file status; this version does not follow links. 3080 */ 3081 /* ARGSUSED */ 3082 int 3083 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3084 { 3085 /* { 3086 syscallarg(const char *) path; 3087 syscallarg(struct stat *) ub; 3088 } */ 3089 struct stat sb; 3090 int error; 3091 3092 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3093 if (error) 3094 return error; 3095 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3096 } 3097 3098 int 3099 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3100 register_t *retval) 3101 { 3102 /* { 3103 syscallarg(int) fd; 3104 syscallarg(const char *) path; 3105 syscallarg(struct stat *) buf; 3106 syscallarg(int) flag; 3107 } */ 3108 unsigned int nd_flag; 3109 struct stat sb; 3110 int error; 3111 3112 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3113 nd_flag = NOFOLLOW; 3114 else 3115 nd_flag = FOLLOW; 3116 3117 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3118 &sb); 3119 if (error) 3120 return error; 3121 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3122 } 3123 3124 /* 3125 * Get configurable pathname variables. 3126 */ 3127 /* ARGSUSED */ 3128 int 3129 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3130 { 3131 /* { 3132 syscallarg(const char *) path; 3133 syscallarg(int) name; 3134 } */ 3135 int error; 3136 struct pathbuf *pb; 3137 struct nameidata nd; 3138 3139 error = pathbuf_copyin(SCARG(uap, path), &pb); 3140 if (error) { 3141 return error; 3142 } 3143 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3144 if ((error = namei(&nd)) != 0) { 3145 pathbuf_destroy(pb); 3146 return (error); 3147 } 3148 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3149 vput(nd.ni_vp); 3150 pathbuf_destroy(pb); 3151 return (error); 3152 } 3153 3154 /* 3155 * Return target name of a symbolic link. 3156 */ 3157 /* ARGSUSED */ 3158 int 3159 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3160 register_t *retval) 3161 { 3162 /* { 3163 syscallarg(const char *) path; 3164 syscallarg(char *) buf; 3165 syscallarg(size_t) count; 3166 } */ 3167 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3168 SCARG(uap, buf), SCARG(uap, count), retval); 3169 } 3170 3171 static int 3172 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3173 size_t count, register_t *retval) 3174 { 3175 struct vnode *vp; 3176 struct iovec aiov; 3177 struct uio auio; 3178 int error; 3179 struct pathbuf *pb; 3180 struct nameidata nd; 3181 3182 error = pathbuf_copyin(path, &pb); 3183 if (error) { 3184 return error; 3185 } 3186 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3187 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3188 pathbuf_destroy(pb); 3189 return error; 3190 } 3191 vp = nd.ni_vp; 3192 pathbuf_destroy(pb); 3193 if (vp->v_type != VLNK) 3194 error = EINVAL; 3195 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3196 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3197 aiov.iov_base = buf; 3198 aiov.iov_len = count; 3199 auio.uio_iov = &aiov; 3200 auio.uio_iovcnt = 1; 3201 auio.uio_offset = 0; 3202 auio.uio_rw = UIO_READ; 3203 KASSERT(l == curlwp); 3204 auio.uio_vmspace = l->l_proc->p_vmspace; 3205 auio.uio_resid = count; 3206 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3207 *retval = count - auio.uio_resid; 3208 } 3209 vput(vp); 3210 return (error); 3211 } 3212 3213 int 3214 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3215 register_t *retval) 3216 { 3217 /* { 3218 syscallarg(int) fd; 3219 syscallarg(const char *) path; 3220 syscallarg(char *) buf; 3221 syscallarg(size_t) bufsize; 3222 } */ 3223 3224 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3225 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3226 } 3227 3228 /* 3229 * Change flags of a file given a path name. 3230 */ 3231 /* ARGSUSED */ 3232 int 3233 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3234 { 3235 /* { 3236 syscallarg(const char *) path; 3237 syscallarg(u_long) flags; 3238 } */ 3239 struct vnode *vp; 3240 int error; 3241 3242 error = namei_simple_user(SCARG(uap, path), 3243 NSM_FOLLOW_TRYEMULROOT, &vp); 3244 if (error != 0) 3245 return (error); 3246 error = change_flags(vp, SCARG(uap, flags), l); 3247 vput(vp); 3248 return (error); 3249 } 3250 3251 /* 3252 * Change flags of a file given a file descriptor. 3253 */ 3254 /* ARGSUSED */ 3255 int 3256 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3257 { 3258 /* { 3259 syscallarg(int) fd; 3260 syscallarg(u_long) flags; 3261 } */ 3262 struct vnode *vp; 3263 file_t *fp; 3264 int error; 3265 3266 /* fd_getvnode() will use the descriptor for us */ 3267 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3268 return (error); 3269 vp = fp->f_vnode; 3270 error = change_flags(vp, SCARG(uap, flags), l); 3271 VOP_UNLOCK(vp); 3272 fd_putfile(SCARG(uap, fd)); 3273 return (error); 3274 } 3275 3276 /* 3277 * Change flags of a file given a path name; this version does 3278 * not follow links. 3279 */ 3280 int 3281 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3282 { 3283 /* { 3284 syscallarg(const char *) path; 3285 syscallarg(u_long) flags; 3286 } */ 3287 struct vnode *vp; 3288 int error; 3289 3290 error = namei_simple_user(SCARG(uap, path), 3291 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3292 if (error != 0) 3293 return (error); 3294 error = change_flags(vp, SCARG(uap, flags), l); 3295 vput(vp); 3296 return (error); 3297 } 3298 3299 /* 3300 * Common routine to change flags of a file. 3301 */ 3302 int 3303 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3304 { 3305 struct vattr vattr; 3306 int error; 3307 3308 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3309 3310 vattr_null(&vattr); 3311 vattr.va_flags = flags; 3312 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3313 3314 return (error); 3315 } 3316 3317 /* 3318 * Change mode of a file given path name; this version follows links. 3319 */ 3320 /* ARGSUSED */ 3321 int 3322 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3323 { 3324 /* { 3325 syscallarg(const char *) path; 3326 syscallarg(int) mode; 3327 } */ 3328 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3329 SCARG(uap, mode), 0); 3330 } 3331 3332 int 3333 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3334 { 3335 int error; 3336 struct vnode *vp; 3337 namei_simple_flags_t ns_flag; 3338 3339 if (flags & AT_SYMLINK_NOFOLLOW) 3340 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3341 else 3342 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3343 3344 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3345 if (error != 0) 3346 return error; 3347 3348 error = change_mode(vp, mode, l); 3349 3350 vrele(vp); 3351 3352 return (error); 3353 } 3354 3355 /* 3356 * Change mode of a file given a file descriptor. 3357 */ 3358 /* ARGSUSED */ 3359 int 3360 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3361 { 3362 /* { 3363 syscallarg(int) fd; 3364 syscallarg(int) mode; 3365 } */ 3366 file_t *fp; 3367 int error; 3368 3369 /* fd_getvnode() will use the descriptor for us */ 3370 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3371 return (error); 3372 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3373 fd_putfile(SCARG(uap, fd)); 3374 return (error); 3375 } 3376 3377 int 3378 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3379 register_t *retval) 3380 { 3381 /* { 3382 syscallarg(int) fd; 3383 syscallarg(const char *) path; 3384 syscallarg(int) mode; 3385 syscallarg(int) flag; 3386 } */ 3387 3388 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3389 SCARG(uap, mode), SCARG(uap, flag)); 3390 } 3391 3392 /* 3393 * Change mode of a file given path name; this version does not follow links. 3394 */ 3395 /* ARGSUSED */ 3396 int 3397 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3398 { 3399 /* { 3400 syscallarg(const char *) path; 3401 syscallarg(int) mode; 3402 } */ 3403 int error; 3404 struct vnode *vp; 3405 3406 error = namei_simple_user(SCARG(uap, path), 3407 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3408 if (error != 0) 3409 return (error); 3410 3411 error = change_mode(vp, SCARG(uap, mode), l); 3412 3413 vrele(vp); 3414 return (error); 3415 } 3416 3417 /* 3418 * Common routine to set mode given a vnode. 3419 */ 3420 static int 3421 change_mode(struct vnode *vp, int mode, struct lwp *l) 3422 { 3423 struct vattr vattr; 3424 int error; 3425 3426 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3427 vattr_null(&vattr); 3428 vattr.va_mode = mode & ALLPERMS; 3429 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3430 VOP_UNLOCK(vp); 3431 return (error); 3432 } 3433 3434 /* 3435 * Set ownership given a path name; this version follows links. 3436 */ 3437 /* ARGSUSED */ 3438 int 3439 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3440 { 3441 /* { 3442 syscallarg(const char *) path; 3443 syscallarg(uid_t) uid; 3444 syscallarg(gid_t) gid; 3445 } */ 3446 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3447 SCARG(uap, gid), 0); 3448 } 3449 3450 int 3451 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3452 gid_t gid, int flags) 3453 { 3454 int error; 3455 struct vnode *vp; 3456 namei_simple_flags_t ns_flag; 3457 3458 if (flags & AT_SYMLINK_NOFOLLOW) 3459 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3460 else 3461 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3462 3463 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3464 if (error != 0) 3465 return error; 3466 3467 error = change_owner(vp, uid, gid, l, 0); 3468 3469 vrele(vp); 3470 3471 return (error); 3472 } 3473 3474 /* 3475 * Set ownership given a path name; this version follows links. 3476 * Provides POSIX semantics. 3477 */ 3478 /* ARGSUSED */ 3479 int 3480 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3481 { 3482 /* { 3483 syscallarg(const char *) path; 3484 syscallarg(uid_t) uid; 3485 syscallarg(gid_t) gid; 3486 } */ 3487 int error; 3488 struct vnode *vp; 3489 3490 error = namei_simple_user(SCARG(uap, path), 3491 NSM_FOLLOW_TRYEMULROOT, &vp); 3492 if (error != 0) 3493 return (error); 3494 3495 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3496 3497 vrele(vp); 3498 return (error); 3499 } 3500 3501 /* 3502 * Set ownership given a file descriptor. 3503 */ 3504 /* ARGSUSED */ 3505 int 3506 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3507 { 3508 /* { 3509 syscallarg(int) fd; 3510 syscallarg(uid_t) uid; 3511 syscallarg(gid_t) gid; 3512 } */ 3513 int error; 3514 file_t *fp; 3515 3516 /* fd_getvnode() will use the descriptor for us */ 3517 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3518 return (error); 3519 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3520 l, 0); 3521 fd_putfile(SCARG(uap, fd)); 3522 return (error); 3523 } 3524 3525 int 3526 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3527 register_t *retval) 3528 { 3529 /* { 3530 syscallarg(int) fd; 3531 syscallarg(const char *) path; 3532 syscallarg(uid_t) owner; 3533 syscallarg(gid_t) group; 3534 syscallarg(int) flag; 3535 } */ 3536 3537 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3538 SCARG(uap, owner), SCARG(uap, group), 3539 SCARG(uap, flag)); 3540 } 3541 3542 /* 3543 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3544 */ 3545 /* ARGSUSED */ 3546 int 3547 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3548 { 3549 /* { 3550 syscallarg(int) fd; 3551 syscallarg(uid_t) uid; 3552 syscallarg(gid_t) gid; 3553 } */ 3554 int error; 3555 file_t *fp; 3556 3557 /* fd_getvnode() will use the descriptor for us */ 3558 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3559 return (error); 3560 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3561 l, 1); 3562 fd_putfile(SCARG(uap, fd)); 3563 return (error); 3564 } 3565 3566 /* 3567 * Set ownership given a path name; this version does not follow links. 3568 */ 3569 /* ARGSUSED */ 3570 int 3571 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3572 { 3573 /* { 3574 syscallarg(const char *) path; 3575 syscallarg(uid_t) uid; 3576 syscallarg(gid_t) gid; 3577 } */ 3578 int error; 3579 struct vnode *vp; 3580 3581 error = namei_simple_user(SCARG(uap, path), 3582 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3583 if (error != 0) 3584 return (error); 3585 3586 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3587 3588 vrele(vp); 3589 return (error); 3590 } 3591 3592 /* 3593 * Set ownership given a path name; this version does not follow links. 3594 * Provides POSIX/XPG semantics. 3595 */ 3596 /* ARGSUSED */ 3597 int 3598 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3599 { 3600 /* { 3601 syscallarg(const char *) path; 3602 syscallarg(uid_t) uid; 3603 syscallarg(gid_t) gid; 3604 } */ 3605 int error; 3606 struct vnode *vp; 3607 3608 error = namei_simple_user(SCARG(uap, path), 3609 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3610 if (error != 0) 3611 return (error); 3612 3613 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3614 3615 vrele(vp); 3616 return (error); 3617 } 3618 3619 /* 3620 * Common routine to set ownership given a vnode. 3621 */ 3622 static int 3623 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3624 int posix_semantics) 3625 { 3626 struct vattr vattr; 3627 mode_t newmode; 3628 int error; 3629 3630 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3631 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3632 goto out; 3633 3634 #define CHANGED(x) ((int)(x) != -1) 3635 newmode = vattr.va_mode; 3636 if (posix_semantics) { 3637 /* 3638 * POSIX/XPG semantics: if the caller is not the super-user, 3639 * clear set-user-id and set-group-id bits. Both POSIX and 3640 * the XPG consider the behaviour for calls by the super-user 3641 * implementation-defined; we leave the set-user-id and set- 3642 * group-id settings intact in that case. 3643 */ 3644 if (vattr.va_mode & S_ISUID) { 3645 if (kauth_authorize_vnode(l->l_cred, 3646 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3647 newmode &= ~S_ISUID; 3648 } 3649 if (vattr.va_mode & S_ISGID) { 3650 if (kauth_authorize_vnode(l->l_cred, 3651 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3652 newmode &= ~S_ISGID; 3653 } 3654 } else { 3655 /* 3656 * NetBSD semantics: when changing owner and/or group, 3657 * clear the respective bit(s). 3658 */ 3659 if (CHANGED(uid)) 3660 newmode &= ~S_ISUID; 3661 if (CHANGED(gid)) 3662 newmode &= ~S_ISGID; 3663 } 3664 /* Update va_mode iff altered. */ 3665 if (vattr.va_mode == newmode) 3666 newmode = VNOVAL; 3667 3668 vattr_null(&vattr); 3669 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3670 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3671 vattr.va_mode = newmode; 3672 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3673 #undef CHANGED 3674 3675 out: 3676 VOP_UNLOCK(vp); 3677 return (error); 3678 } 3679 3680 /* 3681 * Set the access and modification times given a path name; this 3682 * version follows links. 3683 */ 3684 /* ARGSUSED */ 3685 int 3686 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3687 register_t *retval) 3688 { 3689 /* { 3690 syscallarg(const char *) path; 3691 syscallarg(const struct timeval *) tptr; 3692 } */ 3693 3694 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3695 SCARG(uap, tptr), UIO_USERSPACE); 3696 } 3697 3698 /* 3699 * Set the access and modification times given a file descriptor. 3700 */ 3701 /* ARGSUSED */ 3702 int 3703 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3704 register_t *retval) 3705 { 3706 /* { 3707 syscallarg(int) fd; 3708 syscallarg(const struct timeval *) tptr; 3709 } */ 3710 int error; 3711 file_t *fp; 3712 3713 /* fd_getvnode() will use the descriptor for us */ 3714 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3715 return (error); 3716 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3717 UIO_USERSPACE); 3718 fd_putfile(SCARG(uap, fd)); 3719 return (error); 3720 } 3721 3722 int 3723 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3724 register_t *retval) 3725 { 3726 /* { 3727 syscallarg(int) fd; 3728 syscallarg(const struct timespec *) tptr; 3729 } */ 3730 int error; 3731 file_t *fp; 3732 3733 /* fd_getvnode() will use the descriptor for us */ 3734 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3735 return (error); 3736 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3737 SCARG(uap, tptr), UIO_USERSPACE); 3738 fd_putfile(SCARG(uap, fd)); 3739 return (error); 3740 } 3741 3742 /* 3743 * Set the access and modification times given a path name; this 3744 * version does not follow links. 3745 */ 3746 int 3747 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3748 register_t *retval) 3749 { 3750 /* { 3751 syscallarg(const char *) path; 3752 syscallarg(const struct timeval *) tptr; 3753 } */ 3754 3755 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3756 SCARG(uap, tptr), UIO_USERSPACE); 3757 } 3758 3759 int 3760 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3761 register_t *retval) 3762 { 3763 /* { 3764 syscallarg(int) fd; 3765 syscallarg(const char *) path; 3766 syscallarg(const struct timespec *) tptr; 3767 syscallarg(int) flag; 3768 } */ 3769 int follow; 3770 const struct timespec *tptr; 3771 int error; 3772 3773 tptr = SCARG(uap, tptr); 3774 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3775 3776 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3777 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3778 3779 return error; 3780 } 3781 3782 /* 3783 * Common routine to set access and modification times given a vnode. 3784 */ 3785 int 3786 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3787 const struct timespec *tptr, enum uio_seg seg) 3788 { 3789 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3790 } 3791 3792 int 3793 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3794 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3795 { 3796 struct vattr vattr; 3797 int error, dorele = 0; 3798 namei_simple_flags_t sflags; 3799 bool vanull, setbirthtime; 3800 struct timespec ts[2]; 3801 3802 KASSERT(l != NULL || fdat == AT_FDCWD); 3803 3804 /* 3805 * I have checked all callers and they pass either FOLLOW, 3806 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3807 * is 0. More to the point, they don't pass anything else. 3808 * Let's keep it that way at least until the namei interfaces 3809 * are fully sanitized. 3810 */ 3811 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3812 sflags = (flag == FOLLOW) ? 3813 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3814 3815 if (tptr == NULL) { 3816 vanull = true; 3817 nanotime(&ts[0]); 3818 ts[1] = ts[0]; 3819 } else { 3820 vanull = false; 3821 if (seg != UIO_SYSSPACE) { 3822 error = copyin(tptr, ts, sizeof (ts)); 3823 if (error != 0) 3824 return error; 3825 } else { 3826 ts[0] = tptr[0]; 3827 ts[1] = tptr[1]; 3828 } 3829 } 3830 3831 if (ts[0].tv_nsec == UTIME_NOW) { 3832 nanotime(&ts[0]); 3833 if (ts[1].tv_nsec == UTIME_NOW) { 3834 vanull = true; 3835 ts[1] = ts[0]; 3836 } 3837 } else if (ts[1].tv_nsec == UTIME_NOW) 3838 nanotime(&ts[1]); 3839 3840 if (vp == NULL) { 3841 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3842 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3843 if (error != 0) 3844 return error; 3845 dorele = 1; 3846 } 3847 3848 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3849 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3850 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3851 vattr_null(&vattr); 3852 3853 if (ts[0].tv_nsec != UTIME_OMIT) 3854 vattr.va_atime = ts[0]; 3855 3856 if (ts[1].tv_nsec != UTIME_OMIT) { 3857 vattr.va_mtime = ts[1]; 3858 if (setbirthtime) 3859 vattr.va_birthtime = ts[1]; 3860 } 3861 3862 if (vanull) 3863 vattr.va_vaflags |= VA_UTIMES_NULL; 3864 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3865 VOP_UNLOCK(vp); 3866 3867 if (dorele != 0) 3868 vrele(vp); 3869 3870 return error; 3871 } 3872 3873 int 3874 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3875 const struct timeval *tptr, enum uio_seg seg) 3876 { 3877 struct timespec ts[2]; 3878 struct timespec *tsptr = NULL; 3879 int error; 3880 3881 if (tptr != NULL) { 3882 struct timeval tv[2]; 3883 3884 if (seg != UIO_SYSSPACE) { 3885 error = copyin(tptr, tv, sizeof (tv)); 3886 if (error != 0) 3887 return error; 3888 tptr = tv; 3889 } 3890 3891 if ((tv[0].tv_usec == UTIME_NOW) || 3892 (tv[0].tv_usec == UTIME_OMIT)) 3893 ts[0].tv_nsec = tv[0].tv_usec; 3894 else 3895 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3896 3897 if ((tv[1].tv_usec == UTIME_NOW) || 3898 (tv[1].tv_usec == UTIME_OMIT)) 3899 ts[1].tv_nsec = tv[1].tv_usec; 3900 else 3901 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3902 3903 tsptr = &ts[0]; 3904 } 3905 3906 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3907 } 3908 3909 /* 3910 * Truncate a file given its path name. 3911 */ 3912 /* ARGSUSED */ 3913 int 3914 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3915 { 3916 /* { 3917 syscallarg(const char *) path; 3918 syscallarg(int) pad; 3919 syscallarg(off_t) length; 3920 } */ 3921 struct vnode *vp; 3922 struct vattr vattr; 3923 int error; 3924 3925 if (SCARG(uap, length) < 0) 3926 return EINVAL; 3927 3928 error = namei_simple_user(SCARG(uap, path), 3929 NSM_FOLLOW_TRYEMULROOT, &vp); 3930 if (error != 0) 3931 return (error); 3932 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3933 if (vp->v_type == VDIR) 3934 error = EISDIR; 3935 else if ((error = vn_writechk(vp)) == 0 && 3936 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3937 vattr_null(&vattr); 3938 vattr.va_size = SCARG(uap, length); 3939 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3940 } 3941 vput(vp); 3942 return (error); 3943 } 3944 3945 /* 3946 * Truncate a file given a file descriptor. 3947 */ 3948 /* ARGSUSED */ 3949 int 3950 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3951 { 3952 /* { 3953 syscallarg(int) fd; 3954 syscallarg(int) pad; 3955 syscallarg(off_t) length; 3956 } */ 3957 struct vattr vattr; 3958 struct vnode *vp; 3959 file_t *fp; 3960 int error; 3961 3962 if (SCARG(uap, length) < 0) 3963 return EINVAL; 3964 3965 /* fd_getvnode() will use the descriptor for us */ 3966 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3967 return (error); 3968 if ((fp->f_flag & FWRITE) == 0) { 3969 error = EINVAL; 3970 goto out; 3971 } 3972 vp = fp->f_vnode; 3973 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3974 if (vp->v_type == VDIR) 3975 error = EISDIR; 3976 else if ((error = vn_writechk(vp)) == 0) { 3977 vattr_null(&vattr); 3978 vattr.va_size = SCARG(uap, length); 3979 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3980 } 3981 VOP_UNLOCK(vp); 3982 out: 3983 fd_putfile(SCARG(uap, fd)); 3984 return (error); 3985 } 3986 3987 /* 3988 * Sync an open file. 3989 */ 3990 /* ARGSUSED */ 3991 int 3992 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3993 { 3994 /* { 3995 syscallarg(int) fd; 3996 } */ 3997 struct vnode *vp; 3998 file_t *fp; 3999 int error; 4000 4001 /* fd_getvnode() will use the descriptor for us */ 4002 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4003 return (error); 4004 vp = fp->f_vnode; 4005 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4006 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4007 VOP_UNLOCK(vp); 4008 fd_putfile(SCARG(uap, fd)); 4009 return (error); 4010 } 4011 4012 /* 4013 * Sync a range of file data. API modeled after that found in AIX. 4014 * 4015 * FDATASYNC indicates that we need only save enough metadata to be able 4016 * to re-read the written data. Note we duplicate AIX's requirement that 4017 * the file be open for writing. 4018 */ 4019 /* ARGSUSED */ 4020 int 4021 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4022 { 4023 /* { 4024 syscallarg(int) fd; 4025 syscallarg(int) flags; 4026 syscallarg(off_t) start; 4027 syscallarg(off_t) length; 4028 } */ 4029 struct vnode *vp; 4030 file_t *fp; 4031 int flags, nflags; 4032 off_t s, e, len; 4033 int error; 4034 4035 /* fd_getvnode() will use the descriptor for us */ 4036 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4037 return (error); 4038 4039 if ((fp->f_flag & FWRITE) == 0) { 4040 error = EBADF; 4041 goto out; 4042 } 4043 4044 flags = SCARG(uap, flags); 4045 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4046 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4047 error = EINVAL; 4048 goto out; 4049 } 4050 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4051 if (flags & FDATASYNC) 4052 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4053 else 4054 nflags = FSYNC_WAIT; 4055 if (flags & FDISKSYNC) 4056 nflags |= FSYNC_CACHE; 4057 4058 len = SCARG(uap, length); 4059 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4060 if (len) { 4061 s = SCARG(uap, start); 4062 e = s + len; 4063 if (e < s) { 4064 error = EINVAL; 4065 goto out; 4066 } 4067 } else { 4068 e = 0; 4069 s = 0; 4070 } 4071 4072 vp = fp->f_vnode; 4073 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4074 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4075 VOP_UNLOCK(vp); 4076 out: 4077 fd_putfile(SCARG(uap, fd)); 4078 return (error); 4079 } 4080 4081 /* 4082 * Sync the data of an open file. 4083 */ 4084 /* ARGSUSED */ 4085 int 4086 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4087 { 4088 /* { 4089 syscallarg(int) fd; 4090 } */ 4091 struct vnode *vp; 4092 file_t *fp; 4093 int error; 4094 4095 /* fd_getvnode() will use the descriptor for us */ 4096 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4097 return (error); 4098 if ((fp->f_flag & FWRITE) == 0) { 4099 fd_putfile(SCARG(uap, fd)); 4100 return (EBADF); 4101 } 4102 vp = fp->f_vnode; 4103 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4104 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4105 VOP_UNLOCK(vp); 4106 fd_putfile(SCARG(uap, fd)); 4107 return (error); 4108 } 4109 4110 /* 4111 * Rename files, (standard) BSD semantics frontend. 4112 */ 4113 /* ARGSUSED */ 4114 int 4115 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4116 { 4117 /* { 4118 syscallarg(const char *) from; 4119 syscallarg(const char *) to; 4120 } */ 4121 4122 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4123 SCARG(uap, to), UIO_USERSPACE, 0)); 4124 } 4125 4126 int 4127 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4128 register_t *retval) 4129 { 4130 /* { 4131 syscallarg(int) fromfd; 4132 syscallarg(const char *) from; 4133 syscallarg(int) tofd; 4134 syscallarg(const char *) to; 4135 } */ 4136 4137 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4138 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4139 } 4140 4141 /* 4142 * Rename files, POSIX semantics frontend. 4143 */ 4144 /* ARGSUSED */ 4145 int 4146 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4147 { 4148 /* { 4149 syscallarg(const char *) from; 4150 syscallarg(const char *) to; 4151 } */ 4152 4153 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4154 SCARG(uap, to), UIO_USERSPACE, 1)); 4155 } 4156 4157 /* 4158 * Rename files. Source and destination must either both be directories, 4159 * or both not be directories. If target is a directory, it must be empty. 4160 * If `from' and `to' refer to the same object, the value of the `retain' 4161 * argument is used to determine whether `from' will be 4162 * 4163 * (retain == 0) deleted unless `from' and `to' refer to the same 4164 * object in the file system's name space (BSD). 4165 * (retain == 1) always retained (POSIX). 4166 * 4167 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4168 */ 4169 int 4170 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4171 { 4172 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4173 } 4174 4175 static int 4176 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4177 const char *to, enum uio_seg seg, int retain) 4178 { 4179 struct pathbuf *fpb, *tpb; 4180 struct nameidata fnd, tnd; 4181 struct vnode *fdvp, *fvp; 4182 struct vnode *tdvp, *tvp; 4183 struct mount *mp, *tmp; 4184 int error; 4185 4186 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4187 4188 error = pathbuf_maybe_copyin(from, seg, &fpb); 4189 if (error) 4190 goto out0; 4191 KASSERT(fpb != NULL); 4192 4193 error = pathbuf_maybe_copyin(to, seg, &tpb); 4194 if (error) 4195 goto out1; 4196 KASSERT(tpb != NULL); 4197 4198 /* 4199 * Lookup from. 4200 * 4201 * XXX LOCKPARENT is wrong because we don't actually want it 4202 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4203 * insane, so for the time being we need to leave it like this. 4204 */ 4205 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4206 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4207 goto out2; 4208 4209 /* 4210 * Pull out the important results of the lookup, fdvp and fvp. 4211 * Of course, fvp is bogus because we're about to unlock fdvp. 4212 */ 4213 fdvp = fnd.ni_dvp; 4214 fvp = fnd.ni_vp; 4215 KASSERT(fdvp != NULL); 4216 KASSERT(fvp != NULL); 4217 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4218 4219 /* 4220 * Make sure neither fdvp nor fvp is locked. 4221 */ 4222 if (fdvp != fvp) 4223 VOP_UNLOCK(fdvp); 4224 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4225 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4226 4227 /* 4228 * Reject renaming `.' and `..'. Can't do this until after 4229 * namei because we need namei's parsing to find the final 4230 * component name. (namei should just leave us with the final 4231 * component name and not look it up itself, but anyway...) 4232 * 4233 * This was here before because we used to relookup from 4234 * instead of to and relookup requires the caller to check 4235 * this, but now file systems may depend on this check, so we 4236 * must retain it until the file systems are all rototilled. 4237 */ 4238 if (((fnd.ni_cnd.cn_namelen == 1) && 4239 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4240 ((fnd.ni_cnd.cn_namelen == 2) && 4241 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4242 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4243 error = EINVAL; /* XXX EISDIR? */ 4244 goto abort0; 4245 } 4246 4247 /* 4248 * Lookup to. 4249 * 4250 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4251 * fvp here to decide whether to add CREATEDIR is a load of 4252 * bollocks because fvp might be the wrong node by now, since 4253 * fdvp is unlocked. 4254 * 4255 * XXX Why not pass CREATEDIR always? 4256 */ 4257 NDINIT(&tnd, RENAME, 4258 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4259 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4260 tpb); 4261 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4262 goto abort0; 4263 4264 /* 4265 * Pull out the important results of the lookup, tdvp and tvp. 4266 * Of course, tvp is bogus because we're about to unlock tdvp. 4267 */ 4268 tdvp = tnd.ni_dvp; 4269 tvp = tnd.ni_vp; 4270 KASSERT(tdvp != NULL); 4271 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4272 4273 /* 4274 * Make sure neither tdvp nor tvp is locked. 4275 */ 4276 if (tdvp != tvp) 4277 VOP_UNLOCK(tdvp); 4278 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4279 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4280 4281 /* 4282 * Reject renaming onto `.' or `..'. relookup is unhappy with 4283 * these, which is why we must do this here. Once upon a time 4284 * we relooked up from instead of to, and consequently didn't 4285 * need this check, but now that we relookup to instead of 4286 * from, we need this; and we shall need it forever forward 4287 * until the VOP_RENAME protocol changes, because file systems 4288 * will no doubt begin to depend on this check. 4289 */ 4290 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4291 error = EISDIR; 4292 goto abort1; 4293 } 4294 if ((tnd.ni_cnd.cn_namelen == 2) && 4295 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4296 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4297 error = EINVAL; 4298 goto abort1; 4299 } 4300 4301 /* 4302 * Get the mount point. If the file system has been unmounted, 4303 * which it may be because we're not holding any vnode locks, 4304 * then v_mount will be NULL. We're not really supposed to 4305 * read v_mount without holding the vnode lock, but since we 4306 * have fdvp referenced, if fdvp->v_mount changes then at worst 4307 * it will be set to NULL, not changed to another mount point. 4308 * And, of course, since it is up to the file system to 4309 * determine the real lock order, we can't lock both fdvp and 4310 * tdvp at the same time. 4311 */ 4312 mp = fdvp->v_mount; 4313 if (mp == NULL) { 4314 error = ENOENT; 4315 goto abort1; 4316 } 4317 4318 /* 4319 * Make sure the mount points match. Again, although we don't 4320 * hold any vnode locks, the v_mount fields may change -- but 4321 * at worst they will change to NULL, so this will never become 4322 * a cross-device rename, because we hold vnode references. 4323 * 4324 * XXX Because nothing is locked and the compiler may reorder 4325 * things here, unmounting the file system at an inopportune 4326 * moment may cause rename to fail with EXDEV when it really 4327 * should fail with ENOENT. 4328 */ 4329 tmp = tdvp->v_mount; 4330 if (tmp == NULL) { 4331 error = ENOENT; 4332 goto abort1; 4333 } 4334 4335 if (mp != tmp) { 4336 error = EXDEV; 4337 goto abort1; 4338 } 4339 4340 /* 4341 * Take the vfs rename lock to avoid cross-directory screw cases. 4342 * Nothing is locked currently, so taking this lock is safe. 4343 */ 4344 error = VFS_RENAMELOCK_ENTER(mp); 4345 if (error) 4346 goto abort1; 4347 4348 /* 4349 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4350 * and nothing is locked except for the vfs rename lock. 4351 * 4352 * The next step is a little rain dance to conform to the 4353 * insane lock protocol, even though it does nothing to ward 4354 * off race conditions. 4355 * 4356 * We need tdvp and tvp to be locked. However, because we have 4357 * unlocked tdvp in order to hold no locks while we take the 4358 * vfs rename lock, tvp may be wrong here, and we can't safely 4359 * lock it even if the sensible file systems will just unlock 4360 * it straight away. Consequently, we must lock tdvp and then 4361 * relookup tvp to get it locked. 4362 * 4363 * Finally, because the VOP_RENAME protocol is brain-damaged 4364 * and various file systems insanely depend on the semantics of 4365 * this brain damage, the lookup of to must be the last lookup 4366 * before VOP_RENAME. 4367 */ 4368 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4369 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4370 if (error) 4371 goto abort2; 4372 4373 /* 4374 * Drop the old tvp and pick up the new one -- which might be 4375 * the same, but that doesn't matter to us. After this, tdvp 4376 * and tvp should both be locked. 4377 */ 4378 if (tvp != NULL) 4379 vrele(tvp); 4380 tvp = tnd.ni_vp; 4381 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4382 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4383 4384 /* 4385 * The old do_sys_rename had various consistency checks here 4386 * involving fvp and tvp. fvp is bogus already here, and tvp 4387 * will become bogus soon in any sensible file system, so the 4388 * only purpose in putting these checks here is to give lip 4389 * service to these screw cases and to acknowledge that they 4390 * exist, not actually to handle them, but here you go 4391 * anyway... 4392 */ 4393 4394 /* 4395 * Acknowledge that directories and non-directories aren't 4396 * suposed to mix. 4397 */ 4398 if (tvp != NULL) { 4399 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4400 error = ENOTDIR; 4401 goto abort3; 4402 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4403 error = EISDIR; 4404 goto abort3; 4405 } 4406 } 4407 4408 /* 4409 * Acknowledge some random screw case, among the dozens that 4410 * might arise. 4411 */ 4412 if (fvp == tdvp) { 4413 error = EINVAL; 4414 goto abort3; 4415 } 4416 4417 /* 4418 * Acknowledge that POSIX has a wacky screw case. 4419 * 4420 * XXX Eventually the retain flag needs to be passed on to 4421 * VOP_RENAME. 4422 */ 4423 if (fvp == tvp) { 4424 if (retain) { 4425 error = 0; 4426 goto abort3; 4427 } else if ((fdvp == tdvp) && 4428 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4429 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4430 fnd.ni_cnd.cn_namelen))) { 4431 error = 0; 4432 goto abort3; 4433 } 4434 } 4435 4436 /* 4437 * Make sure veriexec can screw us up. (But a race can screw 4438 * up veriexec, of course -- remember, fvp and (soon) tvp are 4439 * bogus.) 4440 */ 4441 #if NVERIEXEC > 0 4442 { 4443 char *f1, *f2; 4444 size_t f1_len; 4445 size_t f2_len; 4446 4447 f1_len = fnd.ni_cnd.cn_namelen + 1; 4448 f1 = kmem_alloc(f1_len, KM_SLEEP); 4449 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4450 4451 f2_len = tnd.ni_cnd.cn_namelen + 1; 4452 f2 = kmem_alloc(f2_len, KM_SLEEP); 4453 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4454 4455 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4456 4457 kmem_free(f1, f1_len); 4458 kmem_free(f2, f2_len); 4459 4460 if (error) 4461 goto abort3; 4462 } 4463 #endif /* NVERIEXEC > 0 */ 4464 4465 /* 4466 * All ready. Incant the rename vop. 4467 */ 4468 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4469 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4470 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4471 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4472 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4473 4474 /* 4475 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4476 * tdvp and tvp. But we can't assert any of that. 4477 */ 4478 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4479 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4480 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4481 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4482 4483 /* 4484 * So all we have left to do is to drop the rename lock and 4485 * destroy the pathbufs. 4486 */ 4487 VFS_RENAMELOCK_EXIT(mp); 4488 goto out2; 4489 4490 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4491 VOP_UNLOCK(tvp); 4492 abort2: VOP_UNLOCK(tdvp); 4493 VFS_RENAMELOCK_EXIT(mp); 4494 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4495 vrele(tdvp); 4496 if (tvp != NULL) 4497 vrele(tvp); 4498 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4499 vrele(fdvp); 4500 vrele(fvp); 4501 out2: pathbuf_destroy(tpb); 4502 out1: pathbuf_destroy(fpb); 4503 out0: return error; 4504 } 4505 4506 /* 4507 * Make a directory file. 4508 */ 4509 /* ARGSUSED */ 4510 int 4511 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4512 { 4513 /* { 4514 syscallarg(const char *) path; 4515 syscallarg(int) mode; 4516 } */ 4517 4518 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4519 SCARG(uap, mode), UIO_USERSPACE); 4520 } 4521 4522 int 4523 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4524 register_t *retval) 4525 { 4526 /* { 4527 syscallarg(int) fd; 4528 syscallarg(const char *) path; 4529 syscallarg(int) mode; 4530 } */ 4531 4532 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4533 SCARG(uap, mode), UIO_USERSPACE); 4534 } 4535 4536 4537 int 4538 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4539 { 4540 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4541 } 4542 4543 static int 4544 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4545 enum uio_seg seg) 4546 { 4547 struct proc *p = curlwp->l_proc; 4548 struct vnode *vp; 4549 struct vattr vattr; 4550 int error; 4551 struct pathbuf *pb; 4552 struct nameidata nd; 4553 4554 KASSERT(l != NULL || fdat == AT_FDCWD); 4555 4556 /* XXX bollocks, should pass in a pathbuf */ 4557 error = pathbuf_maybe_copyin(path, seg, &pb); 4558 if (error) { 4559 return error; 4560 } 4561 4562 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4563 4564 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4565 pathbuf_destroy(pb); 4566 return (error); 4567 } 4568 vp = nd.ni_vp; 4569 if (vp != NULL) { 4570 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4571 if (nd.ni_dvp == vp) 4572 vrele(nd.ni_dvp); 4573 else 4574 vput(nd.ni_dvp); 4575 vrele(vp); 4576 pathbuf_destroy(pb); 4577 return (EEXIST); 4578 } 4579 vattr_null(&vattr); 4580 vattr.va_type = VDIR; 4581 /* We will read cwdi->cwdi_cmask unlocked. */ 4582 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4583 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4584 if (!error) 4585 vrele(nd.ni_vp); 4586 vput(nd.ni_dvp); 4587 pathbuf_destroy(pb); 4588 return (error); 4589 } 4590 4591 /* 4592 * Remove a directory file. 4593 */ 4594 /* ARGSUSED */ 4595 int 4596 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4597 { 4598 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4599 AT_REMOVEDIR, UIO_USERSPACE); 4600 } 4601 4602 /* 4603 * Read a block of directory entries in a file system independent format. 4604 */ 4605 int 4606 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4607 { 4608 /* { 4609 syscallarg(int) fd; 4610 syscallarg(char *) buf; 4611 syscallarg(size_t) count; 4612 } */ 4613 file_t *fp; 4614 int error, done; 4615 4616 /* fd_getvnode() will use the descriptor for us */ 4617 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4618 return (error); 4619 if ((fp->f_flag & FREAD) == 0) { 4620 error = EBADF; 4621 goto out; 4622 } 4623 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4624 SCARG(uap, count), &done, l, 0, 0); 4625 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4626 *retval = done; 4627 out: 4628 fd_putfile(SCARG(uap, fd)); 4629 return (error); 4630 } 4631 4632 /* 4633 * Set the mode mask for creation of filesystem nodes. 4634 */ 4635 int 4636 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4637 { 4638 /* { 4639 syscallarg(mode_t) newmask; 4640 } */ 4641 struct proc *p = l->l_proc; 4642 struct cwdinfo *cwdi; 4643 4644 /* 4645 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4646 * important is that we serialize changes to the mask. The 4647 * rw_exit() will issue a write memory barrier on our behalf, 4648 * and force the changes out to other CPUs (as it must use an 4649 * atomic operation, draining the local CPU's store buffers). 4650 */ 4651 cwdi = p->p_cwdi; 4652 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4653 *retval = cwdi->cwdi_cmask; 4654 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4655 rw_exit(&cwdi->cwdi_lock); 4656 4657 return (0); 4658 } 4659 4660 int 4661 dorevoke(struct vnode *vp, kauth_cred_t cred) 4662 { 4663 struct vattr vattr; 4664 int error, fs_decision; 4665 4666 vn_lock(vp, LK_SHARED | LK_RETRY); 4667 error = VOP_GETATTR(vp, &vattr, cred); 4668 VOP_UNLOCK(vp); 4669 if (error != 0) 4670 return error; 4671 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4672 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4673 fs_decision); 4674 if (!error) 4675 VOP_REVOKE(vp, REVOKEALL); 4676 return (error); 4677 } 4678 4679 /* 4680 * Void all references to file by ripping underlying filesystem 4681 * away from vnode. 4682 */ 4683 /* ARGSUSED */ 4684 int 4685 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4686 { 4687 /* { 4688 syscallarg(const char *) path; 4689 } */ 4690 struct vnode *vp; 4691 int error; 4692 4693 error = namei_simple_user(SCARG(uap, path), 4694 NSM_FOLLOW_TRYEMULROOT, &vp); 4695 if (error != 0) 4696 return (error); 4697 error = dorevoke(vp, l->l_cred); 4698 vrele(vp); 4699 return (error); 4700 } 4701 4702 /* 4703 * Allocate backing store for a file, filling a hole without having to 4704 * explicitly write anything out. 4705 */ 4706 /* ARGSUSED */ 4707 int 4708 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4709 register_t *retval) 4710 { 4711 /* { 4712 syscallarg(int) fd; 4713 syscallarg(off_t) pos; 4714 syscallarg(off_t) len; 4715 } */ 4716 int fd; 4717 off_t pos, len; 4718 struct file *fp; 4719 struct vnode *vp; 4720 int error; 4721 4722 fd = SCARG(uap, fd); 4723 pos = SCARG(uap, pos); 4724 len = SCARG(uap, len); 4725 4726 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4727 *retval = EINVAL; 4728 return 0; 4729 } 4730 4731 error = fd_getvnode(fd, &fp); 4732 if (error) { 4733 *retval = error; 4734 return 0; 4735 } 4736 if ((fp->f_flag & FWRITE) == 0) { 4737 error = EBADF; 4738 goto fail; 4739 } 4740 vp = fp->f_vnode; 4741 4742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4743 if (vp->v_type == VDIR) { 4744 error = EISDIR; 4745 } else { 4746 error = VOP_FALLOCATE(vp, pos, len); 4747 } 4748 VOP_UNLOCK(vp); 4749 4750 fail: 4751 fd_putfile(fd); 4752 *retval = error; 4753 return 0; 4754 } 4755 4756 /* 4757 * Deallocate backing store for a file, creating a hole. Also used for 4758 * invoking TRIM on disks. 4759 */ 4760 /* ARGSUSED */ 4761 int 4762 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4763 register_t *retval) 4764 { 4765 /* { 4766 syscallarg(int) fd; 4767 syscallarg(off_t) pos; 4768 syscallarg(off_t) len; 4769 } */ 4770 int fd; 4771 off_t pos, len; 4772 struct file *fp; 4773 struct vnode *vp; 4774 int error; 4775 4776 fd = SCARG(uap, fd); 4777 pos = SCARG(uap, pos); 4778 len = SCARG(uap, len); 4779 4780 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4781 return EINVAL; 4782 } 4783 4784 error = fd_getvnode(fd, &fp); 4785 if (error) { 4786 return error; 4787 } 4788 if ((fp->f_flag & FWRITE) == 0) { 4789 error = EBADF; 4790 goto fail; 4791 } 4792 vp = fp->f_vnode; 4793 4794 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4795 if (vp->v_type == VDIR) { 4796 error = EISDIR; 4797 } else { 4798 error = VOP_FDISCARD(vp, pos, len); 4799 } 4800 VOP_UNLOCK(vp); 4801 4802 fail: 4803 fd_putfile(fd); 4804 return error; 4805 } 4806