1 /* $NetBSD: vfs_syscalls.c,v 1.482 2014/04/20 21:26:51 maxv Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.482 2014/04/20 21:26:51 maxv Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 123 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 124 enum uio_seg); 125 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 126 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 127 enum uio_seg); 128 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 129 enum uio_seg, int); 130 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 131 size_t, register_t *); 132 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 133 134 static int fd_nameiat(struct lwp *, int, struct nameidata *); 135 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 136 namei_simple_flags_t, struct vnode **); 137 138 139 /* 140 * This table is used to maintain compatibility with 4.3BSD 141 * and NetBSD 0.9 mount syscalls - and possibly other systems. 142 * Note, the order is important! 143 * 144 * Do not modify this table. It should only contain filesystems 145 * supported by NetBSD 0.9 and 4.3BSD. 146 */ 147 const char * const mountcompatnames[] = { 148 NULL, /* 0 = MOUNT_NONE */ 149 MOUNT_FFS, /* 1 = MOUNT_UFS */ 150 MOUNT_NFS, /* 2 */ 151 MOUNT_MFS, /* 3 */ 152 MOUNT_MSDOS, /* 4 */ 153 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 154 MOUNT_FDESC, /* 6 */ 155 MOUNT_KERNFS, /* 7 */ 156 NULL, /* 8 = MOUNT_DEVFS */ 157 MOUNT_AFS, /* 9 */ 158 }; 159 160 const int nmountcompatnames = __arraycount(mountcompatnames); 161 162 static int 163 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 164 { 165 file_t *dfp; 166 int error; 167 168 if (fdat != AT_FDCWD) { 169 if ((error = fd_getvnode(fdat, &dfp)) != 0) 170 goto out; 171 172 NDAT(ndp, dfp->f_data); 173 } 174 175 error = namei(ndp); 176 177 if (fdat != AT_FDCWD) 178 fd_putfile(fdat); 179 out: 180 return error; 181 } 182 183 static int 184 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 185 namei_simple_flags_t sflags, struct vnode **vp_ret) 186 { 187 file_t *dfp; 188 struct vnode *dvp; 189 int error; 190 191 if (fdat != AT_FDCWD) { 192 if ((error = fd_getvnode(fdat, &dfp)) != 0) 193 goto out; 194 195 dvp = dfp->f_data; 196 } else { 197 dvp = NULL; 198 } 199 200 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 201 202 if (fdat != AT_FDCWD) 203 fd_putfile(fdat); 204 out: 205 return error; 206 } 207 208 static int 209 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 210 { 211 int error; 212 213 fp->f_flag = flags & FMASK; 214 fp->f_type = DTYPE_VNODE; 215 fp->f_ops = &vnops; 216 fp->f_data = vp; 217 218 if (flags & (O_EXLOCK | O_SHLOCK)) { 219 struct flock lf; 220 int type; 221 222 lf.l_whence = SEEK_SET; 223 lf.l_start = 0; 224 lf.l_len = 0; 225 if (flags & O_EXLOCK) 226 lf.l_type = F_WRLCK; 227 else 228 lf.l_type = F_RDLCK; 229 type = F_FLOCK; 230 if ((flags & FNONBLOCK) == 0) 231 type |= F_WAIT; 232 VOP_UNLOCK(vp); 233 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 234 if (error) { 235 (void) vn_close(vp, fp->f_flag, fp->f_cred); 236 fd_abort(l->l_proc, fp, indx); 237 return error; 238 } 239 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 240 atomic_or_uint(&fp->f_flag, FHASLOCK); 241 } 242 if (flags & O_CLOEXEC) 243 fd_set_exclose(l, indx, true); 244 return 0; 245 } 246 247 static int 248 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 249 void *data, size_t *data_len) 250 { 251 struct mount *mp; 252 int error = 0, saved_flags; 253 254 mp = vp->v_mount; 255 saved_flags = mp->mnt_flag; 256 257 /* We can operate only on VV_ROOT nodes. */ 258 if ((vp->v_vflag & VV_ROOT) == 0) { 259 error = EINVAL; 260 goto out; 261 } 262 263 /* 264 * We only allow the filesystem to be reloaded if it 265 * is currently mounted read-only. Additionally, we 266 * prevent read-write to read-only downgrades. 267 */ 268 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 269 (mp->mnt_flag & MNT_RDONLY) == 0 && 270 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 271 error = EOPNOTSUPP; /* Needs translation */ 272 goto out; 273 } 274 275 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 276 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 277 if (error) 278 goto out; 279 280 if (vfs_busy(mp, NULL)) { 281 error = EPERM; 282 goto out; 283 } 284 285 mutex_enter(&mp->mnt_updating); 286 287 mp->mnt_flag &= ~MNT_OP_FLAGS; 288 mp->mnt_flag |= flags & MNT_OP_FLAGS; 289 290 /* 291 * Set the mount level flags. 292 */ 293 if (flags & MNT_RDONLY) 294 mp->mnt_flag |= MNT_RDONLY; 295 else if (mp->mnt_flag & MNT_RDONLY) 296 mp->mnt_iflag |= IMNT_WANTRDWR; 297 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 298 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 299 error = VFS_MOUNT(mp, path, data, data_len); 300 301 if (error && data != NULL) { 302 int error2; 303 304 /* 305 * Update failed; let's try and see if it was an 306 * export request. For compat with 3.0 and earlier. 307 */ 308 error2 = vfs_hooks_reexport(mp, path, data); 309 310 /* 311 * Only update error code if the export request was 312 * understood but some problem occurred while 313 * processing it. 314 */ 315 if (error2 != EJUSTRETURN) 316 error = error2; 317 } 318 319 if (mp->mnt_iflag & IMNT_WANTRDWR) 320 mp->mnt_flag &= ~MNT_RDONLY; 321 if (error) 322 mp->mnt_flag = saved_flags; 323 mp->mnt_flag &= ~MNT_OP_FLAGS; 324 mp->mnt_iflag &= ~IMNT_WANTRDWR; 325 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 326 if (mp->mnt_syncer == NULL) 327 error = vfs_allocate_syncvnode(mp); 328 } else { 329 if (mp->mnt_syncer != NULL) 330 vfs_deallocate_syncvnode(mp); 331 } 332 mutex_exit(&mp->mnt_updating); 333 vfs_unbusy(mp, false, NULL); 334 335 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 336 (flags & MNT_EXTATTR)) { 337 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 338 NULL, 0, NULL) != 0) { 339 printf("%s: failed to start extattr, error = %d", 340 mp->mnt_stat.f_mntonname, error); 341 mp->mnt_flag &= ~MNT_EXTATTR; 342 } 343 } 344 345 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 346 !(flags & MNT_EXTATTR)) { 347 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 348 NULL, 0, NULL) != 0) { 349 printf("%s: failed to stop extattr, error = %d", 350 mp->mnt_stat.f_mntonname, error); 351 mp->mnt_flag |= MNT_RDONLY; 352 } 353 } 354 out: 355 return (error); 356 } 357 358 static int 359 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 360 { 361 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 362 int error; 363 364 /* Copy file-system type from userspace. */ 365 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 366 if (error) { 367 /* 368 * Historically, filesystem types were identified by numbers. 369 * If we get an integer for the filesystem type instead of a 370 * string, we check to see if it matches one of the historic 371 * filesystem types. 372 */ 373 u_long fsindex = (u_long)fstype; 374 if (fsindex >= nmountcompatnames || 375 mountcompatnames[fsindex] == NULL) 376 return ENODEV; 377 strlcpy(fstypename, mountcompatnames[fsindex], 378 sizeof(fstypename)); 379 } 380 381 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 382 if (strcmp(fstypename, "ufs") == 0) 383 fstypename[0] = 'f'; 384 385 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 386 return 0; 387 388 /* If we can autoload a vfs module, try again */ 389 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 390 391 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 392 return 0; 393 394 return ENODEV; 395 } 396 397 static int 398 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 399 void *data, size_t *data_len) 400 { 401 struct mount *mp; 402 int error; 403 404 /* If MNT_GETARGS is specified, it should be the only flag. */ 405 if (flags & ~MNT_GETARGS) 406 return EINVAL; 407 408 mp = vp->v_mount; 409 410 /* XXX: probably some notion of "can see" here if we want isolation. */ 411 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 412 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 413 if (error) 414 return error; 415 416 if ((vp->v_vflag & VV_ROOT) == 0) 417 return EINVAL; 418 419 if (vfs_busy(mp, NULL)) 420 return EPERM; 421 422 mutex_enter(&mp->mnt_updating); 423 mp->mnt_flag &= ~MNT_OP_FLAGS; 424 mp->mnt_flag |= MNT_GETARGS; 425 error = VFS_MOUNT(mp, path, data, data_len); 426 mp->mnt_flag &= ~MNT_OP_FLAGS; 427 mutex_exit(&mp->mnt_updating); 428 429 vfs_unbusy(mp, false, NULL); 430 return (error); 431 } 432 433 int 434 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 435 { 436 /* { 437 syscallarg(const char *) type; 438 syscallarg(const char *) path; 439 syscallarg(int) flags; 440 syscallarg(void *) data; 441 syscallarg(size_t) data_len; 442 } */ 443 444 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 445 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 446 SCARG(uap, data_len), retval); 447 } 448 449 int 450 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 451 const char *path, int flags, void *data, enum uio_seg data_seg, 452 size_t data_len, register_t *retval) 453 { 454 struct vnode *vp; 455 void *data_buf = data; 456 bool vfsopsrele = false; 457 size_t alloc_sz = 0; 458 int error; 459 460 /* XXX: The calling convention of this routine is totally bizarre */ 461 if (vfsops) 462 vfsopsrele = true; 463 464 /* 465 * Get vnode to be covered 466 */ 467 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 468 if (error != 0) { 469 vp = NULL; 470 goto done; 471 } 472 473 if (vfsops == NULL) { 474 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 475 vfsops = vp->v_mount->mnt_op; 476 } else { 477 /* 'type' is userspace */ 478 error = mount_get_vfsops(type, &vfsops); 479 if (error != 0) 480 goto done; 481 vfsopsrele = true; 482 } 483 } 484 485 /* 486 * We allow data to be NULL, even for userspace. Some fs's don't need 487 * it. The others will handle NULL. 488 */ 489 if (data != NULL && data_seg == UIO_USERSPACE) { 490 if (data_len == 0) { 491 /* No length supplied, use default for filesystem */ 492 data_len = vfsops->vfs_min_mount_data; 493 494 /* 495 * Hopefully a longer buffer won't make copyin() fail. 496 * For compatibility with 3.0 and earlier. 497 */ 498 if (flags & MNT_UPDATE 499 && data_len < sizeof (struct mnt_export_args30)) 500 data_len = sizeof (struct mnt_export_args30); 501 } 502 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 503 error = EINVAL; 504 goto done; 505 } 506 alloc_sz = data_len; 507 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 508 509 /* NFS needs the buffer even for mnt_getargs .... */ 510 error = copyin(data, data_buf, data_len); 511 if (error != 0) 512 goto done; 513 } 514 515 if (flags & MNT_GETARGS) { 516 if (data_len == 0) { 517 error = EINVAL; 518 goto done; 519 } 520 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 521 if (error != 0) 522 goto done; 523 if (data_seg == UIO_USERSPACE) 524 error = copyout(data_buf, data, data_len); 525 *retval = data_len; 526 } else if (flags & MNT_UPDATE) { 527 error = mount_update(l, vp, path, flags, data_buf, &data_len); 528 } else { 529 /* Locking is handled internally in mount_domount(). */ 530 KASSERT(vfsopsrele == true); 531 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 532 &data_len); 533 vfsopsrele = false; 534 } 535 536 done: 537 if (vfsopsrele) 538 vfs_delref(vfsops); 539 if (vp != NULL) { 540 vrele(vp); 541 } 542 if (data_buf != data) 543 kmem_free(data_buf, alloc_sz); 544 return (error); 545 } 546 547 /* 548 * Unmount a file system. 549 * 550 * Note: unmount takes a path to the vnode mounted on as argument, 551 * not special file (as before). 552 */ 553 /* ARGSUSED */ 554 int 555 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 556 { 557 /* { 558 syscallarg(const char *) path; 559 syscallarg(int) flags; 560 } */ 561 struct vnode *vp; 562 struct mount *mp; 563 int error; 564 struct pathbuf *pb; 565 struct nameidata nd; 566 567 error = pathbuf_copyin(SCARG(uap, path), &pb); 568 if (error) { 569 return error; 570 } 571 572 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 573 if ((error = namei(&nd)) != 0) { 574 pathbuf_destroy(pb); 575 return error; 576 } 577 vp = nd.ni_vp; 578 pathbuf_destroy(pb); 579 580 mp = vp->v_mount; 581 atomic_inc_uint(&mp->mnt_refcnt); 582 VOP_UNLOCK(vp); 583 584 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 585 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 586 if (error) { 587 vrele(vp); 588 vfs_destroy(mp); 589 return (error); 590 } 591 592 /* 593 * Don't allow unmounting the root file system. 594 */ 595 if (mp->mnt_flag & MNT_ROOTFS) { 596 vrele(vp); 597 vfs_destroy(mp); 598 return (EINVAL); 599 } 600 601 /* 602 * Must be the root of the filesystem 603 */ 604 if ((vp->v_vflag & VV_ROOT) == 0) { 605 vrele(vp); 606 vfs_destroy(mp); 607 return (EINVAL); 608 } 609 610 vrele(vp); 611 error = dounmount(mp, SCARG(uap, flags), l); 612 vfs_destroy(mp); 613 return error; 614 } 615 616 /* 617 * Sync each mounted filesystem. 618 */ 619 #ifdef DEBUG 620 int syncprt = 0; 621 struct ctldebug debug0 = { "syncprt", &syncprt }; 622 #endif 623 624 void 625 do_sys_sync(struct lwp *l) 626 { 627 struct mount *mp, *nmp; 628 int asyncflag; 629 630 mutex_enter(&mountlist_lock); 631 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 632 if (vfs_busy(mp, &nmp)) { 633 continue; 634 } 635 mutex_enter(&mp->mnt_updating); 636 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 637 asyncflag = mp->mnt_flag & MNT_ASYNC; 638 mp->mnt_flag &= ~MNT_ASYNC; 639 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 640 if (asyncflag) 641 mp->mnt_flag |= MNT_ASYNC; 642 } 643 mutex_exit(&mp->mnt_updating); 644 vfs_unbusy(mp, false, &nmp); 645 } 646 mutex_exit(&mountlist_lock); 647 #ifdef DEBUG 648 if (syncprt) 649 vfs_bufstats(); 650 #endif /* DEBUG */ 651 } 652 653 /* ARGSUSED */ 654 int 655 sys_sync(struct lwp *l, const void *v, register_t *retval) 656 { 657 do_sys_sync(l); 658 return (0); 659 } 660 661 662 /* 663 * Access or change filesystem quotas. 664 * 665 * (this is really 14 different calls bundled into one) 666 */ 667 668 static int 669 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 670 { 671 struct quotastat info_k; 672 int error; 673 674 /* ensure any padding bytes are cleared */ 675 memset(&info_k, 0, sizeof(info_k)); 676 677 error = vfs_quotactl_stat(mp, &info_k); 678 if (error) { 679 return error; 680 } 681 682 return copyout(&info_k, info_u, sizeof(info_k)); 683 } 684 685 static int 686 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 687 struct quotaidtypestat *info_u) 688 { 689 struct quotaidtypestat info_k; 690 int error; 691 692 /* ensure any padding bytes are cleared */ 693 memset(&info_k, 0, sizeof(info_k)); 694 695 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 696 if (error) { 697 return error; 698 } 699 700 return copyout(&info_k, info_u, sizeof(info_k)); 701 } 702 703 static int 704 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 705 struct quotaobjtypestat *info_u) 706 { 707 struct quotaobjtypestat info_k; 708 int error; 709 710 /* ensure any padding bytes are cleared */ 711 memset(&info_k, 0, sizeof(info_k)); 712 713 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 714 if (error) { 715 return error; 716 } 717 718 return copyout(&info_k, info_u, sizeof(info_k)); 719 } 720 721 static int 722 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 723 struct quotaval *val_u) 724 { 725 struct quotakey key_k; 726 struct quotaval val_k; 727 int error; 728 729 /* ensure any padding bytes are cleared */ 730 memset(&val_k, 0, sizeof(val_k)); 731 732 error = copyin(key_u, &key_k, sizeof(key_k)); 733 if (error) { 734 return error; 735 } 736 737 error = vfs_quotactl_get(mp, &key_k, &val_k); 738 if (error) { 739 return error; 740 } 741 742 return copyout(&val_k, val_u, sizeof(val_k)); 743 } 744 745 static int 746 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 747 const struct quotaval *val_u) 748 { 749 struct quotakey key_k; 750 struct quotaval val_k; 751 int error; 752 753 error = copyin(key_u, &key_k, sizeof(key_k)); 754 if (error) { 755 return error; 756 } 757 758 error = copyin(val_u, &val_k, sizeof(val_k)); 759 if (error) { 760 return error; 761 } 762 763 return vfs_quotactl_put(mp, &key_k, &val_k); 764 } 765 766 static int 767 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 768 { 769 struct quotakey key_k; 770 int error; 771 772 error = copyin(key_u, &key_k, sizeof(key_k)); 773 if (error) { 774 return error; 775 } 776 777 return vfs_quotactl_delete(mp, &key_k); 778 } 779 780 static int 781 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 782 { 783 struct quotakcursor cursor_k; 784 int error; 785 786 /* ensure any padding bytes are cleared */ 787 memset(&cursor_k, 0, sizeof(cursor_k)); 788 789 error = vfs_quotactl_cursoropen(mp, &cursor_k); 790 if (error) { 791 return error; 792 } 793 794 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 795 } 796 797 static int 798 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 799 { 800 struct quotakcursor cursor_k; 801 int error; 802 803 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 804 if (error) { 805 return error; 806 } 807 808 return vfs_quotactl_cursorclose(mp, &cursor_k); 809 } 810 811 static int 812 do_sys_quotactl_cursorskipidtype(struct mount *mp, 813 struct quotakcursor *cursor_u, int idtype) 814 { 815 struct quotakcursor cursor_k; 816 int error; 817 818 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 819 if (error) { 820 return error; 821 } 822 823 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 824 if (error) { 825 return error; 826 } 827 828 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 829 } 830 831 static int 832 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 833 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 834 unsigned *ret_u) 835 { 836 #define CGET_STACK_MAX 8 837 struct quotakcursor cursor_k; 838 struct quotakey stackkeys[CGET_STACK_MAX]; 839 struct quotaval stackvals[CGET_STACK_MAX]; 840 struct quotakey *keys_k; 841 struct quotaval *vals_k; 842 unsigned ret_k; 843 int error; 844 845 if (maxnum > 128) { 846 maxnum = 128; 847 } 848 849 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 850 if (error) { 851 return error; 852 } 853 854 if (maxnum <= CGET_STACK_MAX) { 855 keys_k = stackkeys; 856 vals_k = stackvals; 857 /* ensure any padding bytes are cleared */ 858 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 859 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 860 } else { 861 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 862 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 863 } 864 865 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 866 &ret_k); 867 if (error) { 868 goto fail; 869 } 870 871 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 872 if (error) { 873 goto fail; 874 } 875 876 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 877 if (error) { 878 goto fail; 879 } 880 881 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 882 if (error) { 883 goto fail; 884 } 885 886 /* do last to maximize the chance of being able to recover a failure */ 887 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 888 889 fail: 890 if (keys_k != stackkeys) { 891 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 892 } 893 if (vals_k != stackvals) { 894 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 895 } 896 return error; 897 } 898 899 static int 900 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 901 int *ret_u) 902 { 903 struct quotakcursor cursor_k; 904 int ret_k; 905 int error; 906 907 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 908 if (error) { 909 return error; 910 } 911 912 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 913 if (error) { 914 return error; 915 } 916 917 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 918 if (error) { 919 return error; 920 } 921 922 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 923 } 924 925 static int 926 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 927 { 928 struct quotakcursor cursor_k; 929 int error; 930 931 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 932 if (error) { 933 return error; 934 } 935 936 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 937 if (error) { 938 return error; 939 } 940 941 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 942 } 943 944 static int 945 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 946 { 947 char *path_k; 948 int error; 949 950 /* XXX this should probably be a struct pathbuf */ 951 path_k = PNBUF_GET(); 952 error = copyin(path_u, path_k, PATH_MAX); 953 if (error) { 954 PNBUF_PUT(path_k); 955 return error; 956 } 957 958 error = vfs_quotactl_quotaon(mp, idtype, path_k); 959 960 PNBUF_PUT(path_k); 961 return error; 962 } 963 964 static int 965 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 966 { 967 return vfs_quotactl_quotaoff(mp, idtype); 968 } 969 970 int 971 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 972 { 973 struct mount *mp; 974 struct vnode *vp; 975 int error; 976 977 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 978 if (error != 0) 979 return (error); 980 mp = vp->v_mount; 981 982 switch (args->qc_op) { 983 case QUOTACTL_STAT: 984 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 985 break; 986 case QUOTACTL_IDTYPESTAT: 987 error = do_sys_quotactl_idtypestat(mp, 988 args->u.idtypestat.qc_idtype, 989 args->u.idtypestat.qc_info); 990 break; 991 case QUOTACTL_OBJTYPESTAT: 992 error = do_sys_quotactl_objtypestat(mp, 993 args->u.objtypestat.qc_objtype, 994 args->u.objtypestat.qc_info); 995 break; 996 case QUOTACTL_GET: 997 error = do_sys_quotactl_get(mp, 998 args->u.get.qc_key, 999 args->u.get.qc_val); 1000 break; 1001 case QUOTACTL_PUT: 1002 error = do_sys_quotactl_put(mp, 1003 args->u.put.qc_key, 1004 args->u.put.qc_val); 1005 break; 1006 case QUOTACTL_DELETE: 1007 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 1008 break; 1009 case QUOTACTL_CURSOROPEN: 1010 error = do_sys_quotactl_cursoropen(mp, 1011 args->u.cursoropen.qc_cursor); 1012 break; 1013 case QUOTACTL_CURSORCLOSE: 1014 error = do_sys_quotactl_cursorclose(mp, 1015 args->u.cursorclose.qc_cursor); 1016 break; 1017 case QUOTACTL_CURSORSKIPIDTYPE: 1018 error = do_sys_quotactl_cursorskipidtype(mp, 1019 args->u.cursorskipidtype.qc_cursor, 1020 args->u.cursorskipidtype.qc_idtype); 1021 break; 1022 case QUOTACTL_CURSORGET: 1023 error = do_sys_quotactl_cursorget(mp, 1024 args->u.cursorget.qc_cursor, 1025 args->u.cursorget.qc_keys, 1026 args->u.cursorget.qc_vals, 1027 args->u.cursorget.qc_maxnum, 1028 args->u.cursorget.qc_ret); 1029 break; 1030 case QUOTACTL_CURSORATEND: 1031 error = do_sys_quotactl_cursoratend(mp, 1032 args->u.cursoratend.qc_cursor, 1033 args->u.cursoratend.qc_ret); 1034 break; 1035 case QUOTACTL_CURSORREWIND: 1036 error = do_sys_quotactl_cursorrewind(mp, 1037 args->u.cursorrewind.qc_cursor); 1038 break; 1039 case QUOTACTL_QUOTAON: 1040 error = do_sys_quotactl_quotaon(mp, 1041 args->u.quotaon.qc_idtype, 1042 args->u.quotaon.qc_quotafile); 1043 break; 1044 case QUOTACTL_QUOTAOFF: 1045 error = do_sys_quotactl_quotaoff(mp, 1046 args->u.quotaoff.qc_idtype); 1047 break; 1048 default: 1049 error = EINVAL; 1050 break; 1051 } 1052 1053 vrele(vp); 1054 return error; 1055 } 1056 1057 /* ARGSUSED */ 1058 int 1059 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1060 register_t *retval) 1061 { 1062 /* { 1063 syscallarg(const char *) path; 1064 syscallarg(struct quotactl_args *) args; 1065 } */ 1066 struct quotactl_args args; 1067 int error; 1068 1069 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1070 if (error) { 1071 return error; 1072 } 1073 1074 return do_sys_quotactl(SCARG(uap, path), &args); 1075 } 1076 1077 int 1078 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1079 int root) 1080 { 1081 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1082 int error = 0; 1083 1084 /* 1085 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1086 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1087 * overrides MNT_NOWAIT. 1088 */ 1089 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1090 (flags != MNT_WAIT && flags != 0)) { 1091 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1092 goto done; 1093 } 1094 1095 /* Get the filesystem stats now */ 1096 memset(sp, 0, sizeof(*sp)); 1097 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1098 return error; 1099 } 1100 1101 if (cwdi->cwdi_rdir == NULL) 1102 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1103 done: 1104 if (cwdi->cwdi_rdir != NULL) { 1105 size_t len; 1106 char *bp; 1107 char c; 1108 char *path = PNBUF_GET(); 1109 1110 bp = path + MAXPATHLEN; 1111 *--bp = '\0'; 1112 rw_enter(&cwdi->cwdi_lock, RW_READER); 1113 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1114 MAXPATHLEN / 2, 0, l); 1115 rw_exit(&cwdi->cwdi_lock); 1116 if (error) { 1117 PNBUF_PUT(path); 1118 return error; 1119 } 1120 len = strlen(bp); 1121 if (len != 1) { 1122 /* 1123 * for mount points that are below our root, we can see 1124 * them, so we fix up the pathname and return them. The 1125 * rest we cannot see, so we don't allow viewing the 1126 * data. 1127 */ 1128 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1129 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1130 (void)strlcpy(sp->f_mntonname, 1131 c == '\0' ? "/" : &sp->f_mntonname[len], 1132 sizeof(sp->f_mntonname)); 1133 } else { 1134 if (root) 1135 (void)strlcpy(sp->f_mntonname, "/", 1136 sizeof(sp->f_mntonname)); 1137 else 1138 error = EPERM; 1139 } 1140 } 1141 PNBUF_PUT(path); 1142 } 1143 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1144 return error; 1145 } 1146 1147 /* 1148 * Get filesystem statistics by path. 1149 */ 1150 int 1151 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1152 { 1153 struct mount *mp; 1154 int error; 1155 struct vnode *vp; 1156 1157 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1158 if (error != 0) 1159 return error; 1160 mp = vp->v_mount; 1161 error = dostatvfs(mp, sb, l, flags, 1); 1162 vrele(vp); 1163 return error; 1164 } 1165 1166 /* ARGSUSED */ 1167 int 1168 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1169 { 1170 /* { 1171 syscallarg(const char *) path; 1172 syscallarg(struct statvfs *) buf; 1173 syscallarg(int) flags; 1174 } */ 1175 struct statvfs *sb; 1176 int error; 1177 1178 sb = STATVFSBUF_GET(); 1179 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1180 if (error == 0) 1181 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1182 STATVFSBUF_PUT(sb); 1183 return error; 1184 } 1185 1186 /* 1187 * Get filesystem statistics by fd. 1188 */ 1189 int 1190 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1191 { 1192 file_t *fp; 1193 struct mount *mp; 1194 int error; 1195 1196 /* fd_getvnode() will use the descriptor for us */ 1197 if ((error = fd_getvnode(fd, &fp)) != 0) 1198 return (error); 1199 mp = ((struct vnode *)fp->f_data)->v_mount; 1200 error = dostatvfs(mp, sb, curlwp, flags, 1); 1201 fd_putfile(fd); 1202 return error; 1203 } 1204 1205 /* ARGSUSED */ 1206 int 1207 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1208 { 1209 /* { 1210 syscallarg(int) fd; 1211 syscallarg(struct statvfs *) buf; 1212 syscallarg(int) flags; 1213 } */ 1214 struct statvfs *sb; 1215 int error; 1216 1217 sb = STATVFSBUF_GET(); 1218 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1219 if (error == 0) 1220 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1221 STATVFSBUF_PUT(sb); 1222 return error; 1223 } 1224 1225 1226 /* 1227 * Get statistics on all filesystems. 1228 */ 1229 int 1230 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1231 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1232 register_t *retval) 1233 { 1234 int root = 0; 1235 struct proc *p = l->l_proc; 1236 struct mount *mp, *nmp; 1237 struct statvfs *sb; 1238 size_t count, maxcount; 1239 int error = 0; 1240 1241 sb = STATVFSBUF_GET(); 1242 maxcount = bufsize / entry_sz; 1243 mutex_enter(&mountlist_lock); 1244 count = 0; 1245 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1246 if (vfs_busy(mp, &nmp)) { 1247 continue; 1248 } 1249 if (sfsp && count < maxcount) { 1250 error = dostatvfs(mp, sb, l, flags, 0); 1251 if (error) { 1252 vfs_unbusy(mp, false, &nmp); 1253 error = 0; 1254 continue; 1255 } 1256 error = copyfn(sb, sfsp, entry_sz); 1257 if (error) { 1258 vfs_unbusy(mp, false, NULL); 1259 goto out; 1260 } 1261 sfsp = (char *)sfsp + entry_sz; 1262 root |= strcmp(sb->f_mntonname, "/") == 0; 1263 } 1264 count++; 1265 vfs_unbusy(mp, false, &nmp); 1266 } 1267 mutex_exit(&mountlist_lock); 1268 1269 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1270 /* 1271 * fake a root entry 1272 */ 1273 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1274 sb, l, flags, 1); 1275 if (error != 0) 1276 goto out; 1277 if (sfsp) { 1278 error = copyfn(sb, sfsp, entry_sz); 1279 if (error != 0) 1280 goto out; 1281 } 1282 count++; 1283 } 1284 if (sfsp && count > maxcount) 1285 *retval = maxcount; 1286 else 1287 *retval = count; 1288 out: 1289 STATVFSBUF_PUT(sb); 1290 return error; 1291 } 1292 1293 int 1294 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1295 { 1296 /* { 1297 syscallarg(struct statvfs *) buf; 1298 syscallarg(size_t) bufsize; 1299 syscallarg(int) flags; 1300 } */ 1301 1302 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1303 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1304 } 1305 1306 /* 1307 * Change current working directory to a given file descriptor. 1308 */ 1309 /* ARGSUSED */ 1310 int 1311 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1312 { 1313 /* { 1314 syscallarg(int) fd; 1315 } */ 1316 struct proc *p = l->l_proc; 1317 struct cwdinfo *cwdi; 1318 struct vnode *vp, *tdp; 1319 struct mount *mp; 1320 file_t *fp; 1321 int error, fd; 1322 1323 /* fd_getvnode() will use the descriptor for us */ 1324 fd = SCARG(uap, fd); 1325 if ((error = fd_getvnode(fd, &fp)) != 0) 1326 return (error); 1327 vp = fp->f_data; 1328 1329 vref(vp); 1330 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1331 if (vp->v_type != VDIR) 1332 error = ENOTDIR; 1333 else 1334 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1335 if (error) { 1336 vput(vp); 1337 goto out; 1338 } 1339 while ((mp = vp->v_mountedhere) != NULL) { 1340 error = vfs_busy(mp, NULL); 1341 vput(vp); 1342 if (error != 0) 1343 goto out; 1344 error = VFS_ROOT(mp, &tdp); 1345 vfs_unbusy(mp, false, NULL); 1346 if (error) 1347 goto out; 1348 vp = tdp; 1349 } 1350 VOP_UNLOCK(vp); 1351 1352 /* 1353 * Disallow changing to a directory not under the process's 1354 * current root directory (if there is one). 1355 */ 1356 cwdi = p->p_cwdi; 1357 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1358 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1359 vrele(vp); 1360 error = EPERM; /* operation not permitted */ 1361 } else { 1362 vrele(cwdi->cwdi_cdir); 1363 cwdi->cwdi_cdir = vp; 1364 } 1365 rw_exit(&cwdi->cwdi_lock); 1366 1367 out: 1368 fd_putfile(fd); 1369 return (error); 1370 } 1371 1372 /* 1373 * Change this process's notion of the root directory to a given file 1374 * descriptor. 1375 */ 1376 int 1377 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1378 { 1379 struct proc *p = l->l_proc; 1380 struct vnode *vp; 1381 file_t *fp; 1382 int error, fd = SCARG(uap, fd); 1383 1384 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1385 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1386 return error; 1387 /* fd_getvnode() will use the descriptor for us */ 1388 if ((error = fd_getvnode(fd, &fp)) != 0) 1389 return error; 1390 vp = fp->f_data; 1391 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1392 if (vp->v_type != VDIR) 1393 error = ENOTDIR; 1394 else 1395 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1396 VOP_UNLOCK(vp); 1397 if (error) 1398 goto out; 1399 vref(vp); 1400 1401 change_root(p->p_cwdi, vp, l); 1402 1403 out: 1404 fd_putfile(fd); 1405 return (error); 1406 } 1407 1408 /* 1409 * Change current working directory (``.''). 1410 */ 1411 /* ARGSUSED */ 1412 int 1413 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1414 { 1415 /* { 1416 syscallarg(const char *) path; 1417 } */ 1418 struct proc *p = l->l_proc; 1419 struct cwdinfo *cwdi; 1420 int error; 1421 struct vnode *vp; 1422 1423 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1424 &vp, l)) != 0) 1425 return (error); 1426 cwdi = p->p_cwdi; 1427 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1428 vrele(cwdi->cwdi_cdir); 1429 cwdi->cwdi_cdir = vp; 1430 rw_exit(&cwdi->cwdi_lock); 1431 return (0); 1432 } 1433 1434 /* 1435 * Change notion of root (``/'') directory. 1436 */ 1437 /* ARGSUSED */ 1438 int 1439 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1440 { 1441 /* { 1442 syscallarg(const char *) path; 1443 } */ 1444 struct proc *p = l->l_proc; 1445 int error; 1446 struct vnode *vp; 1447 1448 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1449 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1450 return (error); 1451 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1452 &vp, l)) != 0) 1453 return (error); 1454 1455 change_root(p->p_cwdi, vp, l); 1456 1457 return (0); 1458 } 1459 1460 /* 1461 * Common routine for chroot and fchroot. 1462 * NB: callers need to properly authorize the change root operation. 1463 */ 1464 void 1465 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1466 { 1467 struct proc *p = l->l_proc; 1468 kauth_cred_t ncred; 1469 1470 ncred = kauth_cred_alloc(); 1471 1472 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1473 if (cwdi->cwdi_rdir != NULL) 1474 vrele(cwdi->cwdi_rdir); 1475 cwdi->cwdi_rdir = vp; 1476 1477 /* 1478 * Prevent escaping from chroot by putting the root under 1479 * the working directory. Silently chdir to / if we aren't 1480 * already there. 1481 */ 1482 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1483 /* 1484 * XXX would be more failsafe to change directory to a 1485 * deadfs node here instead 1486 */ 1487 vrele(cwdi->cwdi_cdir); 1488 vref(vp); 1489 cwdi->cwdi_cdir = vp; 1490 } 1491 rw_exit(&cwdi->cwdi_lock); 1492 1493 /* Get a write lock on the process credential. */ 1494 proc_crmod_enter(); 1495 1496 kauth_cred_clone(p->p_cred, ncred); 1497 kauth_proc_chroot(ncred, p->p_cwdi); 1498 1499 /* Broadcast our credentials to the process and other LWPs. */ 1500 proc_crmod_leave(ncred, p->p_cred, true); 1501 } 1502 1503 /* 1504 * Common routine for chroot and chdir. 1505 * XXX "where" should be enum uio_seg 1506 */ 1507 int 1508 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1509 { 1510 struct pathbuf *pb; 1511 struct nameidata nd; 1512 int error; 1513 1514 error = pathbuf_maybe_copyin(path, where, &pb); 1515 if (error) { 1516 return error; 1517 } 1518 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1519 if ((error = namei(&nd)) != 0) { 1520 pathbuf_destroy(pb); 1521 return error; 1522 } 1523 *vpp = nd.ni_vp; 1524 pathbuf_destroy(pb); 1525 1526 if ((*vpp)->v_type != VDIR) 1527 error = ENOTDIR; 1528 else 1529 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1530 1531 if (error) 1532 vput(*vpp); 1533 else 1534 VOP_UNLOCK(*vpp); 1535 return (error); 1536 } 1537 1538 /* 1539 * Internals of sys_open - path has already been converted into a pathbuf 1540 * (so we can easily reuse this function from other parts of the kernel, 1541 * like posix_spawn post-processing). 1542 */ 1543 int 1544 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1545 int open_mode, int *fd) 1546 { 1547 struct proc *p = l->l_proc; 1548 struct cwdinfo *cwdi = p->p_cwdi; 1549 file_t *fp; 1550 struct vnode *vp; 1551 int flags, cmode; 1552 int indx, error; 1553 struct nameidata nd; 1554 1555 if (open_flags & O_SEARCH) { 1556 open_flags &= ~(int)O_SEARCH; 1557 } 1558 1559 flags = FFLAGS(open_flags); 1560 if ((flags & (FREAD | FWRITE)) == 0) 1561 return EINVAL; 1562 1563 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1564 return error; 1565 } 1566 1567 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1568 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1569 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1570 if (dvp != NULL) 1571 NDAT(&nd, dvp); 1572 1573 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1574 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1575 fd_abort(p, fp, indx); 1576 if ((error == EDUPFD || error == EMOVEFD) && 1577 l->l_dupfd >= 0 && /* XXX from fdopen */ 1578 (error = 1579 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1580 *fd = indx; 1581 return 0; 1582 } 1583 if (error == ERESTART) 1584 error = EINTR; 1585 return error; 1586 } 1587 1588 l->l_dupfd = 0; 1589 vp = nd.ni_vp; 1590 1591 if ((error = open_setfp(l, fp, vp, indx, flags))) 1592 return error; 1593 1594 VOP_UNLOCK(vp); 1595 *fd = indx; 1596 fd_affix(p, fp, indx); 1597 return 0; 1598 } 1599 1600 int 1601 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1602 { 1603 struct pathbuf *pb; 1604 int error, oflags; 1605 1606 oflags = FFLAGS(open_flags); 1607 if ((oflags & (FREAD | FWRITE)) == 0) 1608 return EINVAL; 1609 1610 pb = pathbuf_create(path); 1611 if (pb == NULL) 1612 return ENOMEM; 1613 1614 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1615 pathbuf_destroy(pb); 1616 1617 return error; 1618 } 1619 1620 /* 1621 * Check permissions, allocate an open file structure, 1622 * and call the device open routine if any. 1623 */ 1624 static int 1625 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1626 int mode, int *fd) 1627 { 1628 file_t *dfp = NULL; 1629 struct vnode *dvp = NULL; 1630 struct pathbuf *pb; 1631 int error; 1632 1633 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1634 if (path == NULL) { 1635 pb = pathbuf_create("."); 1636 if (pb == NULL) 1637 return ENOMEM; 1638 } else 1639 #endif 1640 { 1641 error = pathbuf_copyin(path, &pb); 1642 if (error) 1643 return error; 1644 } 1645 1646 if (fdat != AT_FDCWD) { 1647 /* fd_getvnode() will use the descriptor for us */ 1648 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1649 goto out; 1650 1651 dvp = dfp->f_data; 1652 } 1653 1654 error = do_open(l, dvp, pb, flags, mode, fd); 1655 1656 if (dfp != NULL) 1657 fd_putfile(fdat); 1658 out: 1659 pathbuf_destroy(pb); 1660 return error; 1661 } 1662 1663 int 1664 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1665 { 1666 /* { 1667 syscallarg(const char *) path; 1668 syscallarg(int) flags; 1669 syscallarg(int) mode; 1670 } */ 1671 int error; 1672 int fd; 1673 1674 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1675 SCARG(uap, flags), SCARG(uap, mode), &fd); 1676 1677 if (error == 0) 1678 *retval = fd; 1679 1680 return error; 1681 } 1682 1683 int 1684 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1685 { 1686 /* { 1687 syscallarg(int) fd; 1688 syscallarg(const char *) path; 1689 syscallarg(int) oflags; 1690 syscallarg(int) mode; 1691 } */ 1692 int error; 1693 int fd; 1694 1695 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1696 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1697 1698 if (error == 0) 1699 *retval = fd; 1700 1701 return error; 1702 } 1703 1704 static void 1705 vfs__fhfree(fhandle_t *fhp) 1706 { 1707 size_t fhsize; 1708 1709 if (fhp == NULL) { 1710 return; 1711 } 1712 fhsize = FHANDLE_SIZE(fhp); 1713 kmem_free(fhp, fhsize); 1714 } 1715 1716 /* 1717 * vfs_composefh: compose a filehandle. 1718 */ 1719 1720 int 1721 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1722 { 1723 struct mount *mp; 1724 struct fid *fidp; 1725 int error; 1726 size_t needfhsize; 1727 size_t fidsize; 1728 1729 mp = vp->v_mount; 1730 fidp = NULL; 1731 if (*fh_size < FHANDLE_SIZE_MIN) { 1732 fidsize = 0; 1733 } else { 1734 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1735 if (fhp != NULL) { 1736 memset(fhp, 0, *fh_size); 1737 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1738 fidp = &fhp->fh_fid; 1739 } 1740 } 1741 error = VFS_VPTOFH(vp, fidp, &fidsize); 1742 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1743 if (error == 0 && *fh_size < needfhsize) { 1744 error = E2BIG; 1745 } 1746 *fh_size = needfhsize; 1747 return error; 1748 } 1749 1750 int 1751 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1752 { 1753 struct mount *mp; 1754 fhandle_t *fhp; 1755 size_t fhsize; 1756 size_t fidsize; 1757 int error; 1758 1759 *fhpp = NULL; 1760 mp = vp->v_mount; 1761 fidsize = 0; 1762 error = VFS_VPTOFH(vp, NULL, &fidsize); 1763 KASSERT(error != 0); 1764 if (error != E2BIG) { 1765 goto out; 1766 } 1767 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1768 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1769 if (fhp == NULL) { 1770 error = ENOMEM; 1771 goto out; 1772 } 1773 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1774 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1775 if (error == 0) { 1776 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1777 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1778 *fhpp = fhp; 1779 } else { 1780 kmem_free(fhp, fhsize); 1781 } 1782 out: 1783 return error; 1784 } 1785 1786 void 1787 vfs_composefh_free(fhandle_t *fhp) 1788 { 1789 1790 vfs__fhfree(fhp); 1791 } 1792 1793 /* 1794 * vfs_fhtovp: lookup a vnode by a filehandle. 1795 */ 1796 1797 int 1798 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1799 { 1800 struct mount *mp; 1801 int error; 1802 1803 *vpp = NULL; 1804 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1805 if (mp == NULL) { 1806 error = ESTALE; 1807 goto out; 1808 } 1809 if (mp->mnt_op->vfs_fhtovp == NULL) { 1810 error = EOPNOTSUPP; 1811 goto out; 1812 } 1813 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1814 out: 1815 return error; 1816 } 1817 1818 /* 1819 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1820 * the needed size. 1821 */ 1822 1823 int 1824 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1825 { 1826 fhandle_t *fhp; 1827 int error; 1828 1829 *fhpp = NULL; 1830 if (fhsize > FHANDLE_SIZE_MAX) { 1831 return EINVAL; 1832 } 1833 if (fhsize < FHANDLE_SIZE_MIN) { 1834 return EINVAL; 1835 } 1836 again: 1837 fhp = kmem_alloc(fhsize, KM_SLEEP); 1838 if (fhp == NULL) { 1839 return ENOMEM; 1840 } 1841 error = copyin(ufhp, fhp, fhsize); 1842 if (error == 0) { 1843 /* XXX this check shouldn't be here */ 1844 if (FHANDLE_SIZE(fhp) == fhsize) { 1845 *fhpp = fhp; 1846 return 0; 1847 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1848 /* 1849 * a kludge for nfsv2 padded handles. 1850 */ 1851 size_t sz; 1852 1853 sz = FHANDLE_SIZE(fhp); 1854 kmem_free(fhp, fhsize); 1855 fhsize = sz; 1856 goto again; 1857 } else { 1858 /* 1859 * userland told us wrong size. 1860 */ 1861 error = EINVAL; 1862 } 1863 } 1864 kmem_free(fhp, fhsize); 1865 return error; 1866 } 1867 1868 void 1869 vfs_copyinfh_free(fhandle_t *fhp) 1870 { 1871 1872 vfs__fhfree(fhp); 1873 } 1874 1875 /* 1876 * Get file handle system call 1877 */ 1878 int 1879 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1880 { 1881 /* { 1882 syscallarg(char *) fname; 1883 syscallarg(fhandle_t *) fhp; 1884 syscallarg(size_t *) fh_size; 1885 } */ 1886 struct vnode *vp; 1887 fhandle_t *fh; 1888 int error; 1889 struct pathbuf *pb; 1890 struct nameidata nd; 1891 size_t sz; 1892 size_t usz; 1893 1894 /* 1895 * Must be super user 1896 */ 1897 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1898 0, NULL, NULL, NULL); 1899 if (error) 1900 return (error); 1901 1902 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1903 if (error) { 1904 return error; 1905 } 1906 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1907 error = namei(&nd); 1908 if (error) { 1909 pathbuf_destroy(pb); 1910 return error; 1911 } 1912 vp = nd.ni_vp; 1913 pathbuf_destroy(pb); 1914 1915 error = vfs_composefh_alloc(vp, &fh); 1916 vput(vp); 1917 if (error != 0) { 1918 goto out; 1919 } 1920 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1921 if (error != 0) { 1922 goto out; 1923 } 1924 sz = FHANDLE_SIZE(fh); 1925 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1926 if (error != 0) { 1927 goto out; 1928 } 1929 if (usz >= sz) { 1930 error = copyout(fh, SCARG(uap, fhp), sz); 1931 } else { 1932 error = E2BIG; 1933 } 1934 out: 1935 vfs_composefh_free(fh); 1936 return (error); 1937 } 1938 1939 /* 1940 * Open a file given a file handle. 1941 * 1942 * Check permissions, allocate an open file structure, 1943 * and call the device open routine if any. 1944 */ 1945 1946 int 1947 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1948 register_t *retval) 1949 { 1950 file_t *fp; 1951 struct vnode *vp = NULL; 1952 kauth_cred_t cred = l->l_cred; 1953 file_t *nfp; 1954 int indx, error = 0; 1955 struct vattr va; 1956 fhandle_t *fh; 1957 int flags; 1958 proc_t *p; 1959 1960 p = curproc; 1961 1962 /* 1963 * Must be super user 1964 */ 1965 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1966 0, NULL, NULL, NULL))) 1967 return (error); 1968 1969 if (oflags & O_SEARCH) { 1970 oflags &= ~(int)O_SEARCH; 1971 } 1972 1973 flags = FFLAGS(oflags); 1974 if ((flags & (FREAD | FWRITE)) == 0) 1975 return (EINVAL); 1976 if ((flags & O_CREAT)) 1977 return (EINVAL); 1978 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1979 return (error); 1980 fp = nfp; 1981 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1982 if (error != 0) { 1983 goto bad; 1984 } 1985 error = vfs_fhtovp(fh, &vp); 1986 vfs_copyinfh_free(fh); 1987 if (error != 0) { 1988 goto bad; 1989 } 1990 1991 /* Now do an effective vn_open */ 1992 1993 if (vp->v_type == VSOCK) { 1994 error = EOPNOTSUPP; 1995 goto bad; 1996 } 1997 error = vn_openchk(vp, cred, flags); 1998 if (error != 0) 1999 goto bad; 2000 if (flags & O_TRUNC) { 2001 VOP_UNLOCK(vp); /* XXX */ 2002 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2003 vattr_null(&va); 2004 va.va_size = 0; 2005 error = VOP_SETATTR(vp, &va, cred); 2006 if (error) 2007 goto bad; 2008 } 2009 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2010 goto bad; 2011 if (flags & FWRITE) { 2012 mutex_enter(vp->v_interlock); 2013 vp->v_writecount++; 2014 mutex_exit(vp->v_interlock); 2015 } 2016 2017 /* done with modified vn_open, now finish what sys_open does. */ 2018 if ((error = open_setfp(l, fp, vp, indx, flags))) 2019 return error; 2020 2021 VOP_UNLOCK(vp); 2022 *retval = indx; 2023 fd_affix(p, fp, indx); 2024 return (0); 2025 2026 bad: 2027 fd_abort(p, fp, indx); 2028 if (vp != NULL) 2029 vput(vp); 2030 return (error); 2031 } 2032 2033 int 2034 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2035 { 2036 /* { 2037 syscallarg(const void *) fhp; 2038 syscallarg(size_t) fh_size; 2039 syscallarg(int) flags; 2040 } */ 2041 2042 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2043 SCARG(uap, flags), retval); 2044 } 2045 2046 int 2047 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2048 { 2049 int error; 2050 fhandle_t *fh; 2051 struct vnode *vp; 2052 2053 /* 2054 * Must be super user 2055 */ 2056 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2057 0, NULL, NULL, NULL))) 2058 return (error); 2059 2060 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2061 if (error != 0) 2062 return error; 2063 2064 error = vfs_fhtovp(fh, &vp); 2065 vfs_copyinfh_free(fh); 2066 if (error != 0) 2067 return error; 2068 2069 error = vn_stat(vp, sb); 2070 vput(vp); 2071 return error; 2072 } 2073 2074 2075 /* ARGSUSED */ 2076 int 2077 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2078 { 2079 /* { 2080 syscallarg(const void *) fhp; 2081 syscallarg(size_t) fh_size; 2082 syscallarg(struct stat *) sb; 2083 } */ 2084 struct stat sb; 2085 int error; 2086 2087 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2088 if (error) 2089 return error; 2090 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2091 } 2092 2093 int 2094 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2095 int flags) 2096 { 2097 fhandle_t *fh; 2098 struct mount *mp; 2099 struct vnode *vp; 2100 int error; 2101 2102 /* 2103 * Must be super user 2104 */ 2105 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2106 0, NULL, NULL, NULL))) 2107 return error; 2108 2109 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2110 if (error != 0) 2111 return error; 2112 2113 error = vfs_fhtovp(fh, &vp); 2114 vfs_copyinfh_free(fh); 2115 if (error != 0) 2116 return error; 2117 2118 mp = vp->v_mount; 2119 error = dostatvfs(mp, sb, l, flags, 1); 2120 vput(vp); 2121 return error; 2122 } 2123 2124 /* ARGSUSED */ 2125 int 2126 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2127 { 2128 /* { 2129 syscallarg(const void *) fhp; 2130 syscallarg(size_t) fh_size; 2131 syscallarg(struct statvfs *) buf; 2132 syscallarg(int) flags; 2133 } */ 2134 struct statvfs *sb = STATVFSBUF_GET(); 2135 int error; 2136 2137 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2138 SCARG(uap, flags)); 2139 if (error == 0) 2140 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2141 STATVFSBUF_PUT(sb); 2142 return error; 2143 } 2144 2145 /* 2146 * Create a special file. 2147 */ 2148 /* ARGSUSED */ 2149 int 2150 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2151 register_t *retval) 2152 { 2153 /* { 2154 syscallarg(const char *) path; 2155 syscallarg(mode_t) mode; 2156 syscallarg(dev_t) dev; 2157 } */ 2158 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2159 SCARG(uap, dev), retval, UIO_USERSPACE); 2160 } 2161 2162 int 2163 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2164 register_t *retval) 2165 { 2166 /* { 2167 syscallarg(int) fd; 2168 syscallarg(const char *) path; 2169 syscallarg(mode_t) mode; 2170 syscallarg(int) pad; 2171 syscallarg(dev_t) dev; 2172 } */ 2173 2174 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2175 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2176 } 2177 2178 int 2179 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2180 register_t *retval, enum uio_seg seg) 2181 { 2182 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2183 } 2184 2185 int 2186 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2187 dev_t dev, register_t *retval, enum uio_seg seg) 2188 { 2189 struct proc *p = l->l_proc; 2190 struct vnode *vp; 2191 struct vattr vattr; 2192 int error, optype; 2193 struct pathbuf *pb; 2194 struct nameidata nd; 2195 const char *pathstring; 2196 2197 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2198 0, NULL, NULL, NULL)) != 0) 2199 return (error); 2200 2201 optype = VOP_MKNOD_DESCOFFSET; 2202 2203 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2204 if (error) { 2205 return error; 2206 } 2207 pathstring = pathbuf_stringcopy_get(pb); 2208 if (pathstring == NULL) { 2209 pathbuf_destroy(pb); 2210 return ENOMEM; 2211 } 2212 2213 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2214 2215 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2216 goto out; 2217 vp = nd.ni_vp; 2218 2219 if (vp != NULL) 2220 error = EEXIST; 2221 else { 2222 vattr_null(&vattr); 2223 /* We will read cwdi->cwdi_cmask unlocked. */ 2224 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2225 vattr.va_rdev = dev; 2226 2227 switch (mode & S_IFMT) { 2228 case S_IFMT: /* used by badsect to flag bad sectors */ 2229 vattr.va_type = VBAD; 2230 break; 2231 case S_IFCHR: 2232 vattr.va_type = VCHR; 2233 break; 2234 case S_IFBLK: 2235 vattr.va_type = VBLK; 2236 break; 2237 case S_IFWHT: 2238 optype = VOP_WHITEOUT_DESCOFFSET; 2239 break; 2240 case S_IFREG: 2241 #if NVERIEXEC > 0 2242 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2243 O_CREAT); 2244 #endif /* NVERIEXEC > 0 */ 2245 vattr.va_type = VREG; 2246 vattr.va_rdev = VNOVAL; 2247 optype = VOP_CREATE_DESCOFFSET; 2248 break; 2249 default: 2250 error = EINVAL; 2251 break; 2252 } 2253 } 2254 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2255 && vattr.va_rdev == VNOVAL) 2256 error = EINVAL; 2257 if (!error) { 2258 switch (optype) { 2259 case VOP_WHITEOUT_DESCOFFSET: 2260 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2261 if (error) 2262 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2263 vput(nd.ni_dvp); 2264 break; 2265 2266 case VOP_MKNOD_DESCOFFSET: 2267 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2268 &nd.ni_cnd, &vattr); 2269 if (error == 0) 2270 vrele(nd.ni_vp); 2271 vput(nd.ni_dvp); 2272 break; 2273 2274 case VOP_CREATE_DESCOFFSET: 2275 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2276 &nd.ni_cnd, &vattr); 2277 if (error == 0) 2278 vrele(nd.ni_vp); 2279 vput(nd.ni_dvp); 2280 break; 2281 } 2282 } else { 2283 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2284 if (nd.ni_dvp == vp) 2285 vrele(nd.ni_dvp); 2286 else 2287 vput(nd.ni_dvp); 2288 if (vp) 2289 vrele(vp); 2290 } 2291 out: 2292 pathbuf_stringcopy_put(pb, pathstring); 2293 pathbuf_destroy(pb); 2294 return (error); 2295 } 2296 2297 /* 2298 * Create a named pipe. 2299 */ 2300 /* ARGSUSED */ 2301 int 2302 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2303 { 2304 /* { 2305 syscallarg(const char *) path; 2306 syscallarg(int) mode; 2307 } */ 2308 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2309 } 2310 2311 int 2312 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2313 register_t *retval) 2314 { 2315 /* { 2316 syscallarg(int) fd; 2317 syscallarg(const char *) path; 2318 syscallarg(int) mode; 2319 } */ 2320 2321 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2322 SCARG(uap, mode)); 2323 } 2324 2325 static int 2326 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2327 { 2328 struct proc *p = l->l_proc; 2329 struct vattr vattr; 2330 int error; 2331 struct pathbuf *pb; 2332 struct nameidata nd; 2333 2334 error = pathbuf_copyin(path, &pb); 2335 if (error) { 2336 return error; 2337 } 2338 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2339 2340 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2341 pathbuf_destroy(pb); 2342 return error; 2343 } 2344 if (nd.ni_vp != NULL) { 2345 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2346 if (nd.ni_dvp == nd.ni_vp) 2347 vrele(nd.ni_dvp); 2348 else 2349 vput(nd.ni_dvp); 2350 vrele(nd.ni_vp); 2351 pathbuf_destroy(pb); 2352 return (EEXIST); 2353 } 2354 vattr_null(&vattr); 2355 vattr.va_type = VFIFO; 2356 /* We will read cwdi->cwdi_cmask unlocked. */ 2357 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2358 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2359 if (error == 0) 2360 vrele(nd.ni_vp); 2361 vput(nd.ni_dvp); 2362 pathbuf_destroy(pb); 2363 return (error); 2364 } 2365 2366 /* 2367 * Make a hard file link. 2368 */ 2369 /* ARGSUSED */ 2370 int 2371 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2372 const char *link, int follow, register_t *retval) 2373 { 2374 struct vnode *vp; 2375 struct pathbuf *linkpb; 2376 struct nameidata nd; 2377 namei_simple_flags_t ns_flags; 2378 int error; 2379 2380 if (follow & AT_SYMLINK_FOLLOW) 2381 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2382 else 2383 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2384 2385 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2386 if (error != 0) 2387 return (error); 2388 error = pathbuf_copyin(link, &linkpb); 2389 if (error) { 2390 goto out1; 2391 } 2392 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2393 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2394 goto out2; 2395 if (nd.ni_vp) { 2396 error = EEXIST; 2397 goto abortop; 2398 } 2399 /* Prevent hard links on directories. */ 2400 if (vp->v_type == VDIR) { 2401 error = EPERM; 2402 goto abortop; 2403 } 2404 /* Prevent cross-mount operation. */ 2405 if (nd.ni_dvp->v_mount != vp->v_mount) { 2406 error = EXDEV; 2407 goto abortop; 2408 } 2409 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2410 out2: 2411 pathbuf_destroy(linkpb); 2412 out1: 2413 vrele(vp); 2414 return (error); 2415 abortop: 2416 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2417 if (nd.ni_dvp == nd.ni_vp) 2418 vrele(nd.ni_dvp); 2419 else 2420 vput(nd.ni_dvp); 2421 if (nd.ni_vp != NULL) 2422 vrele(nd.ni_vp); 2423 goto out2; 2424 } 2425 2426 int 2427 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2428 { 2429 /* { 2430 syscallarg(const char *) path; 2431 syscallarg(const char *) link; 2432 } */ 2433 const char *path = SCARG(uap, path); 2434 const char *link = SCARG(uap, link); 2435 2436 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2437 AT_SYMLINK_FOLLOW, retval); 2438 } 2439 2440 int 2441 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2442 register_t *retval) 2443 { 2444 /* { 2445 syscallarg(int) fd1; 2446 syscallarg(const char *) name1; 2447 syscallarg(int) fd2; 2448 syscallarg(const char *) name2; 2449 syscallarg(int) flags; 2450 } */ 2451 int fd1 = SCARG(uap, fd1); 2452 const char *name1 = SCARG(uap, name1); 2453 int fd2 = SCARG(uap, fd2); 2454 const char *name2 = SCARG(uap, name2); 2455 int follow; 2456 2457 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2458 2459 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2460 } 2461 2462 2463 int 2464 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2465 { 2466 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2467 } 2468 2469 static int 2470 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2471 const char *link, enum uio_seg seg) 2472 { 2473 struct proc *p = curproc; 2474 struct vattr vattr; 2475 char *path; 2476 int error; 2477 struct pathbuf *linkpb; 2478 struct nameidata nd; 2479 2480 KASSERT(l != NULL || fdat == AT_FDCWD); 2481 2482 path = PNBUF_GET(); 2483 if (seg == UIO_USERSPACE) { 2484 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2485 goto out1; 2486 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2487 goto out1; 2488 } else { 2489 KASSERT(strlen(patharg) < MAXPATHLEN); 2490 strcpy(path, patharg); 2491 linkpb = pathbuf_create(link); 2492 if (linkpb == NULL) { 2493 error = ENOMEM; 2494 goto out1; 2495 } 2496 } 2497 ktrkuser("symlink-target", path, strlen(path)); 2498 2499 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2500 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2501 goto out2; 2502 if (nd.ni_vp) { 2503 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2504 if (nd.ni_dvp == nd.ni_vp) 2505 vrele(nd.ni_dvp); 2506 else 2507 vput(nd.ni_dvp); 2508 vrele(nd.ni_vp); 2509 error = EEXIST; 2510 goto out2; 2511 } 2512 vattr_null(&vattr); 2513 vattr.va_type = VLNK; 2514 /* We will read cwdi->cwdi_cmask unlocked. */ 2515 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2516 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2517 if (error == 0) 2518 vrele(nd.ni_vp); 2519 vput(nd.ni_dvp); 2520 out2: 2521 pathbuf_destroy(linkpb); 2522 out1: 2523 PNBUF_PUT(path); 2524 return (error); 2525 } 2526 2527 /* 2528 * Make a symbolic link. 2529 */ 2530 /* ARGSUSED */ 2531 int 2532 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2533 { 2534 /* { 2535 syscallarg(const char *) path; 2536 syscallarg(const char *) link; 2537 } */ 2538 2539 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2540 UIO_USERSPACE); 2541 } 2542 2543 int 2544 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2545 register_t *retval) 2546 { 2547 /* { 2548 syscallarg(const char *) path1; 2549 syscallarg(int) fd; 2550 syscallarg(const char *) path2; 2551 } */ 2552 2553 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2554 SCARG(uap, path2), UIO_USERSPACE); 2555 } 2556 2557 /* 2558 * Delete a whiteout from the filesystem. 2559 */ 2560 /* ARGSUSED */ 2561 int 2562 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2563 { 2564 /* { 2565 syscallarg(const char *) path; 2566 } */ 2567 int error; 2568 struct pathbuf *pb; 2569 struct nameidata nd; 2570 2571 error = pathbuf_copyin(SCARG(uap, path), &pb); 2572 if (error) { 2573 return error; 2574 } 2575 2576 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2577 error = namei(&nd); 2578 if (error) { 2579 pathbuf_destroy(pb); 2580 return (error); 2581 } 2582 2583 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2584 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2585 if (nd.ni_dvp == nd.ni_vp) 2586 vrele(nd.ni_dvp); 2587 else 2588 vput(nd.ni_dvp); 2589 if (nd.ni_vp) 2590 vrele(nd.ni_vp); 2591 pathbuf_destroy(pb); 2592 return (EEXIST); 2593 } 2594 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2595 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2596 vput(nd.ni_dvp); 2597 pathbuf_destroy(pb); 2598 return (error); 2599 } 2600 2601 /* 2602 * Delete a name from the filesystem. 2603 */ 2604 /* ARGSUSED */ 2605 int 2606 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2607 { 2608 /* { 2609 syscallarg(const char *) path; 2610 } */ 2611 2612 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2613 } 2614 2615 int 2616 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2617 register_t *retval) 2618 { 2619 /* { 2620 syscallarg(int) fd; 2621 syscallarg(const char *) path; 2622 syscallarg(int) flag; 2623 } */ 2624 2625 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2626 SCARG(uap, flag), UIO_USERSPACE); 2627 } 2628 2629 int 2630 do_sys_unlink(const char *arg, enum uio_seg seg) 2631 { 2632 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2633 } 2634 2635 static int 2636 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2637 enum uio_seg seg) 2638 { 2639 struct vnode *vp; 2640 int error; 2641 struct pathbuf *pb; 2642 struct nameidata nd; 2643 const char *pathstring; 2644 2645 KASSERT(l != NULL || fdat == AT_FDCWD); 2646 2647 error = pathbuf_maybe_copyin(arg, seg, &pb); 2648 if (error) { 2649 return error; 2650 } 2651 pathstring = pathbuf_stringcopy_get(pb); 2652 if (pathstring == NULL) { 2653 pathbuf_destroy(pb); 2654 return ENOMEM; 2655 } 2656 2657 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2658 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2659 goto out; 2660 vp = nd.ni_vp; 2661 2662 /* 2663 * The root of a mounted filesystem cannot be deleted. 2664 */ 2665 if ((vp->v_vflag & VV_ROOT) != 0) { 2666 error = EBUSY; 2667 goto abort; 2668 } 2669 2670 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2671 error = EBUSY; 2672 goto abort; 2673 } 2674 2675 /* 2676 * No rmdir "." please. 2677 */ 2678 if (nd.ni_dvp == vp) { 2679 error = EINVAL; 2680 goto abort; 2681 } 2682 2683 /* 2684 * AT_REMOVEDIR is required to remove a directory 2685 */ 2686 if (vp->v_type == VDIR) { 2687 if (!(flags & AT_REMOVEDIR)) { 2688 error = EPERM; 2689 goto abort; 2690 } else { 2691 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2692 goto out; 2693 } 2694 } 2695 2696 /* 2697 * Starting here we only deal with non directories. 2698 */ 2699 if (flags & AT_REMOVEDIR) { 2700 error = ENOTDIR; 2701 goto abort; 2702 } 2703 2704 2705 #if NVERIEXEC > 0 2706 /* Handle remove requests for veriexec entries. */ 2707 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2708 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2709 if (nd.ni_dvp == vp) 2710 vrele(nd.ni_dvp); 2711 else 2712 vput(nd.ni_dvp); 2713 vput(vp); 2714 goto out; 2715 } 2716 #endif /* NVERIEXEC > 0 */ 2717 2718 #ifdef FILEASSOC 2719 (void)fileassoc_file_delete(vp); 2720 #endif /* FILEASSOC */ 2721 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2722 goto out; 2723 2724 abort: 2725 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2726 if (nd.ni_dvp == vp) 2727 vrele(nd.ni_dvp); 2728 else 2729 vput(nd.ni_dvp); 2730 vput(vp); 2731 2732 out: 2733 pathbuf_stringcopy_put(pb, pathstring); 2734 pathbuf_destroy(pb); 2735 return (error); 2736 } 2737 2738 /* 2739 * Reposition read/write file offset. 2740 */ 2741 int 2742 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2743 { 2744 /* { 2745 syscallarg(int) fd; 2746 syscallarg(int) pad; 2747 syscallarg(off_t) offset; 2748 syscallarg(int) whence; 2749 } */ 2750 kauth_cred_t cred = l->l_cred; 2751 file_t *fp; 2752 struct vnode *vp; 2753 struct vattr vattr; 2754 off_t newoff; 2755 int error, fd; 2756 2757 fd = SCARG(uap, fd); 2758 2759 if ((fp = fd_getfile(fd)) == NULL) 2760 return (EBADF); 2761 2762 vp = fp->f_data; 2763 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2764 error = ESPIPE; 2765 goto out; 2766 } 2767 2768 switch (SCARG(uap, whence)) { 2769 case SEEK_CUR: 2770 newoff = fp->f_offset + SCARG(uap, offset); 2771 break; 2772 case SEEK_END: 2773 vn_lock(vp, LK_SHARED | LK_RETRY); 2774 error = VOP_GETATTR(vp, &vattr, cred); 2775 VOP_UNLOCK(vp); 2776 if (error) { 2777 goto out; 2778 } 2779 newoff = SCARG(uap, offset) + vattr.va_size; 2780 break; 2781 case SEEK_SET: 2782 newoff = SCARG(uap, offset); 2783 break; 2784 default: 2785 error = EINVAL; 2786 goto out; 2787 } 2788 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2789 *(off_t *)retval = fp->f_offset = newoff; 2790 } 2791 out: 2792 fd_putfile(fd); 2793 return (error); 2794 } 2795 2796 /* 2797 * Positional read system call. 2798 */ 2799 int 2800 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2801 { 2802 /* { 2803 syscallarg(int) fd; 2804 syscallarg(void *) buf; 2805 syscallarg(size_t) nbyte; 2806 syscallarg(off_t) offset; 2807 } */ 2808 file_t *fp; 2809 struct vnode *vp; 2810 off_t offset; 2811 int error, fd = SCARG(uap, fd); 2812 2813 if ((fp = fd_getfile(fd)) == NULL) 2814 return (EBADF); 2815 2816 if ((fp->f_flag & FREAD) == 0) { 2817 fd_putfile(fd); 2818 return (EBADF); 2819 } 2820 2821 vp = fp->f_data; 2822 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2823 error = ESPIPE; 2824 goto out; 2825 } 2826 2827 offset = SCARG(uap, offset); 2828 2829 /* 2830 * XXX This works because no file systems actually 2831 * XXX take any action on the seek operation. 2832 */ 2833 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2834 goto out; 2835 2836 /* dofileread() will unuse the descriptor for us */ 2837 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2838 &offset, 0, retval)); 2839 2840 out: 2841 fd_putfile(fd); 2842 return (error); 2843 } 2844 2845 /* 2846 * Positional scatter read system call. 2847 */ 2848 int 2849 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2850 { 2851 /* { 2852 syscallarg(int) fd; 2853 syscallarg(const struct iovec *) iovp; 2854 syscallarg(int) iovcnt; 2855 syscallarg(off_t) offset; 2856 } */ 2857 off_t offset = SCARG(uap, offset); 2858 2859 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2860 SCARG(uap, iovcnt), &offset, 0, retval); 2861 } 2862 2863 /* 2864 * Positional write system call. 2865 */ 2866 int 2867 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2868 { 2869 /* { 2870 syscallarg(int) fd; 2871 syscallarg(const void *) buf; 2872 syscallarg(size_t) nbyte; 2873 syscallarg(off_t) offset; 2874 } */ 2875 file_t *fp; 2876 struct vnode *vp; 2877 off_t offset; 2878 int error, fd = SCARG(uap, fd); 2879 2880 if ((fp = fd_getfile(fd)) == NULL) 2881 return (EBADF); 2882 2883 if ((fp->f_flag & FWRITE) == 0) { 2884 fd_putfile(fd); 2885 return (EBADF); 2886 } 2887 2888 vp = fp->f_data; 2889 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2890 error = ESPIPE; 2891 goto out; 2892 } 2893 2894 offset = SCARG(uap, offset); 2895 2896 /* 2897 * XXX This works because no file systems actually 2898 * XXX take any action on the seek operation. 2899 */ 2900 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2901 goto out; 2902 2903 /* dofilewrite() will unuse the descriptor for us */ 2904 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2905 &offset, 0, retval)); 2906 2907 out: 2908 fd_putfile(fd); 2909 return (error); 2910 } 2911 2912 /* 2913 * Positional gather write system call. 2914 */ 2915 int 2916 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2917 { 2918 /* { 2919 syscallarg(int) fd; 2920 syscallarg(const struct iovec *) iovp; 2921 syscallarg(int) iovcnt; 2922 syscallarg(off_t) offset; 2923 } */ 2924 off_t offset = SCARG(uap, offset); 2925 2926 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2927 SCARG(uap, iovcnt), &offset, 0, retval); 2928 } 2929 2930 /* 2931 * Check access permissions. 2932 */ 2933 int 2934 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2935 { 2936 /* { 2937 syscallarg(const char *) path; 2938 syscallarg(int) flags; 2939 } */ 2940 2941 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2942 SCARG(uap, flags), 0); 2943 } 2944 2945 int 2946 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2947 int mode, int flags) 2948 { 2949 kauth_cred_t cred; 2950 struct vnode *vp; 2951 int error, nd_flag, vmode; 2952 struct pathbuf *pb; 2953 struct nameidata nd; 2954 2955 CTASSERT(F_OK == 0); 2956 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2957 /* nonsense mode */ 2958 return EINVAL; 2959 } 2960 2961 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2962 if (flags & AT_SYMLINK_NOFOLLOW) 2963 nd_flag &= ~FOLLOW; 2964 2965 error = pathbuf_copyin(path, &pb); 2966 if (error) 2967 return error; 2968 2969 NDINIT(&nd, LOOKUP, nd_flag, pb); 2970 2971 /* Override default credentials */ 2972 cred = kauth_cred_dup(l->l_cred); 2973 if (!(flags & AT_EACCESS)) { 2974 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2975 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2976 } 2977 nd.ni_cnd.cn_cred = cred; 2978 2979 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2980 pathbuf_destroy(pb); 2981 goto out; 2982 } 2983 vp = nd.ni_vp; 2984 pathbuf_destroy(pb); 2985 2986 /* Flags == 0 means only check for existence. */ 2987 if (mode) { 2988 vmode = 0; 2989 if (mode & R_OK) 2990 vmode |= VREAD; 2991 if (mode & W_OK) 2992 vmode |= VWRITE; 2993 if (mode & X_OK) 2994 vmode |= VEXEC; 2995 2996 error = VOP_ACCESS(vp, vmode, cred); 2997 if (!error && (vmode & VWRITE)) 2998 error = vn_writechk(vp); 2999 } 3000 vput(vp); 3001 out: 3002 kauth_cred_free(cred); 3003 return (error); 3004 } 3005 3006 int 3007 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3008 register_t *retval) 3009 { 3010 /* { 3011 syscallarg(int) fd; 3012 syscallarg(const char *) path; 3013 syscallarg(int) amode; 3014 syscallarg(int) flag; 3015 } */ 3016 3017 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3018 SCARG(uap, amode), SCARG(uap, flag)); 3019 } 3020 3021 /* 3022 * Common code for all sys_stat functions, including compat versions. 3023 */ 3024 int 3025 do_sys_stat(const char *userpath, unsigned int nd_flag, 3026 struct stat *sb) 3027 { 3028 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3029 } 3030 3031 int 3032 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3033 unsigned int nd_flag, struct stat *sb) 3034 { 3035 int error; 3036 struct pathbuf *pb; 3037 struct nameidata nd; 3038 3039 KASSERT(l != NULL || fdat == AT_FDCWD); 3040 3041 error = pathbuf_copyin(userpath, &pb); 3042 if (error) { 3043 return error; 3044 } 3045 3046 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3047 3048 error = fd_nameiat(l, fdat, &nd); 3049 if (error != 0) { 3050 pathbuf_destroy(pb); 3051 return error; 3052 } 3053 error = vn_stat(nd.ni_vp, sb); 3054 vput(nd.ni_vp); 3055 pathbuf_destroy(pb); 3056 return error; 3057 } 3058 3059 /* 3060 * Get file status; this version follows links. 3061 */ 3062 /* ARGSUSED */ 3063 int 3064 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3065 { 3066 /* { 3067 syscallarg(const char *) path; 3068 syscallarg(struct stat *) ub; 3069 } */ 3070 struct stat sb; 3071 int error; 3072 3073 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3074 if (error) 3075 return error; 3076 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3077 } 3078 3079 /* 3080 * Get file status; this version does not follow links. 3081 */ 3082 /* ARGSUSED */ 3083 int 3084 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3085 { 3086 /* { 3087 syscallarg(const char *) path; 3088 syscallarg(struct stat *) ub; 3089 } */ 3090 struct stat sb; 3091 int error; 3092 3093 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3094 if (error) 3095 return error; 3096 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3097 } 3098 3099 int 3100 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3101 register_t *retval) 3102 { 3103 /* { 3104 syscallarg(int) fd; 3105 syscallarg(const char *) path; 3106 syscallarg(struct stat *) buf; 3107 syscallarg(int) flag; 3108 } */ 3109 unsigned int nd_flag; 3110 struct stat sb; 3111 int error; 3112 3113 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3114 nd_flag = NOFOLLOW; 3115 else 3116 nd_flag = FOLLOW; 3117 3118 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3119 &sb); 3120 if (error) 3121 return error; 3122 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3123 } 3124 3125 /* 3126 * Get configurable pathname variables. 3127 */ 3128 /* ARGSUSED */ 3129 int 3130 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3131 { 3132 /* { 3133 syscallarg(const char *) path; 3134 syscallarg(int) name; 3135 } */ 3136 int error; 3137 struct pathbuf *pb; 3138 struct nameidata nd; 3139 3140 error = pathbuf_copyin(SCARG(uap, path), &pb); 3141 if (error) { 3142 return error; 3143 } 3144 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3145 if ((error = namei(&nd)) != 0) { 3146 pathbuf_destroy(pb); 3147 return (error); 3148 } 3149 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3150 vput(nd.ni_vp); 3151 pathbuf_destroy(pb); 3152 return (error); 3153 } 3154 3155 /* 3156 * Return target name of a symbolic link. 3157 */ 3158 /* ARGSUSED */ 3159 int 3160 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3161 register_t *retval) 3162 { 3163 /* { 3164 syscallarg(const char *) path; 3165 syscallarg(char *) buf; 3166 syscallarg(size_t) count; 3167 } */ 3168 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3169 SCARG(uap, buf), SCARG(uap, count), retval); 3170 } 3171 3172 static int 3173 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3174 size_t count, register_t *retval) 3175 { 3176 struct vnode *vp; 3177 struct iovec aiov; 3178 struct uio auio; 3179 int error; 3180 struct pathbuf *pb; 3181 struct nameidata nd; 3182 3183 error = pathbuf_copyin(path, &pb); 3184 if (error) { 3185 return error; 3186 } 3187 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3188 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3189 pathbuf_destroy(pb); 3190 return error; 3191 } 3192 vp = nd.ni_vp; 3193 pathbuf_destroy(pb); 3194 if (vp->v_type != VLNK) 3195 error = EINVAL; 3196 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3197 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3198 aiov.iov_base = buf; 3199 aiov.iov_len = count; 3200 auio.uio_iov = &aiov; 3201 auio.uio_iovcnt = 1; 3202 auio.uio_offset = 0; 3203 auio.uio_rw = UIO_READ; 3204 KASSERT(l == curlwp); 3205 auio.uio_vmspace = l->l_proc->p_vmspace; 3206 auio.uio_resid = count; 3207 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3208 *retval = count - auio.uio_resid; 3209 } 3210 vput(vp); 3211 return (error); 3212 } 3213 3214 int 3215 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3216 register_t *retval) 3217 { 3218 /* { 3219 syscallarg(int) fd; 3220 syscallarg(const char *) path; 3221 syscallarg(char *) buf; 3222 syscallarg(size_t) bufsize; 3223 } */ 3224 3225 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3226 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3227 } 3228 3229 /* 3230 * Change flags of a file given a path name. 3231 */ 3232 /* ARGSUSED */ 3233 int 3234 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3235 { 3236 /* { 3237 syscallarg(const char *) path; 3238 syscallarg(u_long) flags; 3239 } */ 3240 struct vnode *vp; 3241 int error; 3242 3243 error = namei_simple_user(SCARG(uap, path), 3244 NSM_FOLLOW_TRYEMULROOT, &vp); 3245 if (error != 0) 3246 return (error); 3247 error = change_flags(vp, SCARG(uap, flags), l); 3248 vput(vp); 3249 return (error); 3250 } 3251 3252 /* 3253 * Change flags of a file given a file descriptor. 3254 */ 3255 /* ARGSUSED */ 3256 int 3257 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3258 { 3259 /* { 3260 syscallarg(int) fd; 3261 syscallarg(u_long) flags; 3262 } */ 3263 struct vnode *vp; 3264 file_t *fp; 3265 int error; 3266 3267 /* fd_getvnode() will use the descriptor for us */ 3268 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3269 return (error); 3270 vp = fp->f_data; 3271 error = change_flags(vp, SCARG(uap, flags), l); 3272 VOP_UNLOCK(vp); 3273 fd_putfile(SCARG(uap, fd)); 3274 return (error); 3275 } 3276 3277 /* 3278 * Change flags of a file given a path name; this version does 3279 * not follow links. 3280 */ 3281 int 3282 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3283 { 3284 /* { 3285 syscallarg(const char *) path; 3286 syscallarg(u_long) flags; 3287 } */ 3288 struct vnode *vp; 3289 int error; 3290 3291 error = namei_simple_user(SCARG(uap, path), 3292 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3293 if (error != 0) 3294 return (error); 3295 error = change_flags(vp, SCARG(uap, flags), l); 3296 vput(vp); 3297 return (error); 3298 } 3299 3300 /* 3301 * Common routine to change flags of a file. 3302 */ 3303 int 3304 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3305 { 3306 struct vattr vattr; 3307 int error; 3308 3309 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3310 3311 vattr_null(&vattr); 3312 vattr.va_flags = flags; 3313 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3314 3315 return (error); 3316 } 3317 3318 /* 3319 * Change mode of a file given path name; this version follows links. 3320 */ 3321 /* ARGSUSED */ 3322 int 3323 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3324 { 3325 /* { 3326 syscallarg(const char *) path; 3327 syscallarg(int) mode; 3328 } */ 3329 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3330 SCARG(uap, mode), 0); 3331 } 3332 3333 int 3334 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3335 { 3336 int error; 3337 struct vnode *vp; 3338 namei_simple_flags_t ns_flag; 3339 3340 if (flags & AT_SYMLINK_NOFOLLOW) 3341 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3342 else 3343 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3344 3345 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3346 if (error != 0) 3347 return error; 3348 3349 error = change_mode(vp, mode, l); 3350 3351 vrele(vp); 3352 3353 return (error); 3354 } 3355 3356 /* 3357 * Change mode of a file given a file descriptor. 3358 */ 3359 /* ARGSUSED */ 3360 int 3361 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3362 { 3363 /* { 3364 syscallarg(int) fd; 3365 syscallarg(int) mode; 3366 } */ 3367 file_t *fp; 3368 int error; 3369 3370 /* fd_getvnode() will use the descriptor for us */ 3371 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3372 return (error); 3373 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3374 fd_putfile(SCARG(uap, fd)); 3375 return (error); 3376 } 3377 3378 int 3379 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3380 register_t *retval) 3381 { 3382 /* { 3383 syscallarg(int) fd; 3384 syscallarg(const char *) path; 3385 syscallarg(int) mode; 3386 syscallarg(int) flag; 3387 } */ 3388 3389 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3390 SCARG(uap, mode), SCARG(uap, flag)); 3391 } 3392 3393 /* 3394 * Change mode of a file given path name; this version does not follow links. 3395 */ 3396 /* ARGSUSED */ 3397 int 3398 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3399 { 3400 /* { 3401 syscallarg(const char *) path; 3402 syscallarg(int) mode; 3403 } */ 3404 int error; 3405 struct vnode *vp; 3406 3407 error = namei_simple_user(SCARG(uap, path), 3408 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3409 if (error != 0) 3410 return (error); 3411 3412 error = change_mode(vp, SCARG(uap, mode), l); 3413 3414 vrele(vp); 3415 return (error); 3416 } 3417 3418 /* 3419 * Common routine to set mode given a vnode. 3420 */ 3421 static int 3422 change_mode(struct vnode *vp, int mode, struct lwp *l) 3423 { 3424 struct vattr vattr; 3425 int error; 3426 3427 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3428 vattr_null(&vattr); 3429 vattr.va_mode = mode & ALLPERMS; 3430 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3431 VOP_UNLOCK(vp); 3432 return (error); 3433 } 3434 3435 /* 3436 * Set ownership given a path name; this version follows links. 3437 */ 3438 /* ARGSUSED */ 3439 int 3440 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3441 { 3442 /* { 3443 syscallarg(const char *) path; 3444 syscallarg(uid_t) uid; 3445 syscallarg(gid_t) gid; 3446 } */ 3447 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3448 SCARG(uap, gid), 0); 3449 } 3450 3451 int 3452 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3453 gid_t gid, int flags) 3454 { 3455 int error; 3456 struct vnode *vp; 3457 namei_simple_flags_t ns_flag; 3458 3459 if (flags & AT_SYMLINK_NOFOLLOW) 3460 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3461 else 3462 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3463 3464 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3465 if (error != 0) 3466 return error; 3467 3468 error = change_owner(vp, uid, gid, l, 0); 3469 3470 vrele(vp); 3471 3472 return (error); 3473 } 3474 3475 /* 3476 * Set ownership given a path name; this version follows links. 3477 * Provides POSIX semantics. 3478 */ 3479 /* ARGSUSED */ 3480 int 3481 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3482 { 3483 /* { 3484 syscallarg(const char *) path; 3485 syscallarg(uid_t) uid; 3486 syscallarg(gid_t) gid; 3487 } */ 3488 int error; 3489 struct vnode *vp; 3490 3491 error = namei_simple_user(SCARG(uap, path), 3492 NSM_FOLLOW_TRYEMULROOT, &vp); 3493 if (error != 0) 3494 return (error); 3495 3496 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3497 3498 vrele(vp); 3499 return (error); 3500 } 3501 3502 /* 3503 * Set ownership given a file descriptor. 3504 */ 3505 /* ARGSUSED */ 3506 int 3507 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3508 { 3509 /* { 3510 syscallarg(int) fd; 3511 syscallarg(uid_t) uid; 3512 syscallarg(gid_t) gid; 3513 } */ 3514 int error; 3515 file_t *fp; 3516 3517 /* fd_getvnode() will use the descriptor for us */ 3518 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3519 return (error); 3520 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3521 l, 0); 3522 fd_putfile(SCARG(uap, fd)); 3523 return (error); 3524 } 3525 3526 int 3527 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3528 register_t *retval) 3529 { 3530 /* { 3531 syscallarg(int) fd; 3532 syscallarg(const char *) path; 3533 syscallarg(uid_t) owner; 3534 syscallarg(gid_t) group; 3535 syscallarg(int) flag; 3536 } */ 3537 3538 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3539 SCARG(uap, owner), SCARG(uap, group), 3540 SCARG(uap, flag)); 3541 } 3542 3543 /* 3544 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3545 */ 3546 /* ARGSUSED */ 3547 int 3548 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3549 { 3550 /* { 3551 syscallarg(int) fd; 3552 syscallarg(uid_t) uid; 3553 syscallarg(gid_t) gid; 3554 } */ 3555 int error; 3556 file_t *fp; 3557 3558 /* fd_getvnode() will use the descriptor for us */ 3559 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3560 return (error); 3561 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3562 l, 1); 3563 fd_putfile(SCARG(uap, fd)); 3564 return (error); 3565 } 3566 3567 /* 3568 * Set ownership given a path name; this version does not follow links. 3569 */ 3570 /* ARGSUSED */ 3571 int 3572 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3573 { 3574 /* { 3575 syscallarg(const char *) path; 3576 syscallarg(uid_t) uid; 3577 syscallarg(gid_t) gid; 3578 } */ 3579 int error; 3580 struct vnode *vp; 3581 3582 error = namei_simple_user(SCARG(uap, path), 3583 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3584 if (error != 0) 3585 return (error); 3586 3587 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3588 3589 vrele(vp); 3590 return (error); 3591 } 3592 3593 /* 3594 * Set ownership given a path name; this version does not follow links. 3595 * Provides POSIX/XPG semantics. 3596 */ 3597 /* ARGSUSED */ 3598 int 3599 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3600 { 3601 /* { 3602 syscallarg(const char *) path; 3603 syscallarg(uid_t) uid; 3604 syscallarg(gid_t) gid; 3605 } */ 3606 int error; 3607 struct vnode *vp; 3608 3609 error = namei_simple_user(SCARG(uap, path), 3610 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3611 if (error != 0) 3612 return (error); 3613 3614 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3615 3616 vrele(vp); 3617 return (error); 3618 } 3619 3620 /* 3621 * Common routine to set ownership given a vnode. 3622 */ 3623 static int 3624 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3625 int posix_semantics) 3626 { 3627 struct vattr vattr; 3628 mode_t newmode; 3629 int error; 3630 3631 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3632 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3633 goto out; 3634 3635 #define CHANGED(x) ((int)(x) != -1) 3636 newmode = vattr.va_mode; 3637 if (posix_semantics) { 3638 /* 3639 * POSIX/XPG semantics: if the caller is not the super-user, 3640 * clear set-user-id and set-group-id bits. Both POSIX and 3641 * the XPG consider the behaviour for calls by the super-user 3642 * implementation-defined; we leave the set-user-id and set- 3643 * group-id settings intact in that case. 3644 */ 3645 if (vattr.va_mode & S_ISUID) { 3646 if (kauth_authorize_vnode(l->l_cred, 3647 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3648 newmode &= ~S_ISUID; 3649 } 3650 if (vattr.va_mode & S_ISGID) { 3651 if (kauth_authorize_vnode(l->l_cred, 3652 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3653 newmode &= ~S_ISGID; 3654 } 3655 } else { 3656 /* 3657 * NetBSD semantics: when changing owner and/or group, 3658 * clear the respective bit(s). 3659 */ 3660 if (CHANGED(uid)) 3661 newmode &= ~S_ISUID; 3662 if (CHANGED(gid)) 3663 newmode &= ~S_ISGID; 3664 } 3665 /* Update va_mode iff altered. */ 3666 if (vattr.va_mode == newmode) 3667 newmode = VNOVAL; 3668 3669 vattr_null(&vattr); 3670 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3671 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3672 vattr.va_mode = newmode; 3673 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3674 #undef CHANGED 3675 3676 out: 3677 VOP_UNLOCK(vp); 3678 return (error); 3679 } 3680 3681 /* 3682 * Set the access and modification times given a path name; this 3683 * version follows links. 3684 */ 3685 /* ARGSUSED */ 3686 int 3687 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3688 register_t *retval) 3689 { 3690 /* { 3691 syscallarg(const char *) path; 3692 syscallarg(const struct timeval *) tptr; 3693 } */ 3694 3695 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3696 SCARG(uap, tptr), UIO_USERSPACE); 3697 } 3698 3699 /* 3700 * Set the access and modification times given a file descriptor. 3701 */ 3702 /* ARGSUSED */ 3703 int 3704 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3705 register_t *retval) 3706 { 3707 /* { 3708 syscallarg(int) fd; 3709 syscallarg(const struct timeval *) tptr; 3710 } */ 3711 int error; 3712 file_t *fp; 3713 3714 /* fd_getvnode() will use the descriptor for us */ 3715 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3716 return (error); 3717 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3718 UIO_USERSPACE); 3719 fd_putfile(SCARG(uap, fd)); 3720 return (error); 3721 } 3722 3723 int 3724 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3725 register_t *retval) 3726 { 3727 /* { 3728 syscallarg(int) fd; 3729 syscallarg(const struct timespec *) tptr; 3730 } */ 3731 int error; 3732 file_t *fp; 3733 3734 /* fd_getvnode() will use the descriptor for us */ 3735 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3736 return (error); 3737 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3738 SCARG(uap, tptr), UIO_USERSPACE); 3739 fd_putfile(SCARG(uap, fd)); 3740 return (error); 3741 } 3742 3743 /* 3744 * Set the access and modification times given a path name; this 3745 * version does not follow links. 3746 */ 3747 int 3748 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3749 register_t *retval) 3750 { 3751 /* { 3752 syscallarg(const char *) path; 3753 syscallarg(const struct timeval *) tptr; 3754 } */ 3755 3756 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3757 SCARG(uap, tptr), UIO_USERSPACE); 3758 } 3759 3760 int 3761 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3762 register_t *retval) 3763 { 3764 /* { 3765 syscallarg(int) fd; 3766 syscallarg(const char *) path; 3767 syscallarg(const struct timespec *) tptr; 3768 syscallarg(int) flag; 3769 } */ 3770 int follow; 3771 const struct timespec *tptr; 3772 int error; 3773 3774 tptr = SCARG(uap, tptr); 3775 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3776 3777 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3778 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3779 3780 return error; 3781 } 3782 3783 /* 3784 * Common routine to set access and modification times given a vnode. 3785 */ 3786 int 3787 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3788 const struct timespec *tptr, enum uio_seg seg) 3789 { 3790 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3791 } 3792 3793 int 3794 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3795 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3796 { 3797 struct vattr vattr; 3798 int error, dorele = 0; 3799 namei_simple_flags_t sflags; 3800 bool vanull, setbirthtime; 3801 struct timespec ts[2]; 3802 3803 KASSERT(l != NULL || fdat == AT_FDCWD); 3804 3805 /* 3806 * I have checked all callers and they pass either FOLLOW, 3807 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3808 * is 0. More to the point, they don't pass anything else. 3809 * Let's keep it that way at least until the namei interfaces 3810 * are fully sanitized. 3811 */ 3812 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3813 sflags = (flag == FOLLOW) ? 3814 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3815 3816 if (tptr == NULL) { 3817 vanull = true; 3818 nanotime(&ts[0]); 3819 ts[1] = ts[0]; 3820 } else { 3821 vanull = false; 3822 if (seg != UIO_SYSSPACE) { 3823 error = copyin(tptr, ts, sizeof (ts)); 3824 if (error != 0) 3825 return error; 3826 } else { 3827 ts[0] = tptr[0]; 3828 ts[1] = tptr[1]; 3829 } 3830 } 3831 3832 if (ts[0].tv_nsec == UTIME_NOW) { 3833 nanotime(&ts[0]); 3834 if (ts[1].tv_nsec == UTIME_NOW) { 3835 vanull = true; 3836 ts[1] = ts[0]; 3837 } 3838 } else if (ts[1].tv_nsec == UTIME_NOW) 3839 nanotime(&ts[1]); 3840 3841 if (vp == NULL) { 3842 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3843 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3844 if (error != 0) 3845 return error; 3846 dorele = 1; 3847 } 3848 3849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3850 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3851 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3852 vattr_null(&vattr); 3853 3854 if (ts[0].tv_nsec != UTIME_OMIT) 3855 vattr.va_atime = ts[0]; 3856 3857 if (ts[1].tv_nsec != UTIME_OMIT) { 3858 vattr.va_mtime = ts[1]; 3859 if (setbirthtime) 3860 vattr.va_birthtime = ts[1]; 3861 } 3862 3863 if (vanull) 3864 vattr.va_vaflags |= VA_UTIMES_NULL; 3865 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3866 VOP_UNLOCK(vp); 3867 3868 if (dorele != 0) 3869 vrele(vp); 3870 3871 return error; 3872 } 3873 3874 int 3875 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3876 const struct timeval *tptr, enum uio_seg seg) 3877 { 3878 struct timespec ts[2]; 3879 struct timespec *tsptr = NULL; 3880 int error; 3881 3882 if (tptr != NULL) { 3883 struct timeval tv[2]; 3884 3885 if (seg != UIO_SYSSPACE) { 3886 error = copyin(tptr, tv, sizeof (tv)); 3887 if (error != 0) 3888 return error; 3889 tptr = tv; 3890 } 3891 3892 if ((tv[0].tv_usec == UTIME_NOW) || 3893 (tv[0].tv_usec == UTIME_OMIT)) 3894 ts[0].tv_nsec = tv[0].tv_usec; 3895 else 3896 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3897 3898 if ((tv[1].tv_usec == UTIME_NOW) || 3899 (tv[1].tv_usec == UTIME_OMIT)) 3900 ts[1].tv_nsec = tv[1].tv_usec; 3901 else 3902 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3903 3904 tsptr = &ts[0]; 3905 } 3906 3907 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3908 } 3909 3910 /* 3911 * Truncate a file given its path name. 3912 */ 3913 /* ARGSUSED */ 3914 int 3915 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3916 { 3917 /* { 3918 syscallarg(const char *) path; 3919 syscallarg(int) pad; 3920 syscallarg(off_t) length; 3921 } */ 3922 struct vnode *vp; 3923 struct vattr vattr; 3924 int error; 3925 3926 error = namei_simple_user(SCARG(uap, path), 3927 NSM_FOLLOW_TRYEMULROOT, &vp); 3928 if (error != 0) 3929 return (error); 3930 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3931 if (vp->v_type == VDIR) 3932 error = EISDIR; 3933 else if ((error = vn_writechk(vp)) == 0 && 3934 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3935 vattr_null(&vattr); 3936 vattr.va_size = SCARG(uap, length); 3937 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3938 } 3939 vput(vp); 3940 return (error); 3941 } 3942 3943 /* 3944 * Truncate a file given a file descriptor. 3945 */ 3946 /* ARGSUSED */ 3947 int 3948 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3949 { 3950 /* { 3951 syscallarg(int) fd; 3952 syscallarg(int) pad; 3953 syscallarg(off_t) length; 3954 } */ 3955 struct vattr vattr; 3956 struct vnode *vp; 3957 file_t *fp; 3958 int error; 3959 3960 /* fd_getvnode() will use the descriptor for us */ 3961 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3962 return (error); 3963 if ((fp->f_flag & FWRITE) == 0) { 3964 error = EINVAL; 3965 goto out; 3966 } 3967 vp = fp->f_data; 3968 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3969 if (vp->v_type == VDIR) 3970 error = EISDIR; 3971 else if ((error = vn_writechk(vp)) == 0) { 3972 vattr_null(&vattr); 3973 vattr.va_size = SCARG(uap, length); 3974 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3975 } 3976 VOP_UNLOCK(vp); 3977 out: 3978 fd_putfile(SCARG(uap, fd)); 3979 return (error); 3980 } 3981 3982 /* 3983 * Sync an open file. 3984 */ 3985 /* ARGSUSED */ 3986 int 3987 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3988 { 3989 /* { 3990 syscallarg(int) fd; 3991 } */ 3992 struct vnode *vp; 3993 file_t *fp; 3994 int error; 3995 3996 /* fd_getvnode() will use the descriptor for us */ 3997 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3998 return (error); 3999 vp = fp->f_data; 4000 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4001 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4002 VOP_UNLOCK(vp); 4003 fd_putfile(SCARG(uap, fd)); 4004 return (error); 4005 } 4006 4007 /* 4008 * Sync a range of file data. API modeled after that found in AIX. 4009 * 4010 * FDATASYNC indicates that we need only save enough metadata to be able 4011 * to re-read the written data. Note we duplicate AIX's requirement that 4012 * the file be open for writing. 4013 */ 4014 /* ARGSUSED */ 4015 int 4016 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4017 { 4018 /* { 4019 syscallarg(int) fd; 4020 syscallarg(int) flags; 4021 syscallarg(off_t) start; 4022 syscallarg(off_t) length; 4023 } */ 4024 struct vnode *vp; 4025 file_t *fp; 4026 int flags, nflags; 4027 off_t s, e, len; 4028 int error; 4029 4030 /* fd_getvnode() will use the descriptor for us */ 4031 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4032 return (error); 4033 4034 if ((fp->f_flag & FWRITE) == 0) { 4035 error = EBADF; 4036 goto out; 4037 } 4038 4039 flags = SCARG(uap, flags); 4040 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4041 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4042 error = EINVAL; 4043 goto out; 4044 } 4045 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4046 if (flags & FDATASYNC) 4047 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4048 else 4049 nflags = FSYNC_WAIT; 4050 if (flags & FDISKSYNC) 4051 nflags |= FSYNC_CACHE; 4052 4053 len = SCARG(uap, length); 4054 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4055 if (len) { 4056 s = SCARG(uap, start); 4057 e = s + len; 4058 if (e < s) { 4059 error = EINVAL; 4060 goto out; 4061 } 4062 } else { 4063 e = 0; 4064 s = 0; 4065 } 4066 4067 vp = fp->f_data; 4068 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4069 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4070 VOP_UNLOCK(vp); 4071 out: 4072 fd_putfile(SCARG(uap, fd)); 4073 return (error); 4074 } 4075 4076 /* 4077 * Sync the data of an open file. 4078 */ 4079 /* ARGSUSED */ 4080 int 4081 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4082 { 4083 /* { 4084 syscallarg(int) fd; 4085 } */ 4086 struct vnode *vp; 4087 file_t *fp; 4088 int error; 4089 4090 /* fd_getvnode() will use the descriptor for us */ 4091 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4092 return (error); 4093 if ((fp->f_flag & FWRITE) == 0) { 4094 fd_putfile(SCARG(uap, fd)); 4095 return (EBADF); 4096 } 4097 vp = fp->f_data; 4098 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4099 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4100 VOP_UNLOCK(vp); 4101 fd_putfile(SCARG(uap, fd)); 4102 return (error); 4103 } 4104 4105 /* 4106 * Rename files, (standard) BSD semantics frontend. 4107 */ 4108 /* ARGSUSED */ 4109 int 4110 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4111 { 4112 /* { 4113 syscallarg(const char *) from; 4114 syscallarg(const char *) to; 4115 } */ 4116 4117 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4118 SCARG(uap, to), UIO_USERSPACE, 0)); 4119 } 4120 4121 int 4122 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4123 register_t *retval) 4124 { 4125 /* { 4126 syscallarg(int) fromfd; 4127 syscallarg(const char *) from; 4128 syscallarg(int) tofd; 4129 syscallarg(const char *) to; 4130 } */ 4131 4132 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4133 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4134 } 4135 4136 /* 4137 * Rename files, POSIX semantics frontend. 4138 */ 4139 /* ARGSUSED */ 4140 int 4141 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4142 { 4143 /* { 4144 syscallarg(const char *) from; 4145 syscallarg(const char *) to; 4146 } */ 4147 4148 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4149 SCARG(uap, to), UIO_USERSPACE, 1)); 4150 } 4151 4152 /* 4153 * Rename files. Source and destination must either both be directories, 4154 * or both not be directories. If target is a directory, it must be empty. 4155 * If `from' and `to' refer to the same object, the value of the `retain' 4156 * argument is used to determine whether `from' will be 4157 * 4158 * (retain == 0) deleted unless `from' and `to' refer to the same 4159 * object in the file system's name space (BSD). 4160 * (retain == 1) always retained (POSIX). 4161 * 4162 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4163 */ 4164 int 4165 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4166 { 4167 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4168 } 4169 4170 static int 4171 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4172 const char *to, enum uio_seg seg, int retain) 4173 { 4174 struct pathbuf *fpb, *tpb; 4175 struct nameidata fnd, tnd; 4176 struct vnode *fdvp, *fvp; 4177 struct vnode *tdvp, *tvp; 4178 struct mount *mp, *tmp; 4179 int error; 4180 4181 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4182 4183 error = pathbuf_maybe_copyin(from, seg, &fpb); 4184 if (error) 4185 goto out0; 4186 KASSERT(fpb != NULL); 4187 4188 error = pathbuf_maybe_copyin(to, seg, &tpb); 4189 if (error) 4190 goto out1; 4191 KASSERT(tpb != NULL); 4192 4193 /* 4194 * Lookup from. 4195 * 4196 * XXX LOCKPARENT is wrong because we don't actually want it 4197 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4198 * insane, so for the time being we need to leave it like this. 4199 */ 4200 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4201 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4202 goto out2; 4203 4204 /* 4205 * Pull out the important results of the lookup, fdvp and fvp. 4206 * Of course, fvp is bogus because we're about to unlock fdvp. 4207 */ 4208 fdvp = fnd.ni_dvp; 4209 fvp = fnd.ni_vp; 4210 KASSERT(fdvp != NULL); 4211 KASSERT(fvp != NULL); 4212 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4213 4214 /* 4215 * Make sure neither fdvp nor fvp is locked. 4216 */ 4217 if (fdvp != fvp) 4218 VOP_UNLOCK(fdvp); 4219 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4220 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4221 4222 /* 4223 * Reject renaming `.' and `..'. Can't do this until after 4224 * namei because we need namei's parsing to find the final 4225 * component name. (namei should just leave us with the final 4226 * component name and not look it up itself, but anyway...) 4227 * 4228 * This was here before because we used to relookup from 4229 * instead of to and relookup requires the caller to check 4230 * this, but now file systems may depend on this check, so we 4231 * must retain it until the file systems are all rototilled. 4232 */ 4233 if (((fnd.ni_cnd.cn_namelen == 1) && 4234 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4235 ((fnd.ni_cnd.cn_namelen == 2) && 4236 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4237 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4238 error = EINVAL; /* XXX EISDIR? */ 4239 goto abort0; 4240 } 4241 4242 /* 4243 * Lookup to. 4244 * 4245 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4246 * fvp here to decide whether to add CREATEDIR is a load of 4247 * bollocks because fvp might be the wrong node by now, since 4248 * fdvp is unlocked. 4249 * 4250 * XXX Why not pass CREATEDIR always? 4251 */ 4252 NDINIT(&tnd, RENAME, 4253 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4254 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4255 tpb); 4256 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4257 goto abort0; 4258 4259 /* 4260 * Pull out the important results of the lookup, tdvp and tvp. 4261 * Of course, tvp is bogus because we're about to unlock tdvp. 4262 */ 4263 tdvp = tnd.ni_dvp; 4264 tvp = tnd.ni_vp; 4265 KASSERT(tdvp != NULL); 4266 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4267 4268 /* 4269 * Make sure neither tdvp nor tvp is locked. 4270 */ 4271 if (tdvp != tvp) 4272 VOP_UNLOCK(tdvp); 4273 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4274 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4275 4276 /* 4277 * Reject renaming onto `.' or `..'. relookup is unhappy with 4278 * these, which is why we must do this here. Once upon a time 4279 * we relooked up from instead of to, and consequently didn't 4280 * need this check, but now that we relookup to instead of 4281 * from, we need this; and we shall need it forever forward 4282 * until the VOP_RENAME protocol changes, because file systems 4283 * will no doubt begin to depend on this check. 4284 */ 4285 if (((tnd.ni_cnd.cn_namelen == 1) && 4286 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4287 ((tnd.ni_cnd.cn_namelen == 2) && 4288 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4289 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4290 error = EINVAL; /* XXX EISDIR? */ 4291 goto abort1; 4292 } 4293 4294 /* 4295 * Get the mount point. If the file system has been unmounted, 4296 * which it may be because we're not holding any vnode locks, 4297 * then v_mount will be NULL. We're not really supposed to 4298 * read v_mount without holding the vnode lock, but since we 4299 * have fdvp referenced, if fdvp->v_mount changes then at worst 4300 * it will be set to NULL, not changed to another mount point. 4301 * And, of course, since it is up to the file system to 4302 * determine the real lock order, we can't lock both fdvp and 4303 * tdvp at the same time. 4304 */ 4305 mp = fdvp->v_mount; 4306 if (mp == NULL) { 4307 error = ENOENT; 4308 goto abort1; 4309 } 4310 4311 /* 4312 * Make sure the mount points match. Again, although we don't 4313 * hold any vnode locks, the v_mount fields may change -- but 4314 * at worst they will change to NULL, so this will never become 4315 * a cross-device rename, because we hold vnode references. 4316 * 4317 * XXX Because nothing is locked and the compiler may reorder 4318 * things here, unmounting the file system at an inopportune 4319 * moment may cause rename to fail with ENXDEV when it really 4320 * should fail with ENOENT. 4321 */ 4322 tmp = tdvp->v_mount; 4323 if (tmp == NULL) { 4324 error = ENOENT; 4325 goto abort1; 4326 } 4327 4328 if (mp != tmp) { 4329 error = EXDEV; 4330 goto abort1; 4331 } 4332 4333 /* 4334 * Take the vfs rename lock to avoid cross-directory screw cases. 4335 * Nothing is locked currently, so taking this lock is safe. 4336 */ 4337 error = VFS_RENAMELOCK_ENTER(mp); 4338 if (error) 4339 goto abort1; 4340 4341 /* 4342 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4343 * and nothing is locked except for the vfs rename lock. 4344 * 4345 * The next step is a little rain dance to conform to the 4346 * insane lock protocol, even though it does nothing to ward 4347 * off race conditions. 4348 * 4349 * We need tdvp and tvp to be locked. However, because we have 4350 * unlocked tdvp in order to hold no locks while we take the 4351 * vfs rename lock, tvp may be wrong here, and we can't safely 4352 * lock it even if the sensible file systems will just unlock 4353 * it straight away. Consequently, we must lock tdvp and then 4354 * relookup tvp to get it locked. 4355 * 4356 * Finally, because the VOP_RENAME protocol is brain-damaged 4357 * and various file systems insanely depend on the semantics of 4358 * this brain damage, the lookup of to must be the last lookup 4359 * before VOP_RENAME. 4360 */ 4361 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4362 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4363 if (error) 4364 goto abort2; 4365 4366 /* 4367 * Drop the old tvp and pick up the new one -- which might be 4368 * the same, but that doesn't matter to us. After this, tdvp 4369 * and tvp should both be locked. 4370 */ 4371 if (tvp != NULL) 4372 vrele(tvp); 4373 tvp = tnd.ni_vp; 4374 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4375 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4376 4377 /* 4378 * The old do_sys_rename had various consistency checks here 4379 * involving fvp and tvp. fvp is bogus already here, and tvp 4380 * will become bogus soon in any sensible file system, so the 4381 * only purpose in putting these checks here is to give lip 4382 * service to these screw cases and to acknowledge that they 4383 * exist, not actually to handle them, but here you go 4384 * anyway... 4385 */ 4386 4387 /* 4388 * Acknowledge that directories and non-directories aren't 4389 * suposed to mix. 4390 */ 4391 if (tvp != NULL) { 4392 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4393 error = ENOTDIR; 4394 goto abort3; 4395 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4396 error = EISDIR; 4397 goto abort3; 4398 } 4399 } 4400 4401 /* 4402 * Acknowledge some random screw case, among the dozens that 4403 * might arise. 4404 */ 4405 if (fvp == tdvp) { 4406 error = EINVAL; 4407 goto abort3; 4408 } 4409 4410 /* 4411 * Acknowledge that POSIX has a wacky screw case. 4412 * 4413 * XXX Eventually the retain flag needs to be passed on to 4414 * VOP_RENAME. 4415 */ 4416 if (fvp == tvp) { 4417 if (retain) { 4418 error = 0; 4419 goto abort3; 4420 } else if ((fdvp == tdvp) && 4421 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4422 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4423 fnd.ni_cnd.cn_namelen))) { 4424 error = 0; 4425 goto abort3; 4426 } 4427 } 4428 4429 /* 4430 * Make sure veriexec can screw us up. (But a race can screw 4431 * up veriexec, of course -- remember, fvp and (soon) tvp are 4432 * bogus.) 4433 */ 4434 #if NVERIEXEC > 0 4435 { 4436 char *f1, *f2; 4437 size_t f1_len; 4438 size_t f2_len; 4439 4440 f1_len = fnd.ni_cnd.cn_namelen + 1; 4441 f1 = kmem_alloc(f1_len, KM_SLEEP); 4442 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4443 4444 f2_len = tnd.ni_cnd.cn_namelen + 1; 4445 f2 = kmem_alloc(f2_len, KM_SLEEP); 4446 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4447 4448 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4449 4450 kmem_free(f1, f1_len); 4451 kmem_free(f2, f2_len); 4452 4453 if (error) 4454 goto abort3; 4455 } 4456 #endif /* NVERIEXEC > 0 */ 4457 4458 /* 4459 * All ready. Incant the rename vop. 4460 */ 4461 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4462 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4463 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4464 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4465 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4466 4467 /* 4468 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4469 * tdvp and tvp. But we can't assert any of that. 4470 */ 4471 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4472 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4473 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4474 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4475 4476 /* 4477 * So all we have left to do is to drop the rename lock and 4478 * destroy the pathbufs. 4479 */ 4480 VFS_RENAMELOCK_EXIT(mp); 4481 goto out2; 4482 4483 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4484 VOP_UNLOCK(tvp); 4485 abort2: VOP_UNLOCK(tdvp); 4486 VFS_RENAMELOCK_EXIT(mp); 4487 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4488 vrele(tdvp); 4489 if (tvp != NULL) 4490 vrele(tvp); 4491 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4492 vrele(fdvp); 4493 vrele(fvp); 4494 out2: pathbuf_destroy(tpb); 4495 out1: pathbuf_destroy(fpb); 4496 out0: return error; 4497 } 4498 4499 /* 4500 * Make a directory file. 4501 */ 4502 /* ARGSUSED */ 4503 int 4504 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4505 { 4506 /* { 4507 syscallarg(const char *) path; 4508 syscallarg(int) mode; 4509 } */ 4510 4511 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4512 SCARG(uap, mode), UIO_USERSPACE); 4513 } 4514 4515 int 4516 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4517 register_t *retval) 4518 { 4519 /* { 4520 syscallarg(int) fd; 4521 syscallarg(const char *) path; 4522 syscallarg(int) mode; 4523 } */ 4524 4525 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4526 SCARG(uap, mode), UIO_USERSPACE); 4527 } 4528 4529 4530 int 4531 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4532 { 4533 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4534 } 4535 4536 static int 4537 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4538 enum uio_seg seg) 4539 { 4540 struct proc *p = curlwp->l_proc; 4541 struct vnode *vp; 4542 struct vattr vattr; 4543 int error; 4544 struct pathbuf *pb; 4545 struct nameidata nd; 4546 4547 KASSERT(l != NULL || fdat == AT_FDCWD); 4548 4549 /* XXX bollocks, should pass in a pathbuf */ 4550 error = pathbuf_maybe_copyin(path, seg, &pb); 4551 if (error) { 4552 return error; 4553 } 4554 4555 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4556 4557 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4558 pathbuf_destroy(pb); 4559 return (error); 4560 } 4561 vp = nd.ni_vp; 4562 if (vp != NULL) { 4563 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4564 if (nd.ni_dvp == vp) 4565 vrele(nd.ni_dvp); 4566 else 4567 vput(nd.ni_dvp); 4568 vrele(vp); 4569 pathbuf_destroy(pb); 4570 return (EEXIST); 4571 } 4572 vattr_null(&vattr); 4573 vattr.va_type = VDIR; 4574 /* We will read cwdi->cwdi_cmask unlocked. */ 4575 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4576 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4577 if (!error) 4578 vrele(nd.ni_vp); 4579 vput(nd.ni_dvp); 4580 pathbuf_destroy(pb); 4581 return (error); 4582 } 4583 4584 /* 4585 * Remove a directory file. 4586 */ 4587 /* ARGSUSED */ 4588 int 4589 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4590 { 4591 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4592 AT_REMOVEDIR, UIO_USERSPACE); 4593 } 4594 4595 /* 4596 * Read a block of directory entries in a file system independent format. 4597 */ 4598 int 4599 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4600 { 4601 /* { 4602 syscallarg(int) fd; 4603 syscallarg(char *) buf; 4604 syscallarg(size_t) count; 4605 } */ 4606 file_t *fp; 4607 int error, done; 4608 4609 /* fd_getvnode() will use the descriptor for us */ 4610 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4611 return (error); 4612 if ((fp->f_flag & FREAD) == 0) { 4613 error = EBADF; 4614 goto out; 4615 } 4616 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4617 SCARG(uap, count), &done, l, 0, 0); 4618 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4619 *retval = done; 4620 out: 4621 fd_putfile(SCARG(uap, fd)); 4622 return (error); 4623 } 4624 4625 /* 4626 * Set the mode mask for creation of filesystem nodes. 4627 */ 4628 int 4629 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4630 { 4631 /* { 4632 syscallarg(mode_t) newmask; 4633 } */ 4634 struct proc *p = l->l_proc; 4635 struct cwdinfo *cwdi; 4636 4637 /* 4638 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4639 * important is that we serialize changes to the mask. The 4640 * rw_exit() will issue a write memory barrier on our behalf, 4641 * and force the changes out to other CPUs (as it must use an 4642 * atomic operation, draining the local CPU's store buffers). 4643 */ 4644 cwdi = p->p_cwdi; 4645 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4646 *retval = cwdi->cwdi_cmask; 4647 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4648 rw_exit(&cwdi->cwdi_lock); 4649 4650 return (0); 4651 } 4652 4653 int 4654 dorevoke(struct vnode *vp, kauth_cred_t cred) 4655 { 4656 struct vattr vattr; 4657 int error, fs_decision; 4658 4659 vn_lock(vp, LK_SHARED | LK_RETRY); 4660 error = VOP_GETATTR(vp, &vattr, cred); 4661 VOP_UNLOCK(vp); 4662 if (error != 0) 4663 return error; 4664 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4665 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4666 fs_decision); 4667 if (!error) 4668 VOP_REVOKE(vp, REVOKEALL); 4669 return (error); 4670 } 4671 4672 /* 4673 * Void all references to file by ripping underlying filesystem 4674 * away from vnode. 4675 */ 4676 /* ARGSUSED */ 4677 int 4678 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4679 { 4680 /* { 4681 syscallarg(const char *) path; 4682 } */ 4683 struct vnode *vp; 4684 int error; 4685 4686 error = namei_simple_user(SCARG(uap, path), 4687 NSM_FOLLOW_TRYEMULROOT, &vp); 4688 if (error != 0) 4689 return (error); 4690 error = dorevoke(vp, l->l_cred); 4691 vrele(vp); 4692 return (error); 4693 } 4694