1 /* $NetBSD: vfs_syscalls.c,v 1.467 2013/07/20 15:55:57 njoly Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.467 2013/07/20 15:55:57 njoly Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *l); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_open(lwp_t *, struct vnode *, struct pathbuf *, int, int, int *); 123 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 124 static int do_sys_mknodat(struct lwp *, int, const char *, mode_t, 125 dev_t, register_t *, enum uio_seg); 126 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 127 enum uio_seg); 128 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 129 
static int do_sys_chmodat(struct lwp *, int, const char *, int, int); 130 static int do_sys_chownat(struct lwp *, int, const char *, uid_t, gid_t, int); 131 static int do_sys_accessat(struct lwp *, int, const char *, int ,int); 132 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 133 enum uio_seg); 134 static int do_sys_linkat(struct lwp *, int, const char *, int, const char *, 135 int, register_t *); 136 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 137 enum uio_seg, int); 138 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 139 size_t, register_t *); 140 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 141 142 static int fd_nameiat(struct lwp *, int, struct nameidata *); 143 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 144 namei_simple_flags_t, struct vnode **); 145 146 147 /* 148 * This table is used to maintain compatibility with 4.3BSD 149 * and NetBSD 0.9 mount syscalls - and possibly other systems. 150 * Note, the order is important! 151 * 152 * Do not modify this table. It should only contain filesystems 153 * supported by NetBSD 0.9 and 4.3BSD. 
154 */ 155 const char * const mountcompatnames[] = { 156 NULL, /* 0 = MOUNT_NONE */ 157 MOUNT_FFS, /* 1 = MOUNT_UFS */ 158 MOUNT_NFS, /* 2 */ 159 MOUNT_MFS, /* 3 */ 160 MOUNT_MSDOS, /* 4 */ 161 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 162 MOUNT_FDESC, /* 6 */ 163 MOUNT_KERNFS, /* 7 */ 164 NULL, /* 8 = MOUNT_DEVFS */ 165 MOUNT_AFS, /* 9 */ 166 }; 167 168 const int nmountcompatnames = __arraycount(mountcompatnames); 169 170 static int 171 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 172 { 173 file_t *dfp; 174 int error; 175 176 if (fdat != AT_FDCWD) { 177 if ((error = fd_getvnode(fdat, &dfp)) != 0) 178 goto out; 179 180 NDAT(ndp, dfp->f_data); 181 } 182 183 error = namei(ndp); 184 185 if (fdat != AT_FDCWD) 186 fd_putfile(fdat); 187 out: 188 return error; 189 } 190 191 static int 192 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 193 namei_simple_flags_t sflags, struct vnode **vp_ret) 194 { 195 file_t *dfp; 196 struct vnode *dvp; 197 int error; 198 199 if (fdat != AT_FDCWD) { 200 if ((error = fd_getvnode(fdat, &dfp)) != 0) 201 goto out; 202 203 dvp = dfp->f_data; 204 } else { 205 dvp = NULL; 206 } 207 208 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 209 210 if (fdat != AT_FDCWD) 211 fd_putfile(fdat); 212 out: 213 return error; 214 } 215 216 static int 217 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 218 { 219 int error; 220 221 fp->f_flag = flags & FMASK; 222 fp->f_type = DTYPE_VNODE; 223 fp->f_ops = &vnops; 224 fp->f_data = vp; 225 226 if (flags & (O_EXLOCK | O_SHLOCK)) { 227 struct flock lf; 228 int type; 229 230 lf.l_whence = SEEK_SET; 231 lf.l_start = 0; 232 lf.l_len = 0; 233 if (flags & O_EXLOCK) 234 lf.l_type = F_WRLCK; 235 else 236 lf.l_type = F_RDLCK; 237 type = F_FLOCK; 238 if ((flags & FNONBLOCK) == 0) 239 type |= F_WAIT; 240 VOP_UNLOCK(vp); 241 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 242 if (error) { 243 (void) vn_close(vp, fp->f_flag, fp->f_cred); 244 fd_abort(l->l_proc, fp, 
indx); 245 return error; 246 } 247 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 248 atomic_or_uint(&fp->f_flag, FHASLOCK); 249 } 250 if (flags & O_CLOEXEC) 251 fd_set_exclose(l, indx, true); 252 return 0; 253 } 254 255 static int 256 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 257 void *data, size_t *data_len) 258 { 259 struct mount *mp; 260 int error = 0, saved_flags; 261 262 mp = vp->v_mount; 263 saved_flags = mp->mnt_flag; 264 265 /* We can operate only on VV_ROOT nodes. */ 266 if ((vp->v_vflag & VV_ROOT) == 0) { 267 error = EINVAL; 268 goto out; 269 } 270 271 /* 272 * We only allow the filesystem to be reloaded if it 273 * is currently mounted read-only. Additionally, we 274 * prevent read-write to read-only downgrades. 275 */ 276 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 277 (mp->mnt_flag & MNT_RDONLY) == 0 && 278 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 279 error = EOPNOTSUPP; /* Needs translation */ 280 goto out; 281 } 282 283 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 284 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 285 if (error) 286 goto out; 287 288 if (vfs_busy(mp, NULL)) { 289 error = EPERM; 290 goto out; 291 } 292 293 mutex_enter(&mp->mnt_updating); 294 295 mp->mnt_flag &= ~MNT_OP_FLAGS; 296 mp->mnt_flag |= flags & MNT_OP_FLAGS; 297 298 /* 299 * Set the mount level flags. 300 */ 301 if (flags & MNT_RDONLY) 302 mp->mnt_flag |= MNT_RDONLY; 303 else if (mp->mnt_flag & MNT_RDONLY) 304 mp->mnt_iflag |= IMNT_WANTRDWR; 305 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 306 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 307 error = VFS_MOUNT(mp, path, data, data_len); 308 309 if (error && data != NULL) { 310 int error2; 311 312 /* 313 * Update failed; let's try and see if it was an 314 * export request. For compat with 3.0 and earlier. 
315 */ 316 error2 = vfs_hooks_reexport(mp, path, data); 317 318 /* 319 * Only update error code if the export request was 320 * understood but some problem occurred while 321 * processing it. 322 */ 323 if (error2 != EJUSTRETURN) 324 error = error2; 325 } 326 327 if (mp->mnt_iflag & IMNT_WANTRDWR) 328 mp->mnt_flag &= ~MNT_RDONLY; 329 if (error) 330 mp->mnt_flag = saved_flags; 331 mp->mnt_flag &= ~MNT_OP_FLAGS; 332 mp->mnt_iflag &= ~IMNT_WANTRDWR; 333 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 334 if (mp->mnt_syncer == NULL) 335 error = vfs_allocate_syncvnode(mp); 336 } else { 337 if (mp->mnt_syncer != NULL) 338 vfs_deallocate_syncvnode(mp); 339 } 340 mutex_exit(&mp->mnt_updating); 341 vfs_unbusy(mp, false, NULL); 342 343 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 344 (flags & MNT_EXTATTR)) { 345 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 346 NULL, 0, NULL) != 0) { 347 printf("%s: failed to start extattr, error = %d", 348 mp->mnt_stat.f_mntonname, error); 349 mp->mnt_flag &= ~MNT_EXTATTR; 350 } 351 } 352 353 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 354 !(flags & MNT_EXTATTR)) { 355 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 356 NULL, 0, NULL) != 0) { 357 printf("%s: failed to stop extattr, error = %d", 358 mp->mnt_stat.f_mntonname, error); 359 mp->mnt_flag |= MNT_RDONLY; 360 } 361 } 362 out: 363 return (error); 364 } 365 366 static int 367 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 368 { 369 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 370 int error; 371 372 /* Copy file-system type from userspace. */ 373 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 374 if (error) { 375 /* 376 * Historically, filesystem types were identified by numbers. 377 * If we get an integer for the filesystem type instead of a 378 * string, we check to see if it matches one of the historic 379 * filesystem types. 
380 */ 381 u_long fsindex = (u_long)fstype; 382 if (fsindex >= nmountcompatnames || 383 mountcompatnames[fsindex] == NULL) 384 return ENODEV; 385 strlcpy(fstypename, mountcompatnames[fsindex], 386 sizeof(fstypename)); 387 } 388 389 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 390 if (strcmp(fstypename, "ufs") == 0) 391 fstypename[0] = 'f'; 392 393 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 394 return 0; 395 396 /* If we can autoload a vfs module, try again */ 397 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 398 399 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 400 return 0; 401 402 return ENODEV; 403 } 404 405 static int 406 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 407 void *data, size_t *data_len) 408 { 409 struct mount *mp; 410 int error; 411 412 /* If MNT_GETARGS is specified, it should be the only flag. */ 413 if (flags & ~MNT_GETARGS) 414 return EINVAL; 415 416 mp = vp->v_mount; 417 418 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 419 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 420 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 421 if (error) 422 return error; 423 424 if ((vp->v_vflag & VV_ROOT) == 0) 425 return EINVAL; 426 427 if (vfs_busy(mp, NULL)) 428 return EPERM; 429 430 mutex_enter(&mp->mnt_updating); 431 mp->mnt_flag &= ~MNT_OP_FLAGS; 432 mp->mnt_flag |= MNT_GETARGS; 433 error = VFS_MOUNT(mp, path, data, data_len); 434 mp->mnt_flag &= ~MNT_OP_FLAGS; 435 mutex_exit(&mp->mnt_updating); 436 437 vfs_unbusy(mp, false, NULL); 438 return (error); 439 } 440 441 int 442 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 443 { 444 /* { 445 syscallarg(const char *) type; 446 syscallarg(const char *) path; 447 syscallarg(int) flags; 448 syscallarg(void *) data; 449 syscallarg(size_t) data_len; 450 } */ 451 452 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 453 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 454 SCARG(uap, data_len), retval); 455 } 456 457 int 458 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 459 const char *path, int flags, void *data, enum uio_seg data_seg, 460 size_t data_len, register_t *retval) 461 { 462 struct vnode *vp; 463 void *data_buf = data; 464 bool vfsopsrele = false; 465 int error; 466 467 /* XXX: The calling convention of this routine is totally bizarre */ 468 if (vfsops) 469 vfsopsrele = true; 470 471 /* 472 * Get vnode to be covered 473 */ 474 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 475 if (error != 0) { 476 vp = NULL; 477 goto done; 478 } 479 480 if (vfsops == NULL) { 481 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 482 vfsops = vp->v_mount->mnt_op; 483 } else { 484 /* 'type' is userspace */ 485 error = mount_get_vfsops(type, &vfsops); 486 if (error != 0) 487 goto done; 488 vfsopsrele = true; 489 } 490 } 491 492 if (data != NULL && data_seg == UIO_USERSPACE) { 493 if (data_len == 0) { 494 /* No length supplied, use default for filesystem */ 495 
data_len = vfsops->vfs_min_mount_data; 496 if (data_len > VFS_MAX_MOUNT_DATA) { 497 error = EINVAL; 498 goto done; 499 } 500 /* 501 * Hopefully a longer buffer won't make copyin() fail. 502 * For compatibility with 3.0 and earlier. 503 */ 504 if (flags & MNT_UPDATE 505 && data_len < sizeof (struct mnt_export_args30)) 506 data_len = sizeof (struct mnt_export_args30); 507 } 508 data_buf = kmem_alloc(data_len, KM_SLEEP); 509 510 /* NFS needs the buffer even for mnt_getargs .... */ 511 error = copyin(data, data_buf, data_len); 512 if (error != 0) 513 goto done; 514 } 515 516 if (flags & MNT_GETARGS) { 517 if (data_len == 0) { 518 error = EINVAL; 519 goto done; 520 } 521 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 522 if (error != 0) 523 goto done; 524 if (data_seg == UIO_USERSPACE) 525 error = copyout(data_buf, data, data_len); 526 *retval = data_len; 527 } else if (flags & MNT_UPDATE) { 528 error = mount_update(l, vp, path, flags, data_buf, &data_len); 529 } else { 530 /* Locking is handled internally in mount_domount(). */ 531 KASSERT(vfsopsrele == true); 532 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 533 &data_len); 534 vfsopsrele = false; 535 } 536 537 done: 538 if (vfsopsrele) 539 vfs_delref(vfsops); 540 if (vp != NULL) { 541 vrele(vp); 542 } 543 if (data_buf != data) 544 kmem_free(data_buf, data_len); 545 return (error); 546 } 547 548 /* 549 * Unmount a file system. 550 * 551 * Note: unmount takes a path to the vnode mounted on as argument, 552 * not special file (as before). 
553 */ 554 /* ARGSUSED */ 555 int 556 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 557 { 558 /* { 559 syscallarg(const char *) path; 560 syscallarg(int) flags; 561 } */ 562 struct vnode *vp; 563 struct mount *mp; 564 int error; 565 struct pathbuf *pb; 566 struct nameidata nd; 567 568 error = pathbuf_copyin(SCARG(uap, path), &pb); 569 if (error) { 570 return error; 571 } 572 573 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 574 if ((error = namei(&nd)) != 0) { 575 pathbuf_destroy(pb); 576 return error; 577 } 578 vp = nd.ni_vp; 579 pathbuf_destroy(pb); 580 581 mp = vp->v_mount; 582 atomic_inc_uint(&mp->mnt_refcnt); 583 VOP_UNLOCK(vp); 584 585 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 586 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 587 if (error) { 588 vrele(vp); 589 vfs_destroy(mp); 590 return (error); 591 } 592 593 /* 594 * Don't allow unmounting the root file system. 595 */ 596 if (mp->mnt_flag & MNT_ROOTFS) { 597 vrele(vp); 598 vfs_destroy(mp); 599 return (EINVAL); 600 } 601 602 /* 603 * Must be the root of the filesystem 604 */ 605 if ((vp->v_vflag & VV_ROOT) == 0) { 606 vrele(vp); 607 vfs_destroy(mp); 608 return (EINVAL); 609 } 610 611 vrele(vp); 612 error = dounmount(mp, SCARG(uap, flags), l); 613 vfs_destroy(mp); 614 return error; 615 } 616 617 /* 618 * Sync each mounted filesystem. 
619 */ 620 #ifdef DEBUG 621 int syncprt = 0; 622 struct ctldebug debug0 = { "syncprt", &syncprt }; 623 #endif 624 625 void 626 do_sys_sync(struct lwp *l) 627 { 628 struct mount *mp, *nmp; 629 int asyncflag; 630 631 mutex_enter(&mountlist_lock); 632 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 633 mp = nmp) { 634 if (vfs_busy(mp, &nmp)) { 635 continue; 636 } 637 mutex_enter(&mp->mnt_updating); 638 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 639 asyncflag = mp->mnt_flag & MNT_ASYNC; 640 mp->mnt_flag &= ~MNT_ASYNC; 641 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 642 if (asyncflag) 643 mp->mnt_flag |= MNT_ASYNC; 644 } 645 mutex_exit(&mp->mnt_updating); 646 vfs_unbusy(mp, false, &nmp); 647 } 648 mutex_exit(&mountlist_lock); 649 #ifdef DEBUG 650 if (syncprt) 651 vfs_bufstats(); 652 #endif /* DEBUG */ 653 } 654 655 /* ARGSUSED */ 656 int 657 sys_sync(struct lwp *l, const void *v, register_t *retval) 658 { 659 do_sys_sync(l); 660 return (0); 661 } 662 663 664 /* 665 * Access or change filesystem quotas. 
666 * 667 * (this is really 14 different calls bundled into one) 668 */ 669 670 static int 671 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 672 { 673 struct quotastat info_k; 674 int error; 675 676 /* ensure any padding bytes are cleared */ 677 memset(&info_k, 0, sizeof(info_k)); 678 679 error = vfs_quotactl_stat(mp, &info_k); 680 if (error) { 681 return error; 682 } 683 684 return copyout(&info_k, info_u, sizeof(info_k)); 685 } 686 687 static int 688 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 689 struct quotaidtypestat *info_u) 690 { 691 struct quotaidtypestat info_k; 692 int error; 693 694 /* ensure any padding bytes are cleared */ 695 memset(&info_k, 0, sizeof(info_k)); 696 697 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 698 if (error) { 699 return error; 700 } 701 702 return copyout(&info_k, info_u, sizeof(info_k)); 703 } 704 705 static int 706 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 707 struct quotaobjtypestat *info_u) 708 { 709 struct quotaobjtypestat info_k; 710 int error; 711 712 /* ensure any padding bytes are cleared */ 713 memset(&info_k, 0, sizeof(info_k)); 714 715 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 716 if (error) { 717 return error; 718 } 719 720 return copyout(&info_k, info_u, sizeof(info_k)); 721 } 722 723 static int 724 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 725 struct quotaval *val_u) 726 { 727 struct quotakey key_k; 728 struct quotaval val_k; 729 int error; 730 731 /* ensure any padding bytes are cleared */ 732 memset(&val_k, 0, sizeof(val_k)); 733 734 error = copyin(key_u, &key_k, sizeof(key_k)); 735 if (error) { 736 return error; 737 } 738 739 error = vfs_quotactl_get(mp, &key_k, &val_k); 740 if (error) { 741 return error; 742 } 743 744 return copyout(&val_k, val_u, sizeof(val_k)); 745 } 746 747 static int 748 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 749 const struct quotaval *val_u) 750 { 751 struct 
quotakey key_k; 752 struct quotaval val_k; 753 int error; 754 755 error = copyin(key_u, &key_k, sizeof(key_k)); 756 if (error) { 757 return error; 758 } 759 760 error = copyin(val_u, &val_k, sizeof(val_k)); 761 if (error) { 762 return error; 763 } 764 765 return vfs_quotactl_put(mp, &key_k, &val_k); 766 } 767 768 static int 769 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 770 { 771 struct quotakey key_k; 772 int error; 773 774 error = copyin(key_u, &key_k, sizeof(key_k)); 775 if (error) { 776 return error; 777 } 778 779 return vfs_quotactl_delete(mp, &key_k); 780 } 781 782 static int 783 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 784 { 785 struct quotakcursor cursor_k; 786 int error; 787 788 /* ensure any padding bytes are cleared */ 789 memset(&cursor_k, 0, sizeof(cursor_k)); 790 791 error = vfs_quotactl_cursoropen(mp, &cursor_k); 792 if (error) { 793 return error; 794 } 795 796 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 797 } 798 799 static int 800 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 801 { 802 struct quotakcursor cursor_k; 803 int error; 804 805 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 806 if (error) { 807 return error; 808 } 809 810 return vfs_quotactl_cursorclose(mp, &cursor_k); 811 } 812 813 static int 814 do_sys_quotactl_cursorskipidtype(struct mount *mp, 815 struct quotakcursor *cursor_u, int idtype) 816 { 817 struct quotakcursor cursor_k; 818 int error; 819 820 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 821 if (error) { 822 return error; 823 } 824 825 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 826 if (error) { 827 return error; 828 } 829 830 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 831 } 832 833 static int 834 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 835 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 836 unsigned *ret_u) 837 { 838 
#define CGET_STACK_MAX 8 839 struct quotakcursor cursor_k; 840 struct quotakey stackkeys[CGET_STACK_MAX]; 841 struct quotaval stackvals[CGET_STACK_MAX]; 842 struct quotakey *keys_k; 843 struct quotaval *vals_k; 844 unsigned ret_k; 845 int error; 846 847 if (maxnum > 128) { 848 maxnum = 128; 849 } 850 851 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 852 if (error) { 853 return error; 854 } 855 856 if (maxnum <= CGET_STACK_MAX) { 857 keys_k = stackkeys; 858 vals_k = stackvals; 859 /* ensure any padding bytes are cleared */ 860 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 861 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 862 } else { 863 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 864 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 865 } 866 867 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 868 &ret_k); 869 if (error) { 870 goto fail; 871 } 872 873 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 874 if (error) { 875 goto fail; 876 } 877 878 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 879 if (error) { 880 goto fail; 881 } 882 883 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 884 if (error) { 885 goto fail; 886 } 887 888 /* do last to maximize the chance of being able to recover a failure */ 889 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 890 891 fail: 892 if (keys_k != stackkeys) { 893 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 894 } 895 if (vals_k != stackvals) { 896 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 897 } 898 return error; 899 } 900 901 static int 902 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 903 int *ret_u) 904 { 905 struct quotakcursor cursor_k; 906 int ret_k; 907 int error; 908 909 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 910 if (error) { 911 return error; 912 } 913 914 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 915 if (error) { 916 return error; 917 } 918 919 error = 
copyout(&ret_k, ret_u, sizeof(ret_k)); 920 if (error) { 921 return error; 922 } 923 924 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 925 } 926 927 static int 928 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 929 { 930 struct quotakcursor cursor_k; 931 int error; 932 933 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 934 if (error) { 935 return error; 936 } 937 938 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 939 if (error) { 940 return error; 941 } 942 943 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 944 } 945 946 static int 947 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 948 { 949 char *path_k; 950 int error; 951 952 /* XXX this should probably be a struct pathbuf */ 953 path_k = PNBUF_GET(); 954 error = copyin(path_u, path_k, PATH_MAX); 955 if (error) { 956 PNBUF_PUT(path_k); 957 return error; 958 } 959 960 error = vfs_quotactl_quotaon(mp, idtype, path_k); 961 962 PNBUF_PUT(path_k); 963 return error; 964 } 965 966 static int 967 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 968 { 969 return vfs_quotactl_quotaoff(mp, idtype); 970 } 971 972 int 973 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 974 { 975 struct mount *mp; 976 struct vnode *vp; 977 int error; 978 979 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 980 if (error != 0) 981 return (error); 982 mp = vp->v_mount; 983 984 switch (args->qc_op) { 985 case QUOTACTL_STAT: 986 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 987 break; 988 case QUOTACTL_IDTYPESTAT: 989 error = do_sys_quotactl_idtypestat(mp, 990 args->u.idtypestat.qc_idtype, 991 args->u.idtypestat.qc_info); 992 break; 993 case QUOTACTL_OBJTYPESTAT: 994 error = do_sys_quotactl_objtypestat(mp, 995 args->u.objtypestat.qc_objtype, 996 args->u.objtypestat.qc_info); 997 break; 998 case QUOTACTL_GET: 999 error = do_sys_quotactl_get(mp, 1000 args->u.get.qc_key, 1001 args->u.get.qc_val); 1002 break; 1003 
case QUOTACTL_PUT: 1004 error = do_sys_quotactl_put(mp, 1005 args->u.put.qc_key, 1006 args->u.put.qc_val); 1007 break; 1008 case QUOTACTL_DELETE: 1009 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 1010 break; 1011 case QUOTACTL_CURSOROPEN: 1012 error = do_sys_quotactl_cursoropen(mp, 1013 args->u.cursoropen.qc_cursor); 1014 break; 1015 case QUOTACTL_CURSORCLOSE: 1016 error = do_sys_quotactl_cursorclose(mp, 1017 args->u.cursorclose.qc_cursor); 1018 break; 1019 case QUOTACTL_CURSORSKIPIDTYPE: 1020 error = do_sys_quotactl_cursorskipidtype(mp, 1021 args->u.cursorskipidtype.qc_cursor, 1022 args->u.cursorskipidtype.qc_idtype); 1023 break; 1024 case QUOTACTL_CURSORGET: 1025 error = do_sys_quotactl_cursorget(mp, 1026 args->u.cursorget.qc_cursor, 1027 args->u.cursorget.qc_keys, 1028 args->u.cursorget.qc_vals, 1029 args->u.cursorget.qc_maxnum, 1030 args->u.cursorget.qc_ret); 1031 break; 1032 case QUOTACTL_CURSORATEND: 1033 error = do_sys_quotactl_cursoratend(mp, 1034 args->u.cursoratend.qc_cursor, 1035 args->u.cursoratend.qc_ret); 1036 break; 1037 case QUOTACTL_CURSORREWIND: 1038 error = do_sys_quotactl_cursorrewind(mp, 1039 args->u.cursorrewind.qc_cursor); 1040 break; 1041 case QUOTACTL_QUOTAON: 1042 error = do_sys_quotactl_quotaon(mp, 1043 args->u.quotaon.qc_idtype, 1044 args->u.quotaon.qc_quotafile); 1045 break; 1046 case QUOTACTL_QUOTAOFF: 1047 error = do_sys_quotactl_quotaoff(mp, 1048 args->u.quotaoff.qc_idtype); 1049 break; 1050 default: 1051 error = EINVAL; 1052 break; 1053 } 1054 1055 vrele(vp); 1056 return error; 1057 } 1058 1059 /* ARGSUSED */ 1060 int 1061 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1062 register_t *retval) 1063 { 1064 /* { 1065 syscallarg(const char *) path; 1066 syscallarg(struct quotactl_args *) args; 1067 } */ 1068 struct quotactl_args args; 1069 int error; 1070 1071 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1072 if (error) { 1073 return error; 1074 } 1075 1076 return do_sys_quotactl(SCARG(uap, 
path), &args); 1077 } 1078 1079 int 1080 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1081 int root) 1082 { 1083 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1084 int error = 0; 1085 1086 /* 1087 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1088 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1089 * overrides MNT_NOWAIT. 1090 */ 1091 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1092 (flags != MNT_WAIT && flags != 0)) { 1093 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1094 goto done; 1095 } 1096 1097 /* Get the filesystem stats now */ 1098 memset(sp, 0, sizeof(*sp)); 1099 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1100 return error; 1101 } 1102 1103 if (cwdi->cwdi_rdir == NULL) 1104 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1105 done: 1106 if (cwdi->cwdi_rdir != NULL) { 1107 size_t len; 1108 char *bp; 1109 char c; 1110 char *path = PNBUF_GET(); 1111 1112 bp = path + MAXPATHLEN; 1113 *--bp = '\0'; 1114 rw_enter(&cwdi->cwdi_lock, RW_READER); 1115 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1116 MAXPATHLEN / 2, 0, l); 1117 rw_exit(&cwdi->cwdi_lock); 1118 if (error) { 1119 PNBUF_PUT(path); 1120 return error; 1121 } 1122 len = strlen(bp); 1123 if (len != 1) { 1124 /* 1125 * for mount points that are below our root, we can see 1126 * them, so we fix up the pathname and return them. The 1127 * rest we cannot see, so we don't allow viewing the 1128 * data. 1129 */ 1130 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1131 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1132 (void)strlcpy(sp->f_mntonname, 1133 c == '\0' ? "/" : &sp->f_mntonname[len], 1134 sizeof(sp->f_mntonname)); 1135 } else { 1136 if (root) 1137 (void)strlcpy(sp->f_mntonname, "/", 1138 sizeof(sp->f_mntonname)); 1139 else 1140 error = EPERM; 1141 } 1142 } 1143 PNBUF_PUT(path); 1144 } 1145 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1146 return error; 1147 } 1148 1149 /* 1150 * Get filesystem statistics by path. 
1151 */ 1152 int 1153 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1154 { 1155 struct mount *mp; 1156 int error; 1157 struct vnode *vp; 1158 1159 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1160 if (error != 0) 1161 return error; 1162 mp = vp->v_mount; 1163 error = dostatvfs(mp, sb, l, flags, 1); 1164 vrele(vp); 1165 return error; 1166 } 1167 1168 /* ARGSUSED */ 1169 int 1170 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1171 { 1172 /* { 1173 syscallarg(const char *) path; 1174 syscallarg(struct statvfs *) buf; 1175 syscallarg(int) flags; 1176 } */ 1177 struct statvfs *sb; 1178 int error; 1179 1180 sb = STATVFSBUF_GET(); 1181 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1182 if (error == 0) 1183 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1184 STATVFSBUF_PUT(sb); 1185 return error; 1186 } 1187 1188 /* 1189 * Get filesystem statistics by fd. 1190 */ 1191 int 1192 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1193 { 1194 file_t *fp; 1195 struct mount *mp; 1196 int error; 1197 1198 /* fd_getvnode() will use the descriptor for us */ 1199 if ((error = fd_getvnode(fd, &fp)) != 0) 1200 return (error); 1201 mp = ((struct vnode *)fp->f_data)->v_mount; 1202 error = dostatvfs(mp, sb, curlwp, flags, 1); 1203 fd_putfile(fd); 1204 return error; 1205 } 1206 1207 /* ARGSUSED */ 1208 int 1209 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1210 { 1211 /* { 1212 syscallarg(int) fd; 1213 syscallarg(struct statvfs *) buf; 1214 syscallarg(int) flags; 1215 } */ 1216 struct statvfs *sb; 1217 int error; 1218 1219 sb = STATVFSBUF_GET(); 1220 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1221 if (error == 0) 1222 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1223 STATVFSBUF_PUT(sb); 1224 return error; 1225 } 1226 1227 1228 /* 1229 * Get statistics on all filesystems. 
1230 */ 1231 int 1232 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1233 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1234 register_t *retval) 1235 { 1236 int root = 0; 1237 struct proc *p = l->l_proc; 1238 struct mount *mp, *nmp; 1239 struct statvfs *sb; 1240 size_t count, maxcount; 1241 int error = 0; 1242 1243 sb = STATVFSBUF_GET(); 1244 maxcount = bufsize / entry_sz; 1245 mutex_enter(&mountlist_lock); 1246 count = 0; 1247 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1248 mp = nmp) { 1249 if (vfs_busy(mp, &nmp)) { 1250 continue; 1251 } 1252 if (sfsp && count < maxcount) { 1253 error = dostatvfs(mp, sb, l, flags, 0); 1254 if (error) { 1255 vfs_unbusy(mp, false, &nmp); 1256 error = 0; 1257 continue; 1258 } 1259 error = copyfn(sb, sfsp, entry_sz); 1260 if (error) { 1261 vfs_unbusy(mp, false, NULL); 1262 goto out; 1263 } 1264 sfsp = (char *)sfsp + entry_sz; 1265 root |= strcmp(sb->f_mntonname, "/") == 0; 1266 } 1267 count++; 1268 vfs_unbusy(mp, false, &nmp); 1269 } 1270 mutex_exit(&mountlist_lock); 1271 1272 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1273 /* 1274 * fake a root entry 1275 */ 1276 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1277 sb, l, flags, 1); 1278 if (error != 0) 1279 goto out; 1280 if (sfsp) { 1281 error = copyfn(sb, sfsp, entry_sz); 1282 if (error != 0) 1283 goto out; 1284 } 1285 count++; 1286 } 1287 if (sfsp && count > maxcount) 1288 *retval = maxcount; 1289 else 1290 *retval = count; 1291 out: 1292 STATVFSBUF_PUT(sb); 1293 return error; 1294 } 1295 1296 int 1297 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1298 { 1299 /* { 1300 syscallarg(struct statvfs *) buf; 1301 syscallarg(size_t) bufsize; 1302 syscallarg(int) flags; 1303 } */ 1304 1305 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1306 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1307 } 1308 1309 /* 1310 * Change current working directory to a 
given file descriptor.
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	struct vnode *vp, *rootvp;
	struct mount *mp;
	file_t *fp;
	int error;
	const int fd = SCARG(uap, fd);

	/* fd_getvnode() will use the descriptor for us */
	error = fd_getvnode(fd, &fp);
	if (error != 0) {
		return error;
	}
	vp = fp->f_data;

	/* Take our own reference and lock the vnode for the checks. */
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = (vp->v_type != VDIR) ?
	    ENOTDIR : VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error != 0) {
		vput(vp);
		goto out;
	}

	/*
	 * If something is mounted on the directory, descend to the root
	 * of the mounted filesystem; repeat for stacked mounts.
	 */
	for (;;) {
		mp = vp->v_mountedhere;
		if (mp == NULL) {
			break;
		}
		error = vfs_busy(mp, NULL);
		vput(vp);
		if (error != 0) {
			goto out;
		}
		error = VFS_ROOT(mp, &rootvp);
		vfs_unbusy(mp, false, NULL);
		if (error != 0) {
			goto out;
		}
		vp = rootvp;
	}
	VOP_UNLOCK(vp);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir != NULL && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Swap the working directory; our reference moves into cwdi. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

out:
	fd_putfile(fd);
	return error;
}

/*
 * Change this process's notion of the root directory to a given file
 * descriptor.
1378 */ 1379 int 1380 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1381 { 1382 struct proc *p = l->l_proc; 1383 struct vnode *vp; 1384 file_t *fp; 1385 int error, fd = SCARG(uap, fd); 1386 1387 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1388 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1389 return error; 1390 /* fd_getvnode() will use the descriptor for us */ 1391 if ((error = fd_getvnode(fd, &fp)) != 0) 1392 return error; 1393 vp = fp->f_data; 1394 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1395 if (vp->v_type != VDIR) 1396 error = ENOTDIR; 1397 else 1398 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1399 VOP_UNLOCK(vp); 1400 if (error) 1401 goto out; 1402 vref(vp); 1403 1404 change_root(p->p_cwdi, vp, l); 1405 1406 out: 1407 fd_putfile(fd); 1408 return (error); 1409 } 1410 1411 /* 1412 * Change current working directory (``.''). 1413 */ 1414 /* ARGSUSED */ 1415 int 1416 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1417 { 1418 /* { 1419 syscallarg(const char *) path; 1420 } */ 1421 struct proc *p = l->l_proc; 1422 struct cwdinfo *cwdi; 1423 int error; 1424 struct vnode *vp; 1425 1426 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1427 &vp, l)) != 0) 1428 return (error); 1429 cwdi = p->p_cwdi; 1430 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1431 vrele(cwdi->cwdi_cdir); 1432 cwdi->cwdi_cdir = vp; 1433 rw_exit(&cwdi->cwdi_lock); 1434 return (0); 1435 } 1436 1437 /* 1438 * Change notion of root (``/'') directory. 
1439 */ 1440 /* ARGSUSED */ 1441 int 1442 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1443 { 1444 /* { 1445 syscallarg(const char *) path; 1446 } */ 1447 struct proc *p = l->l_proc; 1448 int error; 1449 struct vnode *vp; 1450 1451 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1452 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1453 return (error); 1454 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1455 &vp, l)) != 0) 1456 return (error); 1457 1458 change_root(p->p_cwdi, vp, l); 1459 1460 return (0); 1461 } 1462 1463 /* 1464 * Common routine for chroot and fchroot. 1465 * NB: callers need to properly authorize the change root operation. 1466 */ 1467 void 1468 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1469 { 1470 struct proc *p = l->l_proc; 1471 kauth_cred_t ncred; 1472 1473 ncred = kauth_cred_alloc(); 1474 1475 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1476 if (cwdi->cwdi_rdir != NULL) 1477 vrele(cwdi->cwdi_rdir); 1478 cwdi->cwdi_rdir = vp; 1479 1480 /* 1481 * Prevent escaping from chroot by putting the root under 1482 * the working directory. Silently chdir to / if we aren't 1483 * already there. 1484 */ 1485 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1486 /* 1487 * XXX would be more failsafe to change directory to a 1488 * deadfs node here instead 1489 */ 1490 vrele(cwdi->cwdi_cdir); 1491 vref(vp); 1492 cwdi->cwdi_cdir = vp; 1493 } 1494 rw_exit(&cwdi->cwdi_lock); 1495 1496 /* Get a write lock on the process credential. */ 1497 proc_crmod_enter(); 1498 1499 kauth_cred_clone(p->p_cred, ncred); 1500 kauth_proc_chroot(ncred, p->p_cwdi); 1501 1502 /* Broadcast our credentials to the process and other LWPs. */ 1503 proc_crmod_leave(ncred, p->p_cred, true); 1504 } 1505 1506 /* 1507 * Common routine for chroot and chdir. 
1508 * XXX "where" should be enum uio_seg 1509 */ 1510 int 1511 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1512 { 1513 struct pathbuf *pb; 1514 struct nameidata nd; 1515 int error; 1516 1517 error = pathbuf_maybe_copyin(path, where, &pb); 1518 if (error) { 1519 return error; 1520 } 1521 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1522 if ((error = namei(&nd)) != 0) { 1523 pathbuf_destroy(pb); 1524 return error; 1525 } 1526 *vpp = nd.ni_vp; 1527 pathbuf_destroy(pb); 1528 1529 if ((*vpp)->v_type != VDIR) 1530 error = ENOTDIR; 1531 else 1532 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1533 1534 if (error) 1535 vput(*vpp); 1536 else 1537 VOP_UNLOCK(*vpp); 1538 return (error); 1539 } 1540 1541 /* 1542 * Internals of sys_open - path has already been converted into a pathbuf 1543 * (so we can easily reuse this function from other parts of the kernel, 1544 * like posix_spawn post-processing). 1545 */ 1546 static int 1547 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1548 int open_mode, int *fd) 1549 { 1550 struct proc *p = l->l_proc; 1551 struct cwdinfo *cwdi = p->p_cwdi; 1552 file_t *fp; 1553 struct vnode *vp; 1554 int flags, cmode; 1555 int indx, error; 1556 struct nameidata nd; 1557 1558 if (open_flags & O_SEARCH) { 1559 open_flags &= ~(int)O_SEARCH; 1560 } 1561 1562 flags = FFLAGS(open_flags); 1563 if ((flags & (FREAD | FWRITE)) == 0) 1564 return EINVAL; 1565 1566 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1567 return error; 1568 } 1569 1570 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1571 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1572 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1573 if (dvp != NULL) 1574 NDAT(&nd, dvp); 1575 1576 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1577 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1578 fd_abort(p, fp, indx); 1579 if ((error == EDUPFD || error == EMOVEFD) && 1580 l->l_dupfd >= 0 && /* XXX from fdopen */ 1581 (error = 1582 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1583 *fd = indx; 1584 return 0; 1585 } 1586 if (error == ERESTART) 1587 error = EINTR; 1588 return error; 1589 } 1590 1591 l->l_dupfd = 0; 1592 vp = nd.ni_vp; 1593 1594 if ((error = open_setfp(l, fp, vp, indx, flags))) 1595 return error; 1596 1597 VOP_UNLOCK(vp); 1598 *fd = indx; 1599 fd_affix(p, fp, indx); 1600 return 0; 1601 } 1602 1603 int 1604 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1605 { 1606 struct pathbuf *pb; 1607 int error, oflags; 1608 1609 oflags = FFLAGS(open_flags); 1610 if ((oflags & (FREAD | FWRITE)) == 0) 1611 return EINVAL; 1612 1613 pb = pathbuf_create(path); 1614 if (pb == NULL) 1615 return ENOMEM; 1616 1617 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1618 pathbuf_destroy(pb); 1619 1620 return error; 1621 } 1622 1623 /* 1624 * Check permissions, allocate an open file structure, 1625 * and call the device open routine if any. 
1626 */ 1627 static int 1628 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1629 int mode, int *fd) 1630 { 1631 file_t *dfp = NULL; 1632 struct vnode *dvp = NULL; 1633 struct pathbuf *pb; 1634 int error; 1635 1636 error = pathbuf_copyin(path, &pb); 1637 if (error) 1638 return error; 1639 1640 if (fdat != AT_FDCWD) { 1641 /* fd_getvnode() will use the descriptor for us */ 1642 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1643 goto out; 1644 1645 dvp = dfp->f_data; 1646 } 1647 1648 error = do_open(l, dvp, pb, flags, mode, fd); 1649 1650 if (dfp != NULL) 1651 fd_putfile(fdat); 1652 out: 1653 pathbuf_destroy(pb); 1654 return error; 1655 } 1656 1657 int 1658 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1659 { 1660 /* { 1661 syscallarg(const char *) path; 1662 syscallarg(int) flags; 1663 syscallarg(int) mode; 1664 } */ 1665 int error; 1666 int fd; 1667 1668 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1669 SCARG(uap, flags), SCARG(uap, mode), &fd); 1670 1671 if (error == 0) 1672 *retval = fd; 1673 1674 return error; 1675 } 1676 1677 int 1678 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1679 { 1680 /* { 1681 syscallarg(int) fd; 1682 syscallarg(const char *) path; 1683 syscallarg(int) oflags; 1684 syscallarg(int) mode; 1685 } */ 1686 int error; 1687 int fd; 1688 1689 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1690 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1691 1692 if (error == 0) 1693 *retval = fd; 1694 1695 return error; 1696 } 1697 1698 static void 1699 vfs__fhfree(fhandle_t *fhp) 1700 { 1701 size_t fhsize; 1702 1703 if (fhp == NULL) { 1704 return; 1705 } 1706 fhsize = FHANDLE_SIZE(fhp); 1707 kmem_free(fhp, fhsize); 1708 } 1709 1710 /* 1711 * vfs_composefh: compose a filehandle. 
1712 */ 1713 1714 int 1715 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1716 { 1717 struct mount *mp; 1718 struct fid *fidp; 1719 int error; 1720 size_t needfhsize; 1721 size_t fidsize; 1722 1723 mp = vp->v_mount; 1724 fidp = NULL; 1725 if (*fh_size < FHANDLE_SIZE_MIN) { 1726 fidsize = 0; 1727 } else { 1728 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1729 if (fhp != NULL) { 1730 memset(fhp, 0, *fh_size); 1731 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1732 fidp = &fhp->fh_fid; 1733 } 1734 } 1735 error = VFS_VPTOFH(vp, fidp, &fidsize); 1736 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1737 if (error == 0 && *fh_size < needfhsize) { 1738 error = E2BIG; 1739 } 1740 *fh_size = needfhsize; 1741 return error; 1742 } 1743 1744 int 1745 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1746 { 1747 struct mount *mp; 1748 fhandle_t *fhp; 1749 size_t fhsize; 1750 size_t fidsize; 1751 int error; 1752 1753 *fhpp = NULL; 1754 mp = vp->v_mount; 1755 fidsize = 0; 1756 error = VFS_VPTOFH(vp, NULL, &fidsize); 1757 KASSERT(error != 0); 1758 if (error != E2BIG) { 1759 goto out; 1760 } 1761 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1762 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1763 if (fhp == NULL) { 1764 error = ENOMEM; 1765 goto out; 1766 } 1767 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1768 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1769 if (error == 0) { 1770 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1771 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1772 *fhpp = fhp; 1773 } else { 1774 kmem_free(fhp, fhsize); 1775 } 1776 out: 1777 return error; 1778 } 1779 1780 void 1781 vfs_composefh_free(fhandle_t *fhp) 1782 { 1783 1784 vfs__fhfree(fhp); 1785 } 1786 1787 /* 1788 * vfs_fhtovp: lookup a vnode by a filehandle. 
1789 */ 1790 1791 int 1792 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1793 { 1794 struct mount *mp; 1795 int error; 1796 1797 *vpp = NULL; 1798 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1799 if (mp == NULL) { 1800 error = ESTALE; 1801 goto out; 1802 } 1803 if (mp->mnt_op->vfs_fhtovp == NULL) { 1804 error = EOPNOTSUPP; 1805 goto out; 1806 } 1807 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1808 out: 1809 return error; 1810 } 1811 1812 /* 1813 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1814 * the needed size. 1815 */ 1816 1817 int 1818 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1819 { 1820 fhandle_t *fhp; 1821 int error; 1822 1823 *fhpp = NULL; 1824 if (fhsize > FHANDLE_SIZE_MAX) { 1825 return EINVAL; 1826 } 1827 if (fhsize < FHANDLE_SIZE_MIN) { 1828 return EINVAL; 1829 } 1830 again: 1831 fhp = kmem_alloc(fhsize, KM_SLEEP); 1832 if (fhp == NULL) { 1833 return ENOMEM; 1834 } 1835 error = copyin(ufhp, fhp, fhsize); 1836 if (error == 0) { 1837 /* XXX this check shouldn't be here */ 1838 if (FHANDLE_SIZE(fhp) == fhsize) { 1839 *fhpp = fhp; 1840 return 0; 1841 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1842 /* 1843 * a kludge for nfsv2 padded handles. 1844 */ 1845 size_t sz; 1846 1847 sz = FHANDLE_SIZE(fhp); 1848 kmem_free(fhp, fhsize); 1849 fhsize = sz; 1850 goto again; 1851 } else { 1852 /* 1853 * userland told us wrong size. 
1854 */ 1855 error = EINVAL; 1856 } 1857 } 1858 kmem_free(fhp, fhsize); 1859 return error; 1860 } 1861 1862 void 1863 vfs_copyinfh_free(fhandle_t *fhp) 1864 { 1865 1866 vfs__fhfree(fhp); 1867 } 1868 1869 /* 1870 * Get file handle system call 1871 */ 1872 int 1873 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1874 { 1875 /* { 1876 syscallarg(char *) fname; 1877 syscallarg(fhandle_t *) fhp; 1878 syscallarg(size_t *) fh_size; 1879 } */ 1880 struct vnode *vp; 1881 fhandle_t *fh; 1882 int error; 1883 struct pathbuf *pb; 1884 struct nameidata nd; 1885 size_t sz; 1886 size_t usz; 1887 1888 /* 1889 * Must be super user 1890 */ 1891 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1892 0, NULL, NULL, NULL); 1893 if (error) 1894 return (error); 1895 1896 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1897 if (error) { 1898 return error; 1899 } 1900 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1901 error = namei(&nd); 1902 if (error) { 1903 pathbuf_destroy(pb); 1904 return error; 1905 } 1906 vp = nd.ni_vp; 1907 pathbuf_destroy(pb); 1908 1909 error = vfs_composefh_alloc(vp, &fh); 1910 vput(vp); 1911 if (error != 0) { 1912 goto out; 1913 } 1914 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1915 if (error != 0) { 1916 goto out; 1917 } 1918 sz = FHANDLE_SIZE(fh); 1919 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1920 if (error != 0) { 1921 goto out; 1922 } 1923 if (usz >= sz) { 1924 error = copyout(fh, SCARG(uap, fhp), sz); 1925 } else { 1926 error = E2BIG; 1927 } 1928 out: 1929 vfs_composefh_free(fh); 1930 return (error); 1931 } 1932 1933 /* 1934 * Open a file given a file handle. 1935 * 1936 * Check permissions, allocate an open file structure, 1937 * and call the device open routine if any. 
1938 */ 1939 1940 int 1941 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1942 register_t *retval) 1943 { 1944 file_t *fp; 1945 struct vnode *vp = NULL; 1946 kauth_cred_t cred = l->l_cred; 1947 file_t *nfp; 1948 int indx, error = 0; 1949 struct vattr va; 1950 fhandle_t *fh; 1951 int flags; 1952 proc_t *p; 1953 1954 p = curproc; 1955 1956 /* 1957 * Must be super user 1958 */ 1959 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1960 0, NULL, NULL, NULL))) 1961 return (error); 1962 1963 if (oflags & O_SEARCH) { 1964 oflags &= ~(int)O_SEARCH; 1965 } 1966 1967 flags = FFLAGS(oflags); 1968 if ((flags & (FREAD | FWRITE)) == 0) 1969 return (EINVAL); 1970 if ((flags & O_CREAT)) 1971 return (EINVAL); 1972 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1973 return (error); 1974 fp = nfp; 1975 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1976 if (error != 0) { 1977 goto bad; 1978 } 1979 error = vfs_fhtovp(fh, &vp); 1980 if (error != 0) { 1981 goto bad; 1982 } 1983 1984 /* Now do an effective vn_open */ 1985 1986 if (vp->v_type == VSOCK) { 1987 error = EOPNOTSUPP; 1988 goto bad; 1989 } 1990 error = vn_openchk(vp, cred, flags); 1991 if (error != 0) 1992 goto bad; 1993 if (flags & O_TRUNC) { 1994 VOP_UNLOCK(vp); /* XXX */ 1995 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1996 vattr_null(&va); 1997 va.va_size = 0; 1998 error = VOP_SETATTR(vp, &va, cred); 1999 if (error) 2000 goto bad; 2001 } 2002 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2003 goto bad; 2004 if (flags & FWRITE) { 2005 mutex_enter(vp->v_interlock); 2006 vp->v_writecount++; 2007 mutex_exit(vp->v_interlock); 2008 } 2009 2010 /* done with modified vn_open, now finish what sys_open does. 
*/ 2011 if ((error = open_setfp(l, fp, vp, indx, flags))) 2012 return error; 2013 2014 VOP_UNLOCK(vp); 2015 *retval = indx; 2016 fd_affix(p, fp, indx); 2017 vfs_copyinfh_free(fh); 2018 return (0); 2019 2020 bad: 2021 fd_abort(p, fp, indx); 2022 if (vp != NULL) 2023 vput(vp); 2024 vfs_copyinfh_free(fh); 2025 return (error); 2026 } 2027 2028 int 2029 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2030 { 2031 /* { 2032 syscallarg(const void *) fhp; 2033 syscallarg(size_t) fh_size; 2034 syscallarg(int) flags; 2035 } */ 2036 2037 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2038 SCARG(uap, flags), retval); 2039 } 2040 2041 int 2042 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2043 { 2044 int error; 2045 fhandle_t *fh; 2046 struct vnode *vp; 2047 2048 /* 2049 * Must be super user 2050 */ 2051 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2052 0, NULL, NULL, NULL))) 2053 return (error); 2054 2055 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2056 if (error != 0) 2057 return error; 2058 2059 error = vfs_fhtovp(fh, &vp); 2060 vfs_copyinfh_free(fh); 2061 if (error != 0) 2062 return error; 2063 2064 error = vn_stat(vp, sb); 2065 vput(vp); 2066 return error; 2067 } 2068 2069 2070 /* ARGSUSED */ 2071 int 2072 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2073 { 2074 /* { 2075 syscallarg(const void *) fhp; 2076 syscallarg(size_t) fh_size; 2077 syscallarg(struct stat *) sb; 2078 } */ 2079 struct stat sb; 2080 int error; 2081 2082 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2083 if (error) 2084 return error; 2085 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2086 } 2087 2088 int 2089 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2090 int flags) 2091 { 2092 fhandle_t *fh; 2093 struct mount *mp; 2094 struct vnode *vp; 2095 int error; 2096 2097 /* 2098 * Must be super user 
2099 */ 2100 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2101 0, NULL, NULL, NULL))) 2102 return error; 2103 2104 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2105 if (error != 0) 2106 return error; 2107 2108 error = vfs_fhtovp(fh, &vp); 2109 vfs_copyinfh_free(fh); 2110 if (error != 0) 2111 return error; 2112 2113 mp = vp->v_mount; 2114 error = dostatvfs(mp, sb, l, flags, 1); 2115 vput(vp); 2116 return error; 2117 } 2118 2119 /* ARGSUSED */ 2120 int 2121 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2122 { 2123 /* { 2124 syscallarg(const void *) fhp; 2125 syscallarg(size_t) fh_size; 2126 syscallarg(struct statvfs *) buf; 2127 syscallarg(int) flags; 2128 } */ 2129 struct statvfs *sb = STATVFSBUF_GET(); 2130 int error; 2131 2132 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2133 SCARG(uap, flags)); 2134 if (error == 0) 2135 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2136 STATVFSBUF_PUT(sb); 2137 return error; 2138 } 2139 2140 /* 2141 * Create a special file. 
2142 */ 2143 /* ARGSUSED */ 2144 int 2145 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2146 register_t *retval) 2147 { 2148 /* { 2149 syscallarg(const char *) path; 2150 syscallarg(mode_t) mode; 2151 syscallarg(dev_t) dev; 2152 } */ 2153 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2154 SCARG(uap, dev), retval, UIO_USERSPACE); 2155 } 2156 2157 int 2158 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2159 register_t *retval) 2160 { 2161 /* { 2162 syscallarg(int) fd; 2163 syscallarg(const char *) path; 2164 syscallarg(mode_t) mode; 2165 syscallarg(uint32_t) dev; 2166 } */ 2167 2168 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2169 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2170 } 2171 2172 int 2173 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2174 register_t *retval, enum uio_seg seg) 2175 { 2176 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2177 } 2178 2179 int 2180 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2181 dev_t dev, register_t *retval, enum uio_seg seg) 2182 { 2183 struct proc *p = l->l_proc; 2184 struct vnode *vp; 2185 struct vattr vattr; 2186 int error, optype; 2187 struct pathbuf *pb; 2188 struct nameidata nd; 2189 const char *pathstring; 2190 2191 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2192 0, NULL, NULL, NULL)) != 0) 2193 return (error); 2194 2195 optype = VOP_MKNOD_DESCOFFSET; 2196 2197 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2198 if (error) { 2199 return error; 2200 } 2201 pathstring = pathbuf_stringcopy_get(pb); 2202 if (pathstring == NULL) { 2203 pathbuf_destroy(pb); 2204 return ENOMEM; 2205 } 2206 2207 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2208 2209 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2210 goto out; 2211 vp = nd.ni_vp; 2212 2213 if (vp != NULL) 2214 error = EEXIST; 2215 else { 2216 vattr_null(&vattr); 2217 /* We 
will read cwdi->cwdi_cmask unlocked. */ 2218 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2219 vattr.va_rdev = dev; 2220 2221 switch (mode & S_IFMT) { 2222 case S_IFMT: /* used by badsect to flag bad sectors */ 2223 vattr.va_type = VBAD; 2224 break; 2225 case S_IFCHR: 2226 vattr.va_type = VCHR; 2227 break; 2228 case S_IFBLK: 2229 vattr.va_type = VBLK; 2230 break; 2231 case S_IFWHT: 2232 optype = VOP_WHITEOUT_DESCOFFSET; 2233 break; 2234 case S_IFREG: 2235 #if NVERIEXEC > 0 2236 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2237 O_CREAT); 2238 #endif /* NVERIEXEC > 0 */ 2239 vattr.va_type = VREG; 2240 vattr.va_rdev = VNOVAL; 2241 optype = VOP_CREATE_DESCOFFSET; 2242 break; 2243 default: 2244 error = EINVAL; 2245 break; 2246 } 2247 } 2248 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2249 && vattr.va_rdev == VNOVAL) 2250 error = EINVAL; 2251 if (!error) { 2252 switch (optype) { 2253 case VOP_WHITEOUT_DESCOFFSET: 2254 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2255 if (error) 2256 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2257 vput(nd.ni_dvp); 2258 break; 2259 2260 case VOP_MKNOD_DESCOFFSET: 2261 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2262 &nd.ni_cnd, &vattr); 2263 if (error == 0) 2264 vput(nd.ni_vp); 2265 break; 2266 2267 case VOP_CREATE_DESCOFFSET: 2268 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2269 &nd.ni_cnd, &vattr); 2270 if (error == 0) 2271 vput(nd.ni_vp); 2272 break; 2273 } 2274 } else { 2275 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2276 if (nd.ni_dvp == vp) 2277 vrele(nd.ni_dvp); 2278 else 2279 vput(nd.ni_dvp); 2280 if (vp) 2281 vrele(vp); 2282 } 2283 out: 2284 pathbuf_stringcopy_put(pb, pathstring); 2285 pathbuf_destroy(pb); 2286 return (error); 2287 } 2288 2289 /* 2290 * Create a named pipe. 
2291 */ 2292 /* ARGSUSED */ 2293 int 2294 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2295 { 2296 /* { 2297 syscallarg(const char *) path; 2298 syscallarg(int) mode; 2299 } */ 2300 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2301 } 2302 2303 int 2304 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2305 register_t *retval) 2306 { 2307 /* { 2308 syscallarg(int) fd; 2309 syscallarg(const char *) path; 2310 syscallarg(int) mode; 2311 } */ 2312 2313 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2314 SCARG(uap, mode)); 2315 } 2316 2317 static int 2318 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2319 { 2320 struct proc *p = l->l_proc; 2321 struct vattr vattr; 2322 int error; 2323 struct pathbuf *pb; 2324 struct nameidata nd; 2325 2326 error = pathbuf_copyin(path, &pb); 2327 if (error) { 2328 return error; 2329 } 2330 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2331 2332 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2333 pathbuf_destroy(pb); 2334 return error; 2335 } 2336 if (nd.ni_vp != NULL) { 2337 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2338 if (nd.ni_dvp == nd.ni_vp) 2339 vrele(nd.ni_dvp); 2340 else 2341 vput(nd.ni_dvp); 2342 vrele(nd.ni_vp); 2343 pathbuf_destroy(pb); 2344 return (EEXIST); 2345 } 2346 vattr_null(&vattr); 2347 vattr.va_type = VFIFO; 2348 /* We will read cwdi->cwdi_cmask unlocked. */ 2349 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2350 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2351 if (error == 0) 2352 vput(nd.ni_vp); 2353 pathbuf_destroy(pb); 2354 return (error); 2355 } 2356 2357 /* 2358 * Make a hard file link. 
2359 */ 2360 /* ARGSUSED */ 2361 static int 2362 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2363 const char *link, int follow, register_t *retval) 2364 { 2365 struct vnode *vp; 2366 struct pathbuf *linkpb; 2367 struct nameidata nd; 2368 namei_simple_flags_t ns_flags; 2369 int error; 2370 2371 if (follow & AT_SYMLINK_FOLLOW) 2372 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2373 else 2374 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2375 2376 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2377 if (error != 0) 2378 return (error); 2379 error = pathbuf_copyin(link, &linkpb); 2380 if (error) { 2381 goto out1; 2382 } 2383 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2384 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2385 goto out2; 2386 if (nd.ni_vp) { 2387 error = EEXIST; 2388 goto abortop; 2389 } 2390 /* Prevent hard links on directories. */ 2391 if (vp->v_type == VDIR) { 2392 error = EPERM; 2393 goto abortop; 2394 } 2395 /* Prevent cross-mount operation. 
*/ 2396 if (nd.ni_dvp->v_mount != vp->v_mount) { 2397 error = EXDEV; 2398 goto abortop; 2399 } 2400 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2401 out2: 2402 pathbuf_destroy(linkpb); 2403 out1: 2404 vrele(vp); 2405 return (error); 2406 abortop: 2407 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2408 if (nd.ni_dvp == nd.ni_vp) 2409 vrele(nd.ni_dvp); 2410 else 2411 vput(nd.ni_dvp); 2412 if (nd.ni_vp != NULL) 2413 vrele(nd.ni_vp); 2414 goto out2; 2415 } 2416 2417 int 2418 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2419 { 2420 /* { 2421 syscallarg(const char *) path; 2422 syscallarg(const char *) link; 2423 } */ 2424 const char *path = SCARG(uap, path); 2425 const char *link = SCARG(uap, link); 2426 2427 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2428 AT_SYMLINK_FOLLOW, retval); 2429 } 2430 2431 int 2432 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2433 register_t *retval) 2434 { 2435 /* { 2436 syscallarg(int) fd1; 2437 syscallarg(const char *) name1; 2438 syscallarg(int) fd2; 2439 syscallarg(const char *) name2; 2440 syscallarg(int) flags; 2441 } */ 2442 int fd1 = SCARG(uap, fd1); 2443 const char *name1 = SCARG(uap, name1); 2444 int fd2 = SCARG(uap, fd2); 2445 const char *name2 = SCARG(uap, name2); 2446 int follow; 2447 2448 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2449 2450 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2451 } 2452 2453 2454 int 2455 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2456 { 2457 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2458 } 2459 2460 static int 2461 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2462 const char *link, enum uio_seg seg) 2463 { 2464 struct proc *p = curproc; 2465 struct vattr vattr; 2466 char *path; 2467 int error; 2468 struct pathbuf *linkpb; 2469 struct nameidata nd; 2470 2471 KASSERT(l != NULL || fdat == AT_FDCWD); 2472 2473 path = PNBUF_GET(); 2474 if (seg == UIO_USERSPACE) { 
2475 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2476 goto out1; 2477 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2478 goto out1; 2479 } else { 2480 KASSERT(strlen(patharg) < MAXPATHLEN); 2481 strcpy(path, patharg); 2482 linkpb = pathbuf_create(link); 2483 if (linkpb == NULL) { 2484 error = ENOMEM; 2485 goto out1; 2486 } 2487 } 2488 ktrkuser("symlink-target", path, strlen(path)); 2489 2490 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2491 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2492 goto out2; 2493 if (nd.ni_vp) { 2494 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2495 if (nd.ni_dvp == nd.ni_vp) 2496 vrele(nd.ni_dvp); 2497 else 2498 vput(nd.ni_dvp); 2499 vrele(nd.ni_vp); 2500 error = EEXIST; 2501 goto out2; 2502 } 2503 vattr_null(&vattr); 2504 vattr.va_type = VLNK; 2505 /* We will read cwdi->cwdi_cmask unlocked. */ 2506 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2507 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2508 if (error == 0) 2509 vput(nd.ni_vp); 2510 out2: 2511 pathbuf_destroy(linkpb); 2512 out1: 2513 PNBUF_PUT(path); 2514 return (error); 2515 } 2516 2517 /* 2518 * Make a symbolic link. 2519 */ 2520 /* ARGSUSED */ 2521 int 2522 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2523 { 2524 /* { 2525 syscallarg(const char *) path; 2526 syscallarg(const char *) link; 2527 } */ 2528 2529 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2530 UIO_USERSPACE); 2531 } 2532 2533 int 2534 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2535 register_t *retval) 2536 { 2537 /* { 2538 syscallarg(const char *) path1; 2539 syscallarg(int) fd; 2540 syscallarg(const char *) path2; 2541 } */ 2542 2543 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2544 SCARG(uap, path2), UIO_USERSPACE); 2545 } 2546 2547 /* 2548 * Delete a whiteout from the filesystem. 
2549 */ 2550 /* ARGSUSED */ 2551 int 2552 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2553 { 2554 /* { 2555 syscallarg(const char *) path; 2556 } */ 2557 int error; 2558 struct pathbuf *pb; 2559 struct nameidata nd; 2560 2561 error = pathbuf_copyin(SCARG(uap, path), &pb); 2562 if (error) { 2563 return error; 2564 } 2565 2566 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2567 error = namei(&nd); 2568 if (error) { 2569 pathbuf_destroy(pb); 2570 return (error); 2571 } 2572 2573 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2574 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2575 if (nd.ni_dvp == nd.ni_vp) 2576 vrele(nd.ni_dvp); 2577 else 2578 vput(nd.ni_dvp); 2579 if (nd.ni_vp) 2580 vrele(nd.ni_vp); 2581 pathbuf_destroy(pb); 2582 return (EEXIST); 2583 } 2584 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2585 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2586 vput(nd.ni_dvp); 2587 pathbuf_destroy(pb); 2588 return (error); 2589 } 2590 2591 /* 2592 * Delete a name from the filesystem. 
2593 */ 2594 /* ARGSUSED */ 2595 int 2596 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2597 { 2598 /* { 2599 syscallarg(const char *) path; 2600 } */ 2601 2602 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2603 } 2604 2605 int 2606 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2607 register_t *retval) 2608 { 2609 /* { 2610 syscallarg(int) fd; 2611 syscallarg(const char *) path; 2612 syscallarg(int) flag; 2613 } */ 2614 2615 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2616 SCARG(uap, flag), UIO_USERSPACE); 2617 } 2618 2619 int 2620 do_sys_unlink(const char *arg, enum uio_seg seg) 2621 { 2622 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2623 } 2624 2625 static int 2626 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2627 enum uio_seg seg) 2628 { 2629 struct vnode *vp; 2630 int error; 2631 struct pathbuf *pb; 2632 struct nameidata nd; 2633 const char *pathstring; 2634 2635 KASSERT(l != NULL || fdat == AT_FDCWD); 2636 2637 error = pathbuf_maybe_copyin(arg, seg, &pb); 2638 if (error) { 2639 return error; 2640 } 2641 pathstring = pathbuf_stringcopy_get(pb); 2642 if (pathstring == NULL) { 2643 pathbuf_destroy(pb); 2644 return ENOMEM; 2645 } 2646 2647 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2648 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2649 goto out; 2650 vp = nd.ni_vp; 2651 2652 /* 2653 * The root of a mounted filesystem cannot be deleted. 2654 */ 2655 if ((vp->v_vflag & VV_ROOT) != 0) { 2656 error = EBUSY; 2657 goto abort; 2658 } 2659 2660 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2661 error = EBUSY; 2662 goto abort; 2663 } 2664 2665 /* 2666 * No rmdir "." please. 
2667 */ 2668 if (nd.ni_dvp == vp) { 2669 error = EINVAL; 2670 goto abort; 2671 } 2672 2673 /* 2674 * AT_REMOVEDIR is required to remove a directory 2675 */ 2676 if (vp->v_type == VDIR) { 2677 if (!(flags & AT_REMOVEDIR)) { 2678 error = EPERM; 2679 goto abort; 2680 } else { 2681 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2682 goto out; 2683 } 2684 } 2685 2686 /* 2687 * Starting here we only deal with non directories. 2688 */ 2689 if (flags & AT_REMOVEDIR) { 2690 error = ENOTDIR; 2691 goto abort; 2692 } 2693 2694 2695 #if NVERIEXEC > 0 2696 /* Handle remove requests for veriexec entries. */ 2697 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2698 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2699 if (nd.ni_dvp == vp) 2700 vrele(nd.ni_dvp); 2701 else 2702 vput(nd.ni_dvp); 2703 vput(vp); 2704 goto out; 2705 } 2706 #endif /* NVERIEXEC > 0 */ 2707 2708 #ifdef FILEASSOC 2709 (void)fileassoc_file_delete(vp); 2710 #endif /* FILEASSOC */ 2711 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2712 goto out; 2713 2714 abort: 2715 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2716 if (nd.ni_dvp == vp) 2717 vrele(nd.ni_dvp); 2718 else 2719 vput(nd.ni_dvp); 2720 vput(vp); 2721 2722 out: 2723 pathbuf_stringcopy_put(pb, pathstring); 2724 pathbuf_destroy(pb); 2725 return (error); 2726 } 2727 2728 /* 2729 * Reposition read/write file offset. 
2730 */ 2731 int 2732 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2733 { 2734 /* { 2735 syscallarg(int) fd; 2736 syscallarg(int) pad; 2737 syscallarg(off_t) offset; 2738 syscallarg(int) whence; 2739 } */ 2740 kauth_cred_t cred = l->l_cred; 2741 file_t *fp; 2742 struct vnode *vp; 2743 struct vattr vattr; 2744 off_t newoff; 2745 int error, fd; 2746 2747 fd = SCARG(uap, fd); 2748 2749 if ((fp = fd_getfile(fd)) == NULL) 2750 return (EBADF); 2751 2752 vp = fp->f_data; 2753 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2754 error = ESPIPE; 2755 goto out; 2756 } 2757 2758 switch (SCARG(uap, whence)) { 2759 case SEEK_CUR: 2760 newoff = fp->f_offset + SCARG(uap, offset); 2761 break; 2762 case SEEK_END: 2763 vn_lock(vp, LK_SHARED | LK_RETRY); 2764 error = VOP_GETATTR(vp, &vattr, cred); 2765 VOP_UNLOCK(vp); 2766 if (error) { 2767 goto out; 2768 } 2769 newoff = SCARG(uap, offset) + vattr.va_size; 2770 break; 2771 case SEEK_SET: 2772 newoff = SCARG(uap, offset); 2773 break; 2774 default: 2775 error = EINVAL; 2776 goto out; 2777 } 2778 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2779 *(off_t *)retval = fp->f_offset = newoff; 2780 } 2781 out: 2782 fd_putfile(fd); 2783 return (error); 2784 } 2785 2786 /* 2787 * Positional read system call. 
2788 */ 2789 int 2790 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2791 { 2792 /* { 2793 syscallarg(int) fd; 2794 syscallarg(void *) buf; 2795 syscallarg(size_t) nbyte; 2796 syscallarg(off_t) offset; 2797 } */ 2798 file_t *fp; 2799 struct vnode *vp; 2800 off_t offset; 2801 int error, fd = SCARG(uap, fd); 2802 2803 if ((fp = fd_getfile(fd)) == NULL) 2804 return (EBADF); 2805 2806 if ((fp->f_flag & FREAD) == 0) { 2807 fd_putfile(fd); 2808 return (EBADF); 2809 } 2810 2811 vp = fp->f_data; 2812 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2813 error = ESPIPE; 2814 goto out; 2815 } 2816 2817 offset = SCARG(uap, offset); 2818 2819 /* 2820 * XXX This works because no file systems actually 2821 * XXX take any action on the seek operation. 2822 */ 2823 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2824 goto out; 2825 2826 /* dofileread() will unuse the descriptor for us */ 2827 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2828 &offset, 0, retval)); 2829 2830 out: 2831 fd_putfile(fd); 2832 return (error); 2833 } 2834 2835 /* 2836 * Positional scatter read system call. 2837 */ 2838 int 2839 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2840 { 2841 /* { 2842 syscallarg(int) fd; 2843 syscallarg(const struct iovec *) iovp; 2844 syscallarg(int) iovcnt; 2845 syscallarg(off_t) offset; 2846 } */ 2847 off_t offset = SCARG(uap, offset); 2848 2849 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2850 SCARG(uap, iovcnt), &offset, 0, retval); 2851 } 2852 2853 /* 2854 * Positional write system call. 
2855 */ 2856 int 2857 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2858 { 2859 /* { 2860 syscallarg(int) fd; 2861 syscallarg(const void *) buf; 2862 syscallarg(size_t) nbyte; 2863 syscallarg(off_t) offset; 2864 } */ 2865 file_t *fp; 2866 struct vnode *vp; 2867 off_t offset; 2868 int error, fd = SCARG(uap, fd); 2869 2870 if ((fp = fd_getfile(fd)) == NULL) 2871 return (EBADF); 2872 2873 if ((fp->f_flag & FWRITE) == 0) { 2874 fd_putfile(fd); 2875 return (EBADF); 2876 } 2877 2878 vp = fp->f_data; 2879 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2880 error = ESPIPE; 2881 goto out; 2882 } 2883 2884 offset = SCARG(uap, offset); 2885 2886 /* 2887 * XXX This works because no file systems actually 2888 * XXX take any action on the seek operation. 2889 */ 2890 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2891 goto out; 2892 2893 /* dofilewrite() will unuse the descriptor for us */ 2894 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2895 &offset, 0, retval)); 2896 2897 out: 2898 fd_putfile(fd); 2899 return (error); 2900 } 2901 2902 /* 2903 * Positional gather write system call. 2904 */ 2905 int 2906 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2907 { 2908 /* { 2909 syscallarg(int) fd; 2910 syscallarg(const struct iovec *) iovp; 2911 syscallarg(int) iovcnt; 2912 syscallarg(off_t) offset; 2913 } */ 2914 off_t offset = SCARG(uap, offset); 2915 2916 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2917 SCARG(uap, iovcnt), &offset, 0, retval); 2918 } 2919 2920 /* 2921 * Check access permissions. 
2922 */ 2923 int 2924 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2925 { 2926 /* { 2927 syscallarg(const char *) path; 2928 syscallarg(int) flags; 2929 } */ 2930 2931 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2932 SCARG(uap, flags), 0); 2933 } 2934 2935 static int 2936 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2937 int mode, int flags) 2938 { 2939 kauth_cred_t cred; 2940 struct vnode *vp; 2941 int error, nd_flag, vmode; 2942 struct pathbuf *pb; 2943 struct nameidata nd; 2944 2945 CTASSERT(F_OK == 0); 2946 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2947 /* nonsense mode */ 2948 return EINVAL; 2949 } 2950 2951 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2952 if (flags & AT_SYMLINK_NOFOLLOW) 2953 nd_flag &= ~FOLLOW; 2954 2955 error = pathbuf_copyin(path, &pb); 2956 if (error) 2957 return error; 2958 2959 NDINIT(&nd, LOOKUP, nd_flag, pb); 2960 2961 /* Override default credentials */ 2962 cred = kauth_cred_dup(l->l_cred); 2963 if (!(flags & AT_EACCESS)) { 2964 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2965 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2966 } 2967 nd.ni_cnd.cn_cred = cred; 2968 2969 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2970 pathbuf_destroy(pb); 2971 goto out; 2972 } 2973 vp = nd.ni_vp; 2974 pathbuf_destroy(pb); 2975 2976 /* Flags == 0 means only check for existence. 
*/ 2977 if (mode) { 2978 vmode = 0; 2979 if (mode & R_OK) 2980 vmode |= VREAD; 2981 if (mode & W_OK) 2982 vmode |= VWRITE; 2983 if (mode & X_OK) 2984 vmode |= VEXEC; 2985 2986 error = VOP_ACCESS(vp, vmode, cred); 2987 if (!error && (vmode & VWRITE)) 2988 error = vn_writechk(vp); 2989 } 2990 vput(vp); 2991 out: 2992 kauth_cred_free(cred); 2993 return (error); 2994 } 2995 2996 int 2997 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 2998 register_t *retval) 2999 { 3000 /* { 3001 syscallarg(int) fd; 3002 syscallarg(const char *) path; 3003 syscallarg(int) amode; 3004 syscallarg(int) flag; 3005 } */ 3006 3007 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3008 SCARG(uap, amode), SCARG(uap, flag)); 3009 } 3010 3011 /* 3012 * Common code for all sys_stat functions, including compat versions. 3013 */ 3014 int 3015 do_sys_stat(const char *userpath, unsigned int nd_flag, 3016 struct stat *sb) 3017 { 3018 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3019 } 3020 3021 int 3022 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3023 unsigned int nd_flag, struct stat *sb) 3024 { 3025 int error; 3026 struct pathbuf *pb; 3027 struct nameidata nd; 3028 3029 KASSERT(l != NULL || fdat == AT_FDCWD); 3030 3031 error = pathbuf_copyin(userpath, &pb); 3032 if (error) { 3033 return error; 3034 } 3035 3036 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3037 3038 error = fd_nameiat(l, fdat, &nd); 3039 if (error != 0) { 3040 pathbuf_destroy(pb); 3041 return error; 3042 } 3043 error = vn_stat(nd.ni_vp, sb); 3044 vput(nd.ni_vp); 3045 pathbuf_destroy(pb); 3046 return error; 3047 } 3048 3049 /* 3050 * Get file status; this version follows links. 
3051 */ 3052 /* ARGSUSED */ 3053 int 3054 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3055 { 3056 /* { 3057 syscallarg(const char *) path; 3058 syscallarg(struct stat *) ub; 3059 } */ 3060 struct stat sb; 3061 int error; 3062 3063 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3064 if (error) 3065 return error; 3066 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3067 } 3068 3069 /* 3070 * Get file status; this version does not follow links. 3071 */ 3072 /* ARGSUSED */ 3073 int 3074 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3075 { 3076 /* { 3077 syscallarg(const char *) path; 3078 syscallarg(struct stat *) ub; 3079 } */ 3080 struct stat sb; 3081 int error; 3082 3083 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3084 if (error) 3085 return error; 3086 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3087 } 3088 3089 int 3090 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3091 register_t *retval) 3092 { 3093 /* { 3094 syscallarg(int) fd; 3095 syscallarg(const char *) path; 3096 syscallarg(struct stat *) buf; 3097 syscallarg(int) flag; 3098 } */ 3099 unsigned int nd_flag; 3100 struct stat sb; 3101 int error; 3102 3103 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3104 nd_flag = NOFOLLOW; 3105 else 3106 nd_flag = FOLLOW; 3107 3108 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3109 &sb); 3110 if (error) 3111 return error; 3112 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3113 } 3114 3115 /* 3116 * Get configurable pathname variables. 
3117 */ 3118 /* ARGSUSED */ 3119 int 3120 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3121 { 3122 /* { 3123 syscallarg(const char *) path; 3124 syscallarg(int) name; 3125 } */ 3126 int error; 3127 struct pathbuf *pb; 3128 struct nameidata nd; 3129 3130 error = pathbuf_copyin(SCARG(uap, path), &pb); 3131 if (error) { 3132 return error; 3133 } 3134 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3135 if ((error = namei(&nd)) != 0) { 3136 pathbuf_destroy(pb); 3137 return (error); 3138 } 3139 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3140 vput(nd.ni_vp); 3141 pathbuf_destroy(pb); 3142 return (error); 3143 } 3144 3145 /* 3146 * Return target name of a symbolic link. 3147 */ 3148 /* ARGSUSED */ 3149 int 3150 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3151 register_t *retval) 3152 { 3153 /* { 3154 syscallarg(const char *) path; 3155 syscallarg(char *) buf; 3156 syscallarg(size_t) count; 3157 } */ 3158 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3159 SCARG(uap, buf), SCARG(uap, count), retval); 3160 } 3161 3162 static int 3163 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3164 size_t count, register_t *retval) 3165 { 3166 struct vnode *vp; 3167 struct iovec aiov; 3168 struct uio auio; 3169 int error; 3170 struct pathbuf *pb; 3171 struct nameidata nd; 3172 3173 error = pathbuf_copyin(path, &pb); 3174 if (error) { 3175 return error; 3176 } 3177 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3178 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3179 pathbuf_destroy(pb); 3180 return error; 3181 } 3182 vp = nd.ni_vp; 3183 pathbuf_destroy(pb); 3184 if (vp->v_type != VLNK) 3185 error = EINVAL; 3186 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3187 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3188 aiov.iov_base = buf; 3189 aiov.iov_len = count; 3190 auio.uio_iov = &aiov; 3191 auio.uio_iovcnt = 1; 3192 auio.uio_offset = 0; 3193 
auio.uio_rw = UIO_READ; 3194 KASSERT(l == curlwp); 3195 auio.uio_vmspace = l->l_proc->p_vmspace; 3196 auio.uio_resid = count; 3197 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3198 *retval = count - auio.uio_resid; 3199 } 3200 vput(vp); 3201 return (error); 3202 } 3203 3204 int 3205 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3206 register_t *retval) 3207 { 3208 /* { 3209 syscallarg(int) fd; 3210 syscallarg(const char *) path; 3211 syscallarg(char *) buf; 3212 syscallarg(size_t) bufsize; 3213 } */ 3214 3215 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3216 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3217 } 3218 3219 /* 3220 * Change flags of a file given a path name. 3221 */ 3222 /* ARGSUSED */ 3223 int 3224 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3225 { 3226 /* { 3227 syscallarg(const char *) path; 3228 syscallarg(u_long) flags; 3229 } */ 3230 struct vnode *vp; 3231 int error; 3232 3233 error = namei_simple_user(SCARG(uap, path), 3234 NSM_FOLLOW_TRYEMULROOT, &vp); 3235 if (error != 0) 3236 return (error); 3237 error = change_flags(vp, SCARG(uap, flags), l); 3238 vput(vp); 3239 return (error); 3240 } 3241 3242 /* 3243 * Change flags of a file given a file descriptor. 3244 */ 3245 /* ARGSUSED */ 3246 int 3247 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3248 { 3249 /* { 3250 syscallarg(int) fd; 3251 syscallarg(u_long) flags; 3252 } */ 3253 struct vnode *vp; 3254 file_t *fp; 3255 int error; 3256 3257 /* fd_getvnode() will use the descriptor for us */ 3258 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3259 return (error); 3260 vp = fp->f_data; 3261 error = change_flags(vp, SCARG(uap, flags), l); 3262 VOP_UNLOCK(vp); 3263 fd_putfile(SCARG(uap, fd)); 3264 return (error); 3265 } 3266 3267 /* 3268 * Change flags of a file given a path name; this version does 3269 * not follow links. 
3270 */ 3271 int 3272 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3273 { 3274 /* { 3275 syscallarg(const char *) path; 3276 syscallarg(u_long) flags; 3277 } */ 3278 struct vnode *vp; 3279 int error; 3280 3281 error = namei_simple_user(SCARG(uap, path), 3282 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3283 if (error != 0) 3284 return (error); 3285 error = change_flags(vp, SCARG(uap, flags), l); 3286 vput(vp); 3287 return (error); 3288 } 3289 3290 /* 3291 * Common routine to change flags of a file. 3292 */ 3293 int 3294 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3295 { 3296 struct vattr vattr; 3297 int error; 3298 3299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3300 3301 vattr_null(&vattr); 3302 vattr.va_flags = flags; 3303 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3304 3305 return (error); 3306 } 3307 3308 /* 3309 * Change mode of a file given path name; this version follows links. 3310 */ 3311 /* ARGSUSED */ 3312 int 3313 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3314 { 3315 /* { 3316 syscallarg(const char *) path; 3317 syscallarg(int) mode; 3318 } */ 3319 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3320 SCARG(uap, mode), 0); 3321 } 3322 3323 static int 3324 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3325 { 3326 int error; 3327 struct vnode *vp; 3328 namei_simple_flags_t ns_flag; 3329 3330 if (flags & AT_SYMLINK_NOFOLLOW) 3331 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3332 else 3333 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3334 3335 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3336 if (error != 0) 3337 return error; 3338 3339 error = change_mode(vp, mode, l); 3340 3341 vrele(vp); 3342 3343 return (error); 3344 } 3345 3346 /* 3347 * Change mode of a file given a file descriptor. 
3348 */ 3349 /* ARGSUSED */ 3350 int 3351 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3352 { 3353 /* { 3354 syscallarg(int) fd; 3355 syscallarg(int) mode; 3356 } */ 3357 file_t *fp; 3358 int error; 3359 3360 /* fd_getvnode() will use the descriptor for us */ 3361 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3362 return (error); 3363 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3364 fd_putfile(SCARG(uap, fd)); 3365 return (error); 3366 } 3367 3368 int 3369 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3370 register_t *retval) 3371 { 3372 /* { 3373 syscallarg(int) fd; 3374 syscallarg(const char *) path; 3375 syscallarg(int) mode; 3376 syscallarg(int) flag; 3377 } */ 3378 3379 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3380 SCARG(uap, mode), SCARG(uap, flag)); 3381 } 3382 3383 /* 3384 * Change mode of a file given path name; this version does not follow links. 3385 */ 3386 /* ARGSUSED */ 3387 int 3388 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3389 { 3390 /* { 3391 syscallarg(const char *) path; 3392 syscallarg(int) mode; 3393 } */ 3394 int error; 3395 struct vnode *vp; 3396 3397 error = namei_simple_user(SCARG(uap, path), 3398 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3399 if (error != 0) 3400 return (error); 3401 3402 error = change_mode(vp, SCARG(uap, mode), l); 3403 3404 vrele(vp); 3405 return (error); 3406 } 3407 3408 /* 3409 * Common routine to set mode given a vnode. 3410 */ 3411 static int 3412 change_mode(struct vnode *vp, int mode, struct lwp *l) 3413 { 3414 struct vattr vattr; 3415 int error; 3416 3417 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3418 vattr_null(&vattr); 3419 vattr.va_mode = mode & ALLPERMS; 3420 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3421 VOP_UNLOCK(vp); 3422 return (error); 3423 } 3424 3425 /* 3426 * Set ownership given a path name; this version follows links. 
3427 */ 3428 /* ARGSUSED */ 3429 int 3430 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3431 { 3432 /* { 3433 syscallarg(const char *) path; 3434 syscallarg(uid_t) uid; 3435 syscallarg(gid_t) gid; 3436 } */ 3437 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3438 SCARG(uap, gid), 0); 3439 } 3440 3441 static int 3442 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3443 gid_t gid, int flags) 3444 { 3445 int error; 3446 struct vnode *vp; 3447 namei_simple_flags_t ns_flag; 3448 3449 if (flags & AT_SYMLINK_NOFOLLOW) 3450 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3451 else 3452 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3453 3454 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3455 if (error != 0) 3456 return error; 3457 3458 error = change_owner(vp, uid, gid, l, 0); 3459 3460 vrele(vp); 3461 3462 return (error); 3463 } 3464 3465 /* 3466 * Set ownership given a path name; this version follows links. 3467 * Provides POSIX semantics. 3468 */ 3469 /* ARGSUSED */ 3470 int 3471 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3472 { 3473 /* { 3474 syscallarg(const char *) path; 3475 syscallarg(uid_t) uid; 3476 syscallarg(gid_t) gid; 3477 } */ 3478 int error; 3479 struct vnode *vp; 3480 3481 error = namei_simple_user(SCARG(uap, path), 3482 NSM_FOLLOW_TRYEMULROOT, &vp); 3483 if (error != 0) 3484 return (error); 3485 3486 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3487 3488 vrele(vp); 3489 return (error); 3490 } 3491 3492 /* 3493 * Set ownership given a file descriptor. 
3494 */ 3495 /* ARGSUSED */ 3496 int 3497 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3498 { 3499 /* { 3500 syscallarg(int) fd; 3501 syscallarg(uid_t) uid; 3502 syscallarg(gid_t) gid; 3503 } */ 3504 int error; 3505 file_t *fp; 3506 3507 /* fd_getvnode() will use the descriptor for us */ 3508 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3509 return (error); 3510 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3511 l, 0); 3512 fd_putfile(SCARG(uap, fd)); 3513 return (error); 3514 } 3515 3516 int 3517 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3518 register_t *retval) 3519 { 3520 /* { 3521 syscallarg(int) fd; 3522 syscallarg(const char *) path; 3523 syscallarg(uid_t) owner; 3524 syscallarg(gid_t) group; 3525 syscallarg(int) flag; 3526 } */ 3527 3528 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3529 SCARG(uap, owner), SCARG(uap, group), 3530 SCARG(uap, flag)); 3531 } 3532 3533 /* 3534 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3535 */ 3536 /* ARGSUSED */ 3537 int 3538 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3539 { 3540 /* { 3541 syscallarg(int) fd; 3542 syscallarg(uid_t) uid; 3543 syscallarg(gid_t) gid; 3544 } */ 3545 int error; 3546 file_t *fp; 3547 3548 /* fd_getvnode() will use the descriptor for us */ 3549 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3550 return (error); 3551 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3552 l, 1); 3553 fd_putfile(SCARG(uap, fd)); 3554 return (error); 3555 } 3556 3557 /* 3558 * Set ownership given a path name; this version does not follow links. 
3559 */ 3560 /* ARGSUSED */ 3561 int 3562 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3563 { 3564 /* { 3565 syscallarg(const char *) path; 3566 syscallarg(uid_t) uid; 3567 syscallarg(gid_t) gid; 3568 } */ 3569 int error; 3570 struct vnode *vp; 3571 3572 error = namei_simple_user(SCARG(uap, path), 3573 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3574 if (error != 0) 3575 return (error); 3576 3577 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3578 3579 vrele(vp); 3580 return (error); 3581 } 3582 3583 /* 3584 * Set ownership given a path name; this version does not follow links. 3585 * Provides POSIX/XPG semantics. 3586 */ 3587 /* ARGSUSED */ 3588 int 3589 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3590 { 3591 /* { 3592 syscallarg(const char *) path; 3593 syscallarg(uid_t) uid; 3594 syscallarg(gid_t) gid; 3595 } */ 3596 int error; 3597 struct vnode *vp; 3598 3599 error = namei_simple_user(SCARG(uap, path), 3600 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3601 if (error != 0) 3602 return (error); 3603 3604 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3605 3606 vrele(vp); 3607 return (error); 3608 } 3609 3610 /* 3611 * Common routine to set ownership given a vnode. 3612 */ 3613 static int 3614 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3615 int posix_semantics) 3616 { 3617 struct vattr vattr; 3618 mode_t newmode; 3619 int error; 3620 3621 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3622 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3623 goto out; 3624 3625 #define CHANGED(x) ((int)(x) != -1) 3626 newmode = vattr.va_mode; 3627 if (posix_semantics) { 3628 /* 3629 * POSIX/XPG semantics: if the caller is not the super-user, 3630 * clear set-user-id and set-group-id bits. 
Both POSIX and 3631 * the XPG consider the behaviour for calls by the super-user 3632 * implementation-defined; we leave the set-user-id and set- 3633 * group-id settings intact in that case. 3634 */ 3635 if (vattr.va_mode & S_ISUID) { 3636 if (kauth_authorize_vnode(l->l_cred, 3637 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3638 newmode &= ~S_ISUID; 3639 } 3640 if (vattr.va_mode & S_ISGID) { 3641 if (kauth_authorize_vnode(l->l_cred, 3642 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3643 newmode &= ~S_ISGID; 3644 } 3645 } else { 3646 /* 3647 * NetBSD semantics: when changing owner and/or group, 3648 * clear the respective bit(s). 3649 */ 3650 if (CHANGED(uid)) 3651 newmode &= ~S_ISUID; 3652 if (CHANGED(gid)) 3653 newmode &= ~S_ISGID; 3654 } 3655 /* Update va_mode iff altered. */ 3656 if (vattr.va_mode == newmode) 3657 newmode = VNOVAL; 3658 3659 vattr_null(&vattr); 3660 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3661 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3662 vattr.va_mode = newmode; 3663 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3664 #undef CHANGED 3665 3666 out: 3667 VOP_UNLOCK(vp); 3668 return (error); 3669 } 3670 3671 /* 3672 * Set the access and modification times given a path name; this 3673 * version follows links. 3674 */ 3675 /* ARGSUSED */ 3676 int 3677 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3678 register_t *retval) 3679 { 3680 /* { 3681 syscallarg(const char *) path; 3682 syscallarg(const struct timeval *) tptr; 3683 } */ 3684 3685 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3686 SCARG(uap, tptr), UIO_USERSPACE); 3687 } 3688 3689 /* 3690 * Set the access and modification times given a file descriptor. 
3691 */ 3692 /* ARGSUSED */ 3693 int 3694 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3695 register_t *retval) 3696 { 3697 /* { 3698 syscallarg(int) fd; 3699 syscallarg(const struct timeval *) tptr; 3700 } */ 3701 int error; 3702 file_t *fp; 3703 3704 /* fd_getvnode() will use the descriptor for us */ 3705 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3706 return (error); 3707 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3708 UIO_USERSPACE); 3709 fd_putfile(SCARG(uap, fd)); 3710 return (error); 3711 } 3712 3713 int 3714 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3715 register_t *retval) 3716 { 3717 /* { 3718 syscallarg(int) fd; 3719 syscallarg(const struct timespec *) tptr; 3720 } */ 3721 int error; 3722 file_t *fp; 3723 3724 /* fd_getvnode() will use the descriptor for us */ 3725 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3726 return (error); 3727 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3728 SCARG(uap, tptr), UIO_USERSPACE); 3729 fd_putfile(SCARG(uap, fd)); 3730 return (error); 3731 } 3732 3733 /* 3734 * Set the access and modification times given a path name; this 3735 * version does not follow links. 3736 */ 3737 int 3738 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3739 register_t *retval) 3740 { 3741 /* { 3742 syscallarg(const char *) path; 3743 syscallarg(const struct timeval *) tptr; 3744 } */ 3745 3746 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3747 SCARG(uap, tptr), UIO_USERSPACE); 3748 } 3749 3750 int 3751 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3752 register_t *retval) 3753 { 3754 /* { 3755 syscallarg(int) fd; 3756 syscallarg(const char *) path; 3757 syscallarg(const struct timespec *) tptr; 3758 syscallarg(int) flag; 3759 } */ 3760 int follow; 3761 const struct timespec *tptr; 3762 int error; 3763 3764 tptr = SCARG(uap, tptr); 3765 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 3766 3767 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3768 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3769 3770 return error; 3771 } 3772 3773 /* 3774 * Common routine to set access and modification times given a vnode. 3775 */ 3776 int 3777 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3778 const struct timespec *tptr, enum uio_seg seg) 3779 { 3780 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3781 } 3782 3783 int 3784 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3785 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3786 { 3787 struct vattr vattr; 3788 int error, dorele = 0; 3789 namei_simple_flags_t sflags; 3790 bool vanull, setbirthtime; 3791 struct timespec ts[2]; 3792 3793 KASSERT(l != NULL || fdat == AT_FDCWD); 3794 3795 /* 3796 * I have checked all callers and they pass either FOLLOW, 3797 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3798 * is 0. More to the point, they don't pass anything else. 3799 * Let's keep it that way at least until the namei interfaces 3800 * are fully sanitized. 3801 */ 3802 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3803 sflags = (flag == FOLLOW) ? 
3804 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3805 3806 if (tptr == NULL) { 3807 vanull = true; 3808 nanotime(&ts[0]); 3809 ts[1] = ts[0]; 3810 } else { 3811 vanull = false; 3812 if (seg != UIO_SYSSPACE) { 3813 error = copyin(tptr, ts, sizeof (ts)); 3814 if (error != 0) 3815 return error; 3816 } else { 3817 ts[0] = tptr[0]; 3818 ts[1] = tptr[1]; 3819 } 3820 } 3821 3822 if (ts[0].tv_nsec == UTIME_NOW) { 3823 nanotime(&ts[0]); 3824 if (ts[1].tv_nsec == UTIME_NOW) { 3825 vanull = true; 3826 ts[1] = ts[0]; 3827 } 3828 } else if (ts[1].tv_nsec == UTIME_NOW) 3829 nanotime(&ts[1]); 3830 3831 if (vp == NULL) { 3832 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3833 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3834 if (error != 0) 3835 return error; 3836 dorele = 1; 3837 } 3838 3839 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3840 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3841 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3842 vattr_null(&vattr); 3843 3844 if (ts[0].tv_nsec != UTIME_OMIT) 3845 vattr.va_atime = ts[0]; 3846 3847 if (ts[1].tv_nsec != UTIME_OMIT) { 3848 vattr.va_mtime = ts[1]; 3849 if (setbirthtime) 3850 vattr.va_birthtime = ts[1]; 3851 } 3852 3853 if (vanull) 3854 vattr.va_vaflags |= VA_UTIMES_NULL; 3855 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3856 VOP_UNLOCK(vp); 3857 3858 if (dorele != 0) 3859 vrele(vp); 3860 3861 return error; 3862 } 3863 3864 int 3865 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3866 const struct timeval *tptr, enum uio_seg seg) 3867 { 3868 struct timespec ts[2]; 3869 struct timespec *tsptr = NULL; 3870 int error; 3871 3872 if (tptr != NULL) { 3873 struct timeval tv[2]; 3874 3875 if (seg != UIO_SYSSPACE) { 3876 error = copyin(tptr, tv, sizeof (tv)); 3877 if (error != 0) 3878 return error; 3879 tptr = tv; 3880 } 3881 3882 if ((tv[0].tv_usec == UTIME_NOW) || 3883 (tv[0].tv_usec == UTIME_OMIT)) 3884 ts[0].tv_nsec = tv[0].tv_usec; 3885 else 3886 
TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3887 3888 if ((tv[1].tv_usec == UTIME_NOW) || 3889 (tv[1].tv_usec == UTIME_OMIT)) 3890 ts[1].tv_nsec = tv[1].tv_usec; 3891 else 3892 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3893 3894 tsptr = &ts[0]; 3895 } 3896 3897 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3898 } 3899 3900 /* 3901 * Truncate a file given its path name. 3902 */ 3903 /* ARGSUSED */ 3904 int 3905 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3906 { 3907 /* { 3908 syscallarg(const char *) path; 3909 syscallarg(int) pad; 3910 syscallarg(off_t) length; 3911 } */ 3912 struct vnode *vp; 3913 struct vattr vattr; 3914 int error; 3915 3916 error = namei_simple_user(SCARG(uap, path), 3917 NSM_FOLLOW_TRYEMULROOT, &vp); 3918 if (error != 0) 3919 return (error); 3920 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3921 if (vp->v_type == VDIR) 3922 error = EISDIR; 3923 else if ((error = vn_writechk(vp)) == 0 && 3924 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3925 vattr_null(&vattr); 3926 vattr.va_size = SCARG(uap, length); 3927 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3928 } 3929 vput(vp); 3930 return (error); 3931 } 3932 3933 /* 3934 * Truncate a file given a file descriptor. 
3935 */ 3936 /* ARGSUSED */ 3937 int 3938 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3939 { 3940 /* { 3941 syscallarg(int) fd; 3942 syscallarg(int) pad; 3943 syscallarg(off_t) length; 3944 } */ 3945 struct vattr vattr; 3946 struct vnode *vp; 3947 file_t *fp; 3948 int error; 3949 3950 /* fd_getvnode() will use the descriptor for us */ 3951 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3952 return (error); 3953 if ((fp->f_flag & FWRITE) == 0) { 3954 error = EINVAL; 3955 goto out; 3956 } 3957 vp = fp->f_data; 3958 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3959 if (vp->v_type == VDIR) 3960 error = EISDIR; 3961 else if ((error = vn_writechk(vp)) == 0) { 3962 vattr_null(&vattr); 3963 vattr.va_size = SCARG(uap, length); 3964 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3965 } 3966 VOP_UNLOCK(vp); 3967 out: 3968 fd_putfile(SCARG(uap, fd)); 3969 return (error); 3970 } 3971 3972 /* 3973 * Sync an open file. 3974 */ 3975 /* ARGSUSED */ 3976 int 3977 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3978 { 3979 /* { 3980 syscallarg(int) fd; 3981 } */ 3982 struct vnode *vp; 3983 file_t *fp; 3984 int error; 3985 3986 /* fd_getvnode() will use the descriptor for us */ 3987 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3988 return (error); 3989 vp = fp->f_data; 3990 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3991 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3992 VOP_UNLOCK(vp); 3993 fd_putfile(SCARG(uap, fd)); 3994 return (error); 3995 } 3996 3997 /* 3998 * Sync a range of file data. API modeled after that found in AIX. 3999 * 4000 * FDATASYNC indicates that we need only save enough metadata to be able 4001 * to re-read the written data. Note we duplicate AIX's requirement that 4002 * the file be open for writing. 
4003 */ 4004 /* ARGSUSED */ 4005 int 4006 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4007 { 4008 /* { 4009 syscallarg(int) fd; 4010 syscallarg(int) flags; 4011 syscallarg(off_t) start; 4012 syscallarg(off_t) length; 4013 } */ 4014 struct vnode *vp; 4015 file_t *fp; 4016 int flags, nflags; 4017 off_t s, e, len; 4018 int error; 4019 4020 /* fd_getvnode() will use the descriptor for us */ 4021 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4022 return (error); 4023 4024 if ((fp->f_flag & FWRITE) == 0) { 4025 error = EBADF; 4026 goto out; 4027 } 4028 4029 flags = SCARG(uap, flags); 4030 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4031 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4032 error = EINVAL; 4033 goto out; 4034 } 4035 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4036 if (flags & FDATASYNC) 4037 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4038 else 4039 nflags = FSYNC_WAIT; 4040 if (flags & FDISKSYNC) 4041 nflags |= FSYNC_CACHE; 4042 4043 len = SCARG(uap, length); 4044 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4045 if (len) { 4046 s = SCARG(uap, start); 4047 e = s + len; 4048 if (e < s) { 4049 error = EINVAL; 4050 goto out; 4051 } 4052 } else { 4053 e = 0; 4054 s = 0; 4055 } 4056 4057 vp = fp->f_data; 4058 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4059 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4060 VOP_UNLOCK(vp); 4061 out: 4062 fd_putfile(SCARG(uap, fd)); 4063 return (error); 4064 } 4065 4066 /* 4067 * Sync the data of an open file. 
4068 */ 4069 /* ARGSUSED */ 4070 int 4071 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4072 { 4073 /* { 4074 syscallarg(int) fd; 4075 } */ 4076 struct vnode *vp; 4077 file_t *fp; 4078 int error; 4079 4080 /* fd_getvnode() will use the descriptor for us */ 4081 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4082 return (error); 4083 if ((fp->f_flag & FWRITE) == 0) { 4084 fd_putfile(SCARG(uap, fd)); 4085 return (EBADF); 4086 } 4087 vp = fp->f_data; 4088 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4089 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4090 VOP_UNLOCK(vp); 4091 fd_putfile(SCARG(uap, fd)); 4092 return (error); 4093 } 4094 4095 /* 4096 * Rename files, (standard) BSD semantics frontend. 4097 */ 4098 /* ARGSUSED */ 4099 int 4100 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4101 { 4102 /* { 4103 syscallarg(const char *) from; 4104 syscallarg(const char *) to; 4105 } */ 4106 4107 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4108 SCARG(uap, to), UIO_USERSPACE, 0)); 4109 } 4110 4111 int 4112 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4113 register_t *retval) 4114 { 4115 /* { 4116 syscallarg(int) fromfd; 4117 syscallarg(const char *) from; 4118 syscallarg(int) tofd; 4119 syscallarg(const char *) to; 4120 } */ 4121 4122 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4123 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4124 } 4125 4126 /* 4127 * Rename files, POSIX semantics frontend. 4128 */ 4129 /* ARGSUSED */ 4130 int 4131 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4132 { 4133 /* { 4134 syscallarg(const char *) from; 4135 syscallarg(const char *) to; 4136 } */ 4137 4138 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4139 SCARG(uap, to), UIO_USERSPACE, 1)); 4140 } 4141 4142 /* 4143 * Rename files. 
Source and destination must either both be directories, 4144 * or both not be directories. If target is a directory, it must be empty. 4145 * If `from' and `to' refer to the same object, the value of the `retain' 4146 * argument is used to determine whether `from' will be 4147 * 4148 * (retain == 0) deleted unless `from' and `to' refer to the same 4149 * object in the file system's name space (BSD). 4150 * (retain == 1) always retained (POSIX). 4151 * 4152 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4153 */ 4154 int 4155 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4156 { 4157 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4158 } 4159 4160 static int 4161 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4162 const char *to, enum uio_seg seg, int retain) 4163 { 4164 struct pathbuf *fpb, *tpb; 4165 struct nameidata fnd, tnd; 4166 struct vnode *fdvp, *fvp; 4167 struct vnode *tdvp, *tvp; 4168 struct mount *mp, *tmp; 4169 int error; 4170 4171 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4172 4173 error = pathbuf_maybe_copyin(from, seg, &fpb); 4174 if (error) 4175 goto out0; 4176 KASSERT(fpb != NULL); 4177 4178 error = pathbuf_maybe_copyin(to, seg, &tpb); 4179 if (error) 4180 goto out1; 4181 KASSERT(tpb != NULL); 4182 4183 /* 4184 * Lookup from. 4185 * 4186 * XXX LOCKPARENT is wrong because we don't actually want it 4187 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4188 * insane, so for the time being we need to leave it like this. 4189 */ 4190 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4191 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4192 goto out2; 4193 4194 /* 4195 * Pull out the important results of the lookup, fdvp and fvp. 4196 * Of course, fvp is bogus because we're about to unlock fdvp. 
4197 */ 4198 fdvp = fnd.ni_dvp; 4199 fvp = fnd.ni_vp; 4200 KASSERT(fdvp != NULL); 4201 KASSERT(fvp != NULL); 4202 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4203 4204 /* 4205 * Make sure neither fdvp nor fvp is locked. 4206 */ 4207 if (fdvp != fvp) 4208 VOP_UNLOCK(fdvp); 4209 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4210 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4211 4212 /* 4213 * Reject renaming `.' and `..'. Can't do this until after 4214 * namei because we need namei's parsing to find the final 4215 * component name. (namei should just leave us with the final 4216 * component name and not look it up itself, but anyway...) 4217 * 4218 * This was here before because we used to relookup from 4219 * instead of to and relookup requires the caller to check 4220 * this, but now file systems may depend on this check, so we 4221 * must retain it until the file systems are all rototilled. 4222 */ 4223 if (((fnd.ni_cnd.cn_namelen == 1) && 4224 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4225 ((fnd.ni_cnd.cn_namelen == 2) && 4226 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4227 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4228 error = EINVAL; /* XXX EISDIR? */ 4229 goto abort0; 4230 } 4231 4232 /* 4233 * Lookup to. 4234 * 4235 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4236 * fvp here to decide whether to add CREATEDIR is a load of 4237 * bollocks because fvp might be the wrong node by now, since 4238 * fdvp is unlocked. 4239 * 4240 * XXX Why not pass CREATEDIR always? 4241 */ 4242 NDINIT(&tnd, RENAME, 4243 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4244 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4245 tpb); 4246 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4247 goto abort0; 4248 4249 /* 4250 * Pull out the important results of the lookup, tdvp and tvp. 4251 * Of course, tvp is bogus because we're about to unlock tdvp. 
4252 */ 4253 tdvp = tnd.ni_dvp; 4254 tvp = tnd.ni_vp; 4255 KASSERT(tdvp != NULL); 4256 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4257 4258 /* 4259 * Make sure neither tdvp nor tvp is locked. 4260 */ 4261 if (tdvp != tvp) 4262 VOP_UNLOCK(tdvp); 4263 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4264 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4265 4266 /* 4267 * Reject renaming onto `.' or `..'. relookup is unhappy with 4268 * these, which is why we must do this here. Once upon a time 4269 * we relooked up from instead of to, and consequently didn't 4270 * need this check, but now that we relookup to instead of 4271 * from, we need this; and we shall need it forever forward 4272 * until the VOP_RENAME protocol changes, because file systems 4273 * will no doubt begin to depend on this check. 4274 */ 4275 if (((tnd.ni_cnd.cn_namelen == 1) && 4276 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4277 ((tnd.ni_cnd.cn_namelen == 2) && 4278 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4279 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4280 error = EINVAL; /* XXX EISDIR? */ 4281 goto abort1; 4282 } 4283 4284 /* 4285 * Get the mount point. If the file system has been unmounted, 4286 * which it may be because we're not holding any vnode locks, 4287 * then v_mount will be NULL. We're not really supposed to 4288 * read v_mount without holding the vnode lock, but since we 4289 * have fdvp referenced, if fdvp->v_mount changes then at worst 4290 * it will be set to NULL, not changed to another mount point. 4291 * And, of course, since it is up to the file system to 4292 * determine the real lock order, we can't lock both fdvp and 4293 * tdvp at the same time. 4294 */ 4295 mp = fdvp->v_mount; 4296 if (mp == NULL) { 4297 error = ENOENT; 4298 goto abort1; 4299 } 4300 4301 /* 4302 * Make sure the mount points match. 
Again, although we don't 4303 * hold any vnode locks, the v_mount fields may change -- but 4304 * at worst they will change to NULL, so this will never become 4305 * a cross-device rename, because we hold vnode references. 4306 * 4307 * XXX Because nothing is locked and the compiler may reorder 4308 * things here, unmounting the file system at an inopportune 4309 * moment may cause rename to fail with ENXDEV when it really 4310 * should fail with ENOENT. 4311 */ 4312 tmp = tdvp->v_mount; 4313 if (tmp == NULL) { 4314 error = ENOENT; 4315 goto abort1; 4316 } 4317 4318 if (mp != tmp) { 4319 error = EXDEV; 4320 goto abort1; 4321 } 4322 4323 /* 4324 * Take the vfs rename lock to avoid cross-directory screw cases. 4325 * Nothing is locked currently, so taking this lock is safe. 4326 */ 4327 error = VFS_RENAMELOCK_ENTER(mp); 4328 if (error) 4329 goto abort1; 4330 4331 /* 4332 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4333 * and nothing is locked except for the vfs rename lock. 4334 * 4335 * The next step is a little rain dance to conform to the 4336 * insane lock protocol, even though it does nothing to ward 4337 * off race conditions. 4338 * 4339 * We need tdvp and tvp to be locked. However, because we have 4340 * unlocked tdvp in order to hold no locks while we take the 4341 * vfs rename lock, tvp may be wrong here, and we can't safely 4342 * lock it even if the sensible file systems will just unlock 4343 * it straight away. Consequently, we must lock tdvp and then 4344 * relookup tvp to get it locked. 4345 * 4346 * Finally, because the VOP_RENAME protocol is brain-damaged 4347 * and various file systems insanely depend on the semantics of 4348 * this brain damage, the lookup of to must be the last lookup 4349 * before VOP_RENAME. 
4350 */ 4351 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4352 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4353 if (error) 4354 goto abort2; 4355 4356 /* 4357 * Drop the old tvp and pick up the new one -- which might be 4358 * the same, but that doesn't matter to us. After this, tdvp 4359 * and tvp should both be locked. 4360 */ 4361 if (tvp != NULL) 4362 vrele(tvp); 4363 tvp = tnd.ni_vp; 4364 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4365 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4366 4367 /* 4368 * The old do_sys_rename had various consistency checks here 4369 * involving fvp and tvp. fvp is bogus already here, and tvp 4370 * will become bogus soon in any sensible file system, so the 4371 * only purpose in putting these checks here is to give lip 4372 * service to these screw cases and to acknowledge that they 4373 * exist, not actually to handle them, but here you go 4374 * anyway... 4375 */ 4376 4377 /* 4378 * Acknowledge that directories and non-directories aren't 4379 * suposed to mix. 4380 */ 4381 if (tvp != NULL) { 4382 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4383 error = ENOTDIR; 4384 goto abort3; 4385 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4386 error = EISDIR; 4387 goto abort3; 4388 } 4389 } 4390 4391 /* 4392 * Acknowledge some random screw case, among the dozens that 4393 * might arise. 4394 */ 4395 if (fvp == tdvp) { 4396 error = EINVAL; 4397 goto abort3; 4398 } 4399 4400 /* 4401 * Acknowledge that POSIX has a wacky screw case. 4402 * 4403 * XXX Eventually the retain flag needs to be passed on to 4404 * VOP_RENAME. 4405 */ 4406 if (fvp == tvp) { 4407 if (retain) { 4408 error = 0; 4409 goto abort3; 4410 } else if ((fdvp == tdvp) && 4411 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4412 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4413 fnd.ni_cnd.cn_namelen))) { 4414 error = 0; 4415 goto abort3; 4416 } 4417 } 4418 4419 /* 4420 * Make sure veriexec can screw us up. 
(But a race can screw 4421 * up veriexec, of course -- remember, fvp and (soon) tvp are 4422 * bogus.) 4423 */ 4424 #if NVERIEXEC > 0 4425 { 4426 char *f1, *f2; 4427 size_t f1_len; 4428 size_t f2_len; 4429 4430 f1_len = fnd.ni_cnd.cn_namelen + 1; 4431 f1 = kmem_alloc(f1_len, KM_SLEEP); 4432 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4433 4434 f2_len = tnd.ni_cnd.cn_namelen + 1; 4435 f2 = kmem_alloc(f2_len, KM_SLEEP); 4436 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4437 4438 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4439 4440 kmem_free(f1, f1_len); 4441 kmem_free(f2, f2_len); 4442 4443 if (error) 4444 goto abort3; 4445 } 4446 #endif /* NVERIEXEC > 0 */ 4447 4448 /* 4449 * All ready. Incant the rename vop. 4450 */ 4451 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4452 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4453 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4454 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4455 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4456 4457 /* 4458 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4459 * tdvp and tvp. But we can't assert any of that. 4460 */ 4461 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4462 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4463 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4464 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4465 4466 /* 4467 * So all we have left to do is to drop the rename lock and 4468 * destroy the pathbufs. 
4469 */ 4470 VFS_RENAMELOCK_EXIT(mp); 4471 goto out2; 4472 4473 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4474 VOP_UNLOCK(tvp); 4475 abort2: VOP_UNLOCK(tdvp); 4476 VFS_RENAMELOCK_EXIT(mp); 4477 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4478 vrele(tdvp); 4479 if (tvp != NULL) 4480 vrele(tvp); 4481 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4482 vrele(fdvp); 4483 vrele(fvp); 4484 out2: pathbuf_destroy(tpb); 4485 out1: pathbuf_destroy(fpb); 4486 out0: return error; 4487 } 4488 4489 /* 4490 * Make a directory file. 4491 */ 4492 /* ARGSUSED */ 4493 int 4494 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4495 { 4496 /* { 4497 syscallarg(const char *) path; 4498 syscallarg(int) mode; 4499 } */ 4500 4501 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4502 SCARG(uap, mode), UIO_USERSPACE); 4503 } 4504 4505 int 4506 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4507 register_t *retval) 4508 { 4509 /* { 4510 syscallarg(int) fd; 4511 syscallarg(const char *) path; 4512 syscallarg(int) mode; 4513 } */ 4514 4515 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4516 SCARG(uap, mode), UIO_USERSPACE); 4517 } 4518 4519 4520 int 4521 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4522 { 4523 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4524 } 4525 4526 static int 4527 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4528 enum uio_seg seg) 4529 { 4530 struct proc *p = curlwp->l_proc; 4531 struct vnode *vp; 4532 struct vattr vattr; 4533 int error; 4534 struct pathbuf *pb; 4535 struct nameidata nd; 4536 4537 KASSERT(l != NULL || fdat == AT_FDCWD); 4538 4539 /* XXX bollocks, should pass in a pathbuf */ 4540 error = pathbuf_maybe_copyin(path, seg, &pb); 4541 if (error) { 4542 return error; 4543 } 4544 4545 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4546 4547 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4548 pathbuf_destroy(pb); 4549 return (error); 
4550 } 4551 vp = nd.ni_vp; 4552 if (vp != NULL) { 4553 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4554 if (nd.ni_dvp == vp) 4555 vrele(nd.ni_dvp); 4556 else 4557 vput(nd.ni_dvp); 4558 vrele(vp); 4559 pathbuf_destroy(pb); 4560 return (EEXIST); 4561 } 4562 vattr_null(&vattr); 4563 vattr.va_type = VDIR; 4564 /* We will read cwdi->cwdi_cmask unlocked. */ 4565 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4566 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4567 if (!error) 4568 vput(nd.ni_vp); 4569 pathbuf_destroy(pb); 4570 return (error); 4571 } 4572 4573 /* 4574 * Remove a directory file. 4575 */ 4576 /* ARGSUSED */ 4577 int 4578 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4579 { 4580 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4581 AT_REMOVEDIR, UIO_USERSPACE); 4582 } 4583 4584 /* 4585 * Read a block of directory entries in a file system independent format. 4586 */ 4587 int 4588 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4589 { 4590 /* { 4591 syscallarg(int) fd; 4592 syscallarg(char *) buf; 4593 syscallarg(size_t) count; 4594 } */ 4595 file_t *fp; 4596 int error, done; 4597 4598 /* fd_getvnode() will use the descriptor for us */ 4599 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4600 return (error); 4601 if ((fp->f_flag & FREAD) == 0) { 4602 error = EBADF; 4603 goto out; 4604 } 4605 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4606 SCARG(uap, count), &done, l, 0, 0); 4607 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4608 *retval = done; 4609 out: 4610 fd_putfile(SCARG(uap, fd)); 4611 return (error); 4612 } 4613 4614 /* 4615 * Set the mode mask for creation of filesystem nodes. 
4616 */ 4617 int 4618 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4619 { 4620 /* { 4621 syscallarg(mode_t) newmask; 4622 } */ 4623 struct proc *p = l->l_proc; 4624 struct cwdinfo *cwdi; 4625 4626 /* 4627 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4628 * important is that we serialize changes to the mask. The 4629 * rw_exit() will issue a write memory barrier on our behalf, 4630 * and force the changes out to other CPUs (as it must use an 4631 * atomic operation, draining the local CPU's store buffers). 4632 */ 4633 cwdi = p->p_cwdi; 4634 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4635 *retval = cwdi->cwdi_cmask; 4636 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4637 rw_exit(&cwdi->cwdi_lock); 4638 4639 return (0); 4640 } 4641 4642 int 4643 dorevoke(struct vnode *vp, kauth_cred_t cred) 4644 { 4645 struct vattr vattr; 4646 int error, fs_decision; 4647 4648 vn_lock(vp, LK_SHARED | LK_RETRY); 4649 error = VOP_GETATTR(vp, &vattr, cred); 4650 VOP_UNLOCK(vp); 4651 if (error != 0) 4652 return error; 4653 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4654 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4655 fs_decision); 4656 if (!error) 4657 VOP_REVOKE(vp, REVOKEALL); 4658 return (error); 4659 } 4660 4661 /* 4662 * Void all references to file by ripping underlying filesystem 4663 * away from vnode. 4664 */ 4665 /* ARGSUSED */ 4666 int 4667 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4668 { 4669 /* { 4670 syscallarg(const char *) path; 4671 } */ 4672 struct vnode *vp; 4673 int error; 4674 4675 error = namei_simple_user(SCARG(uap, path), 4676 NSM_FOLLOW_TRYEMULROOT, &vp); 4677 if (error != 0) 4678 return (error); 4679 error = dorevoke(vp, l->l_cred); 4680 vrele(vp); 4681 return (error); 4682 } 4683